diff options
author | alexnick <alexnick@yandex-team.ru> | 2022-02-10 16:47:45 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:45 +0300 |
commit | b609303efcd1218868ca0eca806ea3cea2e01a8b (patch) | |
tree | c0748b5dcbade83af788c0abfa89c0383d6b779c | |
parent | 80ba7327fdd90a6281bcec18f03e84ff856c3559 (diff) | |
download | ydb-b609303efcd1218868ca0eca806ea3cea2e01a8b.tar.gz |
Restoring authorship annotation for <alexnick@yandex-team.ru>. Commit 2 of 2.
270 files changed, 29888 insertions, 29888 deletions
diff --git a/contrib/libs/aws-sdk-cpp/ya.make b/contrib/libs/aws-sdk-cpp/ya.make index c41743553c..d9fcb0818d 100644 --- a/contrib/libs/aws-sdk-cpp/ya.make +++ b/contrib/libs/aws-sdk-cpp/ya.make @@ -11,7 +11,7 @@ RECURSE( aws-cpp-sdk-s3 aws-cpp-sdk-s3-integration-tests aws-cpp-sdk-sqs - aws-cpp-sdk-kinesis + aws-cpp-sdk-kinesis aws-cpp-sdk-transfer testing-resources ) diff --git a/contrib/libs/grpc/src/core/ext/transport/chttp2/transport/flow_control.cc b/contrib/libs/grpc/src/core/ext/transport/chttp2/transport/flow_control.cc index 7f5715b831..fd76c4b3d9 100644 --- a/contrib/libs/grpc/src/core/ext/transport/chttp2/transport/flow_control.cc +++ b/contrib/libs/grpc/src/core/ext/transport/chttp2/transport/flow_control.cc @@ -236,17 +236,17 @@ grpc_error* StreamFlowControl::RecvData(int64_t incoming_frame_size) { int64_t acked_stream_window = announced_window_delta_ + acked_init_window; int64_t sent_stream_window = announced_window_delta_ + sent_init_window; if (incoming_frame_size > acked_stream_window) { - //hotfix - do not check incoming_frame_size <= sent_stream_window - with old grpc version this is not true - gpr_log(GPR_ERROR, - "Incoming frame of size %" PRId64 - " exceeds local window size of %" PRId64 - ".\n" - "The (un-acked, future) window size would be %" PRId64 - " which is not exceeded.\n" - "This would usually cause a disconnection, but allowing it due to" - "broken HTTP2 implementations in the wild.\n" - "See (for example) https://github.com/netty/netty/issues/6520.", - incoming_frame_size, acked_stream_window, sent_stream_window); + //hotfix - do not check incoming_frame_size <= sent_stream_window - with old grpc version this is not true + gpr_log(GPR_ERROR, + "Incoming frame of size %" PRId64 + " exceeds local window size of %" PRId64 + ".\n" + "The (un-acked, future) window size would be %" PRId64 + " which is not exceeded.\n" + "This would usually cause a disconnection, but allowing it due to" + "broken HTTP2 implementations in the wild.\n" + "See (for example) https://github.com/netty/netty/issues/6520.", + incoming_frame_size, acked_stream_window, sent_stream_window); } UpdateAnnouncedWindowDelta(tfc_, -incoming_frame_size); diff --git a/contrib/tools/protoc/plugins/cpp_styleguide/cpp_styleguide.cpp b/contrib/tools/protoc/plugins/cpp_styleguide/cpp_styleguide.cpp index 03c3d8ab46..cd96aa0565 100644 --- a/contrib/tools/protoc/plugins/cpp_styleguide/cpp_styleguide.cpp +++ b/contrib/tools/protoc/plugins/cpp_styleguide/cpp_styleguide.cpp @@ -20,23 +20,23 @@ namespace NPlugins { typedef std::map<TProtoStringType, TProtoStringType> TVariables; - bool GenerateYaStyle(const FileDescriptor* fileDescriptor) { - const auto& extension = fileDescriptor->FindExtensionByName("GenerateYaStyle"); - return extension; - } - - bool GenerateYaStyle(const FieldDescriptor* descriptor) { - const auto& fileDescriptor = descriptor->file(); - return GenerateYaStyle(fileDescriptor); - } - - + bool GenerateYaStyle(const FileDescriptor* fileDescriptor) { + const auto& extension = fileDescriptor->FindExtensionByName("GenerateYaStyle"); + return extension; + } + + bool GenerateYaStyle(const FieldDescriptor* descriptor) { + const auto& fileDescriptor = descriptor->file(); + return GenerateYaStyle(fileDescriptor); + } + + void SetCommonFieldVariables(const FieldDescriptor* descriptor, TVariables* variables) { - const auto& name = descriptor->name(); - if (GenerateYaStyle(descriptor)) - (*variables)["rname"] = UnderscoresToCamelCase(name, true); - else - (*variables)["rname"] = name; + const auto& name = descriptor->name(); + if (GenerateYaStyle(descriptor)) + (*variables)["rname"] = UnderscoresToCamelCase(name, true); + else + (*variables)["rname"] = name; (*variables)["name"] = FieldName(descriptor); } @@ -596,11 +596,11 @@ namespace NPlugins { void GenerateDeclarations(io::Printer* printer) const { printer->Print(Variables_, "$camel_oneof_name$Case Get$rname$Case() const { return $rname$_case(); }\n"); printer->Print(Variables_, "void Clear$rname$() { clear_$rname$(); }\n"); - - if (Descriptor_->name() != UnderscoresToCamelCase(Descriptor_->name(), true)) { - printer->Print(Variables_, "$camel_oneof_name$Case Get$camel_oneof_name$Case() const { return $rname$_case(); }\n"); - printer->Print(Variables_, "void Clear$camel_oneof_name$() { clear_$rname$(); }\n"); - } + + if (Descriptor_->name() != UnderscoresToCamelCase(Descriptor_->name(), true)) { + printer->Print(Variables_, "$camel_oneof_name$Case Get$camel_oneof_name$Case() const { return $rname$_case(); }\n"); + printer->Print(Variables_, "void Clear$camel_oneof_name$() { clear_$rname$(); }\n"); + } } private: @@ -699,15 +699,15 @@ namespace NPlugins { } } - void GenerateTypedefOutputExtension(bool nested) { - GenerateTypedefOutput(nested); - + void GenerateTypedefOutputExtension(bool nested) { + GenerateTypedefOutput(nested); + for (auto& nestedGenerator: NestedGenerators_) { nestedGenerator.GenerateTypedefOutputExtension(true); - } - } - - + } + } + + void GenerateClassExtension() { GenerateDebugStringImplementation(); for (auto& nestedGenerator: NestedGenerators_) { @@ -901,29 +901,29 @@ namespace NPlugins { } } - void GenerateTypedefOutput(bool nested) { - if (!GenerateYaStyle(Descriptor_->file())) - return; - TProtoStringType fileName = HeaderFileName(Descriptor_->file()); - TProtoStringType scope = nested ? "class_scope:" + Descriptor_->full_name().substr(0, - Descriptor_->full_name().size() - Descriptor_->name().size() - 1) - : "namespace_scope"; + void GenerateTypedefOutput(bool nested) { + if (!GenerateYaStyle(Descriptor_->file())) + return; + TProtoStringType fileName = HeaderFileName(Descriptor_->file()); + TProtoStringType scope = nested ? "class_scope:" + Descriptor_->full_name().substr(0, + Descriptor_->full_name().size() - Descriptor_->name().size() - 1) + : "namespace_scope"; std::unique_ptr<io::ZeroCopyOutputStream> output( - OutputDirectory_->OpenForInsert(fileName, scope)); - io::Printer printer(output.get(), '$'); - TString name = Descriptor_->name(); - bool isOk = name.size() >= 2 && name[0] == 'T' && name[1] >= 'A' && name[1] <= 'Z'; - if (!isOk) { - printer.Print("// Yandex typedef extension\n"); - TVariables vars; - vars["class"] = name; - vars["base_class"] = ClassName(Descriptor_, true); - printer.Print(vars, "typedef $base_class$ T$class$;\n"); - printer.Print("// End of Yandex typedef extension\n"); - } - } - - + OutputDirectory_->OpenForInsert(fileName, scope)); + io::Printer printer(output.get(), '$'); + TString name = Descriptor_->name(); + bool isOk = name.size() >= 2 && name[0] == 'T' && name[1] >= 'A' && name[1] <= 'Z'; + if (!isOk) { + printer.Print("// Yandex typedef extension\n"); + TVariables vars; + vars["class"] = name; + vars["base_class"] = ClassName(Descriptor_, true); + printer.Print(vars, "typedef $base_class$ T$class$;\n"); + printer.Print("// End of Yandex typedef extension\n"); + } + } + + private: const Descriptor* Descriptor_; TProtoStringType Classname_; diff --git a/library/cpp/actors/core/actor.cpp b/library/cpp/actors/core/actor.cpp index 3d67d080d8..6f9ba6a42b 100644 --- a/library/cpp/actors/core/actor.cpp +++ b/library/cpp/actors/core/actor.cpp @@ -41,10 +41,10 @@ namespace NActors { TlsActivationContext->ExecutorThread.Schedule(deadline, ev, cookie); } - void TActivationContext::Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) { + void TActivationContext::Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) { TlsActivationContext->ExecutorThread.Schedule(delta, ev, cookie); - } - + } + bool TActorIdentity::Send(const TActorId& recipient, IEventBase* ev, ui32 flags, ui64 cookie, NWilson::TTraceId traceId) const { return TActivationContext::Send(new IEventHandle(recipient, *this, ev, flags, cookie, nullptr, std::move(traceId))); } diff --git a/library/cpp/actors/core/actor.h b/library/cpp/actors/core/actor.h index 1575f22d22..ed29bd14b9 100644 --- a/library/cpp/actors/core/actor.h +++ b/library/cpp/actors/core/actor.h @@ -62,7 +62,7 @@ namespace NActors { * @param ev the event to send * @param cookie cookie that will be piggybacked with event */ - static void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr); + static void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr); static TInstant Now(); static TMonotonic Monotonic(); diff --git a/library/cpp/actors/http/http.cpp b/library/cpp/actors/http/http.cpp index c2ebcec98f..7125f9d8b0 100644 --- a/library/cpp/actors/http/http.cpp +++ b/library/cpp/actors/http/http.cpp @@ -111,65 +111,65 @@ void THttpParser<THttpRequest, TSocketBuffer>::Advance(size_t len) { break; } case EParseStage::Body: { - if (!ContentLength.empty()) { - if (ProcessData(Content, data, FromString(ContentLength))) { - Body = Content; - Stage = EParseStage::Done; - } - } else if (TransferEncoding == "chunked") { - Stage = EParseStage::ChunkLength; - } else { - // Invalid body encoding - Stage = EParseStage::Error; + if (!ContentLength.empty()) { + if (ProcessData(Content, data, FromString(ContentLength))) { + Body = Content; + Stage = EParseStage::Done; + } + } else if (TransferEncoding == "chunked") { + Stage = EParseStage::ChunkLength; + } else { + // Invalid body encoding + Stage = EParseStage::Error; + } + break; + } + case EParseStage::ChunkLength: { + if (ProcessData(Line, data, "\r\n", MaxChunkLengthSize)) { + if (!Line.empty()) { + ChunkLength = ParseHex(Line); + if (ChunkLength <= MaxChunkSize) { + ContentSize = Content.size() + ChunkLength; + if (ContentSize <= MaxChunkContentSize) { + Stage = EParseStage::ChunkData; + Line.Clear(); + } else { + // Invalid chunk content length + Stage = EParseStage::Error; + } + } else { + // Invalid chunk length + Stage = EParseStage::Error; + } + } else { + // Invalid body encoding + Stage = EParseStage::Error; + } } break; } - case EParseStage::ChunkLength: { - if (ProcessData(Line, data, "\r\n", MaxChunkLengthSize)) { - if (!Line.empty()) { - ChunkLength = ParseHex(Line); - if (ChunkLength <= MaxChunkSize) { - ContentSize = Content.size() + ChunkLength; - if (ContentSize <= MaxChunkContentSize) { - Stage = EParseStage::ChunkData; - Line.Clear(); - } else { - // Invalid chunk content length - Stage = EParseStage::Error; - } - } else { - // Invalid chunk length - Stage = EParseStage::Error; - } - } else { - // Invalid body encoding - Stage = EParseStage::Error; - } - } - break; - } - case EParseStage::ChunkData: { - if (!IsError()) { - if (ProcessData(Content, data, ContentSize)) { - if (ProcessData(Line, data, 2)) { - if (Line == "\r\n") { - if (ChunkLength == 0) { - Body = Content; - Stage = EParseStage::Done; - } else { - Stage = EParseStage::ChunkLength; - } - Line.Clear(); - } else { - // Invalid body encoding - Stage = EParseStage::Error; - } - } - } - } - break; - } - + case EParseStage::ChunkData: { + if (!IsError()) { + if (ProcessData(Content, data, ContentSize)) { + if (ProcessData(Line, data, 2)) { + if (Line == "\r\n") { + if (ChunkLength == 0) { + Body = Content; + Stage = EParseStage::Done; + } else { + Stage = EParseStage::ChunkLength; + } + Line.Clear(); + } else { + // Invalid body encoding + Stage = EParseStage::Error; + } + } + } + } + break; + } + case EParseStage::Done: case EParseStage::Error: { data.Clear(); diff --git a/library/cpp/actors/testlib/test_runtime.cpp b/library/cpp/actors/testlib/test_runtime.cpp index 4bb7c79009..6fa25b9965 100644 --- a/library/cpp/actors/testlib/test_runtime.cpp +++ b/library/cpp/actors/testlib/test_runtime.cpp @@ -1708,13 +1708,13 @@ namespace NActors { Y_FAIL("Don't use this method."); } - TActorSystem* TTestActorRuntimeBase::GetActorSystem(ui32 nodeId) { - auto it = Nodes.find(GetNodeId(nodeId)); - Y_VERIFY(it != Nodes.end()); - return it->second->ActorSystem.Get(); - } + TActorSystem* TTestActorRuntimeBase::GetActorSystem(ui32 nodeId) { + auto it = Nodes.find(GetNodeId(nodeId)); + Y_VERIFY(it != Nodes.end()); + return it->second->ActorSystem.Get(); + } + - TEventMailBox& TTestActorRuntimeBase::GetMailbox(ui32 nodeId, ui32 hint) { TGuard<TMutex> guard(Mutex); auto mboxId = TEventMailboxId(nodeId, hint); diff --git a/library/cpp/actors/testlib/test_runtime.h b/library/cpp/actors/testlib/test_runtime.h index 8fe2ce88c9..26e3b45c98 100644 --- a/library/cpp/actors/testlib/test_runtime.h +++ b/library/cpp/actors/testlib/test_runtime.h @@ -286,7 +286,7 @@ namespace NActors { TActorSystem* SingleSys() const; TActorSystem* GetAnyNodeActorSystem(); - TActorSystem* GetActorSystem(ui32 nodeId); + TActorSystem* GetActorSystem(ui32 nodeId); template <typename TEvent> TEvent* GrabEdgeEventIf(TAutoPtr<IEventHandle>& handle, std::function<bool(const TEvent&)> predicate, TDuration simTimeout = TDuration::Max()) { handle.Destroy(); diff --git a/library/cpp/containers/disjoint_interval_tree/disjoint_interval_tree.h b/library/cpp/containers/disjoint_interval_tree/disjoint_interval_tree.h index 53108ad90f..1f899c9991 100644 --- a/library/cpp/containers/disjoint_interval_tree/disjoint_interval_tree.h +++ b/library/cpp/containers/disjoint_interval_tree/disjoint_interval_tree.h @@ -222,15 +222,15 @@ private: // try to extend interval if (p != Tree.end() && p->second == begin) { p->second = end; - //Try to merge 2 intervals - p and next one if possible - auto next = p; - // Next is not Tree.end() here. - ++next; - if (next != Tree.end() && next->first == end) { - p->second = next->second; - Tree.erase(next); - } - // Maybe new interval extends right interval + //Try to merge 2 intervals - p and next one if possible + auto next = p; + // Next is not Tree.end() here. + ++next; + if (next != Tree.end() && next->first == end) { + p->second = next->second; + Tree.erase(next); + } + // Maybe new interval extends right interval } else if (l != Tree.end() && end == l->first) { T& leftBorder = const_cast<T&>(l->first); // Optimization hack. diff --git a/library/cpp/containers/disjoint_interval_tree/ut/disjoint_interval_tree_ut.cpp b/library/cpp/containers/disjoint_interval_tree/ut/disjoint_interval_tree_ut.cpp index a9a9fd1a07..8474ae89b0 100644 --- a/library/cpp/containers/disjoint_interval_tree/ut/disjoint_interval_tree_ut.cpp +++ b/library/cpp/containers/disjoint_interval_tree/ut/disjoint_interval_tree_ut.cpp @@ -55,22 +55,22 @@ Y_UNIT_TEST_SUITE(DisjointIntervalTreeTest) { UNIT_ASSERT_VALUES_EQUAL(begin->first, 2); UNIT_ASSERT_VALUES_EQUAL(begin->second, 7); } - - // Merge all intervals. - { - TDisjointIntervalTree<ui64> tree; - tree.InsertInterval(0, 3); - tree.InsertInterval(6, 10); - tree.InsertInterval(3, 6); - - UNIT_ASSERT_VALUES_EQUAL(tree.GetNumIntervals(), 1); - UNIT_ASSERT_VALUES_EQUAL(tree.GetNumElements(), 10); - - auto begin = tree.begin(); - UNIT_ASSERT_VALUES_EQUAL(begin->first, 0); - UNIT_ASSERT_VALUES_EQUAL(begin->second, 10); - } - + + // Merge all intervals. + { + TDisjointIntervalTree<ui64> tree; + tree.InsertInterval(0, 3); + tree.InsertInterval(6, 10); + tree.InsertInterval(3, 6); + + UNIT_ASSERT_VALUES_EQUAL(tree.GetNumIntervals(), 1); + UNIT_ASSERT_VALUES_EQUAL(tree.GetNumElements(), 10); + + auto begin = tree.begin(); + UNIT_ASSERT_VALUES_EQUAL(begin->first, 0); + UNIT_ASSERT_VALUES_EQUAL(begin->second, 10); + } + } Y_UNIT_TEST(EraseIntervalTest) { diff --git a/util/stream/null.cpp b/util/stream/null.cpp index ca43137c23..4e8b298145 100644 --- a/util/stream/null.cpp +++ b/util/stream/null.cpp @@ -3,7 +3,7 @@ #include <util/generic/singleton.h> TNullIO& NPrivate::StdNullStream() noexcept { - return *SingletonWithPriority<TNullIO, 4>(); + return *SingletonWithPriority<TNullIO, 4>(); } TNullInput::TNullInput() noexcept { diff --git a/ydb/core/base/appdata.cpp b/ydb/core/base/appdata.cpp index 1120b3e027..f9e517fc42 100644 --- a/ydb/core/base/appdata.cpp +++ b/ydb/core/base/appdata.cpp @@ -35,8 +35,8 @@ TAppData::TDefaultTabletTypes::TDefaultTabletTypes() : SchemeShard(TTabletTypes::FLAT_SCHEMESHARD) , DataShard(TTabletTypes::FLAT_DATASHARD) , KeyValue(TTabletTypes::KEYVALUEFLAT) - , PersQueue(TTabletTypes::PERSQUEUE) - , PersQueueReadBalancer(TTabletTypes::PERSQUEUE_READ_BALANCER) + , PersQueue(TTabletTypes::PERSQUEUE) + , PersQueueReadBalancer(TTabletTypes::PERSQUEUE_READ_BALANCER) , Dummy(TTabletTypes::TX_DUMMY) , Coordinator(TTabletTypes::FLAT_TX_COORDINATOR) , Mediator(TTabletTypes::TX_MEDIATOR) diff --git a/ydb/core/base/appdata.h b/ydb/core/base/appdata.h index c962dca232..c666f7468c 100644 --- a/ydb/core/base/appdata.h +++ b/ydb/core/base/appdata.h @@ -52,14 +52,14 @@ namespace NDataShard { class IExportFactory; } -namespace NSQS { +namespace NSQS { class IEventsWriterFactory; class IAuthFactory; -} - -namespace NPQ { +} + +namespace NPQ { class IPersQueueMirrorReaderFactory; -} +} class TFormatFactory; diff --git a/ydb/core/base/blobstorage_grouptype.cpp b/ydb/core/base/blobstorage_grouptype.cpp index 50d5deec72..66c79ba42c 100644 --- a/ydb/core/base/blobstorage_grouptype.cpp +++ b/ydb/core/base/blobstorage_grouptype.cpp @@ -31,7 +31,7 @@ static TString DebugFormatBits(ui64 value) { namespace NKikimr { -struct TBlobStorageErasureParameters { +struct TBlobStorageErasureParameters { ui32 Handoff; // number of selected hinted handoff (1 | 2) }; @@ -60,12 +60,12 @@ static const std::array<TBlobStorageErasureParameters, TErasureType::ErasureSpec ui32 TBlobStorageGroupType::BlobSubgroupSize() const { - const TBlobStorageErasureParameters& erasure = BlobStorageGroupErasureSpeciesParameters[ErasureSpecies]; - return DataParts() + ParityParts() + erasure.Handoff; + const TBlobStorageErasureParameters& erasure = BlobStorageGroupErasureSpeciesParameters[ErasureSpecies]; + return DataParts() + ParityParts() + erasure.Handoff; } ui32 TBlobStorageGroupType::Handoff() const { - const TBlobStorageErasureParameters& erasure = BlobStorageGroupErasureSpeciesParameters[ErasureSpecies]; + const TBlobStorageErasureParameters& erasure = BlobStorageGroupErasureSpeciesParameters[ErasureSpecies]; return erasure.Handoff; } diff --git a/ydb/core/base/blobstorage_grouptype.h b/ydb/core/base/blobstorage_grouptype.h index 5d18fce4b2..cd38dfcfa8 100644 --- a/ydb/core/base/blobstorage_grouptype.h +++ b/ydb/core/base/blobstorage_grouptype.h @@ -9,14 +9,14 @@ #include <util/generic/list.h> #include <library/cpp/containers/stack_vector/stack_vec.h> - + namespace NKikimr { static constexpr ui32 BlobProtobufHeaderMaxSize = 80; -struct TBloblStorageErasureParameters; +struct TBloblStorageErasureParameters; -struct TBlobStorageGroupType : public TErasureType { +struct TBlobStorageGroupType : public TErasureType { TBlobStorageGroupType(TErasureType::EErasureSpecies s = TErasureType::ErasureNone) : TErasureType(s) diff --git a/ydb/core/base/counters.cpp b/ydb/core/base/counters.cpp index e729dfca9f..b192fa36a1 100644 --- a/ydb/core/base/counters.cpp +++ b/ydb/core/base/counters.cpp @@ -26,22 +26,22 @@ static const THashSet<TString> DATABASE_SERVICES TString("utils"), TString("auth"), TString("ydb"), - TString("pqproxy|writeInfo"), - TString("pqproxy|writeTimeLag"), - TString("pqproxy|writeSession"), - TString("pqproxy|writingTime"), - TString("pqproxy|SLI"), - TString("pqproxy|cache"), - TString("pqproxy|partitionWriteQuotaWait"), - TString("pqproxy|topicWriteQuotaWait"), - TString("pqproxy|readTimeLag"), - TString("pqproxy|readingTime"), - TString("pqproxy|readSession"), - TString("pqproxy|schemecache"), - TString("pqproxy|mirrorWriteTimeLag"), + TString("pqproxy|writeInfo"), + TString("pqproxy|writeTimeLag"), + TString("pqproxy|writeSession"), + TString("pqproxy|writingTime"), + TString("pqproxy|SLI"), + TString("pqproxy|cache"), + TString("pqproxy|partitionWriteQuotaWait"), + TString("pqproxy|topicWriteQuotaWait"), + TString("pqproxy|readTimeLag"), + TString("pqproxy|readingTime"), + TString("pqproxy|readSession"), + TString("pqproxy|schemecache"), + TString("pqproxy|mirrorWriteTimeLag"), }}; - + static const THashSet<TString> DATABASE_ATTRIBUTE_SERVICES = {{ TString("ydb") }}; @@ -58,14 +58,14 @@ const THashSet<TString> &GetDatabaseSensorServices() return DATABASE_SERVICES; } -void ReplaceSubgroup(TIntrusivePtr<TDynamicCounters> root, const TString &service) -{ - auto serviceGroup = GetServiceCounters(root, service); - const auto &[svc, subSvc] = ExtractSubServiceName(service); - auto rt = GetServiceCountersRoot(root, service); - rt->ReplaceSubgroup(subSvc.empty() ? "counters" : "subsystem", subSvc.empty() ? svc : subSvc, serviceGroup); -} - +void ReplaceSubgroup(TIntrusivePtr<TDynamicCounters> root, const TString &service) +{ + auto serviceGroup = GetServiceCounters(root, service); + const auto &[svc, subSvc] = ExtractSubServiceName(service); + auto rt = GetServiceCountersRoot(root, service); + rt->ReplaceSubgroup(subSvc.empty() ? "counters" : "subsystem", subSvc.empty() ? svc : subSvc, serviceGroup); +} + const THashSet<TString> &GetDatabaseAttributeSensorServices() { return DATABASE_ATTRIBUTE_SERVICES; @@ -99,23 +99,23 @@ void OnCounterLookup(const char *methodName, const TString &name, const TString GLOBAL_LWPROBE(MONITORING_PROVIDER, MonitoringCounterLookup, methodName, name, value); } - -std::pair<TString, TString> ExtractSubServiceName(const TString &service) -{ - TStringBuf svc = TStringBuf(service); - TStringBuf subSvc = svc.SplitOff('|'); - return {TString(svc), TString(subSvc)}; -} - -TIntrusivePtr<TDynamicCounters> GetServiceCountersRoot(TIntrusivePtr<TDynamicCounters> root, + +std::pair<TString, TString> ExtractSubServiceName(const TString &service) +{ + TStringBuf svc = TStringBuf(service); + TStringBuf subSvc = svc.SplitOff('|'); + return {TString(svc), TString(subSvc)}; +} + +TIntrusivePtr<TDynamicCounters> GetServiceCountersRoot(TIntrusivePtr<TDynamicCounters> root, const TString &service) { - auto pair = ExtractSubServiceName(service); - if (pair.second.empty()) - return root; - return root->GetSubgroup("counters", pair.first); -} - + auto pair = ExtractSubServiceName(service); + if (pair.second.empty()) + return root; + return root->GetSubgroup("counters", pair.first); +} + static THashSet<TString> MakeServiceCountersExtraLabels() { THashSet<TString> extraLabels; extraLabels.insert(DATABASE_LABEL); @@ -128,17 +128,17 @@ static THashSet<TString> MakeServiceCountersExtraLabels() { static const THashSet<TString> SERVICE_COUNTERS_EXTRA_LABELS = MakeServiceCountersExtraLabels(); -TIntrusivePtr<TDynamicCounters> GetServiceCounters(TIntrusivePtr<TDynamicCounters> root, - const TString &service, bool skipAddedLabels) -{ - const auto &[svc, subSvc] = ExtractSubServiceName(service); - auto res = root->GetSubgroup("counters", svc); - if (!subSvc.empty()) { - res = res->GetSubgroup("subsystem", subSvc); - } - if (!skipAddedLabels) - return res; - +TIntrusivePtr<TDynamicCounters> GetServiceCounters(TIntrusivePtr<TDynamicCounters> root, + const TString &service, bool skipAddedLabels) +{ + const auto &[svc, subSvc] = ExtractSubServiceName(service); + auto res = root->GetSubgroup("counters", svc); + if (!subSvc.empty()) { + res = res->GetSubgroup("subsystem", subSvc); + } + if (!skipAddedLabels) + return res; + res = SkipLabels(res, SERVICE_COUNTERS_EXTRA_LABELS); auto utils = root->GetSubgroup("counters", "utils"); diff --git a/ydb/core/base/counters.h b/ydb/core/base/counters.h index 658522725d..01dc8c4c0f 100644 --- a/ydb/core/base/counters.h +++ b/ydb/core/base/counters.h @@ -12,17 +12,17 @@ namespace NKikimr { // Get counters group for specified service. Skip tenant and slot labels. TIntrusivePtr<NMonitoring::TDynamicCounters> GetServiceCounters(TIntrusivePtr<NMonitoring::TDynamicCounters> root, - const TString &service, bool skipAddedLabels = true); - // Get parent node for subsvc/svc if any. root->svc->subvc => svc, root->svc => root. - TIntrusivePtr<NMonitoring::TDynamicCounters> GetServiceCountersRoot(TIntrusivePtr<NMonitoring::TDynamicCounters> root, + const TString &service, bool skipAddedLabels = true); + // Get parent node for subsvc/svc if any. root->svc->subvc => svc, root->svc => root. + TIntrusivePtr<NMonitoring::TDynamicCounters> GetServiceCountersRoot(TIntrusivePtr<NMonitoring::TDynamicCounters> root, const TString &service); - // Extract subservice name if any. aba|caba => aba, caba. - std::pair<TString, TString> ExtractSubServiceName(const TString &service); + // Extract subservice name if any. aba|caba => aba, caba. + std::pair<TString, TString> ExtractSubServiceName(const TString &service); // Get list of services which use top-level database labels for own sensors. const THashSet<TString> &GetDatabaseSensorServices(); // Get list of services which use top-level database attribute labels for own sensors. const THashSet<TString> &GetDatabaseAttributeSensorServices(); const THashSet<TString> &GetDatabaseAttributeLabels(); - // Drop all extra labels. - void ReplaceSubgroup(TIntrusivePtr<NMonitoring::TDynamicCounters> root, const TString &service); + // Drop all extra labels. + void ReplaceSubgroup(TIntrusivePtr<NMonitoring::TDynamicCounters> root, const TString &service); } // namespace NKikimr diff --git a/ydb/core/base/events.h b/ydb/core/base/events.h index 8142899370..f5fedfe19b 100644 --- a/ydb/core/base/events.h +++ b/ydb/core/base/events.h @@ -58,7 +58,7 @@ struct TKikimrEvents : TEvents { ES_FLAT_EXECUTOR, ES_NODE_WHITEBOARD, ES_FLAT_TX_SCHEMESHARD, // 4137 - ES_PQ, + ES_PQ, ES_YQL_KIKIMR_PROXY, ES_PQ_META_CACHE, ES_DEPRECATED_4141, @@ -80,7 +80,7 @@ struct TKikimrEvents : TEvents { ES_RTMR_HOST, ES_RESOURCE_BROKER, ES_VIEWER, - ES_SUB_DOMAIN, + ES_SUB_DOMAIN, ES_GRPC_PROXY_STATUS, //OLD ES_SQS, ES_BLOCKSTORE, //4162 @@ -102,7 +102,7 @@ struct TKikimrEvents : TEvents { ES_COORDINATED_QUOTA, ES_ACCESS_SERVICE, ES_USER_ACCOUNT_SERVICE, - ES_PQ_PROXY_NEW, + ES_PQ_PROXY_NEW, ES_GRPC_STREAMING, ES_SCHEME_BOARD, ES_FLAT_TX_SCHEMESHARD_PROTECTED, diff --git a/ydb/core/base/user_registry.h b/ydb/core/base/user_registry.h index ee2a7b4f68..74484c3d06 100644 --- a/ydb/core/base/user_registry.h +++ b/ydb/core/base/user_registry.h @@ -16,23 +16,23 @@ namespace NKikimr { struct TEvGetUserById : TEventLocal<TEvGetUserById, EvGetUserById> { const ui64 UID; - const TString UserNameHint; + const TString UserNameHint; - TEvGetUserById(const ui64 uid, const TString& userNameHint = "") + TEvGetUserById(const ui64 uid, const TString& userNameHint = "") : UID(uid) - , UserNameHint(userNameHint) + , UserNameHint(userNameHint) {} }; struct TEvGetUserByIdResult : TEventLocal<TEvGetUserByIdResult, EvGetUserByIdResult> { const ui64 UID; - const TString UserNameHint; + const TString UserNameHint; const TString User; const TString Error; - TEvGetUserByIdResult(const ui64 uid, const TString& userNameHint, const TString& user = "", const TString& error = "") + TEvGetUserByIdResult(const ui64 uid, const TString& userNameHint, const TString& user = "", const TString& error = "") : UID(uid) - , UserNameHint(userNameHint) + , UserNameHint(userNameHint) , User(user) , Error(error) {} diff --git a/ydb/core/base/ya.make b/ydb/core/base/ya.make index 39e0cdbc37..83db5825c3 100644 --- a/ydb/core/base/ya.make +++ b/ydb/core/base/ya.make @@ -84,7 +84,7 @@ SRCS( tracing.h tx_processing.h tx_processing.cpp - user_registry.h + user_registry.h blobstorage_grouptype.cpp ) diff --git a/ydb/core/client/server/grpc_base.h b/ydb/core/client/server/grpc_base.h index 8fa881f99b..e104ef0c1b 100644 --- a/ydb/core/client/server/grpc_base.h +++ b/ydb/core/client/server/grpc_base.h @@ -2,5 +2,5 @@ #include <ydb/core/protos/grpc.grpc.pb.h> #include <ydb/core/base/counters.h> - + #include <library/cpp/uri/uri.h> diff --git a/ydb/core/client/server/grpc_proxy_status.cpp b/ydb/core/client/server/grpc_proxy_status.cpp index 70ef959842..827335ad40 100644 --- a/ydb/core/client/server/grpc_proxy_status.cpp +++ b/ydb/core/client/server/grpc_proxy_status.cpp @@ -1,6 +1,6 @@ -#include "grpc_proxy_status.h" -#include "msgbus_servicereq.h" - +#include "grpc_proxy_status.h" +#include "msgbus_servicereq.h" + #include <library/cpp/actors/core/log.h> #include <library/cpp/actors/core/interconnect.h> #include <library/cpp/actors/interconnect/interconnect.h> @@ -8,421 +8,421 @@ #include <ydb/core/base/tablet_pipe.h> #include <ydb/core/base/appdata.h> #include <ydb/core/base/location.h> - -//TODO: add here bucket counter for speed - find out borders from grpc -//////////////////////////////////////////// -namespace NKikimr { - + +//TODO: add here bucket counter for speed - find out borders from grpc +//////////////////////////////////////////// +namespace NKikimr { + TActorId MakeGRpcProxyStatusID(ui32 node) { - char x[12] = {'g','r','p','c','p','r','x','y','s','t','a','t'}; + char x[12] = {'g','r','p','c','p','r','x','y','s','t','a','t'}; return TActorId(node, TStringBuf(x, 12)); -} - - - -//////////////////////////////////////////// - -const ui32 WAKEUP_TIMEOUT_MS = 100; -const ui32 TIMEOUT_SECONDS = 10; - - -/////////////////////////////////////////// - -class TChooseProxyActorImpl : public TActorBootstrapped<TChooseProxyActorImpl> { - - using TBase = TActorBootstrapped<TChooseProxyActorImpl>; +} + + + +//////////////////////////////////////////// + +const ui32 WAKEUP_TIMEOUT_MS = 100; +const ui32 TIMEOUT_SECONDS = 10; + + +/////////////////////////////////////////// + +class TChooseProxyActorImpl : public TActorBootstrapped<TChooseProxyActorImpl> { + + using TBase = TActorBootstrapped<TChooseProxyActorImpl>; TActorId Sender; - ui32 NodesRequested; - ui32 NodesReceived; - THolder<NMsgBusProxy::TBusChooseProxy> Request; - TVector<ui32> Nodes; - THashMap<ui32, TString> NodeNames; + ui32 NodesRequested; + ui32 NodesReceived; + THolder<NMsgBusProxy::TBusChooseProxy> Request; + TVector<ui32> Nodes; + THashMap<ui32, TString> NodeNames; THashMap<ui32, TString> NodeDataCenter; THashMap<ui32, std::shared_ptr<TEvGRpcProxyStatus::TEvGetStatusResponse>> PerNodeResponse; - - -public: + + +public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::FRONT_CHOOSE_RROXY; - } - - // + } + + // TChooseProxyActorImpl(const TActorId& sender) - : Sender(sender) - , NodesRequested(0) - , NodesReceived(0) - { - } - - virtual ~TChooseProxyActorImpl() - {} - - - void SendRequest(ui32 nodeId, const TActorContext &ctx) { + : Sender(sender) + , NodesRequested(0) + , NodesReceived(0) + { + } + + virtual ~TChooseProxyActorImpl() + {} + + + void SendRequest(ui32 nodeId, const TActorContext &ctx) { TActorId proxyNodeServiceId = MakeGRpcProxyStatusID(nodeId); - ctx.Send(proxyNodeServiceId, new TEvGRpcProxyStatus::TEvGetStatusRequest(), - IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - Nodes.emplace_back(nodeId); - } - - void Die(const TActorContext& ctx) override { - for (const ui32 node : Nodes) { + ctx.Send(proxyNodeServiceId, new TEvGRpcProxyStatus::TEvGetStatusRequest(), + IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); + Nodes.emplace_back(nodeId); + } + + void Die(const TActorContext& ctx) override { + for (const ui32 node : Nodes) { ctx.Send(TActivationContext::InterconnectProxy(node), new TEvents::TEvUnsubscribe()); - } - TBase::Die(ctx); - } - - void Bootstrap(const TActorContext& ctx) { + } + TBase::Die(ctx); + } + + void Bootstrap(const TActorContext& ctx) { const TActorId nameserviceId = GetNameserviceActorId(); - ctx.Send(nameserviceId, new TEvInterconnect::TEvListNodes()); - TBase::Become(&TThis::StateRequestedBrowse); - ctx.Schedule(TDuration::Seconds(TIMEOUT_SECONDS), new TEvents::TEvWakeup()); - } - - STFUNC(StateRequestedBrowse) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvInterconnect::TEvNodesInfo, HandleBrowse); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - STFUNC(StateRequested) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvGRpcProxyStatus::TEvGetStatusResponse, HandleResponse); - HFunc(TEvents::TEvUndelivered, Undelivered); - HFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void HandleBrowse(TEvInterconnect::TEvNodesInfo::TPtr &ev, const TActorContext &ctx) { - const TEvInterconnect::TEvNodesInfo* nodesInfo = ev->Get(); - Y_VERIFY(!nodesInfo->Nodes.empty()); - Nodes.reserve(nodesInfo->Nodes.size()); - for (const auto& ni : nodesInfo->Nodes) { - NodeNames[ni.NodeId] = ni.Host; + ctx.Send(nameserviceId, new TEvInterconnect::TEvListNodes()); + TBase::Become(&TThis::StateRequestedBrowse); + ctx.Schedule(TDuration::Seconds(TIMEOUT_SECONDS), new TEvents::TEvWakeup()); + } + + STFUNC(StateRequestedBrowse) { + switch (ev->GetTypeRewrite()) { + HFunc(TEvInterconnect::TEvNodesInfo, HandleBrowse); + CFunc(TEvents::TSystem::Wakeup, HandleTimeout); + } + } + + STFUNC(StateRequested) { + switch (ev->GetTypeRewrite()) { + HFunc(TEvGRpcProxyStatus::TEvGetStatusResponse, HandleResponse); + HFunc(TEvents::TEvUndelivered, Undelivered); + HFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); + CFunc(TEvents::TSystem::Wakeup, HandleTimeout); + } + } + + void HandleBrowse(TEvInterconnect::TEvNodesInfo::TPtr &ev, const TActorContext &ctx) { + const TEvInterconnect::TEvNodesInfo* nodesInfo = ev->Get(); + Y_VERIFY(!nodesInfo->Nodes.empty()); + Nodes.reserve(nodesInfo->Nodes.size()); + for (const auto& ni : nodesInfo->Nodes) { + NodeNames[ni.NodeId] = ni.Host; NodeDataCenter[ni.NodeId] = ni.Location.GetDataCenterId(); - SendRequest(ni.NodeId, ctx); - ++NodesRequested; - } - if (NodesRequested > 0) { - TBase::Become(&TThis::StateRequested); - } else { - ReplyAndDie(ctx); - } - } - - void Undelivered(TEvents::TEvUndelivered::TPtr &ev, const TActorContext &ctx) { - ui32 nodeId = ev->Cookie; - if (PerNodeResponse.emplace(nodeId, nullptr).second) { - NodeResponseReceived(ctx); - } - } - - void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &ev, const TActorContext &ctx) { - ui32 nodeId = ev->Get()->NodeId; - if (PerNodeResponse.emplace(nodeId, nullptr).second) { - NodeResponseReceived(ctx); - } - } - - void HandleResponse(TEvGRpcProxyStatus::TEvGetStatusResponse::TPtr &ev, const TActorContext &ctx) { - ui64 nodeId = ev->Cookie; + SendRequest(ni.NodeId, ctx); + ++NodesRequested; + } + if (NodesRequested > 0) { + TBase::Become(&TThis::StateRequested); + } else { + ReplyAndDie(ctx); + } + } + + void Undelivered(TEvents::TEvUndelivered::TPtr &ev, const TActorContext &ctx) { + ui32 nodeId = ev->Cookie; + if (PerNodeResponse.emplace(nodeId, nullptr).second) { + NodeResponseReceived(ctx); + } + } + + void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &ev, const TActorContext &ctx) { + ui32 nodeId = ev->Get()->NodeId; + if (PerNodeResponse.emplace(nodeId, nullptr).second) { + NodeResponseReceived(ctx); + } + } + + void HandleResponse(TEvGRpcProxyStatus::TEvGetStatusResponse::TPtr &ev, const TActorContext &ctx) { + ui64 nodeId = ev->Cookie; PerNodeResponse[nodeId].reset(ev->Release().Release()); - NodeResponseReceived(ctx); - } - - void NodeResponseReceived(const TActorContext &ctx) { - ++NodesReceived; - if (NodesReceived >= NodesRequested) { - ReplyAndDie(ctx); - } - } - - void HandleTimeout(const TActorContext &ctx) { - ReplyAndDie(ctx); - } - - - void ReplyAndDie(const TActorContext& ctx) { + NodeResponseReceived(ctx); + } + + void NodeResponseReceived(const TActorContext &ctx) { + ++NodesReceived; + if (NodesReceived >= NodesRequested) { + ReplyAndDie(ctx); + } + } + + void HandleTimeout(const TActorContext &ctx) { + ReplyAndDie(ctx); + } + + + void ReplyAndDie(const TActorContext& ctx) { THolder<TEvGRpcProxyStatus::TEvResponse> response = MakeHolder<TEvGRpcProxyStatus::TEvResponse>(); - - response->PerNodeResponse = PerNodeResponse; - response->NodeNames = NodeNames; - response->NodeDataCenter = NodeDataCenter; - - ctx.Send(Sender, response.Release()); - Die(ctx); - } -}; - - - - -//////////////////////////////////////////// -/// The TGRpcProxyStatusActor class -//////////////////////////////////////////// -class TGRpcProxyStatusActor : public TActorBootstrapped<TGRpcProxyStatusActor> { -public: + + response->PerNodeResponse = PerNodeResponse; + response->NodeNames = NodeNames; + response->NodeDataCenter = NodeDataCenter; + + ctx.Send(Sender, response.Release()); + Die(ctx); + } +}; + + + + +//////////////////////////////////////////// +/// The TGRpcProxyStatusActor class +//////////////////////////////////////////// +class TGRpcProxyStatusActor : public TActorBootstrapped<TGRpcProxyStatusActor> { +public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::FRONT_GRPC_PROXY_STATUS; - } - - // - TGRpcProxyStatusActor(); - virtual ~TGRpcProxyStatusActor(); - - // - void Bootstrap(const TActorContext &ctx); - - // - STFUNC(StateFunc); - -private: - // - void Handle(TEvGRpcProxyStatus::TEvSetup::TPtr &ev, const TActorContext &ctx); - void Handle(TEvGRpcProxyStatus::TEvUpdateStatus::TPtr &ev, const TActorContext &ctx); - void Handle(TEvGRpcProxyStatus::TEvGetStatusRequest::TPtr &ev, const TActorContext &ctx); - void HandleWakeup(const TActorContext &ctx); - - void Handle(TEvGRpcProxyStatus::TEvRequest::TPtr &ev, const TActorContext &ctx); - void Handle(TEvGRpcProxyStatus::TEvResponse::TPtr &ev, const TActorContext &ctx); - - - bool Allowed; - ui32 MaxWriteSessions; - ui32 MaxReadSessions; - - ui32 WriteSessions; - ui32 ReadSessions; - + } + + // + TGRpcProxyStatusActor(); + virtual ~TGRpcProxyStatusActor(); + + // + void Bootstrap(const TActorContext &ctx); + + // + STFUNC(StateFunc); + +private: + // + void Handle(TEvGRpcProxyStatus::TEvSetup::TPtr &ev, const TActorContext &ctx); + void Handle(TEvGRpcProxyStatus::TEvUpdateStatus::TPtr &ev, const TActorContext &ctx); + void Handle(TEvGRpcProxyStatus::TEvGetStatusRequest::TPtr &ev, const TActorContext &ctx); + void HandleWakeup(const TActorContext &ctx); + + void Handle(TEvGRpcProxyStatus::TEvRequest::TPtr &ev, const TActorContext &ctx); + void Handle(TEvGRpcProxyStatus::TEvResponse::TPtr &ev, const TActorContext &ctx); + + + bool Allowed; + ui32 MaxWriteSessions; + ui32 MaxReadSessions; + + ui32 WriteSessions; + ui32 ReadSessions; + TActorId Worker; std::deque<TActorId> Requests; - -}; - -TGRpcProxyStatusActor::TGRpcProxyStatusActor() - : Allowed(false) - , MaxWriteSessions(1) - , MaxReadSessions(1) - , WriteSessions(0) - , ReadSessions(0) - , Worker() -{} - -//////////////////////////////////////////// -TGRpcProxyStatusActor::~TGRpcProxyStatusActor() -{} - -//////////////////////////////////////////// -void -TGRpcProxyStatusActor::Bootstrap(const TActorContext &ctx) { - Become(&TThis::StateFunc); - ctx.Schedule(TDuration::MilliSeconds(WAKEUP_TIMEOUT_MS), new TEvents::TEvWakeup()); -} - -//////////////////////////////////////////// -void -TGRpcProxyStatusActor::Handle(TEvGRpcProxyStatus::TEvRequest::TPtr &ev, const TActorContext& ctx) { - Requests.push_back(ev->Sender); + +}; + +TGRpcProxyStatusActor::TGRpcProxyStatusActor() + : Allowed(false) + , MaxWriteSessions(1) + , MaxReadSessions(1) + , WriteSessions(0) + , ReadSessions(0) + , Worker() +{} + +//////////////////////////////////////////// +TGRpcProxyStatusActor::~TGRpcProxyStatusActor() +{} + +//////////////////////////////////////////// +void +TGRpcProxyStatusActor::Bootstrap(const TActorContext &ctx) { + Become(&TThis::StateFunc); + ctx.Schedule(TDuration::MilliSeconds(WAKEUP_TIMEOUT_MS), new TEvents::TEvWakeup()); +} + +//////////////////////////////////////////// +void +TGRpcProxyStatusActor::Handle(TEvGRpcProxyStatus::TEvRequest::TPtr &ev, const TActorContext& ctx) { + Requests.push_back(ev->Sender); if (Worker == TActorId()) { - Worker = ctx.Register(new TChooseProxyActorImpl(ctx.SelfID)); - } -} - -//////////////////////////////////////////// -void -TGRpcProxyStatusActor::Handle(TEvGRpcProxyStatus::TEvResponse::TPtr &ev, const TActorContext& ctx) { - - for (auto & sender : Requests) { + Worker = ctx.Register(new TChooseProxyActorImpl(ctx.SelfID)); + } +} + +//////////////////////////////////////////// +void +TGRpcProxyStatusActor::Handle(TEvGRpcProxyStatus::TEvResponse::TPtr &ev, const TActorContext& ctx) { + + for (auto & sender : Requests) { THolder<TEvGRpcProxyStatus::TEvResponse> response = MakeHolder<TEvGRpcProxyStatus::TEvResponse>(); - response->PerNodeResponse = ev->Get()->PerNodeResponse; - response->NodeNames = ev->Get()->NodeNames; - response->NodeDataCenter = ev->Get()->NodeDataCenter; - ctx.Send(sender, response.Release()); - } - Requests.clear(); + response->PerNodeResponse = ev->Get()->PerNodeResponse; + response->NodeNames = ev->Get()->NodeNames; + response->NodeDataCenter = ev->Get()->NodeDataCenter; + ctx.Send(sender, response.Release()); + } + Requests.clear(); Worker = TActorId(); -} - - -//////////////////////////////////////////// -void -TGRpcProxyStatusActor::Handle(TEvGRpcProxyStatus::TEvSetup::TPtr &ev, const TActorContext&) { - Allowed = ev->Get()->Allowed; - MaxReadSessions = ev->Get()->MaxReadSessions; - MaxWriteSessions = ev->Get()->MaxWriteSessions; -} - -//////////////////////////////////////////// -void -TGRpcProxyStatusActor::Handle(TEvGRpcProxyStatus::TEvUpdateStatus::TPtr &ev, const TActorContext&) { - - Y_VERIFY((i32)WriteSessions + ev->Get()->WriteSessions >= 0); - Y_VERIFY((i32)ReadSessions + ev->Get()->ReadSessions >= 0); - WriteSessions += ev->Get()->WriteSessions; - ReadSessions += ev->Get()->ReadSessions; - //TODO: count here write/read speed -} - -//////////////////////////////////////////// -void -TGRpcProxyStatusActor::Handle(TEvGRpcProxyStatus::TEvGetStatusRequest::TPtr &ev, const TActorContext &ctx) { - - THolder<TEvGRpcProxyStatus::TEvGetStatusResponse> resp(new TEvGRpcProxyStatus::TEvGetStatusResponse); - ui64 weight = Allowed * 1000000; - - if (MaxWriteSessions <= WriteSessions) - weight = 0; - else - weight = Min<ui64>(weight, ui64(MaxWriteSessions - WriteSessions) * 1000000 / MaxWriteSessions); - - if (MaxReadSessions <= ReadSessions) - weight = 0; - else - weight = Min<ui64>(weight, ui64(MaxReadSessions - ReadSessions) * 1000000 / MaxReadSessions); - - resp->Record.SetWeight(weight); - ctx.Send(ev->Sender, resp.Release(), 0, ev->Cookie); -} - - -//////////////////////////////////////////// -void -TGRpcProxyStatusActor::HandleWakeup(const TActorContext &ctx) { - - ctx.Schedule(TDuration::Seconds(WAKEUP_TIMEOUT_MS), new TEvents::TEvWakeup()); - //TODO: update statistics here -} - - -//////////////////////////////////////////// -/// public state functions -//////////////////////////////////////////// -STFUNC(TGRpcProxyStatusActor::StateFunc) { - Y_UNUSED(ctx); - switch (ev->GetTypeRewrite()) { - HFunc(TEvGRpcProxyStatus::TEvSetup, Handle); - HFunc(TEvGRpcProxyStatus::TEvGetStatusRequest, Handle); - HFunc(TEvGRpcProxyStatus::TEvUpdateStatus, Handle); - HFunc(TEvGRpcProxyStatus::TEvRequest, Handle); - HFunc(TEvGRpcProxyStatus::TEvResponse, Handle); - CFunc(TEvents::TSystem::Wakeup, HandleWakeup); - - // HFunc(TEvents::TEvPoisonPill, Handle); // we do not need PoisonPill for the actor - } -} - - -/////////////////////////////////////////// - -class TChooseProxyActor : public TActorBootstrapped<TChooseProxyActor>, public NMsgBusProxy::TMessageBusSessionIdentHolder { - - using TBase = TActorBootstrapped<TChooseProxyActor>; +} + + +//////////////////////////////////////////// +void +TGRpcProxyStatusActor::Handle(TEvGRpcProxyStatus::TEvSetup::TPtr &ev, const TActorContext&) { + Allowed = ev->Get()->Allowed; + MaxReadSessions = ev->Get()->MaxReadSessions; + MaxWriteSessions = ev->Get()->MaxWriteSessions; +} + +//////////////////////////////////////////// +void +TGRpcProxyStatusActor::Handle(TEvGRpcProxyStatus::TEvUpdateStatus::TPtr &ev, const TActorContext&) { + + Y_VERIFY((i32)WriteSessions + ev->Get()->WriteSessions >= 0); + Y_VERIFY((i32)ReadSessions + ev->Get()->ReadSessions >= 0); + WriteSessions += ev->Get()->WriteSessions; + ReadSessions += ev->Get()->ReadSessions; + //TODO: count here write/read speed +} + +//////////////////////////////////////////// +void +TGRpcProxyStatusActor::Handle(TEvGRpcProxyStatus::TEvGetStatusRequest::TPtr &ev, const TActorContext &ctx) { + + THolder<TEvGRpcProxyStatus::TEvGetStatusResponse> resp(new TEvGRpcProxyStatus::TEvGetStatusResponse); + ui64 weight = Allowed * 1000000; + + if (MaxWriteSessions <= WriteSessions) + weight = 0; + else + weight = Min<ui64>(weight, ui64(MaxWriteSessions - WriteSessions) * 1000000 / MaxWriteSessions); + + if (MaxReadSessions <= ReadSessions) + weight = 0; + else + weight = Min<ui64>(weight, ui64(MaxReadSessions - ReadSessions) * 1000000 / MaxReadSessions); + + resp->Record.SetWeight(weight); + ctx.Send(ev->Sender, resp.Release(), 0, ev->Cookie); +} + + +//////////////////////////////////////////// +void +TGRpcProxyStatusActor::HandleWakeup(const TActorContext &ctx) { + + ctx.Schedule(TDuration::Seconds(WAKEUP_TIMEOUT_MS), new TEvents::TEvWakeup()); + //TODO: update statistics here +} + + +//////////////////////////////////////////// +/// public state functions +//////////////////////////////////////////// +STFUNC(TGRpcProxyStatusActor::StateFunc) { + Y_UNUSED(ctx); + switch (ev->GetTypeRewrite()) { + HFunc(TEvGRpcProxyStatus::TEvSetup, Handle); + HFunc(TEvGRpcProxyStatus::TEvGetStatusRequest, Handle); + HFunc(TEvGRpcProxyStatus::TEvUpdateStatus, Handle); + HFunc(TEvGRpcProxyStatus::TEvRequest, Handle); + HFunc(TEvGRpcProxyStatus::TEvResponse, Handle); + CFunc(TEvents::TSystem::Wakeup, HandleWakeup); + + // HFunc(TEvents::TEvPoisonPill, Handle); // we do not need PoisonPill for the actor + } +} + + +/////////////////////////////////////////// + +class TChooseProxyActor : public TActorBootstrapped<TChooseProxyActor>, public NMsgBusProxy::TMessageBusSessionIdentHolder { + + using TBase = TActorBootstrapped<TChooseProxyActor>; THolder<NMsgBusProxy::TBusChooseProxy> Request; THashMap<ui32, TString> NodeNames; THashMap<ui32, TString> NodeDataCenter; THashMap<ui32, std::shared_ptr<TEvGRpcProxyStatus::TEvGetStatusResponse>> PerNodeResponse; - - -public: + + +public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::FRONT_CHOOSE_RROXY; - } - - // - TChooseProxyActor(NMsgBusProxy::TBusMessageContext &msg) - : TMessageBusSessionIdentHolder(msg) + } + + // + TChooseProxyActor(NMsgBusProxy::TBusMessageContext &msg) + : TMessageBusSessionIdentHolder(msg) , Request(static_cast<NMsgBusProxy::TBusChooseProxy*>(msg.ReleaseMessage())) - { - } - - virtual ~TChooseProxyActor() - {} - - void Bootstrap(const TActorContext& ctx) { - ctx.Send(MakeGRpcProxyStatusID(ctx.SelfID.NodeId()), new TEvGRpcProxyStatus::TEvRequest()); - TBase::Become(&TThis::StateWork); - ctx.Schedule(TDuration::Seconds(TIMEOUT_SECONDS), new TEvents::TEvWakeup()); - } - - STFUNC(StateWork) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvGRpcProxyStatus::TEvResponse, HandleResponse); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void HandleResponse(TEvGRpcProxyStatus::TEvResponse::TPtr &ev, const TActorContext &ctx) { - NodeNames = ev->Get()->NodeNames; - PerNodeResponse = ev->Get()->PerNodeResponse; - NodeDataCenter = ev->Get()->NodeDataCenter; - ReplyAndDie(ctx); - } - - void HandleTimeout(const TActorContext &ctx) { - ReplyAndDie(ctx); - } - - - void ReplyAndDie(const TActorContext& ctx) { - auto response = MakeHolder<NMsgBusProxy::TBusResponse>(); - TString name; - ui64 totalWeight = 0; - ui64 cookie = 0; + { + } + + virtual ~TChooseProxyActor() + {} + + void Bootstrap(const TActorContext& ctx) { + ctx.Send(MakeGRpcProxyStatusID(ctx.SelfID.NodeId()), new TEvGRpcProxyStatus::TEvRequest()); + TBase::Become(&TThis::StateWork); + ctx.Schedule(TDuration::Seconds(TIMEOUT_SECONDS), new TEvents::TEvWakeup()); + } + + STFUNC(StateWork) { + switch (ev->GetTypeRewrite()) { + HFunc(TEvGRpcProxyStatus::TEvResponse, HandleResponse); + CFunc(TEvents::TSystem::Wakeup, HandleTimeout); + } + } + + void HandleResponse(TEvGRpcProxyStatus::TEvResponse::TPtr &ev, const TActorContext &ctx) { + NodeNames = ev->Get()->NodeNames; + PerNodeResponse = ev->Get()->PerNodeResponse; + NodeDataCenter = ev->Get()->NodeDataCenter; + ReplyAndDie(ctx); + } + + void HandleTimeout(const TActorContext &ctx) { + ReplyAndDie(ctx); + } + + + void ReplyAndDie(const TActorContext& ctx) { + auto response = MakeHolder<NMsgBusProxy::TBusResponse>(); + TString name; + ui64 totalWeight = 0; + ui64 cookie = 0; const auto& record = Request->Record; const std::optional<TString> filterDataCenter = record.HasDataCenter() ? std::make_optional(record.GetDataCenter()) : record.HasDataCenterNum() ? std::make_optional(DataCenterToString(record.GetDataCenterNum())) : std::nullopt; - const bool preferLocalProxy = Request->Record.GetPreferLocalProxy(); - const ui32 localNodeId = ctx.SelfID.NodeId(); - //choose random proxy - TStringBuilder s; - s << "ChooseProxyResponses [preferLocal " << preferLocalProxy << " localId " << localNodeId << "]:"; - for (auto& resp : PerNodeResponse) { - if (!resp.second) - continue; - s << " " << NodeNames[resp.first] << "[" << resp.first << "], " << resp.second->Record.GetWeight() << " "; + const bool preferLocalProxy = Request->Record.GetPreferLocalProxy(); + const ui32 localNodeId = ctx.SelfID.NodeId(); + //choose random proxy + TStringBuilder s; + s << "ChooseProxyResponses [preferLocal " << preferLocalProxy << " localId " << localNodeId << "]:"; + for (auto& resp : PerNodeResponse) { + if (!resp.second) + continue; + s << " " << NodeNames[resp.first] << "[" << resp.first << "], " << resp.second->Record.GetWeight() << " "; if (filterDataCenter && filterDataCenter != NodeDataCenter[resp.first]) continue; - ui64 weight = resp.second->Record.GetWeight(); - ui64 rand = TAppData::RandomProvider->GenRand64(); - if ((weight > 0 && rand % (totalWeight + weight) >= totalWeight || preferLocalProxy && resp.first == localNodeId) && //random choosed this node or it is prefered local node - (!preferLocalProxy || cookie != localNodeId)) { //and curent node will not kick prefered local - name = NodeNames[resp.first]; - cookie = resp.first; - } - totalWeight += weight; - } - - s << " : result " << cookie << " " << name; - LOG_DEBUG(ctx, NKikimrServices::CHOOSE_PROXY, "%s", s.c_str()); - - if (name.empty()) { - response->Record.SetStatus(NMsgBusProxy::MSTATUS_ERROR); - } else { - response->Record.SetProxyName(name); - response->Record.SetProxyCookie(cookie); //TODO: encode here name and current time - response->Record.SetStatus(NMsgBusProxy::MSTATUS_OK); - } - - SendReplyMove(response.Release()); - Die(ctx); - } -}; - - - - -IActor* CreateGRpcProxyStatus() { - return new TGRpcProxyStatusActor(); -} -namespace NMsgBusProxy { - - IActor* CreateMessageBusChooseProxy(NMsgBusProxy::TBusMessageContext &msg) { - return new TChooseProxyActor(msg); - } - -} - -} + ui64 weight = resp.second->Record.GetWeight(); + ui64 rand = TAppData::RandomProvider->GenRand64(); + if ((weight > 0 && rand % (totalWeight + weight) >= totalWeight || preferLocalProxy && resp.first == localNodeId) && //random choosed this node or it is prefered local node + (!preferLocalProxy || cookie != localNodeId)) { //and curent node will not kick prefered local + name = NodeNames[resp.first]; + cookie = resp.first; + } + totalWeight += weight; + } + + s << " : result " << cookie << " " << name; + LOG_DEBUG(ctx, NKikimrServices::CHOOSE_PROXY, "%s", s.c_str()); + + if (name.empty()) { + response->Record.SetStatus(NMsgBusProxy::MSTATUS_ERROR); + } else { + response->Record.SetProxyName(name); + response->Record.SetProxyCookie(cookie); //TODO: encode here name and current time + response->Record.SetStatus(NMsgBusProxy::MSTATUS_OK); + } + + SendReplyMove(response.Release()); + Die(ctx); + } +}; + + + + +IActor* CreateGRpcProxyStatus() { + return new TGRpcProxyStatusActor(); +} +namespace NMsgBusProxy { + + IActor* CreateMessageBusChooseProxy(NMsgBusProxy::TBusMessageContext &msg) { + return new TChooseProxyActor(msg); + } + +} + +} diff --git a/ydb/core/client/server/grpc_proxy_status.h b/ydb/core/client/server/grpc_proxy_status.h index cbf5a2d768..13ea930262 100644 --- a/ydb/core/client/server/grpc_proxy_status.h +++ b/ydb/core/client/server/grpc_proxy_status.h @@ -1,84 +1,84 @@ -#pragma once - -//////////////////////////////////////////// +#pragma once + +//////////////////////////////////////////// #include <library/cpp/actors/core/defs.h> #include <library/cpp/actors/core/actor.h> #include <library/cpp/actors/core/event.h> - + #include <ydb/core/base/blobstorage.h> #include <ydb/core/protos/grpc_status_proxy.pb.h> - -//////////////////////////////////////////// -namespace NKikimr { - -//////////////////////////////////////////// + +//////////////////////////////////////////// +namespace NKikimr { + +//////////////////////////////////////////// TActorId MakeGRpcProxyStatusID(ui32 node); - -//////////////////////////////////////////// -struct TEvGRpcProxyStatus { - // - enum EEv { - EvGetStatusRequest = EventSpaceBegin(TKikimrEvents::ES_GRPC_PROXY_STATUS), - EvGetStatusResponse, - EvUpdateStatus, //local event - EvSetup, //local event - EvRequest, //local event - EvResponse, //local event - EvEnd - }; - - static_assert(EvEnd < EventSpaceEnd(TKikimrEvents::ES_GRPC_PROXY_STATUS), "expect EvEnd < EventSpaceEnd(TKikimrEvents::ES_GRPC_PROXY_STATUS)"); - - struct TEvUpdateStatus : public TEventLocal<TEvUpdateStatus, EvUpdateStatus> { - // - ui64 ReadBytes; //diff - ui64 WriteBytes; //diff - i32 WriteSessions; //diff - i32 ReadSessions; //diff - - TEvUpdateStatus(const ui64 readBytes, const ui64 writeBytes, const i32 writeSessions, const i32 readSessions) - : ReadBytes(readBytes) - , WriteBytes(writeBytes) - , WriteSessions(writeSessions) - , ReadSessions(readSessions) - {} - }; - - struct TEvSetup : public TEventLocal<TEvSetup, EvSetup> { - // - bool Allowed; - ui32 MaxWriteSessions; - ui32 MaxReadSessions; - - TEvSetup(bool allowed, ui32 maxWriteSessions, ui32 maxReadSessions) - : Allowed(allowed) - , MaxWriteSessions(maxWriteSessions) - , MaxReadSessions(maxReadSessions) - {} - }; - - - struct TEvRequest : public TEventLocal<TEvRequest, EvRequest> { - }; - - - struct TEvGetStatusRequest : public TEventPB<TEvGetStatusRequest, NKikimrGRpcProxy::TEvGetStatusRequest, EvGetStatusRequest> { - }; - - struct TEvGetStatusResponse : public TEventPB<TEvGetStatusResponse, NKikimrGRpcProxy::TEvGetStatusResponse, EvGetStatusResponse> { - }; - - - struct TEvResponse : public TEventLocal<TEvResponse, EvResponse> { + +//////////////////////////////////////////// +struct TEvGRpcProxyStatus { + // + enum EEv { + EvGetStatusRequest = EventSpaceBegin(TKikimrEvents::ES_GRPC_PROXY_STATUS), + EvGetStatusResponse, + EvUpdateStatus, //local event + EvSetup, //local event + EvRequest, //local event + EvResponse, //local event + EvEnd + }; + + static_assert(EvEnd < EventSpaceEnd(TKikimrEvents::ES_GRPC_PROXY_STATUS), "expect EvEnd < EventSpaceEnd(TKikimrEvents::ES_GRPC_PROXY_STATUS)"); + + struct TEvUpdateStatus : public TEventLocal<TEvUpdateStatus, EvUpdateStatus> { + // + ui64 ReadBytes; //diff + ui64 WriteBytes; //diff + i32 WriteSessions; //diff + i32 ReadSessions; //diff + + TEvUpdateStatus(const ui64 readBytes, const ui64 writeBytes, const i32 writeSessions, const i32 readSessions) + : ReadBytes(readBytes) + , WriteBytes(writeBytes) + , WriteSessions(writeSessions) + , ReadSessions(readSessions) + {} + }; + + struct TEvSetup : public TEventLocal<TEvSetup, EvSetup> { + // + bool Allowed; + ui32 MaxWriteSessions; + ui32 MaxReadSessions; + + TEvSetup(bool allowed, ui32 maxWriteSessions, ui32 maxReadSessions) + : Allowed(allowed) + , MaxWriteSessions(maxWriteSessions) + , MaxReadSessions(maxReadSessions) + {} + }; + + + struct TEvRequest : public TEventLocal<TEvRequest, EvRequest> { + }; + + + struct TEvGetStatusRequest : public TEventPB<TEvGetStatusRequest, NKikimrGRpcProxy::TEvGetStatusRequest, EvGetStatusRequest> { + }; + + struct TEvGetStatusResponse : public TEventPB<TEvGetStatusResponse, NKikimrGRpcProxy::TEvGetStatusResponse, EvGetStatusResponse> { + }; + + + struct TEvResponse : public TEventLocal<TEvResponse, EvResponse> { THashMap<ui32, std::shared_ptr<TEvGRpcProxyStatus::TEvGetStatusResponse>> PerNodeResponse; - THashMap<ui32, TString> NodeNames; + THashMap<ui32, TString> NodeNames; THashMap<ui32, TString> NodeDataCenter; - }; - -}; - -IActor* CreateGRpcProxyStatus(); - - -} // end of the NKikimr namespace - + }; + +}; + +IActor* CreateGRpcProxyStatus(); + + +} // end of the NKikimr namespace + diff --git a/ydb/core/client/server/grpc_server.cpp b/ydb/core/client/server/grpc_server.cpp index 0adf6f2b37..aa122da36a 100644 --- a/ydb/core/client/server/grpc_server.cpp +++ b/ydb/core/client/server/grpc_server.cpp @@ -1,5 +1,5 @@ #include "grpc_server.h" -#include "grpc_proxy_status.h" +#include "grpc_proxy_status.h" #include <ydb/core/client/server/msgbus_server_persqueue.h> #include <ydb/core/grpc_services/grpc_helper.h> @@ -45,7 +45,7 @@ using TGrpcBaseAsyncContext = NGrpc::TBaseAsyncContext<NGRpcProxy::TGRpcService> template <typename TIn, typename TOut = TResponse> class TSimpleRequest : public IQueueEvent - , public TGrpcBaseAsyncContext + , public TGrpcBaseAsyncContext , public IRequestContext { using TOnRequest = std::function<void (IRequestContext* ctx)>; @@ -63,15 +63,15 @@ public: TActorSystem& as, const char* name, NGrpc::ICounterBlockPtr counters) - : TGrpcBaseAsyncContext(service, cq) - , Server(server) - , Cb(cb) - , RequestCallback(requestCallback) + : TGrpcBaseAsyncContext(service, cq) + , Server(server) + , Cb(cb) + , RequestCallback(requestCallback) , ActorSystem(as) , Name(name) , Counters(std::move(counters)) , Writer(new ServerAsyncResponseWriter<TOut>(&Context)) - , StateFunc(&TSimpleRequest::RequestDone) + , StateFunc(&TSimpleRequest::RequestDone) , RequestSize(0) , ResponseSize(0) , ResponseStatus(0) @@ -111,7 +111,7 @@ public: } bool Execute(bool ok) override { - return (this->*StateFunc)(ok); + return (this->*StateFunc)(ok); } void DestroyRequest() override { @@ -125,7 +125,7 @@ public: public: //! Get pointer to the request's message. const NProtoBuf::Message* GetRequest() const override { - return &Request; + return &Request; } //! Send reply. @@ -237,7 +237,7 @@ public: } NMsgBusProxy::TBusMessageContext BindBusContext(int type) override { - return BusContext.ConstructInPlace(this, type); + return BusContext.ConstructInPlace(this, type); } TString GetPeer() const override { @@ -305,11 +305,11 @@ private: Clone(); - if (!ok) { + if (!ok) { Counters->CountNotOkRequest(); - return false; - } - + return false; + } + if (Server->IncRequest()) { RequestSize = Request.ByteSize(); @@ -347,22 +347,22 @@ private: using TStateFunc = bool (TSimpleRequest::*)(bool); TGRpcService* const Server; - TOnRequest Cb; - TRequestCallback RequestCallback; + TOnRequest Cb; + TRequestCallback RequestCallback; TActorSystem& ActorSystem; const char* const Name; NGrpc::ICounterBlockPtr Counters; THolder<ServerAsyncResponseWriter<TOut>> Writer; - TStateFunc StateFunc; - TIn Request; + TStateFunc StateFunc; + TIn Request; ui32 RequestSize; ui32 ResponseSize; ui32 ResponseStatus; THPTimer RequestTimer; - TMaybe<NMsgBusProxy::TBusMessageContext> BusContext; + TMaybe<NMsgBusProxy::TBusMessageContext> BusContext; bool InProgress_; bool RequestRegistered_ = false; }; @@ -416,7 +416,7 @@ i64 TGRpcService::GetCurrentInFlight() const { void TGRpcService::Start() { Y_VERIFY(ActorSystem); - ui32 nodeId = ActorSystem->NodeId; + ui32 nodeId = ActorSystem->NodeId; ActorSystem->Send(MakeGRpcProxyStatusID(nodeId), new TEvGRpcProxyStatus::TEvSetup(true, PersQueueWriteSessionsMaxCount, PersQueueReadSessionsMaxCount)); SetupIncomingRequests(); @@ -445,7 +445,7 @@ void TGRpcService::SetupIncomingRequests() { RegisterRequestActor(CreateMessageBus ## NAME(msg)); \ }) - + // actor requests ADD_ACTOR_REQUEST(BSAdm, TBSAdm, MTYPE_CLIENT_BSADM) ADD_ACTOR_REQUEST(BlobStorageConfig, TBlobStorageConfigRequest, MTYPE_CLIENT_BLOB_STORAGE_CONFIG_REQUEST) @@ -459,7 +459,7 @@ void TGRpcService::SetupIncomingRequests() { ADD_ACTOR_REQUEST(SchemeOperationStatus, TSchemeOperationStatus, MTYPE_CLIENT_FLAT_TX_STATUS_REQUEST) ADD_ACTOR_REQUEST(BlobStorageLoadRequest, TBsTestLoadRequest, MTYPE_CLIENT_LOAD_REQUEST) ADD_ACTOR_REQUEST(BlobStorageGetRequest, TBsGetRequest, MTYPE_CLIENT_GET_REQUEST) - ADD_ACTOR_REQUEST(ChooseProxy, TChooseProxyRequest, MTYPE_CLIENT_CHOOSE_PROXY) + ADD_ACTOR_REQUEST(ChooseProxy, TChooseProxyRequest, MTYPE_CLIENT_CHOOSE_PROXY) ADD_ACTOR_REQUEST(WhoAmI, TWhoAmI, MTYPE_CLIENT_WHOAMI) ADD_ACTOR_REQUEST(ResolveNode, TResolveNodeRequest, MTYPE_CLIENT_RESOLVE_NODE) ADD_ACTOR_REQUEST(FillNode, TFillNodeRequest, MTYPE_CLIENT_FILL_NODE) @@ -499,9 +499,9 @@ void TGRpcService::SetupIncomingRequests() { #define ADD_PROXY_REQUEST_BASE(NAME, TYPE, RES_TYPE, EVENT_TYPE, MTYPE) \ ADD_REQUEST(NAME, TYPE, RES_TYPE, { \ - if (MsgBusProxy) { \ + if (MsgBusProxy) { \ NMsgBusProxy::TBusMessageContext msg(ctx->BindBusContext(NMsgBusProxy::MTYPE)); \ - ActorSystem->Send(MsgBusProxy, new NMsgBusProxy::EVENT_TYPE(msg)); \ + ActorSystem->Send(MsgBusProxy, new NMsgBusProxy::EVENT_TYPE(msg)); \ } else { \ ctx->ReplyError("no MessageBus proxy"); \ } \ @@ -513,11 +513,11 @@ void TGRpcService::SetupIncomingRequests() { // proxy requests ADD_PROXY_REQUEST(SchemeInitRoot, TSchemeInitRoot, TEvBusProxy::TEvInitRoot, MTYPE_CLIENT_SCHEME_INITROOT) - ADD_PROXY_REQUEST(PersQueueRequest, TPersQueueRequest, TEvBusProxy::TEvPersQueue, MTYPE_CLIENT_PERSQUEUE) - ADD_PROXY_REQUEST(Request, TRequest, TEvBusProxy::TEvRequest, MTYPE_CLIENT_REQUEST) - ADD_PROXY_REQUEST(SchemeOperation, TSchemeOperation, TEvBusProxy::TEvFlatTxRequest, MTYPE_CLIENT_FLAT_TX_REQUEST) - ADD_PROXY_REQUEST(SchemeDescribe, TSchemeDescribe, TEvBusProxy::TEvFlatDescribeRequest, MTYPE_CLIENT_FLAT_DESCRIBE_REQUEST) - + ADD_PROXY_REQUEST(PersQueueRequest, TPersQueueRequest, TEvBusProxy::TEvPersQueue, MTYPE_CLIENT_PERSQUEUE) + ADD_PROXY_REQUEST(Request, TRequest, TEvBusProxy::TEvRequest, MTYPE_CLIENT_REQUEST) + ADD_PROXY_REQUEST(SchemeOperation, TSchemeOperation, TEvBusProxy::TEvFlatTxRequest, MTYPE_CLIENT_FLAT_TX_REQUEST) + ADD_PROXY_REQUEST(SchemeDescribe, TSchemeDescribe, TEvBusProxy::TEvFlatDescribeRequest, MTYPE_CLIENT_FLAT_DESCRIBE_REQUEST) + #define ADD_PROXY_REQUEST_JJ(NAME, EVENT_TYPE, MTYPE) \ ADD_PROXY_REQUEST_BASE(NAME, TJSON, TJSON, EVENT_TYPE, MTYPE) diff --git a/ydb/core/client/server/grpc_server.h b/ydb/core/client/server/grpc_server.h index 0c3ac495ed..2f2ae1da6b 100644 --- a/ydb/core/client/server/grpc_server.h +++ b/ydb/core/client/server/grpc_server.h @@ -86,7 +86,7 @@ private: size_t PersQueueWriteSessionsMaxCount = 1000000; size_t PersQueueReadSessionsMaxCount = 100000; - TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; + TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; std::function<void()> InitCb_; // In flight request management. diff --git a/ydb/core/client/server/msgbus_server.cpp b/ydb/core/client/server/msgbus_server.cpp index 69af44bfc8..8a4d8a3bbd 100644 --- a/ydb/core/client/server/msgbus_server.cpp +++ b/ydb/core/client/server/msgbus_server.cpp @@ -121,7 +121,7 @@ public: MTYPE(TBusBlobStorageConfigRequest) MTYPE(TBusNodeRegistrationRequest) MTYPE(TBusCmsRequest) - MTYPE(TBusChooseProxy) + MTYPE(TBusChooseProxy) MTYPE(TBusSqsRequest) MTYPE(TBusWhoAmI) MTYPE(TBusStreamRequest) @@ -478,7 +478,7 @@ void TMessageBusServer::OnMessage(NBus::TOnMessageContext &msg) { void TMessageBusServer::OnMessage(TBusMessageContext &msg) { const ui32 msgType = msg.GetMessage()->GetHeader()->Type; - + switch (msgType) { case MTYPE_CLIENT_REQUEST: return ClientProxyRequest<TEvBusProxy::TEvRequest>(msg); @@ -499,10 +499,10 @@ void TMessageBusServer::OnMessage(TBusMessageContext &msg) { case MTYPE_CLIENT_KEYVALUE: case MTYPE_CLIENT_OLD_KEYVALUE: return ClientActorRequest(CreateMessageBusKeyValue, msg); - case MTYPE_CLIENT_PERSQUEUE: + case MTYPE_CLIENT_PERSQUEUE: return ClientProxyRequest<TEvBusProxy::TEvPersQueue>(msg); - case MTYPE_CLIENT_CHOOSE_PROXY: - return ClientActorRequest(CreateMessageBusChooseProxy, msg); + case MTYPE_CLIENT_CHOOSE_PROXY: + return ClientActorRequest(CreateMessageBusChooseProxy, msg); case MTYPE_CLIENT_TABLET_STATE_REQUEST: return ClientActorRequest(CreateMessageBusTabletStateRequest, msg); case MTYPE_CLIENT_TABLET_COUNTERS_REQUEST: diff --git a/ydb/core/client/server/msgbus_server.h b/ydb/core/client/server/msgbus_server.h index 5c9d2de549..3d40fc9001 100644 --- a/ydb/core/client/server/msgbus_server.h +++ b/ydb/core/client/server/msgbus_server.h @@ -96,12 +96,12 @@ struct TEvBusProxy { EvNavigate, EvFlatTxRequest, EvFlatDescribeRequest, - EvPersQueue, + EvPersQueue, EvDbSchema, EvDbOperation, EvDbBatch, EvInitRoot, - EvChooseProxy, + EvChooseProxy, EvStreamIsReadyNotUsed = EvRequest + 512, EvStreamIsDeadNotUsed, @@ -122,8 +122,8 @@ struct TEvBusProxy { typedef TEvMsgBusRequest<EvNavigate> TEvNavigate; typedef TEvMsgBusRequest<EvFlatTxRequest> TEvFlatTxRequest; typedef TEvMsgBusRequest<EvFlatDescribeRequest> TEvFlatDescribeRequest; - typedef TEvMsgBusRequest<EvPersQueue> TEvPersQueue; - typedef TEvMsgBusRequest<EvChooseProxy> TEvChooseProxy; + typedef TEvMsgBusRequest<EvPersQueue> TEvPersQueue; + typedef TEvMsgBusRequest<EvChooseProxy> TEvChooseProxy; typedef TEvMsgBusRequest<EvDbSchema> TEvDbSchema; typedef TEvMsgBusRequest<EvDbOperation> TEvDbOperation; typedef TEvMsgBusRequest<EvDbBatch> TEvDbBatch; @@ -289,8 +289,8 @@ IActor* CreateMessageBusGetTypes(TBusMessageContext &msg); IActor* CreateMessageBusHiveCreateTablet(TBusMessageContext &msg); IActor* CreateMessageBusLocalEnumerateTablets(TBusMessageContext &msg); IActor* CreateMessageBusKeyValue(TBusMessageContext &msg); -IActor* CreateMessageBusPersQueue(TBusMessageContext &msg); -IActor* CreateMessageBusChooseProxy(TBusMessageContext &msg); +IActor* CreateMessageBusPersQueue(TBusMessageContext &msg); +IActor* CreateMessageBusChooseProxy(TBusMessageContext &msg); IActor* CreateMessageBusTabletStateRequest(TBusMessageContext &msg); IActor* CreateMessageBusTabletKillRequest(TBusMessageContext &msg); IActor* CreateMessageBusSchemeOperationStatus(TBusMessageContext &msg); diff --git a/ydb/core/client/server/msgbus_server_persqueue.cpp b/ydb/core/client/server/msgbus_server_persqueue.cpp index 9a456c3482..78b83ef3a3 100644 --- a/ydb/core/client/server/msgbus_server_persqueue.cpp +++ b/ydb/core/client/server/msgbus_server_persqueue.cpp @@ -1,6 +1,6 @@ -#include "msgbus_tabletreq.h" - -#include "msgbus_server_persqueue.h" +#include "msgbus_tabletreq.h" + +#include "msgbus_server_persqueue.h" #include "msgbus_server_pq_metacache.h" #include "msgbus_server_pq_metarequest.h" #include <library/cpp/actors/core/interconnect.h> @@ -8,30 +8,30 @@ #include <ydb/core/persqueue/events/global.h> #include <ydb/core/base/appdata.h> #include <ydb/core/tx/tx_proxy/proxy.h> - + #include <util/generic/is_in.h> -namespace NKikimr { -namespace NMsgBusProxy { - +namespace NKikimr { +namespace NMsgBusProxy { + using namespace NSchemeCache; using namespace NPqMetaCacheV2; const TDuration TPersQueueBaseRequestProcessor::TIMEOUT = TDuration::MilliSeconds(90000); -namespace { - const ui32 DefaultTimeout = 90000; - const TDuration CHECK_INFLY_SEMAPHORE_DURATION = TDuration::Seconds(1); - const ui32 MAX_INFLY = 100000; -} - -static TAtomic Infly = 0; - -const TString& TopicPrefix(const TActorContext& ctx) { - static const TString topicPrefix = AppData(ctx)->PQConfig.GetRoot() + "/"; - return topicPrefix; -} - +namespace { + const ui32 DefaultTimeout = 90000; + const TDuration CHECK_INFLY_SEMAPHORE_DURATION = TDuration::Seconds(1); + const ui32 MAX_INFLY = 100000; +} + +static TAtomic Infly = 0; + +const TString& TopicPrefix(const TActorContext& ctx) { + static const TString topicPrefix = AppData(ctx)->PQConfig.GetRoot() + "/"; + return topicPrefix; +} + TProcessingResult ProcessMetaCacheAllTopicsResponse(TEvPqNewMetaCache::TEvDescribeAllTopicsResponse::TPtr& ev) { auto *res = ev->Get()->Result.Get(); const TString& path = ev->Get()->Path; @@ -130,40 +130,40 @@ TProcessingResult ProcessMetaCacheSingleTopicsResponse( return {}; } -NKikimrClient::TResponse CreateErrorReply(EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) { +NKikimrClient::TResponse CreateErrorReply(EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) { NKikimrClient::TResponse rec; rec.SetStatus(status); rec.SetErrorCode(code); rec.SetErrorReason(errorReason); return rec; } - -struct TTopicInfo { + +struct TTopicInfo { TVector<ui64> Tablets; THashMap<ui32, ui64> PartitionToTablet; - ui64 BalancerTabletId = 0; - + ui64 BalancerTabletId = 0; + THolder<NKikimrPQ::TReadSessionsInfoResponse> ReadSessionsInfo; - - NKikimrPQ::TPQTabletConfig Config; + + NKikimrPQ::TPQTabletConfig Config; TIntrusiveConstPtr<TSchemeCacheNavigate::TPQGroupInfo> PQInfo; - ui32 NumParts = 0; + ui32 NumParts = 0; THashSet<ui32> PartitionsToRequest; - - //fetchRequest part + + //fetchRequest part THashMap<ui32, TAutoPtr<TEvPersQueue::TEvHasDataInfo>> FetchInfo; -}; - -struct TTabletInfo { - ui32 NodeId = 0; +}; + +struct TTabletInfo { + ui32 NodeId = 0; TString Topic; TActorId PipeClient; - bool BrokenPipe = false; - bool IsBalancer = false; + bool BrokenPipe = false; + bool IsBalancer = false; TVector<NKikimrPQ::TOffsetsResponse::TPartResult> OffsetResponses; TVector<NKikimrPQ::TStatusResponse::TPartResult> StatusResponses; -}; - +}; + TPersQueueBaseRequestProcessor::TPersQueueBaseRequestProcessor(const NKikimrClient::TPersQueueRequest& request, const TActorId& pqMetaCacheId, bool listNodes) : RequestProto(new NKikimrClient::TPersQueueRequest(request)) , RequestId(RequestProto->HasRequestId() ? RequestProto->GetRequestId() : "<none>") @@ -171,35 +171,35 @@ TPersQueueBaseRequestProcessor::TPersQueueBaseRequestProcessor(const NKikimrClie , ListNodes(listNodes) { } - -void TPersQueueBaseRequestProcessor::SendErrorReplyAndDie(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) { + +void TPersQueueBaseRequestProcessor::SendErrorReplyAndDie(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) { SendReplyAndDie(CreateErrorReply(status, code, errorReason), ctx); } - -bool TPersQueueBaseRequestProcessor::ReadyForAnswer(const TActorContext& ) { - return ReadyToCreateChildren() && NeedChildrenCreation == false && ChildrenAnswered == Children.size(); + +bool TPersQueueBaseRequestProcessor::ReadyForAnswer(const TActorContext& ) { + return ReadyToCreateChildren() && NeedChildrenCreation == false && ChildrenAnswered == Children.size(); } void TPersQueueBaseRequestProcessor::AnswerAndDie(const TActorContext& ctx) { try { SendReplyAndDie(MergeSubactorReplies(), ctx); } catch (const std::exception& ex) { - SendErrorReplyAndDie(ctx, MSTATUS_ERROR, NPersQueue::NErrorCode::ERROR, ex.what()); + SendErrorReplyAndDie(ctx, MSTATUS_ERROR, NPersQueue::NErrorCode::ERROR, ex.what()); } } void TPersQueueBaseRequestProcessor::Bootstrap(const TActorContext& ctx) { LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "proxy got request " << RequestId); - StartTimestamp = ctx.Now(); + StartTimestamp = ctx.Now(); ctx.Send(PqMetaCache, new NPqMetaCacheV2::TEvPqNewMetaCache::TEvDescribeAllTopicsRequest(TopicPrefix(ctx))); - + if (ListNodes) { const TActorId nameserviceId = GetNameserviceActorId(); ctx.Send(nameserviceId, new TEvInterconnect::TEvListNodes()); } - Become(&TPersQueueBaseRequestProcessor::StateFunc, ctx, CHECK_INFLY_SEMAPHORE_DURATION, new TEvents::TEvWakeup()); + Become(&TPersQueueBaseRequestProcessor::StateFunc, ctx, CHECK_INFLY_SEMAPHORE_DURATION, new TEvents::TEvWakeup()); } void TPersQueueBaseRequestProcessor::Die(const TActorContext& ctx) { @@ -233,7 +233,7 @@ void TPersQueueBaseRequestProcessor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, ++ChildrenAnswered; Y_VERIFY(ChildrenAnswered <= Children.size()); - if (ReadyForAnswer(ctx)) { + if (ReadyForAnswer(ctx)) { return AnswerAndDie(ctx); } } @@ -249,17 +249,17 @@ void TPersQueueBaseRequestProcessor::Handle(TEvInterconnect::TEvNodesInfo::TPtr& } void TPersQueueBaseRequestProcessor::HandleTimeout(const TActorContext& ctx) { - if (ctx.Now() - StartTimestamp > TIMEOUT) { - SendErrorReplyAndDie(ctx, MSTATUS_TIMEOUT, NPersQueue::NErrorCode::ERROR, "Timeout while waiting for response, may be just slow, Marker# PQ16"); - return; - } - if (NeedChildrenCreation) { - CreateChildrenIfNeeded(ctx); - } - ctx.Schedule(CHECK_INFLY_SEMAPHORE_DURATION, new TEvents::TEvWakeup()); + if (ctx.Now() - StartTimestamp > TIMEOUT) { + SendErrorReplyAndDie(ctx, MSTATUS_TIMEOUT, NPersQueue::NErrorCode::ERROR, "Timeout while waiting for response, may be just slow, Marker# PQ16"); + return; + } + if (NeedChildrenCreation) { + CreateChildrenIfNeeded(ctx); + } + ctx.Schedule(CHECK_INFLY_SEMAPHORE_DURATION, new TEvents::TEvWakeup()); } - + void TPersQueueBaseRequestProcessor::GetTopicsListOrThrow(const ::google::protobuf::RepeatedPtrField<::NKikimrClient::TPersQueueMetaRequest::TTopicRequest>& requests, THashMap<TString, std::shared_ptr<THashSet<ui64>>>& partitionsToRequest) { for (const auto& topicRequest : requests) { if (topicRequest.GetTopic().empty()) { @@ -298,7 +298,7 @@ void TPersQueueBaseRequestProcessor::Handle( ) { auto& path = ev->Get()->Path; if (!ev->Get()->Success) { - return SendErrorReplyAndDie(ctx, MSTATUS_ERROR, NPersQueue::NErrorCode::UNKNOWN_TOPIC, + return SendErrorReplyAndDie(ctx, MSTATUS_ERROR, NPersQueue::NErrorCode::UNKNOWN_TOPIC, TStringBuilder() << "no path '" << path << "', Marker# PQ17"); } @@ -330,43 +330,43 @@ bool TPersQueueBaseRequestProcessor::CreateChildren(const TActorContext& ctx) { if (!TopicsToRequest.empty() && !IsIn(TopicsToRequest, name)) { continue; } - ChildrenToCreate.emplace_back(new TPerTopicInfo(child)); + ChildrenToCreate.emplace_back(new TPerTopicInfo(child)); } } - NeedChildrenCreation = true; - return CreateChildrenIfNeeded(ctx); -} - - -TPersQueueBaseRequestProcessor::~TPersQueueBaseRequestProcessor() { - AtomicSub(Infly, ChildrenCreated); -} - -bool TPersQueueBaseRequestProcessor::CreateChildrenIfNeeded(const TActorContext& ctx) { - Y_VERIFY(NeedChildrenCreation); - - if (AtomicAdd(Infly, ChildrenToCreate.size()) > MAX_INFLY) { - AtomicSub(Infly, ChildrenToCreate.size()); - return false; - } - - ChildrenCreated = ChildrenToCreate.size(); - NeedChildrenCreation = false; - - THashSet<TString> topics; - while (!ChildrenToCreate.empty()) { - THolder<TPerTopicInfo> perTopicInfo(ChildrenToCreate.front().Release()); - ChildrenToCreate.pop_front(); + NeedChildrenCreation = true; + return CreateChildrenIfNeeded(ctx); +} + + +TPersQueueBaseRequestProcessor::~TPersQueueBaseRequestProcessor() { + AtomicSub(Infly, ChildrenCreated); +} + +bool TPersQueueBaseRequestProcessor::CreateChildrenIfNeeded(const TActorContext& ctx) { + Y_VERIFY(NeedChildrenCreation); + + if (AtomicAdd(Infly, ChildrenToCreate.size()) > MAX_INFLY) { + AtomicSub(Infly, ChildrenToCreate.size()); + return false; + } + + ChildrenCreated = ChildrenToCreate.size(); + NeedChildrenCreation = false; + + THashSet<TString> topics; + while (!ChildrenToCreate.empty()) { + THolder<TPerTopicInfo> perTopicInfo(ChildrenToCreate.front().Release()); + ChildrenToCreate.pop_front(); const auto& name = perTopicInfo->TopicEntry.PQGroupInfo->Description.GetName(); THolder<IActor> childActor = CreateTopicSubactor(perTopicInfo->TopicEntry, name); - if (childActor.Get() != nullptr) { + if (childActor.Get() != nullptr) { const TActorId actorId = ctx.Register(childActor.Release()); - perTopicInfo->ActorId = actorId; + perTopicInfo->ActorId = actorId; topics.emplace(name); - Children.emplace(actorId, std::move(perTopicInfo)); - } - } - + Children.emplace(actorId, std::move(perTopicInfo)); + } + } + Y_VERIFY(topics.size() == Children.size()); if (!TopicsToRequest.empty() && TopicsToRequest.size() != topics.size()) { @@ -379,11 +379,11 @@ bool TPersQueueBaseRequestProcessor::CreateChildrenIfNeeded(const TActorContext& errorDesc << topic << ", "; } } - SendErrorReplyAndDie(ctx, MSTATUS_ERROR, NPersQueue::NErrorCode::UNKNOWN_TOPIC, + SendErrorReplyAndDie(ctx, MSTATUS_ERROR, NPersQueue::NErrorCode::UNKNOWN_TOPIC, errorDesc << "Marker# PQ95"); return true; } - if (ReadyForAnswer(ctx)) { + if (ReadyForAnswer(ctx)) { AnswerAndDie(ctx); return true; } @@ -393,7 +393,7 @@ bool TPersQueueBaseRequestProcessor::CreateChildrenIfNeeded(const TActorContext& NKikimrClient::TResponse TPersQueueBaseRequestProcessor::MergeSubactorReplies() { NKikimrClient::TResponse response; response.SetStatus(MSTATUS_OK); // We need to have status event if we have no children - response.SetErrorCode(NPersQueue::NErrorCode::OK); + response.SetErrorCode(NPersQueue::NErrorCode::OK); for (const auto& child : Children) { response.MergeFrom(child.second->Response); } @@ -435,455 +435,455 @@ class TMessageBusServerPersQueueImpl : public TActorBootstrapped<TMessageBusServ using TEvAllTopicsDescribeRequest = NMsgBusProxy::NPqMetaCacheV2::TEvPqNewMetaCache::TEvDescribeAllTopicsRequest; using TEvAllTopicsDescribeResponse = NMsgBusProxy::NPqMetaCacheV2::TEvPqNewMetaCache::TEvDescribeAllTopicsResponse; -protected: +protected: NKikimrClient::TPersQueueRequest RequestProto; const TString RequestId; const bool IsMetaRequest; const bool IsFetchRequest; - - bool CanProcessFetchRequest; //any partitions answered that it has data or WaitMs timeout occured - ui32 FetchRequestReadsDone; - ui64 FetchRequestCurrentReadTablet; //if zero then no read at this time - ui64 CurrentCookie; - ui32 FetchRequestBytesLeft; + + bool CanProcessFetchRequest; //any partitions answered that it has data or WaitMs timeout occured + ui32 FetchRequestReadsDone; + ui64 FetchRequestCurrentReadTablet; //if zero then no read at this time + ui64 CurrentCookie; + ui32 FetchRequestBytesLeft; NKikimrClient::TPersQueueFetchResponse FetchResponse; TVector<TActorId> PQClient; const TActorId SchemeCache; - - TAutoPtr<TEvInterconnect::TEvNodesInfo> NodesInfo; - + + TAutoPtr<TEvInterconnect::TEvNodesInfo> NodesInfo; + THashMap<TString, TTopicInfo> TopicInfo; THashMap<ui64, TTabletInfo> TabletInfo; - - ui32 TopicsAnswered; + + ui32 TopicsAnswered; THashSet<ui64> TabletsDiscovered; THashSet<ui64> TabletsAnswered; ui32 AclRequests; - ui32 DescribeRequests; - ui32 PartTabletsRequested; + ui32 DescribeRequests; + ui32 PartTabletsRequested; TString ErrorReason; - bool NoTopicsAtStart; + bool NoTopicsAtStart; -public: +public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::MSGBUS_PROXY_ACTOR; } - - virtual ~TMessageBusServerPersQueueImpl() {} - + + virtual ~TMessageBusServerPersQueueImpl() {} + virtual void SendReplyAndDie(NKikimrClient::TResponse&& response, const TActorContext& ctx) = 0; - + TMessageBusServerPersQueueImpl(const NKikimrClient::TPersQueueRequest& request, const TActorId& schemeCache) - : RequestProto(request) - , RequestId(RequestProto.HasRequestId() ? RequestProto.GetRequestId() : "<none>") - , IsMetaRequest(RequestProto.HasMetaRequest()) - , IsFetchRequest(RequestProto.HasFetchRequest()) - , CanProcessFetchRequest(false) - , FetchRequestReadsDone(0) - , FetchRequestCurrentReadTablet(0) - , CurrentCookie(1) - , FetchRequestBytesLeft(0) - , SchemeCache(schemeCache) - , TopicsAnswered(0) - , AclRequests(0) - , DescribeRequests(0) - , PartTabletsRequested(0) - , NoTopicsAtStart(true) - { - const auto& record = RequestProto; - - if (record.HasMetaRequest() + record.HasPartitionRequest() + record.HasFetchRequest() > 1) { - ErrorReason = "only one from meta partition or fetch requests must be filled"; - return; - } - if (record.HasMetaRequest()) { + : RequestProto(request) + , RequestId(RequestProto.HasRequestId() ? RequestProto.GetRequestId() : "<none>") + , IsMetaRequest(RequestProto.HasMetaRequest()) + , IsFetchRequest(RequestProto.HasFetchRequest()) + , CanProcessFetchRequest(false) + , FetchRequestReadsDone(0) + , FetchRequestCurrentReadTablet(0) + , CurrentCookie(1) + , FetchRequestBytesLeft(0) + , SchemeCache(schemeCache) + , TopicsAnswered(0) + , AclRequests(0) + , DescribeRequests(0) + , PartTabletsRequested(0) + , NoTopicsAtStart(true) + { + const auto& record = RequestProto; + + if (record.HasMetaRequest() + record.HasPartitionRequest() + record.HasFetchRequest() > 1) { + ErrorReason = "only one from meta partition or fetch requests must be filled"; + return; + } + if (record.HasMetaRequest()) { Y_VERIFY(IsMetaRequest); - auto& meta = record.GetMetaRequest(); - ui32 count = meta.HasCmdGetPartitionLocations() + meta.HasCmdGetPartitionOffsets() + - meta.HasCmdGetTopicMetadata() + meta.HasCmdGetPartitionStatus() + meta.HasCmdGetReadSessionsInfo(); - if (count != 1) { - ErrorReason = "multiple or none requests in MetaRequest"; - return; - } - if (meta.HasCmdGetPartitionLocations()) { + auto& meta = record.GetMetaRequest(); + ui32 count = meta.HasCmdGetPartitionLocations() + meta.HasCmdGetPartitionOffsets() + + meta.HasCmdGetTopicMetadata() + meta.HasCmdGetPartitionStatus() + meta.HasCmdGetReadSessionsInfo(); + if (count != 1) { + ErrorReason = "multiple or none requests in MetaRequest"; + return; + } + if (meta.HasCmdGetPartitionLocations()) { if (!GetTopicsList(meta.GetCmdGetPartitionLocations().topicrequest())) - return; - } else if (meta.HasCmdGetPartitionOffsets()) { + return; + } else if (meta.HasCmdGetPartitionOffsets()) { if (!GetTopicsList(meta.GetCmdGetPartitionOffsets().topicrequest())) - return; - } else if (meta.HasCmdGetTopicMetadata()) { - auto& d = meta.GetCmdGetTopicMetadata(); - for (ui32 i = 0; i < d.TopicSize(); ++i) { - if (d.GetTopic(i).empty()) { - ErrorReason = "empty topic in GetTopicMetadata request"; - return; - } - TopicInfo[d.GetTopic(i)]; - } - } else if (meta.HasCmdGetPartitionStatus()) { + return; + } else if (meta.HasCmdGetTopicMetadata()) { + auto& d = meta.GetCmdGetTopicMetadata(); + for (ui32 i = 0; i < d.TopicSize(); ++i) { + if (d.GetTopic(i).empty()) { + ErrorReason = "empty topic in GetTopicMetadata request"; + return; + } + TopicInfo[d.GetTopic(i)]; + } + } else if (meta.HasCmdGetPartitionStatus()) { if (!GetTopicsList(meta.GetCmdGetPartitionStatus().topicrequest())) - return; - } else if (meta.HasCmdGetReadSessionsInfo()) { - auto& d = meta.GetCmdGetReadSessionsInfo(); - for (ui32 i = 0; i < d.TopicSize(); ++i) { - if (d.GetTopic(i).empty()) { + return; + } else if (meta.HasCmdGetReadSessionsInfo()) { + auto& d = meta.GetCmdGetReadSessionsInfo(); + for (ui32 i = 0; i < d.TopicSize(); ++i) { + if (d.GetTopic(i).empty()) { ErrorReason = "empty topic in GetReadSessionsInfo request"; - return; - } - TopicInfo[d.GetTopic(i)]; - } - } - else - ErrorReason = "Not implemented yet"; - } else if (record.HasPartitionRequest()) { - auto& part = record.GetPartitionRequest(); - if (!part.HasTopic() || !part.HasPartition() || part.GetTopic().empty()) { - ErrorReason = "no Topic or Partition in PartitionRequest"; - return; - } - TopicInfo[part.GetTopic()].PartitionsToRequest.insert(part.GetPartition()); - } else if (record.HasFetchRequest()) { - auto& fetch = record.GetFetchRequest(); - ui64 deadline = TAppData::TimeProvider->Now().MilliSeconds() + Min<ui32>(fetch.GetWaitMs(), 30000); - if (!fetch.HasWaitMs() || !fetch.HasTotalMaxBytes() || !fetch.HasClientId()) { - ErrorReason = "no WaitMs, TotalMaxBytes or ClientId in FetchRequest"; - return; - } - FetchRequestBytesLeft = fetch.GetTotalMaxBytes(); - for (ui32 i = 0; i < fetch.PartitionSize(); ++i) { - auto& part = fetch.GetPartition(i); - if (!part.HasTopic() || part.GetTopic().empty() || !part.HasPartition() || !part.HasOffset() || !part.HasMaxBytes()) { - ErrorReason = "no Topic, Partition, Offset or MaxBytes in FetchRequest::Partition"; - return; - } - bool res = TopicInfo[part.GetTopic()].PartitionsToRequest.insert(part.GetPartition()).second; - if (!res) { - ErrorReason = "same partition specified multiple times"; - return; - } - TAutoPtr<TEvPersQueue::TEvHasDataInfo> fetchInfo(new TEvPersQueue::TEvHasDataInfo()); - fetchInfo->Record.SetPartition(part.GetPartition()); - fetchInfo->Record.SetOffset(part.GetOffset()); - fetchInfo->Record.SetDeadline(deadline); - fetchInfo->Record.SetClientId(fetch.GetClientId()); - TopicInfo[part.GetTopic()].FetchInfo[part.GetPartition()] = fetchInfo; - } - } else { - ErrorReason = "empty request"; - } - } - - - //returns true if answered - void AnswerGetPartitionLocations(const TActorContext& ctx) - { - auto& meta = RequestProto.GetMetaRequest(); - + return; + } + TopicInfo[d.GetTopic(i)]; + } + } + else + ErrorReason = "Not implemented yet"; + } else if (record.HasPartitionRequest()) { + auto& part = record.GetPartitionRequest(); + if (!part.HasTopic() || !part.HasPartition() || part.GetTopic().empty()) { + ErrorReason = "no Topic or Partition in PartitionRequest"; + return; + } + TopicInfo[part.GetTopic()].PartitionsToRequest.insert(part.GetPartition()); + } else if (record.HasFetchRequest()) { + auto& fetch = record.GetFetchRequest(); + ui64 deadline = TAppData::TimeProvider->Now().MilliSeconds() + Min<ui32>(fetch.GetWaitMs(), 30000); + if (!fetch.HasWaitMs() || !fetch.HasTotalMaxBytes() || !fetch.HasClientId()) { + ErrorReason = "no WaitMs, TotalMaxBytes or ClientId in FetchRequest"; + return; + } + FetchRequestBytesLeft = fetch.GetTotalMaxBytes(); + for (ui32 i = 0; i < fetch.PartitionSize(); ++i) { + auto& part = fetch.GetPartition(i); + if (!part.HasTopic() || part.GetTopic().empty() || !part.HasPartition() || !part.HasOffset() || !part.HasMaxBytes()) { + ErrorReason = "no Topic, Partition, Offset or MaxBytes in FetchRequest::Partition"; + return; + } + bool res = TopicInfo[part.GetTopic()].PartitionsToRequest.insert(part.GetPartition()).second; + if (!res) { + ErrorReason = "same partition specified multiple times"; + return; + } + TAutoPtr<TEvPersQueue::TEvHasDataInfo> fetchInfo(new TEvPersQueue::TEvHasDataInfo()); + fetchInfo->Record.SetPartition(part.GetPartition()); + fetchInfo->Record.SetOffset(part.GetOffset()); + fetchInfo->Record.SetDeadline(deadline); + fetchInfo->Record.SetClientId(fetch.GetClientId()); + TopicInfo[part.GetTopic()].FetchInfo[part.GetPartition()] = fetchInfo; + } + } else { + ErrorReason = "empty request"; + } + } + + + //returns true if answered + void AnswerGetPartitionLocations(const TActorContext& ctx) + { + auto& meta = RequestProto.GetMetaRequest(); + THashMap<ui32, TString> hostName(NodesInfo->Nodes.size()); for (const auto& n : NodesInfo->Nodes) hostName.insert(std::make_pair(n.NodeId, n.Host)); - - - NKikimrClient::TResponse record; - record.SetStatus(MSTATUS_OK); - record.SetErrorCode(NPersQueue::NErrorCode::OK); - auto res = record.MutableMetaResponse()->MutableCmdGetPartitionLocationsResult(); + + + NKikimrClient::TResponse record; + record.SetStatus(MSTATUS_OK); + record.SetErrorCode(NPersQueue::NErrorCode::OK); + auto res = record.MutableMetaResponse()->MutableCmdGetPartitionLocationsResult(); for (const auto& p : TopicInfo) { - auto topicResult = res->AddTopicResult(); - topicResult->SetTopic(p.first); - if (p.second.PartitionToTablet.empty()) { - topicResult->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); - } else { - topicResult->SetErrorCode(NPersQueue::NErrorCode::OK); - } + auto topicResult = res->AddTopicResult(); + topicResult->SetTopic(p.first); + if (p.second.PartitionToTablet.empty()) { + topicResult->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); + } else { + topicResult->SetErrorCode(NPersQueue::NErrorCode::OK); + } for (const auto& pp : p.second.PartitionToTablet) { - auto it = TabletInfo.find(pp.second); - Y_VERIFY(it != TabletInfo.end()); - auto ht = hostName.find(it->second.NodeId); - if (ht != hostName.end()) { - if (meta.GetCmdGetPartitionLocations().HasHost() && meta.GetCmdGetPartitionLocations().GetHost() != ht->second) { - continue; - } - auto partResult = topicResult->AddPartitionLocation(); - partResult->SetPartition(pp.first); - - partResult->SetHostId(it->second.NodeId); - partResult->SetHost(ht->second); - partResult->SetErrorCode(NPersQueue::NErrorCode::OK); - } else { - auto partResult = topicResult->AddPartitionLocation(); - partResult->SetPartition(pp.first); - - partResult->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); - partResult->SetErrorReason("Tablet for that partition is no running"); - } - } - } + auto it = TabletInfo.find(pp.second); + Y_VERIFY(it != TabletInfo.end()); + auto ht = hostName.find(it->second.NodeId); + if (ht != hostName.end()) { + if (meta.GetCmdGetPartitionLocations().HasHost() && meta.GetCmdGetPartitionLocations().GetHost() != ht->second) { + continue; + } + auto partResult = topicResult->AddPartitionLocation(); + partResult->SetPartition(pp.first); + + partResult->SetHostId(it->second.NodeId); + partResult->SetHost(ht->second); + partResult->SetErrorCode(NPersQueue::NErrorCode::OK); + } else { + auto partResult = topicResult->AddPartitionLocation(); + partResult->SetPartition(pp.first); + + partResult->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); + partResult->SetErrorReason("Tablet for that partition is no running"); + } + } + } SendReplyAndDie(std::move(record), ctx); - } - - void AnswerGetTopicMetadata(const TActorContext& ctx) - { - NKikimrClient::TResponse record; - record.SetStatus(MSTATUS_OK); - record.SetErrorCode(NPersQueue::NErrorCode::OK); - auto res = record.MutableMetaResponse()->MutableCmdGetTopicMetadataResult(); - for (const auto& ti : TopicInfo) { + } + + void AnswerGetTopicMetadata(const TActorContext& ctx) + { + NKikimrClient::TResponse record; + record.SetStatus(MSTATUS_OK); + record.SetErrorCode(NPersQueue::NErrorCode::OK); + auto res = record.MutableMetaResponse()->MutableCmdGetTopicMetadataResult(); + for (const auto& ti : TopicInfo) { auto topicInfo = res->AddTopicInfo(); - topicInfo->SetTopic(ti.first); - topicInfo->MutableConfig()->CopyFrom(ti.second.Config); - topicInfo->SetNumPartitions(ti.second.NumParts); - if (ti.second.NumParts == 0) { - topicInfo->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); - } else { - topicInfo->SetErrorCode(NPersQueue::NErrorCode::OK); - } - - } + topicInfo->SetTopic(ti.first); + topicInfo->MutableConfig()->CopyFrom(ti.second.Config); + topicInfo->SetNumPartitions(ti.second.NumParts); + if (ti.second.NumParts == 0) { + topicInfo->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); + } else { + topicInfo->SetErrorCode(NPersQueue::NErrorCode::OK); + } + + } SendReplyAndDie(std::move(record), ctx); - } - - void AnswerGetPartitionOffsets(const TActorContext& ctx) - { - NKikimrClient::TResponse record; - record.SetStatus(MSTATUS_OK); - record.SetErrorCode(NPersQueue::NErrorCode::OK); - auto offs = record.MutableMetaResponse()->MutableCmdGetPartitionOffsetsResult(); - - for (auto& p : TopicInfo) { - auto topicResult = offs->AddTopicResult(); - topicResult->SetTopic(p.first); - if (p.second.PartitionToTablet.empty()) { - topicResult->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); - } else { - topicResult->SetErrorCode(NPersQueue::NErrorCode::OK); - } + } + + void AnswerGetPartitionOffsets(const TActorContext& ctx) + { + NKikimrClient::TResponse record; + record.SetStatus(MSTATUS_OK); + record.SetErrorCode(NPersQueue::NErrorCode::OK); + auto offs = record.MutableMetaResponse()->MutableCmdGetPartitionOffsetsResult(); + + for (auto& p : TopicInfo) { + auto topicResult = offs->AddTopicResult(); + topicResult->SetTopic(p.first); + if (p.second.PartitionToTablet.empty()) { + topicResult->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); + } else { + topicResult->SetErrorCode(NPersQueue::NErrorCode::OK); + } for (const auto& tablet: p.second.Tablets) { - auto it = TabletInfo.find(tablet); - Y_VERIFY(it != TabletInfo.end()); - for (const auto& r : it->second.OffsetResponses) { - if (p.second.PartitionToTablet.find(r.GetPartition()) == p.second.PartitionToTablet.end()) - continue; - p.second.PartitionToTablet.erase(r.GetPartition()); - auto res = topicResult->AddPartitionResult(); - res->CopyFrom(r); - } - } + auto it = TabletInfo.find(tablet); + Y_VERIFY(it != TabletInfo.end()); + for (const auto& r : it->second.OffsetResponses) { + if (p.second.PartitionToTablet.find(r.GetPartition()) == p.second.PartitionToTablet.end()) + continue; + p.second.PartitionToTablet.erase(r.GetPartition()); + auto res = topicResult->AddPartitionResult(); + res->CopyFrom(r); + } + } for (const auto& part : p.second.PartitionToTablet) { - auto res = topicResult->AddPartitionResult(); - res->SetPartition(part.first); - res->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); - res->SetErrorReason("partition is not ready yet"); - } - } + auto res = topicResult->AddPartitionResult(); + res->SetPartition(part.first); + res->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); + res->SetErrorReason("partition is not ready yet"); + } + } SendReplyAndDie(std::move(record), ctx); - } - - void AnswerGetPartitionStatus(const TActorContext& ctx) - { - NKikimrClient::TResponse record; - record.SetStatus(MSTATUS_OK); - record.SetErrorCode(NPersQueue::NErrorCode::OK); - auto stat = record.MutableMetaResponse()->MutableCmdGetPartitionStatusResult(); - - for (auto& p : TopicInfo) { - auto topicResult = stat->AddTopicResult(); - topicResult->SetTopic(p.first); - if (p.second.PartitionToTablet.empty()) { - topicResult->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); - } else { - topicResult->SetErrorCode(NPersQueue::NErrorCode::OK); - } - + } + + void AnswerGetPartitionStatus(const TActorContext& ctx) + { + NKikimrClient::TResponse record; + record.SetStatus(MSTATUS_OK); + record.SetErrorCode(NPersQueue::NErrorCode::OK); + auto stat = record.MutableMetaResponse()->MutableCmdGetPartitionStatusResult(); + + for (auto& p : TopicInfo) { + auto topicResult = stat->AddTopicResult(); + topicResult->SetTopic(p.first); + if (p.second.PartitionToTablet.empty()) { + topicResult->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); + } else { + topicResult->SetErrorCode(NPersQueue::NErrorCode::OK); + } + for (const auto& tablet: p.second.Tablets) { - auto it = TabletInfo.find(tablet); - Y_VERIFY(it != TabletInfo.end()); - for (const auto& r : it->second.StatusResponses) { - if (p.second.PartitionToTablet.find(r.GetPartition()) == p.second.PartitionToTablet.end()) - continue; - p.second.PartitionToTablet.erase(r.GetPartition()); - auto res = topicResult->AddPartitionResult(); - res->CopyFrom(r); - } - } + auto it = TabletInfo.find(tablet); + Y_VERIFY(it != TabletInfo.end()); + for (const auto& r : it->second.StatusResponses) { + if (p.second.PartitionToTablet.find(r.GetPartition()) == p.second.PartitionToTablet.end()) + continue; + p.second.PartitionToTablet.erase(r.GetPartition()); + auto res = topicResult->AddPartitionResult(); + res->CopyFrom(r); + } + } for (const auto& part : p.second.PartitionToTablet) { - auto res = topicResult->AddPartitionResult(); - res->SetPartition(part.first); - res->SetStatus(NKikimrPQ::TStatusResponse::STATUS_UNKNOWN); - } - } + auto res = topicResult->AddPartitionResult(); + res->SetPartition(part.first); + res->SetStatus(NKikimrPQ::TStatusResponse::STATUS_UNKNOWN); + } + } SendReplyAndDie(std::move(record), ctx); - } - - void AnswerGetReadSessionsInfo(const TActorContext& ctx) - { - NKikimrClient::TResponse record; - record.SetStatus(MSTATUS_OK); - record.SetErrorCode(NPersQueue::NErrorCode::OK); - auto stat = record.MutableMetaResponse()->MutableCmdGetReadSessionsInfoResult(); - + } + + void AnswerGetReadSessionsInfo(const TActorContext& ctx) + { + NKikimrClient::TResponse record; + record.SetStatus(MSTATUS_OK); + record.SetErrorCode(NPersQueue::NErrorCode::OK); + auto stat = record.MutableMetaResponse()->MutableCmdGetReadSessionsInfoResult(); + THashMap<ui32, TString> hostName(NodesInfo->Nodes.size()); - for (const auto& n : NodesInfo->Nodes) - hostName.insert(std::make_pair(n.NodeId, n.Host)); - - - for (auto& p : TopicInfo) { - auto topicRes = stat->AddTopicResult(); - topicRes->SetTopic(p.first); - if (p.second.Tablets.empty()) { - topicRes->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); - } else { - topicRes->SetErrorCode(NPersQueue::NErrorCode::OK); - } - + for (const auto& n : NodesInfo->Nodes) + hostName.insert(std::make_pair(n.NodeId, n.Host)); + + + for (auto& p : TopicInfo) { + auto topicRes = stat->AddTopicResult(); + topicRes->SetTopic(p.first); + if (p.second.Tablets.empty()) { + topicRes->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); + } else { + topicRes->SetErrorCode(NPersQueue::NErrorCode::OK); + } + THashMap<ui32, ui32> partitionToResp; - ui32 sz = 0; + ui32 sz = 0; Y_VERIFY(p.second.ReadSessionsInfo); auto* sessionsInfo = p.second.ReadSessionsInfo.Get(); for (ui32 i = 0; i < sessionsInfo->PartitionInfoSize(); ++i) { const auto& resp = sessionsInfo->GetPartitionInfo(i); - partitionToResp[resp.GetPartition()] = sz++; - auto res = topicRes->AddPartitionResult(); - res->SetPartition(resp.GetPartition()); - res->SetSession(resp.GetSession()); - res->SetClientNode(resp.GetClientNode()); - res->SetTimestamp(resp.GetTimestamp() > 0 ? TInstant::Seconds(resp.GetTimestamp()).ToString() : ""); - res->SetProxyNode(resp.GetProxyNodeId() > 0 ? hostName[resp.GetProxyNodeId()] : ""); - } - for (const auto& tablet: p.second.Tablets) { - auto it = TabletInfo.find(tablet); - Y_VERIFY(it != TabletInfo.end()); - for (const auto& r : it->second.OffsetResponses) { - if (p.second.PartitionToTablet.find(r.GetPartition()) == p.second.PartitionToTablet.end()) - continue; - p.second.PartitionToTablet.erase(r.GetPartition()); - ui32 part = r.GetPartition(); - auto jt = partitionToResp.find(part); - auto res = (jt == partitionToResp.end()) ? topicRes->AddPartitionResult() : topicRes->MutablePartitionResult(jt->second); - res->SetPartition(part); - res->SetClientOffset(r.HasClientOffset() ? r.GetClientOffset() : 0); - res->SetStartOffset(r.GetStartOffset()); - res->SetEndOffset(r.GetEndOffset()); - res->SetTimeLag(r.HasWriteTimestampMS() ? Max<i64>(TAppData::TimeProvider->Now().MilliSeconds() - r.GetWriteTimestampMS(), 0) : 0); - res->SetReadTimeLag(r.HasReadWriteTimestampMS() ? Max<i64>(TAppData::TimeProvider->Now().MilliSeconds() - r.GetReadWriteTimestampMS(), 0) : 0); - res->SetClientReadOffset(r.HasClientReadOffset() ? r.GetClientReadOffset() : 0); - res->SetTabletNode(it->second.NodeId > 0 ? hostName[it->second.NodeId] : ""); - } - } - } - + partitionToResp[resp.GetPartition()] = sz++; + auto res = topicRes->AddPartitionResult(); + res->SetPartition(resp.GetPartition()); + res->SetSession(resp.GetSession()); + res->SetClientNode(resp.GetClientNode()); + res->SetTimestamp(resp.GetTimestamp() > 0 ? TInstant::Seconds(resp.GetTimestamp()).ToString() : ""); + res->SetProxyNode(resp.GetProxyNodeId() > 0 ? hostName[resp.GetProxyNodeId()] : ""); + } + for (const auto& tablet: p.second.Tablets) { + auto it = TabletInfo.find(tablet); + Y_VERIFY(it != TabletInfo.end()); + for (const auto& r : it->second.OffsetResponses) { + if (p.second.PartitionToTablet.find(r.GetPartition()) == p.second.PartitionToTablet.end()) + continue; + p.second.PartitionToTablet.erase(r.GetPartition()); + ui32 part = r.GetPartition(); + auto jt = partitionToResp.find(part); + auto res = (jt == partitionToResp.end()) ? topicRes->AddPartitionResult() : topicRes->MutablePartitionResult(jt->second); + res->SetPartition(part); + res->SetClientOffset(r.HasClientOffset() ? r.GetClientOffset() : 0); + res->SetStartOffset(r.GetStartOffset()); + res->SetEndOffset(r.GetEndOffset()); + res->SetTimeLag(r.HasWriteTimestampMS() ? Max<i64>(TAppData::TimeProvider->Now().MilliSeconds() - r.GetWriteTimestampMS(), 0) : 0); + res->SetReadTimeLag(r.HasReadWriteTimestampMS() ? Max<i64>(TAppData::TimeProvider->Now().MilliSeconds() - r.GetReadWriteTimestampMS(), 0) : 0); + res->SetClientReadOffset(r.HasClientReadOffset() ? r.GetClientReadOffset() : 0); + res->SetTabletNode(it->second.NodeId > 0 ? hostName[it->second.NodeId] : ""); + } + } + } + SendReplyAndDie(std::move(record), ctx); - } - - - bool AnswerIfCanForMeta(const TActorContext& ctx) { + } + + + bool AnswerIfCanForMeta(const TActorContext& ctx) { Y_VERIFY(IsMetaRequest); - Y_VERIFY(RequestProto.HasMetaRequest()); + Y_VERIFY(RequestProto.HasMetaRequest()); if (AclRequests) return false; - if (DescribeRequests) - return false; + if (DescribeRequests) + return false; const auto& meta = RequestProto.GetMetaRequest(); - if (meta.HasCmdGetPartitionLocations()) { + if (meta.HasCmdGetPartitionLocations()) { if (TopicsAnswered != TopicInfo.size() || TabletInfo.size() != TabletsDiscovered.size() || !NodesInfo) - return false; - AnswerGetPartitionLocations(ctx); - return true; - } else if (meta.HasCmdGetTopicMetadata()) { - if (TopicsAnswered != TopicInfo.size()) - return false; - AnswerGetTopicMetadata(ctx); - return true; - } else if (meta.HasCmdGetPartitionOffsets()) { - if (TopicsAnswered != TopicInfo.size() || TabletsAnswered.size() < PartTabletsRequested) - return false; - Y_VERIFY(PartTabletsRequested == TabletInfo.size()); - AnswerGetPartitionOffsets(ctx); - return true; - } else if (meta.HasCmdGetPartitionStatus()) { - if (TopicsAnswered != TopicInfo.size() || TabletsAnswered.size() < PartTabletsRequested) //not all responses got - return false; - Y_VERIFY(PartTabletsRequested == TabletInfo.size()); //there could be balancers and partTablets in TabletInfo only - AnswerGetPartitionStatus(ctx); - return true; - } else if (meta.HasCmdGetReadSessionsInfo()) { - if (TopicsAnswered != TopicInfo.size() || TabletsAnswered.size() < TabletInfo.size() || !NodesInfo) //not all responses got; waiting respose from all balancers and partitions - return false; - Y_VERIFY(PartTabletsRequested + TopicInfo.size() >= TabletInfo.size()); //there could be balancers and partTablets in TabletInfo only - AnswerGetReadSessionsInfo(ctx); - return true; - - } - Y_FAIL("UNKNOWN request"); - } - - void Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorContext& ctx) { - if (IsFetchRequest) { - ProcessFetchRequestResult(ev, ctx); - return; - } + return false; + AnswerGetPartitionLocations(ctx); + return true; + } else if (meta.HasCmdGetTopicMetadata()) { + if (TopicsAnswered != TopicInfo.size()) + return false; + AnswerGetTopicMetadata(ctx); + return true; + } else if (meta.HasCmdGetPartitionOffsets()) { + if (TopicsAnswered != TopicInfo.size() || TabletsAnswered.size() < PartTabletsRequested) + return false; + Y_VERIFY(PartTabletsRequested == TabletInfo.size()); + AnswerGetPartitionOffsets(ctx); + return true; + } else if (meta.HasCmdGetPartitionStatus()) { + if (TopicsAnswered != TopicInfo.size() || TabletsAnswered.size() < PartTabletsRequested) //not all responses got + return false; + Y_VERIFY(PartTabletsRequested == TabletInfo.size()); //there could be balancers and partTablets in TabletInfo only + AnswerGetPartitionStatus(ctx); + return true; + } else if (meta.HasCmdGetReadSessionsInfo()) { + if (TopicsAnswered != TopicInfo.size() || TabletsAnswered.size() < TabletInfo.size() || !NodesInfo) //not all responses got; waiting respose from all balancers and partitions + return false; + Y_VERIFY(PartTabletsRequested + TopicInfo.size() >= TabletInfo.size()); //there could be balancers and partTablets in TabletInfo only + AnswerGetReadSessionsInfo(ctx); + return true; + + } + Y_FAIL("UNKNOWN request"); + } + + void Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorContext& ctx) { + if (IsFetchRequest) { + ProcessFetchRequestResult(ev, ctx); + return; + } SendReplyAndDie(std::move(ev->Get()->Record), ctx); - } - - void Handle(TEvPersQueue::TEvOffsetsResponse::TPtr& ev, const TActorContext& ctx) { - const auto& response = ev->Get()->Record; - Y_VERIFY(response.HasTabletId()); - auto it = TabletInfo.find(response.GetTabletId()); - Y_VERIFY(it != TabletInfo.end()); - for (ui32 i = 0; i < response.PartResultSize(); ++i) { - it->second.OffsetResponses.push_back(response.GetPartResult(i)); - } - TabletsAnswered.insert(it->first); - AnswerIfCanForMeta(ctx); - } - - void Handle(TEvPersQueue::TEvReadSessionsInfoResponse::TPtr& ev, const TActorContext& ctx) { - const auto& response = ev->Get()->Record; - Y_VERIFY(response.HasTabletId()); - auto it = TabletInfo.find(response.GetTabletId()); - Y_VERIFY(it != TabletInfo.end()); - TabletsAnswered.insert(it->first); - - auto jt = TopicInfo.find(it->second.Topic); - Y_VERIFY(jt != TopicInfo.end()); + } + + void Handle(TEvPersQueue::TEvOffsetsResponse::TPtr& ev, const TActorContext& ctx) { + const auto& response = ev->Get()->Record; + Y_VERIFY(response.HasTabletId()); + auto it = TabletInfo.find(response.GetTabletId()); + Y_VERIFY(it != TabletInfo.end()); + for (ui32 i = 0; i < response.PartResultSize(); ++i) { + it->second.OffsetResponses.push_back(response.GetPartResult(i)); + } + TabletsAnswered.insert(it->first); + AnswerIfCanForMeta(ctx); + } + + void Handle(TEvPersQueue::TEvReadSessionsInfoResponse::TPtr& ev, const TActorContext& ctx) { + const auto& response = ev->Get()->Record; + Y_VERIFY(response.HasTabletId()); + auto it = TabletInfo.find(response.GetTabletId()); + Y_VERIFY(it != TabletInfo.end()); + TabletsAnswered.insert(it->first); + + auto jt = TopicInfo.find(it->second.Topic); + Y_VERIFY(jt != TopicInfo.end()); jt->second.ReadSessionsInfo = MakeHolder<NKikimrPQ::TReadSessionsInfoResponse>(std::move(response)); - - AnswerIfCanForMeta(ctx); - } - - - - void Handle(TEvPersQueue::TEvStatusResponse::TPtr& ev, const TActorContext& ctx) { - const auto& response = ev->Get()->Record; - Y_VERIFY(response.HasTabletId()); - auto it = TabletInfo.find(response.GetTabletId()); - Y_VERIFY(it != TabletInfo.end()); - for (ui32 i = 0; i < response.PartResultSize(); ++i) { - it->second.StatusResponses.push_back(response.GetPartResult(i)); - } - TabletsAnswered.insert(it->first); - - AnswerIfCanForMeta(ctx); - } - - void Handle(TEvPersQueue::TEvHasDataInfoResponse::TPtr&, const TActorContext& ctx) { - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "got HasDatainfoResponse"); - ProceedFetchRequest(ctx); - } - - + + AnswerIfCanForMeta(ctx); + } + + + + void Handle(TEvPersQueue::TEvStatusResponse::TPtr& ev, const TActorContext& ctx) { + const auto& response = ev->Get()->Record; + Y_VERIFY(response.HasTabletId()); + auto it = TabletInfo.find(response.GetTabletId()); + Y_VERIFY(it != TabletInfo.end()); + for (ui32 i = 0; i < response.PartResultSize(); ++i) { + it->second.StatusResponses.push_back(response.GetPartResult(i)); + } + TabletsAnswered.insert(it->first); + + AnswerIfCanForMeta(ctx); + } + + void Handle(TEvPersQueue::TEvHasDataInfoResponse::TPtr&, const TActorContext& ctx) { + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "got HasDatainfoResponse"); + ProceedFetchRequest(ctx); + } + + void Handle(TEvAllTopicsDescribeResponse::TPtr& ev, const TActorContext& ctx) { - --DescribeRequests; + --DescribeRequests; auto* res = ev->Get()->Result.Get(); auto processResult = ProcessMetaCacheAllTopicsResponse(ev); if (processResult.IsFatal) { ErrorReason = processResult.Reason; return SendReplyAndDie(CreateErrorReply(processResult.Status, processResult.ErrorCode, ctx), ctx); - } - - NoTopicsAtStart = TopicInfo.empty(); - bool hasTopics = !NoTopicsAtStart; - + } + + NoTopicsAtStart = TopicInfo.empty(); + bool hasTopics = !NoTopicsAtStart; + for (const auto& entry : res->ResultSet) { if (entry.Kind == TSchemeCacheNavigate::EKind::KindTopic && entry.PQGroupInfo) { auto& description = entry.PQGroupInfo->Description; @@ -891,262 +891,262 @@ public: auto& topicInfo = TopicInfo[description.GetName()]; topicInfo.BalancerTabletId = description.GetBalancerTabletID(); topicInfo.PQInfo = entry.PQGroupInfo; - } - } - } - + } + } + } + for (auto& p: TopicInfo) { - const TString& topic = p.first; - - if (!p.second.BalancerTabletId) { + const TString& topic = p.first; + + if (!p.second.BalancerTabletId) { ErrorReason = Sprintf("topic '%s' is not created, Marker# PQ94", topic.c_str()); - return SendReplyAndDie(CreateErrorReply(MSTATUS_ERROR, NPersQueue::NErrorCode::UNKNOWN_TOPIC, ctx), ctx); - } + return SendReplyAndDie(CreateErrorReply(MSTATUS_ERROR, NPersQueue::NErrorCode::UNKNOWN_TOPIC, ctx), ctx); + } ProcessMetadata(p.first, p.second, ctx); - } - - if (RequestProto.HasMetaRequest()) { - AnswerIfCanForMeta(ctx); //if no topics at all - } - } - + } + + if (RequestProto.HasMetaRequest()) { + AnswerIfCanForMeta(ctx); //if no topics at all + } + } + void ProcessMetadata(const TString& name, TTopicInfo& info, const TActorContext& ctx) { //const TString& name = info.PQInfo->Description.GetName(); if (!info.PQInfo) { //not supposed to happen anymore - if (RequestProto.HasMetaRequest() && NoTopicsAtStart && !RequestProto.GetMetaRequest().HasCmdGetTopicMetadata()) { - ++TopicsAnswered; - AnswerIfCanForMeta(ctx); - } else { + if (RequestProto.HasMetaRequest() && NoTopicsAtStart && !RequestProto.GetMetaRequest().HasCmdGetTopicMetadata()) { + ++TopicsAnswered; + AnswerIfCanForMeta(ctx); + } else { ErrorReason = Sprintf("topic '%s' is not ready, Marker# PQ85", name.c_str()); SendReplyAndDie(CreateErrorReply(MSTATUS_ERROR, NPersQueue::NErrorCode::UNKNOWN_TOPIC, ctx), ctx); - } - return; - } - + } + return; + } + const auto& pqDescr = info.PQInfo->Description; const TString& topic = pqDescr.GetName(); Y_VERIFY(topic == name, "topic '%s' path '%s'", topic.c_str(), name.c_str()); - - bool mirrorerRequest = false; - if (RequestProto.HasPartitionRequest()) { - for (const auto& req : RequestProto.GetPartitionRequest().GetCmdWrite()) { - if (req.HasDisableDeduplication() && req.GetDisableDeduplication()) mirrorerRequest = true; - } - if (RequestProto.GetPartitionRequest().HasCmdRead()) { - mirrorerRequest = RequestProto.GetPartitionRequest().GetCmdRead().GetMirrorerRequest(); - } - if (RequestProto.GetPartitionRequest().HasCmdGetMaxSeqNo()) { - for (const auto& sid : RequestProto.GetPartitionRequest().GetCmdGetMaxSeqNo().GetSourceId()) { - mirrorerRequest = sid.substr(1).StartsWith("frontend-status"); - } - } - if (RequestProto.GetPartitionRequest().HasCmdSetClientOffset()) { - mirrorerRequest = RequestProto.GetPartitionRequest().GetCmdSetClientOffset().GetMirrorerRequest(); - } - } else if (RequestProto.HasFetchRequest()){ - mirrorerRequest = RequestProto.GetFetchRequest().GetMirrorerRequest(); - } - - TMaybe<NKikimrPQ::EOperation> operation = TMaybe<NKikimrPQ::EOperation>(); - if (RequestProto.HasFetchRequest()) { - operation = NKikimrPQ::EOperation::READ_OP; - } else if (RequestProto.HasPartitionRequest()) { - auto& partitionRequest = RequestProto.GetPartitionRequest(); - if (partitionRequest.CmdWriteSize() > 0 || partitionRequest.HasCmdGetMaxSeqNo()) { - operation = NKikimrPQ::EOperation::WRITE_OP; - } else if (partitionRequest.HasCmdRead() || partitionRequest.HasCmdSetClientOffset()) { - operation = NKikimrPQ::EOperation::READ_OP; - } - } - - if (AppData(ctx)->PQConfig.GetCheckACL() && operation && !mirrorerRequest) { + + bool mirrorerRequest = false; + if (RequestProto.HasPartitionRequest()) { + for (const auto& req : RequestProto.GetPartitionRequest().GetCmdWrite()) { + if (req.HasDisableDeduplication() && req.GetDisableDeduplication()) mirrorerRequest = true; + } + if (RequestProto.GetPartitionRequest().HasCmdRead()) { + mirrorerRequest = RequestProto.GetPartitionRequest().GetCmdRead().GetMirrorerRequest(); + } + if (RequestProto.GetPartitionRequest().HasCmdGetMaxSeqNo()) { + for (const auto& sid : RequestProto.GetPartitionRequest().GetCmdGetMaxSeqNo().GetSourceId()) { + mirrorerRequest = sid.substr(1).StartsWith("frontend-status"); + } + } + if (RequestProto.GetPartitionRequest().HasCmdSetClientOffset()) { + mirrorerRequest = RequestProto.GetPartitionRequest().GetCmdSetClientOffset().GetMirrorerRequest(); + } + } else if (RequestProto.HasFetchRequest()){ + mirrorerRequest = RequestProto.GetFetchRequest().GetMirrorerRequest(); + } + + TMaybe<NKikimrPQ::EOperation> operation = TMaybe<NKikimrPQ::EOperation>(); + if (RequestProto.HasFetchRequest()) { + operation = NKikimrPQ::EOperation::READ_OP; + } else if (RequestProto.HasPartitionRequest()) { + auto& partitionRequest = RequestProto.GetPartitionRequest(); + if (partitionRequest.CmdWriteSize() > 0 || partitionRequest.HasCmdGetMaxSeqNo()) { + operation = NKikimrPQ::EOperation::WRITE_OP; + } else if (partitionRequest.HasCmdRead() || partitionRequest.HasCmdSetClientOffset()) { + operation = NKikimrPQ::EOperation::READ_OP; + } + } + + if (AppData(ctx)->PQConfig.GetCheckACL() && operation && !mirrorerRequest) { if (*operation == NKikimrPQ::EOperation::WRITE_OP && pqDescr.GetPQTabletConfig().GetRequireAuthWrite() || *operation == NKikimrPQ::EOperation::READ_OP && pqDescr.GetPQTabletConfig().GetRequireAuthRead()) { - ErrorReason = Sprintf("unauthenticated access to '%s' is denied, Marker# PQ419", topic.c_str()); + ErrorReason = Sprintf("unauthenticated access to '%s' is denied, Marker# PQ419", topic.c_str()); return SendReplyAndDie(CreateErrorReply(MSTATUS_ERROR, NPersQueue::NErrorCode::ACCESS_DENIED, ctx), ctx); - } - } - - if (RequestProto.HasPartitionRequest()) { - ui64 tabletId = 0; - auto it = TopicInfo.find(topic); - Y_VERIFY(it != TopicInfo.end()); - Y_VERIFY(it->second.PartitionsToRequest.size() == 1); - ui32 partition = *(it->second.PartitionsToRequest.begin()); + } + } + + if (RequestProto.HasPartitionRequest()) { + ui64 tabletId = 0; + auto it = TopicInfo.find(topic); + Y_VERIFY(it != TopicInfo.end()); + Y_VERIFY(it->second.PartitionsToRequest.size() == 1); + ui32 partition = *(it->second.PartitionsToRequest.begin()); for (ui32 i = 0; i < pqDescr.PartitionsSize(); ++i) { const auto& pi = pqDescr.GetPartitions(i); if (pi.GetPartitionId() == partition) { - tabletId = pi.GetTabletId(); - break; - } - } - - if (!tabletId) { - ErrorReason = Sprintf("no partition %u in topic '%s', Marker# PQ4", partition, topic.c_str()); - return SendReplyAndDie(CreateErrorReply(MSTATUS_ERROR, NPersQueue::NErrorCode::UNKNOWN_TOPIC, ctx), ctx); - } - - NTabletPipe::TClientConfig clientConfig; + tabletId = pi.GetTabletId(); + break; + } + } + + if (!tabletId) { + ErrorReason = Sprintf("no partition %u in topic '%s', Marker# PQ4", partition, topic.c_str()); + return SendReplyAndDie(CreateErrorReply(MSTATUS_ERROR, NPersQueue::NErrorCode::UNKNOWN_TOPIC, ctx), ctx); + } + + NTabletPipe::TClientConfig clientConfig; PQClient.push_back(ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, tabletId, clientConfig))); ActorIdToProto(PQClient.back(), RequestProto.MutablePartitionRequest()->MutablePipeClient()); - - TAutoPtr<TEvPersQueue::TEvRequest> req(new TEvPersQueue::TEvRequest); - req->Record.Swap(&RequestProto); - NTabletPipe::SendData(ctx, PQClient.back(), req.Release()); - return; - } - - - if (RequestProto.HasMetaRequest() || RequestProto.HasFetchRequest()) { //answer or request locations - - bool needResolving = RequestProto.HasMetaRequest() && RequestProto.GetMetaRequest().HasCmdGetPartitionLocations(); - bool needAskOffset = RequestProto.HasMetaRequest() && (RequestProto.GetMetaRequest().HasCmdGetPartitionOffsets() - || RequestProto.GetMetaRequest().HasCmdGetReadSessionsInfo()); - bool needAskStatus = RequestProto.HasMetaRequest() && RequestProto.GetMetaRequest().HasCmdGetPartitionStatus(); - bool needAskFetch = RequestProto.HasFetchRequest(); - bool metadataOnly = RequestProto.HasMetaRequest() && RequestProto.GetMetaRequest().HasCmdGetTopicMetadata(); - bool needAskBalancer = RequestProto.HasMetaRequest() && RequestProto.GetMetaRequest().HasCmdGetReadSessionsInfo(); - - Y_VERIFY((needResolving + needAskOffset + needAskStatus + needAskFetch + metadataOnly) == 1); - ++TopicsAnswered; - auto it = TopicInfo.find(topic); + + TAutoPtr<TEvPersQueue::TEvRequest> req(new TEvPersQueue::TEvRequest); + req->Record.Swap(&RequestProto); + NTabletPipe::SendData(ctx, PQClient.back(), req.Release()); + return; + } + + + if (RequestProto.HasMetaRequest() || RequestProto.HasFetchRequest()) { //answer or request locations + + bool needResolving = RequestProto.HasMetaRequest() && RequestProto.GetMetaRequest().HasCmdGetPartitionLocations(); + bool needAskOffset = RequestProto.HasMetaRequest() && (RequestProto.GetMetaRequest().HasCmdGetPartitionOffsets() + || RequestProto.GetMetaRequest().HasCmdGetReadSessionsInfo()); + bool needAskStatus = RequestProto.HasMetaRequest() && RequestProto.GetMetaRequest().HasCmdGetPartitionStatus(); + bool needAskFetch = RequestProto.HasFetchRequest(); + bool metadataOnly = RequestProto.HasMetaRequest() && RequestProto.GetMetaRequest().HasCmdGetTopicMetadata(); + bool needAskBalancer = RequestProto.HasMetaRequest() && RequestProto.GetMetaRequest().HasCmdGetReadSessionsInfo(); + + Y_VERIFY((needResolving + needAskOffset + needAskStatus + needAskFetch + metadataOnly) == 1); + ++TopicsAnswered; + auto it = TopicInfo.find(topic); Y_VERIFY(it != TopicInfo.end(), "topic '%s'", topic.c_str()); it->second.Config = pqDescr.GetPQTabletConfig(); it->second.Config.SetVersion(pqDescr.GetAlterVersion()); it->second.NumParts = pqDescr.PartitionsSize(); - if (metadataOnly) { - AnswerIfCanForMeta(ctx); - return; - } - Y_VERIFY(it->second.BalancerTabletId); - - if (needAskBalancer) { - - if (!RequestProto.GetMetaRequest().GetCmdGetReadSessionsInfo().HasClientId()) { - ErrorReason = "No clientId specified in CmdGetReadSessionsInfo"; - return SendReplyAndDie(CreateErrorReply(MSTATUS_ERROR, NPersQueue::NErrorCode::UNKNOWN_TOPIC, ctx), ctx); - } - - auto& tabletInfo = TabletInfo[it->second.BalancerTabletId]; - tabletInfo.IsBalancer = true; - tabletInfo.Topic = topic; - - NTabletPipe::TClientConfig clientConfig; + if (metadataOnly) { + AnswerIfCanForMeta(ctx); + return; + } + Y_VERIFY(it->second.BalancerTabletId); + + if (needAskBalancer) { + + if (!RequestProto.GetMetaRequest().GetCmdGetReadSessionsInfo().HasClientId()) { + ErrorReason = "No clientId specified in CmdGetReadSessionsInfo"; + return SendReplyAndDie(CreateErrorReply(MSTATUS_ERROR, NPersQueue::NErrorCode::UNKNOWN_TOPIC, ctx), ctx); + } + + auto& tabletInfo = TabletInfo[it->second.BalancerTabletId]; + tabletInfo.IsBalancer = true; + tabletInfo.Topic = topic; + + NTabletPipe::TClientConfig clientConfig; TActorId pipeClient = ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, it->second.BalancerTabletId, clientConfig)); - tabletInfo.PipeClient = pipeClient; - PQClient.push_back(pipeClient); - - THolder<TEvPersQueue::TEvGetReadSessionsInfo> ev(new TEvPersQueue::TEvGetReadSessionsInfo()); - ev->Record.SetClientId(RequestProto.GetMetaRequest().GetCmdGetReadSessionsInfo().GetClientId()); - NTabletPipe::SendData(ctx, pipeClient, ev.Release()); - } - + tabletInfo.PipeClient = pipeClient; + PQClient.push_back(pipeClient); + + THolder<TEvPersQueue::TEvGetReadSessionsInfo> ev(new TEvPersQueue::TEvGetReadSessionsInfo()); + ev->Record.SetClientId(RequestProto.GetMetaRequest().GetCmdGetReadSessionsInfo().GetClientId()); + NTabletPipe::SendData(ctx, pipeClient, ev.Release()); + } + for (ui32 i = 0; i < pqDescr.PartitionsSize(); ++i) { ui32 part = pqDescr.GetPartitions(i).GetPartitionId(); ui64 tabletId = pqDescr.GetPartitions(i).GetTabletId(); if (!it->second.PartitionsToRequest.empty() && !it->second.PartitionsToRequest.contains(part)) { - continue; - } - bool res = it->second.PartitionToTablet.insert({part, tabletId}).second; - Y_VERIFY(res); - if (TabletInfo.find(tabletId) == TabletInfo.end()) { + continue; + } + bool res = it->second.PartitionToTablet.insert({part, tabletId}).second; + Y_VERIFY(res); + if (TabletInfo.find(tabletId) == TabletInfo.end()) { auto& tabletInfo = TabletInfo[tabletId]; tabletInfo.Topic = topic; - it->second.Tablets.push_back(tabletId); - // Tablet node resolution relies on opening a pipe - - NTabletPipe::TClientConfig clientConfig; + it->second.Tablets.push_back(tabletId); + // Tablet node resolution relies on opening a pipe + + NTabletPipe::TClientConfig clientConfig; TActorId pipeClient = ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, tabletId, clientConfig)); - tabletInfo.PipeClient = pipeClient; - PQClient.push_back(pipeClient); - if (needAskOffset) { - THolder<TEvPersQueue::TEvOffsets> ev(new TEvPersQueue::TEvOffsets()); - TString clientId; - if (RequestProto.GetMetaRequest().HasCmdGetPartitionOffsets() - && RequestProto.GetMetaRequest().GetCmdGetPartitionOffsets().HasClientId()) - clientId = RequestProto.GetMetaRequest().GetCmdGetPartitionOffsets().GetClientId(); - if (RequestProto.GetMetaRequest().HasCmdGetReadSessionsInfo()) - clientId = RequestProto.GetMetaRequest().GetCmdGetReadSessionsInfo().GetClientId(); - if (!clientId.empty()) - ev->Record.SetClientId(clientId); - NTabletPipe::SendData(ctx, pipeClient, ev.Release()); - } else if (needAskStatus) { - TAutoPtr<TEvPersQueue::TEvStatus> ev = new TEvPersQueue::TEvStatus(); - if (RequestProto.GetMetaRequest().GetCmdGetPartitionStatus().HasClientId()) - ev->Record.SetClientId(RequestProto.GetMetaRequest().GetCmdGetPartitionStatus().GetClientId()); - NTabletPipe::SendData(ctx, pipeClient, ev.Release()); - } - ++PartTabletsRequested; - } - if (needAskFetch) { - if (CanProcessFetchRequest) { - ProceedFetchRequest(ctx); - } else { + tabletInfo.PipeClient = pipeClient; + PQClient.push_back(pipeClient); + if (needAskOffset) { + THolder<TEvPersQueue::TEvOffsets> ev(new TEvPersQueue::TEvOffsets()); + TString clientId; + if (RequestProto.GetMetaRequest().HasCmdGetPartitionOffsets() + && RequestProto.GetMetaRequest().GetCmdGetPartitionOffsets().HasClientId()) + clientId = RequestProto.GetMetaRequest().GetCmdGetPartitionOffsets().GetClientId(); + if (RequestProto.GetMetaRequest().HasCmdGetReadSessionsInfo()) + clientId = RequestProto.GetMetaRequest().GetCmdGetReadSessionsInfo().GetClientId(); + if (!clientId.empty()) + ev->Record.SetClientId(clientId); + NTabletPipe::SendData(ctx, pipeClient, ev.Release()); + } else if (needAskStatus) { + TAutoPtr<TEvPersQueue::TEvStatus> ev = new TEvPersQueue::TEvStatus(); + if (RequestProto.GetMetaRequest().GetCmdGetPartitionStatus().HasClientId()) + ev->Record.SetClientId(RequestProto.GetMetaRequest().GetCmdGetPartitionStatus().GetClientId()); + NTabletPipe::SendData(ctx, pipeClient, ev.Release()); + } + ++PartTabletsRequested; + } + if (needAskFetch) { + if (CanProcessFetchRequest) { + ProceedFetchRequest(ctx); + } else { const auto& tabletInfo = TabletInfo[tabletId]; - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "sending HasDataInfoResponse " << it->second.FetchInfo[part]->Record); - - NTabletPipe::SendData(ctx, tabletInfo.PipeClient, it->second.FetchInfo[part].Release()); - ++PartTabletsRequested; - } - } - } - if (!it->second.PartitionsToRequest.empty() && it->second.PartitionsToRequest.size() != it->second.PartitionToTablet.size()) { - ErrorReason = Sprintf("no one of requested partitions in topic '%s', Marker# PQ12", topic.c_str()); - return SendReplyAndDie(CreateErrorReply(MSTATUS_ERROR, NPersQueue::NErrorCode::UNKNOWN_TOPIC, ctx), ctx); - } - - Y_VERIFY(!TabletInfo.empty()); // if TabletInfo is empty - topic is empty - } - } - - void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev, const TActorContext& ctx) { - NodesInfo = ev->Release(); - AnswerIfCanForMeta(ctx); - } - - bool HandlePipeError(const ui64 tabletId, const TActorContext& ctx) - { - if (IsMetaRequest) { - auto it = TabletInfo.find(tabletId); - if (it != TabletInfo.end()) { - TabletsAnswered.insert(tabletId); - if (RequestProto.HasMetaRequest() && (RequestProto.GetMetaRequest().HasCmdGetPartitionLocations() || RequestProto.GetMetaRequest().HasCmdGetReadSessionsInfo())) { + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "sending HasDataInfoResponse " << it->second.FetchInfo[part]->Record); + + NTabletPipe::SendData(ctx, tabletInfo.PipeClient, it->second.FetchInfo[part].Release()); + ++PartTabletsRequested; + } + } + } + if (!it->second.PartitionsToRequest.empty() && it->second.PartitionsToRequest.size() != it->second.PartitionToTablet.size()) { + ErrorReason = Sprintf("no one of requested partitions in topic '%s', Marker# PQ12", topic.c_str()); + return SendReplyAndDie(CreateErrorReply(MSTATUS_ERROR, NPersQueue::NErrorCode::UNKNOWN_TOPIC, ctx), ctx); + } + + Y_VERIFY(!TabletInfo.empty()); // if TabletInfo is empty - topic is empty + } + } + + void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev, const TActorContext& ctx) { + NodesInfo = ev->Release(); + AnswerIfCanForMeta(ctx); + } + + bool HandlePipeError(const ui64 tabletId, const TActorContext& ctx) + { + if (IsMetaRequest) { + auto it = TabletInfo.find(tabletId); + if (it != TabletInfo.end()) { + TabletsAnswered.insert(tabletId); + if (RequestProto.HasMetaRequest() && (RequestProto.GetMetaRequest().HasCmdGetPartitionLocations() || RequestProto.GetMetaRequest().HasCmdGetReadSessionsInfo())) { TabletsDiscovered.insert(tabletId); // Disconnect event can arrive after connect event and this hash set will take it into account. - } - AnswerIfCanForMeta(ctx); - return true; - } - } - if (IsFetchRequest) { - auto it = TabletInfo.find(tabletId); - if (it != TabletInfo.end()) { - it->second.BrokenPipe = true; - if (FetchRequestCurrentReadTablet == tabletId) { - //fail current read - ctx.Send(ctx.SelfID, FormEmptyCurrentRead(CurrentCookie).Release()); - } - return true; - } - } - return false; - } - - void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) { - TEvTabletPipe::TEvClientConnected *msg = ev->Get(); + } + AnswerIfCanForMeta(ctx); + return true; + } + } + if (IsFetchRequest) { + auto it = TabletInfo.find(tabletId); + if (it != TabletInfo.end()) { + it->second.BrokenPipe = true; + if (FetchRequestCurrentReadTablet == tabletId) { + //fail current read + ctx.Send(ctx.SelfID, FormEmptyCurrentRead(CurrentCookie).Release()); + } + return true; + } + } + return false; + } + + void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) { + TEvTabletPipe::TEvClientConnected *msg = ev->Get(); const ui64 tabletId = ev->Get()->TabletId; - if (msg->Status != NKikimrProto::OK) { + if (msg->Status != NKikimrProto::OK) { - if (HandlePipeError(tabletId, ctx)) - return; + if (HandlePipeError(tabletId, ctx)) + return; ErrorReason = Sprintf("Client pipe to %" PRIu64 " connection error, Status# %s, Marker# PQ6", tabletId, NKikimrProto::EReplyStatus_Name(msg->Status).data()); - return SendReplyAndDie(CreateErrorReply(MSTATUS_ERROR, NPersQueue::NErrorCode::ERROR, ctx), ctx); - } + return SendReplyAndDie(CreateErrorReply(MSTATUS_ERROR, NPersQueue::NErrorCode::ERROR, ctx), ctx); + } // Update node resolution info for GetPartitionLocations request - if (RequestProto.HasMetaRequest() && (RequestProto.GetMetaRequest().HasCmdGetPartitionLocations() - || RequestProto.GetMetaRequest().HasCmdGetReadSessionsInfo())) { + if (RequestProto.HasMetaRequest() && (RequestProto.GetMetaRequest().HasCmdGetPartitionLocations() + || RequestProto.GetMetaRequest().HasCmdGetReadSessionsInfo())) { auto it = TabletInfo.find(ev->Get()->TabletId); if (it != TabletInfo.end()) { ui32 nodeId = ev->Get()->ServerId.NodeId(); @@ -1156,238 +1156,238 @@ public: AnswerIfCanForMeta(ctx); } } - } - - void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) { - - ui64 tabletId = ev->Get()->TabletId; - if (HandlePipeError(tabletId, ctx)) - return; - - ErrorReason = Sprintf("Client pipe to %" PRIu64 " destroyed (connection lost), Marker# PQ7", tabletId); - SendReplyAndDie(CreateErrorReply(MSTATUS_ERROR, NPersQueue::NErrorCode::ERROR, ctx), ctx); - } - - void HandleTimeout(const TActorContext& ctx) { - ErrorReason = Sprintf("Timeout while waiting for response, may be just slow, Marker# PQ11"); - return SendReplyAndDie(CreateErrorReply(MSTATUS_TIMEOUT, NPersQueue::NErrorCode::ERROR, ctx), ctx); - } - - void Die(const TActorContext& ctx) override { - for (auto& actor: PQClient) { - NTabletPipe::CloseClient(ctx, actor); - } - TActorBootstrapped<TMessageBusServerPersQueueImpl>::Die(ctx); - } - - TAutoPtr<TEvPersQueue::TEvResponse> FormEmptyCurrentRead(ui64 cookie) { - TAutoPtr<TEvPersQueue::TEvResponse> req(new TEvPersQueue::TEvResponse); - auto read = req->Record.MutablePartitionResponse()->MutableCmdReadResult(); - req->Record.MutablePartitionResponse()->SetCookie(cookie); - read->SetErrorCode(NPersQueue::NErrorCode::READ_NOT_DONE); - return req.Release(); - } - - void ProceedFetchRequest(const TActorContext& ctx) { - if (FetchRequestCurrentReadTablet) { //already got active read request - return; - } - CanProcessFetchRequest = true; - Y_VERIFY(IsFetchRequest); + } + + void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) { + + ui64 tabletId = ev->Get()->TabletId; + if (HandlePipeError(tabletId, ctx)) + return; + + ErrorReason = Sprintf("Client pipe to %" PRIu64 " destroyed (connection lost), Marker# PQ7", tabletId); + SendReplyAndDie(CreateErrorReply(MSTATUS_ERROR, NPersQueue::NErrorCode::ERROR, ctx), ctx); + } + + void HandleTimeout(const TActorContext& ctx) { + ErrorReason = Sprintf("Timeout while waiting for response, may be just slow, Marker# PQ11"); + return SendReplyAndDie(CreateErrorReply(MSTATUS_TIMEOUT, NPersQueue::NErrorCode::ERROR, ctx), ctx); + } + + void Die(const TActorContext& ctx) override { + for (auto& actor: PQClient) { + NTabletPipe::CloseClient(ctx, actor); + } + TActorBootstrapped<TMessageBusServerPersQueueImpl>::Die(ctx); + } + + TAutoPtr<TEvPersQueue::TEvResponse> FormEmptyCurrentRead(ui64 cookie) { + TAutoPtr<TEvPersQueue::TEvResponse> req(new TEvPersQueue::TEvResponse); + auto read = req->Record.MutablePartitionResponse()->MutableCmdReadResult(); + req->Record.MutablePartitionResponse()->SetCookie(cookie); + read->SetErrorCode(NPersQueue::NErrorCode::READ_NOT_DONE); + return req.Release(); + } + + void ProceedFetchRequest(const TActorContext& ctx) { + if (FetchRequestCurrentReadTablet) { //already got active read request + return; + } + CanProcessFetchRequest = true; + Y_VERIFY(IsFetchRequest); const auto& fetch = RequestProto.GetFetchRequest(); - - if (FetchRequestReadsDone == fetch.PartitionSize()) { - NKikimrClient::TResponse record; - record.MutableFetchResponse()->Swap(&FetchResponse); - record.SetStatus(MSTATUS_OK); - record.SetErrorCode(NPersQueue::NErrorCode::OK); + + if (FetchRequestReadsDone == fetch.PartitionSize()) { + NKikimrClient::TResponse record; + record.MutableFetchResponse()->Swap(&FetchResponse); + record.SetStatus(MSTATUS_OK); + record.SetErrorCode(NPersQueue::NErrorCode::OK); return SendReplyAndDie(std::move(record), ctx); - } + } const auto& clientId = fetch.GetClientId(); - Y_VERIFY(FetchRequestReadsDone < fetch.PartitionSize()); - const auto& req = fetch.GetPartition(FetchRequestReadsDone); - const auto& topic = req.GetTopic(); - const auto& offset = req.GetOffset(); - const auto& part = req.GetPartition(); - const auto& maxBytes = req.GetMaxBytes(); + Y_VERIFY(FetchRequestReadsDone < fetch.PartitionSize()); + const auto& req = fetch.GetPartition(FetchRequestReadsDone); + const auto& topic = req.GetTopic(); + const auto& offset = req.GetOffset(); + const auto& part = req.GetPartition(); + const auto& maxBytes = req.GetMaxBytes(); const auto& readTimestampMs = req.GetReadTimestampMs(); - auto it = TopicInfo.find(topic); - Y_VERIFY(it != TopicInfo.end()); - if (it->second.PartitionToTablet.find(part) == it->second.PartitionToTablet.end()) { //tablet's info is not filled for this topic yet - return; - } - ui64 tabletId = it->second.PartitionToTablet[part]; - Y_VERIFY(tabletId); - FetchRequestCurrentReadTablet = tabletId; - ++CurrentCookie; - auto jt = TabletInfo.find(tabletId); - Y_VERIFY(jt != TabletInfo.end()); - if (jt->second.BrokenPipe || FetchRequestBytesLeft == 0) { //answer right now - ctx.Send(ctx.SelfID, FormEmptyCurrentRead(CurrentCookie).Release()); - return; - } - - //Form read request - TAutoPtr<TEvPersQueue::TEvRequest> preq(new TEvPersQueue::TEvRequest); - TStringBuilder reqId; - reqId << RequestId << "-id-" << FetchRequestReadsDone << "-" << fetch.PartitionSize(); - preq->Record.SetRequestId(reqId); - auto partReq = preq->Record.MutablePartitionRequest(); - partReq->SetCookie(CurrentCookie); - partReq->SetTopic(topic); - partReq->SetPartition(part); - auto read = partReq->MutableCmdRead(); - read->SetClientId(clientId); - read->SetOffset(offset); - read->SetCount(1000000); - read->SetTimeoutMs(0); - read->SetBytes(Min<ui32>(maxBytes, FetchRequestBytesLeft)); + auto it = TopicInfo.find(topic); + Y_VERIFY(it != TopicInfo.end()); + if (it->second.PartitionToTablet.find(part) == it->second.PartitionToTablet.end()) { //tablet's info is not filled for this topic yet + return; + } + ui64 tabletId = it->second.PartitionToTablet[part]; + Y_VERIFY(tabletId); + FetchRequestCurrentReadTablet = tabletId; + ++CurrentCookie; + auto jt = TabletInfo.find(tabletId); + Y_VERIFY(jt != TabletInfo.end()); + if (jt->second.BrokenPipe || FetchRequestBytesLeft == 0) { //answer right now + ctx.Send(ctx.SelfID, FormEmptyCurrentRead(CurrentCookie).Release()); + return; + } + + //Form read request + TAutoPtr<TEvPersQueue::TEvRequest> preq(new TEvPersQueue::TEvRequest); + TStringBuilder reqId; + reqId << RequestId << "-id-" << FetchRequestReadsDone << "-" << fetch.PartitionSize(); + preq->Record.SetRequestId(reqId); + auto partReq = preq->Record.MutablePartitionRequest(); + partReq->SetCookie(CurrentCookie); + partReq->SetTopic(topic); + partReq->SetPartition(part); + auto read = partReq->MutableCmdRead(); + read->SetClientId(clientId); + read->SetOffset(offset); + read->SetCount(1000000); + read->SetTimeoutMs(0); + read->SetBytes(Min<ui32>(maxBytes, FetchRequestBytesLeft)); read->SetReadTimestampMs(readTimestampMs); - NTabletPipe::SendData(ctx, jt->second.PipeClient, preq.Release()); - } - - void ProcessFetchRequestResult(TEvPersQueue::TEvResponse::TPtr& ev, const TActorContext& ctx) { - auto& record = ev->Get()->Record; - Y_VERIFY(record.HasPartitionResponse()); - if (record.GetPartitionResponse().GetCookie() != CurrentCookie || FetchRequestCurrentReadTablet == 0) { - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "proxy fetch error: got response from tablet " << record.GetPartitionResponse().GetCookie() - << " while waiting from " << CurrentCookie << " and requested tablet is " << FetchRequestCurrentReadTablet); - return; - } - - if (FetchRequestBytesLeft >= (ui32)record.ByteSize()) - FetchRequestBytesLeft -= (ui32)record.ByteSize(); - else - FetchRequestBytesLeft = 0; - FetchRequestCurrentReadTablet = 0; - - auto res = FetchResponse.AddPartResult(); - auto& fetch = RequestProto.GetFetchRequest(); - Y_VERIFY(FetchRequestReadsDone < fetch.PartitionSize()); - const auto& req = fetch.GetPartition(FetchRequestReadsDone); - const auto& topic = req.GetTopic(); - const auto& part = req.GetPartition(); - - res->SetTopic(topic); - res->SetPartition(part); - auto read = res->MutableReadResult(); - if (record.HasPartitionResponse() && record.GetPartitionResponse().HasCmdReadResult()) - read->CopyFrom(record.GetPartitionResponse().GetCmdReadResult()); - if (record.HasErrorCode()) - read->SetErrorCode(record.GetErrorCode()); - if (record.HasErrorReason()) - read->SetErrorReason(record.GetErrorReason()); - - ++FetchRequestReadsDone; - ProceedFetchRequest(ctx); - } - - - NKikimrClient::TResponse CreateErrorReply(EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TActorContext& ctx) { - Y_UNUSED(ctx); - NKikimrClient::TResponse rec; - rec.SetStatus(status); - rec.SetErrorCode(code); - + NTabletPipe::SendData(ctx, jt->second.PipeClient, preq.Release()); + } + + void ProcessFetchRequestResult(TEvPersQueue::TEvResponse::TPtr& ev, const TActorContext& ctx) { + auto& record = ev->Get()->Record; + Y_VERIFY(record.HasPartitionResponse()); + if (record.GetPartitionResponse().GetCookie() != CurrentCookie || FetchRequestCurrentReadTablet == 0) { + LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "proxy fetch error: got response from tablet " << record.GetPartitionResponse().GetCookie() + << " while waiting from " << CurrentCookie << " and requested tablet is " << FetchRequestCurrentReadTablet); + return; + } + + if (FetchRequestBytesLeft >= (ui32)record.ByteSize()) + FetchRequestBytesLeft -= (ui32)record.ByteSize(); + else + FetchRequestBytesLeft = 0; + FetchRequestCurrentReadTablet = 0; + + auto res = FetchResponse.AddPartResult(); + auto& fetch = RequestProto.GetFetchRequest(); + Y_VERIFY(FetchRequestReadsDone < fetch.PartitionSize()); + const auto& req = fetch.GetPartition(FetchRequestReadsDone); + const auto& topic = req.GetTopic(); + const auto& part = req.GetPartition(); + + res->SetTopic(topic); + res->SetPartition(part); + auto read = res->MutableReadResult(); + if (record.HasPartitionResponse() && record.GetPartitionResponse().HasCmdReadResult()) + read->CopyFrom(record.GetPartitionResponse().GetCmdReadResult()); + if (record.HasErrorCode()) + read->SetErrorCode(record.GetErrorCode()); + if (record.HasErrorReason()) + read->SetErrorReason(record.GetErrorReason()); + + ++FetchRequestReadsDone; + ProceedFetchRequest(ctx); + } + + + NKikimrClient::TResponse CreateErrorReply(EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TActorContext& ctx) { + Y_UNUSED(ctx); + NKikimrClient::TResponse rec; + rec.SetStatus(status); + rec.SetErrorCode(code); + if (ErrorReason.size()) { - rec.SetErrorReason(ErrorReason); - } else { - rec.SetErrorReason("Unknown, Marker# PQ12"); - } - return rec; - } - - - void Bootstrap(const TActorContext& ctx) { - LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "proxy got request " << RequestId << " IsMetaRequest " << IsMetaRequest << " IsFetchRequest " << IsFetchRequest); - - // handle error from constructor - if (!!ErrorReason) { - return SendReplyAndDie(CreateErrorReply(MSTATUS_ERROR, NPersQueue::NErrorCode::BAD_REQUEST, ctx), ctx); - } - if (IsFetchRequest) { - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "scheduling HasDataInfoResponse in " << RequestProto.GetFetchRequest().GetWaitMs()); - ctx.Schedule(TDuration::MilliSeconds(Min<ui32>(RequestProto.GetFetchRequest().GetWaitMs(), 30000)), new TEvPersQueue::TEvHasDataInfoResponse); - } - + rec.SetErrorReason(ErrorReason); + } else { + rec.SetErrorReason("Unknown, Marker# PQ12"); + } + return rec; + } + + + void Bootstrap(const TActorContext& ctx) { + LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "proxy got request " << RequestId << " IsMetaRequest " << IsMetaRequest << " IsFetchRequest " << IsFetchRequest); + + // handle error from constructor + if (!!ErrorReason) { + return SendReplyAndDie(CreateErrorReply(MSTATUS_ERROR, NPersQueue::NErrorCode::BAD_REQUEST, ctx), ctx); + } + if (IsFetchRequest) { + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "scheduling HasDataInfoResponse in " << RequestProto.GetFetchRequest().GetWaitMs()); + ctx.Schedule(TDuration::MilliSeconds(Min<ui32>(RequestProto.GetFetchRequest().GetWaitMs(), 30000)), new TEvPersQueue::TEvHasDataInfoResponse); + } + auto* request = new TEvAllTopicsDescribeRequest(TopicPrefix(ctx)); ctx.Send(SchemeCache, request); - ++DescribeRequests; - - if (RequestProto.HasMetaRequest() && (RequestProto.GetMetaRequest().HasCmdGetPartitionLocations() - || RequestProto.GetMetaRequest().HasCmdGetReadSessionsInfo())) { + ++DescribeRequests; + + if (RequestProto.HasMetaRequest() && (RequestProto.GetMetaRequest().HasCmdGetPartitionLocations() + || RequestProto.GetMetaRequest().HasCmdGetReadSessionsInfo())) { //only for this request NodeId-s and Nodes names are required const TActorId nameserviceId = GetNameserviceActorId(); - ctx.Send(nameserviceId, new TEvInterconnect::TEvListNodes()); - } - - Become(&TMessageBusServerPersQueueImpl::StateFunc, ctx, TDuration::MilliSeconds(DefaultTimeout), new TEvents::TEvWakeup()); - } - + ctx.Send(nameserviceId, new TEvInterconnect::TEvListNodes()); + } + + Become(&TMessageBusServerPersQueueImpl::StateFunc, ctx, TDuration::MilliSeconds(DefaultTimeout), new TEvents::TEvWakeup()); + } + STRICT_STFUNC(StateFunc, - HFunc(TEvInterconnect::TEvNodesInfo, Handle); + HFunc(TEvInterconnect::TEvNodesInfo, Handle); HFunc(TEvAllTopicsDescribeResponse, Handle); - HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); - HFunc(TEvTabletPipe::TEvClientConnected, Handle); - HFunc(TEvPersQueue::TEvResponse, Handle); - HFunc(TEvPersQueue::TEvOffsetsResponse, Handle); - HFunc(TEvPersQueue::TEvStatusResponse, Handle); - HFunc(TEvPersQueue::TEvHasDataInfoResponse, Handle); - HFunc(TEvPersQueue::TEvReadSessionsInfoResponse, Handle); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); + HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); + HFunc(TEvTabletPipe::TEvClientConnected, Handle); + HFunc(TEvPersQueue::TEvResponse, Handle); + HFunc(TEvPersQueue::TEvOffsetsResponse, Handle); + HFunc(TEvPersQueue::TEvStatusResponse, Handle); + HFunc(TEvPersQueue::TEvHasDataInfoResponse, Handle); + HFunc(TEvPersQueue::TEvReadSessionsInfoResponse, Handle); + CFunc(TEvents::TSystem::Wakeup, HandleTimeout); CFunc(NActors::TEvents::TSystem::PoisonPill, Die); ) -private: +private: bool GetTopicsList(const ::google::protobuf::RepeatedPtrField<::NKikimrClient::TPersQueueMetaRequest::TTopicRequest>& requests) { for (auto ri = requests.begin(); ri != requests.end(); ++ri) { const auto& topicRequest = *ri; - if (!topicRequest.HasTopic() || topicRequest.GetTopic().empty()) { - ErrorReason = "TopicRequest must have Topic field."; - return false; - - } - TTopicInfo topicInfo; - for (ui32 j = 0; j < topicRequest.PartitionSize(); ++j) { - bool res = topicInfo.PartitionsToRequest.insert(topicRequest.GetPartition(j)).second; - if (!res) { - ErrorReason = Sprintf("multiple partition %d in TopicRequest for topic '%s'", topicRequest.GetPartition(j), - topicRequest.GetTopic().c_str()); - return false; - } - } + if (!topicRequest.HasTopic() || topicRequest.GetTopic().empty()) { + ErrorReason = "TopicRequest must have Topic field."; + return false; + + } + TTopicInfo topicInfo; + for (ui32 j = 0; j < topicRequest.PartitionSize(); ++j) { + bool res = topicInfo.PartitionsToRequest.insert(topicRequest.GetPartition(j)).second; + if (!res) { + ErrorReason = Sprintf("multiple partition %d in TopicRequest for topic '%s'", topicRequest.GetPartition(j), + topicRequest.GetTopic().c_str()); + return false; + } + } const auto& topic = topicRequest.GetTopic(); if (TopicInfo.contains(topic)) { ErrorReason = Sprintf("multiple TopicRequest for topic '%s'", topic.c_str()); - return false; + return false; } else { TopicInfo[topic] = std::move(topicInfo); - } - } - return true; - } -}; - + } + } + return true; + } +}; + class TErrorReplier : public TActorBootstrapped<TErrorReplier> { -public: +public: TErrorReplier(const NKikimrClient::TPersQueueRequest& request, const TActorId& /*schemeCache*/) : RequestId(request.HasRequestId() ? request.GetRequestId() : "<none>") { } - + virtual void SendReplyAndDie(NKikimrClient::TResponse&& response, const TActorContext& ctx) = 0; - + void Bootstrap(const TActorContext& ctx) { - SendReplyAndDie(CreateErrorReply(MSTATUS_ERROR, NPersQueue::NErrorCode::BAD_REQUEST, ErrorText), ctx); + SendReplyAndDie(CreateErrorReply(MSTATUS_ERROR, NPersQueue::NErrorCode::BAD_REQUEST, ErrorText), ctx); } - + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::PQ_BASE_REQUEST_PROCESSOR; } - + TString ErrorText; TString RequestId; -}; - +}; + template <template <class TImpl, class... TArgs> class TSenderImpl, class... T> IActor* CreatePersQueueRequestProcessor( const NKikimrClient::TPersQueueRequest& request, @@ -1433,32 +1433,32 @@ IActor* CreatePersQueueRequestProcessor( replier->ErrorText = ex.what(); return replier; } -} - - +} + + template <class TImplActor> class TMessageBusServerPersQueue : public TImplActor, TMessageBusSessionIdentHolder { -public: +public: template <class... T> TMessageBusServerPersQueue(TBusMessageContext& msg, T&&... constructorParams) : TImplActor(static_cast<TBusPersQueue*>(msg.GetMessage())->Record, std::forward<T>(constructorParams)...) , TMessageBusSessionIdentHolder(msg) - {} - + {} + virtual ~TMessageBusServerPersQueue() = default; - + void SendReplyAndDie(NKikimrClient::TResponse&& record, const TActorContext& ctx) override { THolder<TBusResponse> result(new TBusResponse()); result->Record.Swap(&record); LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "proxy answer " << TImplActor::RequestId); - + SendReplyMove(result.Release()); TImplActor::Die(ctx); - } -}; - - + } +}; + + IActor* CreateMessageBusServerPersQueue( TBusMessageContext& msg, const TActorId& schemeCache, @@ -1486,7 +1486,7 @@ IActor* CreateActorServerPersQueue( request, schemeCache ); -} - -} -} +} + +} +} diff --git a/ydb/core/client/server/msgbus_server_persqueue.h b/ydb/core/client/server/msgbus_server_persqueue.h index 6b7ee6e65a..52c67832e4 100644 --- a/ydb/core/client/server/msgbus_server_persqueue.h +++ b/ydb/core/client/server/msgbus_server_persqueue.h @@ -1,8 +1,8 @@ -#pragma once +#pragma once #include "grpc_server.h" -#include "msgbus_tabletreq.h" - +#include "msgbus_tabletreq.h" + #include <ydb/core/base/tablet_pipe.h> #include <ydb/core/persqueue/events/global.h> #include <ydb/core/tx/scheme_cache/scheme_cache.h> @@ -12,11 +12,11 @@ #include <util/generic/ptr.h> #include <util/system/compiler.h> -namespace NKikimr { -namespace NMsgBusProxy { - -const TString& TopicPrefix(const TActorContext& ctx); - +namespace NKikimr { +namespace NMsgBusProxy { + +const TString& TopicPrefix(const TActorContext& ctx); + struct TProcessingResult { EResponseStatus Status = MSTATUS_OK; NPersQueue::NErrorCode::EErrorCode ErrorCode; @@ -39,19 +39,19 @@ IActor* CreateActorServerPersQueue( const TActorId& schemeCache, std::shared_ptr<IPersQueueGetReadSessionsInfoWorkerFactory> pqReadSessionsInfoWorkerFactory = nullptr ); - -NKikimrClient::TResponse CreateErrorReply(EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason); + +NKikimrClient::TResponse CreateErrorReply(EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason); template <class TResponseEvent> inline ui64 GetTabletId(const TResponseEvent* ev) { return ev->Record.GetTabletId(); -} +} template <> inline ui64 GetTabletId<TEvTabletPipe::TEvClientConnected>(const TEvTabletPipe::TEvClientConnected* ev) { return ev->TabletId; -} +} // Base class for PQ requests. It requests EvGetNode and creates worker actors for concrete topics. // Than it starts merge over children responses. @@ -65,8 +65,8 @@ protected: using ESchemeStatus = NSchemeCache::TSchemeCacheNavigate::EStatus; struct TPerTopicInfo { - TPerTopicInfo() - { } + TPerTopicInfo() + { } explicit TPerTopicInfo(const TSchemeEntry& topicEntry) : TopicEntry(topicEntry) { @@ -89,13 +89,13 @@ public: public: static const TDuration TIMEOUT; - TInstant StartTimestamp = TInstant::Zero(); - bool NeedChildrenCreation = false; - - ui32 ChildrenCreated = 0; - - std::deque<THolder<TPerTopicInfo>> ChildrenToCreate; - + TInstant StartTimestamp = TInstant::Zero(); + bool NeedChildrenCreation = false; + + ui32 ChildrenCreated = 0; + + std::deque<THolder<TPerTopicInfo>> ChildrenToCreate; + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::PQ_BASE_REQUEST_PROCESSOR; } @@ -103,30 +103,30 @@ public: protected: TPersQueueBaseRequestProcessor(const NKikimrClient::TPersQueueRequest& request, const TActorId& pqMetaCacheId, bool listNodes); - ~TPersQueueBaseRequestProcessor(); - + ~TPersQueueBaseRequestProcessor(); + public: void Bootstrap(const TActorContext& ctx); protected: - bool CreateChildrenIfNeeded(const TActorContext& ctx); - + bool CreateChildrenIfNeeded(const TActorContext& ctx); + virtual THolder<IActor> CreateTopicSubactor(const TSchemeEntry& topicEntry, const TString& name) = 0; // Creates actor for processing one concrete topic. virtual NKikimrClient::TResponse MergeSubactorReplies(); virtual void SendReplyAndDie(NKikimrClient::TResponse&& record, const TActorContext& ctx) = 0; - void SendErrorReplyAndDie(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason); + void SendErrorReplyAndDie(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason); bool ReadyToCreateChildren() const; // true returned from this function means that we called Die(). [[nodiscard]] bool CreateChildren(const TActorContext& ctx); - virtual bool ReadyForAnswer(const TActorContext& ctx); + virtual bool ReadyForAnswer(const TActorContext& ctx); void AnswerAndDie(const TActorContext& ctx); void GetTopicsListOrThrow(const ::google::protobuf::RepeatedPtrField<::NKikimrClient::TPersQueueMetaRequest::TTopicRequest>& requests, THashMap<TString, std::shared_ptr<THashSet<ui64>>>& partitionsToRequest); - virtual STFUNC(StateFunc); + virtual STFUNC(StateFunc); void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev, const TActorContext& ctx); void Handle(NPqMetaCacheV2::TEvPqNewMetaCache::TEvDescribeTopicsResponse::TPtr& ev, const TActorContext& ctx); @@ -162,7 +162,7 @@ protected: TTopicInfoBasedActor(const TSchemeEntry& topicEntry, const TString& topicName); virtual void BootstrapImpl(const TActorContext& ctx) = 0; - virtual void Answer(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) = 0; + virtual void Answer(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) = 0; virtual void SendReplyAndDie(NKikimrClient::TResponse&& record, const TActorContext& ctx) = 0; @@ -198,13 +198,13 @@ protected: ctx.Send(Parent, result.Release()); - Die(ctx); - } - void Die(const TActorContext& ctx) override { + Die(ctx); + } + void Die(const TActorContext& ctx) override { TBase::Die(ctx); } - void SendErrorReplyAndDie(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) { + void SendErrorReplyAndDie(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) { SendReplyAndDie(CreateErrorReply(status, code, errorReason), ctx); } diff --git a/ydb/core/client/server/msgbus_server_pq_metacache.cpp b/ydb/core/client/server/msgbus_server_pq_metacache.cpp index 0ab43955d6..f44ab6f480 100644 --- a/ydb/core/client/server/msgbus_server_pq_metacache.cpp +++ b/ydb/core/client/server/msgbus_server_pq_metacache.cpp @@ -22,7 +22,7 @@ IActor* CreateSchemeCache(NActors::TActorSystem* ActorSystem, TIntrusivePtr<NMon auto cacheConfig = MakeIntrusive<NSchemeCache::TSchemeCacheConfig>(appData, cacheCounters); return CreateSchemeBoardSchemeCache(cacheConfig.Get()); } - + class TPersQueueMetaCacheActor : public TActorBootstrapped<TPersQueueMetaCacheActor> { using TBase = TActorBootstrapped<TPersQueueMetaCacheActor>; public: @@ -44,7 +44,7 @@ public: : Counters(counters) , VersionCheckInterval(versionCheckInterval) , Generation(std::make_shared<TAtomicCounter>()) - { + { } void Bootstrap(const TActorContext& ctx) { diff --git a/ydb/core/client/server/msgbus_server_pq_metacache.h b/ydb/core/client/server/msgbus_server_pq_metacache.h index 278952e756..6c12b76b04 100644 --- a/ydb/core/client/server/msgbus_server_pq_metacache.h +++ b/ydb/core/client/server/msgbus_server_pq_metacache.h @@ -12,14 +12,14 @@ namespace NKikimr::NMsgBusProxy { -static const ui32 PQ_METACACHE_TIMEOUT_SECONDS = 120; +static const ui32 PQ_METACACHE_TIMEOUT_SECONDS = 120; static const ui32 PQ_METACACHE_REFRESH_INTERVAL_SECONDS = 10; - - + + inline TActorId CreatePersQueueMetaCacheV2Id() { return TActorId(0, "PQMetaCache"); -} - +} + namespace NPqMetaCacheV2 { enum class EQueryType { diff --git a/ydb/core/client/server/msgbus_server_pq_metarequest.cpp b/ydb/core/client/server/msgbus_server_pq_metarequest.cpp index dc3b207dbb..32f37251cc 100644 --- a/ydb/core/client/server/msgbus_server_pq_metarequest.cpp +++ b/ydb/core/client/server/msgbus_server_pq_metarequest.cpp @@ -15,13 +15,13 @@ void SetErrorCode( code = NPersQueue::NErrorCode::UNKNOWN_TOPIC; } topicResult->SetErrorCode(code); - if (code == NPersQueue::NErrorCode::UNKNOWN_TOPIC) { - topicResult->SetErrorReason("topic not found"); - } else if (code == NPersQueue::NErrorCode::INITIALIZING) { - topicResult->SetErrorReason("could not describe topic"); - } else if (code != NPersQueue::NErrorCode::OK) { - topicResult->SetErrorReason("internal error"); - } + if (code == NPersQueue::NErrorCode::UNKNOWN_TOPIC) { + topicResult->SetErrorReason("topic not found"); + } else if (code == NPersQueue::NErrorCode::INITIALIZING) { + topicResult->SetErrorReason("could not describe topic"); + } else if (code != NPersQueue::NErrorCode::OK) { + topicResult->SetErrorReason("internal error"); + } } // @@ -52,20 +52,20 @@ TPersQueueGetTopicMetadataTopicWorker::TPersQueueGetTopicMetadataTopicWorker( SetActivityType(NKikimrServices::TActivity::PQ_META_REQUEST_PROCESSOR); } - + void TPersQueueGetTopicMetadataTopicWorker::BootstrapImpl(const TActorContext& ctx) { auto processingResult = ProcessMetaCacheSingleTopicsResponse(SchemeEntry); Answer(ctx, processingResult.Status, processingResult.ErrorCode, processingResult.Reason); -} - -void TPersQueueGetTopicMetadataTopicWorker::Answer(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) { +} + +void TPersQueueGetTopicMetadataTopicWorker::Answer(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) { NKikimrClient::TResponse response; - response.SetStatus(status); - response.SetErrorCode(code); - if (!errorReason.empty()) - response.SetErrorReason(errorReason); - if (code == NPersQueue::NErrorCode::OK) { - auto* topicInfo = response.MutableMetaResponse()->MutableCmdGetTopicMetadataResult()->AddTopicInfo(); + response.SetStatus(status); + response.SetErrorCode(code); + if (!errorReason.empty()) + response.SetErrorReason(errorReason); + if (code == NPersQueue::NErrorCode::OK) { + auto* topicInfo = response.MutableMetaResponse()->MutableCmdGetTopicMetadataResult()->AddTopicInfo(); SetErrorCode(topicInfo, SchemeEntry); if (SchemeEntry.PQGroupInfo != nullptr) { const auto& desc = SchemeEntry.PQGroupInfo->Description; @@ -113,7 +113,7 @@ TPersQueueGetPartitionOffsetsTopicWorker::TPersQueueGetPartitionOffsetsTopicWork void TPersQueueGetPartitionOffsetsTopicWorker::BootstrapImpl(const TActorContext &ctx) { size_t partitionsAsked = 0; - THashSet<ui64> parts; + THashSet<ui64> parts; if (SchemeEntry.PQGroupInfo) { const auto& pqDescr = SchemeEntry.PQGroupInfo->Description; for (const auto& partition : pqDescr.GetPartitions()) { @@ -122,7 +122,7 @@ void TPersQueueGetPartitionOffsetsTopicWorker::BootstrapImpl(const TActorContext if (PartitionsToRequest.get() != nullptr && !PartitionsToRequest->empty() && !PartitionsToRequest->contains(partIndex)) { continue; } - parts.insert(partIndex); + parts.insert(partIndex); ++partitionsAsked; if (HasTabletPipe(tabletId)) { // Take all partitions for tablet from one TEvOffsetsResponse event continue; @@ -136,14 +136,14 @@ void TPersQueueGetPartitionOffsetsTopicWorker::BootstrapImpl(const TActorContext } } if (PartitionsToRequest.get() != nullptr && !PartitionsToRequest->empty() && PartitionsToRequest->size() != partitionsAsked) { - SendErrorReplyAndDie(ctx, MSTATUS_ERROR, NPersQueue::NErrorCode::UNKNOWN_TOPIC, + SendErrorReplyAndDie(ctx, MSTATUS_ERROR, NPersQueue::NErrorCode::UNKNOWN_TOPIC, TStringBuilder() << "no one of requested partitions in topic '" << Name << "', Marker# PQ96"); return; } if (!PartitionsToRequest.get() || PartitionsToRequest->empty()) { PartitionsToRequest.reset(new THashSet<ui64>()); - PartitionsToRequest->swap(parts); - } + PartitionsToRequest->swap(parts); + } if(WaitAllPipeEvents(ctx)) { return; } @@ -153,40 +153,40 @@ bool TPersQueueGetPartitionOffsetsTopicWorker::OnPipeEventsAreReady(const TActor auto processResult = ProcessMetaCacheSingleTopicsResponse(SchemeEntry); Answer(ctx, processResult.Status, processResult.ErrorCode, processResult.Reason); - return true; -} - -void TPersQueueGetPartitionOffsetsTopicWorker::Answer(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) { + return true; +} + +void TPersQueueGetPartitionOffsetsTopicWorker::Answer(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) { NKikimrClient::TResponse response; - response.SetStatus(status); - response.SetErrorCode(code); - if (!errorReason.empty()) - response.SetErrorReason(errorReason); - if (code == NPersQueue::NErrorCode::OK) { - auto& topicResult = *response.MutableMetaResponse()->MutableCmdGetPartitionOffsetsResult()->AddTopicResult(); + response.SetStatus(status); + response.SetErrorCode(code); + if (!errorReason.empty()) + response.SetErrorReason(errorReason); + if (code == NPersQueue::NErrorCode::OK) { + auto& topicResult = *response.MutableMetaResponse()->MutableCmdGetPartitionOffsetsResult()->AddTopicResult(); topicResult.SetTopic(Name); SetErrorCode(&topicResult, SchemeEntry); - THashSet<ui64> partitionsInserted; - for (auto& ans : PipeAnswers) { - if (ans.second.Get() != nullptr) { - for (auto& partResult : *ans.second->Get()->Record.MutablePartResult()) { - const ui64 partitionIndex = partResult.GetPartition(); + THashSet<ui64> partitionsInserted; + for (auto& ans : PipeAnswers) { + if (ans.second.Get() != nullptr) { + for (auto& partResult : *ans.second->Get()->Record.MutablePartResult()) { + const ui64 partitionIndex = partResult.GetPartition(); if (PartitionsToRequest.get() == nullptr || PartitionsToRequest->empty() || PartitionsToRequest->contains(partitionIndex)) { - topicResult.AddPartitionResult()->Swap(&partResult); + topicResult.AddPartitionResult()->Swap(&partResult); partitionsInserted.insert(partitionIndex); } } } } if (PartitionsToRequest.get() != nullptr && !PartitionsToRequest->empty() && PartitionsToRequest->size() != partitionsInserted.size() && topicResult.GetErrorCode() == (ui32)NPersQueue::NErrorCode::OK) { - const TString reason = "partition is not ready yet"; - for (ui64 partitionIndex : *PartitionsToRequest) { - if (!IsIn(partitionsInserted, partitionIndex)) { - auto res = topicResult.AddPartitionResult(); - res->SetPartition(partitionIndex); - res->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); - res->SetErrorReason(reason); - } + const TString reason = "partition is not ready yet"; + for (ui64 partitionIndex : *PartitionsToRequest) { + if (!IsIn(partitionsInserted, partitionIndex)) { + auto res = topicResult.AddPartitionResult(); + res->SetPartition(partitionIndex); + res->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); + res->SetErrorReason(reason); + } } } } @@ -226,7 +226,7 @@ TPersQueueGetPartitionStatusTopicWorker::TPersQueueGetPartitionStatusTopicWorker void TPersQueueGetPartitionStatusTopicWorker::BootstrapImpl(const TActorContext &ctx) { size_t partitionsAsked = 0; - THashSet<ui64> parts; + THashSet<ui64> parts; if (!ProcessingResult.IsFatal) { const auto& pqDescr = SchemeEntry.PQGroupInfo->Description; for (const auto& partition : pqDescr.GetPartitions()) { @@ -235,14 +235,14 @@ void TPersQueueGetPartitionStatusTopicWorker::BootstrapImpl(const TActorContext if (PartitionsToRequest != nullptr && !PartitionsToRequest->empty() && !PartitionsToRequest->contains(partIndex)) { continue; } - parts.insert(partIndex); + parts.insert(partIndex); ++partitionsAsked; if (HasTabletPipe(tabletId)) { // Take all partitions for tablet from one TEvStatusResponse event continue; } THolder<TEvPersQueue::TEvStatus> ev(new TEvPersQueue::TEvStatus()); - if (RequestProto->GetMetaRequest().GetCmdGetPartitionStatus().HasClientId()) - ev->Record.SetClientId(RequestProto->GetMetaRequest().GetCmdGetPartitionStatus().GetClientId()); + if (RequestProto->GetMetaRequest().GetCmdGetPartitionStatus().HasClientId()) + ev->Record.SetClientId(RequestProto->GetMetaRequest().GetCmdGetPartitionStatus().GetClientId()); CreatePipeAndSend(tabletId, ctx, std::move(ev)); } } else { @@ -250,15 +250,15 @@ void TPersQueueGetPartitionStatusTopicWorker::BootstrapImpl(const TActorContext return; } if (PartitionsToRequest != nullptr && !PartitionsToRequest->empty() && PartitionsToRequest->size() != partitionsAsked) { - SendErrorReplyAndDie(ctx, MSTATUS_ERROR, NPersQueue::NErrorCode::UNKNOWN_TOPIC, + SendErrorReplyAndDie(ctx, MSTATUS_ERROR, NPersQueue::NErrorCode::UNKNOWN_TOPIC, TStringBuilder() << "no one of requested partitions in topic '" << Name << "', Marker# PQ97"); return; } if (!PartitionsToRequest.get() || PartitionsToRequest->empty()) { PartitionsToRequest.reset(new THashSet<ui64>()); - PartitionsToRequest->swap(parts); - } - + PartitionsToRequest->swap(parts); + } + if (WaitAllPipeEvents(ctx)) return; } @@ -266,44 +266,44 @@ void TPersQueueGetPartitionStatusTopicWorker::BootstrapImpl(const TActorContext bool TPersQueueGetPartitionStatusTopicWorker::OnPipeEventsAreReady(const TActorContext& ctx) { auto processResult = ProcessMetaCacheSingleTopicsResponse(SchemeEntry); Answer(ctx, processResult.Status, processResult.ErrorCode, processResult.Reason); - return true; -} - + return true; +} + void TPersQueueGetPartitionStatusTopicWorker::Answer( const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason ) { NKikimrClient::TResponse response; - response.SetStatus(status); - response.SetErrorCode(code); - if (!errorReason.empty()) - response.SetErrorReason(errorReason); - if (code == NPersQueue::NErrorCode::OK) { - auto& topicResult = *response.MutableMetaResponse()->MutableCmdGetPartitionStatusResult()->AddTopicResult(); + response.SetStatus(status); + response.SetErrorCode(code); + if (!errorReason.empty()) + response.SetErrorReason(errorReason); + if (code == NPersQueue::NErrorCode::OK) { + auto& topicResult = *response.MutableMetaResponse()->MutableCmdGetPartitionStatusResult()->AddTopicResult(); topicResult.SetTopic(Name); SetErrorCode(&topicResult, SchemeEntry); - THashSet<ui64> partitionsInserted; - for (auto& ans : PipeAnswers) { - if (ans.second.Get() != nullptr) { - for (auto& partResult : *ans.second->Get()->Record.MutablePartResult()) { - const ui64 partitionIndex = partResult.GetPartition(); + THashSet<ui64> partitionsInserted; + for (auto& ans : PipeAnswers) { + if (ans.second.Get() != nullptr) { + for (auto& partResult : *ans.second->Get()->Record.MutablePartResult()) { + const ui64 partitionIndex = partResult.GetPartition(); if (PartitionsToRequest.get() == nullptr || PartitionsToRequest->empty() || PartitionsToRequest->contains(partitionIndex)) { - topicResult.AddPartitionResult()->Swap(&partResult); + topicResult.AddPartitionResult()->Swap(&partResult); if (PartitionsToRequest.get() != nullptr && !PartitionsToRequest->empty()) { - partitionsInserted.insert(partitionIndex); - } + partitionsInserted.insert(partitionIndex); + } } } } } if (PartitionsToRequest.get() != nullptr && !PartitionsToRequest->empty() && PartitionsToRequest->size() != partitionsInserted.size() && topicResult.GetErrorCode() == (ui32)NPersQueue::NErrorCode::OK) { - const TString reason = "partition is not ready yet"; - for (ui64 partitionIndex : *PartitionsToRequest) { - if (!IsIn(partitionsInserted, partitionIndex)) { - auto res = topicResult.AddPartitionResult(); - res->SetPartition(partitionIndex); - res->SetStatus(NKikimrPQ::TStatusResponse::STATUS_UNKNOWN); - } + const TString reason = "partition is not ready yet"; + for (ui64 partitionIndex : *PartitionsToRequest) { + if (!IsIn(partitionsInserted, partitionIndex)) { + auto res = topicResult.AddPartitionResult(); + res->SetPartition(partitionIndex); + res->SetStatus(NKikimrPQ::TStatusResponse::STATUS_UNKNOWN); + } } } } @@ -350,7 +350,7 @@ TPersQueueGetPartitionLocationsTopicWorker::TPersQueueGetPartitionLocationsTopic void TPersQueueGetPartitionLocationsTopicWorker::BootstrapImpl(const TActorContext& ctx) { size_t partitionsAsked = 0; - THashSet<ui64> parts; + THashSet<ui64> parts; if (SchemeEntry.PQGroupInfo) { const auto& pqDescr = SchemeEntry.PQGroupInfo->Description; for (const auto& partition : pqDescr.GetPartitions()) { @@ -361,7 +361,7 @@ void TPersQueueGetPartitionLocationsTopicWorker::BootstrapImpl(const TActorConte } PartitionToTablet[partIndex] = tabletId; ++partitionsAsked; - parts.insert(partIndex); + parts.insert(partIndex); if (HasTabletPipe(tabletId)) { // Take all partitions for tablet from one TEvStatusResponse event continue; } @@ -369,15 +369,15 @@ void TPersQueueGetPartitionLocationsTopicWorker::BootstrapImpl(const TActorConte } } if (PartitionsToRequest.get() != nullptr && !PartitionsToRequest->empty() && PartitionsToRequest->size() != partitionsAsked) { - SendErrorReplyAndDie(ctx, MSTATUS_ERROR, NPersQueue::NErrorCode::UNKNOWN_TOPIC, + SendErrorReplyAndDie(ctx, MSTATUS_ERROR, NPersQueue::NErrorCode::UNKNOWN_TOPIC, TStringBuilder() << "no one of requested partitions in topic '" << Name << "', Marker# PQ98"); return; } if (!PartitionsToRequest.get() || PartitionsToRequest->empty()) { PartitionsToRequest.reset(new THashSet<ui64>()); - PartitionsToRequest->swap(parts); - } - + PartitionsToRequest->swap(parts); + } + if(WaitAllConnections(ctx)) return; } @@ -385,49 +385,49 @@ void TPersQueueGetPartitionLocationsTopicWorker::BootstrapImpl(const TActorConte bool TPersQueueGetPartitionLocationsTopicWorker::OnPipeEventsAreReady(const TActorContext& ctx) { auto processResult = ProcessMetaCacheSingleTopicsResponse(SchemeEntry); Answer(ctx, processResult.Status, processResult.ErrorCode, processResult.Reason); - return true; -} - + return true; +} + void TPersQueueGetPartitionLocationsTopicWorker::Answer( const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason ) { NKikimrClient::TResponse response; - response.SetStatus(status); - response.SetErrorCode(code); - if (!errorReason.empty()) - response.SetErrorReason(errorReason); - if (code == NPersQueue::NErrorCode::OK) { + response.SetStatus(status); + response.SetErrorCode(code); + if (!errorReason.empty()) + response.SetErrorReason(errorReason); + if (code == NPersQueue::NErrorCode::OK) { - auto& topicResult = *response.MutableMetaResponse()->MutableCmdGetPartitionLocationsResult()->AddTopicResult(); + auto& topicResult = *response.MutableMetaResponse()->MutableCmdGetPartitionLocationsResult()->AddTopicResult(); topicResult.SetTopic(Name); SetErrorCode(&topicResult, SchemeEntry); - for (const auto& partitionToTablet : PartitionToTablet) { - const ui32 partition = partitionToTablet.first; - const ui64 tabletId = partitionToTablet.second; - auto& location = *topicResult.AddPartitionLocation(); - location.SetPartition(partition); - - const auto ansIt = PipeAnswers.find(tabletId); - Y_VERIFY(ansIt != PipeAnswers.end()); - bool statusInitializing = false; - if (ansIt->second.Get() != nullptr && ansIt->second->Get()->Status == NKikimrProto::OK) { - const ui32 nodeId = ansIt->second->Get()->ServerId.NodeId(); - const auto hostName = NodesInfo->HostNames.find(nodeId); - if (hostName != NodesInfo->HostNames.end()) { - location.SetHost(hostName->second); - location.SetHostId(nodeId); - location.SetErrorCode(NPersQueue::NErrorCode::OK); - } else { - statusInitializing = true; - } + for (const auto& partitionToTablet : PartitionToTablet) { + const ui32 partition = partitionToTablet.first; + const ui64 tabletId = partitionToTablet.second; + auto& location = *topicResult.AddPartitionLocation(); + location.SetPartition(partition); + + const auto ansIt = PipeAnswers.find(tabletId); + Y_VERIFY(ansIt != PipeAnswers.end()); + bool statusInitializing = false; + if (ansIt->second.Get() != nullptr && ansIt->second->Get()->Status == NKikimrProto::OK) { + const ui32 nodeId = ansIt->second->Get()->ServerId.NodeId(); + const auto hostName = NodesInfo->HostNames.find(nodeId); + if (hostName != NodesInfo->HostNames.end()) { + location.SetHost(hostName->second); + location.SetHostId(nodeId); + location.SetErrorCode(NPersQueue::NErrorCode::OK); + } else { + statusInitializing = true; + } } else { statusInitializing = true; } - if (statusInitializing) { - location.SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); - location.SetErrorReason("Tablet for that partition is not running"); - } + if (statusInitializing) { + location.SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); + location.SetErrorReason("Tablet for that partition is not running"); + } } } SendReplyAndDie(std::move(response), ctx); @@ -536,13 +536,13 @@ THolder<IActor> TPersQueueGetReadSessionsInfoProcessor::CreateSessionsSubactor( ) { if (PQReadSessionsInfoWorkerFactory) { return PQReadSessionsInfoWorkerFactory->Create(SelfId(), std::move(readSessions), NodesInfo); - } + } return MakeHolder<TPersQueueGetReadSessionsInfoWorker>(SelfId(), std::move(readSessions), NodesInfo); -} - -STFUNC(TPersQueueGetReadSessionsInfoTopicWorker::WaitAllPipeEventsStateFunc) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvPersQueue::TEvReadSessionsInfoResponse, Handle); +} + +STFUNC(TPersQueueGetReadSessionsInfoTopicWorker::WaitAllPipeEventsStateFunc) { + switch (ev->GetTypeRewrite()) { + HFunc(TEvPersQueue::TEvReadSessionsInfoResponse, Handle); case TEvTabletPipe::TEvClientDestroyed::EventType: if (!HandleDestroy(ev->Get<TEvTabletPipe::TEvClientDestroyed>(), ctx)) { TPipesWaiterActor::WaitAllPipeEventsStateFunc(ev, ctx); @@ -586,14 +586,14 @@ bool TPersQueueGetReadSessionsInfoTopicWorker::HandleDestroy(TEvTabletPipe::TEvC void TPersQueueGetReadSessionsInfoTopicWorker::Handle(TEvPersQueue::TEvReadSessionsInfoResponse::TPtr& ev, const TActorContext& ctx) { BalancerReplied = true; - BalancerResponse = ev; + BalancerResponse = ev; if (ReadyToAnswer()) { - Answer(ctx, MSTATUS_OK, NPersQueue::NErrorCode::OK, ""); + Answer(ctx, MSTATUS_OK, NPersQueue::NErrorCode::OK, ""); } } - - + + bool TPersQueueGetReadSessionsInfoTopicWorker::OnPipeEventsAreReady(const TActorContext& ctx) { PipeEventsAreReady = true; if (ReadyToAnswer()) { @@ -604,92 +604,92 @@ bool TPersQueueGetReadSessionsInfoTopicWorker::OnPipeEventsAreReady(const TActor } bool TPersQueueGetReadSessionsInfoTopicWorker::ReadyToAnswer() const { - return PipeEventsAreReady && BalancerReplied; + return PipeEventsAreReady && BalancerReplied; } - + TString TPersQueueGetReadSessionsInfoTopicWorker::GetHostName(ui32 hostId) const { const auto host = NodesInfo->HostNames.find(hostId); return host != NodesInfo->HostNames.end() ? host->second : TString(); } -void TPersQueueGetReadSessionsInfoTopicWorker::Answer(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) { +void TPersQueueGetReadSessionsInfoTopicWorker::Answer(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) { NKikimrClient::TResponse response; - response.SetStatus(status); - response.SetErrorCode(code); - if (!errorReason.empty()) - response.SetErrorReason(errorReason); - if (code == NPersQueue::NErrorCode::OK) { - - auto stat = response.MutableMetaResponse()->MutableCmdGetReadSessionsInfoResult(); - auto topicRes = stat->AddTopicResult(); + response.SetStatus(status); + response.SetErrorCode(code); + if (!errorReason.empty()) + response.SetErrorReason(errorReason); + if (code == NPersQueue::NErrorCode::OK) { + + auto stat = response.MutableMetaResponse()->MutableCmdGetReadSessionsInfoResult(); + auto topicRes = stat->AddTopicResult(); topicRes->SetTopic(Name); SetErrorCode(topicRes, SchemeEntry); - THashMap<ui32, ui32> partitionToResp; - ui32 index = 0; - if (BalancerResponse.Get() != nullptr) { - for (const auto& resp : BalancerResponse->Get()->Record.GetPartitionInfo()) { - partitionToResp[resp.GetPartition()] = index++; - auto res = topicRes->AddPartitionResult(); - res->SetPartition(resp.GetPartition()); - res->SetSession(resp.GetSession()); - res->SetClientNode(resp.GetClientNode()); - res->SetTimestamp(resp.GetTimestamp() > 0 ? TInstant::Seconds(resp.GetTimestamp()).ToString() : ""); - res->SetProxyNode(GetHostName(resp.GetProxyNodeId())); - - res->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); - res->SetErrorReason("Getting of session info failed"); - } + THashMap<ui32, ui32> partitionToResp; + ui32 index = 0; + if (BalancerResponse.Get() != nullptr) { + for (const auto& resp : BalancerResponse->Get()->Record.GetPartitionInfo()) { + partitionToResp[resp.GetPartition()] = index++; + auto res = topicRes->AddPartitionResult(); + res->SetPartition(resp.GetPartition()); + res->SetSession(resp.GetSession()); + res->SetClientNode(resp.GetClientNode()); + res->SetTimestamp(resp.GetTimestamp() > 0 ? TInstant::Seconds(resp.GetTimestamp()).ToString() : ""); + res->SetProxyNode(GetHostName(resp.GetProxyNodeId())); + + res->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); + res->SetErrorReason("Getting of session info failed"); + } THolder<TEvPersQueue::TEvReadSessionsInfoResponse> request = MakeHolder<TEvPersQueue::TEvReadSessionsInfoResponse>(); - request->Record.Swap(&(BalancerResponse->Get()->Record)); - request->Record.ClearPartitionInfo(); - - ctx.Send(Parent, request.Release()); - } else if (topicRes->GetErrorCode() == (ui32)NPersQueue::NErrorCode::OK) { + request->Record.Swap(&(BalancerResponse->Get()->Record)); + request->Record.ClearPartitionInfo(); + + ctx.Send(Parent, request.Release()); + } else if (topicRes->GetErrorCode() == (ui32)NPersQueue::NErrorCode::OK) { for (const auto& partition : SchemeEntry.PQGroupInfo->Description.GetPartitions()) { const ui32 partitionIndex = partition.GetPartitionId(); - partitionToResp[partitionIndex] = index++; - auto res = topicRes->AddPartitionResult(); - res->SetPartition(partitionIndex); - res->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); - res->SetErrorReason("balancer tablet for partition is not running"); - } - SendReplyAndDie(std::move(response), ctx); - return; + partitionToResp[partitionIndex] = index++; + auto res = topicRes->AddPartitionResult(); + res->SetPartition(partitionIndex); + res->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); + res->SetErrorReason("balancer tablet for partition is not running"); + } + SendReplyAndDie(std::move(response), ctx); + return; } - for (const auto& pipeAnswer : PipeAnswers) { - if (!pipeAnswer.second) { + for (const auto& pipeAnswer : PipeAnswers) { + if (!pipeAnswer.second) { continue; } - const auto& offsetResp = pipeAnswer.second->Get()->Record; - const ui64 tabletId = pipeAnswer.first; - for (const auto& partResult : offsetResp.GetPartResult()) { - const ui32 partitionIndex = partResult.GetPartition(); - if (PartitionToTablet.find(partitionIndex) == PartitionToTablet.end()) { - continue; - } - const auto responseIndex = partitionToResp.find(partitionIndex); - auto res = responseIndex != partitionToResp.end() ? topicRes->MutablePartitionResult(responseIndex->second) : topicRes->AddPartitionResult(); - res->SetPartition(partitionIndex); - res->SetClientOffset(partResult.GetClientOffset()); // 0 if there is no offset - res->SetStartOffset(partResult.GetStartOffset()); - res->SetEndOffset(partResult.GetEndOffset()); - const ui64 nowMS = TAppData::TimeProvider->Now().MilliSeconds(); - res->SetTimeLag(partResult.HasWriteTimestampMS() ? Max<i64>(nowMS - partResult.GetWriteTimestampMS(), 0) : 0); - res->SetReadTimeLag(partResult.HasReadWriteTimestampMS() ? Max<i64>(nowMS - partResult.GetReadWriteTimestampMS(), 0) : 0); - res->SetClientReadOffset(partResult.GetClientReadOffset()); - res->SetErrorCode(NPersQueue::NErrorCode::OK); - res->ClearErrorReason(); - - auto itTabletNode = TabletNodes.find(tabletId); - if (itTabletNode != TabletNodes.end()) { - res->SetTabletNode(GetHostName(itTabletNode->second)); - res->SetTabletNodeId(itTabletNode->second); - } - if (res->GetTabletNode().empty()) { - res->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); - res->SetErrorReason("Tablet for partition is not running"); - } + const auto& offsetResp = pipeAnswer.second->Get()->Record; + const ui64 tabletId = pipeAnswer.first; + for (const auto& partResult : offsetResp.GetPartResult()) { + const ui32 partitionIndex = partResult.GetPartition(); + if (PartitionToTablet.find(partitionIndex) == PartitionToTablet.end()) { + continue; + } + const auto responseIndex = partitionToResp.find(partitionIndex); + auto res = responseIndex != partitionToResp.end() ? topicRes->MutablePartitionResult(responseIndex->second) : topicRes->AddPartitionResult(); + res->SetPartition(partitionIndex); + res->SetClientOffset(partResult.GetClientOffset()); // 0 if there is no offset + res->SetStartOffset(partResult.GetStartOffset()); + res->SetEndOffset(partResult.GetEndOffset()); + const ui64 nowMS = TAppData::TimeProvider->Now().MilliSeconds(); + res->SetTimeLag(partResult.HasWriteTimestampMS() ? Max<i64>(nowMS - partResult.GetWriteTimestampMS(), 0) : 0); + res->SetReadTimeLag(partResult.HasReadWriteTimestampMS() ? Max<i64>(nowMS - partResult.GetReadWriteTimestampMS(), 0) : 0); + res->SetClientReadOffset(partResult.GetClientReadOffset()); + res->SetErrorCode(NPersQueue::NErrorCode::OK); + res->ClearErrorReason(); + + auto itTabletNode = TabletNodes.find(tabletId); + if (itTabletNode != TabletNodes.end()) { + res->SetTabletNode(GetHostName(itTabletNode->second)); + res->SetTabletNodeId(itTabletNode->second); + } + if (res->GetTabletNode().empty()) { + res->SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); + res->SetErrorReason("Tablet for partition is not running"); + } } } } diff --git a/ydb/core/client/server/msgbus_server_pq_metarequest.h b/ydb/core/client/server/msgbus_server_pq_metarequest.h index 9b0b361306..1d56c28dc0 100644 --- a/ydb/core/client/server/msgbus_server_pq_metarequest.h +++ b/ydb/core/client/server/msgbus_server_pq_metarequest.h @@ -21,7 +21,7 @@ public: TPersQueueGetTopicMetadataTopicWorker(const TActorId& parent, const TSchemeEntry& topicEntry, const TString& name); void BootstrapImpl(const TActorContext& ctx) override; - void Answer(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) override; + void Answer(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) override; }; @@ -49,7 +49,7 @@ public: void BootstrapImpl(const TActorContext& ctx) override; bool OnPipeEventsAreReady(const TActorContext& ctx) override; - void Answer(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) override; + void Answer(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) override; private: std::shared_ptr<THashSet<ui64>> PartitionsToRequest; @@ -82,7 +82,7 @@ public: void BootstrapImpl(const TActorContext& ctx) override; bool OnPipeEventsAreReady(const TActorContext& ctx) override; - void Answer(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) override; + void Answer(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) override; private: std::shared_ptr<THashSet<ui64>> PartitionsToRequest; @@ -115,7 +115,7 @@ public: void BootstrapImpl(const TActorContext& ctx) override; bool OnPipeEventsAreReady(const TActorContext& ctx) override; - void Answer(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) override; + void Answer(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) override; private: std::shared_ptr<THashSet<ui64>> PartitionsToRequest; @@ -137,47 +137,47 @@ public: std::shared_ptr<IPersQueueGetReadSessionsInfoWorkerFactory> pqReadSessionsInfoWorkerFactory ); - bool ReadyForAnswer(const TActorContext& ctx) override { - if (TPersQueueBaseRequestProcessor::ReadyForAnswer(ctx)) { - if (HasSessionsRequest || ReadSessions.empty()) { - return true; - } - HasSessionsRequest = true; - auto actorId = ctx.Register(CreateSessionsSubactor(std::move(ReadSessions)).Release()); - Children.emplace(actorId, MakeHolder<TPerTopicInfo>()); - } - return false; - } - - void Handle(TEvPersQueue::TEvReadSessionsInfoResponse::TPtr& ev, const TActorContext&) { - for (auto & s : ev->Get()->Record.GetReadSessions()) { - if (!s.GetSession().empty()) { + bool ReadyForAnswer(const TActorContext& ctx) override { + if (TPersQueueBaseRequestProcessor::ReadyForAnswer(ctx)) { + if (HasSessionsRequest || ReadSessions.empty()) { + return true; + } + HasSessionsRequest = true; + auto actorId = ctx.Register(CreateSessionsSubactor(std::move(ReadSessions)).Release()); + Children.emplace(actorId, MakeHolder<TPerTopicInfo>()); + } + return false; + } + + void Handle(TEvPersQueue::TEvReadSessionsInfoResponse::TPtr& ev, const TActorContext&) { + for (auto & s : ev->Get()->Record.GetReadSessions()) { + if (!s.GetSession().empty()) { TActorId actor = ActorIdFromProto(s.GetSessionActor()); - ReadSessions.insert(std::make_pair(s.GetSession(), actor)); - } - } - } - - - STFUNC(StateFunc) override { - switch (ev->GetTypeRewrite()) { - HFunc(TEvPersQueue::TEvReadSessionsInfoResponse, Handle); - default: - TPersQueueBaseRequestProcessor::StateFunc(ev, ctx); - } - } - + ReadSessions.insert(std::make_pair(s.GetSession(), actor)); + } + } + } + + + STFUNC(StateFunc) override { + switch (ev->GetTypeRewrite()) { + HFunc(TEvPersQueue::TEvReadSessionsInfoResponse, Handle); + default: + TPersQueueBaseRequestProcessor::StateFunc(ev, ctx); + } + } + private: - + THolder<IActor> CreateTopicSubactor(const TSchemeEntry& topicEntry, const TString& name) override; THolder<IActor> CreateSessionsSubactor(const THashMap<TString, TActorId>&& readSessions); - + std::shared_ptr<IPersQueueGetReadSessionsInfoWorkerFactory> PQReadSessionsInfoWorkerFactory; - mutable bool HasSessionsRequest = false; + mutable bool HasSessionsRequest = false; THashMap<TString, TActorId> ReadSessions; }; - + class TPersQueueGetReadSessionsInfoTopicWorker : public TReplierToParent<TPipesWaiterActor<TTopicInfoBasedActor, TEvPersQueue::TEvOffsetsResponse>> { public: TPersQueueGetReadSessionsInfoTopicWorker(const TActorId& parent, @@ -186,7 +186,7 @@ public: std::shared_ptr<const TPersQueueBaseRequestProcessor::TNodesInfo> nodesInfo); void BootstrapImpl(const TActorContext& ctx) override; - void Answer(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) override; + void Answer(const TActorContext& ctx, EResponseStatus status, NPersQueue::NErrorCode::EErrorCode code, const TString& errorReason) override; bool OnPipeEventsAreReady(const TActorContext& ctx) override; void Die(const TActorContext& ctx) override; diff --git a/ydb/core/client/server/msgbus_server_pq_metarequest_ut.cpp b/ydb/core/client/server/msgbus_server_pq_metarequest_ut.cpp index 141e6dcd2e..237bba147b 100644 --- a/ydb/core/client/server/msgbus_server_pq_metarequest_ut.cpp +++ b/ydb/core/client/server/msgbus_server_pq_metarequest_ut.cpp @@ -266,59 +266,59 @@ protected: + p.HasCmdGetOwnershipResult(); } - - template<class T> - bool AssertTopicResponsesImpl(const T& t, const TString& topic, NPersQueue::NErrorCode::EErrorCode code, ui32 numParts) { - if (t.GetTopic() != topic) return false; - UNIT_ASSERT_C(t.GetErrorCode() == code, "for topic " << topic << " code is " << (ui32) t.GetErrorCode() << " but waiting for " << (ui32) code << " resp: " << t); - UNIT_ASSERT_C(t.PartitionResultSize() == numParts, "for topic " << topic << " parts size is not " << numParts << " resp: " << t); - return true; - } - - template<class T> - bool AssertTopicResponsesImpl(const T& t, const TString& topic, NPersQueue::NErrorCode::EErrorCode code) { - if (t.GetTopic() != topic) return false; - UNIT_ASSERT_C(t.GetErrorCode() == code, "for topic " << topic << " code is " << (ui32) t.GetErrorCode() << " but waiting for " << (ui32) code << " resp: " << t); - return true; - } - - - void AssertTopicResponses(const TString& topic, NPersQueue::NErrorCode::EErrorCode code, ui32 numParts) { - const TEvPersQueue::TEvResponse* resp = GetResponse(); - UNIT_ASSERT(resp != nullptr); - for (auto& r : resp->Record.GetMetaResponse().GetCmdGetReadSessionsInfoResult().GetTopicResult()) { - if (AssertTopicResponsesImpl(r, topic, code, numParts)) - return; - } - for (auto& r : resp->Record.GetMetaResponse().GetCmdGetPartitionLocationsResult().GetTopicResult()) { - if (r.GetTopic() != topic) continue; - UNIT_ASSERT_C(r.GetErrorCode() == code, "for topic " << topic << " code is " << (ui32) r.GetErrorCode() << " but waiting for " << (ui32) code << " resp: " << r); - UNIT_ASSERT_C(r.PartitionLocationSize() == numParts, "for topic " << topic << " parts size is not " << numParts << " resp: " << r); - return; - } - for (auto& r : resp->Record.GetMetaResponse().GetCmdGetPartitionStatusResult().GetTopicResult()) { - if (AssertTopicResponsesImpl(r, topic, code, numParts)) - return; - } - for (auto& r : resp->Record.GetMetaResponse().GetCmdGetPartitionOffsetsResult().GetTopicResult()) { - if (AssertTopicResponsesImpl(r, topic, code, numParts)) - return; - } - for (auto& r : resp->Record.GetMetaResponse().GetCmdGetTopicMetadataResult().GetTopicInfo()) { - if (AssertTopicResponsesImpl(r, topic, code)) - return; - } - UNIT_ASSERT_C(false, "topic " << topic << " not found in response " << resp->Record); - } - - - void AssertFailedResponse(NPersQueue::NErrorCode::EErrorCode code, const THashSet<TString>& markers = {}, EResponseStatus status = MSTATUS_ERROR) { + + template<class T> + bool AssertTopicResponsesImpl(const T& t, const TString& topic, NPersQueue::NErrorCode::EErrorCode code, ui32 numParts) { + if (t.GetTopic() != topic) return false; + UNIT_ASSERT_C(t.GetErrorCode() == code, "for topic " << topic << " code is " << (ui32) t.GetErrorCode() << " but waiting for " << (ui32) code << " resp: " << t); + UNIT_ASSERT_C(t.PartitionResultSize() == numParts, "for topic " << topic << " parts size is not " << numParts << " resp: " << t); + return true; + } + + template<class T> + bool AssertTopicResponsesImpl(const T& t, const TString& topic, NPersQueue::NErrorCode::EErrorCode code) { + if (t.GetTopic() != topic) return false; + UNIT_ASSERT_C(t.GetErrorCode() == code, "for topic " << topic << " code is " << (ui32) t.GetErrorCode() << " but waiting for " << (ui32) code << " resp: " << t); + return true; + } + + + void AssertTopicResponses(const TString& topic, NPersQueue::NErrorCode::EErrorCode code, ui32 numParts) { + const TEvPersQueue::TEvResponse* resp = GetResponse(); + UNIT_ASSERT(resp != nullptr); + for (auto& r : resp->Record.GetMetaResponse().GetCmdGetReadSessionsInfoResult().GetTopicResult()) { + if (AssertTopicResponsesImpl(r, topic, code, numParts)) + return; + } + for (auto& r : resp->Record.GetMetaResponse().GetCmdGetPartitionLocationsResult().GetTopicResult()) { + if (r.GetTopic() != topic) continue; + UNIT_ASSERT_C(r.GetErrorCode() == code, "for topic " << topic << " code is " << (ui32) r.GetErrorCode() << " but waiting for " << (ui32) code << " resp: " << r); + UNIT_ASSERT_C(r.PartitionLocationSize() == numParts, "for topic " << topic << " parts size is not " << numParts << " resp: " << r); + return; + } + for (auto& r : resp->Record.GetMetaResponse().GetCmdGetPartitionStatusResult().GetTopicResult()) { + if (AssertTopicResponsesImpl(r, topic, code, numParts)) + return; + } + for (auto& r : resp->Record.GetMetaResponse().GetCmdGetPartitionOffsetsResult().GetTopicResult()) { + if (AssertTopicResponsesImpl(r, topic, code, numParts)) + return; + } + for (auto& r : resp->Record.GetMetaResponse().GetCmdGetTopicMetadataResult().GetTopicInfo()) { + if (AssertTopicResponsesImpl(r, topic, code)) + return; + } + UNIT_ASSERT_C(false, "topic " << topic << " not found in response " << resp->Record); + } + + + void AssertFailedResponse(NPersQueue::NErrorCode::EErrorCode code, const THashSet<TString>& markers = {}, EResponseStatus status = MSTATUS_ERROR) { const TEvPersQueue::TEvResponse* resp = GetResponse(); Cerr << "Assert failed: Check response: " << resp->Record << Endl; UNIT_ASSERT(resp != nullptr); UNIT_ASSERT_C(resp->Record.HasStatus(), "Response: " << resp->Record); UNIT_ASSERT_UNEQUAL_C(resp->Record.GetStatus(), 1, "Response: " << resp->Record); - UNIT_ASSERT_EQUAL_C(resp->Record.GetErrorCode(), code, "code: " << (ui32)code << " Response: " << resp->Record); + UNIT_ASSERT_EQUAL_C(resp->Record.GetErrorCode(), code, "code: " << (ui32)code << " Response: " << resp->Record); UNIT_ASSERT_C(!resp->Record.GetErrorReason().empty(), "Response: " << resp->Record); UNIT_ASSERT_VALUES_EQUAL_C(resp->Record.GetStatus(), status, "Response: " << resp->Record); if (!markers.empty()) { @@ -332,7 +332,7 @@ protected: UNIT_ASSERT_VALUES_EQUAL_C(ResponseFieldsCount(), 0, "Response: " << resp->Record); } - void AssertFailedResponse(NPersQueue::NErrorCode::EErrorCode code, const char* marker, EResponseStatus status = MSTATUS_ERROR) { + void AssertFailedResponse(NPersQueue::NErrorCode::EErrorCode code, const char* marker, EResponseStatus status = MSTATUS_ERROR) { AssertFailedResponse(code, THashSet<TString>({marker}), status); } @@ -341,7 +341,7 @@ protected: UNIT_ASSERT(resp != nullptr); UNIT_ASSERT_VALUES_EQUAL_C(resp->Record.GetStatus(), 1, "Response: " << resp->Record); UNIT_ASSERT_C(resp->Record.HasErrorCode(), "Response: " << resp->Record); - UNIT_ASSERT_EQUAL_C(resp->Record.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); + UNIT_ASSERT_EQUAL_C(resp->Record.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); UNIT_ASSERT_C(resp->Record.GetErrorReason().empty(), "Response: " << resp->Record); UNIT_ASSERT_VALUES_EQUAL_C(ResponseFieldsCount(), 1, "Response: " << resp->Record); } @@ -527,7 +527,7 @@ public: Runtime->UpdateCurrentTime(Runtime->GetCurrentTime() + TDuration::MilliSeconds(90000 + 1)); GrabResponseEvent(); - AssertFailedResponse(NPersQueue::NErrorCode::ERROR, {"Marker# PQ11", "Marker# PQ16"}, MSTATUS_TIMEOUT); + AssertFailedResponse(NPersQueue::NErrorCode::ERROR, {"Marker# PQ11", "Marker# PQ16"}, MSTATUS_TIMEOUT); } void FailsOnFailedGetAllTopicsRequest() { @@ -537,7 +537,7 @@ public: RegisterActor(request); GrabResponseEvent(); - AssertFailedResponse(NPersQueue::NErrorCode::UNKNOWN_TOPIC, {"Marker# PQ15", "Marker# PQ17"}); + AssertFailedResponse(NPersQueue::NErrorCode::UNKNOWN_TOPIC, {"Marker# PQ15", "Marker# PQ17"}); } void FailsOnNotOkStatusInGetNodeRequest() { @@ -601,7 +601,7 @@ public: RegisterActor(request); GrabResponseEvent(); - AssertFailedResponse(NPersQueue::NErrorCode::UNKNOWN_TOPIC, {"Marker# PQ94", "Marker# PQ22"}); + AssertFailedResponse(NPersQueue::NErrorCode::UNKNOWN_TOPIC, {"Marker# PQ94", "Marker# PQ22"}); } void FailsOnBalancerDescribeResultFailureWhenTopicsAreGivenExplicitly() { @@ -741,7 +741,7 @@ public: req.MutableMetaRequest()->MutableCmdGetTopicMetadata()->AddTopic(""); RegisterActor(req); GrabResponseEvent(); - AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); + AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); UNIT_ASSERT_STRING_CONTAINS(GetResponse()->Record.GetErrorReason(), "empty topic in GetTopicMetadata request"); } @@ -833,7 +833,7 @@ public: MakeEmptyTopic(*req.MutableMetaRequest()->MutableCmdGetPartitionLocations()->MutableTopicRequest()); RegisterActor(req); GrabResponseEvent(); - AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); + AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); UNIT_ASSERT_STRING_CONTAINS(GetResponse()->Record.GetErrorReason(), "TopicRequest must have Topic field"); } @@ -842,7 +842,7 @@ public: MakeDuplicatedTopic(*req.MutableMetaRequest()->MutableCmdGetPartitionLocations()->MutableTopicRequest()); RegisterActor(req); GrabResponseEvent(); - AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); + AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); UNIT_ASSERT_STRING_CONTAINS(GetResponse()->Record.GetErrorReason(), "multiple TopicRequest"); } @@ -851,7 +851,7 @@ public: MakeDuplicatedPartition(*req.MutableMetaRequest()->MutableCmdGetPartitionLocations()->MutableTopicRequest()); RegisterActor(req); GrabResponseEvent(); - AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); + AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); UNIT_ASSERT_STRING_CONTAINS(GetResponse()->Record.GetErrorReason(), "multiple partition"); } @@ -882,7 +882,7 @@ public: UNIT_ASSERT_VALUES_EQUAL_C(topic1Result.PartitionLocationSize(), 1, "Response: " << resp->Record); const auto& partition1 = topic1Result.GetPartitionLocation(0); UNIT_ASSERT_VALUES_EQUAL_C(partition1.GetPartition(), 0, "Response: " << resp->Record); - UNIT_ASSERT_EQUAL_C(partition1.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); + UNIT_ASSERT_EQUAL_C(partition1.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); UNIT_ASSERT_C(!partition1.HasErrorReason(), "Response: " << resp->Record); UNIT_ASSERT_C(!partition1.GetHost().empty(), "Response: " << resp->Record); } @@ -897,11 +897,11 @@ public: UNIT_ASSERT_VALUES_EQUAL_C(partition1.GetPartition(), 1, "Response: " << resp->Record); UNIT_ASSERT_VALUES_EQUAL_C(partition2.GetPartition(), 2, "Response: " << resp->Record); - UNIT_ASSERT_UNEQUAL_C(partition1.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); + UNIT_ASSERT_UNEQUAL_C(partition1.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); UNIT_ASSERT_C(partition1.HasErrorReason(), "Response: " << resp->Record); UNIT_ASSERT_C(partition1.GetHost().empty(), "Response: " << resp->Record); // No data - UNIT_ASSERT_EQUAL_C(partition2.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); + UNIT_ASSERT_EQUAL_C(partition2.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); UNIT_ASSERT_C(!partition2.HasErrorReason(), "Response: " << resp->Record); UNIT_ASSERT_C(!partition2.GetHost().empty(), "Response: " << resp->Record); // Data was passed } @@ -935,7 +935,7 @@ public: UNIT_ASSERT_VALUES_EQUAL_C(topic1Result.PartitionLocationSize(), 1, "Response: " << resp->Record); const auto& partition1 = topic1Result.GetPartitionLocation(0); UNIT_ASSERT_VALUES_EQUAL_C(partition1.GetPartition(), 0, "Response: " << resp->Record); - UNIT_ASSERT_EQUAL_C(partition1.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); + UNIT_ASSERT_EQUAL_C(partition1.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); UNIT_ASSERT_C(!partition1.HasErrorReason(), "Response: " << resp->Record); UNIT_ASSERT_C(!partition1.GetHost().empty(), "Response: " << resp->Record); } @@ -950,16 +950,16 @@ public: UNIT_ASSERT_VALUES_EQUAL_C(partition1.GetPartition(), 1, "Response: " << resp->Record); UNIT_ASSERT_VALUES_EQUAL_C(partition2.GetPartition(), 2, "Response: " << resp->Record); - UNIT_ASSERT_EQUAL_C(partition1.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); + UNIT_ASSERT_EQUAL_C(partition1.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); UNIT_ASSERT_C(!partition1.HasErrorReason(), "Response: " << resp->Record); UNIT_ASSERT_C(!partition1.GetHost().empty(), "Response: " << resp->Record); if (disconnectionMode == EDisconnectionMode::AnswerDoesNotArrive) { - UNIT_ASSERT_UNEQUAL_C(partition2.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); + UNIT_ASSERT_UNEQUAL_C(partition2.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); UNIT_ASSERT_C(partition2.HasErrorReason(), "Response: " << resp->Record); UNIT_ASSERT_C(partition2.GetHost().empty(), "Response: " << resp->Record); // Data was passed } else { - UNIT_ASSERT_EQUAL_C(partition2.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); + UNIT_ASSERT_EQUAL_C(partition2.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); UNIT_ASSERT_C(!partition2.HasErrorReason(), "Response: " << resp->Record); UNIT_ASSERT_C(!partition2.GetHost().empty(), "Response: " << resp->Record); // Data was passed } @@ -996,7 +996,7 @@ public: MakeEmptyTopic(*req.MutableMetaRequest()->MutableCmdGetPartitionOffsets()->MutableTopicRequest()); RegisterActor(req); GrabResponseEvent(); - AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); + AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); UNIT_ASSERT_STRING_CONTAINS(GetResponse()->Record.GetErrorReason(), "TopicRequest must have Topic field"); } @@ -1005,7 +1005,7 @@ public: MakeDuplicatedTopic(*req.MutableMetaRequest()->MutableCmdGetPartitionOffsets()->MutableTopicRequest()); RegisterActor(req); GrabResponseEvent(); - AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); + AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); UNIT_ASSERT_STRING_CONTAINS(GetResponse()->Record.GetErrorReason(), "multiple TopicRequest"); } @@ -1014,7 +1014,7 @@ public: MakeDuplicatedPartition(*req.MutableMetaRequest()->MutableCmdGetPartitionOffsets()->MutableTopicRequest()); RegisterActor(req); GrabResponseEvent(); - AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); + AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); UNIT_ASSERT_STRING_CONTAINS(GetResponse()->Record.GetErrorReason(), "multiple partition"); } @@ -1044,7 +1044,7 @@ public: UNIT_ASSERT_STRINGS_EQUAL(topic1Result.GetTopic(), "topic1"); UNIT_ASSERT_VALUES_EQUAL_C(topic1Result.PartitionResultSize(), 1, "Response: " << resp->Record); UNIT_ASSERT_VALUES_EQUAL_C(topic1Result.GetPartitionResult(0).GetPartition(), 0, "Response: " << resp->Record); - UNIT_ASSERT_EQUAL_C(topic1Result.GetPartitionResult(0).GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); + UNIT_ASSERT_EQUAL_C(topic1Result.GetPartitionResult(0).GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); UNIT_ASSERT_C(!topic1Result.GetPartitionResult(0).HasErrorReason(), "Response: " << resp->Record); UNIT_ASSERT_C(topic1Result.GetPartitionResult(0).HasStartOffset(), "Response: " << resp->Record); } @@ -1059,11 +1059,11 @@ public: UNIT_ASSERT_VALUES_EQUAL_C(partition1.GetPartition(), 1, "Response: " << resp->Record); UNIT_ASSERT_VALUES_EQUAL_C(partition2.GetPartition(), 2, "Response: " << resp->Record); - UNIT_ASSERT_UNEQUAL_C(partition1.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); + UNIT_ASSERT_UNEQUAL_C(partition1.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); UNIT_ASSERT_C(partition1.HasErrorReason(), "Response: " << resp->Record); UNIT_ASSERT_C(!partition1.HasStartOffset(), "Response: " << resp->Record); // No data - UNIT_ASSERT_EQUAL_C(partition2.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); + UNIT_ASSERT_EQUAL_C(partition2.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); UNIT_ASSERT_C(!partition2.HasErrorReason(), "Response: " << resp->Record); UNIT_ASSERT_C(partition2.HasStartOffset(), "Response: " << resp->Record); // Data was passed } @@ -1096,7 +1096,7 @@ public: UNIT_ASSERT_STRINGS_EQUAL(topic1Result.GetTopic(), "topic1"); UNIT_ASSERT_VALUES_EQUAL_C(topic1Result.PartitionResultSize(), 1, "Response: " << resp->Record); UNIT_ASSERT_VALUES_EQUAL_C(topic1Result.GetPartitionResult(0).GetPartition(), 0, "Response: " << resp->Record); - UNIT_ASSERT_EQUAL_C(topic1Result.GetPartitionResult(0).GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); + UNIT_ASSERT_EQUAL_C(topic1Result.GetPartitionResult(0).GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); UNIT_ASSERT_C(!topic1Result.GetPartitionResult(0).HasErrorReason(), "Response: " << resp->Record); UNIT_ASSERT_C(topic1Result.GetPartitionResult(0).HasStartOffset(), "Response: " << resp->Record); } @@ -1111,16 +1111,16 @@ public: UNIT_ASSERT_VALUES_EQUAL_C(partition1.GetPartition(), 1, "Response: " << resp->Record); UNIT_ASSERT_VALUES_EQUAL_C(partition2.GetPartition(), 2, "Response: " << resp->Record); - UNIT_ASSERT_EQUAL_C(partition1.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); + UNIT_ASSERT_EQUAL_C(partition1.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); UNIT_ASSERT_C(!partition1.HasErrorReason(), "Response: " << resp->Record); UNIT_ASSERT_C(partition1.HasStartOffset(), "Response: " << resp->Record); // Data was passed if (disconnectionMode == EDisconnectionMode::AnswerDoesNotArrive) { - UNIT_ASSERT_UNEQUAL_C(partition2.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); + UNIT_ASSERT_UNEQUAL_C(partition2.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); UNIT_ASSERT_C(partition2.HasErrorReason(), "Response: " << resp->Record); UNIT_ASSERT_C(!partition2.HasStartOffset(), "Response: " << resp->Record); // Data was passed } else { - UNIT_ASSERT_EQUAL_C(partition2.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); + UNIT_ASSERT_EQUAL_C(partition2.GetErrorCode(), NPersQueue::NErrorCode::OK, "Response: " << resp->Record); UNIT_ASSERT_C(!partition2.HasErrorReason(), "Response: " << resp->Record); UNIT_ASSERT_C(partition2.HasStartOffset(), "Response: " << resp->Record); // Data was passed } @@ -1157,7 +1157,7 @@ public: MakeEmptyTopic(*req.MutableMetaRequest()->MutableCmdGetPartitionStatus()->MutableTopicRequest()); RegisterActor(req); GrabResponseEvent(); - AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); + AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); UNIT_ASSERT_STRING_CONTAINS(GetResponse()->Record.GetErrorReason(), "TopicRequest must have Topic field"); } @@ -1166,7 +1166,7 @@ public: MakeDuplicatedTopic(*req.MutableMetaRequest()->MutableCmdGetPartitionStatus()->MutableTopicRequest()); RegisterActor(req); GrabResponseEvent(); - AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); + AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); UNIT_ASSERT_STRING_CONTAINS(GetResponse()->Record.GetErrorReason(), "multiple TopicRequest"); } @@ -1175,7 +1175,7 @@ public: MakeDuplicatedPartition(*req.MutableMetaRequest()->MutableCmdGetPartitionStatus()->MutableTopicRequest()); RegisterActor(req); GrabResponseEvent(); - AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); + AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); UNIT_ASSERT_STRING_CONTAINS(GetResponse()->Record.GetErrorReason(), "multiple partition"); } @@ -1313,7 +1313,7 @@ public: req.MutableMetaRequest()->MutableCmdGetReadSessionsInfo()->AddTopic(""); RegisterActor(req); GrabResponseEvent(); - AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); + AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); UNIT_ASSERT_STRING_CONTAINS(GetResponse()->Record.GetErrorReason(), "empty topic in GetReadSessionsInfo request"); } @@ -1322,7 +1322,7 @@ public: req.MutableMetaRequest()->MutableCmdGetReadSessionsInfo()->ClearClientId(); RegisterActor(req); GrabResponseEvent(); - AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); + AssertFailedResponse(NPersQueue::NErrorCode::BAD_REQUEST); UNIT_ASSERT_STRING_CONTAINS(GetResponse()->Record.GetErrorReason(), "No clientId specified in CmdGetReadSessionsInfo"); } @@ -1353,26 +1353,26 @@ public: UNIT_ASSERT_STRINGS_EQUAL(topic1Result.GetTopic(), "topic1"); UNIT_ASSERT_VALUES_EQUAL_C(topic1Result.PartitionResultSize(), 1, "Response: " << resp->Record); UNIT_ASSERT_VALUES_EQUAL_C(topic1Result.GetPartitionResult(0).GetPartition(), 0, "Response: " << resp->Record); - UNIT_ASSERT_C(topic1Result.GetPartitionResult(0).GetErrorCode() == (ui32)NPersQueue::NErrorCode::INITIALIZING, "Response: " << resp->Record); + UNIT_ASSERT_C(topic1Result.GetPartitionResult(0).GetErrorCode() == (ui32)NPersQueue::NErrorCode::INITIALIZING, "Response: " << resp->Record); } { const auto& topic2Result = perTopicResults.Get(0).GetTopic() == "topic2" ? perTopicResults.Get(0) : perTopicResults.Get(1); UNIT_ASSERT_STRINGS_EQUAL(topic2Result.GetTopic(), "topic2"); - UNIT_ASSERT_VALUES_EQUAL_C(topic2Result.PartitionResultSize(), 3, "Response: " << resp->Record); + UNIT_ASSERT_VALUES_EQUAL_C(topic2Result.PartitionResultSize(), 3, "Response: " << resp->Record); // Partitions (order is not specified) -// const auto& partition1 = topic2Result.GetPartitionResult(0).GetPartition() == 1 ? topic2Result.GetPartitionResult(0) : topic2Result.GetPartitionResult(1); -// const auto& partition2 = topic2Result.GetPartitionResult(0).GetPartition() == 2 ? topic2Result.GetPartitionResult(0) : topic2Result.GetPartitionResult(1); -// UNIT_ASSERT_VALUES_EQUAL_C(partition1.GetPartition(), 1, "Response: " << resp->Record); -// UNIT_ASSERT_VALUES_EQUAL_C(partition2.GetPartition(), 2, "Response: " << resp->Record); - - UNIT_ASSERT_C(topic2Result.GetPartitionResult(0).GetErrorCode() == (ui32)NPersQueue::NErrorCode::INITIALIZING, "Response: " << resp->Record); - UNIT_ASSERT_C(topic2Result.GetPartitionResult(1).GetErrorCode() == (ui32)NPersQueue::NErrorCode::INITIALIZING, "Response: " << resp->Record); - UNIT_ASSERT_C(topic2Result.GetPartitionResult(2).GetErrorCode() == (ui32)NPersQueue::NErrorCode::INITIALIZING, "Response: " << resp->Record); - - -// UNIT_ASSERT_C(partition1.HasStartOffset(), "Response: " << resp->Record); -// UNIT_ASSERT_C(partition2.HasStartOffset(), "Response: " << resp->Record); +// const auto& partition1 = topic2Result.GetPartitionResult(0).GetPartition() == 1 ? topic2Result.GetPartitionResult(0) : topic2Result.GetPartitionResult(1); +// const auto& partition2 = topic2Result.GetPartitionResult(0).GetPartition() == 2 ? topic2Result.GetPartitionResult(0) : topic2Result.GetPartitionResult(1); +// UNIT_ASSERT_VALUES_EQUAL_C(partition1.GetPartition(), 1, "Response: " << resp->Record); +// UNIT_ASSERT_VALUES_EQUAL_C(partition2.GetPartition(), 2, "Response: " << resp->Record); + + UNIT_ASSERT_C(topic2Result.GetPartitionResult(0).GetErrorCode() == (ui32)NPersQueue::NErrorCode::INITIALIZING, "Response: " << resp->Record); + UNIT_ASSERT_C(topic2Result.GetPartitionResult(1).GetErrorCode() == (ui32)NPersQueue::NErrorCode::INITIALIZING, "Response: " << resp->Record); + UNIT_ASSERT_C(topic2Result.GetPartitionResult(2).GetErrorCode() == (ui32)NPersQueue::NErrorCode::INITIALIZING, "Response: " << resp->Record); + + +// UNIT_ASSERT_C(partition1.HasStartOffset(), "Response: " << resp->Record); +// UNIT_ASSERT_C(partition2.HasStartOffset(), "Response: " << resp->Record); } } @@ -1404,14 +1404,14 @@ public: UNIT_ASSERT_VALUES_EQUAL_C(topic1Result.PartitionResultSize(), 1, "Response: " << resp->Record); const auto& partition = topic1Result.GetPartitionResult(0); UNIT_ASSERT_VALUES_EQUAL_C(partition.GetPartition(), 0, "Response: " << resp->Record); - UNIT_ASSERT_C(partition.GetErrorCode() == (ui32)NPersQueue::NErrorCode::INITIALIZING, "Response: " << resp->Record); - + UNIT_ASSERT_C(partition.GetErrorCode() == (ui32)NPersQueue::NErrorCode::INITIALIZING, "Response: " << resp->Record); + } { auto& topic2Result = perTopicResults.Get(0).GetTopic() == "topic2" ? *perTopicResults.Mutable(0) : *perTopicResults.Mutable(1); UNIT_ASSERT_STRINGS_EQUAL(topic2Result.GetTopic(), "topic2"); - const size_t expectedPartitionsSize = 3; - //disconnectionMode == EDisconnectionMode::AnswerDoesNotArrive ? 2 : 3; + const size_t expectedPartitionsSize = 3; + //disconnectionMode == EDisconnectionMode::AnswerDoesNotArrive ? 2 : 3; UNIT_ASSERT_VALUES_EQUAL_C(topic2Result.PartitionResultSize(), expectedPartitionsSize, "Response: " << resp->Record); // Partitions (order is not specified) @@ -1422,16 +1422,16 @@ public: }); const auto& partition0 = topic2Result.GetPartitionResult(0); const auto& partition1 = topic2Result.GetPartitionResult(1); - const auto& partition2 = topic2Result.GetPartitionResult(2); + const auto& partition2 = topic2Result.GetPartitionResult(2); UNIT_ASSERT_VALUES_EQUAL_C(partition0.GetPartition(), 0, "Response: " << resp->Record); UNIT_ASSERT_VALUES_EQUAL_C(partition1.GetPartition(), 1, "Response: " << resp->Record); - UNIT_ASSERT_VALUES_EQUAL_C(partition2.GetPartition(), 2, "Response: " << resp->Record); + UNIT_ASSERT_VALUES_EQUAL_C(partition2.GetPartition(), 2, "Response: " << resp->Record); - UNIT_ASSERT_C(partition0.GetErrorCode() == (ui32)NPersQueue::NErrorCode::INITIALIZING, "Response: " << resp->Record); - UNIT_ASSERT_C(partition1.GetErrorCode() == (ui32)NPersQueue::NErrorCode::INITIALIZING, "Response: " << resp->Record); - UNIT_ASSERT_C(partition2.GetErrorCode() == (ui32)NPersQueue::NErrorCode::INITIALIZING, "Response: " << resp->Record); - Y_UNUSED(disconnectionMode); + UNIT_ASSERT_C(partition0.GetErrorCode() == (ui32)NPersQueue::NErrorCode::INITIALIZING, "Response: " << resp->Record); + UNIT_ASSERT_C(partition1.GetErrorCode() == (ui32)NPersQueue::NErrorCode::INITIALIZING, "Response: " << resp->Record); + UNIT_ASSERT_C(partition2.GetErrorCode() == (ui32)NPersQueue::NErrorCode::INITIALIZING, "Response: " << resp->Record); + Y_UNUSED(disconnectionMode); } }; TMessageBusServerPersQueueRequestCommonTest::HandlesPipeDisconnectionImpl<TEvPersQueue::TEvOffsetsResponse>(disconnectionMode, validation, true); diff --git a/ydb/core/client/server/msgbus_server_proxy.cpp b/ydb/core/client/server/msgbus_server_proxy.cpp index 03215000fa..7f3146d21a 100644 --- a/ydb/core/client/server/msgbus_server_proxy.cpp +++ b/ydb/core/client/server/msgbus_server_proxy.cpp @@ -171,7 +171,7 @@ void TMessageBusServerProxy::Bootstrap(const TActorContext& ctx) { TxProxy = MakeTxProxyID(); - SchemeCacheCounters = GetServiceCounters(AppData(ctx)->Counters, "pqproxy|cache"); + SchemeCacheCounters = GetServiceCounters(AppData(ctx)->Counters, "pqproxy|cache"); DbOperationsCounters = new TMessageBusDbOpsCounters(AppData(ctx)->Counters); auto cacheConfig = MakeIntrusive<NSchemeCache::TSchemeCacheConfig>(AppData(ctx), SchemeCacheCounters); diff --git a/ydb/core/client/server/msgbus_server_scheme_request.cpp b/ydb/core/client/server/msgbus_server_scheme_request.cpp index 288d210c49..7df5e262d0 100644 --- a/ydb/core/client/server/msgbus_server_scheme_request.cpp +++ b/ydb/core/client/server/msgbus_server_scheme_request.cpp @@ -32,38 +32,38 @@ class TMessageBusServerSchemeRequest : public TMessageBusSecureRequest<TMessageB void ReplyWithResult(EResponseStatus status, const NKikimrTxUserProxy::TEvProposeTransactionStatus &result, const TActorContext &ctx); - void FillStatus(EResponseStatus status, const NKikimrTxUserProxy::TEvProposeTransactionStatus &result, TBusResponse* response) - { - response->Record.SetStatus(status); - if (result.HasPathId()) { - response->Record.MutableFlatTxId()->SetPathId(result.GetPathId()); - } - - if (result.HasPathCreateTxId()) { - response->Record.MutableFlatTxId()->SetTxId(result.GetPathCreateTxId()); + void FillStatus(EResponseStatus status, const NKikimrTxUserProxy::TEvProposeTransactionStatus &result, TBusResponse* response) + { + response->Record.SetStatus(status); + if (result.HasPathId()) { + response->Record.MutableFlatTxId()->SetPathId(result.GetPathId()); + } + + if (result.HasPathCreateTxId()) { + response->Record.MutableFlatTxId()->SetTxId(result.GetPathCreateTxId()); } else if (result.HasPathDropTxId()) { response->Record.MutableFlatTxId()->SetTxId(result.GetPathDropTxId()); - } else if (result.HasTxId()) { - response->Record.MutableFlatTxId()->SetTxId(result.GetTxId()); - } - - if (result.HasSchemeShardTabletId()) - response->Record.MutableFlatTxId()->SetSchemeShardTabletId(result.GetSchemeShardTabletId()); - - if (result.HasSchemeShardReason()) { - response->Record.SetErrorReason(result.GetSchemeShardReason()); - } - - if (result.HasSchemeShardStatus()) { - response->Record.SetSchemeStatus(result.GetSchemeShardStatus()); - } - - if (result.HasStatus()) { - response->Record.SetProxyErrorCode(result.GetStatus()); - } - } - - + } else if (result.HasTxId()) { + response->Record.MutableFlatTxId()->SetTxId(result.GetTxId()); + } + + if (result.HasSchemeShardTabletId()) + response->Record.MutableFlatTxId()->SetSchemeShardTabletId(result.GetSchemeShardTabletId()); + + if (result.HasSchemeShardReason()) { + response->Record.SetErrorReason(result.GetSchemeShardReason()); + } + + if (result.HasSchemeShardStatus()) { + response->Record.SetSchemeStatus(result.GetSchemeShardStatus()); + } + + if (result.HasStatus()) { + response->Record.SetProxyErrorCode(result.GetStatus()); + } + } + + public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::FRONT_SCHEME_REQUEST; } @@ -99,24 +99,24 @@ void TMessageBusServerSchemeRequest<TBusPersQueue>::SendProposeRequest(const TAc if (Request->Record.HasMetaRequest() && Request->Record.GetMetaRequest().HasCmdCreateTopic()) { const auto& cmd = Request->Record.GetMetaRequest().GetCmdCreateTopic(); auto *transaction = record.MutableTransaction()->MutableModifyScheme(); - transaction->SetWorkingDir(TopicPrefix(ctx)); + transaction->SetWorkingDir(TopicPrefix(ctx)); transaction->SetOperationType(NKikimrSchemeOp::ESchemeOpCreatePersQueueGroup); auto *pqgroup = transaction->MutableCreatePersQueueGroup(); pqgroup->SetName(cmd.GetTopic()); - pqgroup->SetTotalGroupCount(cmd.GetNumPartitions()); - pqgroup->SetPartitionPerTablet(cmd.GetNumPartitionsPerTablet()); + pqgroup->SetTotalGroupCount(cmd.GetNumPartitions()); + pqgroup->SetPartitionPerTablet(cmd.GetNumPartitionsPerTablet()); pqgroup->MutablePQTabletConfig()->MergeFrom(cmd.GetConfig()); } if (Request->Record.HasMetaRequest() && Request->Record.GetMetaRequest().HasCmdChangeTopic()) { const auto& cmd = Request->Record.GetMetaRequest().GetCmdChangeTopic(); auto *transaction = record.MutableTransaction()->MutableModifyScheme(); - transaction->SetWorkingDir(TopicPrefix(ctx)); + transaction->SetWorkingDir(TopicPrefix(ctx)); transaction->SetOperationType(NKikimrSchemeOp::ESchemeOpAlterPersQueueGroup); auto *pqgroup = transaction->MutableAlterPersQueueGroup(); pqgroup->SetName(cmd.GetTopic()); if (cmd.HasNumPartitions()) - pqgroup->SetTotalGroupCount(cmd.GetNumPartitions()); + pqgroup->SetTotalGroupCount(cmd.GetNumPartitions()); if (cmd.HasConfig()) pqgroup->MutablePQTabletConfig()->MergeFrom(cmd.GetConfig()); } @@ -124,7 +124,7 @@ void TMessageBusServerSchemeRequest<TBusPersQueue>::SendProposeRequest(const TAc if (Request->Record.HasMetaRequest() && Request->Record.GetMetaRequest().HasCmdDeleteTopic()) { const auto& cmd = Request->Record.GetMetaRequest().GetCmdDeleteTopic(); auto *transaction = record.MutableTransaction()->MutableModifyScheme(); - transaction->SetWorkingDir(TopicPrefix(ctx)); + transaction->SetWorkingDir(TopicPrefix(ctx)); transaction->SetOperationType(NKikimrSchemeOp::ESchemeOpDropPersQueueGroup); auto *pqgroup = transaction->MutableDrop(); pqgroup->SetName(cmd.GetTopic()); @@ -138,13 +138,13 @@ void TMessageBusServerSchemeRequest<TBusPersQueue>::SendProposeRequest(const TAc template <> void TMessageBusServerSchemeRequest<TBusPersQueue>::ReplyWithResult(EResponseStatus status, const NKikimrTxUserProxy::TEvProposeTransactionStatus &result, const TActorContext &ctx) { TAutoPtr<TBusResponse> response(new TBusResponse()); - FillStatus(status, result, response.Get()); + FillStatus(status, result, response.Get()); if (result.GetSchemeShardStatus() == NKikimrScheme::StatusPathDoesNotExist) { - response->Record.SetErrorCode(NPersQueue::NErrorCode::UNKNOWN_TOPIC); - } else if (status == MSTATUS_OK || status == MSTATUS_INPROGRESS) - response->Record.SetErrorCode(NPersQueue::NErrorCode::OK); + response->Record.SetErrorCode(NPersQueue::NErrorCode::UNKNOWN_TOPIC); + } else if (status == MSTATUS_OK || status == MSTATUS_INPROGRESS) + response->Record.SetErrorCode(NPersQueue::NErrorCode::OK); else - response->Record.SetErrorCode(NPersQueue::NErrorCode::ERROR); + response->Record.SetErrorCode(NPersQueue::NErrorCode::ERROR); if (result.HasSchemeShardReason()) { response->Record.SetErrorReason(result.GetSchemeShardReason()); } @@ -197,12 +197,12 @@ void TMessageBusServerSchemeRequest<TBusSchemeOperation>::SendProposeRequest(con req->Record.SetUserToken(TBase::GetSerializedToken()); ctx.Send(MakeTxProxyID(), req.Release()); } - + template <> void TMessageBusServerSchemeRequest<TBusSchemeOperation>::ReplyWithResult(EResponseStatus status, const NKikimrTxUserProxy::TEvProposeTransactionStatus &result, const TActorContext &ctx) { TAutoPtr<TBusResponse> response(new TBusResponse()); - FillStatus(status, result, response.Get()); + FillStatus(status, result, response.Get()); SendReplyAutoPtr(response); Request.Destroy(); diff --git a/ydb/core/client/server/msgbus_server_tablet_state.cpp b/ydb/core/client/server/msgbus_server_tablet_state.cpp index 95bdae73f8..c8582fc550 100644 --- a/ydb/core/client/server/msgbus_server_tablet_state.cpp +++ b/ydb/core/client/server/msgbus_server_tablet_state.cpp @@ -99,8 +99,8 @@ public: TAutoPtr<TBusResponse> response = new TBusResponse(); auto& record = response->Record; record.SetStatus(MSTATUS_OK); - for (const auto& ni : NodesInfo->Nodes) { - const auto& pr_node = *PerNodeTabletInfo.find(ni.NodeId); + for (const auto& ni : NodesInfo->Nodes) { + const auto& pr_node = *PerNodeTabletInfo.find(ni.NodeId); if (pr_node.second.Get() != nullptr) { for (const NKikimrWhiteboard::TTabletStateInfo& st_info : pr_node.second.Get()->Record.GetTabletStateInfo()) { if (Record.HasAlive()) { @@ -124,9 +124,9 @@ public: if (!found) continue; } - auto state = record.AddTabletStateInfo(); - state->CopyFrom(st_info); - state->SetHost(ni.Host); + auto state = record.AddTabletStateInfo(); + state->CopyFrom(st_info); + state->SetHost(ni.Host); } } } diff --git a/ydb/core/client/server/ya.make b/ydb/core/client/server/ya.make index 3da18f6e3c..8cbeec21b7 100644 --- a/ydb/core/client/server/ya.make +++ b/ydb/core/client/server/ya.make @@ -25,8 +25,8 @@ SRCS( msgbus_server_get.cpp msgbus_server_hive_create_tablet.cpp msgbus_server_keyvalue.cpp - msgbus_server_persqueue.cpp - msgbus_server_persqueue.h + msgbus_server_persqueue.cpp + msgbus_server_persqueue.h msgbus_server_pq_metacache.h msgbus_server_pq_metacache.cpp msgbus_server_pq_metarequest.h @@ -59,9 +59,9 @@ SRCS( msgbus_servicereq.h msgbus_tabletreq.h grpc_server.cpp - grpc_server.h - grpc_proxy_status.h - grpc_proxy_status.cpp + grpc_server.h + grpc_proxy_status.h + grpc_proxy_status.cpp ) PEERDIR( diff --git a/ydb/core/cms/console/net_classifier_updater.cpp b/ydb/core/cms/console/net_classifier_updater.cpp index 856c1bb19e..121a65efc3 100644 --- a/ydb/core/cms/console/net_classifier_updater.cpp +++ b/ydb/core/cms/console/net_classifier_updater.cpp @@ -6,7 +6,7 @@ #include <library/cpp/actors/core/hfunc.h> #include <library/cpp/actors/http/http_proxy.h> #include <library/cpp/actors/interconnect/interconnect.h> -#include <library/cpp/json/json_reader.h> +#include <library/cpp/json/json_reader.h> #include <util/stream/zlib.h> @@ -173,22 +173,22 @@ private: } } - auto FormNetDataFromJson(TStringBuf jsonData) const { - NKikimrNetClassifier::TNetData netData; + auto FormNetDataFromJson(TStringBuf jsonData) const { + NKikimrNetClassifier::TNetData netData; TVector<TString> tagsToFilter(UpdaterConfig().GetNetBoxTags().begin(), UpdaterConfig().GetNetBoxTags().end()); - NJson::TJsonValue value; - bool res = NJson::ReadJsonTree(jsonData, &value); - if (!res) - return netData; - if (!value["results"].IsArray()) - return netData; - for (auto& v : value["results"].GetArray()) { - if (!v["prefix"].IsString()) - return NKikimrNetClassifier::TNetData{}; - TString mask = v["prefix"].GetString(); - - if (!v["tags"].IsArray() || v["tags"].GetArray().size() == 0) - return NKikimrNetClassifier::TNetData{}; + NJson::TJsonValue value; + bool res = NJson::ReadJsonTree(jsonData, &value); + if (!res) + return netData; + if (!value["results"].IsArray()) + return netData; + for (auto& v : value["results"].GetArray()) { + if (!v["prefix"].IsString()) + return NKikimrNetClassifier::TNetData{}; + TString mask = v["prefix"].GetString(); + + if (!v["tags"].IsArray() || v["tags"].GetArray().size() == 0) + return NKikimrNetClassifier::TNetData{}; const auto& tags = v["tags"].GetArray(); TString label; for (auto& tag : tags) { @@ -205,13 +205,13 @@ private: if (!label) { continue; } - auto& subnet = *netData.AddSubnets(); - subnet.SetMask(mask); - subnet.SetLabel(label); - } - return netData; - } - + auto& subnet = *netData.AddSubnets(); + subnet.SetMask(mask); + subnet.SetLabel(label); + } + return netData; + } + auto FormNetData(TStringBuf tsvData) const { NKikimrNetClassifier::TNetData netData; @@ -260,11 +260,11 @@ private: } } else { BLOG_ERROR("NetClassifierUpdater failed to get subnets: http_status=" <<ev->Get()->Response->Status); - } + } } else { BLOG_ERROR("NetClassifierUpdater failed to get subnets: " << ev->Get()->Error); } - InitializeAgain(); + InitializeAgain(); } void HandleWhileWorking(TEvConsole::TEvConfigureResponse::TPtr& ev) { diff --git a/ydb/core/cms/console/net_classifier_updater_ut.cpp b/ydb/core/cms/console/net_classifier_updater_ut.cpp index ccb1cbbf2d..da41ee51f2 100644 --- a/ydb/core/cms/console/net_classifier_updater_ut.cpp +++ b/ydb/core/cms/console/net_classifier_updater_ut.cpp @@ -86,33 +86,33 @@ static TString ConvertToTsv(const NKikimrNetClassifier::TNetData& netData) { return builder; } - -static TString ConvertToJson(const NKikimrNetClassifier::TNetData& netData) { - - TString res; - TStringOutput ss(res); - - NJson::TJsonWriter writer(&ss, true); - - writer.OpenMap(); - writer.Write("count", netData.SubnetsSize()); - writer.OpenArray("results"); - for (size_t i = 0; i < netData.SubnetsSize(); ++i) { - const auto& subnet = netData.GetSubnets(i); - writer.OpenMap(); - writer.Write("prefix", subnet.GetMask()); - writer.OpenArray("tags"); - writer.Write(subnet.GetLabel()); - writer.CloseArray(); - writer.CloseMap(); - } - writer.CloseArray(); - writer.CloseMap(); - writer.Flush(); - ss.Flush(); - return res; -} - + +static TString ConvertToJson(const NKikimrNetClassifier::TNetData& netData) { + + TString res; + TStringOutput ss(res); + + NJson::TJsonWriter writer(&ss, true); + + writer.OpenMap(); + writer.Write("count", netData.SubnetsSize()); + writer.OpenArray("results"); + for (size_t i = 0; i < netData.SubnetsSize(); ++i) { + const auto& subnet = netData.GetSubnets(i); + writer.OpenMap(); + writer.Write("prefix", subnet.GetMask()); + writer.OpenArray("tags"); + writer.Write(subnet.GetLabel()); + writer.CloseArray(); + writer.CloseMap(); + } + writer.CloseArray(); + writer.CloseMap(); + writer.Flush(); + ss.Flush(); + return res; +} + NKikimrNetClassifier::TNetClassifierUpdaterConfig CreateUpdaterConfig( ui16 netDataSourcePort, TNetClassifierUpdaterConfig::EFormat format, @@ -125,7 +125,7 @@ NKikimrNetClassifier::TNetClassifierUpdaterConfig CreateUpdaterConfig( *updaterConfig.MutableNetBoxTags() = {netBoxTags.begin(), netBoxTags.end()}; return updaterConfig; } - + Y_UNIT_TEST_SUITE(TNetClassifierUpdaterTest) { void TestGetUpdatesFromHttpServer( const TString& sourceResponce, @@ -182,13 +182,13 @@ Y_UNIT_TEST_SUITE(TNetClassifierUpdaterTest) { Sleep(TDuration::Seconds(1)); } } - - Y_UNIT_TEST(TestGetUpdatesFromHttpServer) { + + Y_UNIT_TEST(TestGetUpdatesFromHttpServer) { auto netData = FormNetData(); TestGetUpdatesFromHttpServer(ConvertToTsv(netData), netData); TestGetUpdatesFromHttpServer(ConvertToJson(netData), netData, TNetClassifierUpdaterConfig::NETBOX); - } - + } + Y_UNIT_TEST(TestFiltrationByNetboxTags) { const TString netboxResponce = "{ \ \"count\": 5, \ diff --git a/ydb/core/debug/valgrind_check.h b/ydb/core/debug/valgrind_check.h index d09c3be484..8c17867e59 100644 --- a/ydb/core/debug/valgrind_check.h +++ b/ydb/core/debug/valgrind_check.h @@ -1,27 +1,27 @@ -#pragma once - -#ifdef WITH_VALGRIND -#define ENABLE_VALGRIND_REQUESTS 1 -#else -#define ENABLE_VALGRIND_REQUESTS 0 -#endif - -#if ENABLE_VALGRIND_REQUESTS -# include <util/system/valgrind.h> -# include <valgrind/memcheck.h> -# define REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(data, size) VALGRIND_CHECK_MEM_IS_DEFINED(data, size) -# define REQUEST_VALGRIND_CHECK_MEM_IS_ADDRESSABLE(data, size) VALGRIND_CHECK_MEM_IS_ADDRESSABLE(data, size) -# define REQUEST_VALGRIND_MAKE_MEM_NOACCESS(data, size) VALGRIND_MAKE_MEM_NOACCESS(data, size) -# define REQUEST_VALGRIND_MALLOCLIKE_BLOCK(data, size, rz, iz) VALGRIND_MALLOCLIKE_BLOCK(data, size, rz, iz) -# define REQUEST_VALGRIND_MAKE_MEM_UNDEFINED(data, size) VALGRIND_MAKE_MEM_UNDEFINED(data, size) +#pragma once + +#ifdef WITH_VALGRIND +#define ENABLE_VALGRIND_REQUESTS 1 +#else +#define ENABLE_VALGRIND_REQUESTS 0 +#endif + +#if ENABLE_VALGRIND_REQUESTS +# include <util/system/valgrind.h> +# include <valgrind/memcheck.h> +# define REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(data, size) VALGRIND_CHECK_MEM_IS_DEFINED(data, size) +# define REQUEST_VALGRIND_CHECK_MEM_IS_ADDRESSABLE(data, size) VALGRIND_CHECK_MEM_IS_ADDRESSABLE(data, size) +# define REQUEST_VALGRIND_MAKE_MEM_NOACCESS(data, size) VALGRIND_MAKE_MEM_NOACCESS(data, size) +# define REQUEST_VALGRIND_MALLOCLIKE_BLOCK(data, size, rz, iz) VALGRIND_MALLOCLIKE_BLOCK(data, size, rz, iz) +# define REQUEST_VALGRIND_MAKE_MEM_UNDEFINED(data, size) VALGRIND_MAKE_MEM_UNDEFINED(data, size) # define REQUEST_VALGRIND_MAKE_MEM_DEFINED(data, size) VALGRIND_MAKE_MEM_DEFINED(data, size) -# define REQUEST_VALGRIND_FREELIKE_BLOCK(data, rz) VALGRIND_FREELIKE_BLOCK(data, rz) -#else -# define REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(data, size) while (false) {} -# define REQUEST_VALGRIND_CHECK_MEM_IS_ADDRESSABLE(data, size) while (false) {} -# define REQUEST_VALGRIND_MAKE_MEM_NOACCESS(data, size) while (false) {} -# define REQUEST_VALGRIND_MALLOCLIKE_BLOCK(data, size, rz, iz) while (false) {} -# define REQUEST_VALGRIND_MAKE_MEM_UNDEFINED(data, size) while (false) {} +# define REQUEST_VALGRIND_FREELIKE_BLOCK(data, rz) VALGRIND_FREELIKE_BLOCK(data, rz) +#else +# define REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(data, size) while (false) {} +# define REQUEST_VALGRIND_CHECK_MEM_IS_ADDRESSABLE(data, size) while (false) {} +# define REQUEST_VALGRIND_MAKE_MEM_NOACCESS(data, size) while (false) {} +# define REQUEST_VALGRIND_MALLOCLIKE_BLOCK(data, size, rz, iz) while (false) {} +# define REQUEST_VALGRIND_MAKE_MEM_UNDEFINED(data, size) while (false) {} # define REQUEST_VALGRIND_MAKE_MEM_DEFINED(data, size) while (false) {} -# define REQUEST_VALGRIND_FREELIKE_BLOCK(data, size) while (false) {} -#endif //ENABLE_VALGRIND_REQUESTS +# define REQUEST_VALGRIND_FREELIKE_BLOCK(data, size) while (false) {} +#endif //ENABLE_VALGRIND_REQUESTS diff --git a/ydb/core/debug/ya.make b/ydb/core/debug/ya.make index 0367a33580..daf0100a62 100644 --- a/ydb/core/debug/ya.make +++ b/ydb/core/debug/ya.make @@ -1,14 +1,14 @@ -LIBRARY() - -OWNER( - ddoarn - vvvv +LIBRARY() + +OWNER( + ddoarn + vvvv g:kikimr -) - -SRCS( - valgrind_check.h +) + +SRCS( + valgrind_check.h valgrind_check.cpp -) - -END() +) + +END() diff --git a/ydb/core/driver_lib/cli_utils/cli.h b/ydb/core/driver_lib/cli_utils/cli.h index 1c7c81799f..099664792e 100644 --- a/ydb/core/driver_lib/cli_utils/cli.h +++ b/ydb/core/driver_lib/cli_utils/cli.h @@ -32,8 +32,8 @@ namespace NDriverClient { int CompileAndExecMiniKQL(TCommandConfig &cmdConf, int argc, char **argv); int MessageBusTrace(TCommandConfig &cmdConf, int argc, char** argv); int KeyValueRequest(TCommandConfig &cmdConf, int argc, char **argv); - int PersQueueRequest(TCommandConfig &cmdConf, int argc, char **argv); - int PersQueueStress(TCommandConfig &cmdConf, int argc, char **argv); + int PersQueueRequest(TCommandConfig &cmdConf, int argc, char **argv); + int PersQueueStress(TCommandConfig &cmdConf, int argc, char **argv); int PersQueueDiscoverClustersRequest(TCommandConfig &cmdConf, int argc, char **argv); int LoadRequest(TCommandConfig &cmdConf, int argc, char **argv); int ActorsysPerfTest(TCommandConfig &cmdConf, int argc, char **argv); diff --git a/ydb/core/driver_lib/cli_utils/cli_cmds_server.cpp b/ydb/core/driver_lib/cli_utils/cli_cmds_server.cpp index 79a181f2ef..6766dd3171 100644 --- a/ydb/core/driver_lib/cli_utils/cli_cmds_server.cpp +++ b/ydb/core/driver_lib/cli_utils/cli_cmds_server.cpp @@ -628,34 +628,34 @@ protected: queueConfig->SetName(ProxyBusQueueConfig.Name); queueConfig->SetNumWorkers(ProxyBusQueueConfig.NumWorkers); - auto sessionConfig = messageBusConfig->MutableProxyBusSessionConfig(); - - // TODO use macro from messagebus header file - sessionConfig->SetName(ProxyBusSessionConfig.Name); - sessionConfig->SetNumRetries(ProxyBusSessionConfig.NumRetries); - sessionConfig->SetRetryInterval(ProxyBusSessionConfig.RetryInterval); - sessionConfig->SetReconnectWhenIdle(ProxyBusSessionConfig.ReconnectWhenIdle); - sessionConfig->SetMaxInFlight(ProxyBusSessionConfig.MaxInFlight); - sessionConfig->SetPerConnectionMaxInFlight(ProxyBusSessionConfig.PerConnectionMaxInFlight); - sessionConfig->SetPerConnectionMaxInFlightBySize(ProxyBusSessionConfig.PerConnectionMaxInFlightBySize); - sessionConfig->SetMaxInFlightBySize(ProxyBusSessionConfig.MaxInFlightBySize); - sessionConfig->SetTotalTimeout(ProxyBusSessionConfig.TotalTimeout); - sessionConfig->SetSendTimeout(ProxyBusSessionConfig.SendTimeout); - sessionConfig->SetConnectTimeout(ProxyBusSessionConfig.ConnectTimeout); - sessionConfig->SetDefaultBufferSize(ProxyBusSessionConfig.DefaultBufferSize); - sessionConfig->SetMaxBufferSize(ProxyBusSessionConfig.MaxBufferSize); - sessionConfig->SetSocketRecvBufferSize(ProxyBusSessionConfig.SocketRecvBufferSize); - sessionConfig->SetSocketSendBufferSize(ProxyBusSessionConfig.SocketSendBufferSize); - sessionConfig->SetSocketToS(ProxyBusSessionConfig.SocketToS); - sessionConfig->SetSendThreshold(ProxyBusSessionConfig.SendThreshold); - sessionConfig->SetCork(ProxyBusSessionConfig.Cork.MilliSeconds()); - sessionConfig->SetMaxMessageSize(ProxyBusSessionConfig.MaxMessageSize); - sessionConfig->SetTcpNoDelay(ProxyBusSessionConfig.TcpNoDelay); - sessionConfig->SetTcpCork(ProxyBusSessionConfig.TcpCork); - sessionConfig->SetExecuteOnMessageInWorkerPool(ProxyBusSessionConfig.ExecuteOnMessageInWorkerPool); - sessionConfig->SetExecuteOnReplyInWorkerPool(ProxyBusSessionConfig.ExecuteOnReplyInWorkerPool); - sessionConfig->SetListenPort(ProxyBusSessionConfig.ListenPort); - + auto sessionConfig = messageBusConfig->MutableProxyBusSessionConfig(); + + // TODO use macro from messagebus header file + sessionConfig->SetName(ProxyBusSessionConfig.Name); + sessionConfig->SetNumRetries(ProxyBusSessionConfig.NumRetries); + sessionConfig->SetRetryInterval(ProxyBusSessionConfig.RetryInterval); + sessionConfig->SetReconnectWhenIdle(ProxyBusSessionConfig.ReconnectWhenIdle); + sessionConfig->SetMaxInFlight(ProxyBusSessionConfig.MaxInFlight); + sessionConfig->SetPerConnectionMaxInFlight(ProxyBusSessionConfig.PerConnectionMaxInFlight); + sessionConfig->SetPerConnectionMaxInFlightBySize(ProxyBusSessionConfig.PerConnectionMaxInFlightBySize); + sessionConfig->SetMaxInFlightBySize(ProxyBusSessionConfig.MaxInFlightBySize); + sessionConfig->SetTotalTimeout(ProxyBusSessionConfig.TotalTimeout); + sessionConfig->SetSendTimeout(ProxyBusSessionConfig.SendTimeout); + sessionConfig->SetConnectTimeout(ProxyBusSessionConfig.ConnectTimeout); + sessionConfig->SetDefaultBufferSize(ProxyBusSessionConfig.DefaultBufferSize); + sessionConfig->SetMaxBufferSize(ProxyBusSessionConfig.MaxBufferSize); + sessionConfig->SetSocketRecvBufferSize(ProxyBusSessionConfig.SocketRecvBufferSize); + sessionConfig->SetSocketSendBufferSize(ProxyBusSessionConfig.SocketSendBufferSize); + sessionConfig->SetSocketToS(ProxyBusSessionConfig.SocketToS); + sessionConfig->SetSendThreshold(ProxyBusSessionConfig.SendThreshold); + sessionConfig->SetCork(ProxyBusSessionConfig.Cork.MilliSeconds()); + sessionConfig->SetMaxMessageSize(ProxyBusSessionConfig.MaxMessageSize); + sessionConfig->SetTcpNoDelay(ProxyBusSessionConfig.TcpNoDelay); + sessionConfig->SetTcpCork(ProxyBusSessionConfig.TcpCork); + sessionConfig->SetExecuteOnMessageInWorkerPool(ProxyBusSessionConfig.ExecuteOnMessageInWorkerPool); + sessionConfig->SetExecuteOnReplyInWorkerPool(ProxyBusSessionConfig.ExecuteOnReplyInWorkerPool); + sessionConfig->SetListenPort(ProxyBusSessionConfig.ListenPort); + for (auto proxy : ProxyBindToProxy) { messageBusConfig->AddProxyBindToProxy(proxy); } diff --git a/ydb/core/driver_lib/cli_utils/cli_cmds_tablet.cpp b/ydb/core/driver_lib/cli_utils/cli_cmds_tablet.cpp index 3ba7c10595..361e757ca1 100644 --- a/ydb/core/driver_lib/cli_utils/cli_cmds_tablet.cpp +++ b/ydb/core/driver_lib/cli_utils/cli_cmds_tablet.cpp @@ -92,7 +92,7 @@ public: TString Program; TString Params; - virtual void Config(TConfig& config) override { + virtual void Config(TConfig& config) override { TClientCommand::Config(config); config.Opts->AddLongOption("follower", "connect to follower").NoArgument(); config.Opts->AddLongOption("json-ui64-as-string", "json output ui64 as string").NoArgument(); @@ -102,7 +102,7 @@ public: SetFreeArgTitle(1, "<PARAMS>", "Parameters of the program"); } - virtual void Parse(TConfig& config) override { + virtual void Parse(TConfig& config) override { TClientCommand::Parse(config); Program = GetMiniKQL(config.ParseResult->GetFreeArgs().at(0)); @@ -132,7 +132,7 @@ public: } virtual int Run(TConfig& config) override { - return MessageBusCall(config, Request); + return MessageBusCall(config, Request); } }; diff --git a/ydb/core/driver_lib/cli_utils/cli_persqueue.cpp b/ydb/core/driver_lib/cli_utils/cli_persqueue.cpp index 2dd0ba7b72..66bc9151e5 100644 --- a/ydb/core/driver_lib/cli_utils/cli_persqueue.cpp +++ b/ydb/core/driver_lib/cli_utils/cli_persqueue.cpp @@ -1,61 +1,61 @@ -#include "cli.h" +#include "cli.h" #include <ydb/public/lib/deprecated/client/msgbus_client.h> #include <ydb/core/base/tablet_types.h> - -namespace NKikimr { -namespace NDriverClient { - -struct TCmdPersQueueStressConfig : public TCliCmdConfig { + +namespace NKikimr { +namespace NDriverClient { + +struct TCmdPersQueueStressConfig : public TCliCmdConfig { TString Proto; //for config - TCmdPersQueueStressConfig(); - - void Parse(int argc, char **argv); -}; - -int PersQueueRequest(TCommandConfig &cmdConf, int argc, char** argv) { - Y_UNUSED(cmdConf); - - - TCmdPersQueueStressConfig config; - config.Parse(argc, argv); - - TAutoPtr<NMsgBusProxy::TBusPersQueue> request(new NMsgBusProxy::TBusPersQueue); - const bool isOk = ::google::protobuf::TextFormat::ParseFromString(config.Proto, &request->Record); - if (!isOk) { - ythrow TWithBackTrace<yexception>() << "Error parsing protobuf: \'" << config.Proto << "\'"; - } - TAutoPtr<NBus::TBusMessage> reply; + TCmdPersQueueStressConfig(); + + void Parse(int argc, char **argv); +}; + +int PersQueueRequest(TCommandConfig &cmdConf, int argc, char** argv) { + Y_UNUSED(cmdConf); + + + TCmdPersQueueStressConfig config; + config.Parse(argc, argv); + + TAutoPtr<NMsgBusProxy::TBusPersQueue> request(new NMsgBusProxy::TBusPersQueue); + const bool isOk = ::google::protobuf::TextFormat::ParseFromString(config.Proto, &request->Record); + if (!isOk) { + ythrow TWithBackTrace<yexception>() << "Error parsing protobuf: \'" << config.Proto << "\'"; + } + TAutoPtr<NBus::TBusMessage> reply; NBus::EMessageStatus status = config.SyncCall(request, reply); - Cerr << status << "\n"; - Y_VERIFY(status == NBus::MESSAGE_OK); - const auto& result = static_cast<NMsgBusProxy::TBusResponse *>(reply.Get())->Record; - Cerr << result.DebugString() << "\n"; - return 0; -} - -TCmdPersQueueStressConfig::TCmdPersQueueStressConfig() -{} - -void TCmdPersQueueStressConfig::Parse(int argc, char **argv) { - using namespace NLastGetopt; - + Cerr << status << "\n"; + Y_VERIFY(status == NBus::MESSAGE_OK); + const auto& result = static_cast<NMsgBusProxy::TBusResponse *>(reply.Get())->Record; + Cerr << result.DebugString() << "\n"; + return 0; +} + +TCmdPersQueueStressConfig::TCmdPersQueueStressConfig() +{} + +void TCmdPersQueueStressConfig::Parse(int argc, char **argv) { + using namespace NLastGetopt; + TString fileName; - - TOpts opts = TOpts::Default(); - opts.AddLongOption("protobuf", "string representation of the request protobuf").Optional().StoreResult(&Proto); - opts.AddLongOption("protofile", "file with protobuf").Optional().StoreResult(&fileName); - ConfigureBaseLastGetopt(opts); - - TOptsParseResult res(&opts, argc, argv); - if (fileName.empty() == Proto.empty()) { - Cerr << "one of protobuf or protofile must be set\n"; - exit(1); - } - if (fileName) { + + TOpts opts = TOpts::Default(); + opts.AddLongOption("protobuf", "string representation of the request protobuf").Optional().StoreResult(&Proto); + opts.AddLongOption("protofile", "file with protobuf").Optional().StoreResult(&fileName); + ConfigureBaseLastGetopt(opts); + + TOptsParseResult res(&opts, argc, argv); + if (fileName.empty() == Proto.empty()) { + Cerr << "one of protobuf or protofile must be set\n"; + exit(1); + } + if (fileName) { Proto = TUnbufferedFileInput(fileName).ReadAll(); - } - ConfigureMsgBusLastGetopt(res, argc, argv); -} - -} -} + } + ConfigureMsgBusLastGetopt(res, argc, argv); +} + +} +} diff --git a/ydb/core/driver_lib/cli_utils/cli_persqueue_stress.cpp b/ydb/core/driver_lib/cli_utils/cli_persqueue_stress.cpp index bb1f76c83c..39b6a0c07c 100644 --- a/ydb/core/driver_lib/cli_utils/cli_persqueue_stress.cpp +++ b/ydb/core/driver_lib/cli_utils/cli_persqueue_stress.cpp @@ -1,204 +1,204 @@ -#include "cli.h" +#include "cli.h" #include <ydb/public/lib/deprecated/client/msgbus_client.h> #include <ydb/core/base/tablet_types.h> - -namespace NKikimr { -namespace NDriverClient { - -struct TCmdPersQueueConfig : public TCliCmdConfig { - bool IsWrite; + +namespace NKikimr { +namespace NDriverClient { + +struct TCmdPersQueueConfig : public TCliCmdConfig { + bool IsWrite; TString Topic; - ui32 Partition; - //write options + ui32 Partition; + //write options TString SourceId; - ui32 OneCmdMinSize; - ui32 OneCmdMaxSize; - //read options - ui64 StartOffset; - ui32 TimeRead; - ui32 TimeSleep; - //both commands options - ui32 BatchSize; - ui32 Speed; - TCmdPersQueueConfig(); - - void Parse(int argc, char **argv); -}; - -int PersQueueStress(TCommandConfig &cmdConf, int argc, char** argv) { - Y_UNUSED(cmdConf); - - TCmdPersQueueConfig config; - config.Parse(argc, argv); - - if (config.IsWrite) { - ui64 DoneBytes = 0; - TInstant timestamp = TInstant::Now(); - + ui32 OneCmdMinSize; + ui32 OneCmdMaxSize; + //read options + ui64 StartOffset; + ui32 TimeRead; + ui32 TimeSleep; + //both commands options + ui32 BatchSize; + ui32 Speed; + TCmdPersQueueConfig(); + + void Parse(int argc, char **argv); +}; + +int PersQueueStress(TCommandConfig &cmdConf, int argc, char** argv) { + Y_UNUSED(cmdConf); + + TCmdPersQueueConfig config; + config.Parse(argc, argv); + + if (config.IsWrite) { + ui64 DoneBytes = 0; + TInstant timestamp = TInstant::Now(); + TVector<ui32> buckets(3000); - + TString cookie; - { - TAutoPtr<NMsgBusProxy::TBusPersQueue> request(new NMsgBusProxy::TBusPersQueue); - auto pr = request->Record.MutablePartitionRequest(); - pr->SetTopic(config.Topic); - pr->SetPartition(config.Partition); - pr->MutableCmdGetOwnership(); - TAutoPtr<NBus::TBusMessage> reply; + { + TAutoPtr<NMsgBusProxy::TBusPersQueue> request(new NMsgBusProxy::TBusPersQueue); + auto pr = request->Record.MutablePartitionRequest(); + pr->SetTopic(config.Topic); + pr->SetPartition(config.Partition); + pr->MutableCmdGetOwnership(); + TAutoPtr<NBus::TBusMessage> reply; NBus::EMessageStatus status = config.SyncCall(request, reply); - Y_VERIFY(status == NBus::MESSAGE_OK); - const auto& result = static_cast<NMsgBusProxy::TBusResponse *>(reply.Get())->Record; - Cerr << result.DebugString() << "\n"; - Y_VERIFY(result.GetStatus() == NMsgBusProxy::MSTATUS_OK); - Y_VERIFY(result.HasPartitionResponse()); - cookie = result.GetPartitionResponse().GetCmdGetOwnershipResult().GetOwnerCookie(); - Y_VERIFY(!cookie.empty()); - } - ui64 messageNo = 0; - - TAutoPtr<NMsgBusProxy::TBusPersQueue> request(new NMsgBusProxy::TBusPersQueue); - auto pr = request->Record.MutablePartitionRequest(); - pr->SetTopic(config.Topic); - pr->SetPartition(config.Partition); - pr->MutableCmdGetMaxSeqNo()->AddSourceId(config.SourceId); - TAutoPtr<NBus::TBusMessage> reply; + Y_VERIFY(status == NBus::MESSAGE_OK); + const auto& result = static_cast<NMsgBusProxy::TBusResponse *>(reply.Get())->Record; + Cerr << result.DebugString() << "\n"; + Y_VERIFY(result.GetStatus() == NMsgBusProxy::MSTATUS_OK); + Y_VERIFY(result.HasPartitionResponse()); + cookie = result.GetPartitionResponse().GetCmdGetOwnershipResult().GetOwnerCookie(); + Y_VERIFY(!cookie.empty()); + } + ui64 messageNo = 0; + + TAutoPtr<NMsgBusProxy::TBusPersQueue> request(new NMsgBusProxy::TBusPersQueue); + auto pr = request->Record.MutablePartitionRequest(); + pr->SetTopic(config.Topic); + pr->SetPartition(config.Partition); + pr->MutableCmdGetMaxSeqNo()->AddSourceId(config.SourceId); + TAutoPtr<NBus::TBusMessage> reply; NBus::EMessageStatus status = config.SyncCall(request, reply); - Y_VERIFY(status == NBus::MESSAGE_OK); - const auto& result = static_cast<NMsgBusProxy::TBusResponse *>(reply.Get())->Record; - Cerr << result.DebugString() << "\n"; - Y_VERIFY(result.GetStatus() == NMsgBusProxy::MSTATUS_OK); - Y_VERIFY(result.HasPartitionResponse()); - ui64 seqNo = result.GetPartitionResponse().GetCmdGetMaxSeqNoResult().GetSourceIdInfo(0).HasSeqNo() ? result.GetPartitionResponse().GetCmdGetMaxSeqNoResult().GetSourceIdInfo(0).GetSeqNo() : 0; - while (true) { - request.Reset(new NMsgBusProxy::TBusPersQueue); - auto pr = request->Record.MutablePartitionRequest(); - pr->SetTopic(config.Topic); - pr->SetPartition(config.Partition); - pr->SetOwnerCookie(cookie); - pr->SetMessageNo(messageNo++); - - ui32 totalSize = 0; - for (ui32 i = 0; i < config.BatchSize; ++i) { - auto write = pr->AddCmdWrite(); - write->SetSourceId(config.SourceId); - write->SetSeqNo(++seqNo); - ui32 size = config.OneCmdMinSize + rand() % (config.OneCmdMaxSize - config.OneCmdMinSize + 1); + Y_VERIFY(status == NBus::MESSAGE_OK); + const auto& result = static_cast<NMsgBusProxy::TBusResponse *>(reply.Get())->Record; + Cerr << result.DebugString() << "\n"; + Y_VERIFY(result.GetStatus() == NMsgBusProxy::MSTATUS_OK); + Y_VERIFY(result.HasPartitionResponse()); + ui64 seqNo = result.GetPartitionResponse().GetCmdGetMaxSeqNoResult().GetSourceIdInfo(0).HasSeqNo() ? result.GetPartitionResponse().GetCmdGetMaxSeqNoResult().GetSourceIdInfo(0).GetSeqNo() : 0; + while (true) { + request.Reset(new NMsgBusProxy::TBusPersQueue); + auto pr = request->Record.MutablePartitionRequest(); + pr->SetTopic(config.Topic); + pr->SetPartition(config.Partition); + pr->SetOwnerCookie(cookie); + pr->SetMessageNo(messageNo++); + + ui32 totalSize = 0; + for (ui32 i = 0; i < config.BatchSize; ++i) { + auto write = pr->AddCmdWrite(); + write->SetSourceId(config.SourceId); + write->SetSeqNo(++seqNo); + ui32 size = config.OneCmdMinSize + rand() % (config.OneCmdMaxSize - config.OneCmdMinSize + 1); write->SetData(TString(size, 'a')); - totalSize += size; - } - TInstant tt = TInstant::Now(); + totalSize += size; + } + TInstant tt = TInstant::Now(); status = config.SyncCall(request, reply); - if (status != NBus::MESSAGE_OK) - Cerr << status << "\n"; - Y_VERIFY(status == NBus::MESSAGE_OK); - const auto& result = static_cast<NMsgBusProxy::TBusResponse *>(reply.Get())->Record; - DoneBytes += totalSize; - ui32 i = (TInstant::Now() - tt).MilliSeconds() / 10; - if (i >= buckets.size()) i = buckets.size() - 1; - ++buckets[i]; - Cerr << "done upto " << seqNo << " " << DoneBytes/1024.0/1024.0 << " Mb speed " << DoneBytes / ((TInstant::Now() - timestamp).MilliSeconds() + 0.0) * 1000.0 /1024.0 /1024.0 << " Mb/s " - << (TInstant::Now() - tt).MilliSeconds() / 1000.0 << " seconds\n"; - if (seqNo % 100 == 0) { - for (i = 0; i < buckets.size(); ++i) { - Cout << buckets[i] << " "; - } - Cout << "\n"; - } - Cout.Flush(); - if (result.GetStatus() != NMsgBusProxy::MSTATUS_OK || result.GetErrorCode() != NPersQueue::NErrorCode::OK) { - Cerr << result.DebugString() << "\n"; - } - Y_VERIFY(result.GetStatus() == NMsgBusProxy::MSTATUS_OK); - Y_VERIFY(result.GetErrorCode() == NPersQueue::NErrorCode::OK); - if (config.Speed) { - TInstant T2 = timestamp + TDuration::MilliSeconds(DoneBytes * 1000 / 1024.0 / config.Speed); - Sleep(T2 - TInstant::Now()); - } - } - } else { - ui64 offset = config.StartOffset; - while (true) { - Sleep(TDuration::MilliSeconds(config.TimeSleep)); - TInstant timestamp = TInstant::Now(); - ui64 bytes = 0; - ui32 count = 0; - while((TInstant::Now() - timestamp).MilliSeconds() < config.TimeRead) { - TAutoPtr<NMsgBusProxy::TBusPersQueue> request(new NMsgBusProxy::TBusPersQueue); - auto pr = request->Record.MutablePartitionRequest(); - pr->SetTopic(config.Topic); - pr->SetPartition(config.Partition); - auto read = pr->MutableCmdRead(); - read->SetOffset(offset); - read->SetCount(config.BatchSize); - read->SetClientId("user"); - TAutoPtr<NBus::TBusMessage> reply; + if (status != NBus::MESSAGE_OK) + Cerr << status << "\n"; + Y_VERIFY(status == NBus::MESSAGE_OK); + const auto& result = static_cast<NMsgBusProxy::TBusResponse *>(reply.Get())->Record; + DoneBytes += totalSize; + ui32 i = (TInstant::Now() - tt).MilliSeconds() / 10; + if (i >= buckets.size()) i = buckets.size() - 1; + ++buckets[i]; + Cerr << "done upto " << seqNo << " " << DoneBytes/1024.0/1024.0 << " Mb speed " << DoneBytes / ((TInstant::Now() - timestamp).MilliSeconds() + 0.0) * 1000.0 /1024.0 /1024.0 << " Mb/s " + << (TInstant::Now() - tt).MilliSeconds() / 1000.0 << " seconds\n"; + if (seqNo % 100 == 0) { + for (i = 0; i < buckets.size(); ++i) { + Cout << buckets[i] << " "; + } + Cout << "\n"; + } + Cout.Flush(); + if (result.GetStatus() != NMsgBusProxy::MSTATUS_OK || result.GetErrorCode() != NPersQueue::NErrorCode::OK) { + Cerr << result.DebugString() << "\n"; + } + Y_VERIFY(result.GetStatus() == NMsgBusProxy::MSTATUS_OK); + Y_VERIFY(result.GetErrorCode() == NPersQueue::NErrorCode::OK); + if (config.Speed) { + TInstant T2 = timestamp + TDuration::MilliSeconds(DoneBytes * 1000 / 1024.0 / config.Speed); + Sleep(T2 - TInstant::Now()); + } + } + } else { + ui64 offset = config.StartOffset; + while (true) { + Sleep(TDuration::MilliSeconds(config.TimeSleep)); + TInstant timestamp = TInstant::Now(); + ui64 bytes = 0; + ui32 count = 0; + while((TInstant::Now() - timestamp).MilliSeconds() < config.TimeRead) { + TAutoPtr<NMsgBusProxy::TBusPersQueue> request(new NMsgBusProxy::TBusPersQueue); + auto pr = request->Record.MutablePartitionRequest(); + pr->SetTopic(config.Topic); + pr->SetPartition(config.Partition); + auto read = pr->MutableCmdRead(); + read->SetOffset(offset); + read->SetCount(config.BatchSize); + read->SetClientId("user"); + TAutoPtr<NBus::TBusMessage> reply; NBus::EMessageStatus status = config.SyncCall(request, reply); - Y_VERIFY(status == NBus::MESSAGE_OK); - const auto& result = static_cast<NMsgBusProxy::TBusResponse *>(reply.Get())->Record; - if (result.GetStatus() != NMsgBusProxy::MSTATUS_OK || result.GetErrorCode() != NPersQueue::NErrorCode::OK) { - Cerr << result.DebugString() << "\n"; - } - Y_VERIFY(result.GetStatus() == NMsgBusProxy::MSTATUS_OK); - Y_VERIFY(result.GetErrorCode() == NPersQueue::NErrorCode::OK); - const auto& rd = result.GetPartitionResponse().GetCmdReadResult(); - for (ui32 i = 0; i < rd.ResultSize(); ++i) { - bytes += rd.GetResult(i).ByteSize(); - count++; - offset = rd.GetResult(i).GetOffset() + 1; - } - request.Reset(new NMsgBusProxy::TBusPersQueue); - pr = request->Record.MutablePartitionRequest(); - pr->SetTopic(config.Topic); - pr->SetPartition(config.Partition); - auto set = pr->MutableCmdSetClientOffset(); - set->SetOffset(offset); - set->SetClientId("user"); - + Y_VERIFY(status == NBus::MESSAGE_OK); + const auto& result = static_cast<NMsgBusProxy::TBusResponse *>(reply.Get())->Record; + if (result.GetStatus() != NMsgBusProxy::MSTATUS_OK || result.GetErrorCode() != NPersQueue::NErrorCode::OK) { + Cerr << result.DebugString() << "\n"; + } + Y_VERIFY(result.GetStatus() == NMsgBusProxy::MSTATUS_OK); + Y_VERIFY(result.GetErrorCode() == NPersQueue::NErrorCode::OK); + const auto& rd = result.GetPartitionResponse().GetCmdReadResult(); + for (ui32 i = 0; i < rd.ResultSize(); ++i) { + bytes += rd.GetResult(i).ByteSize(); + count++; + offset = rd.GetResult(i).GetOffset() + 1; + } + request.Reset(new NMsgBusProxy::TBusPersQueue); + pr = request->Record.MutablePartitionRequest(); + pr->SetTopic(config.Topic); + pr->SetPartition(config.Partition); + auto set = pr->MutableCmdSetClientOffset(); + set->SetOffset(offset); + set->SetClientId("user"); + status = config.SyncCall(request, reply); - Y_VERIFY(status == NBus::MESSAGE_OK); - if (config.Speed) { - TInstant T2 = timestamp + TDuration::MilliSeconds(bytes * 1000 / 1024.0 / config.Speed); - Sleep(T2 - TInstant::Now()); - } - Cerr << "offset " << offset << " from cache " << rd.GetBlobsFromCache() << " from disk " << rd.GetBlobsFromDisk() << " read done " << count << " size " << bytes << " speed " << bytes * 1000.0 / ((TInstant::Now() - timestamp).MilliSeconds() + 0.0) / 1024.0/1024.0 << " Mb/s\n"; - } - } - - } - return 0; -} - -TCmdPersQueueConfig::TCmdPersQueueConfig() -{} - -void TCmdPersQueueConfig::Parse(int argc, char **argv) { - using namespace NLastGetopt; - - TOpts opts = TOpts::Default(); - + Y_VERIFY(status == NBus::MESSAGE_OK); + if (config.Speed) { + TInstant T2 = timestamp + TDuration::MilliSeconds(bytes * 1000 / 1024.0 / config.Speed); + Sleep(T2 - TInstant::Now()); + } + Cerr << "offset " << offset << " from cache " << rd.GetBlobsFromCache() << " from disk " << rd.GetBlobsFromDisk() << " read done " << count << " size " << bytes << " speed " << bytes * 1000.0 / ((TInstant::Now() - timestamp).MilliSeconds() + 0.0) / 1024.0/1024.0 << " Mb/s\n"; + } + } + + } + return 0; +} + +TCmdPersQueueConfig::TCmdPersQueueConfig() +{} + +void TCmdPersQueueConfig::Parse(int argc, char **argv) { + using namespace NLastGetopt; + + TOpts opts = TOpts::Default(); + TString command; - opts.AddLongOption('c', "command", "type of action").Required().RequiredArgument("[write|read]").StoreResult(&command); - opts.AddLongOption("topic", "topic").Required().RequiredArgument("STR").StoreResult(&Topic); - opts.AddLongOption("partition", "partition").Required().RequiredArgument("NUM").StoreResult(&Partition); - - opts.AddLongOption("sourceid", "sourceId").Optional().RequiredArgument("STR").StoreResult(&SourceId); - opts.AddLongOption('b', "batchsize", "batch size").Optional().RequiredArgument("NUM").StoreResult(&BatchSize); - opts.AddLongOption('m', "minsize", "msg min size").Optional().RequiredArgument("NUM").StoreResult(&OneCmdMinSize); - opts.AddLongOption('M', "maxsize", "msg max size").Optional().RequiredArgument("NUM").StoreResult(&OneCmdMaxSize); - - opts.AddLongOption('o', "offset", "StartOffset").Optional().RequiredArgument("NUM").DefaultValue("0").StoreResult(&StartOffset); - opts.AddLongOption('R', "readtime", "read time, ms").Optional().RequiredArgument("NUM").DefaultValue("1000").StoreResult(&TimeRead); - opts.AddLongOption('S', "sleetime", "sleep time before reads, ms").Optional().RequiredArgument("NUM").DefaultValue("0").StoreResult(&TimeSleep); - - opts.AddLongOption('B', "speed", "speed of read/write, Kb/s").Optional().RequiredArgument("NUM").DefaultValue("0").StoreResult(&Speed); - - - ConfigureBaseLastGetopt(opts); - - TOptsParseResult res(&opts, argc, argv); - IsWrite = command == "write"; - ConfigureMsgBusLastGetopt(res, argc, argv); -} - -} -} + opts.AddLongOption('c', "command", "type of action").Required().RequiredArgument("[write|read]").StoreResult(&command); + opts.AddLongOption("topic", "topic").Required().RequiredArgument("STR").StoreResult(&Topic); + opts.AddLongOption("partition", "partition").Required().RequiredArgument("NUM").StoreResult(&Partition); + + opts.AddLongOption("sourceid", "sourceId").Optional().RequiredArgument("STR").StoreResult(&SourceId); + opts.AddLongOption('b', "batchsize", "batch size").Optional().RequiredArgument("NUM").StoreResult(&BatchSize); + opts.AddLongOption('m', "minsize", "msg min size").Optional().RequiredArgument("NUM").StoreResult(&OneCmdMinSize); + opts.AddLongOption('M', "maxsize", "msg max size").Optional().RequiredArgument("NUM").StoreResult(&OneCmdMaxSize); + + opts.AddLongOption('o', "offset", "StartOffset").Optional().RequiredArgument("NUM").DefaultValue("0").StoreResult(&StartOffset); + opts.AddLongOption('R', "readtime", "read time, ms").Optional().RequiredArgument("NUM").DefaultValue("1000").StoreResult(&TimeRead); + opts.AddLongOption('S', "sleetime", "sleep time before reads, ms").Optional().RequiredArgument("NUM").DefaultValue("0").StoreResult(&TimeSleep); + + opts.AddLongOption('B', "speed", "speed of read/write, Kb/s").Optional().RequiredArgument("NUM").DefaultValue("0").StoreResult(&Speed); + + + ConfigureBaseLastGetopt(opts); + + TOptsParseResult res(&opts, argc, argv); + IsWrite = command == "write"; + ConfigureMsgBusLastGetopt(res, argc, argv); +} + +} +} diff --git a/ydb/core/driver_lib/cli_utils/ya.make b/ydb/core/driver_lib/cli_utils/ya.make index 8f46950ebe..cb1f9670be 100644 --- a/ydb/core/driver_lib/cli_utils/ya.make +++ b/ydb/core/driver_lib/cli_utils/ya.make @@ -26,9 +26,9 @@ SRCS( cli_cmds_tenant.cpp cli_fakeinitshard.cpp cli_keyvalue.cpp - cli_persqueue.cpp + cli_persqueue.cpp cli_persqueue_cluster_discovery.cpp - cli_persqueue_stress.cpp + cli_persqueue_stress.cpp cli_load.cpp cli_minikql_compile_and_exec.cpp cli_mb_trace.cpp diff --git a/ydb/core/driver_lib/run/config.h b/ydb/core/driver_lib/run/config.h index 08a23d1184..faf1797413 100644 --- a/ydb/core/driver_lib/run/config.h +++ b/ydb/core/driver_lib/run/config.h @@ -46,7 +46,7 @@ union TBasicKikimrServicesMask { bool EnableNodeIdentifier:1; bool EnableCms:1; bool EnableNodeTable:1; - bool EnableGRpcProxyStatus:1; + bool EnableGRpcProxyStatus:1; bool EnablePQ:1; bool EnableSqs:1; bool EnableConfigsDispatcher:1; diff --git a/ydb/core/driver_lib/run/driver.h b/ydb/core/driver_lib/run/driver.h index fba56e60eb..7ad96718d1 100644 --- a/ydb/core/driver_lib/run/driver.h +++ b/ydb/core/driver_lib/run/driver.h @@ -32,13 +32,13 @@ namespace NKikimr { XX(EDM_COMPILE_AND_EXEC_MINIKQL, "minikql-exec", "compile and execute MiniKQL program") \ XX(EDM_TRACE, "mb-trace", "control message bus trace") \ XX(EDM_KEYVALUE_REQUEST, "keyvalue-request", "send protobuf request to a keyvalue tablet") \ - XX(EDM_PERSQUEUE_REQUEST, "persqueue-request", "send protobuf request to a persqueue tablet") \ - XX(EDM_PERSQUEUE_STRESS, "persqueue-stress", "stress read or write to a persqueue tablet") \ + XX(EDM_PERSQUEUE_REQUEST, "persqueue-request", "send protobuf request to a persqueue tablet") \ + XX(EDM_PERSQUEUE_STRESS, "persqueue-stress", "stress read or write to a persqueue tablet") \ XX(EDM_PERSQUEUE_DISCOVER_CLUSTERS, "persqueue-discover-clusters", "persqueue session clusters discovery") \ XX(EDM_LOAD_REQUEST, "bs-load-test", "send protobuf request to blobstorage test load actor (https://wiki.yandex-team.ru/kikimr/developers/BSLoadTest/)") \ XX(EDM_ACTORSYS_PERFTEST, "actorsys-perf-test", "make actorsystem performance test") \ - + CLI_MODES_IMPL(EDriverMode, EDM_NO, MODE_MAP); } diff --git a/ydb/core/driver_lib/run/factories.h b/ydb/core/driver_lib/run/factories.h index 1c76fd0b9a..41a45b44e0 100644 --- a/ydb/core/driver_lib/run/factories.h +++ b/ydb/core/driver_lib/run/factories.h @@ -49,11 +49,11 @@ struct TModuleFactories { NKikimr::TYdbCredentialsProviderFactory YdbCredentialProviderFactory; // Factory for grpc services TGrpcServiceFactory GrpcServiceFactory; - + std::shared_ptr<NPQ::IPersQueueMirrorReaderFactory> PersQueueMirrorReaderFactory; /// Factory for pdisk's aio engines std::shared_ptr<NPDisk::IIoContextFactory> IoContextFactory; - + std::function<NActors::TMon* (NActors::TMon::TConfig)> MonitoringFactory; std::shared_ptr<NSQS::IAuthFactory> SqsAuthFactory; diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp index 238c767e71..819c1478d1 100644 --- a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp +++ b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp @@ -906,10 +906,10 @@ void TLocalServiceInitializer::InitializeServices( new TTabletSetupInfo(&CreateDataShard, TMailboxType::ReadAsFilled, appData->UserPoolId, TMailboxType::ReadAsFilled, appData->SystemPoolId)); localConfig->TabletClassInfo[appData->DefaultTabletTypes.KeyValue] = TLocalConfig::TTabletClassInfo( new TTabletSetupInfo(&CreateKeyValueFlat, TMailboxType::ReadAsFilled, appData->UserPoolId, TMailboxType::ReadAsFilled, appData->SystemPoolId)); - localConfig->TabletClassInfo[appData->DefaultTabletTypes.PersQueue] = TLocalConfig::TTabletClassInfo( + localConfig->TabletClassInfo[appData->DefaultTabletTypes.PersQueue] = TLocalConfig::TTabletClassInfo( new TTabletSetupInfo(&CreatePersQueue, TMailboxType::ReadAsFilled, appData->UserPoolId, TMailboxType::ReadAsFilled, appData->SystemPoolId)); - localConfig->TabletClassInfo[appData->DefaultTabletTypes.PersQueueReadBalancer] = TLocalConfig::TTabletClassInfo( - new TTabletSetupInfo(&CreatePersQueueReadBalancer, TMailboxType::ReadAsFilled, appData->UserPoolId, TMailboxType::ReadAsFilled, appData->SystemPoolId)); + localConfig->TabletClassInfo[appData->DefaultTabletTypes.PersQueueReadBalancer] = TLocalConfig::TTabletClassInfo( + new TTabletSetupInfo(&CreatePersQueueReadBalancer, TMailboxType::ReadAsFilled, appData->UserPoolId, TMailboxType::ReadAsFilled, appData->SystemPoolId)); localConfig->TabletClassInfo[appData->DefaultTabletTypes.Coordinator] = TLocalConfig::TTabletClassInfo( new TTabletSetupInfo(&CreateFlatTxCoordinator, TMailboxType::Revolving, importantPoolId, TMailboxType::ReadAsFilled, appData->SystemPoolId)); localConfig->TabletClassInfo[appData->DefaultTabletTypes.Mediator] = TLocalConfig::TTabletClassInfo( @@ -1224,21 +1224,21 @@ void TTabletCountersAggregatorInitializer::InitializeServices( } } -//TGRpcProxyStatusInitializer - -TGRpcProxyStatusInitializer::TGRpcProxyStatusInitializer(const TKikimrRunConfig& runConfig) - : IKikimrServicesInitializer(runConfig) { -} - -void TGRpcProxyStatusInitializer::InitializeServices( - NActors::TActorSystemSetup* setup, - const NKikimr::TAppData* appData) { - TActorSetupCmd gRpcProxyStatusSetup(CreateGRpcProxyStatus(), TMailboxType::ReadAsFilled, appData->UserPoolId); +//TGRpcProxyStatusInitializer + +TGRpcProxyStatusInitializer::TGRpcProxyStatusInitializer(const TKikimrRunConfig& runConfig) + : IKikimrServicesInitializer(runConfig) { +} + +void TGRpcProxyStatusInitializer::InitializeServices( + NActors::TActorSystemSetup* setup, + const NKikimr::TAppData* appData) { + TActorSetupCmd gRpcProxyStatusSetup(CreateGRpcProxyStatus(), TMailboxType::ReadAsFilled, appData->UserPoolId); setup->LocalServices.push_back(std::pair<TActorId, TActorSetupCmd>(MakeGRpcProxyStatusID(NodeId), gRpcProxyStatusSetup)); - -} - - + +} + + // This code is shared between default kikimr bootstrapper and alternative bootstrapper static TIntrusivePtr<TTabletSetupInfo> CreateTablet( @@ -1816,14 +1816,14 @@ void TPersQueueL2CacheInitializer::InitializeServices(NActors::TActorSystemSetup } } - if (Config.HasPQConfig() && Config.GetPQConfig().HasPersQueueNodeConfig()) { - auto cfg = Config.GetPQConfig().GetPersQueueNodeConfig(); - if (cfg.HasSharedCacheSizeMb()) - params.MaxSizeMB = cfg.GetSharedCacheSizeMb(); - if (cfg.HasCacheKeepTimeSec()) - params.KeepTime = TDuration::Seconds(cfg.GetCacheKeepTimeSec()); - } - + if (Config.HasPQConfig() && Config.GetPQConfig().HasPersQueueNodeConfig()) { + auto cfg = Config.GetPQConfig().GetPersQueueNodeConfig(); + if (cfg.HasSharedCacheSizeMb()) + params.MaxSizeMB = cfg.GetSharedCacheSizeMb(); + if (cfg.HasCacheKeepTimeSec()) + params.KeepTime = TDuration::Seconds(cfg.GetCacheKeepTimeSec()); + } + TIntrusivePtr<NMonitoring::TDynamicCounters> tabletGroup = GetServiceCounters(appData->Counters, "tablets"); TIntrusivePtr<NMonitoring::TDynamicCounters> pqCacheGroup = tabletGroup->GetSubgroup("type", "PQ_CACHE"); @@ -1866,12 +1866,12 @@ TPersQueueLibSharedInstanceInitializer::TPersQueueLibSharedInstanceInitializer(c : IKikimrServicesInitializer(runConfig) {} -void TPersQueueLibSharedInstanceInitializer::InitializeServices(NActors::TActorSystemSetup*, const NKikimr::TAppData* appData) { +void TPersQueueLibSharedInstanceInitializer::InitializeServices(NActors::TActorSystemSetup*, const NKikimr::TAppData* appData) { if (Config.HasPQConfig() && Config.GetPQConfig().GetEnabled()) { if (Config.GetPQConfig().GetMirrorConfig().GetEnabled()) { if (appData->PersQueueMirrorReaderFactory) { appData->PersQueueMirrorReaderFactory->Initialize(Config.GetPQConfig().GetMirrorConfig().GetPQLibSettings()); - } + } } } } diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.h b/ydb/core/driver_lib/run/kikimr_services_initializers.h index 6dbf3739f5..407ce1bb7b 100644 --- a/ydb/core/driver_lib/run/kikimr_services_initializers.h +++ b/ydb/core/driver_lib/run/kikimr_services_initializers.h @@ -161,12 +161,12 @@ public: }; class TGRpcProxyStatusInitializer : public IKikimrServicesInitializer { -public: - TGRpcProxyStatusInitializer(const TKikimrRunConfig& runConfig); - +public: + TGRpcProxyStatusInitializer(const TKikimrRunConfig& runConfig); + void InitializeServices(NActors::TActorSystemSetup *setup, const NKikimr::TAppData *appData) override; -}; - +}; + class TRestartsCountPublisher : public IKikimrServicesInitializer { static void PublishRestartsCount(const NMonitoring::TDynamicCounters::TCounterPtr& counter, const TString& restartsCountFile); diff --git a/ydb/core/driver_lib/run/main.cpp b/ydb/core/driver_lib/run/main.cpp index 90a2c39cd6..f0a9315685 100644 --- a/ydb/core/driver_lib/run/main.cpp +++ b/ydb/core/driver_lib/run/main.cpp @@ -134,10 +134,10 @@ int MainRun(const TKikimrRunConfig& runConfig, std::shared_ptr<TModuleFactories> return NDriverClient::MessageBusTrace(cmdConf, argc, argv); case EDM_KEYVALUE_REQUEST: return NDriverClient::KeyValueRequest(cmdConf, argc, argv); - case EDM_PERSQUEUE_REQUEST: - return NDriverClient::PersQueueRequest(cmdConf, argc, argv); - case EDM_PERSQUEUE_STRESS: - return NDriverClient::PersQueueStress(cmdConf, argc, argv); + case EDM_PERSQUEUE_REQUEST: + return NDriverClient::PersQueueRequest(cmdConf, argc, argv); + case EDM_PERSQUEUE_STRESS: + return NDriverClient::PersQueueStress(cmdConf, argc, argv); case EDM_PERSQUEUE_DISCOVER_CLUSTERS: return NDriverClient::PersQueueDiscoverClustersRequest(cmdConf, argc, argv); case EDM_LOAD_REQUEST: diff --git a/ydb/core/driver_lib/run/run.cpp b/ydb/core/driver_lib/run/run.cpp index 8e625cdcba..a4f74aa4e0 100644 --- a/ydb/core/driver_lib/run/run.cpp +++ b/ydb/core/driver_lib/run/run.cpp @@ -511,7 +511,7 @@ void TKikimrRunner::InitializeGRpc(const TKikimrRunConfig& runConfig) { names["yql_internal"] = &hasYqlInternal; bool hasPQ = services.empty(); names["pq"] = &hasPQ; - bool hasPQv1 = false; + bool hasPQv1 = false; names["pqv1"] = &hasPQv1; bool hasPQCD = false; names["pqcd"] = &hasPQCD; @@ -673,10 +673,10 @@ void TKikimrRunner::InitializeGRpc(const TKikimrRunConfig& runConfig) { server.AddService(new NKesus::TKesusGRpcService(ActorSystem.Get(), Counters, grpcRequestProxyId)); } - if (hasPQv1) { + if (hasPQv1) { server.AddService(new NGRpcService::V1::TGRpcPersQueueService(ActorSystem.Get(), Counters, NMsgBusProxy::CreatePersQueueMetaCacheV2Id(), grpcRequestProxyId)); - } - + } + if (hasPQCD) { // the service has its own flag since it should be capable of using custom grpc port const auto& pqcdConfig = AppData->PQClusterDiscoveryConfig; @@ -854,7 +854,7 @@ void TKikimrRunner::InitializeAppData(const TKikimrRunConfig& runConfig) AppData->SqsEventsWriterFactory = ModuleFactories ? ModuleFactories->SqsEventsWriterFactory.get() : nullptr; AppData->PersQueueMirrorReaderFactory = ModuleFactories ? ModuleFactories->PersQueueMirrorReaderFactory.get() : nullptr; AppData->IoContextFactory = ModuleFactories ? ModuleFactories->IoContextFactory.get() : nullptr; - + AppData->SqsAuthFactory = ModuleFactories ? ModuleFactories->SqsAuthFactory.get() : nullptr; @@ -1190,9 +1190,9 @@ TIntrusivePtr<TServiceInitializersList> TKikimrRunner::CreateServiceInitializers if (serviceMask.EnableTabletCountersAggregator) { sil->AddServiceInitializer(new TTabletCountersAggregatorInitializer(runConfig)); } - if (serviceMask.EnableGRpcProxyStatus) { - sil->AddServiceInitializer(new TGRpcProxyStatusInitializer(runConfig)); - } + if (serviceMask.EnableGRpcProxyStatus) { + sil->AddServiceInitializer(new TGRpcProxyStatusInitializer(runConfig)); + } if (serviceMask.EnableRestartsCountPublisher) { sil->AddServiceInitializer(new TRestartsCountPublisher(runConfig)); } diff --git a/ydb/core/erasure/erasure.cpp b/ydb/core/erasure/erasure.cpp index d0201b5dd2..a41b027932 100644 --- a/ydb/core/erasure/erasure.cpp +++ b/ydb/core/erasure/erasure.cpp @@ -1,38 +1,38 @@ -#include "erasure.h" - -#include <util/generic/yexception.h> +#include "erasure.h" + +#include <util/generic/yexception.h> #include <util/system/unaligned_mem.h> #include <library/cpp/containers/stack_vector/stack_vec.h> #include <library/cpp/digest/crc32c/crc32c.h> - -#define MAX_TOTAL_PARTS 8 + +#define MAX_TOTAL_PARTS 8 #define MAX_LINES_IN_BLOCK 8 - -#define IS_VERBOSE 0 + +#define IS_VERBOSE 0 #define IS_TRACE 0 - -#if IS_VERBOSE -# include <util/stream/str.h> -# define VERBOSE_COUT(a) \ + +#if IS_VERBOSE +# include <util/stream/str.h> +# define VERBOSE_COUT(a) \ Cerr << a - + static TString DebugFormatBits(ui64 value) { - TStringStream s; - for (size_t i = 7; i >=4; --i) { - s << ((value >> i) & 1); - } - s << "_"; - for (size_t i = 3; i <= 3; --i) { - s << ((value >> i) & 1); - } - return s.Str(); -} -#else + TStringStream s; + for (size_t i = 7; i >=4; --i) { + s << ((value >> i) & 1); + } + s << "_"; + for (size_t i = 3; i <= 3; --i) { + s << ((value >> i) & 1); + } + return s.Str(); +} +#else # define VERBOSE_COUT(a) \ do { \ } while (false) -#endif - +#endif + #if IS_TRACE # define TRACE(a) \ Cerr << a @@ -42,8 +42,8 @@ static TString DebugFormatBits(ui64 value) { } while (false) #endif -namespace NKikimr { - +namespace NKikimr { + static void Refurbish(TString &str, ui64 size) { if (str.size() != size) { str = TString::Uninitialized(size); @@ -82,13 +82,13 @@ const char *TErasureType::ErasureSpeciesToStr(TErasureType::EErasureSpecies es) } } -struct TErasureParameters { +struct TErasureParameters { TErasureType::EErasureFamily ErasureFamily; - ui32 DataParts; // for parity - number of data parts, for mirror - 1 - ui32 ParityParts; // for parity - number of parity parts (1 | 2 | 3), for mirror - number of additional copies - ui32 Prime; // for parity - smallest prime number >= DataParts, for mirror - 1 -}; - + ui32 DataParts; // for parity - number of data parts, for mirror - 1 + ui32 ParityParts; // for parity - number of parity parts (1 | 2 | 3), for mirror - number of additional copies + ui32 Prime; // for parity - smallest prime number >= DataParts, for mirror - 1 +}; + static const std::array<TErasureParameters, TErasureType::ErasureSpeciesCount> ErasureSpeciesParameters{{ {TErasureType::ErasureMirror, 1, 0, 1} // 0 = ErasureSpicies::ErasureNone ,{TErasureType::ErasureMirror, 1, 2, 1} // 1 = ErasureSpicies::ErasureMirror3 @@ -110,7 +110,7 @@ static const std::array<TErasureParameters, TErasureType::ErasureSpeciesCount> E ,{TErasureType::ErasureParityStripe, 2, 2, 3} // 17 = ErasureSpicies::Erasure2Plus2Stripe ,{TErasureType::ErasureMirror, 1, 2, 1} // 18 = ErasureSpicies::ErasureMirror3of4 }}; - + void PadAndCrcAtTheEnd(char *data, ui64 dataSize, ui64 bufferSize) { ui64 marginSize = bufferSize - dataSize - sizeof(ui32); if (marginSize) { @@ -119,7 +119,7 @@ void PadAndCrcAtTheEnd(char *data, ui64 dataSize, ui64 bufferSize) { ui32 hash = Crc32c(data, dataSize); memcpy(data + bufferSize - sizeof(ui32), &hash, sizeof(ui32)); } - + bool CheckCrcAtTheEnd(TErasureType::ECrcMode crcMode, const TString& buf) { switch (crcMode) { case TErasureType::CrcModeNone: @@ -138,78 +138,78 @@ bool CheckCrcAtTheEnd(TErasureType::ECrcMode crcMode, const TString& buf) { ythrow TWithBackTrace<yexception>() << "Unknown crcMode = " << (i32)crcMode; } -class TBlockParams { -public: - ui64 DataSize; +class TBlockParams { +public: + ui64 DataSize; ui64 PartUserSize; ui64 PartContainerSize; - ui32 DataParts; - + ui32 DataParts; + ui64 BlockSize; // Whole data is split into blocks of BlockSize bytes ui64 ColumnSize; // Each block consists of DataParts columns, each containing ColumnSize bytes - ui64 WholeColumns; - ui64 LineCount; - - ui64 SmallPartColumns; - ui64 LargePartColumns; - - ui64 LastPartTailSize; - - ui64 SmallPartSize; - ui64 LargePartSize; - - ui32 FirstSmallPartIdx; - - ui32 TotalParts; + ui64 WholeColumns; + ui64 LineCount; + + ui64 SmallPartColumns; + ui64 LargePartColumns; + + ui64 LastPartTailSize; + + ui64 SmallPartSize; + ui64 LargePartSize; + + ui32 FirstSmallPartIdx; + + ui32 TotalParts; ui64 WholeBlocks; // Data consists of (WholeBlocks * BlockSize + TailSize) bytes - ui32 TailSize; - - ui32 Prime; + ui32 TailSize; + + ui32 Prime; TErasureType::ECrcMode CrcMode; - + using TBufferDataPart = TStackVec<ui64*, MAX_TOTAL_PARTS>; TBufferDataPart BufferDataPart; - char *Data; - + char *Data; + // Maximum blocks to be split during one run in incremental mode static constexpr ui64 IncrementalSplitMaxBlocks = 1024; TBlockParams(TErasureType::ECrcMode crcMode, const TErasureType &type, ui64 dataSize) { - DataSize = dataSize; + DataSize = dataSize; PartUserSize = type.PartUserSize(dataSize); PartContainerSize = type.PartSize(crcMode, dataSize); - DataParts = type.DataParts(); - - BlockSize = type.MinimalBlockSize(); - ColumnSize = BlockSize / DataParts; - WholeColumns = dataSize / ColumnSize; - LineCount = ColumnSize / sizeof(ui64); - - SmallPartColumns = WholeColumns / DataParts; - LargePartColumns = SmallPartColumns + 1; - - LastPartTailSize = DataSize - WholeColumns * ColumnSize; - - SmallPartSize = SmallPartColumns * ColumnSize; - LargePartSize = LargePartColumns * ColumnSize; - - FirstSmallPartIdx = WholeColumns % DataParts; - - TotalParts = type.TotalPartCount(); - WholeBlocks = DataSize / BlockSize; - TailSize = (ui32)(DataSize % BlockSize); - - Prime = type.Prime(); + DataParts = type.DataParts(); + + BlockSize = type.MinimalBlockSize(); + ColumnSize = BlockSize / DataParts; + WholeColumns = dataSize / ColumnSize; + LineCount = ColumnSize / sizeof(ui64); + + SmallPartColumns = WholeColumns / DataParts; + LargePartColumns = SmallPartColumns + 1; + + LastPartTailSize = DataSize - WholeColumns * ColumnSize; + + SmallPartSize = SmallPartColumns * ColumnSize; + LargePartSize = LargePartColumns * ColumnSize; + + FirstSmallPartIdx = WholeColumns % DataParts; + + TotalParts = type.TotalPartCount(); + WholeBlocks = DataSize / BlockSize; + TailSize = (ui32)(DataSize % BlockSize); + + Prime = type.Prime(); CrcMode = crcMode; Data = nullptr; - } - - template <bool isStripe> - void PrepareInputDataPointers(char* data) { - if (isStripe) { - Data = data; - } else { + } + + template <bool isStripe> + void PrepareInputDataPointers(char* data) { + if (isStripe) { + Data = data; + } else { // // All data is a matrix. Matrix cell is ColumnSize bytes. // Each part is a matrix column (continuous in memory). @@ -227,81 +227,81 @@ public: // BufferDataPart.resize(DataParts); - for (ui32 i = 0; i < FirstSmallPartIdx; ++i) { - BufferDataPart[i] = (ui64*)(data + i * LargePartSize); - } - for (ui32 i = FirstSmallPartIdx; i < DataParts; ++i) { - BufferDataPart[i] = (ui64*)(data + FirstSmallPartIdx * LargePartSize + - (i - FirstSmallPartIdx) * SmallPartSize); - } - } - } - - template <bool isStripe> + for (ui32 i = 0; i < FirstSmallPartIdx; ++i) { + BufferDataPart[i] = (ui64*)(data + i * LargePartSize); + } + for (ui32 i = FirstSmallPartIdx; i < DataParts; ++i) { + BufferDataPart[i] = (ui64*)(data + FirstSmallPartIdx * LargePartSize + + (i - FirstSmallPartIdx) * SmallPartSize); + } + } + } + + template <bool isStripe> void XorSplitWhole(char* data, TBufferDataPart &bufferDataPart, TDataPartSet &outPartSet, ui64 writePosition, ui32 blocks) { - ui64 readPosition = 0; - VERBOSE_COUT("XorSplitWhole:" << Endl); - for (ui64 blockIdx = 0; blockIdx < blocks; ++blockIdx) { - for (ui64 lineIdx = 0; lineIdx < LineCount; ++lineIdx) { - ui64 xored = 0; - for (ui32 part = 0; part < DataParts; ++part) { - ui64 sourceData; - if (isStripe) { - sourceData = *((ui64*)data + readPosition + part); - } else { - sourceData = bufferDataPart[part][readPosition]; - } - xored ^= sourceData; - VERBOSE_COUT(DebugFormatBits(sourceData) << ", "); + ui64 readPosition = 0; + VERBOSE_COUT("XorSplitWhole:" << Endl); + for (ui64 blockIdx = 0; blockIdx < blocks; ++blockIdx) { + for (ui64 lineIdx = 0; lineIdx < LineCount; ++lineIdx) { + ui64 xored = 0; + for (ui32 part = 0; part < DataParts; ++part) { + ui64 sourceData; + if (isStripe) { + sourceData = *((ui64*)data + readPosition + part); + } else { + sourceData = bufferDataPart[part][readPosition]; + } + xored ^= sourceData; + VERBOSE_COUT(DebugFormatBits(sourceData) << ", "); *(ui64*)(outPartSet.Parts[part].GetDataAt(writePosition)) = sourceData; - } + } *(ui64*)(outPartSet.Parts[DataParts].GetDataAt(writePosition)) = xored; - VERBOSE_COUT(DebugFormatBits(xored) << Endl); - writePosition += sizeof(ui64); - if (isStripe) { - readPosition += DataParts; - } else { - ++readPosition; - } - } - } - VERBOSE_COUT(Endl); - } - - template <bool isStripe> - void XorSplit(TDataPartSet &outPartSet) { - VERBOSE_COUT("XorSplit:" << Endl); - // Write data and parity + VERBOSE_COUT(DebugFormatBits(xored) << Endl); + writePosition += sizeof(ui64); + if (isStripe) { + readPosition += DataParts; + } else { + ++readPosition; + } + } + } + VERBOSE_COUT(Endl); + } + + template <bool isStripe> + void XorSplit(TDataPartSet &outPartSet) { + VERBOSE_COUT("XorSplit:" << Endl); + // Write data and parity XorSplitWhole<isStripe>(Data, BufferDataPart, outPartSet, 0ull, WholeBlocks); - - // Use the remaining parts to fill in the last block - // Write the tail of the data - if (TailSize) { + + // Use the remaining parts to fill in the last block + // Write the tail of the data + if (TailSize) { char lastBlockSource[MAX_TOTAL_PARTS * (MAX_TOTAL_PARTS - 2) * sizeof(ui64)] = {}; TBufferDataPart bufferDataPart; - PrepareLastBlockData<isStripe>(lastBlockSource, bufferDataPart); - - XorSplitWhole<isStripe>(lastBlockSource, bufferDataPart, outPartSet, WholeBlocks * ColumnSize, 1); - } - } - -#if IS_VERBOSE + PrepareLastBlockData<isStripe>(lastBlockSource, bufferDataPart); + + XorSplitWhole<isStripe>(lastBlockSource, bufferDataPart, outPartSet, WholeBlocks * ColumnSize, 1); + } + } + +#if IS_VERBOSE # define VERBOSE_COUT_BLOCK(IS_FULL_DATA, FULL_DATA_ELEM, PART_ELEM, COL_M, COL_M1) \ - do { \ - for (ui32 row = 0; row < LineCount; ++row) { \ - VERBOSE_COUT(Endl); \ - for (ui32 col = 0; col < DataParts; ++col) { \ - if (IS_FULL_DATA) { \ - VERBOSE_COUT(DebugFormatBits(FULL_DATA_ELEM(row, col)) << ", "); \ - } else { \ - VERBOSE_COUT(DebugFormatBits(PART_ELEM(row, col)) << ", "); \ - } \ - } \ - VERBOSE_COUT(DebugFormatBits(COL_M(row)) << ", "); \ - VERBOSE_COUT(DebugFormatBits(COL_M1(row))); \ - } \ - VERBOSE_COUT(Endl); \ - } while (false) + do { \ + for (ui32 row = 0; row < LineCount; ++row) { \ + VERBOSE_COUT(Endl); \ + for (ui32 col = 0; col < DataParts; ++col) { \ + if (IS_FULL_DATA) { \ + VERBOSE_COUT(DebugFormatBits(FULL_DATA_ELEM(row, col)) << ", "); \ + } else { \ + VERBOSE_COUT(DebugFormatBits(PART_ELEM(row, col)) << ", "); \ + } \ + } \ + VERBOSE_COUT(DebugFormatBits(COL_M(row)) << ", "); \ + VERBOSE_COUT(DebugFormatBits(COL_M1(row))); \ + } \ + VERBOSE_COUT(Endl); \ + } while (false) # define VERBOSE_COUT_BLOCK_M2(IS_FULL_DATA, FULL_DATA_ELEM, PART_ELEM, COL_M, COL_M1, COL_M2) \ do { \ for (ui32 row = 0; row < LineCount; ++row) { \ @@ -319,15 +319,15 @@ public: } \ VERBOSE_COUT(Endl); \ } while (false) -#else +#else # define VERBOSE_COUT_BLOCK(IS_FULL_DATA, FULL_DATA_ELEM, PART_ELEM, COL_M, COL_M1) \ do { \ } while (false) # define VERBOSE_COUT_BLOCK_M2(IS_FULL_DATA, FULL_DATA_ELEM, PART_ELEM, COL_M, COL_M1, COL_M2) \ do { \ } while (false) -#endif - +#endif + template <bool isStripe, bool isFromDataParts> void EoSplitWhole(char *data, TBufferDataPart &bufferDataPart, TDataPartSet &outPartSet, ui64 writePosition, ui64 firstBlock, ui64 lastBlock) { @@ -335,8 +335,8 @@ public: const ui32 m = Prime; for (ui64 blockIdx = firstBlock; blockIdx != lastBlock; ++blockIdx) { -#define IN_EL_STRIPE(row, column) *((ui64*)data + (blockIdx * LineCount + (row)) * DataParts + (column)) -#define IN_EL_BLOCK(row, column) bufferDataPart[column][blockIdx * LineCount + (row)] +#define IN_EL_STRIPE(row, column) *((ui64*)data + (blockIdx * LineCount + (row)) * DataParts + (column)) +#define IN_EL_BLOCK(row, column) bufferDataPart[column][blockIdx * LineCount + (row)] #define OUT_EL(row, column) *((ui64*)(outPartSet.Parts[column].GetDataAt(writePosition + (row) * sizeof(ui64)))) #define OUT_M(row) *((ui64*)(outPartSet.Parts[DataParts].GetDataAt(writePosition + (row) *sizeof(ui64)))) #define OUT_M1(row) *((ui64*)(outPartSet.Parts[lastPartIdx].GetDataAt(writePosition + (row) * sizeof(ui64)))) @@ -344,11 +344,11 @@ public: OUT_EL((row), (column)) :\ (isStripe ? IN_EL_STRIPE((row), (column)) : IN_EL_BLOCK((row), (column)))) - if (isStripe) { - VERBOSE_COUT_BLOCK(true, IN_EL_STRIPE, IN_EL_STRIPE, OUT_M, OUT_M1); - } else { - VERBOSE_COUT_BLOCK(true, IN_EL_BLOCK, IN_EL_BLOCK, OUT_M, OUT_M1); - } + if (isStripe) { + VERBOSE_COUT_BLOCK(true, IN_EL_STRIPE, IN_EL_STRIPE, OUT_M, OUT_M1); + } else { + VERBOSE_COUT_BLOCK(true, IN_EL_BLOCK, IN_EL_BLOCK, OUT_M, OUT_M1); + } ui64 adj = 0; const ui32 mint = (m - 2 < LineCount ? 1 : m - 2 - LineCount); VERBOSE_COUT("mint = " << mint << " m - 1 - t = " << (m - 1 - mint) << Endl); @@ -357,7 +357,7 @@ public: VERBOSE_COUT("s: " << adj << " el[" << (m - 1 - t) << ", " << t << "]: " << DebugFormatBits(IN_EL(m - 1 - t, t)) << Endl); } - for (ui32 l = 0; l < LineCount; ++l) { + for (ui32 l = 0; l < LineCount; ++l) { ui64 sourceData = IN_EL(l, 0); OUT_M1(l) = adj ^ sourceData; OUT_M(l) = sourceData; @@ -373,8 +373,8 @@ public: OUT_EL(l, t) = sourceData; } VERBOSE_COUT("OUT_M(" << l << ") = " << DebugFormatBits(OUT_M(l)) << Endl); - } - } + } + } for (ui32 t = 1; t < DataParts; ++t) { for (ui32 l = 0; l < LineCount - t; ++l) { ui32 row = l + t; @@ -417,20 +417,20 @@ public: VERBOSE_COUT_BLOCK(true, IN_EL_BLOCK, IN_EL_BLOCK, OUT_M, OUT_M1); } ui64 s1 = 0; - const ui32 mint = (m - 2 < LineCount ? 1 : m - 2 - LineCount); - VERBOSE_COUT("mint = " << mint << " m - 1 - t = " << (m - 1 - mint) << Endl); - for (ui32 t = mint; t < DataParts; ++t) { + const ui32 mint = (m - 2 < LineCount ? 1 : m - 2 - LineCount); + VERBOSE_COUT("mint = " << mint << " m - 1 - t = " << (m - 1 - mint) << Endl); + for (ui32 t = mint; t < DataParts; ++t) { s1 ^= IN_EL(m - 1 - t, t); VERBOSE_COUT("s1: " << s1 << " el[" << (m - 1 - t) << ", " << t << "]: " << DebugFormatBits(isStripe ? IN_EL_STRIPE(m - 1 - t, t): IN_EL_BLOCK(m - 1 - t, t)) << Endl); - } + } ui64 s2 = 0; for (ui32 t = 1; t < DataParts; ++t) { s2 ^= IN_EL(t - 1, t); VERBOSE_COUT("s2: " << s2 << " el[" << (t - 1) << ", " << t << "]: " << DebugFormatBits(IN_EL(t - 1, t)) << Endl); } - for (ui32 l = 0; l < LineCount; ++l) { + for (ui32 l = 0; l < LineCount; ++l) { ui64 dataIN_EL = IN_EL(l, 0); OUT_M(l) = dataIN_EL; OUT_M1(l) = s1 ^ dataIN_EL; @@ -447,7 +447,7 @@ public: if (row1 < LineCount) { OUT_M1(row1) ^= dataIN_EL; VERBOSE_COUT(IN_EL(row1, t) << Endl); - } + } ui32 row2 = (m + l - t) % m; if (row2 < LineCount) { OUT_M2(row2) ^= dataIN_EL; @@ -456,8 +456,8 @@ public: if (!isFromDataParts) { OUT_EL(l, t) = dataIN_EL; } - } - } + } + } #if IS_VERBOSE for (ui32 l = 0; l < LineCount; ++l) { VERBOSE_COUT("OUT_M1(" << l << ") = " << DebugFormatBits(OUT_M1(l)) << Endl); @@ -467,65 +467,65 @@ public: writePosition += ColumnSize; } #undef OUT_M2 -#undef OUT_M1 -#undef OUT_M -#undef OUT_EL +#undef OUT_M1 +#undef OUT_M +#undef OUT_EL #undef IN_EL -#undef IN_EL_BLOCK -#undef IN_EL_STRIPE - } - - template<bool isStripe> +#undef IN_EL_BLOCK +#undef IN_EL_STRIPE + } + + template<bool isStripe> void PrepareLastBlockData(char *lastBlockSource, TBufferDataPart &bufferDataPart) { - if (isStripe) { - memcpy(lastBlockSource, Data + WholeBlocks * BlockSize, TailSize); - memset(lastBlockSource + TailSize, 0, BlockSize - TailSize); - } else { + if (isStripe) { + memcpy(lastBlockSource, Data + WholeBlocks * BlockSize, TailSize); + memset(lastBlockSource + TailSize, 0, BlockSize - TailSize); + } else { bufferDataPart.resize(DataParts); - for (ui32 i = 0; i < FirstSmallPartIdx; ++i) { - bufferDataPart[i] = (ui64*)(lastBlockSource + i * ColumnSize); + for (ui32 i = 0; i < FirstSmallPartIdx; ++i) { + bufferDataPart[i] = (ui64*)(lastBlockSource + i * ColumnSize); memcpy(bufferDataPart[i], reinterpret_cast<const char*>(BufferDataPart[i]) + WholeBlocks * ColumnSize, ColumnSize); - } - for (ui32 i = FirstSmallPartIdx; i < DataParts - 1; ++i) { - bufferDataPart[i] = (ui64*)(lastBlockSource + i * ColumnSize); - memset(bufferDataPart[i], 0, ColumnSize); - } - bufferDataPart[DataParts - 1] = (ui64*)(lastBlockSource + (DataParts - 1) * ColumnSize); + } + for (ui32 i = FirstSmallPartIdx; i < DataParts - 1; ++i) { + bufferDataPart[i] = (ui64*)(lastBlockSource + i * ColumnSize); + memset(bufferDataPart[i], 0, ColumnSize); + } + bufferDataPart[DataParts - 1] = (ui64*)(lastBlockSource + (DataParts - 1) * ColumnSize); char *lastColumnData = reinterpret_cast<char*>(bufferDataPart[DataParts - 1]); - if (LastPartTailSize) { - memcpy(lastColumnData, + if (LastPartTailSize) { + memcpy(lastColumnData, reinterpret_cast<const char*>(BufferDataPart[DataParts - 1]) + WholeBlocks * ColumnSize, - LastPartTailSize); - } - memset(lastColumnData + LastPartTailSize, 0, ColumnSize - LastPartTailSize); - } - } - - template <bool isStripe> + LastPartTailSize); + } + memset(lastColumnData + LastPartTailSize, 0, ColumnSize - LastPartTailSize); + } + } + + template <bool isStripe> void PrepareLastBlockPointers(char *lastBlockSource, TBufferDataPart &bufferDataPart) { - if (!isStripe) { + if (!isStripe) { bufferDataPart.resize(DataParts); - for (ui32 i = 0; i < DataParts; ++i) { - bufferDataPart[i] = (ui64*)(lastBlockSource + i * ColumnSize); - } - } - } - - template <bool isStripe> + for (ui32 i = 0; i < DataParts; ++i) { + bufferDataPart[i] = (ui64*)(lastBlockSource + i * ColumnSize); + } + } + } + + template <bool isStripe> void PlaceLastBlock(TBufferDataPart &bufferDataPart, char *lastBlock) { - if (isStripe) { - memcpy(Data + WholeBlocks * BlockSize, lastBlock, TailSize); - } else { - for (ui32 i = 0; i < FirstSmallPartIdx; ++i) { + if (isStripe) { + memcpy(Data + WholeBlocks * BlockSize, lastBlock, TailSize); + } else { + for (ui32 i = 0; i < FirstSmallPartIdx; ++i) { memcpy(reinterpret_cast<char*>(BufferDataPart[i]) + WholeBlocks * ColumnSize, - bufferDataPart[i], ColumnSize); - } + bufferDataPart[i], ColumnSize); + } memcpy(reinterpret_cast<char*>(BufferDataPart[DataParts - 1]) + WholeBlocks * ColumnSize, - bufferDataPart[DataParts - 1], LastPartTailSize); - } - } - + bufferDataPart[DataParts - 1], LastPartTailSize); + } + } + template <bool isStripe, bool isFromDataParts> void StarSplit(TDataPartSet &outPartSet) { // Use all whole columns of all the parts @@ -545,7 +545,7 @@ public: } template <bool isStripe, bool isFromDataParts, bool isIncremental = false> - void EoSplit(TDataPartSet &outPartSet) { + void EoSplit(TDataPartSet &outPartSet) { ui64 readPosition = isIncremental? ColumnSize * outPartSet.CurBlockIdx: 0; ui64 firstBlock = isIncremental? outPartSet.CurBlockIdx: 0; ui64 lastBlock = isIncremental? Min(WholeBlocks, firstBlock + IncrementalSplitMaxBlocks): WholeBlocks; @@ -563,24 +563,24 @@ public: << " hasTail# " << hasTail << " fullDataSize# " << outPartSet.FullDataSize << Endl); - // Use all whole columns of all the parts + // Use all whole columns of all the parts EoSplitWhole<isStripe, isFromDataParts>(Data, BufferDataPart, outPartSet, readPosition, firstBlock, lastBlock); - - // Use the remaining parts to fill in the last block - // Write the tail of the data + + // Use the remaining parts to fill in the last block + // Write the tail of the data if (hasTail && outPartSet.IsSplitDone()) { char lastBlockSource[MAX_TOTAL_PARTS * (MAX_TOTAL_PARTS - 2) * sizeof(ui64)] = {}; TBufferDataPart bufferDataPart; if (!isFromDataParts) { PrepareLastBlockData<isStripe>(lastBlockSource, bufferDataPart); } - + EoSplitWhole<isStripe, isFromDataParts>(lastBlockSource, bufferDataPart, outPartSet, WholeBlocks * ColumnSize, 0, 1); - } - } - - void GlueBlockParts(char* dst, const TDataPartSet& partSet) const { + } + } + + void GlueBlockParts(char* dst, const TDataPartSet& partSet) const { if (LargePartSize) { for (ui32 i = 0; i < FirstSmallPartIdx; ++i) { memcpy(dst + i * LargePartSize, @@ -594,37 +594,37 @@ public: partSet.Parts[i].GetDataAt(0), SmallPartSize); } } - } + } if (SmallPartSize + LastPartTailSize) { ui64 offset = LargePartSize * FirstSmallPartIdx + ((DataParts - 1) - FirstSmallPartIdx) * SmallPartSize; memcpy(dst + offset, partSet.Parts[DataParts - 1].GetDataAt(0), SmallPartSize + LastPartTailSize); - } - return; - } - - // s = a[(m + missingDataPartIdx - 1) % m][m + 1]; - // for (l = 0; l < m; ++l) { - // s ^= a[(m + missingDataPartIdx - l - 1) % m][l]; - // } - // for (k = 0; k < m - 1; ++k) { - // ui64 res = s; - // for (l = 0; l < missingDataPartIdx; ++l) { - // res ^= a[(m + k + missingDataPartIdx - l) % m][l]; - // } - // for (l = missingDataPartIdx + 1; l < m; ++l) { - // res ^= a[(m + k + missingDataPartIdx - l) % m][l]; - // } - // a[k][missingDataPartIdx] = res; - // } + } + return; + } + + // s = a[(m + missingDataPartIdx - 1) % m][m + 1]; + // for (l = 0; l < m; ++l) { + // s ^= a[(m + missingDataPartIdx - l - 1) % m][l]; + // } + // for (k = 0; k < m - 1; ++k) { + // ui64 res = s; + // for (l = 0; l < missingDataPartIdx; ++l) { + // res ^= a[(m + k + missingDataPartIdx - l) % m][l]; + // } + // for (l = missingDataPartIdx + 1; l < m; ++l) { + // res ^= a[(m + k + missingDataPartIdx - l) % m][l]; + // } + // a[k][missingDataPartIdx] = res; + // } template <bool isStripe, bool restoreParts, bool restoreFullData, bool reversed, bool restoreParityParts> void EoDiagonalRestorePartWhole(char *data, TBufferDataPart &bufferDataPart, TDataPartSet &partSet, ui64 readPosition, - ui32 beginBlockIdx, ui32 endBlockIdx, ui32 missingDataPartIdx) { + ui32 beginBlockIdx, ui32 endBlockIdx, ui32 missingDataPartIdx) { ui32 lastColumn = reversed ? DataParts + 2 : DataParts + 1; const ui32 m = Prime; - // Use all whole columns of all the parts - for (ui64 blockIdx = beginBlockIdx; blockIdx < endBlockIdx; ++blockIdx) { + // Use all whole columns of all the parts + for (ui64 blockIdx = beginBlockIdx; blockIdx < endBlockIdx; ++blockIdx) { #define RIGHT_ROW(row) (reversed ? LineCount - 1 - (row) : (row)) #define OUT_EL_BLOCK(row, column) bufferDataPart[column][blockIdx * LineCount + RIGHT_ROW(row)] #define OUT_EL_STRIPE(row, column) *((ui64*)data + (blockIdx * LineCount + RIGHT_ROW(row)) * DataParts + (column)) @@ -632,79 +632,79 @@ public: #define IN_M(row) *((ui64*)(partSet.Parts[DataParts].GetDataAt(readPosition + RIGHT_ROW(row) * sizeof(ui64)))) #define IN_M12(row) *((ui64*)(partSet.Parts[lastColumn].GetDataAt(readPosition + RIGHT_ROW(row) * sizeof(ui64)))) VERBOSE_COUT_BLOCK(true, IN_EL, IN_EL, IN_M, IN_M12); - ui64 s = 0; - ui32 colLimit = DataParts; - ui32 rowLimit = LineCount; - { - ui32 idx = (m + missingDataPartIdx - 1) % m; - if (idx < rowLimit) { + ui64 s = 0; + ui32 colLimit = DataParts; + ui32 rowLimit = LineCount; + { + ui32 idx = (m + missingDataPartIdx - 1) % m; + if (idx < rowLimit) { s = IN_M12(idx); - VERBOSE_COUT("s(" << idx << ", m1): " << DebugFormatBits(s) << Endl); - } - } - for (ui32 l = 0; l < colLimit; ++l) { - ui32 idx = (m + missingDataPartIdx - l - 1) % m; - if (idx < LineCount) { - ui64 value = IN_EL(idx, l); - s ^= value; - if (restoreFullData) { - VERBOSE_COUT("a [" << idx << ", " << l << "] = " << DebugFormatBits(value) << Endl); - if (isStripe) { - OUT_EL_STRIPE(idx, l) = value; - } else { - OUT_EL_BLOCK(idx, l) = value; - } - } - } - } - VERBOSE_COUT("s: " << DebugFormatBits(s) << Endl); - for (ui32 k = 0; k < LineCount; ++k) { - ui64 res = s; - for (ui32 l = 0; l < missingDataPartIdx; ++l) { - ui32 idx = (m + k + missingDataPartIdx - l) % m; - if (idx < LineCount) { - ui64 value = IN_EL(idx, l); - res ^= value; - if (restoreFullData) { - VERBOSE_COUT("b [" << idx << ", " << l << "] = " << DebugFormatBits(value) << Endl); - if (isStripe) { - OUT_EL_STRIPE(idx, l) = value; - } else { - OUT_EL_BLOCK(idx, l) = value; - } - } - } - } - for (ui32 l = missingDataPartIdx + 1; l < colLimit; ++l) { - ui32 idx = (m + k + missingDataPartIdx - l) % m; - if (idx < LineCount) { - ui64 value = IN_EL(idx, l); - res ^= value; - if (restoreFullData) { - VERBOSE_COUT("c [" << idx << ", " << l << "] = " << DebugFormatBits(value) << Endl); - if (isStripe) { - OUT_EL_STRIPE(idx, l) = value; - } else { - OUT_EL_BLOCK(idx, l) = value; - } - } - } - } - ui32 idx = (m + k + missingDataPartIdx) % m; - if (idx < LineCount) { - VERBOSE_COUT("idx = " << idx); + VERBOSE_COUT("s(" << idx << ", m1): " << DebugFormatBits(s) << Endl); + } + } + for (ui32 l = 0; l < colLimit; ++l) { + ui32 idx = (m + missingDataPartIdx - l - 1) % m; + if (idx < LineCount) { + ui64 value = IN_EL(idx, l); + s ^= value; + if (restoreFullData) { + VERBOSE_COUT("a [" << idx << ", " << l << "] = " << DebugFormatBits(value) << Endl); + if (isStripe) { + OUT_EL_STRIPE(idx, l) = value; + } else { + OUT_EL_BLOCK(idx, l) = value; + } + } + } + } + VERBOSE_COUT("s: " << DebugFormatBits(s) << Endl); + for (ui32 k = 0; k < LineCount; ++k) { + ui64 res = s; + for (ui32 l = 0; l < missingDataPartIdx; ++l) { + ui32 idx = (m + k + missingDataPartIdx - l) % m; + if (idx < LineCount) { + ui64 value = IN_EL(idx, l); + res ^= value; + if (restoreFullData) { + VERBOSE_COUT("b [" << idx << ", " << l << "] = " << DebugFormatBits(value) << Endl); + if (isStripe) { + OUT_EL_STRIPE(idx, l) = value; + } else { + OUT_EL_BLOCK(idx, l) = value; + } + } + } + } + for (ui32 l = missingDataPartIdx + 1; l < colLimit; ++l) { + ui32 idx = (m + k + missingDataPartIdx - l) % m; + if (idx < LineCount) { + ui64 value = IN_EL(idx, l); + res ^= value; + if (restoreFullData) { + VERBOSE_COUT("c [" << idx << ", " << l << "] = " << DebugFormatBits(value) << Endl); + if (isStripe) { + OUT_EL_STRIPE(idx, l) = value; + } else { + OUT_EL_BLOCK(idx, l) = value; + } + } + } + } + ui32 idx = (m + k + missingDataPartIdx) % m; + if (idx < LineCount) { + VERBOSE_COUT("idx = " << idx); res ^= IN_M12(idx); // This is missing in the article! - } - if (restoreFullData) { - VERBOSE_COUT("out [" << k << ", " << missingDataPartIdx << "] = " << DebugFormatBits(res) << Endl); - if (isStripe) { - OUT_EL_STRIPE(k, missingDataPartIdx) = res; - } else { - OUT_EL_BLOCK(k, missingDataPartIdx) = res; - } - } - if (restoreParts) { - IN_EL(k, missingDataPartIdx) = res; + } + if (restoreFullData) { + VERBOSE_COUT("out [" << k << ", " << missingDataPartIdx << "] = " << DebugFormatBits(res) << Endl); + if (isStripe) { + OUT_EL_STRIPE(k, missingDataPartIdx) = res; + } else { + OUT_EL_BLOCK(k, missingDataPartIdx) = res; + } + } + if (restoreParts) { + IN_EL(k, missingDataPartIdx) = res; if (restoreParityParts) { ui64 tmp = 0; for (ui32 l = 0; l < DataParts; ++l) { @@ -712,26 +712,26 @@ public: } IN_M(k) = tmp; } - } - } - if (isStripe) { + } + } + if (isStripe) { VERBOSE_COUT_BLOCK(restoreFullData, OUT_EL_STRIPE, IN_EL, IN_M, IN_M12); - } else { + } else { VERBOSE_COUT_BLOCK(restoreFullData, OUT_EL_BLOCK, IN_EL, IN_M, IN_M12); - } + } #undef IN_M12 -#undef IN_M -#undef IN_EL -#undef OUT_EL_BLOCK -#undef OUT_EL_STRIPE +#undef IN_M +#undef IN_EL +#undef OUT_EL_BLOCK +#undef OUT_EL_STRIPE #undef RIGHT_ROW - readPosition += ColumnSize; - } - } - + readPosition += ColumnSize; + } + } + template <bool isStripe, bool restoreParts, bool restoreFullData, bool reversed, bool restoreParityParts> - void EoDiagonalRestorePart(TDataPartSet& partSet, ui32 missingDataPartIdx) { + void EoDiagonalRestorePart(TDataPartSet& partSet, ui32 missingDataPartIdx) { TRACE("Line# " << __LINE__ << " Diagonal restore: LineCount=" << LineCount << Endl); TRACE("EoDiagonalRestorePart fullSize# " << partSet.FullDataSize @@ -750,25 +750,25 @@ public: EoDiagonalRestorePartWhole<isStripe, restoreParts, restoreFullData, reversed, restoreParityParts>( Data, BufferDataPart, partSet, readPosition, beginBlock, endBlock, missingDataPartIdx); - // Read the tail of the data + // Read the tail of the data if (TailSize && (partSet.Parts[presentPartIdx].Size + readPosition > WholeBlocks * ColumnSize)) { TRACE("EoDiagonalRestorePart tail" << Endl); char lastBlock[MAX_TOTAL_PARTS * (MAX_TOTAL_PARTS - 2) * sizeof(ui64)] = {}; TBufferDataPart bufferDataPart; - PrepareLastBlockPointers<isStripe>(lastBlock, bufferDataPart); - + PrepareLastBlockPointers<isStripe>(lastBlock, bufferDataPart); + EoDiagonalRestorePartWhole<isStripe, restoreParts, restoreFullData, reversed, restoreParityParts>(lastBlock, bufferDataPart, partSet, WholeBlocks * ColumnSize, 0, 1, missingDataPartIdx); - - if (restoreFullData) { - PlaceLastBlock<isStripe>(bufferDataPart, lastBlock); - } - } + + if (restoreFullData) { + PlaceLastBlock<isStripe>(bufferDataPart, lastBlock); + } + } if (restoreParts && missingDataPartIdx < partSet.Parts.size()) { PadAndCrcPart(partSet, missingDataPartIdx); } - } - + } + template <bool isStripe, bool restoreParts, bool restoreFullData, bool restoreParityParts> void StarMainRestorePartsWholeSymmetric(char *data, TBufferDataPart &bufferDataPart, TDataPartSet& partSet, ui64 readPosition, ui32 endBlockIdx, ui32 missingDataPartIdxA, ui32 missingDataPartIdxB, @@ -838,7 +838,7 @@ public: if (row < m) { s2[row] ^= IN_EL(i, j); VERBOSE_COUT("s2[" << i << "] ^= IN_EL(" << row << "," << j << ");" << Endl;); - + } } } @@ -892,9 +892,9 @@ public: const ui32 r = missingDataPartIdxA; const ui32 s = missingDataPartIdxB; const ui32 dr = (m + s - r) % m; - // Use all whole columns of all the parts -#define OUT_EL_BLOCK(row, column) bufferDataPart[column][blockIdx * LineCount + (row)] -#define OUT_EL_STRIPE(row, column) *((ui64*)data + (blockIdx * LineCount + (row)) * DataParts + (column)) + // Use all whole columns of all the parts +#define OUT_EL_BLOCK(row, column) bufferDataPart[column][blockIdx * LineCount + (row)] +#define OUT_EL_STRIPE(row, column) *((ui64*)data + (blockIdx * LineCount + (row)) * DataParts + (column)) #define OUT_EL(row, column) (isStripe ? OUT_EL_STRIPE(row, column) : OUT_EL_BLOCK(row, column)) #define IN_EL(row, column) *((ui64*)(partSet.Parts[column].GetDataAt(readPosition + (row) * sizeof(ui64)))) #define IN_M(row) *((ui64*)(partSet.Parts[DataParts].GetDataAt(readPosition + (row) * sizeof(ui64)))) @@ -987,129 +987,129 @@ public: #define IN_M12(row) *((ui64*)(partSet.Parts[lastColumn].GetDataAt(readPosition + RIGHT_ROW(row) * sizeof(ui64)))) for (ui64 blockIdx = 0; blockIdx < endBlockIdx; ++blockIdx) { VERBOSE_COUT_BLOCK(true, IN_EL, IN_EL, IN_M, IN_M12); - // compute diagonal partiy s - ui64 s = 0; + // compute diagonal partiy s + ui64 s = 0; ui64 s0[MAX_LINES_IN_BLOCK]; - for (ui32 l = 0; l < LineCount; ++l) { + for (ui32 l = 0; l < LineCount; ++l) { ui64 tmp = IN_M(l); s0[l] = tmp; s ^= tmp; s ^= IN_M12(l); - VERBOSE_COUT("Diag [l,m] s:" << DebugFormatBits(s) << Endl); - } - - // compute horizontal syndromes s0 + VERBOSE_COUT("Diag [l,m] s:" << DebugFormatBits(s) << Endl); + } + + // compute horizontal syndromes s0 for (ui32 t = 0; t < DataParts; ++t) { if (t == missingDataPartIdxA || t == missingDataPartIdxB) { continue; - } + } for (ui32 l = 0; l < LineCount; ++l) { ui64 val = IN_EL(l, t); s0[l] ^= val; - if (restoreFullData) { + if (restoreFullData) { OUT_EL(l, t) = val; - } - } - } - - // compute diagonal syndromes s1 + } + } + } + + // compute diagonal syndromes s1 ui64 s1[MAX_LINES_IN_BLOCK]; - for (ui32 u = 0; u < m; ++u) { - s1[u] = s; - VERBOSE_COUT("S1 = s = " << DebugFormatBits(s1[u]) << Endl); - if (u < LineCount) { + for (ui32 u = 0; u < m; ++u) { + s1[u] = s; + VERBOSE_COUT("S1 = s = " << DebugFormatBits(s1[u]) << Endl); + if (u < LineCount) { s1[u] ^= IN_M12(u); - VERBOSE_COUT("S1 ^= a[" << u << ", m+1] = " << DebugFormatBits(s1[u]) << Endl); - } - for (ui32 l = 0; l < missingDataPartIdxA; ++l) { - ui32 idx = (m + u - l) % m; - if (idx < LineCount) { - ui64 val = IN_EL(idx, l); - s1[u] ^= val; - } - VERBOSE_COUT("S1 ^= a[" << idx << ", " << l << "] = " << DebugFormatBits(s1[u]) << Endl); - } - for (ui32 l = missingDataPartIdxA + 1; l < missingDataPartIdxB; ++l) { - ui32 idx = (m + u - l) % m; - if (idx < LineCount) { - ui64 val = IN_EL(idx, l); - s1[u] ^= val; - } - VERBOSE_COUT("S1 ^= a[" << idx << ", " << l << "] = " << DebugFormatBits(s1[u]) << Endl); - } - for (ui32 l = missingDataPartIdxB + 1; l < DataParts; ++l) { - ui32 idx = (m + u - l) % m; - if (idx < LineCount) { - ui64 val = IN_EL(idx, l); - s1[u] ^= val; - } - VERBOSE_COUT("S1 ^= a[" << idx << ", " << l << "] = " << DebugFormatBits(s1[u]) << Endl); - } - VERBOSE_COUT("S1[" << u << "] = " << DebugFormatBits(s1[u]) << Endl); - } - - s = (m - (missingDataPartIdxB - missingDataPartIdxA) - 1) % m; + VERBOSE_COUT("S1 ^= a[" << u << ", m+1] = " << DebugFormatBits(s1[u]) << Endl); + } + for (ui32 l = 0; l < missingDataPartIdxA; ++l) { + ui32 idx = (m + u - l) % m; + if (idx < LineCount) { + ui64 val = IN_EL(idx, l); + s1[u] ^= val; + } + VERBOSE_COUT("S1 ^= a[" << idx << ", " << l << "] = " << DebugFormatBits(s1[u]) << Endl); + } + for (ui32 l = missingDataPartIdxA + 1; l < missingDataPartIdxB; ++l) { + ui32 idx = (m + u - l) % m; + if (idx < LineCount) { + ui64 val = IN_EL(idx, l); + s1[u] ^= val; + } + VERBOSE_COUT("S1 ^= a[" << idx << ", " << l << "] = " << DebugFormatBits(s1[u]) << Endl); + } + for (ui32 l = missingDataPartIdxB + 1; l < DataParts; ++l) { + ui32 idx = (m + u - l) % m; + if (idx < LineCount) { + ui64 val = IN_EL(idx, l); + s1[u] ^= val; + } + VERBOSE_COUT("S1 ^= a[" << idx << ", " << l << "] = " << DebugFormatBits(s1[u]) << Endl); + } + VERBOSE_COUT("S1[" << u << "] = " << DebugFormatBits(s1[u]) << Endl); + } + + s = (m - (missingDataPartIdxB - missingDataPartIdxA) - 1) % m; ui64 aVal = 0; - do { - if (s < LineCount) { - ui64 bVal = s1[(missingDataPartIdxB + s) % m]; - VERBOSE_COUT("bVal = s1[" << ((missingDataPartIdxB + s ) % m) << "] = " << DebugFormatBits(bVal) - << Endl); - ui32 bRow = (m + s + (missingDataPartIdxB - missingDataPartIdxA)) % m; - if (bRow < LineCount) { - VERBOSE_COUT("read [" << bRow << ", " << missingDataPartIdxA << "] = "); + do { + if (s < LineCount) { + ui64 bVal = s1[(missingDataPartIdxB + s) % m]; + VERBOSE_COUT("bVal = s1[" << ((missingDataPartIdxB + s ) % m) << "] = " << DebugFormatBits(bVal) + << Endl); + ui32 bRow = (m + s + (missingDataPartIdxB - missingDataPartIdxA)) % m; + if (bRow < LineCount) { + VERBOSE_COUT("read [" << bRow << ", " << missingDataPartIdxA << "] = "); bVal ^= aVal; - if (restoreParts) { - VERBOSE_COUT("i " << DebugFormatBits(IN_EL(bRow, missingDataPartIdxA)) << Endl); - } else { + if (restoreParts) { + VERBOSE_COUT("i " << DebugFormatBits(IN_EL(bRow, missingDataPartIdxA)) << Endl); + } else { VERBOSE_COUT("o " << DebugFormatBits(OUT_EL_STRIPE(bRow,missingDataPartIdxA)) << Endl); - } - } - if (restoreParts) { - IN_EL(s, missingDataPartIdxB) = bVal; - VERBOSE_COUT("write [" << s << ", " << missingDataPartIdxB << "] = " << DebugFormatBits(bVal) - << Endl); - } - if (restoreFullData) { + } + } + if (restoreParts) { + IN_EL(s, missingDataPartIdxB) = bVal; + VERBOSE_COUT("write [" << s << ", " << missingDataPartIdxB << "] = " << DebugFormatBits(bVal) + << Endl); + } + if (restoreFullData) { OUT_EL(s, missingDataPartIdxB) = bVal; - VERBOSE_COUT("write [" << s << ", " << missingDataPartIdxB << "] = " << DebugFormatBits(bVal) - << Endl); - } - + VERBOSE_COUT("write [" << s << ", " << missingDataPartIdxB << "] = " << DebugFormatBits(bVal) + << Endl); + } + aVal = s0[s]; - VERBOSE_COUT("aVal = s0[" << s << "] = " << DebugFormatBits(aVal) << Endl); - VERBOSE_COUT("read [" << s << ", " << missingDataPartIdxB << "] = "); + VERBOSE_COUT("aVal = s0[" << s << "] = " << DebugFormatBits(aVal) << Endl); + VERBOSE_COUT("read [" << s << ", " << missingDataPartIdxB << "] = "); aVal ^= bVal; - if (restoreParts) { - VERBOSE_COUT("i " << DebugFormatBits(IN_EL(s,missingDataPartIdxB)) << Endl); - } else { + if (restoreParts) { + VERBOSE_COUT("i " << DebugFormatBits(IN_EL(s,missingDataPartIdxB)) << Endl); + } else { VERBOSE_COUT("o " << DebugFormatBits(OUT_EL_STRIPE(s,missingDataPartIdxB)) << Endl); - } - - if (restoreParts) { - IN_EL(s, missingDataPartIdxA) = aVal; - VERBOSE_COUT("write [" << s << ", " << missingDataPartIdxA << "] = " << DebugFormatBits(bVal) - << Endl); - } - if (restoreFullData) { + } + + if (restoreParts) { + IN_EL(s, missingDataPartIdxA) = aVal; + VERBOSE_COUT("write [" << s << ", " << missingDataPartIdxA << "] = " << DebugFormatBits(bVal) + << Endl); + } + if (restoreFullData) { OUT_EL(s, missingDataPartIdxA) = aVal; - VERBOSE_COUT("write [" << s << ", " << missingDataPartIdxA << "] = " << DebugFormatBits(bVal) - << Endl); - } - } - - s = (m + s - (missingDataPartIdxB - missingDataPartIdxA)) % m; - } while (s != m - 1); + VERBOSE_COUT("write [" << s << ", " << missingDataPartIdxA << "] = " << DebugFormatBits(bVal) + << Endl); + } + } + + s = (m + s - (missingDataPartIdxB - missingDataPartIdxA)) % m; + } while (s != m - 1); VERBOSE_COUT_BLOCK(restoreFullData, OUT_EL, IN_EL, IN_M, IN_M12); #undef IN_M12 -#undef IN_M -#undef IN_EL -#undef OUT_EL_BLOCK -#undef OUT_EL_STRIPE - readPosition += ColumnSize; - } - } - +#undef IN_M +#undef IN_EL +#undef OUT_EL_BLOCK +#undef OUT_EL_STRIPE + readPosition += ColumnSize; + } + } + template <bool isStripe, bool restoreParts, bool restoreFullData, bool restoreParityParts> void StarRestoreHorizontalPart(TDataPartSet& partSet, ui32 missingDataPartIdxA, ui32 missingDataPartIdxB) { @@ -1177,9 +1177,9 @@ public: } template <bool isStripe, bool restoreParts, bool restoreFullData, bool reversed, bool restoreParityParts> - void EoMainRestoreParts(TDataPartSet& partSet, ui32 missingDataPartIdxA, ui32 missingDataPartIdxB) { - // Read data and parity - VERBOSE_COUT("EoMainRestorePart" << Endl); + void EoMainRestoreParts(TDataPartSet& partSet, ui32 missingDataPartIdxA, ui32 missingDataPartIdxB) { + // Read data and parity + VERBOSE_COUT("EoMainRestorePart" << Endl); TRACE("EoMainRestorePart fullSize# " << partSet.FullDataSize << " partSet p0 Size# " << partSet.Parts[0].Size << " p1 Size# " << partSet.Parts[1].Size @@ -1190,7 +1190,7 @@ public: Y_VERIFY(partSet.Parts[presentPartIdx].Offset % ColumnSize == 0); ui64 readPosition = partSet.Parts[presentPartIdx].Offset; ui64 wholeBlocks = Min(WholeBlocks - readPosition / ColumnSize, partSet.Parts[presentPartIdx].Size / ColumnSize); - + TRACE("wholeBlocks# " << wholeBlocks << " blockSize# " << BlockSize << Endl); EoMainRestorePartsWhole<isStripe, restoreParts, restoreFullData, reversed, restoreParityParts>(Data, BufferDataPart, partSet, readPosition, wholeBlocks, missingDataPartIdxA, missingDataPartIdxB); @@ -1199,15 +1199,15 @@ public: TRACE("EoMainRestoreParts restore tail" << Endl); char lastBlockSource[MAX_TOTAL_PARTS * (MAX_TOTAL_PARTS - 2) * sizeof(ui64)] = {}; TBufferDataPart bufferDataPart; - PrepareLastBlockPointers<isStripe>(lastBlockSource, bufferDataPart); - + PrepareLastBlockPointers<isStripe>(lastBlockSource, bufferDataPart); + EoMainRestorePartsWhole<isStripe, restoreParts, restoreFullData, reversed, restoreParityParts>(lastBlockSource, bufferDataPart, partSet, WholeBlocks * ColumnSize, 1, missingDataPartIdxA, missingDataPartIdxB); - - if (restoreFullData) { - PlaceLastBlock<isStripe>(bufferDataPart, lastBlockSource); - } - } + + if (restoreFullData) { + PlaceLastBlock<isStripe>(bufferDataPart, lastBlockSource); + } + } if (restoreParts) { if (missingDataPartIdxA < partSet.Parts.size()) { @@ -1217,93 +1217,93 @@ public: PadAndCrcPart(partSet, missingDataPartIdxB); } } - } - + } + template <bool isStripe, bool restoreParts, bool restoreFullData, bool restoreParityParts> void XorRestorePartWhole(char* data, TBufferDataPart &bufferDataPart, TDataPartSet& partSet, ui64 readPosition, ui32 beginBlockIdx, ui32 endBlockIdx, ui32 missingDataPartIdx) { - VERBOSE_COUT("XorRestorePartWhole: read:" << readPosition << " LineCount: " << LineCount << Endl); - ui64 writePosition = 0; - for (ui64 blockIdx = beginBlockIdx; blockIdx < endBlockIdx; ++blockIdx) { -#if IS_VERBOSE - for (ui64 lineIdx = 0; lineIdx < LineCount; ++lineIdx) { - for (ui32 part = 0; part <= DataParts; ++part) { - if (part != missingDataPartIdx) { + VERBOSE_COUT("XorRestorePartWhole: read:" << readPosition << " LineCount: " << LineCount << Endl); + ui64 writePosition = 0; + for (ui64 blockIdx = beginBlockIdx; blockIdx < endBlockIdx; ++blockIdx) { +#if IS_VERBOSE + for (ui64 lineIdx = 0; lineIdx < LineCount; ++lineIdx) { + for (ui32 part = 0; part <= DataParts; ++part) { + if (part != missingDataPartIdx) { ui64 partData = *reinterpret_cast<const ui64*>(partSet.Parts[part].GetDataAt(readPosition + lineIdx * sizeof(ui64))); - VERBOSE_COUT(DebugFormatBits(partData) << ", "); - } else { - VERBOSE_COUT(", "); - } - } - VERBOSE_COUT(Endl); - } - VERBOSE_COUT(Endl); -#endif - for (ui64 lineIdx = 0; lineIdx < LineCount; ++lineIdx) { - ui64 restoredData = 0; + VERBOSE_COUT(DebugFormatBits(partData) << ", "); + } else { + VERBOSE_COUT(", "); + } + } + VERBOSE_COUT(Endl); + } + VERBOSE_COUT(Endl); +#endif + for (ui64 lineIdx = 0; lineIdx < LineCount; ++lineIdx) { + ui64 restoredData = 0; ui64 *destination; if (isStripe) { destination = (ui64*)data + writePosition; } - for (ui32 part = 0; part < DataParts; ++part) { - if (part != missingDataPartIdx) { + for (ui32 part = 0; part < DataParts; ++part) { + if (part != missingDataPartIdx) { ui64 partData = *reinterpret_cast<const ui64*>(partSet.Parts[part].GetDataAt(readPosition)); - restoredData ^= partData; - if (restoreFullData) { - if (isStripe) { - destination[part] = partData; - } else { - bufferDataPart[part][writePosition] = partData; - } - } - } - } - if (missingDataPartIdx < DataParts) { + restoredData ^= partData; + if (restoreFullData) { + if (isStripe) { + destination[part] = partData; + } else { + bufferDataPart[part][writePosition] = partData; + } + } + } + } + if (missingDataPartIdx < DataParts) { ui64 partData = *reinterpret_cast<const ui64*>(partSet.Parts[DataParts].GetDataAt(readPosition)); - restoredData ^= partData; - if (restoreFullData) { - if (isStripe) { - destination[missingDataPartIdx] = restoredData; - } else { - bufferDataPart[missingDataPartIdx][writePosition] = restoredData; - } - } - if (restoreParts) { + restoredData ^= partData; + if (restoreFullData) { + if (isStripe) { + destination[missingDataPartIdx] = restoredData; + } else { + bufferDataPart[missingDataPartIdx][writePosition] = restoredData; + } + } + if (restoreParts) { *reinterpret_cast<ui64*>(partSet.Parts[missingDataPartIdx].GetDataAt(readPosition)) = restoredData; - } - } else if (restoreParts && missingDataPartIdx == DataParts) { + } + } else if (restoreParts && missingDataPartIdx == DataParts) { *reinterpret_cast<ui64*>(partSet.Parts[DataParts].GetDataAt(readPosition)) = restoredData; - } - readPosition += sizeof(ui64); - if (restoreFullData) { - if (isStripe) { - writePosition += DataParts; - } else { - ++writePosition; - } - } - } -#if IS_VERBOSE - VERBOSE_COUT("Out: " << Endl); - for (ui64 lineIdx = 0; lineIdx < LineCount; ++lineIdx) { - for (ui32 part = 0; part <= DataParts; ++part) { + } + readPosition += sizeof(ui64); + if (restoreFullData) { + if (isStripe) { + writePosition += DataParts; + } else { + ++writePosition; + } + } + } +#if IS_VERBOSE + VERBOSE_COUT("Out: " << Endl); + for (ui64 lineIdx = 0; lineIdx < LineCount; ++lineIdx) { + for (ui32 part = 0; part <= DataParts; ++part) { ui64 partData = *reinterpret_cast<const ui64*>( partSet.Parts[part].GetDataAt(readPosition - ColumnSize + lineIdx * sizeof(ui64))); - VERBOSE_COUT(DebugFormatBits(partData) << ", "); - } - VERBOSE_COUT(Endl); - } - VERBOSE_COUT(Endl); -#endif - } - } - + VERBOSE_COUT(DebugFormatBits(partData) << ", "); + } + VERBOSE_COUT(Endl); + } + VERBOSE_COUT(Endl); +#endif + } + } + template <bool isStripe, bool restoreParts, bool restoreFullData, bool restoreParityParts> - void XorRestorePart(TDataPartSet &partSet, ui32 missingDataPartIdx) { - // Read data and parity - VERBOSE_COUT("XorRestorePart" << Endl); + void XorRestorePart(TDataPartSet &partSet, ui32 missingDataPartIdx) { + // Read data and parity + VERBOSE_COUT("XorRestorePart" << Endl); TRACE("XorRestorePart partSet p0 Size# " << partSet.Parts[0].Size << " p1 Size# " << partSet.Parts[1].Size << Endl); ui32 presentPartIdx = (missingDataPartIdx == 0 ? 1 : 0); @@ -1314,27 +1314,27 @@ public: TRACE("XorRestore beginBlockIdx# " << beginBlockIdx << " wholeBlocks# " << wholeBlocks << Endl); XorRestorePartWhole<isStripe, restoreParts, restoreFullData, restoreParityParts>(Data, BufferDataPart, partSet, readPosition, beginBlockIdx, beginBlockIdx + wholeBlocks, missingDataPartIdx); - + if (TailSize && (partSet.Parts[presentPartIdx].Size + readPosition > WholeBlocks * ColumnSize)) { TRACE("Restore tail, restoreFullData# " << restoreFullData << " restoreParts# " << restoreParts << Endl); char lastBlockSource[MAX_TOTAL_PARTS * (MAX_TOTAL_PARTS - 2) * sizeof(ui64)] = {}; TBufferDataPart bufferDataPart; - PrepareLastBlockPointers<isStripe>(lastBlockSource, bufferDataPart); - + PrepareLastBlockPointers<isStripe>(lastBlockSource, bufferDataPart); + XorRestorePartWhole<isStripe, restoreParts, restoreFullData, restoreParityParts>(lastBlockSource, bufferDataPart, - partSet, WholeBlocks * ColumnSize, WholeBlocks, WholeBlocks + 1, missingDataPartIdx); - - if (restoreFullData) { - PlaceLastBlock<isStripe>(bufferDataPart, lastBlockSource); - } - } + partSet, WholeBlocks * ColumnSize, WholeBlocks, WholeBlocks + 1, missingDataPartIdx); + + if (restoreFullData) { + PlaceLastBlock<isStripe>(bufferDataPart, lastBlockSource); + } + } if (restoreParts && missingDataPartIdx < partSet.Parts.size()) { if (restoreParityParts || missingDataPartIdx < DataParts) { PadAndCrcPart(partSet, missingDataPartIdx); } } - } + } void PadAndCrcPart(TDataPartSet &inOutPartSet, ui32 partIdx) { if (inOutPartSet.IsFragment) { @@ -1353,8 +1353,8 @@ public: } ythrow TWithBackTrace<yexception>() << "Unknown crcMode = " << (i32)CrcMode; } -}; - +}; + void PadAndCrcParts(TErasureType::ECrcMode crcMode, const TBlockParams &p, TDataPartSet &inOutPartSet) { if (inOutPartSet.IsFragment) { return; @@ -1377,7 +1377,7 @@ void PadAndCrcParts(TErasureType::ECrcMode crcMode, const TBlockParams &p, TData ythrow TWithBackTrace<yexception>() << "Unknown crcMode = " << (i32)crcMode; } -template <bool isStripe> +template <bool isStripe> void StarBlockSplit(TErasureType::ECrcMode crcMode, const TErasureType &type, const TString &buffer, TDataPartSet &outPartSet) { TBlockParams p(crcMode, type, buffer.size()); @@ -1402,10 +1402,10 @@ template <bool isStripe> void EoBlockSplit(TErasureType::ECrcMode crcMode, const TErasureType &type, const TString &buffer, TDataPartSet &outPartSet) { TBlockParams p(crcMode, type, buffer.size()); - - // Prepare input data pointers + + // Prepare input data pointers p.PrepareInputDataPointers<isStripe>(const_cast<char*>(buffer.data())); - + // Prepare if not yet if (!outPartSet.IsSplitStarted()) { outPartSet.StartSplit(p.WholeBlocks); @@ -1418,135 +1418,135 @@ void EoBlockSplit(TErasureType::ECrcMode crcMode, const TErasureType &type, cons Refurbish(outPartSet.Parts[i], p.PartContainerSize); } outPartSet.MemoryConsumed = p.TotalParts * outPartSet.Parts[0].MemoryConsumed(); - } - + } + p.EoSplit<isStripe, false, true>(outPartSet); // Finalize if split has been done to completion if (outPartSet.IsSplitDone()) { PadAndCrcParts(crcMode, p, outPartSet); } -} - -template <bool isStripe> +} + +template <bool isStripe> void XorBlockSplit(TErasureType::ECrcMode crcMode, const TErasureType &type, const TString& buffer, TDataPartSet& outPartSet) { TBlockParams p(crcMode, type, buffer.size()); - - // Prepare input data pointers + + // Prepare input data pointers p.PrepareInputDataPointers<isStripe>(const_cast<char*>(buffer.data())); - + outPartSet.FullDataSize = buffer.size(); - outPartSet.PartsMask = ~((~(ui32)0) << p.TotalParts); - outPartSet.Parts.resize(p.TotalParts); - for (ui32 i = 0; i < p.TotalParts; ++i) { + outPartSet.PartsMask = ~((~(ui32)0) << p.TotalParts); + outPartSet.Parts.resize(p.TotalParts); + for (ui32 i = 0; i < p.TotalParts; ++i) { TRACE("Line# " << __LINE__ << Endl); Refurbish(outPartSet.Parts[i], p.PartContainerSize); - } + } outPartSet.MemoryConsumed = p.TotalParts * outPartSet.Parts[0].MemoryConsumed(); - - p.XorSplit<isStripe>(outPartSet); + + p.XorSplit<isStripe>(outPartSet); PadAndCrcParts(crcMode, p, outPartSet); -} - +} + template <bool isStripe, bool restoreParts, bool restoreFullData, bool restoreParityParts> void EoBlockRestore(TErasureType::ECrcMode crcMode, const TErasureType &type, TDataPartSet& partSet) { TString &outBuffer = partSet.FullDataFragment.OwnedString; - ui32 totalParts = type.TotalPartCount(); + ui32 totalParts = type.TotalPartCount(); Y_VERIFY(partSet.Parts.size() >= totalParts); - - ui32 missingDataPartIdxA = totalParts; - ui32 missingDataPartIdxB = totalParts; - ui32 missingDataPartCount = 0; + + ui32 missingDataPartIdxA = totalParts; + ui32 missingDataPartIdxB = totalParts; + ui32 missingDataPartCount = 0; ui64 expectedPartSize = type.PartSize(crcMode, partSet.FullDataSize); - ui32 i = 0; - for (; i < totalParts; ++i) { - if (!(partSet.PartsMask & (1 << i))) { - missingDataPartIdxA = i; - ++missingDataPartCount; - break; - } else { + ui32 i = 0; + for (; i < totalParts; ++i) { + if (!(partSet.PartsMask & (1 << i))) { + missingDataPartIdxA = i; + ++missingDataPartCount; + break; + } else { Y_VERIFY(partSet.Parts[i].size() == expectedPartSize, "partSet.Parts[%" PRIu32 "].size(): %" PRIu64 - " expectedPartSize: %" PRIu64 " erasure: %s partSet.FullDataSize: %" PRIu64, + " expectedPartSize: %" PRIu64 " erasure: %s partSet.FullDataSize: %" PRIu64, (ui32)i, (ui64)partSet.Parts[i].size(), expectedPartSize, type.ErasureName[type.GetErasure()].data(), - (ui64)partSet.FullDataSize); - } - } - ++i; - for (; i < totalParts; ++i) { - if (!(partSet.PartsMask & (1 << i))) { - missingDataPartIdxB = i; - ++missingDataPartCount; - } else { + (ui64)partSet.FullDataSize); + } + } + ++i; + for (; i < totalParts; ++i) { + if (!(partSet.PartsMask & (1 << i))) { + missingDataPartIdxB = i; + ++missingDataPartCount; + } else { Y_VERIFY(partSet.Parts[i].size() == expectedPartSize, "partSet.Parts[%" PRIu32 "].size()# %" PRIu32 - " != expectedPartSize# %" PRIu32 " erasure: %s partSet.FullDataSize: %" PRIu64, + " != expectedPartSize# %" PRIu32 " erasure: %s partSet.FullDataSize: %" PRIu64, (ui32)i, (ui32)partSet.Parts[i].size(), (ui32)expectedPartSize, type.ErasureName[type.GetErasure()].data(), - (ui64)partSet.FullDataSize); - } - } + (ui64)partSet.FullDataSize); + } + } Y_VERIFY(missingDataPartCount <= 2); - - ui64 dataSize = partSet.FullDataSize; - if (restoreParts) { - if (missingDataPartIdxA != totalParts) { + + ui64 dataSize = partSet.FullDataSize; + if (restoreParts) { + if (missingDataPartIdxA != totalParts) { TRACE("Line# " << __LINE__ << Endl); Refurbish(partSet.Parts[missingDataPartIdxA], expectedPartSize); - } - if (missingDataPartIdxB != totalParts) { + } + if (missingDataPartIdxB != totalParts) { TRACE("Line# " << __LINE__ << Endl); Refurbish(partSet.Parts[missingDataPartIdxB], expectedPartSize); - } - } - if (restoreFullData) { + } + } + if (restoreFullData) { Refurbish(outBuffer, dataSize); - } else if (missingDataPartCount == 0) { - return; - } - - if (missingDataPartCount == 2) { - VERBOSE_COUT("missing parts " << missingDataPartIdxA << " and " << missingDataPartIdxB << Endl); - } else if (missingDataPartCount == 1) { - VERBOSE_COUT("missing part " << missingDataPartIdxA << Endl); - } + } else if (missingDataPartCount == 0) { + return; + } + + if (missingDataPartCount == 2) { + VERBOSE_COUT("missing parts " << missingDataPartIdxA << " and " << missingDataPartIdxB << Endl); + } else if (missingDataPartCount == 1) { + VERBOSE_COUT("missing part " << missingDataPartIdxA << Endl); + } TBlockParams p(crcMode, type, dataSize); - - // Restore the fast way if all data parts are present - if (missingDataPartCount == 0 || + + // Restore the fast way if all data parts are present + if (missingDataPartCount == 0 || (!restoreParts && missingDataPartIdxA >= p.TotalParts - 2)) { - VERBOSE_COUT(__LINE__ << " of " << __FILE__ << Endl); - if (isStripe) { + VERBOSE_COUT(__LINE__ << " of " << __FILE__ << Endl); + if (isStripe) { p.PrepareInputDataPointers<isStripe>(outBuffer.Detach()); p.XorRestorePart<isStripe, false, true, false>(partSet, p.DataParts); - } else { + } else { p.GlueBlockParts(outBuffer.Detach(), partSet); - } - return; - } - - // Prepare output data pointers - if (restoreFullData) { + } + return; + } + + // Prepare output data pointers + if (restoreFullData) { p.PrepareInputDataPointers<isStripe>(outBuffer.Detach()); - } - + } + // Consider failed disk cases - // a) < m - // b) m - // 'xor-restore' + // a) < m + // b) m + // 'xor-restore' // d) m, m+1 // TODO: 1-pass // just glue the data // use 'eo split' to restore the remaining parts - // f) <m, m+1 - // use 'xor-restore' to restore the data + // f) <m, m+1 + // use 'xor-restore' to restore the data // TODO: use 2-nd part of 'eo-split' to restore m+1 part // TODO: 1-pass - if (missingDataPartIdxA <= p.DataParts && missingDataPartIdxB >= p.TotalParts - 1) { + if (missingDataPartIdxA <= p.DataParts && missingDataPartIdxB >= p.TotalParts - 1) { TString temp; TString &buffer = restoreFullData ? outBuffer : temp; - if (!restoreFullData && restoreParts && missingDataPartIdxB == p.TotalParts - 1) { + if (!restoreFullData && restoreParts && missingDataPartIdxB == p.TotalParts - 1) { // The (f1) case, but no full data needed, only parts TRACE("case# f1" << Endl); - VERBOSE_COUT(__LINE__ << " of " << __FILE__ << Endl); + VERBOSE_COUT(__LINE__ << " of " << __FILE__ << Endl); if (isStripe) { Refurbish(buffer, dataSize); p.PrepareInputDataPointers<isStripe>(buffer.Detach()); @@ -1556,36 +1556,36 @@ void EoBlockRestore(TErasureType::ECrcMode crcMode, const TErasureType &type, TD p.EoSplit<isStripe, true>(partSet); p.PadAndCrcPart(partSet, missingDataPartIdxA); p.PadAndCrcPart(partSet, missingDataPartIdxB); - } else { + } else { // Cases (a), (b) and (d2), case (f2) with full data and maybe parts needed TRACE("case# a b d2 f2" << Endl); - VERBOSE_COUT(__LINE__ << " of " << __FILE__ << " missing " << missingDataPartIdxA << Endl); + VERBOSE_COUT(__LINE__ << " of " << __FILE__ << " missing " << missingDataPartIdxA << Endl); p.XorRestorePart<isStripe, restoreParts, restoreFullData, restoreParityParts>(partSet, missingDataPartIdxA); if (restoreParts && missingDataPartIdxB == p.TotalParts - 1 && restoreParityParts) { // The (d2a) or (f2a) case with full data and parts needed TRACE("case# d2a f2a" << Endl); - VERBOSE_COUT(__LINE__ << " of " << __FILE__ << Endl); + VERBOSE_COUT(__LINE__ << " of " << __FILE__ << Endl); p.EoSplit<isStripe, true>(partSet); p.PadAndCrcPart(partSet, missingDataPartIdxB); - } + } if (restoreParts) { p.PadAndCrcPart(partSet, missingDataPartIdxA); } - } - return; - } - - // c) m+1 - // TODO: use 2-nd part of 'eo-split' to restore m+1 part, while glueing the data - // TODO: 1-pass - // just glue the data - // use 'eo split' to restore the missing part + } + return; + } + + // c) m+1 + // TODO: use 2-nd part of 'eo-split' to restore m+1 part, while glueing the data + // TODO: 1-pass + // just glue the data + // use 'eo split' to restore the missing part if (missingDataPartIdxA == p.TotalParts - 1 && missingDataPartIdxB == p.TotalParts) { TRACE("case# c" << Endl); - VERBOSE_COUT(__LINE__ << " of " << __FILE__ << Endl); + VERBOSE_COUT(__LINE__ << " of " << __FILE__ << Endl); TString temp; TString &buffer = restoreFullData ? outBuffer : temp; - if (!restoreFullData) { + if (!restoreFullData) { TRACE(__LINE__ << Endl); if (!restoreParityParts) { TRACE(__LINE__ << Endl); @@ -1595,31 +1595,31 @@ void EoBlockRestore(TErasureType::ECrcMode crcMode, const TErasureType &type, TD if (isStripe) { Refurbish(buffer, dataSize); } - } - if (isStripe) { + } + if (isStripe) { TRACE(__LINE__ << Endl); p.PrepareInputDataPointers<isStripe>(buffer.Detach()); p.XorRestorePart<isStripe, false, true, false>(partSet, p.DataParts); - } else { + } else { TRACE(__LINE__ << Endl); if (restoreFullData) { p.GlueBlockParts(buffer.Detach(), partSet); } - } - if (restoreParts) { + } + if (restoreParts) { TRACE(__LINE__ << Endl); - VERBOSE_COUT(__LINE__ << " of " << __FILE__ << Endl); + VERBOSE_COUT(__LINE__ << " of " << __FILE__ << Endl); p.EoSplit<isStripe, true>(partSet); p.PadAndCrcPart(partSet, missingDataPartIdxA); - } - return; - } - - // e) <m, m - // TODO: 1-pass - // use diagonal-sums to restore the data - // use 'xor restore' with 'restore part' to restore m part - if (missingDataPartIdxA < p.DataParts && missingDataPartIdxB == p.DataParts) { + } + return; + } + + // e) <m, m + // TODO: 1-pass + // use diagonal-sums to restore the data + // use 'xor restore' with 'restore part' to restore m part + if (missingDataPartIdxA < p.DataParts && missingDataPartIdxB == p.DataParts) { TRACE(__LINE__ << " of " << __FILE__ << " case# e restore part missing# " << missingDataPartIdxA << ", " << missingDataPartIdxB << " restoreParts# " << restoreParts << " restoreParityParts# " << restoreParityParts @@ -1631,13 +1631,13 @@ void EoBlockRestore(TErasureType::ECrcMode crcMode, const TErasureType &type, TD p.PadAndCrcPart(partSet, missingDataPartIdxB); } } - return; - } - - // g) <m, <m - // the main case :( + return; + } + + // g) <m, <m + // the main case :( TRACE("case# g" << Endl); - VERBOSE_COUT(__LINE__ << " of " << __FILE__ << Endl); + VERBOSE_COUT(__LINE__ << " of " << __FILE__ << Endl); Y_VERIFY(missingDataPartIdxA < p.DataParts && missingDataPartIdxB < p.DataParts); p.EoMainRestoreParts<isStripe, restoreParts, restoreFullData, false, restoreParityParts>(partSet, missingDataPartIdxA, missingDataPartIdxB); @@ -1645,8 +1645,8 @@ void EoBlockRestore(TErasureType::ECrcMode crcMode, const TErasureType &type, TD p.PadAndCrcPart(partSet, missingDataPartIdxA); p.PadAndCrcPart(partSet, missingDataPartIdxB); } -} - +} + // restorePartiyParts may be set only togehter with restore parts template <bool isStripe, bool restoreParts, bool restoreFullData, bool restoreParityParts> void StarBlockRestore(TErasureType::ECrcMode crcMode, const TErasureType &type, TDataPartSet& partSet) { @@ -1881,67 +1881,67 @@ void StarBlockRestore(TErasureType::ECrcMode crcMode, const TErasureType &type, template <bool isStripe, bool restoreParts, bool restoreFullData, bool restoreParityParts> void XorBlockRestore(TErasureType::ECrcMode crcMode, const TErasureType &type, TDataPartSet &partSet) { TString &outBuffer = partSet.FullDataFragment.OwnedString; - ui32 totalParts = type.TotalPartCount(); + ui32 totalParts = type.TotalPartCount(); Y_VERIFY(partSet.Parts.size() == totalParts, "partSet.Parts.size(): %" PRIu64 " totalParts: %" PRIu32 " erasure: %s", (ui64)partSet.Parts.size(), (ui32)totalParts, type.ErasureName[type.GetErasure()].data()); - - ui32 missingDataPartIdx = totalParts; - ui32 missingDataPartCount = 0; + + ui32 missingDataPartIdx = totalParts; + ui32 missingDataPartCount = 0; ui64 expectedPartSize = type.PartSize(crcMode, partSet.FullDataSize); - for (ui32 i = 0; i < totalParts; ++i) { - if (!(partSet.PartsMask & (1 << i))) { - missingDataPartIdx = i; - ++missingDataPartCount; - } else { + for (ui32 i = 0; i < totalParts; ++i) { + if (!(partSet.PartsMask & (1 << i))) { + missingDataPartIdx = i; + ++missingDataPartCount; + } else { Y_VERIFY(partSet.Parts[i].size() == expectedPartSize, "partSet.Parts[%" PRIu32 "].size(): %" PRIu64 - " expectedPartSize: %" PRIu64 " erasure: %s partSet.FullDataSize: %" PRIu64, + " expectedPartSize: %" PRIu64 " erasure: %s partSet.FullDataSize: %" PRIu64, (ui32)i, (ui64)partSet.Parts[i].size(), expectedPartSize, type.ErasureName[type.GetErasure()].data(), - (ui64)partSet.FullDataSize); - } - } + (ui64)partSet.FullDataSize); + } + } Y_VERIFY(missingDataPartCount <= 1); - - ui64 dataSize = partSet.FullDataSize; - if (restoreParts && missingDataPartIdx != totalParts) { + + ui64 dataSize = partSet.FullDataSize; + if (restoreParts && missingDataPartIdx != totalParts) { TRACE("Line# " << __LINE__ << Endl); Refurbish(partSet.Parts[missingDataPartIdx], partSet.Parts[missingDataPartIdx == 0 ? 1 : 0].size()); - } - if (restoreFullData) { + } + if (restoreFullData) { Refurbish(outBuffer, dataSize); - } else if (missingDataPartCount == 0) { - return; - } - + } else if (missingDataPartCount == 0) { + return; + } + TBlockParams p(crcMode, type, dataSize); - - // Restore the fast way if all data parts are present - if (missingDataPartCount == 0 || - (missingDataPartCount == 1 && !restoreParts && missingDataPartIdx == p.TotalParts - 1)) { - if (isStripe) { + + // Restore the fast way if all data parts are present + if (missingDataPartCount == 0 || + (missingDataPartCount == 1 && !restoreParts && missingDataPartIdx == p.TotalParts - 1)) { + if (isStripe) { p.PrepareInputDataPointers<isStripe>(outBuffer.Detach()); p.XorRestorePart<isStripe, false, true, false>(partSet, p.DataParts); - } else { + } else { p.GlueBlockParts(outBuffer.Detach(), partSet); - } - return; - } - // Prepare output data pointers - if (restoreFullData) { + } + return; + } + // Prepare output data pointers + if (restoreFullData) { p.PrepareInputDataPointers<isStripe>(outBuffer.Detach()); - } - + } + p.XorRestorePart<isStripe, restoreParts, restoreFullData, restoreParityParts>(partSet, missingDataPartIdx); -} - +} + const std::array<TString, TErasureType::ErasureSpeciesCount> TErasureType::ErasureName{{ - "none", - "mirror-3", - "block-3-1", - "stripe-3-1", - "block-4-2", - "block-3-2", - "stripe-4-2", + "none", + "mirror-3", + "block-3-1", + "stripe-3-1", + "block-4-2", + "block-3-2", + "stripe-4-2", "stripe-3-2", "mirror-3-2", "mirror-3-dc", @@ -1955,76 +1955,76 @@ const std::array<TString, TErasureType::ErasureSpeciesCount> TErasureType::Erasu "stripe-2-2", "mirror-3of4", }}; - + TErasureType::EErasureFamily TErasureType::ErasureFamily() const { const TErasureParameters &erasure = ErasureSpeciesParameters[ErasureSpecies]; return erasure.ErasureFamily; } -ui32 TErasureType::ParityParts() const { - const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; - return erasure.ParityParts; -} - -ui32 TErasureType::DataParts() const { - const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; - return erasure.DataParts; -} - -ui32 TErasureType::TotalPartCount() const { - const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; - return erasure.DataParts + erasure.ParityParts; -} - -ui32 TErasureType::MinimalRestorablePartCount() const { - const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; - return erasure.DataParts; -} - -ui32 TErasureType::ColumnSize() const { - const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; - switch (erasure.ErasureFamily) { +ui32 TErasureType::ParityParts() const { + const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; + return erasure.ParityParts; +} + +ui32 TErasureType::DataParts() const { + const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; + return erasure.DataParts; +} + +ui32 TErasureType::TotalPartCount() const { + const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; + return erasure.DataParts + erasure.ParityParts; +} + +ui32 TErasureType::MinimalRestorablePartCount() const { + const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; + return erasure.DataParts; +} + +ui32 TErasureType::ColumnSize() const { + const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; + switch (erasure.ErasureFamily) { case TErasureType::ErasureMirror: - return 1; + return 1; case TErasureType::ErasureParityStripe: case TErasureType::ErasureParityBlock: - if (erasure.ParityParts == 1) { - return sizeof(ui64); - } - return (erasure.Prime - 1) * sizeof(ui64); - } - ythrow TWithBackTrace<yexception>() << "Unknown ErasureFamily = " << (i32)erasure.ErasureFamily; -} -/* -ui32 TErasureType::PartialRestoreStep() const { - const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; - switch (erasure.ErasureFamily) { + if (erasure.ParityParts == 1) { + return sizeof(ui64); + } + return (erasure.Prime - 1) * sizeof(ui64); + } + ythrow TWithBackTrace<yexception>() << "Unknown ErasureFamily = " << (i32)erasure.ErasureFamily; +} +/* +ui32 TErasureType::PartialRestoreStep() const { + const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; + switch (erasure.ErasureFamily) { case TErasureType::ErasureMirror: - return 1; + return 1; case TErasureType::ErasureParityStripe: - if (erasure.ParityParts == 1) { - return erasure.DataParts * sizeof(ui64); - } - return erasure.DataParts * (erasure.Prime - 1) * sizeof(ui64); + if (erasure.ParityParts == 1) { + return erasure.DataParts * sizeof(ui64); + } + return erasure.DataParts * (erasure.Prime - 1) * sizeof(ui64); case TErasureType::ErasureParityBlock: - if (erasure.ParityParts == 1) { - return sizeof(ui64); - } - return (erasure.Prime - 1) * sizeof(ui64); - } - ythrow TWithBackTrace<yexception>() << "Unknown ErasureFamily = " << (i32)erasure.ErasureFamily; -}*/ - -ui32 TErasureType::MinimalBlockSize() const { - const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; - switch (erasure.ErasureFamily) { + if (erasure.ParityParts == 1) { + return sizeof(ui64); + } + return (erasure.Prime - 1) * sizeof(ui64); + } + ythrow TWithBackTrace<yexception>() << "Unknown ErasureFamily = " << (i32)erasure.ErasureFamily; +}*/ + +ui32 TErasureType::MinimalBlockSize() const { + const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; + switch (erasure.ErasureFamily) { case TErasureType::ErasureMirror: - return 1; + return 1; case TErasureType::ErasureParityStripe: case TErasureType::ErasureParityBlock: - if (erasure.ParityParts == 1) { - return erasure.DataParts * sizeof(ui64); - } + if (erasure.ParityParts == 1) { + return erasure.DataParts * sizeof(ui64); + } if (erasure.ParityParts == 2) { return (erasure.Prime - 1) * erasure.DataParts * sizeof(ui64); } @@ -2032,31 +2032,31 @@ ui32 TErasureType::MinimalBlockSize() const { return (erasure.Prime - 1) * erasure.DataParts * sizeof(ui64); } ythrow TWithBackTrace<yexception>() << "Unsupported partiy part count = " << erasure.ParityParts << - " for ErasureFamily = " << (i32)erasure.ErasureFamily; - } - ythrow TWithBackTrace<yexception>() << "Unknown ErasureFamily = " << (i32)erasure.ErasureFamily; -} - + " for ErasureFamily = " << (i32)erasure.ErasureFamily; + } + ythrow TWithBackTrace<yexception>() << "Unknown ErasureFamily = " << (i32)erasure.ErasureFamily; +} + ui64 TErasureType::PartUserSize(ui64 dataSize) const { - const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; - switch (erasure.ErasureFamily) { + const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; + switch (erasure.ErasureFamily) { case TErasureType::ErasureMirror: return dataSize; case TErasureType::ErasureParityStripe: case TErasureType::ErasureParityBlock: - { - ui32 blockSize = MinimalBlockSize(); - ui64 dataSizeBlocks = (dataSize + blockSize - 1) / blockSize; - ui64 partSize = dataSizeBlocks * sizeof(ui64) * (erasure.ParityParts == 1 ? 1 : (erasure.Prime - 1)); - return partSize; - } - } - ythrow TWithBackTrace<yexception>() << "Unknown ErasureFamily = " << (i32)erasure.ErasureFamily; -} - + { + ui32 blockSize = MinimalBlockSize(); + ui64 dataSizeBlocks = (dataSize + blockSize - 1) / blockSize; + ui64 partSize = dataSizeBlocks * sizeof(ui64) * (erasure.ParityParts == 1 ? 1 : (erasure.Prime - 1)); + return partSize; + } + } + ythrow TWithBackTrace<yexception>() << "Unknown ErasureFamily = " << (i32)erasure.ErasureFamily; +} + ui64 TErasureType::PartSize(ECrcMode crcMode, ui64 dataSize) const { - const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; - switch (erasure.ErasureFamily) { + const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; + switch (erasure.ErasureFamily) { case TErasureType::ErasureMirror: switch (crcMode) { case CrcModeNone: @@ -2071,8 +2071,8 @@ ui64 TErasureType::PartSize(ECrcMode crcMode, ui64 dataSize) const { ythrow TWithBackTrace<yexception>() << "Unknown crcMode = " << (i32)crcMode; case TErasureType::ErasureParityStripe: case TErasureType::ErasureParityBlock: - { - ui32 blockSize = MinimalBlockSize(); + { + ui32 blockSize = MinimalBlockSize(); ui64 dataSizeBlocks = (dataSize + blockSize - 1) / blockSize; ui64 partSize = dataSizeBlocks * sizeof(ui64) * (erasure.ParityParts == 1 ? 1 : (erasure.Prime - 1)); switch (crcMode) { @@ -2122,24 +2122,24 @@ ui64 TErasureType::SuggestDataSize(ECrcMode crcMode, ui64 partSize, bool roundDo ythrow TWithBackTrace<yexception>() << "Unknown crcMode = " << (i32)crcMode; } ui32 blockSize = MinimalBlockSize(); - ui64 dataSizeBlocks = (combinedDataSize + (roundDown ? 0 : blockSize - 1)) / blockSize; - return dataSizeBlocks * blockSize; - } - } - ythrow TWithBackTrace<yexception>() << "Unknown ErasureFamily = " << (i32)erasure.ErasureFamily; -} - -ui32 TErasureType::Prime() const { - const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; - return erasure.Prime; -} - -// Block consists of columns. -// block = [column1, column2, ... ,columnN], where N == erasure.DataParts -// -// Input partitioning: -// | large, ... | small, ... | small + tail | - + ui64 dataSizeBlocks = (combinedDataSize + (roundDown ? 0 : blockSize - 1)) / blockSize; + return dataSizeBlocks * blockSize; + } + } + ythrow TWithBackTrace<yexception>() << "Unknown ErasureFamily = " << (i32)erasure.ErasureFamily; +} + +ui32 TErasureType::Prime() const { + const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; + return erasure.Prime; +} + +// Block consists of columns. +// block = [column1, column2, ... ,columnN], where N == erasure.DataParts +// +// Input partitioning: +// | large, ... | small, ... | small + tail | + bool TErasureType::IsSinglePartRequest(ui32 fullDataSize, ui32 shift, ui32 size, ui32 &outPartIdx) const { const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; @@ -2187,77 +2187,77 @@ bool TErasureType::IsPartialDataRequestPossible() const { } } -bool TErasureType::IsUnknownFullDataSizePartialDataRequestPossible() const { - const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; - switch (erasure.ErasureFamily) { +bool TErasureType::IsUnknownFullDataSizePartialDataRequestPossible() const { + const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; + switch (erasure.ErasureFamily) { case TErasureType::ErasureParityBlock: - return false; + return false; case TErasureType::ErasureParityStripe: - return true; + return true; case TErasureType::ErasureMirror: return true; - default: - ythrow TWithBackTrace<yexception>() << "Unknown ErasureFamily = " << (i32)erasure.ErasureFamily; - } - //return false; -} - -void TErasureType::AlignPartialDataRequest(ui64 shift, ui64 size, ui64 fullDataSize, ui64 &outShift, - ui64 &outSize) const { - const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; - ui64 blockSize = MinimalBlockSize(); - ui64 columnSize = blockSize / erasure.DataParts; - - switch (erasure.ErasureFamily) { + default: + ythrow TWithBackTrace<yexception>() << "Unknown ErasureFamily = " << (i32)erasure.ErasureFamily; + } + //return false; +} + +void TErasureType::AlignPartialDataRequest(ui64 shift, ui64 size, ui64 fullDataSize, ui64 &outShift, + ui64 &outSize) const { + const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; + ui64 blockSize = MinimalBlockSize(); + ui64 columnSize = blockSize / erasure.DataParts; + + switch (erasure.ErasureFamily) { case TErasureType::ErasureParityBlock: - { + { if (size == 0) { size = fullDataSize - shift; } - ui64 firstPartOffset = 0; - ui64 firstPartIdx = BlockSplitPartIndex(shift, fullDataSize, firstPartOffset); - - ui64 lastPartOffset = 0; + ui64 firstPartOffset = 0; + ui64 firstPartIdx = BlockSplitPartIndex(shift, fullDataSize, firstPartOffset); + + ui64 lastPartOffset = 0; ui64 lastPartIdx = BlockSplitPartIndex(shift + size, fullDataSize, lastPartOffset); - - // TODO: Consider data on the edge of 2 parts ( data ..... ...xx x.... => request x..xx of each part ) - if (firstPartIdx == lastPartIdx) { - outShift = (firstPartOffset / columnSize) * columnSize; + + // TODO: Consider data on the edge of 2 parts ( data ..... ...xx x.... => request x..xx of each part ) + if (firstPartIdx == lastPartIdx) { + outShift = (firstPartOffset / columnSize) * columnSize; outSize = ((lastPartOffset + columnSize - 1) / columnSize) * columnSize - outShift; - break; - } - - outShift = 0; - outSize = 0; - break; - } + break; + } + + outShift = 0; + outSize = 0; + break; + } case TErasureType::ErasureParityStripe: - { - ui64 beginBlockIdx = (shift / blockSize); - outShift = beginBlockIdx * columnSize; - if (size == 0) { - outSize = 0; - } else { - ui64 endBlockIdx = ((shift + size + blockSize - 1) / blockSize); - outSize = (endBlockIdx - beginBlockIdx) * columnSize; - } - break; - } + { + ui64 beginBlockIdx = (shift / blockSize); + outShift = beginBlockIdx * columnSize; + if (size == 0) { + outSize = 0; + } else { + ui64 endBlockIdx = ((shift + size + blockSize - 1) / blockSize); + outSize = (endBlockIdx - beginBlockIdx) * columnSize; + } + break; + } case TErasureType::ErasureMirror: - { - outShift = shift; - outSize = size; - break; - } - default: - outShift = shift; - outSize = size; - ythrow TWithBackTrace<yexception>() << "Unknown ErasureFamily = " << (i32)erasure.ErasureFamily; - break; - } - return; -} - + { + outShift = shift; + outSize = size; + break; + } + default: + outShift = shift; + outSize = size; + ythrow TWithBackTrace<yexception>() << "Unknown ErasureFamily = " << (i32)erasure.ErasureFamily; + break; + } + return; +} + void TErasureType::BlockSplitRange(ECrcMode crcMode, ui64 blobSize, ui64 wholeBegin, ui64 wholeEnd, TBlockSplitRange *outRange) const { Y_VERIFY(wholeBegin <= wholeEnd && outRange, "wholeBegin# %" PRIu64 " wholeEnd# %" PRIu64 " outRange# %" PRIu64, @@ -2424,37 +2424,37 @@ void TErasureType::BlockSplitRange(ECrcMode crcMode, ui64 blobSize, ui64 wholeBe Y_VERIFY_DEBUG(outRange->EndPartIdx != Max<ui64>()); } -ui32 TErasureType::BlockSplitPartIndex(ui64 offset, ui64 dataSize, ui64 &outPartOffset) const { - const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; - ui64 blockSize = MinimalBlockSize(); - ui64 columnSize = blockSize / erasure.DataParts; - ui64 wholeColumns = dataSize / columnSize; - - ui64 smallPartColumns = wholeColumns / erasure.DataParts; - ui64 largePartColumns = smallPartColumns + 1; - - ui64 smallPartSize = smallPartColumns * columnSize; - ui64 largePartSize = largePartColumns * columnSize; - - ui32 firstSmallPartIdx = wholeColumns % erasure.DataParts; - - ui64 firstSmallPartOffset = firstSmallPartIdx * largePartSize; - if (offset < firstSmallPartOffset) { - ui64 index = offset / largePartSize; - outPartOffset = offset - index * largePartSize; - return (ui32)index; - } +ui32 TErasureType::BlockSplitPartIndex(ui64 offset, ui64 dataSize, ui64 &outPartOffset) const { + const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; + ui64 blockSize = MinimalBlockSize(); + ui64 columnSize = blockSize / erasure.DataParts; + ui64 wholeColumns = dataSize / columnSize; + + ui64 smallPartColumns = wholeColumns / erasure.DataParts; + ui64 largePartColumns = smallPartColumns + 1; + + ui64 smallPartSize = smallPartColumns * columnSize; + ui64 largePartSize = largePartColumns * columnSize; + + ui32 firstSmallPartIdx = wholeColumns % erasure.DataParts; + + ui64 firstSmallPartOffset = firstSmallPartIdx * largePartSize; + if (offset < firstSmallPartOffset) { + ui64 index = offset / largePartSize; + outPartOffset = offset - index * largePartSize; + return (ui32)index; + } ui64 lastPartOffset = firstSmallPartOffset + smallPartSize * (erasure.DataParts - firstSmallPartIdx - 1); - if (offset < lastPartOffset) { - offset -= firstSmallPartOffset; + if (offset < lastPartOffset) { + offset -= firstSmallPartOffset; ui64 smallIndex = offset / smallPartSize; outPartOffset = offset - smallIndex * smallPartSize; return (ui32)(smallIndex + firstSmallPartIdx); - } - outPartOffset = offset - lastPartOffset; - return (erasure.DataParts - 1); -} - + } + outPartOffset = offset - lastPartOffset; + return (erasure.DataParts - 1); +} + ui64 TErasureType::BlockSplitWholeOffset(ui64 dataSize, ui64 partIdx, ui64 offset) const { const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; ui64 blockSize = MinimalBlockSize(); @@ -2577,15 +2577,15 @@ void MirrorRestore(TErasureType::ECrcMode crcMode, const TErasureType &type, TDa } static void VerifyPartSizes(TDataPartSet& partSet, size_t definedPartEndIdx) { size_t partSize = partSet.Parts[0].size(); - for (size_t idx = 0; idx < partSet.Parts.size(); ++idx) { + for (size_t idx = 0; idx < partSet.Parts.size(); ++idx) { Y_VERIFY(partSet.Parts[idx].size() == partSize); if (partSize && idx < definedPartEndIdx) { REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(partSet.Parts[idx].GetDataAt(partSet.Parts[idx].Offset), partSet.Parts[idx].Size); } - } -} - + } +} + void TErasureType::SplitData(ECrcMode crcMode, const TString& buffer, TDataPartSet& outPartSet) const { outPartSet.ResetSplit(); do { @@ -2594,54 +2594,54 @@ void TErasureType::SplitData(ECrcMode crcMode, const TString& buffer, TDataPartS } void TErasureType::IncrementalSplitData(ECrcMode crcMode, const TString& buffer, TDataPartSet& outPartSet) const { - const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; - switch (erasure.ErasureFamily) { + const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; + switch (erasure.ErasureFamily) { case TErasureType::ErasureMirror: MirrorSplit(crcMode, *this, buffer, outPartSet); - break; + break; case TErasureType::ErasureParityStripe: - switch (erasure.ParityParts) { + switch (erasure.ParityParts) { case 1: XorBlockSplit<true>(crcMode ,*this, buffer, outPartSet); - break; - case 2: + break; + case 2: EoBlockSplit<true>(crcMode, *this, buffer, outPartSet); - break; + break; case 3: StarBlockSplit<true>(crcMode, *this, buffer, outPartSet); break; - default: + default: ythrow TWithBackTrace<yexception>() << "Unsupported number of parity parts: " << erasure.ParityParts; - break; - } - break; + break; + } + break; case TErasureType::ErasureParityBlock: - switch (erasure.ParityParts) { - case 1: + switch (erasure.ParityParts) { + case 1: XorBlockSplit<false>(crcMode, *this, buffer, outPartSet); - break; - case 2: + break; + case 2: EoBlockSplit<false>(crcMode, *this, buffer, outPartSet); - break; + break; case 3: StarBlockSplit<false>(crcMode, *this, buffer, outPartSet); break; - default: + default: ythrow TWithBackTrace<yexception>() << "Unsupported number of parity parts: " << erasure.ParityParts; - break; - } - break; - default: - ythrow TWithBackTrace<yexception>() << "Unknown ErasureFamily = " << (i32)erasure.ErasureFamily; - break; - } + break; + } + break; + default: + ythrow TWithBackTrace<yexception>() << "Unknown ErasureFamily = " << (i32)erasure.ErasureFamily; + break; + } if (outPartSet.IsSplitDone()) { VerifyPartSizes(outPartSet, Max<size_t>()); } -} - +} + void MirrorSplitDiff(const TErasureType &type, const TVector<TDiff> &diffs, TPartDiffSet& outDiffSet) { outDiffSet.PartDiffs.resize(type.TotalPartCount()); ui32 parityParts = type.ParityParts(); @@ -2980,37 +2980,37 @@ void TErasureType::RestoreData(ECrcMode crcMode, TDataPartSet& partSet, bool res if (restoreParityParts) { restoreParts = true; } - const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; - ui32 totalParts = TotalPartCount(); - if (partSet.Parts.size() != totalParts) { - ythrow TWithBackTrace<yexception>() << "Incorrect partSet size, received " << partSet.Parts.size() - << " while expected " << (erasure.DataParts + erasure.ParityParts); - } + const TErasureParameters& erasure = ErasureSpeciesParameters[ErasureSpecies]; + ui32 totalParts = TotalPartCount(); + if (partSet.Parts.size() != totalParts) { + ythrow TWithBackTrace<yexception>() << "Incorrect partSet size, received " << partSet.Parts.size() + << " while expected " << (erasure.DataParts + erasure.ParityParts); + } Y_VERIFY_DEBUG(restoreFullData || restoreParts); Y_VERIFY_DEBUG(erasure.Prime <= MAX_LINES_IN_BLOCK); - switch (erasure.ErasureFamily) { + switch (erasure.ErasureFamily) { case TErasureType::ErasureMirror: if (restoreParts) { if (restoreFullData) { MirrorRestore<true, true>(crcMode, *this, partSet); } else { MirrorRestore<true, false>(crcMode, *this, partSet); - } + } VerifyPartSizes(partSet, Max<size_t>()); } else if (restoreFullData) { MirrorRestore<false, true>(crcMode, *this, partSet); - } + } if (restoreFullData) { Y_VERIFY(partSet.FullDataSize == partSet.FullDataFragment.PartSize, "Incorrect data part size = %" PRIu64 ", expected size = %" PRIu64, (ui64)partSet.FullDataFragment.PartSize, (ui64)partSet.FullDataSize); } - break; + break; case TErasureType::ErasureParityStripe: - switch (erasure.ParityParts) { - case 1: - if (restoreParts) { - if (restoreFullData) { + switch (erasure.ParityParts) { + case 1: + if (restoreParts) { + if (restoreFullData) { if (restoreParityParts) { XorBlockRestore<true, true, true, true>(crcMode, *this, partSet); VerifyPartSizes(partSet, Max<size_t>()); @@ -3018,7 +3018,7 @@ void TErasureType::RestoreData(ECrcMode crcMode, TDataPartSet& partSet, bool res XorBlockRestore<true, true, true, false>(crcMode, *this, partSet); VerifyPartSizes(partSet, erasure.DataParts); } - } else { + } else { if (restoreParityParts) { XorBlockRestore<true, true, false, true>(crcMode, *this, partSet); VerifyPartSizes(partSet, Max<size_t>()); @@ -3026,15 +3026,15 @@ void TErasureType::RestoreData(ECrcMode crcMode, TDataPartSet& partSet, bool res XorBlockRestore<true, true, false, false>(crcMode, *this, partSet); VerifyPartSizes(partSet, erasure.DataParts); } - } + } partSet.MemoryConsumed = partSet.Parts[0].MemoryConsumed() * partSet.Parts.size(); - } else if (restoreFullData) { + } else if (restoreFullData) { XorBlockRestore<true, false, true, false>(crcMode, *this, partSet); - } - break; - case 2: - if (restoreParts) { - if (restoreFullData) { + } + break; + case 2: + if (restoreParts) { + if (restoreFullData) { if (restoreParityParts) { EoBlockRestore<true, true, true, true>(crcMode, *this, partSet); VerifyPartSizes(partSet, Max<size_t>()); @@ -3042,7 +3042,7 @@ void TErasureType::RestoreData(ECrcMode crcMode, TDataPartSet& partSet, bool res EoBlockRestore<true, true, true, false>(crcMode, *this, partSet); VerifyPartSizes(partSet, erasure.DataParts); } - } else { + } else { if (restoreParityParts) { EoBlockRestore<true, true, false, true>(crcMode, *this, partSet); VerifyPartSizes(partSet, Max<size_t>()); @@ -3050,12 +3050,12 @@ void TErasureType::RestoreData(ECrcMode crcMode, TDataPartSet& partSet, bool res EoBlockRestore<true, true, false, false>(crcMode, *this, partSet); VerifyPartSizes(partSet, erasure.DataParts); } - } + } partSet.MemoryConsumed = partSet.Parts[0].MemoryConsumed() * partSet.Parts.size(); - } else if (restoreFullData) { + } else if (restoreFullData) { EoBlockRestore<true, false, true, false>(crcMode, *this, partSet); - } - break; + } + break; case 3: if (restoreParts) { if (restoreFullData) { @@ -3080,17 +3080,17 @@ void TErasureType::RestoreData(ECrcMode crcMode, TDataPartSet& partSet, bool res StarBlockRestore<true, false, true, false>(crcMode, *this, partSet); } break; - default: + default: ythrow TWithBackTrace<yexception>() << "Unsupported number of parity parts: " << erasure.ParityParts; - break; - } - break; + break; + } + break; case TErasureType::ErasureParityBlock: - switch (erasure.ParityParts) { - case 1: - if (restoreParts) { - if (restoreFullData) { + switch (erasure.ParityParts) { + case 1: + if (restoreParts) { + if (restoreFullData) { if (restoreParityParts) { XorBlockRestore<false, true, true, true>(crcMode, *this, partSet); VerifyPartSizes(partSet, Max<size_t>()); @@ -3098,7 +3098,7 @@ void TErasureType::RestoreData(ECrcMode crcMode, TDataPartSet& partSet, bool res XorBlockRestore<false, true, true, false>(crcMode, *this, partSet); VerifyPartSizes(partSet, erasure.DataParts); } - } else { + } else { if (restoreParityParts) { XorBlockRestore<false, true, false, true>(crcMode, *this, partSet); VerifyPartSizes(partSet, Max<size_t>()); @@ -3106,15 +3106,15 @@ void TErasureType::RestoreData(ECrcMode crcMode, TDataPartSet& partSet, bool res XorBlockRestore<false, true, false, false>(crcMode, *this, partSet); VerifyPartSizes(partSet, erasure.DataParts); } - } + } partSet.MemoryConsumed = partSet.Parts[0].MemoryConsumed() * partSet.Parts.size(); - } else if (restoreFullData) { + } else if (restoreFullData) { XorBlockRestore<false, false, true, false>(crcMode, *this, partSet); - } - break; - case 2: - if (restoreParts) { - if (restoreFullData) { + } + break; + case 2: + if (restoreParts) { + if (restoreFullData) { if (restoreParityParts) { EoBlockRestore<false, true, true, true>(crcMode, *this, partSet); VerifyPartSizes(partSet, Max<size_t>()); @@ -3122,7 +3122,7 @@ void TErasureType::RestoreData(ECrcMode crcMode, TDataPartSet& partSet, bool res EoBlockRestore<false, true, true, false>(crcMode, *this, partSet); VerifyPartSizes(partSet, erasure.DataParts); } - } else { + } else { if (restoreParityParts) { EoBlockRestore<false, true, false, true>(crcMode, *this, partSet); VerifyPartSizes(partSet, Max<size_t>()); @@ -3130,12 +3130,12 @@ void TErasureType::RestoreData(ECrcMode crcMode, TDataPartSet& partSet, bool res EoBlockRestore<false, true, false, false>(crcMode, *this, partSet); VerifyPartSizes(partSet, erasure.DataParts); } - } + } partSet.MemoryConsumed = partSet.Parts[0].MemoryConsumed() * partSet.Parts.size(); - } else if (restoreFullData) { + } else if (restoreFullData) { EoBlockRestore<false, false, true, false>(crcMode, *this, partSet); - } - break; + } + break; case 3: if (restoreParts) { if (restoreFullData) { @@ -3161,17 +3161,17 @@ void TErasureType::RestoreData(ECrcMode crcMode, TDataPartSet& partSet, bool res StarBlockRestore<false, false, true, false>(crcMode, *this, partSet); } break; - default: + default: ythrow TWithBackTrace<yexception>() << "Unsupported number of parity parts: " << erasure.ParityParts; - break; - } - break; - } -} - + break; + } + break; + } +} + } // NKikimr Y_DECLARE_OUT_SPEC(, NKikimr::TErasureType::EErasureSpecies, stream, value) { stream << NKikimr::TErasureType::ErasureSpeciesToStr(value); -} +} diff --git a/ydb/core/erasure/erasure.h b/ydb/core/erasure/erasure.h index 95db35664e..35bd68c2d2 100644 --- a/ydb/core/erasure/erasure.h +++ b/ydb/core/erasure/erasure.h @@ -1,20 +1,20 @@ -#pragma once +#pragma once #include <array> #include <ydb/core/debug/valgrind_check.h> #include <ydb/core/util/yverify_stream.h> - -#include <util/stream/str.h> + +#include <util/stream/str.h> #include <util/generic/string.h> -#include <util/generic/bt_exception.h> +#include <util/generic/bt_exception.h> #include <util/string/builder.h> - -#include <util/generic/list.h> + +#include <util/generic/list.h> #include <library/cpp/containers/stack_vector/stack_vec.h> - -namespace NKikimr { - + +namespace NKikimr { + struct TDiff { TString Buffer; ui32 Offset = 0; @@ -146,14 +146,14 @@ struct TPartFragment { } }; -struct TDataPartSet { +struct TDataPartSet { ui64 FullDataSize = 0; ui32 PartsMask = 0; TStackVec<TPartFragment, 8> Parts; TPartFragment FullDataFragment; ui64 MemoryConsumed = 0; bool IsFragment = false; - + // Incremental split KIKIMR-10794 ui64 WholeBlocks = 0; // Blocks to be split (not including tail) ui64 CurBlockIdx = 0; // Blocks have been already split @@ -184,8 +184,8 @@ struct TDataPartSet { WholeBlocks = 0; CurBlockIdx = 0; } -}; - +}; + struct TPartOffsetRange { // [Begin, End) ui64 Begin = 0; ui64 End = 0; @@ -223,21 +223,21 @@ struct TBlockSplitRange { TStackVec<TPartOffsetRange, 8> PartRanges; }; -struct TErasureParameters; - -struct TErasureType { - - enum EErasureSpecies { - ErasureNone = 0, - ErasureMirror3 = 1, - Erasure3Plus1Block = 2, - Erasure3Plus1Stripe = 3, - - Erasure4Plus2Block = 4, - Erasure3Plus2Block = 5, - Erasure4Plus2Stripe = 6, - Erasure3Plus2Stripe = 7, - +struct TErasureParameters; + +struct TErasureType { + + enum EErasureSpecies { + ErasureNone = 0, + ErasureMirror3 = 1, + Erasure3Plus1Block = 2, + Erasure3Plus1Stripe = 3, + + Erasure4Plus2Block = 4, + Erasure3Plus2Block = 5, + Erasure4Plus2Stripe = 6, + Erasure3Plus2Stripe = 7, + ErasureMirror3Plus2 = 8, ErasureMirror3dc = 9, @@ -254,8 +254,8 @@ struct TErasureType { ErasureMirror3of4 = 18, ErasureSpeciesCount = 19 - }; - + }; + static const char *ErasureSpeciesToStr(EErasureSpecies es); enum EErasureFamily { @@ -272,54 +272,54 @@ struct TErasureType { TErasureType(EErasureSpecies s = ErasureNone) : ErasureSpecies(s) {} - + virtual ~TErasureType() = default; TErasureType(const TErasureType &) = default; TErasureType &operator =(const TErasureType &) = default; - + EErasureSpecies GetErasure() const { return ErasureSpecies; } TString ToString() const { Y_VERIFY((ui64)ErasureSpecies < ErasureSpeciesCount); - return ErasureName[ErasureSpecies]; - } - + return ErasureName[ErasureSpecies]; + } + static TString ErasureSpeciesName(ui32 erasureSpecies) { - if (erasureSpecies < ErasureSpeciesCount) { - return ErasureName[erasureSpecies]; - } - TStringStream str; - str << "Unknown" << erasureSpecies; - return str.Str(); - } - + if (erasureSpecies < ErasureSpeciesCount) { + return ErasureName[erasureSpecies]; + } + TStringStream str; + str << "Unknown" << erasureSpecies; + return str.Str(); + } + static EErasureSpecies ErasureSpeciesByName(TString name) { - for (ui32 species = 0; species < TErasureType::ErasureSpeciesCount; ++species) { - if (TErasureType::ErasureName[species] == name) { - return TErasureType::EErasureSpecies(species); - } - } - return TErasureType::ErasureSpeciesCount; - } - + for (ui32 species = 0; species < TErasureType::ErasureSpeciesCount; ++species) { + if (TErasureType::ErasureName[species] == name) { + return TErasureType::EErasureSpecies(species); + } + } + return TErasureType::ErasureSpeciesCount; + } + TErasureType::EErasureFamily ErasureFamily() const; - ui32 ParityParts() const; // 4 + _2_ - ui32 DataParts() const; // _4_ + 2 - ui32 TotalPartCount() const; // _4_+_2_ - ui32 MinimalRestorablePartCount() const; // ? _4_ + 2 - ui32 MinimalBlockSize() const; + ui32 ParityParts() const; // 4 + _2_ + ui32 DataParts() const; // _4_ + 2 + ui32 TotalPartCount() const; // _4_+_2_ + ui32 MinimalRestorablePartCount() const; // ? _4_ + 2 + ui32 MinimalBlockSize() const; // Size of user data contained in the part. ui64 PartUserSize(ui64 dataSize) const; // Size of the part including user data and crcs ui64 PartSize(ECrcMode crcMode, ui64 dataSize) const; ui64 SuggestDataSize(ECrcMode crcMode, ui64 partSize, bool roundDown) const; - ui32 Prime() const; - + ui32 Prime() const; + void SplitData(ECrcMode crcMode, const TString& buffer, TDataPartSet& outPartSet) const; void IncrementalSplitData(ECrcMode crcMode, const TString& buffer, TDataPartSet& outPartSet) const; - + void SplitDiffs(ECrcMode crcMode, ui32 dataSize, const TVector<TDiff> &diffs, TPartDiffSet& outDiffSet) const; void ApplyDiff(ECrcMode crcMode, ui8 *dst, const TVector<TDiff> &diffs) const; void MakeXorDiff(ECrcMode crcMode, ui32 dataSize, const ui8 *src, const TVector<TDiff> &inDiffs, @@ -331,25 +331,25 @@ struct TErasureType { bool restoreFullData, bool restoreParityParts) const; void RestoreData(ECrcMode crcMode, TDataPartSet& partSet, bool restoreParts, bool restoreFullData, bool restoreParityParts) const; - + bool IsSinglePartRequest(ui32 fullDataSize, ui32 shift, ui32 size, ui32 &outPartIdx) const; bool IsPartialDataRequestPossible() const; - bool IsUnknownFullDataSizePartialDataRequestPossible() const; - void AlignPartialDataRequest(ui64 shift, ui64 size, ui64 fullDataSize, ui64 &outShift, ui64 &outSize) const; + bool IsUnknownFullDataSizePartialDataRequestPossible() const; + void AlignPartialDataRequest(ui64 shift, ui64 size, ui64 fullDataSize, ui64 &outShift, ui64 &outSize) const; void BlockSplitRange(ECrcMode crcMode, ui64 blobSize, ui64 wholeBegin, ui64 wholeEnd, TBlockSplitRange *outRange) const; ui64 BlockSplitPartUsedSize(ui64 dataSize, ui32 partIdx) const; - ui32 BlockSplitPartIndex(ui64 offset, ui64 dataSize, ui64 &outPartOffset) const; + ui32 BlockSplitPartIndex(ui64 offset, ui64 dataSize, ui64 &outPartOffset) const; ui64 BlockSplitWholeOffset(ui64 dataSize, ui64 partIdx, ui64 offset) const; - + static const std::array<TString, ErasureSpeciesCount> ErasureName; -protected: +protected: EErasureSpecies ErasureSpecies; - ui32 ColumnSize() const; -}; - + ui32 ColumnSize() const; +}; + bool CheckCrcAtTheEnd(TErasureType::ECrcMode crcMode, const TString& buf); -} - +} + diff --git a/ydb/core/erasure/erasure_ut.cpp b/ydb/core/erasure/erasure_ut.cpp index 24a0d5f5e0..2473bd1650 100644 --- a/ydb/core/erasure/erasure_ut.cpp +++ b/ydb/core/erasure/erasure_ut.cpp @@ -1,9 +1,9 @@ -#include "erasure.h" +#include "erasure.h" #include "ut_util.h" - - -namespace NKikimr { - + + +namespace NKikimr { + void TestMissingPartWithRandomData(TErasureType &groupType, ui32 *missingPartIdx, ui32 missingParts, ui32 dataSize, bool isRestoreParts, bool isRestoreFullData, TString &info) { @@ -306,138 +306,138 @@ Y_UNIT_TEST_SUITE(TErasureTypeTest) { Y_UNIT_TEST(TestEo) { - ui32 species = (ui32)TErasureType::Erasure4Plus2Block; - { + ui32 species = (ui32)TErasureType::Erasure4Plus2Block; + { TErasureType groupType((TErasureType::EErasureSpecies)species); - - ui32 startingDataSize = 248; - - ui32 dataSize = startingDataSize; - { - const ui32 maxMissingParts = 4; - ui32 missingPartIdx[maxMissingParts]; - for (ui32 i = 0; i < maxMissingParts; ++i) { - missingPartIdx[i] = groupType.TotalPartCount(); - } - missingPartIdx[0] = 2; - missingPartIdx[1] = 3; - + + ui32 startingDataSize = 248; + + ui32 dataSize = startingDataSize; + { + const ui32 maxMissingParts = 4; + ui32 missingPartIdx[maxMissingParts]; + for (ui32 i = 0; i < maxMissingParts; ++i) { + missingPartIdx[i] = groupType.TotalPartCount(); + } + missingPartIdx[0] = 2; + missingPartIdx[1] = 3; + ui32 maxMissingPartsTolerable = groupType.TotalPartCount() - groupType.MinimalRestorablePartCount(); - { - ui32 partMask = ~(ui32)0; + { + ui32 partMask = ~(ui32)0; for (ui32 idx = maxMissingPartsTolerable - 1; idx != (ui32)-1; --idx) { - partMask &= ~(ui32)(1 << missingPartIdx[idx]); - } + partMask &= ~(ui32)(1 << missingPartIdx[idx]); + } char mask[33]; for (ui32 idx = 0; idx < 32; ++idx) { mask[idx] = (partMask & ((1ul << 31) >> idx)) ? '1' : '0'; } mask[32] = 0; - + TString errorInfo = Sprintf("species=%d (%s) dataSize=%d partMask=0x%x (%s)", species, TErasureType::ErasureSpeciesName(species).c_str(), dataSize, partMask, mask); - + TString testString; - testString.resize(dataSize); - for (ui32 i = 0; i < testString.size(); ++i) { - ui32 col = (i / 8) % 4; - ui32 row = (i / (2 * 8 * 4)) % 4; + testString.resize(dataSize); + for (ui32 i = 0; i < testString.size(); ++i) { + ui32 col = (i / 8) % 4; + ui32 row = (i / (2 * 8 * 4)) % 4; ui8 val = ui8(1 << col) | ui8(1 << (row + 4)); - ((char*)testString.data())[i] = val; - } - TDataPartSet partSet; - try { + ((char*)testString.data())[i] = val; + } + TDataPartSet partSet; + try { groupType.SplitData(TErasureType::CrcModeNone, testString, partSet); - } catch (yexception ex) { - ex << " [in SplitData while testing " << errorInfo << "]"; - throw ex; - } - + } catch (yexception ex) { + ex << " [in SplitData while testing " << errorInfo << "]"; + throw ex; + } + ui64 partSize = groupType.PartSize(TErasureType::CrcModeNone, dataSize); - for (ui32 part = 0; part < groupType.TotalPartCount(); ++part) { - UNIT_ASSERT_EQUAL_C(partSize, partSet.Parts[part].size(), errorInfo); - } - - TDataPartSet originalPartSet = partSet; - - // Restore full data - for (int type = 0; type < 3; ++type) { - bool isRestoreFullData = false; - bool isRestoreParts = false; - switch (type) { - case 0: - isRestoreFullData = true; - break; - case 1: - isRestoreParts = true; - break; - case 2: - isRestoreFullData = true; - isRestoreParts = true; - break; + for (ui32 part = 0; part < groupType.TotalPartCount(); ++part) { + UNIT_ASSERT_EQUAL_C(partSize, partSet.Parts[part].size(), errorInfo); + } + + TDataPartSet originalPartSet = partSet; + + // Restore full data + for (int type = 0; type < 3; ++type) { + bool isRestoreFullData = false; + bool isRestoreParts = false; + switch (type) { + case 0: + isRestoreFullData = true; + break; + case 1: + isRestoreParts = true; + break; + case 2: + isRestoreFullData = true; + isRestoreParts = true; + break; default: Y_FAIL(); - } - - partSet = originalPartSet; + } + + partSet = originalPartSet; for (ui32 idx = maxMissingPartsTolerable - 1; idx != (ui32)-1; --idx) { - if (missingPartIdx[idx] < partSet.Parts.size()) { - partSet.PartsMask &= partMask; + if (missingPartIdx[idx] < partSet.Parts.size()) { + partSet.PartsMask &= partMask; partSet.Parts[missingPartIdx[idx]].clear(); - } - } - + } + } + TString mode = Sprintf(" restoreParts=%s restoreFullData=%s ", - (isRestoreParts ? "true" : "false"), - (isRestoreFullData ? "true" : "false")); - + (isRestoreParts ? "true" : "false"), + (isRestoreFullData ? "true" : "false")); + TString restoredString; - try { + try { groupType.RestoreData(TErasureType::CrcModeNone, partSet, restoredString, isRestoreParts, isRestoreFullData, isRestoreParts); - } catch (yexception ex) { - ex << " [in RestoreData while testing " << errorInfo << mode << "]"; - throw ex; - } - + } catch (yexception ex) { + ex << " [in RestoreData while testing " << errorInfo << mode << "]"; + throw ex; + } + VERBOSE_COUT("testing " << errorInfo << mode << " (full data)" << Endl); - if (isRestoreFullData) { - UNIT_ASSERT_EQUAL_C(testString.size(), restoredString.size(), errorInfo); - for (ui32 i = 0; i < testString.size(); ++i) { - UNIT_ASSERT_EQUAL_C(((char*)testString.data())[i], ((char*)restoredString.data())[i], - (errorInfo + mode + " (full data)")); - if (((char*)testString.data())[i] != ((char*)restoredString.data())[i]) { + if (isRestoreFullData) { + UNIT_ASSERT_EQUAL_C(testString.size(), restoredString.size(), errorInfo); + for (ui32 i = 0; i < testString.size(); ++i) { + UNIT_ASSERT_EQUAL_C(((char*)testString.data())[i], ((char*)restoredString.data())[i], + (errorInfo + mode + " (full data)")); + if (((char*)testString.data())[i] != ((char*)restoredString.data())[i]) { VERBOSE_COUT("mismatch " << errorInfo << mode << " (full data)" << Endl); - break; - } - } - } - if (isRestoreParts) { + break; + } + } + } + if (isRestoreParts) { for (ui32 idx = maxMissingPartsTolerable - 1; idx != (ui32)-1; --idx) { - if (missingPartIdx[idx] < partSet.Parts.size()) { - UNIT_ASSERT_EQUAL_C(partSet.Parts[missingPartIdx[idx]].size(), - originalPartSet.Parts[missingPartIdx[idx]].size(), errorInfo); + if (missingPartIdx[idx] < partSet.Parts.size()) { + UNIT_ASSERT_EQUAL_C(partSet.Parts[missingPartIdx[idx]].size(), + originalPartSet.Parts[missingPartIdx[idx]].size(), errorInfo); ui32 size = (ui32)originalPartSet.Parts[missingPartIdx[idx]].size(); char *restored = (char*)partSet.Parts[missingPartIdx[idx]].GetDataAt(0); char *original = (char*)originalPartSet.Parts[missingPartIdx[idx]].GetDataAt(0); - for (ui32 i = 0; i < size; ++i) { - UNIT_ASSERT_EQUAL_C(restored[i], original[i], - (errorInfo + mode + Sprintf(" (part %d byte %d)", missingPartIdx[idx], i))); - if (restored[i] != original[i]) { + for (ui32 i = 0; i < size; ++i) { + UNIT_ASSERT_EQUAL_C(restored[i], original[i], + (errorInfo + mode + Sprintf(" (part %d byte %d)", missingPartIdx[idx], i))); + if (restored[i] != original[i]) { VERBOSE_COUT(" wrong part " << errorInfo << mode << Sprintf(" (part %d byte %d)", missingPartIdx[idx], i) << Endl); - break; - } - } - } - } - } - } - } - } - } - } - + break; + } + } + } + } + } + } + } + } + } + } + void BaseCheckDiffSpliting(TErasureType type, ui32 dataSize, ui32 diffCount, ui32 diffSize, ui32 diffOffset) { @@ -623,10 +623,10 @@ Y_UNIT_TEST_SUITE(TErasureTypeTest) { void TestErasure(TErasureType::ECrcMode crcMode, ui32 species) { TErasureType groupType((TErasureType::EErasureSpecies)species); TString erasureName = TErasureType::ErasureName[species]; - + ui32 startingDataSize = 0; ui32 maxDataSize = groupType.MinimalBlockSize() * 8; - + for (ui32 dataSize = startingDataSize; dataSize < maxDataSize; ++dataSize) { //+= groupType.MinimalBlockSize()) const ui32 maxMissingParts = 4; @@ -641,13 +641,13 @@ Y_UNIT_TEST_SUITE(TErasureTypeTest) { ui32 partMask = ~(ui32)0; for (ui32 idx = maxMissingPartsTolerable - 1; idx != (ui32)-1; --idx) { partMask &= ~(ui32)(1 << missingPartIdx[idx]); - } + } char mask[33]; for (ui32 idx = 0; idx < 32; ++idx) { mask[idx] = (partMask & ((1ul << 31) >> idx)) ? '1' : '0'; } mask[32] = 0; - + TString errorInfo = Sprintf("crcMode=%d species=%d (%s) dataSize=%d partMask=0x%x (%s)", (i32)crcMode, species, TErasureType::ErasureSpeciesName(species).c_str(), dataSize, partMask, mask); @@ -710,25 +710,25 @@ Y_UNIT_TEST_SUITE(TErasureTypeTest) { partSet.PartsMask &= partMask; partSet.Parts[missingPartIdx[idx]].clear(); } - } + } partSet.FullDataFragment.UninitializedOwnedWhole(dataSize);; - + TString mode = Sprintf(" restoreParts=%s isRestoreParityParts=%s restoreFullData=%s ", (isRestoreParts ? "true" : "false"), (isRestoreParityParts ? "true" : "false"), (isRestoreFullData ? "true" : "false")); - + VERBOSE_COUT("RestoreData " << errorInfo << Endl); TString restoredString; - try { + try { groupType.RestoreData(crcMode, partSet, restoredString, isRestoreParts, isRestoreFullData, isRestoreParityParts); - } catch (yexception ex) { + } catch (yexception ex) { ex << " [in RestoreData while testing " << errorInfo << mode << "]"; - throw ex; - } - + throw ex; + } + VERBOSE_COUT("testing " << errorInfo << mode << " (full data)" << Endl); if (isRestoreFullData) { UNIT_ASSERT_EQUAL_C(testString.size(), restoredString.size(), errorInfo); @@ -736,7 +736,7 @@ Y_UNIT_TEST_SUITE(TErasureTypeTest) { UNIT_ASSERT_EQUAL_C(((char*)testString.data())[i], ((char*)restoredString.data())[i], (errorInfo + erasureName + mode + " (full data)")); } - } + } if (isRestoreParts) { for (ui32 idx = maxMissingPartsTolerable - 1; idx != (ui32)-1; --idx) { ui32 missingIdx = missingPartIdx[idx]; @@ -757,12 +757,12 @@ Y_UNIT_TEST_SUITE(TErasureTypeTest) { } else { UNIT_ASSERT(partSet.Parts[missingIdx].size() == 0); UNIT_ASSERT(originalPartSet.Parts[missingIdx].size() == 0); - } - } - } - } + } + } + } + } } - + if (maxMissingPartsTolerable == 0) { isComplete = true; } @@ -772,19 +772,19 @@ Y_UNIT_TEST_SUITE(TErasureTypeTest) { break; } if (idx == 0) { - isComplete = true; - } + isComplete = true; + } missingPartIdx[idx] = groupType.TotalPartCount() - 1; - } + } } // while !isComplete } // for datasize } - + Y_UNIT_TEST(TestAllSpeciesCrcWhole1of2) { for (ui32 species = 0; species < (ui32)TErasureType::ErasureSpeciesCount; species += 2) { TestErasure(TErasureType::CrcModeWholePart, species); - } - } + } + } Y_UNIT_TEST(TestAllSpeciesCrcWhole2of2) { for (ui32 species = 1; species < (ui32)TErasureType::ErasureSpeciesCount; species += 2) { @@ -1096,7 +1096,7 @@ Y_UNIT_TEST_SUITE(TErasureTypeTest) { Y_UNIT_TEST(TestBlock42PartialRestore3) { TestBlock42PartialRestore(3); } -} - -} // namespace NKikimr - +} + +} // namespace NKikimr + diff --git a/ydb/core/erasure/ut/ya.make b/ydb/core/erasure/ut/ya.make index 19cd585c41..549ac7f896 100644 --- a/ydb/core/erasure/ut/ya.make +++ b/ydb/core/erasure/ut/ya.make @@ -1,8 +1,8 @@ UNITTEST_FOR(ydb/core/erasure) - -FORK_SUBTESTS() + +FORK_SUBTESTS() SPLIT_FACTOR(30) - + IF (WITH_VALGRIND) TIMEOUT(1800) SIZE(LARGE) @@ -13,13 +13,13 @@ ELSE() ENDIF() OWNER(ddoarn cthulhu fomichev g:kikimr) - -PEERDIR( + +PEERDIR( library/cpp/digest/crc32c -) - -SRCS( - erasure_ut.cpp -) - -END() +) + +SRCS( + erasure_ut.cpp +) + +END() diff --git a/ydb/core/erasure/ya.make b/ydb/core/erasure/ya.make index 41c266c384..63483c1b96 100644 --- a/ydb/core/erasure/ya.make +++ b/ydb/core/erasure/ya.make @@ -1,36 +1,36 @@ -LIBRARY() - -OWNER( +LIBRARY() + +OWNER( cthulhu - ddoarn - fomichev + ddoarn + fomichev va-kuznecov g:kikimr -) - -SRCS( - erasure.cpp - erasure.h +) + +SRCS( + erasure.cpp + erasure.h erasure_rope.cpp erasure_rope.h erasure_perf_test.cpp -) - -PEERDIR( +) + +PEERDIR( library/cpp/actors/util library/cpp/containers/stack_vector library/cpp/digest/crc32c library/cpp/digest/old_crc ydb/core/debug -) - -IF (MSVC) +) + +IF (MSVC) CFLAGS( /wd4503 ) -ENDIF() - -END() +ENDIF() + +END() RECURSE_FOR_TESTS( ut diff --git a/ydb/core/grpc_services/base/base.h b/ydb/core/grpc_services/base/base.h index 1382e29255..44b25c4a5f 100644 --- a/ydb/core/grpc_services/base/base.h +++ b/ydb/core/grpc_services/base/base.h @@ -99,8 +99,8 @@ struct TRpcServices { EvRefreshTokenRequest, // internal call EvGetShardLocations, EvExperimentalStreamQuery, - EvStreamPQWrite, - EvStreamPQRead, + EvStreamPQWrite, + EvStreamPQRead, EvPQReadInfo, EvListOperations, EvExportToYt, @@ -120,10 +120,10 @@ struct TRpcServices { EvExportToS3, EvSelfCheck, EvStreamExecuteScanQuery, - EvPQDropTopic, - EvPQCreateTopic, - EvPQAlterTopic, - EvPQDescribeTopic, + EvPQDropTopic, + EvPQCreateTopic, + EvPQAlterTopic, + EvPQDescribeTopic, EvPQAddReadRule, EvPQRemoveReadRule, EvGetDiskSpaceUsage, @@ -880,9 +880,9 @@ public: } TString GetPeerName() const override { - return Ctx_->GetPeer(); - } - + return Ctx_->GetPeer(); + } + bool SslServer() const { return Ctx_->SslServer(); } diff --git a/ydb/core/grpc_services/grpc_request_check_actor.h b/ydb/core/grpc_services/grpc_request_check_actor.h index ae3817314f..04c72abe6d 100644 --- a/ydb/core/grpc_services/grpc_request_check_actor.h +++ b/ydb/core/grpc_services/grpc_request_check_actor.h @@ -420,21 +420,21 @@ const TVector<TString>& TGrpcRequestCheckActor<TEvent>::GetPermissions() { return permissions; } -// yds behavior -template <> +// yds behavior +template <> inline const TVector<TString>& TGrpcRequestCheckActor<TEvDataStreamsPutRecordRequest>::GetPermissions() { //full list of permissions for compatility. remove old permissions later. - static const TVector<TString> permissions = {"yds.streams.write", "ydb.databases.list", "ydb.databases.create", "ydb.databases.connect"}; + static const TVector<TString> permissions = {"yds.streams.write", "ydb.databases.list", "ydb.databases.create", "ydb.databases.connect"}; return permissions; -} -// yds behavior -template <> +} +// yds behavior +template <> inline const TVector<TString>& TGrpcRequestCheckActor<TEvDataStreamsPutRecordsRequest>::GetPermissions() { //full list of permissions for compatility. remove old permissions later. - static const TVector<TString> permissions = {"yds.streams.write", "ydb.databases.list", "ydb.databases.create", "ydb.databases.connect"}; + static const TVector<TString> permissions = {"yds.streams.write", "ydb.databases.list", "ydb.databases.create", "ydb.databases.connect"}; return permissions; -} - +} + template <typename TEvent> IActor* CreateGrpcRequestCheckActor( const TActorId& owner, diff --git a/ydb/core/grpc_services/grpc_request_proxy.cpp b/ydb/core/grpc_services/grpc_request_proxy.cpp index 2bd21d4db6..a1e7accb87 100644 --- a/ydb/core/grpc_services/grpc_request_proxy.cpp +++ b/ydb/core/grpc_services/grpc_request_proxy.cpp @@ -669,7 +669,7 @@ void TGRpcRequestProxyImpl::StateFunc(TAutoPtr<IEventHandle>& ev, const TActorCo HFunc(TEvDataStreamsSplitShardRequest, PreHandle); HFunc(TEvDataStreamsStartStreamEncryptionRequest, PreHandle); HFunc(TEvDataStreamsStopStreamEncryptionRequest, PreHandle); - + HFunc(TEvProxyRuntimeEvent, PreHandle); default: diff --git a/ydb/core/grpc_services/grpc_request_proxy.h b/ydb/core/grpc_services/grpc_request_proxy.h index bb2da83911..97315f6e9f 100644 --- a/ydb/core/grpc_services/grpc_request_proxy.h +++ b/ydb/core/grpc_services/grpc_request_proxy.h @@ -87,15 +87,15 @@ protected: void Handle(TEvS3ListingRequest::TPtr& ev, const TActorContext& ctx); void Handle(TEvBiStreamPingRequest::TPtr& ev, const TActorContext& ctx); void Handle(TEvExperimentalStreamQueryRequest::TPtr& ev, const TActorContext& ctx); - void Handle(TEvStreamPQWriteRequest::TPtr& ev, const TActorContext& ctx); - void Handle(TEvStreamPQReadRequest::TPtr& ev, const TActorContext& ctx); - void Handle(TEvPQReadInfoRequest::TPtr& ev, const TActorContext& ctx); - void Handle(TEvPQDropTopicRequest::TPtr& ev, const TActorContext& ctx); - void Handle(TEvPQCreateTopicRequest::TPtr& ev, const TActorContext& ctx); - void Handle(TEvPQAlterTopicRequest::TPtr& ev, const TActorContext& ctx); + void Handle(TEvStreamPQWriteRequest::TPtr& ev, const TActorContext& ctx); + void Handle(TEvStreamPQReadRequest::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQReadInfoRequest::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQDropTopicRequest::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQCreateTopicRequest::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQAlterTopicRequest::TPtr& ev, const TActorContext& ctx); void Handle(TEvPQAddReadRuleRequest::TPtr& ev, const TActorContext& ctx); void Handle(TEvPQRemoveReadRuleRequest::TPtr& ev, const TActorContext& ctx); - void Handle(TEvPQDescribeTopicRequest::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQDescribeTopicRequest::TPtr& ev, const TActorContext& ctx); void Handle(TEvExportToYtRequest::TPtr& ev, const TActorContext& ctx); void Handle(TEvExportToS3Request::TPtr& ev, const TActorContext& ctx); void Handle(TEvImportFromS3Request::TPtr& ev, const TActorContext& ctx); diff --git a/ydb/core/grpc_services/rpc_calls.cpp b/ydb/core/grpc_services/rpc_calls.cpp index 7c466d27ca..b94d168147 100644 --- a/ydb/core/grpc_services/rpc_calls.cpp +++ b/ydb/core/grpc_services/rpc_calls.cpp @@ -46,7 +46,7 @@ std::pair<TString, TString> SplitPath(const TMaybe<TString>& database, const TSt std::pair<TString, TString> SplitPath(const TString& path) { auto splitPos = path.find_last_of('/'); if (splitPos == path.npos || splitPos + 1 == path.size()) { - ythrow yexception() << "wrong path format '" << path << "'" ; + ythrow yexception() << "wrong path format '" << path << "'" ; } return {path.substr(0, splitPos), path.substr(splitPos + 1)}; } diff --git a/ydb/core/keyvalue/keyvalue_flat_impl.h b/ydb/core/keyvalue/keyvalue_flat_impl.h index fc316fca51..292f2feff9 100644 --- a/ydb/core/keyvalue/keyvalue_flat_impl.h +++ b/ydb/core/keyvalue/keyvalue_flat_impl.h @@ -38,7 +38,7 @@ constexpr ui64 CollectorMaxErrors = 20; constexpr ui64 PeriodicRefreshMs = 15000; class TKeyValueFlat : public TActor<TKeyValueFlat>, public NTabletFlatExecutor::TTabletExecutedFlat { -protected: +protected: struct TTxInit : public NTabletFlatExecutor::ITransaction { TActorId KeyValueActorId; TKeyValueFlat &Self; @@ -64,7 +64,7 @@ protected: // Init log batching settings alter.SetExecutorAllowLogBatching(true); alter.SetExecutorLogFlushPeriod(TDuration::MicroSeconds(500)); - Self.State.Clear(); + Self.State.Clear(); } else { LOG_DEBUG_S(ctx, NKikimrServices::KEYVALUE, "KeyValue# " << txc.Tablet << " TTxInit flat ReadDb Tree"); if (!LoadStateFromDB(Self.State, txc.DB)) { @@ -84,7 +84,7 @@ protected: } static bool LoadStateFromDB(TKeyValueState& state, NTable::TDatabase& db) { - state.Clear(); + state.Clear(); // Just walk through the DB and read all the keys and values const std::array<ui32, 2> tags {{ KEY_TAG, VALUE_TAG }}; auto mode = NTable::ELookup::GreaterOrEqualThan; @@ -107,11 +107,11 @@ protected: return iter->Last() != NTable::EReady::Page; } - - void Complete(const TActorContext &ctx) override { - Self.InitSchemeComplete(ctx); - Self.CreatedHook(ctx); - } + + void Complete(const TActorContext &ctx) override { + Self.InitSchemeComplete(ctx); + Self.CreatedHook(ctx); + } }; struct TTxRequest : public NTabletFlatExecutor::ITransaction { @@ -252,13 +252,13 @@ protected: void OnDetach(const TActorContext &ctx) override { LOG_DEBUG_S(ctx, NKikimrServices::KEYVALUE, "KeyValue# " << TabletID() << " OnDetach"); - HandleDie(ctx); + HandleDie(ctx); } void OnTabletDead(TEvTablet::TEvTabletDead::TPtr &ev, const TActorContext &ctx) override { LOG_DEBUG_S(ctx, NKikimrServices::KEYVALUE, "KeyValue# " << TabletID() << " OnTabletDead " << ev->Get()->ToString()); - HandleDie(ctx); + HandleDie(ctx); } void OnActivateExecutor(const TActorContext &ctx) override { @@ -424,38 +424,38 @@ public: : TActor(&TThis::StateInit) , TTabletExecutedFlat(info, tablet, new NMiniKQL::TMiniKQLFactory) { - TAutoPtr<TTabletCountersBase> counters( - new TProtobufTabletCounters< - ESimpleCounters_descriptor, - ECumulativeCounters_descriptor, - EPercentileCounters_descriptor, - ETxTypes_descriptor - >()); - State.SetupTabletCounters(counters); - State.Clear(); - } - - virtual void HandleDie(const TActorContext &ctx) - { + TAutoPtr<TTabletCountersBase> counters( + new TProtobufTabletCounters< + ESimpleCounters_descriptor, + ECumulativeCounters_descriptor, + EPercentileCounters_descriptor, + ETxTypes_descriptor + >()); + State.SetupTabletCounters(counters); + State.Clear(); + } + + virtual void HandleDie(const TActorContext &ctx) + { if (CollectorActorId) { ctx.Send(CollectorActorId, new TEvents::TEvPoisonPill); } State.Terminate(ctx); - Die(ctx); - } - - virtual void CreatedHook(const TActorContext &ctx) - { - Y_UNUSED(ctx); - } - - virtual bool HandleHook(STFUNC_SIG) - { - Y_UNUSED(ev); - Y_UNUSED(ctx); - return false; - } - + Die(ctx); + } + + virtual void CreatedHook(const TActorContext &ctx) + { + Y_UNUSED(ctx); + } + + virtual bool HandleHook(STFUNC_SIG) + { + Y_UNUSED(ev); + Y_UNUSED(ctx); + return false; + } + STFUNC(StateInit) { RestoreActorActivity(); LOG_DEBUG_S(ctx, NKikimrServices::KEYVALUE, "KeyValue# " << TabletID() @@ -465,8 +465,8 @@ public: } STFUNC(StateWork) { - if (HandleHook(ev, ctx)) - return; + if (HandleHook(ev, ctx)) + return; RestoreActorActivity(); switch (ev->GetTypeRewrite()) { hFunc(TEvKeyValue::TEvRead, Handle); diff --git a/ydb/core/keyvalue/keyvalue_state.cpp b/ydb/core/keyvalue/keyvalue_state.cpp index 1e1ac46d7e..6b31c463f8 100644 --- a/ydb/core/keyvalue/keyvalue_state.cpp +++ b/ydb/core/keyvalue/keyvalue_state.cpp @@ -69,7 +69,7 @@ TKeyValueState::TKeyValueState() { Clear(); } -void TKeyValueState::Clear() { +void TKeyValueState::Clear() { IsStatePresent = false; IsEmptyDbStart = true; IsDamaged = false; @@ -111,8 +111,8 @@ void TKeyValueState::Clear() { // // Tablet Counters // -void TKeyValueState::SetupTabletCounters(TAutoPtr<TTabletCountersBase> counters) { - TabletCountersPtr = counters; +void TKeyValueState::SetupTabletCounters(TAutoPtr<TTabletCountersBase> counters) { + TabletCountersPtr = counters; TabletCounters = TabletCountersPtr.Get(); } @@ -137,7 +137,7 @@ void TKeyValueState::SetupResourceMetrics(NMetrics::TResourceMetrics* resourceMe void TKeyValueState::CountRequestComplete(NMsgBusProxy::EResponseStatus status, const TRequestStat &stat, const TActorContext &ctx) { - ui64 fullLatencyMs = (TAppData::TimeProvider->Now() - stat.IntermediateCreatedAt).MilliSeconds(); + ui64 fullLatencyMs = (TAppData::TimeProvider->Now() - stat.IntermediateCreatedAt).MilliSeconds(); if (stat.RequestType == TRequestType::WriteOnly) { TabletCounters->Percentile()[COUNTER_LATENCY_FULL_WO].IncrementFor(fullLatencyMs); TabletCounters->Simple()[COUNTER_REQ_WO_IN_FLY].Add((ui64)-1); @@ -205,7 +205,7 @@ void TKeyValueState::CountRequestComplete(NMsgBusProxy::EResponseStatus status, for (const auto& pr : stat.GroupWrittenIops) { ResourceMetrics->WriteIops[pr.first].Increment(pr.second, now); } - + if (status == NMsgBusProxy::MSTATUS_OK) { TabletCounters->Cumulative()[COUNTER_CMD_READ_BYTES_OK].Increment(stat.ReadBytes); TabletCounters->Cumulative()[COUNTER_CMD_READ_OK].Increment(stat.Reads); @@ -323,17 +323,17 @@ void TKeyValueState::CountOverrun() { } void TKeyValueState::CountLatencyBsOps(const TRequestStat &stat) { - ui64 bsDuration = (TAppData::TimeProvider->Now() - stat.KeyvalueStorageRequestSentAt).MilliSeconds(); + ui64 bsDuration = (TAppData::TimeProvider->Now() - stat.KeyvalueStorageRequestSentAt).MilliSeconds(); TabletCounters->Percentile()[COUNTER_LATENCY_BS_OPS].IncrementFor(bsDuration); } void TKeyValueState::CountLatencyBsCollect() { - ui64 collectDurationMs = (TAppData::TimeProvider->Now() - LastCollectStartedAt).MilliSeconds(); + ui64 collectDurationMs = (TAppData::TimeProvider->Now() - LastCollectStartedAt).MilliSeconds(); TabletCounters->Percentile()[COUNTER_LATENCY_BS_COLLECT].IncrementFor(collectDurationMs); } void TKeyValueState::CountLatencyQueue(const TRequestStat &stat) { - ui64 enqueuedMs = (TAppData::TimeProvider->Now() - stat.IntermediateCreatedAt).MilliSeconds(); + ui64 enqueuedMs = (TAppData::TimeProvider->Now() - stat.IntermediateCreatedAt).MilliSeconds(); if (stat.RequestType == TRequestType::WriteOnly) { TabletCounters->Percentile()[COUNTER_LATENCY_QUEUE_WO].IncrementFor(enqueuedMs); } else { @@ -1227,7 +1227,7 @@ void TKeyValueState::CmdDelete(THolder<TIntermediate> &intermediate, ISimpleDb & } void TKeyValueState::CmdWrite(THolder<TIntermediate> &intermediate, ISimpleDb &db, const TActorContext &ctx) { - ui64 unixTime = TAppData::TimeProvider->Now().Seconds(); + ui64 unixTime = TAppData::TimeProvider->Now().Seconds(); for (ui32 i = 0; i < intermediate->Writes.size(); ++i) { auto& request = intermediate->Writes[i]; auto *response = intermediate->Response.AddWriteResult(); @@ -1278,7 +1278,7 @@ void TKeyValueState::CmdCopyRange(THolder<TIntermediate>& intermediate, ISimpleD } void TKeyValueState::CmdConcat(THolder<TIntermediate>& intermediate, ISimpleDb& db, const TActorContext& ctx) { - ui64 unixTime = TAppData::TimeProvider->Now().Seconds(); + ui64 unixTime = TAppData::TimeProvider->Now().Seconds(); for (const auto& request : intermediate->Concats) { auto *response = intermediate->Response.AddConcatResult(); ProcessCmd(request, response, nullptr, db, ctx, intermediate->Stat, unixTime); @@ -2544,17 +2544,17 @@ void TKeyValueState::ReplyError(const TActorContext &ctx, TString errorDescripti } response->Record.SetErrorReason(errorDescription); response->Record.SetStatus(status); - + ResourceMetrics->Network.Increment(response->Record.ByteSize()); - + intermediate->IsReplied = true; ctx.Send(intermediate->RespondTo, response.Release()); if (info) { intermediate->UpdateStat(); OnRequestComplete(intermediate->RequestUid, intermediate->CreatedAtGeneration, intermediate->CreatedAtStep, ctx, info, status, intermediate->Stat); - } else { //metrics change report in OnRequestComplete is not done - ResourceMetrics->TryUpdate(ctx); + } else { //metrics change report in OnRequestComplete is not done + ResourceMetrics->TryUpdate(ctx); RequestInputTime.erase(intermediate->RequestUid); } } @@ -2979,7 +2979,7 @@ void TKeyValueState::OnEvObtainLock(TEvKeyValue::TEvObtainLock::TPtr &ev, const void TKeyValueState::OnEvIntermediate(TIntermediate &intermediate, const TActorContext &ctx) { Y_UNUSED(ctx); CountLatencyBsOps(intermediate.Stat); - intermediate.Stat.LocalBaseTxCreatedAt = TAppData::TimeProvider->Now(); + intermediate.Stat.LocalBaseTxCreatedAt = TAppData::TimeProvider->Now(); } void TKeyValueState::OnEvRequest(TEvKeyValue::TEvRequest::TPtr &ev, const TActorContext &ctx, @@ -2989,7 +2989,7 @@ void TKeyValueState::OnEvRequest(TEvKeyValue::TEvRequest::TPtr &ev, const TActor ResourceMetrics->Network.Increment(request.ByteSize()); ResourceMetrics->TryUpdate(ctx); - + bool hasWrites = request.CmdWriteSize() || request.CmdDeleteRangeSize() || request.CmdRenameSize() || request.CmdCopyRangeSize() || request.CmdConcatSize() || request.HasCmdSetExecutorFastLogPolicy(); diff --git a/ydb/core/keyvalue/keyvalue_state.h b/ydb/core/keyvalue/keyvalue_state.h index 8685fbb0fc..f57a69d35d 100644 --- a/ydb/core/keyvalue/keyvalue_state.h +++ b/ydb/core/keyvalue/keyvalue_state.h @@ -232,10 +232,10 @@ public: } }; - ui32 GetGeneration() const { - return StoredState.UserGeneration; - } - + ui32 GetGeneration() const { + return StoredState.UserGeneration; + } + protected: TKeyValueStoredStateData StoredState; ui32 NextLogoBlobStep; @@ -285,8 +285,8 @@ protected: public: TKeyValueState(); - void Clear(); - void SetupTabletCounters(TAutoPtr<TTabletCountersBase> counters); + void Clear(); + void SetupTabletCounters(TAutoPtr<TTabletCountersBase> counters); void ClearTabletCounters(); TAutoPtr<TTabletCountersBase> TakeTabletCounters(); TTabletCountersBase& GetTabletCounters(); diff --git a/ydb/core/keyvalue/keyvalue_storage_request.cpp b/ydb/core/keyvalue/keyvalue_storage_request.cpp index 03ee2729f8..e9032d2ed3 100644 --- a/ydb/core/keyvalue/keyvalue_storage_request.cpp +++ b/ydb/core/keyvalue/keyvalue_storage_request.cpp @@ -83,7 +83,7 @@ public: , IntermediateResults(std::move(intermediate)) , TabletInfo(const_cast<TTabletStorageInfo*>(tabletInfo)) { - IntermediateResults->Stat.KeyvalueStorageRequestSentAt = TAppData::TimeProvider->Now(); + IntermediateResults->Stat.KeyvalueStorageRequestSentAt = TAppData::TimeProvider->Now(); } void CheckYellow(const TStorageStatusFlags &statusFlags, ui32 currentGroup) { @@ -227,7 +227,7 @@ public: InFlightQueries -= request.ReadQueue.size(); - ui64 durationMs = (TAppData::TimeProvider->Now() - request.SentAt).MilliSeconds(); + ui64 durationMs = (TAppData::TimeProvider->Now() - request.SentAt).MilliSeconds(); IntermediateResults->Stat.GetLatencies.push_back(durationMs); auto resetReadItems = [&](NKikimrProto::EReplyStatus status) { diff --git a/ydb/core/mind/hive/monitoring.cpp b/ydb/core/mind/hive/monitoring.cpp index a99bf21ebf..7e9a83b354 100644 --- a/ydb/core/mind/hive/monitoring.cpp +++ b/ydb/core/mind/hive/monitoring.cpp @@ -1070,7 +1070,7 @@ public: case TTabletTypes::PersQueue: return "PQ"; case TTabletTypes::PersQueueReadBalancer: - return "PQRB"; + return "PQRB"; case TTabletTypes::Dummy: return "DY"; case TTabletTypes::Coordinator: diff --git a/ydb/core/mind/labels_maintainer.cpp b/ydb/core/mind/labels_maintainer.cpp index 75b1fbdfd7..19138c7d23 100644 --- a/ydb/core/mind/labels_maintainer.cpp +++ b/ydb/core/mind/labels_maintainer.cpp @@ -179,7 +179,7 @@ private: LOG_DEBUG_S(ctx, NKikimrServices::LABELS_MAINTAINER, "Removing database labels from " << service << " counters"); - ReplaceSubgroup(root, service); + ReplaceSubgroup(root, service); } } @@ -199,7 +199,7 @@ private: LOG_DEBUG_S(ctx, NKikimrServices::LABELS_MAINTAINER, "Removing database attribute labels from " << service << " counters"); - ReplaceSubgroup(root, service); + ReplaceSubgroup(root, service); } } @@ -240,10 +240,10 @@ private: LOG_DEBUG_S(ctx, NKikimrServices::LABELS_MAINTAINER, "Add labels to service " << service << " counters" << " labels=" << PrintLabels(labels)); - const auto &[svc, subSvc] = ExtractSubServiceName(service); - auto oldGroup = root->GetSubgroup("counters", svc); - if (!subSvc.empty()) - oldGroup = oldGroup->GetSubgroup("subsystem", subSvc); + const auto &[svc, subSvc] = ExtractSubServiceName(service); + auto oldGroup = root->GetSubgroup("counters", svc); + if (!subSvc.empty()) + oldGroup = oldGroup->GetSubgroup("subsystem", subSvc); TIntrusivePtr<NMonitoring::TDynamicCounters> serviceGroup = new NMonitoring::TDynamicCounters; TIntrusivePtr<NMonitoring::TDynamicCounters> curGroup = serviceGroup; @@ -266,8 +266,8 @@ private: } curGroup->RegisterSubgroup(actualLabels->back().first, actualLabels->back().second, oldGroup); - auto rt = GetServiceCountersRoot(root, service); - rt->ReplaceSubgroup(subSvc.empty() ? "counters" : "subsystem", subSvc.empty() ? svc : subSvc, serviceGroup); + auto rt = GetServiceCountersRoot(root, service); + rt->ReplaceSubgroup(subSvc.empty() ? "counters" : "subsystem", subSvc.empty() ? svc : subSvc, serviceGroup); } } } diff --git a/ydb/core/mind/tenant_ut_pool.cpp b/ydb/core/mind/tenant_ut_pool.cpp index 10bd3f0897..9ac6b6843c 100644 --- a/ydb/core/mind/tenant_ut_pool.cpp +++ b/ydb/core/mind/tenant_ut_pool.cpp @@ -200,7 +200,7 @@ void CheckLabels(TIntrusivePtr<NMonitoring::TDynamicCounters> counters, } if (attrServices.contains(service)) labels.insert(attrLabels.begin(), attrLabels.end()); - auto serviceGroup = GetServiceCounters(counters, service, false); + auto serviceGroup = GetServiceCounters(counters, service, false); while (!labels.empty()) { TString name; TString value; @@ -498,7 +498,7 @@ Y_UNIT_TEST_SUITE(TTenantPoolTests) { auto &attrServices = GetDatabaseAttributeSensorServices(); for (auto &service : services) { - auto serviceGroup = GetServiceCounters(counters, service, false); + auto serviceGroup = GetServiceCounters(counters, service, false); auto tenantGroup = serviceGroup->FindSubgroup(DATABASE_LABEL, TENANT1_1_NAME); UNIT_ASSERT(tenantGroup); TIntrusivePtr<NMonitoring::TDynamicCounters> slotGroup; @@ -520,7 +520,7 @@ Y_UNIT_TEST_SUITE(TTenantPoolTests) { CheckLabels(counters, "", ""); for (auto &service : services) { - auto serviceGroup = GetServiceCounters(counters, service, false); + auto serviceGroup = GetServiceCounters(counters, service, false); auto counter = serviceGroup->GetCounter("counter", true); UNIT_ASSERT(*counter == 0); *counter = 1; @@ -533,7 +533,7 @@ Y_UNIT_TEST_SUITE(TTenantPoolTests) { CheckLabels(counters, "", ""); for (auto &service : services) { - auto serviceGroup = GetServiceCounters(counters, service, false); + auto serviceGroup = GetServiceCounters(counters, service, false); auto counter = serviceGroup->GetCounter("counter", true); UNIT_ASSERT(*counter == 0); } @@ -588,7 +588,7 @@ Y_UNIT_TEST_SUITE(TTenantPoolTests) { auto &attrServices = GetDatabaseAttributeSensorServices(); for (auto &service : services) { - auto serviceGroup = GetServiceCounters(counters, service, false); + auto serviceGroup = GetServiceCounters(counters, service, false); auto tenantGroup = serviceGroup->FindSubgroup(DATABASE_LABEL, TENANT1_1_NAME); UNIT_ASSERT(tenantGroup); NMonitoring::TDynamicCounterPtr slotGroup; @@ -609,7 +609,7 @@ Y_UNIT_TEST_SUITE(TTenantPoolTests) { runtime.WaitForHiveState({{{DOMAIN1_NAME, 1, 1, 1}}}); for (auto &service : services) { - auto serviceGroup = GetServiceCounters(counters, service, false); + auto serviceGroup = GetServiceCounters(counters, service, false); auto tenantGroup = serviceGroup->FindSubgroup(DATABASE_LABEL, CanonizePath(DOMAIN1_NAME)); UNIT_ASSERT(tenantGroup); NMonitoring::TDynamicCounterPtr slotGroup; @@ -630,7 +630,7 @@ Y_UNIT_TEST_SUITE(TTenantPoolTests) { runtime.WaitForHiveState({}); for (auto &service : services) { - auto serviceGroup = GetServiceCounters(counters, service, false); + auto serviceGroup = GetServiceCounters(counters, service, false); auto tenantGroup = serviceGroup->FindSubgroup(DATABASE_LABEL, "<none>"); UNIT_ASSERT(tenantGroup); NMonitoring::TDynamicCounterPtr slotGroup; diff --git a/ydb/core/persqueue/actor_persqueue_client_iface.h b/ydb/core/persqueue/actor_persqueue_client_iface.h index 7d519763af..2f8a8d4ab8 100644 --- a/ydb/core/persqueue/actor_persqueue_client_iface.h +++ b/ydb/core/persqueue/actor_persqueue_client_iface.h @@ -1,43 +1,43 @@ -#pragma once - +#pragma once + #include <ydb/core/protos/pqconfig.pb.h> #include <ydb/public/sdk/cpp/client/ydb_driver/driver.h> #include <ydb/public/sdk/cpp/client/ydb_persqueue_core/persqueue.h> -#include <library/cpp/actors/core/actor.h> +#include <library/cpp/actors/core/actor.h> #include <library/cpp/logger/log.h> - + #include <util/datetime/base.h> - -namespace NKikimr::NPQ { - + +namespace NKikimr::NPQ { + class IPersQueueMirrorReaderFactory { -public: +public: virtual void Initialize(const NKikimrPQ::TPQConfig::TPQLibSettings& settings) const = 0; - + virtual std::shared_ptr<NYdb::ICredentialsProviderFactory> GetCredentialsProvider( const NKikimrPQ::TMirrorPartitionConfig::TCredentials& cred ) const = 0; - + virtual std::shared_ptr<NYdb::NPersQueue::IReadSession> GetReadSession( const NKikimrPQ::TMirrorPartitionConfig& config, ui32 partition, std::shared_ptr<NYdb::ICredentialsProviderFactory> credentialsProviderFactory, ui64 maxMemoryUsageBytes ) const = 0; - + virtual ~IPersQueueMirrorReaderFactory() = default; -}; - +}; + class TPersQueueMirrorReaderFactory : public IPersQueueMirrorReaderFactory { -public: +public: void Initialize(const NKikimrPQ::TPQConfig::TPQLibSettings& settings) const override { auto driverConfig = NYdb::TDriverConfig() .SetNetworkThreadsNum(settings.GetThreadsCount()); Driver = std::make_shared<NYdb::TDriver>(driverConfig); } - + std::shared_ptr<NYdb::ICredentialsProviderFactory> GetCredentialsProvider( const NKikimrPQ::TMirrorPartitionConfig::TCredentials& cred ) const override { @@ -50,7 +50,7 @@ public: } } } - + std::shared_ptr<NYdb::NPersQueue::IReadSession> GetReadSession( const NKikimrPQ::TMirrorPartitionConfig& config, ui32 partition, @@ -64,7 +64,7 @@ public: if (config.HasDatabase()) { clientSettings.Database(config.GetDatabase()); } - + NYdb::NPersQueue::TReadSessionSettings settings = NYdb::NPersQueue::TReadSessionSettings() .ConsumerName(config.GetConsumer()) .MaxMemoryUsageBytes(maxMemoryUsageBytes) @@ -85,6 +85,6 @@ public: private: mutable std::shared_ptr<NYdb::TDriver> Driver; -}; - -} // namespace NKikimr::NSQS +}; + +} // namespace NKikimr::NSQS diff --git a/ydb/core/persqueue/blob.cpp b/ydb/core/persqueue/blob.cpp index bc93f75de9..7727f12a13 100644 --- a/ydb/core/persqueue/blob.cpp +++ b/ydb/core/persqueue/blob.cpp @@ -1,92 +1,92 @@ -#include "blob.h" +#include "blob.h" #include "type_codecs.h" #include <util/string/builder.h> -#include <util/string/escape.h> +#include <util/string/escape.h> #include <util/system/unaligned_mem.h> - -namespace NKikimr { -namespace NPQ { - - + +namespace NKikimr { +namespace NPQ { + + TBlobIterator::TBlobIterator(const TKey& key, const TString& blob) -: Key(key) -, Data(blob.c_str()) -, End(Data + blob.size()) -, Batch() -, Offset(key.GetOffset()) -, Count(0) -, InternalPartsCount(0) -{ - Y_VERIFY(Data != End); - ParseBatch(true); -} - -void TBlobIterator::ParseBatch(bool isFirst) { - Y_VERIFY(Data < End); - auto header = ExtractHeader(Data, End - Data); - Y_VERIFY(header.GetOffset() == Offset); - if (isFirst) - Y_VERIFY(header.GetPartNo() == Key.GetPartNo()); - Count += header.GetCount(); - Offset += header.GetCount(); - InternalPartsCount += header.GetInternalPartsCount(); - Y_VERIFY(Count <= Key.GetCount()); - Y_VERIFY(InternalPartsCount <= Key.GetInternalPartsCount()); - - Batch = TBatch(header, Data + sizeof(ui16) + header.ByteSize()); -} - -bool TBlobIterator::IsValid() -{ - return Data != End; -} - -bool TBlobIterator::Next() -{ - Y_VERIFY(IsValid()); - auto& header = Batch.Header; - Data += header.GetPayloadSize() + sizeof(ui16) + header.ByteSize(); - if (Data == End) { //this was last batch - Y_VERIFY(Count == Key.GetCount()); - Y_VERIFY(InternalPartsCount == Key.GetInternalPartsCount()); - return false; - } - ParseBatch(false); - return true; -} - -const TBatch& TBlobIterator::GetBatch() -{ - Y_VERIFY(IsValid()); - return Batch; -} - +: Key(key) +, Data(blob.c_str()) +, End(Data + blob.size()) +, Batch() +, Offset(key.GetOffset()) +, Count(0) +, InternalPartsCount(0) +{ + Y_VERIFY(Data != End); + ParseBatch(true); +} + +void TBlobIterator::ParseBatch(bool isFirst) { + Y_VERIFY(Data < End); + auto header = ExtractHeader(Data, End - Data); + Y_VERIFY(header.GetOffset() == Offset); + if (isFirst) + Y_VERIFY(header.GetPartNo() == Key.GetPartNo()); + Count += header.GetCount(); + Offset += header.GetCount(); + InternalPartsCount += header.GetInternalPartsCount(); + Y_VERIFY(Count <= Key.GetCount()); + Y_VERIFY(InternalPartsCount <= Key.GetInternalPartsCount()); + + Batch = TBatch(header, Data + sizeof(ui16) + header.ByteSize()); +} + +bool TBlobIterator::IsValid() +{ + return Data != End; +} + +bool TBlobIterator::Next() +{ + Y_VERIFY(IsValid()); + auto& header = Batch.Header; + Data += header.GetPayloadSize() + sizeof(ui16) + header.ByteSize(); + if (Data == End) { //this was last batch + Y_VERIFY(Count == Key.GetCount()); + Y_VERIFY(InternalPartsCount == Key.GetInternalPartsCount()); + return false; + } + ParseBatch(false); + return true; +} + +const TBatch& TBlobIterator::GetBatch() +{ + Y_VERIFY(IsValid()); + return Batch; +} + void CheckBlob(const TKey& key, const TString& blob) -{ - for (TBlobIterator it(key, blob); it.IsValid(); it.Next()); -} - - -void TClientBlob::Serialize(TBuffer& res) const -{ - ui32 totalSize = GetBlobSize(); - ui32 psize = res.Size(); - res.Reserve(res.Size() + totalSize); - res.Append((const char*)&totalSize, sizeof(ui32)); - res.Append((const char*)&SeqNo, sizeof(ui64)); - ui8 outputUncompressedSize = UncompressedSize == 0 ? 0 : HAS_US; +{ + for (TBlobIterator it(key, blob); it.IsValid(); it.Next()); +} + + +void TClientBlob::Serialize(TBuffer& res) const +{ + ui32 totalSize = GetBlobSize(); + ui32 psize = res.Size(); + res.Reserve(res.Size() + totalSize); + res.Append((const char*)&totalSize, sizeof(ui32)); + res.Append((const char*)&SeqNo, sizeof(ui64)); + ui8 outputUncompressedSize = UncompressedSize == 0 ? 0 : HAS_US; ui8 outputKinesisData = PartitionKey.empty() ? 0 : HAS_KINESIS; - if (PartData) { + if (PartData) { ui8 hasPartDataAndTS = HAS_PARTDATA + HAS_TS + HAS_TS2 + outputUncompressedSize + outputKinesisData; //mask - res.Append((const char*)&hasPartDataAndTS, sizeof(char)); - res.Append((const char*)&(PartData->PartNo), sizeof(ui16)); - res.Append((const char*)&(PartData->TotalParts), sizeof(ui16)); - res.Append((const char*)&(PartData->TotalSize), sizeof(ui32)); - } else { + res.Append((const char*)&hasPartDataAndTS, sizeof(char)); + res.Append((const char*)&(PartData->PartNo), sizeof(ui16)); + res.Append((const char*)&(PartData->TotalParts), sizeof(ui16)); + res.Append((const char*)&(PartData->TotalSize), sizeof(ui32)); + } else { ui8 hasTS = HAS_TS + HAS_TS2 + outputUncompressedSize + outputKinesisData; //mask - res.Append((const char*)&hasTS, sizeof(char)); - } + res.Append((const char*)&hasTS, sizeof(char)); + } if (outputKinesisData) { ui8 partitionKeySize = PartitionKey.size(); @@ -101,47 +101,47 @@ void TClientBlob::Serialize(TBuffer& res) const ui64 createTimestampMs = CreateTimestamp.MilliSeconds(); res.Append((const char*)&writeTimestampMs, sizeof(ui64)); res.Append((const char*)&createTimestampMs, sizeof(ui64)); - if (outputUncompressedSize) - res.Append((const char*)&(UncompressedSize), sizeof(ui32)); - - ui16 sz = SourceId.size(); - res.Append((const char*)&sz, sizeof(ui16)); + if (outputUncompressedSize) + res.Append((const char*)&(UncompressedSize), sizeof(ui32)); + + ui16 sz = SourceId.size(); + res.Append((const char*)&sz, sizeof(ui16)); res.Append(SourceId.data(), SourceId.size()); res.Append(Data.data(), Data.size()); - Y_VERIFY(res.Size() == psize + totalSize); -} - -TClientBlob TClientBlob::Deserialize(const char* data, ui32 size) -{ - Y_VERIFY(size > OVERHEAD); + Y_VERIFY(res.Size() == psize + totalSize); +} + +TClientBlob TClientBlob::Deserialize(const char* data, ui32 size) +{ + Y_VERIFY(size > OVERHEAD); ui32 totalSize = ReadUnaligned<ui32>(data); - Y_VERIFY(size >= totalSize); - const char *end = data + totalSize; - data += sizeof(ui32); + Y_VERIFY(size >= totalSize); + const char *end = data + totalSize; + data += sizeof(ui32); ui64 seqNo = ReadUnaligned<ui64>(data); - data += sizeof(ui64); - TMaybe<TPartData> partData; - bool hasPartData = (data[0] & HAS_PARTDATA);//data[0] is mask - bool hasTS = (data[0] & HAS_TS); - bool hasTS2 = (data[0] & HAS_TS2); - bool hasUS = (data[0] & HAS_US); + data += sizeof(ui64); + TMaybe<TPartData> partData; + bool hasPartData = (data[0] & HAS_PARTDATA);//data[0] is mask + bool hasTS = (data[0] & HAS_TS); + bool hasTS2 = (data[0] & HAS_TS2); + bool hasUS = (data[0] & HAS_US); bool hasKinesisData = (data[0] & HAS_KINESIS); - ++data; + ++data; TString partitionKey; TString explicitHashKey; - if (hasPartData) { + if (hasPartData) { ui16 partNo = ReadUnaligned<ui16>(data); - data += sizeof(ui16); + data += sizeof(ui16); ui16 totalParts = ReadUnaligned<ui16>(data); - data += sizeof(ui16); + data += sizeof(ui16); ui32 totalSize = ReadUnaligned<ui32>(data); - data += sizeof(ui32); - partData = TPartData{partNo, totalParts, totalSize}; - } - + data += sizeof(ui32); + partData = TPartData{partNo, totalParts, totalSize}; + } + if (hasKinesisData) { ui8 keySize = ReadUnaligned<ui8>(data); data += sizeof(ui8); @@ -155,63 +155,63 @@ TClientBlob TClientBlob::Deserialize(const char* data, ui32 size) TInstant writeTimestamp; TInstant createTimestamp; - ui32 us = 0; - if (hasTS) { + ui32 us = 0; + if (hasTS) { writeTimestamp = TInstant::MilliSeconds(ReadUnaligned<ui64>(data)); - data += sizeof(ui64); - } - if (hasTS2) { + data += sizeof(ui64); + } + if (hasTS2) { createTimestamp = TInstant::MilliSeconds(ReadUnaligned<ui64>(data)); - data += sizeof(ui64); - } - if (hasUS) { - us = ReadUnaligned<ui32>(data); - data += sizeof(ui32); - } - - Y_VERIFY(data < end); + data += sizeof(ui64); + } + if (hasUS) { + us = ReadUnaligned<ui32>(data); + data += sizeof(ui32); + } + + Y_VERIFY(data < end); ui16 sz = ReadUnaligned<ui16>(data); - data += sizeof(ui16); - Y_VERIFY(data + sz < end); + data += sizeof(ui16); + Y_VERIFY(data + sz < end); TString sourceId(data, sz); - data += sz; - Y_VERIFY(data < end, "size %u SeqNo %" PRIu64 " SourceId %s", size, seqNo, sourceId.c_str()); + data += sz; + Y_VERIFY(data < end, "size %u SeqNo %" PRIu64 " SourceId %s", size, seqNo, sourceId.c_str()); TString dt(data, end - data); return TClientBlob(sourceId, seqNo, dt, std::move(partData), writeTimestamp, createTimestamp, us, partitionKey, explicitHashKey); -} - +} + TString TBatch::Serialize() { - Y_VERIFY(Packed); + Y_VERIFY(Packed); TString res; - ui16 sz = Header.ByteSize(); - bool rs = Header.SerializeToString(&res); - Y_VERIFY(rs); - return TStringBuf((const char*)&sz, sizeof(ui16)) + res + PackedData; -} - -template <typename TCodec> + ui16 sz = Header.ByteSize(); + bool rs = Header.SerializeToString(&res); + Y_VERIFY(rs); + return TStringBuf((const char*)&sz, sizeof(ui16)) + res + PackedData; +} + +template <typename TCodec> TAutoPtr<NScheme::IChunkCoder> MakeChunk(TAutoPtr<TFlatBlobDataOutputStream>& output) -{ - output.Reset(new TFlatBlobDataOutputStream); - TCodec codec; - return codec.MakeChunk(output.Get()); -} - +{ + output.Reset(new TFlatBlobDataOutputStream); + TCodec codec; + return codec.MakeChunk(output.Get()); +} + void OutputChunk(TAutoPtr<NScheme::IChunkCoder> chunk, TAutoPtr<TFlatBlobDataOutputStream> output, TBuffer& res) -{ - chunk->Seal(); +{ + chunk->Seal(); ui32 size = output->CurrentBuffer().size(); - res.Append((const char*)&size, sizeof(ui32)); + res.Append((const char*)&size, sizeof(ui32)); res.Append(output->CurrentBuffer().data(), output->CurrentBuffer().size()); -} - -void TBatch::Pack() { - if (Packed) - return; - Packed = true; - TBuffer res; - +} + +void TBatch::Pack() { + if (Packed) + return; + Packed = true; + TBuffer res; + bool hasUncompressed = false; bool hasKinesis = false; for (ui32 i = 0; i < Blobs.size(); ++i) { @@ -223,106 +223,106 @@ void TBatch::Pack() { } } - Header.SetFormat(NKikimrPQ::TBatchHeader::ECompressed); + Header.SetFormat(NKikimrPQ::TBatchHeader::ECompressed); Header.SetHasKinesis(hasKinesis); - ui32 totalCount = Blobs.size(); - Y_VERIFY(totalCount == Header.GetCount() + Header.GetInternalPartsCount()); - ui32 cnt = 0; + ui32 totalCount = Blobs.size(); + Y_VERIFY(totalCount == Header.GetCount() + Header.GetInternalPartsCount()); + ui32 cnt = 0; THashMap<TStringBuf, ui32> reorderMap; - for (ui32 i = 0; i < Blobs.size(); ++i) { - if (Blobs[i].IsLastPart()) - ++cnt; - ++reorderMap[TStringBuf(Blobs[i].SourceId)]; - } - Y_VERIFY(cnt == Header.GetCount()); + for (ui32 i = 0; i < Blobs.size(); ++i) { + if (Blobs[i].IsLastPart()) + ++cnt; + ++reorderMap[TStringBuf(Blobs[i].SourceId)]; + } + Y_VERIFY(cnt == Header.GetCount()); TVector<ui32> start(reorderMap.size(), 0); TVector<ui32> pos(Blobs.size(), 0); - ui32 sum = 0; - ui32 i = 0; - for (auto it = reorderMap.begin(); it != reorderMap.end(); ++it) { - start[i] = sum; - sum += it->second; - it->second = i; - ++i; - } - for (ui32 i = 0; i < Blobs.size(); ++i) { - pos[start[reorderMap[TStringBuf(Blobs[i].SourceId)]]++] = i; - } - TAutoPtr<TFlatBlobDataOutputStream> output; - - //output order - { + ui32 sum = 0; + ui32 i = 0; + for (auto it = reorderMap.begin(); it != reorderMap.end(); ++it) { + start[i] = sum; + sum += it->second; + it->second = i; + ++i; + } + for (ui32 i = 0; i < Blobs.size(); ++i) { + pos[start[reorderMap[TStringBuf(Blobs[i].SourceId)]]++] = i; + } + TAutoPtr<TFlatBlobDataOutputStream> output; + + //output order + { auto chunk = MakeChunk<NScheme::TVarIntCodec<ui32,false>>(output); - for (const auto& p : pos) { - chunk->AddData((const char*)&p, sizeof(p)); - } - ui32 size = start.size(); - chunk->AddData((const char*)&size, sizeof(size)); - for (const auto& p : start) { - chunk->AddData((const char*)&p, sizeof(p)); - } - OutputChunk(chunk, output, res); - } - - //output SourceId - { + for (const auto& p : pos) { + chunk->AddData((const char*)&p, sizeof(p)); + } + ui32 size = start.size(); + chunk->AddData((const char*)&size, sizeof(size)); + for (const auto& p : start) { + chunk->AddData((const char*)&p, sizeof(p)); + } + OutputChunk(chunk, output, res); + } + + //output SourceId + { auto chunk = MakeChunk<NScheme::TVarLenCodec<false>>(output); - for (auto it = reorderMap.begin(); it != reorderMap.end(); ++it) { + for (auto it = reorderMap.begin(); it != reorderMap.end(); ++it) { chunk->AddData(it->first.data(), it->first.size()); - } - OutputChunk(chunk, output, res); - } - - //output SeqNo - { + } + OutputChunk(chunk, output, res); + } + + //output SeqNo + { auto chunk = MakeChunk<NScheme::TDeltaVarIntCodec<ui64, false>>(output); - for (const auto& p : pos) { - chunk->AddData((const char*)&Blobs[p].SeqNo, sizeof(ui64)); - } - OutputChunk(chunk, output, res); - } - - //output Data - { + for (const auto& p : pos) { + chunk->AddData((const char*)&Blobs[p].SeqNo, sizeof(ui64)); + } + OutputChunk(chunk, output, res); + } + + //output Data + { auto chunk = MakeChunk<NScheme::TVarLenCodec<false>>(output); - for (const auto& p : pos) { + for (const auto& p : pos) { chunk->AddData(Blobs[p].Data.data(), Blobs[p].Data.size()); - } - OutputChunk(chunk, output, res); - } - - //output PartData::Pos + payload - { + } + OutputChunk(chunk, output, res); + } + + //output PartData::Pos + payload + { auto chunk = MakeChunk<NScheme::TVarIntCodec<ui32, false>>(output); - ui32 cnt = 0; - for (ui32 i = 0; i < Blobs.size(); ++i) { - if (Blobs[i].PartData) - ++cnt; - } - chunk->AddData((const char*)&cnt, sizeof(ui32)); - for (ui32 i = 0; i < Blobs.size(); ++i) { - if (Blobs[i].PartData) { - chunk->AddData((const char*)&i, sizeof(ui32)); - ui32 t = Blobs[i].PartData->PartNo; - chunk->AddData((const char*)&t, sizeof(ui32)); - t = Blobs[i].PartData->TotalParts; - chunk->AddData((const char*)&t, sizeof(ui32)); - chunk->AddData((const char*)&Blobs[i].PartData->TotalSize, sizeof(ui32)); - } - } - OutputChunk(chunk, output, res); - } - - //output Wtime - { + ui32 cnt = 0; + for (ui32 i = 0; i < Blobs.size(); ++i) { + if (Blobs[i].PartData) + ++cnt; + } + chunk->AddData((const char*)&cnt, sizeof(ui32)); + for (ui32 i = 0; i < Blobs.size(); ++i) { + if (Blobs[i].PartData) { + chunk->AddData((const char*)&i, sizeof(ui32)); + ui32 t = Blobs[i].PartData->PartNo; + chunk->AddData((const char*)&t, sizeof(ui32)); + t = Blobs[i].PartData->TotalParts; + chunk->AddData((const char*)&t, sizeof(ui32)); + chunk->AddData((const char*)&Blobs[i].PartData->TotalSize, sizeof(ui32)); + } + } + OutputChunk(chunk, output, res); + } + + //output Wtime + { auto chunk = MakeChunk<NScheme::TDeltaVarIntCodec<ui64, false>>(output); - for (ui32 i = 0; i < Blobs.size(); ++i) { + for (ui32 i = 0; i < Blobs.size(); ++i) { ui64 writeTimestampMs = Blobs[i].WriteTimestamp.MilliSeconds(); chunk->AddData((const char*)&writeTimestampMs, sizeof(ui64)); - } - OutputChunk(chunk, output, res); - } - + } + OutputChunk(chunk, output, res); + } + if (hasKinesis) { { auto chunk = MakeChunk<NScheme::TVarLenCodec<false>>(output); @@ -341,173 +341,173 @@ void TBatch::Pack() { } } - //output Ctime - { + //output Ctime + { auto chunk = MakeChunk<NScheme::TDeltaVarIntCodec<ui64, false>>(output); - for (ui32 i = 0; i < Blobs.size(); ++i) { + for (ui32 i = 0; i < Blobs.size(); ++i) { ui64 createTimestampMs = Blobs[i].CreateTimestamp.MilliSeconds(); chunk->AddData((const char*)&createTimestampMs, sizeof(ui64)); - } - OutputChunk(chunk, output, res); - } - - //output Uncompressed - if (hasUncompressed) { - auto chunk = MakeChunk<NScheme::TVarIntCodec<ui32, false>>(output); - for (ui32 i = 0; i < Blobs.size(); ++i) { - chunk->AddData((const char*)&Blobs[i].UncompressedSize, sizeof(ui32)); - } - OutputChunk(chunk, output, res); - } - + } + OutputChunk(chunk, output, res); + } + + //output Uncompressed + if (hasUncompressed) { + auto chunk = MakeChunk<NScheme::TVarIntCodec<ui32, false>>(output); + for (ui32 i = 0; i < Blobs.size(); ++i) { + chunk->AddData((const char*)&Blobs[i].UncompressedSize, sizeof(ui32)); + } + OutputChunk(chunk, output, res); + } + PackedData = TString{res.Data(), res.Size()}; - Header.SetPayloadSize(PackedData.size()); - - if (GetPackedSize() > GetUnpackedSize() + GetMaxHeaderSize()) { //packing is not effective, write as-is - Header.SetFormat(NKikimrPQ::TBatchHeader::EUncompressed); - res.Clear(); - for (ui32 i = 0; i < Blobs.size(); ++i) { - Blobs[i].Serialize(res); - } + Header.SetPayloadSize(PackedData.size()); + + if (GetPackedSize() > GetUnpackedSize() + GetMaxHeaderSize()) { //packing is not effective, write as-is + Header.SetFormat(NKikimrPQ::TBatchHeader::EUncompressed); + res.Clear(); + for (ui32 i = 0; i < Blobs.size(); ++i) { + Blobs[i].Serialize(res); + } PackedData = TString{res.Data(), res.Size()}; - Header.SetPayloadSize(PackedData.size()); - } + Header.SetPayloadSize(PackedData.size()); + } TVector<TClientBlob> tmp; - Blobs.swap(tmp); - InternalPartsPos.resize(0); - Y_VERIFY(GetPackedSize() <= GetUnpackedSize() + GetMaxHeaderSize()); //be sure that PackedSize is not bigger than packed size for packing type 0 -} - -void TBatch::Unpack() { - if (!Packed) - return; - Packed = false; - Y_VERIFY(Blobs.empty()); - UnpackTo(&Blobs); - Y_VERIFY(InternalPartsPos.empty()); - for (ui32 i = 0; i < Blobs.size(); ++i) { - if (!Blobs[i].IsLastPart()) - InternalPartsPos.push_back(i); - } - Y_VERIFY(InternalPartsPos.size() == GetInternalPartsCount()); + Blobs.swap(tmp); + InternalPartsPos.resize(0); + Y_VERIFY(GetPackedSize() <= GetUnpackedSize() + GetMaxHeaderSize()); //be sure that PackedSize is not bigger than packed size for packing type 0 +} + +void TBatch::Unpack() { + if (!Packed) + return; + Packed = false; + Y_VERIFY(Blobs.empty()); + UnpackTo(&Blobs); + Y_VERIFY(InternalPartsPos.empty()); + for (ui32 i = 0; i < Blobs.size(); ++i) { + if (!Blobs[i].IsLastPart()) + InternalPartsPos.push_back(i); + } + Y_VERIFY(InternalPartsPos.size() == GetInternalPartsCount()); TString tmp; - tmp.swap(PackedData); -} - + tmp.swap(PackedData); +} + void TBatch::UnpackTo(TVector<TClientBlob> *blobs) -{ - Y_VERIFY(!PackedData.empty()); - auto type = Header.GetFormat(); - switch (type) { - case NKikimrPQ::TBatchHeader::EUncompressed: - UnpackToType0(blobs); - break; - case NKikimrPQ::TBatchHeader::ECompressed: - UnpackToType1(blobs); - break; - default: - Y_FAIL("uknown type"); - }; -} - +{ + Y_VERIFY(!PackedData.empty()); + auto type = Header.GetFormat(); + switch (type) { + case NKikimrPQ::TBatchHeader::EUncompressed: + UnpackToType0(blobs); + break; + case NKikimrPQ::TBatchHeader::ECompressed: + UnpackToType1(blobs); + break; + default: + Y_FAIL("uknown type"); + }; +} + NScheme::TDataRef GetChunk(const char*& data, const char *end) -{ +{ ui32 size = ReadUnaligned<ui32>(data); - data += sizeof(ui32) + size; - Y_VERIFY(data <= end); + data += sizeof(ui32) + size; + Y_VERIFY(data <= end); return NScheme::TDataRef(data - size, size); -} - +} + void TBatch::UnpackToType1(TVector<TClientBlob> *blobs) { - Y_VERIFY(Header.GetFormat() == NKikimrPQ::TBatchHeader::ECompressed); - Y_VERIFY(!PackedData.empty()); - ui32 totalBlobs = Header.GetCount() + Header.GetInternalPartsCount(); - ui32 partsSize = 0; + Y_VERIFY(Header.GetFormat() == NKikimrPQ::TBatchHeader::ECompressed); + Y_VERIFY(!PackedData.empty()); + ui32 totalBlobs = Header.GetCount() + Header.GetInternalPartsCount(); + ui32 partsSize = 0; TVector<ui32> end; TVector<ui32> pos; - pos.reserve(totalBlobs); + pos.reserve(totalBlobs); const char* data = PackedData.data(); const char* dataEnd = PackedData.data() + PackedData.size(); - ui32 sourceIdCount = 0; + ui32 sourceIdCount = 0; TVector<TString> sourceIds; - + NScheme::TTypeCodecs ui32Codecs(NScheme::NTypeIds::Uint32), ui64Codecs(NScheme::NTypeIds::Uint64), stringCodecs(NScheme::NTypeIds::String); - //read order - { + //read order + { auto chunk = NScheme::IChunkDecoder::ReadChunk(GetChunk(data, dataEnd), &ui32Codecs); - auto iter = chunk->MakeIterator(); - for (ui32 i = 0; i < totalBlobs; ++i) { - pos.push_back(*((ui32*)iter->Next().Data())); - } - sourceIdCount = *((ui32*)iter->Next().Data()); - end.reserve(sourceIdCount); - for (ui32 i = 0; i < sourceIdCount; ++i) { - end.push_back(*((ui32*)iter->Next().Data())); - } - } - - sourceIds.reserve(sourceIdCount); - //read SourceId - { + auto iter = chunk->MakeIterator(); + for (ui32 i = 0; i < totalBlobs; ++i) { + pos.push_back(*((ui32*)iter->Next().Data())); + } + sourceIdCount = *((ui32*)iter->Next().Data()); + end.reserve(sourceIdCount); + for (ui32 i = 0; i < sourceIdCount; ++i) { + end.push_back(*((ui32*)iter->Next().Data())); + } + } + + sourceIds.reserve(sourceIdCount); + //read SourceId + { auto chunk = NScheme::IChunkDecoder::ReadChunk(GetChunk(data, dataEnd), &stringCodecs); - auto iter = chunk->MakeIterator(); - for (ui32 i = 0; i < sourceIdCount; ++i) { - auto ref = iter->Next(); - sourceIds.emplace_back(ref.Data(), ref.Size()); - } - } + auto iter = chunk->MakeIterator(); + for (ui32 i = 0; i < sourceIdCount; ++i) { + auto ref = iter->Next(); + sourceIds.emplace_back(ref.Data(), ref.Size()); + } + } TVector<ui64> seqNo; - seqNo.reserve(totalBlobs); - - //read SeqNo - { + seqNo.reserve(totalBlobs); + + //read SeqNo + { auto chunk = NScheme::IChunkDecoder::ReadChunk(GetChunk(data, dataEnd), &ui64Codecs); - auto iter = chunk->MakeIterator(); - for (ui32 i = 0; i < totalBlobs; ++i) { - seqNo.push_back(*(ui64*)iter->Next().Data()); - } - } + auto iter = chunk->MakeIterator(); + for (ui32 i = 0; i < totalBlobs; ++i) { + seqNo.push_back(*(ui64*)iter->Next().Data()); + } + } TVector<TString> dt; - dt.reserve(totalBlobs); - - //read Data - { + dt.reserve(totalBlobs); + + //read Data + { auto chunk = NScheme::IChunkDecoder::ReadChunk(GetChunk(data, dataEnd), &stringCodecs); - auto iter = chunk->MakeIterator(); - for (ui32 i = 0; i < totalBlobs; ++i) { - auto ref = iter->Next(); - dt.emplace_back(ref.Data(), ref.Size()); - } - } + auto iter = chunk->MakeIterator(); + for (ui32 i = 0; i < totalBlobs; ++i) { + auto ref = iter->Next(); + dt.emplace_back(ref.Data(), ref.Size()); + } + } THashMap<ui32, TPartData> partData; - - //read PartData - { + + //read PartData + { auto chunk = NScheme::IChunkDecoder::ReadChunk(GetChunk(data, dataEnd), &ui32Codecs); - auto iter = chunk->MakeIterator(); - partsSize = *(ui32*)iter->Next().Data(); - partData.reserve(partsSize); - for (ui32 i = 0; i < partsSize; ++i) { - ui32 ps = *(ui32*)iter->Next().Data(); - ui16 partNo = *(ui32*)iter->Next().Data(); - ui16 totalParts = *(ui32*)iter->Next().Data(); - ui32 totalSize = *(ui32*)iter->Next().Data(); - partData.insert(std::make_pair(ps, TPartData(partNo, totalParts, totalSize))); - } - } + auto iter = chunk->MakeIterator(); + partsSize = *(ui32*)iter->Next().Data(); + partData.reserve(partsSize); + for (ui32 i = 0; i < partsSize; ++i) { + ui32 ps = *(ui32*)iter->Next().Data(); + ui16 partNo = *(ui32*)iter->Next().Data(); + ui16 totalParts = *(ui32*)iter->Next().Data(); + ui32 totalSize = *(ui32*)iter->Next().Data(); + partData.insert(std::make_pair(ps, TPartData(partNo, totalParts, totalSize))); + } + } TVector<TInstant> wtime; - wtime.reserve(totalBlobs); - { + wtime.reserve(totalBlobs); + { auto chunk = NScheme::IChunkDecoder::ReadChunk(GetChunk(data, dataEnd), &ui64Codecs); - auto iter = chunk->MakeIterator(); - for (ui32 i = 0; i < totalBlobs; ++i) { + auto iter = chunk->MakeIterator(); + for (ui32 i = 0; i < totalBlobs; ++i) { ui64 timestampMs = *(ui64*)iter->Next().Data(); wtime.push_back(TInstant::MilliSeconds(timestampMs)); - } - } + } + } TVector<TInstant> ctime; - ctime.reserve(totalBlobs); - + ctime.reserve(totalBlobs); + TVector<TString> partitionKey; TVector<TString> explicitHash; partitionKey.reserve(totalBlobs); @@ -535,321 +535,321 @@ void TBatch::UnpackToType1(TVector<TClientBlob> *blobs) { explicitHash.resize(totalBlobs); } - if (data < dataEnd) { //old versions could not have CTime - { + if (data < dataEnd) { //old versions could not have CTime + { auto chunk = NScheme::IChunkDecoder::ReadChunk(GetChunk(data, dataEnd), &ui64Codecs); - auto iter = chunk->MakeIterator(); - for (ui32 i = 0; i < totalBlobs; ++i) { + auto iter = chunk->MakeIterator(); + for (ui32 i = 0; i < totalBlobs; ++i) { ui64 timestampMs = *(ui64*)iter->Next().Data(); ctime.push_back(TInstant::MilliSeconds(timestampMs)); - } - } - } else { - ctime.resize(totalBlobs); //fill with zero-s - } - - TVector<ui64> uncompressedSize; - uncompressedSize.reserve(totalBlobs); - if (data < dataEnd) { //old versions could not have UncompressedSize - { - auto chunk = NScheme::IChunkDecoder::ReadChunk(GetChunk(data, dataEnd), &ui64Codecs); - auto iter = chunk->MakeIterator(); - for (ui32 i = 0; i < totalBlobs; ++i) { - uncompressedSize.push_back(*(ui64*)iter->Next().Data()); - } - } - } else { - uncompressedSize.resize(totalBlobs); //fill with zero-s - } - - Y_VERIFY(data == dataEnd); - - blobs->resize(totalBlobs); - ui32 currentSID = 0; - for (ui32 i = 0; i < totalBlobs; ++i) { + } + } + } else { + ctime.resize(totalBlobs); //fill with zero-s + } + + TVector<ui64> uncompressedSize; + uncompressedSize.reserve(totalBlobs); + if (data < dataEnd) { //old versions could not have UncompressedSize + { + auto chunk = NScheme::IChunkDecoder::ReadChunk(GetChunk(data, dataEnd), &ui64Codecs); + auto iter = chunk->MakeIterator(); + for (ui32 i = 0; i < totalBlobs; ++i) { + uncompressedSize.push_back(*(ui64*)iter->Next().Data()); + } + } + } else { + uncompressedSize.resize(totalBlobs); //fill with zero-s + } + + Y_VERIFY(data == dataEnd); + + blobs->resize(totalBlobs); + ui32 currentSID = 0; + for (ui32 i = 0; i < totalBlobs; ++i) { TMaybe<TPartData> pd; - auto it = partData.find(pos[i]); - if (it != partData.end()) - pd = it->second; + auto it = partData.find(pos[i]); + if (it != partData.end()) + pd = it->second; (*blobs)[pos[i]] = TClientBlob(sourceIds[currentSID], seqNo[i], dt[i], std::move(pd), wtime[pos[i]], ctime[pos[i]], uncompressedSize[pos[i]], partitionKey[i], explicitHash[i]); - if (i + 1 == end[currentSID]) - ++currentSID; - } -} - + if (i + 1 == end[currentSID]) + ++currentSID; + } +} + void TBatch::UnpackToType0(TVector<TClientBlob> *blobs) { - Y_VERIFY(Header.GetFormat() == NKikimrPQ::TBatchHeader::EUncompressed); - Y_VERIFY(!PackedData.empty()); - ui32 shift = 0; - - for (ui32 i = 0; i < GetCount() + GetInternalPartsCount(); ++i) { - Y_VERIFY(shift < PackedData.size()); - blobs->push_back(TClientBlob::Deserialize(PackedData.c_str() + shift, PackedData.size() - shift)); - shift += *(ui32*)(PackedData.c_str() + shift); - } - Y_VERIFY(shift == PackedData.size()); -} - - -ui32 TBatch::FindPos(const ui64 offset, const ui16 partNo) const { - Y_VERIFY(!Packed); - if (offset < GetOffset() || offset == GetOffset() && partNo < GetPartNo()) - return Max<ui32>(); - if (offset == GetOffset()) { - ui32 pos = partNo - GetPartNo(); - return pos < Blobs.size() ? pos : Max<ui32>(); - } - ui32 pos = offset - GetOffset(); - for (ui32 i = 0; i < InternalPartsPos.size() && InternalPartsPos[i] < pos; ++i) - ++pos; - //now pos is position of first client blob from offset - pos += partNo; - return pos < Blobs.size() ? pos : Max<ui32>(); -} - - -void THead::Clear() -{ - Offset = PartNo = PackedSize = 0; - Batches.clear(); -} - -ui64 THead::GetNextOffset() const -{ - return Offset + GetCount(); -} - -ui16 THead::GetInternalPartsCount() const -{ - ui16 res = 0; - for (auto& b : Batches) { - res += b.GetInternalPartsCount(); - } - return res; -} - -ui32 THead::GetCount() const -{ - if (Batches.empty()) - return 0; - - //how much offsets before last batch and how much offsets in last batch - Y_VERIFY(Batches.front().GetOffset() == Offset); - return Batches.back().GetOffset() - Offset + Batches.back().GetCount(); -} - + Y_VERIFY(Header.GetFormat() == NKikimrPQ::TBatchHeader::EUncompressed); + Y_VERIFY(!PackedData.empty()); + ui32 shift = 0; + + for (ui32 i = 0; i < GetCount() + GetInternalPartsCount(); ++i) { + Y_VERIFY(shift < PackedData.size()); + blobs->push_back(TClientBlob::Deserialize(PackedData.c_str() + shift, PackedData.size() - shift)); + shift += *(ui32*)(PackedData.c_str() + shift); + } + Y_VERIFY(shift == PackedData.size()); +} + + +ui32 TBatch::FindPos(const ui64 offset, const ui16 partNo) const { + Y_VERIFY(!Packed); + if (offset < GetOffset() || offset == GetOffset() && partNo < GetPartNo()) + return Max<ui32>(); + if (offset == GetOffset()) { + ui32 pos = partNo - GetPartNo(); + return pos < Blobs.size() ? pos : Max<ui32>(); + } + ui32 pos = offset - GetOffset(); + for (ui32 i = 0; i < InternalPartsPos.size() && InternalPartsPos[i] < pos; ++i) + ++pos; + //now pos is position of first client blob from offset + pos += partNo; + return pos < Blobs.size() ? pos : Max<ui32>(); +} + + +void THead::Clear() +{ + Offset = PartNo = PackedSize = 0; + Batches.clear(); +} + +ui64 THead::GetNextOffset() const +{ + return Offset + GetCount(); +} + +ui16 THead::GetInternalPartsCount() const +{ + ui16 res = 0; + for (auto& b : Batches) { + res += b.GetInternalPartsCount(); + } + return res; +} + +ui32 THead::GetCount() const +{ + if (Batches.empty()) + return 0; + + //how much offsets before last batch and how much offsets in last batch + Y_VERIFY(Batches.front().GetOffset() == Offset); + return Batches.back().GetOffset() - Offset + Batches.back().GetCount(); +} + IOutputStream& operator <<(IOutputStream& out, const THead& value) -{ - out << "Offset " << value.Offset << " PartNo " << value.PartNo << " PackedSize " << value.PackedSize << " count " << value.GetCount() - << " nextOffset " << value.GetNextOffset() << " batches " << value.Batches.size(); - return out; -} - -ui32 THead::FindPos(const ui64 offset, const ui16 partNo) const { - ui32 i = 0; - for (; i < Batches.size(); ++i) { - //this batch contains blobs with position bigger than requested - if (Batches[i].GetOffset() > offset || Batches[i].GetOffset() == offset && Batches[i].GetPartNo() > partNo) - break; - } - if (i == 0) - return Max<ui32>(); - return i - 1; -} - -TPartitionedBlob& TPartitionedBlob::operator=(const TPartitionedBlob& x) -{ - Partition = x.Partition; - Offset = x.Offset; - InternalPartsCount = x.InternalPartsCount; - StartOffset = x.StartOffset; - StartPartNo = x.StartPartNo; - SourceId = x.SourceId; - SeqNo = x.SeqNo; - TotalParts = x.TotalParts; - TotalSize = x.TotalSize; - NextPartNo = x.NextPartNo; - HeadPartNo = x.HeadPartNo; - Blobs = x.Blobs; - BlobsSize = x.BlobsSize; - FormedBlobs = x.FormedBlobs; - Head = x.Head; - NewHead = x.NewHead; - HeadSize = x.HeadSize; - GlueHead = x.GlueHead; - GlueNewHead = x.GlueNewHead; - NeedCompactHead = x.NeedCompactHead; - MaxBlobSize = x.MaxBlobSize; - return *this; -} - -TPartitionedBlob::TPartitionedBlob(const TPartitionedBlob& x) - : Partition(x.Partition) - , Offset(x.Offset) - , InternalPartsCount(x.InternalPartsCount) - , StartOffset(x.StartOffset) - , StartPartNo(x.StartPartNo) - , SourceId(x.SourceId) - , SeqNo(x.SeqNo) - , TotalParts(x.TotalParts) - , TotalSize(x.TotalSize) - , NextPartNo(x.NextPartNo) - , HeadPartNo(x.HeadPartNo) - , Blobs(x.Blobs) - , BlobsSize(x.BlobsSize) - , FormedBlobs(x.FormedBlobs) - , Head(x.Head) - , NewHead(x.NewHead) - , HeadSize(x.HeadSize) - , GlueHead(x.GlueHead) - , GlueNewHead(x.GlueNewHead) - , NeedCompactHead(x.NeedCompactHead) - , MaxBlobSize(x.MaxBlobSize) -{} - +{ + out << "Offset " << value.Offset << " PartNo " << value.PartNo << " PackedSize " << value.PackedSize << " count " << value.GetCount() + << " nextOffset " << value.GetNextOffset() << " batches " << value.Batches.size(); + return out; +} + +ui32 THead::FindPos(const ui64 offset, const ui16 partNo) const { + ui32 i = 0; + for (; i < Batches.size(); ++i) { + //this batch contains blobs with position bigger than requested + if (Batches[i].GetOffset() > offset || Batches[i].GetOffset() == offset && Batches[i].GetPartNo() > partNo) + break; + } + if (i == 0) + return Max<ui32>(); + return i - 1; +} + +TPartitionedBlob& TPartitionedBlob::operator=(const TPartitionedBlob& x) +{ + Partition = x.Partition; + Offset = x.Offset; + InternalPartsCount = x.InternalPartsCount; + StartOffset = x.StartOffset; + StartPartNo = x.StartPartNo; + SourceId = x.SourceId; + SeqNo = x.SeqNo; + TotalParts = x.TotalParts; + TotalSize = x.TotalSize; + NextPartNo = x.NextPartNo; + HeadPartNo = x.HeadPartNo; + Blobs = x.Blobs; + BlobsSize = x.BlobsSize; + FormedBlobs = x.FormedBlobs; + Head = x.Head; + NewHead = x.NewHead; + HeadSize = x.HeadSize; + GlueHead = x.GlueHead; + GlueNewHead = x.GlueNewHead; + NeedCompactHead = x.NeedCompactHead; + MaxBlobSize = x.MaxBlobSize; + return *this; +} + +TPartitionedBlob::TPartitionedBlob(const TPartitionedBlob& x) + : Partition(x.Partition) + , Offset(x.Offset) + , InternalPartsCount(x.InternalPartsCount) + , StartOffset(x.StartOffset) + , StartPartNo(x.StartPartNo) + , SourceId(x.SourceId) + , SeqNo(x.SeqNo) + , TotalParts(x.TotalParts) + , TotalSize(x.TotalSize) + , NextPartNo(x.NextPartNo) + , HeadPartNo(x.HeadPartNo) + , Blobs(x.Blobs) + , BlobsSize(x.BlobsSize) + , FormedBlobs(x.FormedBlobs) + , Head(x.Head) + , NewHead(x.NewHead) + , HeadSize(x.HeadSize) + , GlueHead(x.GlueHead) + , GlueNewHead(x.GlueNewHead) + , NeedCompactHead(x.NeedCompactHead) + , MaxBlobSize(x.MaxBlobSize) +{} + TPartitionedBlob::TPartitionedBlob(const ui32 partition, const ui64 offset, const TString& sourceId, const ui64 seqNo, const ui16 totalParts, - const ui32 totalSize, THead& head, THead& newHead, bool headCleared, bool needCompactHead, const ui32 maxBlobSize) - : Partition(partition) - , Offset(offset) - , InternalPartsCount(0) - , StartOffset(head.Offset) - , StartPartNo(head.PartNo) - , SourceId(sourceId) - , SeqNo(seqNo) - , TotalParts(totalParts) - , TotalSize(totalSize) - , NextPartNo(0) - , HeadPartNo(0) - , BlobsSize(0) - , Head(head) - , NewHead(newHead) - , HeadSize(NewHead.PackedSize) - , GlueHead(false) - , GlueNewHead(true) - , NeedCompactHead(needCompactHead) - , MaxBlobSize(maxBlobSize) -{ - Y_VERIFY(NewHead.Offset == Head.GetNextOffset() && NewHead.PartNo == 0 || headCleared || needCompactHead || Head.PackedSize == 0); // if head not cleared, then NewHead is going after Head - if (!headCleared) { - HeadSize = Head.PackedSize + NewHead.PackedSize; - InternalPartsCount = Head.GetInternalPartsCount() + NewHead.GetInternalPartsCount(); - GlueHead = true; - } else { - InternalPartsCount = NewHead.GetInternalPartsCount(); - StartOffset = NewHead.Offset; - StartPartNo = NewHead.PartNo; - HeadSize = NewHead.PackedSize; - GlueHead = false; - } - if (HeadSize == 0) { - StartOffset = offset; - NewHead.Offset = offset; - Y_VERIFY(StartPartNo == 0); - } -} - + const ui32 totalSize, THead& head, THead& newHead, bool headCleared, bool needCompactHead, const ui32 maxBlobSize) + : Partition(partition) + , Offset(offset) + , InternalPartsCount(0) + , StartOffset(head.Offset) + , StartPartNo(head.PartNo) + , SourceId(sourceId) + , SeqNo(seqNo) + , TotalParts(totalParts) + , TotalSize(totalSize) + , NextPartNo(0) + , HeadPartNo(0) + , BlobsSize(0) + , Head(head) + , NewHead(newHead) + , HeadSize(NewHead.PackedSize) + , GlueHead(false) + , GlueNewHead(true) + , NeedCompactHead(needCompactHead) + , MaxBlobSize(maxBlobSize) +{ + Y_VERIFY(NewHead.Offset == Head.GetNextOffset() && NewHead.PartNo == 0 || headCleared || needCompactHead || Head.PackedSize == 0); // if head not cleared, then NewHead is going after Head + if (!headCleared) { + HeadSize = Head.PackedSize + NewHead.PackedSize; + InternalPartsCount = Head.GetInternalPartsCount() + NewHead.GetInternalPartsCount(); + GlueHead = true; + } else { + InternalPartsCount = NewHead.GetInternalPartsCount(); + StartOffset = NewHead.Offset; + StartPartNo = NewHead.PartNo; + HeadSize = NewHead.PackedSize; + GlueHead = false; + } + if (HeadSize == 0) { + StartOffset = offset; + NewHead.Offset = offset; + Y_VERIFY(StartPartNo == 0); + } +} + TString TPartitionedBlob::CompactHead(bool glueHead, THead& head, bool glueNewHead, THead& newHead, ui32 estimatedSize) -{ +{ TString valueD; - valueD.reserve(estimatedSize); - if (glueHead) { - for (ui32 pp = 0; pp < head.Batches.size(); ++pp) { - Y_VERIFY(head.Batches[pp].Packed); - valueD += head.Batches[pp].Serialize(); - } - } - if (glueNewHead) { - for (ui32 pp = 0; pp < newHead.Batches.size(); ++pp) { - TBatch *b = &newHead.Batches[pp]; - TBatch batch; - if (!b->Packed) { - Y_VERIFY(pp + 1 == newHead.Batches.size()); - batch = newHead.Batches[pp]; - batch.Pack(); - b = &batch; - } - Y_VERIFY(b->Packed); - valueD += b->Serialize(); - } - } - return valueD; -} - + valueD.reserve(estimatedSize); + if (glueHead) { + for (ui32 pp = 0; pp < head.Batches.size(); ++pp) { + Y_VERIFY(head.Batches[pp].Packed); + valueD += head.Batches[pp].Serialize(); + } + } + if (glueNewHead) { + for (ui32 pp = 0; pp < newHead.Batches.size(); ++pp) { + TBatch *b = &newHead.Batches[pp]; + TBatch batch; + if (!b->Packed) { + Y_VERIFY(pp + 1 == newHead.Batches.size()); + batch = newHead.Batches[pp]; + batch.Pack(); + b = &batch; + } + Y_VERIFY(b->Packed); + valueD += b->Serialize(); + } + } + return valueD; +} + std::pair<TKey, TString> TPartitionedBlob::Add(TClientBlob&& blob) -{ - Y_VERIFY(NewHead.Offset >= Head.Offset); - ui32 size = blob.GetBlobSize(); +{ + Y_VERIFY(NewHead.Offset >= Head.Offset); + ui32 size = blob.GetBlobSize(); std::pair<TKey, TString> res; - Y_VERIFY(InternalPartsCount < 1000); //just check for future packing - if (HeadSize + BlobsSize + size + GetMaxHeaderSize() > MaxBlobSize) - NeedCompactHead = true; - if (HeadSize + BlobsSize == 0) { //if nothing to compact at all - NeedCompactHead = false; - } - - if (NeedCompactHead) { //need form blob without last chunk, on start or in case of big head - NeedCompactHead = false; - HeadPartNo = NextPartNo; - ui32 count = (GlueHead ? Head.GetCount() : 0) + (GlueNewHead ? NewHead.GetCount() : 0); - - Y_VERIFY(Offset >= (GlueHead ? Head.Offset : NewHead.Offset)); - - Y_VERIFY(NewHead.GetNextOffset() >= (GlueHead ? Head.Offset : NewHead.Offset)); - - res.first = TKey(TKeyPrefix::TypeTmpData, Partition, StartOffset, StartPartNo, count, InternalPartsCount, false); - - StartOffset = Offset; - StartPartNo = NextPartNo; - InternalPartsCount = 0; - + Y_VERIFY(InternalPartsCount < 1000); //just check for future packing + if (HeadSize + BlobsSize + size + GetMaxHeaderSize() > MaxBlobSize) + NeedCompactHead = true; + if (HeadSize + BlobsSize == 0) { //if nothing to compact at all + NeedCompactHead = false; + } + + if (NeedCompactHead) { //need form blob without last chunk, on start or in case of big head + NeedCompactHead = false; + HeadPartNo = NextPartNo; + ui32 count = (GlueHead ? Head.GetCount() : 0) + (GlueNewHead ? NewHead.GetCount() : 0); + + Y_VERIFY(Offset >= (GlueHead ? Head.Offset : NewHead.Offset)); + + Y_VERIFY(NewHead.GetNextOffset() >= (GlueHead ? Head.Offset : NewHead.Offset)); + + res.first = TKey(TKeyPrefix::TypeTmpData, Partition, StartOffset, StartPartNo, count, InternalPartsCount, false); + + StartOffset = Offset; + StartPartNo = NextPartNo; + InternalPartsCount = 0; + TString valueD = CompactHead(GlueHead, Head, GlueNewHead, NewHead, HeadSize + BlobsSize + (BlobsSize > 0 ? GetMaxHeaderSize() : 0)); - - GlueHead = GlueNewHead = false; - if (!Blobs.empty()) { - TBatch batch{Offset, Blobs.front().GetPartNo(), std::move(Blobs)}; - Blobs.clear(); - batch.Pack(); - Y_VERIFY(batch.Packed); - valueD += batch.Serialize(); - } - res.second = valueD; - Y_VERIFY(res.second.size() <= MaxBlobSize && (res.second.size() + size + 1024 * 1024 > MaxBlobSize - || HeadSize + BlobsSize + size + GetMaxHeaderSize() <= MaxBlobSize)); - HeadSize = 0; - BlobsSize = 0; - CheckBlob(res.first, res.second); - FormedBlobs.push_back(std::make_pair(res.first, res.second.size())); - Blobs.clear(); - } - BlobsSize += size + GetMaxHeaderSize(); - ++NextPartNo; - Blobs.push_back(blob); - if (!IsComplete()) - ++InternalPartsCount; - return res; -} - -bool TPartitionedBlob::IsComplete() const -{ - return NextPartNo == TotalParts; -} - - + + GlueHead = GlueNewHead = false; + if (!Blobs.empty()) { + TBatch batch{Offset, Blobs.front().GetPartNo(), std::move(Blobs)}; + Blobs.clear(); + batch.Pack(); + Y_VERIFY(batch.Packed); + valueD += batch.Serialize(); + } + res.second = valueD; + Y_VERIFY(res.second.size() <= MaxBlobSize && (res.second.size() + size + 1024 * 1024 > MaxBlobSize + || HeadSize + BlobsSize + size + GetMaxHeaderSize() <= MaxBlobSize)); + HeadSize = 0; + BlobsSize = 0; + CheckBlob(res.first, res.second); + FormedBlobs.push_back(std::make_pair(res.first, res.second.size())); + Blobs.clear(); + } + BlobsSize += size + GetMaxHeaderSize(); + ++NextPartNo; + Blobs.push_back(blob); + if (!IsComplete()) + ++InternalPartsCount; + return res; +} + +bool TPartitionedBlob::IsComplete() const +{ + return NextPartNo == TotalParts; +} + + bool TPartitionedBlob::IsNextPart(const TString& sourceId, const ui64 seqNo, const ui16 partNo, TString *reason) const -{ - if (sourceId != SourceId || seqNo != SeqNo || partNo != NextPartNo) { - TStringBuilder s; - s << "waited sourceId '" << EscapeC(SourceId) << "' seqNo " - << SeqNo << " partNo " << NextPartNo << " got sourceId '" << EscapeC(sourceId) - << "' seqNo " << seqNo << " partNo " << partNo; - *reason = s; - return false; - } - return true; -} - - -}// NPQ -}// NKikimr - +{ + if (sourceId != SourceId || seqNo != SeqNo || partNo != NextPartNo) { + TStringBuilder s; + s << "waited sourceId '" << EscapeC(SourceId) << "' seqNo " + << SeqNo << " partNo " << NextPartNo << " got sourceId '" << EscapeC(sourceId) + << "' seqNo " << seqNo << " partNo " << partNo; + *reason = s; + return false; + } + return true; +} + + +}// NPQ +}// NKikimr + diff --git a/ydb/core/persqueue/blob.h b/ydb/core/persqueue/blob.h index 4b90f2463d..633ec2499a 100644 --- a/ydb/core/persqueue/blob.h +++ b/ydb/core/persqueue/blob.h @@ -1,76 +1,76 @@ -#pragma once -#include "header.h" -#include "key.h" - +#pragma once +#include "header.h" +#include "key.h" + #include <util/datetime/base.h> #include <util/generic/maybe.h> #include <util/generic/vector.h> #include <deque> - -namespace NKikimr { -namespace NPQ { - - + +namespace NKikimr { +namespace NPQ { + + void CheckBlob(const TKey& key, const TString& blob); - -struct TPartData { - ui16 PartNo; - ui16 TotalParts; - ui32 TotalSize; - - TPartData(const ui16 partNo, const ui16 totalParts, const ui32 totalSize) - : PartNo(partNo) - , TotalParts(totalParts) - , TotalSize(totalSize) - {} -}; - -struct TClientBlob { - - static const ui8 HAS_PARTDATA = 1; - static const ui8 HAS_TS = 2; - static const ui8 HAS_TS2 = 4; - static const ui8 HAS_US = 8; + +struct TPartData { + ui16 PartNo; + ui16 TotalParts; + ui32 TotalSize; + + TPartData(const ui16 partNo, const ui16 totalParts, const ui32 totalSize) + : PartNo(partNo) + , TotalParts(totalParts) + , TotalSize(totalSize) + {} +}; + +struct TClientBlob { + + static const ui8 HAS_PARTDATA = 1; + static const ui8 HAS_TS = 2; + static const ui8 HAS_TS2 = 4; + static const ui8 HAS_US = 8; static const ui8 HAS_KINESIS = 16; - + TString SourceId; - ui64 SeqNo; + ui64 SeqNo; TString Data; - TMaybe<TPartData> PartData; + TMaybe<TPartData> PartData; TInstant WriteTimestamp; TInstant CreateTimestamp; - ui32 UncompressedSize; + ui32 UncompressedSize; TString PartitionKey; TString ExplicitHashKey; - - TClientBlob() - : SeqNo(0) - , UncompressedSize(0) - {} - + + TClientBlob() + : SeqNo(0) + , UncompressedSize(0) + {} + TClientBlob(const TString& sourceId, const ui64 seqNo, const TString& data, TMaybe<TPartData> &&partData, TInstant writeTimestamp, TInstant createTimestamp, const ui64 uncompressedSize, const TString& partitionKey, const TString& explicitHashKey) - : SourceId(sourceId) - , SeqNo(seqNo) - , Data(data) - , PartData(std::move(partData)) - , WriteTimestamp(writeTimestamp) - , CreateTimestamp(createTimestamp) - , UncompressedSize(uncompressedSize) + : SourceId(sourceId) + , SeqNo(seqNo) + , Data(data) + , PartData(std::move(partData)) + , WriteTimestamp(writeTimestamp) + , CreateTimestamp(createTimestamp) + , UncompressedSize(uncompressedSize) , PartitionKey(partitionKey) , ExplicitHashKey(explicitHashKey) { Y_VERIFY(PartitionKey.size() <= 256); } - - ui32 GetPartDataSize() const { - if (PartData) { - return 1 + sizeof(ui16) + sizeof(ui16) + sizeof(ui32); - } - return 1; - } - + + ui32 GetPartDataSize() const { + if (PartData) { + return 1 + sizeof(ui16) + sizeof(ui16) + sizeof(ui32); + } + return 1; + } + ui32 GetKinesisSize() const { if (PartitionKey.size() > 0) { return 2 + PartitionKey.size() + ExplicitHashKey.size(); @@ -78,231 +78,231 @@ struct TClientBlob { return 0; } - ui32 GetBlobSize() const { + ui32 GetBlobSize() const { return GetPartDataSize() + OVERHEAD + SourceId.size() + Data.size() + (UncompressedSize == 0 ? 0 : sizeof(ui32)) + GetKinesisSize(); - } - - ui16 GetPartNo() const { - return PartData ? PartData->PartNo : 0; - } - - bool IsLastPart() const { - return !PartData || PartData->PartNo + 1 == PartData->TotalParts; - } - - static const ui32 OVERHEAD = sizeof(ui32)/*totalSize*/ + sizeof(ui64)/*SeqNo*/ + sizeof(ui16) /*SourceId*/ + sizeof(ui64) /*WriteTimestamp*/ + sizeof(ui64) /*CreateTimestamp*/; - - void Serialize(TBuffer& buffer) const; - static TClientBlob Deserialize(const char *data, ui32 size); - -}; - - + } + + ui16 GetPartNo() const { + return PartData ? PartData->PartNo : 0; + } + + bool IsLastPart() const { + return !PartData || PartData->PartNo + 1 == PartData->TotalParts; + } + + static const ui32 OVERHEAD = sizeof(ui32)/*totalSize*/ + sizeof(ui64)/*SeqNo*/ + sizeof(ui16) /*SourceId*/ + sizeof(ui64) /*WriteTimestamp*/ + sizeof(ui64) /*CreateTimestamp*/; + + void Serialize(TBuffer& buffer) const; + static TClientBlob Deserialize(const char *data, ui32 size); + +}; + + //TBatch represents several clientBlobs. Can be in unpacked state(TVector<TClientBlob> blobs) -//or packed(PackedData) -//on disk representation: -//<ui16 size><serialized proto header><payload> -// size == serialized proto size -// header.PayloadSize == payload size -// payload contains of <ui8 type><data> -// type=0 - not packed serialized data. - -struct TBatch { - bool Packed; +//or packed(PackedData) +//on disk representation: +//<ui16 size><serialized proto header><payload> +// size == serialized proto size +// header.PayloadSize == payload size +// payload contains of <ui8 type><data> +// type=0 - not packed serialized data. + +struct TBatch { + bool Packed; TVector<TClientBlob> Blobs; TVector<ui32> InternalPartsPos; - NKikimrPQ::TBatchHeader Header; + NKikimrPQ::TBatchHeader Header; TString PackedData; - TBatch() - : Packed(false) - {} - + TBatch() + : Packed(false) + {} + TBatch(const ui64 offset, const ui16 partNo, const TVector<TClientBlob>& blobs) - : Packed(false) - { - Header.SetOffset(offset); - Header.SetPartNo(partNo); - Header.SetUnpackedSize(0); - Header.SetCount(0); - Header.SetInternalPartsCount(0); - for (auto& b : blobs) { - AddBlob(b); - } - } - - TBatch(const ui64 offset, const ui16 partNo, const std::deque<TClientBlob>& blobs) - : Packed(false) - { - Header.SetOffset(offset); - Header.SetPartNo(partNo); - Header.SetUnpackedSize(0); - Header.SetCount(0); - Header.SetInternalPartsCount(0); - for (auto& b : blobs) { - AddBlob(b); - } - } - - void AddBlob(const TClientBlob &b) { - ui32 count = GetCount(); - ui32 unpackedSize = GetUnpackedSize(); - ui32 i = Blobs.size(); - Blobs.push_back(b); - unpackedSize += b.GetBlobSize(); - if (b.IsLastPart()) - ++count; - else { - InternalPartsPos.push_back(i); - } - - Header.SetUnpackedSize(unpackedSize); - Header.SetCount(count); - Header.SetInternalPartsCount(InternalPartsPos.size()); - } - - ui64 GetOffset() const { - return Header.GetOffset(); - } - ui16 GetPartNo() const { - return Header.GetPartNo(); - } - ui32 GetUnpackedSize() const { - return Header.GetUnpackedSize(); - } - ui32 GetCount() const { - return Header.GetCount(); - } - ui16 GetInternalPartsCount() const { - return Header.GetInternalPartsCount(); - } - - TBatch(const NKikimrPQ::TBatchHeader &header, const char* data) - : Packed(true) - , Header(header) - , PackedData(data, header.GetPayloadSize()) - {} - - ui32 GetPackedSize() const { Y_VERIFY(Packed); return sizeof(ui16) + PackedData.size() + Header.ByteSize(); } - void Pack(); - void Unpack(); + : Packed(false) + { + Header.SetOffset(offset); + Header.SetPartNo(partNo); + Header.SetUnpackedSize(0); + Header.SetCount(0); + Header.SetInternalPartsCount(0); + for (auto& b : blobs) { + AddBlob(b); + } + } + + TBatch(const ui64 offset, const ui16 partNo, const std::deque<TClientBlob>& blobs) + : Packed(false) + { + Header.SetOffset(offset); + Header.SetPartNo(partNo); + Header.SetUnpackedSize(0); + Header.SetCount(0); + Header.SetInternalPartsCount(0); + for (auto& b : blobs) { + AddBlob(b); + } + } + + void AddBlob(const TClientBlob &b) { + ui32 count = GetCount(); + ui32 unpackedSize = GetUnpackedSize(); + ui32 i = Blobs.size(); + Blobs.push_back(b); + unpackedSize += b.GetBlobSize(); + if (b.IsLastPart()) + ++count; + else { + InternalPartsPos.push_back(i); + } + + Header.SetUnpackedSize(unpackedSize); + Header.SetCount(count); + Header.SetInternalPartsCount(InternalPartsPos.size()); + } + + ui64 GetOffset() const { + return Header.GetOffset(); + } + ui16 GetPartNo() const { + return Header.GetPartNo(); + } + ui32 GetUnpackedSize() const { + return Header.GetUnpackedSize(); + } + ui32 GetCount() const { + return Header.GetCount(); + } + ui16 GetInternalPartsCount() const { + return Header.GetInternalPartsCount(); + } + + TBatch(const NKikimrPQ::TBatchHeader &header, const char* data) + : Packed(true) + , Header(header) + , PackedData(data, header.GetPayloadSize()) + {} + + ui32 GetPackedSize() const { Y_VERIFY(Packed); return sizeof(ui16) + PackedData.size() + Header.ByteSize(); } + void Pack(); + void Unpack(); void UnpackTo(TVector<TClientBlob> *result); void UnpackToType0(TVector<TClientBlob> *result); void UnpackToType1(TVector<TClientBlob> *result); - + TString Serialize(); - - ui32 FindPos(const ui64 offset, const ui16 partNo) const; - -}; - -class TBlobIterator { -public: + + ui32 FindPos(const ui64 offset, const ui16 partNo) const; + +}; + +class TBlobIterator { +public: TBlobIterator(const TKey& key, const TString& blob); - //return true is there is batch - bool IsValid(); - //get next batch and return false if there is no next batch - bool Next(); - - const TBatch& GetBatch(); -private: - void ParseBatch(bool isFirst); - - const TKey& Key; - const char *Data; - const char *End; - TBatch Batch; - ui64 Offset; - ui32 Count; - ui16 InternalPartsCount; -}; - -//THead represents bathes, stored in head(at most 8 Mb) -struct THead { - std::deque<TBatch> Batches; - //all batches except last must be packed - // BlobsSize <= 512Kb - // size of Blobs after packing must be <= BlobsSize - //otherwise head will be compacted not in total, some blobs will still remain in head - //PackedSize + BlobsSize must be <= 8Mb - ui64 Offset; - ui16 PartNo; - ui32 PackedSize; - - THead() - : Offset(0) - , PartNo(0) - , PackedSize(0) - {} - - void Clear(); - - ui64 GetNextOffset() const; - - ui32 GetCount() const; - - ui16 GetInternalPartsCount() const; - - //return Max<ui32> if not such pos in head - //returns batch with such position - ui32 FindPos(const ui64 offset, const ui16 partNo) const; -}; - + //return true is there is batch + bool IsValid(); + //get next batch and return false if there is no next batch + bool Next(); + + const TBatch& GetBatch(); +private: + void ParseBatch(bool isFirst); + + const TKey& Key; + const char *Data; + const char *End; + TBatch Batch; + ui64 Offset; + ui32 Count; + ui16 InternalPartsCount; +}; + +//THead represents bathes, stored in head(at most 8 Mb) +struct THead { + std::deque<TBatch> Batches; + //all batches except last must be packed + // BlobsSize <= 512Kb + // size of Blobs after packing must be <= BlobsSize + //otherwise head will be compacted not in total, some blobs will still remain in head + //PackedSize + BlobsSize must be <= 8Mb + ui64 Offset; + ui16 PartNo; + ui32 PackedSize; + + THead() + : Offset(0) + , PartNo(0) + , PackedSize(0) + {} + + void Clear(); + + ui64 GetNextOffset() const; + + ui32 GetCount() const; + + ui16 GetInternalPartsCount() const; + + //return Max<ui32> if not such pos in head + //returns batch with such position + ui32 FindPos(const ui64 offset, const ui16 partNo) const; +}; + IOutputStream& operator <<(IOutputStream& out, const THead& value); - - + + //stucture for tracking written KV-blobs, stored in memory parts of one partitioned blob -class TPartitionedBlob { -public: - TPartitionedBlob& operator=(const TPartitionedBlob& x); - - TPartitionedBlob(const TPartitionedBlob& x); - +class TPartitionedBlob { +public: + TPartitionedBlob& operator=(const TPartitionedBlob& x); + + TPartitionedBlob(const TPartitionedBlob& x); + TPartitionedBlob(const ui32 partition, const ui64 offset, const TString& sourceId, const ui64 seqNo, - const ui16 totalParts, const ui32 totalSize, THead& head, THead& newHead, bool headCleared, bool needCompactHead, const ui32 maxBlobSize); - - + const ui16 totalParts, const ui32 totalSize, THead& head, THead& newHead, bool headCleared, bool needCompactHead, const ui32 maxBlobSize); + + std::pair<TKey, TString> Add(TClientBlob&& blob); - - bool IsInited() const { return !SourceId.empty(); } - - bool IsComplete() const; - - bool HasFormedBlobs() const { return !FormedBlobs.empty(); } - - ui64 GetOffset() const { return Offset; } - ui16 GetHeadPartNo() const { return HeadPartNo; } - + + bool IsInited() const { return !SourceId.empty(); } + + bool IsComplete() const; + + bool HasFormedBlobs() const { return !FormedBlobs.empty(); } + + ui64 GetOffset() const { return Offset; } + ui16 GetHeadPartNo() const { return HeadPartNo; } + bool IsNextPart(const TString& sourceId, const ui64 seqNo, const ui16 partNo, TString *reason) const; - - const std::deque<TClientBlob>& GetClientBlobs() const { return Blobs; } - const std::deque<std::pair<TKey, ui32>> GetFormedBlobs() const { return FormedBlobs; } - -private: + + const std::deque<TClientBlob>& GetClientBlobs() const { return Blobs; } + const std::deque<std::pair<TKey, ui32>> GetFormedBlobs() const { return FormedBlobs; } + +private: TString CompactHead(bool glueHead, THead& head, bool glueNewHead, THead& newHead, ui32 estimatedSize); - -private: - ui32 Partition; - ui64 Offset; - ui16 InternalPartsCount; - ui64 StartOffset; - ui16 StartPartNo; + +private: + ui32 Partition; + ui64 Offset; + ui16 InternalPartsCount; + ui64 StartOffset; + ui16 StartPartNo; TString SourceId; - ui64 SeqNo; - ui16 TotalParts; - ui32 TotalSize; - ui16 NextPartNo; - ui16 HeadPartNo; - std::deque<TClientBlob> Blobs; - ui32 BlobsSize; - std::deque<std::pair<TKey, ui32>> FormedBlobs; - THead &Head; - THead &NewHead; - ui32 HeadSize; - bool GlueHead; - bool GlueNewHead; - bool NeedCompactHead; - ui32 MaxBlobSize; -}; - -}// NPQ -}// NKikimr + ui64 SeqNo; + ui16 TotalParts; + ui32 TotalSize; + ui16 NextPartNo; + ui16 HeadPartNo; + std::deque<TClientBlob> Blobs; + ui32 BlobsSize; + std::deque<std::pair<TKey, ui32>> FormedBlobs; + THead &Head; + THead &NewHead; + ui32 HeadSize; + bool GlueHead; + bool GlueNewHead; + bool NeedCompactHead; + ui32 MaxBlobSize; +}; + +}// NPQ +}// NKikimr diff --git a/ydb/core/persqueue/cache_eviction.h b/ydb/core/persqueue/cache_eviction.h index 5aaa7eb4ed..07ae210f85 100644 --- a/ydb/core/persqueue/cache_eviction.h +++ b/ydb/core/persqueue/cache_eviction.h @@ -1,40 +1,40 @@ -#pragma once +#pragma once -#include "blob.h" +#include "blob.h" #include "pq_l2_service.h" - + #include <ydb/core/base/appdata.h> #include <ydb/core/persqueue/events/internal.h> -namespace NKikimr { -namespace NPQ { - - struct TBlobId { - ui32 Partition; - ui64 Offset; - ui16 PartNo; - ui32 Count; // have to be unique for {Partition, Offset, partNo} - ui16 InternalPartsCount; // have to be unique for {Partition, Offset, partNo} - - TBlobId(ui32 partition, ui64 offset, ui16 partNo, ui32 count, ui16 internalPartsCount) +namespace NKikimr { +namespace NPQ { + + struct TBlobId { + ui32 Partition; + ui64 Offset; + ui16 PartNo; + ui32 Count; // have to be unique for {Partition, Offset, partNo} + ui16 InternalPartsCount; // have to be unique for {Partition, Offset, partNo} + + TBlobId(ui32 partition, ui64 offset, ui16 partNo, ui32 count, ui16 internalPartsCount) : Partition(partition) , Offset(offset) - , PartNo(partNo) + , PartNo(partNo) , Count(count) - , InternalPartsCount(internalPartsCount) + , InternalPartsCount(internalPartsCount) { } bool operator == (const TBlobId& r) const { - return Partition == r.Partition && Offset == r.Offset && PartNo == r.PartNo; - } - + return Partition == r.Partition && Offset == r.Offset && PartNo == r.PartNo; + } + ui64 Hash() const { return Hash128to32((ui64(Partition) << 16) + PartNo, Offset); - } - }; + } + }; }} - + template <> struct THash<NKikimr::NPQ::TBlobId> { inline size_t operator() (const NKikimr::NPQ::TBlobId& key) const { @@ -54,7 +54,7 @@ namespace NPQ { ERequestType Type; TActorId Sender; ui64 CookiePQ; - ui32 Partition; + ui32 Partition; ui32 MetadataWritesCount; TVector<TRequestedBlob> Blobs; @@ -66,7 +66,7 @@ namespace NPQ { , MetadataWritesCount(0) {} - TBlobId GetBlobId(ui32 pos) const { return TBlobId(Partition, Blobs[pos].Offset, Blobs[pos].PartNo, Blobs[pos].Count, Blobs[pos].InternalPartsCount); } + TBlobId GetBlobId(ui32 pos) const { return TBlobId(Partition, Blobs[pos].Offset, Blobs[pos].PartNo, Blobs[pos].Count, Blobs[pos].InternalPartsCount); } THolder<TEvKeyValue::TEvRequest> MakeKvRequest() const { @@ -74,7 +74,7 @@ namespace NPQ { for (auto& blob : Blobs) { if (blob.Value.empty()) { // add reading command - TKey key(TKeyPrefix::TypeData, Partition, blob.Offset, blob.PartNo, blob.Count, blob.InternalPartsCount); + TKey key(TKeyPrefix::TypeData, Partition, blob.Offset, blob.PartNo, blob.Count, blob.InternalPartsCount); auto read = request->Record.AddCmdRead(); read->SetKey(key.Data(), key.Size()); } @@ -110,12 +110,12 @@ namespace NPQ { } void Verify(const TRequestedBlob& blob) const { - TKey key(TKeyPrefix::TypeData, 0, blob.Offset, blob.PartNo, blob.Count, blob.InternalPartsCount, false); + TKey key(TKeyPrefix::TypeData, 0, blob.Offset, blob.PartNo, blob.Count, blob.InternalPartsCount, false); Y_VERIFY(blob.Value.size() == blob.Size); - CheckBlob(key, blob.Value); + CheckBlob(key, blob.Value); } - }; - + }; + // TODO: better interface class TCacheEvictionStrategy { public: @@ -129,8 +129,8 @@ namespace NPQ { class TCacheStrategyKeepUsed : public TCacheEvictionStrategy { public: - TCacheStrategyKeepUsed(ui64 size, ui64 maxBlobSize) - : HeadBlobsCount(size / maxBlobSize) + TCacheStrategyKeepUsed(ui64 size, ui64 maxBlobSize) + : HeadBlobsCount(size / maxBlobSize) {} virtual ~TCacheStrategyKeepUsed() @@ -155,7 +155,7 @@ namespace NPQ { virtual TDeque<TBlobId> BlobsToTouch() const override { - return Head; + return Head; } private: @@ -259,7 +259,7 @@ namespace NPQ { for (const auto& blob : kvReq.Blobs) { // Touching blobs in L2. We don't need data here - TCacheBlobL2 key = {kvReq.Partition, blob.Offset, blob.PartNo, nullptr}; + TCacheBlobL2 key = {kvReq.Partition, blob.Offset, blob.PartNo, nullptr}; if (blob.Cached) reqData->RequestedBlobs.push_back(key); else @@ -276,7 +276,7 @@ namespace NPQ { THolder<TCacheL2Request> reqData = MakeHolder<TCacheL2Request>(TopicName); for (const TRequestedBlob& reqBlob : kvReq.Blobs) { - TBlobId blob(kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount); + TBlobId blob(kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount); { // there could be a new blob with same id (for big messages) if (RemoveExists(ctx, blob)) { TCacheBlobL2 removed = {kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, nullptr}; @@ -291,7 +291,7 @@ namespace NPQ { if (L1Strategy) L1Strategy->SaveHeadBlob(blob); - TCacheBlobL2 blobL2 = {kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, cached}; + TCacheBlobL2 blobL2 = {kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, cached}; reqData->StoredBlobs.push_back(blobL2); LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Caching head blob in L1. Partition " @@ -315,7 +315,7 @@ namespace NPQ { continue; const TRequestedBlob& reqBlob = kvReq.Blobs[i]; - TBlobId blob(kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount); + TBlobId blob(kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount); { TValueL1 value; if (CheckExists(ctx, blob, value)) { @@ -329,7 +329,7 @@ namespace NPQ { Cache[blob] = valL1; // weak Counters.Inc(valL1); - TCacheBlobL2 blobL2 = {kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, cached}; + TCacheBlobL2 blobL2 = {kvReq.Partition, reqBlob.Offset, reqBlob.PartNo, cached}; reqData->StoredBlobs.push_back(blobL2); LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Prefetched blob in L1. Partition " @@ -385,7 +385,7 @@ namespace NPQ { void PrepareTouch(const TActorContext& ctx, THolder<TCacheL2Request>& reqData, const TDeque<TBlobId>& used) { for (auto& blob : used) { - TCacheBlobL2 blobL2 = {blob.Partition, blob.Offset, blob.PartNo, nullptr}; + TCacheBlobL2 blobL2 = {blob.Partition, blob.Offset, blob.PartNo, nullptr}; reqData->ExpectedBlobs.push_back(blobL2); LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Touching blob. Partition " @@ -429,7 +429,7 @@ namespace NPQ { ++numCached; continue; } - TBlobId blobId(kvReq.Partition, blob.Offset, blob.PartNo, blob.Count, blob.InternalPartsCount); + TBlobId blobId(kvReq.Partition, blob.Offset, blob.PartNo, blob.Count, blob.InternalPartsCount); TCacheValue::TPtr cached = GetValue(ctx, blobId); if (cached) { ++numCached; @@ -491,5 +491,5 @@ namespace NPQ { } }; -} //NPQ -} //NKikimr +} //NPQ +} //NKikimr diff --git a/ydb/core/persqueue/cluster_tracker.cpp b/ydb/core/persqueue/cluster_tracker.cpp index b493860544..59fdea5c2b 100644 --- a/ydb/core/persqueue/cluster_tracker.cpp +++ b/ydb/core/persqueue/cluster_tracker.cpp @@ -85,7 +85,7 @@ private: hFunc(TEvClusterTracker::TEvSubscribe, HandleWhileWorking); hFunc(TEvents::TEvWakeup, HandleWhileWorking); hFunc(NKqp::TEvKqp::TEvQueryResponse, HandleWhileWorking); - hFunc(NKqp::TEvKqp::TEvProcessResponse, HandleWhileWorking); + hFunc(NKqp::TEvKqp::TEvProcessResponse, HandleWhileWorking); } } @@ -150,15 +150,15 @@ private: } } - void HandleWhileWorking(NKqp::TEvKqp::TEvProcessResponse::TPtr& ev) { - const auto& record = ev->Get()->Record; - LOG_ERROR_S(Ctx(), NKikimrServices::PERSQUEUE_CLUSTER_TRACKER, "failed to list clusters: " << record); - + void HandleWhileWorking(NKqp::TEvKqp::TEvProcessResponse::TPtr& ev) { + const auto& record = ev->Get()->Record; + LOG_ERROR_S(Ctx(), NKikimrServices::PERSQUEUE_CLUSTER_TRACKER, "failed to list clusters: " << record); + ClustersList = nullptr; Schedule(TDuration::Seconds(Cfg().GetClustersUpdateTimeoutOnErrorSec()), new TEvents::TEvWakeup); - } - + } + template<typename TProtoRecord> void UpdateClustersList(const TProtoRecord& record) { auto clustersList = MakeIntrusive<TClustersList>(); diff --git a/ydb/core/persqueue/config/config.cpp b/ydb/core/persqueue/config/config.cpp index 6b92d42002..027fc5e64a 100644 --- a/ydb/core/persqueue/config/config.cpp +++ b/ydb/core/persqueue/config/config.cpp @@ -1,33 +1,33 @@ -#include "config.h" -#include <util/generic/hash_set.h> -#include <util/string/printf.h> -namespace NKikimr { - +#include "config.h" +#include <util/generic/hash_set.h> +#include <util/string/printf.h> +namespace NKikimr { + bool CheckPersQueueConfig(const NKikimrPQ::TPQTabletConfig& config, const bool shouldHavePartitionsList, TString *error) { - if (!config.HasPartitionConfig()) { - if (error) - *error = "no PartitionConfig"; - return false; - } + if (!config.HasPartitionConfig()) { + if (error) + *error = "no PartitionConfig"; + return false; + } const auto& partitionIds = config.GetPartitionIds(); const auto& partitions = config.GetPartitions(); - if (shouldHavePartitionsList) { + if (shouldHavePartitionsList) { if (partitionIds.empty() && partitions.empty()) { - if (error) - *error = "empty Partitions list"; - return false; - } - + if (error) + *error = "empty Partitions list"; + return false; + } + THashSet<ui32> parts; for (const auto partitionId : partitionIds) { if (!parts.insert(partitionId).second) { - if (error) + if (error) *error = Sprintf("duplicate partitions with id %u", partitionId); - return false; - } - } + return false; + } + } parts.clear(); for (const auto& partition : partitions) { @@ -38,22 +38,22 @@ bool CheckPersQueueConfig(const NKikimrPQ::TPQTabletConfig& config, const bool s return false; } } - } else { + } else { if (!partitionIds.empty() || !partitions.empty()) { - if (error) - *error = "Partitions list must be empty"; - return false; - } - } - - const auto& partCfg = config.GetPartitionConfig(); - if (!partCfg.HasLifetimeSeconds()) { - if (error) - *error = "no lifetimeSeconds specified in TPartitionConfig"; - return false; - } - - return true; -} - -} // NKikimr + if (error) + *error = "Partitions list must be empty"; + return false; + } + } + + const auto& partCfg = config.GetPartitionConfig(); + if (!partCfg.HasLifetimeSeconds()) { + if (error) + *error = "no lifetimeSeconds specified in TPartitionConfig"; + return false; + } + + return true; +} + +} // NKikimr diff --git a/ydb/core/persqueue/config/config.h b/ydb/core/persqueue/config/config.h index 31aa498c40..86e11c8f32 100644 --- a/ydb/core/persqueue/config/config.h +++ b/ydb/core/persqueue/config/config.h @@ -1,8 +1,8 @@ -#pragma once +#pragma once #include <ydb/core/protos/msgbus_pq.pb.h> - -namespace NKikimr { - + +namespace NKikimr { + bool CheckPersQueueConfig(const NKikimrPQ::TPQTabletConfig& config, const bool shouldHavePartitionsList = true, TString *error = nullptr); - -} // NKikimr + +} // NKikimr diff --git a/ydb/core/persqueue/config/ya.make b/ydb/core/persqueue/config/ya.make index c49022921d..08ff7c6de1 100644 --- a/ydb/core/persqueue/config/ya.make +++ b/ydb/core/persqueue/config/ya.make @@ -1,18 +1,18 @@ -LIBRARY() - -OWNER( - alexnick +LIBRARY() + +OWNER( + alexnick g:kikimr g:logbroker -) - -SRCS( - config.h - config.cpp -) - -PEERDIR( +) + +SRCS( + config.h + config.cpp +) + +PEERDIR( ydb/core/protos -) - -END() +) + +END() diff --git a/ydb/core/persqueue/events/global.h b/ydb/core/persqueue/events/global.h index 6b33226a1e..e1957e7a16 100644 --- a/ydb/core/persqueue/events/global.h +++ b/ydb/core/persqueue/events/global.h @@ -1,150 +1,150 @@ -#pragma once +#pragma once #include <ydb/core/keyvalue/defs.h> #include <ydb/core/tablet/tablet_counters.h> - + #include <library/cpp/actors/core/actor.h> #include <library/cpp/actors/core/actorid.h> #include <ydb/core/base/blobstorage.h> #include <ydb/core/protos/msgbus.pb.h> #include <ydb/public/api/protos/draft/persqueue_common.pb.h> - - -namespace NKikimr { - -struct TEvPersQueue { - enum EEv { - EvRequest = EventSpaceBegin(TKikimrEvents::ES_PQ), - EvUpdateConfig, //change config for all partitions and count of partitions - EvUpdateConfigResponse, - EvOffsets, //get offsets from all partitions in order 0..n-1 - it's for scheemeshard to change (TabletId,PartId) to Partition - EvOffsetsResponse, + + +namespace NKikimr { + +struct TEvPersQueue { + enum EEv { + EvRequest = EventSpaceBegin(TKikimrEvents::ES_PQ), + EvUpdateConfig, //change config for all partitions and count of partitions + EvUpdateConfigResponse, + EvOffsets, //get offsets from all partitions in order 0..n-1 - it's for scheemeshard to change (TabletId,PartId) to Partition + EvOffsetsResponse, EvDropTablet, EvDropTabletResult, - EvStatus, - EvStatusResponse, - EvHasDataInfo, //how much data is available to fetch from partition - EvHasDataInfoResponse, + EvStatus, + EvStatusResponse, + EvHasDataInfo, //how much data is available to fetch from partition + EvHasDataInfoResponse, EvPartitionClientInfo, EvPartitionClientInfoResponse, - EvUpdateBalancerConfig, - EvRegisterReadSession, - EvLockPartition, - EvReleasePartition, - EvPartitionReleased, - EvDescribe, - EvDescribeResponse, - EvGetReadSessionsInfo, - EvReadSessionsInfoResponse, - EvWakeupClient, + EvUpdateBalancerConfig, + EvRegisterReadSession, + EvLockPartition, + EvReleasePartition, + EvPartitionReleased, + EvDescribe, + EvDescribeResponse, + EvGetReadSessionsInfo, + EvReadSessionsInfoResponse, + EvWakeupClient, EvUpdateACL, EvCheckACL, EvCheckACLResponse, - EvError, - EvGetPartitionIdForWrite, - EvGetPartitionIdForWriteResponse, + EvError, + EvGetPartitionIdForWrite, + EvGetPartitionIdForWriteResponse, EvReportPartitionError, - EvResponse = EvRequest + 256, - EvInternalEvents = EvResponse + 256, - EvEnd - }; - - static_assert( - EvEnd < EventSpaceEnd(TKikimrEvents::ES_PQ), - "expect EvEnd < EventSpaceEnd(TKikimrEvents::ES_PQ)"); - - struct TEvRequest : public TEventPB<TEvRequest, + EvResponse = EvRequest + 256, + EvInternalEvents = EvResponse + 256, + EvEnd + }; + + static_assert( + EvEnd < EventSpaceEnd(TKikimrEvents::ES_PQ), + "expect EvEnd < EventSpaceEnd(TKikimrEvents::ES_PQ)"); + + struct TEvRequest : public TEventPB<TEvRequest, NKikimrClient::TPersQueueRequest, EvRequest> { - TEvRequest() {} - }; - - struct TEvResponse: public TEventPB<TEvResponse, + TEvRequest() {} + }; + + struct TEvResponse: public TEventPB<TEvResponse, NKikimrClient::TResponse, EvResponse> { - TEvResponse() {} - }; - - struct TEvUpdateConfig: public TEventPB<TEvUpdateConfig, - NKikimrPQ::TUpdateConfig, EvUpdateConfig> { - TEvUpdateConfig() {} - }; - - struct TEvUpdateBalancerConfig: public TEventPB<TEvUpdateBalancerConfig, - NKikimrPQ::TUpdateBalancerConfig, EvUpdateBalancerConfig> { - TEvUpdateBalancerConfig() {} - }; - - struct TEvRegisterReadSession: public TEventPB<TEvRegisterReadSession, - NKikimrPQ::TRegisterReadSession, EvRegisterReadSession> { - TEvRegisterReadSession() {} - }; - - struct TEvGetReadSessionsInfo: public TEventPB<TEvGetReadSessionsInfo, - NKikimrPQ::TGetReadSessionsInfo, EvGetReadSessionsInfo> { - TEvGetReadSessionsInfo() {} - }; - - struct TEvReadSessionsInfoResponse: public TEventPB<TEvReadSessionsInfoResponse, - NKikimrPQ::TReadSessionsInfoResponse, EvReadSessionsInfoResponse> { - TEvReadSessionsInfoResponse() {} - }; - - struct TEvLockPartition : public TEventPB<TEvLockPartition, - NKikimrPQ::TLockPartition, EvLockPartition> { - TEvLockPartition() {} - }; - - struct TEvReleasePartition : public TEventPB<TEvReleasePartition, - NKikimrPQ::TReleasePartition, EvReleasePartition> { - TEvReleasePartition() {} - }; - - struct TEvPartitionReleased : public TEventPB<TEvPartitionReleased, - NKikimrPQ::TPartitionReleased, EvPartitionReleased> { - TEvPartitionReleased() {} - }; - - struct TEvUpdateConfigResponse: public TEventPB<TEvUpdateConfigResponse, - NKikimrPQ::TUpdateConfigResponse, EvUpdateConfigResponse> { - TEvUpdateConfigResponse() {} - - ui64 GetOrigin() const { - return Record.GetOrigin(); - } - }; - - struct TEvOffsets : public TEventPB<TEvOffsets, - NKikimrPQ::TOffsets, EvOffsets> { - TEvOffsets() {} - }; - - struct TEvOffsetsResponse : public TEventPB<TEvOffsetsResponse, - NKikimrPQ::TOffsetsResponse, EvOffsetsResponse> { - TEvOffsetsResponse() {} - }; - - struct TEvStatus : public TEventPB<TEvStatus, - NKikimrPQ::TStatus, EvStatus> { - explicit TEvStatus(const TString& consumer = "") { - if (!consumer.empty()) - Record.SetClientId(consumer); - } - }; - - struct TEvStatusResponse : public TEventPB<TEvStatusResponse, - NKikimrPQ::TStatusResponse, EvStatusResponse> { - TEvStatusResponse() {} - }; - - struct TEvHasDataInfo : public TEventPB<TEvHasDataInfo, - NKikimrPQ::THasDataInfo, EvHasDataInfo> { - TEvHasDataInfo() {} - }; - - struct TEvHasDataInfoResponse : public TEventPB<TEvHasDataInfoResponse, - NKikimrPQ::THasDataInfoResponse, EvHasDataInfoResponse> { - TEvHasDataInfoResponse() {} - }; - - + TEvResponse() {} + }; + + struct TEvUpdateConfig: public TEventPB<TEvUpdateConfig, + NKikimrPQ::TUpdateConfig, EvUpdateConfig> { + TEvUpdateConfig() {} + }; + + struct TEvUpdateBalancerConfig: public TEventPB<TEvUpdateBalancerConfig, + NKikimrPQ::TUpdateBalancerConfig, EvUpdateBalancerConfig> { + TEvUpdateBalancerConfig() {} + }; + + struct TEvRegisterReadSession: public TEventPB<TEvRegisterReadSession, + NKikimrPQ::TRegisterReadSession, EvRegisterReadSession> { + TEvRegisterReadSession() {} + }; + + struct TEvGetReadSessionsInfo: public TEventPB<TEvGetReadSessionsInfo, + NKikimrPQ::TGetReadSessionsInfo, EvGetReadSessionsInfo> { + TEvGetReadSessionsInfo() {} + }; + + struct TEvReadSessionsInfoResponse: public TEventPB<TEvReadSessionsInfoResponse, + NKikimrPQ::TReadSessionsInfoResponse, EvReadSessionsInfoResponse> { + TEvReadSessionsInfoResponse() {} + }; + + struct TEvLockPartition : public TEventPB<TEvLockPartition, + NKikimrPQ::TLockPartition, EvLockPartition> { + TEvLockPartition() {} + }; + + struct TEvReleasePartition : public TEventPB<TEvReleasePartition, + NKikimrPQ::TReleasePartition, EvReleasePartition> { + TEvReleasePartition() {} + }; + + struct TEvPartitionReleased : public TEventPB<TEvPartitionReleased, + NKikimrPQ::TPartitionReleased, EvPartitionReleased> { + TEvPartitionReleased() {} + }; + + struct TEvUpdateConfigResponse: public TEventPB<TEvUpdateConfigResponse, + NKikimrPQ::TUpdateConfigResponse, EvUpdateConfigResponse> { + TEvUpdateConfigResponse() {} + + ui64 GetOrigin() const { + return Record.GetOrigin(); + } + }; + + struct TEvOffsets : public TEventPB<TEvOffsets, + NKikimrPQ::TOffsets, EvOffsets> { + TEvOffsets() {} + }; + + struct TEvOffsetsResponse : public TEventPB<TEvOffsetsResponse, + NKikimrPQ::TOffsetsResponse, EvOffsetsResponse> { + TEvOffsetsResponse() {} + }; + + struct TEvStatus : public TEventPB<TEvStatus, + NKikimrPQ::TStatus, EvStatus> { + explicit TEvStatus(const TString& consumer = "") { + if (!consumer.empty()) + Record.SetClientId(consumer); + } + }; + + struct TEvStatusResponse : public TEventPB<TEvStatusResponse, + NKikimrPQ::TStatusResponse, EvStatusResponse> { + TEvStatusResponse() {} + }; + + struct TEvHasDataInfo : public TEventPB<TEvHasDataInfo, + NKikimrPQ::THasDataInfo, EvHasDataInfo> { + TEvHasDataInfo() {} + }; + + struct TEvHasDataInfoResponse : public TEventPB<TEvHasDataInfoResponse, + NKikimrPQ::THasDataInfoResponse, EvHasDataInfoResponse> { + TEvHasDataInfoResponse() {} + }; + + struct TEvDropTablet : public TEventPB<TEvDropTablet, NKikimrPQ::TDropTablet, EvDropTablet> { TEvDropTablet() {} @@ -162,26 +162,26 @@ struct TEvPersQueue { struct TEvPartitionClientInfoResponse : TEventPB<TEvPartitionClientInfoResponse, NKikimrPQ::TClientInfoResponse, EvPartitionClientInfoResponse> { TEvPartitionClientInfoResponse() = default; }; - - struct TEvWakeupClient : TEventLocal<TEvWakeupClient, EvWakeupClient> { - TEvWakeupClient(const TString& client, const ui32 group) - : Client(client) - , Group(group) - {} - - TString Client; - ui32 Group; - }; - - struct TEvDescribe : public TEventPB<TEvDescribe, NKikimrPQ::TDescribe, EvDescribe> { - TEvDescribe() - {} - }; - - struct TEvDescribeResponse : public TEventPB<TEvDescribeResponse, NKikimrPQ::TDescribeResponse, EvDescribeResponse> { - TEvDescribeResponse() - {} - }; + + struct TEvWakeupClient : TEventLocal<TEvWakeupClient, EvWakeupClient> { + TEvWakeupClient(const TString& client, const ui32 group) + : Client(client) + , Group(group) + {} + + TString Client; + ui32 Group; + }; + + struct TEvDescribe : public TEventPB<TEvDescribe, NKikimrPQ::TDescribe, EvDescribe> { + TEvDescribe() + {} + }; + + struct TEvDescribeResponse : public TEventPB<TEvDescribeResponse, NKikimrPQ::TDescribeResponse, EvDescribeResponse> { + TEvDescribeResponse() + {} + }; struct TEvUpdateACL : public TEventLocal<TEvUpdateACL, EvUpdateACL> { TEvUpdateACL() @@ -198,25 +198,25 @@ struct TEvPersQueue { {}; }; - struct TEvError : public TEventPB<TEvError, + struct TEvError : public TEventPB<TEvError, NPersQueueCommon::TError, EvError> { - TEvError() {} - }; - - struct TEvGetPartitionIdForWrite : public TEventPB<TEvGetPartitionIdForWrite, NKikimrPQ::TGetPartitionIdForWrite, EvGetPartitionIdForWrite> { - TEvGetPartitionIdForWrite() - {} - }; - - struct TEvGetPartitionIdForWriteResponse : public TEventPB<TEvGetPartitionIdForWriteResponse, NKikimrPQ::TGetPartitionIdForWriteResponse, EvGetPartitionIdForWriteResponse> { - TEvGetPartitionIdForWriteResponse() - {}; - }; - + TEvError() {} + }; + + struct TEvGetPartitionIdForWrite : public TEventPB<TEvGetPartitionIdForWrite, NKikimrPQ::TGetPartitionIdForWrite, EvGetPartitionIdForWrite> { + TEvGetPartitionIdForWrite() + {} + }; + + struct TEvGetPartitionIdForWriteResponse : public TEventPB<TEvGetPartitionIdForWriteResponse, NKikimrPQ::TGetPartitionIdForWriteResponse, EvGetPartitionIdForWriteResponse> { + TEvGetPartitionIdForWriteResponse() + {}; + }; + struct TEvReportPartitionError : public TEventPB<TEvReportPartitionError, NKikimrPQ::TStatusResponse::TErrorMessage, EvReportPartitionError> { TEvReportPartitionError() {} }; -}; -} //NKikimr +}; +} //NKikimr diff --git a/ydb/core/persqueue/events/internal.h b/ydb/core/persqueue/events/internal.h index cba6100482..bc892988b4 100644 --- a/ydb/core/persqueue/events/internal.h +++ b/ydb/core/persqueue/events/internal.h @@ -1,5 +1,5 @@ -#pragma once - +#pragma once + #include "global.h" #include <ydb/core/protos/pqconfig.pb.h> @@ -7,29 +7,29 @@ #include <library/cpp/actors/core/event_local.h> #include <library/cpp/actors/core/actorid.h> - -#include <util/generic/maybe.h> - -namespace NKikimr { - + +#include <util/generic/maybe.h> + +namespace NKikimr { + namespace NPQ { - + struct TRequestedBlob { ui64 Offset; - ui16 PartNo; + ui16 PartNo; ui32 Count; - ui16 InternalPartsCount; - ui32 Size; + ui16 InternalPartsCount; + ui32 Size; TString Value; bool Cached; - TRequestedBlob() = delete; - + TRequestedBlob() = delete; + TRequestedBlob(ui64 offset, ui16 partNo, ui32 count, ui16 internalPartsCount, ui32 size, TString value) : Offset(offset) - , PartNo(partNo) + , PartNo(partNo) , Count(count) - , InternalPartsCount(internalPartsCount) + , InternalPartsCount(internalPartsCount) , Size(size) , Value(value) , Cached(false) @@ -37,20 +37,20 @@ namespace NPQ { }; struct TErrorInfo { - NPersQueue::NErrorCode::EErrorCode ErrorCode; + NPersQueue::NErrorCode::EErrorCode ErrorCode; TString ErrorStr; TErrorInfo() - : ErrorCode(NPersQueue::NErrorCode::OK) + : ErrorCode(NPersQueue::NErrorCode::OK) {} - TErrorInfo(NPersQueue::NErrorCode::EErrorCode err, const TString& str) + TErrorInfo(NPersQueue::NErrorCode::EErrorCode err, const TString& str) : ErrorCode(err) , ErrorStr(str) {} bool HasError() const { - return ErrorCode != NPersQueue::NErrorCode::OK; + return ErrorCode != NPersQueue::NErrorCode::OK; } }; @@ -60,40 +60,40 @@ namespace NPQ { } } -struct TEvPQ { - enum EEv { - EvWrite = TEvPersQueue::EvInternalEvents, - EvRead, - EvDie, - EvMonRequest, - EvMonResponse, - EvReadTimeout, - EvGetMaxSeqNoRequest, - EvGetClientOffset, - EvSetClientInfo, - EvPartitionOffsets, - EvPartitionOffsetsResponse, - EvPartitionStatus, - EvPartitionStatusResponse, - EvProxyResponse, - EvError, - EvBlobRequest, - EvBlobResponse, - EvInitComplete, - EvChangeOwner, - EvChangeConfig, - EvChangeCacheConfig, - EvPartitionCounters, +struct TEvPQ { + enum EEv { + EvWrite = TEvPersQueue::EvInternalEvents, + EvRead, + EvDie, + EvMonRequest, + EvMonResponse, + EvReadTimeout, + EvGetMaxSeqNoRequest, + EvGetClientOffset, + EvSetClientInfo, + EvPartitionOffsets, + EvPartitionOffsetsResponse, + EvPartitionStatus, + EvPartitionStatusResponse, + EvProxyResponse, + EvError, + EvBlobRequest, + EvBlobResponse, + EvInitComplete, + EvChangeOwner, + EvChangeConfig, + EvChangeCacheConfig, + EvPartitionCounters, EvTabletCacheCounters, - EvPartitionLabeledCounters, + EvPartitionLabeledCounters, EvGetPartitionClientInfo, - EvUpdateAvailableSize, - EvPipeDisconnected, - EvReserveBytes, - EvPartitionLabeledCountersDrop, - EvUpdateWriteTimestamp, - EvHandleWriteResponse, - EvQuotaDeadlineCheck, + EvUpdateAvailableSize, + EvPipeDisconnected, + EvReserveBytes, + EvPartitionLabeledCountersDrop, + EvUpdateWriteTimestamp, + EvHandleWriteResponse, + EvQuotaDeadlineCheck, EvRegisterMessageGroup, EvDeregisterMessageGroup, EvSplitMessageGroup, @@ -108,387 +108,387 @@ struct TEvPQ { EvCreateConsumer, EvRequestPartitionStatus, EvReaderEventArrived, - EvEnd - }; - - struct TEvHandleWriteResponse : TEventLocal<TEvHandleWriteResponse, EvHandleWriteResponse> { - TEvHandleWriteResponse() - {} - }; - - struct TEvWrite : public TEventLocal<TEvWrite, EvWrite> { - struct TMsg { + EvEnd + }; + + struct TEvHandleWriteResponse : TEventLocal<TEvHandleWriteResponse, EvHandleWriteResponse> { + TEvHandleWriteResponse() + {} + }; + + struct TEvWrite : public TEventLocal<TEvWrite, EvWrite> { + struct TMsg { TString SourceId; - ui64 SeqNo; - ui16 PartNo; - ui16 TotalParts; - ui32 TotalSize; - ui64 CreateTimestamp; - ui64 ReceiveTimestamp; + ui64 SeqNo; + ui16 PartNo; + ui16 TotalParts; + ui32 TotalSize; + ui64 CreateTimestamp; + ui64 ReceiveTimestamp; bool DisableDeduplication; - ui64 WriteTimestamp; + ui64 WriteTimestamp; TString Data; - ui32 UncompressedSize; + ui32 UncompressedSize; TString PartitionKey; TString ExplicitHashKey; bool External; - }; - + }; + TEvWrite(const ui64 cookie, const ui64 messageNo, const TString& ownerCookie, const TMaybe<ui64> offset, TVector<TMsg> &&msgs, bool isDirectWrite) - : Cookie(cookie) - , MessageNo(messageNo) - , OwnerCookie(ownerCookie) - , Offset(offset) - , Msgs(std::move(msgs)) + : Cookie(cookie) + , MessageNo(messageNo) + , OwnerCookie(ownerCookie) + , Offset(offset) + , Msgs(std::move(msgs)) , IsDirectWrite(isDirectWrite) - {} - - ui64 Cookie; - ui64 MessageNo; + {} + + ui64 Cookie; + ui64 MessageNo; TString OwnerCookie; - TMaybe<ui64> Offset; + TMaybe<ui64> Offset; TVector<TMsg> Msgs; bool IsDirectWrite; - }; - - struct TEvReadTimeout : public TEventLocal<TEvReadTimeout, EvReadTimeout> { - explicit TEvReadTimeout(const ui64 cookie) - : Cookie(cookie) - {} - - ui64 Cookie; - }; - - struct TEvRead : public TEventLocal<TEvRead, EvRead> { + }; + + struct TEvReadTimeout : public TEventLocal<TEvReadTimeout, EvReadTimeout> { + explicit TEvReadTimeout(const ui64 cookie) + : Cookie(cookie) + {} + + ui64 Cookie; + }; + + struct TEvRead : public TEventLocal<TEvRead, EvRead> { TEvRead(const ui64 cookie, const ui64 offset, const ui16 partNo, const ui32 count, const TString& sessionId, const TString& clientId, const ui32 timeout, const ui32 size, const ui32 maxTimeLagMs, const ui64 readTimestampMs, const TString& clientDC, bool externalOperation) - : Cookie(cookie) - , Offset(offset) - , PartNo(partNo) - , Count(count) - , SessionId(sessionId) - , ClientId(clientId) - , Timeout(timeout) - , Size(size) + : Cookie(cookie) + , Offset(offset) + , PartNo(partNo) + , Count(count) + , SessionId(sessionId) + , ClientId(clientId) + , Timeout(timeout) + , Size(size) , MaxTimeLagMs(maxTimeLagMs) - , ReadTimestampMs(readTimestampMs) - , ClientDC(clientDC) + , ReadTimestampMs(readTimestampMs) + , ClientDC(clientDC) , ExternalOperation(externalOperation) - {} - - ui64 Cookie; - ui64 Offset; - ui16 PartNo; - ui32 Count; + {} + + ui64 Cookie; + ui64 Offset; + ui16 PartNo; + ui32 Count; TString SessionId; TString ClientId; - ui32 Timeout; - ui32 Size; + ui32 Timeout; + ui32 Size; ui32 MaxTimeLagMs; - ui64 ReadTimestampMs; - TString ClientDC; + ui64 ReadTimestampMs; + TString ClientDC; bool ExternalOperation; - }; - - struct TEvMonRequest : public TEventLocal<TEvMonRequest, EvMonRequest> { + }; + + struct TEvMonRequest : public TEventLocal<TEvMonRequest, EvMonRequest> { TEvMonRequest(const TActorId& sender, const TString& query) - : Sender(sender) - , Query(query) - {} - + : Sender(sender) + , Query(query) + {} + TActorId Sender; TString Query; - }; - - struct TEvGetMaxSeqNoRequest : public TEventLocal<TEvGetMaxSeqNoRequest, EvGetMaxSeqNoRequest> { + }; + + struct TEvGetMaxSeqNoRequest : public TEventLocal<TEvGetMaxSeqNoRequest, EvGetMaxSeqNoRequest> { TEvGetMaxSeqNoRequest(const ui64 cookie, const TVector<TString>& sourceIds) - : Cookie(cookie) - , SourceIds(sourceIds) - {} - - ui64 Cookie; + : Cookie(cookie) + , SourceIds(sourceIds) + {} + + ui64 Cookie; TVector<TString> SourceIds; - }; - - struct TEvMonResponse : public TEventLocal<TEvMonResponse, EvMonResponse> { + }; + + struct TEvMonResponse : public TEventLocal<TEvMonResponse, EvMonResponse> { TEvMonResponse(ui32 partition, const TVector<TString>& res, const TString& str) - : Partition(partition) - , Res(res) - , Str(str) - {} - - ui32 Partition; + : Partition(partition) + , Res(res) + , Str(str) + {} + + ui32 Partition; TVector<TString> Res; TString Str; - }; - - - struct TEvSetClientInfo : public TEventLocal<TEvSetClientInfo, EvSetClientInfo> { - enum ESetClientInfoType { - ESCI_OFFSET = 0, - ESCI_CREATE_SESSION, - ESCI_DROP_SESSION, - ESCI_INIT_READ_RULE, - ESCI_DROP_READ_RULE - }; - - TEvSetClientInfo(const ui64 cookie, const TString& clientId, const ui64 offset, const TString& sessionId, - const ui32 generation, const ui32 step, ESetClientInfoType type = ESCI_OFFSET, ui64 readRuleGeneration = 0) - : Cookie(cookie) - , ClientId(clientId) - , Offset(offset) - , SessionId(sessionId) - , Generation(generation) - , Step(step) - , Type(type) - , ReadRuleGeneration(readRuleGeneration) - { - } - - ui64 Cookie; + }; + + + struct TEvSetClientInfo : public TEventLocal<TEvSetClientInfo, EvSetClientInfo> { + enum ESetClientInfoType { + ESCI_OFFSET = 0, + ESCI_CREATE_SESSION, + ESCI_DROP_SESSION, + ESCI_INIT_READ_RULE, + ESCI_DROP_READ_RULE + }; + + TEvSetClientInfo(const ui64 cookie, const TString& clientId, const ui64 offset, const TString& sessionId, + const ui32 generation, const ui32 step, ESetClientInfoType type = ESCI_OFFSET, ui64 readRuleGeneration = 0) + : Cookie(cookie) + , ClientId(clientId) + , Offset(offset) + , SessionId(sessionId) + , Generation(generation) + , Step(step) + , Type(type) + , ReadRuleGeneration(readRuleGeneration) + { + } + + ui64 Cookie; TString ClientId; - ui64 Offset; - TString SessionId; - ui32 Generation; - ui32 Step; - ESetClientInfoType Type; - ui64 ReadRuleGeneration; - }; - - struct TEvGetClientOffset : public TEventLocal<TEvGetClientOffset, EvGetClientOffset> { - TEvGetClientOffset(const ui64 cookie, const TString& clientId) - : Cookie(cookie) - , ClientId(clientId) - {} - - ui64 Cookie; + ui64 Offset; + TString SessionId; + ui32 Generation; + ui32 Step; + ESetClientInfoType Type; + ui64 ReadRuleGeneration; + }; + + struct TEvGetClientOffset : public TEventLocal<TEvGetClientOffset, EvGetClientOffset> { + TEvGetClientOffset(const ui64 cookie, const TString& clientId) + : Cookie(cookie) + , ClientId(clientId) + {} + + ui64 Cookie; TString ClientId; - }; - - - struct TEvUpdateWriteTimestamp : public TEventLocal<TEvUpdateWriteTimestamp, EvUpdateWriteTimestamp> { - TEvUpdateWriteTimestamp(const ui64 cookie, const ui64 writeTimestamp) - : Cookie(cookie) - , WriteTimestamp(writeTimestamp) - {} - - ui64 Cookie; - ui64 WriteTimestamp; - }; - - - struct TEvPartitionOffsets : public TEventLocal<TEvPartitionOffsets, EvPartitionOffsets> { + }; + + + struct TEvUpdateWriteTimestamp : public TEventLocal<TEvUpdateWriteTimestamp, EvUpdateWriteTimestamp> { + TEvUpdateWriteTimestamp(const ui64 cookie, const ui64 writeTimestamp) + : Cookie(cookie) + , WriteTimestamp(writeTimestamp) + {} + + ui64 Cookie; + ui64 WriteTimestamp; + }; + + + struct TEvPartitionOffsets : public TEventLocal<TEvPartitionOffsets, EvPartitionOffsets> { TEvPartitionOffsets(const TActorId& sender, const TString& clientId) - : Sender(sender) - , ClientId(clientId) - {} - + : Sender(sender) + , ClientId(clientId) + {} + TActorId Sender; TString ClientId; - }; - - struct TEvPartitionOffsetsResponse : public TEventLocal<TEvPartitionOffsetsResponse, EvPartitionOffsetsResponse> { - explicit TEvPartitionOffsetsResponse(NKikimrPQ::TOffsetsResponse::TPartResult& partResult) - : PartResult(partResult) - {} - - NKikimrPQ::TOffsetsResponse::TPartResult PartResult; - }; - - struct TEvPartitionStatus : public TEventLocal<TEvPartitionStatus, EvPartitionStatus> { + }; + + struct TEvPartitionOffsetsResponse : public TEventLocal<TEvPartitionOffsetsResponse, EvPartitionOffsetsResponse> { + explicit TEvPartitionOffsetsResponse(NKikimrPQ::TOffsetsResponse::TPartResult& partResult) + : PartResult(partResult) + {} + + NKikimrPQ::TOffsetsResponse::TPartResult PartResult; + }; + + struct TEvPartitionStatus : public TEventLocal<TEvPartitionStatus, EvPartitionStatus> { explicit TEvPartitionStatus(const TActorId& sender, const TString& clientId) - : Sender(sender) - , ClientId(clientId) - {} - + : Sender(sender) + , ClientId(clientId) + {} + TActorId Sender; - TString ClientId; - }; - - struct TEvPartitionStatusResponse : public TEventLocal<TEvPartitionStatusResponse, EvPartitionStatusResponse> { - explicit TEvPartitionStatusResponse(NKikimrPQ::TStatusResponse::TPartResult& partResult) - : PartResult(partResult) - {} - - NKikimrPQ::TStatusResponse::TPartResult PartResult; - }; - - - struct TEvProxyResponse : public TEventLocal<TEvProxyResponse, EvProxyResponse> { - TEvProxyResponse(ui64 cookie) - : Cookie(cookie) - {} - ui64 Cookie; + TString ClientId; + }; + + struct TEvPartitionStatusResponse : public TEventLocal<TEvPartitionStatusResponse, EvPartitionStatusResponse> { + explicit TEvPartitionStatusResponse(NKikimrPQ::TStatusResponse::TPartResult& partResult) + : PartResult(partResult) + {} + + NKikimrPQ::TStatusResponse::TPartResult PartResult; + }; + + + struct TEvProxyResponse : public TEventLocal<TEvProxyResponse, EvProxyResponse> { + TEvProxyResponse(ui64 cookie) + : Cookie(cookie) + {} + ui64 Cookie; NKikimrClient::TResponse Response; - }; - - struct TEvInitComplete : public TEventLocal<TEvInitComplete, EvInitComplete> { - explicit TEvInitComplete(const ui32 partition) - : Partition(partition) - {} - - ui32 Partition; - }; - - struct TEvError : public TEventLocal<TEvError, EvError> { - TEvError(const NPersQueue::NErrorCode::EErrorCode errorCode, const TString& error, ui64 cookie) - : ErrorCode(errorCode) - , Error(error) - , Cookie(cookie) - {} - - NPersQueue::NErrorCode::EErrorCode ErrorCode; + }; + + struct TEvInitComplete : public TEventLocal<TEvInitComplete, EvInitComplete> { + explicit TEvInitComplete(const ui32 partition) + : Partition(partition) + {} + + ui32 Partition; + }; + + struct TEvError : public TEventLocal<TEvError, EvError> { + TEvError(const NPersQueue::NErrorCode::EErrorCode errorCode, const TString& error, ui64 cookie) + : ErrorCode(errorCode) + , Error(error) + , Cookie(cookie) + {} + + NPersQueue::NErrorCode::EErrorCode ErrorCode; TString Error; - ui64 Cookie; - }; - - struct TEvBlobRequest : public TEventLocal<TEvBlobRequest, EvBlobRequest> { + ui64 Cookie; + }; + + struct TEvBlobRequest : public TEventLocal<TEvBlobRequest, EvBlobRequest> { TEvBlobRequest(const TString& user, const ui64 cookie, const ui32 partition, const ui64 readOffset, TVector<NPQ::TRequestedBlob>&& blobs) - : User(user) - , Cookie(cookie) - , Partition(partition) - , ReadOffset(readOffset) - , Blobs(std::move(blobs)) - {} - + : User(user) + , Cookie(cookie) + , Partition(partition) + , ReadOffset(readOffset) + , Blobs(std::move(blobs)) + {} + TString User; - ui64 Cookie; - ui32 Partition; - ui64 ReadOffset; + ui64 Cookie; + ui32 Partition; + ui64 ReadOffset; TVector<NPQ::TRequestedBlob> Blobs; - }; - - class TEvBlobResponse : public TEventLocal<TEvBlobResponse, EvBlobResponse> { - public: + }; + + class TEvBlobResponse : public TEventLocal<TEvBlobResponse, EvBlobResponse> { + public: NPQ::TErrorInfo Error; TEvBlobResponse(const ui64 cookie, TVector<NPQ::TRequestedBlob>&& blobs, NPQ::TErrorInfo error = NPQ::TErrorInfo()) : Error(error) , Cookie(cookie) - , Blobs(std::move(blobs)) - {} - - ui64 GetCookie() const - { - return Cookie; - } - + , Blobs(std::move(blobs)) + {} + + ui64 GetCookie() const + { + return Cookie; + } + const TVector<NPQ::TRequestedBlob>& GetBlobs() const - { - return Blobs; - } - - void Check() const - { + { + return Blobs; + } + + void Check() const + { //error or empty response(all from cache) or not empty response at all Y_VERIFY(Error.HasError() || Blobs.empty() || !Blobs[0].Value.empty(), - "Cookie %" PRIu64 " Error code: %" PRIu32 ", blobs count: %" PRIu64, Cookie, Error.ErrorCode, Blobs.size()); - } - - private: - ui64 Cookie; + "Cookie %" PRIu64 " Error code: %" PRIu32 ", blobs count: %" PRIu64, Cookie, Error.ErrorCode, Blobs.size()); + } + + private: + ui64 Cookie; TVector<NPQ::TRequestedBlob> Blobs; - }; - - struct TEvChangeOwner : public TEventLocal<TEvChangeOwner, EvChangeOwner> { + }; + + struct TEvChangeOwner : public TEventLocal<TEvChangeOwner, EvChangeOwner> { explicit TEvChangeOwner(const ui64 cookie, const TString& owner, const TActorId& pipeClient, const TActorId& sender, const bool force) - : Cookie(cookie) - , Owner(owner) - , PipeClient(pipeClient) - , Sender(sender) - , Force(force) - {} - - ui64 Cookie; - TString Owner; + : Cookie(cookie) + , Owner(owner) + , PipeClient(pipeClient) + , Sender(sender) + , Force(force) + {} + + ui64 Cookie; + TString Owner; TActorId PipeClient; TActorId Sender; - bool Force; - }; - - struct TEvPipeDisconnected : public TEventLocal<TEvPipeDisconnected, EvPipeDisconnected> { + bool Force; + }; + + struct TEvPipeDisconnected : public TEventLocal<TEvPipeDisconnected, EvPipeDisconnected> { explicit TEvPipeDisconnected(const TString& owner, const TActorId& pipeClient) - : Owner(owner) - , PipeClient(pipeClient) - {} - - TString Owner; + : Owner(owner) + , PipeClient(pipeClient) + {} + + TString Owner; TActorId PipeClient; - }; - - struct TEvReserveBytes : public TEventLocal<TEvReserveBytes, EvReserveBytes> { - explicit TEvReserveBytes(const ui64 cookie, const ui32 size, const TString& ownerCookie, const ui64 messageNo, bool lastRequest) - : Cookie(cookie) - , Size(size) - , OwnerCookie(ownerCookie) - , MessageNo(messageNo) - , LastRequest(lastRequest) - {} - - ui64 Cookie; - ui32 Size; - TString OwnerCookie; - ui64 MessageNo; - bool LastRequest; - }; - - - struct TEvChangeConfig : public TEventLocal<TEvChangeConfig, EvChangeConfig> { - TEvChangeConfig(const TString& topicName, const NKikimrPQ::TPQTabletConfig& config) + }; + + struct TEvReserveBytes : public TEventLocal<TEvReserveBytes, EvReserveBytes> { + explicit TEvReserveBytes(const ui64 cookie, const ui32 size, const TString& ownerCookie, const ui64 messageNo, bool lastRequest) + : Cookie(cookie) + , Size(size) + , OwnerCookie(ownerCookie) + , MessageNo(messageNo) + , LastRequest(lastRequest) + {} + + ui64 Cookie; + ui32 Size; + TString OwnerCookie; + ui64 MessageNo; + bool LastRequest; + }; + + + struct TEvChangeConfig : public TEventLocal<TEvChangeConfig, EvChangeConfig> { + TEvChangeConfig(const TString& topicName, const NKikimrPQ::TPQTabletConfig& config) : TopicName(topicName) , Config(config) - {} - + {} + TString TopicName; - NKikimrPQ::TPQTabletConfig Config; - }; - - struct TEvChangeCacheConfig : public TEventLocal<TEvChangeCacheConfig, EvChangeCacheConfig> { - explicit TEvChangeCacheConfig(ui32 maxSize) - : MaxSize(maxSize) - {} - - ui32 MaxSize; - }; - - struct TEvPartitionCounters : public TEventLocal<TEvPartitionCounters, EvPartitionCounters> { - TEvPartitionCounters(const ui32 partition, const TTabletCountersBase& counters) - : Partition(partition) - { - Counters.Populate(counters); - } - - const ui32 Partition; - TTabletCountersBase Counters; - }; - - struct TEvPartitionLabeledCounters : public TEventLocal<TEvPartitionLabeledCounters, EvPartitionLabeledCounters> { - TEvPartitionLabeledCounters(const ui32 partition, const TTabletLabeledCountersBase& labeledCounters) - : Partition(partition) - , LabeledCounters(labeledCounters) - { - } - - const ui32 Partition; - TTabletLabeledCountersBase LabeledCounters; - }; - - struct TEvPartitionLabeledCountersDrop : public TEventLocal<TEvPartitionLabeledCountersDrop, EvPartitionLabeledCountersDrop> { - TEvPartitionLabeledCountersDrop(const ui32 partition, const TString& group) - : Partition(partition) - , Group(group) - { - } - - const ui32 Partition; - TString Group; - }; - - + NKikimrPQ::TPQTabletConfig Config; + }; + + struct TEvChangeCacheConfig : public TEventLocal<TEvChangeCacheConfig, EvChangeCacheConfig> { + explicit TEvChangeCacheConfig(ui32 maxSize) + : MaxSize(maxSize) + {} + + ui32 MaxSize; + }; + + struct TEvPartitionCounters : public TEventLocal<TEvPartitionCounters, EvPartitionCounters> { + TEvPartitionCounters(const ui32 partition, const TTabletCountersBase& counters) + : Partition(partition) + { + Counters.Populate(counters); + } + + const ui32 Partition; + TTabletCountersBase Counters; + }; + + struct TEvPartitionLabeledCounters : public TEventLocal<TEvPartitionLabeledCounters, EvPartitionLabeledCounters> { + TEvPartitionLabeledCounters(const ui32 partition, const TTabletLabeledCountersBase& labeledCounters) + : Partition(partition) + , LabeledCounters(labeledCounters) + { + } + + const ui32 Partition; + TTabletLabeledCountersBase LabeledCounters; + }; + + struct TEvPartitionLabeledCountersDrop : public TEventLocal<TEvPartitionLabeledCountersDrop, EvPartitionLabeledCountersDrop> { + TEvPartitionLabeledCountersDrop(const ui32 partition, const TString& group) + : Partition(partition) + , Group(group) + { + } + + const ui32 Partition; + TString Group; + }; + + struct TEvTabletCacheCounters : public TEventLocal<TEvTabletCacheCounters, EvTabletCacheCounters> { struct TCacheCounters { ui64 CacheSizeBytes = 0; @@ -510,16 +510,16 @@ struct TEvPQ { TActorId Sender; }; - - struct TEvUpdateAvailableSize : TEventLocal<TEvUpdateAvailableSize, EvUpdateAvailableSize> { - TEvUpdateAvailableSize() - {} - }; - - struct TEvQuotaDeadlineCheck : TEventLocal<TEvQuotaDeadlineCheck, EvQuotaDeadlineCheck> { - TEvQuotaDeadlineCheck() - {} - }; + + struct TEvUpdateAvailableSize : TEventLocal<TEvUpdateAvailableSize, EvUpdateAvailableSize> { + TEvUpdateAvailableSize() + {} + }; + + struct TEvQuotaDeadlineCheck : TEventLocal<TEvQuotaDeadlineCheck, EvQuotaDeadlineCheck> { + TEvQuotaDeadlineCheck() + {} + }; struct TEvRegisterMessageGroup : TEventLocal<TEvRegisterMessageGroup, EvRegisterMessageGroup> { struct TBody { @@ -619,5 +619,5 @@ struct TEvPQ { {} }; }; - -} //NKikimr + +} //NKikimr diff --git a/ydb/core/persqueue/header.cpp b/ydb/core/persqueue/header.cpp index 77799523b3..60f20f3d4e 100644 --- a/ydb/core/persqueue/header.cpp +++ b/ydb/core/persqueue/header.cpp @@ -1,31 +1,31 @@ -#include "header.h" +#include "header.h" #include <google/protobuf/io/coded_stream.h> -#include <util/generic/buffer.h> +#include <util/generic/buffer.h> #include <util/system/unaligned_mem.h> - -namespace NKikimr { -namespace NPQ { - - -const ui32 MAX_HEADER_SIZE = 32; // max TBatchHeader size - - -ui32 GetMaxHeaderSize() { - return MAX_HEADER_SIZE; -} - -NKikimrPQ::TBatchHeader ExtractHeader(const char *data, ui32 size) { + +namespace NKikimr { +namespace NPQ { + + +const ui32 MAX_HEADER_SIZE = 32; // max TBatchHeader size + + +ui32 GetMaxHeaderSize() { + return MAX_HEADER_SIZE; +} + +NKikimrPQ::TBatchHeader ExtractHeader(const char *data, ui32 size) { ui16 sz = ReadUnaligned<ui16>(data); - Y_VERIFY(sz < size); - data += sizeof(ui16); - NKikimrPQ::TBatchHeader header; - bool res = header.ParseFromArray(data, sz); - Y_VERIFY(res); - Y_VERIFY((ui32)header.ByteSize() == sz); - - Y_VERIFY(header.ByteSize() + header.GetPayloadSize() + sizeof(ui16) <= size); - return header; -} - -}// NPQ -}// NKikimr + Y_VERIFY(sz < size); + data += sizeof(ui16); + NKikimrPQ::TBatchHeader header; + bool res = header.ParseFromArray(data, sz); + Y_VERIFY(res); + Y_VERIFY((ui32)header.ByteSize() == sz); + + Y_VERIFY(header.ByteSize() + header.GetPayloadSize() + sizeof(ui16) <= size); + return header; +} + +}// NPQ +}// NKikimr diff --git a/ydb/core/persqueue/header.h b/ydb/core/persqueue/header.h index 1c32a8e673..c9c4e2713c 100644 --- a/ydb/core/persqueue/header.h +++ b/ydb/core/persqueue/header.h @@ -1,12 +1,12 @@ -#pragma once +#pragma once #include <ydb/core/protos/pqconfig.pb.h> - -namespace NKikimr { -namespace NPQ { - -ui32 GetMaxHeaderSize(); - -NKikimrPQ::TBatchHeader ExtractHeader(const char* buffer, ui32 size); - -}// NPQ -}// NKikimr + +namespace NKikimr { +namespace NPQ { + +ui32 GetMaxHeaderSize(); + +NKikimrPQ::TBatchHeader ExtractHeader(const char* buffer, ui32 size); + +}// NPQ +}// NKikimr diff --git a/ydb/core/persqueue/internals_ut.cpp b/ydb/core/persqueue/internals_ut.cpp index 55fa7fdaca..d72ffe5e11 100644 --- a/ydb/core/persqueue/internals_ut.cpp +++ b/ydb/core/persqueue/internals_ut.cpp @@ -1,208 +1,208 @@ -#include "blob.h" +#include "blob.h" #include <library/cpp/testing/unittest/registar.h> - -namespace NKikimr { -namespace NPQ { -namespace { - + +namespace NKikimr { +namespace NPQ { +namespace { + Y_UNIT_TEST_SUITE(TPQTestInternal) { - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// TEST CASES -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// TEST CASES +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + Y_UNIT_TEST(TestPartitionedBlobSimpleTest) { - THead head; - THead newHead; - - TPartitionedBlob blob(0, 0, "sourceId", 1, 1, 10, head, newHead, false, false, 8 << 20); + THead head; + THead newHead; + + TPartitionedBlob blob(0, 0, "sourceId", 1, 1, 10, head, newHead, false, false, 8 << 20); TClientBlob clientBlob("sourceId", 1, "valuevalue", TMaybe<TPartData>(), TInstant::MilliSeconds(1), TInstant::MilliSeconds(1), 0, "123", "123"); - UNIT_ASSERT(blob.IsInited()); + UNIT_ASSERT(blob.IsInited()); TString error; - UNIT_ASSERT(blob.IsNextPart("sourceId", 1, 0, &error)); - - blob.Add(std::move(clientBlob)); - UNIT_ASSERT(blob.IsComplete()); - UNIT_ASSERT(blob.GetFormedBlobs().empty()); - UNIT_ASSERT(blob.GetClientBlobs().size() == 1); -} - -void Test(bool headCompacted, ui32 parts, ui32 partSize, ui32 leftInHead) -{ + UNIT_ASSERT(blob.IsNextPart("sourceId", 1, 0, &error)); + + blob.Add(std::move(clientBlob)); + UNIT_ASSERT(blob.IsComplete()); + UNIT_ASSERT(blob.GetFormedBlobs().empty()); + UNIT_ASSERT(blob.GetClientBlobs().size() == 1); +} + +void Test(bool headCompacted, ui32 parts, ui32 partSize, ui32 leftInHead) +{ TVector<TClientBlob> all; - - THead head; - head.Offset = 100; + + THead head; + head.Offset = 100; TString value(102400, 'a'); head.Batches.push_back(TBatch(head.Offset, 0, TVector<TClientBlob>())); - for (ui32 i = 0; i < 50; ++i) { + for (ui32 i = 0; i < 50; ++i) { head.Batches.back().AddBlob(TClientBlob( "sourceId" + TString(1,'a' + rand() % 26), i + 1, value, TMaybe<TPartData>(), TInstant::MilliSeconds(i + 1), TInstant::MilliSeconds(i + 1), 1, "", "" )); - if (!headCompacted) - all.push_back(head.Batches.back().Blobs.back()); - } - head.Batches.back().Pack(); - UNIT_ASSERT(head.Batches.back().Header.GetFormat() == NKikimrPQ::TBatchHeader::ECompressed); - head.Batches.back().Unpack(); - head.Batches.back().Pack(); + if (!headCompacted) + all.push_back(head.Batches.back().Blobs.back()); + } + head.Batches.back().Pack(); + UNIT_ASSERT(head.Batches.back().Header.GetFormat() == NKikimrPQ::TBatchHeader::ECompressed); + head.Batches.back().Unpack(); + head.Batches.back().Pack(); TString str = head.Batches.back().Serialize(); auto header = ExtractHeader(str.c_str(), str.size()); - TBatch batch(header, str.c_str() + header.ByteSize() + sizeof(ui16)); - batch.Unpack(); - - head.PackedSize = head.Batches.back().GetPackedSize(); - UNIT_ASSERT(head.Batches.back().GetUnpackedSize() + GetMaxHeaderSize() >= head.Batches.back().GetPackedSize()); - THead newHead; - newHead.Offset = head.GetNextOffset(); + TBatch batch(header, str.c_str() + header.ByteSize() + sizeof(ui16)); + batch.Unpack(); + + head.PackedSize = head.Batches.back().GetPackedSize(); + UNIT_ASSERT(head.Batches.back().GetUnpackedSize() + GetMaxHeaderSize() >= head.Batches.back().GetPackedSize()); + THead newHead; + newHead.Offset = head.GetNextOffset(); newHead.Batches.push_back(TBatch(newHead.Offset, 0, TVector<TClientBlob>())); - for (ui32 i = 0; i < 10; ++i) { + for (ui32 i = 0; i < 10; ++i) { newHead.Batches.back().AddBlob(TClientBlob( "sourceId2", i + 1, value, TMaybe<TPartData>(), TInstant::MilliSeconds(i + 1000), TInstant::MilliSeconds(i + 1000), 1, "", "" )); - all.push_back(newHead.Batches.back().Blobs.back()); //newHead always glued - } - newHead.PackedSize = newHead.Batches.back().GetUnpackedSize(); + all.push_back(newHead.Batches.back().Blobs.back()); //newHead always glued + } + newHead.PackedSize = newHead.Batches.back().GetUnpackedSize(); TString value2(partSize, 'b'); - ui32 maxBlobSize = 8 << 20; - TPartitionedBlob blob(0, newHead.GetNextOffset(), "sourceId3", 1, parts, parts * value2.size(), head, newHead, headCompacted, false, maxBlobSize); - + ui32 maxBlobSize = 8 << 20; + TPartitionedBlob blob(0, newHead.GetNextOffset(), "sourceId3", 1, parts, parts * value2.size(), head, newHead, headCompacted, false, maxBlobSize); + TVector<std::pair<TKey, TString>> formed; - + TString error; - for (ui32 i = 0; i < parts; ++i) { - UNIT_ASSERT(!blob.IsComplete()); - UNIT_ASSERT(blob.IsNextPart("sourceId3", 1, i, &error)); - TMaybe<TPartData> partData = TPartData(i, parts, value2.size()); + for (ui32 i = 0; i < parts; ++i) { + UNIT_ASSERT(!blob.IsComplete()); + UNIT_ASSERT(blob.IsNextPart("sourceId3", 1, i, &error)); + TMaybe<TPartData> partData = TPartData(i, parts, value2.size()); TClientBlob clientBlob( "soruceId3", 1, value2, std::move(partData), TInstant::MilliSeconds(1), TInstant::MilliSeconds(1), 1, "", "" ); - all.push_back(clientBlob); - auto res = blob.Add(std::move(clientBlob)); - if (!res.second.empty()) - formed.push_back(res); - } - UNIT_ASSERT(blob.IsComplete()); - UNIT_ASSERT(formed.size() == blob.GetFormedBlobs().size()); - for (ui32 i = 0; i < formed.size(); ++i) { - UNIT_ASSERT(formed[i].first == blob.GetFormedBlobs()[i].first); - UNIT_ASSERT(formed[i].second.size() == blob.GetFormedBlobs()[i].second); - UNIT_ASSERT(formed[i].second.size() <= 8 * 1024 * 1024); - UNIT_ASSERT(formed[i].second.size() > 6 * 1024 * 1024); - } + all.push_back(clientBlob); + auto res = blob.Add(std::move(clientBlob)); + if (!res.second.empty()) + formed.push_back(res); + } + UNIT_ASSERT(blob.IsComplete()); + UNIT_ASSERT(formed.size() == blob.GetFormedBlobs().size()); + for (ui32 i = 0; i < formed.size(); ++i) { + UNIT_ASSERT(formed[i].first == blob.GetFormedBlobs()[i].first); + UNIT_ASSERT(formed[i].second.size() == blob.GetFormedBlobs()[i].second); + UNIT_ASSERT(formed[i].second.size() <= 8 * 1024 * 1024); + UNIT_ASSERT(formed[i].second.size() > 6 * 1024 * 1024); + } TVector<TClientBlob> real; - ui32 nextOffset = headCompacted ? newHead.Offset : head.Offset; - for (auto& p : formed) { - const char* data = p.second.c_str(); - const char* end = data + p.second.size(); - ui64 offset = p.first.GetOffset(); - UNIT_ASSERT(offset == nextOffset); - while(data < end) { - auto header = ExtractHeader(data, end - data); - UNIT_ASSERT(header.GetOffset() == nextOffset); - nextOffset += header.GetCount(); - data += header.ByteSize() + sizeof(ui16); - TBatch batch(header, data); - data += header.GetPayloadSize(); - batch.Unpack(); - for (auto& b: batch.Blobs) { - real.push_back(b); - } - } - } - ui32 s = 0; - ui32 c = 0; - - if (formed.empty()) { //nothing compacted - newHead must be here - - if (!headCompacted) { - for (auto& p : head.Batches) { - p.Unpack(); - for (const auto& b : p.Blobs) - real.push_back(b); - } - } - - for (auto& p : newHead.Batches) { - p.Unpack(); - for (const auto& b : p.Blobs) - real.push_back(b); - } - } - - for (const auto& p : blob.GetClientBlobs()) { - real.push_back(p); - c++; - s += p.GetBlobSize(); - } - - UNIT_ASSERT(c == leftInHead); - UNIT_ASSERT(s + GetMaxHeaderSize() <= maxBlobSize); - UNIT_ASSERT(real.size() == all.size()); - for (ui32 i = 0; i < all.size(); ++i) { - UNIT_ASSERT(all[i].SourceId == real[i].SourceId); - UNIT_ASSERT(all[i].SeqNo == real[i].SeqNo); - UNIT_ASSERT(all[i].Data == real[i].Data); - UNIT_ASSERT(all[i].PartData.Defined() == real[i].PartData.Defined()); - if (all[i].PartData.Defined()) { - UNIT_ASSERT(all[i].PartData->PartNo == real[i].PartData->PartNo); - UNIT_ASSERT(all[i].PartData->TotalParts == real[i].PartData->TotalParts); - UNIT_ASSERT(all[i].PartData->TotalSize == real[i].PartData->TotalSize); - } - - } -} - + ui32 nextOffset = headCompacted ? newHead.Offset : head.Offset; + for (auto& p : formed) { + const char* data = p.second.c_str(); + const char* end = data + p.second.size(); + ui64 offset = p.first.GetOffset(); + UNIT_ASSERT(offset == nextOffset); + while(data < end) { + auto header = ExtractHeader(data, end - data); + UNIT_ASSERT(header.GetOffset() == nextOffset); + nextOffset += header.GetCount(); + data += header.ByteSize() + sizeof(ui16); + TBatch batch(header, data); + data += header.GetPayloadSize(); + batch.Unpack(); + for (auto& b: batch.Blobs) { + real.push_back(b); + } + } + } + ui32 s = 0; + ui32 c = 0; + + if (formed.empty()) { //nothing compacted - newHead must be here + + if (!headCompacted) { + for (auto& p : head.Batches) { + p.Unpack(); + for (const auto& b : p.Blobs) + real.push_back(b); + } + } + + for (auto& p : newHead.Batches) { + p.Unpack(); + for (const auto& b : p.Blobs) + real.push_back(b); + } + } + + for (const auto& p : blob.GetClientBlobs()) { + real.push_back(p); + c++; + s += p.GetBlobSize(); + } + + UNIT_ASSERT(c == leftInHead); + UNIT_ASSERT(s + GetMaxHeaderSize() <= maxBlobSize); + UNIT_ASSERT(real.size() == all.size()); + for (ui32 i = 0; i < all.size(); ++i) { + UNIT_ASSERT(all[i].SourceId == real[i].SourceId); + UNIT_ASSERT(all[i].SeqNo == real[i].SeqNo); + UNIT_ASSERT(all[i].Data == real[i].Data); + UNIT_ASSERT(all[i].PartData.Defined() == real[i].PartData.Defined()); + if (all[i].PartData.Defined()) { + UNIT_ASSERT(all[i].PartData->PartNo == real[i].PartData->PartNo); + UNIT_ASSERT(all[i].PartData->TotalParts == real[i].PartData->TotalParts); + UNIT_ASSERT(all[i].PartData->TotalSize == real[i].PartData->TotalSize); + } + + } +} + Y_UNIT_TEST(TestPartitionedBigTest) { - - Test(true, 100, 400*1024, 3); - Test(false, 100, 512*1024 - 9 - sizeof(ui64) - sizeof(ui16) - 100, 16); //serialized size of client blob is 512*1024 - 100 - Test(false, 101, 512*1024 - 9 - sizeof(ui64) - sizeof(ui16) - 100, 1); //serialized size of client blob is 512*1024 - 100 - Test(false, 1, 512*1024 - 9 - sizeof(ui64) - sizeof(ui16) - 100, 1); //serialized size of client blob is 512*1024 - 100 - Test(true, 1, 512*1024 - 9 - sizeof(ui64) - sizeof(ui16) - 100, 1); //serialized size of client blob is 512*1024 - 100 - Test(true, 101, 512*1024 - 9 - sizeof(ui64) - sizeof(ui16) - 100, 7); //serialized size of client blob is 512*1024 - 100 -} - + + Test(true, 100, 400*1024, 3); + Test(false, 100, 512*1024 - 9 - sizeof(ui64) - sizeof(ui16) - 100, 16); //serialized size of client blob is 512*1024 - 100 + Test(false, 101, 512*1024 - 9 - sizeof(ui64) - sizeof(ui16) - 100, 1); //serialized size of client blob is 512*1024 - 100 + Test(false, 1, 512*1024 - 9 - sizeof(ui64) - sizeof(ui16) - 100, 1); //serialized size of client blob is 512*1024 - 100 + Test(true, 1, 512*1024 - 9 - sizeof(ui64) - sizeof(ui16) - 100, 1); //serialized size of client blob is 512*1024 - 100 + Test(true, 101, 512*1024 - 9 - sizeof(ui64) - sizeof(ui16) - 100, 7); //serialized size of client blob is 512*1024 - 100 +} + Y_UNIT_TEST(TestBatchPacking) { TString value(10, 'a'); - TBatch batch; - for (ui32 i = 0; i < 100; ++i) { + TBatch batch; + for (ui32 i = 0; i < 100; ++i) { batch.AddBlob(TClientBlob( "sourceId1", i + 1, value, TMaybe<TPartData>(), TInstant::MilliSeconds(1), TInstant::MilliSeconds(1), 0, "", "" )); - } - batch.Pack(); + } + batch.Pack(); TString s = batch.PackedData; - UNIT_ASSERT(batch.Header.GetFormat() == NKikimrPQ::TBatchHeader::ECompressed); - batch.Unpack(); - batch.Pack(); - UNIT_ASSERT(batch.PackedData == s); + UNIT_ASSERT(batch.Header.GetFormat() == NKikimrPQ::TBatchHeader::ECompressed); + batch.Unpack(); + batch.Pack(); + UNIT_ASSERT(batch.PackedData == s); TString str = batch.Serialize(); auto header = ExtractHeader(str.c_str(), str.size()); - TBatch batch2(header, str.c_str() + header.ByteSize() + sizeof(ui16)); - batch2.Unpack(); - Y_VERIFY(batch2.Blobs.size() == 100); - - TBatch batch3; + TBatch batch2(header, str.c_str() + header.ByteSize() + sizeof(ui16)); + batch2.Unpack(); + Y_VERIFY(batch2.Blobs.size() == 100); + + TBatch batch3; batch3.AddBlob(TClientBlob( "sourceId", 999999999999999ll, "abacaba", TPartData{33,66,4000000000u}, TInstant::MilliSeconds(999999999999ll), TInstant::MilliSeconds(1000), 0, "", "" )); - batch3.Pack(); - UNIT_ASSERT(batch3.Header.GetFormat() == NKikimrPQ::TBatchHeader::EUncompressed); - batch3.Unpack(); - Y_VERIFY(batch3.Blobs.size() == 1); -} - - + batch3.Pack(); + UNIT_ASSERT(batch3.Header.GetFormat() == NKikimrPQ::TBatchHeader::EUncompressed); + batch3.Unpack(); + Y_VERIFY(batch3.Blobs.size() == 1); +} + + } //Y_UNIT_TEST_SUITE - - -} // TInternalsTest -} // NPQ -} // NKikimr + + +} // TInternalsTest +} // NPQ +} // NKikimr diff --git a/ydb/core/persqueue/key.h b/ydb/core/persqueue/key.h index da0623d1c0..098c67cf7d 100644 --- a/ydb/core/persqueue/key.h +++ b/ydb/core/persqueue/key.h @@ -1,250 +1,250 @@ -#pragma once - +#pragma once + #include <util/generic/buffer.h> #include <util/string/cast.h> #include <util/string/printf.h> -namespace NKikimr { -namespace NPQ { - -// {char type; ui32 partiton; (char mark)} -class TKeyPrefix : public TBuffer -{ -public: - enum EType : char { - TypeNone = 0, - TypeInfo = 'm', - TypeData = 'd', - TypeTmpData = 'x', - TypeMeta = 'i' - }; - - enum EMark : char { +namespace NKikimr { +namespace NPQ { + +// {char type; ui32 partiton; (char mark)} +class TKeyPrefix : public TBuffer +{ +public: + enum EType : char { + TypeNone = 0, + TypeInfo = 'm', + TypeData = 'd', + TypeTmpData = 'x', + TypeMeta = 'i' + }; + + enum EMark : char { MarkUserDeprecated = 'u', MarkSourceId = 's', MarkProtoSourceId = 'p', MarkUser = 'c' - }; - - TKeyPrefix(EType type, const ui32 partition) - : Partition(partition) - { - Resize(UnmarkedSize()); - *PtrType() = type; + }; + + TKeyPrefix(EType type, const ui32 partition) + : Partition(partition) + { + Resize(UnmarkedSize()); + *PtrType() = type; memcpy(PtrPartition(), Sprintf("%.10" PRIu32, partition).data(), 10); - } - - TKeyPrefix(EType type, const ui32 partition, EMark mark) - : TKeyPrefix(type, partition) - { - Resize(MarkedSize()); - *PtrMark() = mark; - } - - TKeyPrefix() - : TKeyPrefix(TypeNone, 0) - {} - - virtual ~TKeyPrefix() - {} - - bool Marked(EMark mark) { - if (Size() >= MarkedSize()) - return *PtrMark() == mark; - return false; - } - - static constexpr ui32 MarkPosition() { return UnmarkedSize(); } - static constexpr ui32 MarkedSize() { return UnmarkedSize() + 1; } - - void SetType(EType type) { - *PtrType() = type; - } - - EType GetType() const { - return EType(*PtrType()); - } - - ui32 GetPartition() const { return Partition; } - -protected: - static constexpr ui32 UnmarkedSize() { return 1 + 10; } - - void ParsePartition() - { - Partition = FromString<ui32>(TStringBuf{PtrPartition(), 10}); - } -private: - char* PtrType() { return Data(); } - char* PtrMark() { return Data() + UnmarkedSize(); } - char* PtrPartition() { return Data() + 1; } - - const char* PtrType() const { return Data(); } - const char* PtrMark() const { return Data() + UnmarkedSize(); } - const char* PtrPartition() const { return Data() + 1; } - - ui32 Partition; -}; - -// {char type; ui32 partiton; ui64 offset; ui16 partNo; ui32 count, ui16 internalPartsCount} -// offset, partNo - index of first rec -// count - diff of last record offset and first record offset in blob -// internalPartsCount - number of internal parts -// A4|A5B1B2C1C2C3|D1 - Offset A, partNo 5, count 2, internalPartsCount 3 -// ^ ^ ^ ^ -// internalparts -class TKey : public TKeyPrefix -{ -public: - TKey(EType type, const ui32 partition, const ui64 offset, const ui16 partNo, const ui32 count, const ui16 internalPartsCount, const bool isHead = false) - : TKeyPrefix(type, partition) - , Offset(offset) - , Count(count) - , PartNo(partNo) - , InternalPartsCount(internalPartsCount) - { - Resize(KeySize()); - *(PtrOffset() - 1) = *(PtrCount() - 1) = *(PtrPartNo() - 1) = *(PtrInternalPartsCount() - 1) = '_'; - SetOffset(offset); - SetPartNo(partNo); - SetCount(count); - SetInternalPartsCount(InternalPartsCount); - SetHead(isHead); - } - - TKey(const TKey& key) - : TKey(key.GetType(), key.GetPartition(), key.Offset, key.PartNo, key.Count, key.InternalPartsCount, key.IsHead()) - { - } - + } + + TKeyPrefix(EType type, const ui32 partition, EMark mark) + : TKeyPrefix(type, partition) + { + Resize(MarkedSize()); + *PtrMark() = mark; + } + + TKeyPrefix() + : TKeyPrefix(TypeNone, 0) + {} + + virtual ~TKeyPrefix() + {} + + bool Marked(EMark mark) { + if (Size() >= MarkedSize()) + return *PtrMark() == mark; + return false; + } + + static constexpr ui32 MarkPosition() { return UnmarkedSize(); } + static constexpr ui32 MarkedSize() { return UnmarkedSize() + 1; } + + void SetType(EType type) { + *PtrType() = type; + } + + EType GetType() const { + return EType(*PtrType()); + } + + ui32 GetPartition() const { return Partition; } + +protected: + static constexpr ui32 UnmarkedSize() { return 1 + 10; } + + void ParsePartition() + { + Partition = FromString<ui32>(TStringBuf{PtrPartition(), 10}); + } +private: + char* PtrType() { return Data(); } + char* PtrMark() { return Data() + UnmarkedSize(); } + char* PtrPartition() { return Data() + 1; } + + const char* PtrType() const { return Data(); } + const char* PtrMark() const { return Data() + UnmarkedSize(); } + const char* PtrPartition() const { return Data() + 1; } + + ui32 Partition; +}; + +// {char type; ui32 partiton; ui64 offset; ui16 partNo; ui32 count, ui16 internalPartsCount} +// offset, partNo - index of first rec +// count - diff of last record offset and first record offset in blob +// internalPartsCount - number of internal parts +// A4|A5B1B2C1C2C3|D1 - Offset A, partNo 5, count 2, internalPartsCount 3 +// ^ ^ ^ ^ +// internalparts +class TKey : public TKeyPrefix +{ +public: + TKey(EType type, const ui32 partition, const ui64 offset, const ui16 partNo, const ui32 count, const ui16 internalPartsCount, const bool isHead = false) + : TKeyPrefix(type, partition) + , Offset(offset) + , Count(count) + , PartNo(partNo) + , InternalPartsCount(internalPartsCount) + { + Resize(KeySize()); + *(PtrOffset() - 1) = *(PtrCount() - 1) = *(PtrPartNo() - 1) = *(PtrInternalPartsCount() - 1) = '_'; + SetOffset(offset); + SetPartNo(partNo); + SetCount(count); + SetInternalPartsCount(InternalPartsCount); + SetHead(isHead); + } + + TKey(const TKey& key) + : TKey(key.GetType(), key.GetPartition(), key.Offset, key.PartNo, key.Count, key.InternalPartsCount, key.IsHead()) + { + } + TKey(const TString& data) - { + { Assign(data.data(), data.size()); - Y_VERIFY(data.size() == KeySize() + IsHead()); - Y_VERIFY(*(PtrOffset() - 1) == '_'); - Y_VERIFY(*(PtrCount() - 1) == '_'); - Y_VERIFY(*(PtrPartNo() - 1) == '_'); - Y_VERIFY(*(PtrInternalPartsCount() - 1) == '_'); - - ParsePartition(); - ParseOffset(); - ParseCount(); - ParsePartNo(); - ParseInternalPartsCount(); - } - - TKey() + Y_VERIFY(data.size() == KeySize() + IsHead()); + Y_VERIFY(*(PtrOffset() - 1) == '_'); + Y_VERIFY(*(PtrCount() - 1) == '_'); + Y_VERIFY(*(PtrPartNo() - 1) == '_'); + Y_VERIFY(*(PtrInternalPartsCount() - 1) == '_'); + + ParsePartition(); + ParseOffset(); + ParseCount(); + ParsePartNo(); + ParseInternalPartsCount(); + } + + TKey() : TKey(TypeNone, 0, 0, 0, 0, 0) - {} - - virtual ~TKey() - {} - + {} + + virtual ~TKey() + {} + TString ToString() const { return TString(Data(), Size()); - } - - void SetHead(const bool isHead) { - Resize(KeySize() + isHead); - if (isHead) - Data()[KeySize()] = '|'; - } - - void SetOffset(const ui64 offset) { - Y_VERIFY(Size() == KeySize() + IsHead()); - Offset = offset; + } + + void SetHead(const bool isHead) { + Resize(KeySize() + isHead); + if (isHead) + Data()[KeySize()] = '|'; + } + + void SetOffset(const ui64 offset) { + Y_VERIFY(Size() == KeySize() + IsHead()); + Offset = offset; memcpy(PtrOffset(), Sprintf("%.20" PRIu64, offset).data(), 20); - } - - ui64 GetOffset() const { - Y_VERIFY(Size() == KeySize() + IsHead()); - return Offset; - } - - void SetCount(const ui32 count) { - Y_VERIFY(Size() == KeySize() + IsHead()); - Count = count; + } + + ui64 GetOffset() const { + Y_VERIFY(Size() == KeySize() + IsHead()); + return Offset; + } + + void SetCount(const ui32 count) { + Y_VERIFY(Size() == KeySize() + IsHead()); + Count = count; memcpy(PtrCount(), Sprintf("%.10" PRIu32, count).data(), 10); - } - - ui32 GetCount() const { - Y_VERIFY(Size() == KeySize() + IsHead()); - return Count; - } - - void SetPartNo(const ui16 partNo) { - Y_VERIFY(Size() == KeySize() + IsHead()); - PartNo = partNo; + } + + ui32 GetCount() const { + Y_VERIFY(Size() == KeySize() + IsHead()); + return Count; + } + + void SetPartNo(const ui16 partNo) { + Y_VERIFY(Size() == KeySize() + IsHead()); + PartNo = partNo; memcpy(PtrPartNo(), Sprintf("%.5" PRIu16, partNo).data(), 5); - } - - ui16 GetPartNo() const { - Y_VERIFY(Size() == KeySize() + IsHead()); - return PartNo; - } - - void SetInternalPartsCount(const ui16 internalPartsCount) { - Y_VERIFY(Size() == KeySize() + IsHead()); - InternalPartsCount = internalPartsCount; + } + + ui16 GetPartNo() const { + Y_VERIFY(Size() == KeySize() + IsHead()); + return PartNo; + } + + void SetInternalPartsCount(const ui16 internalPartsCount) { + Y_VERIFY(Size() == KeySize() + IsHead()); + InternalPartsCount = internalPartsCount; memcpy(PtrInternalPartsCount(), Sprintf("%.5" PRIu16, internalPartsCount).data(), 5); - } - - ui16 GetInternalPartsCount() const { - Y_VERIFY(Size() == KeySize() + IsHead()); - return InternalPartsCount; - } - - bool IsHead() const { - return Size() == KeySize() + 1; - } - - static constexpr ui32 KeySize() { - return UnmarkedSize() + 1 + 20 + 1 + 5 + 1 + 10 + 1 + 5; - //p<partition 10 chars>_<offset 20 chars>_<part number 5 chars>_<count 10 chars>_<internalPartsCount count 5 chars> - } - - bool operator==(const TKey& key) const - { - return Size() == key.Size() && strncmp(Data(), key.Data(), Size()) == 0; - } - -private: - char* PtrOffset() { return Data() + UnmarkedSize() + 1; } - char* PtrPartNo() { return PtrOffset() + 20 + 1; } - char* PtrCount() { return PtrPartNo() + 5 + 1; } - char* PtrInternalPartsCount() { return PtrCount() + 10 + 1; } - - const char* PtrOffset() const { return Data() + UnmarkedSize() + 1; } - const char* PtrPartNo() const { return PtrOffset() + 20 + 1; } - const char* PtrCount() const { return PtrPartNo() + 5 + 1; } - const char* PtrInternalPartsCount() const { return PtrCount() + 10 + 1; } - - void ParseOffset() - { - Offset = FromString<ui64>(TStringBuf{PtrOffset(), 20}); - } - - void ParseCount() - { - Count = FromString<ui32>(TStringBuf{PtrCount(), 10}); - } - - void ParsePartNo() - { - PartNo = FromString<ui16>(TStringBuf{PtrPartNo(), 5}); - } - - void ParseInternalPartsCount() - { - InternalPartsCount = FromString<ui16>(TStringBuf{PtrInternalPartsCount(), 5}); - } - - ui64 Offset; - ui32 Count; - ui16 PartNo; - ui16 InternalPartsCount; -}; - - -}// NPQ -}// NKikimr + } + + ui16 GetInternalPartsCount() const { + Y_VERIFY(Size() == KeySize() + IsHead()); + return InternalPartsCount; + } + + bool IsHead() const { + return Size() == KeySize() + 1; + } + + static constexpr ui32 KeySize() { + return UnmarkedSize() + 1 + 20 + 1 + 5 + 1 + 10 + 1 + 5; + //p<partition 10 chars>_<offset 20 chars>_<part number 5 chars>_<count 10 chars>_<internalPartsCount count 5 chars> + } + + bool operator==(const TKey& key) const + { + return Size() == key.Size() && strncmp(Data(), key.Data(), Size()) == 0; + } + +private: + char* PtrOffset() { return Data() + UnmarkedSize() + 1; } + char* PtrPartNo() { return PtrOffset() + 20 + 1; } + char* PtrCount() { return PtrPartNo() + 5 + 1; } + char* PtrInternalPartsCount() { return PtrCount() + 10 + 1; } + + const char* PtrOffset() const { return Data() + UnmarkedSize() + 1; } + const char* PtrPartNo() const { return PtrOffset() + 20 + 1; } + const char* PtrCount() const { return PtrPartNo() + 5 + 1; } + const char* PtrInternalPartsCount() const { return PtrCount() + 10 + 1; } + + void ParseOffset() + { + Offset = FromString<ui64>(TStringBuf{PtrOffset(), 20}); + } + + void ParseCount() + { + Count = FromString<ui32>(TStringBuf{PtrCount(), 10}); + } + + void ParsePartNo() + { + PartNo = FromString<ui16>(TStringBuf{PtrPartNo(), 5}); + } + + void ParseInternalPartsCount() + { + InternalPartsCount = FromString<ui16>(TStringBuf{PtrInternalPartsCount(), 5}); + } + + ui64 Offset; + ui32 Count; + ui16 PartNo; + ui16 InternalPartsCount; +}; + + +}// NPQ +}// NKikimr diff --git a/ydb/core/persqueue/mirrorer.cpp b/ydb/core/persqueue/mirrorer.cpp index 74f47ab96c..37aea95ad1 100644 --- a/ydb/core/persqueue/mirrorer.cpp +++ b/ydb/core/persqueue/mirrorer.cpp @@ -60,11 +60,11 @@ void TMirrorer::Bootstrap(const TActorContext& ctx) { auto counters = AppData(ctx)->Counters; TString suffix = LocalDC ? "Remote" : "Internal"; MirrorerErrors = NKikimr::NPQ::TMultiCounter( - GetServiceCounters(counters, "pqproxy|writeSession"), + GetServiceCounters(counters, "pqproxy|writeSession"), GetLabels(TopicName), {}, {"MirrorerErrors" + suffix}, true ); MirrorerTimeLags = THolder<TPercentileCounter>(new TPercentileCounter( - GetServiceCounters(counters, "pqproxy|mirrorWriteTimeLag"), + GetServiceCounters(counters, "pqproxy|mirrorWriteTimeLag"), GetLabels(TopicName), {{"sensor", "TimeLags" + suffix}}, "Interval", lagsIntervals, true @@ -91,7 +91,7 @@ void TMirrorer::Handle(TEvents::TEvPoisonPill::TPtr&, const TActorContext& ctx) LOG_NOTICE_S(ctx, NKikimrServices::PQ_MIRRORER, MirrorerDescription() << " killed"); ReadSession = nullptr; PartitionStream = nullptr; - CredentialsProvider = nullptr; + CredentialsProvider = nullptr; Die(ctx); } @@ -335,7 +335,7 @@ void TMirrorer::TryToWrite(const TActorContext& ctx) { void TMirrorer::HandleInitCredentials(TEvPQ::TEvInitCredentials::TPtr& /*ev*/, const TActorContext& ctx) { LastInitStageTimestamp = ctx.Now(); try { - RecreateCredentialsProvider(ctx); + RecreateCredentialsProvider(ctx); } catch(...) { ProcessError(ctx, "cannot initialize credentials provider: " + CurrentExceptionMessage()); ScheduleWithIncreasingTimeout<TEvPQ::TEvInitCredentials>(SelfId(), ConsumerInitInterval, CONSUMER_INIT_INTERVAL_MAX, ctx); @@ -387,10 +387,10 @@ void TMirrorer::CreateConsumer(TEvPQ::TEvCreateConsumer::TPtr&, const TActorCont Y_VERIFY(factory); ReadSession = factory->GetReadSession(Config, Partition, CredentialsProvider, MAX_BYTES_IN_FLIGHT); - + LOG_NOTICE_S(ctx, NKikimrServices::PQ_MIRRORER, MirrorerDescription() << " read session created: " << ReadSession->GetSessionId()); - + Send(SelfId(), new TEvents::TEvWakeup()); Become(&TThis::StateWork); } @@ -434,7 +434,7 @@ void TMirrorer::AddMessagesToQueue(TVector<TPersQueueReadEvent::TDataReceivedEve } } -void TMirrorer::ScheduleConsumerCreation(const TActorContext& ctx) { +void TMirrorer::ScheduleConsumerCreation(const TActorContext& ctx) { LastInitStageTimestamp = ctx.Now(); ReadSession = nullptr; PartitionStream = nullptr; @@ -444,9 +444,9 @@ void TMirrorer::ScheduleConsumerCreation(const TActorContext& ctx) { ScheduleWithIncreasingTimeout<TEvPQ::TEvCreateConsumer>(SelfId(), ConsumerInitInterval, CONSUMER_INIT_INTERVAL_MAX, ctx); } -void TMirrorer::RecreateCredentialsProvider(const TActorContext& ctx) { - CredentialsProvider = nullptr; - +void TMirrorer::RecreateCredentialsProvider(const TActorContext& ctx) { + CredentialsProvider = nullptr; + auto factory = AppData(ctx)->PersQueueMirrorReaderFactory; Y_VERIFY(factory); CredentialsProvider = factory->GetCredentialsProvider(Config.GetCredentials()); diff --git a/ydb/core/persqueue/mirrorer.h b/ydb/core/persqueue/mirrorer.h index 25bb415e73..6819891f6c 100644 --- a/ydb/core/persqueue/mirrorer.h +++ b/ydb/core/persqueue/mirrorer.h @@ -1,6 +1,6 @@ #pragma once -#include "actor_persqueue_client_iface.h" +#include "actor_persqueue_client_iface.h" #include <library/cpp/actors/core/hfunc.h> #include <library/cpp/actors/core/log.h> @@ -114,8 +114,8 @@ private: const TActorContext& ctx, const NKikimrClient::TPersQueuePartitionResponse& response ); - void ScheduleConsumerCreation(const TActorContext& ctx); - void RecreateCredentialsProvider(const TActorContext& ctx); + void ScheduleConsumerCreation(const TActorContext& ctx); + void RecreateCredentialsProvider(const TActorContext& ctx); void StartInit(const TActorContext& ctx); void RetryWrite(const TActorContext& ctx); diff --git a/ydb/core/persqueue/partition.cpp b/ydb/core/persqueue/partition.cpp index bf50b5d051..cc8e2419cf 100644 --- a/ydb/core/persqueue/partition.cpp +++ b/ydb/core/persqueue/partition.cpp @@ -1,4 +1,4 @@ -#include "partition.h" +#include "partition.h" #include "event_helpers.h" #include "read.h" #include "sourceid.h" @@ -19,162 +19,162 @@ #include <library/cpp/time_provider/time_provider.h> #include <util/folder/path.h> #include <util/string/escape.h> -#include <util/system/byteorder.h> +#include <util/system/byteorder.h> -#define VERIFY_RESULT_BLOB(blob, pos) \ +#define VERIFY_RESULT_BLOB(blob, pos) \ Y_VERIFY(!blob.Data.empty(), "Empty data. SourceId: %s, SeqNo: %" PRIu64, blob.SourceId.data(), blob.SeqNo); \ Y_VERIFY(blob.SeqNo <= (ui64)Max<i64>(), "SeqNo is too big: %" PRIu64, blob.SeqNo); -namespace NKikimr { -namespace NPQ { - +namespace NKikimr { +namespace NPQ { + static const ui32 BATCH_UNPACK_SIZE_BORDER = 500 * 1024; //500kb -static const ui32 MAX_WRITE_CYCLE_SIZE = 16 << 20; //16MB - -static const ui32 MAX_USER_ACTS = 1000; - +static const ui32 MAX_WRITE_CYCLE_SIZE = 16 << 20; //16MB + +static const ui32 MAX_USER_ACTS = 1000; + static const TDuration WAKE_TIMEOUT = TDuration::Seconds(5); - -static const ui32 MAX_INLINE_SIZE = 1000; - -static const ui32 LEVEL0 = 32; - + +static const ui32 MAX_INLINE_SIZE = 1000; + +static const ui32 LEVEL0 = 32; + static const TDuration UPDATE_AVAIL_SIZE_INTERVAL = TDuration::MilliSeconds(100); - + static const TString WRITE_QUOTA_ROOT_PATH = "write-quota"; -struct TPartition::THasDataReq { - ui64 Num; - ui64 Offset; +struct TPartition::THasDataReq { + ui64 Num; + ui64 Offset; TActorId Sender; - TMaybe<ui64> Cookie; - TString ClientId; - - bool operator < (const THasDataReq& req) const - { - return Num < req.Num; - } -}; - -struct TPartition::THasDataDeadline { + TMaybe<ui64> Cookie; + TString ClientId; + + bool operator < (const THasDataReq& req) const + { + return Num < req.Num; + } +}; + +struct TPartition::THasDataDeadline { TInstant Deadline; - TPartition::THasDataReq Request; - - bool operator < (const THasDataDeadline& dl) const - { - return Deadline < dl.Deadline || Deadline == dl.Deadline && Request < dl.Request; - } -}; - - -class TKeyLevel { -public: + TPartition::THasDataReq Request; + + bool operator < (const THasDataDeadline& dl) const + { + return Deadline < dl.Deadline || Deadline == dl.Deadline && Request < dl.Request; + } +}; + + +class TKeyLevel { +public: friend IOutputStream& operator <<(IOutputStream& out, const TKeyLevel& value); - - TKeyLevel(ui32 border) - : Border_(border) - , Sum_(0) - , RecsCount_(0) - , InternalPartsCount_(0) - {} - - void Clear() { - Keys_.clear(); - Sum_ = 0; - RecsCount_ = 0; - InternalPartsCount_ = 0; - } - - ui32 KeysCount() const { - return Keys_.size(); - } - - ui32 RecsCount() const { - return RecsCount_; - } - - ui16 InternalPartsCount() const { - return InternalPartsCount_; - } - - bool NeedCompaction() const { - return Sum_ >= Border_; - } - - std::pair<TKey, ui32> Compact() { - Y_VERIFY(!Keys_.empty()); - TKey tmp(Keys_.front().first); - tmp.SetCount(RecsCount_); - tmp.SetInternalPartsCount(InternalPartsCount_); - std::pair<TKey, ui32> res(tmp, Sum_); - Clear(); - return res; - } - - std::pair<TKey, ui32> PopFront() { - Y_VERIFY(!Keys_.empty()); - Sum_ -= Keys_.front().second; - RecsCount_ -= Keys_.front().first.GetCount(); - InternalPartsCount_ -= Keys_.front().first.GetInternalPartsCount(); - auto res = Keys_.front(); - Keys_.pop_front(); - return res; - } - - std::pair<TKey, ui32> PopBack() { - Y_VERIFY(!Keys_.empty()); - Sum_ -= Keys_.back().second; - RecsCount_ -= Keys_.back().first.GetCount(); - InternalPartsCount_ -= Keys_.back().first.GetInternalPartsCount(); - auto res = Keys_.back(); - Keys_.pop_back(); - return res; - } - - - ui32 Sum() const { - return Sum_; - } - - const TKey& GetKey(const ui32 pos) const { - Y_VERIFY(pos < Keys_.size()); - return Keys_[pos].first; - } - - const ui32& GetSize(const ui32 pos) const { - Y_VERIFY(pos < Keys_.size()); - return Keys_[pos].second; - } - void PushKeyToFront(const TKey& key, ui32 size) { - Sum_ += size; - RecsCount_ += key.GetCount(); - InternalPartsCount_ += key.GetInternalPartsCount(); - Keys_.push_front(std::make_pair(key, size)); - } - - void AddKey(const TKey& key, ui32 size) { - Sum_ += size; - RecsCount_ += key.GetCount(); - InternalPartsCount_ += key.GetInternalPartsCount(); - Keys_.push_back(std::make_pair(key, size)); - } - - ui32 Border() const { - return Border_; - } - -private: - const ui32 Border_; - std::deque<std::pair<TKey, ui32>> Keys_; - ui32 Sum_; - ui32 RecsCount_; - ui16 InternalPartsCount_; -}; - - + + TKeyLevel(ui32 border) + : Border_(border) + , Sum_(0) + , RecsCount_(0) + , InternalPartsCount_(0) + {} + + void Clear() { + Keys_.clear(); + Sum_ = 0; + RecsCount_ = 0; + InternalPartsCount_ = 0; + } + + ui32 KeysCount() const { + return Keys_.size(); + } + + ui32 RecsCount() const { + return RecsCount_; + } + + ui16 InternalPartsCount() const { + return InternalPartsCount_; + } + + bool NeedCompaction() const { + return Sum_ >= Border_; + } + + std::pair<TKey, ui32> Compact() { + Y_VERIFY(!Keys_.empty()); + TKey tmp(Keys_.front().first); + tmp.SetCount(RecsCount_); + tmp.SetInternalPartsCount(InternalPartsCount_); + std::pair<TKey, ui32> res(tmp, Sum_); + Clear(); + return res; + } + + std::pair<TKey, ui32> PopFront() { + Y_VERIFY(!Keys_.empty()); + Sum_ -= Keys_.front().second; + RecsCount_ -= Keys_.front().first.GetCount(); + InternalPartsCount_ -= Keys_.front().first.GetInternalPartsCount(); + auto res = Keys_.front(); + Keys_.pop_front(); + return res; + } + + std::pair<TKey, ui32> PopBack() { + Y_VERIFY(!Keys_.empty()); + Sum_ -= Keys_.back().second; + RecsCount_ -= Keys_.back().first.GetCount(); + InternalPartsCount_ -= Keys_.back().first.GetInternalPartsCount(); + auto res = Keys_.back(); + Keys_.pop_back(); + return res; + } + + + ui32 Sum() const { + return Sum_; + } + + const TKey& GetKey(const ui32 pos) const { + Y_VERIFY(pos < Keys_.size()); + return Keys_[pos].first; + } + + const ui32& GetSize(const ui32 pos) const { + Y_VERIFY(pos < Keys_.size()); + return Keys_[pos].second; + } + void PushKeyToFront(const TKey& key, ui32 size) { + Sum_ += size; + RecsCount_ += key.GetCount(); + InternalPartsCount_ += key.GetInternalPartsCount(); + Keys_.push_front(std::make_pair(key, size)); + } + + void AddKey(const TKey& key, ui32 size) { + Sum_ += size; + RecsCount_ += key.GetCount(); + InternalPartsCount_ += key.GetInternalPartsCount(); + Keys_.push_back(std::make_pair(key, size)); + } + + ui32 Border() const { + return Border_; + } + +private: + const ui32 Border_; + std::deque<std::pair<TKey, ui32>> Keys_; + ui32 Sum_; + ui32 RecsCount_; + ui16 InternalPartsCount_; +}; + + void HtmlOutput(IOutputStream& out, const TString& line, const std::deque<std::pair<TKey, ui32>>& keys) -{ +{ HTML(out) { TABLE() { TABLEHEAD() { @@ -185,44 +185,44 @@ void HtmlOutput(IOutputStream& out, const TString& line, const std::deque<std::p TABLEBODY() { TABLER() { TABLEH() {out << "offset";} - for (auto& p: keys) { + for (auto& p: keys) { TABLED() {out << p.first.GetOffset();} - } + } } TABLER() { TABLEH() {out << "partNo";} - for (auto& p: keys) { + for (auto& p: keys) { TABLED() {out << p.first.GetPartNo();} - } + } } TABLER() { TABLEH() {out << "size";} - for (auto& p: keys) { + for (auto& p: keys) { TABLED() {out << p.second;} - } + } } } } } -} - - +} + + IOutputStream& operator <<(IOutputStream& out, const TKeyLevel& value) { - TStringStream str; - str << "count=" << value.Keys_.size() << " sum=" << value.Sum_ << " border=" << value.Border_ << " recs= " << value.RecsCount_ << ":"; - HtmlOutput(out, str.Str(), value.Keys_); - return out; -} - + TStringStream str; + str << "count=" << value.Keys_.size() << " sum=" << value.Sum_ << " border=" << value.Border_ << " recs= " << value.RecsCount_ << ":"; + HtmlOutput(out, str.Str(), value.Keys_); + return out; +} + ui64 GetOffsetEstimate(const std::deque<TDataKey>& container, TInstant timestamp, ui64 offset) { if (container.empty()) { - return offset; + return offset; } auto it = std::lower_bound(container.begin(), container.end(), timestamp, [](const TDataKey& p, const TInstant timestamp) { return timestamp > p.Timestamp; }); if (it == container.end()) { - return offset; + return offset; } else { return it->Key.GetOffset(); } @@ -239,335 +239,335 @@ struct TMirrorerInfo { TTabletCountersBase Baseline; }; -void TPartition::ReplyError(const TActorContext& ctx, const ui64 dst, NPersQueue::NErrorCode::EErrorCode errorCode, const TString& error) -{ +void TPartition::ReplyError(const TActorContext& ctx, const ui64 dst, NPersQueue::NErrorCode::EErrorCode errorCode, const TString& error) +{ ReplyPersQueueError( dst == 0 ? ctx.SelfID : Tablet, ctx, TabletID, TopicName, Partition, Counters, NKikimrServices::PERSQUEUE, dst, errorCode, error, true ); -} - -void TPartition::ReplyOk(const TActorContext& ctx, const ui64 dst) -{ +} + +void TPartition::ReplyOk(const TActorContext& ctx, const ui64 dst) +{ THolder<TEvPQ::TEvProxyResponse> response = MakeHolder<TEvPQ::TEvProxyResponse>(dst); NKikimrClient::TResponse& resp = response->Response; - resp.SetStatus(NMsgBusProxy::MSTATUS_OK); - resp.SetErrorCode(NPersQueue::NErrorCode::OK); - ctx.Send(Tablet, response.Release()); -} - -void TPartition::ReplyOwnerOk(const TActorContext& ctx, const ui64 dst, const TString& cookie) -{ + resp.SetStatus(NMsgBusProxy::MSTATUS_OK); + resp.SetErrorCode(NPersQueue::NErrorCode::OK); + ctx.Send(Tablet, response.Release()); +} + +void TPartition::ReplyOwnerOk(const TActorContext& ctx, const ui64 dst, const TString& cookie) +{ THolder<TEvPQ::TEvProxyResponse> response = MakeHolder<TEvPQ::TEvProxyResponse>(dst); NKikimrClient::TResponse& resp = response->Response; - resp.SetStatus(NMsgBusProxy::MSTATUS_OK); - resp.SetErrorCode(NPersQueue::NErrorCode::OK); - resp.MutablePartitionResponse()->MutableCmdGetOwnershipResult()->SetOwnerCookie(cookie); - ctx.Send(Tablet, response.Release()); -} - + resp.SetStatus(NMsgBusProxy::MSTATUS_OK); + resp.SetErrorCode(NPersQueue::NErrorCode::OK); + resp.MutablePartitionResponse()->MutableCmdGetOwnershipResult()->SetOwnerCookie(cookie); + ctx.Send(Tablet, response.Release()); +} + void TPartition::ReplyWrite( const TActorContext& ctx, const ui64 dst, const TString& sourceId, const ui64 seqNo, const ui16 partNo, const ui16 totalParts, const ui64 offset, const TInstant writeTimestamp, bool already, const ui64 maxSeqNo, const ui64 partitionQuotedTime, const TDuration topicQuotedTime, const ui64 queueTime, const ui64 writeTime) -{ +{ Y_VERIFY(offset <= (ui64)Max<i64>(), "Offset is too big: %" PRIu64, offset); Y_VERIFY(seqNo <= (ui64)Max<i64>(), "SeqNo is too big: %" PRIu64, seqNo); THolder<TEvPQ::TEvProxyResponse> response = MakeHolder<TEvPQ::TEvProxyResponse>(dst); NKikimrClient::TResponse& resp = response->Response; - resp.SetStatus(NMsgBusProxy::MSTATUS_OK); - resp.SetErrorCode(NPersQueue::NErrorCode::OK); - auto write = resp.MutablePartitionResponse()->AddCmdWriteResult(); - write->SetSourceId(sourceId); - write->SetSeqNo(seqNo); + resp.SetStatus(NMsgBusProxy::MSTATUS_OK); + resp.SetErrorCode(NPersQueue::NErrorCode::OK); + auto write = resp.MutablePartitionResponse()->AddCmdWriteResult(); + write->SetSourceId(sourceId); + write->SetSeqNo(seqNo); write->SetWriteTimestampMS(writeTimestamp.MilliSeconds()); - if (totalParts > 1) - write->SetPartNo(partNo); - write->SetAlreadyWritten(already); - if (already) - write->SetMaxSeqNo(maxSeqNo); - write->SetOffset(offset); - + if (totalParts > 1) + write->SetPartNo(partNo); + write->SetAlreadyWritten(already); + if (already) + write->SetMaxSeqNo(maxSeqNo); + write->SetOffset(offset); + write->SetPartitionQuotedTimeMs(partitionQuotedTime); write->SetTopicQuotedTimeMs(topicQuotedTime.MilliSeconds()); - write->SetTotalTimeInPartitionQueueMs(queueTime); - write->SetWriteTimeMs(writeTime); - - ctx.Send(Tablet, response.Release()); -} - - -void TPartition::ReplyGetClientOffsetOk(const TActorContext& ctx, const ui64 dst, const i64 offset, + write->SetTotalTimeInPartitionQueueMs(queueTime); + write->SetWriteTimeMs(writeTime); + + ctx.Send(Tablet, response.Release()); +} + + +void TPartition::ReplyGetClientOffsetOk(const TActorContext& ctx, const ui64 dst, const i64 offset, const TInstant writeTimestamp, const TInstant createTimestamp) -{ +{ THolder<TEvPQ::TEvProxyResponse> response = MakeHolder<TEvPQ::TEvProxyResponse>(dst); NKikimrClient::TResponse& resp = response->Response; - resp.SetStatus(NMsgBusProxy::MSTATUS_OK); - resp.SetErrorCode(NPersQueue::NErrorCode::OK); - - auto user = resp.MutablePartitionResponse()->MutableCmdGetClientOffsetResult(); - if (offset > -1) - user->SetOffset(offset); + resp.SetStatus(NMsgBusProxy::MSTATUS_OK); + resp.SetErrorCode(NPersQueue::NErrorCode::OK); + + auto user = resp.MutablePartitionResponse()->MutableCmdGetClientOffsetResult(); + if (offset > -1) + user->SetOffset(offset); if (writeTimestamp) user->SetWriteTimestampMS(writeTimestamp.MilliSeconds()); if (createTimestamp) { Y_VERIFY(writeTimestamp); user->SetCreateTimestampMS(createTimestamp.MilliSeconds()); - } - user->SetEndOffset(EndOffset); - user->SetSizeLag(GetSizeLag(offset)); - user->SetWriteTimestampEstimateMS(WriteTimestampEstimate.MilliSeconds()); - ctx.Send(Tablet, response.Release()); -} - - + } + user->SetEndOffset(EndOffset); + user->SetSizeLag(GetSizeLag(offset)); + user->SetWriteTimestampEstimateMS(WriteTimestampEstimate.MilliSeconds()); + ctx.Send(Tablet, response.Release()); +} + + static void RequestRange(const TActorContext& ctx, const TActorId& dst, ui32 partition, TKeyPrefix::EType c, bool includeData = false, const TString& key = "", bool dropTmp = false) -{ - THolder<TEvKeyValue::TEvRequest> request(new TEvKeyValue::TEvRequest); - auto read = request->Record.AddCmdReadRange(); - auto range = read->MutableRange(); - TKeyPrefix from(c, partition); - if (!key.empty()) { +{ + THolder<TEvKeyValue::TEvRequest> request(new TEvKeyValue::TEvRequest); + auto read = request->Record.AddCmdReadRange(); + auto range = read->MutableRange(); + TKeyPrefix from(c, partition); + if (!key.empty()) { Y_VERIFY(key.StartsWith(TStringBuf(from.Data(), from.Size()))); - from.Clear(); + from.Clear(); from.Append(key.data(), key.size()); - } - range->SetFrom(from.Data(), from.Size()); - - TKeyPrefix to(c, partition + 1); - range->SetTo(to.Data(), to.Size()); - - if(includeData) - read->SetIncludeData(true); - - if (dropTmp) { - auto del = request->Record.AddCmdDeleteRange(); - auto range = del->MutableRange(); - TKeyPrefix from(TKeyPrefix::TypeTmpData, partition); - range->SetFrom(from.Data(), from.Size()); - - TKeyPrefix to(TKeyPrefix::TypeTmpData, partition + 1); - range->SetTo(to.Data(), to.Size()); - } - - ctx.Send(dst, request.Release()); -} - - -NKikimrClient::TKeyValueRequest::EStorageChannel GetChannel(ui32 i) -{ - return NKikimrClient::TKeyValueRequest::EStorageChannel(NKikimrClient::TKeyValueRequest::MAIN + i); -} - - -void AddCheckDiskRequest(TEvKeyValue::TEvRequest *request, ui32 numChannels) { - for (ui32 i = 0; i < numChannels; ++i) { - request->Record.AddCmdGetStatus()->SetStorageChannel(GetChannel(i)); - } -} - - + } + range->SetFrom(from.Data(), from.Size()); + + TKeyPrefix to(c, partition + 1); + range->SetTo(to.Data(), to.Size()); + + if(includeData) + read->SetIncludeData(true); + + if (dropTmp) { + auto del = request->Record.AddCmdDeleteRange(); + auto range = del->MutableRange(); + TKeyPrefix from(TKeyPrefix::TypeTmpData, partition); + range->SetFrom(from.Data(), from.Size()); + + TKeyPrefix to(TKeyPrefix::TypeTmpData, partition + 1); + range->SetTo(to.Data(), to.Size()); + } + + ctx.Send(dst, request.Release()); +} + + +NKikimrClient::TKeyValueRequest::EStorageChannel GetChannel(ui32 i) +{ + return NKikimrClient::TKeyValueRequest::EStorageChannel(NKikimrClient::TKeyValueRequest::MAIN + i); +} + + +void AddCheckDiskRequest(TEvKeyValue::TEvRequest *request, ui32 numChannels) { + for (ui32 i = 0; i < numChannels; ++i) { + request->Record.AddCmdGetStatus()->SetStorageChannel(GetChannel(i)); + } +} + + static void RequestDiskStatus(const TActorContext& ctx, const TActorId& dst, ui32 numChannels) -{ - THolder<TEvKeyValue::TEvRequest> request(new TEvKeyValue::TEvRequest); - - AddCheckDiskRequest(request.Get(), numChannels); - - ctx.Send(dst, request.Release()); -} - - +{ + THolder<TEvKeyValue::TEvRequest> request(new TEvKeyValue::TEvRequest); + + AddCheckDiskRequest(request.Get(), numChannels); + + ctx.Send(dst, request.Release()); +} + + void RequestInfoRange(const TActorContext& ctx, const TActorId& dst, ui32 partition, const TString& key) -{ - RequestRange(ctx, dst, partition, TKeyPrefix::TypeInfo, true, key, key == ""); -} - -void RequestMetaRead(const TActorContext& ctx, const TActorId& dst, ui32 partition) -{ - THolder<TEvKeyValue::TEvRequest> request(new TEvKeyValue::TEvRequest); - auto read = request->Record.AddCmdRead(); - TKeyPrefix key{TKeyPrefix::TypeMeta, partition}; - read->SetKey(key.Data(), key.Size()); - ctx.Send(dst, request.Release()); -} - +{ + RequestRange(ctx, dst, partition, TKeyPrefix::TypeInfo, true, key, key == ""); +} + +void RequestMetaRead(const TActorContext& ctx, const TActorId& dst, ui32 partition) +{ + THolder<TEvKeyValue::TEvRequest> request(new TEvKeyValue::TEvRequest); + auto read = request->Record.AddCmdRead(); + TKeyPrefix key{TKeyPrefix::TypeMeta, partition}; + read->SetKey(key.Data(), key.Size()); + ctx.Send(dst, request.Release()); +} + void RequestData(const TActorContext& ctx, const TActorId& dst, const TVector<TString>& keys) -{ - THolder<TEvKeyValue::TEvRequest> request(new TEvKeyValue::TEvRequest); - for (auto& key: keys) { - auto read = request->Record.AddCmdRead(); - read->SetKey(key); - } - ctx.Send(dst, request.Release()); -} - +{ + THolder<TEvKeyValue::TEvRequest> request(new TEvKeyValue::TEvRequest); + for (auto& key: keys) { + auto read = request->Record.AddCmdRead(); + read->SetKey(key); + } + ctx.Send(dst, request.Release()); +} + void RequestDataRange(const TActorContext& ctx, const TActorId& dst, ui32 partition, const TString& key) -{ +{ RequestRange(ctx, dst, partition, TKeyPrefix::TypeData, false, key); -} - - -void TPartition::FillReadFromTimestamps(const NKikimrPQ::TPQTabletConfig& config, const TActorContext& ctx) { - TSet<TString> hasReadRule; - +} + + +void TPartition::FillReadFromTimestamps(const NKikimrPQ::TPQTabletConfig& config, const TActorContext& ctx) { + TSet<TString> hasReadRule; + for (auto& userInfo : UsersInfoStorage.GetAll()) { userInfo.second.ReadFromTimestamp = TInstant::Zero(); if (userInfo.second.HasReadRule) { userInfo.second.HasReadRule = false; hasReadRule.insert(userInfo.first); - } - } - for (ui32 i = 0; i < config.ReadRulesSize(); ++i) { - const auto& consumer = config.GetReadRules(i); + } + } + for (ui32 i = 0; i < config.ReadRulesSize(); ++i) { + const auto& consumer = config.GetReadRules(i); auto& userInfo = UsersInfoStorage.GetOrCreate(consumer, ctx); userInfo.HasReadRule = true; - ui64 rrGen = i < config.ReadRuleGenerationsSize() ? config.GetReadRuleGenerations(i) : 0; - if (userInfo.ReadRuleGeneration != rrGen) { - THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(0, consumer, 0, "", 0, 0, - TEvPQ::TEvSetClientInfo::ESCI_INIT_READ_RULE, rrGen); - userInfo.UserActs.push_back(event.Release()); - userInfo.Session = ""; - userInfo.Offset = 0; - userInfo.Step = userInfo.Generation = 0; - } - hasReadRule.erase(consumer); + ui64 rrGen = i < config.ReadRuleGenerationsSize() ? config.GetReadRuleGenerations(i) : 0; + if (userInfo.ReadRuleGeneration != rrGen) { + THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(0, consumer, 0, "", 0, 0, + TEvPQ::TEvSetClientInfo::ESCI_INIT_READ_RULE, rrGen); + userInfo.UserActs.push_back(event.Release()); + userInfo.Session = ""; + userInfo.Offset = 0; + userInfo.Step = userInfo.Generation = 0; + } + hasReadRule.erase(consumer); TInstant ts = i < config.ReadFromTimestampsMsSize() ? TInstant::MilliSeconds(config.GetReadFromTimestampsMs(i)) : TInstant::Zero(); if (!ts) ts += TDuration::MilliSeconds(1); if (!userInfo.ReadFromTimestamp|| userInfo.ReadFromTimestamp > ts) userInfo.ReadFromTimestamp = ts; - } - for (auto& consumer : hasReadRule) { + } + for (auto& consumer : hasReadRule) { auto& userInfo = UsersInfoStorage.GetOrCreate(consumer, ctx); if (!userInfo.Important) { ctx.Send(Tablet, new TEvPQ::TEvPartitionLabeledCountersDrop(Partition, userInfo.LabeledCounters.GetGroup())); - } - THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(0, consumer, - 0, "", 0, 0, TEvPQ::TEvSetClientInfo::ESCI_DROP_READ_RULE, 0); - userInfo.Session = ""; - userInfo.Offset = 0; - userInfo.Step = userInfo.Generation = 0; - userInfo.UserActs.push_back(event.Release()); - } -} - + } + THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(0, consumer, + 0, "", 0, 0, TEvPQ::TEvSetClientInfo::ESCI_DROP_READ_RULE, 0); + userInfo.Session = ""; + userInfo.Offset = 0; + userInfo.Step = userInfo.Generation = 0; + userInfo.UserActs.push_back(event.Release()); + } +} + TPartition::TPartition(ui64 tabletId, ui32 partition, const TActorId& tablet, const TActorId& blobCache, const TString& topicName, const TString& topicPath, const bool localDC, TString dcId, const NKikimrPQ::TPQTabletConfig& config, const TTabletCountersBase& counters, - const TActorContext &ctx, bool newPartition) + const TActorContext &ctx, bool newPartition) : TabletID(tabletId) , Partition(partition) - , Config(config) + , Config(config) , TopicName(topicName) , TopicPath(topicPath) - , LocalDC(localDC) + , LocalDC(localDC) , DCId(std::move(dcId)) - , StartOffset(0) - , EndOffset(0) - , WriteInflightSize(0) - , Tablet(tablet) - , BlobCache(blobCache) - , InitState(WaitDiskStatus) - , PartitionedBlob(partition, 0, 0, 0, 0, 0, Head, NewHead, true, false, 8 << 20) + , StartOffset(0) + , EndOffset(0) + , WriteInflightSize(0) + , Tablet(tablet) + , BlobCache(blobCache) + , InitState(WaitDiskStatus) + , PartitionedBlob(partition, 0, 0, 0, 0, 0, Head, NewHead, true, false, 8 << 20) , NewHeadKey{TKey{}, 0, TInstant::Zero(), 0} - , BodySize(0) - , MaxWriteResponsesSize(0) - , GapSize(0) + , BodySize(0) + , MaxWriteResponsesSize(0) + , GapSize(0) , CloudId(config.GetYcCloudId()) , DbId(config.GetYdbDatabaseId()) , FolderId(config.GetYcFolderId()) , UsersInfoStorage(DCId, TabletID, TopicName, Partition, counters, Config, CloudId, DbId, FolderId) - , ReadingTimestamp(false) - , SetOffsetCookie(0) - , Cookie(0) - , CreationTime(ctx.Now()) - , InitDuration(TDuration::Zero()) - , InitDone(false) - , NewPartition(newPartition) - , PartitionLabeledCounters(topicName, partition) - , Subscriber(partition, Counters, Tablet) - , WriteCycleStartTime(ctx.Now()) - , WriteCycleSize(0) - , WriteNewSize(0) + , ReadingTimestamp(false) + , SetOffsetCookie(0) + , Cookie(0) + , CreationTime(ctx.Now()) + , InitDuration(TDuration::Zero()) + , InitDone(false) + , NewPartition(newPartition) + , PartitionLabeledCounters(topicName, partition) + , Subscriber(partition, Counters, Tablet) + , WriteCycleStartTime(ctx.Now()) + , WriteCycleSize(0) + , WriteNewSize(0) , WriteNewSizeInternal(0) - , WriteNewSizeUncompressed(0) - , WriteNewMessages(0) + , WriteNewSizeUncompressed(0) + , WriteNewMessages(0) , WriteNewMessagesInternal(0) - , DiskIsFull(false) - , HasDataReqNum(0) - , WriteQuota(Config.GetPartitionConfig().GetBurstSize(), Config.GetPartitionConfig().GetWriteSpeedInBytesPerSecond(), ctx.Now()) - , AvgWriteBytes{{TDuration::Seconds(1), 1000}, {TDuration::Minutes(1), 1000}, {TDuration::Hours(1), 2000}, {TDuration::Days(1), 2000}} - , AvgQuotaBytes{{TDuration::Seconds(1), 1000}, {TDuration::Minutes(1), 1000}, {TDuration::Hours(1), 2000}, {TDuration::Days(1), 2000}} - , ReservedSize(0) - , Channel(0) - , TotalChannelWritesByHead(Config.GetPartitionConfig().GetNumChannels(), 0) - , WriteBufferIsFullCounter(nullptr) - , WriteTimestamp(ctx.Now()) - , WriteLagMs(TDuration::Minutes(1), 100) -{ - if (Config.GetPartitionConfig().HasMirrorFrom()) { - ManageWriteTimestampEstimate = !Config.GetPartitionConfig().GetMirrorFrom().GetSyncWriteTime(); + , DiskIsFull(false) + , HasDataReqNum(0) + , WriteQuota(Config.GetPartitionConfig().GetBurstSize(), Config.GetPartitionConfig().GetWriteSpeedInBytesPerSecond(), ctx.Now()) + , AvgWriteBytes{{TDuration::Seconds(1), 1000}, {TDuration::Minutes(1), 1000}, {TDuration::Hours(1), 2000}, {TDuration::Days(1), 2000}} + , AvgQuotaBytes{{TDuration::Seconds(1), 1000}, {TDuration::Minutes(1), 1000}, {TDuration::Hours(1), 2000}, {TDuration::Days(1), 2000}} + , ReservedSize(0) + , Channel(0) + , TotalChannelWritesByHead(Config.GetPartitionConfig().GetNumChannels(), 0) + , WriteBufferIsFullCounter(nullptr) + , WriteTimestamp(ctx.Now()) + , WriteLagMs(TDuration::Minutes(1), 100) +{ + if (Config.GetPartitionConfig().HasMirrorFrom()) { + ManageWriteTimestampEstimate = !Config.GetPartitionConfig().GetMirrorFrom().GetSyncWriteTime(); } else { ManageWriteTimestampEstimate = LocalDC; } - + WriteTimestampEstimate = ManageWriteTimestampEstimate ? ctx.Now() : TInstant::Zero(); CalcTopicWriteQuotaParams(); - - Counters.Populate(counters); -} - -void TPartition::HandleMonitoring(TEvPQ::TEvMonRequest::TPtr& ev, const TActorContext& ctx) -{ + + Counters.Populate(counters); +} + +void TPartition::HandleMonitoring(TEvPQ::TEvMonRequest::TPtr& ev, const TActorContext& ctx) +{ TVector<TString> res; TString str; - if (CurrentStateFunc() == &TThis::StateInit) { - str = "State is StateInit"; - } else if (CurrentStateFunc() == &TThis::StateIdle) { - str = "State is StateIdle"; - } else if (CurrentStateFunc() == &TThis::StateWrite) { - str = "State is StateWrite"; - } else { - Y_FAIL(""); - } - TStringStream out; - out << "Partition " << i32(Partition) << ": " << str; res.push_back(out.Str()); out.Clear(); + if (CurrentStateFunc() == &TThis::StateInit) { + str = "State is StateInit"; + } else if (CurrentStateFunc() == &TThis::StateIdle) { + str = "State is StateIdle"; + } else if (CurrentStateFunc() == &TThis::StateWrite) { + str = "State is StateWrite"; + } else { + Y_FAIL(""); + } + TStringStream out; + out << "Partition " << i32(Partition) << ": " << str; res.push_back(out.Str()); out.Clear(); if (DiskIsFull) { out << "DISK IS FULL"; res.push_back(out.Str()); out.Clear(); } - out << "StartOffset: " << StartOffset; res.push_back(out.Str()); out.Clear(); - out << "EndOffset: " << EndOffset; res.push_back(out.Str()); out.Clear(); - out << "CreationTime: " << CreationTime.ToStringLocalUpToSeconds(); res.push_back(out.Str()); out.Clear(); - out << "InitDuration: " << InitDuration.ToString(); res.push_back(out.Str()); out.Clear(); - out << "TotalCount: " << (Head.GetNextOffset() - StartOffset); res.push_back(out.Str()); out.Clear(); - out << "TotalSize: " << BodySize + Head.PackedSize; res.push_back(out.Str()); out.Clear(); - out << "LastOffset: " << (Head.GetNextOffset()); res.push_back(out.Str()); out.Clear(); - out << "HeadOffset: " << Head.Offset << ", count: " << Head.GetCount(); res.push_back(out.Str()); out.Clear(); - out << "WriteInflightSize: " << WriteInflightSize; res.push_back(out.Str()); out.Clear(); - out << "ReservedBytesSize: " << ReservedSize; res.push_back(out.Str()); out.Clear(); - out << "OwnerPipes: " << OwnerPipes.size(); res.push_back(out.Str()); out.Clear(); - out << "Owners: " << Owners.size(); res.push_back(out.Str()); out.Clear(); - out << "Currently writing: " << Responses.size(); res.push_back(out.Str()); out.Clear(); - out << "MaxCurrently writing: " << MaxWriteResponsesSize; res.push_back(out.Str()); out.Clear(); - out << "DataKeysBody size: " << DataKeysBody.size(); res.push_back(out.Str()); out.Clear(); - for (ui32 i = 0; i < DataKeysHead.size(); ++i) { + out << "StartOffset: " << StartOffset; res.push_back(out.Str()); out.Clear(); + out << "EndOffset: " << EndOffset; res.push_back(out.Str()); out.Clear(); + out << "CreationTime: " << CreationTime.ToStringLocalUpToSeconds(); res.push_back(out.Str()); out.Clear(); + out << "InitDuration: " << InitDuration.ToString(); res.push_back(out.Str()); out.Clear(); + out << "TotalCount: " << (Head.GetNextOffset() - StartOffset); res.push_back(out.Str()); out.Clear(); + out << "TotalSize: " << BodySize + Head.PackedSize; res.push_back(out.Str()); out.Clear(); + out << "LastOffset: " << (Head.GetNextOffset()); res.push_back(out.Str()); out.Clear(); + out << "HeadOffset: " << Head.Offset << ", count: " << Head.GetCount(); res.push_back(out.Str()); out.Clear(); + out << "WriteInflightSize: " << WriteInflightSize; res.push_back(out.Str()); out.Clear(); + out << "ReservedBytesSize: " << ReservedSize; res.push_back(out.Str()); out.Clear(); + out << "OwnerPipes: " << OwnerPipes.size(); res.push_back(out.Str()); out.Clear(); + out << "Owners: " << Owners.size(); res.push_back(out.Str()); out.Clear(); + out << "Currently writing: " << Responses.size(); res.push_back(out.Str()); out.Clear(); + out << "MaxCurrently writing: " << MaxWriteResponsesSize; res.push_back(out.Str()); out.Clear(); + out << "DataKeysBody size: " << DataKeysBody.size(); res.push_back(out.Str()); out.Clear(); + for (ui32 i = 0; i < DataKeysHead.size(); ++i) { out << "DataKeysHead[" << i << "] size: " << DataKeysHead[i].KeysCount() << " sum: " << DataKeysHead[i].Sum() - << " border: " << DataKeysHead[i].Border() << " recs: " << DataKeysHead[i].RecsCount() << " intCount: " << DataKeysHead[i].InternalPartsCount(); + << " border: " << DataKeysHead[i].Border() << " recs: " << DataKeysHead[i].RecsCount() << " intCount: " << DataKeysHead[i].InternalPartsCount(); res.push_back(out.Str()); out.Clear(); - } - for (auto& avg : AvgWriteBytes) { + } + for (auto& avg : AvgWriteBytes) { out << "AvgWriteSize per " << avg.GetDuration().ToString() << " is " << avg.GetValue() << " bytes"; - res.push_back(out.Str()); out.Clear(); - } - out << Config.DebugString(); res.push_back(out.Str()); out.Clear(); + res.push_back(out.Str()); out.Clear(); + } + out << Config.DebugString(); res.push_back(out.Str()); out.Clear(); HTML(out) - { + { DIV_CLASS_ID("tab-pane fade", Sprintf("partition_%u", ui32(Partition))) { TABLE_SORTABLE_CLASS("table") { TABLEHEAD() { @@ -583,8 +583,8 @@ void TPartition::HandleMonitoring(TEvPQ::TEvMonRequest::TPtr& ev, const TActorCo } } TABLEBODY() { - ui32 i = 0; - for (auto& d: DataKeysBody) { + ui32 i = 0; + for (auto& d: DataKeysBody) { TABLER() { TABLED() {out << "DataBody";} TABLED() {out << i++;} @@ -595,13 +595,13 @@ void TPartition::HandleMonitoring(TEvPQ::TEvMonRequest::TPtr& ev, const TActorCo TABLED() {out << d.Key.GetInternalPartsCount();} TABLED() {out << d.Size;} } - } - ui32 currentLevel = 0; - for (ui32 p = 0; p < HeadKeys.size(); ++p) { - ui32 size = HeadKeys[p].Size; - while (currentLevel + 1 < TotalLevels && size < CompactLevelBorder[currentLevel + 1]) - ++currentLevel; - Y_VERIFY(size < CompactLevelBorder[currentLevel]); + } + ui32 currentLevel = 0; + for (ui32 p = 0; p < HeadKeys.size(); ++p) { + ui32 size = HeadKeys[p].Size; + while (currentLevel + 1 < TotalLevels && size < CompactLevelBorder[currentLevel + 1]) + ++currentLevel; + Y_VERIFY(size < CompactLevelBorder[currentLevel]); TABLER() { TABLED() {out << "DataHead[" << currentLevel << "]";} TABLED() {out << i++;} @@ -612,10 +612,10 @@ void TPartition::HandleMonitoring(TEvPQ::TEvMonRequest::TPtr& ev, const TActorCo TABLED() {out << HeadKeys[p].Key.GetInternalPartsCount();} TABLED() {out << size;} } - } + } } } - + TABLE_SORTABLE_CLASS("table") { TABLEHEAD() { TABLER() { @@ -625,29 +625,29 @@ void TPartition::HandleMonitoring(TEvPQ::TEvMonRequest::TPtr& ev, const TActorCo TABLEH() {out << "id";} } } - ui32 i = 0; + ui32 i = 0; TABLEBODY() { - for (auto& d: GapOffsets) { + for (auto& d: GapOffsets) { TABLER() { TABLED() {out << d.first;} TABLED() {out << d.second;} TABLED() {out << (d.second - d.first);} TABLED() {out << (i++);} } - } - if (!DataKeysBody.empty() && DataKeysBody.back().Key.GetOffset() + DataKeysBody.back().Key.GetCount() < Head.Offset) { + } + if (!DataKeysBody.empty() && DataKeysBody.back().Key.GetOffset() + DataKeysBody.back().Key.GetCount() < Head.Offset) { TABLER() { TABLED() {out << (DataKeysBody.back().Key.GetOffset() + DataKeysBody.back().Key.GetCount());} TABLED() {out << Head.Offset;} TABLED() {out << (Head.Offset - (DataKeysBody.back().Key.GetOffset() + DataKeysBody.back().Key.GetCount()));} TABLED() {out << (i++);} } - - } + + } } } - - + + TABLE_SORTABLE_CLASS("table") { TABLEHEAD() { TABLER() { @@ -671,7 +671,7 @@ void TPartition::HandleMonitoring(TEvPQ::TEvMonRequest::TPtr& ev, const TActorCo TABLED() {out << (sourceIdInfo.Explicit ? "true" : "false");} TABLED() {out << sourceIdInfo.State;} } - } + } } } TABLE_SORTABLE_CLASS("table") { @@ -680,15 +680,15 @@ void TPartition::HandleMonitoring(TEvPQ::TEvMonRequest::TPtr& ev, const TActorCo TABLEH() {out << "user";} TABLEH() {out << "offset";} TABLEH() {out << "lag";} - TABLEH() {out << "ReadFromTimestamp";} + TABLEH() {out << "ReadFromTimestamp";} TABLEH() {out << "WriteTimestamp";} TABLEH() {out << "CreateTimestamp";} - TABLEH() {out << "ReadOffset";} - TABLEH() {out << "ReadWriteTimestamp";} - TABLEH() {out << "ReadCreateTimestamp";} + TABLEH() {out << "ReadOffset";} + TABLEH() {out << "ReadWriteTimestamp";} + TABLEH() {out << "ReadCreateTimestamp";} TABLEH() {out << "ReadOffsetRewindSum";} - TABLEH() {out << "ActiveReads";} - TABLEH() {out << "Subscriptions";} + TABLEH() {out << "ActiveReads";} + TABLEH() {out << "Subscriptions";} } } TABLEBODY() { @@ -700,65 +700,65 @@ void TPartition::HandleMonitoring(TEvPQ::TEvMonRequest::TPtr& ev, const TActorCo TABLED() {out << ToStringLocalTimeUpToSeconds(d.second.ReadFromTimestamp);} TABLED() {out << ToStringLocalTimeUpToSeconds(d.second.WriteTimestamp);} TABLED() {out << ToStringLocalTimeUpToSeconds(d.second.CreateTimestamp);} - TABLED() {out << (d.second.GetReadOffset());} + TABLED() {out << (d.second.GetReadOffset());} TABLED() {out << ToStringLocalTimeUpToSeconds(d.second.GetReadWriteTimestamp());} TABLED() {out << ToStringLocalTimeUpToSeconds(d.second.GetReadCreateTimestamp());} TABLED() {out << (d.second.ReadOffsetRewindSum);} - TABLED() {out << d.second.ActiveReads;} - TABLED() {out << d.second.Subscriptions;} + TABLED() {out << d.second.ActiveReads;} + TABLED() {out << d.second.Subscriptions;} } - } + } } } } } - - ctx.Send(ev->Sender, new TEvPQ::TEvMonResponse(Partition, res, out.Str())); -} - - -void TPartition::Bootstrap(const TActorContext& ctx) -{ - + + ctx.Send(ev->Sender, new TEvPQ::TEvMonResponse(Partition, res, out.Str())); +} + + +void TPartition::Bootstrap(const TActorContext& ctx) +{ + UsersInfoStorage.Init(Tablet, SelfId()); - Y_VERIFY(AppData(ctx)->PQConfig.GetMaxBlobsPerLevel() > 0); - ui32 border = LEVEL0; - MaxSizeCheck = 0; - MaxBlobSize = AppData(ctx)->PQConfig.GetMaxBlobSize(); - PartitionedBlob = TPartitionedBlob(Partition, 0, 0, 0, 0, 0, Head, NewHead, true, false, MaxBlobSize); - for (ui32 i = 0; i < TotalLevels; ++i) { - CompactLevelBorder.push_back(border); - MaxSizeCheck += border; - Y_VERIFY(i + 1 < TotalLevels && border < MaxBlobSize || i + 1 == TotalLevels && border == MaxBlobSize); - border *= AppData(ctx)->PQConfig.GetMaxBlobsPerLevel(); - border = Min(border, MaxBlobSize); - } - TotalMaxCount = AppData(ctx)->PQConfig.GetMaxBlobsPerLevel() * TotalLevels; - - std::reverse(CompactLevelBorder.begin(), CompactLevelBorder.end()); - - - for (ui32 i = 0; i < TotalLevels; ++i) { - DataKeysHead.push_back(TKeyLevel(CompactLevelBorder[i])); - } - - for (const auto& readQuota : Config.GetPartitionConfig().GetReadQuota()) { + Y_VERIFY(AppData(ctx)->PQConfig.GetMaxBlobsPerLevel() > 0); + ui32 border = LEVEL0; + MaxSizeCheck = 0; + MaxBlobSize = AppData(ctx)->PQConfig.GetMaxBlobSize(); + PartitionedBlob = TPartitionedBlob(Partition, 0, 0, 0, 0, 0, Head, NewHead, true, false, MaxBlobSize); + for (ui32 i = 0; i < TotalLevels; ++i) { + CompactLevelBorder.push_back(border); + MaxSizeCheck += border; + Y_VERIFY(i + 1 < TotalLevels && border < MaxBlobSize || i + 1 == TotalLevels && border == MaxBlobSize); + border *= AppData(ctx)->PQConfig.GetMaxBlobsPerLevel(); + border = Min(border, MaxBlobSize); + } + TotalMaxCount = AppData(ctx)->PQConfig.GetMaxBlobsPerLevel() * TotalLevels; + + std::reverse(CompactLevelBorder.begin(), CompactLevelBorder.end()); + + + for (ui32 i = 0; i < TotalLevels; ++i) { + DataKeysHead.push_back(TKeyLevel(CompactLevelBorder[i])); + } + + for (const auto& readQuota : Config.GetPartitionConfig().GetReadQuota()) { auto &userInfo = UsersInfoStorage.GetOrCreate(readQuota.GetClientId(), ctx); userInfo.ReadQuota.UpdateConfig(readQuota.GetBurstSize(), readQuota.GetSpeedInBytesPerSecond()); - } - - LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "boostrapping " << Partition << " " << ctx.SelfID); - - if (NewPartition) { - InitComplete(ctx); - } else { - Y_VERIFY(InitState == WaitDiskStatus); - RequestDiskStatus(ctx, Tablet, Config.GetPartitionConfig().GetNumChannels()); - Become(&TThis::StateInit); - } - - if (AppData(ctx)->Counters) { + } + + LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "boostrapping " << Partition << " " << ctx.SelfID); + + if (NewPartition) { + InitComplete(ctx); + } else { + Y_VERIFY(InitState == WaitDiskStatus); + RequestDiskStatus(ctx, Tablet, Config.GetPartitionConfig().GetNumChannels()); + Become(&TThis::StateInit); + } + + if (AppData(ctx)->Counters) { TVector<NPQ::TLabelsInfo> labels; if (AppData()->PQConfig.GetTopicsAreFirstClassCitizen()) { SetupStreamCounters(ctx); @@ -769,18 +769,18 @@ void TPartition::Bootstrap(const TActorContext& ctx) } } } - + void TPartition::SetupTopicCounters(const TActorContext& ctx) { auto counters = AppData(ctx)->Counters; auto labels = NKikimr::NPQ::GetLabels(TopicName); const TString suffix = LocalDC ? "Original" : "Mirrored"; - + WriteBufferIsFullCounter.SetCounter( GetCounters(counters, "writingTime", TopicName), {{"host", DCId}, {"Partition", ToString<ui32>(Partition)}}, {"sensor", "BufferFullTime" + suffix, true}); - + InputTimeLag = THolder<NKikimr::NPQ::TPercentileCounter>(new NKikimr::NPQ::TPercentileCounter( GetServiceCounters(counters, "pqproxy|writeTimeLag"), GetLabels(TopicName), {{"sensor", "TimeLags" + suffix}}, "Interval", @@ -788,8 +788,8 @@ void TPartition::SetupTopicCounters(const TActorContext& ctx) { {100, "100ms"}, {200, "200ms"}, {500, "500ms"}, {1000, "1000ms"}, {2000, "2000ms"}, {5000, "5000ms"}, {10'000, "10000ms"}, {30'000, "30000ms"}, {60'000, "60000ms"}, {180'000,"180000ms"}, {9'999'999, "999999ms"}}, true)); - - + + MessageSize = THolder<NKikimr::NPQ::TPercentileCounter>(new NKikimr::NPQ::TPercentileCounter( GetServiceCounters(counters, "pqproxy|writeInfo"), GetLabels(TopicName), {{"sensor", "MessageSize" + suffix}}, "Size", @@ -798,15 +798,15 @@ void TPartition::SetupTopicCounters(const TActorContext& ctx) { {20'480, "20kb"}, {51'200, "50kb"}, {102'400, "100kb"}, {204'800, "200kb"}, {524'288, "512kb"},{1'048'576, "1024kb"}, {2'097'152,"2048kb"}, {5'242'880, "5120kb"}, {10'485'760, "10240kb"}, {67'108'864, "65536kb"}, {999'999'999, "99999999kb"}}, true)); - + BytesWritten = NKikimr::NPQ::TMultiCounter(GetServiceCounters(counters, "pqproxy|writeSession"), GetLabels(TopicName), {}, {"BytesWritten" + suffix}, true); BytesWrittenUncompressed = NKikimr::NPQ::TMultiCounter(GetServiceCounters(counters, "pqproxy|writeSession"), GetLabels(TopicName), {}, {"UncompressedBytesWritten" + suffix}, true); - + BytesWrittenComp = NKikimr::NPQ::TMultiCounter(GetServiceCounters(counters, "pqproxy|writeSession"), GetLabels(TopicName), {}, {"CompactedBytesWritten" + suffix}, true); - + MsgsWritten = NKikimr::NPQ::TMultiCounter(GetServiceCounters(counters, "pqproxy|writeSession"), GetLabels(TopicName), {}, {"MessagesWritten" + suffix}, true); @@ -829,7 +829,7 @@ void TPartition::SetupTopicCounters(const TActorContext& ctx) { {1000, "1000ms"}, {2500, "2500ms"}, {5000, "5000ms"}, {10'000, "10000ms"}, {9'999'999, "999999ms"}}, true)); } - + PartitionWriteQuotaWaitCounter = THolder<NKikimr::NPQ::TPercentileCounter>( new NKikimr::NPQ::TPercentileCounter(GetServiceCounters(counters, "pqproxy|partitionWriteQuotaWait"), GetLabels(TopicName), {{"sensor", "PartitionWriteQuotaWait" + suffix}}, "Interval", @@ -839,7 +839,7 @@ void TPartition::SetupTopicCounters(const TActorContext& ctx) { {1000, "1000ms"}, {2500, "2500ms"}, {5000, "5000ms"}, {10'000, "10000ms"}, {9'999'999, "999999ms"}}, true)); } - + void TPartition::SetupStreamCounters(const TActorContext& ctx) { auto counters = AppData(ctx)->Counters; auto labels = NKikimr::NPQ::GetLabelsForStream(TopicName, CloudId, DbId, FolderId); @@ -904,7 +904,7 @@ void TPartition::SetupStreamCounters(const TActorContext& ctx) { {20, "20"}, {50, "50"}, {100, "100"}, {500, "500"}, {1000, "1000"}, {2500, "2500"}, {5000, "5000"}, {10'000, "10000"}, {9'999'999, "999999"}}, true)); - } + } PartitionWriteQuotaWaitCounter = THolder<NKikimr::NPQ::TPercentileCounter>( new NKikimr::NPQ::TPercentileCounter( @@ -915,169 +915,169 @@ void TPartition::SetupStreamCounters(const TActorContext& ctx) { {20, "20"}, {50, "50"}, {100, "100"}, {500, "500"}, {1000, "1000"}, {2500, "2500"}, {5000, "5000"}, {10'000, "10000"}, {9'999'999, "999999"}}, true)); -} - -void TPartition::ProcessHasDataRequests(const TActorContext& ctx) { - if (!InitDone) - return; - for (auto it = HasDataRequests.begin(); it != HasDataRequests.end();) { - if (it->Offset < EndOffset) { - TAutoPtr<TEvPersQueue::TEvHasDataInfoResponse> res(new TEvPersQueue::TEvHasDataInfoResponse()); - res->Record.SetEndOffset(EndOffset); - res->Record.SetSizeLag(GetSizeLag(it->Offset)); - res->Record.SetWriteTimestampEstimateMS(WriteTimestampEstimate.MilliSeconds()); - if (it->Cookie) - res->Record.SetCookie(*(it->Cookie)); - ctx.Send(it->Sender, res.Release()); - if (!it->ClientId.empty()) { +} + +void TPartition::ProcessHasDataRequests(const TActorContext& ctx) { + if (!InitDone) + return; + for (auto it = HasDataRequests.begin(); it != HasDataRequests.end();) { + if (it->Offset < EndOffset) { + TAutoPtr<TEvPersQueue::TEvHasDataInfoResponse> res(new TEvPersQueue::TEvHasDataInfoResponse()); + res->Record.SetEndOffset(EndOffset); + res->Record.SetSizeLag(GetSizeLag(it->Offset)); + res->Record.SetWriteTimestampEstimateMS(WriteTimestampEstimate.MilliSeconds()); + if (it->Cookie) + res->Record.SetCookie(*(it->Cookie)); + ctx.Send(it->Sender, res.Release()); + if (!it->ClientId.empty()) { auto& userInfo = UsersInfoStorage.GetOrCreate(it->ClientId, ctx); userInfo.ForgetSubscription(ctx.Now()); - } - it = HasDataRequests.erase(it); - } else { - break; - } - } - for (auto it = HasDataDeadlines.begin(); it != HasDataDeadlines.end();) { + } + it = HasDataRequests.erase(it); + } else { + break; + } + } + for (auto it = HasDataDeadlines.begin(); it != HasDataDeadlines.end();) { if (it->Deadline <= ctx.Now()) { - auto jt = HasDataRequests.find(it->Request); - if (jt != HasDataRequests.end()) { - TAutoPtr<TEvPersQueue::TEvHasDataInfoResponse> res(new TEvPersQueue::TEvHasDataInfoResponse()); - res->Record.SetEndOffset(EndOffset); - res->Record.SetSizeLag(0); - res->Record.SetWriteTimestampEstimateMS(WriteTimestampEstimate.MilliSeconds()); - if (it->Request.Cookie) - res->Record.SetCookie(*(it->Request.Cookie)); - ctx.Send(it->Request.Sender, res.Release()); - if (!it->Request.ClientId.empty()) { + auto jt = HasDataRequests.find(it->Request); + if (jt != HasDataRequests.end()) { + TAutoPtr<TEvPersQueue::TEvHasDataInfoResponse> res(new TEvPersQueue::TEvHasDataInfoResponse()); + res->Record.SetEndOffset(EndOffset); + res->Record.SetSizeLag(0); + res->Record.SetWriteTimestampEstimateMS(WriteTimestampEstimate.MilliSeconds()); + if (it->Request.Cookie) + res->Record.SetCookie(*(it->Request.Cookie)); + ctx.Send(it->Request.Sender, res.Release()); + if (!it->Request.ClientId.empty()) { auto& userInfo = UsersInfoStorage.GetOrCreate(it->Request.ClientId, ctx); userInfo.ForgetSubscription(ctx.Now()); - } - HasDataRequests.erase(jt); - } - it = HasDataDeadlines.erase(it); - } else { - break; - } - } -} - - -void TPartition::UpdateAvailableSize(const TActorContext& ctx) { - - FilterDeadlinedWrites(ctx); - - auto now = ctx.Now(); - WriteQuota.Update(now); + } + HasDataRequests.erase(jt); + } + it = HasDataDeadlines.erase(it); + } else { + break; + } + } +} + + +void TPartition::UpdateAvailableSize(const TActorContext& ctx) { + + FilterDeadlinedWrites(ctx); + + auto now = ctx.Now(); + WriteQuota.Update(now); for (auto &c : UsersInfoStorage.GetAll()) { - while (true) { - c.second.ReadQuota.Update(now); - if (!c.second.ReadQuota.CanExaust() && !c.second.ReadRequests.empty()) { - break; - } - if (!c.second.ReadRequests.empty()) { - auto ri(std::move(c.second.ReadRequests.front().first)); - auto cookie = c.second.ReadRequests.front().second; - c.second.ReadRequests.pop_front(); - ProcessRead(ctx, std::move(ri), cookie, false); - } else - break; - } - } - ScheduleUpdateAvailableSize(ctx); - ReportLabeledCounters(ctx); -} - -void TPartition::HandleOnIdle(TEvPQ::TEvUpdateAvailableSize::TPtr&, const TActorContext& ctx) { - UpdateAvailableSize(ctx); - HandleWrites(ctx); -} - -void TPartition::HandleOnWrite(TEvPQ::TEvUpdateAvailableSize::TPtr&, const TActorContext& ctx) { - UpdateAvailableSize(ctx); -} - - -void TPartition::HandleWakeup(const TActorContext& ctx) { - FilterDeadlinedWrites(ctx); - + while (true) { + c.second.ReadQuota.Update(now); + if (!c.second.ReadQuota.CanExaust() && !c.second.ReadRequests.empty()) { + break; + } + if (!c.second.ReadRequests.empty()) { + auto ri(std::move(c.second.ReadRequests.front().first)); + auto cookie = c.second.ReadRequests.front().second; + c.second.ReadRequests.pop_front(); + ProcessRead(ctx, std::move(ri), cookie, false); + } else + break; + } + } + ScheduleUpdateAvailableSize(ctx); + ReportLabeledCounters(ctx); +} + +void TPartition::HandleOnIdle(TEvPQ::TEvUpdateAvailableSize::TPtr&, const TActorContext& ctx) { + UpdateAvailableSize(ctx); + HandleWrites(ctx); +} + +void TPartition::HandleOnWrite(TEvPQ::TEvUpdateAvailableSize::TPtr&, const TActorContext& ctx) { + UpdateAvailableSize(ctx); +} + + +void TPartition::HandleWakeup(const TActorContext& ctx) { + FilterDeadlinedWrites(ctx); + ctx.Schedule(WAKE_TIMEOUT, new TEvents::TEvWakeup()); - ctx.Send(Tablet, new TEvPQ::TEvPartitionCounters(Partition, Counters)); - - ReportLabeledCounters(ctx); - - ProcessHasDataRequests(ctx); - - auto now = ctx.Now(); + ctx.Send(Tablet, new TEvPQ::TEvPartitionCounters(Partition, Counters)); + + ReportLabeledCounters(ctx); + + ProcessHasDataRequests(ctx); + + auto now = ctx.Now(); for (auto& userInfo : UsersInfoStorage.GetAll()) { userInfo.second.UpdateReadingTimeAndState(now); for (auto& avg : userInfo.second.AvgReadBytes) { - avg.Update(now); - } - } - - WriteBufferIsFullCounter.UpdateWorkingTime(now); - - WriteLagMs.Update(0, now); - - for (auto& avg : AvgWriteBytes) { - avg.Update(now); - } - for (auto& avg : AvgQuotaBytes) { - avg.Update(now); - } - - if (CurrentStateFunc() == &TThis::StateWrite) {//Write will handle all itself - return; - } - Y_VERIFY(CurrentStateFunc() == &TThis::StateIdle); + avg.Update(now); + } + } + + WriteBufferIsFullCounter.UpdateWorkingTime(now); + + WriteLagMs.Update(0, now); + + for (auto& avg : AvgWriteBytes) { + avg.Update(now); + } + for (auto& avg : AvgQuotaBytes) { + avg.Update(now); + } + + if (CurrentStateFunc() == &TThis::StateWrite) {//Write will handle all itself + return; + } + Y_VERIFY(CurrentStateFunc() == &TThis::StateIdle); if (ManageWriteTimestampEstimate) - WriteTimestampEstimate = now; - + WriteTimestampEstimate = now; + THolder <TEvKeyValue::TEvRequest> request = MakeHolder<TEvKeyValue::TEvRequest>(); - bool haveChanges = DropOldStuff(request.Get(), false, ctx); - if (DiskIsFull) { - AddCheckDiskRequest(request.Get(), Config.GetPartitionConfig().GetNumChannels()); - haveChanges = true; - } - - if (haveChanges) { - WriteCycleStartTime = ctx.Now(); - WriteStartTime = ctx.Now(); - TopicQuotaWaitTimeForCurrentBlob = TDuration::Zero(); - WritesTotal.Inc(); - Become(&TThis::StateWrite); - AddMetaKey(request.Get()); - ctx.Send(Tablet, request.Release()); - } -} - -void TPartition::AddMetaKey(TEvKeyValue::TEvRequest* request) { - //Set Start Offset - auto write = request->Record.AddCmdWrite(); - TKeyPrefix ikey(TKeyPrefix::TypeMeta, Partition); - - NKikimrPQ::TPartitionMeta meta; - meta.SetStartOffset(StartOffset); - meta.SetEndOffset(Max(NewHead.GetNextOffset(), EndOffset)); - - TString out; + bool haveChanges = DropOldStuff(request.Get(), false, ctx); + if (DiskIsFull) { + AddCheckDiskRequest(request.Get(), Config.GetPartitionConfig().GetNumChannels()); + haveChanges = true; + } + + if (haveChanges) { + WriteCycleStartTime = ctx.Now(); + WriteStartTime = ctx.Now(); + TopicQuotaWaitTimeForCurrentBlob = TDuration::Zero(); + WritesTotal.Inc(); + Become(&TThis::StateWrite); + AddMetaKey(request.Get()); + ctx.Send(Tablet, request.Release()); + } +} + +void TPartition::AddMetaKey(TEvKeyValue::TEvRequest* request) { + //Set Start Offset + auto write = request->Record.AddCmdWrite(); + TKeyPrefix ikey(TKeyPrefix::TypeMeta, Partition); + + NKikimrPQ::TPartitionMeta meta; + meta.SetStartOffset(StartOffset); + meta.SetEndOffset(Max(NewHead.GetNextOffset(), EndOffset)); + + TString out; Y_PROTOBUF_SUPPRESS_NODISCARD meta.SerializeToString(&out); - - write->SetKey(ikey.Data(), ikey.Size()); - write->SetValue(out.c_str(), out.size()); - write->SetStorageChannel(NKikimrClient::TKeyValueRequest::INLINE); - -} - -bool TPartition::DropOldStuff(TEvKeyValue::TEvRequest* request, bool hasWrites, const TActorContext& ctx) { + + write->SetKey(ikey.Data(), ikey.Size()); + write->SetValue(out.c_str(), out.size()); + write->SetStorageChannel(NKikimrClient::TKeyValueRequest::INLINE); + +} + +bool TPartition::DropOldStuff(TEvKeyValue::TEvRequest* request, bool hasWrites, const TActorContext& ctx) { bool haveChanges = false; - if (DropOldData(request, hasWrites, ctx)) + if (DropOldData(request, hasWrites, ctx)) haveChanges = true; LOG_DEBUG(ctx, NKikimrServices::PERSQUEUE, TStringBuilder() << "Have " << request->Record.CmdDeleteRangeSize() << " items to delete old stuff"); - if (SourceIdStorage.DropOldSourceIds(request, ctx.Now(), StartOffset, Partition, Config.GetPartitionConfig())) { - haveChanges = true; + if (SourceIdStorage.DropOldSourceIds(request, ctx.Now(), StartOffset, Partition, Config.GetPartitionConfig())) { + haveChanges = true; SourceIdStorage.MarkOwnersForDeletedSourceId(Owners); } LOG_DEBUG(ctx, NKikimrServices::PERSQUEUE, TStringBuilder() << "Have " << request->Record.CmdDeleteRangeSize() << " items to delete all stuff"); @@ -1085,127 +1085,127 @@ bool TPartition::DropOldStuff(TEvKeyValue::TEvRequest* request, bool hasWrites, return haveChanges; } - -bool TPartition::DropOldData(TEvKeyValue::TEvRequest *request, bool hasWrites, const TActorContext& ctx) { - if (StartOffset == EndOffset) - return false; - if (DataKeysBody.size() <= 1) - return false; - - ui64 minOffset = EndOffset; - for (const auto& importantClientId : Config.GetPartitionConfig().GetImportantClientId()) { + +bool TPartition::DropOldData(TEvKeyValue::TEvRequest *request, bool hasWrites, const TActorContext& ctx) { + if (StartOffset == EndOffset) + return false; + if (DataKeysBody.size() <= 1) + return false; + + ui64 minOffset = EndOffset; + for (const auto& importantClientId : Config.GetPartitionConfig().GetImportantClientId()) { TUserInfo* userInfo = UsersInfoStorage.GetIfExists(importantClientId); - ui64 curOffset = StartOffset; + ui64 curOffset = StartOffset; if (userInfo && userInfo->Offset >= 0) //-1 means no offset curOffset = userInfo->Offset; - minOffset = Min<ui64>(minOffset, curOffset); - } - - bool hasDrop = false; - ui64 endOffset = StartOffset; - - if (DataKeysBody.size() > 1) { - - while (DataKeysBody.size() > 1 && ctx.Now() >= DataKeysBody.front().Timestamp + TDuration::Seconds(Config.GetPartitionConfig().GetLifetimeSeconds()) - && (minOffset > DataKeysBody[1].Key.GetOffset() || minOffset == DataKeysBody[1].Key.GetOffset() && DataKeysBody[1].Key.GetPartNo() == 0)) {//all offsets from blob[0] are readed, and don't delete last blob - BodySize -= DataKeysBody.front().Size; - - DataKeysBody.pop_front(); - if (!GapOffsets.empty() && !DataKeysBody.empty() && DataKeysBody.front().Key.GetOffset() == GapOffsets.front().second) { - GapSize -= GapOffsets.front().second - GapOffsets.front().first; - GapOffsets.pop_front(); - } - hasDrop = true; - } - Y_VERIFY(!DataKeysBody.empty()); - - endOffset = DataKeysBody.front().Key.GetOffset(); - if (DataKeysBody.front().Key.GetPartNo() > 0) ++endOffset; - - } - - TDataKey lastKey = HeadKeys.empty() ? DataKeysBody.back() : HeadKeys.back(); - - if (!hasWrites && ctx.Now() >= lastKey.Timestamp + TDuration::Seconds(Config.GetPartitionConfig().GetLifetimeSeconds()) && minOffset == EndOffset && false) { // disable drop of all data - Y_VERIFY(!HeadKeys.empty() || !DataKeysBody.empty()); - - Y_VERIFY(CompactedKeys.empty()); - Y_VERIFY(NewHead.PackedSize == 0); - Y_VERIFY(NewHeadKey.Size == 0); - - Y_VERIFY(EndOffset == Head.GetNextOffset()); - Y_VERIFY(EndOffset == NewHead.GetNextOffset() || NewHead.GetNextOffset() == 0); - - hasDrop = true; - - BodySize = 0; - DataKeysBody.clear(); - GapSize = 0; - GapOffsets.clear(); - - for (ui32 i = 0; i < TotalLevels; ++i) { - DataKeysHead[i].Clear(); - } - HeadKeys.clear(); - Head.Clear(); - Head.Offset = EndOffset; - NewHead.Clear(); - NewHead.Offset = EndOffset; - endOffset = EndOffset; - } else { - if (hasDrop) { - lastKey = DataKeysBody.front(); - } - } - - if (!hasDrop) - return false; - - StartOffset = endOffset; - - TKey key(TKeyPrefix::TypeData, Partition, 0, 0, 0, 0); //will drop all that could not be dropped before of case of full disks - - auto del = request->Record.AddCmdDeleteRange(); - auto range = del->MutableRange(); - range->SetFrom(key.Data(), key.Size()); - range->SetIncludeFrom(true); - range->SetTo(lastKey.Key.Data(), lastKey.Key.Size()); - range->SetIncludeTo(StartOffset == EndOffset); - - return true; -} - -void TPartition::Handle(TEvPersQueue::TEvHasDataInfo::TPtr& ev, const TActorContext& ctx) { - auto& record = ev->Get()->Record; - Y_VERIFY(record.HasSender()); - + minOffset = Min<ui64>(minOffset, curOffset); + } + + bool hasDrop = false; + ui64 endOffset = StartOffset; + + if (DataKeysBody.size() > 1) { + + while (DataKeysBody.size() > 1 && ctx.Now() >= DataKeysBody.front().Timestamp + TDuration::Seconds(Config.GetPartitionConfig().GetLifetimeSeconds()) + && (minOffset > DataKeysBody[1].Key.GetOffset() || minOffset == DataKeysBody[1].Key.GetOffset() && DataKeysBody[1].Key.GetPartNo() == 0)) {//all offsets from blob[0] are readed, and don't delete last blob + BodySize -= DataKeysBody.front().Size; + + DataKeysBody.pop_front(); + if (!GapOffsets.empty() && !DataKeysBody.empty() && DataKeysBody.front().Key.GetOffset() == GapOffsets.front().second) { + GapSize -= GapOffsets.front().second - GapOffsets.front().first; + GapOffsets.pop_front(); + } + hasDrop = true; + } + Y_VERIFY(!DataKeysBody.empty()); + + endOffset = DataKeysBody.front().Key.GetOffset(); + if (DataKeysBody.front().Key.GetPartNo() > 0) ++endOffset; + + } + + TDataKey lastKey = HeadKeys.empty() ? DataKeysBody.back() : HeadKeys.back(); + + if (!hasWrites && ctx.Now() >= lastKey.Timestamp + TDuration::Seconds(Config.GetPartitionConfig().GetLifetimeSeconds()) && minOffset == EndOffset && false) { // disable drop of all data + Y_VERIFY(!HeadKeys.empty() || !DataKeysBody.empty()); + + Y_VERIFY(CompactedKeys.empty()); + Y_VERIFY(NewHead.PackedSize == 0); + Y_VERIFY(NewHeadKey.Size == 0); + + Y_VERIFY(EndOffset == Head.GetNextOffset()); + Y_VERIFY(EndOffset == NewHead.GetNextOffset() || NewHead.GetNextOffset() == 0); + + hasDrop = true; + + BodySize = 0; + DataKeysBody.clear(); + GapSize = 0; + GapOffsets.clear(); + + for (ui32 i = 0; i < TotalLevels; ++i) { + DataKeysHead[i].Clear(); + } + HeadKeys.clear(); + Head.Clear(); + Head.Offset = EndOffset; + NewHead.Clear(); + NewHead.Offset = EndOffset; + endOffset = EndOffset; + } else { + if (hasDrop) { + lastKey = DataKeysBody.front(); + } + } + + if (!hasDrop) + return false; + + StartOffset = endOffset; + + TKey key(TKeyPrefix::TypeData, Partition, 0, 0, 0, 0); //will drop all that could not be dropped before of case of full disks + + auto del = request->Record.AddCmdDeleteRange(); + auto range = del->MutableRange(); + range->SetFrom(key.Data(), key.Size()); + range->SetIncludeFrom(true); + range->SetTo(lastKey.Key.Data(), lastKey.Key.Size()); + range->SetIncludeTo(StartOffset == EndOffset); + + return true; +} + +void TPartition::Handle(TEvPersQueue::TEvHasDataInfo::TPtr& ev, const TActorContext& ctx) { + auto& record = ev->Get()->Record; + Y_VERIFY(record.HasSender()); + TActorId sender = ActorIdFromProto(record.GetSender()); - if (InitDone && EndOffset > (ui64)record.GetOffset()) { //already has data, answer right now - TAutoPtr<TEvPersQueue::TEvHasDataInfoResponse> res(new TEvPersQueue::TEvHasDataInfoResponse()); - res->Record.SetEndOffset(EndOffset); - res->Record.SetSizeLag(GetSizeLag(record.GetOffset())); - res->Record.SetWriteTimestampEstimateMS(WriteTimestampEstimate.MilliSeconds()); - if (record.HasCookie()) - res->Record.SetCookie(record.GetCookie()); - ctx.Send(sender, res.Release()); - return; - } else { - THasDataReq req{++HasDataReqNum, (ui64)record.GetOffset(), sender, record.HasCookie() ? TMaybe<ui64>(record.GetCookie()) : TMaybe<ui64>(), - record.HasClientId() && InitDone ? record.GetClientId() : ""}; + if (InitDone && EndOffset > (ui64)record.GetOffset()) { //already has data, answer right now + TAutoPtr<TEvPersQueue::TEvHasDataInfoResponse> res(new TEvPersQueue::TEvHasDataInfoResponse()); + res->Record.SetEndOffset(EndOffset); + res->Record.SetSizeLag(GetSizeLag(record.GetOffset())); + res->Record.SetWriteTimestampEstimateMS(WriteTimestampEstimate.MilliSeconds()); + if (record.HasCookie()) + res->Record.SetCookie(record.GetCookie()); + ctx.Send(sender, res.Release()); + return; + } else { + THasDataReq req{++HasDataReqNum, (ui64)record.GetOffset(), sender, record.HasCookie() ? TMaybe<ui64>(record.GetCookie()) : TMaybe<ui64>(), + record.HasClientId() && InitDone ? record.GetClientId() : ""}; THasDataDeadline dl{TInstant::MilliSeconds(record.GetDeadline()), req}; - auto res = HasDataRequests.insert(req); - HasDataDeadlines.insert(dl); - Y_VERIFY(res.second); - - if (InitDone && record.HasClientId() && !record.GetClientId().empty()) { + auto res = HasDataRequests.insert(req); + HasDataDeadlines.insert(dl); + Y_VERIFY(res.second); + + if (InitDone && record.HasClientId() && !record.GetClientId().empty()) { auto& userInfo = UsersInfoStorage.GetOrCreate(record.GetClientId(), ctx); ++userInfo.Subscriptions; userInfo.UpdateReadOffset((i64)EndOffset - 1, ctx.Now(), ctx.Now(), ctx.Now()); userInfo.UpdateReadingTimeAndState(ctx.Now()); - } - } -} - + } + } +} + void TPartition::Handle(TEvPQ::TEvMirrorerCounters::TPtr& ev, const TActorContext& /*ctx*/) { if (Mirrorer) { auto diff = ev->Get()->Counters.MakeDiffForAggr(Mirrorer->Baseline); @@ -1231,21 +1231,21 @@ void TPartition::Handle(TEvents::TEvPoisonPill::TPtr&, const TActorContext& ctx) TStringBuilder ss; ss << "Tablet is restarting, topic '" << TopicName << "'"; - for (const auto& ev : WaitToChangeOwner) { - ReplyError(ctx, ev->Cookie, NPersQueue::NErrorCode::INITIALIZING, ss); + for (const auto& ev : WaitToChangeOwner) { + ReplyError(ctx, ev->Cookie, NPersQueue::NErrorCode::INITIALIZING, ss); } - for (const auto& w : Requests) { - ReplyError(ctx, w.GetCookie(), NPersQueue::NErrorCode::INITIALIZING, ss); + for (const auto& w : Requests) { + ReplyError(ctx, w.GetCookie(), NPersQueue::NErrorCode::INITIALIZING, ss); } - for (const auto& wr : Responses) { - ReplyError(ctx, wr.GetCookie(), NPersQueue::NErrorCode::INITIALIZING, TStringBuilder() << ss << " (WriteResponses)"); + for (const auto& wr : Responses) { + ReplyError(ctx, wr.GetCookie(), NPersQueue::NErrorCode::INITIALIZING, TStringBuilder() << ss << " (WriteResponses)"); } for (const auto& ri : ReadInfo) { - ReplyError(ctx, ri.second.Destination, NPersQueue::NErrorCode::INITIALIZING, - TStringBuilder() << ss << " (ReadInfo) cookie " << ri.first); + ReplyError(ctx, ri.second.Destination, NPersQueue::NErrorCode::INITIALIZING, + TStringBuilder() << ss << " (ReadInfo) cookie " << ri.first); } if (Mirrorer) { @@ -1253,522 +1253,522 @@ void TPartition::Handle(TEvents::TEvPoisonPill::TPtr&, const TActorContext& ctx) } UsersInfoStorage.Clear(ctx); - Die(ctx); -} - -void TPartition::CancelAllWritesOnIdle(const TActorContext& ctx) -{ - for (const auto& w : Requests) { - ReplyError(ctx, w.GetCookie(), NPersQueue::NErrorCode::WRITE_ERROR_DISK_IS_FULL, "Disk is full"); + Die(ctx); +} + +void TPartition::CancelAllWritesOnIdle(const TActorContext& ctx) +{ + for (const auto& w : Requests) { + ReplyError(ctx, w.GetCookie(), NPersQueue::NErrorCode::WRITE_ERROR_DISK_IS_FULL, "Disk is full"); if (w.IsWrite()) { const auto& msg = w.GetWrite().Msg; - Counters.Cumulative()[COUNTER_PQ_WRITE_ERROR].Increment(1); + Counters.Cumulative()[COUNTER_PQ_WRITE_ERROR].Increment(1); Counters.Cumulative()[COUNTER_PQ_WRITE_BYTES_ERROR].Increment(msg.Data.size() + msg.SourceId.size()); WriteInflightSize -= msg.Data.size(); - } - } - - UpdateWriteBufferIsFullState(ctx.Now()); - - Requests.clear(); - - Y_VERIFY(Responses.empty()); - - ProcessReserveRequests(ctx); -} - - -void TPartition::FailBadClient(const TActorContext& ctx) -{ - for (auto it = Owners.begin(); it != Owners.end();) { - it = DropOwner(it, ctx); - } - Y_VERIFY(Owners.empty()); - Y_VERIFY(ReservedSize == 0); - - for (const auto& w : Requests) { - ReplyError(ctx, w.GetCookie(), NPersQueue::NErrorCode::BAD_REQUEST, "previous write request failed"); + } + } + + UpdateWriteBufferIsFullState(ctx.Now()); + + Requests.clear(); + + Y_VERIFY(Responses.empty()); + + ProcessReserveRequests(ctx); +} + + +void TPartition::FailBadClient(const TActorContext& ctx) +{ + for (auto it = Owners.begin(); it != Owners.end();) { + it = DropOwner(it, ctx); + } + Y_VERIFY(Owners.empty()); + Y_VERIFY(ReservedSize == 0); + + for (const auto& w : Requests) { + ReplyError(ctx, w.GetCookie(), NPersQueue::NErrorCode::BAD_REQUEST, "previous write request failed"); if (w.IsWrite()) { const auto& msg = w.GetWrite().Msg; - Counters.Cumulative()[COUNTER_PQ_WRITE_ERROR].Increment(1); + Counters.Cumulative()[COUNTER_PQ_WRITE_ERROR].Increment(1); Counters.Cumulative()[COUNTER_PQ_WRITE_BYTES_ERROR].Increment(msg.Data.size() + msg.SourceId.size()); WriteInflightSize -= msg.Data.size(); - } - } - UpdateWriteBufferIsFullState(ctx.Now()); - - Requests.clear(); - for (const auto& w : Responses) { - ReplyError(ctx, w.GetCookie(), NPersQueue::NErrorCode::BAD_REQUEST, "previous write request failed"); + } + } + UpdateWriteBufferIsFullState(ctx.Now()); + + Requests.clear(); + for (const auto& w : Responses) { + ReplyError(ctx, w.GetCookie(), NPersQueue::NErrorCode::BAD_REQUEST, "previous write request failed"); if (w.IsWrite()) - Counters.Cumulative()[COUNTER_PQ_WRITE_ERROR].Increment(1); - } - Counters.Cumulative()[COUNTER_PQ_WRITE_BYTES_ERROR].Increment(WriteNewSize); - Responses.clear(); - - ProcessChangeOwnerRequests(ctx); - ProcessReserveRequests(ctx); -} - - + Counters.Cumulative()[COUNTER_PQ_WRITE_ERROR].Increment(1); + } + Counters.Cumulative()[COUNTER_PQ_WRITE_BYTES_ERROR].Increment(WriteNewSize); + Responses.clear(); + + ProcessChangeOwnerRequests(ctx); + ProcessReserveRequests(ctx); +} + + bool CheckDiskStatus(const TStorageStatusFlags status) -{ +{ return !status.Check(NKikimrBlobStorage::StatusDiskSpaceLightYellowMove); -} - -void TPartition::HandleGetDiskStatus(const NKikimrClient::TResponse& response, const TActorContext& ctx) -{ - bool diskIsOk = true; - for (ui32 i = 0; i < response.GetStatusResultSize(); ++i) { - auto& res = response.GetGetStatusResult(i); - - if (res.GetStatus() != NKikimrProto::OK) { - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "commands for topic '" << TopicName << "' partition " << Partition << - " are not processed at all, got KV error in CmdGetStatus " << res.GetStatus()); - ctx.Send(Tablet, new TEvents::TEvPoisonPill()); - return; - } - diskIsOk = diskIsOk && CheckDiskStatus(res.GetStatusFlags()); - } - DiskIsFull = !diskIsOk; +} + +void TPartition::HandleGetDiskStatus(const NKikimrClient::TResponse& response, const TActorContext& ctx) +{ + bool diskIsOk = true; + for (ui32 i = 0; i < response.GetStatusResultSize(); ++i) { + auto& res = response.GetGetStatusResult(i); + + if (res.GetStatus() != NKikimrProto::OK) { + LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "commands for topic '" << TopicName << "' partition " << Partition << + " are not processed at all, got KV error in CmdGetStatus " << res.GetStatus()); + ctx.Send(Tablet, new TEvents::TEvPoisonPill()); + return; + } + diskIsOk = diskIsOk && CheckDiskStatus(res.GetStatusFlags()); + } + DiskIsFull = !diskIsOk; if (DiskIsFull) { LogAndCollectError(NKikimrServices::PERSQUEUE, "disk is full", ctx); } - - InitState = WaitMetaRead; - RequestMetaRead(ctx, Tablet, Partition); -} - -void TPartition::HandleMetaRead(const NKikimrClient::TKeyValueResponse::TReadResult& response, const TActorContext& ctx) -{ - NKikimrPQ::TPartitionMeta meta; - switch (response.GetStatus()) { - case NKikimrProto::OK: { - bool res = meta.ParseFromString(response.GetValue()); - Y_VERIFY(res); - /* Bring back later, when switch to 21-2 will be unable - StartOffset = meta.GetStartOffset(); - EndOffset = meta.GetEndOffset(); - if (StartOffset == EndOffset) { - NewHead.Offset = Head.Offset = EndOffset; - } - */ - break; - } - case NKikimrProto::NODATA: - break; - case NKikimrProto::ERROR: - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "read topic '" << TopicName << "' partition " << Partition << " error"); - ctx.Send(Tablet, new TEvents::TEvPoisonPill()); - break; - default: - Cerr << "ERROR " << response.GetStatus() << "\n"; - Y_FAIL("bad status"); - }; - - InitState = WaitInfoRange; - RequestInfoRange(ctx, Tablet, Partition, ""); -} - - - + + InitState = WaitMetaRead; + RequestMetaRead(ctx, Tablet, Partition); +} + +void TPartition::HandleMetaRead(const NKikimrClient::TKeyValueResponse::TReadResult& response, const TActorContext& ctx) +{ + NKikimrPQ::TPartitionMeta meta; + switch (response.GetStatus()) { + case NKikimrProto::OK: { + bool res = meta.ParseFromString(response.GetValue()); + Y_VERIFY(res); + /* Bring back later, when switch to 21-2 will be unable + StartOffset = meta.GetStartOffset(); + EndOffset = meta.GetEndOffset(); + if (StartOffset == EndOffset) { + NewHead.Offset = Head.Offset = EndOffset; + } + */ + break; + } + case NKikimrProto::NODATA: + break; + case NKikimrProto::ERROR: + LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "read topic '" << TopicName << "' partition " << Partition << " error"); + ctx.Send(Tablet, new TEvents::TEvPoisonPill()); + break; + default: + Cerr << "ERROR " << response.GetStatus() << "\n"; + Y_FAIL("bad status"); + }; + + InitState = WaitInfoRange; + RequestInfoRange(ctx, Tablet, Partition, ""); +} + + + void TPartition::HandleInfoRangeRead(const NKikimrClient::TKeyValueResponse::TReadRangeResult& range, const TActorContext& ctx) -{ - //megaqc check here all results - Y_VERIFY(range.HasStatus()); +{ + //megaqc check here all results + Y_VERIFY(range.HasStatus()); const TString *key = nullptr; - switch (range.GetStatus()) { - case NKikimrProto::OK: - case NKikimrProto::OVERRUN: - for (ui32 i = 0; i < range.PairSize(); ++i) { - const auto& pair = range.GetPair(i); - Y_VERIFY(pair.HasStatus()); - if (pair.GetStatus() != NKikimrProto::OK) { - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "read range error topic '" << TopicName << "' partition " << Partition - << " got status " << pair.GetStatus() << " for key " << (pair.HasKey() ? pair.GetKey() : "unknown")); - - ctx.Send(Tablet, new TEvents::TEvPoisonPill()); - return; - } - Y_VERIFY(pair.HasKey()); - Y_VERIFY(pair.HasValue()); - - key = &pair.GetKey(); - if ((*key)[TKeyPrefix::MarkPosition()] == TKeyPrefix::MarkSourceId) { + switch (range.GetStatus()) { + case NKikimrProto::OK: + case NKikimrProto::OVERRUN: + for (ui32 i = 0; i < range.PairSize(); ++i) { + const auto& pair = range.GetPair(i); + Y_VERIFY(pair.HasStatus()); + if (pair.GetStatus() != NKikimrProto::OK) { + LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "read range error topic '" << TopicName << "' partition " << Partition + << " got status " << pair.GetStatus() << " for key " << (pair.HasKey() ? pair.GetKey() : "unknown")); + + ctx.Send(Tablet, new TEvents::TEvPoisonPill()); + return; + } + Y_VERIFY(pair.HasKey()); + Y_VERIFY(pair.HasValue()); + + key = &pair.GetKey(); + if ((*key)[TKeyPrefix::MarkPosition()] == TKeyPrefix::MarkSourceId) { SourceIdStorage.LoadSourceIdInfo(*key, pair.GetValue(), ctx.Now()); } else if ((*key)[TKeyPrefix::MarkPosition()] == TKeyPrefix::MarkProtoSourceId) { SourceIdStorage.LoadSourceIdInfo(*key, pair.GetValue(), ctx.Now()); - } else if ((*key)[TKeyPrefix::MarkPosition()] == TKeyPrefix::MarkUser) { + } else if ((*key)[TKeyPrefix::MarkPosition()] == TKeyPrefix::MarkUser) { UsersInfoStorage.Parse(*key, pair.GetValue(), ctx); } else if ((*key)[TKeyPrefix::MarkPosition()] == TKeyPrefix::MarkUserDeprecated) { UsersInfoStorage.ParseDeprecated(*key, pair.GetValue(), ctx); - } - } - //make next step - if (range.GetStatus() == NKikimrProto::OVERRUN) { - Y_VERIFY(key); - RequestInfoRange(ctx, Tablet, Partition, *key); - } else { - InitState = WaitDataRange; - RequestDataRange(ctx, Tablet, Partition, ""); - } - break; - case NKikimrProto::NODATA: - InitState = WaitDataRange; - RequestDataRange(ctx, Tablet, Partition, ""); - break; - case NKikimrProto::ERROR: - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "read topic '" << TopicName << "' partition " << Partition << " error"); - ctx.Send(Tablet, new TEvents::TEvPoisonPill()); - break; - default: - Cerr << "ERROR " << range.GetStatus() << "\n"; - Y_FAIL("bad status"); - }; -} - + } + } + //make next step + if (range.GetStatus() == NKikimrProto::OVERRUN) { + Y_VERIFY(key); + RequestInfoRange(ctx, Tablet, Partition, *key); + } else { + InitState = WaitDataRange; + RequestDataRange(ctx, Tablet, Partition, ""); + } + break; + case NKikimrProto::NODATA: + InitState = WaitDataRange; + RequestDataRange(ctx, Tablet, Partition, ""); + break; + case NKikimrProto::ERROR: + LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "read topic '" << TopicName << "' partition " << Partition << " error"); + ctx.Send(Tablet, new TEvents::TEvPoisonPill()); + break; + default: + Cerr << "ERROR " << range.GetStatus() << "\n"; + Y_FAIL("bad status"); + }; +} + void TPartition::FillBlobsMetaData(const NKikimrClient::TKeyValueResponse::TReadRangeResult& range, const TActorContext& ctx) -{ - for (ui32 i = 0; i < range.PairSize(); ++i) { - auto pair = range.GetPair(i); - Y_VERIFY(pair.GetStatus() == NKikimrProto::OK); //this is readrange without keys, only OK could be here - TKey k(pair.GetKey()); - if (DataKeysBody.empty()) { //no data - this is first pair of first range - Head.Offset = EndOffset = StartOffset = k.GetOffset(); - if (k.GetPartNo() > 0) ++StartOffset; - Head.PartNo = 0; - } else { - Y_VERIFY(EndOffset <= k.GetOffset(), "%s", pair.GetKey().c_str()); - if (EndOffset < k.GetOffset()) { - GapOffsets.push_back(std::make_pair(EndOffset, k.GetOffset())); - GapSize += k.GetOffset() - EndOffset; - } - } - Y_VERIFY(k.GetCount() + k.GetInternalPartsCount() > 0); - Y_VERIFY(k.GetOffset() >= EndOffset); - EndOffset = k.GetOffset() + k.GetCount(); - //at this point EndOffset > StartOffset - if (!k.IsHead()) //head.Size will be filled after read or head blobs - BodySize += pair.GetValueSize(); - - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Got data topic " << TopicName << " partition " << k.GetPartition() << " offset " << k.GetOffset() - << " count " << k.GetCount() << " size " << pair.GetValueSize() << " so " << StartOffset << " eo " << EndOffset << " " << pair.GetKey()); +{ + for (ui32 i = 0; i < range.PairSize(); ++i) { + auto pair = range.GetPair(i); + Y_VERIFY(pair.GetStatus() == NKikimrProto::OK); //this is readrange without keys, only OK could be here + TKey k(pair.GetKey()); + if (DataKeysBody.empty()) { //no data - this is first pair of first range + Head.Offset = EndOffset = StartOffset = k.GetOffset(); + if (k.GetPartNo() > 0) ++StartOffset; + Head.PartNo = 0; + } else { + Y_VERIFY(EndOffset <= k.GetOffset(), "%s", pair.GetKey().c_str()); + if (EndOffset < k.GetOffset()) { + GapOffsets.push_back(std::make_pair(EndOffset, k.GetOffset())); + GapSize += k.GetOffset() - EndOffset; + } + } + Y_VERIFY(k.GetCount() + k.GetInternalPartsCount() > 0); + Y_VERIFY(k.GetOffset() >= EndOffset); + EndOffset = k.GetOffset() + k.GetCount(); + //at this point EndOffset > StartOffset + if (!k.IsHead()) //head.Size will be filled after read or head blobs + BodySize += pair.GetValueSize(); + + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Got data topic " << TopicName << " partition " << k.GetPartition() << " offset " << k.GetOffset() + << " count " << k.GetCount() << " size " << pair.GetValueSize() << " so " << StartOffset << " eo " << EndOffset << " " << pair.GetKey()); DataKeysBody.push_back({k, pair.GetValueSize(), TInstant::Seconds(pair.GetCreationUnixTime()), DataKeysBody.empty() ? 0 : DataKeysBody.back().CumulativeSize + DataKeysBody.back().Size}); - } - - Y_VERIFY(EndOffset >= StartOffset); -} - -void TPartition::FormHeadAndProceed(const TActorContext& ctx) -{ - Head.Offset = EndOffset; - Head.PartNo = 0; + } + + Y_VERIFY(EndOffset >= StartOffset); +} + +void TPartition::FormHeadAndProceed(const TActorContext& ctx) +{ + Head.Offset = EndOffset; + Head.PartNo = 0; TVector<TString> keys; - while (DataKeysBody.size() > 0 && DataKeysBody.back().Key.IsHead()) { - Y_VERIFY(DataKeysBody.back().Key.GetOffset() + DataKeysBody.back().Key.GetCount() == Head.Offset); //no gaps in head allowed - HeadKeys.push_front(DataKeysBody.back()); - Head.Offset = DataKeysBody.back().Key.GetOffset(); - Head.PartNo = DataKeysBody.back().Key.GetPartNo(); - DataKeysBody.pop_back(); - } - for (const auto& p : DataKeysBody) { - Y_VERIFY(!p.Key.IsHead()); - } - - Y_VERIFY(HeadKeys.empty() || Head.Offset == HeadKeys.front().Key.GetOffset() && Head.PartNo == HeadKeys.front().Key.GetPartNo()); - Y_VERIFY(Head.Offset < EndOffset || Head.Offset == EndOffset && HeadKeys.empty()); - Y_VERIFY(Head.Offset >= StartOffset || Head.Offset == StartOffset - 1 && Head.PartNo > 0); - - //form head request - for (auto& p : HeadKeys) { - keys.push_back({p.Key.Data(), p.Key.Size()}); - } - Y_VERIFY(keys.size() < TotalMaxCount); - if (keys.empty()) { - InitComplete(ctx); - return; - } - InitState = WaitDataRead; - RequestData(ctx, Tablet, keys); -} - + while (DataKeysBody.size() > 0 && DataKeysBody.back().Key.IsHead()) { + Y_VERIFY(DataKeysBody.back().Key.GetOffset() + DataKeysBody.back().Key.GetCount() == Head.Offset); //no gaps in head allowed + HeadKeys.push_front(DataKeysBody.back()); + Head.Offset = DataKeysBody.back().Key.GetOffset(); + Head.PartNo = DataKeysBody.back().Key.GetPartNo(); + DataKeysBody.pop_back(); + } + for (const auto& p : DataKeysBody) { + Y_VERIFY(!p.Key.IsHead()); + } + + Y_VERIFY(HeadKeys.empty() || Head.Offset == HeadKeys.front().Key.GetOffset() && Head.PartNo == HeadKeys.front().Key.GetPartNo()); + Y_VERIFY(Head.Offset < EndOffset || Head.Offset == EndOffset && HeadKeys.empty()); + Y_VERIFY(Head.Offset >= StartOffset || Head.Offset == StartOffset - 1 && Head.PartNo > 0); + + //form head request + for (auto& p : HeadKeys) { + keys.push_back({p.Key.Data(), p.Key.Size()}); + } + Y_VERIFY(keys.size() < TotalMaxCount); + if (keys.empty()) { + InitComplete(ctx); + return; + } + InitState = WaitDataRead; + RequestData(ctx, Tablet, keys); +} + void TPartition::HandleDataRangeRead(const NKikimrClient::TKeyValueResponse::TReadRangeResult& range, const TActorContext& ctx) -{ - Y_VERIFY(range.HasStatus()); - switch(range.GetStatus()) { - case NKikimrProto::OK: - case NKikimrProto::OVERRUN: - - FillBlobsMetaData(range, ctx); - - if (range.GetStatus() == NKikimrProto::OVERRUN) { //request rest of range - Y_VERIFY(range.PairSize()); - RequestDataRange(ctx, Tablet, Partition, range.GetPair(range.PairSize() - 1).GetKey()); - return; - } - FormHeadAndProceed(ctx); - break; - case NKikimrProto::NODATA: - InitComplete(ctx); - break; - default: - Cerr << "ERROR " << range.GetStatus() << "\n"; - Y_FAIL("bad status"); - }; -} - +{ + Y_VERIFY(range.HasStatus()); + switch(range.GetStatus()) { + case NKikimrProto::OK: + case NKikimrProto::OVERRUN: + + FillBlobsMetaData(range, ctx); + + if (range.GetStatus() == NKikimrProto::OVERRUN) { //request rest of range + Y_VERIFY(range.PairSize()); + RequestDataRange(ctx, Tablet, Partition, range.GetPair(range.PairSize() - 1).GetKey()); + return; + } + FormHeadAndProceed(ctx); + break; + case NKikimrProto::NODATA: + InitComplete(ctx); + break; + default: + Cerr << "ERROR " << range.GetStatus() << "\n"; + Y_FAIL("bad status"); + }; +} + void TPartition::HandleDataRead(const NKikimrClient::TResponse& response, const TActorContext& ctx) -{ - Y_VERIFY(InitState == WaitDataRead); - ui32 currentLevel = 0; - Y_VERIFY(HeadKeys.size() == response.ReadResultSize()); - for (ui32 i = 0; i < response.ReadResultSize(); ++i) { - auto& read = response.GetReadResult(i); - Y_VERIFY(read.HasStatus()); - switch(read.GetStatus()) { - case NKikimrProto::OK: { - const TKey& key = HeadKeys[i].Key; - ui32 size = HeadKeys[i].Size; - Y_VERIFY(key.IsHead()); - ui64 offset = key.GetOffset(); - while (currentLevel + 1 < TotalLevels && size < CompactLevelBorder[currentLevel + 1]) - ++currentLevel; - Y_VERIFY(size < CompactLevelBorder[currentLevel]); - - DataKeysHead[currentLevel].AddKey(key, size); - Y_VERIFY(DataKeysHead[currentLevel].KeysCount() < AppData(ctx)->PQConfig.GetMaxBlobsPerLevel()); - Y_VERIFY(!DataKeysHead[currentLevel].NeedCompaction()); - - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "read res partition topic '" << TopicName << "' parititon " - << key.GetPartition() << " offset " << offset << " endOffset " << EndOffset << " key " - << key.GetOffset() << "," << key.GetCount() << " valuesize " << read.GetValue().size() << " expected " << size); - - Y_VERIFY(offset + 1 >= StartOffset); - Y_VERIFY(offset < EndOffset); - Y_VERIFY(size == read.GetValue().size()); - - for (TBlobIterator it(key, read.GetValue()); it.IsValid(); it.Next()) { - Head.Batches.push_back(it.GetBatch()); - } - Head.PackedSize += size; - - break; - } - case NKikimrProto::OVERRUN: - Y_FAIL("implement overrun in readresult!!"); - return; - case NKikimrProto::NODATA: - Y_FAIL("NODATA can't be here"); - return; - case NKikimrProto::ERROR: - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "tablet " << TabletID << " HandleOnInit topic '" << TopicName << "' partition " << Partition +{ + Y_VERIFY(InitState == WaitDataRead); + ui32 currentLevel = 0; + Y_VERIFY(HeadKeys.size() == response.ReadResultSize()); + for (ui32 i = 0; i < response.ReadResultSize(); ++i) { + auto& read = response.GetReadResult(i); + Y_VERIFY(read.HasStatus()); + switch(read.GetStatus()) { + case NKikimrProto::OK: { + const TKey& key = HeadKeys[i].Key; + ui32 size = HeadKeys[i].Size; + Y_VERIFY(key.IsHead()); + ui64 offset = key.GetOffset(); + while (currentLevel + 1 < TotalLevels && size < CompactLevelBorder[currentLevel + 1]) + ++currentLevel; + Y_VERIFY(size < CompactLevelBorder[currentLevel]); + + DataKeysHead[currentLevel].AddKey(key, size); + Y_VERIFY(DataKeysHead[currentLevel].KeysCount() < AppData(ctx)->PQConfig.GetMaxBlobsPerLevel()); + Y_VERIFY(!DataKeysHead[currentLevel].NeedCompaction()); + + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "read res partition topic '" << TopicName << "' parititon " + << key.GetPartition() << " offset " << offset << " endOffset " << EndOffset << " key " + << key.GetOffset() << "," << key.GetCount() << " valuesize " << read.GetValue().size() << " expected " << size); + + Y_VERIFY(offset + 1 >= StartOffset); + Y_VERIFY(offset < EndOffset); + Y_VERIFY(size == read.GetValue().size()); + + for (TBlobIterator it(key, read.GetValue()); it.IsValid(); it.Next()) { + Head.Batches.push_back(it.GetBatch()); + } + Head.PackedSize += size; + + break; + } + case NKikimrProto::OVERRUN: + Y_FAIL("implement overrun in readresult!!"); + return; + case NKikimrProto::NODATA: + Y_FAIL("NODATA can't be here"); + return; + case NKikimrProto::ERROR: + LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "tablet " << TabletID << " HandleOnInit topic '" << TopicName << "' partition " << Partition << " ReadResult " << i << " status NKikimrProto::ERROR result message: \"" << read.GetMessage() << " \" errorReason: \"" << response.GetErrorReason() << "\""); ctx.Send(Tablet, new TEvents::TEvPoisonPill()); - return; - default: + return; + default: Cerr << "ERROR " << read.GetStatus() << " message: \"" << read.GetMessage() << "\"\n"; - Y_FAIL("bad status"); - - }; - } - - Y_VERIFY(Head.PackedSize > 0); - Y_VERIFY(Head.PackedSize < MaxBlobSize); - Y_VERIFY(Head.GetNextOffset() == EndOffset); - Y_VERIFY(std::accumulate(DataKeysHead.begin(), DataKeysHead.end(), 0u, - [](ui32 sum, const TKeyLevel& level){return sum + level.Sum();}) == Head.PackedSize); - - InitComplete(ctx); -} - - -void TPartition::HandleOnInit(TEvKeyValue::TEvResponse::TPtr& ev, const TActorContext& ctx) { - - auto& response = ev->Get()->Record; - if (response.GetStatus() != NMsgBusProxy::MSTATUS_OK) { - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "commands for topic '" << TopicName << " partition " - << Partition << " are not processed at all, got KV error " << response.GetStatus()); - ctx.Send(Tablet, new TEvents::TEvPoisonPill()); - return; - } - bool diskIsOk = true; - for (ui32 i = 0; i < response.GetStatusResultSize(); ++i) { - auto& res = response.GetGetStatusResult(i); - if (res.GetStatus() != NKikimrProto::OK) { - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "commands for topic '" << TopicName << "' partition " << Partition << - " are not processed at all, got KV error in CmdGetStatus " << res.GetStatus()); - ctx.Send(Tablet, new TEvents::TEvPoisonPill()); - return; - } - diskIsOk = diskIsOk && CheckDiskStatus(res.GetStatusFlags()); - } - if (response.GetStatusResultSize()) - DiskIsFull = !diskIsOk; - - switch(InitState) { - case WaitDiskStatus: - Y_VERIFY(response.GetStatusResultSize()); - HandleGetDiskStatus(response, ctx); - break; - case WaitMetaRead: - Y_VERIFY(response.ReadResultSize() == 1); - HandleMetaRead(response.GetReadResult(0), ctx); - break; - case WaitInfoRange: - Y_VERIFY(response.ReadRangeResultSize() == 1); - HandleInfoRangeRead(response.GetReadRangeResult(0), ctx); - break; - case WaitDataRange: - Y_VERIFY(response.ReadRangeResultSize() == 1); - HandleDataRangeRead(response.GetReadRangeResult(0), ctx); - break; - case WaitDataRead: - Y_VERIFY(response.ReadResultSize()); - HandleDataRead(response, ctx); - break; - default: - Y_FAIL("Unknown state"); - - }; -} - -void TPartition::InitComplete(const TActorContext& ctx) { - if (StartOffset == EndOffset && EndOffset == 0) { - for (auto& [user, info] : UsersInfoStorage.GetAll()) { - if (info.Offset > 0 && StartOffset < (ui64)info.Offset) { - Head.Offset = EndOffset = StartOffset = info.Offset; - } - } - } - - LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "init complete for topic '" << TopicName << "' partition " << Partition << " " << ctx.SelfID); - - TStringBuilder ss; - ss << "SYNC INIT topic " << TopicName << " partitition " << Partition << " so " << StartOffset << " endOffset " << EndOffset << " Head " << Head << "\n"; + Y_FAIL("bad status"); + + }; + } + + Y_VERIFY(Head.PackedSize > 0); + Y_VERIFY(Head.PackedSize < MaxBlobSize); + Y_VERIFY(Head.GetNextOffset() == EndOffset); + Y_VERIFY(std::accumulate(DataKeysHead.begin(), DataKeysHead.end(), 0u, + [](ui32 sum, const TKeyLevel& level){return sum + level.Sum();}) == Head.PackedSize); + + InitComplete(ctx); +} + + +void TPartition::HandleOnInit(TEvKeyValue::TEvResponse::TPtr& ev, const TActorContext& ctx) { + + auto& response = ev->Get()->Record; + if (response.GetStatus() != NMsgBusProxy::MSTATUS_OK) { + LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "commands for topic '" << TopicName << " partition " + << Partition << " are not processed at all, got KV error " << response.GetStatus()); + ctx.Send(Tablet, new TEvents::TEvPoisonPill()); + return; + } + bool diskIsOk = true; + for (ui32 i = 0; i < response.GetStatusResultSize(); ++i) { + auto& res = response.GetGetStatusResult(i); + if (res.GetStatus() != NKikimrProto::OK) { + LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "commands for topic '" << TopicName << "' partition " << Partition << + " are not processed at all, got KV error in CmdGetStatus " << res.GetStatus()); + ctx.Send(Tablet, new TEvents::TEvPoisonPill()); + return; + } + diskIsOk = diskIsOk && CheckDiskStatus(res.GetStatusFlags()); + } + if (response.GetStatusResultSize()) + DiskIsFull = !diskIsOk; + + switch(InitState) { + case WaitDiskStatus: + Y_VERIFY(response.GetStatusResultSize()); + HandleGetDiskStatus(response, ctx); + break; + case WaitMetaRead: + Y_VERIFY(response.ReadResultSize() == 1); + HandleMetaRead(response.GetReadResult(0), ctx); + break; + case WaitInfoRange: + Y_VERIFY(response.ReadRangeResultSize() == 1); + HandleInfoRangeRead(response.GetReadRangeResult(0), ctx); + break; + case WaitDataRange: + Y_VERIFY(response.ReadRangeResultSize() == 1); + HandleDataRangeRead(response.GetReadRangeResult(0), ctx); + break; + case WaitDataRead: + Y_VERIFY(response.ReadResultSize()); + HandleDataRead(response, ctx); + break; + default: + Y_FAIL("Unknown state"); + + }; +} + +void TPartition::InitComplete(const TActorContext& ctx) { + if (StartOffset == EndOffset && EndOffset == 0) { + for (auto& [user, info] : UsersInfoStorage.GetAll()) { + if (info.Offset > 0 && StartOffset < (ui64)info.Offset) { + Head.Offset = EndOffset = StartOffset = info.Offset; + } + } + } + + LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "init complete for topic '" << TopicName << "' partition " << Partition << " " << ctx.SelfID); + + TStringBuilder ss; + ss << "SYNC INIT topic " << TopicName << " partitition " << Partition << " so " << StartOffset << " endOffset " << EndOffset << " Head " << Head << "\n"; for (const auto& s : SourceIdStorage.GetInMemorySourceIds()) { - ss << "SYNC INIT sourceId " << s.first << " seqNo " << s.second.SeqNo << " offset " << s.second.Offset << "\n"; - } - for (const auto& h : DataKeysBody) { + ss << "SYNC INIT sourceId " << s.first << " seqNo " << s.second.SeqNo << " offset " << s.second.Offset << "\n"; + } + for (const auto& h : DataKeysBody) { ss << "SYNC INIT DATA KEY: " << TString(h.Key.Data(), h.Key.Size()) << " size " << h.Size << "\n"; - } - for (const auto& h : HeadKeys) { + } + for (const auto& h : HeadKeys) { ss << "SYNC INIT HEAD KEY: " << TString(h.Key.Data(), h.Key.Size()) << " size " << h.Size << "\n"; - } - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, ss); - - CheckHeadConsistency(); - - Become(&TThis::StateIdle); - InitDuration = ctx.Now() - CreationTime; - InitDone = true; - Counters.Percentile()[COUNTER_LATENCY_PQ_INIT].IncrementFor(InitDuration.MilliSeconds()); - - FillReadFromTimestamps(Config, ctx); - - for (auto& ui : UsersInfoStorage.GetAll()) { - ProcessUserActs(ui.second, ctx); - } - - ctx.Send(ctx.SelfID, new TEvents::TEvWakeup()); - if (!NewPartition) { - ctx.Send(Tablet, new TEvPQ::TEvInitComplete(Partition)); - } + } + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, ss); + + CheckHeadConsistency(); + + Become(&TThis::StateIdle); + InitDuration = ctx.Now() - CreationTime; + InitDone = true; + Counters.Percentile()[COUNTER_LATENCY_PQ_INIT].IncrementFor(InitDuration.MilliSeconds()); + + FillReadFromTimestamps(Config, ctx); + + for (auto& ui : UsersInfoStorage.GetAll()) { + ProcessUserActs(ui.second, ctx); + } + + ctx.Send(ctx.SelfID, new TEvents::TEvWakeup()); + if (!NewPartition) { + ctx.Send(Tablet, new TEvPQ::TEvInitComplete(Partition)); + } for (const auto& s : SourceIdStorage.GetInMemorySourceIds()) { - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Init complete for topic '" << TopicName << "' Partition: " << Partition << " SourceId: " << - s.first << " SeqNo: " << s.second.SeqNo << " offset: " << s.second.Offset << " MaxOffset: " << EndOffset); - } - ProcessHasDataRequests(ctx); - - InitUserInfoForImportantClients(ctx); - + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Init complete for topic '" << TopicName << "' Partition: " << Partition << " SourceId: " << + s.first << " SeqNo: " << s.second.SeqNo << " offset: " << s.second.Offset << " MaxOffset: " << EndOffset); + } + ProcessHasDataRequests(ctx); + + InitUserInfoForImportantClients(ctx); + for (auto& userInfoPair : UsersInfoStorage.GetAll()) { Y_VERIFY(userInfoPair.second.Offset >= 0); ReadTimestampForOffset(userInfoPair.first, userInfoPair.second, ctx); - } - - PartitionLabeledCounters.GetCounters()[METRIC_INIT_TIME] = InitDuration.MilliSeconds(); - PartitionLabeledCounters.GetCounters()[METRIC_LIFE_TIME] = CreationTime.MilliSeconds(); - PartitionLabeledCounters.GetCounters()[METRIC_PARTITIONS] = 1; - ctx.Send(Tablet, new TEvPQ::TEvPartitionLabeledCounters(Partition, PartitionLabeledCounters)); - - UpdateUserInfoEndOffset(ctx.Now()); - - ScheduleUpdateAvailableSize(ctx); - - if (Config.GetPartitionConfig().HasMirrorFrom()) { + } + + PartitionLabeledCounters.GetCounters()[METRIC_INIT_TIME] = InitDuration.MilliSeconds(); + PartitionLabeledCounters.GetCounters()[METRIC_LIFE_TIME] = CreationTime.MilliSeconds(); + PartitionLabeledCounters.GetCounters()[METRIC_PARTITIONS] = 1; + ctx.Send(Tablet, new TEvPQ::TEvPartitionLabeledCounters(Partition, PartitionLabeledCounters)); + + UpdateUserInfoEndOffset(ctx.Now()); + + ScheduleUpdateAvailableSize(ctx); + + if (Config.GetPartitionConfig().HasMirrorFrom()) { CreateMirrorerActor(); } -} - - -void TPartition::UpdateUserInfoEndOffset(const TInstant& now) { +} + + +void TPartition::UpdateUserInfoEndOffset(const TInstant& now) { for (auto& userInfo : UsersInfoStorage.GetAll()) { userInfo.second.EndOffset = (i64)EndOffset; userInfo.second.UpdateReadingTimeAndState(now); - } - -} - -void TPartition::ProcessChangeOwnerRequest(TAutoPtr<TEvPQ::TEvChangeOwner> ev, const TActorContext& ctx) { - - auto &owner = ev->Owner; - auto it = Owners.find(owner); - if (it == Owners.end()) { - Owners[owner]; - it = Owners.find(owner); - } - WriteQuota.Update(ctx.Now()); - if (it->second.NeedResetOwner || ev->Force) { //change owner - Y_VERIFY(ReservedSize >= it->second.ReservedSize); - ReservedSize -= it->second.ReservedSize; - - it->second.GenerateCookie(owner, ev->PipeClient, ev->Sender, TopicName, Partition, ctx);//will change OwnerCookie - //cookie is generated. but answer will be sent when all inflight writes will be done - they in the same queue 'Requests' + } + +} + +void TPartition::ProcessChangeOwnerRequest(TAutoPtr<TEvPQ::TEvChangeOwner> ev, const TActorContext& ctx) { + + auto &owner = ev->Owner; + auto it = Owners.find(owner); + if (it == Owners.end()) { + Owners[owner]; + it = Owners.find(owner); + } + WriteQuota.Update(ctx.Now()); + if (it->second.NeedResetOwner || ev->Force) { //change owner + Y_VERIFY(ReservedSize >= it->second.ReservedSize); + ReservedSize -= it->second.ReservedSize; + + it->second.GenerateCookie(owner, ev->PipeClient, ev->Sender, TopicName, Partition, ctx);//will change OwnerCookie + //cookie is generated. but answer will be sent when all inflight writes will be done - they in the same queue 'Requests' Requests.emplace_back(TOwnershipMsg{ev->Cookie, it->second.OwnerCookie}, WriteQuota.GetQuotedTime(), ctx.Now().MilliSeconds(), 0); - - Counters.Simple()[COUNTER_PQ_TABLET_RESERVED_BYTES_SIZE].Set(ReservedSize); - UpdateWriteBufferIsFullState(ctx.Now()); - ProcessReserveRequests(ctx); - - } else { + + Counters.Simple()[COUNTER_PQ_TABLET_RESERVED_BYTES_SIZE].Set(ReservedSize); + UpdateWriteBufferIsFullState(ctx.Now()); + ProcessReserveRequests(ctx); + + } else { it->second.WaitToChangeOwner.push_back(THolder<TEvPQ::TEvChangeOwner>(ev.Release())); - } -} - - + } +} + + THashMap<TString, NKikimr::NPQ::TOwnerInfo>::iterator TPartition::DropOwner(THashMap<TString, NKikimr::NPQ::TOwnerInfo>::iterator& it, const TActorContext& ctx) { - Y_VERIFY(ReservedSize >= it->second.ReservedSize); - ReservedSize -= it->second.ReservedSize; - UpdateWriteBufferIsFullState(ctx.Now()); - Counters.Simple()[COUNTER_PQ_TABLET_RESERVED_BYTES_SIZE].Set(ReservedSize); - for (auto& ev : it->second.WaitToChangeOwner) { //this request maybe could be done right now + Y_VERIFY(ReservedSize >= it->second.ReservedSize); + ReservedSize -= it->second.ReservedSize; + UpdateWriteBufferIsFullState(ctx.Now()); + Counters.Simple()[COUNTER_PQ_TABLET_RESERVED_BYTES_SIZE].Set(ReservedSize); + for (auto& ev : it->second.WaitToChangeOwner) { //this request maybe could be done right now WaitToChangeOwner.push_back(THolder<TEvPQ::TEvChangeOwner>(ev.Release())); - } - auto jt = it; - ++jt; - Owners.erase(it); - return jt; -} - - -void TPartition::InitUserInfoForImportantClients(const TActorContext& ctx) { + } + auto jt = it; + ++jt; + Owners.erase(it); + return jt; +} + + +void TPartition::InitUserInfoForImportantClients(const TActorContext& ctx) { TSet<TString> important; - for (const auto& importantUser : Config.GetPartitionConfig().GetImportantClientId()) { + for (const auto& importantUser : Config.GetPartitionConfig().GetImportantClientId()) { important.insert(importantUser); TUserInfo* userInfo = UsersInfoStorage.GetIfExists(importantUser); if (userInfo && !userInfo->Important) { ctx.Send(Tablet, new TEvPQ::TEvPartitionLabeledCountersDrop(Partition, userInfo->LabeledCounters.GetGroup())); userInfo->SetImportant(true); - continue; - } + continue; + } if (!userInfo) { - userInfo = &UsersInfoStorage.Create(ctx, importantUser, 0, true, "", 0, 0, 0, 0, TInstant::Zero()); + userInfo = &UsersInfoStorage.Create(ctx, importantUser, 0, true, "", 0, 0, 0, 0, TInstant::Zero()); } if (userInfo->Offset < (i64)StartOffset) userInfo->Offset = StartOffset; ReadTimestampForOffset(importantUser, *userInfo, ctx); - } + } for (auto& userInfoPair : UsersInfoStorage.GetAll()) { if (!important.contains(userInfoPair.first) && userInfoPair.second.Important) { ctx.Send( @@ -1776,38 +1776,38 @@ void TPartition::InitUserInfoForImportantClients(const TActorContext& ctx) { new TEvPQ::TEvPartitionLabeledCountersDrop(Partition, userInfoPair.second.LabeledCounters.GetGroup()) ); userInfoPair.second.SetImportant(false); - } - } -} - - -void TPartition::Handle(TEvPQ::TEvChangeConfig::TPtr& ev, const TActorContext& ctx) { - - Config = ev->Get()->Config; + } + } +} + + +void TPartition::Handle(TEvPQ::TEvChangeConfig::TPtr& ev, const TActorContext& ctx) { + + Config = ev->Get()->Config; TopicName = ev->Get()->TopicName; - - Y_VERIFY(Config.GetPartitionConfig().GetTotalPartitions() > 0); - - UsersInfoStorage.UpdateConfig(ev->Get()->Config); - - WriteQuota.UpdateConfig(Config.GetPartitionConfig().GetBurstSize(), Config.GetPartitionConfig().GetWriteSpeedInBytesPerSecond()); - if (AppData(ctx)->PQConfig.GetQuotingConfig().GetPartitionReadQuotaIsTwiceWriteQuota()) { - for (auto& userInfo : UsersInfoStorage.GetAll()) { - userInfo.second.ReadQuota.UpdateConfig(Config.GetPartitionConfig().GetBurstSize() * 2, Config.GetPartitionConfig().GetWriteSpeedInBytesPerSecond() * 2); - } - } - - for (const auto& readQuota : Config.GetPartitionConfig().GetReadQuota()) { + + Y_VERIFY(Config.GetPartitionConfig().GetTotalPartitions() > 0); + + UsersInfoStorage.UpdateConfig(ev->Get()->Config); + + WriteQuota.UpdateConfig(Config.GetPartitionConfig().GetBurstSize(), Config.GetPartitionConfig().GetWriteSpeedInBytesPerSecond()); + if (AppData(ctx)->PQConfig.GetQuotingConfig().GetPartitionReadQuotaIsTwiceWriteQuota()) { + for (auto& userInfo : UsersInfoStorage.GetAll()) { + userInfo.second.ReadQuota.UpdateConfig(Config.GetPartitionConfig().GetBurstSize() * 2, Config.GetPartitionConfig().GetWriteSpeedInBytesPerSecond() * 2); + } + } + + for (const auto& readQuota : Config.GetPartitionConfig().GetReadQuota()) { auto& userInfo = UsersInfoStorage.GetOrCreate(readQuota.GetClientId(), ctx); userInfo.ReadQuota.UpdateConfig(readQuota.GetBurstSize(), readQuota.GetSpeedInBytesPerSecond()); - } - - if (CurrentStateFunc() != &TThis::StateInit) { - InitUserInfoForImportantClients(ctx); - FillReadFromTimestamps(Config, ctx); - } - - if (Config.GetPartitionConfig().HasMirrorFrom()) { + } + + if (CurrentStateFunc() != &TThis::StateInit) { + InitUserInfoForImportantClients(ctx); + FillReadFromTimestamps(Config, ctx); + } + + if (Config.GetPartitionConfig().HasMirrorFrom()) { if (Mirrorer) { ctx.Send(ev->Forward(Mirrorer->Actor)); } else { @@ -1819,111 +1819,111 @@ void TPartition::Handle(TEvPQ::TEvChangeConfig::TPtr& ev, const TActorContext& c Mirrorer.Reset(); } } -} - - -void TPartition::Handle(TEvPQ::TEvChangeOwner::TPtr& ev, const TActorContext& ctx) { - bool res = OwnerPipes.insert(ev->Get()->PipeClient).second; - Y_VERIFY(res); - WaitToChangeOwner.push_back(ev->Release()); - ProcessChangeOwnerRequests(ctx); -} - - -void TPartition::Handle(TEvPQ::TEvPipeDisconnected::TPtr& ev, const TActorContext& ctx) { - - const TString& owner = ev->Get()->Owner; +} + + +void TPartition::Handle(TEvPQ::TEvChangeOwner::TPtr& ev, const TActorContext& ctx) { + bool res = OwnerPipes.insert(ev->Get()->PipeClient).second; + Y_VERIFY(res); + WaitToChangeOwner.push_back(ev->Release()); + ProcessChangeOwnerRequests(ctx); +} + + +void TPartition::Handle(TEvPQ::TEvPipeDisconnected::TPtr& ev, const TActorContext& ctx) { + + const TString& owner = ev->Get()->Owner; const TActorId& pipeClient = ev->Get()->PipeClient; - - OwnerPipes.erase(pipeClient); - - auto it = Owners.find(owner); - if (it == Owners.end() || it->second.PipeClient != pipeClient) // owner session is already dead - return; - //TODO: Uncommet when writes will be done via new gRPC protocol - // msgbus do not reserve bytes right now!! - // DropOwner will drop reserved bytes and ownership - if (owner != "default") { //default owner is for old LB protocol, pipe is dead right now after GetOwnership request, and no ReserveBytes done. So, ignore pipe disconnection - DropOwner(it, ctx); - ProcessChangeOwnerRequests(ctx); - } -} - - -void TPartition::ProcessReserveRequests(const TActorContext& ctx) { - - while (!ReserveRequests.empty()) { - const TString& ownerCookie = ReserveRequests.front()->OwnerCookie; - const TStringBuf owner = TOwnerInfo::GetOwnerFromOwnerCookie(ownerCookie); - const ui64& size = ReserveRequests.front()->Size; - const ui64& cookie = ReserveRequests.front()->Cookie; - const bool& lastRequest = ReserveRequests.front()->LastRequest; - - auto it = Owners.find(owner); - if (it == Owners.end() || it->second.OwnerCookie != ownerCookie) { - ReplyError(ctx, cookie, NPersQueue::NErrorCode::BAD_REQUEST, "ReserveRequest from dead ownership session"); - ReserveRequests.pop_front(); - continue; - } - if (ReservedSize + WriteInflightSize + WriteCycleSize + size <= Config.GetPartitionConfig().GetMaxWriteInflightSize() || ReservedSize + WriteInflightSize + WriteCycleSize == 0) { - it->second.AddReserveRequest(size, lastRequest); - ReservedSize += size; - - ReplyOk(ctx, cookie); - - ReserveRequests.pop_front(); - - continue; - } - break; - } - UpdateWriteBufferIsFullState(ctx.Now()); - Counters.Simple()[COUNTER_PQ_TABLET_RESERVED_BYTES_SIZE].Set(ReservedSize); -} - -void TPartition::UpdateWriteBufferIsFullState(const TInstant& now) { - WriteBufferIsFullCounter.UpdateWorkingTime(now); - WriteBufferIsFullCounter.UpdateState(ReservedSize + WriteInflightSize + WriteCycleSize >= Config.GetPartitionConfig().GetBorderWriteInflightSize()); -} - - - -void TPartition::Handle(TEvPQ::TEvReserveBytes::TPtr& ev, const TActorContext& ctx) { - const TString& ownerCookie = ev->Get()->OwnerCookie; - TStringBuf owner = TOwnerInfo::GetOwnerFromOwnerCookie(ownerCookie); - const ui64& messageNo = ev->Get()->MessageNo; - - auto it = Owners.find(owner); - if (it == Owners.end() || it->second.OwnerCookie != ownerCookie) { - ReplyError(ctx, ev->Get()->Cookie, NPersQueue::NErrorCode::BAD_REQUEST, "ReserveRequest from dead ownership session"); - return; - } - - if (messageNo != it->second.NextMessageNo) { - ReplyError(ctx, ev->Get()->Cookie, NPersQueue::NErrorCode::BAD_REQUEST, - TStringBuilder() << "reorder in reserve requests, waiting " << it->second.NextMessageNo << ", but got " << messageNo); - DropOwner(it, ctx); - ProcessChangeOwnerRequests(ctx); - return; - } - - ++it->second.NextMessageNo; - ReserveRequests.push_back(ev->Release()); - ProcessReserveRequests(ctx); -} - - - - -void TPartition::Handle(TEvPQ::TEvPartitionOffsets::TPtr& ev, const TActorContext& ctx) { - NKikimrPQ::TOffsetsResponse::TPartResult result; - result.SetPartition(Partition); - result.SetStartOffset(StartOffset); - result.SetEndOffset(EndOffset); - result.SetErrorCode(NPersQueue::NErrorCode::OK); - result.SetWriteTimestampEstimateMS(WriteTimestampEstimate.MilliSeconds()); - - if (!ev->Get()->ClientId.empty()) { + + OwnerPipes.erase(pipeClient); + + auto it = Owners.find(owner); + if (it == Owners.end() || it->second.PipeClient != pipeClient) // owner session is already dead + return; + //TODO: Uncommet when writes will be done via new gRPC protocol + // msgbus do not reserve bytes right now!! + // DropOwner will drop reserved bytes and ownership + if (owner != "default") { //default owner is for old LB protocol, pipe is dead right now after GetOwnership request, and no ReserveBytes done. So, ignore pipe disconnection + DropOwner(it, ctx); + ProcessChangeOwnerRequests(ctx); + } +} + + +void TPartition::ProcessReserveRequests(const TActorContext& ctx) { + + while (!ReserveRequests.empty()) { + const TString& ownerCookie = ReserveRequests.front()->OwnerCookie; + const TStringBuf owner = TOwnerInfo::GetOwnerFromOwnerCookie(ownerCookie); + const ui64& size = ReserveRequests.front()->Size; + const ui64& cookie = ReserveRequests.front()->Cookie; + const bool& lastRequest = ReserveRequests.front()->LastRequest; + + auto it = Owners.find(owner); + if (it == Owners.end() || it->second.OwnerCookie != ownerCookie) { + ReplyError(ctx, cookie, NPersQueue::NErrorCode::BAD_REQUEST, "ReserveRequest from dead ownership session"); + ReserveRequests.pop_front(); + continue; + } + if (ReservedSize + WriteInflightSize + WriteCycleSize + size <= Config.GetPartitionConfig().GetMaxWriteInflightSize() || ReservedSize + WriteInflightSize + WriteCycleSize == 0) { + it->second.AddReserveRequest(size, lastRequest); + ReservedSize += size; + + ReplyOk(ctx, cookie); + + ReserveRequests.pop_front(); + + continue; + } + break; + } + UpdateWriteBufferIsFullState(ctx.Now()); + Counters.Simple()[COUNTER_PQ_TABLET_RESERVED_BYTES_SIZE].Set(ReservedSize); +} + +void TPartition::UpdateWriteBufferIsFullState(const TInstant& now) { + WriteBufferIsFullCounter.UpdateWorkingTime(now); + WriteBufferIsFullCounter.UpdateState(ReservedSize + WriteInflightSize + WriteCycleSize >= Config.GetPartitionConfig().GetBorderWriteInflightSize()); +} + + + +void TPartition::Handle(TEvPQ::TEvReserveBytes::TPtr& ev, const TActorContext& ctx) { + const TString& ownerCookie = ev->Get()->OwnerCookie; + TStringBuf owner = TOwnerInfo::GetOwnerFromOwnerCookie(ownerCookie); + const ui64& messageNo = ev->Get()->MessageNo; + + auto it = Owners.find(owner); + if (it == Owners.end() || it->second.OwnerCookie != ownerCookie) { + ReplyError(ctx, ev->Get()->Cookie, NPersQueue::NErrorCode::BAD_REQUEST, "ReserveRequest from dead ownership session"); + return; + } + + if (messageNo != it->second.NextMessageNo) { + ReplyError(ctx, ev->Get()->Cookie, NPersQueue::NErrorCode::BAD_REQUEST, + TStringBuilder() << "reorder in reserve requests, waiting " << it->second.NextMessageNo << ", but got " << messageNo); + DropOwner(it, ctx); + ProcessChangeOwnerRequests(ctx); + return; + } + + ++it->second.NextMessageNo; + ReserveRequests.push_back(ev->Release()); + ProcessReserveRequests(ctx); +} + + + + +void TPartition::Handle(TEvPQ::TEvPartitionOffsets::TPtr& ev, const TActorContext& ctx) { + NKikimrPQ::TOffsetsResponse::TPartResult result; + result.SetPartition(Partition); + result.SetStartOffset(StartOffset); + result.SetEndOffset(EndOffset); + result.SetErrorCode(NPersQueue::NErrorCode::OK); + result.SetWriteTimestampEstimateMS(WriteTimestampEstimate.MilliSeconds()); + + if (!ev->Get()->ClientId.empty()) { TUserInfo* userInfo = UsersInfoStorage.GetIfExists(ev->Get()->ClientId); if (userInfo) { i64 offset = Max<i64>(userInfo->Offset, 0); @@ -1935,119 +1935,119 @@ void TPartition::Handle(TEvPQ::TEvPartitionOffsets::TPtr& ev, const TActorContex tmp = userInfo->GetReadWriteTimestamp() ? userInfo->GetReadWriteTimestamp() : GetWriteTimeEstimate(userInfo->GetReadOffset()); result.SetReadWriteTimestampMS(tmp.MilliSeconds()); result.SetReadCreateTimestampMS(userInfo->GetReadCreateTimestamp().MilliSeconds()); - } - } - ctx.Send(ev->Get()->Sender, new TEvPQ::TEvPartitionOffsetsResponse(result)); -} - -void TPartition::HandleOnInit(TEvPQ::TEvPartitionOffsets::TPtr& ev, const TActorContext& ctx) { - NKikimrPQ::TOffsetsResponse::TPartResult result; - result.SetPartition(Partition); - result.SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); - result.SetErrorReason("partition is not ready yet"); - ctx.Send(ev->Get()->Sender, new TEvPQ::TEvPartitionOffsetsResponse(result)); -} - -void TPartition::Handle(TEvPQ::TEvPartitionStatus::TPtr& ev, const TActorContext& ctx) { - NKikimrPQ::TStatusResponse::TPartResult result; - result.SetPartition(Partition); - if (DiskIsFull) { - result.SetStatus(NKikimrPQ::TStatusResponse::STATUS_DISK_IS_FULL); - } else if (EndOffset - StartOffset >= static_cast<ui64>(Config.GetPartitionConfig().GetMaxCountInPartition()) || - BodySize + Head.PackedSize >= static_cast<ui64>(Config.GetPartitionConfig().GetMaxSizeInPartition())) { - result.SetStatus(NKikimrPQ::TStatusResponse::STATUS_PARTITION_IS_FULL); - } else { - result.SetStatus(NKikimrPQ::TStatusResponse::STATUS_OK); - } - result.SetLastInitDurationSeconds(InitDuration.Seconds()); - result.SetCreationTimestamp(CreationTime.Seconds()); - ui64 headGapSize = DataKeysBody.empty() ? 0 : (Head.Offset - (DataKeysBody.back().Key.GetOffset() + DataKeysBody.back().Key.GetCount())); - ui32 gapsCount = GapOffsets.size() + (headGapSize ? 1 : 0); - result.SetGapCount(gapsCount); - result.SetGapSize(headGapSize + GapSize); - - Y_VERIFY(AvgWriteBytes.size() == 4); + } + } + ctx.Send(ev->Get()->Sender, new TEvPQ::TEvPartitionOffsetsResponse(result)); +} + +void TPartition::HandleOnInit(TEvPQ::TEvPartitionOffsets::TPtr& ev, const TActorContext& ctx) { + NKikimrPQ::TOffsetsResponse::TPartResult result; + result.SetPartition(Partition); + result.SetErrorCode(NPersQueue::NErrorCode::INITIALIZING); + result.SetErrorReason("partition is not ready yet"); + ctx.Send(ev->Get()->Sender, new TEvPQ::TEvPartitionOffsetsResponse(result)); +} + +void TPartition::Handle(TEvPQ::TEvPartitionStatus::TPtr& ev, const TActorContext& ctx) { + NKikimrPQ::TStatusResponse::TPartResult result; + result.SetPartition(Partition); + if (DiskIsFull) { + result.SetStatus(NKikimrPQ::TStatusResponse::STATUS_DISK_IS_FULL); + } else if (EndOffset - StartOffset >= static_cast<ui64>(Config.GetPartitionConfig().GetMaxCountInPartition()) || + BodySize + Head.PackedSize >= static_cast<ui64>(Config.GetPartitionConfig().GetMaxSizeInPartition())) { + result.SetStatus(NKikimrPQ::TStatusResponse::STATUS_PARTITION_IS_FULL); + } else { + result.SetStatus(NKikimrPQ::TStatusResponse::STATUS_OK); + } + result.SetLastInitDurationSeconds(InitDuration.Seconds()); + result.SetCreationTimestamp(CreationTime.Seconds()); + ui64 headGapSize = DataKeysBody.empty() ? 0 : (Head.Offset - (DataKeysBody.back().Key.GetOffset() + DataKeysBody.back().Key.GetCount())); + ui32 gapsCount = GapOffsets.size() + (headGapSize ? 1 : 0); + result.SetGapCount(gapsCount); + result.SetGapSize(headGapSize + GapSize); + + Y_VERIFY(AvgWriteBytes.size() == 4); result.SetAvgWriteSpeedPerSec(AvgWriteBytes[0].GetValue()); result.SetAvgWriteSpeedPerMin(AvgWriteBytes[1].GetValue()); result.SetAvgWriteSpeedPerHour(AvgWriteBytes[2].GetValue()); result.SetAvgWriteSpeedPerDay(AvgWriteBytes[3].GetValue()); - - Y_VERIFY(AvgQuotaBytes.size() == 4); - result.SetAvgQuotaSpeedPerSec(AvgQuotaBytes[0].GetValue()); - result.SetAvgQuotaSpeedPerMin(AvgQuotaBytes[1].GetValue()); - result.SetAvgQuotaSpeedPerHour(AvgQuotaBytes[2].GetValue()); - result.SetAvgQuotaSpeedPerDay(AvgQuotaBytes[3].GetValue()); - + + Y_VERIFY(AvgQuotaBytes.size() == 4); + result.SetAvgQuotaSpeedPerSec(AvgQuotaBytes[0].GetValue()); + result.SetAvgQuotaSpeedPerMin(AvgQuotaBytes[1].GetValue()); + result.SetAvgQuotaSpeedPerHour(AvgQuotaBytes[2].GetValue()); + result.SetAvgQuotaSpeedPerDay(AvgQuotaBytes[3].GetValue()); + result.SetSourceIdCount(SourceIdStorage.GetInMemorySourceIds().size()); result.SetSourceIdRetentionPeriodSec((ctx.Now() - SourceIdStorage.MinAvailableTimestamp(ctx.Now())).Seconds()); - - result.SetWriteBytesQuota(WriteQuota.GetTotalSpeed()); - - TVector<ui64> resSpeed; - resSpeed.resize(4); - ui64 maxQuota = 0; + + result.SetWriteBytesQuota(WriteQuota.GetTotalSpeed()); + + TVector<ui64> resSpeed; + resSpeed.resize(4); + ui64 maxQuota = 0; for (auto& userInfoPair : UsersInfoStorage.GetAll()) { auto& userInfo = userInfoPair.second; if (ev->Get()->ClientId.empty() || ev->Get()->ClientId == userInfo.User) { Y_VERIFY(userInfo.AvgReadBytes.size() == 4); - for (ui32 i = 0; i < 4; ++i) { + for (ui32 i = 0; i < 4; ++i) { resSpeed[i] += userInfo.AvgReadBytes[i].GetValue(); - } + } maxQuota += userInfo.ReadQuota.GetTotalSpeed(); - } + } if (ev->Get()->ClientId == userInfo.User) { //fill lags - NKikimrPQ::TClientInfo* clientInfo = result.MutableLagsInfo(); + NKikimrPQ::TClientInfo* clientInfo = result.MutableLagsInfo(); clientInfo->SetClientId(userInfo.User); - auto write = clientInfo->MutableWritePosition(); - write->SetOffset(userInfo.Offset); - userInfo.EndOffset = EndOffset; + auto write = clientInfo->MutableWritePosition(); + write->SetOffset(userInfo.Offset); + userInfo.EndOffset = EndOffset; write->SetWriteTimestamp((userInfo.GetWriteTimestamp() ? userInfo.GetWriteTimestamp() : GetWriteTimeEstimate(userInfo.Offset)).MilliSeconds()); write->SetCreateTimestamp(userInfo.GetCreateTimestamp().MilliSeconds()); - auto read = clientInfo->MutableReadPosition(); - read->SetOffset(userInfo.GetReadOffset()); + auto read = clientInfo->MutableReadPosition(); + read->SetOffset(userInfo.GetReadOffset()); read->SetWriteTimestamp((userInfo.GetReadWriteTimestamp() ? userInfo.GetReadWriteTimestamp() : GetWriteTimeEstimate(userInfo.GetReadOffset())).MilliSeconds()); read->SetCreateTimestamp(userInfo.GetReadCreateTimestamp().MilliSeconds()); - write->SetSize(GetSizeLag(userInfo.Offset)); - read->SetSize(GetSizeLag(userInfo.GetReadOffset())); - - clientInfo->SetReadLagMs(userInfo.GetReadOffset() < (i64)EndOffset + write->SetSize(GetSizeLag(userInfo.Offset)); + read->SetSize(GetSizeLag(userInfo.GetReadOffset())); + + clientInfo->SetReadLagMs(userInfo.GetReadOffset() < (i64)EndOffset ? (userInfo.GetReadTimestamp() - TInstant::MilliSeconds(read->GetWriteTimestamp())).MilliSeconds() - : 0); + : 0); clientInfo->SetLastReadTimestampMs(userInfo.GetReadTimestamp().MilliSeconds()); - clientInfo->SetWriteLagMs(userInfo.GetWriteLagMs()); + clientInfo->SetWriteLagMs(userInfo.GetWriteLagMs()); ui64 totalLag = clientInfo->GetReadLagMs() + userInfo.GetWriteLagMs() + (ctx.Now() - userInfo.GetReadTimestamp()).MilliSeconds(); - clientInfo->SetTotalLagMs(totalLag); - } - } - result.SetAvgReadSpeedPerSec(resSpeed[0]); - result.SetAvgReadSpeedPerMin(resSpeed[1]); - result.SetAvgReadSpeedPerHour(resSpeed[2]); - result.SetAvgReadSpeedPerDay(resSpeed[3]); - - result.SetReadBytesQuota(maxQuota); - - result.SetPartitionSize(BodySize + Head.PackedSize); - result.SetStartOffset(StartOffset); - result.SetEndOffset(EndOffset); - - result.SetLastWriteTimestampMs(WriteTimestamp.MilliSeconds()); - result.SetWriteLagMs(WriteLagMs.GetValue()); - + clientInfo->SetTotalLagMs(totalLag); + } + } + result.SetAvgReadSpeedPerSec(resSpeed[0]); + result.SetAvgReadSpeedPerMin(resSpeed[1]); + result.SetAvgReadSpeedPerHour(resSpeed[2]); + result.SetAvgReadSpeedPerDay(resSpeed[3]); + + result.SetReadBytesQuota(maxQuota); + + result.SetPartitionSize(BodySize + Head.PackedSize); + result.SetStartOffset(StartOffset); + result.SetEndOffset(EndOffset); + + result.SetLastWriteTimestampMs(WriteTimestamp.MilliSeconds()); + result.SetWriteLagMs(WriteLagMs.GetValue()); + *result.MutableErrors() = {Errors.begin(), Errors.end()}; - ctx.Send(ev->Get()->Sender, new TEvPQ::TEvPartitionStatusResponse(result)); -} - -void TPartition::HandleOnInit(TEvPQ::TEvPartitionStatus::TPtr& ev, const TActorContext& ctx) { - NKikimrPQ::TStatusResponse::TPartResult result; - result.SetPartition(Partition); - result.SetStatus(NKikimrPQ::TStatusResponse::STATUS_INITIALIZING); - result.SetLastInitDurationSeconds((ctx.Now() - CreationTime).Seconds()); - result.SetCreationTimestamp(CreationTime.Seconds()); - ctx.Send(ev->Get()->Sender, new TEvPQ::TEvPartitionStatusResponse(result)); -} - - + ctx.Send(ev->Get()->Sender, new TEvPQ::TEvPartitionStatusResponse(result)); +} + +void TPartition::HandleOnInit(TEvPQ::TEvPartitionStatus::TPtr& ev, const TActorContext& ctx) { + NKikimrPQ::TStatusResponse::TPartResult result; + result.SetPartition(Partition); + result.SetStatus(NKikimrPQ::TStatusResponse::STATUS_INITIALIZING); + result.SetLastInitDurationSeconds((ctx.Now() - CreationTime).Seconds()); + result.SetCreationTimestamp(CreationTime.Seconds()); + ctx.Send(ev->Get()->Sender, new TEvPQ::TEvPartitionStatusResponse(result)); +} + + void TPartition::Handle(TEvPQ::TEvGetPartitionClientInfo::TPtr& ev, const TActorContext& ctx) { THolder<TEvPersQueue::TEvPartitionClientInfoResponse> response = MakeHolder<TEvPersQueue::TEvPartitionClientInfoResponse>(); NKikimrPQ::TClientInfoResponse& result(response->Record); @@ -2056,24 +2056,24 @@ void TPartition::Handle(TEvPQ::TEvGetPartitionClientInfo::TPtr& ev, const TActor result.SetEndOffset(EndOffset); result.SetResponseTimestamp(ctx.Now().MilliSeconds()); for (auto& pr : UsersInfoStorage.GetAll()) { - TUserInfo& userInfo(pr.second); + TUserInfo& userInfo(pr.second); NKikimrPQ::TClientInfo& clientInfo = *result.AddClientInfo(); clientInfo.SetClientId(pr.first); auto& write = *clientInfo.MutableWritePosition(); write.SetOffset(userInfo.Offset); - userInfo.EndOffset = EndOffset; + userInfo.EndOffset = EndOffset; write.SetWriteTimestamp((userInfo.GetWriteTimestamp() ? userInfo.GetWriteTimestamp() : GetWriteTimeEstimate(userInfo.Offset)).MilliSeconds()); write.SetCreateTimestamp(userInfo.GetCreateTimestamp().MilliSeconds()); auto& read = *clientInfo.MutableReadPosition(); read.SetOffset(userInfo.GetReadOffset()); read.SetWriteTimestamp((userInfo.GetReadWriteTimestamp() ? userInfo.GetReadWriteTimestamp() : GetWriteTimeEstimate(userInfo.GetReadOffset())).MilliSeconds()); read.SetCreateTimestamp(userInfo.GetReadCreateTimestamp().MilliSeconds()); - write.SetSize(GetSizeLag(userInfo.Offset)); - read.SetSize(GetSizeLag(userInfo.GetReadOffset())); + write.SetSize(GetSizeLag(userInfo.Offset)); + read.SetSize(GetSizeLag(userInfo.GetReadOffset())); } ctx.Send(ev->Get()->Sender, response.Release(), 0, ev->Cookie); } - + void TPartition::Handle(TEvPersQueue::TEvReportPartitionError::TPtr& ev, const TActorContext& ctx) { LogAndCollectError(ev->Get()->Record, ctx); } @@ -2095,88 +2095,88 @@ void TPartition::LogAndCollectError(NKikimrServices::EServiceKikimr service, con } std::pair<TInstant, TInstant> TPartition::GetTime(const TUserInfo& userInfo, ui64 offset) const -{ +{ TInstant wtime = userInfo.WriteTimestamp > TInstant::Zero() ? userInfo.WriteTimestamp : GetWriteTimeEstimate(offset); - return std::make_pair(wtime, userInfo.CreateTimestamp); -} - -//zero means no such record + return std::make_pair(wtime, userInfo.CreateTimestamp); +} + +//zero means no such record TInstant TPartition::GetWriteTimeEstimate(ui64 offset) const -{ - if (offset < StartOffset) offset = StartOffset; - if (offset >= EndOffset) +{ + if (offset < StartOffset) offset = StartOffset; + if (offset >= EndOffset) return TInstant::Zero(); - const std::deque<TDataKey>& container = (offset < Head.Offset || offset == Head.Offset && Head.PartNo > 0) ? DataKeysBody : HeadKeys; - Y_VERIFY(!container.empty()); - auto it = std::upper_bound(container.begin(), container.end(), offset, - [](const ui64 offset, const TDataKey& p) { return offset < p.Key.GetOffset() || offset == p.Key.GetOffset() && p.Key.GetPartNo() > 0;}); - Y_VERIFY(it != container.begin(),"Tablet %lu StartOffset %lu, HeadOffset %lu, offset %lu, containter size %lu, first-elem: %s", - TabletID, StartOffset, Head.Offset, offset, container.size(), container.front().Key.ToString().c_str()); //always greater - Y_VERIFY(it == container.end() || it->Key.GetOffset() > offset || it->Key.GetOffset() == offset && it->Key.GetPartNo() > 0); - --it; - if (it != container.begin()) - --it; + const std::deque<TDataKey>& container = (offset < Head.Offset || offset == Head.Offset && Head.PartNo > 0) ? DataKeysBody : HeadKeys; + Y_VERIFY(!container.empty()); + auto it = std::upper_bound(container.begin(), container.end(), offset, + [](const ui64 offset, const TDataKey& p) { return offset < p.Key.GetOffset() || offset == p.Key.GetOffset() && p.Key.GetPartNo() > 0;}); + Y_VERIFY(it != container.begin(),"Tablet %lu StartOffset %lu, HeadOffset %lu, offset %lu, containter size %lu, first-elem: %s", + TabletID, StartOffset, Head.Offset, offset, container.size(), container.front().Key.ToString().c_str()); //always greater + Y_VERIFY(it == container.end() || it->Key.GetOffset() > offset || it->Key.GetOffset() == offset && it->Key.GetPartNo() > 0); + --it; + if (it != container.begin()) + --it; return it->Timestamp; -} - - -void TPartition::Handle(TEvPQ::TEvGetClientOffset::TPtr& ev, const TActorContext& ctx) { +} + + +void TPartition::Handle(TEvPQ::TEvGetClientOffset::TPtr& ev, const TActorContext& ctx) { auto& userInfo = UsersInfoStorage.GetOrCreate(ev->Get()->ClientId, ctx); Y_VERIFY(userInfo.Offset >= -1, "Unexpected Offset: %" PRIi64, userInfo.Offset); ui64 offset = Max<i64>(userInfo.Offset, 0); auto ts = GetTime(userInfo, offset); - Counters.Cumulative()[COUNTER_PQ_GET_CLIENT_OFFSET_OK].Increment(1); + Counters.Cumulative()[COUNTER_PQ_GET_CLIENT_OFFSET_OK].Increment(1); ReplyGetClientOffsetOk(ctx, ev->Get()->Cookie, userInfo.Offset, ts.first, ts.second); -} - -void TPartition::Handle(TEvPQ::TEvUpdateWriteTimestamp::TPtr& ev, const TActorContext& ctx) { - TInstant timestamp = TInstant::MilliSeconds(ev->Get()->WriteTimestamp); - if (WriteTimestampEstimate > timestamp) { - ReplyError(ctx, ev->Get()->Cookie, NPersQueue::NErrorCode::BAD_REQUEST, - TStringBuilder() << "too big timestamp: " << timestamp << " known " << WriteTimestampEstimate); - return; - } - WriteTimestampEstimate = timestamp; - ReplyOk(ctx, ev->Get()->Cookie); -} - - -void TPartition::Handle(TEvPQ::TEvSetClientInfo::TPtr& ev, const TActorContext& ctx) { +} + +void TPartition::Handle(TEvPQ::TEvUpdateWriteTimestamp::TPtr& ev, const TActorContext& ctx) { + TInstant timestamp = TInstant::MilliSeconds(ev->Get()->WriteTimestamp); + if (WriteTimestampEstimate > timestamp) { + ReplyError(ctx, ev->Get()->Cookie, NPersQueue::NErrorCode::BAD_REQUEST, + TStringBuilder() << "too big timestamp: " << timestamp << " known " << WriteTimestampEstimate); + return; + } + WriteTimestampEstimate = timestamp; + ReplyOk(ctx, ev->Get()->Cookie); +} + + +void TPartition::Handle(TEvPQ::TEvSetClientInfo::TPtr& ev, const TActorContext& ctx) { const TString& user = ev->Get()->ClientId; auto& userInfo = UsersInfoStorage.GetOrCreate(user, ctx); - + if (userInfo.UserActs.size() > MAX_USER_ACTS) { - Counters.Cumulative()[COUNTER_PQ_SET_CLIENT_OFFSET_ERROR].Increment(1); - ReplyError(ctx, ev->Get()->Cookie, NPersQueue::NErrorCode::OVERLOAD, + Counters.Cumulative()[COUNTER_PQ_SET_CLIENT_OFFSET_ERROR].Increment(1); + ReplyError(ctx, ev->Get()->Cookie, NPersQueue::NErrorCode::OVERLOAD, TStringBuilder() << "too big inflight: " << userInfo.UserActs.size()); - return; - } + return; + } + - - const ui64& offset = ev->Get()->Offset; + const ui64& offset = ev->Get()->Offset; Y_VERIFY(offset <= (ui64)Max<i64>(), "Unexpected Offset: %" PRIu64, offset); - + userInfo.UserActs.push_back(ev->Release()); - - ProcessUserActs(userInfo, ctx); -} - -void TPartition::ProcessUserActs(TUserInfo& userInfo, const TActorContext& ctx) -{ - if (userInfo.WriteInProgress || userInfo.UserActs.empty()) - return; - ui64 cookie = ++SetOffsetCookie; - CookieToUser[cookie] = userInfo.User; - WriteClientInfo(cookie, userInfo, ctx); -} - -void TPartition::Handle(TEvPQ::TEvGetMaxSeqNoRequest::TPtr& ev, const TActorContext& ctx) { + + ProcessUserActs(userInfo, ctx); +} + +void TPartition::ProcessUserActs(TUserInfo& userInfo, const TActorContext& ctx) +{ + if (userInfo.WriteInProgress || userInfo.UserActs.empty()) + return; + ui64 cookie = ++SetOffsetCookie; + CookieToUser[cookie] = userInfo.User; + WriteClientInfo(cookie, userInfo, ctx); +} + +void TPartition::Handle(TEvPQ::TEvGetMaxSeqNoRequest::TPtr& ev, const TActorContext& ctx) { auto response = MakeHolder<TEvPQ::TEvProxyResponse>(ev->Get()->Cookie); NKikimrClient::TResponse& resp = response->Response; - - resp.SetStatus(NMsgBusProxy::MSTATUS_OK); - resp.SetErrorCode(NPersQueue::NErrorCode::OK); - + + resp.SetStatus(NMsgBusProxy::MSTATUS_OK); + resp.SetErrorCode(NPersQueue::NErrorCode::OK); + auto& result = *resp.MutablePartitionResponse()->MutableCmdGetMaxSeqNoResult(); for (const auto& sourceId : ev->Get()->SourceIds) { auto& protoInfo = *result.AddSourceIdInfo(); @@ -2185,7 +2185,7 @@ void TPartition::Handle(TEvPQ::TEvGetMaxSeqNoRequest::TPtr& ev, const TActorCont auto it = SourceIdStorage.GetInMemorySourceIds().find(sourceId); if (it == SourceIdStorage.GetInMemorySourceIds().end()) { continue; - } + } const auto& memInfo = it->second; Y_VERIFY(memInfo.Offset <= (ui64)Max<i64>(), "Offset is too big: %" PRIu64, memInfo.Offset); @@ -2196,51 +2196,51 @@ void TPartition::Handle(TEvPQ::TEvGetMaxSeqNoRequest::TPtr& ev, const TActorCont protoInfo.SetWriteTimestampMS(memInfo.WriteTimestamp.MilliSeconds()); protoInfo.SetExplicit(memInfo.Explicit); protoInfo.SetState(TSourceIdInfo::ConvertState(memInfo.State)); - } - - ctx.Send(Tablet, response.Release()); -} - - -void TPartition::Handle(TEvPQ::TEvBlobResponse::TPtr& ev, const TActorContext& ctx) { - const ui64 cookie = ev->Get()->GetCookie(); + } + + ctx.Send(Tablet, response.Release()); +} + + +void TPartition::Handle(TEvPQ::TEvBlobResponse::TPtr& ev, const TActorContext& ctx) { + const ui64 cookie = ev->Get()->GetCookie(); Y_VERIFY(ReadInfo.contains(cookie)); - - auto it = ReadInfo.find(cookie); - Y_VERIFY(it != ReadInfo.end()); + + auto it = ReadInfo.find(cookie); + Y_VERIFY(it != ReadInfo.end()); TReadInfo info = std::move(it->second); ReadInfo.erase(it); - - //make readinfo class + + //make readinfo class TReadAnswer answer(info.FormAnswer( ctx, *ev->Get(), EndOffset, Partition, &UsersInfoStorage.GetOrCreate(info.User, ctx), info.Destination, GetSizeLag(info.Offset) )); - - if (HasError(*ev->Get())) { + + if (HasError(*ev->Get())) { if (info.IsSubscription) { - Counters.Cumulative()[COUNTER_PQ_READ_SUBSCRIPTION_ERROR].Increment(1); - } - Counters.Cumulative()[COUNTER_PQ_READ_ERROR].Increment(1); + Counters.Cumulative()[COUNTER_PQ_READ_SUBSCRIPTION_ERROR].Increment(1); + } + Counters.Cumulative()[COUNTER_PQ_READ_ERROR].Increment(1); Counters.Percentile()[COUNTER_LATENCY_PQ_READ_ERROR].IncrementFor((ctx.Now() - info.Timestamp).MilliSeconds()); - } else { + } else { if (info.IsSubscription) { - Counters.Cumulative()[COUNTER_PQ_READ_SUBSCRIPTION_OK].Increment(1); - } + Counters.Cumulative()[COUNTER_PQ_READ_SUBSCRIPTION_OK].Increment(1); + } const auto& resp = dynamic_cast<TEvPQ::TEvProxyResponse*>(answer.Event.Get())->Response; - Counters.Cumulative()[COUNTER_PQ_READ_OK].Increment(1); + Counters.Cumulative()[COUNTER_PQ_READ_OK].Increment(1); Counters.Percentile()[COUNTER_LATENCY_PQ_READ_OK].IncrementFor((ctx.Now() - info.Timestamp).MilliSeconds()); - Counters.Cumulative()[COUNTER_PQ_READ_BYTES].Increment(resp.ByteSize()); - } + Counters.Cumulative()[COUNTER_PQ_READ_BYTES].Increment(resp.ByteSize()); + } ctx.Send(info.Destination != 0 ? Tablet : ctx.SelfID, answer.Event.Release()); - ReportLabeledCounters(ctx); + ReportLabeledCounters(ctx); OnReadRequestFinished(std::move(info), answer.Size); -} - - +} + + template <typename T> // TCmdReadResult -static void AddResultBlob(T* read, const TClientBlob& blob, ui64 offset) -{ +static void AddResultBlob(T* read, const TClientBlob& blob, ui64 offset) +{ auto cc = read->AddResult(); cc->SetOffset(offset); cc->SetData(blob.Data); @@ -2248,32 +2248,32 @@ static void AddResultBlob(T* read, const TClientBlob& blob, ui64 offset) cc->SetSeqNo(blob.SeqNo); cc->SetWriteTimestampMS(blob.WriteTimestamp.MilliSeconds()); cc->SetCreateTimestampMS(blob.CreateTimestamp.MilliSeconds()); - cc->SetUncompressedSize(blob.UncompressedSize); + cc->SetUncompressedSize(blob.UncompressedSize); cc->SetPartitionKey(blob.PartitionKey); cc->SetExplicitHash(blob.ExplicitHashKey); - if (blob.PartData) { - cc->SetPartNo(blob.PartData->PartNo); - cc->SetTotalParts(blob.PartData->TotalParts); - if (blob.PartData->PartNo == 0) - cc->SetTotalSize(blob.PartData->TotalSize); - } + if (blob.PartData) { + cc->SetPartNo(blob.PartData->PartNo); + cc->SetTotalParts(blob.PartData->TotalParts); + if (blob.PartData->PartNo == 0) + cc->SetTotalSize(blob.PartData->TotalSize); + } } template <typename T> static void AddResultDebugInfo(const TEvPQ::TEvBlobResponse* response, T* readResult) { - ui64 cachedSize = 0; + ui64 cachedSize = 0; ui32 cachedBlobs = 0; ui32 diskBlobs = 0; for (auto blob : response->GetBlobs()) { - if (blob.Cached) { + if (blob.Cached) { ++cachedBlobs; - cachedSize += blob.Size; - } else + cachedSize += blob.Size; + } else ++diskBlobs; } - if (cachedSize) - readResult->SetBlobsCachedSize(cachedSize); + if (cachedSize) + readResult->SetBlobsCachedSize(cachedSize); if (cachedBlobs) readResult->SetBlobsFromCache(cachedBlobs); if (diskBlobs) @@ -2289,7 +2289,7 @@ TReadAnswer TReadInfo::FormAnswer( const ui64 cookie, const ui64 sizeLag ) { - Y_UNUSED(partition); + Y_UNUSED(partition); THolder<TEvPQ::TEvProxyResponse> answer = MakeHolder<TEvPQ::TEvProxyResponse>(cookie); NKikimrClient::TResponse& res = answer->Response; const TEvPQ::TEvBlobResponse* response = &blobResponse; @@ -2299,89 +2299,89 @@ TReadAnswer TReadInfo::FormAnswer( blobResponse.Error.ErrorStr.size(), MakeHolder<TEvPQ::TEvError>(blobResponse.Error.ErrorCode, blobResponse.Error.ErrorStr, cookie) }; - } - - res.SetStatus(NMsgBusProxy::MSTATUS_OK); - res.SetErrorCode(NPersQueue::NErrorCode::OK); + } + + res.SetStatus(NMsgBusProxy::MSTATUS_OK); + res.SetErrorCode(NPersQueue::NErrorCode::OK); auto readResult = res.MutablePartitionResponse()->MutableCmdReadResult(); readResult->SetWaitQuotaTimeMs(WaitQuotaTime.MilliSeconds()); readResult->SetMaxOffset(endOffset); - readResult->SetRealReadOffset(Offset); + readResult->SetRealReadOffset(Offset); Y_VERIFY(endOffset <= (ui64)Max<i64>(), "Max offset is too big: %" PRIu64, endOffset); - - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "FormAnswer " << Blobs.size()); + + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "FormAnswer " << Blobs.size()); AddResultDebugInfo(response, readResult); - ui32 cnt = 0, pcnt = 0; - ui32 size = 0, psize = 0; + ui32 cnt = 0, pcnt = 0; + ui32 size = 0, psize = 0; const TVector<TRequestedBlob>& blobs = response->GetBlobs(); - - Y_VERIFY(blobs.size() == Blobs.size()); - response->Check(); - bool needStop = false; - for (ui32 pos = 0; pos < blobs.size() && !needStop; ++pos) { + + Y_VERIFY(blobs.size() == Blobs.size()); + response->Check(); + bool needStop = false; + for (ui32 pos = 0; pos < blobs.size() && !needStop; ++pos) { Y_VERIFY(Blobs[pos].Offset == blobs[pos].Offset, "Mismatch %" PRIu64 " vs %" PRIu64, Blobs[pos].Offset, blobs[pos].Offset); Y_VERIFY(Blobs[pos].Count == blobs[pos].Count, "Mismatch %" PRIu32 " vs %" PRIu32, Blobs[pos].Count, blobs[pos].Count); ui64 offset = blobs[pos].Offset; ui32 count = blobs[pos].Count; - ui16 partNo = blobs[pos].PartNo; - ui16 internalPartsCount = blobs[pos].InternalPartsCount; + ui16 partNo = blobs[pos].PartNo; + ui16 internalPartsCount = blobs[pos].InternalPartsCount; const TString& blobValue = blobs[pos].Value; if (blobValue.empty()) { // this is ok. Means that someone requested too much data - LOG_DEBUG(ctx, NKikimrServices::PERSQUEUE, "Not full answer here!"); + LOG_DEBUG(ctx, NKikimrServices::PERSQUEUE, "Not full answer here!"); ui64 answerSize = answer->Response.ByteSize(); if (userInfo && Destination != 0) { userInfo->ReadDone(ctx, ctx.Now(), answerSize, cnt, ClientDC); - } - readResult->SetSizeLag(sizeLag - size); + } + readResult->SetSizeLag(sizeLag - size); return {answerSize, std::move(answer)}; - } - Y_VERIFY(blobValue.size() == blobs[pos].Size, "value for offset %" PRIu64 " count %u size must be %u, but got %u", - offset, count, blobs[pos].Size, (ui32)blobValue.size()); - + } + Y_VERIFY(blobValue.size() == blobs[pos].Size, "value for offset %" PRIu64 " count %u size must be %u, but got %u", + offset, count, blobs[pos].Size, (ui32)blobValue.size()); + if (offset > Offset || (offset == Offset && partNo > PartNo)) { // got gap - Offset = offset; + Offset = offset; PartNo = partNo; - } - Y_VERIFY(offset <= Offset); - Y_VERIFY(offset < Offset || partNo <= PartNo); - TKey key(TKeyPrefix::TypeData, 0, offset, partNo, count, internalPartsCount, false); - for (TBlobIterator it(key, blobValue); it.IsValid() && !needStop; it.Next()) { - TBatch batch = it.GetBatch(); - auto& header = batch.Header; - batch.Unpack(); - - ui32 pos = 0; - if (header.GetOffset() > Offset || header.GetOffset() == Offset && header.GetPartNo() >= PartNo) { - pos = 0; - } else { - pos = batch.FindPos(Offset, PartNo); - } - offset += header.GetCount(); - + } + Y_VERIFY(offset <= Offset); + Y_VERIFY(offset < Offset || partNo <= PartNo); + TKey key(TKeyPrefix::TypeData, 0, offset, partNo, count, internalPartsCount, false); + for (TBlobIterator it(key, blobValue); it.IsValid() && !needStop; it.Next()) { + TBatch batch = it.GetBatch(); + auto& header = batch.Header; + batch.Unpack(); + + ui32 pos = 0; + if (header.GetOffset() > Offset || header.GetOffset() == Offset && header.GetPartNo() >= PartNo) { + pos = 0; + } else { + pos = batch.FindPos(Offset, PartNo); + } + offset += header.GetCount(); + if (pos == Max<ui32>()) // this batch does not contain data to read, skip it - continue; - - - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "FormAnswer processing batch offset " - << (offset - header.GetCount()) << " totakecount " << count << " count " << header.GetCount() << " size " << header.GetPayloadSize() << " from pos " << pos << " cbcount " << batch.Blobs.size()); - - ui32 i = 0; - for (i = pos; i < batch.Blobs.size() && size < Size && cnt < Count; ++i) { - pcnt = cnt; - psize = size; - TClientBlob &res = batch.Blobs[i]; - VERIFY_RESULT_BLOB(res, i); + continue; + + + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "FormAnswer processing batch offset " + << (offset - header.GetCount()) << " totakecount " << count << " count " << header.GetCount() << " size " << header.GetPayloadSize() << " from pos " << pos << " cbcount " << batch.Blobs.size()); + + ui32 i = 0; + for (i = pos; i < batch.Blobs.size() && size < Size && cnt < Count; ++i) { + pcnt = cnt; + psize = size; + TClientBlob &res = batch.Blobs[i]; + VERIFY_RESULT_BLOB(res, i); bool messageSkippingBehaviour = AppData()->PQConfig.GetTopicsAreFirstClassCitizen() && ReadTimestampMs > res.WriteTimestamp.MilliSeconds(); if (!messageSkippingBehaviour) { size += res.GetBlobSize(); Y_VERIFY(PartNo == res.GetPartNo(), "pos %" PRIu32 " i %" PRIu32 " Offset %" PRIu64 " PartNo %" PRIu16 " offset %" PRIu64 " partNo %" PRIu16, pos, i, Offset, PartNo, offset, res.GetPartNo()); - + if (userInfo) { userInfo->AddTimestampToCache( Offset, res.WriteTimestamp, res.CreateTimestamp, @@ -2394,228 +2394,228 @@ TReadAnswer TReadInfo::FormAnswer( ++cnt; } } - + if (res.IsLastPart()) { - PartNo = 0; - ++Offset; + PartNo = 0; + ++Offset; } else { ++PartNo; - } - } - - if (i != batch.Blobs.size()) {//not fully processed batch - next definetely will not be processed - needStop = true; - } - } - } - Y_VERIFY(pcnt <= Count && psize <= Size); - Y_VERIFY(pcnt <= cnt && psize <= size); - if (!needStop) { // body blobs are fully processed - - if (CachedOffset > Offset) { - Offset = CachedOffset; - } - - for (const auto& writeBlob : Cached) { - if (cnt >= Count || size >= Size) - break; - pcnt = cnt; - psize = size; - VERIFY_RESULT_BLOB(writeBlob, 0u); - - readResult->SetBlobsCachedSize(readResult->GetBlobsCachedSize() + writeBlob.GetBlobSize()); - - size += writeBlob.GetBlobSize(); + } + } + + if (i != batch.Blobs.size()) {//not fully processed batch - next definetely will not be processed + needStop = true; + } + } + } + Y_VERIFY(pcnt <= Count && psize <= Size); + Y_VERIFY(pcnt <= cnt && psize <= size); + if (!needStop) { // body blobs are fully processed + + if (CachedOffset > Offset) { + Offset = CachedOffset; + } + + for (const auto& writeBlob : Cached) { + if (cnt >= Count || size >= Size) + break; + pcnt = cnt; + psize = size; + VERIFY_RESULT_BLOB(writeBlob, 0u); + + readResult->SetBlobsCachedSize(readResult->GetBlobsCachedSize() + writeBlob.GetBlobSize()); + + size += writeBlob.GetBlobSize(); if (userInfo) { userInfo->AddTimestampToCache( Offset, writeBlob.WriteTimestamp, writeBlob.CreateTimestamp, Destination != 0, ctx.Now() ); } - AddResultBlob(readResult, writeBlob, Offset); - if (writeBlob.IsLastPart()) { - ++Offset; - ++cnt; - } - } - } - Y_VERIFY(pcnt <= Count && psize <= Size); - Y_VERIFY(pcnt <= cnt && psize <= size); + AddResultBlob(readResult, writeBlob, Offset); + if (writeBlob.IsLastPart()) { + ++Offset; + ++cnt; + } + } + } + Y_VERIFY(pcnt <= Count && psize <= Size); + Y_VERIFY(pcnt <= cnt && psize <= size); Y_VERIFY(Offset <= (ui64)Max<i64>(), "Offset is too big: %" PRIu64, Offset); ui64 answerSize = answer->Response.ByteSize(); if (userInfo && Destination != 0) { userInfo->ReadDone(ctx, ctx.Now(), answerSize, cnt, ClientDC); - } - readResult->SetSizeLag(sizeLag - size); + } + readResult->SetSizeLag(sizeLag - size); return {answerSize, std::move(answer)}; -} - - -void TPartition::HandleOnIdle(TEvPQ::TEvWrite::TPtr& ev, const TActorContext& ctx) { - HandleOnWrite(ev, ctx); - HandleWrites(ctx); -} - - -void TPartition::Handle(TEvPQ::TEvReadTimeout::TPtr& ev, const TActorContext& ctx) { +} + + +void TPartition::HandleOnIdle(TEvPQ::TEvWrite::TPtr& ev, const TActorContext& ctx) { + HandleOnWrite(ev, ctx); + HandleWrites(ctx); +} + + +void TPartition::Handle(TEvPQ::TEvReadTimeout::TPtr& ev, const TActorContext& ctx) { auto res = Subscriber.OnTimeout(ev); - if (!res) - return; + if (!res) + return; TReadAnswer answer(res->FormAnswer(ctx, res->Offset, Partition, nullptr, res->Destination, 0)); ctx.Send(Tablet, answer.Event.Release()); LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, " waiting read cookie " << ev->Get()->Cookie << " partition " << Partition << " read timeout for " << res->User << " offset " << res->Offset); auto& userInfo = UsersInfoStorage.GetOrCreate(res->User, ctx); - + userInfo.ForgetSubscription(ctx.Now()); OnReadRequestFinished(std::move(res.GetRef()), answer.Size); -} - - +} + + TVector<TRequestedBlob> TPartition::GetReadRequestFromBody(const ui64 startOffset, const ui16 partNo, const ui32 maxCount, const ui32 maxSize, ui32* rcount, ui32* rsize) -{ - Y_VERIFY(rcount && rsize); - ui32& count = *rcount; - ui32& size = *rsize; - count = size = 0; +{ + Y_VERIFY(rcount && rsize); + ui32& count = *rcount; + ui32& size = *rsize; + count = size = 0; TVector<TRequestedBlob> blobs; - if (!DataKeysBody.empty() && (Head.Offset > startOffset || Head.Offset == startOffset && Head.PartNo > partNo)) { //will read smth from body - auto it = std::upper_bound(DataKeysBody.begin(), DataKeysBody.end(), std::make_pair(startOffset, partNo), - [](const std::pair<ui64, ui16>& offsetAndPartNo, const TDataKey& p) { return offsetAndPartNo.first < p.Key.GetOffset() || offsetAndPartNo.first == p.Key.GetOffset() && offsetAndPartNo.second < p.Key.GetPartNo();}); - if (it == DataKeysBody.begin()) //could be true if data is deleted or gaps are created - return blobs; - Y_VERIFY(it != DataKeysBody.begin()); //always greater, startoffset can't be less that StartOffset - Y_VERIFY(it == DataKeysBody.end() || it->Key.GetOffset() > startOffset || it->Key.GetOffset() == startOffset && it->Key.GetPartNo() > partNo); - --it; - Y_VERIFY(it->Key.GetOffset() < startOffset || (it->Key.GetOffset() == startOffset && it->Key.GetPartNo() <= partNo)); - ui32 cnt = 0; - ui32 sz = 0; - if (startOffset > it->Key.GetOffset() + it->Key.GetCount()) { //there is a gap - ++it; - if (it != DataKeysBody.end()) { - cnt = it->Key.GetCount(); - sz = it->Size; - } - } else { - Y_VERIFY(it->Key.GetCount() >= (startOffset - it->Key.GetOffset())); - cnt = it->Key.GetCount() - (startOffset - it->Key.GetOffset()); //don't count all elements from first blob - sz = (cnt == it->Key.GetCount() ? it->Size : 0); //not readed client blobs can be of ~8Mb, so don't count this size at all - } - while (it != DataKeysBody.end() && size < maxSize && count < maxCount) { + if (!DataKeysBody.empty() && (Head.Offset > startOffset || Head.Offset == startOffset && Head.PartNo > partNo)) { //will read smth from body + auto it = std::upper_bound(DataKeysBody.begin(), DataKeysBody.end(), std::make_pair(startOffset, partNo), + [](const std::pair<ui64, ui16>& offsetAndPartNo, const TDataKey& p) { return offsetAndPartNo.first < p.Key.GetOffset() || offsetAndPartNo.first == p.Key.GetOffset() && offsetAndPartNo.second < p.Key.GetPartNo();}); + if (it == DataKeysBody.begin()) //could be true if data is deleted or gaps are created + return blobs; + Y_VERIFY(it != DataKeysBody.begin()); //always greater, startoffset can't be less that StartOffset + Y_VERIFY(it == DataKeysBody.end() || it->Key.GetOffset() > startOffset || it->Key.GetOffset() == startOffset && it->Key.GetPartNo() > partNo); + --it; + Y_VERIFY(it->Key.GetOffset() < startOffset || (it->Key.GetOffset() == startOffset && it->Key.GetPartNo() <= partNo)); + ui32 cnt = 0; + ui32 sz = 0; + if (startOffset > it->Key.GetOffset() + it->Key.GetCount()) { //there is a gap + ++it; + if (it != DataKeysBody.end()) { + cnt = it->Key.GetCount(); + sz = it->Size; + } + } else { + Y_VERIFY(it->Key.GetCount() >= (startOffset - it->Key.GetOffset())); + cnt = it->Key.GetCount() - (startOffset - it->Key.GetOffset()); //don't count all elements from first blob + sz = (cnt == it->Key.GetCount() ? it->Size : 0); //not readed client blobs can be of ~8Mb, so don't count this size at all + } + while (it != DataKeysBody.end() && size < maxSize && count < maxCount) { size += sz; count += cnt; TRequestedBlob reqBlob(it->Key.GetOffset(), it->Key.GetPartNo(), it->Key.GetCount(), it->Key.GetInternalPartsCount(), it->Size, TString()); blobs.push_back(reqBlob); - ++it; - if (it == DataKeysBody.end()) - break; - sz = it->Size; - cnt = it->Key.GetCount(); - } - } - return blobs; -} - - - + ++it; + if (it == DataKeysBody.end()) + break; + sz = it->Size; + cnt = it->Key.GetCount(); + } + } + return blobs; +} + + + TVector<TClientBlob> TPartition::GetReadRequestFromHead(const ui64 startOffset, const ui16 partNo, const ui32 maxCount, const ui32 maxSize, const ui64 readTimestampMs, ui32* rcount, ui32* rsize, ui64* insideHeadOffset) -{ - ui32& count = *rcount; - ui32& size = *rsize; +{ + ui32& count = *rcount; + ui32& size = *rsize; TVector<TClientBlob> res; std::optional<ui64> firstAddedBlobOffset{}; - ui32 pos = 0; - if (startOffset > Head.Offset || startOffset == Head.Offset && partNo > Head.PartNo) { - pos = Head.FindPos(startOffset, partNo); - Y_VERIFY(pos != Max<ui32>()); - } - for (;pos < Head.Batches.size(); ++pos) - { - + ui32 pos = 0; + if (startOffset > Head.Offset || startOffset == Head.Offset && partNo > Head.PartNo) { + pos = Head.FindPos(startOffset, partNo); + Y_VERIFY(pos != Max<ui32>()); + } + for (;pos < Head.Batches.size(); ++pos) + { + TVector<TClientBlob> blobs; - Head.Batches[pos].UnpackTo(&blobs); - ui32 i = 0; - ui64 offset = Head.Batches[pos].GetOffset(); - ui16 pno = Head.Batches[pos].GetPartNo(); - for (; i < blobs.size(); ++i) { - - Y_VERIFY(pno == blobs[i].GetPartNo()); + Head.Batches[pos].UnpackTo(&blobs); + ui32 i = 0; + ui64 offset = Head.Batches[pos].GetOffset(); + ui16 pno = Head.Batches[pos].GetPartNo(); + for (; i < blobs.size(); ++i) { + + Y_VERIFY(pno == blobs[i].GetPartNo()); bool messageSkippingBehaviour = AppData()->PQConfig.GetTopicsAreFirstClassCitizen() && readTimestampMs > blobs[i].WriteTimestamp.MilliSeconds(); bool skip = offset < startOffset || offset == startOffset && blobs[i].GetPartNo() < partNo || messageSkippingBehaviour; - if (blobs[i].IsLastPart()) { - ++offset; - pno = 0; - if (!skip) - ++count; - } else { - ++pno; - } - if (skip) - continue; + if (blobs[i].IsLastPart()) { + ++offset; + pno = 0; + if (!skip) + ++count; + } else { + ++pno; + } + if (skip) + continue; if (count > maxCount) // blob is counted already - break; - if (size >= maxSize) - break; + break; + if (size >= maxSize) + break; size += blobs[i].GetBlobSize(); res.push_back(blobs[i]); if (!firstAddedBlobOffset && AppData()->PQConfig.GetTopicsAreFirstClassCitizen()) firstAddedBlobOffset = offset > 0 ? offset - 1 : 0; - } - if (i < blobs.size()) // already got limit - break; - } + } + if (i < blobs.size()) // already got limit + break; + } *insideHeadOffset = firstAddedBlobOffset.value_or(*insideHeadOffset); - return res; -} - -void TPartition::Handle(TEvPQ::TEvRead::TPtr& ev, const TActorContext& ctx) { - auto read = ev->Get(); - - if (read->Count == 0) { - Counters.Cumulative()[COUNTER_PQ_READ_ERROR].Increment(1); - Counters.Percentile()[COUNTER_LATENCY_PQ_READ_ERROR].IncrementFor(0); - ReplyError(ctx, read->Cookie, NPersQueue::NErrorCode::BAD_REQUEST, "no infinite flows allowed - count is not set or 0"); - return; - } - if (read->Offset < StartOffset) { - Counters.Cumulative()[COUNTER_PQ_READ_ERROR_SMALL_OFFSET].Increment(1); - read->Offset = StartOffset; - if (read->PartNo > 0) { - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "I was right, there could be rewinds and deletions at once! Topic " << TopicName << " partition " << Partition - << " readOffset " << read->Offset << " readPartNo " << read->PartNo << " startOffset " << StartOffset); - ReplyError(ctx, read->Cookie, NPersQueue::NErrorCode::READ_ERROR_TOO_SMALL_OFFSET, "client requested not from first part, and this part is lost"); - return; - } - } - if (read->Offset > EndOffset || read->Offset == EndOffset && read->PartNo > 0) { - Counters.Cumulative()[COUNTER_PQ_READ_ERROR_BIG_OFFSET].Increment(1); - Counters.Percentile()[COUNTER_LATENCY_PQ_READ_ERROR].IncrementFor(0); - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "reading from too big offset - topic " << TopicName << " partition " << Partition << " client " - << read->ClientId << " EndOffset " << EndOffset << " offset " << read->Offset); - ReplyError(ctx, read->Cookie, NPersQueue::NErrorCode::READ_ERROR_TOO_BIG_OFFSET, - TStringBuilder() << "trying to read from future. ReadOffset " << read->Offset << ", " << read->PartNo << " EndOffset " << EndOffset); - return; - } - - const TString& user = read->ClientId; - - Y_VERIFY(read->Offset <= EndOffset); - + return res; +} + +void TPartition::Handle(TEvPQ::TEvRead::TPtr& ev, const TActorContext& ctx) { + auto read = ev->Get(); + + if (read->Count == 0) { + Counters.Cumulative()[COUNTER_PQ_READ_ERROR].Increment(1); + Counters.Percentile()[COUNTER_LATENCY_PQ_READ_ERROR].IncrementFor(0); + ReplyError(ctx, read->Cookie, NPersQueue::NErrorCode::BAD_REQUEST, "no infinite flows allowed - count is not set or 0"); + return; + } + if (read->Offset < StartOffset) { + Counters.Cumulative()[COUNTER_PQ_READ_ERROR_SMALL_OFFSET].Increment(1); + read->Offset = StartOffset; + if (read->PartNo > 0) { + LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "I was right, there could be rewinds and deletions at once! Topic " << TopicName << " partition " << Partition + << " readOffset " << read->Offset << " readPartNo " << read->PartNo << " startOffset " << StartOffset); + ReplyError(ctx, read->Cookie, NPersQueue::NErrorCode::READ_ERROR_TOO_SMALL_OFFSET, "client requested not from first part, and this part is lost"); + return; + } + } + if (read->Offset > EndOffset || read->Offset == EndOffset && read->PartNo > 0) { + Counters.Cumulative()[COUNTER_PQ_READ_ERROR_BIG_OFFSET].Increment(1); + Counters.Percentile()[COUNTER_LATENCY_PQ_READ_ERROR].IncrementFor(0); + LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "reading from too big offset - topic " << TopicName << " partition " << Partition << " client " + << read->ClientId << " EndOffset " << EndOffset << " offset " << read->Offset); + ReplyError(ctx, read->Cookie, NPersQueue::NErrorCode::READ_ERROR_TOO_BIG_OFFSET, + TStringBuilder() << "trying to read from future. ReadOffset " << read->Offset << ", " << read->PartNo << " EndOffset " << EndOffset); + return; + } + + const TString& user = read->ClientId; + + Y_VERIFY(read->Offset <= EndOffset); + auto& userInfo = UsersInfoStorage.GetOrCreate(user, ctx); - - if (!read->SessionId.empty()) { + + if (!read->SessionId.empty()) { if (userInfo.Session != read->SessionId) { - Counters.Cumulative()[COUNTER_PQ_READ_ERROR_NO_SESSION].Increment(1); - Counters.Percentile()[COUNTER_LATENCY_PQ_READ_ERROR].IncrementFor(0); - ReplyError(ctx, read->Cookie, NPersQueue::NErrorCode::READ_ERROR_NO_SESSION, + Counters.Cumulative()[COUNTER_PQ_READ_ERROR_NO_SESSION].Increment(1); + Counters.Percentile()[COUNTER_LATENCY_PQ_READ_ERROR].IncrementFor(0); + ReplyError(ctx, read->Cookie, NPersQueue::NErrorCode::READ_ERROR_NO_SESSION, TStringBuilder() << "no such session '" << read->SessionId << "'"); - return; - } - } + return; + } + } if (userInfo.ReadSpeedLimiter) { Send(userInfo.ReadSpeedLimiter->Actor, new NReadSpeedLimiterEvents::TEvRequest(ev.Release())); @@ -2638,42 +2638,42 @@ void TPartition::DoRead(TEvPQ::TEvRead::TPtr ev, TDuration waitQuotaTime, const TInstant timestamp = read->MaxTimeLagMs > 0 ? ctx.Now() - TDuration::MilliSeconds(read->MaxTimeLagMs) : TInstant::Zero(); timestamp = Max(timestamp, TInstant::MilliSeconds(read->ReadTimestampMs)); timestamp = Max(timestamp, userInfo.ReadFromTimestamp); - offset = Max(GetOffsetEstimate(DataKeysBody, timestamp, Min(Head.Offset, EndOffset - 1)), offset); + offset = Max(GetOffsetEstimate(DataKeysBody, timestamp, Min(Head.Offset, EndOffset - 1)), offset); userInfo.ReadOffsetRewindSum += offset - read->Offset; } - + TReadInfo info(user, read->ClientDC, offset, read->PartNo, read->Count, read->Size, read->Cookie, read->ReadTimestampMs, waitQuotaTime); - - ui64 cookie = Cookie++; - - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "read cookie " << cookie << " Topic '" << TopicName << "' partition " << Partition << " user " << user + + ui64 cookie = Cookie++; + + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "read cookie " << cookie << " Topic '" << TopicName << "' partition " << Partition << " user " << user << " offset " << read->Offset << " count " << read->Count << " size " << read->Size << " endOffset " << EndOffset << " max time lag " << read->MaxTimeLagMs << "ms effective offset " << offset); - - + + if (offset == EndOffset) { - if (read->Timeout > 30000) { - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "too big read timeout " << " Topic '" << TopicName << "' partition " << Partition << " user " << read->ClientId + if (read->Timeout > 30000) { + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "too big read timeout " << " Topic '" << TopicName << "' partition " << Partition << " user " << read->ClientId << " offset " << read->Offset << " count " << read->Count << " size " << read->Size << " endOffset " << EndOffset << " max time lag " << read->MaxTimeLagMs << "ms effective offset " << offset); - read->Timeout = 30000; - } - Subscriber.AddSubscription(std::move(info), read->Timeout, cookie, ctx); + read->Timeout = 30000; + } + Subscriber.AddSubscription(std::move(info), read->Timeout, cookie, ctx); ++userInfo.Subscriptions; userInfo.UpdateReadOffset((i64)offset - 1, userInfo.WriteTimestamp, userInfo.CreateTimestamp, ctx.Now()); - - return; - } - + + return; + } + Y_VERIFY(offset < EndOffset); - - ProcessRead(ctx, std::move(info), cookie, false); -} - + + ProcessRead(ctx, std::move(info), cookie, false); +} + void TPartition::OnReadRequestFinished(TReadInfo&& info, ui64 answerSize) { auto userInfo = UsersInfoStorage.GetIfExists(info.User); Y_VERIFY(userInfo); - + if (userInfo->ReadSpeedLimiter) { Send( userInfo->ReadSpeedLimiter->Actor, @@ -2682,44 +2682,44 @@ void TPartition::OnReadRequestFinished(TReadInfo&& info, ui64 answerSize) { } } -void TPartition::AnswerCurrentWrites(const TActorContext& ctx) -{ - ui64 offset = EndOffset; - while (!Responses.empty()) { +void TPartition::AnswerCurrentWrites(const TActorContext& ctx) +{ + ui64 offset = EndOffset; + while (!Responses.empty()) { const ui64 quotedTime = Responses.front().QuotedTime; const ui64 queueTime = Responses.front().QueueTime; - const ui64 writeTime = ctx.Now().MilliSeconds() - Responses.front().WriteTime; + const ui64 writeTime = ctx.Now().MilliSeconds() - Responses.front().WriteTime; if (Responses.front().IsWrite()) { const auto& writeResponse = Responses.front().GetWrite(); - const TString& s = writeResponse.Msg.SourceId; - const ui64& seqNo = writeResponse.Msg.SeqNo; - const ui16& partNo = writeResponse.Msg.PartNo; - const ui16& totalParts = writeResponse.Msg.TotalParts; - const TMaybe<ui64>& wrOffset = writeResponse.Offset; - - bool already = false; - + const TString& s = writeResponse.Msg.SourceId; + const ui64& seqNo = writeResponse.Msg.SeqNo; + const ui16& partNo = writeResponse.Msg.PartNo; + const ui16& totalParts = writeResponse.Msg.TotalParts; + const TMaybe<ui64>& wrOffset = writeResponse.Offset; + + bool already = false; + auto it = SourceIdStorage.GetInMemorySourceIds().find(s); - - ui64 maxSeqNo = 0; - ui64 maxOffset = 0; - + + ui64 maxSeqNo = 0; + ui64 maxOffset = 0; + if (it != SourceIdStorage.GetInMemorySourceIds().end()) { - maxSeqNo = it->second.SeqNo; - maxOffset = it->second.Offset; + maxSeqNo = it->second.SeqNo; + maxOffset = it->second.Offset; if (it->second.SeqNo >= seqNo && !writeResponse.Msg.DisableDeduplication) { - already = true; - } - } - - if (!already) { - if (wrOffset) { - Y_VERIFY(*wrOffset >= offset); - offset = *wrOffset; - } - } - if (!already && partNo + 1 == totalParts) { + already = true; + } + } + + if (!already) { + if (wrOffset) { + Y_VERIFY(*wrOffset >= offset); + offset = *wrOffset; + } + } + if (!already && partNo + 1 == totalParts) { if (it == SourceIdStorage.GetInMemorySourceIds().end()) { Counters.Cumulative()[COUNTER_PQ_SID_CREATED].Increment(1); SourceIdStorage.RegisterSourceId(s, writeResponse.Msg.SeqNo, offset, CurrentTimestamp); @@ -2727,33 +2727,33 @@ void TPartition::AnswerCurrentWrites(const TActorContext& ctx) SourceIdStorage.RegisterSourceId(s, it->second.Updated(writeResponse.Msg.SeqNo, offset, CurrentTimestamp)); } - Counters.Cumulative()[COUNTER_PQ_WRITE_OK].Increment(1); - } + Counters.Cumulative()[COUNTER_PQ_WRITE_OK].Increment(1); + } ReplyWrite( ctx, writeResponse.Cookie, s, seqNo, partNo, totalParts, already ? maxOffset : offset, CurrentTimestamp, already, maxSeqNo, quotedTime, TopicQuotaWaitTimeForCurrentBlob, queueTime, writeTime ); - LOG_DEBUG_S( - ctx, - NKikimrServices::PERSQUEUE, - "Answering for message sourceid: '" << EscapeC(s) << "', Topic: '" << TopicName << "', Partition: " << Partition - << ", SeqNo: " << seqNo << ", partNo: " << partNo << ", Offset: " << offset << " is " << (already ? "already written" : "stored on disk") - ); + LOG_DEBUG_S( + ctx, + NKikimrServices::PERSQUEUE, + "Answering for message sourceid: '" << EscapeC(s) << "', Topic: '" << TopicName << "', Partition: " << Partition + << ", SeqNo: " << seqNo << ", partNo: " << partNo << ", Offset: " << offset << " is " << (already ? "already written" : "stored on disk") + ); if (PartitionWriteQuotaWaitCounter) { PartitionWriteQuotaWaitCounter->IncFor(quotedTime); } - - if (!already && partNo + 1 == totalParts) - ++offset; + + if (!already && partNo + 1 == totalParts) + ++offset; } else if (Responses.front().IsOwnership()) { const TString& ownerCookie = Responses.front().GetOwnership().OwnerCookie; - auto it = Owners.find(TOwnerInfo::GetOwnerFromOwnerCookie(ownerCookie)); - if (it != Owners.end() && it->second.OwnerCookie == ownerCookie) { - ReplyOwnerOk(ctx, Responses.front().GetCookie(), ownerCookie); - } else { - ReplyError(ctx, Responses.front().GetCookie(), NPersQueue::NErrorCode::WRONG_COOKIE, "new GetOwnership request is dropped already"); - } + auto it = Owners.find(TOwnerInfo::GetOwnerFromOwnerCookie(ownerCookie)); + if (it != Owners.end() && it->second.OwnerCookie == ownerCookie) { + ReplyOwnerOk(ctx, Responses.front().GetCookie(), ownerCookie); + } else { + ReplyError(ctx, Responses.front().GetCookie(), NPersQueue::NErrorCode::WRONG_COOKIE, "new GetOwnership request is dropped already"); + } } else if (Responses.front().IsRegisterMessageGroup()) { const auto& body = Responses.front().GetRegisterMessageGroup().Body; @@ -2790,781 +2790,781 @@ void TPartition::AnswerCurrentWrites(const TActorContext& ctx) ReplyOk(ctx, Responses.front().GetCookie()); } else { Y_FAIL("Unexpected message"); - } - Responses.pop_front(); - } + } + Responses.pop_front(); + } TopicQuotaWaitTimeForCurrentBlob = TDuration::Zero(); -} - - +} + + void TPartition::ReadTimestampForOffset(const TString& user, TUserInfo& userInfo, const TActorContext& ctx) -{ +{ if (userInfo.ReadScheduled) - return; + return; userInfo.ReadScheduled = true; - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition << " user " << user << " readTimeStamp for offset " << userInfo.Offset << " initiated " << " queuesize " << UpdateUserInfoTimestamp.size() << " startOffset " << StartOffset << " ReadingTimestamp " << ReadingTimestamp); - - if (ReadingTimestamp) { - UpdateUserInfoTimestamp.push_back(user); - return; - } + + if (ReadingTimestamp) { + UpdateUserInfoTimestamp.push_back(user); + return; + } if (userInfo.Offset < (i64)StartOffset) { userInfo.ReadScheduled = false; auto now = ctx.Now(); - userInfo.CreateTimestamp = now - TDuration::Seconds(Max(86400, Config.GetPartitionConfig().GetLifetimeSeconds())); - userInfo.WriteTimestamp = now - TDuration::Seconds(Max(86400, Config.GetPartitionConfig().GetLifetimeSeconds())); + userInfo.CreateTimestamp = now - TDuration::Seconds(Max(86400, Config.GetPartitionConfig().GetLifetimeSeconds())); + userInfo.WriteTimestamp = now - TDuration::Seconds(Max(86400, Config.GetPartitionConfig().GetLifetimeSeconds())); userInfo.ActualTimestamps = true; if (userInfo.ReadOffset + 1 < userInfo.Offset) { userInfo.ReadOffset = userInfo.Offset - 1; userInfo.ReadCreateTimestamp = userInfo.CreateTimestamp; userInfo.ReadWriteTimestamp = userInfo.WriteTimestamp; - } - - Counters.Cumulative()[COUNTER_PQ_WRITE_TIMESTAMP_OFFSET_IS_LOST].Increment(1); - ReportLabeledCounters(ctx); - return; - } - - if (userInfo.Offset >= (i64)EndOffset || StartOffset == EndOffset) { + } + + Counters.Cumulative()[COUNTER_PQ_WRITE_TIMESTAMP_OFFSET_IS_LOST].Increment(1); + ReportLabeledCounters(ctx); + return; + } + + if (userInfo.Offset >= (i64)EndOffset || StartOffset == EndOffset) { userInfo.ReadScheduled = false; - ReportLabeledCounters(ctx); - return; - } - - Y_VERIFY(!ReadingTimestamp); - - ReadingTimestamp = true; - ReadingForUser = user; + ReportLabeledCounters(ctx); + return; + } + + Y_VERIFY(!ReadingTimestamp); + + ReadingTimestamp = true; + ReadingForUser = user; ReadingForOffset = userInfo.Offset; - ReadingForUserReadRuleGeneration = userInfo.ReadRuleGeneration; - - for (const auto& user : UpdateUserInfoTimestamp) { - Y_VERIFY(user != ReadingForUser); - } - - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition + ReadingForUserReadRuleGeneration = userInfo.ReadRuleGeneration; + + for (const auto& user : UpdateUserInfoTimestamp) { + Y_VERIFY(user != ReadingForUser); + } + + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition << " user " << user << " send read request for offset " << userInfo.Offset << " initiated " << " queuesize " << UpdateUserInfoTimestamp.size() << " startOffset " << StartOffset << " ReadingTimestamp " << ReadingTimestamp); - - + + THolder<TEvPQ::TEvRead> event = MakeHolder<TEvPQ::TEvRead>(0, userInfo.Offset, 0, 1, "", user, 0, MAX_BLOB_PART_SIZE * 2, 0, 0, "", userInfo.DoExternalRead); - ctx.Send(ctx.SelfID, event.Release()); - Counters.Cumulative()[COUNTER_PQ_WRITE_TIMESTAMP_CACHE_MISS].Increment(1); -} - - -void TPartition::ProcessTimestampsForNewData(const ui64 prevEndOffset, const TActorContext& ctx) { + ctx.Send(ctx.SelfID, event.Release()); + Counters.Cumulative()[COUNTER_PQ_WRITE_TIMESTAMP_CACHE_MISS].Increment(1); +} + + +void TPartition::ProcessTimestampsForNewData(const ui64 prevEndOffset, const TActorContext& ctx) { for (auto& userInfoPair : UsersInfoStorage.GetAll()) { if (userInfoPair.second.Offset >= (i64)prevEndOffset && userInfoPair.second.Offset < (i64)EndOffset) { ReadTimestampForOffset(userInfoPair.first, userInfoPair.second, ctx); - } - } -} - - -void TPartition::Handle(TEvPQ::TEvProxyResponse::TPtr& ev, const TActorContext& ctx) -{ - ReadingTimestamp = false; - auto userInfo = UsersInfoStorage.GetIfExists(ReadingForUser); - if (!userInfo || userInfo->ReadRuleGeneration != ReadingForUserReadRuleGeneration) { - ProcessTimestampRead(ctx); - return; - } - Y_VERIFY(userInfo->ReadScheduled); - userInfo->ReadScheduled = false; - Y_VERIFY(ReadingForUser != ""); - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition - << " user " << ReadingForUser << " readTimeStamp done, result " << userInfo->WriteTimestamp.MilliSeconds() - << " queuesize " << UpdateUserInfoTimestamp.size() << " startOffset " << StartOffset); - - if (!userInfo->ActualTimestamps) { + } + } +} + + +void TPartition::Handle(TEvPQ::TEvProxyResponse::TPtr& ev, const TActorContext& ctx) +{ + ReadingTimestamp = false; + auto userInfo = UsersInfoStorage.GetIfExists(ReadingForUser); + if (!userInfo || userInfo->ReadRuleGeneration != ReadingForUserReadRuleGeneration) { + ProcessTimestampRead(ctx); + return; + } + Y_VERIFY(userInfo->ReadScheduled); + userInfo->ReadScheduled = false; + Y_VERIFY(ReadingForUser != ""); + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition + << " user " << ReadingForUser << " readTimeStamp done, result " << userInfo->WriteTimestamp.MilliSeconds() + << " queuesize " << UpdateUserInfoTimestamp.size() << " startOffset " << StartOffset); + + if (!userInfo->ActualTimestamps) { LOG_INFO_S( - ctx, - NKikimrServices::PERSQUEUE, - "Reading Timestamp failed for offset " << ReadingForOffset << " ( "<< userInfo->Offset << " ) " << ev->Get()->Response.DebugString() - ); - if (ev->Get()->Response.GetStatus() == NMsgBusProxy::MSTATUS_OK && ev->Get()->Response.GetErrorCode() == NPersQueue::NErrorCode::OK - && ev->Get()->Response.GetPartitionResponse().HasCmdReadResult() - && ev->Get()->Response.GetPartitionResponse().GetCmdReadResult().ResultSize() > 0 - && (i64)ev->Get()->Response.GetPartitionResponse().GetCmdReadResult().GetResult(0).GetOffset() >= userInfo->Offset) { - //offsets is inside gap - return timestamp of first record after gap - const auto& res = ev->Get()->Response.GetPartitionResponse().GetCmdReadResult().GetResult(0); - userInfo->WriteTimestamp = TInstant::MilliSeconds(res.GetWriteTimestampMS()); - userInfo->CreateTimestamp = TInstant::MilliSeconds(res.GetCreateTimestampMS()); - userInfo->ActualTimestamps = true; - if (userInfo->ReadOffset + 1 < userInfo->Offset) { - userInfo->ReadOffset = userInfo->Offset - 1; - userInfo->ReadWriteTimestamp = userInfo->WriteTimestamp; - userInfo->ReadCreateTimestamp = userInfo->CreateTimestamp; - } - } else { - UpdateUserInfoTimestamp.push_back(ReadingForUser); - userInfo->ReadScheduled = true; - } - Counters.Cumulative()[COUNTER_PQ_WRITE_TIMESTAMP_ERROR].Increment(1); - } - ProcessTimestampRead(ctx); -} - - -void TPartition::ProcessTimestampRead(const TActorContext& ctx) { - ReadingForUser = ""; - ReadingForOffset = 0; - ReadingForUserReadRuleGeneration = 0; - while (!ReadingTimestamp && !UpdateUserInfoTimestamp.empty()) { + ctx, + NKikimrServices::PERSQUEUE, + "Reading Timestamp failed for offset " << ReadingForOffset << " ( "<< userInfo->Offset << " ) " << ev->Get()->Response.DebugString() + ); + if (ev->Get()->Response.GetStatus() == NMsgBusProxy::MSTATUS_OK && ev->Get()->Response.GetErrorCode() == NPersQueue::NErrorCode::OK + && ev->Get()->Response.GetPartitionResponse().HasCmdReadResult() + && ev->Get()->Response.GetPartitionResponse().GetCmdReadResult().ResultSize() > 0 + && (i64)ev->Get()->Response.GetPartitionResponse().GetCmdReadResult().GetResult(0).GetOffset() >= userInfo->Offset) { + //offsets is inside gap - return timestamp of first record after gap + const auto& res = ev->Get()->Response.GetPartitionResponse().GetCmdReadResult().GetResult(0); + userInfo->WriteTimestamp = TInstant::MilliSeconds(res.GetWriteTimestampMS()); + userInfo->CreateTimestamp = TInstant::MilliSeconds(res.GetCreateTimestampMS()); + userInfo->ActualTimestamps = true; + if (userInfo->ReadOffset + 1 < userInfo->Offset) { + userInfo->ReadOffset = userInfo->Offset - 1; + userInfo->ReadWriteTimestamp = userInfo->WriteTimestamp; + userInfo->ReadCreateTimestamp = userInfo->CreateTimestamp; + } + } else { + UpdateUserInfoTimestamp.push_back(ReadingForUser); + userInfo->ReadScheduled = true; + } + Counters.Cumulative()[COUNTER_PQ_WRITE_TIMESTAMP_ERROR].Increment(1); + } + ProcessTimestampRead(ctx); +} + + +void TPartition::ProcessTimestampRead(const TActorContext& ctx) { + ReadingForUser = ""; + ReadingForOffset = 0; + ReadingForUserReadRuleGeneration = 0; + while (!ReadingTimestamp && !UpdateUserInfoTimestamp.empty()) { TString user = UpdateUserInfoTimestamp.front(); - UpdateUserInfoTimestamp.pop_front(); - auto userInfo = UsersInfoStorage.GetIfExists(user); - if (!userInfo || !userInfo->ReadScheduled) - continue; - userInfo->ReadScheduled = false; - if (userInfo->Offset == (i64)EndOffset) - continue; - ReadTimestampForOffset(user, *userInfo, ctx); - } - Y_VERIFY(ReadingTimestamp || UpdateUserInfoTimestamp.empty()); - ReportLabeledCounters(ctx); -} - - -void TPartition::Handle(TEvPQ::TEvError::TPtr& ev, const TActorContext& ctx) -{ - ReadingTimestamp = false; - auto userInfo = UsersInfoStorage.GetIfExists(ReadingForUser); - if (!userInfo || userInfo->ReadRuleGeneration != ReadingForUserReadRuleGeneration) { - ProcessTimestampRead(ctx); - return; - } - Y_VERIFY(userInfo->ReadScheduled); - userInfo->ReadScheduled = false; - Y_VERIFY(ReadingForUser != ""); - - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition - << " user " << ReadingForUser << " readTimeStamp error: " << ev->Get()->Error); - - UpdateUserInfoTimestamp.push_back(ReadingForUser); - userInfo->ReadScheduled = true; - - ProcessTimestampRead(ctx); -} - - -void TPartition::CheckHeadConsistency() const -{ - ui32 p = 0; - for (ui32 j = 0; j < DataKeysHead.size(); ++j) { - ui32 s = 0; - for (ui32 k = 0; k < DataKeysHead[j].KeysCount(); ++k) { - Y_VERIFY(p < HeadKeys.size()); - Y_VERIFY(DataKeysHead[j].GetKey(k) == HeadKeys[p].Key); - Y_VERIFY(DataKeysHead[j].GetSize(k) == HeadKeys[p].Size); - s += DataKeysHead[j].GetSize(k); - Y_VERIFY(j + 1 == TotalLevels || DataKeysHead[j].GetSize(k) >= CompactLevelBorder[j + 1]); - ++p; - } - Y_VERIFY(s < DataKeysHead[j].Border()); - } - Y_VERIFY(DataKeysBody.empty() || Head.Offset >= DataKeysBody.back().Key.GetOffset() + DataKeysBody.back().Key.GetCount()); - Y_VERIFY(p == HeadKeys.size()); - if (!HeadKeys.empty()) { - Y_VERIFY(HeadKeys.size() <= TotalMaxCount); - Y_VERIFY(HeadKeys.front().Key.GetOffset() == Head.Offset); - Y_VERIFY(HeadKeys.front().Key.GetPartNo() == Head.PartNo); - for (p = 1; p < HeadKeys.size(); ++p) { - Y_VERIFY(HeadKeys[p].Key.GetOffset() == HeadKeys[p-1].Key.GetOffset() + HeadKeys[p-1].Key.GetCount()); - Y_VERIFY(HeadKeys[p].Key.ToString() > HeadKeys[p-1].Key.ToString()); - } - } -} - - -void TPartition::SyncMemoryStateWithKVState(const TActorContext& ctx) -{ - if (!CompactedKeys.empty()) - HeadKeys.clear(); - - if (NewHeadKey.Size > 0) { - while (!HeadKeys.empty() && - (HeadKeys.back().Key.GetOffset() > NewHeadKey.Key.GetOffset() || HeadKeys.back().Key.GetOffset() == NewHeadKey.Key.GetOffset() - && HeadKeys.back().Key.GetPartNo() >= NewHeadKey.Key.GetPartNo())) { - HeadKeys.pop_back(); - } - HeadKeys.push_back(NewHeadKey); + UpdateUserInfoTimestamp.pop_front(); + auto userInfo = UsersInfoStorage.GetIfExists(user); + if (!userInfo || !userInfo->ReadScheduled) + continue; + userInfo->ReadScheduled = false; + if (userInfo->Offset == (i64)EndOffset) + continue; + ReadTimestampForOffset(user, *userInfo, ctx); + } + Y_VERIFY(ReadingTimestamp || UpdateUserInfoTimestamp.empty()); + ReportLabeledCounters(ctx); +} + + +void TPartition::Handle(TEvPQ::TEvError::TPtr& ev, const TActorContext& ctx) +{ + ReadingTimestamp = false; + auto userInfo = UsersInfoStorage.GetIfExists(ReadingForUser); + if (!userInfo || userInfo->ReadRuleGeneration != ReadingForUserReadRuleGeneration) { + ProcessTimestampRead(ctx); + return; + } + Y_VERIFY(userInfo->ReadScheduled); + userInfo->ReadScheduled = false; + Y_VERIFY(ReadingForUser != ""); + + LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition + << " user " << ReadingForUser << " readTimeStamp error: " << ev->Get()->Error); + + UpdateUserInfoTimestamp.push_back(ReadingForUser); + userInfo->ReadScheduled = true; + + ProcessTimestampRead(ctx); +} + + +void TPartition::CheckHeadConsistency() const +{ + ui32 p = 0; + for (ui32 j = 0; j < DataKeysHead.size(); ++j) { + ui32 s = 0; + for (ui32 k = 0; k < DataKeysHead[j].KeysCount(); ++k) { + Y_VERIFY(p < HeadKeys.size()); + Y_VERIFY(DataKeysHead[j].GetKey(k) == HeadKeys[p].Key); + Y_VERIFY(DataKeysHead[j].GetSize(k) == HeadKeys[p].Size); + s += DataKeysHead[j].GetSize(k); + Y_VERIFY(j + 1 == TotalLevels || DataKeysHead[j].GetSize(k) >= CompactLevelBorder[j + 1]); + ++p; + } + Y_VERIFY(s < DataKeysHead[j].Border()); + } + Y_VERIFY(DataKeysBody.empty() || Head.Offset >= DataKeysBody.back().Key.GetOffset() + DataKeysBody.back().Key.GetCount()); + Y_VERIFY(p == HeadKeys.size()); + if (!HeadKeys.empty()) { + Y_VERIFY(HeadKeys.size() <= TotalMaxCount); + Y_VERIFY(HeadKeys.front().Key.GetOffset() == Head.Offset); + Y_VERIFY(HeadKeys.front().Key.GetPartNo() == Head.PartNo); + for (p = 1; p < HeadKeys.size(); ++p) { + Y_VERIFY(HeadKeys[p].Key.GetOffset() == HeadKeys[p-1].Key.GetOffset() + HeadKeys[p-1].Key.GetCount()); + Y_VERIFY(HeadKeys[p].Key.ToString() > HeadKeys[p-1].Key.ToString()); + } + } +} + + +void TPartition::SyncMemoryStateWithKVState(const TActorContext& ctx) +{ + if (!CompactedKeys.empty()) + HeadKeys.clear(); + + if (NewHeadKey.Size > 0) { + while (!HeadKeys.empty() && + (HeadKeys.back().Key.GetOffset() > NewHeadKey.Key.GetOffset() || HeadKeys.back().Key.GetOffset() == NewHeadKey.Key.GetOffset() + && HeadKeys.back().Key.GetPartNo() >= NewHeadKey.Key.GetPartNo())) { + HeadKeys.pop_back(); + } + HeadKeys.push_back(NewHeadKey); NewHeadKey = TDataKey{TKey{}, 0, TInstant::Zero(), 0}; - } - - if (CompactedKeys.empty() && NewHead.PackedSize == 0) { //Nothing writed at all - return; - } - - Y_VERIFY(EndOffset == Head.GetNextOffset()); - - if (!CompactedKeys.empty() || Head.PackedSize == 0) { //has compactedkeys or head is already empty - Head.PackedSize = 0; - Head.Offset = NewHead.Offset; - Head.PartNo = NewHead.PartNo; //no partNo at this point - Head.Batches.clear(); - } - - while (!CompactedKeys.empty()) { - const auto& ck = CompactedKeys.front(); - BodySize += ck.second; - Y_VERIFY(!ck.first.IsHead()); - ui64 lastOffset = DataKeysBody.empty() ? 0 : (DataKeysBody.back().Key.GetOffset() + DataKeysBody.back().Key.GetCount()); - Y_VERIFY(lastOffset <= ck.first.GetOffset()); - if (DataKeysBody.empty()) { - StartOffset = ck.first.GetOffset() + (ck.first.GetPartNo() > 0 ? 1 : 0); - } else { - if (lastOffset < ck.first.GetOffset()) { - GapOffsets.push_back(std::make_pair(lastOffset, ck.first.GetOffset())); - GapSize += ck.first.GetOffset() - lastOffset; - } - } + } + + if (CompactedKeys.empty() && NewHead.PackedSize == 0) { //Nothing writed at all + return; + } + + Y_VERIFY(EndOffset == Head.GetNextOffset()); + + if (!CompactedKeys.empty() || Head.PackedSize == 0) { //has compactedkeys or head is already empty + Head.PackedSize = 0; + Head.Offset = NewHead.Offset; + Head.PartNo = NewHead.PartNo; //no partNo at this point + Head.Batches.clear(); + } + + while (!CompactedKeys.empty()) { + const auto& ck = CompactedKeys.front(); + BodySize += ck.second; + Y_VERIFY(!ck.first.IsHead()); + ui64 lastOffset = DataKeysBody.empty() ? 0 : (DataKeysBody.back().Key.GetOffset() + DataKeysBody.back().Key.GetCount()); + Y_VERIFY(lastOffset <= ck.first.GetOffset()); + if (DataKeysBody.empty()) { + StartOffset = ck.first.GetOffset() + (ck.first.GetPartNo() > 0 ? 1 : 0); + } else { + if (lastOffset < ck.first.GetOffset()) { + GapOffsets.push_back(std::make_pair(lastOffset, ck.first.GetOffset())); + GapSize += ck.first.GetOffset() - lastOffset; + } + } DataKeysBody.push_back({ck.first, ck.second, ctx.Now(), DataKeysBody.empty() ? 0 : DataKeysBody.back().CumulativeSize + DataKeysBody.back().Size}); - - CompactedKeys.pop_front(); - } // head cleared, all data moved to body - - //append Head with newHead - while (!NewHead.Batches.empty()) { - Head.Batches.push_back(NewHead.Batches.front()); - NewHead.Batches.pop_front(); - } - Head.PackedSize += NewHead.PackedSize; - - if (Head.PackedSize > 0 && DataKeysBody.empty()) { - StartOffset = Head.Offset + (Head.PartNo > 0 ? 1 : 0); - } - - EndOffset = Head.GetNextOffset(); - NewHead.Clear(); - NewHead.Offset = EndOffset; - - CheckHeadConsistency(); - - UpdateUserInfoEndOffset(ctx.Now()); -} - - -ui64 TPartition::GetSizeLag(i64 offset) -{ - ui64 sizeLag = 0; - if (!DataKeysBody.empty() && (offset < (i64)Head.Offset || offset == (i64)Head.Offset && Head.PartNo > 0)) { //there will be something in body - auto it = std::upper_bound(DataKeysBody.begin(), DataKeysBody.end(), std::make_pair(offset, 0), - [](const std::pair<ui64, ui16>& offsetAndPartNo, const TDataKey& p) { return offsetAndPartNo.first < p.Key.GetOffset() || offsetAndPartNo.first == p.Key.GetOffset() && offsetAndPartNo.second < p.Key.GetPartNo();}); - if (it != DataKeysBody.begin()) - --it; //point to blob with this offset - Y_VERIFY(it != DataKeysBody.end()); - sizeLag = it->Size + DataKeysBody.back().CumulativeSize - it->CumulativeSize; - Y_VERIFY(BodySize == DataKeysBody.back().CumulativeSize + DataKeysBody.back().Size - DataKeysBody.front().CumulativeSize); - } - for (auto& b : HeadKeys) { - if ((i64)b.Key.GetOffset() >= offset) - sizeLag += b.Size; - } - return sizeLag; -} - - -void TPartition::ReportLabeledCounters(const TActorContext& ctx) -{ - //per client counters - const auto now = ctx.Now(); + + CompactedKeys.pop_front(); + } // head cleared, all data moved to body + + //append Head with newHead + while (!NewHead.Batches.empty()) { + Head.Batches.push_back(NewHead.Batches.front()); + NewHead.Batches.pop_front(); + } + Head.PackedSize += NewHead.PackedSize; + + if (Head.PackedSize > 0 && DataKeysBody.empty()) { + StartOffset = Head.Offset + (Head.PartNo > 0 ? 1 : 0); + } + + EndOffset = Head.GetNextOffset(); + NewHead.Clear(); + NewHead.Offset = EndOffset; + + CheckHeadConsistency(); + + UpdateUserInfoEndOffset(ctx.Now()); +} + + +ui64 TPartition::GetSizeLag(i64 offset) +{ + ui64 sizeLag = 0; + if (!DataKeysBody.empty() && (offset < (i64)Head.Offset || offset == (i64)Head.Offset && Head.PartNo > 0)) { //there will be something in body + auto it = std::upper_bound(DataKeysBody.begin(), DataKeysBody.end(), std::make_pair(offset, 0), + [](const std::pair<ui64, ui16>& offsetAndPartNo, const TDataKey& p) { return offsetAndPartNo.first < p.Key.GetOffset() || offsetAndPartNo.first == p.Key.GetOffset() && offsetAndPartNo.second < p.Key.GetPartNo();}); + if (it != DataKeysBody.begin()) + --it; //point to blob with this offset + Y_VERIFY(it != DataKeysBody.end()); + sizeLag = it->Size + DataKeysBody.back().CumulativeSize - it->CumulativeSize; + Y_VERIFY(BodySize == DataKeysBody.back().CumulativeSize + DataKeysBody.back().Size - DataKeysBody.front().CumulativeSize); + } + for (auto& b : HeadKeys) { + if ((i64)b.Key.GetOffset() >= offset) + sizeLag += b.Size; + } + return sizeLag; +} + + +void TPartition::ReportLabeledCounters(const TActorContext& ctx) +{ + //per client counters + const auto now = ctx.Now(); for (auto& userInfoPair : UsersInfoStorage.GetAll()) { auto& userInfo = userInfoPair.second; if (!userInfo.HasReadRule && !userInfo.Important) - continue; - bool haveChanges = false; + continue; + bool haveChanges = false; userInfo.EndOffset = EndOffset; userInfo.UpdateReadingTimeAndState(now); ui64 ts = userInfo.GetWriteTimestamp().MilliSeconds(); - if (ts < MIN_TIMESTAMP_MS) ts = Max<i64>(); + if (ts < MIN_TIMESTAMP_MS) ts = Max<i64>(); if (userInfo.LabeledCounters.GetCounters()[METRIC_COMMIT_WRITE_TIME].Get() != ts) { - haveChanges = true; + haveChanges = true; userInfo.LabeledCounters.GetCounters()[METRIC_COMMIT_WRITE_TIME].Set(ts); - } + } ts = userInfo.GetCreateTimestamp().MilliSeconds(); - if (ts < MIN_TIMESTAMP_MS) ts = Max<i64>(); + if (ts < MIN_TIMESTAMP_MS) ts = Max<i64>(); if (userInfo.LabeledCounters.GetCounters()[METRIC_COMMIT_CREATE_TIME].Get() != ts) { - haveChanges = true; + haveChanges = true; userInfo.LabeledCounters.GetCounters()[METRIC_COMMIT_CREATE_TIME].Set(ts); - } + } ts = userInfo.GetReadWriteTimestamp().MilliSeconds(); if (userInfo.LabeledCounters.GetCounters()[METRIC_READ_WRITE_TIME].Get() != ts) { - haveChanges = true; + haveChanges = true; userInfo.LabeledCounters.GetCounters()[METRIC_READ_WRITE_TIME].Set(ts); - } - + } + i64 off = userInfo.GetReadOffset(); //we want to track first not-readed offset TInstant wts = userInfo.GetReadWriteTimestamp() ? userInfo.GetReadWriteTimestamp() : GetWriteTimeEstimate(userInfo.GetReadOffset()); TInstant readTimestamp = userInfo.GetReadTimestamp(); ui64 readTimeLag = off >= (i64)EndOffset ? 0 : (readTimestamp - wts).MilliSeconds(); ui64 totalLag = userInfo.GetWriteLagMs() + readTimeLag + (now - readTimestamp).MilliSeconds(); - + if (userInfo.LabeledCounters.GetCounters()[METRIC_READ_TOTAL_TIME].Get() != totalLag) { - haveChanges = true; + haveChanges = true; userInfo.LabeledCounters.GetCounters()[METRIC_READ_TOTAL_TIME].Set(totalLag); - } - + } + ts = readTimestamp.MilliSeconds(); if (userInfo.LabeledCounters.GetCounters()[METRIC_LAST_READ_TIME].Get() != ts) { - haveChanges = true; + haveChanges = true; userInfo.LabeledCounters.GetCounters()[METRIC_LAST_READ_TIME].Set(ts); - } - + } + ui64 timeLag = userInfo.GetWriteLagMs(); if (userInfo.LabeledCounters.GetCounters()[METRIC_WRITE_TIME_LAG].Get() != timeLag) { - haveChanges = true; + haveChanges = true; userInfo.LabeledCounters.GetCounters()[METRIC_WRITE_TIME_LAG].Set(timeLag); - } - + } + if (userInfo.LabeledCounters.GetCounters()[METRIC_READ_TIME_LAG].Get() != readTimeLag) { - haveChanges = true; + haveChanges = true; userInfo.LabeledCounters.GetCounters()[METRIC_READ_TIME_LAG].Set(readTimeLag); - } - + } + if (userInfo.LabeledCounters.GetCounters()[METRIC_COMMIT_MESSAGE_LAG].Get() != EndOffset - userInfo.Offset) { - haveChanges = true; + haveChanges = true; userInfo.LabeledCounters.GetCounters()[METRIC_COMMIT_MESSAGE_LAG].Set(EndOffset - userInfo.Offset); - } - - + } + + if (userInfo.LabeledCounters.GetCounters()[METRIC_READ_MESSAGE_LAG].Get() != EndOffset - off) { - haveChanges = true; + haveChanges = true; userInfo.LabeledCounters.GetCounters()[METRIC_READ_MESSAGE_LAG].Set(EndOffset - off); userInfo.LabeledCounters.GetCounters()[METRIC_READ_TOTAL_MESSAGE_LAG].Set(EndOffset - off); - } + } ui64 sizeLag = GetSizeLag(userInfo.Offset); ui64 sizeLagRead = GetSizeLag(userInfo.ReadOffset); if (userInfo.LabeledCounters.GetCounters()[METRIC_COMMIT_SIZE_LAG].Get() != sizeLag) { - haveChanges = true; + haveChanges = true; userInfo.LabeledCounters.GetCounters()[METRIC_COMMIT_SIZE_LAG].Set(sizeLag); - } + } if (userInfo.LabeledCounters.GetCounters()[METRIC_READ_SIZE_LAG].Get() != sizeLagRead) { - haveChanges = true; + haveChanges = true; userInfo.LabeledCounters.GetCounters()[METRIC_READ_SIZE_LAG].Set(sizeLagRead); userInfo.LabeledCounters.GetCounters()[METRIC_READ_TOTAL_SIZE_LAG].Set(sizeLag); - } + } if (userInfo.LabeledCounters.GetCounters()[METRIC_USER_PARTITIONS].Get() == 0) { - haveChanges = true; + haveChanges = true; userInfo.LabeledCounters.GetCounters()[METRIC_USER_PARTITIONS].Set(1); - } - + } + ui64 speed = userInfo.ReadQuota.GetTotalSpeed(); if (speed != userInfo.LabeledCounters.GetCounters()[METRIC_READ_QUOTA_BYTES].Get()) { - haveChanges = true; + haveChanges = true; userInfo.LabeledCounters.GetCounters()[METRIC_READ_QUOTA_BYTES].Set(speed); - } - + } + ui64 availSec = userInfo.ReadQuota.GetAvailableAvgSec(ctx.Now()); if (availSec != userInfo.LabeledCounters.GetCounters()[METRIC_MIN_READ_QUOTA_BYTES_AVAIL_SEC].Get()) { - haveChanges = true; + haveChanges = true; userInfo.LabeledCounters.GetCounters()[METRIC_MIN_READ_QUOTA_BYTES_AVAIL_SEC].Set(availSec); - } - + } + ui64 availMin = userInfo.ReadQuota.GetAvailableAvgMin(ctx.Now()); if (availMin != userInfo.LabeledCounters.GetCounters()[METRIC_MIN_READ_QUOTA_BYTES_AVAIL_MIN].Get()) { - haveChanges = true; + haveChanges = true; userInfo.LabeledCounters.GetCounters()[METRIC_MIN_READ_QUOTA_BYTES_AVAIL_MIN].Set(availMin); - } - + } + ui64 readOffsetRewindSum = userInfo.ReadOffsetRewindSum; if (readOffsetRewindSum != userInfo.LabeledCounters.GetCounters()[METRIC_READ_OFFSET_REWIND_SUM].Get()) { haveChanges = true; userInfo.LabeledCounters.GetCounters()[METRIC_READ_OFFSET_REWIND_SUM].Set(readOffsetRewindSum); } - if (readOffsetRewindSum != userInfo.LabeledCounters.GetCounters()[METRIC_READ_OFFSET_REWIND_TOTAL].Get()) { - haveChanges = true; - userInfo.LabeledCounters.GetCounters()[METRIC_READ_OFFSET_REWIND_TOTAL].Set(readOffsetRewindSum); - } + if (readOffsetRewindSum != userInfo.LabeledCounters.GetCounters()[METRIC_READ_OFFSET_REWIND_TOTAL].Get()) { + haveChanges = true; + userInfo.LabeledCounters.GetCounters()[METRIC_READ_OFFSET_REWIND_TOTAL].Set(readOffsetRewindSum); + } - ui32 id = METRIC_TOTAL_READ_SPEED_1; + ui32 id = METRIC_TOTAL_READ_SPEED_1; for (ui32 i = 0; i < userInfo.AvgReadBytes.size(); ++i) { ui64 avg = userInfo.AvgReadBytes[i].GetValue(); if (avg != userInfo.LabeledCounters.GetCounters()[id].Get()) { - haveChanges = true; + haveChanges = true; userInfo.LabeledCounters.GetCounters()[id].Set(avg); //total userInfo.LabeledCounters.GetCounters()[id + 1].Set(avg); //max - } - id += 2; - } - Y_VERIFY(id == METRIC_MAX_READ_SPEED_4 + 1); + } + id += 2; + } + Y_VERIFY(id == METRIC_MAX_READ_SPEED_4 + 1); if (userInfo.ReadQuota.GetTotalSpeed()) { ui64 quotaUsage = ui64(userInfo.AvgReadBytes[1].GetValue()) * 1000000 / userInfo.ReadQuota.GetTotalSpeed() / 60; if (quotaUsage != userInfo.LabeledCounters.GetCounters()[METRIC_READ_QUOTA_USAGE].Get()) { - haveChanges = true; + haveChanges = true; userInfo.LabeledCounters.GetCounters()[METRIC_READ_QUOTA_USAGE].Set(quotaUsage); - } - } - if (haveChanges) { + } + } + if (haveChanges) { ctx.Send(Tablet, new TEvPQ::TEvPartitionLabeledCounters(Partition, userInfo.LabeledCounters)); - } - } - //Partition counters - bool haveChanges = false; + } + } + //Partition counters + bool haveChanges = false; if (SourceIdStorage.GetInMemorySourceIds().size() != PartitionLabeledCounters.GetCounters()[METRIC_MAX_NUM_SIDS].Get()) { - haveChanges = true; + haveChanges = true; PartitionLabeledCounters.GetCounters()[METRIC_MAX_NUM_SIDS].Set(SourceIdStorage.GetInMemorySourceIds().size()); PartitionLabeledCounters.GetCounters()[METRIC_NUM_SIDS].Set(SourceIdStorage.GetInMemorySourceIds().size()); - } - + } + TDuration lifetimeNow = ctx.Now() - SourceIdStorage.MinAvailableTimestamp(ctx.Now()); if (lifetimeNow.MilliSeconds() != PartitionLabeledCounters.GetCounters()[METRIC_MIN_SID_LIFETIME].Get()) { haveChanges = true; PartitionLabeledCounters.GetCounters()[METRIC_MIN_SID_LIFETIME].Set(lifetimeNow.MilliSeconds()); } - ui64 headGapSize = DataKeysBody.empty() ? 0 : (Head.Offset - (DataKeysBody.back().Key.GetOffset() + DataKeysBody.back().Key.GetCount())); - ui64 gapSize = GapSize + headGapSize; - ui32 gapsCount = GapOffsets.size() + (headGapSize ? 1 : 0); - - if (gapSize != PartitionLabeledCounters.GetCounters()[METRIC_GAPS_SIZE].Get()) { - haveChanges = true; - PartitionLabeledCounters.GetCounters()[METRIC_MAX_GAPS_SIZE].Set(gapSize); - PartitionLabeledCounters.GetCounters()[METRIC_GAPS_SIZE].Set(gapSize); - } - if (gapsCount != PartitionLabeledCounters.GetCounters()[METRIC_GAPS_COUNT].Get()) { - haveChanges = true; - PartitionLabeledCounters.GetCounters()[METRIC_MAX_GAPS_COUNT].Set(gapsCount); - PartitionLabeledCounters.GetCounters()[METRIC_GAPS_COUNT].Set(gapsCount); - } - - ui64 speed = WriteQuota.GetTotalSpeed(); - if (speed != PartitionLabeledCounters.GetCounters()[METRIC_WRITE_QUOTA_BYTES].Get()) { - haveChanges = true; - PartitionLabeledCounters.GetCounters()[METRIC_WRITE_QUOTA_BYTES].Set(speed); - } - - ui64 availSec = WriteQuota.GetAvailableAvgSec(ctx.Now()); - if (availSec != PartitionLabeledCounters.GetCounters()[METRIC_MIN_WRITE_QUOTA_BYTES_AVAIL_SEC].Get()) { - haveChanges = true; - PartitionLabeledCounters.GetCounters()[METRIC_MIN_WRITE_QUOTA_BYTES_AVAIL_SEC].Set(availSec); - } - - ui64 availMin = WriteQuota.GetAvailableAvgMin(ctx.Now()); - if (availMin != PartitionLabeledCounters.GetCounters()[METRIC_MIN_WRITE_QUOTA_BYTES_AVAIL_MIN].Get()) { - haveChanges = true; - PartitionLabeledCounters.GetCounters()[METRIC_MIN_WRITE_QUOTA_BYTES_AVAIL_MIN].Set(availMin); - } - - ui32 id = METRIC_TOTAL_WRITE_SPEED_1; - for (ui32 i = 0; i < AvgWriteBytes.size(); ++i) { + ui64 headGapSize = DataKeysBody.empty() ? 0 : (Head.Offset - (DataKeysBody.back().Key.GetOffset() + DataKeysBody.back().Key.GetCount())); + ui64 gapSize = GapSize + headGapSize; + ui32 gapsCount = GapOffsets.size() + (headGapSize ? 1 : 0); + + if (gapSize != PartitionLabeledCounters.GetCounters()[METRIC_GAPS_SIZE].Get()) { + haveChanges = true; + PartitionLabeledCounters.GetCounters()[METRIC_MAX_GAPS_SIZE].Set(gapSize); + PartitionLabeledCounters.GetCounters()[METRIC_GAPS_SIZE].Set(gapSize); + } + if (gapsCount != PartitionLabeledCounters.GetCounters()[METRIC_GAPS_COUNT].Get()) { + haveChanges = true; + PartitionLabeledCounters.GetCounters()[METRIC_MAX_GAPS_COUNT].Set(gapsCount); + PartitionLabeledCounters.GetCounters()[METRIC_GAPS_COUNT].Set(gapsCount); + } + + ui64 speed = WriteQuota.GetTotalSpeed(); + if (speed != PartitionLabeledCounters.GetCounters()[METRIC_WRITE_QUOTA_BYTES].Get()) { + haveChanges = true; + PartitionLabeledCounters.GetCounters()[METRIC_WRITE_QUOTA_BYTES].Set(speed); + } + + ui64 availSec = WriteQuota.GetAvailableAvgSec(ctx.Now()); + if (availSec != PartitionLabeledCounters.GetCounters()[METRIC_MIN_WRITE_QUOTA_BYTES_AVAIL_SEC].Get()) { + haveChanges = true; + PartitionLabeledCounters.GetCounters()[METRIC_MIN_WRITE_QUOTA_BYTES_AVAIL_SEC].Set(availSec); + } + + ui64 availMin = WriteQuota.GetAvailableAvgMin(ctx.Now()); + if (availMin != PartitionLabeledCounters.GetCounters()[METRIC_MIN_WRITE_QUOTA_BYTES_AVAIL_MIN].Get()) { + haveChanges = true; + PartitionLabeledCounters.GetCounters()[METRIC_MIN_WRITE_QUOTA_BYTES_AVAIL_MIN].Set(availMin); + } + + ui32 id = METRIC_TOTAL_WRITE_SPEED_1; + for (ui32 i = 0; i < AvgWriteBytes.size(); ++i) { ui64 avg = AvgWriteBytes[i].GetValue(); - if (avg != PartitionLabeledCounters.GetCounters()[id].Get()) { - haveChanges = true; - PartitionLabeledCounters.GetCounters()[id].Set(avg); //total - PartitionLabeledCounters.GetCounters()[id + 1].Set(avg); //max - } - id += 2; - } - Y_VERIFY(id == METRIC_MAX_WRITE_SPEED_4 + 1); - - - id = METRIC_TOTAL_QUOTA_SPEED_1; - for (ui32 i = 0; i < AvgQuotaBytes.size(); ++i) { - ui64 avg = AvgQuotaBytes[i].GetValue(); - if (avg != PartitionLabeledCounters.GetCounters()[id].Get()) { - haveChanges = true; - PartitionLabeledCounters.GetCounters()[id].Set(avg); //total - PartitionLabeledCounters.GetCounters()[id + 1].Set(avg); //max - } - id += 2; - } - Y_VERIFY(id == METRIC_MAX_QUOTA_SPEED_4 + 1); - - if (WriteQuota.GetTotalSpeed()) { - ui64 quotaUsage = ui64(AvgQuotaBytes[1].GetValue()) * 1000000 / WriteQuota.GetTotalSpeed() / 60; - if (quotaUsage != PartitionLabeledCounters.GetCounters()[METRIC_WRITE_QUOTA_USAGE].Get()) { - haveChanges = true; - PartitionLabeledCounters.GetCounters()[METRIC_WRITE_QUOTA_USAGE].Set(quotaUsage); - } - } - - ui64 partSize = BodySize + Head.PackedSize; - if (partSize != PartitionLabeledCounters.GetCounters()[METRIC_TOTAL_PART_SIZE].Get()) { - haveChanges = true; - PartitionLabeledCounters.GetCounters()[METRIC_MAX_PART_SIZE].Set(partSize); - PartitionLabeledCounters.GetCounters()[METRIC_TOTAL_PART_SIZE].Set(partSize); - } - - ui64 ts = WriteTimestamp.MilliSeconds(); - if (ts < MIN_TIMESTAMP_MS) ts = Max<i64>(); - if (PartitionLabeledCounters.GetCounters()[METRIC_LAST_WRITE_TIME].Get() != ts) { - haveChanges = true; - PartitionLabeledCounters.GetCounters()[METRIC_LAST_WRITE_TIME].Set(ts); - } - - ui64 timeLag = WriteLagMs.GetValue(); - if (PartitionLabeledCounters.GetCounters()[METRIC_WRITE_TIME_LAG_MS].Get() != timeLag) { - haveChanges = true; - PartitionLabeledCounters.GetCounters()[METRIC_WRITE_TIME_LAG_MS].Set(timeLag); - } - - if (haveChanges) { - ctx.Send(Tablet, new TEvPQ::TEvPartitionLabeledCounters(Partition, PartitionLabeledCounters)); - } -} - - -void TPartition::Handle(TEvKeyValue::TEvResponse::TPtr& ev, const TActorContext& ctx) { - - auto& response = ev->Get()->Record; - - //check correctness of response - if (response.GetStatus() != NMsgBusProxy::MSTATUS_OK) { + if (avg != PartitionLabeledCounters.GetCounters()[id].Get()) { + haveChanges = true; + PartitionLabeledCounters.GetCounters()[id].Set(avg); //total + PartitionLabeledCounters.GetCounters()[id + 1].Set(avg); //max + } + id += 2; + } + Y_VERIFY(id == METRIC_MAX_WRITE_SPEED_4 + 1); + + + id = METRIC_TOTAL_QUOTA_SPEED_1; + for (ui32 i = 0; i < AvgQuotaBytes.size(); ++i) { + ui64 avg = AvgQuotaBytes[i].GetValue(); + if (avg != PartitionLabeledCounters.GetCounters()[id].Get()) { + haveChanges = true; + PartitionLabeledCounters.GetCounters()[id].Set(avg); //total + PartitionLabeledCounters.GetCounters()[id + 1].Set(avg); //max + } + id += 2; + } + Y_VERIFY(id == METRIC_MAX_QUOTA_SPEED_4 + 1); + + if (WriteQuota.GetTotalSpeed()) { + ui64 quotaUsage = ui64(AvgQuotaBytes[1].GetValue()) * 1000000 / WriteQuota.GetTotalSpeed() / 60; + if (quotaUsage != PartitionLabeledCounters.GetCounters()[METRIC_WRITE_QUOTA_USAGE].Get()) { + haveChanges = true; + PartitionLabeledCounters.GetCounters()[METRIC_WRITE_QUOTA_USAGE].Set(quotaUsage); + } + } + + ui64 partSize = BodySize + Head.PackedSize; + if (partSize != PartitionLabeledCounters.GetCounters()[METRIC_TOTAL_PART_SIZE].Get()) { + haveChanges = true; + PartitionLabeledCounters.GetCounters()[METRIC_MAX_PART_SIZE].Set(partSize); + PartitionLabeledCounters.GetCounters()[METRIC_TOTAL_PART_SIZE].Set(partSize); + } + + ui64 ts = WriteTimestamp.MilliSeconds(); + if (ts < MIN_TIMESTAMP_MS) ts = Max<i64>(); + if (PartitionLabeledCounters.GetCounters()[METRIC_LAST_WRITE_TIME].Get() != ts) { + haveChanges = true; + PartitionLabeledCounters.GetCounters()[METRIC_LAST_WRITE_TIME].Set(ts); + } + + ui64 timeLag = WriteLagMs.GetValue(); + if (PartitionLabeledCounters.GetCounters()[METRIC_WRITE_TIME_LAG_MS].Get() != timeLag) { + haveChanges = true; + PartitionLabeledCounters.GetCounters()[METRIC_WRITE_TIME_LAG_MS].Set(timeLag); + } + + if (haveChanges) { + ctx.Send(Tablet, new TEvPQ::TEvPartitionLabeledCounters(Partition, PartitionLabeledCounters)); + } +} + + +void TPartition::Handle(TEvKeyValue::TEvResponse::TPtr& ev, const TActorContext& ctx) { + + auto& response = ev->Get()->Record; + + //check correctness of response + if (response.GetStatus() != NMsgBusProxy::MSTATUS_OK) { LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "OnWrite topic '" << TopicName << "' partition " << Partition << " commands are not processed at all, reason: " << response.DebugString()); - ctx.Send(Tablet, new TEvents::TEvPoisonPill()); - //TODO: if status is DISK IS FULL, is global status MSTATUS_OK? it will be good if it is true - return; - } - if (response.DeleteRangeResultSize()) { - for (ui32 i = 0; i < response.DeleteRangeResultSize(); ++i) { - if (response.GetDeleteRangeResult(i).GetStatus() != NKikimrProto::OK) { - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "OnWrite topic '" << TopicName << "' partition " - << Partition << " delete range error"); - //TODO: if disk is full, could this be ok? delete must be ok, of course - ctx.Send(Tablet, new TEvents::TEvPoisonPill()); - return; - } - } - } - - if (response.WriteResultSize()) { - bool diskIsOk = true; - for (ui32 i = 0; i < response.WriteResultSize(); ++i) { - if (response.GetWriteResult(i).GetStatus() != NKikimrProto::OK) { - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "OnWrite topic '" << TopicName << "' partition " - << Partition << " write error"); - ctx.Send(Tablet, new TEvents::TEvPoisonPill()); - return; - } - diskIsOk = diskIsOk && CheckDiskStatus(response.GetWriteResult(i).GetStatusFlags()); - } - DiskIsFull = !diskIsOk; - } - bool diskIsOk = true; - for (ui32 i = 0; i < response.GetStatusResultSize(); ++i) { - auto& res = response.GetGetStatusResult(i); - if (res.GetStatus() != NKikimrProto::OK) { - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "OnWrite topic '" << TopicName << "' partition " << Partition << - " are not processed at all, got KV error in CmdGetStatus " << res.GetStatus()); - ctx.Send(Tablet, new TEvents::TEvPoisonPill()); - return; - } - diskIsOk = diskIsOk && CheckDiskStatus(res.GetStatusFlags()); - } - if (response.GetStatusResultSize()) - DiskIsFull = !diskIsOk; - - if (response.HasCookie()) { - HandleSetOffsetResponse(response, ctx); - } else { - if (ctx.Now() - WriteStartTime > TDuration::MilliSeconds(AppData(ctx)->PQConfig.GetMinWriteLatencyMs())) { - HandleWriteResponse(ctx); - } else { - ctx.Schedule(TDuration::MilliSeconds(AppData(ctx)->PQConfig.GetMinWriteLatencyMs()) - (ctx.Now() - WriteStartTime), new TEvPQ::TEvHandleWriteResponse()); - } - } -} - -void TPartition::Handle(TEvPQ::TEvHandleWriteResponse::TPtr&, const TActorContext& ctx) { - HandleWriteResponse(ctx); -} - + ctx.Send(Tablet, new TEvents::TEvPoisonPill()); + //TODO: if status is DISK IS FULL, is global status MSTATUS_OK? it will be good if it is true + return; + } + if (response.DeleteRangeResultSize()) { + for (ui32 i = 0; i < response.DeleteRangeResultSize(); ++i) { + if (response.GetDeleteRangeResult(i).GetStatus() != NKikimrProto::OK) { + LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "OnWrite topic '" << TopicName << "' partition " + << Partition << " delete range error"); + //TODO: if disk is full, could this be ok? delete must be ok, of course + ctx.Send(Tablet, new TEvents::TEvPoisonPill()); + return; + } + } + } + + if (response.WriteResultSize()) { + bool diskIsOk = true; + for (ui32 i = 0; i < response.WriteResultSize(); ++i) { + if (response.GetWriteResult(i).GetStatus() != NKikimrProto::OK) { + LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "OnWrite topic '" << TopicName << "' partition " + << Partition << " write error"); + ctx.Send(Tablet, new TEvents::TEvPoisonPill()); + return; + } + diskIsOk = diskIsOk && CheckDiskStatus(response.GetWriteResult(i).GetStatusFlags()); + } + DiskIsFull = !diskIsOk; + } + bool diskIsOk = true; + for (ui32 i = 0; i < response.GetStatusResultSize(); ++i) { + auto& res = response.GetGetStatusResult(i); + if (res.GetStatus() != NKikimrProto::OK) { + LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "OnWrite topic '" << TopicName << "' partition " << Partition << + " are not processed at all, got KV error in CmdGetStatus " << res.GetStatus()); + ctx.Send(Tablet, new TEvents::TEvPoisonPill()); + return; + } + diskIsOk = diskIsOk && CheckDiskStatus(res.GetStatusFlags()); + } + if (response.GetStatusResultSize()) + DiskIsFull = !diskIsOk; + + if (response.HasCookie()) { + HandleSetOffsetResponse(response, ctx); + } else { + if (ctx.Now() - WriteStartTime > TDuration::MilliSeconds(AppData(ctx)->PQConfig.GetMinWriteLatencyMs())) { + HandleWriteResponse(ctx); + } else { + ctx.Schedule(TDuration::MilliSeconds(AppData(ctx)->PQConfig.GetMinWriteLatencyMs()) - (ctx.Now() - WriteStartTime), new TEvPQ::TEvHandleWriteResponse()); + } + } +} + +void TPartition::Handle(TEvPQ::TEvHandleWriteResponse::TPtr&, const TActorContext& ctx) { + HandleWriteResponse(ctx); +} + void TPartition::HandleSetOffsetResponse(NKikimrClient::TResponse& response, const TActorContext& ctx) { - ui64 cookie = response.GetCookie(); - auto it = CookieToUser.find(cookie); - Y_VERIFY(it != CookieToUser.end()); + ui64 cookie = response.GetCookie(); + auto it = CookieToUser.find(cookie); + Y_VERIFY(it != CookieToUser.end()); TString user = it->second; - CookieToUser.erase(it); - + CookieToUser.erase(it); + TUserInfo* userInfo = UsersInfoStorage.GetIfExists(user); Y_VERIFY(userInfo); - - Y_VERIFY(!userInfo->UserActs.empty()); - - auto ev = userInfo->UserActs.front(); - userInfo->UserActs.pop_front(); - - ui64 offset = ev->Offset; - const TString& session = ev->SessionId; - ui32 generation = ev->Generation; - ui32 step = ev->Step; - const ui64 readRuleGeneration = ev->ReadRuleGeneration; - bool setSession = ev->Type == TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION; - bool dropSession = ev->Type == TEvPQ::TEvSetClientInfo::ESCI_DROP_SESSION; - if (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_DROP_READ_RULE) { - userInfo->ReadRuleGeneration = 0; - userInfo->Session = ""; - userInfo->Generation = userInfo->Step = 0; - userInfo->Offset = 0; - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition - << " user " << user << " drop done; " << (userInfo->UserActs.empty() ? "dropping state" : "preserve state for existed init")); - - while (!userInfo->UserActs.empty() && userInfo->UserActs.front()->Type != TEvPQ::TEvSetClientInfo::ESCI_INIT_READ_RULE) { - if (userInfo->UserActs.front()->Type != TEvPQ::TEvSetClientInfo::ESCI_DROP_READ_RULE) { - ReplyError(ctx, userInfo->UserActs.front()->Cookie, NPersQueue::NErrorCode::WRONG_COOKIE, - TStringBuilder() << "request to deleted read rule "); - } - userInfo->UserActs.pop_front(); - } - if (userInfo->UserActs.empty()) { - UsersInfoStorage.Remove(user, ctx); - return; - } - } else if (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_INIT_READ_RULE) { - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition - << " user " << user << " reinit with generation " << readRuleGeneration << " done"); - userInfo->ReadRuleGeneration = readRuleGeneration; + + Y_VERIFY(!userInfo->UserActs.empty()); + + auto ev = userInfo->UserActs.front(); + userInfo->UserActs.pop_front(); + + ui64 offset = ev->Offset; + const TString& session = ev->SessionId; + ui32 generation = ev->Generation; + ui32 step = ev->Step; + const ui64 readRuleGeneration = ev->ReadRuleGeneration; + bool setSession = ev->Type == TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION; + bool dropSession = ev->Type == TEvPQ::TEvSetClientInfo::ESCI_DROP_SESSION; + if (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_DROP_READ_RULE) { + userInfo->ReadRuleGeneration = 0; + userInfo->Session = ""; + userInfo->Generation = userInfo->Step = 0; + userInfo->Offset = 0; + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition + << " user " << user << " drop done; " << (userInfo->UserActs.empty() ? "dropping state" : "preserve state for existed init")); + + while (!userInfo->UserActs.empty() && userInfo->UserActs.front()->Type != TEvPQ::TEvSetClientInfo::ESCI_INIT_READ_RULE) { + if (userInfo->UserActs.front()->Type != TEvPQ::TEvSetClientInfo::ESCI_DROP_READ_RULE) { + ReplyError(ctx, userInfo->UserActs.front()->Cookie, NPersQueue::NErrorCode::WRONG_COOKIE, + TStringBuilder() << "request to deleted read rule "); + } + userInfo->UserActs.pop_front(); + } + if (userInfo->UserActs.empty()) { + UsersInfoStorage.Remove(user, ctx); + return; + } + } else if (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_INIT_READ_RULE) { + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition + << " user " << user << " reinit with generation " << readRuleGeneration << " done"); + userInfo->ReadRuleGeneration = readRuleGeneration; userInfo->Session = ""; - userInfo->Generation = userInfo->Step = 0; - userInfo->Offset = 0; - } else { - if (setSession || dropSession) { - offset = userInfo->Offset; - auto ts = GetTime(*userInfo, offset); - ReplyGetClientOffsetOk(ctx, ev->Cookie, offset, ts.first, ts.second); - } else { - ReplyOk(ctx, ev->Cookie); - } - - if (setSession) { - userInfo->Session = session; - userInfo->Generation = generation; - userInfo->Step = step; - } - if (dropSession) { - userInfo->Session = ""; - userInfo->Generation = 0; - userInfo->Step = 0; - } - Y_VERIFY(offset <= (ui64)Max<i64>(), "Unexpected Offset: %" PRIu64, offset); - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition - << " user " << user << (setSession || dropSession ? " session" : " offset") - << " is set to " << offset << " (startOffset " << StartOffset << ") session " << session); - i64 poffset = userInfo->Offset; - userInfo->Offset = offset; - if (poffset != userInfo->Offset && !userInfo->UpdateTimestampFromCache()) { - userInfo->ActualTimestamps = false; - ReadTimestampForOffset(user, *userInfo, ctx); - } else { - Counters.Cumulative()[COUNTER_PQ_WRITE_TIMESTAMP_CACHE_HIT].Increment(1); - } - auto counter = setSession ? COUNTER_PQ_CREATE_SESSION_OK : (dropSession ? COUNTER_PQ_DELETE_SESSION_OK : COUNTER_PQ_SET_CLIENT_OFFSET_OK); - Counters.Cumulative()[counter].Increment(1); - } - userInfo->WriteInProgress = false; - ProcessUserActs(*userInfo, ctx); -} - - -void TPartition::ScheduleUpdateAvailableSize(const TActorContext& ctx) -{ + userInfo->Generation = userInfo->Step = 0; + userInfo->Offset = 0; + } else { + if (setSession || dropSession) { + offset = userInfo->Offset; + auto ts = GetTime(*userInfo, offset); + ReplyGetClientOffsetOk(ctx, ev->Cookie, offset, ts.first, ts.second); + } else { + ReplyOk(ctx, ev->Cookie); + } + + if (setSession) { + userInfo->Session = session; + userInfo->Generation = generation; + userInfo->Step = step; + } + if (dropSession) { + userInfo->Session = ""; + userInfo->Generation = 0; + userInfo->Step = 0; + } + Y_VERIFY(offset <= (ui64)Max<i64>(), "Unexpected Offset: %" PRIu64, offset); + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition + << " user " << user << (setSession || dropSession ? " session" : " offset") + << " is set to " << offset << " (startOffset " << StartOffset << ") session " << session); + i64 poffset = userInfo->Offset; + userInfo->Offset = offset; + if (poffset != userInfo->Offset && !userInfo->UpdateTimestampFromCache()) { + userInfo->ActualTimestamps = false; + ReadTimestampForOffset(user, *userInfo, ctx); + } else { + Counters.Cumulative()[COUNTER_PQ_WRITE_TIMESTAMP_CACHE_HIT].Increment(1); + } + auto counter = setSession ? COUNTER_PQ_CREATE_SESSION_OK : (dropSession ? COUNTER_PQ_DELETE_SESSION_OK : COUNTER_PQ_SET_CLIENT_OFFSET_OK); + Counters.Cumulative()[counter].Increment(1); + } + userInfo->WriteInProgress = false; + ProcessUserActs(*userInfo, ctx); +} + + +void TPartition::ScheduleUpdateAvailableSize(const TActorContext& ctx) +{ ctx.Schedule(UPDATE_AVAIL_SIZE_INTERVAL, new TEvPQ::TEvUpdateAvailableSize()); -} - - -void TQuotaTracker::Update(const TInstant& timestamp) { - ui64 ms = (timestamp - LastUpdateTime).MilliSeconds(); - LastUpdateTime += TDuration::MilliSeconds(ms); - - if (AvailableSize < 0) { - QuotedTime += ms; - } - - AvailableSize = Min<i64>(AvailableSize + (ui64)SpeedPerSecond * ms / 1000, MaxBurst); - AvgMin.Update(AvailableSize, timestamp.MilliSeconds()); - AvgSec.Update(AvailableSize, timestamp.MilliSeconds()); -} - - -void TPartition::HandleWriteResponse(const TActorContext& ctx) { - - Y_VERIFY(CurrentStateFunc() == &TThis::StateWrite); - ui64 prevEndOffset = EndOffset; - - ui32 totalLatencyMs = (ctx.Now() - WriteCycleStartTime).MilliSeconds(); - ui32 writeLatencyMs = (ctx.Now() - WriteStartTime).MilliSeconds(); - - WriteLatency.IncFor(writeLatencyMs, 1); - if (writeLatencyMs >= AppData(ctx)->PQConfig.GetWriteLatencyBigMs()) { - SLIBigLatency.Inc(); - } - - Counters.Percentile()[COUNTER_LATENCY_PQ_WRITE_CYCLE].IncrementFor(totalLatencyMs); - Counters.Cumulative()[COUNTER_PQ_WRITE_CYCLE_BYTES_TOTAL].Increment(WriteCycleSize); - Counters.Cumulative()[COUNTER_PQ_WRITE_BYTES_OK].Increment(WriteNewSize); - Counters.Percentile()[COUNTER_PQ_WRITE_CYCLE_BYTES].IncrementFor(WriteCycleSize); - Counters.Percentile()[COUNTER_PQ_WRITE_NEW_BYTES].IncrementFor(WriteNewSize); - if (BytesWritten) +} + + +void TQuotaTracker::Update(const TInstant& timestamp) { + ui64 ms = (timestamp - LastUpdateTime).MilliSeconds(); + LastUpdateTime += TDuration::MilliSeconds(ms); + + if (AvailableSize < 0) { + QuotedTime += ms; + } + + AvailableSize = Min<i64>(AvailableSize + (ui64)SpeedPerSecond * ms / 1000, MaxBurst); + AvgMin.Update(AvailableSize, timestamp.MilliSeconds()); + AvgSec.Update(AvailableSize, timestamp.MilliSeconds()); +} + + +void TPartition::HandleWriteResponse(const TActorContext& ctx) { + + Y_VERIFY(CurrentStateFunc() == &TThis::StateWrite); + ui64 prevEndOffset = EndOffset; + + ui32 totalLatencyMs = (ctx.Now() - WriteCycleStartTime).MilliSeconds(); + ui32 writeLatencyMs = (ctx.Now() - WriteStartTime).MilliSeconds(); + + WriteLatency.IncFor(writeLatencyMs, 1); + if (writeLatencyMs >= AppData(ctx)->PQConfig.GetWriteLatencyBigMs()) { + SLIBigLatency.Inc(); + } + + Counters.Percentile()[COUNTER_LATENCY_PQ_WRITE_CYCLE].IncrementFor(totalLatencyMs); + Counters.Cumulative()[COUNTER_PQ_WRITE_CYCLE_BYTES_TOTAL].Increment(WriteCycleSize); + Counters.Cumulative()[COUNTER_PQ_WRITE_BYTES_OK].Increment(WriteNewSize); + Counters.Percentile()[COUNTER_PQ_WRITE_CYCLE_BYTES].IncrementFor(WriteCycleSize); + Counters.Percentile()[COUNTER_PQ_WRITE_NEW_BYTES].IncrementFor(WriteNewSize); + if (BytesWritten) BytesWritten.Inc(WriteNewSizeInternal); - if (BytesWrittenUncompressed) - BytesWrittenUncompressed.Inc(WriteNewSizeUncompressed); - if (BytesWrittenComp) - BytesWrittenComp.Inc(WriteCycleSize); - if (MsgsWritten) + if (BytesWrittenUncompressed) + BytesWrittenUncompressed.Inc(WriteNewSizeUncompressed); + if (BytesWrittenComp) + BytesWrittenComp.Inc(WriteCycleSize); + if (MsgsWritten) MsgsWritten.Inc(WriteNewMessagesInternal); - - //All ok - auto now = ctx.Now(); + + //All ok + auto now = ctx.Now(); const auto& quotingConfig = AppData()->PQConfig.GetQuotingConfig(); - if (quotingConfig.GetTopicWriteQuotaEntityToLimit() == NKikimrPQ::TPQConfig::TQuotingConfig::USER_PAYLOAD_SIZE) { + if (quotingConfig.GetTopicWriteQuotaEntityToLimit() == NKikimrPQ::TPQConfig::TQuotingConfig::USER_PAYLOAD_SIZE) { WriteQuota.Exaust(WriteNewSize, now); } else { WriteQuota.Exaust(WriteCycleSize, now); } - for (auto& avg : AvgWriteBytes) { - avg.Update(WriteNewSize, now); - } - for (auto& avg : AvgQuotaBytes) { - avg.Update(WriteNewSize, now); - } - - WriteCycleSize = 0; - WriteNewSize = 0; + for (auto& avg : AvgWriteBytes) { + avg.Update(WriteNewSize, now); + } + for (auto& avg : AvgQuotaBytes) { + avg.Update(WriteNewSize, now); + } + + WriteCycleSize = 0; + WriteNewSize = 0; WriteNewSizeInternal = 0; - WriteNewSizeUncompressed = 0; - WriteNewMessages = 0; + WriteNewSizeUncompressed = 0; + WriteNewMessages = 0; WriteNewMessagesInternal = 0; - UpdateWriteBufferIsFullState(now); - - AnswerCurrentWrites(ctx); - SyncMemoryStateWithKVState(ctx); - - //if EndOffset changed there could be subscriptions witch could be completed + UpdateWriteBufferIsFullState(now); + + AnswerCurrentWrites(ctx); + SyncMemoryStateWithKVState(ctx); + + //if EndOffset changed there could be subscriptions witch could be completed TVector<std::pair<TReadInfo, ui64>> reads = Subscriber.GetReads(EndOffset); - for (auto& read : reads) { - Y_VERIFY(EndOffset > read.first.Offset); - ProcessRead(ctx, std::move(read.first), read.second, true); - } - //same for read requests - ProcessHasDataRequests(ctx); - - ProcessTimestampsForNewData(prevEndOffset, ctx); - - ReportLabeledCounters(ctx); - - HandleWrites(ctx); -} - -void TPartition::HandleOnWrite(TEvPQ::TEvWrite::TPtr& ev, const TActorContext& ctx) { - ui32 sz = std::accumulate(ev->Get()->Msgs.begin(), ev->Get()->Msgs.end(), 0u, [](ui32 sum, const TEvPQ::TEvWrite::TMsg& msg){ - return sum + msg.Data.size(); - }); - bool mirroredPartition = Config.GetPartitionConfig().HasMirrorFrom(); - + for (auto& read : reads) { + Y_VERIFY(EndOffset > read.first.Offset); + ProcessRead(ctx, std::move(read.first), read.second, true); + } + //same for read requests + ProcessHasDataRequests(ctx); + + ProcessTimestampsForNewData(prevEndOffset, ctx); + + ReportLabeledCounters(ctx); + + HandleWrites(ctx); +} + +void TPartition::HandleOnWrite(TEvPQ::TEvWrite::TPtr& ev, const TActorContext& ctx) { + ui32 sz = std::accumulate(ev->Get()->Msgs.begin(), ev->Get()->Msgs.end(), 0u, [](ui32 sum, const TEvPQ::TEvWrite::TMsg& msg){ + return sum + msg.Data.size(); + }); + bool mirroredPartition = Config.GetPartitionConfig().HasMirrorFrom(); + if (mirroredPartition && !ev->Get()->OwnerCookie.empty()) { - ReplyError(ctx, ev->Get()->Cookie, NPersQueue::NErrorCode::BAD_REQUEST, + ReplyError(ctx, ev->Get()->Cookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "Write to mirrored topic is forbiden "); - return; - } - + return; + } + ui64 decReservedSize = 0; TStringBuf owner; - + if (!mirroredPartition && !ev->Get()->IsDirectWrite) { owner = TOwnerInfo::GetOwnerFromOwnerCookie(ev->Get()->OwnerCookie); auto it = Owners.find(owner); @@ -3578,9 +3578,9 @@ void TPartition::HandleOnWrite(TEvPQ::TEvWrite::TPtr& ev, const TActorContext& c if (it->second.SourceIdDeleted) { ReplyError(ctx, ev->Get()->Cookie, NPersQueue::NErrorCode::SOURCEID_DELETED, TStringBuilder() << "Yours maximum written sequence number for session was deleted, need to recreate session. " - << "Current count of sourceIds is " << SourceIdStorage.GetInMemorySourceIds().size() << " and limit is " << Config.GetPartitionConfig().GetSourceIdMaxCounts() - << ", current minimum sourceid timestamp(Ms) is " << SourceIdStorage.MinAvailableTimestamp(ctx.Now()).MilliSeconds() - << " and border timestamp(Ms) is " << ((ctx.Now() - TInstant::Seconds(Config.GetPartitionConfig().GetSourceIdLifetimeSeconds())).MilliSeconds())); + << "Current count of sourceIds is " << SourceIdStorage.GetInMemorySourceIds().size() << " and limit is " << Config.GetPartitionConfig().GetSourceIdMaxCounts() + << ", current minimum sourceid timestamp(Ms) is " << SourceIdStorage.MinAvailableTimestamp(ctx.Now()).MilliSeconds() + << " and border timestamp(Ms) is " << ((ctx.Now() - TInstant::Seconds(Config.GetPartitionConfig().GetSourceIdLifetimeSeconds())).MilliSeconds())); return; } @@ -3599,75 +3599,75 @@ void TPartition::HandleOnWrite(TEvPQ::TEvWrite::TPtr& ev, const TActorContext& c ++it->second.NextMessageNo; decReservedSize = it->second.DecReservedSize(); - } - - TMaybe<ui64> offset = ev->Get()->Offset; - - if (WriteInflightSize > Config.GetPartitionConfig().GetMaxWriteInflightSize()) { - Counters.Cumulative()[COUNTER_PQ_WRITE_ERROR].Increment(ev->Get()->Msgs.size()); - Counters.Cumulative()[COUNTER_PQ_WRITE_BYTES_ERROR].Increment(sz); - - ReplyError(ctx, ev->Get()->Cookie, NPersQueue::NErrorCode::OVERLOAD, - TStringBuilder() << "try later. Write inflight limit reached. " - << WriteInflightSize << " vs. maximum " << Config.GetPartitionConfig().GetMaxWriteInflightSize()); - return; - } - for (const auto& msg: ev->Get()->Msgs) { - //this is checked in pq_impl when forming EvWrite request + } + + TMaybe<ui64> offset = ev->Get()->Offset; + + if (WriteInflightSize > Config.GetPartitionConfig().GetMaxWriteInflightSize()) { + Counters.Cumulative()[COUNTER_PQ_WRITE_ERROR].Increment(ev->Get()->Msgs.size()); + Counters.Cumulative()[COUNTER_PQ_WRITE_BYTES_ERROR].Increment(sz); + + ReplyError(ctx, ev->Get()->Cookie, NPersQueue::NErrorCode::OVERLOAD, + TStringBuilder() << "try later. Write inflight limit reached. " + << WriteInflightSize << " vs. maximum " << Config.GetPartitionConfig().GetMaxWriteInflightSize()); + return; + } + for (const auto& msg: ev->Get()->Msgs) { + //this is checked in pq_impl when forming EvWrite request Y_VERIFY(!msg.SourceId.empty() || ev->Get()->IsDirectWrite); - Y_VERIFY(!msg.Data.empty()); - - if (msg.SeqNo > (ui64)Max<i64>()) { - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "Request to write wrong SeqNo. Partition " - << Partition << " sourceId '" << EscapeC(msg.SourceId) << "' seqno " << msg.SeqNo); - - ReplyError(ctx, ev->Get()->Cookie, NPersQueue::NErrorCode::BAD_REQUEST, - TStringBuilder() << "wrong SeqNo " << msg.SeqNo); + Y_VERIFY(!msg.Data.empty()); + + if (msg.SeqNo > (ui64)Max<i64>()) { + LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "Request to write wrong SeqNo. Partition " + << Partition << " sourceId '" << EscapeC(msg.SourceId) << "' seqno " << msg.SeqNo); + + ReplyError(ctx, ev->Get()->Cookie, NPersQueue::NErrorCode::BAD_REQUEST, + TStringBuilder() << "wrong SeqNo " << msg.SeqNo); + return; + } + + ui32 sz = msg.Data.size() + msg.SourceId.size() + TClientBlob::OVERHEAD; + + if (sz > MAX_BLOB_PART_SIZE) { + ReplyError(ctx, ev->Get()->Cookie, NPersQueue::NErrorCode::BAD_REQUEST, + TStringBuilder() << "too big message " << sz << " vs. maximum " << MAX_BLOB_PART_SIZE); return; } - - ui32 sz = msg.Data.size() + msg.SourceId.size() + TClientBlob::OVERHEAD; - - if (sz > MAX_BLOB_PART_SIZE) { - ReplyError(ctx, ev->Get()->Cookie, NPersQueue::NErrorCode::BAD_REQUEST, - TStringBuilder() << "too big message " << sz << " vs. maximum " << MAX_BLOB_PART_SIZE); - return; - } if (!mirroredPartition) { SourceIdStorage.RegisterSourceIdOwner(msg.SourceId, owner); } - } - - if (EndOffset - StartOffset >= static_cast<ui64>(Config.GetPartitionConfig().GetMaxCountInPartition()) - || BodySize + Head.PackedSize >= static_cast<ui64>(Config.GetPartitionConfig().GetMaxSizeInPartition())) { - - Counters.Cumulative()[COUNTER_PQ_WRITE_ERROR].Increment(ev->Get()->Msgs.size()); - Counters.Cumulative()[COUNTER_PQ_WRITE_BYTES_ERROR].Increment(sz); - - ReplyError(ctx, ev->Get()->Cookie, NPersQueue::NErrorCode::WRITE_ERROR_PARTITION_IS_FULL, - Sprintf("try later, partition is full - already have %" PRIu64" from %" PRIu64 " count, %" PRIu64 " from %" PRIu64 " size", - EndOffset - StartOffset, static_cast<ui64>(Config.GetPartitionConfig().GetMaxCountInPartition()), - BodySize + Head.PackedSize, static_cast<ui64>(Config.GetPartitionConfig().GetMaxSizeInPartition()))); - return; - } - ui64 size = 0; - WriteQuota.Update(ctx.Now()); - for (auto& msg: ev->Get()->Msgs) { - size += msg.Data.size(); - bool needToChangeOffset = msg.PartNo + 1 == msg.TotalParts; + } + + if (EndOffset - StartOffset >= static_cast<ui64>(Config.GetPartitionConfig().GetMaxCountInPartition()) + || BodySize + Head.PackedSize >= static_cast<ui64>(Config.GetPartitionConfig().GetMaxSizeInPartition())) { + + Counters.Cumulative()[COUNTER_PQ_WRITE_ERROR].Increment(ev->Get()->Msgs.size()); + Counters.Cumulative()[COUNTER_PQ_WRITE_BYTES_ERROR].Increment(sz); + + ReplyError(ctx, ev->Get()->Cookie, NPersQueue::NErrorCode::WRITE_ERROR_PARTITION_IS_FULL, + Sprintf("try later, partition is full - already have %" PRIu64" from %" PRIu64 " count, %" PRIu64 " from %" PRIu64 " size", + EndOffset - StartOffset, static_cast<ui64>(Config.GetPartitionConfig().GetMaxCountInPartition()), + BodySize + Head.PackedSize, static_cast<ui64>(Config.GetPartitionConfig().GetMaxSizeInPartition()))); + return; + } + ui64 size = 0; + WriteQuota.Update(ctx.Now()); + for (auto& msg: ev->Get()->Msgs) { + size += msg.Data.size(); + bool needToChangeOffset = msg.PartNo + 1 == msg.TotalParts; Requests.emplace_back(TWriteMsg{ev->Get()->Cookie, offset, std::move(msg)}, WriteQuota.GetQuotedTime(), ctx.Now().MilliSeconds(), 0); - if (offset && needToChangeOffset) - ++*offset; - } - WriteInflightSize += size; + if (offset && needToChangeOffset) + ++*offset; + } + WriteInflightSize += size; ReservedSize -= decReservedSize; Y_VERIFY(size <= decReservedSize || decReservedSize == 0); //TODO: remove decReservedSize == 0 - Counters.Simple()[COUNTER_PQ_TABLET_RESERVED_BYTES_SIZE].Set(ReservedSize); - UpdateWriteBufferIsFullState(ctx.Now()); -} - + Counters.Simple()[COUNTER_PQ_TABLET_RESERVED_BYTES_SIZE].Set(ReservedSize); + UpdateWriteBufferIsFullState(ctx.Now()); +} + void TPartition::HandleOnIdle(TEvPQ::TEvRegisterMessageGroup::TPtr& ev, const TActorContext& ctx) { HandleOnWrite(ev, ctx); HandleWrites(ctx); @@ -3762,137 +3762,137 @@ void TPartition::HandleOnWrite(TEvPQ::TEvSplitMessageGroup::TPtr& ev, const TAct Requests.emplace_back(std::move(msg), WriteQuota.GetQuotedTime(), ctx.Now().MilliSeconds(), 0); } -std::pair<TKey, ui32> TPartition::Compact(const TKey& key, const ui32 size, bool headCleared) -{ - std::pair<TKey, ui32> res({key, size}); - ui32 x = headCleared ? 0 : Head.PackedSize; - Y_VERIFY(std::accumulate(DataKeysHead.begin(), DataKeysHead.end(), 0u, [](ui32 sum, const TKeyLevel& level){return sum + level.Sum();}) == NewHead.PackedSize + x); - for (auto it = DataKeysHead.rbegin(); it != DataKeysHead.rend(); ++it) { - auto jt = it; ++jt; - if (it->NeedCompaction()) { - res = it->Compact(); - if (jt != DataKeysHead.rend()) { - jt->AddKey(res.first, res.second); - } - } else { - Y_VERIFY(jt == DataKeysHead.rend() || !jt->NeedCompaction()); //compact must start from last level, not internal - } - Y_VERIFY(!it->NeedCompaction()); - } - Y_VERIFY(res.second >= size); - Y_VERIFY(res.first.GetOffset() < key.GetOffset() || res.first.GetOffset() == key.GetOffset() && res.first.GetPartNo() <= key.GetPartNo()); - return res; -} - - -void TPartition::ProcessChangeOwnerRequests(const TActorContext& ctx) -{ - while (!WaitToChangeOwner.empty()) { - auto &ev = WaitToChangeOwner.front(); - if (OwnerPipes.find(ev->PipeClient) != OwnerPipes.end()) { //this is not request from dead pipe - ProcessChangeOwnerRequest(ev.Release(), ctx); - } else { - ReplyError(ctx, ev->Cookie, NPersQueue::NErrorCode::ERROR, "Pipe for GetOwnershipRequest is already dead"); - } - WaitToChangeOwner.pop_front(); - } - if (CurrentStateFunc() == &TThis::StateIdle) { - HandleWrites(ctx); - } -} - - -void TPartition::BecomeIdle(const TActorContext&) -{ - Become(&TThis::StateIdle); -} - - +std::pair<TKey, ui32> TPartition::Compact(const TKey& key, const ui32 size, bool headCleared) +{ + std::pair<TKey, ui32> res({key, size}); + ui32 x = headCleared ? 0 : Head.PackedSize; + Y_VERIFY(std::accumulate(DataKeysHead.begin(), DataKeysHead.end(), 0u, [](ui32 sum, const TKeyLevel& level){return sum + level.Sum();}) == NewHead.PackedSize + x); + for (auto it = DataKeysHead.rbegin(); it != DataKeysHead.rend(); ++it) { + auto jt = it; ++jt; + if (it->NeedCompaction()) { + res = it->Compact(); + if (jt != DataKeysHead.rend()) { + jt->AddKey(res.first, res.second); + } + } else { + Y_VERIFY(jt == DataKeysHead.rend() || !jt->NeedCompaction()); //compact must start from last level, not internal + } + Y_VERIFY(!it->NeedCompaction()); + } + Y_VERIFY(res.second >= size); + Y_VERIFY(res.first.GetOffset() < key.GetOffset() || res.first.GetOffset() == key.GetOffset() && res.first.GetPartNo() <= key.GetPartNo()); + return res; +} + + +void TPartition::ProcessChangeOwnerRequests(const TActorContext& ctx) +{ + while (!WaitToChangeOwner.empty()) { + auto &ev = WaitToChangeOwner.front(); + if (OwnerPipes.find(ev->PipeClient) != OwnerPipes.end()) { //this is not request from dead pipe + ProcessChangeOwnerRequest(ev.Release(), ctx); + } else { + ReplyError(ctx, ev->Cookie, NPersQueue::NErrorCode::ERROR, "Pipe for GetOwnershipRequest is already dead"); + } + WaitToChangeOwner.pop_front(); + } + if (CurrentStateFunc() == &TThis::StateIdle) { + HandleWrites(ctx); + } +} + + +void TPartition::BecomeIdle(const TActorContext&) +{ + Become(&TThis::StateIdle); +} + + void TPartition::WriteClientInfo(const ui64 cookie, TUserInfo& userInfo, const TActorContext& ctx) { - THolder<TEvKeyValue::TEvRequest> request(new TEvKeyValue::TEvRequest); - - Y_VERIFY(!userInfo.WriteInProgress); + THolder<TEvKeyValue::TEvRequest> request(new TEvKeyValue::TEvRequest); + + Y_VERIFY(!userInfo.WriteInProgress); Y_VERIFY(!userInfo.UserActs.empty()); while (!userInfo.UserActs.empty()) { const auto ev = userInfo.UserActs.front().Get(); - - TKeyPrefix ikey(TKeyPrefix::TypeInfo, Partition, TKeyPrefix::MarkUser); - ikey.Append(ev->ClientId.c_str(), ev->ClientId.size()); - TKeyPrefix ikeyDeprecated(TKeyPrefix::TypeInfo, Partition, TKeyPrefix::MarkUserDeprecated); - ikeyDeprecated.Append(ev->ClientId.c_str(), ev->ClientId.size()); - - if (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_DROP_READ_RULE) { - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition - << " user " << ev->ClientId << " drop request"); - - auto del = request->Record.AddCmdDeleteRange(); - auto range = del->MutableRange(); - range->SetFrom(ikey.Data(), ikey.Size()); - range->SetTo(ikey.Data(), ikey.Size()); - range->SetIncludeFrom(true); - range->SetIncludeTo(true); - request->Record.SetCookie(cookie); - - ctx.Send(Tablet, request.Release()); - userInfo.WriteInProgress = true; - return; - } - - if (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION && ev->SessionId == userInfo.Session) { //this is retry of current request, answer ok + + TKeyPrefix ikey(TKeyPrefix::TypeInfo, Partition, TKeyPrefix::MarkUser); + ikey.Append(ev->ClientId.c_str(), ev->ClientId.size()); + TKeyPrefix ikeyDeprecated(TKeyPrefix::TypeInfo, Partition, TKeyPrefix::MarkUserDeprecated); + ikeyDeprecated.Append(ev->ClientId.c_str(), ev->ClientId.size()); + + if (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_DROP_READ_RULE) { + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition + << " user " << ev->ClientId << " drop request"); + + auto del = request->Record.AddCmdDeleteRange(); + auto range = del->MutableRange(); + range->SetFrom(ikey.Data(), ikey.Size()); + range->SetTo(ikey.Data(), ikey.Size()); + range->SetIncludeFrom(true); + range->SetIncludeTo(true); + request->Record.SetCookie(cookie); + + ctx.Send(Tablet, request.Release()); + userInfo.WriteInProgress = true; + return; + } + + if (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION && ev->SessionId == userInfo.Session) { //this is retry of current request, answer ok auto ts = GetTime(userInfo, userInfo.Offset); ReplyGetClientOffsetOk(ctx, ev->Cookie, userInfo.Offset, ts.first, ts.second); userInfo.UserActs.pop_front(); - continue; - } - - if (ev->Type != TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION && ev->Type != TEvPQ::TEvSetClientInfo::ESCI_INIT_READ_RULE - && !ev->SessionId.empty() && userInfo.Session != ev->SessionId //request to wrong session - && (ev->Type != TEvPQ::TEvSetClientInfo::ESCI_DROP_SESSION || !userInfo.Session.empty()) //but allow DropSession request when session is already dropped - for idempotence - || (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION && !userInfo.Session.empty() + continue; + } + + if (ev->Type != TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION && ev->Type != TEvPQ::TEvSetClientInfo::ESCI_INIT_READ_RULE + && !ev->SessionId.empty() && userInfo.Session != ev->SessionId //request to wrong session + && (ev->Type != TEvPQ::TEvSetClientInfo::ESCI_DROP_SESSION || !userInfo.Session.empty()) //but allow DropSession request when session is already dropped - for idempotence + || (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION && !userInfo.Session.empty() && (ev->Generation < userInfo.Generation || ev->Generation == userInfo.Generation && ev->Step <= userInfo.Step))) { //old generation request - Counters.Cumulative()[COUNTER_PQ_SET_CLIENT_OFFSET_ERROR].Increment(1); - ReplyError(ctx, ev->Cookie, NPersQueue::NErrorCode::WRONG_COOKIE, + Counters.Cumulative()[COUNTER_PQ_SET_CLIENT_OFFSET_ERROR].Increment(1); + ReplyError(ctx, ev->Cookie, NPersQueue::NErrorCode::WRONG_COOKIE, TStringBuilder() << "set offset in already dead session " << ev->SessionId << " actual is " << userInfo.Session); userInfo.UserActs.pop_front(); - continue; - } - + continue; + } + if (!ev->SessionId.empty() && ev->Type == TEvPQ::TEvSetClientInfo::ESCI_OFFSET && (i64)ev->Offset <= userInfo.Offset) { //this is stale request, answer ok for it - ReplyOk(ctx, ev->Cookie); + ReplyOk(ctx, ev->Cookie); userInfo.UserActs.pop_front(); - continue; - } - - //request in correct session - make it - - TString session = (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION ? ev->SessionId : (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_DROP_SESSION ? "" : userInfo.Session)); - ui32 gen = (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION ? ev->Generation : (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_DROP_SESSION ? 0 : userInfo.Generation)); - ui32 step = (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION ? ev->Step : (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_DROP_SESSION ? 0 : userInfo.Step)); + continue; + } + + //request in correct session - make it + + TString session = (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION ? ev->SessionId : (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_DROP_SESSION ? "" : userInfo.Session)); + ui32 gen = (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION ? ev->Generation : (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_DROP_SESSION ? 0 : userInfo.Generation)); + ui32 step = (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION ? ev->Step : (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_DROP_SESSION ? 0 : userInfo.Step)); ui64 offset = (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_OFFSET ? ev->Offset : userInfo.Offset); - ui64 readRuleGeneration = userInfo.ReadRuleGeneration; - if (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_INIT_READ_RULE) { - readRuleGeneration = ev->ReadRuleGeneration; - gen = step = 0; - offset = 0; - session = ""; - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition - << " user " << ev->ClientId << " reinit request with generation " << readRuleGeneration); - - } - - Y_VERIFY(offset <= (ui64)Max<i64>(), "Offset is too big: %" PRIu64, offset); - if (offset > EndOffset) { - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "commit to future - topic " << TopicName << " partition " << Partition << " client " - << ev->ClientId << " EndOffset " << EndOffset << " offset " << offset); - offset = EndOffset; - ev->Offset = offset; -/* Counters.Cumulative()[COUNTER_PQ_SET_CLIENT_OFFSET_ERROR].Increment(1); - ReplyError(ctx, ev->Cookie, NPersQueue::NErrorCode::SET_OFFSET_ERROR_COMMIT_TO_FUTURE, - TStringBuilder() << "can't commit to future. Offset " << offset << " EndOffset " << EndOffset); + ui64 readRuleGeneration = userInfo.ReadRuleGeneration; + if (ev->Type == TEvPQ::TEvSetClientInfo::ESCI_INIT_READ_RULE) { + readRuleGeneration = ev->ReadRuleGeneration; + gen = step = 0; + offset = 0; + session = ""; + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition + << " user " << ev->ClientId << " reinit request with generation " << readRuleGeneration); + + } + + Y_VERIFY(offset <= (ui64)Max<i64>(), "Offset is too big: %" PRIu64, offset); + if (offset > EndOffset) { + LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "commit to future - topic " << TopicName << " partition " << Partition << " client " + << ev->ClientId << " EndOffset " << EndOffset << " offset " << offset); + offset = EndOffset; + ev->Offset = offset; +/* Counters.Cumulative()[COUNTER_PQ_SET_CLIENT_OFFSET_ERROR].Increment(1); + ReplyError(ctx, ev->Cookie, NPersQueue::NErrorCode::SET_OFFSET_ERROR_COMMIT_TO_FUTURE, + TStringBuilder() << "can't commit to future. Offset " << offset << " EndOffset " << EndOffset); userInfo.UserActrs.pop_front(); - continue;*/ - } - + continue;*/ + } + TBuffer idata; { NKikimrPQ::TUserInfo userData; @@ -3901,94 +3901,94 @@ void TPartition::WriteClientInfo(const ui64 cookie, TUserInfo& userInfo, const T userData.SetStep(step); userData.SetSession(session); userData.SetOffsetRewindSum(userInfo.ReadOffsetRewindSum); - userData.SetReadRuleGeneration(readRuleGeneration); + userData.SetReadRuleGeneration(readRuleGeneration); TString out; Y_PROTOBUF_SUPPRESS_NODISCARD userData.SerializeToString(&out); idata.Append(out.c_str(), out.size()); } - TBuffer idataDeprecated = NDeprecatedUserData::Serialize(offset, gen, step, session); - - auto write = request->Record.AddCmdWrite(); - write->SetKey(ikey.Data(), ikey.Size()); - write->SetValue(idata.Data(), idata.Size()); - write->SetStorageChannel(NKikimrClient::TKeyValueRequest::INLINE); - auto write2 = request->Record.AddCmdWrite(); - write2->SetKey(ikeyDeprecated.Data(), ikeyDeprecated.Size()); - write2->SetValue(idataDeprecated.Data(), idataDeprecated.Size()); - write2->SetStorageChannel(NKikimrClient::TKeyValueRequest::INLINE); - - request->Record.SetCookie(cookie); - - ctx.Send(Tablet, request.Release()); - userInfo.WriteInProgress = true; - - break; - } -} - - -void TPartition::ClearOldHead(const ui64 offset, const ui16 partNo, TEvKeyValue::TEvRequest* request) { - for (auto it = HeadKeys.rbegin(); it != HeadKeys.rend(); ++it) { - if (it->Key.GetOffset() > offset || it->Key.GetOffset() == offset && it->Key.GetPartNo() >= partNo) { - auto del = request->Record.AddCmdDeleteRange(); - auto range = del->MutableRange(); - range->SetFrom(it->Key.Data(), it->Key.Size()); - range->SetIncludeFrom(true); - range->SetTo(it->Key.Data(), it->Key.Size()); - range->SetIncludeTo(true); - } else { - break; - } - } -} - - + TBuffer idataDeprecated = NDeprecatedUserData::Serialize(offset, gen, step, session); + + auto write = request->Record.AddCmdWrite(); + write->SetKey(ikey.Data(), ikey.Size()); + write->SetValue(idata.Data(), idata.Size()); + write->SetStorageChannel(NKikimrClient::TKeyValueRequest::INLINE); + auto write2 = request->Record.AddCmdWrite(); + write2->SetKey(ikeyDeprecated.Data(), ikeyDeprecated.Size()); + write2->SetValue(idataDeprecated.Data(), idataDeprecated.Size()); + write2->SetStorageChannel(NKikimrClient::TKeyValueRequest::INLINE); + + request->Record.SetCookie(cookie); + + ctx.Send(Tablet, request.Release()); + userInfo.WriteInProgress = true; + + break; + } +} + + +void TPartition::ClearOldHead(const ui64 offset, const ui16 partNo, TEvKeyValue::TEvRequest* request) { + for (auto it = HeadKeys.rbegin(); it != HeadKeys.rend(); ++it) { + if (it->Key.GetOffset() > offset || it->Key.GetOffset() == offset && it->Key.GetPartNo() >= partNo) { + auto del = request->Record.AddCmdDeleteRange(); + auto range = del->MutableRange(); + range->SetFrom(it->Key.Data(), it->Key.Size()); + range->SetIncludeFrom(true); + range->SetTo(it->Key.Data(), it->Key.Size()); + range->SetIncludeTo(true); + } else { + break; + } + } +} + + void TPartition::CancelAllWritesOnWrite(const TActorContext& ctx, TEvKeyValue::TEvRequest* request, const TString& errorStr, const TWriteMsg& p, TSourceIdWriter& sourceIdWriter, NPersQueue::NErrorCode::EErrorCode errorCode = NPersQueue::NErrorCode::BAD_REQUEST) { ReplyError(ctx, p.Cookie, errorCode, errorStr); - Counters.Cumulative()[COUNTER_PQ_WRITE_ERROR].Increment(1); - Counters.Cumulative()[COUNTER_PQ_WRITE_BYTES_ERROR].Increment(p.Msg.Data.size() + p.Msg.SourceId.size()); - FailBadClient(ctx); - NewHead.Clear(); - NewHead.Offset = EndOffset; + Counters.Cumulative()[COUNTER_PQ_WRITE_ERROR].Increment(1); + Counters.Cumulative()[COUNTER_PQ_WRITE_BYTES_ERROR].Increment(p.Msg.Data.size() + p.Msg.SourceId.size()); + FailBadClient(ctx); + NewHead.Clear(); + NewHead.Offset = EndOffset; sourceIdWriter.Clear(); - request->Record.Clear(); - PartitionedBlob = TPartitionedBlob(Partition, 0, "", 0, 0, 0, Head, NewHead, true, false, MaxBlobSize); - CompactedKeys.clear(); -} - - + request->Record.Clear(); + PartitionedBlob = TPartitionedBlob(Partition, 0, "", 0, 0, 0, Head, NewHead, true, false, MaxBlobSize); + CompactedKeys.clear(); +} + + bool TPartition::AppendHeadWithNewWrites(TEvKeyValue::TEvRequest* request, const TActorContext& ctx, TSourceIdWriter& sourceIdWriter) { - - ui64 curOffset = PartitionedBlob.IsInited() ? PartitionedBlob.GetOffset() : EndOffset; - - WriteCycleSize = 0; - WriteNewSize = 0; - WriteNewSizeUncompressed = 0; - WriteNewMessages = 0; - UpdateWriteBufferIsFullState(ctx.Now()); + + ui64 curOffset = PartitionedBlob.IsInited() ? PartitionedBlob.GetOffset() : EndOffset; + + WriteCycleSize = 0; + WriteNewSize = 0; + WriteNewSizeUncompressed = 0; + WriteNewMessages = 0; + UpdateWriteBufferIsFullState(ctx.Now()); CurrentTimestamp = ctx.Now(); - - NewHead.Offset = EndOffset; - NewHead.PartNo = 0; - NewHead.PackedSize = 0; - - Y_VERIFY(NewHead.Batches.empty()); - - bool oldPartsCleared = false; - bool headCleared = (Head.PackedSize == 0); - - - //TODO: Process here not TClientBlobs, but also TBatches from LB(LB got them from pushclient too) - //Process is following: if batch contains already written messages or only one client message part -> unpack it and process as several TClientBlobs - //otherwise write this batch as is to head; - - WriteQuota.Update(ctx.Now()); - - while (!Requests.empty() && WriteCycleSize < MAX_WRITE_CYCLE_SIZE) { //head is not too big - auto pp = Requests.front(); - Requests.pop_front(); + + NewHead.Offset = EndOffset; + NewHead.PartNo = 0; + NewHead.PackedSize = 0; + + Y_VERIFY(NewHead.Batches.empty()); + + bool oldPartsCleared = false; + bool headCleared = (Head.PackedSize == 0); + + + //TODO: Process here not TClientBlobs, but also TBatches from LB(LB got them from pushclient too) + //Process is following: if batch contains already written messages or only one client message part -> unpack it and process as several TClientBlobs + //otherwise write this batch as is to head; + + WriteQuota.Update(ctx.Now()); + + while (!Requests.empty() && WriteCycleSize < MAX_WRITE_CYCLE_SIZE) { //head is not too big + auto pp = Requests.front(); + Requests.pop_front(); if (!pp.IsWrite()) { if (pp.IsRegisterMessageGroup()) { @@ -4021,608 +4021,608 @@ bool TPartition::AppendHeadWithNewWrites(TEvKeyValue::TEvRequest* request, const Y_VERIFY(pp.IsOwnership()); } - pp.QuotedTime = WriteQuota.GetQuotedTime() - pp.QuotedTime; //change to duration - pp.QueueTime = ctx.Now().MilliSeconds() - pp.QueueTime; - pp.WriteTime = ctx.Now().MilliSeconds(); - Responses.push_back(pp); - continue; - } + pp.QuotedTime = WriteQuota.GetQuotedTime() - pp.QuotedTime; //change to duration + pp.QueueTime = ctx.Now().MilliSeconds() - pp.QueueTime; + pp.WriteTime = ctx.Now().MilliSeconds(); + Responses.push_back(pp); + continue; + } Y_VERIFY(pp.IsWrite()); auto& p = pp.GetWrite(); - + WriteInflightSize -= p.Msg.Data.size(); - - Counters.Percentile()[COUNTER_LATENCY_PQ_RECEIVE_QUEUE].IncrementFor(ctx.Now().MilliSeconds() - p.Msg.ReceiveTimestamp); - //check already written - - ui64 poffset = p.Offset ? *p.Offset : curOffset; - + + Counters.Percentile()[COUNTER_LATENCY_PQ_RECEIVE_QUEUE].IncrementFor(ctx.Now().MilliSeconds() - p.Msg.ReceiveTimestamp); + //check already written + + ui64 poffset = p.Offset ? *p.Offset : curOffset; + auto it_inMemory = SourceIdStorage.GetInMemorySourceIds().find(p.Msg.SourceId); auto it_toWrite = sourceIdWriter.GetSourceIdsToWrite().find(p.Msg.SourceId); if (!p.Msg.DisableDeduplication && (it_inMemory != SourceIdStorage.GetInMemorySourceIds().end() && it_inMemory->second.SeqNo >= p.Msg.SeqNo || (it_toWrite != sourceIdWriter.GetSourceIdsToWrite().end() && it_toWrite->second.SeqNo >= p.Msg.SeqNo))) { bool isWriting = (it_toWrite != sourceIdWriter.GetSourceIdsToWrite().end()); bool isCommitted = (it_inMemory != SourceIdStorage.GetInMemorySourceIds().end()); - - if (poffset >= curOffset) { - LOG_WARN_S(ctx, NKikimrServices::PERSQUEUE, "Already written message. Topic: '" << TopicName << "' Partition: " << Partition - << " SourceId: '" << EscapeC(p.Msg.SourceId) << "'. Message seqNo = " << p.Msg.SeqNo + + if (poffset >= curOffset) { + LOG_WARN_S(ctx, NKikimrServices::PERSQUEUE, "Already written message. Topic: '" << TopicName << "' Partition: " << Partition + << " SourceId: '" << EscapeC(p.Msg.SourceId) << "'. Message seqNo = " << p.Msg.SeqNo << ". Committed seqNo = " << (isCommitted ? it_inMemory->second.SeqNo : 0) << (isWriting ? ". Writing seqNo: " : ". ") << (isWriting ? it_toWrite->second.SeqNo : 0) << " EndOffset " << EndOffset - << " CurOffset " << curOffset << " offset " << poffset); - - Counters.Cumulative()[COUNTER_PQ_WRITE_ALREADY].Increment(1); - Counters.Cumulative()[COUNTER_PQ_WRITE_BYTES_ALREADY].Increment(p.Msg.Data.size()); - } else { - Counters.Cumulative()[COUNTER_PQ_WRITE_SMALL_OFFSET].Increment(1); - Counters.Cumulative()[COUNTER_PQ_WRITE_BYTES_SMALL_OFFSET].Increment(p.Msg.Data.size()); - } - - TString().swap(p.Msg.Data); - pp.QuotedTime = WriteQuota.GetQuotedTime() - pp.QuotedTime; //change to duration - pp.QueueTime = ctx.Now().MilliSeconds() - pp.QueueTime; - pp.WriteTime = ctx.Now().MilliSeconds(); - Responses.push_back(pp); - continue; - } - - if (poffset < curOffset) { //too small offset - CancelAllWritesOnWrite(ctx, request, - TStringBuilder() << "write message sourceId: " << EscapeC(p.Msg.SourceId) << " seqNo: " << p.Msg.SeqNo - << " partNo: " << p.Msg.PartNo << " has incorrect offset " << poffset << ", must be at least " << curOffset, + << " CurOffset " << curOffset << " offset " << poffset); + + Counters.Cumulative()[COUNTER_PQ_WRITE_ALREADY].Increment(1); + Counters.Cumulative()[COUNTER_PQ_WRITE_BYTES_ALREADY].Increment(p.Msg.Data.size()); + } else { + Counters.Cumulative()[COUNTER_PQ_WRITE_SMALL_OFFSET].Increment(1); + Counters.Cumulative()[COUNTER_PQ_WRITE_BYTES_SMALL_OFFSET].Increment(p.Msg.Data.size()); + } + + TString().swap(p.Msg.Data); + pp.QuotedTime = WriteQuota.GetQuotedTime() - pp.QuotedTime; //change to duration + pp.QueueTime = ctx.Now().MilliSeconds() - pp.QueueTime; + pp.WriteTime = ctx.Now().MilliSeconds(); + Responses.push_back(pp); + continue; + } + + if (poffset < curOffset) { //too small offset + CancelAllWritesOnWrite(ctx, request, + TStringBuilder() << "write message sourceId: " << EscapeC(p.Msg.SourceId) << " seqNo: " << p.Msg.SeqNo + << " partNo: " << p.Msg.PartNo << " has incorrect offset " << poffset << ", must be at least " << curOffset, p, sourceIdWriter, NPersQueue::NErrorCode::EErrorCode::WRITE_ERROR_BAD_OFFSET); - return false; - } - - Y_VERIFY(poffset >= curOffset); - - bool needCompactHead = poffset > curOffset; - if (needCompactHead) { //got gap - if (p.Msg.PartNo != 0) { //gap can't be inside of partitioned message - CancelAllWritesOnWrite(ctx, request, - TStringBuilder() << "write message sourceId: " << EscapeC(p.Msg.SourceId) << " seqNo: " << p.Msg.SeqNo - << " partNo: " << p.Msg.PartNo << " has gap inside partitioned message, incorrect offset " - << poffset << ", must be " << curOffset, + return false; + } + + Y_VERIFY(poffset >= curOffset); + + bool needCompactHead = poffset > curOffset; + if (needCompactHead) { //got gap + if (p.Msg.PartNo != 0) { //gap can't be inside of partitioned message + CancelAllWritesOnWrite(ctx, request, + TStringBuilder() << "write message sourceId: " << EscapeC(p.Msg.SourceId) << " seqNo: " << p.Msg.SeqNo + << " partNo: " << p.Msg.PartNo << " has gap inside partitioned message, incorrect offset " + << poffset << ", must be " << curOffset, p, sourceIdWriter); - return false; - } - curOffset = poffset; - } - - if (p.Msg.PartNo == 0) { //create new PartitionedBlob - //there could be parts from previous owner, clear them - if (!oldPartsCleared) { - oldPartsCleared = true; - auto del = request->Record.AddCmdDeleteRange(); - auto range = del->MutableRange(); - TKeyPrefix from(TKeyPrefix::TypeTmpData, Partition); - range->SetFrom(from.Data(), from.Size()); - TKeyPrefix to(TKeyPrefix::TypeTmpData, Partition + 1); - range->SetTo(to.Data(), to.Size()); - } - - if (PartitionedBlob.HasFormedBlobs()) { - //clear currently-writed blobs - auto oldCmdWrite = request->Record.GetCmdWrite(); - request->Record.ClearCmdWrite(); - for (ui32 i = 0; i < (ui32)oldCmdWrite.size(); ++i) { - TKey key(oldCmdWrite.Get(i).GetKey()); - if (key.GetType() != TKeyPrefix::TypeTmpData) { - request->Record.AddCmdWrite()->CopyFrom(oldCmdWrite.Get(i)); - } - } - } + return false; + } + curOffset = poffset; + } + + if (p.Msg.PartNo == 0) { //create new PartitionedBlob + //there could be parts from previous owner, clear them + if (!oldPartsCleared) { + oldPartsCleared = true; + auto del = request->Record.AddCmdDeleteRange(); + auto range = del->MutableRange(); + TKeyPrefix from(TKeyPrefix::TypeTmpData, Partition); + range->SetFrom(from.Data(), from.Size()); + TKeyPrefix to(TKeyPrefix::TypeTmpData, Partition + 1); + range->SetTo(to.Data(), to.Size()); + } + + if (PartitionedBlob.HasFormedBlobs()) { + //clear currently-writed blobs + auto oldCmdWrite = request->Record.GetCmdWrite(); + request->Record.ClearCmdWrite(); + for (ui32 i = 0; i < (ui32)oldCmdWrite.size(); ++i) { + TKey key(oldCmdWrite.Get(i).GetKey()); + if (key.GetType() != TKeyPrefix::TypeTmpData) { + request->Record.AddCmdWrite()->CopyFrom(oldCmdWrite.Get(i)); + } + } + } PartitionedBlob = TPartitionedBlob(Partition, curOffset, p.Msg.SourceId, p.Msg.SeqNo, p.Msg.TotalParts, p.Msg.TotalSize, Head, NewHead, headCleared, needCompactHead, MaxBlobSize); - } - - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition + } + + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition << " part blob processing sourceId '" << EscapeC(p.Msg.SourceId) << "' seqNo " << p.Msg.SeqNo << " partNo " << p.Msg.PartNo); TString s; - if (!PartitionedBlob.IsNextPart(p.Msg.SourceId, p.Msg.SeqNo, p.Msg.PartNo, &s)) { - //this must not be happen - client sends gaps, fail this client till the end + if (!PartitionedBlob.IsNextPart(p.Msg.SourceId, p.Msg.SeqNo, p.Msg.PartNo, &s)) { + //this must not be happen - client sends gaps, fail this client till the end CancelAllWritesOnWrite(ctx, request, s, p, sourceIdWriter); - //now no changes will leak - return false; - } - - WriteNewSize += p.Msg.SourceId.size() + p.Msg.Data.size(); + //now no changes will leak + return false; + } + + WriteNewSize += p.Msg.SourceId.size() + p.Msg.Data.size(); WriteNewSizeInternal = p.Msg.External ? 0 : WriteNewSize; - WriteNewSizeUncompressed += p.Msg.UncompressedSize + p.Msg.SourceId.size(); + WriteNewSizeUncompressed += p.Msg.UncompressedSize + p.Msg.SourceId.size(); if (p.Msg.PartNo == 0) { ++WriteNewMessages; if (!p.Msg.External) ++WriteNewMessagesInternal; } - - TMaybe<TPartData> partData; - if (p.Msg.TotalParts > 1) { //this is multi-part message - partData = TPartData(p.Msg.PartNo, p.Msg.TotalParts, p.Msg.TotalSize); - } - WriteTimestamp = ctx.Now(); - WriteTimestampEstimate = p.Msg.WriteTimestamp > 0 ? TInstant::MilliSeconds(p.Msg.WriteTimestamp) : WriteTimestamp; + + TMaybe<TPartData> partData; + if (p.Msg.TotalParts > 1) { //this is multi-part message + partData = TPartData(p.Msg.PartNo, p.Msg.TotalParts, p.Msg.TotalSize); + } + WriteTimestamp = ctx.Now(); + WriteTimestampEstimate = p.Msg.WriteTimestamp > 0 ? TInstant::MilliSeconds(p.Msg.WriteTimestamp) : WriteTimestamp; TClientBlob blob(p.Msg.SourceId, p.Msg.SeqNo, p.Msg.Data, std::move(partData), WriteTimestampEstimate, TInstant::MilliSeconds(p.Msg.CreateTimestamp == 0 ? curOffset : p.Msg.CreateTimestamp), p.Msg.UncompressedSize, p.Msg.PartitionKey, p.Msg.ExplicitHashKey); //remove curOffset when LB will report CTime - + ui64 writeLagMs = (WriteTimestamp - TInstant::MilliSeconds(p.Msg.CreateTimestamp)).MilliSeconds(); WriteLagMs.Update(writeLagMs, WriteTimestamp); - if (InputTimeLag) { + if (InputTimeLag) { InputTimeLag->IncFor(writeLagMs, 1); - if (p.Msg.PartNo == 0) { - MessageSize->IncFor(p.Msg.TotalSize + p.Msg.SourceId.size(), 1); - } - } - - bool lastBlobPart = blob.IsLastPart(); - - //will return compacted tmp blob + if (p.Msg.PartNo == 0) { + MessageSize->IncFor(p.Msg.TotalSize + p.Msg.SourceId.size(), 1); + } + } + + bool lastBlobPart = blob.IsLastPart(); + + //will return compacted tmp blob std::pair<TKey, TString> newWrite = PartitionedBlob.Add(std::move(blob)); - - if (!newWrite.second.empty()) { - auto write = request->Record.AddCmdWrite(); - write->SetKey(newWrite.first.Data(), newWrite.first.Size()); - write->SetValue(newWrite.second); - Y_VERIFY(!newWrite.first.IsHead()); - auto channel = GetChannel(NextChannel(newWrite.first.IsHead(), newWrite.second.Size())); - write->SetStorageChannel(channel); - write->SetTactic(AppData(ctx)->PQConfig.GetTactic()); - - TKey resKey = newWrite.first; - resKey.SetType(TKeyPrefix::TypeData); - write->SetKeyToCache(resKey.Data(), resKey.Size()); - WriteCycleSize += newWrite.second.size(); - + + if (!newWrite.second.empty()) { + auto write = request->Record.AddCmdWrite(); + write->SetKey(newWrite.first.Data(), newWrite.first.Size()); + write->SetValue(newWrite.second); + Y_VERIFY(!newWrite.first.IsHead()); + auto channel = GetChannel(NextChannel(newWrite.first.IsHead(), newWrite.second.Size())); + write->SetStorageChannel(channel); + write->SetTactic(AppData(ctx)->PQConfig.GetTactic()); + + TKey resKey = newWrite.first; + resKey.SetType(TKeyPrefix::TypeData); + write->SetKeyToCache(resKey.Data(), resKey.Size()); + WriteCycleSize += newWrite.second.size(); + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition << " part blob sourceId '" << EscapeC(p.Msg.SourceId) << "' seqNo " << p.Msg.SeqNo << " partNo " << p.Msg.PartNo << " result is " << TStringBuf(newWrite.first.Data(), newWrite.first.Size()) << " size " << newWrite.second.size()); - } - - if (lastBlobPart) { - Y_VERIFY(PartitionedBlob.IsComplete()); - ui32 curWrites = 0; - for (ui32 i = 0; i < request->Record.CmdWriteSize(); ++i) { //change keys for yet to be writed KV pairs - TKey key(request->Record.GetCmdWrite(i).GetKey()); - if (key.GetType() == TKeyPrefix::TypeTmpData) { - key.SetType(TKeyPrefix::TypeData); + } + + if (lastBlobPart) { + Y_VERIFY(PartitionedBlob.IsComplete()); + ui32 curWrites = 0; + for (ui32 i = 0; i < request->Record.CmdWriteSize(); ++i) { //change keys for yet to be writed KV pairs + TKey key(request->Record.GetCmdWrite(i).GetKey()); + if (key.GetType() == TKeyPrefix::TypeTmpData) { + key.SetType(TKeyPrefix::TypeData); request->Record.MutableCmdWrite(i)->SetKey(TString(key.Data(), key.Size())); - ++curWrites; - } - } - Y_VERIFY(curWrites <= PartitionedBlob.GetFormedBlobs().size()); - auto formedBlobs = PartitionedBlob.GetFormedBlobs(); - for (ui32 i = 0; i < formedBlobs.size(); ++i) { - const auto& x = formedBlobs[i]; - if (i + curWrites < formedBlobs.size()) { //this KV pair is already writed, rename needed - auto rename = request->Record.AddCmdRename(); - TKey key = x.first; + ++curWrites; + } + } + Y_VERIFY(curWrites <= PartitionedBlob.GetFormedBlobs().size()); + auto formedBlobs = PartitionedBlob.GetFormedBlobs(); + for (ui32 i = 0; i < formedBlobs.size(); ++i) { + const auto& x = formedBlobs[i]; + if (i + curWrites < formedBlobs.size()) { //this KV pair is already writed, rename needed + auto rename = request->Record.AddCmdRename(); + TKey key = x.first; rename->SetOldKey(TString(key.Data(), key.Size())); - key.SetType(TKeyPrefix::TypeData); + key.SetType(TKeyPrefix::TypeData); rename->SetNewKey(TString(key.Data(), key.Size())); - } - if (!DataKeysBody.empty() && CompactedKeys.empty()) { - Y_VERIFY(DataKeysBody.back().Key.GetOffset() + DataKeysBody.back().Key.GetCount() <= x.first.GetOffset(), - "LAST KEY %s, HeadOffset %lu, NEWKEY %s", DataKeysBody.back().Key.ToString().c_str(), Head.Offset, x.first.ToString().c_str()); - } - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "writing blob: topic '" << TopicName << "' partition " << Partition - << " " << x.first.ToString() << " size " << x.second << " WTime " << ctx.Now().MilliSeconds()); - - CompactedKeys.push_back(x); - CompactedKeys.back().first.SetType(TKeyPrefix::TypeData); - } - if (PartitionedBlob.HasFormedBlobs()) { //Head and newHead are cleared - headCleared = true; - NewHead.Clear(); - NewHead.Offset = PartitionedBlob.GetOffset(); - NewHead.PartNo = PartitionedBlob.GetHeadPartNo(); - NewHead.PackedSize = 0; - } - ui32 countOfLastParts = 0; - for (auto& x : PartitionedBlob.GetClientBlobs()) { - if (NewHead.Batches.empty() || NewHead.Batches.back().Packed) { + } + if (!DataKeysBody.empty() && CompactedKeys.empty()) { + Y_VERIFY(DataKeysBody.back().Key.GetOffset() + DataKeysBody.back().Key.GetCount() <= x.first.GetOffset(), + "LAST KEY %s, HeadOffset %lu, NEWKEY %s", DataKeysBody.back().Key.ToString().c_str(), Head.Offset, x.first.ToString().c_str()); + } + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "writing blob: topic '" << TopicName << "' partition " << Partition + << " " << x.first.ToString() << " size " << x.second << " WTime " << ctx.Now().MilliSeconds()); + + CompactedKeys.push_back(x); + CompactedKeys.back().first.SetType(TKeyPrefix::TypeData); + } + if (PartitionedBlob.HasFormedBlobs()) { //Head and newHead are cleared + headCleared = true; + NewHead.Clear(); + NewHead.Offset = PartitionedBlob.GetOffset(); + NewHead.PartNo = PartitionedBlob.GetHeadPartNo(); + NewHead.PackedSize = 0; + } + ui32 countOfLastParts = 0; + for (auto& x : PartitionedBlob.GetClientBlobs()) { + if (NewHead.Batches.empty() || NewHead.Batches.back().Packed) { NewHead.Batches.emplace_back(curOffset, x.GetPartNo(), TVector<TClientBlob>()); - NewHead.PackedSize += GetMaxHeaderSize(); //upper bound for packed size - } - if (x.IsLastPart()) { - ++countOfLastParts; - } - Y_VERIFY(!NewHead.Batches.back().Packed); - NewHead.Batches.back().AddBlob(x); - NewHead.PackedSize += x.GetBlobSize(); - if (NewHead.Batches.back().GetUnpackedSize() >= BATCH_UNPACK_SIZE_BORDER) { - NewHead.Batches.back().Pack(); - NewHead.PackedSize += NewHead.Batches.back().GetPackedSize(); //add real packed size for this blob - - NewHead.PackedSize -= GetMaxHeaderSize(); //instead of upper bound - NewHead.PackedSize -= NewHead.Batches.back().GetUnpackedSize(); - } - } - - Y_VERIFY(countOfLastParts == 1); - - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition - << " part blob complete sourceId '" << EscapeC(p.Msg.SourceId) << "' seqNo " << p.Msg.SeqNo << " partNo " << p.Msg.PartNo - << " FormedBlobsCount " << PartitionedBlob.GetFormedBlobs().size() << " NewHead: " - << NewHead); - + NewHead.PackedSize += GetMaxHeaderSize(); //upper bound for packed size + } + if (x.IsLastPart()) { + ++countOfLastParts; + } + Y_VERIFY(!NewHead.Batches.back().Packed); + NewHead.Batches.back().AddBlob(x); + NewHead.PackedSize += x.GetBlobSize(); + if (NewHead.Batches.back().GetUnpackedSize() >= BATCH_UNPACK_SIZE_BORDER) { + NewHead.Batches.back().Pack(); + NewHead.PackedSize += NewHead.Batches.back().GetPackedSize(); //add real packed size for this blob + + NewHead.PackedSize -= GetMaxHeaderSize(); //instead of upper bound + NewHead.PackedSize -= NewHead.Batches.back().GetUnpackedSize(); + } + } + + Y_VERIFY(countOfLastParts == 1); + + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Topic '" << TopicName << "' partition " << Partition + << " part blob complete sourceId '" << EscapeC(p.Msg.SourceId) << "' seqNo " << p.Msg.SeqNo << " partNo " << p.Msg.PartNo + << " FormedBlobsCount " << PartitionedBlob.GetFormedBlobs().size() << " NewHead: " + << NewHead); + if (it_inMemory == SourceIdStorage.GetInMemorySourceIds().end()) { sourceIdWriter.RegisterSourceId(p.Msg.SourceId, p.Msg.SeqNo, curOffset, CurrentTimestamp); } else { sourceIdWriter.RegisterSourceId(p.Msg.SourceId, it_inMemory->second.Updated(p.Msg.SeqNo, curOffset, CurrentTimestamp)); } - ++curOffset; - PartitionedBlob = TPartitionedBlob(Partition, 0, "", 0, 0, 0, Head, NewHead, true, false, MaxBlobSize); - } + ++curOffset; + PartitionedBlob = TPartitionedBlob(Partition, 0, "", 0, 0, 0, Head, NewHead, true, false, MaxBlobSize); + } TString().swap(p.Msg.Data); - pp.QuotedTime = WriteQuota.GetQuotedTime() - pp.QuotedTime; //change to duration - pp.QueueTime = ctx.Now().MilliSeconds() - pp.QueueTime; - pp.WriteTime = ctx.Now().MilliSeconds(); - Responses.push_back(pp); - } - - UpdateWriteBufferIsFullState(ctx.Now()); - - if (!NewHead.Batches.empty() && !NewHead.Batches.back().Packed) { - NewHead.Batches.back().Pack(); - NewHead.PackedSize += NewHead.Batches.back().GetPackedSize(); //add real packed size for this blob - - NewHead.PackedSize -= GetMaxHeaderSize(); //instead of upper bound - NewHead.PackedSize -= NewHead.Batches.back().GetUnpackedSize(); - } - - Y_VERIFY((headCleared ? 0 : Head.PackedSize) + NewHead.PackedSize <= MaxBlobSize); //otherwise last PartitionedBlob.Add must compact all except last cl - MaxWriteResponsesSize = Max<ui32>(MaxWriteResponsesSize, Responses.size()); - - return headCleared; -} - - -std::pair<TKey, ui32> TPartition::GetNewWriteKey(bool headCleared) -{ - bool needCompaction = false; - ui32 HeadSize = headCleared ? 0 : Head.PackedSize; - if (HeadSize + NewHead.PackedSize > 0 && HeadSize + NewHead.PackedSize - >= Min<ui32>(MaxBlobSize, Config.GetPartitionConfig().GetLowWatermark())) - needCompaction = true; - - if (PartitionedBlob.IsInited()) { //has active partitioned blob - compaction is forbiden, head and newHead will be compacted when this partitioned blob is finished - needCompaction = false; - } - - Y_VERIFY(NewHead.PackedSize > 0 || needCompaction); //smthing must be here - - TKey key(TKeyPrefix::TypeData, Partition, NewHead.Offset, NewHead.PartNo, NewHead.GetCount(), NewHead.GetInternalPartsCount(), !needCompaction); - - if (NewHead.PackedSize > 0) - DataKeysHead[TotalLevels - 1].AddKey(key, NewHead.PackedSize); - Y_VERIFY(HeadSize + NewHead.PackedSize <= 3 * MaxSizeCheck); - - std::pair<TKey, ui32> res; - - if (needCompaction) { //compact all - for (ui32 i = 0; i < TotalLevels; ++i) { - DataKeysHead[i].Clear(); - } - if (!headCleared) { //compacted blob must contain both head and NewHead - key = TKey(TKeyPrefix::TypeData, Partition, Head.Offset, Head.PartNo, NewHead.GetCount() + Head.GetCount(), - Head.GetInternalPartsCount() + NewHead.GetInternalPartsCount(), false); - } //otherwise KV blob is not from head (!key.IsHead()) and contains only new data from NewHead - res = std::make_pair(key, HeadSize + NewHead.PackedSize); - } else { - res = Compact(key, NewHead.PackedSize, headCleared); - Y_VERIFY(res.first.IsHead());//may compact some KV blobs from head, but new KV blob is from head too - Y_VERIFY(res.second >= NewHead.PackedSize); //at least new data must be writed - } - Y_VERIFY(res.second <= MaxBlobSize); - return res; -} - -void TPartition::AddNewWriteBlob(std::pair<TKey, ui32>& res, TEvKeyValue::TEvRequest* request, bool headCleared, const TActorContext& ctx) { - const auto& key = res.first; - + pp.QuotedTime = WriteQuota.GetQuotedTime() - pp.QuotedTime; //change to duration + pp.QueueTime = ctx.Now().MilliSeconds() - pp.QueueTime; + pp.WriteTime = ctx.Now().MilliSeconds(); + Responses.push_back(pp); + } + + UpdateWriteBufferIsFullState(ctx.Now()); + + if (!NewHead.Batches.empty() && !NewHead.Batches.back().Packed) { + NewHead.Batches.back().Pack(); + NewHead.PackedSize += NewHead.Batches.back().GetPackedSize(); //add real packed size for this blob + + NewHead.PackedSize -= GetMaxHeaderSize(); //instead of upper bound + NewHead.PackedSize -= NewHead.Batches.back().GetUnpackedSize(); + } + + Y_VERIFY((headCleared ? 0 : Head.PackedSize) + NewHead.PackedSize <= MaxBlobSize); //otherwise last PartitionedBlob.Add must compact all except last cl + MaxWriteResponsesSize = Max<ui32>(MaxWriteResponsesSize, Responses.size()); + + return headCleared; +} + + +std::pair<TKey, ui32> TPartition::GetNewWriteKey(bool headCleared) +{ + bool needCompaction = false; + ui32 HeadSize = headCleared ? 0 : Head.PackedSize; + if (HeadSize + NewHead.PackedSize > 0 && HeadSize + NewHead.PackedSize + >= Min<ui32>(MaxBlobSize, Config.GetPartitionConfig().GetLowWatermark())) + needCompaction = true; + + if (PartitionedBlob.IsInited()) { //has active partitioned blob - compaction is forbiden, head and newHead will be compacted when this partitioned blob is finished + needCompaction = false; + } + + Y_VERIFY(NewHead.PackedSize > 0 || needCompaction); //smthing must be here + + TKey key(TKeyPrefix::TypeData, Partition, NewHead.Offset, NewHead.PartNo, NewHead.GetCount(), NewHead.GetInternalPartsCount(), !needCompaction); + + if (NewHead.PackedSize > 0) + DataKeysHead[TotalLevels - 1].AddKey(key, NewHead.PackedSize); + Y_VERIFY(HeadSize + NewHead.PackedSize <= 3 * MaxSizeCheck); + + std::pair<TKey, ui32> res; + + if (needCompaction) { //compact all + for (ui32 i = 0; i < TotalLevels; ++i) { + DataKeysHead[i].Clear(); + } + if (!headCleared) { //compacted blob must contain both head and NewHead + key = TKey(TKeyPrefix::TypeData, Partition, Head.Offset, Head.PartNo, NewHead.GetCount() + Head.GetCount(), + Head.GetInternalPartsCount() + NewHead.GetInternalPartsCount(), false); + } //otherwise KV blob is not from head (!key.IsHead()) and contains only new data from NewHead + res = std::make_pair(key, HeadSize + NewHead.PackedSize); + } else { + res = Compact(key, NewHead.PackedSize, headCleared); + Y_VERIFY(res.first.IsHead());//may compact some KV blobs from head, but new KV blob is from head too + Y_VERIFY(res.second >= NewHead.PackedSize); //at least new data must be writed + } + Y_VERIFY(res.second <= MaxBlobSize); + return res; +} + +void TPartition::AddNewWriteBlob(std::pair<TKey, ui32>& res, TEvKeyValue::TEvRequest* request, bool headCleared, const TActorContext& ctx) { + const auto& key = res.first; + TString valueD; - valueD.reserve(res.second); - ui32 pp = Head.FindPos(key.GetOffset(), key.GetPartNo()); - if (pp < Max<ui32>() && key.GetOffset() < EndOffset) { //this batch trully contains this offset - Y_VERIFY(pp < Head.Batches.size()); - Y_VERIFY(Head.Batches[pp].GetOffset() == key.GetOffset()); - Y_VERIFY(Head.Batches[pp].GetPartNo() == key.GetPartNo()); - for (; pp < Head.Batches.size(); ++pp) { //TODO - merge small batches here - Y_VERIFY(Head.Batches[pp].Packed); - valueD += Head.Batches[pp].Serialize(); - } - } - for (auto& b : NewHead.Batches) { - Y_VERIFY(b.Packed); - valueD += b.Serialize(); - } - - Y_VERIFY(res.second >= valueD.size()); - - if (res.second > valueD.size() && res.first.IsHead()) { //change to real size if real packed size is smaller - - Y_FAIL("Can't be here right now, only after merging of small batches"); - - for (auto it = DataKeysHead.rbegin(); it != DataKeysHead.rend(); ++it) { - if (it->KeysCount() > 0 ) { - auto res2 = it->PopBack(); - Y_VERIFY(res2 == res); - res2.second = valueD.size(); - - DataKeysHead[TotalLevels - 1].AddKey(res2.first, res2.second); - - res2 = Compact(res2.first, res2.second, headCleared); - - Y_VERIFY(res2.first == res.first); - Y_VERIFY(res2.second == valueD.size()); - res = res2; - break; - } - } - } - - Y_VERIFY(res.second == valueD.size() || res.first.IsHead()); - - CheckBlob(key, valueD); - - auto write = request->Record.AddCmdWrite(); - write->SetKey(key.Data(), key.Size()); - write->SetValue(valueD); - - if (!key.IsHead()) - write->SetKeyToCache(key.Data(), key.Size()); - - bool isInline = key.IsHead() && valueD.size() < MAX_INLINE_SIZE; - - if (isInline) - write->SetStorageChannel(NKikimrClient::TKeyValueRequest::INLINE); - else { - auto channel = GetChannel(NextChannel(key.IsHead(), valueD.size())); - write->SetStorageChannel(channel); - write->SetTactic(AppData(ctx)->PQConfig.GetTactic()); - } - - //Need to clear all compacted blobs - TKey k = CompactedKeys.empty() ? key : CompactedKeys.front().first; - ClearOldHead(k.GetOffset(), k.GetPartNo(), request); - - if (!key.IsHead()) { - if (!DataKeysBody.empty() && CompactedKeys.empty()) { - Y_VERIFY(DataKeysBody.back().Key.GetOffset() + DataKeysBody.back().Key.GetCount() <= key.GetOffset(), - "LAST KEY %s, HeadOffset %lu, NEWKEY %s", DataKeysBody.back().Key.ToString().c_str(), Head.Offset, key.ToString().c_str()); - } - CompactedKeys.push_back(res); - NewHead.Clear(); - NewHead.Offset = res.first.GetOffset() + res.first.GetCount(); - NewHead.PartNo = 0; - } else { - Y_VERIFY(NewHeadKey.Size == 0); + valueD.reserve(res.second); + ui32 pp = Head.FindPos(key.GetOffset(), key.GetPartNo()); + if (pp < Max<ui32>() && key.GetOffset() < EndOffset) { //this batch trully contains this offset + Y_VERIFY(pp < Head.Batches.size()); + Y_VERIFY(Head.Batches[pp].GetOffset() == key.GetOffset()); + Y_VERIFY(Head.Batches[pp].GetPartNo() == key.GetPartNo()); + for (; pp < Head.Batches.size(); ++pp) { //TODO - merge small batches here + Y_VERIFY(Head.Batches[pp].Packed); + valueD += Head.Batches[pp].Serialize(); + } + } + for (auto& b : NewHead.Batches) { + Y_VERIFY(b.Packed); + valueD += b.Serialize(); + } + + Y_VERIFY(res.second >= valueD.size()); + + if (res.second > valueD.size() && res.first.IsHead()) { //change to real size if real packed size is smaller + + Y_FAIL("Can't be here right now, only after merging of small batches"); + + for (auto it = DataKeysHead.rbegin(); it != DataKeysHead.rend(); ++it) { + if (it->KeysCount() > 0 ) { + auto res2 = it->PopBack(); + Y_VERIFY(res2 == res); + res2.second = valueD.size(); + + DataKeysHead[TotalLevels - 1].AddKey(res2.first, res2.second); + + res2 = Compact(res2.first, res2.second, headCleared); + + Y_VERIFY(res2.first == res.first); + Y_VERIFY(res2.second == valueD.size()); + res = res2; + break; + } + } + } + + Y_VERIFY(res.second == valueD.size() || res.first.IsHead()); + + CheckBlob(key, valueD); + + auto write = request->Record.AddCmdWrite(); + write->SetKey(key.Data(), key.Size()); + write->SetValue(valueD); + + if (!key.IsHead()) + write->SetKeyToCache(key.Data(), key.Size()); + + bool isInline = key.IsHead() && valueD.size() < MAX_INLINE_SIZE; + + if (isInline) + write->SetStorageChannel(NKikimrClient::TKeyValueRequest::INLINE); + else { + auto channel = GetChannel(NextChannel(key.IsHead(), valueD.size())); + write->SetStorageChannel(channel); + write->SetTactic(AppData(ctx)->PQConfig.GetTactic()); + } + + //Need to clear all compacted blobs + TKey k = CompactedKeys.empty() ? key : CompactedKeys.front().first; + ClearOldHead(k.GetOffset(), k.GetPartNo(), request); + + if (!key.IsHead()) { + if (!DataKeysBody.empty() && CompactedKeys.empty()) { + Y_VERIFY(DataKeysBody.back().Key.GetOffset() + DataKeysBody.back().Key.GetCount() <= key.GetOffset(), + "LAST KEY %s, HeadOffset %lu, NEWKEY %s", DataKeysBody.back().Key.ToString().c_str(), Head.Offset, key.ToString().c_str()); + } + CompactedKeys.push_back(res); + NewHead.Clear(); + NewHead.Offset = res.first.GetOffset() + res.first.GetCount(); + NewHead.PartNo = 0; + } else { + Y_VERIFY(NewHeadKey.Size == 0); NewHeadKey = {key, res.second, CurrentTimestamp, 0}; - } - WriteCycleSize += write->GetValue().size(); - UpdateWriteBufferIsFullState(ctx.Now()); -} - - -ui32 TPartition::NextChannel(bool isHead, ui32 blobSize) { - - if (isHead) { - ui32 i = 0; - for (ui32 j = 1; j < TotalChannelWritesByHead.size(); ++j) { - if (TotalChannelWritesByHead[j] < TotalChannelWritesByHead[i]) - i = j; - } - TotalChannelWritesByHead[i] += blobSize; - - return i; - }; - - ui32 res = Channel; - Channel = (Channel + 1) % Config.GetPartitionConfig().GetNumChannels(); - - return res; -} - -void TPartition::SetDeadlinesForWrites(const TActorContext& ctx) { - if (AppData(ctx)->PQConfig.GetQuotingConfig().GetQuotaWaitDurationMs() > 0 && QuotaDeadline == TInstant::Zero()) { - - QuotaDeadline = ctx.Now() + TDuration::MilliSeconds(AppData(ctx)->PQConfig.GetQuotingConfig().GetQuotaWaitDurationMs()); - - ctx.Schedule(QuotaDeadline, new TEvPQ::TEvQuotaDeadlineCheck()); - } -} - -void TPartition::Handle(TEvPQ::TEvQuotaDeadlineCheck::TPtr&, const TActorContext& ctx) { - FilterDeadlinedWrites(ctx); -} - -bool TPartition::ProcessWrites(TEvKeyValue::TEvRequest* request, const TActorContext& ctx) { - - FilterDeadlinedWrites(ctx); - - if (!WriteQuota.CanExaust()) { // Waiting for partition quota. - SetDeadlinesForWrites(ctx); + } + WriteCycleSize += write->GetValue().size(); + UpdateWriteBufferIsFullState(ctx.Now()); +} + + +ui32 TPartition::NextChannel(bool isHead, ui32 blobSize) { + + if (isHead) { + ui32 i = 0; + for (ui32 j = 1; j < TotalChannelWritesByHead.size(); ++j) { + if (TotalChannelWritesByHead[j] < TotalChannelWritesByHead[i]) + i = j; + } + TotalChannelWritesByHead[i] += blobSize; + + return i; + }; + + ui32 res = Channel; + Channel = (Channel + 1) % Config.GetPartitionConfig().GetNumChannels(); + + return res; +} + +void TPartition::SetDeadlinesForWrites(const TActorContext& ctx) { + if (AppData(ctx)->PQConfig.GetQuotingConfig().GetQuotaWaitDurationMs() > 0 && QuotaDeadline == TInstant::Zero()) { + + QuotaDeadline = ctx.Now() + TDuration::MilliSeconds(AppData(ctx)->PQConfig.GetQuotingConfig().GetQuotaWaitDurationMs()); + + ctx.Schedule(QuotaDeadline, new TEvPQ::TEvQuotaDeadlineCheck()); + } +} + +void TPartition::Handle(TEvPQ::TEvQuotaDeadlineCheck::TPtr&, const TActorContext& ctx) { + FilterDeadlinedWrites(ctx); +} + +bool TPartition::ProcessWrites(TEvKeyValue::TEvRequest* request, const TActorContext& ctx) { + + FilterDeadlinedWrites(ctx); + + if (!WriteQuota.CanExaust()) { // Waiting for partition quota. + SetDeadlinesForWrites(ctx); return false; } - - if (WaitingForPreviousBlobQuota()) { // Waiting for topic quota. - SetDeadlinesForWrites(ctx); - + + if (WaitingForPreviousBlobQuota()) { // Waiting for topic quota. + SetDeadlinesForWrites(ctx); + if (StartTopicQuotaWaitTimeForCurrentBlob == TInstant::Zero() && !Requests.empty()) { StartTopicQuotaWaitTimeForCurrentBlob = TActivationContext::Now(); } - return false; - } - - QuotaDeadline = TInstant::Zero(); - - if (Requests.empty()) - return false; - - Y_VERIFY(request->Record.CmdWriteSize() == 0); - Y_VERIFY(request->Record.CmdRenameSize() == 0); - Y_VERIFY(request->Record.CmdDeleteRangeSize() == 0); + return false; + } + + QuotaDeadline = TInstant::Zero(); + + if (Requests.empty()) + return false; + + Y_VERIFY(request->Record.CmdWriteSize() == 0); + Y_VERIFY(request->Record.CmdRenameSize() == 0); + Y_VERIFY(request->Record.CmdDeleteRangeSize() == 0); const auto format = AppData(ctx)->PQConfig.GetEnableProtoSourceIdInfo() ? ESourceIdFormat::Proto : ESourceIdFormat::Raw; TSourceIdWriter sourceIdWriter(format); - + bool headCleared = AppendHeadWithNewWrites(request, ctx, sourceIdWriter); - - if (headCleared) { - Y_VERIFY(!CompactedKeys.empty() || Head.PackedSize == 0); - for (ui32 i = 0; i < TotalLevels; ++i) { - DataKeysHead[i].Clear(); - } - } - - if (NewHead.PackedSize == 0) { //nothing added to head - just compaction or tmp part blobs writed + + if (headCleared) { + Y_VERIFY(!CompactedKeys.empty() || Head.PackedSize == 0); + for (ui32 i = 0; i < TotalLevels; ++i) { + DataKeysHead[i].Clear(); + } + } + + if (NewHead.PackedSize == 0) { //nothing added to head - just compaction or tmp part blobs writed Y_VERIFY(sourceIdWriter.GetSourceIdsToWrite().empty()); - return request->Record.CmdWriteSize() > 0 || request->Record.CmdRenameSize() > 0 || request->Record.CmdDeleteRangeSize() > 0; - } + return request->Record.CmdWriteSize() > 0 || request->Record.CmdRenameSize() > 0 || request->Record.CmdDeleteRangeSize() > 0; + } sourceIdWriter.FillRequest(request, Partition); - - std::pair<TKey, ui32> res = GetNewWriteKey(headCleared); - const auto& key = res.first; - - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "writing blob: topic '" << TopicName << "' partition " << Partition - << " compactOffset " << key.GetOffset() << "," << key.GetCount() - << " HeadOffset " << Head.Offset << " endOffset " << EndOffset << " curOffset " << NewHead.GetNextOffset() << " " << key.ToString() - << " size " << res.second << " WTime " << ctx.Now().MilliSeconds()); - - AddNewWriteBlob(res, request, headCleared, ctx); - return true; -} - -void TPartition::FilterDeadlinedWrites(const TActorContext& ctx) -{ - if (QuotaDeadline == TInstant::Zero() || QuotaDeadline > ctx.Now()) - return; - - for (auto& w : Requests) { - ReplyError(ctx, w.GetCookie(), NPersQueue::NErrorCode::OVERLOAD, "quota exceeded"); + + std::pair<TKey, ui32> res = GetNewWriteKey(headCleared); + const auto& key = res.first; + + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "writing blob: topic '" << TopicName << "' partition " << Partition + << " compactOffset " << key.GetOffset() << "," << key.GetCount() + << " HeadOffset " << Head.Offset << " endOffset " << EndOffset << " curOffset " << NewHead.GetNextOffset() << " " << key.ToString() + << " size " << res.second << " WTime " << ctx.Now().MilliSeconds()); + + AddNewWriteBlob(res, request, headCleared, ctx); + return true; +} + +void TPartition::FilterDeadlinedWrites(const TActorContext& ctx) +{ + if (QuotaDeadline == TInstant::Zero() || QuotaDeadline > ctx.Now()) + return; + + for (auto& w : Requests) { + ReplyError(ctx, w.GetCookie(), NPersQueue::NErrorCode::OVERLOAD, "quota exceeded"); if (w.IsWrite()) { const auto& msg = w.GetWrite().Msg; - Counters.Cumulative()[COUNTER_PQ_WRITE_ERROR].Increment(1); + Counters.Cumulative()[COUNTER_PQ_WRITE_ERROR].Increment(1); Counters.Cumulative()[COUNTER_PQ_WRITE_BYTES_ERROR].Increment(msg.Data.size() + msg.SourceId.size()); WriteInflightSize -= msg.Data.size(); - } - } - Requests.clear(); - QuotaDeadline = TInstant::Zero(); - - UpdateWriteBufferIsFullState(ctx.Now()); -} - - -void TPartition::HandleWrites(const TActorContext& ctx) -{ - Become(&TThis::StateWrite); - - THolder<TEvKeyValue::TEvRequest> request(new TEvKeyValue::TEvRequest); - - Y_VERIFY(Head.PackedSize + NewHead.PackedSize <= 2 * MaxSizeCheck); - - WriteCycleStartTime = ctx.Now(); - - bool haveData = false; - bool haveCheckDisk = false; - if (!Requests.empty() && DiskIsFull) { - CancelAllWritesOnIdle(ctx); - AddCheckDiskRequest(request.Get(), Config.GetPartitionConfig().GetNumChannels()); - haveCheckDisk = true; - } else { - haveData = ProcessWrites(request.Get(), ctx); - } - bool haveDrop = DropOldStuff(request.Get(), haveData, ctx); - - ProcessReserveRequests(ctx); - - if (!haveData && !haveDrop && !haveCheckDisk) { //no data writed/deleted + } + } + Requests.clear(); + QuotaDeadline = TInstant::Zero(); + + UpdateWriteBufferIsFullState(ctx.Now()); +} + + +void TPartition::HandleWrites(const TActorContext& ctx) +{ + Become(&TThis::StateWrite); + + THolder<TEvKeyValue::TEvRequest> request(new TEvKeyValue::TEvRequest); + + Y_VERIFY(Head.PackedSize + NewHead.PackedSize <= 2 * MaxSizeCheck); + + WriteCycleStartTime = ctx.Now(); + + bool haveData = false; + bool haveCheckDisk = false; + if (!Requests.empty() && DiskIsFull) { + CancelAllWritesOnIdle(ctx); + AddCheckDiskRequest(request.Get(), Config.GetPartitionConfig().GetNumChannels()); + haveCheckDisk = true; + } else { + haveData = ProcessWrites(request.Get(), ctx); + } + bool haveDrop = DropOldStuff(request.Get(), haveData, ctx); + + ProcessReserveRequests(ctx); + + if (!haveData && !haveDrop && !haveCheckDisk) { //no data writed/deleted if (!Requests.empty()) { //there could be change ownership requests that - bool res = ProcessWrites(request.Get(), ctx); - Y_VERIFY(!res); - } + bool res = ProcessWrites(request.Get(), ctx); + Y_VERIFY(!res); + } Y_VERIFY(Requests.empty() || !WriteQuota.CanExaust() || WaitingForPreviousBlobQuota()); //in this case all writes must be processed or no quota left - AnswerCurrentWrites(ctx); //in case if all writes are already done - no answer will be called on kv write, no kv write at all - BecomeIdle(ctx); - return; - } - - WritesTotal.Inc(); + AnswerCurrentWrites(ctx); //in case if all writes are already done - no answer will be called on kv write, no kv write at all + BecomeIdle(ctx); + return; + } + + WritesTotal.Inc(); WriteBlobWithQuota(std::move(request)); -} - - -void TPartition::ProcessRead(const TActorContext& ctx, TReadInfo&& info, const ui64 cookie, bool subscription) -{ - ui32 count = 0; - ui32 size = 0; - - Y_VERIFY(!info.User.empty()); +} + + +void TPartition::ProcessRead(const TActorContext& ctx, TReadInfo&& info, const ui64 cookie, bool subscription) +{ + ui32 count = 0; + ui32 size = 0; + + Y_VERIFY(!info.User.empty()); auto& userInfo = UsersInfoStorage.GetOrCreate(info.User, ctx); - - if (subscription) { + + if (subscription) { userInfo.ForgetSubscription(ctx.Now()); - } - + } + if (!userInfo.ReadQuota.CanExaust()) { userInfo.ReadRequests.push_back({std::move(info), cookie}); userInfo.UpdateReadingTimeAndState(ctx.Now()); - return; - } + return; + } TVector<TRequestedBlob> blobs = GetReadRequestFromBody(info.Offset, info.PartNo, info.Count, info.Size, &count, &size); - info.Blobs = blobs; - ui64 lastOffset = info.Offset + Min(count, info.Count); - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "read cookie " << cookie << " added " << info.Blobs.size() - << " blobs, size " << size << " count " << count << " last offset " << lastOffset); - + info.Blobs = blobs; + ui64 lastOffset = info.Offset + Min(count, info.Count); + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "read cookie " << cookie << " added " << info.Blobs.size() + << " blobs, size " << size << " count " << count << " last offset " << lastOffset); + ui64 insideHeadOffset{0}; info.Cached = GetReadRequestFromHead(info.Offset, info.PartNo, info.Count, info.Size, info.ReadTimestampMs, &count, &size, &insideHeadOffset); info.CachedOffset = Head.Offset > 0 ? Head.Offset : insideHeadOffset; - - if (info.Destination != 0) { + + if (info.Destination != 0) { ++userInfo.ActiveReads; userInfo.UpdateReadingTimeAndState(ctx.Now()); - } - - if (info.Blobs.empty()) { //all from head, answer right now - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Reading cookie " << cookie << ". All data is from uncompacted head."); - + } + + if (info.Blobs.empty()) { //all from head, answer right now + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Reading cookie " << cookie << ". All data is from uncompacted head."); + TReadAnswer answer(info.FormAnswer( ctx, EndOffset, Partition, &UsersInfoStorage.GetOrCreate(info.User, ctx), info.Destination, GetSizeLag(info.Offset) )); const auto& resp = dynamic_cast<TEvPQ::TEvProxyResponse*>(answer.Event.Get())->Response; - if (info.IsSubscription) { - Counters.Cumulative()[COUNTER_PQ_READ_SUBSCRIPTION_OK].Increment(1); - } - Counters.Cumulative()[COUNTER_PQ_READ_HEAD_ONLY_OK].Increment(1); - Counters.Percentile()[COUNTER_LATENCY_PQ_READ_HEAD_ONLY].IncrementFor((ctx.Now() - info.Timestamp).MilliSeconds()); - Counters.Cumulative()[COUNTER_PQ_READ_BYTES].Increment(resp.ByteSize()); + if (info.IsSubscription) { + Counters.Cumulative()[COUNTER_PQ_READ_SUBSCRIPTION_OK].Increment(1); + } + Counters.Cumulative()[COUNTER_PQ_READ_HEAD_ONLY_OK].Increment(1); + Counters.Percentile()[COUNTER_LATENCY_PQ_READ_HEAD_ONLY].IncrementFor((ctx.Now() - info.Timestamp).MilliSeconds()); + Counters.Cumulative()[COUNTER_PQ_READ_BYTES].Increment(resp.ByteSize()); ctx.Send(info.Destination != 0 ? Tablet : ctx.SelfID, answer.Event.Release()); - ReportLabeledCounters(ctx); + ReportLabeledCounters(ctx); OnReadRequestFinished(std::move(info), answer.Size); return; } const TString user = info.User; - bool res = ReadInfo.insert({cookie, std::move(info)}).second; - Y_VERIFY(res); - + bool res = ReadInfo.insert({cookie, std::move(info)}).second; + Y_VERIFY(res); + THolder<TEvPQ::TEvBlobRequest> request(new TEvPQ::TEvBlobRequest(user, cookie, Partition, lastOffset, std::move(blobs))); - - ctx.Send(BlobCache, request.Release()); -} - + + ctx.Send(BlobCache, request.Release()); +} + void TPartition::Handle(TEvQuota::TEvClearance::TPtr& ev, const TActorContext& ctx) { const ui64 cookie = ev->Cookie; @@ -4636,7 +4636,7 @@ void TPartition::Handle(TEvQuota::TEvClearance::TPtr& ev, const TActorContext& c ctx.Send(Tablet, new TEvents::TEvPoisonPill()); return; } - + // Search for proper request Y_VERIFY(TopicQuotaRequestCookie == cookie); TopicQuotaRequestCookie = 0; @@ -4650,7 +4650,7 @@ void TPartition::Handle(TEvQuota::TEvClearance::TPtr& ev, const TActorContext& c // Reset quota wait time StartTopicQuotaWaitTimeForCurrentBlob = TInstant::Zero(); - if (CurrentStateFunc() == &TThis::StateIdle) + if (CurrentStateFunc() == &TThis::StateIdle) HandleWrites(ctx); } @@ -4685,7 +4685,7 @@ bool TPartition::WaitingForPreviousBlobQuota() const { return TopicQuotaRequestCookie != 0; } -void TPartition::WriteBlobWithQuota(THolder<TEvKeyValue::TEvRequest>&& request) +void TPartition::WriteBlobWithQuota(THolder<TEvKeyValue::TEvRequest>&& request) { // Request quota and write blob. // Mirrored topics are not quoted in local dc. @@ -4698,8 +4698,8 @@ void TPartition::WriteBlobWithQuota(THolder<TEvKeyValue::TEvRequest>&& request) RequestQuotaForWriteBlobRequest(quotaRequestSize, TopicQuotaRequestCookie); } - AddMetaKey(request.Get()); - + AddMetaKey(request.Get()); + WriteStartTime = TActivationContext::Now(); // Write blob #if 1 @@ -4742,7 +4742,7 @@ void TPartition::CalcTopicWriteQuotaParams() void TPartition::CreateMirrorerActor() { Mirrorer = MakeHolder<TMirrorerInfo>( - Register(new TMirrorer(Tablet, SelfId(), TopicName, Partition, LocalDC, EndOffset, Config.GetPartitionConfig().GetMirrorFrom(), Counters)), + Register(new TMirrorer(Tablet, SelfId(), TopicName, Partition, LocalDC, EndOffset, Config.GetPartitionConfig().GetMirrorFrom(), Counters)), Counters ); } @@ -4753,5 +4753,5 @@ bool TPartition::IsQuotingEnabled() const { return LocalDC && !pqConfig.GetTopicsAreFirstClassCitizen() && quotingConfig.GetEnableQuoting(); } -}// NPQ -}// NKikimr +}// NPQ +}// NKikimr diff --git a/ydb/core/persqueue/partition.h b/ydb/core/persqueue/partition.h index 346170342a..2eec26f251 100644 --- a/ydb/core/persqueue/partition.h +++ b/ydb/core/persqueue/partition.h @@ -1,7 +1,7 @@ -#pragma once -#include <util/generic/set.h> -#include <util/system/hp_timer.h> - +#pragma once +#include <util/generic/set.h> +#include <util/system/hp_timer.h> + #include <ydb/core/base/quoter.h> #include <ydb/core/keyvalue/keyvalue_events.h> #include <library/cpp/actors/core/actor.h> @@ -12,26 +12,26 @@ #include <ydb/core/persqueue/events/internal.h> #include <ydb/library/persqueue/counter_time_keeper/counter_time_keeper.h> -#include "key.h" -#include "blob.h" -#include "subscriber.h" -#include "header.h" -#include "user_info.h" +#include "key.h" +#include "blob.h" +#include "subscriber.h" +#include "header.h" +#include "user_info.h" #include "sourceid.h" #include "ownerinfo.h" - + #include <variant> -namespace NKikimr { -namespace NPQ { - -class TKeyLevel; - -static const ui32 MAX_BLOB_PART_SIZE = 500 << 10; //500Kb - -typedef TProtobufTabletLabeledCounters<EPartitionLabeledCounters_descriptor> TPartitionLabeledCounters; - - +namespace NKikimr { +namespace NPQ { + +class TKeyLevel; + +static const ui32 MAX_BLOB_PART_SIZE = 500 << 10; //500Kb + +typedef TProtobufTabletLabeledCounters<EPartitionLabeledCounters_descriptor> TPartitionLabeledCounters; + + struct TDataKey { TKey Key; ui32 Size; @@ -43,110 +43,110 @@ ui64 GetOffsetEstimate(const std::deque<TDataKey>& container, TInstant timestamp struct TMirrorerInfo; -class TPartition : public TActorBootstrapped<TPartition> { +class TPartition : public TActorBootstrapped<TPartition> { private: static constexpr ui32 MAX_ERRORS_COUNT_TO_STORE = 10; - + private: - struct THasDataReq; - struct THasDataDeadline; - - //answer for requests when data arrives and drop deadlined requests - void ProcessHasDataRequests(const TActorContext& ctx); - - void FillReadFromTimestamps(const NKikimrPQ::TPQTabletConfig& config, const TActorContext& ctx); - void ProcessUserActs(TUserInfo& userInfo, const TActorContext& ctx); - - void ReplyError(const TActorContext& ctx, const ui64 dst, NPersQueue::NErrorCode::EErrorCode errorCode, const TString& error); - void ReplyErrorForStoredWrites(const TActorContext& ctx); - void ReplyOk(const TActorContext& ctx, const ui64 dst); + struct THasDataReq; + struct THasDataDeadline; + + //answer for requests when data arrives and drop deadlined requests + void ProcessHasDataRequests(const TActorContext& ctx); + + void FillReadFromTimestamps(const NKikimrPQ::TPQTabletConfig& config, const TActorContext& ctx); + void ProcessUserActs(TUserInfo& userInfo, const TActorContext& ctx); + + void ReplyError(const TActorContext& ctx, const ui64 dst, NPersQueue::NErrorCode::EErrorCode errorCode, const TString& error); + void ReplyErrorForStoredWrites(const TActorContext& ctx); + void ReplyOk(const TActorContext& ctx, const ui64 dst); void ReplyWrite( const TActorContext& ctx, ui64 dst, const TString& sourceId, ui64 seqNo, ui16 partNo, ui16 totalParts, ui64 offset, TInstant writeTimestamp, bool already, ui64 maxSeqNo, ui64 partitionQuotedTime, TDuration topicQuotedTime, ui64 queueTime, ui64 writeTime); - void ReplyGetClientOffsetOk(const TActorContext& ctx, const ui64 dst, const i64 offset, + void ReplyGetClientOffsetOk(const TActorContext& ctx, const ui64 dst, const i64 offset, const TInstant writeTimestamp, const TInstant createTimestamp); - - void ReplyOwnerOk(const TActorContext& ctx, const ui64 dst, const TString& ownerCookie); - - void Handle(TEvPersQueue::TEvHasDataInfo::TPtr& ev, const TActorContext& ctx); - + + void ReplyOwnerOk(const TActorContext& ctx, const ui64 dst, const TString& ownerCookie); + + void Handle(TEvPersQueue::TEvHasDataInfo::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQ::TEvMirrorerCounters::TPtr& ev, const TActorContext& ctx); void Handle(NReadSpeedLimiterEvents::TEvCounters::TPtr& ev, const TActorContext& ctx); - //answer for reads for Timestamps - void Handle(TEvPQ::TEvProxyResponse::TPtr& ev, const TActorContext& ctx); - void Handle(TEvPQ::TEvError::TPtr& ev, const TActorContext& ctx); - void ProcessTimestampRead(const TActorContext& ctx); - - void HandleOnInit(TEvKeyValue::TEvResponse::TPtr& ev, const TActorContext& ctx); - - void HandleGetDiskStatus(const NKikimrClient::TResponse& res, const TActorContext& ctx); + //answer for reads for Timestamps + void Handle(TEvPQ::TEvProxyResponse::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQ::TEvError::TPtr& ev, const TActorContext& ctx); + void ProcessTimestampRead(const TActorContext& ctx); + + void HandleOnInit(TEvKeyValue::TEvResponse::TPtr& ev, const TActorContext& ctx); + + void HandleGetDiskStatus(const NKikimrClient::TResponse& res, const TActorContext& ctx); void HandleInfoRangeRead(const NKikimrClient::TKeyValueResponse::TReadRangeResult& range, const TActorContext& ctx); void HandleDataRangeRead(const NKikimrClient::TKeyValueResponse::TReadRangeResult& range, const TActorContext& ctx); - void HandleMetaRead(const NKikimrClient::TKeyValueResponse::TReadResult& response, const TActorContext& ctx); - - //forms DataKeysBody and other partition's info - //ctx here only for logging + void HandleMetaRead(const NKikimrClient::TKeyValueResponse::TReadResult& response, const TActorContext& ctx); + + //forms DataKeysBody and other partition's info + //ctx here only for logging void FillBlobsMetaData(const NKikimrClient::TKeyValueResponse::TReadRangeResult& range, const TActorContext& ctx); - //will form head and request data keys from head or finish initialization - void FormHeadAndProceed(const TActorContext& ctx); + //will form head and request data keys from head or finish initialization + void FormHeadAndProceed(const TActorContext& ctx); void HandleDataRead(const NKikimrClient::TResponse& range, const TActorContext& ctx); - void InitComplete(const TActorContext& ctx); - - - void Handle(TEvPQ::TEvChangeOwner::TPtr& ev, const TActorContext& ctx); - void ProcessChangeOwnerRequests(const TActorContext& ctx); - void ProcessChangeOwnerRequest(TAutoPtr<TEvPQ::TEvChangeOwner> ev, const TActorContext& ctx); - - void Handle(TEvPQ::TEvBlobResponse::TPtr& ev, const TActorContext& ctx); - - void Handle(TEvPQ::TEvChangeConfig::TPtr& ev, const TActorContext& ctx); - - void Handle(TEvPQ::TEvGetClientOffset::TPtr& ev, const TActorContext& ctx); - - void Handle(TEvPQ::TEvUpdateWriteTimestamp::TPtr& ev, const TActorContext& ctx); - - void Handle(TEvPQ::TEvSetClientInfo::TPtr& ev, const TActorContext& ctx); - void WriteClientInfo(const ui64 cookie, TUserInfo& ui, const TActorContext& ctx); - - - void HandleOnInit(TEvPQ::TEvPartitionOffsets::TPtr& ev, const TActorContext& ctx); - void HandleOnInit(TEvPQ::TEvPartitionStatus::TPtr& ev, const TActorContext& ctx); - void Handle(TEvPQ::TEvPartitionOffsets::TPtr& ev, const TActorContext& ctx); - void Handle(TEvPQ::TEvPartitionStatus::TPtr& ev, const TActorContext& ctx); + void InitComplete(const TActorContext& ctx); + + + void Handle(TEvPQ::TEvChangeOwner::TPtr& ev, const TActorContext& ctx); + void ProcessChangeOwnerRequests(const TActorContext& ctx); + void ProcessChangeOwnerRequest(TAutoPtr<TEvPQ::TEvChangeOwner> ev, const TActorContext& ctx); + + void Handle(TEvPQ::TEvBlobResponse::TPtr& ev, const TActorContext& ctx); + + void Handle(TEvPQ::TEvChangeConfig::TPtr& ev, const TActorContext& ctx); + + void Handle(TEvPQ::TEvGetClientOffset::TPtr& ev, const TActorContext& ctx); + + void Handle(TEvPQ::TEvUpdateWriteTimestamp::TPtr& ev, const TActorContext& ctx); + + void Handle(TEvPQ::TEvSetClientInfo::TPtr& ev, const TActorContext& ctx); + void WriteClientInfo(const ui64 cookie, TUserInfo& ui, const TActorContext& ctx); + + + void HandleOnInit(TEvPQ::TEvPartitionOffsets::TPtr& ev, const TActorContext& ctx); + void HandleOnInit(TEvPQ::TEvPartitionStatus::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQ::TEvPartitionOffsets::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQ::TEvPartitionStatus::TPtr& ev, const TActorContext& ctx); void Handle(TEvPQ::TEvGetPartitionClientInfo::TPtr& ev, const TActorContext& ctx); - + void Handle(TEvPersQueue::TEvReportPartitionError::TPtr& ev, const TActorContext& ctx); void LogAndCollectError(const NKikimrPQ::TStatusResponse::TErrorMessage& error, const TActorContext& ctx); void LogAndCollectError(NKikimrServices::EServiceKikimr service, const TString& msg, const TActorContext& ctx); - void HandleOnIdle(TEvPQ::TEvUpdateAvailableSize::TPtr& ev, const TActorContext& ctx); - void HandleOnWrite(TEvPQ::TEvUpdateAvailableSize::TPtr& ev, const TActorContext& ctx); - - void Handle(TEvPQ::TEvQuotaDeadlineCheck::TPtr& ev, const TActorContext& ctx); - - void UpdateAvailableSize(const TActorContext& ctx); - void ScheduleUpdateAvailableSize(const TActorContext& ctx); - - void Handle(TEvPQ::TEvGetMaxSeqNoRequest::TPtr& ev, const TActorContext& ctx); - - void Handle(TEvPQ::TEvReadTimeout::TPtr& ev, const TActorContext& ctx); - void HandleWakeup(const TActorContext& ctx); + void HandleOnIdle(TEvPQ::TEvUpdateAvailableSize::TPtr& ev, const TActorContext& ctx); + void HandleOnWrite(TEvPQ::TEvUpdateAvailableSize::TPtr& ev, const TActorContext& ctx); + + void Handle(TEvPQ::TEvQuotaDeadlineCheck::TPtr& ev, const TActorContext& ctx); + + void UpdateAvailableSize(const TActorContext& ctx); + void ScheduleUpdateAvailableSize(const TActorContext& ctx); + + void Handle(TEvPQ::TEvGetMaxSeqNoRequest::TPtr& ev, const TActorContext& ctx); + + void Handle(TEvPQ::TEvReadTimeout::TPtr& ev, const TActorContext& ctx); + void HandleWakeup(const TActorContext& ctx); void Handle(TEvents::TEvPoisonPill::TPtr& ev, const TActorContext& ctx); - - void Handle(TEvPQ::TEvRead::TPtr& ev, const TActorContext& ctx); + + void Handle(TEvPQ::TEvRead::TPtr& ev, const TActorContext& ctx); void Handle(NReadSpeedLimiterEvents::TEvResponse::TPtr& ev, const TActorContext& ctx); void DoRead(TEvPQ::TEvRead::TPtr ev, TDuration waitQuotaTime, const TActorContext& ctx); void OnReadRequestFinished(TReadInfo&& info, ui64 answerSize); - - // will return rcount and rsize also + + // will return rcount and rsize also TVector<TRequestedBlob> GetReadRequestFromBody(const ui64 startOffset, const ui16 partNo, const ui32 maxCount, const ui32 maxSize, ui32* rcount, ui32* rsize); TVector<TClientBlob> GetReadRequestFromHead(const ui64 startOffset, const ui16 partNo, const ui32 maxCount, const ui32 maxSize, const ui64 readTimestampMs, ui32* rcount, ui32* rsize, ui64* insideHeadOffset); - void ProcessRead(const TActorContext& ctx, TReadInfo&& info, const ui64 cookie, bool subscription); - - void HandleOnIdle(TEvPQ::TEvWrite::TPtr& ev, const TActorContext& ctx); - void HandleOnWrite(TEvPQ::TEvWrite::TPtr& ev, const TActorContext& ctx); + void ProcessRead(const TActorContext& ctx, TReadInfo&& info, const ui64 cookie, bool subscription); + + void HandleOnIdle(TEvPQ::TEvWrite::TPtr& ev, const TActorContext& ctx); + void HandleOnWrite(TEvPQ::TEvWrite::TPtr& ev, const TActorContext& ctx); void HandleOnIdle(TEvPQ::TEvRegisterMessageGroup::TPtr& ev, const TActorContext& ctx); void HandleOnWrite(TEvPQ::TEvRegisterMessageGroup::TPtr& ev, const TActorContext& ctx); @@ -158,62 +158,62 @@ private: void HandleOnWrite(TEvPQ::TEvSplitMessageGroup::TPtr& ev, const TActorContext& ctx); void Handle(TEvQuota::TEvClearance::TPtr& ev, const TActorContext& ctx); - bool DropOldStuff(TEvKeyValue::TEvRequest* request, bool hasWrites, const TActorContext& ctx); - - //will fill sourceIds, request and NewHead - //returns true if head is compacted + bool DropOldStuff(TEvKeyValue::TEvRequest* request, bool hasWrites, const TActorContext& ctx); + + //will fill sourceIds, request and NewHead + //returns true if head is compacted bool AppendHeadWithNewWrites(TEvKeyValue::TEvRequest* request, const TActorContext& ctx, TSourceIdWriter& sourceIdWriter); - std::pair<TKey, ui32> GetNewWriteKey(bool headCleared); - void AddNewWriteBlob(std::pair<TKey, ui32>& res, TEvKeyValue::TEvRequest* request, bool headCleared, const TActorContext& ctx); - - bool ProcessWrites(TEvKeyValue::TEvRequest* request, const TActorContext& ctx); - void FilterDeadlinedWrites(const TActorContext& ctx); - void SetDeadlinesForWrites(const TActorContext& ctx); - + std::pair<TKey, ui32> GetNewWriteKey(bool headCleared); + void AddNewWriteBlob(std::pair<TKey, ui32>& res, TEvKeyValue::TEvRequest* request, bool headCleared, const TActorContext& ctx); + + bool ProcessWrites(TEvKeyValue::TEvRequest* request, const TActorContext& ctx); + void FilterDeadlinedWrites(const TActorContext& ctx); + void SetDeadlinesForWrites(const TActorContext& ctx); + void ReadTimestampForOffset(const TString& user, TUserInfo& ui, const TActorContext& ctx); - void ProcessTimestampsForNewData(const ui64 prevEndOffset, const TActorContext& ctx); - void ReportLabeledCounters(const TActorContext& ctx); - ui64 GetSizeLag(i64 offset); - - void Handle(TEvKeyValue::TEvResponse::TPtr& ev, const TActorContext& ctx); + void ProcessTimestampsForNewData(const ui64 prevEndOffset, const TActorContext& ctx); + void ReportLabeledCounters(const TActorContext& ctx); + ui64 GetSizeLag(i64 offset); + + void Handle(TEvKeyValue::TEvResponse::TPtr& ev, const TActorContext& ctx); void HandleSetOffsetResponse(NKikimrClient::TResponse& response, const TActorContext& ctx); - void HandleWriteResponse(const TActorContext& ctx); - void Handle(TEvPQ::TEvHandleWriteResponse::TPtr&, const TActorContext& ctx); - - - void AnswerCurrentWrites(const TActorContext& ctx); - void SyncMemoryStateWithKVState(const TActorContext& ctx); - - //only Writes container is filled; only DISK_IS_FULL can be here - void CancelAllWritesOnIdle(const TActorContext& ctx); - //additional contaiters are half-filled, need to clear them too + void HandleWriteResponse(const TActorContext& ctx); + void Handle(TEvPQ::TEvHandleWriteResponse::TPtr&, const TActorContext& ctx); + + + void AnswerCurrentWrites(const TActorContext& ctx); + void SyncMemoryStateWithKVState(const TActorContext& ctx); + + //only Writes container is filled; only DISK_IS_FULL can be here + void CancelAllWritesOnIdle(const TActorContext& ctx); + //additional contaiters are half-filled, need to clear them too struct TWriteMsg; // forward void CancelAllWritesOnWrite(const TActorContext& ctx, TEvKeyValue::TEvRequest* request, const TString& errorStr, const TWriteMsg& p, TSourceIdWriter& sourceIdWriter, NPersQueue::NErrorCode::EErrorCode errorCode); - - - void FailBadClient(const TActorContext& ctx); - void ClearOldHead(const ui64 offset, const ui16 partNo, TEvKeyValue::TEvRequest* request); - - void HandleMonitoring(TEvPQ::TEvMonRequest::TPtr& ev, const TActorContext& ctx); - - void InitUserInfoForImportantClients(const TActorContext& ctx); - - + + + void FailBadClient(const TActorContext& ctx); + void ClearOldHead(const ui64 offset, const ui16 partNo, TEvKeyValue::TEvRequest* request); + + void HandleMonitoring(TEvPQ::TEvMonRequest::TPtr& ev, const TActorContext& ctx); + + void InitUserInfoForImportantClients(const TActorContext& ctx); + + THashMap<TString, TOwnerInfo>::iterator DropOwner(THashMap<TString, TOwnerInfo>::iterator& it, const TActorContext& ctx); - - void Handle(TEvPQ::TEvPipeDisconnected::TPtr& ev, const TActorContext& ctx); - - void Handle(TEvPQ::TEvReserveBytes::TPtr& ev, const TActorContext& ctx); - void ProcessReserveRequests(const TActorContext& ctx); - + + void Handle(TEvPQ::TEvPipeDisconnected::TPtr& ev, const TActorContext& ctx); + + void Handle(TEvPQ::TEvReserveBytes::TPtr& ev, const TActorContext& ctx); + void ProcessReserveRequests(const TActorContext& ctx); + void CreateMirrorerActor(); bool IsQuotingEnabled() const; void SetupTopicCounters(const TActorContext& ctx); void SetupStreamCounters(const TActorContext& ctx); -public: +public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::PERSQUEUE_PARTITION_ACTOR; } @@ -221,27 +221,27 @@ public: TPartition(ui64 tabletId, ui32 partition, const TActorId& tablet, const TActorId& blobCache, const TString& topicName, const TString& topicPath, const bool localDC, TString dcId, const NKikimrPQ::TPQTabletConfig& config, const TTabletCountersBase& counters, - const TActorContext& ctx, bool newPartition = false); - - void Bootstrap(const TActorContext& ctx); - - - //Bootstrap sends kvRead - //Become StateInit - //StateInit - //wait for correct result, cache all - //Become StateIdle - //StateIdle - //got read - make kvRead - //got kvReadResult - answer read - //got write - make kvWrite, Become StateWrite - //StateWrite - // got read - ... - // got kwReadResult - ... - //got write - store it inflight - //got kwWriteResult - check it, become StateIdle of StateWrite(and write inflight) - -private: + const TActorContext& ctx, bool newPartition = false); + + void Bootstrap(const TActorContext& ctx); + + + //Bootstrap sends kvRead + //Become StateInit + //StateInit + //wait for correct result, cache all + //Become StateIdle + //StateIdle + //got read - make kvRead + //got kvReadResult - answer read + //got write - make kvWrite, Become StateWrite + //StateWrite + // got read - ... + // got kwReadResult - ... + //got write - store it inflight + //got kwWriteResult - check it, become StateIdle of StateWrite(and write inflight) + +private: template <typename TEv> TString EventStr(const char * func, const TEv& ev) { TStringStream ss; @@ -250,162 +250,162 @@ private: return ss.Str(); } - STFUNC(StateInit) - { + STFUNC(StateInit) + { NPersQueue::TCounterTimeKeeper keeper(Counters.Cumulative()[COUNTER_PQ_TABLET_CPU_USAGE]); - + LOG_TRACE_S(ctx, NKikimrServices::PERSQUEUE, EventStr("StateInit", ev)); TRACE_EVENT(NKikimrServices::PERSQUEUE); - switch (ev->GetTypeRewrite()) { - CFunc(TEvents::TSystem::Wakeup, HandleWakeup); + switch (ev->GetTypeRewrite()) { + CFunc(TEvents::TSystem::Wakeup, HandleWakeup); HFuncTraced(TEvKeyValue::TEvResponse, HandleOnInit); //result of reads HFuncTraced(TEvents::TEvPoisonPill, Handle); HFuncTraced(TEvPQ::TEvMonRequest, HandleMonitoring); HFuncTraced(TEvPQ::TEvChangeConfig, Handle); - HFuncTraced(TEvPQ::TEvPartitionOffsets, HandleOnInit); - HFuncTraced(TEvPQ::TEvPartitionStatus, HandleOnInit); + HFuncTraced(TEvPQ::TEvPartitionOffsets, HandleOnInit); + HFuncTraced(TEvPQ::TEvPartitionStatus, HandleOnInit); HFuncTraced(TEvPersQueue::TEvReportPartitionError, Handle); - HFuncTraced(TEvPersQueue::TEvHasDataInfo, Handle); + HFuncTraced(TEvPersQueue::TEvHasDataInfo, Handle); HFuncTraced(TEvPQ::TEvMirrorerCounters, Handle); HFuncTraced(NReadSpeedLimiterEvents::TEvCounters, Handle); HFuncTraced(TEvPQ::TEvGetPartitionClientInfo, Handle); - default: + default: LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "Unexpected " << EventStr("StateInit", ev)); - break; - }; - } - - STFUNC(StateIdle) - { + break; + }; + } + + STFUNC(StateIdle) + { NPersQueue::TCounterTimeKeeper keeper(Counters.Cumulative()[COUNTER_PQ_TABLET_CPU_USAGE]); - + LOG_TRACE_S(ctx, NKikimrServices::PERSQUEUE, EventStr("StateIdle", ev)); TRACE_EVENT(NKikimrServices::PERSQUEUE); - switch (ev->GetTypeRewrite()) { - CFunc(TEvents::TSystem::Wakeup, HandleWakeup); - HFuncTraced(TEvKeyValue::TEvResponse, Handle); + switch (ev->GetTypeRewrite()) { + CFunc(TEvents::TSystem::Wakeup, HandleWakeup); + HFuncTraced(TEvKeyValue::TEvResponse, Handle); HFuncTraced(TEvPQ::TEvBlobResponse, Handle); HFuncTraced(TEvPQ::TEvWrite, HandleOnIdle); HFuncTraced(TEvPQ::TEvRead, Handle); HFuncTraced(NReadSpeedLimiterEvents::TEvResponse, Handle); - HFuncTraced(TEvPQ::TEvReadTimeout, Handle); + HFuncTraced(TEvPQ::TEvReadTimeout, Handle); HFuncTraced(TEvents::TEvPoisonPill, Handle); HFuncTraced(TEvPQ::TEvMonRequest, HandleMonitoring); HFuncTraced(TEvPQ::TEvGetMaxSeqNoRequest, Handle); HFuncTraced(TEvPQ::TEvChangeConfig, Handle); - HFuncTraced(TEvPQ::TEvGetClientOffset, Handle); - HFuncTraced(TEvPQ::TEvUpdateWriteTimestamp, Handle); - HFuncTraced(TEvPQ::TEvSetClientInfo, Handle); + HFuncTraced(TEvPQ::TEvGetClientOffset, Handle); + HFuncTraced(TEvPQ::TEvUpdateWriteTimestamp, Handle); + HFuncTraced(TEvPQ::TEvSetClientInfo, Handle); HFuncTraced(TEvPQ::TEvPartitionOffsets, Handle); - HFuncTraced(TEvPQ::TEvPartitionStatus, Handle); + HFuncTraced(TEvPQ::TEvPartitionStatus, Handle); HFuncTraced(TEvPersQueue::TEvReportPartitionError, Handle); - HFuncTraced(TEvPQ::TEvChangeOwner, Handle); - HFuncTraced(TEvPersQueue::TEvHasDataInfo, Handle); + HFuncTraced(TEvPQ::TEvChangeOwner, Handle); + HFuncTraced(TEvPersQueue::TEvHasDataInfo, Handle); HFuncTraced(TEvPQ::TEvMirrorerCounters, Handle); HFuncTraced(NReadSpeedLimiterEvents::TEvCounters, Handle); - HFuncTraced(TEvPQ::TEvProxyResponse, Handle); - HFuncTraced(TEvPQ::TEvError, Handle); + HFuncTraced(TEvPQ::TEvProxyResponse, Handle); + HFuncTraced(TEvPQ::TEvError, Handle); HFuncTraced(TEvPQ::TEvGetPartitionClientInfo, Handle); - HFuncTraced(TEvPQ::TEvUpdateAvailableSize, HandleOnIdle); - HFuncTraced(TEvPQ::TEvReserveBytes, Handle); - HFuncTraced(TEvPQ::TEvPipeDisconnected, Handle); + HFuncTraced(TEvPQ::TEvUpdateAvailableSize, HandleOnIdle); + HFuncTraced(TEvPQ::TEvReserveBytes, Handle); + HFuncTraced(TEvPQ::TEvPipeDisconnected, Handle); HFuncTraced(TEvQuota::TEvClearance, Handle); - HFuncTraced(TEvPQ::TEvQuotaDeadlineCheck, Handle); + HFuncTraced(TEvPQ::TEvQuotaDeadlineCheck, Handle); HFuncTraced(TEvPQ::TEvRegisterMessageGroup, HandleOnIdle); HFuncTraced(TEvPQ::TEvDeregisterMessageGroup, HandleOnIdle); HFuncTraced(TEvPQ::TEvSplitMessageGroup, HandleOnIdle); - - default: + + default: LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "Unexpected " << EventStr("StateIdle", ev)); - break; - }; - } - - STFUNC(StateWrite) - { + break; + }; + } + + STFUNC(StateWrite) + { NPersQueue::TCounterTimeKeeper keeper(Counters.Cumulative()[COUNTER_PQ_TABLET_CPU_USAGE]); - + LOG_TRACE_S(ctx, NKikimrServices::PERSQUEUE, EventStr("StateWrite", ev)); TRACE_EVENT(NKikimrServices::PERSQUEUE); - switch (ev->GetTypeRewrite()) { - CFunc(TEvents::TSystem::Wakeup, HandleWakeup); - HFuncTraced(TEvKeyValue::TEvResponse, Handle); - HFuncTraced(TEvPQ::TEvHandleWriteResponse, Handle); + switch (ev->GetTypeRewrite()) { + CFunc(TEvents::TSystem::Wakeup, HandleWakeup); + HFuncTraced(TEvKeyValue::TEvResponse, Handle); + HFuncTraced(TEvPQ::TEvHandleWriteResponse, Handle); HFuncTraced(TEvPQ::TEvBlobResponse, Handle); HFuncTraced(TEvPQ::TEvWrite, HandleOnWrite); HFuncTraced(TEvPQ::TEvRead, Handle); HFuncTraced(NReadSpeedLimiterEvents::TEvResponse, Handle); - HFuncTraced(TEvPQ::TEvReadTimeout, Handle); + HFuncTraced(TEvPQ::TEvReadTimeout, Handle); HFuncTraced(TEvents::TEvPoisonPill, Handle); HFuncTraced(TEvPQ::TEvMonRequest, HandleMonitoring); HFuncTraced(TEvPQ::TEvGetMaxSeqNoRequest, Handle); - HFuncTraced(TEvPQ::TEvGetClientOffset, Handle); - HFuncTraced(TEvPQ::TEvUpdateWriteTimestamp, Handle); - HFuncTraced(TEvPQ::TEvSetClientInfo, Handle); + HFuncTraced(TEvPQ::TEvGetClientOffset, Handle); + HFuncTraced(TEvPQ::TEvUpdateWriteTimestamp, Handle); + HFuncTraced(TEvPQ::TEvSetClientInfo, Handle); HFuncTraced(TEvPQ::TEvPartitionOffsets, Handle); - HFuncTraced(TEvPQ::TEvPartitionStatus, Handle); + HFuncTraced(TEvPQ::TEvPartitionStatus, Handle); HFuncTraced(TEvPersQueue::TEvReportPartitionError, Handle); - HFuncTraced(TEvPQ::TEvChangeOwner, Handle); + HFuncTraced(TEvPQ::TEvChangeOwner, Handle); HFuncTraced(TEvPQ::TEvChangeConfig, Handle); - HFuncTraced(TEvPersQueue::TEvHasDataInfo, Handle); + HFuncTraced(TEvPersQueue::TEvHasDataInfo, Handle); HFuncTraced(TEvPQ::TEvMirrorerCounters, Handle); HFuncTraced(NReadSpeedLimiterEvents::TEvCounters, Handle); - HFuncTraced(TEvPQ::TEvProxyResponse, Handle); - HFuncTraced(TEvPQ::TEvError, Handle); - HFuncTraced(TEvPQ::TEvReserveBytes, Handle); + HFuncTraced(TEvPQ::TEvProxyResponse, Handle); + HFuncTraced(TEvPQ::TEvError, Handle); + HFuncTraced(TEvPQ::TEvReserveBytes, Handle); HFuncTraced(TEvPQ::TEvGetPartitionClientInfo, Handle); - HFuncTraced(TEvPQ::TEvPipeDisconnected, Handle); - HFuncTraced(TEvPQ::TEvUpdateAvailableSize, HandleOnWrite); - HFuncTraced(TEvPQ::TEvQuotaDeadlineCheck, Handle); + HFuncTraced(TEvPQ::TEvPipeDisconnected, Handle); + HFuncTraced(TEvPQ::TEvUpdateAvailableSize, HandleOnWrite); + HFuncTraced(TEvPQ::TEvQuotaDeadlineCheck, Handle); HFuncTraced(TEvQuota::TEvClearance, Handle); HFuncTraced(TEvPQ::TEvRegisterMessageGroup, HandleOnWrite); HFuncTraced(TEvPQ::TEvDeregisterMessageGroup, HandleOnWrite); HFuncTraced(TEvPQ::TEvSplitMessageGroup, HandleOnWrite); - default: + default: LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "Unexpected " << EventStr("StateWrite", ev)); - break; - }; - } - - bool DropOldData(TEvKeyValue::TEvRequest *request, bool hasWrites, const TActorContext& ctx); - std::pair<TKey, ui32> Compact(const TKey& key, const ui32 size, bool headCleared); - - void HandleWrites(const TActorContext& ctx); - void BecomeIdle(const TActorContext& ctx); - - void CheckHeadConsistency() const; - + break; + }; + } + + bool DropOldData(TEvKeyValue::TEvRequest *request, bool hasWrites, const TActorContext& ctx); + std::pair<TKey, ui32> Compact(const TKey& key, const ui32 size, bool headCleared); + + void HandleWrites(const TActorContext& ctx); + void BecomeIdle(const TActorContext& ctx); + + void CheckHeadConsistency() const; + std::pair<TInstant, TInstant> GetTime(const TUserInfo& userInfo, ui64 offset) const; TInstant GetWriteTimeEstimate(ui64 offset) const; - - ui32 NextChannel(bool isHead, ui32 blobSize); - - void WriteBlobWithQuota(THolder<TEvKeyValue::TEvRequest>&& request); - void AddMetaKey(TEvKeyValue::TEvRequest* request); - + + ui32 NextChannel(bool isHead, ui32 blobSize); + + void WriteBlobWithQuota(THolder<TEvKeyValue::TEvRequest>&& request); + void AddMetaKey(TEvKeyValue::TEvRequest* request); + size_t GetQuotaRequestSize(const TEvKeyValue::TEvRequest& request); void RequestQuotaForWriteBlobRequest(size_t dataSize, ui64 cookie); void CalcTopicWriteQuotaParams(); bool WaitingForPreviousBlobQuota() const; -private: - void UpdateUserInfoEndOffset(const TInstant& now); - - void UpdateWriteBufferIsFullState(const TInstant& now); - - enum EInitState { - WaitDiskStatus, - WaitInfoRange, - WaitDataRange, - WaitDataRead, - WaitMetaRead - }; - - +private: + void UpdateUserInfoEndOffset(const TInstant& now); + + void UpdateWriteBufferIsFullState(const TInstant& now); + + enum EInitState { + WaitDiskStatus, + WaitInfoRange, + WaitDataRange, + WaitDataRead, + WaitMetaRead + }; + + struct TUserCookie { TString User; ui64 Cookie; @@ -413,44 +413,44 @@ private: ui64 TabletID; - ui32 Partition; - NKikimrPQ::TPQTabletConfig Config; + ui32 Partition; + NKikimrPQ::TPQTabletConfig Config; TString TopicName; TString TopicPath; - bool LocalDC; + bool LocalDC; TString DCId; - - ui32 MaxBlobSize; - const ui32 TotalLevels = 4; - TVector<ui32> CompactLevelBorder; - ui32 TotalMaxCount; - ui32 MaxSizeCheck; - -// [ 8+Mb][ 8+Mb ][not compacted data ] [ data sended to KV but not yet confirmed] -//ofsets in partition: 101 102|103 104|105 106 107 108 109 110|111 112 113 -// ^ ^ ^ -// StartOffset HeadOffset EndOffset -// [DataKeysBody ][DataKeysHead ] - ui64 StartOffset; - ui64 EndOffset; - - ui64 WriteInflightSize; + + ui32 MaxBlobSize; + const ui32 TotalLevels = 4; + TVector<ui32> CompactLevelBorder; + ui32 TotalMaxCount; + ui32 MaxSizeCheck; + +// [ 8+Mb][ 8+Mb ][not compacted data ] [ data sended to KV but not yet confirmed] +//ofsets in partition: 101 102|103 104|105 106 107 108 109 110|111 112 113 +// ^ ^ ^ +// StartOffset HeadOffset EndOffset +// [DataKeysBody ][DataKeysHead ] + ui64 StartOffset; + ui64 EndOffset; + + ui64 WriteInflightSize; TActorId Tablet; TActorId BlobCache; - - EInitState InitState; - + + EInitState InitState; + struct TWriteMsg { ui64 Cookie; TMaybe<ui64> Offset; TEvPQ::TEvWrite::TMsg Msg; }; - + struct TOwnershipMsg { - ui64 Cookie; - TString OwnerCookie; - }; - + ui64 Cookie; + TString OwnerCookie; + }; + struct TRegisterMessageGroupMsg { ui64 Cookie; TEvPQ::TEvRegisterMessageGroup::TBody Body; @@ -484,7 +484,7 @@ private: } }; - struct TMessage { + struct TMessage { std::variant< TWriteMsg, TOwnershipMsg, @@ -492,11 +492,11 @@ private: TDeregisterMessageGroupMsg, TSplitMessageGroupMsg > Body; - - ui64 QuotedTime; - ui64 QueueTime; - ui64 WriteTime; - + + ui64 QuotedTime; + ui64 QueueTime; + ui64 WriteTime; + template <typename T> explicit TMessage(T&& body, ui64 quotedTime, ui64 queueTime, ui64 writeTime) : Body(std::forward<T>(body)) @@ -506,7 +506,7 @@ private: { } - ui64 GetCookie() const { + ui64 GetCookie() const { switch (Body.index()) { case 0: return std::get<0>(Body).Cookie; @@ -521,7 +521,7 @@ private: default: Y_FAIL("unreachable"); } - } + } #define DEFINE_CHECKER_GETTER(name, i) \ bool Is##name() const { \ @@ -543,114 +543,114 @@ private: DEFINE_CHECKER_GETTER(SplitMessageGroup, 4) #undef DEFINE_CHECKER_GETTER - }; - - std::deque<TMessage> Requests; - std::deque<TMessage> Responses; - - THead Head; - THead NewHead; - TPartitionedBlob PartitionedBlob; - std::deque<std::pair<TKey, ui32>> CompactedKeys; //key and blob size - TDataKey NewHeadKey; - - ui64 BodySize; - ui32 MaxWriteResponsesSize; - - std::deque<TDataKey> DataKeysBody; + }; + + std::deque<TMessage> Requests; + std::deque<TMessage> Responses; + + THead Head; + THead NewHead; + TPartitionedBlob PartitionedBlob; + std::deque<std::pair<TKey, ui32>> CompactedKeys; //key and blob size + TDataKey NewHeadKey; + + ui64 BodySize; + ui32 MaxWriteResponsesSize; + + std::deque<TDataKey> DataKeysBody; TVector<TKeyLevel> DataKeysHead; - std::deque<TDataKey> HeadKeys; - - std::deque<std::pair<ui64,ui64>> GapOffsets; - ui64 GapSize; - + std::deque<TDataKey> HeadKeys; + + std::deque<std::pair<ui64,ui64>> GapOffsets; + ui64 GapSize; + TString CloudId; TString DbId; TString FolderId; TUsersInfoStorage UsersInfoStorage; - + std::deque<TString> UpdateUserInfoTimestamp; - bool ReadingTimestamp; + bool ReadingTimestamp; TString ReadingForUser; - ui64 ReadingForUserReadRuleGeneration; - ui64 ReadingForOffset; - + ui64 ReadingForUserReadRuleGeneration; + ui64 ReadingForOffset; + THashMap<ui64, TString> CookieToUser; - ui64 SetOffsetCookie; - + ui64 SetOffsetCookie; + THashMap<ui64, TReadInfo> ReadInfo; // cookie -> {...} - ui64 Cookie; - TInstant CreationTime; - TDuration InitDuration; - bool InitDone; - const bool NewPartition; - + ui64 Cookie; + TInstant CreationTime; + TDuration InitDuration; + bool InitDone; + const bool NewPartition; + THashMap<TString, NKikimr::NPQ::TOwnerInfo> Owners; THashSet<TActorId> OwnerPipes; - + TSourceIdStorage SourceIdStorage; - - std::deque<THolder<TEvPQ::TEvChangeOwner>> WaitToChangeOwner; - - TTabletCountersBase Counters; - TPartitionLabeledCounters PartitionLabeledCounters; - - TSubscriber Subscriber; - - TInstant WriteCycleStartTime; - ui32 WriteCycleSize; - ui32 WriteNewSize; + + std::deque<THolder<TEvPQ::TEvChangeOwner>> WaitToChangeOwner; + + TTabletCountersBase Counters; + TPartitionLabeledCounters PartitionLabeledCounters; + + TSubscriber Subscriber; + + TInstant WriteCycleStartTime; + ui32 WriteCycleSize; + ui32 WriteNewSize; ui32 WriteNewSizeInternal; - ui64 WriteNewSizeUncompressed; - ui32 WriteNewMessages; + ui64 WriteNewSizeUncompressed; + ui32 WriteNewMessages; ui32 WriteNewMessagesInternal; - + TInstant CurrentTimestamp; - - bool DiskIsFull; - + + bool DiskIsFull; + TSet<THasDataReq> HasDataRequests; TSet<THasDataDeadline> HasDataDeadlines; - ui64 HasDataReqNum; - - TQuotaTracker WriteQuota; + ui64 HasDataReqNum; + + TQuotaTracker WriteQuota; THolder<TPercentileCounter> PartitionWriteQuotaWaitCounter; - TInstant QuotaDeadline = TInstant::Zero(); + TInstant QuotaDeadline = TInstant::Zero(); TVector<NSlidingWindow::TSlidingWindow<NSlidingWindow::TSumOperation<ui64>>> AvgWriteBytes; - TVector<NSlidingWindow::TSlidingWindow<NSlidingWindow::TSumOperation<ui64>>> AvgQuotaBytes; - - - ui64 ReservedSize; - std::deque<THolder<TEvPQ::TEvReserveBytes>> ReserveRequests; - - ui32 Channel; - TVector<ui32> TotalChannelWritesByHead; - - TWorkingTimeCounter WriteBufferIsFullCounter; - - TInstant WriteTimestamp; - TInstant WriteTimestampEstimate; + TVector<NSlidingWindow::TSlidingWindow<NSlidingWindow::TSumOperation<ui64>>> AvgQuotaBytes; + + + ui64 ReservedSize; + std::deque<THolder<TEvPQ::TEvReserveBytes>> ReserveRequests; + + ui32 Channel; + TVector<ui32> TotalChannelWritesByHead; + + TWorkingTimeCounter WriteBufferIsFullCounter; + + TInstant WriteTimestamp; + TInstant WriteTimestampEstimate; bool ManageWriteTimestampEstimate = true; - NSlidingWindow::TSlidingWindow<NSlidingWindow::TMaxOperation<ui64>> WriteLagMs; - THolder<TPercentileCounter> InputTimeLag; - THolder<TPercentileCounter> MessageSize; - TPercentileCounter WriteLatency; - NKikimr::NPQ::TMultiCounter SLIBigLatency; - NKikimr::NPQ::TMultiCounter WritesTotal; - - NKikimr::NPQ::TMultiCounter BytesWritten; - NKikimr::NPQ::TMultiCounter BytesWrittenUncompressed; - NKikimr::NPQ::TMultiCounter BytesWrittenComp; - NKikimr::NPQ::TMultiCounter MsgsWritten; + NSlidingWindow::TSlidingWindow<NSlidingWindow::TMaxOperation<ui64>> WriteLagMs; + THolder<TPercentileCounter> InputTimeLag; + THolder<TPercentileCounter> MessageSize; + TPercentileCounter WriteLatency; + NKikimr::NPQ::TMultiCounter SLIBigLatency; + NKikimr::NPQ::TMultiCounter WritesTotal; + + NKikimr::NPQ::TMultiCounter BytesWritten; + NKikimr::NPQ::TMultiCounter BytesWrittenUncompressed; + NKikimr::NPQ::TMultiCounter BytesWrittenComp; + NKikimr::NPQ::TMultiCounter MsgsWritten; // Writing blob with topic quota variables ui64 TopicQuotaRequestCookie = 0; // Wait topic quota metrics THolder<TPercentileCounter> TopicWriteQuotaWaitCounter; TInstant StartTopicQuotaWaitTimeForCurrentBlob; - TInstant WriteStartTime; + TInstant WriteStartTime; TDuration TopicQuotaWaitTimeForCurrentBlob; // Topic quota parameters TString TopicWriteQuoterPath; @@ -660,7 +660,7 @@ private: TDeque<NKikimrPQ::TStatusResponse::TErrorMessage> Errors; THolder<TMirrorerInfo> Mirrorer; -}; - -}// NPQ -}// NKikimr +}; + +}// NPQ +}// NKikimr diff --git a/ydb/core/persqueue/percentile_counter.cpp b/ydb/core/persqueue/percentile_counter.cpp index df0299f4dc..a6414e70bd 100644 --- a/ydb/core/persqueue/percentile_counter.cpp +++ b/ydb/core/persqueue/percentile_counter.cpp @@ -1,33 +1,33 @@ -#include "percentile_counter.h" - +#include "percentile_counter.h" + #include <ydb/library/persqueue/topic_parser/topic_parser.h> #include <ydb/core/base/counters.h> -namespace NKikimr { - -namespace NPQ { - +namespace NKikimr { + +namespace NPQ { + NMonitoring::TDynamicCounterPtr GetCounters(NMonitoring::TDynamicCounterPtr counters, const TString& subsystem, const TString& topic) -{ - auto pos = topic.find("--"); +{ + auto pos = topic.find("--"); Y_VERIFY(pos != TString::npos); - - TString origDC = topic.substr(4, pos - 4); - origDC.to_title(); - TString realTopic = topic.substr(pos + 2); - pos = realTopic.find("--"); - TString producer = realTopic.substr(0, pos); - TString topicPath = NPersQueue::ConvertOldTopicName(realTopic); - TString account = topicPath.substr(0, topicPath.find("/")); - return GetServiceCounters(counters, "pqproxy|" + subsystem) + + TString origDC = topic.substr(4, pos - 4); + origDC.to_title(); + TString realTopic = topic.substr(pos + 2); + pos = realTopic.find("--"); + TString producer = realTopic.substr(0, pos); + TString topicPath = NPersQueue::ConvertOldTopicName(realTopic); + TString account = topicPath.substr(0, topicPath.find("/")); + return GetServiceCounters(counters, "pqproxy|" + subsystem) ->GetSubgroup("OriginDC", origDC) ->GetSubgroup("Producer", producer) ->GetSubgroup("TopicPath", topicPath) ->GetSubgroup("Account", account) ->GetSubgroup("Topic", realTopic); -} - +} + NMonitoring::TDynamicCounterPtr GetCountersForStream(NMonitoring::TDynamicCounterPtr counters, const TString& subsystem) { @@ -35,37 +35,37 @@ NMonitoring::TDynamicCounterPtr GetCountersForStream(NMonitoring::TDynamicCounte ->GetSubgroup("subsystem", subsystem); } -TVector<TLabelsInfo> GetLabels(const TString& topic) -{ - auto pos = topic.find("--"); - if (pos == TString::npos) - return {}; - Y_VERIFY(pos != TString::npos); - - TString origDC = topic.substr(4, pos - 4); - - return GetLabels(origDC, topic.substr(pos + 2)); -} - -TVector<TLabelsInfo> GetLabels(const TString& cluster, const TString& realTopic) -{ - TString origDC = cluster; - origDC.to_title(); - auto pos = realTopic.find("--"); - if (pos == TString::npos) - return {}; - TString producer = realTopic.substr(0, pos); - TString topicPath = NPersQueue::ConvertOldTopicName(realTopic); - TString account = topicPath.substr(0, topicPath.find("/")); - TVector<TLabelsInfo> res = { - {{{"Account", account}}, {"total"}}, - {{{"Producer", producer}}, {"total"}}, - {{{"Topic", realTopic}, {"TopicPath", topicPath}}, {"total", "total"}}, - {{{"OriginDC", origDC}}, {"cluster"}} - }; - return res; -} - +TVector<TLabelsInfo> GetLabels(const TString& topic) +{ + auto pos = topic.find("--"); + if (pos == TString::npos) + return {}; + Y_VERIFY(pos != TString::npos); + + TString origDC = topic.substr(4, pos - 4); + + return GetLabels(origDC, topic.substr(pos + 2)); +} + +TVector<TLabelsInfo> GetLabels(const TString& cluster, const TString& realTopic) +{ + TString origDC = cluster; + origDC.to_title(); + auto pos = realTopic.find("--"); + if (pos == TString::npos) + return {}; + TString producer = realTopic.substr(0, pos); + TString topicPath = NPersQueue::ConvertOldTopicName(realTopic); + TString account = topicPath.substr(0, topicPath.find("/")); + TVector<TLabelsInfo> res = { + {{{"Account", account}}, {"total"}}, + {{{"Producer", producer}}, {"total"}}, + {{{"Topic", realTopic}, {"TopicPath", topicPath}}, {"total", "total"}}, + {{{"OriginDC", origDC}}, {"cluster"}} + }; + return res; +} + TVector<TLabelsInfo> GetLabelsForStream(const TString& topic, const TString& cloudId, const TString& dbId, const TString& folderId) { TVector<TLabelsInfo> res = { @@ -75,7 +75,7 @@ TVector<TLabelsInfo> GetLabelsForStream(const TString& topic, const TString& clo {{{"stream", topic}}, {topic}}}; return res; } - + TMultiCounter::TMultiCounter(NMonitoring::TDynamicCounterPtr counters, const TVector<TLabelsInfo>& labels, const TVector<std::pair<TString, TString>>& subgroups, @@ -131,20 +131,20 @@ TMultiCounter::operator bool() { } -TPercentileCounter::TPercentileCounter(TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const TVector<TLabelsInfo>& labels, const TVector<std::pair<TString, TString>>& subgroups, const TString& sensor, +TPercentileCounter::TPercentileCounter(TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const TVector<TLabelsInfo>& labels, const TVector<std::pair<TString, TString>>& subgroups, const TString& sensor, const TVector<std::pair<ui64, TString>>& intervals, const bool deriv, bool expiring) -{ - Y_VERIFY(!intervals.empty()); - Counters.reserve(intervals.size()); - Ranges.reserve(intervals.size()); - for (auto& interval : intervals) { - Ranges.push_back(interval.first); +{ + Y_VERIFY(!intervals.empty()); + Counters.reserve(intervals.size()); + Ranges.reserve(intervals.size()); + for (auto& interval : intervals) { + Ranges.push_back(interval.first); Counters.push_back(TMultiCounter(counters, labels, subgroups, {interval.second}, deriv, sensor, expiring)); - } - Ranges.back() = Max<ui64>(); -} - -void TPercentileCounter::IncFor(ui64 key, ui64 value) { + } + Ranges.back() = Max<ui64>(); +} + +void TPercentileCounter::IncFor(ui64 key, ui64 value) { if (!Ranges.empty()) { ui32 i = 0; // The last range value is Max<ui64>(). @@ -152,11 +152,11 @@ void TPercentileCounter::IncFor(ui64 key, ui64 value) { ++i; } Y_ASSERT(i < Ranges.size()); - Counters[i].Inc(value); + Counters[i].Inc(value); } -} - -void TPercentileCounter::DecFor(ui64 key, ui64 value) { +} + +void TPercentileCounter::DecFor(ui64 key, ui64 value) { if (!Ranges.empty()) { ui32 i = 0; // The last range value is Max<ui64>(). @@ -164,34 +164,34 @@ void TPercentileCounter::DecFor(ui64 key, ui64 value) { ++i; } Y_ASSERT(i < Ranges.size()); - Counters[i].Dec(value); + Counters[i].Dec(value); + } +} + +NKikimr::NPQ::TPercentileCounter CreateSLIDurationCounter(TIntrusivePtr<NMonitoring::TDynamicCounters> counters, TVector<NPQ::TLabelsInfo> aggr, const TString name, ui32 border, TVector<ui32> durations) +{ + bool found = false; + for (auto it = durations.begin(); it != durations.end(); ++it) { + if (*it == border) { + found = true; + break; + } + if (*it > border) { + found = true; + durations.insert(it, border); + break; + } + } + if (!found) + durations.push_back(border); + TVector<std::pair<ui64, TString>> buckets; + for (auto& dur : durations) { + buckets.emplace_back(dur, TStringBuilder() << dur << "ms"); } -} - -NKikimr::NPQ::TPercentileCounter CreateSLIDurationCounter(TIntrusivePtr<NMonitoring::TDynamicCounters> counters, TVector<NPQ::TLabelsInfo> aggr, const TString name, ui32 border, TVector<ui32> durations) -{ - bool found = false; - for (auto it = durations.begin(); it != durations.end(); ++it) { - if (*it == border) { - found = true; - break; - } - if (*it > border) { - found = true; - durations.insert(it, border); - break; - } - } - if (!found) - durations.push_back(border); - TVector<std::pair<ui64, TString>> buckets; - for (auto& dur : durations) { - buckets.emplace_back(dur, TStringBuilder() << dur << "ms"); - } - return NKikimr::NPQ::TPercentileCounter(counters->GetSubgroup("sensor", name), aggr, {}, "Duration", buckets, true, false); -} - - - -} // NPQ -} // NKikimr + return NKikimr::NPQ::TPercentileCounter(counters->GetSubgroup("sensor", name), aggr, {}, "Duration", buckets, true, false); +} + + + +} // NPQ +} // NKikimr diff --git a/ydb/core/persqueue/percentile_counter.h b/ydb/core/persqueue/percentile_counter.h index d00e3aec02..74ee43d287 100644 --- a/ydb/core/persqueue/percentile_counter.h +++ b/ydb/core/persqueue/percentile_counter.h @@ -1,30 +1,30 @@ -#pragma once - +#pragma once + #include <library/cpp/monlib/dynamic_counters/counters.h> - -namespace NKikimr { -namespace NPQ { - + +namespace NKikimr { +namespace NPQ { + NMonitoring::TDynamicCounterPtr GetCounters(NMonitoring::TDynamicCounterPtr counters, const TString& subsystem, const TString& topic); NMonitoring::TDynamicCounterPtr GetCountersForStream(NMonitoring::TDynamicCounterPtr counters, const TString& subsystem); - -struct TLabelsInfo { - TVector<std::pair<TString,TString>> Labels; - TVector<TString> AggrNames; -}; - -TVector<TLabelsInfo> GetLabels(const TString& topic); -TVector<TLabelsInfo> GetLabels(const TString& cluster, const TString& oldTopic); + +struct TLabelsInfo { + TVector<std::pair<TString,TString>> Labels; + TVector<TString> AggrNames; +}; + +TVector<TLabelsInfo> GetLabels(const TString& topic); +TVector<TLabelsInfo> GetLabels(const TString& cluster, const TString& oldTopic); TVector<TLabelsInfo> GetLabelsForStream(const TString& topic, const TString& cloudId, const TString& dbId, const TString& folderId); - -class TMultiCounter { -public: + +class TMultiCounter { +public: TMultiCounter() = default; - + TMultiCounter(NMonitoring::TDynamicCounterPtr counters, const TVector<TLabelsInfo>& labels, const TVector<std::pair<TString, TString>>& subgroups, @@ -32,20 +32,20 @@ public: bool deriv, const TString& name = "sensor", bool expiring = true); - + void Inc(ui64 val = 1); void Dec(ui64 val = 1); void Set(ui64 value); - + operator bool(); - -private: - ui64 Value = 0; - TVector<NMonitoring::TDynamicCounters::TCounterPtr> Counters; -}; - -class TPercentileCounter { -public: + +private: + ui64 Value = 0; + TVector<NMonitoring::TDynamicCounters::TCounterPtr> Counters; +}; + +class TPercentileCounter { +public: TPercentileCounter() = default; TPercentileCounter(TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const TVector<TLabelsInfo>& labels, @@ -54,16 +54,16 @@ public: const TVector<std::pair<ui64, TString>>& intervals, const bool deriv, bool expiring = true); - + void IncFor(ui64 key, ui64 value = 1); void DecFor(ui64 key, ui64 value = 1); -private: - TVector<TMultiCounter> Counters; - TVector<ui64> Ranges; -}; - -NKikimr::NPQ::TPercentileCounter CreateSLIDurationCounter(TIntrusivePtr<NMonitoring::TDynamicCounters> counters, TVector<NPQ::TLabelsInfo> aggr, const TString name, ui32 border, TVector<ui32> durations); - -}// NPQ -}// NKikimr +private: + TVector<TMultiCounter> Counters; + TVector<ui64> Ranges; +}; + +NKikimr::NPQ::TPercentileCounter CreateSLIDurationCounter(TIntrusivePtr<NMonitoring::TDynamicCounters> counters, TVector<NPQ::TLabelsInfo> aggr, const TString name, ui32 border, TVector<ui32> durations); + +}// NPQ +}// NKikimr diff --git a/ydb/core/persqueue/pq.cpp b/ydb/core/persqueue/pq.cpp index bce18be603..d0ab706a34 100644 --- a/ydb/core/persqueue/pq.cpp +++ b/ydb/core/persqueue/pq.cpp @@ -1,15 +1,15 @@ -#include "pq_impl.h" -#include "read_balancer.h" - -namespace NKikimr { - +#include "pq_impl.h" +#include "read_balancer.h" + +namespace NKikimr { + IActor* CreatePersQueue(const TActorId& tablet, TTabletStorageInfo *info) { - return new NPQ::TPersQueue(tablet, info); -} - + return new NPQ::TPersQueue(tablet, info); +} + IActor* CreatePersQueueReadBalancer(const TActorId& tablet, TTabletStorageInfo *info) { - return new NPQ::TPersQueueReadBalancer(tablet, info); -} - - -} // NKikimr + return new NPQ::TPersQueueReadBalancer(tablet, info); +} + + +} // NKikimr diff --git a/ydb/core/persqueue/pq.h b/ydb/core/persqueue/pq.h index 8b20ce54e6..e5f3309ba9 100644 --- a/ydb/core/persqueue/pq.h +++ b/ydb/core/persqueue/pq.h @@ -1,12 +1,12 @@ -#pragma once - +#pragma once + #include <ydb/core/base/blobstorage.h> - + #include <library/cpp/actors/core/actorid.h> - -namespace NKikimr { - + +namespace NKikimr { + IActor* CreatePersQueue(const TActorId& tablet, TTabletStorageInfo *info); IActor* CreatePersQueueReadBalancer(const TActorId& tablet, TTabletStorageInfo *info); - -} //NKikimr + +} //NKikimr diff --git a/ydb/core/persqueue/pq_impl.cpp b/ydb/core/persqueue/pq_impl.cpp index d17f210afc..9937dbe561 100644 --- a/ydb/core/persqueue/pq_impl.cpp +++ b/ydb/core/persqueue/pq_impl.cpp @@ -1,8 +1,8 @@ -#include "pq_impl.h" +#include "pq_impl.h" #include "event_helpers.h" -#include "partition.h" -#include "read.h" +#include "partition.h" +#include "read.h" #include <ydb/core/persqueue/config/config.h> #include <ydb/core/persqueue/partition_key_range/partition_key_range.h> #include <ydb/core/protos/pqconfig.pb.h> @@ -10,558 +10,558 @@ #include <ydb/core/metering/metering.h> #include <ydb/core/tablet/tablet_counters.h> #include <library/cpp/json/json_writer.h> - + #include <util/generic/strbuf.h> -//TODO: move this code to vieiwer +//TODO: move this code to vieiwer #include <ydb/core/tablet/tablet_counters_aggregator.h> - + #include <library/cpp/monlib/service/pages/templates.h> #include <util/string/escape.h> - -namespace NKikimr { -namespace NPQ { - + +namespace NKikimr { +namespace NPQ { + const TString TMP_REQUEST_MARKER = "__TMP__REQUEST__MARKER__"; -const ui32 CACHE_SIZE = 100 << 20; //100mb per tablet by default -const ui32 MAX_BYTES = 25 * 1024 * 1024; +const ui32 CACHE_SIZE = 100 << 20; //100mb per tablet by default +const ui32 MAX_BYTES = 25 * 1024 * 1024; const TDuration TOTAL_TIMEOUT = TDuration::Seconds(120); static constexpr ui32 MAX_SOURCE_ID_LENGTH = 10240; - -struct TPartitionInfo { + +struct TPartitionInfo { TPartitionInfo(const TActorId& actor, TMaybe<TPartitionKeyRange>&& keyRange, const bool initDone, const TTabletCountersBase& baseline) - : Actor(actor) + : Actor(actor) , KeyRange(std::move(keyRange)) - , InitDone(initDone) + , InitDone(initDone) { Baseline.Populate(baseline); } - - TPartitionInfo(const TPartitionInfo& info) - : Actor(info.Actor) + + TPartitionInfo(const TPartitionInfo& info) + : Actor(info.Actor) , KeyRange(info.KeyRange) - , InitDone(info.InitDone) + , InitDone(info.InitDone) { Baseline.Populate(info.Baseline); } - + TActorId Actor; TMaybe<TPartitionKeyRange> KeyRange; - bool InitDone; - TTabletCountersBase Baseline; + bool InitDone; + TTabletCountersBase Baseline; THashMap<TString, TTabletLabeledCountersBase> LabeledCounters; -}; - +}; + struct TChangeNotification { TChangeNotification(const TActorId& actor, const ui64 txId) - : Actor(actor) - , TxId(txId) - {} - - operator size_t() const { + : Actor(actor) + , TxId(txId) + {} + + operator size_t() const { return THash<TActorId>()(Actor) + TxId; - } - + } + bool operator==(const TChangeNotification& b) { - return b.Actor == Actor && b.TxId == TxId; - } - + return b.Actor == Actor && b.TxId == TxId; + } + bool operator < (const TChangeNotification& req) const { return (Actor < req.Actor) || (Actor == req.Actor && TxId < req.TxId); } TActorId Actor; - ui64 TxId; -}; - + ui64 TxId; +}; + static TMaybe<TPartitionKeyRange> GetPartitionKeyRange(const NKikimrPQ::TPQTabletConfig::TPartition& proto) { if (!proto.HasKeyRange()) { return Nothing(); } return TPartitionKeyRange::Parse(proto.GetKeyRange()); } - -/******************************************************* ReadProxy *********************************************************/ -//megaqc - remove it when LB will be ready -class TReadProxy : public TActorBootstrapped<TReadProxy> { -public: + +/******************************************************* ReadProxy *********************************************************/ +//megaqc - remove it when LB will be ready +class TReadProxy : public TActorBootstrapped<TReadProxy> { +public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::PERSQUEUE_ANS_ACTOR; - } - + } + TReadProxy(const TActorId& sender, const TActorId& tablet, const NKikimrClient::TPersQueueRequest& request) - : Sender(sender) - , Tablet(tablet) - , Request(request) - , Response(new TEvPersQueue::TEvResponse) - { - Y_VERIFY(Request.HasPartitionRequest() && Request.GetPartitionRequest().HasCmdRead()); - Y_VERIFY(Request.GetPartitionRequest().GetCmdRead().GetPartNo() == 0); //partial request are not allowed, otherwise remove ReadProxy - Y_VERIFY(!Response->Record.HasPartitionResponse()); - } - - void Bootstrap(const TActorContext&) - { - Become(&TThis::StateFunc); - } - -private: - - void Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorContext& ctx) - { - Y_VERIFY(Response); - const auto& record = ev->Get()->Record; - if (!record.HasPartitionResponse() || !record.GetPartitionResponse().HasCmdReadResult() || - record.GetStatus() != NMsgBusProxy::MSTATUS_OK || record.GetErrorCode() != NPersQueue::NErrorCode::OK || - record.GetPartitionResponse().GetCmdReadResult().ResultSize() == 0) { - Response->Record.CopyFrom(record); - ctx.Send(Sender, Response.Release()); - Die(ctx); - return; - } - - Y_VERIFY(record.HasPartitionResponse() && record.GetPartitionResponse().HasCmdReadResult()); - - const auto& res = record.GetPartitionResponse().GetCmdReadResult(); - - Response->Record.SetStatus(NMsgBusProxy::MSTATUS_OK); - Response->Record.SetErrorCode(NPersQueue::NErrorCode::OK); - - Y_VERIFY(res.ResultSize() > 0); - bool isStart = false; - if (!Response->Record.HasPartitionResponse()) { - Y_VERIFY(!res.GetResult(0).HasPartNo() || res.GetResult(0).GetPartNo() == 0); //starts from begin of record - auto partResp = Response->Record.MutablePartitionResponse(); - auto readRes = partResp->MutableCmdReadResult(); - readRes->SetBlobsFromDisk(readRes->GetBlobsFromDisk() + res.GetBlobsFromDisk()); - readRes->SetBlobsFromCache(readRes->GetBlobsFromCache() + res.GetBlobsFromCache()); - isStart = true; - } - if (record.GetPartitionResponse().HasCookie()) - Response->Record.MutablePartitionResponse()->SetCookie(record.GetPartitionResponse().GetCookie()); - - auto partResp = Response->Record.MutablePartitionResponse()->MutableCmdReadResult(); - + : Sender(sender) + , Tablet(tablet) + , Request(request) + , Response(new TEvPersQueue::TEvResponse) + { + Y_VERIFY(Request.HasPartitionRequest() && Request.GetPartitionRequest().HasCmdRead()); + Y_VERIFY(Request.GetPartitionRequest().GetCmdRead().GetPartNo() == 0); //partial request are not allowed, otherwise remove ReadProxy + Y_VERIFY(!Response->Record.HasPartitionResponse()); + } + + void Bootstrap(const TActorContext&) + { + Become(&TThis::StateFunc); + } + +private: + + void Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorContext& ctx) + { + Y_VERIFY(Response); + const auto& record = ev->Get()->Record; + if (!record.HasPartitionResponse() || !record.GetPartitionResponse().HasCmdReadResult() || + record.GetStatus() != NMsgBusProxy::MSTATUS_OK || record.GetErrorCode() != NPersQueue::NErrorCode::OK || + record.GetPartitionResponse().GetCmdReadResult().ResultSize() == 0) { + Response->Record.CopyFrom(record); + ctx.Send(Sender, Response.Release()); + Die(ctx); + return; + } + + Y_VERIFY(record.HasPartitionResponse() && record.GetPartitionResponse().HasCmdReadResult()); + + const auto& res = record.GetPartitionResponse().GetCmdReadResult(); + + Response->Record.SetStatus(NMsgBusProxy::MSTATUS_OK); + Response->Record.SetErrorCode(NPersQueue::NErrorCode::OK); + + Y_VERIFY(res.ResultSize() > 0); + bool isStart = false; + if (!Response->Record.HasPartitionResponse()) { + Y_VERIFY(!res.GetResult(0).HasPartNo() || res.GetResult(0).GetPartNo() == 0); //starts from begin of record + auto partResp = Response->Record.MutablePartitionResponse(); + auto readRes = partResp->MutableCmdReadResult(); + readRes->SetBlobsFromDisk(readRes->GetBlobsFromDisk() + res.GetBlobsFromDisk()); + readRes->SetBlobsFromCache(readRes->GetBlobsFromCache() + res.GetBlobsFromCache()); + isStart = true; + } + if (record.GetPartitionResponse().HasCookie()) + Response->Record.MutablePartitionResponse()->SetCookie(record.GetPartitionResponse().GetCookie()); + + auto partResp = Response->Record.MutablePartitionResponse()->MutableCmdReadResult(); + partResp->SetMaxOffset(res.GetMaxOffset()); partResp->SetSizeLag(res.GetSizeLag()); partResp->SetWaitQuotaTimeMs(partResp->GetWaitQuotaTimeMs() + res.GetWaitQuotaTimeMs()); - - for (ui32 i = 0; i < res.ResultSize(); ++i) { - if (!res.GetResult(i).HasPartNo() || res.GetResult(i).GetPartNo() == 0) { - if (!isStart && res.GetResult(i).HasTotalParts() && res.GetResult(i).GetTotalParts() + i > res.ResultSize()) //last blob is not full - break; - partResp->AddResult()->CopyFrom(res.GetResult(i)); - isStart = false; - } else { //glue to last res - Y_VERIFY(partResp->GetResult(partResp->ResultSize() - 1).GetSeqNo() == res.GetResult(i).GetSeqNo()); - auto rr = partResp->MutableResult(partResp->ResultSize() - 1); - (*rr->MutableData()) += res.GetResult(i).GetData(); + + for (ui32 i = 0; i < res.ResultSize(); ++i) { + if (!res.GetResult(i).HasPartNo() || res.GetResult(i).GetPartNo() == 0) { + if (!isStart && res.GetResult(i).HasTotalParts() && res.GetResult(i).GetTotalParts() + i > res.ResultSize()) //last blob is not full + break; + partResp->AddResult()->CopyFrom(res.GetResult(i)); + isStart = false; + } else { //glue to last res + Y_VERIFY(partResp->GetResult(partResp->ResultSize() - 1).GetSeqNo() == res.GetResult(i).GetSeqNo()); + auto rr = partResp->MutableResult(partResp->ResultSize() - 1); + (*rr->MutableData()) += res.GetResult(i).GetData(); rr->SetPartitionKey(res.GetResult(i).GetPartitionKey()); rr->SetExplicitHash(res.GetResult(i).GetExplicitHash()); - rr->SetPartNo(res.GetResult(i).GetPartNo()); - rr->SetUncompressedSize(rr->GetUncompressedSize() + res.GetResult(i).GetUncompressedSize()); - if (res.GetResult(i).GetPartNo() + 1 == res.GetResult(i).GetTotalParts()) { - Y_VERIFY((ui32)rr->GetTotalSize() == (ui32)rr->GetData().size()); - } - } - } - const auto& lastRes = partResp->GetResult(partResp->ResultSize() - 1); - if (!lastRes.HasPartNo() || lastRes.GetPartNo() + 1 == lastRes.GetTotalParts()) { //last res is full, can answer - ctx.Send(Sender, Response.Release()); - Die(ctx); - return; - } - //not full answer - need uprequest - - Request.SetRequestId(TMP_REQUEST_MARKER); - - auto read = Request.MutablePartitionRequest()->MutableCmdRead(); - read->SetOffset(lastRes.GetOffset()); - read->SetPartNo(lastRes.GetPartNo() + 1); - read->SetCount(1); - read->ClearBytes(); - read->ClearTimeoutMs(); - read->ClearMaxTimeLagMs(); - read->ClearReadTimestampMs(); - THolder<TEvPersQueue::TEvRequest> req(new TEvPersQueue::TEvRequest); - req->Record = Request; - ctx.Send(Tablet, req.Release()); - } - - STFUNC(StateFunc) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvPersQueue::TEvResponse, Handle); - default: - break; - }; - } - + rr->SetPartNo(res.GetResult(i).GetPartNo()); + rr->SetUncompressedSize(rr->GetUncompressedSize() + res.GetResult(i).GetUncompressedSize()); + if (res.GetResult(i).GetPartNo() + 1 == res.GetResult(i).GetTotalParts()) { + Y_VERIFY((ui32)rr->GetTotalSize() == (ui32)rr->GetData().size()); + } + } + } + const auto& lastRes = partResp->GetResult(partResp->ResultSize() - 1); + if (!lastRes.HasPartNo() || lastRes.GetPartNo() + 1 == lastRes.GetTotalParts()) { //last res is full, can answer + ctx.Send(Sender, Response.Release()); + Die(ctx); + return; + } + //not full answer - need uprequest + + Request.SetRequestId(TMP_REQUEST_MARKER); + + auto read = Request.MutablePartitionRequest()->MutableCmdRead(); + read->SetOffset(lastRes.GetOffset()); + read->SetPartNo(lastRes.GetPartNo() + 1); + read->SetCount(1); + read->ClearBytes(); + read->ClearTimeoutMs(); + read->ClearMaxTimeLagMs(); + read->ClearReadTimestampMs(); + THolder<TEvPersQueue::TEvRequest> req(new TEvPersQueue::TEvRequest); + req->Record = Request; + ctx.Send(Tablet, req.Release()); + } + + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + HFunc(TEvPersQueue::TEvResponse, Handle); + default: + break; + }; + } + const TActorId Sender; const TActorId Tablet; NKikimrClient::TPersQueueRequest Request; - THolder<TEvPersQueue::TEvResponse> Response; -}; - - + THolder<TEvPersQueue::TEvResponse> Response; +}; + + TActorId CreateReadProxy(const TActorId& sender, const TActorId& tablet, const NKikimrClient::TPersQueueRequest& request, const TActorContext&ctx) -{ - return ctx.Register(new TReadProxy(sender, tablet, request)); -} - -/******************************************************* AnswerBuilderProxy *********************************************************/ -class TResponseBuilder { -public: +{ + return ctx.Register(new TReadProxy(sender, tablet, request)); +} + +/******************************************************* AnswerBuilderProxy *********************************************************/ +class TResponseBuilder { +public: TResponseBuilder(const TActorId& sender, const TActorId& tablet, const TString& topicName, const ui32 partition, const ui64 messageNo, - const TString& reqId, const TMaybe<ui64> cookie, NMetrics::TResourceMetrics* resourceMetrics, const TActorContext& ctx) - : Sender(sender) - , Tablet(tablet) - , TopicName(topicName) - , Partition(partition) - , MessageNo(messageNo) - , CounterId(0) - , Waiting(0) - , ReqId(reqId) - , Response(new TEvPersQueue::TEvResponse) - , Timestamp(TAppData::TimeProvider->Now()) - , WasSplit(false) - , Cookie(cookie) - , ResourceMetrics(resourceMetrics) - { - if (cookie) - Response->Record.MutablePartitionResponse()->SetCookie(*cookie); - - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Handle TEvRequest topic: '" << TopicName << "' requestId: " << ReqId); - - } - - void SetWasSplit() - { - WasSplit = true; - } - - void AddPartialReplyCount(const ui32 partialReplyCount) - { - Waiting += partialReplyCount; - } - - void SetCounterId(const ui32 counterId) - { - CounterId = counterId; - } - - bool HandleProxyResponse(TEvPQ::TEvProxyResponse::TPtr& ev, const TActorContext& ctx) - { - Y_VERIFY(Waiting); - Y_VERIFY(Response); - --Waiting; - bool skip = false; - if (WasSplit && ev->Get()->Response.GetPartitionResponse().CmdWriteResultSize() == 1) { //megaqc - remove this - const auto& x = ev->Get()->Response.GetPartitionResponse().GetCmdWriteResult(0); - if (x.HasPartNo() && x.GetPartNo() > 0) - skip = true; - } - if (!skip) //megaqc - remove this - Response->Record.MergeFrom(ev->Get()->Response); - - if (!Waiting) { - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Answer ok topic: '" << TopicName << "' partition: " << Partition - << " messageNo: " << MessageNo << " requestId: " << ReqId << " cookie: " << (Cookie ? *Cookie : 0)); - - if (ResourceMetrics) { - ResourceMetrics->Network.Increment(Response->Record.ByteSizeLong()); - ResourceMetrics->TryUpdate(ctx); - } - - ctx.Send(Sender, Response.Release()); - return true; - } - return false; - } - - bool HandleError(TEvPQ::TEvError *ev, const TActorContext& ctx) - { - LOG_WARN_S(ctx, NKikimrServices::PERSQUEUE, "Answer error topic: '" << TopicName << "' partition: " << Partition - << " messageNo: " << MessageNo << " requestId: " << ReqId << " error: " << ev->Error); - Response->Record.SetStatus(NMsgBusProxy::MSTATUS_ERROR); - Response->Record.SetErrorCode(ev->ErrorCode); - Response->Record.SetErrorReason(ev->Error); - ctx.Send(Sender, Response.Release()); - return true; - } - + const TString& reqId, const TMaybe<ui64> cookie, NMetrics::TResourceMetrics* resourceMetrics, const TActorContext& ctx) + : Sender(sender) + , Tablet(tablet) + , TopicName(topicName) + , Partition(partition) + , MessageNo(messageNo) + , CounterId(0) + , Waiting(0) + , ReqId(reqId) + , Response(new TEvPersQueue::TEvResponse) + , Timestamp(TAppData::TimeProvider->Now()) + , WasSplit(false) + , Cookie(cookie) + , ResourceMetrics(resourceMetrics) + { + if (cookie) + Response->Record.MutablePartitionResponse()->SetCookie(*cookie); + + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Handle TEvRequest topic: '" << TopicName << "' requestId: " << ReqId); + + } + + void SetWasSplit() + { + WasSplit = true; + } + + void AddPartialReplyCount(const ui32 partialReplyCount) + { + Waiting += partialReplyCount; + } + + void SetCounterId(const ui32 counterId) + { + CounterId = counterId; + } + + bool HandleProxyResponse(TEvPQ::TEvProxyResponse::TPtr& ev, const TActorContext& ctx) + { + Y_VERIFY(Waiting); + Y_VERIFY(Response); + --Waiting; + bool skip = false; + if (WasSplit && ev->Get()->Response.GetPartitionResponse().CmdWriteResultSize() == 1) { //megaqc - remove this + const auto& x = ev->Get()->Response.GetPartitionResponse().GetCmdWriteResult(0); + if (x.HasPartNo() && x.GetPartNo() > 0) + skip = true; + } + if (!skip) //megaqc - remove this + Response->Record.MergeFrom(ev->Get()->Response); + + if (!Waiting) { + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Answer ok topic: '" << TopicName << "' partition: " << Partition + << " messageNo: " << MessageNo << " requestId: " << ReqId << " cookie: " << (Cookie ? *Cookie : 0)); + + if (ResourceMetrics) { + ResourceMetrics->Network.Increment(Response->Record.ByteSizeLong()); + ResourceMetrics->TryUpdate(ctx); + } + + ctx.Send(Sender, Response.Release()); + return true; + } + return false; + } + + bool HandleError(TEvPQ::TEvError *ev, const TActorContext& ctx) + { + LOG_WARN_S(ctx, NKikimrServices::PERSQUEUE, "Answer error topic: '" << TopicName << "' partition: " << Partition + << " messageNo: " << MessageNo << " requestId: " << ReqId << " error: " << ev->Error); + Response->Record.SetStatus(NMsgBusProxy::MSTATUS_ERROR); + Response->Record.SetErrorCode(ev->ErrorCode); + Response->Record.SetErrorReason(ev->Error); + ctx.Send(Sender, Response.Release()); + return true; + } + const TActorId Sender; const TActorId Tablet; const TString TopicName; - const ui32 Partition; - const ui64 MessageNo; - ui32 CounterId; - ui32 Waiting; + const ui32 Partition; + const ui64 MessageNo; + ui32 CounterId; + ui32 Waiting; const TString ReqId; - THolder<TEvPersQueue::TEvResponse> Response; - TInstant Timestamp; - bool WasSplit; - TMaybe<ui64> Cookie; - NMetrics::TResourceMetrics* ResourceMetrics; -}; - - + THolder<TEvPersQueue::TEvResponse> Response; + TInstant Timestamp; + bool WasSplit; + TMaybe<ui64> Cookie; + NMetrics::TResourceMetrics* ResourceMetrics; +}; + + TAutoPtr<TResponseBuilder> CreateResponseProxy(const TActorId& sender, const TActorId& tablet, const TString& topicName, - const ui32 partition, const ui64 messageNo, const TString& reqId, const TMaybe<ui64> cookie, - NMetrics::TResourceMetrics *resourceMetrics, const TActorContext& ctx) -{ - return new TResponseBuilder(sender, tablet, topicName, partition, messageNo, reqId, cookie, resourceMetrics, ctx); -} - - -/******************************************************* OffsetsBuilderProxy *********************************************************/ - -template <typename T, typename T2, typename T3> -class TBuilderProxy : public TActorBootstrapped<TBuilderProxy<T,T2,T3>> { - typedef TBuilderProxy<T,T2,T3> TThis; - - friend class TActorBootstrapped<TThis>; - typedef T TEvent; - typedef typename TEvent::TPtr TTPtr; -public: + const ui32 partition, const ui64 messageNo, const TString& reqId, const TMaybe<ui64> cookie, + NMetrics::TResourceMetrics *resourceMetrics, const TActorContext& ctx) +{ + return new TResponseBuilder(sender, tablet, topicName, partition, messageNo, reqId, cookie, resourceMetrics, ctx); +} + + +/******************************************************* OffsetsBuilderProxy *********************************************************/ + +template <typename T, typename T2, typename T3> +class TBuilderProxy : public TActorBootstrapped<TBuilderProxy<T,T2,T3>> { + typedef TBuilderProxy<T,T2,T3> TThis; + + friend class TActorBootstrapped<TThis>; + typedef T TEvent; + typedef typename TEvent::TPtr TTPtr; +public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::PERSQUEUE_ANS_ACTOR; } TBuilderProxy(const ui64 tabletId, const TActorId& sender, const ui32 count) - : TabletId(tabletId) - , Sender(sender) - , Waiting(count) - , Result() + : TabletId(tabletId) + , Sender(sender) + , Waiting(count) + , Result() {} - - void Bootstrap(const TActorContext& ctx) - { - TThis::Become(&TThis::StateFunc); - if (!Waiting) { - AnswerAndDie(ctx); - return; - } + + void Bootstrap(const TActorContext& ctx) + { + TThis::Become(&TThis::StateFunc); + if (!Waiting) { + AnswerAndDie(ctx); + return; + } ctx.Schedule(TOTAL_TIMEOUT, new TEvents::TEvWakeup()); - } - -private: - - void AnswerAndDie(const TActorContext& ctx) - { - std::sort(Result.begin(), Result.end(), [](const typename T2::TPartResult& a, const typename T2::TPartResult& b){ - return a.GetPartition() < b.GetPartition(); - }); + } + +private: + + void AnswerAndDie(const TActorContext& ctx) + { + std::sort(Result.begin(), Result.end(), [](const typename T2::TPartResult& a, const typename T2::TPartResult& b){ + return a.GetPartition() < b.GetPartition(); + }); THolder<T3> res = MakeHolder<T3>(); - auto& resp = res->Record; - resp.SetTabletId(TabletId); - for (const auto& p : Result) { - resp.AddPartResult()->CopyFrom(p); - } - ctx.Send(Sender, res.Release()); - TThis::Die(ctx); - } - - void Handle(TTPtr& ev, const TActorContext& ctx) - { - Result.push_back(ev->Get()->PartResult); - if (--Waiting == 0) { - AnswerAndDie(ctx); - } - } - - void Wakeup(const TActorContext& ctx) { - AnswerAndDie(ctx); - } - - STFUNC(StateFunc) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvent, Handle); - CFunc(TEvents::TSystem::Wakeup, Wakeup); - default: - break; - }; - } - - ui64 TabletId; + auto& resp = res->Record; + resp.SetTabletId(TabletId); + for (const auto& p : Result) { + resp.AddPartResult()->CopyFrom(p); + } + ctx.Send(Sender, res.Release()); + TThis::Die(ctx); + } + + void Handle(TTPtr& ev, const TActorContext& ctx) + { + Result.push_back(ev->Get()->PartResult); + if (--Waiting == 0) { + AnswerAndDie(ctx); + } + } + + void Wakeup(const TActorContext& ctx) { + AnswerAndDie(ctx); + } + + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + HFunc(TEvent, Handle); + CFunc(TEvents::TSystem::Wakeup, Wakeup); + default: + break; + }; + } + + ui64 TabletId; TActorId Sender; - ui32 Waiting; + ui32 Waiting; TVector<typename T2::TPartResult> Result; -}; - - +}; + + TActorId CreateOffsetsProxyActor(const ui64 tabletId, const TActorId& sender, const ui32 count, const TActorContext& ctx) -{ - return ctx.Register(new TBuilderProxy<TEvPQ::TEvPartitionOffsetsResponse, - NKikimrPQ::TOffsetsResponse, - TEvPersQueue::TEvOffsetsResponse>(tabletId, sender, count)); -} - -/******************************************************* StatusProxy *********************************************************/ - - +{ + return ctx.Register(new TBuilderProxy<TEvPQ::TEvPartitionOffsetsResponse, + NKikimrPQ::TOffsetsResponse, + TEvPersQueue::TEvOffsetsResponse>(tabletId, sender, count)); +} + +/******************************************************* StatusProxy *********************************************************/ + + TActorId CreateStatusProxyActor(const ui64 tabletId, const TActorId& sender, const ui32 count, const TActorContext& ctx) -{ - return ctx.Register(new TBuilderProxy<TEvPQ::TEvPartitionStatusResponse, - NKikimrPQ::TStatusResponse, - TEvPersQueue::TEvStatusResponse>(tabletId, sender, count)); -} - -/******************************************************* MonitoringProxy *********************************************************/ - - -class TMonitoringProxy : public TActorBootstrapped<TMonitoringProxy> { -public: +{ + return ctx.Register(new TBuilderProxy<TEvPQ::TEvPartitionStatusResponse, + NKikimrPQ::TStatusResponse, + TEvPersQueue::TEvStatusResponse>(tabletId, sender, count)); +} + +/******************************************************* MonitoringProxy *********************************************************/ + + +class TMonitoringProxy : public TActorBootstrapped<TMonitoringProxy> { +public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::PERSQUEUE_MON_ACTOR; } TMonitoringProxy(const TActorId& sender, const TString& query, const TMap<ui32, TActorId>& partitions, const TActorId& cache, - const TString& topicName, ui64 tabletId, ui32 inflight) - : Sender(sender) - , Query(query) - , Partitions(partitions) - , Cache(cache) - , TotalRequests(partitions.size() + 1) - , TotalResponses(0) + const TString& topicName, ui64 tabletId, ui32 inflight) + : Sender(sender) + , Query(query) + , Partitions(partitions) + , Cache(cache) + , TotalRequests(partitions.size() + 1) + , TotalResponses(0) , TopicName(topicName) , TabletID(tabletId) - , Inflight(inflight) - { - for (auto& p: Partitions) { - Results[p.first].push_back(Sprintf("Partition %u: NO DATA", p.first)); - } - } - - void Bootstrap(const TActorContext& ctx) - { - Become(&TThis::StateFunc); - ctx.Send(Cache, new TEvPQ::TEvMonRequest(Sender, Query)); - for (auto& p : Partitions) { - ctx.Send(p.second, new TEvPQ::TEvMonRequest(Sender, Query)); - } - ctx.Schedule(TDuration::Seconds(10), new TEvents::TEvWakeup()); - } - -private: - - void Reply(const TActorContext& ctx) { - TStringStream str; - ui32 mx = 0; - for (auto& r: Results) mx = Max<ui32>(mx, r.second.size()); - + , Inflight(inflight) + { + for (auto& p: Partitions) { + Results[p.first].push_back(Sprintf("Partition %u: NO DATA", p.first)); + } + } + + void Bootstrap(const TActorContext& ctx) + { + Become(&TThis::StateFunc); + ctx.Send(Cache, new TEvPQ::TEvMonRequest(Sender, Query)); + for (auto& p : Partitions) { + ctx.Send(p.second, new TEvPQ::TEvMonRequest(Sender, Query)); + } + ctx.Schedule(TDuration::Seconds(10), new TEvents::TEvWakeup()); + } + +private: + + void Reply(const TActorContext& ctx) { + TStringStream str; + ui32 mx = 0; + for (auto& r: Results) mx = Max<ui32>(mx, r.second.size()); + HTML(str) { H2() {str << "PersQueue Tablet";} H3() {str << "Topic: " << TopicName;} - H4() {str << "inflight: " << Inflight;} + H4() {str << "inflight: " << Inflight;} UL_CLASS("nav nav-tabs") { LI_CLASS("active") { - str << "<a href=\"#main\" data-toggle=\"tab\">main</a>"; + str << "<a href=\"#main\" data-toggle=\"tab\">main</a>"; } LI() { - str << "<a href=\"#cache\" data-toggle=\"tab\">cache</a>"; + str << "<a href=\"#cache\" data-toggle=\"tab\">cache</a>"; } - for (auto& r: Results) { + for (auto& r: Results) { LI() { - str << "<a href=\"#partition_" << r.first << "\" data-toggle=\"tab\">" << r.first << "</a>"; + str << "<a href=\"#partition_" << r.first << "\" data-toggle=\"tab\">" << r.first << "</a>"; } - } + } } DIV_CLASS("tab-content") { DIV_CLASS_ID("tab-pane fade in active", "main") { TABLE() { - for (ui32 i = 0; i < mx; ++i) { + for (ui32 i = 0; i < mx; ++i) { TABLER() { - for (auto& r : Results) { + for (auto& r : Results) { TABLED() { - if (r.second.size() > i) - str << r.second[i]; + if (r.second.size() > i) + str << r.second[i]; } - } + } } - } + } } } - for (auto& s: Str) { - str << s; - } + for (auto& s: Str) { + str << s; + } } H3() {str << "<a href=\"app?TabletID=" << TabletID << "&kv=1\">KV-tablet internals</a>";} } - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Answer TEvRemoteHttpInfoRes: to " << Sender << " self " << ctx.SelfID); - ctx.Send(Sender, new NMon::TEvRemoteHttpInfoRes(str.Str())); - Die(ctx); - } - - void Wakeup(const TActorContext& ctx) { - Reply(ctx); - } - - void Handle(TEvPQ::TEvMonResponse::TPtr& ev, const TActorContext& ctx) - { - if (ev->Get()->Partition != Max<ui32>()) { - Results[ev->Get()->Partition] = ev->Get()->Res; - } else { - Y_VERIFY(ev->Get()->Partition == Max<ui32>()); - } - Str.push_back(ev->Get()->Str); - if(++TotalResponses == TotalRequests) { - Reply(ctx); - } - } - - STFUNC(StateFunc) { - switch (ev->GetTypeRewrite()) { - CFunc(TEvents::TSystem::Wakeup, Wakeup); - HFunc(TEvPQ::TEvMonResponse, Handle); - default: - break; - }; - } - + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Answer TEvRemoteHttpInfoRes: to " << Sender << " self " << ctx.SelfID); + ctx.Send(Sender, new NMon::TEvRemoteHttpInfoRes(str.Str())); + Die(ctx); + } + + void Wakeup(const TActorContext& ctx) { + Reply(ctx); + } + + void Handle(TEvPQ::TEvMonResponse::TPtr& ev, const TActorContext& ctx) + { + if (ev->Get()->Partition != Max<ui32>()) { + Results[ev->Get()->Partition] = ev->Get()->Res; + } else { + Y_VERIFY(ev->Get()->Partition == Max<ui32>()); + } + Str.push_back(ev->Get()->Str); + if(++TotalResponses == TotalRequests) { + Reply(ctx); + } + } + + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + CFunc(TEvents::TSystem::Wakeup, Wakeup); + HFunc(TEvPQ::TEvMonResponse, Handle); + default: + break; + }; + } + TActorId Sender; TString Query; TMap<ui32, TVector<TString>> Results; TVector<TString> Str; TMap<ui32, TActorId> Partitions; TActorId Cache; - ui32 TotalRequests; - ui32 TotalResponses; + ui32 TotalRequests; + ui32 TotalResponses; TString TopicName; ui64 TabletID; - ui32 Inflight; -}; - - -/******************************************************* TPersQueue *********************************************************/ - -void TPersQueue::ReplyError(const TActorContext& ctx, const ui64 responseCookie, NPersQueue::NErrorCode::EErrorCode errorCode, const TString& error) + ui32 Inflight; +}; + + +/******************************************************* TPersQueue *********************************************************/ + +void TPersQueue::ReplyError(const TActorContext& ctx, const ui64 responseCookie, NPersQueue::NErrorCode::EErrorCode errorCode, const TString& error) { ReplyPersQueueError( ctx.SelfID, ctx, TabletID(), TopicName, Nothing(), *Counters, NKikimrServices::PERSQUEUE, responseCookie, errorCode, error ); } - -void TPersQueue::FillMeteringParams(const TActorContext& ctx) -{ - Y_UNUSED(ctx); - ResourceId = Config.GetTopicPath(); - - TStringBuf buf = Config.GetTopicPath(); - TStringBuf stream; - auto res = buf.AfterPrefix(Config.GetYdbDatabasePath() + "/", stream); - if (res) { - StreamName = stream; - } else { - StreamName = buf; - } - -} - -void TPersQueue::ApplyNewConfigAndReply(const TActorContext& ctx) -{ + +void TPersQueue::FillMeteringParams(const TActorContext& ctx) +{ + Y_UNUSED(ctx); + ResourceId = Config.GetTopicPath(); + + TStringBuf buf = Config.GetTopicPath(); + TStringBuf stream; + auto res = buf.AfterPrefix(Config.GetYdbDatabasePath() + "/", stream); + if (res) { + StreamName = stream; + } else { + StreamName = buf; + } + +} + +void TPersQueue::ApplyNewConfigAndReply(const TActorContext& ctx) +{ THashSet<ui32> was; if (NewConfig.PartitionsSize()) { for (const auto& partition : NewConfig.GetPartitions()) { @@ -571,18 +571,18 @@ void TPersQueue::ApplyNewConfigAndReply(const TActorContext& ctx) for (const auto partitionId : NewConfig.GetPartitionIds()) { was.insert(partitionId); } - } + } for (const auto& partition : Config.GetPartitions()) { Y_VERIFY_S(was.contains(partition.GetPartitionId()), "New config is bad, missing partition " << partition.GetPartitionId()); - } - - Y_VERIFY(ConfigInited && PartitionsInited == Partitions.size()); //in order to answer only after all parts are ready to work - - FlushMetrics(true, ctx); - - Config = NewConfig; - - FillMeteringParams(ctx); + } + + Y_VERIFY(ConfigInited && PartitionsInited == Partitions.size()); //in order to answer only after all parts are ready to work + + FlushMetrics(true, ctx); + + Config = NewConfig; + + FillMeteringParams(ctx); if (!Config.PartitionsSize()) { for (const auto partitionId : Config.GetPartitionIds()) { @@ -593,11 +593,11 @@ void TPersQueue::ApplyNewConfigAndReply(const TActorContext& ctx) ui32 cacheSize = CACHE_SIZE; if (Config.HasCacheSize()) cacheSize = Config.GetCacheSize(); - + if (TopicName.empty()) { // it's the first time TopicName = Config.GetTopicName(); TopicPath = Config.GetTopicPath(); - LocalDC = Config.GetLocalDC(); + LocalDC = Config.GetLocalDC(); KeySchema.clear(); KeySchema.reserve(Config.PartitionKeySchemaSize()); @@ -612,9 +612,9 @@ void TPersQueue::ApplyNewConfigAndReply(const TActorContext& ctx) ctx.Send(CacheActor, new TEvPQ::TEvChangeCacheConfig(cacheSize)); } - for (auto& p : Partitions) { //change config for already created partitions - ctx.Send(p.second.Actor, new TEvPQ::TEvChangeConfig(TopicName, Config)); - } + for (auto& p : Partitions) { //change config for already created partitions + ctx.Send(p.second.Actor, new TEvPQ::TEvChangeConfig(TopicName, Config)); + } for (const auto& partition : Config.GetPartitions()) { const auto partitionId = partition.GetPartitionId(); if (Partitions.find(partitionId) == Partitions.end()) { @@ -626,56 +626,56 @@ void TPersQueue::ApplyNewConfigAndReply(const TActorContext& ctx) )); // InitCompleted is true because this partition is empty - ++PartitionsInited; //newly created partition is empty and ready to work - } - } - for (auto& p : ChangeConfigNotification) { - LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() - << " Config applied version " << Config.GetVersion() << " actor " << p.Actor - << " txId " << p.TxId << " config:\n" << Config.DebugString()); - - THolder<TEvPersQueue::TEvUpdateConfigResponse> res{new TEvPersQueue::TEvUpdateConfigResponse}; - res->Record.SetStatus(NKikimrPQ::OK); - res->Record.SetTxId(p.TxId); - res->Record.SetOrigin(TabletID()); - ctx.Send(p.Actor, res.Release()); - } - ChangeConfigNotification.clear(); - NewConfigShouldBeApplied = false; - NewConfig.Clear(); - -} - + ++PartitionsInited; //newly created partition is empty and ready to work + } + } + for (auto& p : ChangeConfigNotification) { + LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() + << " Config applied version " << Config.GetVersion() << " actor " << p.Actor + << " txId " << p.TxId << " config:\n" << Config.DebugString()); + + THolder<TEvPersQueue::TEvUpdateConfigResponse> res{new TEvPersQueue::TEvUpdateConfigResponse}; + res->Record.SetStatus(NKikimrPQ::OK); + res->Record.SetTxId(p.TxId); + res->Record.SetOrigin(TabletID()); + ctx.Send(p.Actor, res.Release()); + } + ChangeConfigNotification.clear(); + NewConfigShouldBeApplied = false; + NewConfig.Clear(); + +} + void TPersQueue::HandleConfigWriteResponse(const NKikimrClient::TResponse& resp, const TActorContext& ctx) -{ - if (resp.GetStatus() != NMsgBusProxy::MSTATUS_OK || - resp.WriteResultSize() != 1 || - resp.GetWriteResult(0).GetStatus() != NKikimrProto::OK) - { - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() - << " Config write error: " << resp.DebugString() << " " << ctx.SelfID); - ctx.Send(ctx.SelfID, new TEvents::TEvPoisonPill()); - return; - } - - Y_VERIFY(resp.WriteResultSize() == 1); - Y_VERIFY(resp.GetWriteResult(0).GetStatus() == NKikimrProto::OK); - if (ConfigInited && PartitionsInited == Partitions.size()) //all partitions are working well - can apply new config - ApplyNewConfigAndReply(ctx); - else - NewConfigShouldBeApplied = true; //when config will be inited with old value new config will be applied -} +{ + if (resp.GetStatus() != NMsgBusProxy::MSTATUS_OK || + resp.WriteResultSize() != 1 || + resp.GetWriteResult(0).GetStatus() != NKikimrProto::OK) + { + LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() + << " Config write error: " << resp.DebugString() << " " << ctx.SelfID); + ctx.Send(ctx.SelfID, new TEvents::TEvPoisonPill()); + return; + } + + Y_VERIFY(resp.WriteResultSize() == 1); + Y_VERIFY(resp.GetWriteResult(0).GetStatus() == NKikimrProto::OK); + if (ConfigInited && PartitionsInited == Partitions.size()) //all partitions are working well - can apply new config + ApplyNewConfigAndReply(ctx); + else + NewConfigShouldBeApplied = true; //when config will be inited with old value new config will be applied +} void TPersQueue::HandleConfigReadResponse(const NKikimrClient::TResponse& resp, const TActorContext& ctx) -{ - bool ok = (resp.GetStatus() == NMsgBusProxy::MSTATUS_OK) && (resp.ReadResultSize() == 2) && (resp.HasSetExecutorFastLogPolicyResult()) && - (resp.GetSetExecutorFastLogPolicyResult().GetStatus() == NKikimrProto::OK); +{ + bool ok = (resp.GetStatus() == NMsgBusProxy::MSTATUS_OK) && (resp.ReadResultSize() == 2) && (resp.HasSetExecutorFastLogPolicyResult()) && + (resp.GetSetExecutorFastLogPolicyResult().GetStatus() == NKikimrProto::OK); if (!ok) { LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " Config read error: " << resp.DebugString() << " " << ctx.SelfID); - ctx.Send(ctx.SelfID, new TEvents::TEvPoisonPill()); - return; - } + ctx.Send(ctx.SelfID, new TEvents::TEvPoisonPill()); + return; + } ReadConfig(resp.GetReadResult(0), ctx); ReadState(resp.GetReadResult(1), ctx); @@ -690,12 +690,12 @@ void TPersQueue::ReadConfig(const NKikimrClient::TKeyValueResponse::TReadResult& return; } - Y_VERIFY(!ConfigInited); - Y_VERIFY(read.HasStatus()); + Y_VERIFY(!ConfigInited); + Y_VERIFY(read.HasStatus()); - if (read.GetStatus() == NKikimrProto::OK) { - bool res = Config.ParseFromString(read.GetValue()); - Y_VERIFY(res); + if (read.GetStatus() == NKikimrProto::OK) { + bool res = Config.ParseFromString(read.GetValue()); + Y_VERIFY(res); if (!Config.PartitionsSize()) { for (const auto partitionId : Config.GetPartitionIds()) { @@ -704,7 +704,7 @@ void TPersQueue::ReadConfig(const NKikimrClient::TKeyValueResponse::TReadResult& } TopicName = Config.GetTopicName(); - LocalDC = Config.GetLocalDC(); + LocalDC = Config.GetLocalDC(); KeySchema.clear(); KeySchema.reserve(Config.PartitionKeySchemaSize()); @@ -718,11 +718,11 @@ void TPersQueue::ReadConfig(const NKikimrClient::TKeyValueResponse::TReadResult& Y_VERIFY(TopicName.size(), "Need topic name here"); CacheActor = ctx.Register(new TPQCacheProxy(ctx.SelfID, TopicName, cacheSize)); - } else if (read.GetStatus() == NKikimrProto::NODATA) { + } else if (read.GetStatus() == NKikimrProto::NODATA) { LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " no config, start with empty partitions and default config"); - } else { - Y_FAIL("Unexpected config read status: %d", read.GetStatus()); - } + } else { + Y_FAIL("Unexpected config read status: %d", read.GetStatus()); + } for (const auto& partition : Config.GetPartitions()) { // no partitions will be created with empty config const auto partitionId = partition.GetPartitionId(); @@ -732,37 +732,37 @@ void TPersQueue::ReadConfig(const NKikimrClient::TKeyValueResponse::TReadResult& false, *Counters )); - } - ConfigInited = true; - - auto now = ctx.Now(); - ShardsMetricsLastFlush = now; - RequestsMetricsLastFlush = now; - - FillMeteringParams(ctx); - - Y_VERIFY(!NewConfigShouldBeApplied); - for ( auto& req : UpdateConfigRequests) { - ProcessUpdateConfigRequest(req.first, req.second, ctx); - } - UpdateConfigRequests.clear(); - - for (auto& req : HasDataRequests) { - auto it = Partitions.find(req->Record.GetPartition()); - if (it != Partitions.end()) { - if (now.MilliSeconds() < req->Record.GetDeadline()) { //otherwise there is no need to send event - proxy will generate event itself - ctx.Send(it->second.Actor, req.Release()); - } - } - } - HasDataRequests.clear(); - -} - + } + ConfigInited = true; + + auto now = ctx.Now(); + ShardsMetricsLastFlush = now; + RequestsMetricsLastFlush = now; + + FillMeteringParams(ctx); + + Y_VERIFY(!NewConfigShouldBeApplied); + for ( auto& req : UpdateConfigRequests) { + ProcessUpdateConfigRequest(req.first, req.second, ctx); + } + UpdateConfigRequests.clear(); + + for (auto& req : HasDataRequests) { + auto it = Partitions.find(req->Record.GetPartition()); + if (it != Partitions.end()) { + if (now.MilliSeconds() < req->Record.GetDeadline()) { //otherwise there is no need to send event - proxy will generate event itself + ctx.Send(it->second.Actor, req.Release()); + } + } + } + HasDataRequests.clear(); + +} + void TPersQueue::ReadState(const NKikimrClient::TKeyValueResponse::TReadResult& read, const TActorContext& ctx) { Y_UNUSED(ctx); - + if (read.GetStatus() == NKikimrProto::OK) { NKikimrPQ::TTabletState stateProto; bool ok = stateProto.ParseFromString(read.GetValue()); @@ -811,17 +811,17 @@ void TPersQueue::HandleStateWriteResponse(const NKikimrClient::TResponse& resp, ReturnTabletStateAll(ctx); } -void TPersQueue::Handle(TEvKeyValue::TEvResponse::TPtr& ev, const TActorContext& ctx) -{ - auto& resp = ev->Get()->Record; - +void TPersQueue::Handle(TEvKeyValue::TEvResponse::TPtr& ev, const TActorContext& ctx) +{ + auto& resp = ev->Get()->Record; + switch (resp.GetCookie()) { case WRITE_CONFIG_COOKIE: - HandleConfigWriteResponse(resp, ctx); + HandleConfigWriteResponse(resp, ctx); break; case READ_CONFIG_COOKIE: - // read is only for config - is signal to create interal actors - HandleConfigReadResponse(resp, ctx); + // read is only for config - is signal to create interal actors + HandleConfigReadResponse(resp, ctx); break; case WRITE_STATE_COOKIE: HandleStateWriteResponse(resp, ctx); @@ -830,106 +830,106 @@ void TPersQueue::Handle(TEvKeyValue::TEvResponse::TPtr& ev, const TActorContext& LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " Unexpected KV response: " << ev->Get()->ToString() << " " << ctx.SelfID); ctx.Send(ctx.SelfID, new TEvents::TEvPoisonPill()); - } -} - + } +} + void TPersQueue::SetCacheCounters(TEvPQ::TEvTabletCacheCounters::TCacheCounters& cacheCounters) { Counters->Simple()[COUNTER_PQ_TABLET_CACHE_SIZE] = cacheCounters.CacheSizeBytes; Counters->Simple()[COUNTER_PQ_TABLET_CACHE_COUNT] = cacheCounters.CacheSizeBlobs; Counters->Simple()[COUNTER_PQ_TABLET_CACHED_ON_READ] = cacheCounters.CachedOnRead; Counters->Simple()[COUNTER_PQ_TABLET_CACHED_ON_WRATE] = cacheCounters.CachedOnWrite; - Counters->Simple()[COUNTER_PQ_TABLET_OPENED_PIPES] = PipesInfo.size(); + Counters->Simple()[COUNTER_PQ_TABLET_OPENED_PIPES] = PipesInfo.size(); } -void TPersQueue::Handle(TEvPQ::TEvPartitionCounters::TPtr& ev, const TActorContext& ctx) -{ - auto it = Partitions.find(ev->Get()->Partition); - Y_VERIFY(it != Partitions.end()); - auto diff = ev->Get()->Counters.MakeDiffForAggr(it->second.Baseline); - ui64 cpuUsage = diff->Cumulative()[COUNTER_PQ_TABLET_CPU_USAGE].Get(); +void TPersQueue::Handle(TEvPQ::TEvPartitionCounters::TPtr& ev, const TActorContext& ctx) +{ + auto it = Partitions.find(ev->Get()->Partition); + Y_VERIFY(it != Partitions.end()); + auto diff = ev->Get()->Counters.MakeDiffForAggr(it->second.Baseline); + ui64 cpuUsage = diff->Cumulative()[COUNTER_PQ_TABLET_CPU_USAGE].Get(); ui64 networkBytesUsage = diff->Cumulative()[COUNTER_PQ_TABLET_NETWORK_BYTES_USAGE].Get(); if (ResourceMetrics) { if (cpuUsage > 0) { - ResourceMetrics->CPU.Increment(cpuUsage); + ResourceMetrics->CPU.Increment(cpuUsage); } if (networkBytesUsage > 0) { ResourceMetrics->Network.Increment(networkBytesUsage); } if (cpuUsage > 0 || networkBytesUsage > 0) { - ResourceMetrics->TryUpdate(ctx); + ResourceMetrics->TryUpdate(ctx); } - } + } - Counters->Populate(*diff.Get()); - ev->Get()->Counters.RememberCurrentStateAsBaseline(it->second.Baseline); + Counters->Populate(*diff.Get()); + ev->Get()->Counters.RememberCurrentStateAsBaseline(it->second.Baseline); // restore cache's simple counters cleaned by partition's counters SetCacheCounters(CacheCounters); - ui64 reservedSize = 0; - for (auto& p : Partitions) { - if (p.second.Baseline.Simple().Size() > 0) //there could be no counters from this partition yet - reservedSize += p.second.Baseline.Simple()[COUNTER_PQ_TABLET_RESERVED_BYTES_SIZE].Get(); - } - Counters->Simple()[COUNTER_PQ_TABLET_RESERVED_BYTES_SIZE].Set(reservedSize); -} - - + ui64 reservedSize = 0; + for (auto& p : Partitions) { + if (p.second.Baseline.Simple().Size() > 0) //there could be no counters from this partition yet + reservedSize += p.second.Baseline.Simple()[COUNTER_PQ_TABLET_RESERVED_BYTES_SIZE].Get(); + } + Counters->Simple()[COUNTER_PQ_TABLET_RESERVED_BYTES_SIZE].Set(reservedSize); +} + + void TPersQueue::AggregateAndSendLabeledCountersFor(const TString& group, const TActorContext& ctx) -{ - if (CounterEventsInflight[group].RefCount() <= 1) { - if (CounterEventsInflight[group].RefCount() == 0) { - CounterEventsInflight[group] = new TEvTabletCounters::TInFlightCookie; - } - - TAutoPtr<TTabletLabeledCountersBase> aggr(new TTabletLabeledCountersBase); - for (auto& p : Partitions) { - auto it = p.second.LabeledCounters.find(group); - if (it != p.second.LabeledCounters.end()) { - aggr->AggregateWith(it->second); - if (it->second.GetDrop()) { - p.second.LabeledCounters.erase(it); - } - } - } - - Y_VERIFY(aggr->HasCounters()); - +{ + if (CounterEventsInflight[group].RefCount() <= 1) { + if (CounterEventsInflight[group].RefCount() == 0) { + CounterEventsInflight[group] = new TEvTabletCounters::TInFlightCookie; + } + + TAutoPtr<TTabletLabeledCountersBase> aggr(new TTabletLabeledCountersBase); + for (auto& p : Partitions) { + auto it = p.second.LabeledCounters.find(group); + if (it != p.second.LabeledCounters.end()) { + aggr->AggregateWith(it->second); + if (it->second.GetDrop()) { + p.second.LabeledCounters.erase(it); + } + } + } + + Y_VERIFY(aggr->HasCounters()); + TActorId countersAggregator = MakeTabletCountersAggregatorID(ctx.SelfID.NodeId()); - ctx.Send(countersAggregator, new TEvTabletCounters::TEvTabletAddLabeledCounters( - CounterEventsInflight[group], TabletID(), TTabletTypes::PERSQUEUE, aggr)); - } -} - - -void TPersQueue::Handle(TEvPQ::TEvPartitionLabeledCounters::TPtr& ev, const TActorContext& ctx) -{ - auto it = Partitions.find(ev->Get()->Partition); - Y_VERIFY(it != Partitions.end()); + ctx.Send(countersAggregator, new TEvTabletCounters::TEvTabletAddLabeledCounters( + CounterEventsInflight[group], TabletID(), TTabletTypes::PERSQUEUE, aggr)); + } +} + + +void TPersQueue::Handle(TEvPQ::TEvPartitionLabeledCounters::TPtr& ev, const TActorContext& ctx) +{ + auto it = Partitions.find(ev->Get()->Partition); + Y_VERIFY(it != Partitions.end()); const TString& group = ev->Get()->LabeledCounters.GetGroup(); - it->second.LabeledCounters[group] = ev->Get()->LabeledCounters; - Y_UNUSED(ctx); -// if uncommented, all changes will be reported immediatly -// AggregateAndSendLabeledCountersFor(group, ctx); -} - -void TPersQueue::Handle(TEvPQ::TEvPartitionLabeledCountersDrop::TPtr& ev, const TActorContext& ctx) -{ - auto it = Partitions.find(ev->Get()->Partition); - Y_VERIFY(it != Partitions.end()); - const TString& group = ev->Get()->Group; - auto jt = it->second.LabeledCounters.find(group); - if (jt != it->second.LabeledCounters.end()) - jt->second.SetDrop(); - Y_UNUSED(ctx); -// if uncommented, all changes will be reported immediatly -// AggregateAndSendLabeledCountersFor(group, ctx); - -} - - - + it->second.LabeledCounters[group] = ev->Get()->LabeledCounters; + Y_UNUSED(ctx); +// if uncommented, all changes will be reported immediatly +// AggregateAndSendLabeledCountersFor(group, ctx); +} + +void TPersQueue::Handle(TEvPQ::TEvPartitionLabeledCountersDrop::TPtr& ev, const TActorContext& ctx) +{ + auto it = Partitions.find(ev->Get()->Partition); + Y_VERIFY(it != Partitions.end()); + const TString& group = ev->Get()->Group; + auto jt = it->second.LabeledCounters.find(group); + if (jt != it->second.LabeledCounters.end()) + jt->second.SetDrop(); + Y_UNUSED(ctx); +// if uncommented, all changes will be reported immediatly +// AggregateAndSendLabeledCountersFor(group, ctx); + +} + + + void TPersQueue::Handle(TEvPQ::TEvTabletCacheCounters::TPtr& ev, const TActorContext& ctx) { CacheCounters = ev->Get()->Counters; @@ -940,125 +940,125 @@ void TPersQueue::Handle(TEvPQ::TEvTabletCacheCounters::TPtr& ev, const TActorCon } -void TPersQueue::Handle(TEvPQ::TEvInitComplete::TPtr& ev, const TActorContext& ctx) -{ - auto it = Partitions.find(ev->Get()->Partition); - Y_VERIFY(it != Partitions.end()); - Y_VERIFY(!it->second.InitDone); - it->second.InitDone = true; - ++PartitionsInited; - Y_VERIFY(ConfigInited);//partitions are inited only after config - if (NewConfigShouldBeApplied && PartitionsInited == Partitions.size()) { - ApplyNewConfigAndReply(ctx); - } -} - - -void TPersQueue::Handle(TEvPQ::TEvError::TPtr& ev, const TActorContext& ctx) -{ - - auto it = ResponseProxy.find(ev->Get()->Cookie); - if (it == ResponseProxy.end()) - return; - bool res = it->second->HandleError(ev->Get(), ctx); - if (res) { - FinishResponse(it); - } -} - -void TPersQueue::Handle(TEvPQ::TEvProxyResponse::TPtr& ev, const TActorContext& ctx) -{ - - auto it = ResponseProxy.find(ev->Get()->Cookie); - if (it == ResponseProxy.end()) { //response for already closed proxy - return; - } - bool res = it->second->HandleProxyResponse(ev, ctx); - if (res) { - FinishResponse(it); - } -} - - +void TPersQueue::Handle(TEvPQ::TEvInitComplete::TPtr& ev, const TActorContext& ctx) +{ + auto it = Partitions.find(ev->Get()->Partition); + Y_VERIFY(it != Partitions.end()); + Y_VERIFY(!it->second.InitDone); + it->second.InitDone = true; + ++PartitionsInited; + Y_VERIFY(ConfigInited);//partitions are inited only after config + if (NewConfigShouldBeApplied && PartitionsInited == Partitions.size()) { + ApplyNewConfigAndReply(ctx); + } +} + + +void TPersQueue::Handle(TEvPQ::TEvError::TPtr& ev, const TActorContext& ctx) +{ + + auto it = ResponseProxy.find(ev->Get()->Cookie); + if (it == ResponseProxy.end()) + return; + bool res = it->second->HandleError(ev->Get(), ctx); + if (res) { + FinishResponse(it); + } +} + +void TPersQueue::Handle(TEvPQ::TEvProxyResponse::TPtr& ev, const TActorContext& ctx) +{ + + auto it = ResponseProxy.find(ev->Get()->Cookie); + if (it == ResponseProxy.end()) { //response for already closed proxy + return; + } + bool res = it->second->HandleProxyResponse(ev, ctx); + if (res) { + FinishResponse(it); + } +} + + void TPersQueue::FinishResponse(THashMap<ui64, TAutoPtr<TResponseBuilder>>::iterator it) -{ - // ctx.Send(Tablet, new TEvPQ::TEvCompleteResponse(Sender, CounterId, , Response.Release())); - Counters->Percentile()[it->second->CounterId].IncrementFor((TAppData::TimeProvider->Now() - it->second->Timestamp).MilliSeconds()); - ResponseProxy.erase(it); - Counters->Simple()[COUNTER_PQ_TABLET_INFLIGHT].Set(ResponseProxy.size()); -} - - -void TPersQueue::Handle(TEvPersQueue::TEvUpdateConfig::TPtr& ev, const TActorContext& ctx) -{ - if (!ConfigInited) { - UpdateConfigRequests.emplace_back(ev->Release(), ev->Sender); - return; - } - ProcessUpdateConfigRequest(ev->Release(), ev->Sender, ctx); -} - - +{ + // ctx.Send(Tablet, new TEvPQ::TEvCompleteResponse(Sender, CounterId, , Response.Release())); + Counters->Percentile()[it->second->CounterId].IncrementFor((TAppData::TimeProvider->Now() - it->second->Timestamp).MilliSeconds()); + ResponseProxy.erase(it); + Counters->Simple()[COUNTER_PQ_TABLET_INFLIGHT].Set(ResponseProxy.size()); +} + + +void TPersQueue::Handle(TEvPersQueue::TEvUpdateConfig::TPtr& ev, const TActorContext& ctx) +{ + if (!ConfigInited) { + UpdateConfigRequests.emplace_back(ev->Release(), ev->Sender); + return; + } + ProcessUpdateConfigRequest(ev->Release(), ev->Sender, ctx); +} + + void TPersQueue::ProcessUpdateConfigRequest(TAutoPtr<TEvPersQueue::TEvUpdateConfig> ev, const TActorId& sender, const TActorContext& ctx) -{ - auto& record = ev->Record; - - int oldConfigVersion = Config.HasVersion() ? Config.GetVersion() : -1; - int newConfigVersion = NewConfig.HasVersion() ? NewConfig.GetVersion() : oldConfigVersion; - - Y_VERIFY(newConfigVersion >= oldConfigVersion); - - NKikimrPQ::TPQTabletConfig cfg = record.GetTabletConfig(); - - Y_VERIFY(cfg.HasVersion()); - int curConfigVersion = cfg.GetVersion(); - - if (curConfigVersion == oldConfigVersion) { //already applied - LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() - << " Config already applied version " << Config.GetVersion() << " actor " << sender - << " txId " << record.GetTxId() << " config:\n" << cfg.DebugString()); - - THolder<TEvPersQueue::TEvUpdateConfigResponse> res{new TEvPersQueue::TEvUpdateConfigResponse}; - res->Record.SetStatus(NKikimrPQ::OK); - res->Record.SetTxId(record.GetTxId()); - res->Record.SetOrigin(TabletID()); - ctx.Send(sender, res.Release()); - return; - } - if (curConfigVersion < newConfigVersion) { //Version must increase - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() - << " Config has too small version " << curConfigVersion << " actual " << newConfigVersion << " actor " << sender - << " txId " << record.GetTxId() << " config:\n" << cfg.DebugString()); - - THolder<TEvPersQueue::TEvUpdateConfigResponse> res{new TEvPersQueue::TEvUpdateConfigResponse}; - res->Record.SetStatus(NKikimrPQ::ERROR_BAD_VERSION); - res->Record.SetTxId(record.GetTxId()); - res->Record.SetOrigin(TabletID()); - ctx.Send(sender, res.Release()); - return; - } - if (curConfigVersion == newConfigVersion) { //nothing to change, will be answered on cfg write from prev step +{ + auto& record = ev->Record; + + int oldConfigVersion = Config.HasVersion() ? Config.GetVersion() : -1; + int newConfigVersion = NewConfig.HasVersion() ? NewConfig.GetVersion() : oldConfigVersion; + + Y_VERIFY(newConfigVersion >= oldConfigVersion); + + NKikimrPQ::TPQTabletConfig cfg = record.GetTabletConfig(); + + Y_VERIFY(cfg.HasVersion()); + int curConfigVersion = cfg.GetVersion(); + + if (curConfigVersion == oldConfigVersion) { //already applied LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() - << " Config update version " << Config.GetVersion() << " is already in progress actor " << sender + << " Config already applied version " << Config.GetVersion() << " actor " << sender + << " txId " << record.GetTxId() << " config:\n" << cfg.DebugString()); + + THolder<TEvPersQueue::TEvUpdateConfigResponse> res{new TEvPersQueue::TEvUpdateConfigResponse}; + res->Record.SetStatus(NKikimrPQ::OK); + res->Record.SetTxId(record.GetTxId()); + res->Record.SetOrigin(TabletID()); + ctx.Send(sender, res.Release()); + return; + } + if (curConfigVersion < newConfigVersion) { //Version must increase + LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() + << " Config has too small version " << curConfigVersion << " actual " << newConfigVersion << " actor " << sender + << " txId " << record.GetTxId() << " config:\n" << cfg.DebugString()); + + THolder<TEvPersQueue::TEvUpdateConfigResponse> res{new TEvPersQueue::TEvUpdateConfigResponse}; + res->Record.SetStatus(NKikimrPQ::ERROR_BAD_VERSION); + res->Record.SetTxId(record.GetTxId()); + res->Record.SetOrigin(TabletID()); + ctx.Send(sender, res.Release()); + return; + } + if (curConfigVersion == newConfigVersion) { //nothing to change, will be answered on cfg write from prev step + LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() + << " Config update version " << Config.GetVersion() << " is already in progress actor " << sender + << " txId " << record.GetTxId() << " config:\n" << cfg.DebugString()); + ChangeConfigNotification.insert(TChangeNotification(sender, record.GetTxId())); + return; + } + + if (curConfigVersion > newConfigVersion && NewConfig.HasVersion()) { //already in progress with older version + LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() + << " Config version " << Config.GetVersion() << " is too big, applying right now version " << newConfigVersion + << " actor " << sender << " txId " << record.GetTxId() << " config:\n" << cfg.DebugString()); - ChangeConfigNotification.insert(TChangeNotification(sender, record.GetTxId())); - return; - } - - if (curConfigVersion > newConfigVersion && NewConfig.HasVersion()) { //already in progress with older version - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() - << " Config version " << Config.GetVersion() << " is too big, applying right now version " << newConfigVersion - << " actor " << sender - << " txId " << record.GetTxId() << " config:\n" << cfg.DebugString()); - - THolder<TEvPersQueue::TEvUpdateConfigResponse> res{new TEvPersQueue::TEvUpdateConfigResponse}; - res->Record.SetStatus(NKikimrPQ::ERROR_UPDATE_IN_PROGRESS); - res->Record.SetTxId(record.GetTxId()); - res->Record.SetOrigin(TabletID()); - ctx.Send(sender, res.Release()); - return; - } - + + THolder<TEvPersQueue::TEvUpdateConfigResponse> res{new TEvPersQueue::TEvUpdateConfigResponse}; + res->Record.SetStatus(NKikimrPQ::ERROR_UPDATE_IN_PROGRESS); + res->Record.SetTxId(record.GetTxId()); + res->Record.SetOrigin(TabletID()); + ctx.Send(sender, res.Release()); + return; + } + const auto& bootstrapCfg = record.GetBootstrapConfig(); if (bootstrapCfg.ExplicitMessageGroupsSize() && !AppData(ctx)->PQConfig.GetEnableProtoSourceIdInfo()) { @@ -1101,52 +1101,52 @@ void TPersQueue::ProcessUpdateConfigRequest(TAutoPtr<TEvPersQueue::TEvUpdateConf } } - ChangeConfigNotification.insert(TChangeNotification(sender, record.GetTxId())); - - if (!cfg.HasPartitionConfig()) - cfg.MutablePartitionConfig()->CopyFrom(Config.GetPartitionConfig()); - if (!cfg.HasCacheSize() && Config.HasCacheSize()) //if not set and it is alter - preserve old cache size - cfg.SetCacheSize(Config.GetCacheSize()); - - // set rr generation for provided read rules - { - THashMap<TString, std::pair<ui64, ui64>> existed; // map name -> rrVersion, rrGeneration - for (ui32 i = 0; i < Config.ReadRulesSize(); ++i) { - auto version = i < Config.ReadRuleVersionsSize() ? Config.GetReadRuleVersions(i) : 0; - auto generation = i < Config.ReadRuleGenerationsSize() ? Config.GetReadRuleGenerations(i) : 0; - existed[Config.GetReadRules(i)] = std::make_pair(version, generation); - } - for (ui32 i = 0; i < cfg.ReadRulesSize(); ++i) { - auto version = i < cfg.ReadRuleVersionsSize() ? cfg.GetReadRuleVersions(i) : 0; - auto it = existed.find(cfg.GetReadRules(i)); - ui64 generation = 0; - if (it != existed.end() && it->second.first == version) { - generation = it->second.second; - } else { - generation = curConfigVersion; - } - cfg.AddReadRuleGenerations(generation); - } - } - - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() - << " Config update version " << cfg.GetVersion() << "(current " << Config.GetVersion() << ") received from actor " << sender - << " txId " << record.GetTxId() << " config:\n" << cfg.DebugString()); - + ChangeConfigNotification.insert(TChangeNotification(sender, record.GetTxId())); + + if (!cfg.HasPartitionConfig()) + cfg.MutablePartitionConfig()->CopyFrom(Config.GetPartitionConfig()); + if (!cfg.HasCacheSize() && Config.HasCacheSize()) //if not set and it is alter - preserve old cache size + cfg.SetCacheSize(Config.GetCacheSize()); + + // set rr generation for provided read rules + { + THashMap<TString, std::pair<ui64, ui64>> existed; // map name -> rrVersion, rrGeneration + for (ui32 i = 0; i < Config.ReadRulesSize(); ++i) { + auto version = i < Config.ReadRuleVersionsSize() ? Config.GetReadRuleVersions(i) : 0; + auto generation = i < Config.ReadRuleGenerationsSize() ? Config.GetReadRuleGenerations(i) : 0; + existed[Config.GetReadRules(i)] = std::make_pair(version, generation); + } + for (ui32 i = 0; i < cfg.ReadRulesSize(); ++i) { + auto version = i < cfg.ReadRuleVersionsSize() ? cfg.GetReadRuleVersions(i) : 0; + auto it = existed.find(cfg.GetReadRules(i)); + ui64 generation = 0; + if (it != existed.end() && it->second.first == version) { + generation = it->second.second; + } else { + generation = curConfigVersion; + } + cfg.AddReadRuleGenerations(generation); + } + } + + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() + << " Config update version " << cfg.GetVersion() << "(current " << Config.GetVersion() << ") received from actor " << sender + << " txId " << record.GetTxId() << " config:\n" << cfg.DebugString()); + TString str; - - Y_VERIFY(CheckPersQueueConfig(cfg, true, &str), "%s", str.c_str()); - - bool res = cfg.SerializeToString(&str); - Y_VERIFY(res); - TAutoPtr<TEvKeyValue::TEvRequest> request(new TEvKeyValue::TEvRequest); + Y_VERIFY(CheckPersQueueConfig(cfg, true, &str), "%s", str.c_str()); + + bool res = cfg.SerializeToString(&str); + Y_VERIFY(res); + + TAutoPtr<TEvKeyValue::TEvRequest> request(new TEvKeyValue::TEvRequest); request->Record.SetCookie(WRITE_CONFIG_COOKIE); - auto write = request->Record.AddCmdWrite(); + auto write = request->Record.AddCmdWrite(); write->SetKey(KeyConfig()); - write->SetValue(str); - write->SetTactic(AppData(ctx)->PQConfig.GetTactic()); + write->SetValue(str); + write->SetTactic(AppData(ctx)->PQConfig.GetTactic()); TSourceIdWriter sourceIdWriter(ESourceIdFormat::Proto); for (const auto& mg : bootstrapCfg.GetExplicitMessageGroups()) { @@ -1162,11 +1162,11 @@ void TPersQueue::ProcessUpdateConfigRequest(TAutoPtr<TEvPersQueue::TEvUpdateConf } } - NewConfig = cfg; - ctx.Send(ctx.SelfID, request.Release()); -} - - + NewConfig = cfg; + ctx.Send(ctx.SelfID, request.Release()); +} + + void TPersQueue::Handle(TEvPersQueue::TEvDropTablet::TPtr& ev, const TActorContext& ctx) { auto& record = ev->Get()->Record; @@ -1200,51 +1200,51 @@ void TPersQueue::Handle(TEvPersQueue::TEvDropTablet::TPtr& ev, const TActorConte auto kvCmd = kvRequest->Record.AddCmdWrite(); kvCmd->SetKey(KeyState()); kvCmd->SetValue(strState); - kvCmd->SetTactic(AppData(ctx)->PQConfig.GetTactic()); + kvCmd->SetTactic(AppData(ctx)->PQConfig.GetTactic()); ctx.Send(ctx.SelfID, kvRequest.Release()); } -void TPersQueue::Handle(TEvPersQueue::TEvOffsets::TPtr& ev, const TActorContext& ctx) -{ - if (!ConfigInited) { +void TPersQueue::Handle(TEvPersQueue::TEvOffsets::TPtr& ev, const TActorContext& ctx) +{ + if (!ConfigInited) { THolder<TEvPersQueue::TEvOffsetsResponse> res = MakeHolder<TEvPersQueue::TEvOffsetsResponse>(); - auto& resp = res->Record; - resp.SetTabletId(TabletID()); - - ctx.Send(ev->Sender, res.Release()); - return; - } - ui32 cnt = 0; - for (auto& p : Partitions) { - cnt += p.second.InitDone; - } + auto& resp = res->Record; + resp.SetTabletId(TabletID()); + + ctx.Send(ev->Sender, res.Release()); + return; + } + ui32 cnt = 0; + for (auto& p : Partitions) { + cnt += p.second.InitDone; + } TActorId ans = CreateOffsetsProxyActor(TabletID(), ev->Sender, cnt, ctx); - - for (auto& p : Partitions) { - if (!p.second.InitDone) - continue; + + for (auto& p : Partitions) { + if (!p.second.InitDone) + continue; THolder<TEvPQ::TEvPartitionOffsets> event = MakeHolder<TEvPQ::TEvPartitionOffsets>(ans, ev->Get()->Record.HasClientId() ? - ev->Get()->Record.GetClientId() : ""); - ctx.Send(p.second.Actor, event.Release()); - } -} - -void TPersQueue::Handle(TEvPersQueue::TEvHasDataInfo::TPtr& ev, const TActorContext& ctx) -{ - auto& record = ev->Get()->Record; + ev->Get()->Record.GetClientId() : ""); + ctx.Send(p.second.Actor, event.Release()); + } +} + +void TPersQueue::Handle(TEvPersQueue::TEvHasDataInfo::TPtr& ev, const TActorContext& ctx) +{ + auto& record = ev->Get()->Record; ActorIdToProto(ev->Sender, record.MutableSender()); - if (!ConfigInited) { - HasDataRequests.push_back(ev->Release()); - } else { - auto it = Partitions.find(record.GetPartition()); - if (it != Partitions.end()) { - ctx.Send(it->second.Actor, ev->Release().Release()); - } - } -} - - + if (!ConfigInited) { + HasDataRequests.push_back(ev->Release()); + } else { + auto it = Partitions.find(record.GetPartition()); + if (it != Partitions.end()) { + ctx.Send(it->second.Actor, ev->Release().Release()); + } + } +} + + void TPersQueue::Handle(TEvPersQueue::TEvPartitionClientInfo::TPtr& ev, const TActorContext& ctx) { for (auto partition : ev->Get()->Record.GetPartitions()) { auto it = Partitions.find(partition); @@ -1259,160 +1259,160 @@ void TPersQueue::Handle(TEvPersQueue::TEvPartitionClientInfo::TPtr& ev, const TA } -void TPersQueue::Handle(TEvPersQueue::TEvStatus::TPtr& ev, const TActorContext& ctx) -{ - if (!ConfigInited) { +void TPersQueue::Handle(TEvPersQueue::TEvStatus::TPtr& ev, const TActorContext& ctx) +{ + if (!ConfigInited) { THolder<TEvPersQueue::TEvStatusResponse> res = MakeHolder<TEvPersQueue::TEvStatusResponse>(); - auto& resp = res->Record; - resp.SetTabletId(TabletID()); - - ctx.Send(ev->Sender, res.Release()); - return; - } - - ui32 cnt = 0; - for (auto& p : Partitions) { - cnt += p.second.InitDone; - } - + auto& resp = res->Record; + resp.SetTabletId(TabletID()); + + ctx.Send(ev->Sender, res.Release()); + return; + } + + ui32 cnt = 0; + for (auto& p : Partitions) { + cnt += p.second.InitDone; + } + TActorId ans = CreateStatusProxyActor(TabletID(), ev->Sender, cnt, ctx); - for (auto& p : Partitions) { - if (!p.second.InitDone) - continue; + for (auto& p : Partitions) { + if (!p.second.InitDone) + continue; THolder<TEvPQ::TEvPartitionStatus> event = MakeHolder<TEvPQ::TEvPartitionStatus>(ans, ev->Get()->Record.HasClientId() ? ev->Get()->Record.GetClientId() : ""); - ctx.Send(p.second.Actor, event.Release()); - } -} - - -void TPersQueue::InitResponseBuilder(const ui64 responseCookie, const ui32 count, const ui32 counterId) -{ - auto it = ResponseProxy.find(responseCookie); - Y_VERIFY(it != ResponseProxy.end()); - it->second->AddPartialReplyCount(count); - it->second->SetCounterId(counterId); -} - + ctx.Send(p.second.Actor, event.Release()); + } +} + + +void TPersQueue::InitResponseBuilder(const ui64 responseCookie, const ui32 count, const ui32 counterId) +{ + auto it = ResponseProxy.find(responseCookie); + Y_VERIFY(it != ResponseProxy.end()); + it->second->AddPartialReplyCount(count); + it->second->SetCounterId(counterId); +} + void TPersQueue::HandleGetMaxSeqNoRequest(const ui64 responseCookie, const TActorId& partActor, const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx) -{ - Y_VERIFY(req.HasCmdGetMaxSeqNo()); - InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_GET_MAX_SEQ_NO); - const auto& cmd = req.GetCmdGetMaxSeqNo(); +{ + Y_VERIFY(req.HasCmdGetMaxSeqNo()); + InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_GET_MAX_SEQ_NO); + const auto& cmd = req.GetCmdGetMaxSeqNo(); TVector<TString> ids; ids.reserve(cmd.SourceIdSize()); - for (ui32 i = 0; i < cmd.SourceIdSize(); ++i) - ids.push_back(cmd.GetSourceId(i)); + for (ui32 i = 0; i < cmd.SourceIdSize(); ++i) + ids.push_back(cmd.GetSourceId(i)); THolder<TEvPQ::TEvGetMaxSeqNoRequest> event = MakeHolder<TEvPQ::TEvGetMaxSeqNoRequest>(responseCookie, ids); - ctx.Send(partActor, event.Release()); -} - + ctx.Send(partActor, event.Release()); +} + void TPersQueue::HandleDeleteSessionRequest(const ui64 responseCookie, const TActorId& partActor, const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx) -{ - Y_VERIFY(req.HasCmdDeleteSession()); - InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_DELETE_SESSION); - const auto& cmd = req.GetCmdDeleteSession(); - - if (!cmd.HasClientId()){ - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, +{ + Y_VERIFY(req.HasCmdDeleteSession()); + InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_DELETE_SESSION); + const auto& cmd = req.GetCmdDeleteSession(); + + if (!cmd.HasClientId()){ + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "no clientId in DeleteSession request: " << ToString(req).data()); - } else if (!cmd.HasSessionId()) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, + } else if (!cmd.HasSessionId()) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "not sessionId in DeleteSession request: " << ToString(req).data()); - } else { + } else { THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(responseCookie, cmd.GetClientId(), - 0, cmd.GetSessionId(), 0, 0, TEvPQ::TEvSetClientInfo::ESCI_DROP_SESSION); - ctx.Send(partActor, event.Release()); - } -} - + 0, cmd.GetSessionId(), 0, 0, TEvPQ::TEvSetClientInfo::ESCI_DROP_SESSION); + ctx.Send(partActor, event.Release()); + } +} + void TPersQueue::HandleCreateSessionRequest(const ui64 responseCookie, const TActorId& partActor, const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx) -{ - Y_VERIFY(req.HasCmdCreateSession()); - const auto& cmd = req.GetCmdCreateSession(); - +{ + Y_VERIFY(req.HasCmdCreateSession()); + const auto& cmd = req.GetCmdCreateSession(); + if (!cmd.HasClientId()){ - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "no clientId in CreateSession request: " << ToString(req).data()); } else if (!cmd.HasSessionId()) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "not sessionId in CreateSession request: " << ToString(req).data()); - } else if (!cmd.HasGeneration()) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, + } else if (!cmd.HasGeneration()) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "not geneartion in CreateSession request: " << ToString(req).data()); - } else if (!cmd.HasStep()) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, + } else if (!cmd.HasStep()) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "not step in CreateSession request: " << ToString(req).data()); - } else { - InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_CREATE_SESSION); + } else { + InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_CREATE_SESSION); THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(responseCookie, cmd.GetClientId(), - 0, cmd.GetSessionId(), cmd.GetGeneration(), cmd.GetStep(), TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION); - ctx.Send(partActor, event.Release()); - } -} - + 0, cmd.GetSessionId(), cmd.GetGeneration(), cmd.GetStep(), TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION); + ctx.Send(partActor, event.Release()); + } +} + void TPersQueue::HandleSetClientOffsetRequest(const ui64 responseCookie, const TActorId& partActor, const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx) -{ - Y_VERIFY(req.HasCmdSetClientOffset()); - const auto& cmd = req.GetCmdSetClientOffset(); - - if (!cmd.HasClientId()) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, +{ + Y_VERIFY(req.HasCmdSetClientOffset()); + const auto& cmd = req.GetCmdSetClientOffset(); + + if (!cmd.HasClientId()) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "no clientId in SetClientOffset request: " << ToString(req).data()); - } else if (!cmd.HasOffset()) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, + } else if (!cmd.HasOffset()) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "no offset in SetClientOffset request: " << ToString(req).data()); - } else if (cmd.GetOffset() < 0) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, + } else if (cmd.GetOffset() < 0) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "negative offset in SetClientOffset request: " << ToString(req).data()); - } else { - InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_SET_OFFSET); + } else { + InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_SET_OFFSET); THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(responseCookie, cmd.GetClientId(), - cmd.GetOffset(), - cmd.HasSessionId() ? cmd.GetSessionId() : "", 0, 0); - ctx.Send(partActor, event.Release()); - } -} - + cmd.GetOffset(), + cmd.HasSessionId() ? cmd.GetSessionId() : "", 0, 0); + ctx.Send(partActor, event.Release()); + } +} + void TPersQueue::HandleGetClientOffsetRequest(const ui64 responseCookie, const TActorId& partActor, const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx) -{ - Y_VERIFY(req.HasCmdGetClientOffset()); - const auto& cmd = req.GetCmdGetClientOffset(); - if (!cmd.HasClientId() || cmd.GetClientId().empty()) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, +{ + Y_VERIFY(req.HasCmdGetClientOffset()); + const auto& cmd = req.GetCmdGetClientOffset(); + if (!cmd.HasClientId() || cmd.GetClientId().empty()) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "no clientId in GetClientOffset request: " << ToString(req).data()); - } else { - InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_GET_OFFSET); + } else { + InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_GET_OFFSET); THolder<TEvPQ::TEvGetClientOffset> event = MakeHolder<TEvPQ::TEvGetClientOffset>(responseCookie, cmd.GetClientId()); - ctx.Send(partActor, event.Release()); - } -} - + ctx.Send(partActor, event.Release()); + } +} + void TPersQueue::HandleUpdateWriteTimestampRequest(const ui64 responseCookie, const TActorId& partActor, - const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx) -{ - Y_VERIFY(req.HasCmdUpdateWriteTimestamp()); - const auto& cmd = req.GetCmdUpdateWriteTimestamp(); - InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_GET_OFFSET); + const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx) +{ + Y_VERIFY(req.HasCmdUpdateWriteTimestamp()); + const auto& cmd = req.GetCmdUpdateWriteTimestamp(); + InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_GET_OFFSET); THolder<TEvPQ::TEvUpdateWriteTimestamp> event = MakeHolder<TEvPQ::TEvUpdateWriteTimestamp>(responseCookie, cmd.GetWriteTimeMS()); - ctx.Send(partActor, event.Release()); -} - + ctx.Send(partActor, event.Release()); +} + void TPersQueue::HandleWriteRequest(const ui64 responseCookie, const TActorId& partActor, const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx) -{ - Y_VERIFY(req.CmdWriteSize()); +{ + Y_VERIFY(req.CmdWriteSize()); FlushMetrics(false, ctx); // To ensure hours' border; if (req.HasPutUnitsSize()) { CurrentPutUnitsQuantity += req.GetPutUnitsSize(); } - + TVector <TEvPQ::TEvWrite::TMsg> msgs; - + bool mirroredPartition = Config.GetPartitionConfig().HasMirrorFrom(); if (!req.GetIsDirectWrite()) { @@ -1426,239 +1426,239 @@ void TPersQueue::HandleWriteRequest(const ui64 responseCookie, const TActorId& p ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, "OwnerCookie must be set for writes"); return; } - } - - if (req.HasCmdWriteOffset() && req.GetCmdWriteOffset() < 0) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, "CmdWriteOffset can't be negative"); - return; - } - - for (ui32 i = 0; i < req.CmdWriteSize(); ++i) { - const auto& cmd = req.GetCmdWrite(i); - - if (AppData(ctx)->Counters) { - auto counters = AppData(ctx)->Counters; - TString clientDC = to_lower(cmd.HasClientDC() ? cmd.GetClientDC() : "unknown"); - clientDC.to_title(); - auto it = BytesWrittenFromDC.find(clientDC); - if (it == BytesWrittenFromDC.end()) { - auto pos = TopicName.find("--"); - if (pos != TString::npos) { - auto labels = GetLabels(clientDC, TopicName.substr(pos + 2)); - if (!labels.empty()) { - labels.pop_back(); - } - it = BytesWrittenFromDC.emplace(clientDC, NKikimr::NPQ::TMultiCounter(GetServiceCounters(counters, "pqproxy|writeSession"), + } + + if (req.HasCmdWriteOffset() && req.GetCmdWriteOffset() < 0) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, "CmdWriteOffset can't be negative"); + return; + } + + for (ui32 i = 0; i < req.CmdWriteSize(); ++i) { + const auto& cmd = req.GetCmdWrite(i); + + if (AppData(ctx)->Counters) { + auto counters = AppData(ctx)->Counters; + TString clientDC = to_lower(cmd.HasClientDC() ? cmd.GetClientDC() : "unknown"); + clientDC.to_title(); + auto it = BytesWrittenFromDC.find(clientDC); + if (it == BytesWrittenFromDC.end()) { + auto pos = TopicName.find("--"); + if (pos != TString::npos) { + auto labels = GetLabels(clientDC, TopicName.substr(pos + 2)); + if (!labels.empty()) { + labels.pop_back(); + } + it = BytesWrittenFromDC.emplace(clientDC, NKikimr::NPQ::TMultiCounter(GetServiceCounters(counters, "pqproxy|writeSession"), labels, {{"ClientDC", clientDC}}, {"BytesWrittenFromDC"}, true)).first; - } - } - if (it != BytesWrittenFromDC.end()) - it->second.Inc(cmd.ByteSize()); - } - + } + } + if (it != BytesWrittenFromDC.end()) + it->second.Inc(cmd.ByteSize()); + } + TString errorStr = ""; if (!cmd.HasSeqNo() && !req.GetIsDirectWrite()) { - errorStr = "no SeqNo"; - } else if (!cmd.HasData() || cmd.GetData().empty()){ - errorStr = "empty Data"; + errorStr = "no SeqNo"; + } else if (!cmd.HasData() || cmd.GetData().empty()){ + errorStr = "empty Data"; } else if ((!cmd.HasSourceId() || cmd.GetSourceId().empty()) && !req.GetIsDirectWrite()) { - errorStr = "empty SourceId"; + errorStr = "empty SourceId"; } else if (cmd.GetPartitionKey().size() > 256) { errorStr = "too long partition key"; - } else if (cmd.GetSeqNo() < 0) { - errorStr = "SeqNo must be >= 0"; - } else if (cmd.HasPartNo() && (cmd.GetPartNo() < 0 || cmd.GetPartNo() >= Max<ui16>())) { - errorStr = "PartNo must be >= 0 and < 65535"; - } else if (cmd.HasPartNo() != cmd.HasTotalParts()) { - errorStr = "PartNo and TotalParts must be filled together"; - } else if (cmd.HasTotalParts() && (cmd.GetTotalParts() <= cmd.GetPartNo() || cmd.GetTotalParts() <= 1 || cmd.GetTotalParts() > Max<ui16>())) { - errorStr = "TotalParts must be > PartNo and > 1 and < 65536"; - } else if (cmd.HasPartNo() && cmd.GetPartNo() == 0 && !cmd.HasTotalSize()) { - errorStr = "TotalSize must be filled for first part"; - } else if (cmd.HasTotalSize() && static_cast<size_t>(cmd.GetTotalSize()) <= cmd.GetData().size()) { // TotalSize must be > size of each part - errorStr = "TotalSize is incorrect"; + } else if (cmd.GetSeqNo() < 0) { + errorStr = "SeqNo must be >= 0"; + } else if (cmd.HasPartNo() && (cmd.GetPartNo() < 0 || cmd.GetPartNo() >= Max<ui16>())) { + errorStr = "PartNo must be >= 0 and < 65535"; + } else if (cmd.HasPartNo() != cmd.HasTotalParts()) { + errorStr = "PartNo and TotalParts must be filled together"; + } else if (cmd.HasTotalParts() && (cmd.GetTotalParts() <= cmd.GetPartNo() || cmd.GetTotalParts() <= 1 || cmd.GetTotalParts() > Max<ui16>())) { + errorStr = "TotalParts must be > PartNo and > 1 and < 65536"; + } else if (cmd.HasPartNo() && cmd.GetPartNo() == 0 && !cmd.HasTotalSize()) { + errorStr = "TotalSize must be filled for first part"; + } else if (cmd.HasTotalSize() && static_cast<size_t>(cmd.GetTotalSize()) <= cmd.GetData().size()) { // TotalSize must be > size of each part + errorStr = "TotalSize is incorrect"; } else if (cmd.GetSourceId().size() > MAX_SOURCE_ID_LENGTH) { - errorStr = "Too big SourceId"; + errorStr = "Too big SourceId"; } else if (mirroredPartition && !cmd.GetDisableDeduplication()) { errorStr = "Write to mirrored topic is forbiden"; - } + } ui64 createTimestampMs = 0, writeTimestampMs = 0; - if (cmd.HasCreateTimeMS() && cmd.GetCreateTimeMS() >= 0) + if (cmd.HasCreateTimeMS() && cmd.GetCreateTimeMS() >= 0) createTimestampMs = cmd.GetCreateTimeMS(); - if (cmd.HasWriteTimeMS() && cmd.GetWriteTimeMS() > 0) { + if (cmd.HasWriteTimeMS() && cmd.GetWriteTimeMS() > 0) { writeTimestampMs = cmd.GetWriteTimeMS(); - if (!cmd.GetDisableDeduplication()) { - errorStr = "WriteTimestamp avail only without deduplication"; - } - } - - if (!errorStr.empty()) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, errorStr); - return; - } - ui32 mSize = MAX_BLOB_PART_SIZE - cmd.GetSourceId().size() - sizeof(ui32) - TClientBlob::OVERHEAD; //megaqc - remove this - Y_VERIFY(mSize > 204800); + if (!cmd.GetDisableDeduplication()) { + errorStr = "WriteTimestamp avail only without deduplication"; + } + } + + if (!errorStr.empty()) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, errorStr); + return; + } + ui32 mSize = MAX_BLOB_PART_SIZE - cmd.GetSourceId().size() - sizeof(ui32) - TClientBlob::OVERHEAD; //megaqc - remove this + Y_VERIFY(mSize > 204800); ui64 receiveTimestampMs = TAppData::TimeProvider->Now().MilliSeconds(); bool disableDeduplication = cmd.GetDisableDeduplication(); - if (cmd.GetData().size() > mSize) { - if (cmd.HasPartNo()) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, - TStringBuilder() << "Too big message while using PartNo; must be at most " << mSize << ", but got " << cmd.GetData().size()); - return; - } - auto it = ResponseProxy.find(responseCookie); - Y_VERIFY(it != ResponseProxy.end()); - it->second->SetWasSplit(); - ui32 pos = 0; - ui16 partNo = 0; - ui32 totalSize = cmd.GetData().size(); - ui16 totalParts = (totalSize - 1) / mSize + 1; - ui32 diff = 0; - ui32 lastPartSize = (totalSize - 1) % mSize + 1; // mSize for x*mSize , x for x (x < mSize) - ui32 uncompressedSize = cmd.HasUncompressedSize() ? cmd.GetUncompressedSize() : 0; - if (lastPartSize < 100) { //size of first part will be reduced by diff, => size of last part will be increased by diff => = 100 bytes - diff = 100 - lastPartSize; - } - Y_VERIFY(!cmd.HasTotalParts(), "too big part"); //change this verify for errorStr, when LB will be ready - while (pos < totalSize) { + if (cmd.GetData().size() > mSize) { + if (cmd.HasPartNo()) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, + TStringBuilder() << "Too big message while using PartNo; must be at most " << mSize << ", but got " << cmd.GetData().size()); + return; + } + auto it = ResponseProxy.find(responseCookie); + Y_VERIFY(it != ResponseProxy.end()); + it->second->SetWasSplit(); + ui32 pos = 0; + ui16 partNo = 0; + ui32 totalSize = cmd.GetData().size(); + ui16 totalParts = (totalSize - 1) / mSize + 1; + ui32 diff = 0; + ui32 lastPartSize = (totalSize - 1) % mSize + 1; // mSize for x*mSize , x for x (x < mSize) + ui32 uncompressedSize = cmd.HasUncompressedSize() ? cmd.GetUncompressedSize() : 0; + if (lastPartSize < 100) { //size of first part will be reduced by diff, => size of last part will be increased by diff => = 100 bytes + diff = 100 - lastPartSize; + } + Y_VERIFY(!cmd.HasTotalParts(), "too big part"); //change this verify for errorStr, when LB will be ready + while (pos < totalSize) { TString data = cmd.GetData().substr(pos, mSize - diff); - pos += mSize - diff; - diff = 0; - msgs.push_back({cmd.GetSourceId(), static_cast<ui64>(cmd.GetSeqNo()), partNo, + pos += mSize - diff; + diff = 0; + msgs.push_back({cmd.GetSourceId(), static_cast<ui64>(cmd.GetSeqNo()), partNo, totalParts, totalSize, createTimestampMs, receiveTimestampMs, disableDeduplication, writeTimestampMs, data, uncompressedSize, cmd.GetPartitionKey(), cmd.GetExplicitHash(), cmd.GetExternalOperation() }); - partNo++; - uncompressedSize = 0; - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "got client PART message topic: " << TopicName << " partition: " << req.GetPartition() - << " SourceId: \'" << EscapeC(msgs.back().SourceId) << "\' SeqNo: " - << msgs.back().SeqNo << " partNo : " << msgs.back().PartNo - << " messageNo: " << req.GetMessageNo() << " size: " << data.size()); - } - Y_VERIFY(partNo == totalParts); - } else { - msgs.push_back({cmd.GetSourceId(), static_cast<ui64>(cmd.GetSeqNo()), static_cast<ui16>(cmd.HasPartNo() ? cmd.GetPartNo() : 0), + partNo++; + uncompressedSize = 0; + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "got client PART message topic: " << TopicName << " partition: " << req.GetPartition() + << " SourceId: \'" << EscapeC(msgs.back().SourceId) << "\' SeqNo: " + << msgs.back().SeqNo << " partNo : " << msgs.back().PartNo + << " messageNo: " << req.GetMessageNo() << " size: " << data.size()); + } + Y_VERIFY(partNo == totalParts); + } else { + msgs.push_back({cmd.GetSourceId(), static_cast<ui64>(cmd.GetSeqNo()), static_cast<ui16>(cmd.HasPartNo() ? cmd.GetPartNo() : 0), static_cast<ui16>(cmd.HasPartNo() ? cmd.GetTotalParts() : 1), static_cast<ui32>(cmd.HasTotalSize() ? cmd.GetTotalSize() : cmd.GetData().Size()), createTimestampMs, receiveTimestampMs, disableDeduplication, writeTimestampMs, cmd.GetData(), cmd.HasUncompressedSize() ? cmd.GetUncompressedSize() : 0u, cmd.GetPartitionKey(), cmd.GetExplicitHash(), cmd.GetExternalOperation() }); - } + } LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "got client message topic: " << TopicName << " partition: " << req.GetPartition() << " SourceId: \'" << EscapeC(msgs.back().SourceId) << "\' SeqNo: " << msgs.back().SeqNo << " partNo : " << msgs.back().PartNo << " messageNo: " << req.GetMessageNo() << " size " << msgs.back().Data.size() << " offset: " << (req.HasCmdWriteOffset() ? (req.GetCmdWriteOffset() + i) : -1)); - } - InitResponseBuilder(responseCookie, msgs.size(), COUNTER_LATENCY_PQ_WRITE); + } + InitResponseBuilder(responseCookie, msgs.size(), COUNTER_LATENCY_PQ_WRITE); THolder<TEvPQ::TEvWrite> event = MakeHolder<TEvPQ::TEvWrite>(responseCookie, req.GetMessageNo(), req.HasOwnerCookie() ? req.GetOwnerCookie() : "", req.HasCmdWriteOffset() ? req.GetCmdWriteOffset() : TMaybe<ui64>(), std::move(msgs), req.GetIsDirectWrite()); - ctx.Send(partActor, event.Release()); -} - - + ctx.Send(partActor, event.Release()); +} + + void TPersQueue::HandleReserveBytesRequest(const ui64 responseCookie, const TActorId& partActor, const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx, const TActorId& pipeClient, const TActorId&) -{ - Y_VERIFY(req.HasCmdReserveBytes()); - - auto it = PipesInfo.find(pipeClient); - if (it == PipesInfo.end()) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::ERROR, - TStringBuilder() << "pipe already dead: " << pipeClient); - return; - } - - if (!req.HasMessageNo()) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, "MessageNo must be set for ReserveBytes request"); - return; - } - if (!req.HasOwnerCookie()) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, "OwnerCookie must be set for ReserveBytes request"); - return; - } - - InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_RESERVE_BYTES); +{ + Y_VERIFY(req.HasCmdReserveBytes()); + + auto it = PipesInfo.find(pipeClient); + if (it == PipesInfo.end()) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::ERROR, + TStringBuilder() << "pipe already dead: " << pipeClient); + return; + } + + if (!req.HasMessageNo()) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, "MessageNo must be set for ReserveBytes request"); + return; + } + if (!req.HasOwnerCookie()) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, "OwnerCookie must be set for ReserveBytes request"); + return; + } + + InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_RESERVE_BYTES); THolder<TEvPQ::TEvReserveBytes> event = MakeHolder<TEvPQ::TEvReserveBytes>(responseCookie, req.GetCmdReserveBytes().GetSize(), - req.GetOwnerCookie(), req.GetMessageNo(), req.GetCmdReserveBytes().GetLastRequest()); - ctx.Send(partActor, event.Release()); -} - - + req.GetOwnerCookie(), req.GetMessageNo(), req.GetCmdReserveBytes().GetLastRequest()); + ctx.Send(partActor, event.Release()); +} + + void TPersQueue::HandleGetOwnershipRequest(const ui64 responseCookie, const TActorId& partActor, const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx, const TActorId& pipeClient, const TActorId& sender) -{ - Y_VERIFY(req.HasCmdGetOwnership()); - - const TString& owner = req.GetCmdGetOwnership().GetOwner(); - if (owner.empty()) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, - TStringBuilder() << "empty owner in CmdGetOwnership request"); - return; - } +{ + Y_VERIFY(req.HasCmdGetOwnership()); + + const TString& owner = req.GetCmdGetOwnership().GetOwner(); + if (owner.empty()) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, + TStringBuilder() << "empty owner in CmdGetOwnership request"); + return; + } Y_VERIFY(pipeClient != TActorId()); - auto it = PipesInfo.find(pipeClient); - if (it == PipesInfo.end()) { //do nothing. this could not be happen, just in tests - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, - TStringBuilder() << "request via dead pipe"); - return; - } - - it->second = {partActor, owner, it->second.ServerActors}; - - InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_GET_OWNERSHIP); + auto it = PipesInfo.find(pipeClient); + if (it == PipesInfo.end()) { //do nothing. this could not be happen, just in tests + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, + TStringBuilder() << "request via dead pipe"); + return; + } + + it->second = {partActor, owner, it->second.ServerActors}; + + InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_GET_OWNERSHIP); THolder<TEvPQ::TEvChangeOwner> event = MakeHolder<TEvPQ::TEvChangeOwner>(responseCookie, owner, pipeClient, sender, req.GetCmdGetOwnership().GetForce()); - ctx.Send(partActor, event.Release()); -} - - + ctx.Send(partActor, event.Release()); +} + + void TPersQueue::HandleReadRequest(const ui64 responseCookie, const TActorId& partActor, const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx) -{ - Y_VERIFY(req.HasCmdRead()); - - auto cmd = req.GetCmdRead(); - if (!cmd.HasOffset()) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, +{ + Y_VERIFY(req.HasCmdRead()); + + auto cmd = req.GetCmdRead(); + if (!cmd.HasOffset()) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "no offset in read request: " << ToString(req).data()); - } else if (!cmd.HasClientId()) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, + } else if (!cmd.HasClientId()) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "no clientId in read request: " << ToString(req).data()); - } else if (cmd.HasCount() && cmd.GetCount() <= 0) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, + } else if (cmd.HasCount() && cmd.GetCount() <= 0) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "invalid count in read request: " << ToString(req).data()); } else if (!cmd.HasOffset() || cmd.GetOffset() < 0) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "invalid offset in read request: " << ToString(req).data()); - } else if (cmd.HasBytes() && cmd.GetBytes() <= 0) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, + } else if (cmd.HasBytes() && cmd.GetBytes() <= 0) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "invalid bytes in read request: " << ToString(req).data()); - } else if (cmd.HasTimeoutMs() && cmd.GetTimeoutMs() < 0) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, + } else if (cmd.HasTimeoutMs() && cmd.GetTimeoutMs() < 0) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "invalid timeout in read request: " << ToString(req).data()); - } else if (cmd.HasTimeoutMs() && cmd.GetTimeoutMs() > 120000) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, + } else if (cmd.HasTimeoutMs() && cmd.GetTimeoutMs() > 120000) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "invalid timeout in read request, must be less than 120 secs: " << ToString(req).data()); - } else if (cmd.HasPartNo() && cmd.GetPartNo() < 0) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, + } else if (cmd.HasPartNo() && cmd.GetPartNo() < 0) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "invalid partNo in read request: " << ToString(req).data()); } else if (cmd.HasMaxTimeLagMs() && cmd.GetMaxTimeLagMs() < 0) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "invalid maxTimeLagMs in read request: " << ToString(req).data()); - } else { - InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_READ); - ui32 count = cmd.HasCount() ? cmd.GetCount() : Max<ui32>(); - ui32 bytes = Min<ui32>(MAX_BYTES, cmd.HasBytes() ? cmd.GetBytes() : MAX_BYTES); - auto clientDC = cmd.HasClientDC() ? to_lower(cmd.GetClientDC()) : "unknown"; - clientDC.to_title(); + } else { + InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_READ); + ui32 count = cmd.HasCount() ? cmd.GetCount() : Max<ui32>(); + ui32 bytes = Min<ui32>(MAX_BYTES, cmd.HasBytes() ? cmd.GetBytes() : MAX_BYTES); + auto clientDC = cmd.HasClientDC() ? to_lower(cmd.GetClientDC()) : "unknown"; + clientDC.to_title(); THolder<TEvPQ::TEvRead> event = MakeHolder<TEvPQ::TEvRead>(responseCookie, cmd.GetOffset(), cmd.HasPartNo() ? cmd.GetPartNo() : 0, @@ -1669,10 +1669,10 @@ void TPersQueue::HandleReadRequest(const ui64 responseCookie, const TActorId& pa cmd.HasMaxTimeLagMs() ? cmd.GetMaxTimeLagMs() : 0, cmd.HasReadTimestampMs() ? cmd.GetReadTimestampMs() : 0, clientDC, cmd.GetExternalOperation()); - ctx.Send(partActor, event.Release()); - } -} - + ctx.Send(partActor, event.Release()); + } +} + TMaybe<TEvPQ::TEvRegisterMessageGroup::TBody> TPersQueue::MakeRegisterMessageGroup( const NKikimrClient::TPersQueuePartitionRequest::TCmdRegisterMessageGroup& cmd, NPersQueue::NErrorCode::EErrorCode& code, TString& error) const @@ -1787,64 +1787,64 @@ void TPersQueue::HandleSplitMessageGroupRequest(ui64 responseCookie, const TActo ctx.Send(partActor, new TEvPQ::TEvSplitMessageGroup(responseCookie, std::move(deregistrations), std::move(registrations))); } -void TPersQueue::Handle(TEvPersQueue::TEvRequest::TPtr& ev, const TActorContext& ctx) -{ +void TPersQueue::Handle(TEvPersQueue::TEvRequest::TPtr& ev, const TActorContext& ctx) +{ NKikimrClient::TPersQueueRequest& request = ev->Get()->Record; TString s = request.HasRequestId() ? request.GetRequestId() : "<none>"; - ui32 p = request.HasPartitionRequest() && request.GetPartitionRequest().HasPartition() ? request.GetPartitionRequest().GetPartition() : 0; - ui64 m = request.HasPartitionRequest() && request.GetPartitionRequest().HasMessageNo() ? request.GetPartitionRequest().GetMessageNo() : 0; - TMaybe<ui64> c; - if (request.HasPartitionRequest() && request.GetPartitionRequest().HasCookie()) - c = request.GetPartitionRequest().GetCookie(); - TAutoPtr<TResponseBuilder> ans; - if (request.HasPartitionRequest() && request.GetPartitionRequest().HasCmdRead() && s != TMP_REQUEST_MARKER) { + ui32 p = request.HasPartitionRequest() && request.GetPartitionRequest().HasPartition() ? request.GetPartitionRequest().GetPartition() : 0; + ui64 m = request.HasPartitionRequest() && request.GetPartitionRequest().HasMessageNo() ? request.GetPartitionRequest().GetMessageNo() : 0; + TMaybe<ui64> c; + if (request.HasPartitionRequest() && request.GetPartitionRequest().HasCookie()) + c = request.GetPartitionRequest().GetCookie(); + TAutoPtr<TResponseBuilder> ans; + if (request.HasPartitionRequest() && request.GetPartitionRequest().HasCmdRead() && s != TMP_REQUEST_MARKER) { TActorId rr = CreateReadProxy(ev->Sender, ctx.SelfID, request, ctx); - ans = CreateResponseProxy(rr, ctx.SelfID, TopicName, p, m, s, c, ResourceMetrics, ctx); - } else { - ans = CreateResponseProxy(ev->Sender, ctx.SelfID, TopicName, p, m, s, c, ResourceMetrics, ctx); - } - ui64 responseCookie = ++NextResponseCookie; - ResponseProxy[responseCookie] = ans; - Counters->Simple()[COUNTER_PQ_TABLET_INFLIGHT].Set(ResponseProxy.size()); - if (!ConfigInited) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::INITIALIZING, "tablet is not ready"); - return; - } - + ans = CreateResponseProxy(rr, ctx.SelfID, TopicName, p, m, s, c, ResourceMetrics, ctx); + } else { + ans = CreateResponseProxy(ev->Sender, ctx.SelfID, TopicName, p, m, s, c, ResourceMetrics, ctx); + } + ui64 responseCookie = ++NextResponseCookie; + ResponseProxy[responseCookie] = ans; + Counters->Simple()[COUNTER_PQ_TABLET_INFLIGHT].Set(ResponseProxy.size()); + if (!ConfigInited) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::INITIALIZING, "tablet is not ready"); + return; + } + if (TabletState == NKikimrPQ::EDropped) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::TABLET_IS_DROPPED, "tablet is dropped"); + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::TABLET_IS_DROPPED, "tablet is dropped"); + return; + } + + if (!request.HasPartitionRequest()) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, "no partition request"); + return; + } + + auto& req = request.GetPartitionRequest(); + + if (!req.HasPartition()) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, "no partition number"); return; } - if (!request.HasPartitionRequest()) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, "no partition request"); - return; - } - - auto& req = request.GetPartitionRequest(); - - if (!req.HasPartition()) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, "no partition number"); - return; - } - - ui32 partition = req.GetPartition(); - auto it = Partitions.find(partition); - - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "got client message batch for topic " << TopicName << " partition " << partition << "\n"); - - if (it == Partitions.end()) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::WRONG_PARTITION_NUMBER, + ui32 partition = req.GetPartition(); + auto it = Partitions.find(partition); + + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "got client message batch for topic " << TopicName << " partition " << partition << "\n"); + + if (it == Partitions.end()) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::WRONG_PARTITION_NUMBER, TStringBuilder() << "wrong partition number " << partition); - return; - } - - if (!it->second.InitDone) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::INITIALIZING, + return; + } + + if (!it->second.InitDone) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::INITIALIZING, TStringBuilder() << "partition " << partition << " is not ready"); - return; - } - + return; + } + ui32 count = req.HasCmdGetMaxSeqNo() + req.HasCmdDeleteSession() + req.HasCmdCreateSession() @@ -1858,186 +1858,186 @@ void TPersQueue::Handle(TEvPersQueue::TEvRequest::TPtr& ev, const TActorContext& + req.HasCmdRegisterMessageGroup() + req.HasCmdDeregisterMessageGroup() + req.HasCmdSplitMessageGroup(); - - if (count != 1) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, + + if (count != 1) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "multiple commands in request: " << count); - return; - } - + return; + } + const TActorId& partActor = it->second.Actor; - + TActorId pipeClient = ActorIdFromProto(req.GetPipeClient()); - - if (req.HasCmdGetMaxSeqNo()) { - HandleGetMaxSeqNoRequest(responseCookie, partActor, req, ctx); - } else if (req.HasCmdDeleteSession()) { - HandleDeleteSessionRequest(responseCookie, partActor, req, ctx); - } else if (req.HasCmdCreateSession()) { - HandleCreateSessionRequest(responseCookie, partActor, req, ctx); - } else if (req.HasCmdSetClientOffset()) { - HandleSetClientOffsetRequest(responseCookie, partActor, req, ctx); - } else if (req.HasCmdGetClientOffset()) { - HandleGetClientOffsetRequest(responseCookie, partActor, req, ctx); - } else if (req.CmdWriteSize()) { - HandleWriteRequest(responseCookie, partActor, req, ctx); - } else if (req.HasCmdUpdateWriteTimestamp()) { - HandleUpdateWriteTimestampRequest(responseCookie, partActor, req, ctx); - } else if (req.HasCmdRead()) { - HandleReadRequest(responseCookie, partActor, req, ctx); - } else if (req.HasCmdGetOwnership()) { - HandleGetOwnershipRequest(responseCookie, partActor, req, ctx, pipeClient, ev->Sender); - } else if (req.HasCmdReserveBytes()) { - HandleReserveBytesRequest(responseCookie, partActor, req, ctx, pipeClient, ev->Sender); + + if (req.HasCmdGetMaxSeqNo()) { + HandleGetMaxSeqNoRequest(responseCookie, partActor, req, ctx); + } else if (req.HasCmdDeleteSession()) { + HandleDeleteSessionRequest(responseCookie, partActor, req, ctx); + } else if (req.HasCmdCreateSession()) { + HandleCreateSessionRequest(responseCookie, partActor, req, ctx); + } else if (req.HasCmdSetClientOffset()) { + HandleSetClientOffsetRequest(responseCookie, partActor, req, ctx); + } else if (req.HasCmdGetClientOffset()) { + HandleGetClientOffsetRequest(responseCookie, partActor, req, ctx); + } else if (req.CmdWriteSize()) { + HandleWriteRequest(responseCookie, partActor, req, ctx); + } else if (req.HasCmdUpdateWriteTimestamp()) { + HandleUpdateWriteTimestampRequest(responseCookie, partActor, req, ctx); + } else if (req.HasCmdRead()) { + HandleReadRequest(responseCookie, partActor, req, ctx); + } else if (req.HasCmdGetOwnership()) { + HandleGetOwnershipRequest(responseCookie, partActor, req, ctx, pipeClient, ev->Sender); + } else if (req.HasCmdReserveBytes()) { + HandleReserveBytesRequest(responseCookie, partActor, req, ctx, pipeClient, ev->Sender); } else if (req.HasCmdRegisterMessageGroup()) { HandleRegisterMessageGroupRequest(responseCookie, partActor, req, ctx); } else if (req.HasCmdDeregisterMessageGroup()) { HandleDeregisterMessageGroupRequest(responseCookie, partActor, req, ctx); } else if (req.HasCmdSplitMessageGroup()) { HandleSplitMessageGroupRequest(responseCookie, partActor, req, ctx); - } else Y_FAIL("unknown or empty command"); -} - - -void TPersQueue::Handle(TEvTabletPipe::TEvServerConnected::TPtr& ev, const TActorContext&) -{ - auto it = PipesInfo.find(ev->Get()->ClientId); - - if (it == PipesInfo.end()) { + } else Y_FAIL("unknown or empty command"); +} + + +void TPersQueue::Handle(TEvTabletPipe::TEvServerConnected::TPtr& ev, const TActorContext&) +{ + auto it = PipesInfo.find(ev->Get()->ClientId); + + if (it == PipesInfo.end()) { PipesInfo.insert({ev->Get()->ClientId, {TActorId(), "", 1}}); - } else { - it->second.ServerActors++; - } - - Counters->Simple()[COUNTER_PQ_TABLET_OPENED_PIPES] = PipesInfo.size(); -} - - -void TPersQueue::Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev, const TActorContext& ctx) -{ - //inform partition if needed; - auto it = PipesInfo.find(ev->Get()->ClientId); - if (it != PipesInfo.end()) { - if(--(it->second.ServerActors) > 0) { - return; - } + } else { + it->second.ServerActors++; + } + + Counters->Simple()[COUNTER_PQ_TABLET_OPENED_PIPES] = PipesInfo.size(); +} + + +void TPersQueue::Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev, const TActorContext& ctx) +{ + //inform partition if needed; + auto it = PipesInfo.find(ev->Get()->ClientId); + if (it != PipesInfo.end()) { + if(--(it->second.ServerActors) > 0) { + return; + } if (it->second.PartActor != TActorId()) { - ctx.Send(it->second.PartActor, new TEvPQ::TEvPipeDisconnected(it->second.Owner, it->first)); - } - PipesInfo.erase(it); - Counters->Simple()[COUNTER_PQ_TABLET_OPENED_PIPES] = PipesInfo.size(); - } -} - - -bool TPersQueue::OnRenderAppHtmlPage(NMon::TEvRemoteHttpInfo::TPtr ev, const TActorContext& ctx) -{ - if (!ev) - return true; - if (ev->Get()->Cgi().Has("kv")) { - return TKeyValueFlat::OnRenderAppHtmlPage(ev, ctx); - } - LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "Handle TEvRemoteHttpInfo: " << ev->Get()->Query); + ctx.Send(it->second.PartActor, new TEvPQ::TEvPipeDisconnected(it->second.Owner, it->first)); + } + PipesInfo.erase(it); + Counters->Simple()[COUNTER_PQ_TABLET_OPENED_PIPES] = PipesInfo.size(); + } +} + + +bool TPersQueue::OnRenderAppHtmlPage(NMon::TEvRemoteHttpInfo::TPtr ev, const TActorContext& ctx) +{ + if (!ev) + return true; + if (ev->Get()->Cgi().Has("kv")) { + return TKeyValueFlat::OnRenderAppHtmlPage(ev, ctx); + } + LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "Handle TEvRemoteHttpInfo: " << ev->Get()->Query); TMap<ui32, TActorId> res; - for (auto& p : Partitions) { - res.insert({p.first, p.second.Actor}); - } - ctx.Register(new TMonitoringProxy(ev->Sender, ev->Get()->Query, res, CacheActor, TopicName, TabletID(), ResponseProxy.size())); - return true; -} - - -void TPersQueue::HandleDie(const TActorContext& ctx) -{ + for (auto& p : Partitions) { + res.insert({p.first, p.second.Actor}); + } + ctx.Register(new TMonitoringProxy(ev->Sender, ev->Get()->Query, res, CacheActor, TopicName, TabletID(), ResponseProxy.size())); + return true; +} + + +void TPersQueue::HandleDie(const TActorContext& ctx) +{ FlushMetrics(true, ctx); for (const auto& p : Partitions) { ctx.Send(p.second.Actor, new TEvents::TEvPoisonPill()); - } + } ctx.Send(CacheActor, new TEvents::TEvPoisonPill()); - - - for (const auto& p : ResponseProxy) { + + + for (const auto& p : ResponseProxy) { THolder<TEvPQ::TEvError> ev = MakeHolder<TEvPQ::TEvError>(NPersQueue::NErrorCode::INITIALIZING, "tablet will be restarted right now", p.first); - bool res = p.second->HandleError(ev.Get(), ctx); - Y_VERIFY(res); - } - ResponseProxy.clear(); - NKeyValue::TKeyValueFlat::HandleDie(ctx); -} - - + bool res = p.second->HandleError(ev.Get(), ctx); + Y_VERIFY(res); + } + ResponseProxy.clear(); + NKeyValue::TKeyValueFlat::HandleDie(ctx); +} + + TPersQueue::TPersQueue(const TActorId& tablet, TTabletStorageInfo *info) - : TKeyValueFlat(tablet, info) - , ConfigInited(false) - , PartitionsInited(0) - , NewConfigShouldBeApplied(false) + : TKeyValueFlat(tablet, info) + , ConfigInited(false) + , PartitionsInited(0) + , NewConfigShouldBeApplied(false) , TabletState(NKikimrPQ::ENormal) - , Counters(nullptr) - , NextResponseCookie(0) - , ResourceMetrics(nullptr) -{ - typedef TProtobufTabletCounters< - NKeyValue::ESimpleCounters_descriptor, - NKeyValue::ECumulativeCounters_descriptor, - NKeyValue::EPercentileCounters_descriptor, - NKeyValue::ETxTypes_descriptor> TKeyValueCounters; - typedef TAppProtobufTabletCounters< - ESimpleCounters_descriptor, - ECumulativeCounters_descriptor, - EPercentileCounters_descriptor> TPersQueueCounters; - typedef TProtobufTabletCountersPair<TKeyValueCounters, TPersQueueCounters> TCounters; - TAutoPtr<TCounters> counters(new TCounters()); - Counters = (counters->GetSecondTabletCounters()).Release(); - - State.SetupTabletCounters(counters->GetFirstTabletCounters().Release()); //FirstTabletCounters is of good type and contains all counters - State.Clear(); -} - -void TPersQueue::CreatedHook(const TActorContext& ctx) -{ + , Counters(nullptr) + , NextResponseCookie(0) + , ResourceMetrics(nullptr) +{ + typedef TProtobufTabletCounters< + NKeyValue::ESimpleCounters_descriptor, + NKeyValue::ECumulativeCounters_descriptor, + NKeyValue::EPercentileCounters_descriptor, + NKeyValue::ETxTypes_descriptor> TKeyValueCounters; + typedef TAppProtobufTabletCounters< + ESimpleCounters_descriptor, + ECumulativeCounters_descriptor, + EPercentileCounters_descriptor> TPersQueueCounters; + typedef TProtobufTabletCountersPair<TKeyValueCounters, TPersQueueCounters> TCounters; + TAutoPtr<TCounters> counters(new TCounters()); + Counters = (counters->GetSecondTabletCounters()).Release(); + + State.SetupTabletCounters(counters->GetFirstTabletCounters().Release()); //FirstTabletCounters is of good type and contains all counters + State.Clear(); +} + +void TPersQueue::CreatedHook(const TActorContext& ctx) +{ const TActorId nameserviceId = GetNameserviceActorId(); - ctx.Send(nameserviceId, new TEvInterconnect::TEvGetNode(ctx.SelfID.NodeId())); - - auto& pqConfig = AppData(ctx)->PQConfig; - if (pqConfig.HasBillingMeteringConfig() && pqConfig.GetBillingMeteringConfig().GetEnabled()) { - MeteringEnabled = true; - MetricsFlushInterval = TDuration::Seconds(pqConfig.GetBillingMeteringConfig().GetFlushIntervalSec()); - } else { - MeteringEnabled = false; - } -} - -void TPersQueue::Handle(TEvInterconnect::TEvNodeInfo::TPtr& ev, const TActorContext& ctx) -{ - Y_VERIFY(ev->Get()->Node); + ctx.Send(nameserviceId, new TEvInterconnect::TEvGetNode(ctx.SelfID.NodeId())); + + auto& pqConfig = AppData(ctx)->PQConfig; + if (pqConfig.HasBillingMeteringConfig() && pqConfig.GetBillingMeteringConfig().GetEnabled()) { + MeteringEnabled = true; + MetricsFlushInterval = TDuration::Seconds(pqConfig.GetBillingMeteringConfig().GetFlushIntervalSec()); + } else { + MeteringEnabled = false; + } +} + +void TPersQueue::Handle(TEvInterconnect::TEvNodeInfo::TPtr& ev, const TActorContext& ctx) +{ + Y_VERIFY(ev->Get()->Node); DCId = ev->Get()->Node->Location.GetDataCenterId(); - - ResourceMetrics = Executor()->GetResourceMetrics(); - THolder<TEvKeyValue::TEvRequest> request(new TEvKeyValue::TEvRequest); + + ResourceMetrics = Executor()->GetResourceMetrics(); + THolder<TEvKeyValue::TEvRequest> request(new TEvKeyValue::TEvRequest); request->Record.SetCookie(READ_CONFIG_COOKIE); request->Record.AddCmdRead()->SetKey(KeyConfig()); request->Record.AddCmdRead()->SetKey(KeyState()); - request->Record.MutableCmdSetExecutorFastLogPolicy() - ->SetIsAllowed(AppData(ctx)->PQConfig.GetTactic() == NKikimrClient::TKeyValueRequest::MIN_LATENCY); - ctx.Send(ctx.SelfID, request.Release()); - ctx.Schedule(TDuration::Seconds(5), new TEvents::TEvWakeup()); -} - -void TPersQueue::HandleWakeup(const TActorContext& ctx) { + request->Record.MutableCmdSetExecutorFastLogPolicy() + ->SetIsAllowed(AppData(ctx)->PQConfig.GetTactic() == NKikimrClient::TKeyValueRequest::MIN_LATENCY); + ctx.Send(ctx.SelfID, request.Release()); + ctx.Schedule(TDuration::Seconds(5), new TEvents::TEvWakeup()); +} + +void TPersQueue::HandleWakeup(const TActorContext& ctx) { THashSet<TString> groups; - for (auto& p : Partitions) { - for (auto& m : p.second.LabeledCounters) { - groups.insert(m.first); - } - } - for (auto& g : groups) { - AggregateAndSendLabeledCountersFor(g, ctx); - } + for (auto& p : Partitions) { + for (auto& m : p.second.LabeledCounters) { + groups.insert(m.first); + } + } + for (auto& g : groups) { + AggregateAndSendLabeledCountersFor(g, ctx); + } FlushMetrics(false, ctx); - ctx.Schedule(TDuration::Seconds(5), new TEvents::TEvWakeup()); -} - + ctx.Schedule(TDuration::Seconds(5), new TEvents::TEvWakeup()); +} + TString TPersQueue::GetMeteringJson( const TString& metricBillingId, const TString& schemeName, const THashMap<TString, ui64>& tags, ui64 quantity, const TString& quantityUnit, @@ -2045,13 +2045,13 @@ TString TPersQueue::GetMeteringJson( ) { TStringStream output; NJson::TJsonWriter writer(&output, false); - + writer.OpenMap(); writer.Write("cloud_id", Config.GetYcCloudId()); writer.Write("folder_id", Config.GetYcFolderId()); writer.Write("resource_id", ResourceId); - writer.Write("id", TStringBuilder() << metricBillingId << "-" << Config.GetYdbDatabaseId() << "-" << TabletID() << "-" << start.MilliSeconds() << "-" << (++MeteringCounter)); + writer.Write("id", TStringBuilder() << metricBillingId << "-" << Config.GetYdbDatabaseId() << "-" << TabletID() << "-" << start.MilliSeconds() << "-" << (++MeteringCounter)); writer.Write("schema", schemeName); @@ -2065,12 +2065,12 @@ TString TPersQueue::GetMeteringJson( writer.Write("quantity", quantity); writer.Write("unit", quantityUnit); writer.Write("start", start.Seconds()); - writer.Write("finish", end.Seconds()); + writer.Write("finish", end.Seconds()); writer.CloseMap(); // "usage" writer.OpenMap("labels"); - writer.Write("datastreams_stream_name", StreamName); - writer.Write("ydb_database", Config.GetYdbDatabaseId()); + writer.Write("datastreams_stream_name", StreamName); + writer.Write("ydb_database", Config.GetYdbDatabaseId()); writer.CloseMap(); // "labels" writer.Write("version", "v1"); @@ -2086,8 +2086,8 @@ void TPersQueue::FlushMetrics(bool force, const TActorContext &ctx) { if (!MeteringEnabled) { return; } - if (Config.PartitionsSize() == 0) - return; + if (Config.PartitionsSize() == 0) + return; auto now = ctx.Now(); bool needFlushRequests = force, needFlushShards = force; @@ -2112,7 +2112,7 @@ void TPersQueue::FlushMetrics(bool force, const TActorContext &ctx) { if (needFlushRequests) { if (CurrentPutUnitsQuantity > 0) { auto record = GetMeteringJson( - "put_units", "yds.events.puts.v1", {}, CurrentPutUnitsQuantity, "put_events", + "put_units", "yds.events.puts.v1", {}, CurrentPutUnitsQuantity, "put_events", RequestsMetricsLastFlush, requestsEndTime, now ); NMetering::SendMeteringJson(ctx, record); @@ -2121,18 +2121,18 @@ void TPersQueue::FlushMetrics(bool force, const TActorContext &ctx) { RequestsMetricsLastFlush = now; } if (needFlushShards) { - ui64 writeQuota = Config.GetPartitionConfig().GetWriteSpeedInBytesPerSecond(); - ui64 reservedSpace = Config.GetPartitionConfig().GetLifetimeSeconds() * writeQuota; - ui64 consumersThroughput = Config.ReadRulesSize() * writeQuota; - ui64 numPartitions = Config.PartitionsSize(); + ui64 writeQuota = Config.GetPartitionConfig().GetWriteSpeedInBytesPerSecond(); + ui64 reservedSpace = Config.GetPartitionConfig().GetLifetimeSeconds() * writeQuota; + ui64 consumersThroughput = Config.ReadRulesSize() * writeQuota; + ui64 numPartitions = Config.PartitionsSize(); THashMap<TString, ui64> tags = { {"reserved_throughput_bps", writeQuota}, {"shard_enhanced_consumers_throughput", consumersThroughput}, {"reserved_storage_bytes", reservedSpace} }; auto makeShardsMetricsJson = [&](TInstant& end) { auto res = GetMeteringJson( - "reserved_resources", "yds.resources.reserved.v1", tags, - numPartitions * (end - ShardsMetricsLastFlush).Seconds(), "second", + "reserved_resources", "yds.resources.reserved.v1", tags, + numPartitions * (end - ShardsMetricsLastFlush).Seconds(), "second", ShardsMetricsLastFlush, end, now ); ShardsMetricsLastFlush = end; @@ -2150,36 +2150,36 @@ void TPersQueue::FlushMetrics(bool force, const TActorContext &ctx) { } } -bool TPersQueue::HandleHook(STFUNC_SIG) -{ +bool TPersQueue::HandleHook(STFUNC_SIG) +{ SetActivityType(NKikimrServices::TActivity::PERSQUEUE_ACTOR); TRACE_EVENT(NKikimrServices::PERSQUEUE); - switch(ev->GetTypeRewrite()) - { - HFuncTraced(TEvInterconnect::TEvNodeInfo, Handle); + switch(ev->GetTypeRewrite()) + { + HFuncTraced(TEvInterconnect::TEvNodeInfo, Handle); HFuncTraced(TEvPersQueue::TEvRequest, Handle); HFuncTraced(TEvPersQueue::TEvUpdateConfig, Handle); HFuncTraced(TEvPersQueue::TEvOffsets, Handle); - HFuncTraced(TEvPersQueue::TEvHasDataInfo, Handle); - HFuncTraced(TEvPersQueue::TEvStatus, Handle); + HFuncTraced(TEvPersQueue::TEvHasDataInfo, Handle); + HFuncTraced(TEvPersQueue::TEvStatus, Handle); HFuncTraced(TEvPersQueue::TEvPartitionClientInfo, Handle); HFuncTraced(TEvKeyValue::TEvResponse, Handle); HFuncTraced(TEvPQ::TEvInitComplete, Handle); - HFuncTraced(TEvPQ::TEvPartitionCounters, Handle); - HFuncTraced(TEvPQ::TEvPartitionLabeledCounters, Handle); - HFuncTraced(TEvPQ::TEvPartitionLabeledCountersDrop, Handle); + HFuncTraced(TEvPQ::TEvPartitionCounters, Handle); + HFuncTraced(TEvPQ::TEvPartitionLabeledCounters, Handle); + HFuncTraced(TEvPQ::TEvPartitionLabeledCountersDrop, Handle); HFuncTraced(TEvPQ::TEvTabletCacheCounters, Handle); HFuncTraced(TEvPersQueue::TEvDropTablet, Handle); - HFuncTraced(TEvTabletPipe::TEvServerConnected, Handle); - HFuncTraced(TEvTabletPipe::TEvServerDisconnected, Handle); - HFuncTraced(TEvPQ::TEvError, Handle); - HFuncTraced(TEvPQ::TEvProxyResponse, Handle); - CFunc(TEvents::TSystem::Wakeup, HandleWakeup); - default: - return false; - } - return true; -} - -}// NPQ -}// NKikimr + HFuncTraced(TEvTabletPipe::TEvServerConnected, Handle); + HFuncTraced(TEvTabletPipe::TEvServerDisconnected, Handle); + HFuncTraced(TEvPQ::TEvError, Handle); + HFuncTraced(TEvPQ::TEvProxyResponse, Handle); + CFunc(TEvents::TSystem::Wakeup, HandleWakeup); + default: + return false; + } + return true; +} + +}// NPQ +}// NKikimr diff --git a/ydb/core/persqueue/pq_impl.h b/ydb/core/persqueue/pq_impl.h index 372c107a20..bc3bfb10ba 100644 --- a/ydb/core/persqueue/pq_impl.h +++ b/ydb/core/persqueue/pq_impl.h @@ -1,85 +1,85 @@ -#pragma once +#pragma once -#include "percentile_counter.h" +#include "percentile_counter.h" #include <ydb/core/keyvalue/keyvalue_flat_impl.h> #include <ydb/core/tablet/tablet_counters.h> #include <ydb/core/base/tablet_pipe.h> #include <ydb/core/persqueue/events/internal.h> - + #include <library/cpp/actors/interconnect/interconnect.h> - -namespace NKikimr { -namespace NPQ { - -struct TPartitionInfo; + +namespace NKikimr { +namespace NPQ { + +struct TPartitionInfo; struct TChangeNotification; - -class TResponseBuilder; - -//USES MAIN chanel for big blobs, INLINE or EXTRA for ZK-like load, EXTRA2 for small blob for logging (VDISK of type LOG is ok with EXTRA2) - -class TPersQueue : public NKeyValue::TKeyValueFlat { + +class TResponseBuilder; + +//USES MAIN chanel for big blobs, INLINE or EXTRA for ZK-like load, EXTRA2 for small blob for logging (VDISK of type LOG is ok with EXTRA2) + +class TPersQueue : public NKeyValue::TKeyValueFlat { enum ECookie : ui64 { WRITE_CONFIG_COOKIE = 2, READ_CONFIG_COOKIE = 3, WRITE_STATE_COOKIE = 4 }; - void CreatedHook(const TActorContext& ctx) override; - bool HandleHook(STFUNC_SIG) override; - + void CreatedHook(const TActorContext& ctx) override; + bool HandleHook(STFUNC_SIG) override; + // void ReplyError(const TActorContext& ctx, const TActorId& dst, NPersQueue::NErrorCode::EErrorCode errorCode, const TString& error); - void ReplyError(const TActorContext& ctx, const ui64 responseCookie, NPersQueue::NErrorCode::EErrorCode errorCode, const TString& error); - - void HandleWakeup(const TActorContext&); - - void InitResponseBuilder(const ui64 responseCookie, const ui32 count, const ui32 counterId); - void Handle(TEvPQ::TEvError::TPtr& ev, const TActorContext&); - void Handle(TEvPQ::TEvProxyResponse::TPtr& ev, const TActorContext&); + void ReplyError(const TActorContext& ctx, const ui64 responseCookie, NPersQueue::NErrorCode::EErrorCode errorCode, const TString& error); + + void HandleWakeup(const TActorContext&); + + void InitResponseBuilder(const ui64 responseCookie, const ui32 count, const ui32 counterId); + void Handle(TEvPQ::TEvError::TPtr& ev, const TActorContext&); + void Handle(TEvPQ::TEvProxyResponse::TPtr& ev, const TActorContext&); void FinishResponse(THashMap<ui64, TAutoPtr<TResponseBuilder>>::iterator it); - - void Handle(TEvInterconnect::TEvNodeInfo::TPtr& ev, const TActorContext&); - - void Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev, const TActorContext&); - void Handle(TEvTabletPipe::TEvServerConnected::TPtr& ev, const TActorContext&); - - //when partition is ready it's sends event to tablet - void Handle(TEvPQ::TEvInitComplete::TPtr& ev, const TActorContext&); - - //partitions will send some times it's counters - void Handle(TEvPQ::TEvPartitionCounters::TPtr& ev, const TActorContext&); - - void Handle(TEvPQ::TEvPartitionLabeledCounters::TPtr& ev, const TActorContext&); - void Handle(TEvPQ::TEvPartitionLabeledCountersDrop::TPtr& ev, const TActorContext&); + + void Handle(TEvInterconnect::TEvNodeInfo::TPtr& ev, const TActorContext&); + + void Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev, const TActorContext&); + void Handle(TEvTabletPipe::TEvServerConnected::TPtr& ev, const TActorContext&); + + //when partition is ready it's sends event to tablet + void Handle(TEvPQ::TEvInitComplete::TPtr& ev, const TActorContext&); + + //partitions will send some times it's counters + void Handle(TEvPQ::TEvPartitionCounters::TPtr& ev, const TActorContext&); + + void Handle(TEvPQ::TEvPartitionLabeledCounters::TPtr& ev, const TActorContext&); + void Handle(TEvPQ::TEvPartitionLabeledCountersDrop::TPtr& ev, const TActorContext&); void AggregateAndSendLabeledCountersFor(const TString& group, const TActorContext&); - + void Handle(TEvPQ::TEvTabletCacheCounters::TPtr& ev, const TActorContext&); void SetCacheCounters(TEvPQ::TEvTabletCacheCounters::TCacheCounters& cacheCounters); - - //client requests - void Handle(TEvPersQueue::TEvUpdateConfig::TPtr& ev, const TActorContext& ctx); + + //client requests + void Handle(TEvPersQueue::TEvUpdateConfig::TPtr& ev, const TActorContext& ctx); void ProcessUpdateConfigRequest(TAutoPtr<TEvPersQueue::TEvUpdateConfig> ev, const TActorId& sender, const TActorContext& ctx); - void Handle(TEvPersQueue::TEvOffsets::TPtr& ev, const TActorContext& ctx); - void Handle(TEvPersQueue::TEvStatus::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPersQueue::TEvOffsets::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPersQueue::TEvStatus::TPtr& ev, const TActorContext& ctx); void Handle(TEvPersQueue::TEvDropTablet::TPtr& ev, const TActorContext& ctx); - void Handle(TEvPersQueue::TEvHasDataInfo::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPersQueue::TEvHasDataInfo::TPtr& ev, const TActorContext& ctx); void Handle(TEvPersQueue::TEvPartitionClientInfo::TPtr& ev, const TActorContext& ctx); - bool OnRenderAppHtmlPage(NMon::TEvRemoteHttpInfo::TPtr ev, const TActorContext& ctx) override; - - void HandleDie(const TActorContext& ctx) override; - - //response from KV on READ or WRITE config request - void Handle(TEvKeyValue::TEvResponse::TPtr& ev, const TActorContext& ctx); + bool OnRenderAppHtmlPage(NMon::TEvRemoteHttpInfo::TPtr ev, const TActorContext& ctx) override; + + void HandleDie(const TActorContext& ctx) override; + + //response from KV on READ or WRITE config request + void Handle(TEvKeyValue::TEvResponse::TPtr& ev, const TActorContext& ctx); void HandleConfigWriteResponse(const NKikimrClient::TResponse& resp, const TActorContext& ctx); void HandleConfigReadResponse(const NKikimrClient::TResponse& resp, const TActorContext& ctx); - void ApplyNewConfigAndReply(const TActorContext& ctx); + void ApplyNewConfigAndReply(const TActorContext& ctx); void HandleStateWriteResponse(const NKikimrClient::TResponse& resp, const TActorContext& ctx); - + void ReadConfig(const NKikimrClient::TKeyValueResponse::TReadResult& read, const TActorContext& ctx); void ReadState(const NKikimrClient::TKeyValueResponse::TReadResult& read, const TActorContext& ctx); - void FillMeteringParams(const TActorContext& ctx); + void FillMeteringParams(const TActorContext& ctx); TString GetMeteringJson(const TString& metricBillingId, const TString& schemeName, const THashMap<TString, ui64>& tags, ui64 quantity, const TString& quantityUnit, const TInstant& start, const TInstant& end, const TInstant& now); @@ -96,28 +96,28 @@ class TPersQueue : public NKeyValue::TKeyValueFlat { const NKikimrClient::TPersQueuePartitionRequest::TCmdDeregisterMessageGroup& cmd, NPersQueue::NErrorCode::EErrorCode& code, TString& error) const; - //client request - void Handle(TEvPersQueue::TEvRequest::TPtr& ev, const TActorContext& ctx); + //client request + void Handle(TEvPersQueue::TEvRequest::TPtr& ev, const TActorContext& ctx); #define DESCRIBE_HANDLE(A) void A(const ui64 responseCookie, const TActorId& partActor, \ const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx); - DESCRIBE_HANDLE(HandleGetMaxSeqNoRequest) - DESCRIBE_HANDLE(HandleDeleteSessionRequest) - DESCRIBE_HANDLE(HandleCreateSessionRequest) - DESCRIBE_HANDLE(HandleSetClientOffsetRequest) - DESCRIBE_HANDLE(HandleGetClientOffsetRequest) - DESCRIBE_HANDLE(HandleWriteRequest) - DESCRIBE_HANDLE(HandleUpdateWriteTimestampRequest) - DESCRIBE_HANDLE(HandleReadRequest) + DESCRIBE_HANDLE(HandleGetMaxSeqNoRequest) + DESCRIBE_HANDLE(HandleDeleteSessionRequest) + DESCRIBE_HANDLE(HandleCreateSessionRequest) + DESCRIBE_HANDLE(HandleSetClientOffsetRequest) + DESCRIBE_HANDLE(HandleGetClientOffsetRequest) + DESCRIBE_HANDLE(HandleWriteRequest) + DESCRIBE_HANDLE(HandleUpdateWriteTimestampRequest) + DESCRIBE_HANDLE(HandleReadRequest) DESCRIBE_HANDLE(HandleRegisterMessageGroupRequest) DESCRIBE_HANDLE(HandleDeregisterMessageGroupRequest) DESCRIBE_HANDLE(HandleSplitMessageGroupRequest) -#undef DESCRIBE_HANDLE +#undef DESCRIBE_HANDLE #define DESCRIBE_HANDLE_WITH_SENDER(A) void A(const ui64 responseCookie, const TActorId& partActor, \ const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx,\ const TActorId& pipeClient, const TActorId& sender); - DESCRIBE_HANDLE_WITH_SENDER(HandleGetOwnershipRequest) - DESCRIBE_HANDLE_WITH_SENDER(HandleReserveBytesRequest) -#undef DESCRIBE_HANDLE_WITH_SENDER + DESCRIBE_HANDLE_WITH_SENDER(HandleGetOwnershipRequest) + DESCRIBE_HANDLE_WITH_SENDER(HandleReserveBytesRequest) +#undef DESCRIBE_HANDLE_WITH_SENDER bool ChangingState() const { return !TabletStateRequests.empty(); } void ReturnTabletStateAll(const TActorContext& ctx, NKikimrProto::EReplyStatus status = NKikimrProto::OK); void ReturnTabletState(const TActorContext& ctx, const TChangeNotification& req, NKikimrProto::EReplyStatus status); @@ -125,57 +125,57 @@ class TPersQueue : public NKeyValue::TKeyValueFlat { static constexpr const char * KeyConfig() { return "_config"; } static constexpr const char * KeyState() { return "_state"; } -public: +public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::PERSQUEUE_ACTOR; } TPersQueue(const TActorId& tablet, TTabletStorageInfo *info); - -private: - bool ConfigInited; - ui32 PartitionsInited; + +private: + bool ConfigInited; + ui32 PartitionsInited; THashMap<ui32, TPartitionInfo> Partitions; THashMap<TString, TIntrusivePtr<TEvTabletCounters::TInFlightCookie>> CounterEventsInflight; - + TActorId CacheActor; - + TSet<TChangeNotification> ChangeConfigNotification; - NKikimrPQ::TPQTabletConfig NewConfig; - bool NewConfigShouldBeApplied; - + NKikimrPQ::TPQTabletConfig NewConfig; + bool NewConfigShouldBeApplied; + TString TopicName; TString TopicPath; - bool LocalDC; + bool LocalDC; TString DCId; TVector<NScheme::TTypeId> KeySchema; - NKikimrPQ::TPQTabletConfig Config; - + NKikimrPQ::TPQTabletConfig Config; + NKikimrPQ::ETabletState TabletState; TSet<TChangeNotification> TabletStateRequests; - TAutoPtr<TTabletCountersBase> Counters; + TAutoPtr<TTabletCountersBase> Counters; TEvPQ::TEvTabletCacheCounters::TCacheCounters CacheCounters; - TMap<TString, NKikimr::NPQ::TMultiCounter> BytesWrittenFromDC; - - + TMap<TString, NKikimr::NPQ::TMultiCounter> BytesWrittenFromDC; + + THashMap<TString, TTabletLabeledCountersBase> LabeledCounters; - + TVector<TAutoPtr<TEvPersQueue::TEvHasDataInfo>> HasDataRequests; TVector<std::pair<TAutoPtr<TEvPersQueue::TEvUpdateConfig>, TActorId> > UpdateConfigRequests; - - struct TPipeInfo { + + struct TPipeInfo { TActorId PartActor; - TString Owner; - ui32 ServerActors; - }; - + TString Owner; + ui32 ServerActors; + }; + THashMap<TActorId, TPipeInfo> PipesInfo; - - ui64 NextResponseCookie; + + ui64 NextResponseCookie; THashMap<ui64, TAutoPtr<TResponseBuilder>> ResponseProxy; - - NMetrics::TResourceMetrics *ResourceMetrics; + + NMetrics::TResourceMetrics *ResourceMetrics; bool MeteringEnabled = false; TDuration MetricsFlushInterval; @@ -184,10 +184,10 @@ private: ui64 CurrentPutUnitsQuantity = 0; TString ResourceId; TString StreamName; - - ui64 MeteringCounter = 0; -}; - - -}// NPQ -}// NKikimr + + ui64 MeteringCounter = 0; +}; + + +}// NPQ +}// NKikimr diff --git a/ydb/core/persqueue/pq_l2_cache.cpp b/ydb/core/persqueue/pq_l2_cache.cpp index 02936b1095..e56642c39a 100644 --- a/ydb/core/persqueue/pq_l2_cache.cpp +++ b/ydb/core/persqueue/pq_l2_cache.cpp @@ -57,7 +57,7 @@ void TPersQueueCacheL2::SendResponses(const TActorContext& ctx, const THashMap<T } Y_VERIFY(key.TopicName == resp->TopicName, "PQ L2. Multiple topics in one PQ tablet."); - resp->Removed.push_back({key.Partition, key.Offset, key.PartNo, evicted}); + resp->Removed.push_back({key.Partition, key.Offset, key.PartNo, evicted}); RetentionTime = now - evicted->GetAccessTime(); if (RetentionTime < KeepTime) @@ -88,7 +88,7 @@ void TPersQueueCacheL2::AddBlobs(const TActorContext& ctx, TString topic, const continue; } - Y_VERIFY(CurrentSize <= Cache.Size() * MAX_BLOB_SIZE); + Y_VERIFY(CurrentSize <= Cache.Size() * MAX_BLOB_SIZE); CurrentSize += blob.Value->DataSize(); diff --git a/ydb/core/persqueue/pq_l2_cache.h b/ydb/core/persqueue/pq_l2_cache.h index 4a819d5f58..f9fcccbc8e 100644 --- a/ydb/core/persqueue/pq_l2_cache.h +++ b/ydb/core/persqueue/pq_l2_cache.h @@ -12,8 +12,8 @@ namespace NKikimr { namespace NPQ { -static const ui32 MAX_BLOB_SIZE = 8 << 20; //8mb - +static const ui32 MAX_BLOB_SIZE = 8 << 20; //8mb + struct TL2Counters { NMonitoring::TDynamicCounters::TCounterPtr TotalSize; NMonitoring::TDynamicCounters::TCounterPtr TotalCount; @@ -46,13 +46,13 @@ public: TString TopicName; ui32 Partition; ui64 Offset; - ui16 PartNo; + ui16 PartNo; TKey(TString topicName, const TCacheBlobL2& blob) : TopicName(topicName) , Partition(blob.Partition) , Offset(blob.Offset) - , PartNo(blob.PartNo) + , PartNo(blob.PartNo) { KeyHash = Hash128to32(ComputeHash(topicName), (static_cast<ui64>(Partition) << 16) + PartNo); KeyHash = Hash128to32(KeyHash, Offset); @@ -61,8 +61,8 @@ public: bool operator == (const TKey& key) const { return TopicName == key.TopicName && Partition == key.Partition && - Offset == key.Offset && - PartNo == key.PartNo; + Offset == key.Offset && + PartNo == key.PartNo; } ui64 Hash() const noexcept { diff --git a/ydb/core/persqueue/pq_l2_service.h b/ydb/core/persqueue/pq_l2_service.h index dac09edbb1..a9c5eefd16 100644 --- a/ydb/core/persqueue/pq_l2_service.h +++ b/ydb/core/persqueue/pq_l2_service.h @@ -69,7 +69,7 @@ private: struct TCacheBlobL2 { ui32 Partition; ui64 Offset; - ui16 PartNo; + ui16 PartNo; TCacheValue::TPtr Value; }; diff --git a/ydb/core/persqueue/pq_ut.cpp b/ydb/core/persqueue/pq_ut.cpp index 2a1c40a3ad..13a6122910 100644 --- a/ydb/core/persqueue/pq_ut.cpp +++ b/ydb/core/persqueue/pq_ut.cpp @@ -1,5 +1,5 @@ -#include "pq_ut.h" - +#include "pq_ut.h" + #include <ydb/core/testlib/basics/runtime.h> #include <ydb/core/tablet_flat/tablet_flat_executed.h> #include <ydb/core/tx/schemeshard/schemeshard.h> @@ -15,128 +15,128 @@ #include <ydb/core/testlib/tablet_helpers.h> #include <library/cpp/testing/unittest/registar.h> - + #include <util/system/sanitizers.h> #include <util/system/valgrind.h> -namespace NKikimr { +namespace NKikimr { Y_UNIT_TEST_SUITE(TPQTest) { - - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// SINGLE COMMAND TEST FUNCTIONS -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// SINGLE COMMAND TEST FUNCTIONS +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + Y_UNIT_TEST(TestGroupsBalancer) { - TTestContext tc; - TFinalizer finalizer(tc); - tc.Prepare(); - - tc.Runtime->SetScheduledLimit(50); - tc.Runtime->SetDispatchTimeout(TDuration::Seconds(1)); - tc.Runtime->SetLogPriority(NKikimrServices::PERSQUEUE, NLog::PRI_DEBUG); - TFakeSchemeShardState::TPtr state{new TFakeSchemeShardState()}; - ui64 ssId = 325; - BootFakeSchemeShard(*tc.Runtime, ssId, state); - - BalancerPrepare("topic", {{0,{1, 1}}, {11,{1, 1}}, {1,{1, 2}}, {2,{1, 2}}}, ssId, tc); - + TTestContext tc; + TFinalizer finalizer(tc); + tc.Prepare(); + + tc.Runtime->SetScheduledLimit(50); + tc.Runtime->SetDispatchTimeout(TDuration::Seconds(1)); + tc.Runtime->SetLogPriority(NKikimrServices::PERSQUEUE, NLog::PRI_DEBUG); + TFakeSchemeShardState::TPtr state{new TFakeSchemeShardState()}; + ui64 ssId = 325; + BootFakeSchemeShard(*tc.Runtime, ssId, state); + + BalancerPrepare("topic", {{0,{1, 1}}, {11,{1, 1}}, {1,{1, 2}}, {2,{1, 2}}}, ssId, tc); + TActorId pipe = RegisterReadSession("session1", tc); - Y_UNUSED(pipe); + Y_UNUSED(pipe); WaitPartition("session1", tc, 0, "", "", TActorId()); WaitPartition("session1", tc, 0, "", "", TActorId()); WaitPartition("session1", tc, 0, "", "", TActorId()); WaitPartition("session1", tc, 0, "", "", TActorId()); WaitPartition("", tc, 0, "", "", TActorId(), false);//no partitions - return error - + TActorId pipe2 = RegisterReadSession("session2", tc, {1}); - + WaitPartition("session2", tc, 0, "", "", TActorId()); WaitPartition("session2", tc, 0, "", "", TActorId()); WaitPartition("", tc, 0, "", "", TActorId(), false);//no partitions to balance - + TActorId pipe4 = RegisterReadSession("session8", tc, {1}); - Y_UNUSED(pipe4); - - WaitPartition("session8", tc, 0, "session2", "topic1", pipe2); + Y_UNUSED(pipe4); + + WaitPartition("session8", tc, 0, "session2", "topic1", pipe2); WaitPartition("", tc, 0, "", "", TActorId(), false);//no partitions to balance - - tc.Runtime->Send(new IEventHandle(pipe2, tc.Edge, new TEvents::TEvPoisonPill()), 0, true); //will cause dying of pipe and first session - + + tc.Runtime->Send(new IEventHandle(pipe2, tc.Edge, new TEvents::TEvPoisonPill()), 0, true); //will cause dying of pipe and first session + WaitPartition("session8", tc, 0, "", "", TActorId()); WaitPartition("", tc, 0, "", "", TActorId(), false);//no partitions to balance - - RegisterReadSession("session3", tc); + + RegisterReadSession("session3", tc); WaitPartition("session3", tc, 0, "", "", TActorId()); WaitPartition("session3", tc, 0, "", "", TActorId()); - WaitPartition("session3", tc, 0, "session8", "topic1", pipe4); + WaitPartition("session3", tc, 0, "session8", "topic1", pipe4); WaitPartition("", tc, 0, "", "", TActorId(), false);//no partitions to balance - - -} - + + +} + Y_UNIT_TEST(TestGroupsBalancer2) { - TTestContext tc; - TFinalizer finalizer(tc); - tc.Prepare(); - - tc.Runtime->SetScheduledLimit(50); - tc.Runtime->SetDispatchTimeout(TDuration::Seconds(1)); - tc.Runtime->SetLogPriority(NKikimrServices::PERSQUEUE, NLog::PRI_DEBUG); - TFakeSchemeShardState::TPtr state{new TFakeSchemeShardState()}; - ui64 ssId = 325; - BootFakeSchemeShard(*tc.Runtime, ssId, state); - - BalancerPrepare("topic", {{0, {1, 1}}, {1, {1, 2}}, {2, {1, 3}}, {3, {1, 4}}}, ssId, tc); - + TTestContext tc; + TFinalizer finalizer(tc); + tc.Prepare(); + + tc.Runtime->SetScheduledLimit(50); + tc.Runtime->SetDispatchTimeout(TDuration::Seconds(1)); + tc.Runtime->SetLogPriority(NKikimrServices::PERSQUEUE, NLog::PRI_DEBUG); + TFakeSchemeShardState::TPtr state{new TFakeSchemeShardState()}; + ui64 ssId = 325; + BootFakeSchemeShard(*tc.Runtime, ssId, state); + + BalancerPrepare("topic", {{0, {1, 1}}, {1, {1, 2}}, {2, {1, 3}}, {3, {1, 4}}}, ssId, tc); + TActorId pipe = RegisterReadSession("session1", tc, {1,2}); - Y_UNUSED(pipe); - + Y_UNUSED(pipe); + WaitPartition("session1", tc, 0, "", "", TActorId()); WaitPartition("session1", tc, 0, "", "", TActorId()); WaitPartition("", tc, 0, "", "", TActorId(), false);//no partitions - return error TActorId pipe2 = RegisterReadSession("session2", tc, {3,4}); - Y_UNUSED(pipe2); - + Y_UNUSED(pipe2); + WaitPartition("session2", tc, 0, "", "", TActorId()); WaitPartition("session2", tc, 0, "", "", TActorId()); WaitPartition("", tc, 0, "", "", TActorId(), false);//no partitions - return error -} - -Y_UNIT_TEST(TestGroupsBalancer3) { - TTestContext tc; - TFinalizer finalizer(tc); - tc.Prepare(); - - tc.Runtime->SetScheduledLimit(50); - tc.Runtime->SetDispatchTimeout(TDuration::Seconds(1)); - tc.Runtime->SetLogPriority(NKikimrServices::PERSQUEUE, NLog::PRI_DEBUG); - TFakeSchemeShardState::TPtr state{new TFakeSchemeShardState()}; - ui64 ssId = 325; - BootFakeSchemeShard(*tc.Runtime, ssId, state); - - BalancerPrepare("topic", {{0, {1, 1}}, {1, {1, 2}} }, ssId, tc); - - TActorId pipe = RegisterReadSession("session", tc, {2}); - - WaitPartition("session", tc, 0, "", "", TActorId()); - WaitPartition("", tc, 0, "", "", TActorId(), false);//no partitions - return error - - tc.Runtime->Send(new IEventHandle(pipe, tc.Edge, new TEvents::TEvPoisonPill()), 0, true); //will cause dying of pipe and first session - - TActorId pipe2 = RegisterReadSession("session1", tc); - Y_UNUSED(pipe2); - - WaitPartition("session1", tc, 0, "", "", TActorId()); - WaitPartition("session1", tc, 0, "", "", TActorId()); - WaitPartition("", tc, 0, "", "", TActorId(), false);//no partitions - return error - - pipe = RegisterReadSession("session2", tc, {2}); - WaitSessionKill(tc); //session 1 will die -} - - +} + +Y_UNIT_TEST(TestGroupsBalancer3) { + TTestContext tc; + TFinalizer finalizer(tc); + tc.Prepare(); + + tc.Runtime->SetScheduledLimit(50); + tc.Runtime->SetDispatchTimeout(TDuration::Seconds(1)); + tc.Runtime->SetLogPriority(NKikimrServices::PERSQUEUE, NLog::PRI_DEBUG); + TFakeSchemeShardState::TPtr state{new TFakeSchemeShardState()}; + ui64 ssId = 325; + BootFakeSchemeShard(*tc.Runtime, ssId, state); + + BalancerPrepare("topic", {{0, {1, 1}}, {1, {1, 2}} }, ssId, tc); + + TActorId pipe = RegisterReadSession("session", tc, {2}); + + WaitPartition("session", tc, 0, "", "", TActorId()); + WaitPartition("", tc, 0, "", "", TActorId(), false);//no partitions - return error + + tc.Runtime->Send(new IEventHandle(pipe, tc.Edge, new TEvents::TEvPoisonPill()), 0, true); //will cause dying of pipe and first session + + TActorId pipe2 = RegisterReadSession("session1", tc); + Y_UNUSED(pipe2); + + WaitPartition("session1", tc, 0, "", "", TActorId()); + WaitPartition("session1", tc, 0, "", "", TActorId()); + WaitPartition("", tc, 0, "", "", TActorId(), false);//no partitions - return error + + pipe = RegisterReadSession("session2", tc, {2}); + WaitSessionKill(tc); //session 1 will die +} + + Y_UNIT_TEST(TestUserInfoCompatibility) { TTestContext tc; RunTestWithReboots(tc.TabletIds, [&]() { @@ -146,132 +146,132 @@ Y_UNIT_TEST(TestUserInfoCompatibility) { tc.Prepare(dispatchName, setup, activeZone); activeZone = false; TString client = "test"; - tc.Runtime->SetLogPriority(NKikimrServices::PERSQUEUE, NLog::PRI_DEBUG); + tc.Runtime->SetLogPriority(NKikimrServices::PERSQUEUE, NLog::PRI_DEBUG); + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{client, false}}, tc, 4, 6*1024*1024, true, 0, 0, 1); - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{client, false}}, tc, 4, 6*1024*1024, true, 0, 0, 1); - TVector<std::pair<ui64, TString>> data; data.push_back({1, "s"}); data.push_back({2, "q"}); CmdWrite(0, "sourceid", data, tc); CmdWrite(1, "sourceid", data, tc); CmdWrite(2, "sourceid", data, tc); - CmdWrite(3, "sourceid", data, tc); + CmdWrite(3, "sourceid", data, tc); + - THolder<TEvKeyValue::TEvRequest> request(new TEvKeyValue::TEvRequest); FillUserInfo(request->Record.AddCmdWrite(), client, 0, 0); FillDeprecatedUserInfo(request->Record.AddCmdWrite(), client, 0, 0); FillUserInfo(request->Record.AddCmdWrite(), client, 1, 1); FillDeprecatedUserInfo(request->Record.AddCmdWrite(), client, 2, 1); - FillUserInfo(request->Record.AddCmdWrite(), client, 2, 1); - FillDeprecatedUserInfo(request->Record.AddCmdWrite(), client, 3, 0); - FillUserInfo(request->Record.AddCmdWrite(), client, 3, 1); + FillUserInfo(request->Record.AddCmdWrite(), client, 2, 1); + FillDeprecatedUserInfo(request->Record.AddCmdWrite(), client, 3, 0); + FillUserInfo(request->Record.AddCmdWrite(), client, 3, 1); + - tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); TAutoPtr<IEventHandle> handle; TEvKeyValue::TEvResponse* result = tc.Runtime->GrabEdgeEvent<TEvKeyValue::TEvResponse>(handle); Y_UNUSED(result); RestartTablet(tc); - Cerr << "AFTER RESTART\n"; - + Cerr << "AFTER RESTART\n"; + CmdGetOffset(0, client, 0, tc); CmdGetOffset(1, client, 1, tc); CmdGetOffset(2, client, 1, tc); - CmdGetOffset(3, client, 1, tc); - + CmdGetOffset(3, client, 1, tc); + + }); +} + +Y_UNIT_TEST(TestReadRuleVersions) { + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); + }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + activeZone = false; + TString client = "test"; + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{client, false}}, tc, 3); + + tc.Runtime->SetLogPriority(NKikimrServices::PERSQUEUE, NLog::PRI_DEBUG); + + TVector<std::pair<ui64, TString>> data; + data.push_back({1, "s"}); + data.push_back({2, "q"}); + CmdWrite(0, "sourceid", data, tc); + CmdWrite(1, "sourceid", data, tc); + CmdWrite(2, "sourceid", data, tc); + + CmdSetOffset(0, client, 1, false, tc); + CmdSetOffset(1, client, 2, false, tc); + + RestartTablet(tc); + + CmdGetOffset(0, client, 1, tc); + CmdGetOffset(1, client, 2, tc); + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc, 3); + + CmdCreateSession(0, client, "session1", tc, 0, 0, 0, true); + CmdCreateSession(1, client, "session2", tc, 0, 0, 0, true); + + CmdGetOffset(0, client, 0, tc); + CmdGetOffset(1, client, 0, tc); }); } -Y_UNIT_TEST(TestReadRuleVersions) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); - }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - activeZone = false; - TString client = "test"; - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{client, false}}, tc, 3); - - tc.Runtime->SetLogPriority(NKikimrServices::PERSQUEUE, NLog::PRI_DEBUG); - - TVector<std::pair<ui64, TString>> data; - data.push_back({1, "s"}); - data.push_back({2, "q"}); - CmdWrite(0, "sourceid", data, tc); - CmdWrite(1, "sourceid", data, tc); - CmdWrite(2, "sourceid", data, tc); - - CmdSetOffset(0, client, 1, false, tc); - CmdSetOffset(1, client, 2, false, tc); - - RestartTablet(tc); - - CmdGetOffset(0, client, 1, tc); - CmdGetOffset(1, client, 2, tc); - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc, 3); - - CmdCreateSession(0, client, "session1", tc, 0, 0, 0, true); - CmdCreateSession(1, client, "session2", tc, 0, 0, 0, true); - - CmdGetOffset(0, client, 0, tc); - CmdGetOffset(1, client, 0, tc); - }); -} - Y_UNIT_TEST(TestCreateBalancer) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); - }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - activeZone = false; - tc.Runtime->SetScheduledLimit(50); - tc.Runtime->SetDispatchTimeout(TDuration::MilliSeconds(100)); - - TFakeSchemeShardState::TPtr state{new TFakeSchemeShardState()}; - ui64 ssId = 325; - BootFakeSchemeShard(*tc.Runtime, ssId, state); - - BalancerPrepare("topic", {{1,{1,2}}}, ssId, tc); - + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); + }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + activeZone = false; + tc.Runtime->SetScheduledLimit(50); + tc.Runtime->SetDispatchTimeout(TDuration::MilliSeconds(100)); + + TFakeSchemeShardState::TPtr state{new TFakeSchemeShardState()}; + ui64 ssId = 325; + BootFakeSchemeShard(*tc.Runtime, ssId, state); + + BalancerPrepare("topic", {{1,{1,2}}}, ssId, tc); + TActorId pipe1 = RegisterReadSession("session0", tc, {1}); - - BalancerPrepare("topic", {{1,{1,2}}, {2,{1,3}}}, ssId, tc); - - tc.Runtime->Send(new IEventHandle(pipe1, tc.Edge, new TEvents::TEvPoisonPill()), 0, true); //will cause dying of pipe and first session - - -// BalancerPrepare("topic", {{2,1}}, tc); //TODO: not supported yet -// BalancerPrepare("topic", {{1,1}}, tc); // TODO: not supported yet - BalancerPrepare("topic", {{1,{1, 2}}, {2,{1, 3}}, {3,{1, 4}}}, ssId, tc); - activeZone = false; - + + BalancerPrepare("topic", {{1,{1,2}}, {2,{1,3}}}, ssId, tc); + + tc.Runtime->Send(new IEventHandle(pipe1, tc.Edge, new TEvents::TEvPoisonPill()), 0, true); //will cause dying of pipe and first session + + +// BalancerPrepare("topic", {{2,1}}, tc); //TODO: not supported yet +// BalancerPrepare("topic", {{1,1}}, tc); // TODO: not supported yet + BalancerPrepare("topic", {{1,{1, 2}}, {2,{1, 3}}, {3,{1, 4}}}, ssId, tc); + activeZone = false; + TActorId pipe = RegisterReadSession("session1", tc); WaitPartition("session1", tc, 0, "", "", TActorId()); WaitPartition("session1", tc, 0, "", "", TActorId()); WaitPartition("session1", tc, 0, "", "", TActorId()); WaitPartition("", tc, 0, "", "", TActorId(), false);//no partitions - return error TActorId pipe2 = RegisterReadSession("session2", tc); - Y_UNUSED(pipe2); - WaitPartition("session2", tc, 1, "session1", "topic1", pipe); + Y_UNUSED(pipe2); + WaitPartition("session2", tc, 1, "session1", "topic1", pipe); WaitPartition("", tc, 0, "", "", TActorId(), false);//no partitions to balance - tc.Runtime->Send(new IEventHandle(pipe, tc.Edge, new TEvents::TEvPoisonPill()), 0, true); //will cause dying of pipe and first session - - TDispatchOptions options; - options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTabletPipe::EvServerDisconnected)); - tc.Runtime->DispatchEvents(options); + tc.Runtime->Send(new IEventHandle(pipe, tc.Edge, new TEvents::TEvPoisonPill()), 0, true); //will cause dying of pipe and first session + + TDispatchOptions options; + options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTabletPipe::EvServerDisconnected)); + tc.Runtime->DispatchEvents(options); WaitPartition("session2", tc, 0, "", "", TActorId()); WaitPartition("session2", tc, 0, "", "", TActorId()); WaitPartition("", tc, 0, "", "", TActorId(), false);//no partitions to balance - }); -} - + }); +} + Y_UNIT_TEST(TestDescribeBalancer) { TTestContext tc; RunTestWithReboots(tc.TabletIds, [&]() { @@ -280,28 +280,28 @@ Y_UNIT_TEST(TestDescribeBalancer) { TFinalizer finalizer(tc); tc.Prepare(dispatchName, setup, activeZone); activeZone = false; - TFakeSchemeShardState::TPtr state{new TFakeSchemeShardState()}; - ui64 ssId = 9876; - BootFakeSchemeShard(*tc.Runtime, ssId, state); - + TFakeSchemeShardState::TPtr state{new TFakeSchemeShardState()}; + ui64 ssId = 9876; + BootFakeSchemeShard(*tc.Runtime, ssId, state); + tc.Runtime->SetScheduledLimit(50); tc.Runtime->SetDispatchTimeout(TDuration::MilliSeconds(100)); - BalancerPrepare("topic", {{1,{1, 2}}}, ssId, tc); + BalancerPrepare("topic", {{1,{1, 2}}}, ssId, tc); TAutoPtr<IEventHandle> handle; tc.Runtime->SendToPipe(tc.BalancerTabletId, tc.Edge, new TEvPersQueue::TEvDescribe(), 0, GetPipeConfigWithRetries()); TEvPersQueue::TEvDescribeResponse* result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvDescribeResponse>(handle); UNIT_ASSERT(result); auto& rec = result->Record; - UNIT_ASSERT(rec.HasSchemeShardId() && rec.GetSchemeShardId() == ssId); + UNIT_ASSERT(rec.HasSchemeShardId() && rec.GetSchemeShardId() == ssId); RestartTablet(tc); tc.Runtime->SendToPipe(tc.BalancerTabletId, tc.Edge, new TEvPersQueue::TEvDescribe(), 0, GetPipeConfigWithRetries()); result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvDescribeResponse>(handle); UNIT_ASSERT(result); auto& rec2 = result->Record; - UNIT_ASSERT(rec2.HasSchemeShardId() && rec2.GetSchemeShardId() == ssId); + UNIT_ASSERT(rec2.HasSchemeShardId() && rec2.GetSchemeShardId() == ssId); }); } - + Y_UNIT_TEST(TestCheckACL) { TTestContext tc; RunTestWithReboots(tc.TabletIds, [&]() { @@ -316,18 +316,18 @@ Y_UNIT_TEST(TestCheckACL) { IActor* ticketParser = NKikimr::CreateTicketParser(tc.Runtime->GetAppData().AuthConfig); TActorId ticketParserId = tc.Runtime->Register(ticketParser); tc.Runtime->RegisterService(NKikimr::MakeTicketParserID(), ticketParserId); - - TAutoPtr<IEventHandle> handle; - THolder<TEvPersQueue::TEvCheckACL> request(new TEvPersQueue::TEvCheckACL()); - request->Record.SetToken(NACLib::TUserToken("client@" BUILTIN_ACL_DOMAIN, {}).SerializeAsString()); - request->Record.SetOperation(NKikimrPQ::EOperation::READ_OP); - request->Record.SetUser("client"); - - tc.Runtime->SendToPipe(tc.BalancerTabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); - + + TAutoPtr<IEventHandle> handle; + THolder<TEvPersQueue::TEvCheckACL> request(new TEvPersQueue::TEvCheckACL()); + request->Record.SetToken(NACLib::TUserToken("client@" BUILTIN_ACL_DOMAIN, {}).SerializeAsString()); + request->Record.SetOperation(NKikimrPQ::EOperation::READ_OP); + request->Record.SetUser("client"); + + tc.Runtime->SendToPipe(tc.BalancerTabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); + tc.Runtime->SetScheduledLimit(600); tc.Runtime->SetDispatchTimeout(TDuration::MilliSeconds(100)); - BalancerPrepare("topic", {{1,{1, 2}}}, ssId, tc); + BalancerPrepare("topic", {{1,{1, 2}}}, ssId, tc); { TDispatchOptions options; @@ -349,14 +349,14 @@ Y_UNIT_TEST(TestCheckACL) { } request.Reset(new TEvPersQueue::TEvCheckACL()); - request->Record.SetToken(NACLib::TUserToken("client@" BUILTIN_ACL_DOMAIN, {}).SerializeAsString()); - request->Record.SetUser("client"); + request->Record.SetToken(NACLib::TUserToken("client@" BUILTIN_ACL_DOMAIN, {}).SerializeAsString()); + request->Record.SetUser("client"); request->Record.SetOperation(NKikimrPQ::EOperation::READ_OP); tc.Runtime->SendToPipe(tc.BalancerTabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvCheckACLResponse>(handle); auto& rec2 = result->Record; - UNIT_ASSERT_C(rec2.GetAccess() == NKikimrPQ::EAccess::ALLOWED, rec2); + UNIT_ASSERT_C(rec2.GetAccess() == NKikimrPQ::EAccess::ALLOWED, rec2); state->ACL.AddAccess(NACLib::EAccessType::Allow, NACLib::UpdateRow, "client@" BUILTIN_ACL_DOMAIN); @@ -367,8 +367,8 @@ Y_UNIT_TEST(TestCheckACL) { } request.Reset(new TEvPersQueue::TEvCheckACL()); - request->Record.SetToken(NACLib::TUserToken("client@" BUILTIN_ACL_DOMAIN, {}).SerializeAsString()); - request->Record.SetUser("client"); + request->Record.SetToken(NACLib::TUserToken("client@" BUILTIN_ACL_DOMAIN, {}).SerializeAsString()); + request->Record.SetUser("client"); request->Record.SetOperation(NKikimrPQ::EOperation::WRITE_OP); tc.Runtime->SendToPipe(tc.BalancerTabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); @@ -377,16 +377,16 @@ Y_UNIT_TEST(TestCheckACL) { UNIT_ASSERT(rec3.GetAccess() == NKikimrPQ::EAccess::ALLOWED); request.Reset(new TEvPersQueue::TEvCheckACL()); - request->Record.SetToken(NACLib::TUserToken("client@" BUILTIN_ACL_DOMAIN, {}).SerializeAsString()); - request->Record.SetUser("client2"); - request->Record.SetOperation(NKikimrPQ::EOperation::WRITE_OP); - - tc.Runtime->SendToPipe(tc.BalancerTabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvCheckACLResponse>(handle); - auto& rec9 = result->Record; - UNIT_ASSERT(rec9.GetAccess() == NKikimrPQ::EAccess::ALLOWED); - - request.Reset(new TEvPersQueue::TEvCheckACL()); + request->Record.SetToken(NACLib::TUserToken("client@" BUILTIN_ACL_DOMAIN, {}).SerializeAsString()); + request->Record.SetUser("client2"); + request->Record.SetOperation(NKikimrPQ::EOperation::WRITE_OP); + + tc.Runtime->SendToPipe(tc.BalancerTabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvCheckACLResponse>(handle); + auto& rec9 = result->Record; + UNIT_ASSERT(rec9.GetAccess() == NKikimrPQ::EAccess::ALLOWED); + + request.Reset(new TEvPersQueue::TEvCheckACL()); // No auth provided and auth for topic not required request->Record.SetOperation(NKikimrPQ::EOperation::WRITE_OP); @@ -407,9 +407,9 @@ Y_UNIT_TEST(TestCheckACL) { request.Reset(new TEvPersQueue::TEvCheckACL()); // No auth provided and auth for topic is required request->Record.SetOperation(NKikimrPQ::EOperation::READ_OP); - request->Record.SetToken(""); + request->Record.SetToken(""); - BalancerPrepare("topic", {{1,{1, 2}}}, ssId, tc, true); + BalancerPrepare("topic", {{1,{1, 2}}}, ssId, tc, true); tc.Runtime->SendToPipe(tc.BalancerTabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvCheckACLResponse>(handle); auto& rec7 = result->Record; @@ -418,7 +418,7 @@ Y_UNIT_TEST(TestCheckACL) { request.Reset(new TEvPersQueue::TEvCheckACL()); // No auth provided and auth for topic is required request->Record.SetOperation(NKikimrPQ::EOperation::READ_OP); - request->Record.SetToken(""); + request->Record.SetToken(""); tc.Runtime->SendToPipe(tc.BalancerTabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvCheckACLResponse>(handle); @@ -428,557 +428,557 @@ Y_UNIT_TEST(TestCheckACL) { } -void CheckLabeledCountersResponse(ui32 count, TTestContext& tc, TVector<TString> mustHave = {}) -{ - IActor* actor = CreateClusterLabeledCountersAggregatorActor(tc.Edge, TTabletTypes::PERSQUEUE); - tc.Runtime->Register(actor); - - TAutoPtr<IEventHandle> handle; - TEvTabletCounters::TEvTabletLabeledCountersResponse *result; - result = tc.Runtime->GrabEdgeEvent<TEvTabletCounters::TEvTabletLabeledCountersResponse>(handle); - UNIT_ASSERT(result); - THashSet<TString> groups; - - Cerr << "Checking with " << count << " groups:\n"; - - for (ui32 i = 0; i < result->Record.LabeledCountersByGroupSize(); ++i) { - auto& c = result->Record.GetLabeledCountersByGroup(i); - groups.insert(c.GetGroup()); - Cerr << "Has " << c.GetGroup() << "\n"; - } - UNIT_ASSERT(groups.size() == count); - for (auto& g : mustHave) { +void CheckLabeledCountersResponse(ui32 count, TTestContext& tc, TVector<TString> mustHave = {}) +{ + IActor* actor = CreateClusterLabeledCountersAggregatorActor(tc.Edge, TTabletTypes::PERSQUEUE); + tc.Runtime->Register(actor); + + TAutoPtr<IEventHandle> handle; + TEvTabletCounters::TEvTabletLabeledCountersResponse *result; + result = tc.Runtime->GrabEdgeEvent<TEvTabletCounters::TEvTabletLabeledCountersResponse>(handle); + UNIT_ASSERT(result); + THashSet<TString> groups; + + Cerr << "Checking with " << count << " groups:\n"; + + for (ui32 i = 0; i < result->Record.LabeledCountersByGroupSize(); ++i) { + auto& c = result->Record.GetLabeledCountersByGroup(i); + groups.insert(c.GetGroup()); + Cerr << "Has " << c.GetGroup() << "\n"; + } + UNIT_ASSERT(groups.size() == count); + for (auto& g : mustHave) { UNIT_ASSERT(groups.contains(g)); - } -} - + } +} + Y_UNIT_TEST(TestSwitchOffImportantFlag) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); - }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - activeZone = false; + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); + }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + activeZone = false; tc.Runtime->SetScheduledLimit(600); - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc); - - { - TDispatchOptions options; - options.FinalEvents.emplace_back(TEvTabletCounters::EvTabletAddLabeledCounters); - tc.Runtime->DispatchEvents(options); - } - - CheckLabeledCountersResponse(8, tc); //only topic counters - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"user", true}}, tc); - - { - TDispatchOptions options; - options.FinalEvents.emplace_back(TEvTabletCounters::EvTabletAddLabeledCounters); - tc.Runtime->DispatchEvents(options); - } - - CheckLabeledCountersResponse(8, tc, {"user/1/topic"}); //topic counters + important - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc); - - { - TDispatchOptions options; - options.FinalEvents.emplace_back(TEvTabletCounters::EvTabletAddLabeledCounters); - tc.Runtime->DispatchEvents(options); - } - - { - TDispatchOptions options; - options.FinalEvents.emplace_back(TEvTabletCounters::EvTabletAddLabeledCounters); - tc.Runtime->DispatchEvents(options); - } - - CheckLabeledCountersResponse(8, tc, {"user/0/topic"}); //topic counters + not important - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"user", true}, {"user2", true}}, tc); - - { - TDispatchOptions options; - options.FinalEvents.emplace_back(TEvTabletCounters::EvTabletAddLabeledCounters); - tc.Runtime->DispatchEvents(options); - } - - { - TDispatchOptions options; - options.FinalEvents.emplace_back(TEvTabletCounters::EvTabletAddLabeledCounters); - tc.Runtime->DispatchEvents(options); - } - - CheckLabeledCountersResponse(11, tc, {"user/1/topic", "user2/1/topic"}); //topic counters + not important - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"user", true}, {"user2", false}}, tc); - - { - TDispatchOptions options; - options.FinalEvents.emplace_back(TEvTabletCounters::EvTabletAddLabeledCounters); - tc.Runtime->DispatchEvents(options); - } - - { - TDispatchOptions options; - options.FinalEvents.emplace_back(TEvTabletCounters::EvTabletAddLabeledCounters); - tc.Runtime->DispatchEvents(options); - } - - CheckLabeledCountersResponse(12, tc, {"user/1/topic", "user2/0/topic"}); - - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"user", true}}, tc); - - { - TDispatchOptions options; - options.FinalEvents.emplace_back(TEvTabletCounters::EvTabletAddLabeledCounters); - tc.Runtime->DispatchEvents(options); - } - - { - TDispatchOptions options; - options.FinalEvents.emplace_back(TEvTabletCounters::EvTabletAddLabeledCounters); - tc.Runtime->DispatchEvents(options); - } - - CheckLabeledCountersResponse(8, tc, {"user/1/topic"}); - - - }); -} - - + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc); + + { + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvTabletCounters::EvTabletAddLabeledCounters); + tc.Runtime->DispatchEvents(options); + } + + CheckLabeledCountersResponse(8, tc); //only topic counters + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"user", true}}, tc); + + { + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvTabletCounters::EvTabletAddLabeledCounters); + tc.Runtime->DispatchEvents(options); + } + + CheckLabeledCountersResponse(8, tc, {"user/1/topic"}); //topic counters + important + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc); + + { + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvTabletCounters::EvTabletAddLabeledCounters); + tc.Runtime->DispatchEvents(options); + } + + { + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvTabletCounters::EvTabletAddLabeledCounters); + tc.Runtime->DispatchEvents(options); + } + + CheckLabeledCountersResponse(8, tc, {"user/0/topic"}); //topic counters + not important + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"user", true}, {"user2", true}}, tc); + + { + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvTabletCounters::EvTabletAddLabeledCounters); + tc.Runtime->DispatchEvents(options); + } + + { + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvTabletCounters::EvTabletAddLabeledCounters); + tc.Runtime->DispatchEvents(options); + } + + CheckLabeledCountersResponse(11, tc, {"user/1/topic", "user2/1/topic"}); //topic counters + not important + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"user", true}, {"user2", false}}, tc); + + { + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvTabletCounters::EvTabletAddLabeledCounters); + tc.Runtime->DispatchEvents(options); + } + + { + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvTabletCounters::EvTabletAddLabeledCounters); + tc.Runtime->DispatchEvents(options); + } + + CheckLabeledCountersResponse(12, tc, {"user/1/topic", "user2/0/topic"}); + + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"user", true}}, tc); + + { + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvTabletCounters::EvTabletAddLabeledCounters); + tc.Runtime->DispatchEvents(options); + } + + { + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvTabletCounters::EvTabletAddLabeledCounters); + tc.Runtime->DispatchEvents(options); + } + + CheckLabeledCountersResponse(8, tc, {"user/1/topic"}); + + + }); +} + + Y_UNIT_TEST(TestSeveralOwners) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); - }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - activeZone = false; - tc.Runtime->SetScheduledLimit(200); - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc); //no important clients, lifetimeseconds=0 - delete all right now, except last datablob - + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); + }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + activeZone = false; + tc.Runtime->SetScheduledLimit(200); + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc); //no important clients, lifetimeseconds=0 - delete all right now, except last datablob + TVector<std::pair<ui64, TString>> data; - - TString s{32, 'c'}; - ui32 pp = 4 + 8 + 1 + 9; - data.push_back({1, s.substr(pp)}); - data.push_back({2, s.substr(pp)}); - TString cookie1 = CmdSetOwner(0, tc, "owner1").first; - TString cookie2 = CmdSetOwner(0, tc, "owner2").first; - CmdWrite(0, "sourceid0", data, tc, false, {}, true, cookie1, 0, -1, true); - - CmdWrite(0, "sourceid1", data, tc, false, {}, false, cookie2, 0, -1, true); - CmdWrite(0, "sourceid2", data, tc, false, {}, false, cookie1, 1, -1, true); - - TString cookie3 = CmdSetOwner(0, tc, "owner1").first; - - CmdWrite(0, "sourceid3", data, tc , true, {}, false, cookie1, 2, -1, true); - }); -} - - + + TString s{32, 'c'}; + ui32 pp = 4 + 8 + 1 + 9; + data.push_back({1, s.substr(pp)}); + data.push_back({2, s.substr(pp)}); + TString cookie1 = CmdSetOwner(0, tc, "owner1").first; + TString cookie2 = CmdSetOwner(0, tc, "owner2").first; + CmdWrite(0, "sourceid0", data, tc, false, {}, true, cookie1, 0, -1, true); + + CmdWrite(0, "sourceid1", data, tc, false, {}, false, cookie2, 0, -1, true); + CmdWrite(0, "sourceid2", data, tc, false, {}, false, cookie1, 1, -1, true); + + TString cookie3 = CmdSetOwner(0, tc, "owner1").first; + + CmdWrite(0, "sourceid3", data, tc , true, {}, false, cookie1, 2, -1, true); + }); +} + + Y_UNIT_TEST(TestWaitInOwners) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); - }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - activeZone = false; - tc.Runtime->SetScheduledLimit(200); - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc); //no important clients, lifetimeseconds=0 - delete all right now, except last datablob - + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); + }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + activeZone = false; + tc.Runtime->SetScheduledLimit(200); + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc); //no important clients, lifetimeseconds=0 - delete all right now, except last datablob + TVector<std::pair<ui64, TString>> data; - - TString s{32, 'c'}; - ui32 pp = 4 + 8 + 1 + 9; - data.push_back({1, s.substr(pp)}); - data.push_back({2, s.substr(pp)}); - - CmdSetOwner(0, tc, "owner", false); - CmdSetOwner(0, tc, "owner", true); //will break last owner - + + TString s{32, 'c'}; + ui32 pp = 4 + 8 + 1 + 9; + data.push_back({1, s.substr(pp)}); + data.push_back({2, s.substr(pp)}); + + CmdSetOwner(0, tc, "owner", false); + CmdSetOwner(0, tc, "owner", true); //will break last owner + TActorId newPipe = SetOwner(0, tc, "owner", false); //this owner will wait - - auto p = CmdSetOwner(0, tc, "owner", true); //will break last owner - - TAutoPtr<IEventHandle> handle; - TEvPersQueue::TEvResponse *result; - try { - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - } catch (NActors::TSchedulingLimitReachedException) { - result = nullptr; - } - - Y_VERIFY(!result); //no answer yet - - CmdSetOwner(0, tc); - CmdSetOwner(0, tc, "owner2"); //just to be dropped by next command - - WritePartData(0, "sourceid", 12, 1, 1, 5, 20, "value", tc, p.first, 0); - - result = tc.Runtime->GrabEdgeEventIf<TEvPersQueue::TEvResponse>(handle, [](const TEvPersQueue::TEvResponse& ev){ - if (ev.Record.HasPartitionResponse() && ev.Record.GetPartitionResponse().CmdWriteResultSize() > 0 || ev.Record.GetErrorCode() != NPersQueue::NErrorCode::OK) - return true; - return false; - }); //there could be outgoing reads in TestReadSubscription test - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::BAD_REQUEST); - - try { - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - } catch (NActors::TSchedulingLimitReachedException) { - result = nullptr; - } - - UNIT_ASSERT(result); //ok for newPipe because old owner is dead now - UNIT_ASSERT(result->Record.HasStatus()); - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); - UNIT_ASSERT(result->Record.HasPartitionResponse()); - UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdGetOwnershipResult()); - - SetOwner(0, tc, "owner", false); //will wait - - try { - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - } catch (NActors::TSchedulingLimitReachedException) { - result = nullptr; - } - - Y_VERIFY(!result); //no answer yet, waiting of dying of old ownership session - - tc.Runtime->Send(new IEventHandle(newPipe, tc.Edge, new TEvents::TEvPoisonPill()), 0, true); //will cause dying of pipe and old session - - TDispatchOptions options; - options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTabletPipe::EvServerDisconnected)); - tc.Runtime->DispatchEvents(options); - - try { - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - } catch (NActors::TSchedulingLimitReachedException) { - result = nullptr; - } - - UNIT_ASSERT(result); //now ok - UNIT_ASSERT(result->Record.HasStatus()); - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); - UNIT_ASSERT(result->Record.HasPartitionResponse()); - UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdGetOwnershipResult()); - }); -} - - - - + + auto p = CmdSetOwner(0, tc, "owner", true); //will break last owner + + TAutoPtr<IEventHandle> handle; + TEvPersQueue::TEvResponse *result; + try { + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + } catch (NActors::TSchedulingLimitReachedException) { + result = nullptr; + } + + Y_VERIFY(!result); //no answer yet + + CmdSetOwner(0, tc); + CmdSetOwner(0, tc, "owner2"); //just to be dropped by next command + + WritePartData(0, "sourceid", 12, 1, 1, 5, 20, "value", tc, p.first, 0); + + result = tc.Runtime->GrabEdgeEventIf<TEvPersQueue::TEvResponse>(handle, [](const TEvPersQueue::TEvResponse& ev){ + if (ev.Record.HasPartitionResponse() && ev.Record.GetPartitionResponse().CmdWriteResultSize() > 0 || ev.Record.GetErrorCode() != NPersQueue::NErrorCode::OK) + return true; + return false; + }); //there could be outgoing reads in TestReadSubscription test + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::BAD_REQUEST); + + try { + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + } catch (NActors::TSchedulingLimitReachedException) { + result = nullptr; + } + + UNIT_ASSERT(result); //ok for newPipe because old owner is dead now + UNIT_ASSERT(result->Record.HasStatus()); + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); + UNIT_ASSERT(result->Record.HasPartitionResponse()); + UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdGetOwnershipResult()); + + SetOwner(0, tc, "owner", false); //will wait + + try { + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + } catch (NActors::TSchedulingLimitReachedException) { + result = nullptr; + } + + Y_VERIFY(!result); //no answer yet, waiting of dying of old ownership session + + tc.Runtime->Send(new IEventHandle(newPipe, tc.Edge, new TEvents::TEvPoisonPill()), 0, true); //will cause dying of pipe and old session + + TDispatchOptions options; + options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTabletPipe::EvServerDisconnected)); + tc.Runtime->DispatchEvents(options); + + try { + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + } catch (NActors::TSchedulingLimitReachedException) { + result = nullptr; + } + + UNIT_ASSERT(result); //now ok + UNIT_ASSERT(result->Record.HasStatus()); + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); + UNIT_ASSERT(result->Record.HasPartitionResponse()); + UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdGetOwnershipResult()); + }); +} + + + + Y_UNIT_TEST(TestReserveBytes) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); - }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - activeZone = false; - tc.Runtime->SetScheduledLimit(200); - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc); //no important clients, lifetimeseconds=0 - delete all right now, except last datablob - + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); + }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + activeZone = false; + tc.Runtime->SetScheduledLimit(200); + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc); //no important clients, lifetimeseconds=0 - delete all right now, except last datablob + TVector<std::pair<ui64, TString>> data; - - TString s{32, 'c'}; - ui32 pp = 4 + 8 + 1 + 9; - data.push_back({1, s.substr(pp)}); - data.push_back({2, s.substr(pp)}); - auto p = CmdSetOwner(0, tc); - - CmdReserveBytes(0, tc, p.first, 0, 20000000, p.second); - CmdReserveBytes(0, tc, p.first, 1, 20000000, p.second, false, true); - - CmdReserveBytes(0, tc, p.first, 2, 40000000, p.second); - - CmdReserveBytes(0, tc, p.first, 3, 80000000, p.second, true); - - TString cookie = p.first; - - CmdWrite(0, "sourceid0", data, tc, false, {}, true, cookie, 4); - - TAutoPtr<IEventHandle> handle; - TEvPersQueue::TEvResponse *result; - try { - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - } catch (NActors::TSchedulingLimitReachedException) { - result = nullptr; - } - - UNIT_ASSERT(!result);//no answer yet 40 + 80 > 90 - - CmdWrite(0, "sourceid2", data, tc, false, {}, false, cookie, 5); - - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); //now no inflight - 80 may fit - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); - - CmdWrite(0, "sourceid3", data, tc, false, {}, false, cookie, 6); - - CmdReserveBytes(0, tc, p.first, 7, 80000000, p.second); - p = CmdSetOwner(0, tc); - CmdReserveBytes(0, tc, p.first, 0, 80000000, p.second); - - }); -} - - - - + + TString s{32, 'c'}; + ui32 pp = 4 + 8 + 1 + 9; + data.push_back({1, s.substr(pp)}); + data.push_back({2, s.substr(pp)}); + auto p = CmdSetOwner(0, tc); + + CmdReserveBytes(0, tc, p.first, 0, 20000000, p.second); + CmdReserveBytes(0, tc, p.first, 1, 20000000, p.second, false, true); + + CmdReserveBytes(0, tc, p.first, 2, 40000000, p.second); + + CmdReserveBytes(0, tc, p.first, 3, 80000000, p.second, true); + + TString cookie = p.first; + + CmdWrite(0, "sourceid0", data, tc, false, {}, true, cookie, 4); + + TAutoPtr<IEventHandle> handle; + TEvPersQueue::TEvResponse *result; + try { + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + } catch (NActors::TSchedulingLimitReachedException) { + result = nullptr; + } + + UNIT_ASSERT(!result);//no answer yet 40 + 80 > 90 + + CmdWrite(0, "sourceid2", data, tc, false, {}, false, cookie, 5); + + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); //now no inflight - 80 may fit + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); + + CmdWrite(0, "sourceid3", data, tc, false, {}, false, cookie, 6); + + CmdReserveBytes(0, tc, p.first, 7, 80000000, p.second); + p = CmdSetOwner(0, tc); + CmdReserveBytes(0, tc, p.first, 0, 80000000, p.second); + + }); +} + + + + Y_UNIT_TEST(TestMessageNo) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - activeZone = false; - tc.Runtime->SetScheduledLimit(200); - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc); //no important clients, lifetimeseconds=0 - delete all right now, except last datablob - + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + activeZone = false; + tc.Runtime->SetScheduledLimit(200); + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc); //no important clients, lifetimeseconds=0 - delete all right now, except last datablob + TVector<std::pair<ui64, TString>> data; - + TString s{32, 'c'}; - ui32 pp = 4 + 8 + 1 + 9; - data.push_back({1, s.substr(pp)}); - data.push_back({2, s.substr(pp)}); - TString cookie = CmdSetOwner(0, tc).first; - CmdWrite(0, "sourceid0", data, tc, false, {}, true, cookie, 0); - - CmdWrite(0, "sourceid2", data, tc, false, {}, false, cookie, 1); - - WriteData(0, "sourceid1", data, tc, cookie, 2, -1); - - TAutoPtr<IEventHandle> handle; - TEvPersQueue::TEvResponse *result; - result = tc.Runtime->GrabEdgeEventIf<TEvPersQueue::TEvResponse>(handle, [](const TEvPersQueue::TEvResponse& ev){ - if (!ev.Record.HasPartitionResponse() || !ev.Record.GetPartitionResponse().HasCmdReadResult()) - return true; - return false; - }); //there could be outgoing reads in TestReadSubscription test - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); - - UNIT_ASSERT(result->Record.GetPartitionResponse().CmdWriteResultSize() == data.size()); - for (ui32 i = 0; i < data.size(); ++i) { - UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdWriteResult(i).HasAlreadyWritten()); - UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdWriteResult(i).HasOffset()); - } - for (ui32 i = 0; i < data.size(); ++i) { - auto res = result->Record.GetPartitionResponse().GetCmdWriteResult(i); - UNIT_ASSERT(!result->Record.GetPartitionResponse().GetCmdWriteResult(i).GetAlreadyWritten()); - } - - CmdWrite(0, "sourceid3", data, tc , true, {}, false, cookie, 0); - }); -} - - + ui32 pp = 4 + 8 + 1 + 9; + data.push_back({1, s.substr(pp)}); + data.push_back({2, s.substr(pp)}); + TString cookie = CmdSetOwner(0, tc).first; + CmdWrite(0, "sourceid0", data, tc, false, {}, true, cookie, 0); + + CmdWrite(0, "sourceid2", data, tc, false, {}, false, cookie, 1); + + WriteData(0, "sourceid1", data, tc, cookie, 2, -1); + + TAutoPtr<IEventHandle> handle; + TEvPersQueue::TEvResponse *result; + result = tc.Runtime->GrabEdgeEventIf<TEvPersQueue::TEvResponse>(handle, [](const TEvPersQueue::TEvResponse& ev){ + if (!ev.Record.HasPartitionResponse() || !ev.Record.GetPartitionResponse().HasCmdReadResult()) + return true; + return false; + }); //there could be outgoing reads in TestReadSubscription test + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); + + UNIT_ASSERT(result->Record.GetPartitionResponse().CmdWriteResultSize() == data.size()); + for (ui32 i = 0; i < data.size(); ++i) { + UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdWriteResult(i).HasAlreadyWritten()); + UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdWriteResult(i).HasOffset()); + } + for (ui32 i = 0; i < data.size(); ++i) { + auto res = result->Record.GetPartitionResponse().GetCmdWriteResult(i); + UNIT_ASSERT(!result->Record.GetPartitionResponse().GetCmdWriteResult(i).GetAlreadyWritten()); + } + + CmdWrite(0, "sourceid3", data, tc , true, {}, false, cookie, 0); + }); +} + + Y_UNIT_TEST(TestPartitionedBlobFails) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - activeZone = false; - tc.Runtime->SetScheduledLimit(200); - - PQTabletPrepare(20000000, 200 * 1024 * 1024, 0, {{"user1", true}}, tc); //one important client, never delete - + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + activeZone = false; + tc.Runtime->SetScheduledLimit(200); + + PQTabletPrepare(20000000, 200 * 1024 * 1024, 0, {{"user1", true}}, tc); //one important client, never delete + TString ss{50*1024*1024, '_'}; - char k = 0; + char k = 0; TString s = ""; - s += k; - s += ss; - s += char((1) % 256); - ++k; - + s += k; + s += ss; + s += char((1) % 256); + ++k; + TVector<std::pair<ui64, TString>> data; - data.push_back({1, s}); - + data.push_back({1, s}); + TVector<TString> parts; - ui32 size = 400*1024; - ui32 diff = 50; - for (ui32 pos = 0; pos < s.size();) { - parts.push_back(s.substr(pos, size - diff)); - pos += size - diff; - } - Y_VERIFY(parts.size() > 5); - - CmdWrite(0, "sourceid4", data, tc); - { - TString cookie = CmdSetOwner(0, tc).first; - - WritePartDataWithBigMsg(0, "sourceid0", 1, 1, 5, s.size(), parts[1], tc, cookie, 0, 12*1024*1024); - TAutoPtr<IEventHandle> handle; - TEvPersQueue::TEvResponse *result; - - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - - UNIT_ASSERT(result); - - UNIT_ASSERT(result->Record.HasStatus()); - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::BAD_REQUEST); - } - - PQGetPartInfo(0, 1, tc); - CmdWrite(0, "sourceid5", data, tc); - RestartTablet(tc); - PQGetPartInfo(0, 2, tc); - - ui32 toWrite = 5; - for (ui32 i = 0; i < 2; ++i) { - TString cookie = CmdSetOwner(0, tc).first; - - for (ui32 j = 0; j < toWrite + 1; ++j) { - ui32 k = j; - if (j == toWrite) - k = parts.size() - 1; - WritePartData(0, "sourceid1", -1, j == toWrite ? 2 : 1, k, parts.size(), s.size(), parts[k], tc, cookie, j); - - TAutoPtr<IEventHandle> handle; - TEvPersQueue::TEvResponse *result; - - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - - UNIT_ASSERT(result); - - UNIT_ASSERT(result->Record.HasStatus()); - if ( j == toWrite) { - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::BAD_REQUEST); - } else { - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); - - UNIT_ASSERT(result->Record.GetPartitionResponse().CmdWriteResultSize() == 1); - UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdWriteResult(0).HasAlreadyWritten()); - UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdWriteResult(0).HasOffset()); - UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdWriteResult(0).GetOffset() == 2); - - auto res = result->Record.GetPartitionResponse().GetCmdWriteResult(0); - UNIT_ASSERT(!result->Record.GetPartitionResponse().GetCmdWriteResult(0).GetAlreadyWritten()); - } - } - PQGetPartInfo(0, i + 2, tc); - toWrite = parts.size(); - } - data.back().second.resize(64*1024); - CmdWrite(0, "sourceid3", data, tc); - CmdWrite(0, "sourceid5", data, tc); - activeZone = true; - data.back().second.resize(8*1024*1024); - CmdWrite(0, "sourceid7", data, tc); - activeZone = false; - { - TString cookie = CmdSetOwner(0, tc).first; - WritePartData(0, "sourceidX", 10, 1, 0, 5, s.size(), parts[1], tc, cookie, 0); - - TAutoPtr<IEventHandle> handle; - TEvPersQueue::TEvResponse *result; - - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); - - //check that after CmdSetOwner all partial data cleared - cookie = CmdSetOwner(0, tc).first; - WritePartData(0, "sourceidX", 12, 1, 0, 5, s.size(), parts[1], tc, cookie, 0); - - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); - - //check gaps - WritePartData(0, "sourceidX", 15, 1, 1, 5, s.size(), parts[1], tc, cookie, 1); - - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::BAD_REQUEST); - - //check partNo gaps - cookie = CmdSetOwner(0, tc).first; - WritePartData(0, "sourceidX", 12, 1, 0, 5, s.size(), parts[1], tc, cookie, 0); - - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); - - //check gaps - WritePartData(0, "sourceidX", 12, 1, 4, 5, s.size(), parts[1], tc, cookie, 1); - - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::BAD_REQUEST); - - //check very big msg - cookie = CmdSetOwner(0, tc).first; + ui32 size = 400*1024; + ui32 diff = 50; + for (ui32 pos = 0; pos < s.size();) { + parts.push_back(s.substr(pos, size - diff)); + pos += size - diff; + } + Y_VERIFY(parts.size() > 5); + + CmdWrite(0, "sourceid4", data, tc); + { + TString cookie = CmdSetOwner(0, tc).first; + + WritePartDataWithBigMsg(0, "sourceid0", 1, 1, 5, s.size(), parts[1], tc, cookie, 0, 12*1024*1024); + TAutoPtr<IEventHandle> handle; + TEvPersQueue::TEvResponse *result; + + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + + UNIT_ASSERT(result); + + UNIT_ASSERT(result->Record.HasStatus()); + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::BAD_REQUEST); + } + + PQGetPartInfo(0, 1, tc); + CmdWrite(0, "sourceid5", data, tc); + RestartTablet(tc); + PQGetPartInfo(0, 2, tc); + + ui32 toWrite = 5; + for (ui32 i = 0; i < 2; ++i) { + TString cookie = CmdSetOwner(0, tc).first; + + for (ui32 j = 0; j < toWrite + 1; ++j) { + ui32 k = j; + if (j == toWrite) + k = parts.size() - 1; + WritePartData(0, "sourceid1", -1, j == toWrite ? 2 : 1, k, parts.size(), s.size(), parts[k], tc, cookie, j); + + TAutoPtr<IEventHandle> handle; + TEvPersQueue::TEvResponse *result; + + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + + UNIT_ASSERT(result); + + UNIT_ASSERT(result->Record.HasStatus()); + if ( j == toWrite) { + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::BAD_REQUEST); + } else { + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); + + UNIT_ASSERT(result->Record.GetPartitionResponse().CmdWriteResultSize() == 1); + UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdWriteResult(0).HasAlreadyWritten()); + UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdWriteResult(0).HasOffset()); + UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdWriteResult(0).GetOffset() == 2); + + auto res = result->Record.GetPartitionResponse().GetCmdWriteResult(0); + UNIT_ASSERT(!result->Record.GetPartitionResponse().GetCmdWriteResult(0).GetAlreadyWritten()); + } + } + PQGetPartInfo(0, i + 2, tc); + toWrite = parts.size(); + } + data.back().second.resize(64*1024); + CmdWrite(0, "sourceid3", data, tc); + CmdWrite(0, "sourceid5", data, tc); + activeZone = true; + data.back().second.resize(8*1024*1024); + CmdWrite(0, "sourceid7", data, tc); + activeZone = false; + { + TString cookie = CmdSetOwner(0, tc).first; + WritePartData(0, "sourceidX", 10, 1, 0, 5, s.size(), parts[1], tc, cookie, 0); + + TAutoPtr<IEventHandle> handle; + TEvPersQueue::TEvResponse *result; + + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); + + //check that after CmdSetOwner all partial data cleared + cookie = CmdSetOwner(0, tc).first; + WritePartData(0, "sourceidX", 12, 1, 0, 5, s.size(), parts[1], tc, cookie, 0); + + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); + + //check gaps + WritePartData(0, "sourceidX", 15, 1, 1, 5, s.size(), parts[1], tc, cookie, 1); + + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::BAD_REQUEST); + + //check partNo gaps + cookie = CmdSetOwner(0, tc).first; + WritePartData(0, "sourceidX", 12, 1, 0, 5, s.size(), parts[1], tc, cookie, 0); + + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); + + //check gaps + WritePartData(0, "sourceidX", 12, 1, 4, 5, s.size(), parts[1], tc, cookie, 1); + + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::BAD_REQUEST); + + //check very big msg + cookie = CmdSetOwner(0, tc).first; WritePartData(0, "sourceidY", 13, 1, 0, 5, s.size(), TString{10*1024*1024, 'a'}, tc, cookie, 0); - - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::BAD_REQUEST); - } - RestartTablet(tc); - }); -} - + + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::BAD_REQUEST); + } + RestartTablet(tc); + }); +} + Y_UNIT_TEST(TestAlreadyWritten) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - activeZone = false; - tc.Runtime->SetScheduledLimit(200); - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc); //no important clients, lifetimeseconds=0 - delete all right now, except last datablob - activeZone = true; + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + activeZone = false; + tc.Runtime->SetScheduledLimit(200); + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc); //no important clients, lifetimeseconds=0 - delete all right now, except last datablob + activeZone = true; TVector<std::pair<ui64, TString>> data; - + TString s{32, 'c'}; - ui32 pp = 4 + 8 + 1 + 9; - data.push_back({2, s.substr(pp)}); - data.push_back({1, s.substr(pp)}); - CmdWrite(0, "sourceid0", data, tc, false, {1}); //0 is written, 1 is already written - data[0].first = 4; - data[1].first = 3; - CmdWrite(0, "sourceid0", data, tc, false, {3}); //0 is written, 1 is already written - CmdWrite(0, "sourceid0", data, tc, false, {3, 4}); //all is already written - }); -} - - + ui32 pp = 4 + 8 + 1 + 9; + data.push_back({2, s.substr(pp)}); + data.push_back({1, s.substr(pp)}); + CmdWrite(0, "sourceid0", data, tc, false, {1}); //0 is written, 1 is already written + data[0].first = 4; + data[1].first = 3; + CmdWrite(0, "sourceid0", data, tc, false, {3}); //0 is written, 1 is already written + CmdWrite(0, "sourceid0", data, tc, false, {3, 4}); //all is already written + }); +} + + Y_UNIT_TEST(TestAlreadyWrittenWithoutDeduplication) { TTestContext tc; RunTestWithReboots(tc.TabletIds, [&]() { @@ -989,7 +989,7 @@ Y_UNIT_TEST(TestAlreadyWrittenWithoutDeduplication) { activeZone = false; tc.Runtime->SetScheduledLimit(200); - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc); //no important clients, lifetimeseconds=0 - delete all right now, except last datablob + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc); //no important clients, lifetimeseconds=0 - delete all right now, except last datablob TVector<std::pair<ui64, TString>> data; activeZone = true; @@ -1005,243 +1005,243 @@ Y_UNIT_TEST(TestAlreadyWrittenWithoutDeduplication) { Y_UNIT_TEST(TestWritePQCompact) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - activeZone = false; - tc.Runtime->SetScheduledLimit(200); - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc, 2, 8 * 1024 * 1024 - 512 * 1024); - //no important clients, lifetimeseconds=0 - delete all right now, except last datablob - + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + activeZone = false; + tc.Runtime->SetScheduledLimit(200); + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc, 2, 8 * 1024 * 1024 - 512 * 1024); + //no important clients, lifetimeseconds=0 - delete all right now, except last datablob + TVector<std::pair<ui64, TString>> data; - + TString ss{1024*1024 - 100, '_'}; TString s1{131072, 'a'}; TString s2{2048, 'b'}; TString s3{32, 'c'}; - ui32 pp = 4 + 8 + 2 + 9; - for (ui32 i = 0; i < 8; ++i) { - data.push_back({i + 1, ss.substr(pp)}); - } - CmdWrite(0, "sourceid0", data, tc, false, {}, true); //now 1 blob - PQGetPartInfo(0, 8, tc); - data.clear(); - for (ui32 i = 0; i + s1.size() < 7*1024*1024 + 4 * s1.size(); i += s1.size()) { - data.push_back({i + 1, s1.substr(pp)}); - } - CmdWrite(0, "sourceid1", data, tc); - PQGetPartInfo(0, 63 + 4, tc); - data.clear(); - for (ui32 i = 0; i + s2.size() < s1.size(); i += s2.size()) { - data.push_back({i + 1, s2.substr(pp)}); - } - CmdWrite(0, "sourceid2", data, tc); - PQGetPartInfo(8, 2 * 63 + 4, tc); //first is partial, not counted - data.clear(); - for (ui32 i = 0; i + s3.size() + 540 < s2.size(); i += s3.size()) { - data.push_back({i + 1, s3.substr(pp)}); - } - CmdWrite(0, "sourceid3", data, tc); //now 1 blob and at most one - - PQGetPartInfo(8, 177, tc); - data.resize(1); - CmdWrite(0, "sourceid4", data, tc); //now 2 blobs, but delete will be done on next write - // PQGetUserInfo("aaa", 0, 8 + 88 * 3 + 1, -1, tc); dont check here, at may be deleted already(on restart OnWakeUp will occure) - activeZone = true; - CmdWrite(0, "sourceid5", data, tc); //next message just to force drop, don't wait for WakeUp - activeZone = false; - - PQGetPartInfo(8, 179, tc); - - }); -} - - + ui32 pp = 4 + 8 + 2 + 9; + for (ui32 i = 0; i < 8; ++i) { + data.push_back({i + 1, ss.substr(pp)}); + } + CmdWrite(0, "sourceid0", data, tc, false, {}, true); //now 1 blob + PQGetPartInfo(0, 8, tc); + data.clear(); + for (ui32 i = 0; i + s1.size() < 7*1024*1024 + 4 * s1.size(); i += s1.size()) { + data.push_back({i + 1, s1.substr(pp)}); + } + CmdWrite(0, "sourceid1", data, tc); + PQGetPartInfo(0, 63 + 4, tc); + data.clear(); + for (ui32 i = 0; i + s2.size() < s1.size(); i += s2.size()) { + data.push_back({i + 1, s2.substr(pp)}); + } + CmdWrite(0, "sourceid2", data, tc); + PQGetPartInfo(8, 2 * 63 + 4, tc); //first is partial, not counted + data.clear(); + for (ui32 i = 0; i + s3.size() + 540 < s2.size(); i += s3.size()) { + data.push_back({i + 1, s3.substr(pp)}); + } + CmdWrite(0, "sourceid3", data, tc); //now 1 blob and at most one + + PQGetPartInfo(8, 177, tc); + data.resize(1); + CmdWrite(0, "sourceid4", data, tc); //now 2 blobs, but delete will be done on next write + // PQGetUserInfo("aaa", 0, 8 + 88 * 3 + 1, -1, tc); dont check here, at may be deleted already(on restart OnWakeUp will occure) + activeZone = true; + CmdWrite(0, "sourceid5", data, tc); //next message just to force drop, don't wait for WakeUp + activeZone = false; + + PQGetPartInfo(8, 179, tc); + + }); +} + + Y_UNIT_TEST(TestWritePQBigMessage) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - activeZone = false; - tc.Runtime->SetScheduledLimit(200); - - PQTabletPrepare(20000000, 1000 * 1024 * 1024, 0, {{"user1", true}}, tc, 2, 8 * 1024 * 1024 - 512 * 1024); //nothing dropped - //no important clients, lifetimeseconds=0 - delete all right now, except last datablob - + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + activeZone = false; + tc.Runtime->SetScheduledLimit(200); + + PQTabletPrepare(20000000, 1000 * 1024 * 1024, 0, {{"user1", true}}, tc, 2, 8 * 1024 * 1024 - 512 * 1024); //nothing dropped + //no important clients, lifetimeseconds=0 - delete all right now, except last datablob + TVector<std::pair<ui64, TString>> data; - + TString ss{50*1024*1024 - 100 - 2, '_'}; TString s1{400*1024 - 2, 'a'}; - ui32 pp = 4 + 8 + 2 + 9; - char k = 0; + ui32 pp = 4 + 8 + 2 + 9; + char k = 0; TString s = ""; - s += k; - s += ss.substr(pp); - s += char((1) % 256); - ++k; - data.push_back({1, s}); - - for (ui32 i = 0; i < 25;++i) { + s += k; + s += ss.substr(pp); + s += char((1) % 256); + ++k; + data.push_back({1, s}); + + for (ui32 i = 0; i < 25;++i) { TString s = ""; - s += k; - s += s1.substr(pp); - s += char((i + 2) % 256); - ++k; - data.push_back({i + 2, s}); - } - s = ""; - s += k; - s += ss.substr(pp); - s += char((1000) % 256); - ++k; - data.push_back({1000, s}); - CmdWrite(0, "sourceid0", data, tc, false, {}, true); - PQGetPartInfo(0, 27, tc); - - CmdRead(0, 0, Max<i32>(), Max<i32>(), 1, false, tc); - CmdRead(0, 1, Max<i32>(), Max<i32>(), 25, false, tc); - CmdRead(0, 24, Max<i32>(), Max<i32>(), 2, false, tc); - CmdRead(0, 26, Max<i32>(), Max<i32>(), 1, false, tc); - - activeZone = false; - }); -} - - + s += k; + s += s1.substr(pp); + s += char((i + 2) % 256); + ++k; + data.push_back({i + 2, s}); + } + s = ""; + s += k; + s += ss.substr(pp); + s += char((1000) % 256); + ++k; + data.push_back({1000, s}); + CmdWrite(0, "sourceid0", data, tc, false, {}, true); + PQGetPartInfo(0, 27, tc); + + CmdRead(0, 0, Max<i32>(), Max<i32>(), 1, false, tc); + CmdRead(0, 1, Max<i32>(), Max<i32>(), 25, false, tc); + CmdRead(0, 24, Max<i32>(), Max<i32>(), 2, false, tc); + CmdRead(0, 26, Max<i32>(), Max<i32>(), 1, false, tc); + + activeZone = false; + }); +} + + Y_UNIT_TEST(TestWritePQ) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - tc.Runtime->SetScheduledLimit(100); - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"user", true}}, tc); //important client, lifetimeseconds=0 - never delete - + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + tc.Runtime->SetScheduledLimit(100); + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"user", true}}, tc); //important client, lifetimeseconds=0 - never delete + TVector<std::pair<ui64, TString>> data, data1, data2; activeZone = PlainOrSoSlow(true, false); - + TString ss{1024*1024, '_'}; TString s1{131072, 'a'}; TString s2{2048, 'b'}; TString s3{32, 'c'}; - ui32 pp = 4 + 8 + 2 + 9; - + ui32 pp = 4 + 8 + 2 + 9; + TString sb{15*1024*1024 + 512*1024, '_'}; - data.push_back({1, sb.substr(pp)}); - CmdWrite(0,"sourceid0", data, tc, false, {}, true, "", -1, 100); - activeZone = false; - - PQGetPartInfo(100, 101, tc); - - data1.push_back({1, s3.substr(pp)}); - data1.push_back({2, sb.substr(pp)}); - data2.push_back({1, s2.substr(pp)}); - data2.push_back({2, sb.substr(pp)}); - CmdWrite(0,"sourceid1", data1, tc); - CmdWrite(0,"sourceid2", data2, tc); - - CmdWrite(0,"sourceid3", data1, tc); - data.clear(); - data.push_back({1, s1.substr(pp)}); - data.push_back({2, ss.substr(pp)}); - CmdWrite(0,"sourceid4", data, tc); - + data.push_back({1, sb.substr(pp)}); + CmdWrite(0,"sourceid0", data, tc, false, {}, true, "", -1, 100); + activeZone = false; + + PQGetPartInfo(100, 101, tc); + + data1.push_back({1, s3.substr(pp)}); + data1.push_back({2, sb.substr(pp)}); + data2.push_back({1, s2.substr(pp)}); + data2.push_back({2, sb.substr(pp)}); + CmdWrite(0,"sourceid1", data1, tc); + CmdWrite(0,"sourceid2", data2, tc); + + CmdWrite(0,"sourceid3", data1, tc); + data.clear(); + data.push_back({1, s1.substr(pp)}); + data.push_back({2, ss.substr(pp)}); + CmdWrite(0,"sourceid4", data, tc); + TString a1{8*1024*1024 - 1024, '_'}; TString a2{2*1024, '_'}; - data.clear(); - data.push_back({1, a1.substr(pp)}); - data1.clear(); - data1.push_back({1, a2.substr(pp)}); - CmdWrite(0,"sourceid5", data, tc); - CmdWrite(0,"sourceid6", data1, tc); - CmdWrite(0,"sourceid7", data, tc); - data.back().first = 4296000000lu; - CmdWrite(0,"sourceid8", data, tc); - PQGetPartInfo(100, 113, tc); - - data1.push_back({2, a2.substr(pp)}); - CmdWrite(0,"sourceId9", data1, tc, false, {}, false, "", -1, 1000); - PQGetPartInfo(100, 1002, tc); - - data1.front().first = 3; - data1.back().first = 4; - - CmdWrite(0,"sourceId9", data1, tc, false, {}, false, "", -1, 2000); - PQGetPartInfo(100, 2002, tc); - - activeZone = true; - - data1.push_back(data1.back()); - data1[1].first = 3; - CmdWrite(0,"sourceId10", data1, tc, false, {}, false, "", -1, 3000); - PQGetPartInfo(100, 3003, tc); - - activeZone = false; - - CmdWrite(1,"sourceId9", data1, tc, false, {}, false, "", -1, 2000); //to other partition - - data1.clear(); + data.clear(); + data.push_back({1, a1.substr(pp)}); + data1.clear(); + data1.push_back({1, a2.substr(pp)}); + CmdWrite(0,"sourceid5", data, tc); + CmdWrite(0,"sourceid6", data1, tc); + CmdWrite(0,"sourceid7", data, tc); + data.back().first = 4296000000lu; + CmdWrite(0,"sourceid8", data, tc); + PQGetPartInfo(100, 113, tc); + + data1.push_back({2, a2.substr(pp)}); + CmdWrite(0,"sourceId9", data1, tc, false, {}, false, "", -1, 1000); + PQGetPartInfo(100, 1002, tc); + + data1.front().first = 3; + data1.back().first = 4; + + CmdWrite(0,"sourceId9", data1, tc, false, {}, false, "", -1, 2000); + PQGetPartInfo(100, 2002, tc); + + activeZone = true; + + data1.push_back(data1.back()); + data1[1].first = 3; + CmdWrite(0,"sourceId10", data1, tc, false, {}, false, "", -1, 3000); + PQGetPartInfo(100, 3003, tc); + + activeZone = false; + + CmdWrite(1,"sourceId9", data1, tc, false, {}, false, "", -1, 2000); //to other partition + + data1.clear(); data1.push_back({1, TString{200, 'a'}}); for (ui32 i = 1; i <= NUM_WRITES; ++i) { - data1.front().first = i; - CmdWrite(1, "sourceidx", data1, tc, false, {}, false, "", -1); - } - - //read all, check offsets - CmdRead(0, 111, Max<i32>(), Max<i32>(), 8, false, tc, {111,112,1000,1001,2000,2001,3000,3002}); - - //read from gap - CmdRead(0, 500, Max<i32>(), Max<i32>(), 6, false, tc, {1000,1001,2000,2001,3000,3002}); - - }); -} - - + data1.front().first = i; + CmdWrite(1, "sourceidx", data1, tc, false, {}, false, "", -1); + } + + //read all, check offsets + CmdRead(0, 111, Max<i32>(), Max<i32>(), 8, false, tc, {111,112,1000,1001,2000,2001,3000,3002}); + + //read from gap + CmdRead(0, 500, Max<i32>(), Max<i32>(), 6, false, tc, {1000,1001,2000,2001,3000,3002}); + + }); +} + + Y_UNIT_TEST(TestSourceIdDropByUserWrites) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - tc.Runtime->SetScheduledLimit(200); - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc); //no important client, lifetimeseconds=0 - delete right now - + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + tc.Runtime->SetScheduledLimit(200); + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc); //no important client, lifetimeseconds=0 - delete right now + TVector<std::pair<ui64, TString>> data; - activeZone = true; - + activeZone = true; + TString ss{32, '_'}; - - data.push_back({1, ss}); - CmdWrite(0,"sourceid0", data, tc, false, {}, false, "", -1, 100); - - PQGetPartInfo(100, 101, tc); - - CmdWrite(0,"sourceidx", data, tc, false, {}, false, "", -1, 2000); - CmdWrite(0,"sourceid1", data, tc, false, {}, false, "", -1, 3000); - PQGetPartInfo(2000, 3001, tc); - //fail - already written - CmdWrite(0,"sourceid0", data, tc, false); - PQGetPartInfo(2000, 3001, tc); - - tc.Runtime->UpdateCurrentTime(tc.Runtime->GetCurrentTime() + TDuration::Minutes(61)); - CmdWrite(0,"sourceid0", data, tc, false); - CmdWrite(0,"sourceid0", data, tc, false); //second attempt just to be sure that DropOldSourceId is called after previos write, not only on Wakeup - //ok, hour waited - record writted twice - PQGetPartInfo(2000, 3002, tc); - }); -} - - + + data.push_back({1, ss}); + CmdWrite(0,"sourceid0", data, tc, false, {}, false, "", -1, 100); + + PQGetPartInfo(100, 101, tc); + + CmdWrite(0,"sourceidx", data, tc, false, {}, false, "", -1, 2000); + CmdWrite(0,"sourceid1", data, tc, false, {}, false, "", -1, 3000); + PQGetPartInfo(2000, 3001, tc); + //fail - already written + CmdWrite(0,"sourceid0", data, tc, false); + PQGetPartInfo(2000, 3001, tc); + + tc.Runtime->UpdateCurrentTime(tc.Runtime->GetCurrentTime() + TDuration::Minutes(61)); + CmdWrite(0,"sourceid0", data, tc, false); + CmdWrite(0,"sourceid0", data, tc, false); //second attempt just to be sure that DropOldSourceId is called after previos write, not only on Wakeup + //ok, hour waited - record writted twice + PQGetPartInfo(2000, 3002, tc); + }); +} + + Y_UNIT_TEST(TestSourceIdDropBySourceIdCount) { TTestContext tc; RunTestWithReboots(tc.TabletIds, [&]() { @@ -1262,17 +1262,17 @@ Y_UNIT_TEST(TestSourceIdDropBySourceIdCount) { CmdWrite(0,"sourceid0", data, tc, false, {}, false, "", -1, 100); Cout << "written sourceid0" << Endl; - PQGetPartInfo(100, 101, tc); + PQGetPartInfo(100, 101, tc); CmdWrite(0,"sourceidx", data, tc, false, {}, false, "", -1, 2000); Cout << "written sourceidx" << Endl; CmdWrite(0,"sourceid1", data, tc, false, {}, false, "", -1, 3000); Cout << "written sourceid1" << Endl; - PQGetPartInfo(2000, 3001, tc); + PQGetPartInfo(2000, 3001, tc); //fail - already written CmdWrite(0,"sourceid0", data, tc, false); Cout << "written sourceid0" << Endl; - PQGetPartInfo(2000, 3001, tc); + PQGetPartInfo(2000, 3001, tc); for (ui64 i=0; i < 5; ++i) { CmdWrite(0, TStringBuilder() << "sourceid_" << i, data, tc, false, {}, false, "", -1, 3001 + i); @@ -1286,584 +1286,584 @@ Y_UNIT_TEST(TestSourceIdDropBySourceIdCount) { Y_UNIT_TEST(TestWriteOffsetWithBigMessage) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - tc.Runtime->SetScheduledLimit(200); - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{{"user", true}}}, tc, 3); //important client, lifetimeseconds=0 - never delete - - activeZone = false; - + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + tc.Runtime->SetScheduledLimit(200); + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{{"user", true}}}, tc, 3); //important client, lifetimeseconds=0 - never delete + + activeZone = false; + TVector<std::pair<ui64, TString>> data; - + data.push_back({1, TString{10*1024*1024, 'a'}}); - CmdWrite(1, "sourceIdx", data, tc, false, {}, false, "", -1, 80000); - data.front().first = 2; - CmdWrite(1, "sourceIdx", data, tc, false, {}, false, "", -1, 160000); - - data.clear(); + CmdWrite(1, "sourceIdx", data, tc, false, {}, false, "", -1, 80000); + data.front().first = 2; + CmdWrite(1, "sourceIdx", data, tc, false, {}, false, "", -1, 160000); + + data.clear(); data.push_back({1, TString{100*1024, 'a'}}); - for (ui32 i = 0; i < 100; ++i) { - data.push_back(data.front()); - data.back().first = i + 2; - } - CmdWrite(0, "sourceIdx", data, tc, false, {}, false, "", -1, 80000); - PQGetPartInfo(80000, 80101, tc); - data.resize(70); - CmdWrite(2, "sourceId1", data, tc, false, {}, false, "", -1, 0); - CmdWrite(2, "sourceId2", data, tc, false, {}, false, "", -1, 80000); - }); -} - - + for (ui32 i = 0; i < 100; ++i) { + data.push_back(data.front()); + data.back().first = i + 2; + } + CmdWrite(0, "sourceIdx", data, tc, false, {}, false, "", -1, 80000); + PQGetPartInfo(80000, 80101, tc); + data.resize(70); + CmdWrite(2, "sourceId1", data, tc, false, {}, false, "", -1, 0); + CmdWrite(2, "sourceId2", data, tc, false, {}, false, "", -1, 80000); + }); +} + + Y_UNIT_TEST(TestWriteSplit) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); activeZone = false; - tc.Runtime->SetScheduledLimit(200); - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"user1", true}}, tc); //never delete - const ui32 size = PlainOrSoSlow(2*1024*1024, 1*1024*1024); - + tc.Runtime->SetScheduledLimit(200); + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"user1", true}}, tc); //never delete + const ui32 size = PlainOrSoSlow(2*1024*1024, 1*1024*1024); + TVector<std::pair<ui64, TString>> data; data.push_back({1, TString{size, 'b'}}); data.push_back({2, TString{size, 'a'}}); activeZone = PlainOrSoSlow(true, false); - CmdWrite(0, "sourceIdx", data, tc, false, {}, false, "", -1, 40000); - RestartTablet(tc); + CmdWrite(0, "sourceIdx", data, tc, false, {}, false, "", -1, 40000); + RestartTablet(tc); activeZone = false; - PQGetPartInfo(40000, 40002, tc); - }); -} - - + PQGetPartInfo(40000, 40002, tc); + }); +} + + Y_UNIT_TEST(TestLowWatermark) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - tc.Runtime->SetScheduledLimit(200); - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc, 2, 2 * 1024 * 1024); //no important clients, lifetimeseconds=0 - delete all right now, except last datablob - + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + tc.Runtime->SetScheduledLimit(200); + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc, 2, 2 * 1024 * 1024); //no important clients, lifetimeseconds=0 - delete all right now, except last datablob + TVector<std::pair<ui64, TString>> data; - - ui32 pp = 4 + 8 + 2 + 9; - + + ui32 pp = 4 + 8 + 2 + 9; + TString ss{1024*1024, '_'}; - data.push_back({1, ss.substr(pp)}); - data.push_back({2, ss.substr(pp)}); - data.push_back({3, ss.substr(pp)}); - CmdWrite(0,"sourceid0", data, tc, false, {}, true); - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc, 2, 6 * 1024 * 1024); //no important clients, lifetimeseconds=0 - delete all right now, except last datablob - CmdWrite(0,"sourceid1", data, tc, false, {}, false); //first are compacted - PQGetPartInfo(0, 6, tc); - CmdWrite(0,"sourceid2", data, tc, false, {}, false); //3 and 6 are compacted - PQGetPartInfo(3, 9, tc); - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc, 2, 3 * 1024 * 1024); //no important clients, lifetimeseconds=0 - delete all right now, except last datablob - CmdWrite(0,"sourceid3", data, tc, false, {}, false); //3, 6 and 3 are compacted - data.resize(1); - CmdWrite(0,"sourceid4", data, tc, false, {}, false); //3, 6 and 3 are compacted - PQGetPartInfo(9, 13, tc); - }); -} - - - + data.push_back({1, ss.substr(pp)}); + data.push_back({2, ss.substr(pp)}); + data.push_back({3, ss.substr(pp)}); + CmdWrite(0,"sourceid0", data, tc, false, {}, true); + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc, 2, 6 * 1024 * 1024); //no important clients, lifetimeseconds=0 - delete all right now, except last datablob + CmdWrite(0,"sourceid1", data, tc, false, {}, false); //first are compacted + PQGetPartInfo(0, 6, tc); + CmdWrite(0,"sourceid2", data, tc, false, {}, false); //3 and 6 are compacted + PQGetPartInfo(3, 9, tc); + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc, 2, 3 * 1024 * 1024); //no important clients, lifetimeseconds=0 - delete all right now, except last datablob + CmdWrite(0,"sourceid3", data, tc, false, {}, false); //3, 6 and 3 are compacted + data.resize(1); + CmdWrite(0,"sourceid4", data, tc, false, {}, false); //3, 6 and 3 are compacted + PQGetPartInfo(9, 13, tc); + }); +} + + + Y_UNIT_TEST(TestWriteToFullPartition) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - activeZone = false; - tc.Prepare(dispatchName, setup, activeZone); - - tc.Runtime->SetScheduledLimit(100); - - PQTabletPrepare(11, 100 * 1024 * 1024, 0, {}, tc); - + TFinalizer finalizer(tc); + activeZone = false; + tc.Prepare(dispatchName, setup, activeZone); + + tc.Runtime->SetScheduledLimit(100); + + PQTabletPrepare(11, 100 * 1024 * 1024, 0, {}, tc); + TVector<std::pair<ui64, TString>> data; activeZone = PlainOrSoSlow(true, false); - + TString s{32, 'c'}; - ui32 pp = 8 + 4 + 2 + 9; - for (ui32 i = 0; i < 10; ++i) { - data.push_back({i + 1, s.substr(pp)}); - } - CmdWrite(0, "sourceid0", data, tc, false, {}, true); //now 1 blob - PQTabletPrepare(10, 100 * 1024 * 1024, 0, {}, tc); - PQGetPartInfo(0, 10, tc); - data.resize(1); - CmdWrite(0, "sourceid1", data, tc, true); - PQTabletPrepare(12, 100 * 1024 * 1024, 0, {}, tc); - CmdWrite(0, "sourceid1", data, tc); - PQTabletPrepare(12, 100, 0, {}, tc); - CmdWrite(0, "sourceid1", data, tc, true); - }); -} - - - + ui32 pp = 8 + 4 + 2 + 9; + for (ui32 i = 0; i < 10; ++i) { + data.push_back({i + 1, s.substr(pp)}); + } + CmdWrite(0, "sourceid0", data, tc, false, {}, true); //now 1 blob + PQTabletPrepare(10, 100 * 1024 * 1024, 0, {}, tc); + PQGetPartInfo(0, 10, tc); + data.resize(1); + CmdWrite(0, "sourceid1", data, tc, true); + PQTabletPrepare(12, 100 * 1024 * 1024, 0, {}, tc); + CmdWrite(0, "sourceid1", data, tc); + PQTabletPrepare(12, 100, 0, {}, tc); + CmdWrite(0, "sourceid1", data, tc, true); + }); +} + + + Y_UNIT_TEST(TestPQPartialRead) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); - }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - - tc.Runtime->SetScheduledLimit(200); - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"aaa", true}}, tc); //important client - never delete - - activeZone = false; + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); + }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + + tc.Runtime->SetScheduledLimit(200); + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"aaa", true}}, tc); //important client - never delete + + activeZone = false; TVector<std::pair<ui64, TString>> data; - - ui32 pp = 4 + 8 + 2 + 9 + 100 + 40; //pp is for size of meta - TString tmp{1024*1024 - pp - 2, '-'}; - char k = 0; - TString ss = ""; - ss += k; - ss += tmp; - ss += char(1); - ++k; - data.push_back({1, ss}); - - CmdWrite(0, "sourceid0", data, tc, false, {}, true); //now 1 blob - PQGetPartInfo(0, 1, tc); - - CmdRead(0, 0, 1, 1, 1, false, tc); - }); -} - - + + ui32 pp = 4 + 8 + 2 + 9 + 100 + 40; //pp is for size of meta + TString tmp{1024*1024 - pp - 2, '-'}; + char k = 0; + TString ss = ""; + ss += k; + ss += tmp; + ss += char(1); + ++k; + data.push_back({1, ss}); + + CmdWrite(0, "sourceid0", data, tc, false, {}, true); //now 1 blob + PQGetPartInfo(0, 1, tc); + + CmdRead(0, 0, 1, 1, 1, false, tc); + }); +} + + Y_UNIT_TEST(TestPQRead) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - - tc.Runtime->SetScheduledLimit(200); - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"aaa", true}}, tc); //important client - never delete - - activeZone = false; + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + + tc.Runtime->SetScheduledLimit(200); + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"aaa", true}}, tc); //important client - never delete + + activeZone = false; TVector<std::pair<ui64, TString>> data; - - ui32 pp = 4 + 8 + 2 + 9 + 100 + 40; //pp is for size of meta + + ui32 pp = 4 + 8 + 2 + 9 + 100 + 40; //pp is for size of meta TString tmp{1024*1024 - pp - 2, '-'}; - char k = 0; - for (ui32 i = 0; i < 26 * 1024 * 1024;) { //3 full blobs and 2 in head + char k = 0; + for (ui32 i = 0; i < 26 * 1024 * 1024;) { //3 full blobs and 2 in head TString ss = ""; - ss += k; - ss += tmp; - ss += char((i + 1) % 256); - ++k; - data.push_back({i + 1, ss}); - i += ss.size() + pp; - } - CmdWrite(0, "sourceid0", data, tc, false, {}, true); //now 1 blob - PQGetPartInfo(0, 26, tc); - - CmdRead(0, 26, Max<i32>(), Max<i32>(), 0, true, tc); - - CmdRead(0, 0, Max<i32>(), Max<i32>(), 25, false, tc); - CmdRead(0, 0, 10, 1024*102400, 10, false, tc); - CmdRead(0, 9, 1, 1024*102400, 1, false, tc); - CmdRead(0, 23, 3, 1024*102400, 3, false, tc); - - CmdRead(0, 3, 1000, 511*1024, 1, false, tc); - CmdRead(0, 3, 1000, 1024, 1, false, tc); //at least one message will be readed always - CmdRead(0, 25, 1000, 1024, 1, false, tc); //at least one message will be readed always, from head - - activeZone = true; - CmdRead(0, 9, 1000, 3*1024*1024, 3, false, tc); - CmdRead(0, 9, 1000, 3*1024*1024-10240, 3, false, tc); - CmdRead(0, 25, 1000, 512*1024, 1, false, tc); //from head - CmdRead(0, 24, 1000, 512*1024, 1, false, tc); //from head - - CmdRead(0, 23, 1000, 102400000, 3, false, tc); - }); -} - + ss += k; + ss += tmp; + ss += char((i + 1) % 256); + ++k; + data.push_back({i + 1, ss}); + i += ss.size() + pp; + } + CmdWrite(0, "sourceid0", data, tc, false, {}, true); //now 1 blob + PQGetPartInfo(0, 26, tc); + + CmdRead(0, 26, Max<i32>(), Max<i32>(), 0, true, tc); + + CmdRead(0, 0, Max<i32>(), Max<i32>(), 25, false, tc); + CmdRead(0, 0, 10, 1024*102400, 10, false, tc); + CmdRead(0, 9, 1, 1024*102400, 1, false, tc); + CmdRead(0, 23, 3, 1024*102400, 3, false, tc); + + CmdRead(0, 3, 1000, 511*1024, 1, false, tc); + CmdRead(0, 3, 1000, 1024, 1, false, tc); //at least one message will be readed always + CmdRead(0, 25, 1000, 1024, 1, false, tc); //at least one message will be readed always, from head + + activeZone = true; + CmdRead(0, 9, 1000, 3*1024*1024, 3, false, tc); + CmdRead(0, 9, 1000, 3*1024*1024-10240, 3, false, tc); + CmdRead(0, 25, 1000, 512*1024, 1, false, tc); //from head + CmdRead(0, 24, 1000, 512*1024, 1, false, tc); //from head + + CmdRead(0, 23, 1000, 102400000, 3, false, tc); + }); +} + Y_UNIT_TEST(TestPQSmallRead) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - - tc.Runtime->SetScheduledLimit(200); - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"aaa", true}}, tc); //important client - never delete - - activeZone = false; + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + + tc.Runtime->SetScheduledLimit(200); + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"aaa", true}}, tc); //important client - never delete + + activeZone = false; TVector<std::pair<ui64, TString>> data; - - ui32 pp = 4 + 8 + 2 + 9 ; //5 is for 8 blobs for header + + ui32 pp = 4 + 8 + 2 + 9 ; //5 is for 8 blobs for header TString tmp{32 - pp - 2, '-'}; - char k = 0; + char k = 0; TString ss = ""; - ss += k; - ss += tmp; - ss += char(1); - data.push_back({1, ss}); - CmdWrite(0, "sourceid0", data, tc, false, {}, true); + ss += k; + ss += tmp; + ss += char(1); + data.push_back({1, ss}); + CmdWrite(0, "sourceid0", data, tc, false, {}, true); ++k; data[0].second = TString(1, k) + tmp + char(1); - CmdWrite(0, "sourceid1", data, tc, false, {}, false); + CmdWrite(0, "sourceid1", data, tc, false, {}, false); ++k; data[0].second = TString(1, k) + tmp + char(1); - CmdWrite(0, "sourceid2", data, tc, false, {}, false); + CmdWrite(0, "sourceid2", data, tc, false, {}, false); ++k; data[0].second = TString(1, k) + tmp + char(1); - CmdWrite(0, "sourceid3", data, tc, false, {}, false); + CmdWrite(0, "sourceid3", data, tc, false, {}, false); ++k; data[0].second = TString(1, k) + tmp + char(1); - CmdWrite(0, "sourceid4", data, tc, false, {}, false); - PQGetPartInfo(0, 5, tc); - - CmdRead(0, 5, Max<i32>(), Max<i32>(), 0, true, tc); - CmdRead(0, 0, Max<i32>(), Max<i32>(), 5, false, tc); - CmdRead(0, 0, 3, 1024*102400, 3, false, tc); - CmdRead(0, 3, 1000, 1024, 2, false, tc); - }); -} - + CmdWrite(0, "sourceid4", data, tc, false, {}, false); + PQGetPartInfo(0, 5, tc); + + CmdRead(0, 5, Max<i32>(), Max<i32>(), 0, true, tc); + CmdRead(0, 0, Max<i32>(), Max<i32>(), 5, false, tc); + CmdRead(0, 0, 3, 1024*102400, 3, false, tc); + CmdRead(0, 3, 1000, 1024, 2, false, tc); + }); +} + Y_UNIT_TEST(TestPQReadAhead) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - activeZone = false; - - tc.Runtime->SetScheduledLimit(200); - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"aaa", true}}, tc); //important client - never delete - + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + activeZone = false; + + tc.Runtime->SetScheduledLimit(200); + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"aaa", true}}, tc); //important client - never delete + TVector<std::pair<ui64, TString>> data; - - ui32 pp = 8 + 4 + 2 + 9; + + ui32 pp = 8 + 4 + 2 + 9; TString tmp{1024*1024 - pp - 2, '-'}; TString tmp0{32 - pp - 2, '-'}; - char k = 0; - for (ui32 i = 0; i < 5; ++i) { + char k = 0; + for (ui32 i = 0; i < 5; ++i) { TString ss = ""; - ss += k; - ss += tmp0; - ss += char((i + 1) % 256); - ++k; - data.push_back({i + 1, ss}); - } - for (ui32 i = 0; i < 17 * 1024 * 1024;) { //3 full blobs and 2 in head + ss += k; + ss += tmp0; + ss += char((i + 1) % 256); + ++k; + data.push_back({i + 1, ss}); + } + for (ui32 i = 0; i < 17 * 1024 * 1024;) { //3 full blobs and 2 in head TString ss = ""; - ss += k; - ss += tmp; - ss += char((i + 10) % 256); - ++k; - data.push_back({i + 10, ss}); - i += ss.size() + pp; - } - CmdWrite(0, "sourceid0", data, tc, false, {}, true); //now 1 blob - PQGetPartInfo(0, 22, tc); - activeZone = true; - CmdRead(0, 0, 1, 1024*102400, 1, false, tc); - CmdRead(0, 1, 1, 1024*102400, 1, false, tc); - CmdRead(0, 2, 1, 1024*102400, 1, false, tc); - CmdRead(0, 3, 1, 1024*102400, 1, false, tc); - CmdRead(0, 4, 10, 1024*102400, 10, false, tc); - }); -} - + ss += k; + ss += tmp; + ss += char((i + 10) % 256); + ++k; + data.push_back({i + 10, ss}); + i += ss.size() + pp; + } + CmdWrite(0, "sourceid0", data, tc, false, {}, true); //now 1 blob + PQGetPartInfo(0, 22, tc); + activeZone = true; + CmdRead(0, 0, 1, 1024*102400, 1, false, tc); + CmdRead(0, 1, 1, 1024*102400, 1, false, tc); + CmdRead(0, 2, 1, 1024*102400, 1, false, tc); + CmdRead(0, 3, 1, 1024*102400, 1, false, tc); + CmdRead(0, 4, 10, 1024*102400, 10, false, tc); + }); +} + Y_UNIT_TEST(TestOwnership) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - - tc.Runtime->SetScheduledLimit(50); - - PQTabletPrepare(10, 100 * 1024 * 1024, 0, {}, tc); - + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + + tc.Runtime->SetScheduledLimit(50); + + PQTabletPrepare(10, 100 * 1024 * 1024, 0, {}, tc); + TString cookie, cookie2; - cookie = CmdSetOwner(0, tc).first; - UNIT_ASSERT(!cookie.empty()); - cookie2 = CmdSetOwner(0, tc).first; - UNIT_ASSERT(!cookie2.empty()); - UNIT_ASSERT(cookie2 != cookie); - }); -} - + cookie = CmdSetOwner(0, tc).first; + UNIT_ASSERT(!cookie.empty()); + cookie2 = CmdSetOwner(0, tc).first; + UNIT_ASSERT(!cookie2.empty()); + UNIT_ASSERT(cookie2 != cookie); + }); +} + Y_UNIT_TEST(TestSetClientOffset) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - tc.Runtime->SetScheduledLimit(50); - - PQTabletPrepare(10, 100 * 1024 * 1024, 0, {{"user1", false}}, tc); - - activeZone = true; - + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + tc.Runtime->SetScheduledLimit(50); + + PQTabletPrepare(10, 100 * 1024 * 1024, 0, {{"user1", false}}, tc); + + activeZone = true; + TVector<std::pair<ui64, TString>> data; - - CmdSetOffset(0, "user1", 100, false, tc); //must be true , error - CmdGetOffset(0, "user1", 0, tc); // must be -1 - + + CmdSetOffset(0, "user1", 100, false, tc); //must be true , error + CmdGetOffset(0, "user1", 0, tc); // must be -1 + activeZone = PlainOrSoSlow(true, false); - - CmdSetOffset(0, "user1", 0, false, tc); - CmdGetOffset(0, "user1", 0, tc); - CmdSetOffset(0, "user1", 0, false, tc); - CmdGetOffset(0, "user1", 0, tc); - CmdSetOffset(0, "user1", 0, false, tc); - CmdGetOffset(0, "user1", 0, tc); - CmdGetOffset(0, "user2", 0, tc); - }); -} - + + CmdSetOffset(0, "user1", 0, false, tc); + CmdGetOffset(0, "user1", 0, tc); + CmdSetOffset(0, "user1", 0, false, tc); + CmdGetOffset(0, "user1", 0, tc); + CmdSetOffset(0, "user1", 0, false, tc); + CmdGetOffset(0, "user1", 0, tc); + CmdGetOffset(0, "user2", 0, tc); + }); +} + Y_UNIT_TEST(TestReadSessions) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); - }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - tc.Runtime->SetScheduledLimit(50); - - PQTabletPrepare(10, 100 * 1024 * 1024, 0, {{"user1", false}}, tc); - - activeZone = true; - + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); + }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + tc.Runtime->SetScheduledLimit(50); + + PQTabletPrepare(10, 100 * 1024 * 1024, 0, {{"user1", false}}, tc); + + activeZone = true; + TVector<std::pair<ui64, TString>> data; - CmdCreateSession(0, "user1", "session1", tc); - CmdSetOffset(0, "user1", 0, false, tc, "session1"); //all ok - session is set - CmdSetOffset(0, "user1", 0, true, tc, "other_session"); //fails - session1 is active - + CmdCreateSession(0, "user1", "session1", tc); + CmdSetOffset(0, "user1", 0, false, tc, "session1"); //all ok - session is set + CmdSetOffset(0, "user1", 0, true, tc, "other_session"); //fails - session1 is active + activeZone = PlainOrSoSlow(true, false); - - CmdSetOffset(0, "user1", 0, false, tc, "session1"); - - CmdCreateSession(0, "user1", "session2", tc, 0, 1, 1); - CmdCreateSession(0, "user1", "session3", tc, 0, 1, 1, true); //error on creation - CmdCreateSession(0, "user1", "session3", tc, 0, 0, 2, true); //error on creation - CmdCreateSession(0, "user1", "session3", tc, 0, 0, 0, true); //error on creation - CmdSetOffset(0, "user1", 0, true, tc, "session1"); - CmdSetOffset(0, "user1", 0, true, tc, "session3"); - CmdSetOffset(0, "user1", 0, false, tc, "session2"); - - activeZone = true; - - CmdKillSession(0, "user1", "session2", tc); - CmdSetOffset(0, "user1", 0, true, tc, "session2"); //session is dead now - }); -} - - - + + CmdSetOffset(0, "user1", 0, false, tc, "session1"); + + CmdCreateSession(0, "user1", "session2", tc, 0, 1, 1); + CmdCreateSession(0, "user1", "session3", tc, 0, 1, 1, true); //error on creation + CmdCreateSession(0, "user1", "session3", tc, 0, 0, 2, true); //error on creation + CmdCreateSession(0, "user1", "session3", tc, 0, 0, 0, true); //error on creation + CmdSetOffset(0, "user1", 0, true, tc, "session1"); + CmdSetOffset(0, "user1", 0, true, tc, "session3"); + CmdSetOffset(0, "user1", 0, false, tc, "session2"); + + activeZone = true; + + CmdKillSession(0, "user1", "session2", tc); + CmdSetOffset(0, "user1", 0, true, tc, "session2"); //session is dead now + }); +} + + + Y_UNIT_TEST(TestGetTimestamps) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - tc.Runtime->SetScheduledLimit(50); - - tc.Runtime->UpdateCurrentTime(TInstant::Zero() + TDuration::Days(2)); - activeZone = false; - - PQTabletPrepare(10, 100 * 1024 * 1024, 0, {{"user1", false}}, tc); - + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + tc.Runtime->SetScheduledLimit(50); + + tc.Runtime->UpdateCurrentTime(TInstant::Zero() + TDuration::Days(2)); + activeZone = false; + + PQTabletPrepare(10, 100 * 1024 * 1024, 0, {{"user1", false}}, tc); + TVector<std::pair<ui64, TString>> data; data.push_back({1, TString(1024, 'a')}); data.push_back({2, TString(1024, 'a')}); data.push_back({3, TString(1024, 'a')}); data.push_back({4, TString(1024, 'a')}); - - CmdWrite(0, "sourceid0", data, tc, false, {}, true, "", -1, 1); - CmdGetOffset(0, "user1", 0, tc, -1); - - CmdSetOffset(0, "user1", 1, true, tc); - CmdSetOffset(0, "user1", 0, true, tc); - CmdGetOffset(0, "user1", 0, tc, Max<i64>()); - CmdSetOffset(0, "user1", 1, true, tc); - CmdGetOffset(0, "user1", 1, tc, 1); - CmdSetOffset(0, "user1", 3, true, tc); - CmdGetOffset(0, "user1", 3, tc, 3); - CmdSetOffset(0, "user1", 4, true, tc); - CmdGetOffset(0, "user1", 4, tc, 4); - CmdSetOffset(0, "user1", 5, true, tc); - CmdGetOffset(0, "user1", 5, tc, 4); - CmdSetOffset(0, "user1", 5, true, tc); - CmdWrite(0, "sourceid1", data, tc, false, {}, false); - CmdGetOffset(0, "user1", 5, tc, 5); - RestartTablet(tc); - CmdGetOffset(0, "user1", 5, tc, 5); - - CmdWrite(0, "sourceid2", data, tc, false, {}, false, "", -1,100); - CmdRead(0, 100, Max<i32>(), Max<i32>(), 4, false, tc, {100,101,102,103}); //all offsets will be putted in cache - - //check offset inside gap - CmdSetOffset(0, "user", 50, true, tc); - CmdGetOffset(0, "user", 50, tc, 100); - - CmdSetOffset(0, "user", 101, true, tc); - CmdGetOffset(0, "user", 101, tc, 101); - }); -} - - + + CmdWrite(0, "sourceid0", data, tc, false, {}, true, "", -1, 1); + CmdGetOffset(0, "user1", 0, tc, -1); + + CmdSetOffset(0, "user1", 1, true, tc); + CmdSetOffset(0, "user1", 0, true, tc); + CmdGetOffset(0, "user1", 0, tc, Max<i64>()); + CmdSetOffset(0, "user1", 1, true, tc); + CmdGetOffset(0, "user1", 1, tc, 1); + CmdSetOffset(0, "user1", 3, true, tc); + CmdGetOffset(0, "user1", 3, tc, 3); + CmdSetOffset(0, "user1", 4, true, tc); + CmdGetOffset(0, "user1", 4, tc, 4); + CmdSetOffset(0, "user1", 5, true, tc); + CmdGetOffset(0, "user1", 5, tc, 4); + CmdSetOffset(0, "user1", 5, true, tc); + CmdWrite(0, "sourceid1", data, tc, false, {}, false); + CmdGetOffset(0, "user1", 5, tc, 5); + RestartTablet(tc); + CmdGetOffset(0, "user1", 5, tc, 5); + + CmdWrite(0, "sourceid2", data, tc, false, {}, false, "", -1,100); + CmdRead(0, 100, Max<i32>(), Max<i32>(), 4, false, tc, {100,101,102,103}); //all offsets will be putted in cache + + //check offset inside gap + CmdSetOffset(0, "user", 50, true, tc); + CmdGetOffset(0, "user", 50, tc, 100); + + CmdSetOffset(0, "user", 101, true, tc); + CmdGetOffset(0, "user", 101, tc, 101); + }); +} + + Y_UNIT_TEST(TestChangeConfig) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - activeZone = false; - tc.Prepare(dispatchName, setup, activeZone); - activeZone = false; - tc.Runtime->SetScheduledLimit(50); - + TFinalizer finalizer(tc); + activeZone = false; + tc.Prepare(dispatchName, setup, activeZone); + activeZone = false; + tc.Runtime->SetScheduledLimit(50); + TVector<std::pair<ui64, TString>> data; - - ui32 pp = 8 + 4 + 2 + 9; + + ui32 pp = 8 + 4 + 2 + 9; TString tmp0{32 - pp - 2, '-'}; - char k = 0; - for (ui32 i = 0; i < 5; ++i) { + char k = 0; + for (ui32 i = 0; i < 5; ++i) { TString ss = ""; - ss += k; - ss += tmp0; - ss += char((i + 1) % 256); - ++k; - data.push_back({i + 1, ss}); - } - - PQTabletPrepare(100, 100 * 1024 * 1024, 86400 * 2, {{"aaa", true}}, tc, 5); - CmdWrite(0, "sourceid0", data, tc, false, {}, true); //now 1 blob - PQTabletPrepare(5, 1024 * 1024, 86400, {{"bbb", true}, {"ccc", true}}, tc, 10); - data.pop_back(); //to be sure that after write partition will no be full - CmdWrite(0, "sourceid1", data, tc, true); //partition is full - CmdWrite(1, "sourceid2", data, tc); - CmdWrite(9, "sourceid3", data, tc); //now 1 blob - }); -} - + ss += k; + ss += tmp0; + ss += char((i + 1) % 256); + ++k; + data.push_back({i + 1, ss}); + } + + PQTabletPrepare(100, 100 * 1024 * 1024, 86400 * 2, {{"aaa", true}}, tc, 5); + CmdWrite(0, "sourceid0", data, tc, false, {}, true); //now 1 blob + PQTabletPrepare(5, 1024 * 1024, 86400, {{"bbb", true}, {"ccc", true}}, tc, 10); + data.pop_back(); //to be sure that after write partition will no be full + CmdWrite(0, "sourceid1", data, tc, true); //partition is full + CmdWrite(1, "sourceid2", data, tc); + CmdWrite(9, "sourceid3", data, tc); //now 1 blob + }); +} + Y_UNIT_TEST(TestReadSubscription) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - activeZone = false; + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + activeZone = false; tc.Runtime->SetScheduledLimit(600); tc.Runtime->SetScheduledEventFilter(&tc.ImmediateLogFlushAndRequestTimeoutFilter); - + TVector<std::pair<ui64, TString>> data; - - ui32 pp = 8 + 4 + 2 + 9; + + ui32 pp = 8 + 4 + 2 + 9; TString tmp0{32 - pp - 2, '-'}; - char k = 0; - for (ui32 i = 0; i < 5; ++i) { + char k = 0; + for (ui32 i = 0; i < 5; ++i) { TString ss = ""; - ss += k; - ss += tmp0; - ss += char((i + 1) % 256); - ++k; - data.push_back({i + 1, ss}); - } - - PQTabletPrepare(100, 100 * 1024 * 1024, 86400 * 2, {{"user1", true}}, tc, 5); - CmdWrite(0, "sourceid0", data, tc, false, {}, true); - - TAutoPtr<IEventHandle> handle; - TEvPersQueue::TEvResponse *result; - THolder<TEvPersQueue::TEvRequest> request; - - request.Reset(new TEvPersQueue::TEvRequest); - auto req = request->Record.MutablePartitionRequest(); - req->SetPartition(0); - auto read = req->MutableCmdRead(); - read->SetOffset(5); - read->SetClientId("user1"); - read->SetCount(5); - read->SetBytes(1000000); - read->SetTimeoutMs(5000); - - tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); - - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); //read without write must be timeouted - UNIT_ASSERT_EQUAL(result->Record.GetPartitionResponse().GetCmdReadResult().ResultSize(), 0); //read without write must be timeouted - - request.Reset(new TEvPersQueue::TEvRequest); - req = request->Record.MutablePartitionRequest(); - req->SetPartition(0); - read = req->MutableCmdRead(); - read->SetOffset(5); - read->SetClientId("user1"); - read->SetCount(3); - read->SetBytes(1000000); - read->SetTimeoutMs(5000); - - tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); //got read - - CmdWrite(0, "sourceid1", data, tc); //write - - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); //now got data - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); - UNIT_ASSERT_EQUAL(result->Record.GetPartitionResponse().GetCmdReadResult().ResultSize(), 3); //got response, but only for 3 from 5 writed blobs - - request.Reset(new TEvPersQueue::TEvRequest); - req = request->Record.MutablePartitionRequest(); - req->SetPartition(0); - read = req->MutableCmdRead(); - read->SetOffset(10); - read->SetClientId("user1"); - read->SetCount(55); - read->SetBytes(1000000); - read->SetTimeoutMs(5000); - - tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); //got read - - CmdWrite(0, "sourceid2", data, tc); //write - - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); //now got data - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); - UNIT_ASSERT_EQUAL(result->Record.GetPartitionResponse().GetCmdReadResult().ResultSize(), 5); //got response for whole written blobs - }); -} - + ss += k; + ss += tmp0; + ss += char((i + 1) % 256); + ++k; + data.push_back({i + 1, ss}); + } + + PQTabletPrepare(100, 100 * 1024 * 1024, 86400 * 2, {{"user1", true}}, tc, 5); + CmdWrite(0, "sourceid0", data, tc, false, {}, true); + + TAutoPtr<IEventHandle> handle; + TEvPersQueue::TEvResponse *result; + THolder<TEvPersQueue::TEvRequest> request; + + request.Reset(new TEvPersQueue::TEvRequest); + auto req = request->Record.MutablePartitionRequest(); + req->SetPartition(0); + auto read = req->MutableCmdRead(); + read->SetOffset(5); + read->SetClientId("user1"); + read->SetCount(5); + read->SetBytes(1000000); + read->SetTimeoutMs(5000); + + tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); + + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); //read without write must be timeouted + UNIT_ASSERT_EQUAL(result->Record.GetPartitionResponse().GetCmdReadResult().ResultSize(), 0); //read without write must be timeouted + + request.Reset(new TEvPersQueue::TEvRequest); + req = request->Record.MutablePartitionRequest(); + req->SetPartition(0); + read = req->MutableCmdRead(); + read->SetOffset(5); + read->SetClientId("user1"); + read->SetCount(3); + read->SetBytes(1000000); + read->SetTimeoutMs(5000); + + tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); //got read + + CmdWrite(0, "sourceid1", data, tc); //write + + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); //now got data + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); + UNIT_ASSERT_EQUAL(result->Record.GetPartitionResponse().GetCmdReadResult().ResultSize(), 3); //got response, but only for 3 from 5 writed blobs + + request.Reset(new TEvPersQueue::TEvRequest); + req = request->Record.MutablePartitionRequest(); + req->SetPartition(0); + read = req->MutableCmdRead(); + read->SetOffset(10); + read->SetClientId("user1"); + read->SetCount(55); + read->SetBytes(1000000); + read->SetTimeoutMs(5000); + + tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); //got read + + CmdWrite(0, "sourceid2", data, tc); //write + + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); //now got data + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); + UNIT_ASSERT_EQUAL(result->Record.GetPartitionResponse().GetCmdReadResult().ResultSize(), 5); //got response for whole written blobs + }); +} + // - + Y_UNIT_TEST(TestPQCacheSizeManagement) { TTestContext tc; @@ -1876,11 +1876,11 @@ Y_UNIT_TEST(TestPQCacheSizeManagement) { tc.Runtime->SetScheduledLimit(200); activeZone = false; - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"aaa", true}}, tc); //important client - never delete + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"aaa", true}}, tc); //important client - never delete TVector<std::pair<ui64, TString>> data; - ui32 pp = 4 + 8 + 2 + 9 + 100; + ui32 pp = 4 + 8 + 2 + 9 + 100; TString tmp{1024*1024 - pp - 2, '-'}; char k = 0; for (ui32 i = 0; i < 26 * 1024 * 1024;) { @@ -1925,7 +1925,7 @@ Y_UNIT_TEST(TestOffsetEstimation) { Y_UNIT_TEST(TestMaxTimeLagRewind) { TTestContext tc; - + RunTestWithReboots(tc.TabletIds, [&]() { return tc.InitialEventsFilter.Prepare(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { @@ -1934,10 +1934,10 @@ Y_UNIT_TEST(TestMaxTimeLagRewind) { tc.Runtime->SetScheduledLimit(200); - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"aaa", true}}, tc); + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"aaa", true}}, tc); activeZone = false; - + for (int i = 0; i < 5; i++) { TVector<std::pair<ui64, TString>> data; for (int j = 0; j < 7; j++) { @@ -1946,60 +1946,60 @@ Y_UNIT_TEST(TestMaxTimeLagRewind) { CmdWrite(0, "sourceid0", data, tc, false, {}, i == 0); tc.Runtime->UpdateCurrentTime(tc.Runtime->GetCurrentTime() + TDuration::Minutes(1)); } - auto ts = tc.Runtime->GetCurrentTime(); + auto ts = tc.Runtime->GetCurrentTime(); CmdRead(0, 0, 1, Max<i32>(), 1, false, tc, {0}); CmdRead(0, 0, 1, Max<i32>(), 1, false, tc, {21}, 3 * 60 * 1000); CmdRead(0, 22, 1, Max<i32>(), 1, false, tc, {22}, 3 * 60 * 1000); - CmdRead(0, 4, 1, Max<i32>(), 1, false, tc, {34}, 1000); - - CmdRead(0, 0, 1, Max<i32>(), 1, false, tc, {21}, 0, ts.MilliSeconds() - 3 * 60 * 1000); - CmdRead(0, 22, 1, Max<i32>(), 1, false, tc, {22}, 0, ts.MilliSeconds() - 3 * 60 * 1000); + CmdRead(0, 4, 1, Max<i32>(), 1, false, tc, {34}, 1000); + + CmdRead(0, 0, 1, Max<i32>(), 1, false, tc, {21}, 0, ts.MilliSeconds() - 3 * 60 * 1000); + CmdRead(0, 22, 1, Max<i32>(), 1, false, tc, {22}, 0, ts.MilliSeconds() - 3 * 60 * 1000); CmdRead(0, 4, 1, Max<i32>(), 1, false, tc, {34}, 0, ts.MilliSeconds() - 1000); - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"aaa", true}}, tc, 2, 6*1024*1024, true, ts.MilliSeconds() - 1000); - CmdRead(0, 0, 1, Max<i32>(), 1, false, tc, {34}); - + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"aaa", true}}, tc, 2, 6*1024*1024, true, ts.MilliSeconds() - 1000); + CmdRead(0, 0, 1, Max<i32>(), 1, false, tc, {34}); + }); } - + Y_UNIT_TEST(TestWriteTimeStampEstimate) { - TTestContext tc; - TFinalizer finalizer(tc); - tc.Prepare(); - - tc.Runtime->SetScheduledLimit(150); - tc.Runtime->SetDispatchTimeout(TDuration::Seconds(1)); - tc.Runtime->SetLogPriority(NKikimrServices::PERSQUEUE, NLog::PRI_DEBUG); - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"aaa", true}}, tc); - - tc.Runtime->UpdateCurrentTime(TInstant::MilliSeconds(1000000)); - - TVector<std::pair<ui64, TString>> data{{1,"abacaba"}}; - CmdWrite(0, "sourceid0", data, tc); - - CmdGetOffset(0, "user1", 0, tc, -1, 1000000); - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"aaa", true}}, tc, 2, 6 * 1024 * 1024, false); - - RestartTablet(tc); - - CmdGetOffset(0, "user1", 0, tc, -1, 0); - - tc.Runtime->UpdateCurrentTime(TInstant::MilliSeconds(2000000)); - - data.front().first = 2; - CmdWrite(0, "sourceid0", data, tc); - - CmdGetOffset(0, "user1", 0, tc, -1, 2000000); - - CmdUpdateWriteTimestamp(0, 3000000, tc); - - CmdGetOffset(0, "user1", 0, tc, -1, 3000000); - -} - - -} // TKeyValueTest -} // NKikimr + TTestContext tc; + TFinalizer finalizer(tc); + tc.Prepare(); + + tc.Runtime->SetScheduledLimit(150); + tc.Runtime->SetDispatchTimeout(TDuration::Seconds(1)); + tc.Runtime->SetLogPriority(NKikimrServices::PERSQUEUE, NLog::PRI_DEBUG); + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"aaa", true}}, tc); + + tc.Runtime->UpdateCurrentTime(TInstant::MilliSeconds(1000000)); + + TVector<std::pair<ui64, TString>> data{{1,"abacaba"}}; + CmdWrite(0, "sourceid0", data, tc); + + CmdGetOffset(0, "user1", 0, tc, -1, 1000000); + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {{"aaa", true}}, tc, 2, 6 * 1024 * 1024, false); + + RestartTablet(tc); + + CmdGetOffset(0, "user1", 0, tc, -1, 0); + + tc.Runtime->UpdateCurrentTime(TInstant::MilliSeconds(2000000)); + + data.front().first = 2; + CmdWrite(0, "sourceid0", data, tc); + + CmdGetOffset(0, "user1", 0, tc, -1, 2000000); + + CmdUpdateWriteTimestamp(0, 3000000, tc); + + CmdGetOffset(0, "user1", 0, tc, -1, 3000000); + +} + + +} // TKeyValueTest +} // NKikimr diff --git a/ydb/core/persqueue/pq_ut.h b/ydb/core/persqueue/pq_ut.h index ae5144fe64..3db93bc104 100644 --- a/ydb/core/persqueue/pq_ut.h +++ b/ydb/core/persqueue/pq_ut.h @@ -1,8 +1,8 @@ -#pragma once +#pragma once #include "pq.h" #include "user_info.h" - + #include <ydb/core/testlib/basics/runtime.h> #include <ydb/core/tablet_flat/tablet_flat_executed.h> #include <ydb/core/tx/schemeshard/schemeshard.h> @@ -15,943 +15,943 @@ #include <ydb/core/persqueue/partition.h> #include <ydb/core/engine/minikql/flat_local_tx_factory.h> #include <ydb/core/security/ticket_parser.h> - + #include <ydb/core/testlib/fake_scheme_shard.h> #include <ydb/core/testlib/tablet_helpers.h> - + #include <library/cpp/testing/unittest/registar.h> - -#include <util/system/sanitizers.h> -#include <util/system/valgrind.h> - -const bool ENABLE_DETAILED_PQ_LOG = false; -const bool ENABLE_DETAILED_KV_LOG = false; - -namespace NKikimr { -namespace { - -template <typename T> -inline constexpr static T PlainOrSoSlow(T plain, T slow) noexcept { - return NSan::PlainOrUnderSanitizer( - NValgrind::PlainOrUnderValgrind(plain, slow), - slow - ); -} - -constexpr ui32 NUM_WRITES = PlainOrSoSlow(100, 1); - -void SetupLogging(TTestActorRuntime& runtime) { - NActors::NLog::EPriority pqPriority = ENABLE_DETAILED_PQ_LOG ? NLog::PRI_TRACE : NLog::PRI_ERROR; - NActors::NLog::EPriority priority = ENABLE_DETAILED_KV_LOG ? NLog::PRI_DEBUG : NLog::PRI_ERROR; - NActors::NLog::EPriority otherPriority = NLog::PRI_INFO; - - runtime.SetLogPriority(NKikimrServices::PERSQUEUE, pqPriority); - runtime.SetLogPriority(NKikimrServices::KEYVALUE, priority); - runtime.SetLogPriority(NKikimrServices::BOOTSTRAPPER, priority); - runtime.SetLogPriority(NKikimrServices::TABLET_MAIN, priority); - runtime.SetLogPriority(NKikimrServices::TABLET_EXECUTOR, priority); - runtime.SetLogPriority(NKikimrServices::BS_PROXY, priority); - - runtime.SetLogPriority(NKikimrServices::HIVE, otherPriority); - runtime.SetLogPriority(NKikimrServices::LOCAL, otherPriority); - runtime.SetLogPriority(NKikimrServices::BS_NODE, otherPriority); - runtime.SetLogPriority(NKikimrServices::BS_CONTROLLER, otherPriority); - runtime.SetLogPriority(NKikimrServices::TABLET_RESOLVER, otherPriority); - - runtime.SetLogPriority(NKikimrServices::PIPE_CLIENT, otherPriority); - runtime.SetLogPriority(NKikimrServices::PIPE_SERVER, otherPriority); - -} - -class TInitialEventsFilter : TNonCopyable { - bool IsDone; -public: - TInitialEventsFilter() - : IsDone(false) - {} - - TTestActorRuntime::TEventFilter Prepare() { - IsDone = false; - return [&](TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event) { - return (*this)(runtime, event); - }; - } - - bool operator()(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event) { - Y_UNUSED(runtime); - Y_UNUSED(event); - return false; - } -}; - -} // anonymous namespace - - -struct TTestContext { - TTabletTypes::EType TabletType; - ui64 TabletId; - ui64 BalancerTabletId; - TInitialEventsFilter InitialEventsFilter; - TVector<ui64> TabletIds; - THolder<TTestActorRuntime> Runtime; + +#include <util/system/sanitizers.h> +#include <util/system/valgrind.h> + +const bool ENABLE_DETAILED_PQ_LOG = false; +const bool ENABLE_DETAILED_KV_LOG = false; + +namespace NKikimr { +namespace { + +template <typename T> +inline constexpr static T PlainOrSoSlow(T plain, T slow) noexcept { + return NSan::PlainOrUnderSanitizer( + NValgrind::PlainOrUnderValgrind(plain, slow), + slow + ); +} + +constexpr ui32 NUM_WRITES = PlainOrSoSlow(100, 1); + +void SetupLogging(TTestActorRuntime& runtime) { + NActors::NLog::EPriority pqPriority = ENABLE_DETAILED_PQ_LOG ? NLog::PRI_TRACE : NLog::PRI_ERROR; + NActors::NLog::EPriority priority = ENABLE_DETAILED_KV_LOG ? NLog::PRI_DEBUG : NLog::PRI_ERROR; + NActors::NLog::EPriority otherPriority = NLog::PRI_INFO; + + runtime.SetLogPriority(NKikimrServices::PERSQUEUE, pqPriority); + runtime.SetLogPriority(NKikimrServices::KEYVALUE, priority); + runtime.SetLogPriority(NKikimrServices::BOOTSTRAPPER, priority); + runtime.SetLogPriority(NKikimrServices::TABLET_MAIN, priority); + runtime.SetLogPriority(NKikimrServices::TABLET_EXECUTOR, priority); + runtime.SetLogPriority(NKikimrServices::BS_PROXY, priority); + + runtime.SetLogPriority(NKikimrServices::HIVE, otherPriority); + runtime.SetLogPriority(NKikimrServices::LOCAL, otherPriority); + runtime.SetLogPriority(NKikimrServices::BS_NODE, otherPriority); + runtime.SetLogPriority(NKikimrServices::BS_CONTROLLER, otherPriority); + runtime.SetLogPriority(NKikimrServices::TABLET_RESOLVER, otherPriority); + + runtime.SetLogPriority(NKikimrServices::PIPE_CLIENT, otherPriority); + runtime.SetLogPriority(NKikimrServices::PIPE_SERVER, otherPriority); + +} + +class TInitialEventsFilter : TNonCopyable { + bool IsDone; +public: + TInitialEventsFilter() + : IsDone(false) + {} + + TTestActorRuntime::TEventFilter Prepare() { + IsDone = false; + return [&](TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event) { + return (*this)(runtime, event); + }; + } + + bool operator()(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event) { + Y_UNUSED(runtime); + Y_UNUSED(event); + return false; + } +}; + +} // anonymous namespace + + +struct TTestContext { + TTabletTypes::EType TabletType; + ui64 TabletId; + ui64 BalancerTabletId; + TInitialEventsFilter InitialEventsFilter; + TVector<ui64> TabletIds; + THolder<TTestActorRuntime> Runtime; TActorId Edge; - THashMap<ui32, ui32> MsgSeqNoMap; - - - TTestContext() { - TabletType = TTabletTypes::PERSQUEUE; - TabletId = MakeTabletID(0, 0, 1); - TabletIds.push_back(TabletId); - - BalancerTabletId = MakeTabletID(0, 0, 2); - TabletIds.push_back(BalancerTabletId); - } - - static bool RequestTimeoutFilter(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event, TDuration duration, TInstant& deadline) { - if (event->GetTypeRewrite() == TEvents::TSystem::Wakeup) { + THashMap<ui32, ui32> MsgSeqNoMap; + + + TTestContext() { + TabletType = TTabletTypes::PERSQUEUE; + TabletId = MakeTabletID(0, 0, 1); + TabletIds.push_back(TabletId); + + BalancerTabletId = MakeTabletID(0, 0, 2); + TabletIds.push_back(BalancerTabletId); + } + + static bool RequestTimeoutFilter(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event, TDuration duration, TInstant& deadline) { + if (event->GetTypeRewrite() == TEvents::TSystem::Wakeup) { TActorId actorId = event->GetRecipientRewrite(); - IActor *actor = runtime.FindActor(actorId); - if (actor && actor->GetActivityType() == NKikimrServices::TActivity::PERSQUEUE_ANS_ACTOR) { - return true; - } - } - - Y_UNUSED(deadline); - Y_UNUSED(duration); - - return false; - } - - static bool ImmediateLogFlushAndRequestTimeoutFilter(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event, TDuration duration, TInstant& deadline) { - if (event->Type == NKikimr::TEvents::TEvFlushLog::EventType) { - deadline = TInstant(); - return false; - } - - deadline = runtime.GetTimeProvider()->Now() + duration; - return RequestTimeoutFilter(runtime, event, duration, deadline); - } - - void Prepare(const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& outActiveZone) { - Y_UNUSED(dispatchName); - outActiveZone = false; - Runtime.Reset(new TTestBasicRuntime); - Runtime->SetScheduledLimit(200); - SetupLogging(*Runtime); - SetupTabletServices(*Runtime); - setup(*Runtime); + IActor *actor = runtime.FindActor(actorId); + if (actor && actor->GetActivityType() == NKikimrServices::TActivity::PERSQUEUE_ANS_ACTOR) { + return true; + } + } + + Y_UNUSED(deadline); + Y_UNUSED(duration); + + return false; + } + + static bool ImmediateLogFlushAndRequestTimeoutFilter(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event, TDuration duration, TInstant& deadline) { + if (event->Type == NKikimr::TEvents::TEvFlushLog::EventType) { + deadline = TInstant(); + return false; + } + + deadline = runtime.GetTimeProvider()->Now() + duration; + return RequestTimeoutFilter(runtime, event, duration, deadline); + } + + void Prepare(const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& outActiveZone) { + Y_UNUSED(dispatchName); + outActiveZone = false; + Runtime.Reset(new TTestBasicRuntime); + Runtime->SetScheduledLimit(200); + SetupLogging(*Runtime); + SetupTabletServices(*Runtime); + setup(*Runtime); CreateTestBootstrapper(*Runtime, CreateTestTabletInfo(TabletId, TabletType, TErasureType::ErasureNone), - &CreatePersQueue); - - TDispatchOptions options; - options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot)); - Runtime->DispatchEvents(options); - + &CreatePersQueue); + + TDispatchOptions options; + options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot)); + Runtime->DispatchEvents(options); + CreateTestBootstrapper(*Runtime, CreateTestTabletInfo(BalancerTabletId, TTabletTypes::PERSQUEUE_READ_BALANCER, TErasureType::ErasureNone), - &CreatePersQueueReadBalancer); - - options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot)); - Runtime->DispatchEvents(options); - - Edge = Runtime->AllocateEdgeActor(); - - Runtime->SetScheduledEventFilter(&RequestTimeoutFilter); - - outActiveZone = true; - } - - void Prepare() { - Runtime.Reset(new TTestBasicRuntime); - Runtime->SetScheduledLimit(200); - SetupLogging(*Runtime); - SetupTabletServices(*Runtime); + &CreatePersQueueReadBalancer); + + options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot)); + Runtime->DispatchEvents(options); + + Edge = Runtime->AllocateEdgeActor(); + + Runtime->SetScheduledEventFilter(&RequestTimeoutFilter); + + outActiveZone = true; + } + + void Prepare() { + Runtime.Reset(new TTestBasicRuntime); + Runtime->SetScheduledLimit(200); + SetupLogging(*Runtime); + SetupTabletServices(*Runtime); CreateTestBootstrapper(*Runtime, CreateTestTabletInfo(TabletId, TabletType, TErasureType::ErasureNone), - &CreatePersQueue); - - TDispatchOptions options; - options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot)); - Runtime->DispatchEvents(options); - + &CreatePersQueue); + + TDispatchOptions options; + options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot)); + Runtime->DispatchEvents(options); + CreateTestBootstrapper(*Runtime, CreateTestTabletInfo(BalancerTabletId, TTabletTypes::PERSQUEUE_READ_BALANCER, TErasureType::ErasureNone), - &CreatePersQueueReadBalancer); - - options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot)); - Runtime->DispatchEvents(options); - - Edge = Runtime->AllocateEdgeActor(); - - Runtime->SetScheduledEventFilter(&RequestTimeoutFilter); - } - - - void Finalize() { - Runtime.Reset(nullptr); - } -}; - -struct TFinalizer { - TTestContext& TestContext; - - TFinalizer(TTestContext& testContext) - : TestContext(testContext) - {} - - ~TFinalizer() { - TestContext.Finalize(); - } -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// SINGLE COMMAND TEST FUNCTIONS -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -void PQTabletPrepare(ui32 mcip, ui64 msip, ui32 deleteTime, const TVector<std::pair<TString, bool>>& users, TTestContext& tc, int partitions = 2, ui32 lw = 6*1024*1024, bool localDC = true, ui64 ts = 0, ui64 sidMaxCount = 0, ui32 specVersion = 0) { - TAutoPtr<IEventHandle> handle; - static int version = 0; - if (specVersion) { - version = specVersion; - } else { - ++version; - } - for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { - try { - tc.Runtime->ResetScheduledCount(); - - THolder<TEvPersQueue::TEvUpdateConfig> request(new TEvPersQueue::TEvUpdateConfig()); - for (i32 i = 0; i < partitions; ++i) { + &CreatePersQueueReadBalancer); + + options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot)); + Runtime->DispatchEvents(options); + + Edge = Runtime->AllocateEdgeActor(); + + Runtime->SetScheduledEventFilter(&RequestTimeoutFilter); + } + + + void Finalize() { + Runtime.Reset(nullptr); + } +}; + +struct TFinalizer { + TTestContext& TestContext; + + TFinalizer(TTestContext& testContext) + : TestContext(testContext) + {} + + ~TFinalizer() { + TestContext.Finalize(); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// SINGLE COMMAND TEST FUNCTIONS +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +void PQTabletPrepare(ui32 mcip, ui64 msip, ui32 deleteTime, const TVector<std::pair<TString, bool>>& users, TTestContext& tc, int partitions = 2, ui32 lw = 6*1024*1024, bool localDC = true, ui64 ts = 0, ui64 sidMaxCount = 0, ui32 specVersion = 0) { + TAutoPtr<IEventHandle> handle; + static int version = 0; + if (specVersion) { + version = specVersion; + } else { + ++version; + } + for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { + try { + tc.Runtime->ResetScheduledCount(); + + THolder<TEvPersQueue::TEvUpdateConfig> request(new TEvPersQueue::TEvUpdateConfig()); + for (i32 i = 0; i < partitions; ++i) { request->Record.MutableTabletConfig()->AddPartitionIds(i); - } - request->Record.MutableTabletConfig()->SetCacheSize(10*1024*1024); - request->Record.SetTxId(12345); - auto tabletConfig = request->Record.MutableTabletConfig(); + } + request->Record.MutableTabletConfig()->SetCacheSize(10*1024*1024); + request->Record.SetTxId(12345); + auto tabletConfig = request->Record.MutableTabletConfig(); tabletConfig->SetTopicName("topic"); - tabletConfig->SetVersion(version); - tabletConfig->SetLocalDC(localDC); - tabletConfig->AddReadRules("user"); - tabletConfig->AddReadFromTimestampsMs(ts); - auto config = tabletConfig->MutablePartitionConfig(); - config->SetMaxCountInPartition(mcip); - config->SetMaxSizeInPartition(msip); - config->SetLifetimeSeconds(deleteTime); + tabletConfig->SetVersion(version); + tabletConfig->SetLocalDC(localDC); + tabletConfig->AddReadRules("user"); + tabletConfig->AddReadFromTimestampsMs(ts); + auto config = tabletConfig->MutablePartitionConfig(); + config->SetMaxCountInPartition(mcip); + config->SetMaxSizeInPartition(msip); + config->SetLifetimeSeconds(deleteTime); config->SetSourceIdLifetimeSeconds(1*60*60); if (sidMaxCount > 0) config->SetSourceIdMaxCounts(sidMaxCount); - config->SetMaxWriteInflightSize(90000000); - config->SetLowWatermark(lw); - - for (auto& u : users) { - if (u.second) - config->AddImportantClientId(u.first); - if (u.first != "user") - tabletConfig->AddReadRules(u.first); - } - tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); - TEvPersQueue::TEvUpdateConfigResponse* result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvUpdateConfigResponse>(handle); - - UNIT_ASSERT(result); - auto& rec = result->Record; - UNIT_ASSERT(rec.HasStatus() && rec.GetStatus() == NKikimrPQ::OK); - UNIT_ASSERT(rec.HasTxId() && rec.GetTxId() == 12345); - UNIT_ASSERT(rec.HasOrigin() && result->GetOrigin() == 1); - retriesLeft = 0; - } catch (NActors::TSchedulingLimitReachedException) { - UNIT_ASSERT(retriesLeft >= 1); - } - } - TEvKeyValue::TEvResponse *result; - THolder<TEvKeyValue::TEvRequest> request; - for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { - try { - - request.Reset(new TEvKeyValue::TEvRequest); - auto read = request->Record.AddCmdRead(); - read->SetKey("_config"); - - tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); - result = tc.Runtime->GrabEdgeEvent<TEvKeyValue::TEvResponse>(handle); - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - UNIT_ASSERT_EQUAL(result->Record.GetStatus(), NMsgBusProxy::MSTATUS_OK); - retriesLeft = 0; - } catch (NActors::TSchedulingLimitReachedException) { - UNIT_ASSERT(retriesLeft >= 1); - } - } -} - - - -void BalancerPrepare(const TString topic, const TVector<std::pair<ui32, std::pair<ui64, ui32>>>& map, const ui64 ssId, TTestContext& tc, const bool requireAuth = false) { - TAutoPtr<IEventHandle> handle; - static int version = 0; - ++version; - - for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { - try { - tc.Runtime->ResetScheduledCount(); - - THolder<TEvPersQueue::TEvUpdateBalancerConfig> request(new TEvPersQueue::TEvUpdateBalancerConfig()); - for (const auto& p : map) { - auto part = request->Record.AddPartitions(); - part->SetPartition(p.first); - part->SetGroup(p.second.second); - part->SetTabletId(p.second.first); - - auto tablet = request->Record.AddTablets(); - tablet->SetTabletId(p.second.first); - tablet->SetOwner(1); - tablet->SetIdx(p.second.first); - } - request->Record.SetTxId(12345); - request->Record.SetPathId(1); - request->Record.SetVersion(version); - request->Record.SetTopicName(topic); - request->Record.SetPath("path"); - request->Record.SetSchemeShardId(ssId); - request->Record.MutableTabletConfig()->AddReadRules("client"); - request->Record.MutableTabletConfig()->SetRequireAuthWrite(requireAuth); - request->Record.MutableTabletConfig()->SetRequireAuthRead(requireAuth); - - tc.Runtime->SendToPipe(tc.BalancerTabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); - TEvPersQueue::TEvUpdateConfigResponse* result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvUpdateConfigResponse>(handle); - - UNIT_ASSERT(result); - auto& rec = result->Record; - UNIT_ASSERT(rec.HasStatus() && rec.GetStatus() == NKikimrPQ::OK); - UNIT_ASSERT(rec.HasTxId() && rec.GetTxId() == 12345); - UNIT_ASSERT(rec.HasOrigin() && result->GetOrigin() == tc.BalancerTabletId); - retriesLeft = 0; - } catch (NActors::TSchedulingLimitReachedException) { - UNIT_ASSERT(retriesLeft >= 1); - } - } - //TODO: check state - TTestActorRuntime& runtime = *tc.Runtime; - + config->SetMaxWriteInflightSize(90000000); + config->SetLowWatermark(lw); + + for (auto& u : users) { + if (u.second) + config->AddImportantClientId(u.first); + if (u.first != "user") + tabletConfig->AddReadRules(u.first); + } + tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); + TEvPersQueue::TEvUpdateConfigResponse* result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvUpdateConfigResponse>(handle); + + UNIT_ASSERT(result); + auto& rec = result->Record; + UNIT_ASSERT(rec.HasStatus() && rec.GetStatus() == NKikimrPQ::OK); + UNIT_ASSERT(rec.HasTxId() && rec.GetTxId() == 12345); + UNIT_ASSERT(rec.HasOrigin() && result->GetOrigin() == 1); + retriesLeft = 0; + } catch (NActors::TSchedulingLimitReachedException) { + UNIT_ASSERT(retriesLeft >= 1); + } + } + TEvKeyValue::TEvResponse *result; + THolder<TEvKeyValue::TEvRequest> request; + for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { + try { + + request.Reset(new TEvKeyValue::TEvRequest); + auto read = request->Record.AddCmdRead(); + read->SetKey("_config"); + + tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); + result = tc.Runtime->GrabEdgeEvent<TEvKeyValue::TEvResponse>(handle); + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + UNIT_ASSERT_EQUAL(result->Record.GetStatus(), NMsgBusProxy::MSTATUS_OK); + retriesLeft = 0; + } catch (NActors::TSchedulingLimitReachedException) { + UNIT_ASSERT(retriesLeft >= 1); + } + } +} + + + +void BalancerPrepare(const TString topic, const TVector<std::pair<ui32, std::pair<ui64, ui32>>>& map, const ui64 ssId, TTestContext& tc, const bool requireAuth = false) { + TAutoPtr<IEventHandle> handle; + static int version = 0; + ++version; + + for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { + try { + tc.Runtime->ResetScheduledCount(); + + THolder<TEvPersQueue::TEvUpdateBalancerConfig> request(new TEvPersQueue::TEvUpdateBalancerConfig()); + for (const auto& p : map) { + auto part = request->Record.AddPartitions(); + part->SetPartition(p.first); + part->SetGroup(p.second.second); + part->SetTabletId(p.second.first); + + auto tablet = request->Record.AddTablets(); + tablet->SetTabletId(p.second.first); + tablet->SetOwner(1); + tablet->SetIdx(p.second.first); + } + request->Record.SetTxId(12345); + request->Record.SetPathId(1); + request->Record.SetVersion(version); + request->Record.SetTopicName(topic); + request->Record.SetPath("path"); + request->Record.SetSchemeShardId(ssId); + request->Record.MutableTabletConfig()->AddReadRules("client"); + request->Record.MutableTabletConfig()->SetRequireAuthWrite(requireAuth); + request->Record.MutableTabletConfig()->SetRequireAuthRead(requireAuth); + + tc.Runtime->SendToPipe(tc.BalancerTabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); + TEvPersQueue::TEvUpdateConfigResponse* result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvUpdateConfigResponse>(handle); + + UNIT_ASSERT(result); + auto& rec = result->Record; + UNIT_ASSERT(rec.HasStatus() && rec.GetStatus() == NKikimrPQ::OK); + UNIT_ASSERT(rec.HasTxId() && rec.GetTxId() == 12345); + UNIT_ASSERT(rec.HasOrigin() && result->GetOrigin() == tc.BalancerTabletId); + retriesLeft = 0; + } catch (NActors::TSchedulingLimitReachedException) { + UNIT_ASSERT(retriesLeft >= 1); + } + } + //TODO: check state + TTestActorRuntime& runtime = *tc.Runtime; + ForwardToTablet(runtime, tc.BalancerTabletId, tc.Edge, new TEvents::TEvPoisonPill()); - TDispatchOptions rebootOptions; - rebootOptions.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvRestored, 2)); - runtime.DispatchEvents(rebootOptions); -} - - -void PQGetPartInfo(ui64 startOffset, ui64 endOffset, TTestContext& tc) { - TAutoPtr<IEventHandle> handle; - TEvPersQueue::TEvOffsetsResponse *result; - THolder<TEvPersQueue::TEvOffsets> request; - - for (i32 retriesLeft = 3; retriesLeft > 0; --retriesLeft) { - try { - - tc.Runtime->ResetScheduledCount(); - request.Reset(new TEvPersQueue::TEvOffsets); - - tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvOffsetsResponse>(handle); - UNIT_ASSERT(result); - - if (result->Record.PartResultSize() == 0 || result->Record.GetPartResult(0).GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { - tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress - retriesLeft = 3; - continue; - } - - UNIT_ASSERT(result->Record.PartResultSize()); - UNIT_ASSERT_VALUES_EQUAL((ui64)result->Record.GetPartResult(0).GetStartOffset(), startOffset); - UNIT_ASSERT_VALUES_EQUAL((ui64)result->Record.GetPartResult(0).GetEndOffset(), endOffset); - retriesLeft = 0; - } catch (NActors::TSchedulingLimitReachedException) { - UNIT_ASSERT(retriesLeft > 0); - } - } - -} - -void RestartTablet(TTestContext& tc) { - TTestActorRuntime& runtime = *tc.Runtime; - + TDispatchOptions rebootOptions; + rebootOptions.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvRestored, 2)); + runtime.DispatchEvents(rebootOptions); +} + + +void PQGetPartInfo(ui64 startOffset, ui64 endOffset, TTestContext& tc) { + TAutoPtr<IEventHandle> handle; + TEvPersQueue::TEvOffsetsResponse *result; + THolder<TEvPersQueue::TEvOffsets> request; + + for (i32 retriesLeft = 3; retriesLeft > 0; --retriesLeft) { + try { + + tc.Runtime->ResetScheduledCount(); + request.Reset(new TEvPersQueue::TEvOffsets); + + tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvOffsetsResponse>(handle); + UNIT_ASSERT(result); + + if (result->Record.PartResultSize() == 0 || result->Record.GetPartResult(0).GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { + tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress + retriesLeft = 3; + continue; + } + + UNIT_ASSERT(result->Record.PartResultSize()); + UNIT_ASSERT_VALUES_EQUAL((ui64)result->Record.GetPartResult(0).GetStartOffset(), startOffset); + UNIT_ASSERT_VALUES_EQUAL((ui64)result->Record.GetPartResult(0).GetEndOffset(), endOffset); + retriesLeft = 0; + } catch (NActors::TSchedulingLimitReachedException) { + UNIT_ASSERT(retriesLeft > 0); + } + } + +} + +void RestartTablet(TTestContext& tc) { + TTestActorRuntime& runtime = *tc.Runtime; + ForwardToTablet(runtime, tc.TabletId, tc.Edge, new TEvents::TEvPoisonPill()); - TDispatchOptions rebootOptions; - rebootOptions.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvRestored, 2)); - runtime.DispatchEvents(rebootOptions); -} - - + TDispatchOptions rebootOptions; + rebootOptions.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvRestored, 2)); + runtime.DispatchEvents(rebootOptions); +} + + TActorId SetOwner(const ui32 partition, TTestContext& tc, const TString& owner, bool force) { TActorId pipeClient = tc.Runtime->ConnectToPipe(tc.TabletId, tc.Edge, 0, GetPipeConfigWithRetries()); - - THolder<TEvPersQueue::TEvRequest> request; - - request.Reset(new TEvPersQueue::TEvRequest); - auto req = request->Record.MutablePartitionRequest(); - req->SetPartition(partition); - req->MutableCmdGetOwnership()->SetOwner(owner); - req->MutableCmdGetOwnership()->SetForce(force); + + THolder<TEvPersQueue::TEvRequest> request; + + request.Reset(new TEvPersQueue::TEvRequest); + auto req = request->Record.MutablePartitionRequest(); + req->SetPartition(partition); + req->MutableCmdGetOwnership()->SetOwner(owner); + req->MutableCmdGetOwnership()->SetForce(force); ActorIdToProto(pipeClient, req->MutablePipeClient()); - - tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries(), pipeClient); - return pipeClient; -} - + + tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries(), pipeClient); + return pipeClient; +} + TActorId RegisterReadSession(const TString& session, TTestContext& tc, const TVector<ui32>& groups = {}) { TActorId pipeClient = tc.Runtime->ConnectToPipe(tc.BalancerTabletId, tc.Edge, 0, GetPipeConfigWithRetries()); - - THolder<TEvPersQueue::TEvRegisterReadSession> request; - - request.Reset(new TEvPersQueue::TEvRegisterReadSession); - auto& req = request->Record; - req.SetSession(session); + + THolder<TEvPersQueue::TEvRegisterReadSession> request; + + request.Reset(new TEvPersQueue::TEvRegisterReadSession); + auto& req = request->Record; + req.SetSession(session); ActorIdToProto(pipeClient, req.MutablePipeClient()); - req.SetClientId("user"); - for (const auto& g : groups) { - req.AddGroups(g); - } - - tc.Runtime->SendToPipe(tc.BalancerTabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries(), pipeClient); - return pipeClient; -} - -void WaitSessionKill(TTestContext& tc) { - TAutoPtr<IEventHandle> handle; - - tc.Runtime->ResetScheduledCount(); - - TEvPersQueue::TEvError *result; - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvError>(handle); - UNIT_ASSERT(result); - Cerr << "ANS: " << result->Record << "\n"; -// UNIT_ASSERT_EQUAL(result->Record.GetSession(), session); -} - - + req.SetClientId("user"); + for (const auto& g : groups) { + req.AddGroups(g); + } + + tc.Runtime->SendToPipe(tc.BalancerTabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries(), pipeClient); + return pipeClient; +} + +void WaitSessionKill(TTestContext& tc) { + TAutoPtr<IEventHandle> handle; + + tc.Runtime->ResetScheduledCount(); + + TEvPersQueue::TEvError *result; + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvError>(handle); + UNIT_ASSERT(result); + Cerr << "ANS: " << result->Record << "\n"; +// UNIT_ASSERT_EQUAL(result->Record.GetSession(), session); +} + + void WaitPartition(const TString &session, TTestContext& tc, ui32 partition, const TString& sessionToRelease, const TString& topic, const TActorId& pipe, bool ok = true) { - TAutoPtr<IEventHandle> handle; - - tc.Runtime->ResetScheduledCount(); - - for (ui32 i = 0; i < 3; ++i) { - Cerr << "STEP " << i << " ok " << ok << "\n"; - - try { - tc.Runtime->ResetScheduledCount(); - if (i % 2 == 0) { - TEvPersQueue::TEvLockPartition *result; - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvLockPartition>(handle); - UNIT_ASSERT(result); - Cerr << "ANS: " << result->Record << "\n"; - UNIT_ASSERT(ok); - UNIT_ASSERT_EQUAL(result->Record.GetSession(), session); - break; - } else { - TEvPersQueue::TEvReleasePartition *result; - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvReleasePartition>(handle); - UNIT_ASSERT(result); - - Cerr << "ANS2: " << result->Record << "\n"; - - UNIT_ASSERT_EQUAL(result->Record.GetSession(), sessionToRelease); - UNIT_ASSERT(ok); - - THolder<TEvPersQueue::TEvPartitionReleased> request; - - request.Reset(new TEvPersQueue::TEvPartitionReleased); - auto& req = request->Record; - req.SetSession(sessionToRelease); - req.SetPartition(partition); - req.SetTopic(topic); - req.SetClientId("user"); + TAutoPtr<IEventHandle> handle; + + tc.Runtime->ResetScheduledCount(); + + for (ui32 i = 0; i < 3; ++i) { + Cerr << "STEP " << i << " ok " << ok << "\n"; + + try { + tc.Runtime->ResetScheduledCount(); + if (i % 2 == 0) { + TEvPersQueue::TEvLockPartition *result; + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvLockPartition>(handle); + UNIT_ASSERT(result); + Cerr << "ANS: " << result->Record << "\n"; + UNIT_ASSERT(ok); + UNIT_ASSERT_EQUAL(result->Record.GetSession(), session); + break; + } else { + TEvPersQueue::TEvReleasePartition *result; + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvReleasePartition>(handle); + UNIT_ASSERT(result); + + Cerr << "ANS2: " << result->Record << "\n"; + + UNIT_ASSERT_EQUAL(result->Record.GetSession(), sessionToRelease); + UNIT_ASSERT(ok); + + THolder<TEvPersQueue::TEvPartitionReleased> request; + + request.Reset(new TEvPersQueue::TEvPartitionReleased); + auto& req = request->Record; + req.SetSession(sessionToRelease); + req.SetPartition(partition); + req.SetTopic(topic); + req.SetClientId("user"); ActorIdToProto(pipe, req.MutablePipeClient()); - - tc.Runtime->SendToPipe(tc.BalancerTabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries(), pipe); - } - } catch (NActors::TSchedulingLimitReachedException) { - UNIT_ASSERT(i < 2 || !ok); - } catch (NActors::TEmptyEventQueueException) { - UNIT_ASSERT(i < 2 || !ok); - } - } -} - - + + tc.Runtime->SendToPipe(tc.BalancerTabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries(), pipe); + } + } catch (NActors::TSchedulingLimitReachedException) { + UNIT_ASSERT(i < 2 || !ok); + } catch (NActors::TEmptyEventQueueException) { + UNIT_ASSERT(i < 2 || !ok); + } + } +} + + std::pair<TString, TActorId> CmdSetOwner(const ui32 partition, TTestContext& tc, const TString& owner = "default", bool force = true) { - TAutoPtr<IEventHandle> handle; - TEvPersQueue::TEvResponse *result; - TString cookie; + TAutoPtr<IEventHandle> handle; + TEvPersQueue::TEvResponse *result; + TString cookie; TActorId pipeClient; - for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { - try { - tc.Runtime->ResetScheduledCount(); - - pipeClient = SetOwner(partition, tc, owner, force); - - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { - tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress - retriesLeft = 3; - continue; - } - - if (result->Record.GetErrorReason().StartsWith("ownership session is killed by another session with id ")) { - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - } - - if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { - tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress - retriesLeft = 3; - continue; - } - - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); - UNIT_ASSERT(result->Record.HasPartitionResponse()); - UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdGetOwnershipResult()); - UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdGetOwnershipResult().HasOwnerCookie()); - cookie = result->Record.GetPartitionResponse().GetCmdGetOwnershipResult().GetOwnerCookie(); - UNIT_ASSERT(!cookie.empty()); - retriesLeft = 0; - } catch (NActors::TSchedulingLimitReachedException) { - UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2); - } - } - return std::make_pair(cookie, pipeClient); -} - - -void WritePartData(const ui32 partition, const TString& sourceId, const i64 offset, const ui64 seqNo, const ui16 partNo, const ui16 totalParts, - const ui32 totalSize, const TString& data, TTestContext& tc, const TString& cookie, i32 msgSeqNo) -{ - THolder<TEvPersQueue::TEvRequest> request; - tc.Runtime->ResetScheduledCount(); - request.Reset(new TEvPersQueue::TEvRequest); - auto req = request->Record.MutablePartitionRequest(); - req->SetPartition(partition); - req->SetOwnerCookie(cookie); - req->SetMessageNo(msgSeqNo); - if (offset != -1) - req->SetCmdWriteOffset(offset); - auto write = req->AddCmdWrite(); - write->SetSourceId(sourceId); - write->SetSeqNo(seqNo); - write->SetPartNo(partNo); - write->SetTotalParts(totalParts); - if (partNo == 0) - write->SetTotalSize(totalSize); - write->SetData(data); - - tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); -} - -void WritePartDataWithBigMsg(const ui32 partition, const TString& sourceId, const ui64 seqNo, const ui16 partNo, const ui16 totalParts, - const ui32 totalSize, const TString& data, TTestContext& tc, const TString& cookie, i32 msgSeqNo, ui32 bigMsgSize) -{ - THolder<TEvPersQueue::TEvRequest> request; - tc.Runtime->ResetScheduledCount(); - request.Reset(new TEvPersQueue::TEvRequest); - auto req = request->Record.MutablePartitionRequest(); - req->SetPartition(partition); - req->SetOwnerCookie(cookie); - req->SetMessageNo(msgSeqNo); - - TString bigData(bigMsgSize, 'a'); - - auto write = req->AddCmdWrite(); - write->SetSourceId(sourceId); - write->SetSeqNo(seqNo); - write->SetData(bigData); - - write = req->AddCmdWrite(); - write->SetSourceId(sourceId); - write->SetSeqNo(seqNo + 1); - write->SetPartNo(partNo); - write->SetTotalParts(totalParts); - if (partNo == 0) - write->SetTotalSize(totalSize); - write->SetData(data); - - - tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); -} - - - -void WriteData(const ui32 partition, const TString& sourceId, const TVector<std::pair<ui64, TString>> data, TTestContext& tc, - const TString& cookie, i32 msgSeqNo, i64 offset, bool disableDeduplication = false) -{ - THolder<TEvPersQueue::TEvRequest> request; - tc.Runtime->ResetScheduledCount(); - request.Reset(new TEvPersQueue::TEvRequest); - auto req = request->Record.MutablePartitionRequest(); - req->SetPartition(partition); - req->SetOwnerCookie(cookie); - req->SetMessageNo(msgSeqNo); - if (offset >= 0) - req->SetCmdWriteOffset(offset); - for (auto& p : data) { - auto write = req->AddCmdWrite(); - write->SetSourceId(sourceId); - write->SetSeqNo(p.first); - write->SetData(p.second); - write->SetDisableDeduplication(disableDeduplication); - } - tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); -} - + for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { + try { + tc.Runtime->ResetScheduledCount(); + + pipeClient = SetOwner(partition, tc, owner, force); + + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { + tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress + retriesLeft = 3; + continue; + } + + if (result->Record.GetErrorReason().StartsWith("ownership session is killed by another session with id ")) { + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + } + + if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { + tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress + retriesLeft = 3; + continue; + } + + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); + UNIT_ASSERT(result->Record.HasPartitionResponse()); + UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdGetOwnershipResult()); + UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdGetOwnershipResult().HasOwnerCookie()); + cookie = result->Record.GetPartitionResponse().GetCmdGetOwnershipResult().GetOwnerCookie(); + UNIT_ASSERT(!cookie.empty()); + retriesLeft = 0; + } catch (NActors::TSchedulingLimitReachedException) { + UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2); + } + } + return std::make_pair(cookie, pipeClient); +} + + +void WritePartData(const ui32 partition, const TString& sourceId, const i64 offset, const ui64 seqNo, const ui16 partNo, const ui16 totalParts, + const ui32 totalSize, const TString& data, TTestContext& tc, const TString& cookie, i32 msgSeqNo) +{ + THolder<TEvPersQueue::TEvRequest> request; + tc.Runtime->ResetScheduledCount(); + request.Reset(new TEvPersQueue::TEvRequest); + auto req = request->Record.MutablePartitionRequest(); + req->SetPartition(partition); + req->SetOwnerCookie(cookie); + req->SetMessageNo(msgSeqNo); + if (offset != -1) + req->SetCmdWriteOffset(offset); + auto write = req->AddCmdWrite(); + write->SetSourceId(sourceId); + write->SetSeqNo(seqNo); + write->SetPartNo(partNo); + write->SetTotalParts(totalParts); + if (partNo == 0) + write->SetTotalSize(totalSize); + write->SetData(data); + + tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); +} + +void WritePartDataWithBigMsg(const ui32 partition, const TString& sourceId, const ui64 seqNo, const ui16 partNo, const ui16 totalParts, + const ui32 totalSize, const TString& data, TTestContext& tc, const TString& cookie, i32 msgSeqNo, ui32 bigMsgSize) +{ + THolder<TEvPersQueue::TEvRequest> request; + tc.Runtime->ResetScheduledCount(); + request.Reset(new TEvPersQueue::TEvRequest); + auto req = request->Record.MutablePartitionRequest(); + req->SetPartition(partition); + req->SetOwnerCookie(cookie); + req->SetMessageNo(msgSeqNo); + + TString bigData(bigMsgSize, 'a'); + + auto write = req->AddCmdWrite(); + write->SetSourceId(sourceId); + write->SetSeqNo(seqNo); + write->SetData(bigData); + + write = req->AddCmdWrite(); + write->SetSourceId(sourceId); + write->SetSeqNo(seqNo + 1); + write->SetPartNo(partNo); + write->SetTotalParts(totalParts); + if (partNo == 0) + write->SetTotalSize(totalSize); + write->SetData(data); + + + tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); +} + + + +void WriteData(const ui32 partition, const TString& sourceId, const TVector<std::pair<ui64, TString>> data, TTestContext& tc, + const TString& cookie, i32 msgSeqNo, i64 offset, bool disableDeduplication = false) +{ + THolder<TEvPersQueue::TEvRequest> request; + tc.Runtime->ResetScheduledCount(); + request.Reset(new TEvPersQueue::TEvRequest); + auto req = request->Record.MutablePartitionRequest(); + req->SetPartition(partition); + req->SetOwnerCookie(cookie); + req->SetMessageNo(msgSeqNo); + if (offset >= 0) + req->SetCmdWriteOffset(offset); + for (auto& p : data) { + auto write = req->AddCmdWrite(); + write->SetSourceId(sourceId); + write->SetSeqNo(p.first); + write->SetData(p.second); + write->SetDisableDeduplication(disableDeduplication); + } + tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); +} + void CmdWrite(const ui32 partition, const TString& sourceId, const TVector<std::pair<ui64, TString>> data, TTestContext& tc, bool error = false, const THashSet<ui32>& alreadyWrittenSeqNo = {}, bool isFirst = false, const TString& ownerCookie = "", i32 msn = -1, i64 offset = -1, bool treatWrongCookieAsError = false, bool treatBadOffsetAsError = true, bool disableDeduplication = false) { - TAutoPtr<IEventHandle> handle; - TEvPersQueue::TEvResponse *result; - - ui32& msgSeqNo = tc.MsgSeqNoMap[partition]; - if (msn != -1) msgSeqNo = msn; - TString cookie = ownerCookie; - for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { - try { - WriteData(partition, sourceId, data, tc, cookie, msgSeqNo, offset, disableDeduplication); - result = tc.Runtime->GrabEdgeEventIf<TEvPersQueue::TEvResponse>(handle, [](const TEvPersQueue::TEvResponse& ev){ - if (ev.Record.HasPartitionResponse() && ev.Record.GetPartitionResponse().CmdWriteResultSize() > 0 || ev.Record.GetErrorCode() != NPersQueue::NErrorCode::OK) - return true; - return false; - }); //there could be outgoing reads in TestReadSubscription test - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { - tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress - retriesLeft = 3; - continue; - } - - if (!treatWrongCookieAsError && result->Record.GetErrorCode() == NPersQueue::NErrorCode::WRONG_COOKIE) { - cookie = CmdSetOwner(partition, tc).first; - msgSeqNo = 0; - retriesLeft = 3; - continue; - } - - if (!treatBadOffsetAsError && result->Record.GetErrorCode() == NPersQueue::NErrorCode::WRITE_ERROR_BAD_OFFSET) { - return; - } - - if (error) { - UNIT_ASSERT(result->Record.GetErrorCode() == NPersQueue::NErrorCode::WRITE_ERROR_PARTITION_IS_FULL || - result->Record.GetErrorCode() == NPersQueue::NErrorCode::BAD_REQUEST || result->Record.GetErrorCode() == NPersQueue::NErrorCode::WRONG_COOKIE); - break; - } else { - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); - } - UNIT_ASSERT(result->Record.GetPartitionResponse().CmdWriteResultSize() == data.size()); - - for (ui32 i = 0; i < data.size(); ++i) { - UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdWriteResult(i).HasAlreadyWritten()); - UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdWriteResult(i).HasOffset()); - UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdWriteResult(i).HasMaxSeqNo() == - result->Record.GetPartitionResponse().GetCmdWriteResult(i).GetAlreadyWritten()); - if (result->Record.GetPartitionResponse().GetCmdWriteResult(i).HasMaxSeqNo()) { - UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdWriteResult(i).GetMaxSeqNo() >= (i64)data[i].first); - } - if (isFirst || offset != -1) { - UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdWriteResult(i).GetAlreadyWritten() - || result->Record.GetPartitionResponse().GetCmdWriteResult(i).GetOffset() == i + (offset == -1 ? 0 : offset)); - } - } - for (ui32 i = 0; i < data.size(); ++i) { - auto res = result->Record.GetPartitionResponse().GetCmdWriteResult(i); - UNIT_ASSERT(!alreadyWrittenSeqNo.contains(res.GetSeqNo()) || res.GetAlreadyWritten()); - } - retriesLeft = 0; - } catch (NActors::TSchedulingLimitReachedException) { - UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2); - retriesLeft = 3; - } - } - ++msgSeqNo; -} - - -void ReserveBytes(const ui32 partition, TTestContext& tc, + TAutoPtr<IEventHandle> handle; + TEvPersQueue::TEvResponse *result; + + ui32& msgSeqNo = tc.MsgSeqNoMap[partition]; + if (msn != -1) msgSeqNo = msn; + TString cookie = ownerCookie; + for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { + try { + WriteData(partition, sourceId, data, tc, cookie, msgSeqNo, offset, disableDeduplication); + result = tc.Runtime->GrabEdgeEventIf<TEvPersQueue::TEvResponse>(handle, [](const TEvPersQueue::TEvResponse& ev){ + if (ev.Record.HasPartitionResponse() && ev.Record.GetPartitionResponse().CmdWriteResultSize() > 0 || ev.Record.GetErrorCode() != NPersQueue::NErrorCode::OK) + return true; + return false; + }); //there could be outgoing reads in TestReadSubscription test + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { + tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress + retriesLeft = 3; + continue; + } + + if (!treatWrongCookieAsError && result->Record.GetErrorCode() == NPersQueue::NErrorCode::WRONG_COOKIE) { + cookie = CmdSetOwner(partition, tc).first; + msgSeqNo = 0; + retriesLeft = 3; + continue; + } + + if (!treatBadOffsetAsError && result->Record.GetErrorCode() == NPersQueue::NErrorCode::WRITE_ERROR_BAD_OFFSET) { + return; + } + + if (error) { + UNIT_ASSERT(result->Record.GetErrorCode() == NPersQueue::NErrorCode::WRITE_ERROR_PARTITION_IS_FULL || + result->Record.GetErrorCode() == NPersQueue::NErrorCode::BAD_REQUEST || result->Record.GetErrorCode() == NPersQueue::NErrorCode::WRONG_COOKIE); + break; + } else { + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); + } + UNIT_ASSERT(result->Record.GetPartitionResponse().CmdWriteResultSize() == data.size()); + + for (ui32 i = 0; i < data.size(); ++i) { + UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdWriteResult(i).HasAlreadyWritten()); + UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdWriteResult(i).HasOffset()); + UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdWriteResult(i).HasMaxSeqNo() == + result->Record.GetPartitionResponse().GetCmdWriteResult(i).GetAlreadyWritten()); + if (result->Record.GetPartitionResponse().GetCmdWriteResult(i).HasMaxSeqNo()) { + UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdWriteResult(i).GetMaxSeqNo() >= (i64)data[i].first); + } + if (isFirst || offset != -1) { + UNIT_ASSERT(result->Record.GetPartitionResponse().GetCmdWriteResult(i).GetAlreadyWritten() + || result->Record.GetPartitionResponse().GetCmdWriteResult(i).GetOffset() == i + (offset == -1 ? 0 : offset)); + } + } + for (ui32 i = 0; i < data.size(); ++i) { + auto res = result->Record.GetPartitionResponse().GetCmdWriteResult(i); + UNIT_ASSERT(!alreadyWrittenSeqNo.contains(res.GetSeqNo()) || res.GetAlreadyWritten()); + } + retriesLeft = 0; + } catch (NActors::TSchedulingLimitReachedException) { + UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2); + retriesLeft = 3; + } + } + ++msgSeqNo; +} + + +void ReserveBytes(const ui32 partition, TTestContext& tc, const TString& cookie, i32 msgSeqNo, i64 size, const TActorId& pipeClient, bool lastRequest) -{ - THolder<TEvPersQueue::TEvRequest> request; - tc.Runtime->ResetScheduledCount(); - request.Reset(new TEvPersQueue::TEvRequest); - auto req = request->Record.MutablePartitionRequest(); - req->SetPartition(partition); - req->SetOwnerCookie(cookie); - req->SetMessageNo(msgSeqNo); +{ + THolder<TEvPersQueue::TEvRequest> request; + tc.Runtime->ResetScheduledCount(); + request.Reset(new TEvPersQueue::TEvRequest); + auto req = request->Record.MutablePartitionRequest(); + req->SetPartition(partition); + req->SetOwnerCookie(cookie); + req->SetMessageNo(msgSeqNo); ActorIdToProto(pipeClient, req->MutablePipeClient()); - req->MutableCmdReserveBytes()->SetSize(size); - req->MutableCmdReserveBytes()->SetLastRequest(lastRequest); - tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); - - tc.Runtime->DispatchEvents(); -} - - + req->MutableCmdReserveBytes()->SetSize(size); + req->MutableCmdReserveBytes()->SetLastRequest(lastRequest); + tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); + + tc.Runtime->DispatchEvents(); +} + + void CmdReserveBytes(const ui32 partition, TTestContext& tc, const TString& ownerCookie, i32 msn, i64 size, TActorId pipeClient, bool noAnswer = false, bool lastRequest = false) { - TAutoPtr<IEventHandle> handle; - TEvPersQueue::TEvResponse *result; - - ui32& msgSeqNo = tc.MsgSeqNoMap[partition]; - if (msn != -1) msgSeqNo = msn; - TString cookie = ownerCookie; - - for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { - try { - ReserveBytes(partition, tc, cookie, msgSeqNo, size, pipeClient, lastRequest); - result = tc.Runtime->GrabEdgeEventIf<TEvPersQueue::TEvResponse>(handle, [](const TEvPersQueue::TEvResponse& ev){ - if (!ev.Record.HasPartitionResponse() || !ev.Record.GetPartitionResponse().HasCmdReadResult()) - return true; - return false; - }); //there could be outgoing reads in TestReadSubscription test - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - - if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { - retriesLeft = 3; - continue; - } - - if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::WRONG_COOKIE) { - auto p = CmdSetOwner(partition, tc); - pipeClient = p.second; - cookie = p.first; - msgSeqNo = 0; - retriesLeft = 3; - continue; - } - UNIT_ASSERT(!noAnswer); - - UNIT_ASSERT_C(result->Record.GetErrorCode() == NPersQueue::NErrorCode::OK, result->Record); - - retriesLeft = 0; - } catch (NActors::TSchedulingLimitReachedException) { - if (noAnswer) - break; - UNIT_ASSERT(retriesLeft == 2); - } - } - ++msgSeqNo; -} - - -void CmdSetOffset(const ui32 partition, const TString& user, ui64 offset, bool error, TTestContext& tc, const TString& session = "") { - TAutoPtr<IEventHandle> handle; - TEvPersQueue::TEvResponse *result; - THolder<TEvPersQueue::TEvRequest> request; - for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { - try { - tc.Runtime->ResetScheduledCount(); - request.Reset(new TEvPersQueue::TEvRequest); - auto req = request->Record.MutablePartitionRequest(); - req->SetPartition(partition); - auto off = req->MutableCmdSetClientOffset(); - off->SetClientId(user); - off->SetOffset(offset); - if (!session.empty()) - off->SetSessionId(session); - tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { - tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress - retriesLeft = 3; - continue; - } - if ((result->Record.GetErrorCode() == NPersQueue::NErrorCode::SET_OFFSET_ERROR_COMMIT_TO_FUTURE || - result->Record.GetErrorCode() == NPersQueue::NErrorCode::WRONG_COOKIE) && error) { - break; - } - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); - retriesLeft = 0; - } catch (NActors::TSchedulingLimitReachedException) { - UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2); - } - } -} - - -void CmdCreateSession(const ui32 partition, const TString& user, const TString& session, TTestContext& tc, const i64 offset = 0, - const ui32 gen = 0, const ui32 step = 0, bool error = false) { - TAutoPtr<IEventHandle> handle; - TEvPersQueue::TEvResponse *result; - THolder<TEvPersQueue::TEvRequest> request; - for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { - try { - tc.Runtime->ResetScheduledCount(); - request.Reset(new TEvPersQueue::TEvRequest); - auto req = request->Record.MutablePartitionRequest(); - req->SetPartition(partition); - auto off = req->MutableCmdCreateSession(); - off->SetClientId(user); - off->SetSessionId(session); - off->SetGeneration(gen); - off->SetStep(step); - tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { - tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress - retriesLeft = 3; - continue; - } - - if (error) { - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::WRONG_COOKIE); - return; - } - - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); - - UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdGetClientOffsetResult()); - auto resp = result->Record.GetPartitionResponse().GetCmdGetClientOffsetResult(); - UNIT_ASSERT(resp.HasOffset() && (i64)resp.GetOffset() == offset); - retriesLeft = 0; - } catch (NActors::TSchedulingLimitReachedException) { - UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2); - } - } -} - -void CmdKillSession(const ui32 partition, const TString& user, const TString& session, TTestContext& tc) { - TAutoPtr<IEventHandle> handle; - TEvPersQueue::TEvResponse *result; - THolder<TEvPersQueue::TEvRequest> request; - for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { - try { - tc.Runtime->ResetScheduledCount(); - request.Reset(new TEvPersQueue::TEvRequest); - auto req = request->Record.MutablePartitionRequest(); - req->SetPartition(partition); - auto off = req->MutableCmdDeleteSession(); - off->SetClientId(user); - off->SetSessionId(session); - tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { - tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress - retriesLeft = 3; - continue; - } - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); - retriesLeft = 0; - } catch (NActors::TSchedulingLimitReachedException) { - UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2); - } - } -} - - - -void CmdGetOffset(const ui32 partition, const TString& user, i64 offset, TTestContext& tc, i64 ctime = -1, ui64 writeTime = 0) { - TAutoPtr<IEventHandle> handle; - TEvPersQueue::TEvResponse *result; - THolder<TEvPersQueue::TEvRequest> request; - for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { - try { - tc.Runtime->ResetScheduledCount(); - request.Reset(new TEvPersQueue::TEvRequest); - auto req = request->Record.MutablePartitionRequest(); - req->SetPartition(partition); - auto off = req->MutableCmdGetClientOffset(); - off->SetClientId(user); - tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - - if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { - tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress - retriesLeft = 3; - continue; - } - - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); - UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdGetClientOffsetResult()); - auto resp = result->Record.GetPartitionResponse().GetCmdGetClientOffsetResult(); - if (ctime != -1) { - UNIT_ASSERT_EQUAL(resp.HasCreateTimestampMS(), ctime > 0); - if (ctime > 0) { - if (ctime == Max<i64>()) { - UNIT_ASSERT(resp.GetCreateTimestampMS() + 86000000 < TAppData::TimeProvider->Now().MilliSeconds()); - } else { - UNIT_ASSERT_EQUAL((i64)resp.GetCreateTimestampMS(), ctime); - } - } - } - Cerr << "CMDGETOFFSET partition " << partition << " waiting for offset " << offset << ": " << resp << "\n"; - UNIT_ASSERT((offset == -1 && !resp.HasOffset()) || (i64)resp.GetOffset() == offset); - if (writeTime > 0) { - UNIT_ASSERT(resp.HasWriteTimestampEstimateMS()); - UNIT_ASSERT(resp.GetWriteTimestampEstimateMS() >= writeTime); - } - retriesLeft = 0; - } catch (NActors::TSchedulingLimitReachedException) { - UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2); - } - } -} - - -void CmdUpdateWriteTimestamp(const ui32 partition, ui64 timestamp, TTestContext& tc) { - TAutoPtr<IEventHandle> handle; - TEvPersQueue::TEvResponse *result; - THolder<TEvPersQueue::TEvRequest> request; - for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { - try { - tc.Runtime->ResetScheduledCount(); - request.Reset(new TEvPersQueue::TEvRequest); - auto req = request->Record.MutablePartitionRequest(); - req->SetPartition(partition); - auto off = req->MutableCmdUpdateWriteTimestamp(); - off->SetWriteTimeMS(timestamp); - tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - - if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { - tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress - retriesLeft = 3; - continue; - } - - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); - retriesLeft = 0; - } catch (NActors::TSchedulingLimitReachedException) { - UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2); - } - } -} - - + TAutoPtr<IEventHandle> handle; + TEvPersQueue::TEvResponse *result; + + ui32& msgSeqNo = tc.MsgSeqNoMap[partition]; + if (msn != -1) msgSeqNo = msn; + TString cookie = ownerCookie; + + for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { + try { + ReserveBytes(partition, tc, cookie, msgSeqNo, size, pipeClient, lastRequest); + result = tc.Runtime->GrabEdgeEventIf<TEvPersQueue::TEvResponse>(handle, [](const TEvPersQueue::TEvResponse& ev){ + if (!ev.Record.HasPartitionResponse() || !ev.Record.GetPartitionResponse().HasCmdReadResult()) + return true; + return false; + }); //there could be outgoing reads in TestReadSubscription test + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + + if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { + retriesLeft = 3; + continue; + } + + if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::WRONG_COOKIE) { + auto p = CmdSetOwner(partition, tc); + pipeClient = p.second; + cookie = p.first; + msgSeqNo = 0; + retriesLeft = 3; + continue; + } + UNIT_ASSERT(!noAnswer); + + UNIT_ASSERT_C(result->Record.GetErrorCode() == NPersQueue::NErrorCode::OK, result->Record); + + retriesLeft = 0; + } catch (NActors::TSchedulingLimitReachedException) { + if (noAnswer) + break; + UNIT_ASSERT(retriesLeft == 2); + } + } + ++msgSeqNo; +} + + +void CmdSetOffset(const ui32 partition, const TString& user, ui64 offset, bool error, TTestContext& tc, const TString& session = "") { + TAutoPtr<IEventHandle> handle; + TEvPersQueue::TEvResponse *result; + THolder<TEvPersQueue::TEvRequest> request; + for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { + try { + tc.Runtime->ResetScheduledCount(); + request.Reset(new TEvPersQueue::TEvRequest); + auto req = request->Record.MutablePartitionRequest(); + req->SetPartition(partition); + auto off = req->MutableCmdSetClientOffset(); + off->SetClientId(user); + off->SetOffset(offset); + if (!session.empty()) + off->SetSessionId(session); + tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { + tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress + retriesLeft = 3; + continue; + } + if ((result->Record.GetErrorCode() == NPersQueue::NErrorCode::SET_OFFSET_ERROR_COMMIT_TO_FUTURE || + result->Record.GetErrorCode() == NPersQueue::NErrorCode::WRONG_COOKIE) && error) { + break; + } + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); + retriesLeft = 0; + } catch (NActors::TSchedulingLimitReachedException) { + UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2); + } + } +} + + +void CmdCreateSession(const ui32 partition, const TString& user, const TString& session, TTestContext& tc, const i64 offset = 0, + const ui32 gen = 0, const ui32 step = 0, bool error = false) { + TAutoPtr<IEventHandle> handle; + TEvPersQueue::TEvResponse *result; + THolder<TEvPersQueue::TEvRequest> request; + for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { + try { + tc.Runtime->ResetScheduledCount(); + request.Reset(new TEvPersQueue::TEvRequest); + auto req = request->Record.MutablePartitionRequest(); + req->SetPartition(partition); + auto off = req->MutableCmdCreateSession(); + off->SetClientId(user); + off->SetSessionId(session); + off->SetGeneration(gen); + off->SetStep(step); + tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { + tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress + retriesLeft = 3; + continue; + } + + if (error) { + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::WRONG_COOKIE); + return; + } + + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); + + UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdGetClientOffsetResult()); + auto resp = result->Record.GetPartitionResponse().GetCmdGetClientOffsetResult(); + UNIT_ASSERT(resp.HasOffset() && (i64)resp.GetOffset() == offset); + retriesLeft = 0; + } catch (NActors::TSchedulingLimitReachedException) { + UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2); + } + } +} + +void CmdKillSession(const ui32 partition, const TString& user, const TString& session, TTestContext& tc) { + TAutoPtr<IEventHandle> handle; + TEvPersQueue::TEvResponse *result; + THolder<TEvPersQueue::TEvRequest> request; + for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { + try { + tc.Runtime->ResetScheduledCount(); + request.Reset(new TEvPersQueue::TEvRequest); + auto req = request->Record.MutablePartitionRequest(); + req->SetPartition(partition); + auto off = req->MutableCmdDeleteSession(); + off->SetClientId(user); + off->SetSessionId(session); + tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { + tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress + retriesLeft = 3; + continue; + } + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); + retriesLeft = 0; + } catch (NActors::TSchedulingLimitReachedException) { + UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2); + } + } +} + + + +void CmdGetOffset(const ui32 partition, const TString& user, i64 offset, TTestContext& tc, i64 ctime = -1, ui64 writeTime = 0) { + TAutoPtr<IEventHandle> handle; + TEvPersQueue::TEvResponse *result; + THolder<TEvPersQueue::TEvRequest> request; + for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { + try { + tc.Runtime->ResetScheduledCount(); + request.Reset(new TEvPersQueue::TEvRequest); + auto req = request->Record.MutablePartitionRequest(); + req->SetPartition(partition); + auto off = req->MutableCmdGetClientOffset(); + off->SetClientId(user); + tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + + if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { + tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress + retriesLeft = 3; + continue; + } + + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); + UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdGetClientOffsetResult()); + auto resp = result->Record.GetPartitionResponse().GetCmdGetClientOffsetResult(); + if (ctime != -1) { + UNIT_ASSERT_EQUAL(resp.HasCreateTimestampMS(), ctime > 0); + if (ctime > 0) { + if (ctime == Max<i64>()) { + UNIT_ASSERT(resp.GetCreateTimestampMS() + 86000000 < TAppData::TimeProvider->Now().MilliSeconds()); + } else { + UNIT_ASSERT_EQUAL((i64)resp.GetCreateTimestampMS(), ctime); + } + } + } + Cerr << "CMDGETOFFSET partition " << partition << " waiting for offset " << offset << ": " << resp << "\n"; + UNIT_ASSERT((offset == -1 && !resp.HasOffset()) || (i64)resp.GetOffset() == offset); + if (writeTime > 0) { + UNIT_ASSERT(resp.HasWriteTimestampEstimateMS()); + UNIT_ASSERT(resp.GetWriteTimestampEstimateMS() >= writeTime); + } + retriesLeft = 0; + } catch (NActors::TSchedulingLimitReachedException) { + UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2); + } + } +} + + +void CmdUpdateWriteTimestamp(const ui32 partition, ui64 timestamp, TTestContext& tc) { + TAutoPtr<IEventHandle> handle; + TEvPersQueue::TEvResponse *result; + THolder<TEvPersQueue::TEvRequest> request; + for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { + try { + tc.Runtime->ResetScheduledCount(); + request.Reset(new TEvPersQueue::TEvRequest); + auto req = request->Record.MutablePartitionRequest(); + req->SetPartition(partition); + auto off = req->MutableCmdUpdateWriteTimestamp(); + off->SetWriteTimeMS(timestamp); + tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + + if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { + tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress + retriesLeft = 3; + continue; + } + + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); + retriesLeft = 0; + } catch (NActors::TSchedulingLimitReachedException) { + UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2); + } + } +} + + TVector<TString> CmdSourceIdRead(TTestContext& tc) { TAutoPtr<IEventHandle> handle; TVector<TString> sourceIds; THolder<TEvKeyValue::TEvRequest> request; TEvKeyValue::TEvResponse *result; - + for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { try { request.Reset(new TEvKeyValue::TEvRequest); @@ -987,120 +987,120 @@ TVector<TString> CmdSourceIdRead(TTestContext& tc) { } return sourceIds; } - - -void CmdRead(const ui32 partition, const ui64 offset, const ui32 count, const ui32 size, const ui32 resCount, bool timeouted, TTestContext& tc, TVector<i32> offsets = {}, const ui32 maxTimeLagMs = 0, const ui64 readTimestampMs = 0) { - TAutoPtr<IEventHandle> handle; - TEvPersQueue::TEvResponse *result; - THolder<TEvPersQueue::TEvRequest> request; - - for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { - try { - tc.Runtime->ResetScheduledCount(); - request.Reset(new TEvPersQueue::TEvRequest); - auto req = request->Record.MutablePartitionRequest(); - req->SetPartition(partition); - auto read = req->MutableCmdRead(); - read->SetOffset(offset); - read->SetClientId("user"); - read->SetCount(count); - read->SetBytes(size); - if (maxTimeLagMs > 0) { - read->SetMaxTimeLagMs(maxTimeLagMs); - } - if (readTimestampMs > 0) { - read->SetReadTimestampMs(readTimestampMs); - } - req->SetCookie(123); - - tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); - result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - - UNIT_ASSERT(result->Record.HasPartitionResponse()); - UNIT_ASSERT_EQUAL(result->Record.GetPartitionResponse().GetCookie(), 123); - if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { - tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress - retriesLeft = 3; - continue; - } - if (timeouted) { - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); - UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdReadResult()); - auto res = result->Record.GetPartitionResponse().GetCmdReadResult(); - UNIT_ASSERT_EQUAL(res.ResultSize(), 0); - break; - } - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); - - UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdReadResult()); - auto res = result->Record.GetPartitionResponse().GetCmdReadResult(); - - UNIT_ASSERT_EQUAL(res.ResultSize(), resCount); - ui64 off = offset; - - for (ui32 i = 0; i < resCount; ++i) { - - auto r = res.GetResult(i); - if (offsets.empty()) { + + +void CmdRead(const ui32 partition, const ui64 offset, const ui32 count, const ui32 size, const ui32 resCount, bool timeouted, TTestContext& tc, TVector<i32> offsets = {}, const ui32 maxTimeLagMs = 0, const ui64 readTimestampMs = 0) { + TAutoPtr<IEventHandle> handle; + TEvPersQueue::TEvResponse *result; + THolder<TEvPersQueue::TEvRequest> request; + + for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { + try { + tc.Runtime->ResetScheduledCount(); + request.Reset(new TEvPersQueue::TEvRequest); + auto req = request->Record.MutablePartitionRequest(); + req->SetPartition(partition); + auto read = req->MutableCmdRead(); + read->SetOffset(offset); + read->SetClientId("user"); + read->SetCount(count); + read->SetBytes(size); + if (maxTimeLagMs > 0) { + read->SetMaxTimeLagMs(maxTimeLagMs); + } + if (readTimestampMs > 0) { + read->SetReadTimestampMs(readTimestampMs); + } + req->SetCookie(123); + + tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + + UNIT_ASSERT(result->Record.HasPartitionResponse()); + UNIT_ASSERT_EQUAL(result->Record.GetPartitionResponse().GetCookie(), 123); + if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { + tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress + retriesLeft = 3; + continue; + } + if (timeouted) { + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); + UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdReadResult()); + auto res = result->Record.GetPartitionResponse().GetCmdReadResult(); + UNIT_ASSERT_EQUAL(res.ResultSize(), 0); + break; + } + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); + + UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdReadResult()); + auto res = result->Record.GetPartitionResponse().GetCmdReadResult(); + + UNIT_ASSERT_EQUAL(res.ResultSize(), resCount); + ui64 off = offset; + + for (ui32 i = 0; i < resCount; ++i) { + + auto r = res.GetResult(i); + if (offsets.empty()) { if (readTimestampMs == 0) { UNIT_ASSERT_EQUAL((ui64)r.GetOffset(), off); } - UNIT_ASSERT(r.GetSourceId().size() == 9 && r.GetSourceId().StartsWith("sourceid")); - UNIT_ASSERT_EQUAL(ui32(r.GetData()[0]), off); - UNIT_ASSERT_EQUAL(ui32((unsigned char)r.GetData().back()), r.GetSeqNo() % 256); - ++off; - } else { - UNIT_ASSERT(offsets[i] == (i64)r.GetOffset()); - } - } - retriesLeft = 0; - } catch (NActors::TSchedulingLimitReachedException) { - UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2); - } - } -} - - -void FillUserInfo(NKikimrClient::TKeyValueRequest_TCmdWrite* write, TString& client, ui32 partition, ui64 offset) { - NPQ::TKeyPrefix ikey(NPQ::TKeyPrefix::TypeInfo, partition, NPQ::TKeyPrefix::MarkUser); - ikey.Append(client.c_str(), client.size()); - - NKikimrPQ::TUserInfo userInfo; - userInfo.SetOffset(offset); - userInfo.SetGeneration(1); - userInfo.SetStep(2); - userInfo.SetSession("test-session"); - userInfo.SetOffsetRewindSum(10); - userInfo.SetReadRuleGeneration(1); - TString out; + UNIT_ASSERT(r.GetSourceId().size() == 9 && r.GetSourceId().StartsWith("sourceid")); + UNIT_ASSERT_EQUAL(ui32(r.GetData()[0]), off); + UNIT_ASSERT_EQUAL(ui32((unsigned char)r.GetData().back()), r.GetSeqNo() % 256); + ++off; + } else { + UNIT_ASSERT(offsets[i] == (i64)r.GetOffset()); + } + } + retriesLeft = 0; + } catch (NActors::TSchedulingLimitReachedException) { + UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2); + } + } +} + + +void FillUserInfo(NKikimrClient::TKeyValueRequest_TCmdWrite* write, TString& client, ui32 partition, ui64 offset) { + NPQ::TKeyPrefix ikey(NPQ::TKeyPrefix::TypeInfo, partition, NPQ::TKeyPrefix::MarkUser); + ikey.Append(client.c_str(), client.size()); + + NKikimrPQ::TUserInfo userInfo; + userInfo.SetOffset(offset); + userInfo.SetGeneration(1); + userInfo.SetStep(2); + userInfo.SetSession("test-session"); + userInfo.SetOffsetRewindSum(10); + userInfo.SetReadRuleGeneration(1); + TString out; Y_PROTOBUF_SUPPRESS_NODISCARD userInfo.SerializeToString(&out); - - TBuffer idata; - idata.Append(out.c_str(), out.size()); - - write->SetKey(ikey.Data(), ikey.Size()); - write->SetValue(idata.Data(), idata.Size()); -} - -void FillDeprecatedUserInfo(NKikimrClient::TKeyValueRequest_TCmdWrite* write, TString& client, ui32 partition, ui64 offset) { - TString session = "test-session"; - ui32 gen = 1; - ui32 step = 2; - NPQ::TKeyPrefix ikeyDeprecated(NPQ::TKeyPrefix::TypeInfo, partition, NPQ::TKeyPrefix::MarkUserDeprecated); - ikeyDeprecated.Append(client.c_str(), client.size()); - + + TBuffer idata; + idata.Append(out.c_str(), out.size()); + + write->SetKey(ikey.Data(), ikey.Size()); + write->SetValue(idata.Data(), idata.Size()); +} + +void FillDeprecatedUserInfo(NKikimrClient::TKeyValueRequest_TCmdWrite* write, TString& client, ui32 partition, ui64 offset) { + TString session = "test-session"; + ui32 gen = 1; + ui32 step = 2; + NPQ::TKeyPrefix ikeyDeprecated(NPQ::TKeyPrefix::TypeInfo, partition, NPQ::TKeyPrefix::MarkUserDeprecated); + ikeyDeprecated.Append(client.c_str(), client.size()); + TBuffer idataDeprecated = NPQ::NDeprecatedUserData::Serialize(offset, gen, step, session); - write->SetKey(ikeyDeprecated.Data(), ikeyDeprecated.Size()); - write->SetValue(idataDeprecated.Data(), idataDeprecated.Size()); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// TEST CASES -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - -} // NKikimr + write->SetKey(ikeyDeprecated.Data(), ikeyDeprecated.Size()); + write->SetValue(idataDeprecated.Data(), idataDeprecated.Size()); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// TEST CASES +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + +} // NKikimr diff --git a/ydb/core/persqueue/pq_ut_slow.cpp b/ydb/core/persqueue/pq_ut_slow.cpp index b07e070c10..51041c1045 100644 --- a/ydb/core/persqueue/pq_ut_slow.cpp +++ b/ydb/core/persqueue/pq_ut_slow.cpp @@ -1,5 +1,5 @@ -#include "pq_ut.h" - +#include "pq_ut.h" + #include <ydb/core/testlib/basics/runtime.h> #include <ydb/core/tablet_flat/tablet_flat_executed.h> #include <ydb/core/tx/schemeshard/schemeshard.h> @@ -10,53 +10,53 @@ #include <ydb/core/persqueue/events/global.h> #include <ydb/core/engine/minikql/flat_local_tx_factory.h> #include <ydb/core/security/ticket_parser.h> - + #include <ydb/core/testlib/fake_scheme_shard.h> #include <ydb/core/testlib/tablet_helpers.h> - + #include <library/cpp/testing/unittest/registar.h> - -#include <util/system/sanitizers.h> -#include <util/system/valgrind.h> - -namespace NKikimr { -Y_UNIT_TEST_SUITE(TPQTestSlow) { - - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// SINGLE COMMAND TEST FUNCTIONS -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - -Y_UNIT_TEST(TestWriteVeryBigMessage) { - TTestContext tc; - RunTestWithReboots(tc.TabletIds, [&]() { - return tc.InitialEventsFilter.Prepare(); - }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { - TFinalizer finalizer(tc); - tc.Prepare(dispatchName, setup, activeZone); - tc.Runtime->SetScheduledLimit(200); - activeZone = false; - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc); //always delete - - TVector<std::pair<ui64, TString>> data; - data.push_back({1, TString{10, 'b'}}); - CmdWrite(1, "sourceIdx", data, tc, false, {}, false, "", -1, 40000); - data.clear(); - const ui32 size = PlainOrSoSlow(40*1024*1024, 1*1024*1024); - const ui32 so = PlainOrSoSlow(1, 0); - data.push_back({2, TString{size, 'a'}}); - CmdWrite(1, "sourceIdx", data, tc, false, {}, false, "", -1, 80000); - CmdWrite(0, "sourceIdx", data, tc, false, {}, false, "", -1, 0); + +#include <util/system/sanitizers.h> +#include <util/system/valgrind.h> + +namespace NKikimr { +Y_UNIT_TEST_SUITE(TPQTestSlow) { + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// SINGLE COMMAND TEST FUNCTIONS +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + +Y_UNIT_TEST(TestWriteVeryBigMessage) { + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); + }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + tc.Runtime->SetScheduledLimit(200); + activeZone = false; + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc); //always delete + + TVector<std::pair<ui64, TString>> data; + data.push_back({1, TString{10, 'b'}}); + CmdWrite(1, "sourceIdx", data, tc, false, {}, false, "", -1, 40000); + data.clear(); + const ui32 size = PlainOrSoSlow(40*1024*1024, 1*1024*1024); + const ui32 so = PlainOrSoSlow(1, 0); + data.push_back({2, TString{size, 'a'}}); + CmdWrite(1, "sourceIdx", data, tc, false, {}, false, "", -1, 80000); + CmdWrite(0, "sourceIdx", data, tc, false, {}, false, "", -1, 0); activeZone = true; - PQGetPartInfo(so, 1, tc); - RestartTablet(tc); - PQGetPartInfo(so, 1, tc); - }); -} - - + PQGetPartInfo(so, 1, tc); + RestartTablet(tc); + PQGetPartInfo(so, 1, tc); + }); +} + + Y_UNIT_TEST(TestOnDiskStoredSourceIds) { TTestContext tc; RunTestWithReboots(tc.TabletIds, [&]() { @@ -65,8 +65,8 @@ Y_UNIT_TEST(TestOnDiskStoredSourceIds) { TFinalizer finalizer(tc); tc.Prepare(dispatchName, setup, activeZone); tc.Runtime->SetScheduledLimit(200); - - PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc, 2, 6*1024*1024, true, 0, 3); //no important client, lifetimeseconds=0 - delete right now + + PQTabletPrepare(20000000, 100 * 1024 * 1024, 0, {}, tc, 2, 6*1024*1024, true, 0, 3); //no important client, lifetimeseconds=0 - delete right now TVector<TString> writtenSourceIds; @@ -158,5 +158,5 @@ Y_UNIT_TEST(TestOnDiskStoredSourceIds) { -} // TKeyValueTest -} // NKikimr +} // TKeyValueTest +} // NKikimr diff --git a/ydb/core/persqueue/read.h b/ydb/core/persqueue/read.h index 98e14680d2..d0bc79c692 100644 --- a/ydb/core/persqueue/read.h +++ b/ydb/core/persqueue/read.h @@ -1,43 +1,43 @@ -#pragma once +#pragma once -#include "partition.h" +#include "partition.h" #include "pq_l2_service.h" #include "cache_eviction.h" - + #include <ydb/core/keyvalue/keyvalue_flat_impl.h> #include <ydb/core/persqueue/events/internal.h> -namespace NKikimr { -namespace NPQ { - +namespace NKikimr { +namespace NPQ { + inline TString ToStringLocalTimeUpToSeconds(const TInstant &time) { return time.GetValue() ? time.ToStringLocalUpToSeconds() : "0"; } /// Intablet cache proxy: Partition <-> CacheProxy <-> KV class TPQCacheProxy : public TActorBootstrapped<TPQCacheProxy> { - public: + public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::PERSQUEUE_CACHE_ACTOR; } TPQCacheProxy(const TActorId& tablet, TString topicName, ui32 size) - : Tablet(tablet) + : Tablet(tablet) , TopicName(topicName) - , Cookie(0) + , Cookie(0) , Cache(topicName, size) , CountersUpdateTime(TAppData::TimeProvider->Now()) { Y_VERIFY(topicName.size(), "CacheProxy with empty topic name"); } - + void Bootstrap(const TActorContext& ctx) - { + { Y_UNUSED(ctx); - Become(&TThis::StateFunc); - } - - private: + Become(&TThis::StateFunc); + } + + private: ui64 SaveKvRequest(TKvRequest&& kvRequest) { ui64 cookie = Cookie++; @@ -45,11 +45,11 @@ namespace NPQ { Y_VERIFY(savedRequest.second); return cookie; } - + void SaveInProgress(const TKvRequest& kvRequest) { for (const TRequestedBlob& reqBlob : kvRequest.Blobs) { - TBlobId blob(kvRequest.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount); + TBlobId blob(kvRequest.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount); ReadsInProgress.insert(blob); } } @@ -57,7 +57,7 @@ namespace NPQ { bool CheckInProgress(const TActorContext& ctx, TKvRequest& kvRequest) { for (const TRequestedBlob& reqBlob : kvRequest.Blobs) { - TBlobId blob(kvRequest.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount); + TBlobId blob(kvRequest.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount); auto it = ReadsInProgress.find(blob); if (it != ReadsInProgress.end()) { LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Read request is blocked. Partition " @@ -74,7 +74,7 @@ namespace NPQ { { TVector<TKvRequest> unblocked; for (const TRequestedBlob& reqBlob : blocker.Blobs) { - TBlobId blob(blocker.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount); + TBlobId blob(blocker.Partition, reqBlob.Offset, reqBlob.PartNo, reqBlob.Count, reqBlob.InternalPartsCount); ReadsInProgress.erase(blob); auto it = BlockedReads.find(blob); @@ -90,19 +90,19 @@ namespace NPQ { } void Handle(TEvPQ::TEvChangeCacheConfig::TPtr& ev, const TActorContext& ctx) - { + { Y_UNUSED(ev); Cache.Touch(ctx); - } - + } + void Handle(TEvents::TEvPoisonPill::TPtr& ev, const TActorContext& ctx) - { - Y_VERIFY(ev->Sender == Tablet); - Die(ctx); - } - - void Handle(TEvPQ::TEvBlobRequest::TPtr& ev, const TActorContext& ctx) - { + { + Y_VERIFY(ev->Sender == Tablet); + Die(ctx); + } + + void Handle(TEvPQ::TEvBlobRequest::TPtr& ev, const TActorContext& ctx) + { ui32 partition = ev->Get()->Partition; Cache.SetUserOffset(ctx, ev->Get()->User, partition, ev->Get()->ReadOffset); @@ -119,13 +119,13 @@ namespace NPQ { if (fromCache == kvReq.Blobs.size()) { // all from cache LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Reading cookie " << kvReq.CookiePQ << ". All " << fromCache << " blobs are from cache."); - + THolder<TEvPQ::TEvBlobResponse> response = kvReq.MakePQResponse(ctx); - response->Check(); - + response->Check(); + ctx.Send(kvReq.Sender, response.Release()); // -> Partition - return; - } + return; + } if (CheckInProgress(ctx, kvReq)) { LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Reading cookie " << kvReq.CookiePQ @@ -139,14 +139,14 @@ namespace NPQ { SaveInProgress(kvReq); THolder<TEvKeyValue::TEvRequest> request = kvReq.MakeKvRequest(); // before save ui64 cookie = SaveKvRequest(std::move(kvReq)); - request->Record.SetCookie(cookie); + request->Record.SetCookie(cookie); ctx.Send(Tablet, request.Release()); // -> KV - } - - void Handle(TEvKeyValue::TEvResponse::TPtr& ev, const TActorContext& ctx) - { - auto resp = ev->Get()->Record; - Y_VERIFY(resp.HasCookie()); + } + + void Handle(TEvKeyValue::TEvResponse::TPtr& ev, const TActorContext& ctx) + { + auto resp = ev->Get()->Record; + Y_VERIFY(resp.HasCookie()); auto it = KvRequests.find(resp.GetCookie()); Y_VERIFY(it != KvRequests.end()); @@ -165,61 +165,61 @@ namespace NPQ { auto resp = ev->Get()->Record; TVector<TRequestedBlob>& outBlobs = kvReq.Blobs; - ui32 cachedCount = std::accumulate(outBlobs.begin(), outBlobs.end(), 0u, [](ui32 sum, const TRequestedBlob& blob) { - return sum + (blob.Value.empty() ? 0 : 1); - }); + ui32 cachedCount = std::accumulate(outBlobs.begin(), outBlobs.end(), 0u, [](ui32 sum, const TRequestedBlob& blob) { + return sum + (blob.Value.empty() ? 0 : 1); + }); LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Got results. " << resp.ReadResultSize() << " of " << outBlobs.size() << " from KV. Status " << resp.GetStatus()); - + TErrorInfo error; - if (resp.GetStatus() != NMsgBusProxy::MSTATUS_OK) { + if (resp.GetStatus() != NMsgBusProxy::MSTATUS_OK) { LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "Got Error response for whole request status " << resp.GetStatus() << " cookie " << kvReq.CookiePQ); - error = TErrorInfo(NPersQueue::NErrorCode::ERROR, Sprintf("Got bad response: %s", resp.DebugString().c_str())); - } else { + error = TErrorInfo(NPersQueue::NErrorCode::ERROR, Sprintf("Got bad response: %s", resp.DebugString().c_str())); + } else { Y_VERIFY(resp.ReadResultSize() && resp.ReadResultSize() + cachedCount == outBlobs.size(), "Unexpected KV read result size %" PRIu64 " for cached %" PRIu32 "/%" PRIu64 " blobs, proto %s", resp.ReadResultSize(), cachedCount, outBlobs.size(), ev->Get()->ToString().data()); TVector<bool> kvBlobs(outBlobs.size(), false); - ui32 pos = 0; - for (ui32 i = 0; i < resp.ReadResultSize(); ++i, ++pos) { - auto r = resp.MutableReadResult(i); - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Got results. result " << i << " from KV. Status " << r->GetStatus()); - if (r->GetStatus() == NKikimrProto::OVERRUN) { //this blob and next are not readed at all. Return as answer only previous blobs - Y_VERIFY(i > 0, "OVERRUN in first read request"); - break; - } else if (r->GetStatus() == NKikimrProto::OK) { - Y_VERIFY(r->HasValue() && r->GetValue().size()); - - // skip cached blobs, find position for the next value - while (pos < outBlobs.size() && outBlobs[pos].Value) { - ++pos; - } - - Y_VERIFY(pos < outBlobs.size(), "Got resulting blob with no place for it"); + ui32 pos = 0; + for (ui32 i = 0; i < resp.ReadResultSize(); ++i, ++pos) { + auto r = resp.MutableReadResult(i); + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Got results. result " << i << " from KV. Status " << r->GetStatus()); + if (r->GetStatus() == NKikimrProto::OVERRUN) { //this blob and next are not readed at all. Return as answer only previous blobs + Y_VERIFY(i > 0, "OVERRUN in first read request"); + break; + } else if (r->GetStatus() == NKikimrProto::OK) { + Y_VERIFY(r->HasValue() && r->GetValue().size()); + + // skip cached blobs, find position for the next value + while (pos < outBlobs.size() && outBlobs[pos].Value) { + ++pos; + } + + Y_VERIFY(pos < outBlobs.size(), "Got resulting blob with no place for it"); kvBlobs[pos] = true; - - Y_VERIFY(outBlobs[pos].Value.empty()); - outBlobs[pos].Value = r->GetValue(); - } else { - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "Got Error response " << r->GetStatus() - << " for " << i << "'s blob from " << resp.ReadResultSize() << " blobs"); - error = TErrorInfo(r->GetStatus() == NKikimrProto::NODATA ? NPersQueue::NErrorCode::READ_ERROR_TOO_SMALL_OFFSET - : NPersQueue::NErrorCode::ERROR, - Sprintf("Got bad response: %s", r->DebugString().c_str())); - break; - } - } + + Y_VERIFY(outBlobs[pos].Value.empty()); + outBlobs[pos].Value = r->GetValue(); + } else { + LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, "Got Error response " << r->GetStatus() + << " for " << i << "'s blob from " << resp.ReadResultSize() << " blobs"); + error = TErrorInfo(r->GetStatus() == NKikimrProto::NODATA ? NPersQueue::NErrorCode::READ_ERROR_TOO_SMALL_OFFSET + : NPersQueue::NErrorCode::ERROR, + Sprintf("Got bad response: %s", r->DebugString().c_str())); + break; + } + } Cache.SavePrefetchBlobs(ctx, kvReq, kvBlobs); - } - + } + TVector<TKvRequest> unblockedReads = RemoveFromProgress(ctx, kvReq); // before kvReq.MakePQResponse() THolder<TEvPQ::TEvBlobResponse> response = kvReq.MakePQResponse(ctx, error); - response->Check(); + response->Check(); ctx.Send(kvReq.Sender, response.Release()); // -> Partition // Retry previously blocked reads. Should be called after saving blobs in cache @@ -227,8 +227,8 @@ namespace NPQ { ReadBlobs(ctx, std::move(ubr)); UpdateCounters(ctx); - } - + } + void OnKvWriteResult(TEvKeyValue::TEvResponse::TPtr& ev, const TActorContext& ctx, TKvRequest& kvReq) { auto resp = ev->Get()->Record; @@ -248,8 +248,8 @@ namespace NPQ { THolder<TEvKeyValue::TEvResponse> response = MakeHolder<TEvKeyValue::TEvResponse>(); response->Record = std::move(ev->Get()->Record); - response->Record.ClearCookie(); //cookie must not leak to Partition - it uses cookie for SetOffset requests - + response->Record.ClearCookie(); //cookie must not leak to Partition - it uses cookie for SetOffset requests + ctx.Send(kvReq.Sender, response.Release()); // -> Partition UpdateCounters(ctx); @@ -275,11 +275,11 @@ namespace NPQ { Y_VERIFY(strKey.size() == TKey::KeySize(), "Unexpected key size: %" PRIu64, strKey.size()); TString value = cmd.GetValue(); kvReq.Partition = key.GetPartition(); - TRequestedBlob blob(key.GetOffset(), key.GetPartNo(), key.GetCount(), key.GetInternalPartsCount(), value.size(), value); + TRequestedBlob blob(key.GetOffset(), key.GetPartNo(), key.GetCount(), key.GetInternalPartsCount(), value.size(), value); kvReq.Blobs.push_back(blob); LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "CacheProxy. Passthrough blob. Partition " - << kvReq.Partition << " offset " << blob.Offset << " partNo " << blob.PartNo << " count " << blob.Count << " size " << value.size()); + << kvReq.Partition << " offset " << blob.Offset << " partNo " << blob.PartNo << " count " << blob.Count << " size " << value.size()); } else { kvReq.MetadataWritesCount++; } @@ -309,11 +309,11 @@ namespace NPQ { UpdateCounters(ctx); } - void HandleMonitoring(TEvPQ::TEvMonRequest::TPtr& ev, const TActorContext& ctx) - { - TStringStream out; + void HandleMonitoring(TEvPQ::TEvMonRequest::TPtr& ev, const TActorContext& ctx) + { + TStringStream out; HTML(out) - { + { DIV_CLASS_ID("tab-pane fade", "cache") { TABLE_SORTABLE_CLASS("table") { TABLEHEAD() { @@ -337,15 +337,15 @@ namespace NPQ { TABLED() {out << data->GetValue().size();} TABLED() {out << ToStringLocalTimeUpToSeconds(data->GetAccessTime());} } - } - + } + } } } } ctx.Send(ev->Sender, new TEvPQ::TEvMonResponse(Max<ui32>(), TVector<TString>(), out.Str())); - } - + } + void UpdateCounters(const TActorContext& ctx) { static const ui32 UPDATE_TIMEOUT_S = 5; @@ -364,30 +364,30 @@ namespace NPQ { CountersUpdateTime = now; } - STFUNC(StateFunc) { - switch (ev->GetTypeRewrite()) { + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { HFunc(TEvPQ::TEvBlobRequest, Handle); // read requests HFunc(TEvents::TEvPoisonPill, Handle); - HFunc(TEvPQ::TEvMonRequest, HandleMonitoring); + HFunc(TEvPQ::TEvMonRequest, HandleMonitoring); HFunc(TEvKeyValue::TEvRequest, Handle); // write requests HFunc(TEvKeyValue::TEvResponse, Handle); // read & write responses - HFunc(TEvPQ::TEvChangeCacheConfig, Handle); + HFunc(TEvPQ::TEvChangeCacheConfig, Handle); HFunc(TEvPqCache::TEvCacheL2Response, Handle); - default: - break; - }; - } - + default: + break; + }; + } + TActorId Tablet; TString TopicName; - ui64 Cookie; + ui64 Cookie; // any TKvRequest would be placed into KvRequests or into BlockedReads depending on ReadsInProgress content THashMap<ui64, TKvRequest> KvRequests; THashMap<TBlobId, TVector<TKvRequest>> BlockedReads; THashSet<TBlobId> ReadsInProgress; TIntabletCache Cache; TInstant CountersUpdateTime; - }; - -} //NPQ -} //NKikimr + }; + +} //NPQ +} //NKikimr diff --git a/ydb/core/persqueue/read_balancer.cpp b/ydb/core/persqueue/read_balancer.cpp index ae7d5af1d9..4eb9c7206d 100644 --- a/ydb/core/persqueue/read_balancer.cpp +++ b/ydb/core/persqueue/read_balancer.cpp @@ -1,346 +1,346 @@ -#include "read_balancer.h" +#include "read_balancer.h" #include <ydb/core/tablet/tablet_exception.h> #include <library/cpp/monlib/service/pages/templates.h> -#include <library/cpp/string_utils/base64/base64.h> - -namespace NKikimr { -namespace NPQ { - - -using namespace NTabletFlatExecutor; - +#include <library/cpp/string_utils/base64/base64.h> + +namespace NKikimr { +namespace NPQ { + + +using namespace NTabletFlatExecutor; + static constexpr TDuration ACL_SUCCESS_RETRY_TIMEOUT = TDuration::Seconds(30); static constexpr TDuration ACL_ERROR_RETRY_TIMEOUT = TDuration::Seconds(5); static constexpr TDuration ACL_EXPIRATION_TIMEOUT = TDuration::Minutes(5); -bool TPersQueueReadBalancer::TTxPreInit::Execute(TTransactionContext& txc, const TActorContext& ctx) { - Y_UNUSED(ctx); - NIceDb::TNiceDb(txc.DB).Materialize<Schema>(); - return true; -} - -void TPersQueueReadBalancer::TTxPreInit::Complete(const TActorContext& ctx) { - Self->Execute(new TTxInit(Self), ctx); -} - - -bool TPersQueueReadBalancer::TTxInit::Execute(TTransactionContext& txc, const TActorContext& ctx) { - try { - Y_UNUSED(ctx); //read config - NIceDb::TNiceDb db(txc.DB); - - auto dataRowset = db.Table<Schema::Data>().Range().Select(); - auto partsRowset = db.Table<Schema::Partitions>().Range().Select(); - auto groupsRowset = db.Table<Schema::Groups>().Range().Select(); - auto tabletsRowset = db.Table<Schema::Tablets>().Range().Select(); - - if (!dataRowset.IsReady() || !partsRowset.IsReady() || !groupsRowset.IsReady() || !tabletsRowset.IsReady()) - return false; - - while (!dataRowset.EndOfSet()) { //found out topic info - Y_VERIFY(!Self->Inited); - Self->PathId = dataRowset.GetValue<Schema::Data::PathId>(); - Self->Topic = dataRowset.GetValue<Schema::Data::Topic>(); - Self->Path = dataRowset.GetValue<Schema::Data::Path>(); - Self->Version = dataRowset.GetValue<Schema::Data::Version>(); - Self->MaxPartsPerTablet = dataRowset.GetValueOrDefault<Schema::Data::MaxPartsPerTablet>(0); +bool TPersQueueReadBalancer::TTxPreInit::Execute(TTransactionContext& txc, const TActorContext& ctx) { + Y_UNUSED(ctx); + NIceDb::TNiceDb(txc.DB).Materialize<Schema>(); + return true; +} + +void TPersQueueReadBalancer::TTxPreInit::Complete(const TActorContext& ctx) { + Self->Execute(new TTxInit(Self), ctx); +} + + +bool TPersQueueReadBalancer::TTxInit::Execute(TTransactionContext& txc, const TActorContext& ctx) { + try { + Y_UNUSED(ctx); //read config + NIceDb::TNiceDb db(txc.DB); + + auto dataRowset = db.Table<Schema::Data>().Range().Select(); + auto partsRowset = db.Table<Schema::Partitions>().Range().Select(); + auto groupsRowset = db.Table<Schema::Groups>().Range().Select(); + auto tabletsRowset = db.Table<Schema::Tablets>().Range().Select(); + + if (!dataRowset.IsReady() || !partsRowset.IsReady() || !groupsRowset.IsReady() || !tabletsRowset.IsReady()) + return false; + + while (!dataRowset.EndOfSet()) { //found out topic info + Y_VERIFY(!Self->Inited); + Self->PathId = dataRowset.GetValue<Schema::Data::PathId>(); + Self->Topic = dataRowset.GetValue<Schema::Data::Topic>(); + Self->Path = dataRowset.GetValue<Schema::Data::Path>(); + Self->Version = dataRowset.GetValue<Schema::Data::Version>(); + Self->MaxPartsPerTablet = dataRowset.GetValueOrDefault<Schema::Data::MaxPartsPerTablet>(0); Self->SchemeShardId = dataRowset.GetValueOrDefault<Schema::Data::SchemeShardId>(0); - Self->NextPartitionId = dataRowset.GetValueOrDefault<Schema::Data::NextPartitionId>(0); - - TString config = dataRowset.GetValueOrDefault<Schema::Data::Config>(""); - if (!config.empty()) { - bool res = Self->TabletConfig.ParseFromString(config); - Y_VERIFY(res); - Self->Consumers.clear(); - for (const auto& rr : Self->TabletConfig.GetReadRules()) { - Self->Consumers.insert(rr); - } - } - Self->Inited = true; - if (!dataRowset.Next()) - return false; - } - - while (!partsRowset.EndOfSet()) { //found out tablets for partitions - ++Self->NumActiveParts; - ui32 part = partsRowset.GetValue<Schema::Partitions::Partition>(); - ui64 tabletId = partsRowset.GetValue<Schema::Partitions::TabletId>(); + Self->NextPartitionId = dataRowset.GetValueOrDefault<Schema::Data::NextPartitionId>(0); + + TString config = dataRowset.GetValueOrDefault<Schema::Data::Config>(""); + if (!config.empty()) { + bool res = Self->TabletConfig.ParseFromString(config); + Y_VERIFY(res); + Self->Consumers.clear(); + for (const auto& rr : Self->TabletConfig.GetReadRules()) { + Self->Consumers.insert(rr); + } + } + Self->Inited = true; + if (!dataRowset.Next()) + return false; + } + + while (!partsRowset.EndOfSet()) { //found out tablets for partitions + ++Self->NumActiveParts; + ui32 part = partsRowset.GetValue<Schema::Partitions::Partition>(); + ui64 tabletId = partsRowset.GetValue<Schema::Partitions::TabletId>(); Self->PartitionsInfo[part] = {tabletId, EPartitionState::EPS_FREE, TActorId(), part + 1}; - if (!partsRowset.Next()) - return false; - } - - while (!groupsRowset.EndOfSet()) { //found out tablets for partitions - ui32 groupId = groupsRowset.GetValue<Schema::Groups::GroupId>(); - ui32 partition = groupsRowset.GetValue<Schema::Groups::Partition>(); - Y_VERIFY(groupId > 0); - auto jt = Self->PartitionsInfo.find(partition); - Y_VERIFY(jt != Self->PartitionsInfo.end()); - jt->second.GroupId = groupId; - - Self->NoGroupsInBase = false; - - if (!groupsRowset.Next()) - return false; - } - - Y_VERIFY(Self->ClientsInfo.empty()); - - for (auto& p : Self->PartitionsInfo) { - ui32 groupId = p.second.GroupId; - Self->GroupsInfo[groupId].push_back(p.first); - - } - Self->TotalGroups = Self->GroupsInfo.size(); - - - while (!tabletsRowset.EndOfSet()) { //found out tablets for partitions - ui64 tabletId = tabletsRowset.GetValue<Schema::Tablets::TabletId>(); - TTabletInfo info; - info.Owner = tabletsRowset.GetValue<Schema::Tablets::Owner>(); - info.Idx = tabletsRowset.GetValue<Schema::Tablets::Idx>(); - Self->MaxIdx = Max(Self->MaxIdx, info.Idx); - - Self->TabletsInfo[tabletId] = info; - if (!tabletsRowset.Next()) - return false; - } - - Self->Generation = txc.Generation; - } catch (const TNotReadyTabletException&) { - return false; - } catch (...) { - Y_FAIL("there must be no leaked exceptions"); - } - return true; -} - -void TPersQueueReadBalancer::TTxInit::Complete(const TActorContext& ctx) { - Self->SignalTabletActive(ctx); - if (Self->Inited) - Self->InitDone(ctx); -} - -bool TPersQueueReadBalancer::TTxWrite::Execute(TTransactionContext& txc, const TActorContext&) { - NIceDb::TNiceDb db(txc.DB); - TString config; - bool res = Self->TabletConfig.SerializeToString(&config); - Y_VERIFY(res); - db.Table<Schema::Data>().Key(1).Update( - NIceDb::TUpdate<Schema::Data::PathId>(Self->PathId), - NIceDb::TUpdate<Schema::Data::Topic>(Self->Topic), - NIceDb::TUpdate<Schema::Data::Path>(Self->Path), - NIceDb::TUpdate<Schema::Data::Version>(Self->Version), - NIceDb::TUpdate<Schema::Data::MaxPartsPerTablet>(Self->MaxPartsPerTablet), + if (!partsRowset.Next()) + return false; + } + + while (!groupsRowset.EndOfSet()) { //found out tablets for partitions + ui32 groupId = groupsRowset.GetValue<Schema::Groups::GroupId>(); + ui32 partition = groupsRowset.GetValue<Schema::Groups::Partition>(); + Y_VERIFY(groupId > 0); + auto jt = Self->PartitionsInfo.find(partition); + Y_VERIFY(jt != Self->PartitionsInfo.end()); + jt->second.GroupId = groupId; + + Self->NoGroupsInBase = false; + + if (!groupsRowset.Next()) + return false; + } + + Y_VERIFY(Self->ClientsInfo.empty()); + + for (auto& p : Self->PartitionsInfo) { + ui32 groupId = p.second.GroupId; + Self->GroupsInfo[groupId].push_back(p.first); + + } + Self->TotalGroups = Self->GroupsInfo.size(); + + + while (!tabletsRowset.EndOfSet()) { //found out tablets for partitions + ui64 tabletId = tabletsRowset.GetValue<Schema::Tablets::TabletId>(); + TTabletInfo info; + info.Owner = tabletsRowset.GetValue<Schema::Tablets::Owner>(); + info.Idx = tabletsRowset.GetValue<Schema::Tablets::Idx>(); + Self->MaxIdx = Max(Self->MaxIdx, info.Idx); + + Self->TabletsInfo[tabletId] = info; + if (!tabletsRowset.Next()) + return false; + } + + Self->Generation = txc.Generation; + } catch (const TNotReadyTabletException&) { + return false; + } catch (...) { + Y_FAIL("there must be no leaked exceptions"); + } + return true; +} + +void TPersQueueReadBalancer::TTxInit::Complete(const TActorContext& ctx) { + Self->SignalTabletActive(ctx); + if (Self->Inited) + Self->InitDone(ctx); +} + +bool TPersQueueReadBalancer::TTxWrite::Execute(TTransactionContext& txc, const TActorContext&) { + NIceDb::TNiceDb db(txc.DB); + TString config; + bool res = Self->TabletConfig.SerializeToString(&config); + Y_VERIFY(res); + db.Table<Schema::Data>().Key(1).Update( + NIceDb::TUpdate<Schema::Data::PathId>(Self->PathId), + NIceDb::TUpdate<Schema::Data::Topic>(Self->Topic), + NIceDb::TUpdate<Schema::Data::Path>(Self->Path), + NIceDb::TUpdate<Schema::Data::Version>(Self->Version), + NIceDb::TUpdate<Schema::Data::MaxPartsPerTablet>(Self->MaxPartsPerTablet), NIceDb::TUpdate<Schema::Data::SchemeShardId>(Self->SchemeShardId), - NIceDb::TUpdate<Schema::Data::NextPartitionId>(Self->NextPartitionId), - NIceDb::TUpdate<Schema::Data::Config>(config)); - for (auto& p : DeletedPartitions) { - db.Table<Schema::Partitions>().Key(p).Delete(); - } - for (auto& p : NewPartitions) { - db.Table<Schema::Partitions>().Key(p.first).Update( - NIceDb::TUpdate<Schema::Partitions::TabletId>(p.second.TabletId)); - } - for (auto & p : NewGroups) { - db.Table<Schema::Groups>().Key(p.first, p.second).Update(); - } - for (auto& p : NewTablets) { - db.Table<Schema::Tablets>().Key(p.first).Update( - NIceDb::TUpdate<Schema::Tablets::Owner>(p.second.Owner), - NIceDb::TUpdate<Schema::Tablets::Idx>(p.second.Idx)); - } - return true; -} - -void TPersQueueReadBalancer::TTxWrite::Complete(const TActorContext &ctx) { - for (auto& actor : Self->WaitingResponse) { - THolder<TEvPersQueue::TEvUpdateConfigResponse> res{new TEvPersQueue::TEvUpdateConfigResponse}; - res->Record.SetStatus(NKikimrPQ::OK); - res->Record.SetTxId(Self->TxId); - res->Record.SetOrigin(Self->TabletID()); - ctx.Send(actor, res.Release()); - } - Self->WaitingResponse.clear(); - - Self->NoGroupsInBase = false; - - Self->Inited = true; - Self->InitDone(ctx); -} - - -bool TPersQueueReadBalancer::OnRenderAppHtmlPage(NMon::TEvRemoteHttpInfo::TPtr ev, const TActorContext& ctx) { - if (!ev) { - return true; - } - - TString str = GenerateStat(); - ctx.Send(ev->Sender, new NMon::TEvRemoteHttpInfoRes(str)); - return true; -} - -TString TPersQueueReadBalancer::GenerateStat() { - TStringStream str; - HTML(str) { - H2() {str << "PersQueueReadBalancer Tablet";} - H3() {str << "Topic: " << Topic;} - H3() {str << "Generation: " << Generation;} - H3() {str << "Inited: " << Inited;} - H3() {str << "ActivePipes: " << PipesInfo.size();} - if (Inited) { - H3() {str << "Active partitions: " << NumActiveParts;} - H3() {str << "[Total/Max/Avg]WriteSpeedSec: " << TotalAvgSpeedSec << "/" << MaxAvgSpeedSec << "/" << TotalAvgSpeedSec / NumActiveParts;} - H3() {str << "[Total/Max/Avg]WriteSpeedMin: " << TotalAvgSpeedMin << "/" << MaxAvgSpeedMin << "/" << TotalAvgSpeedMin / NumActiveParts;} - H3() {str << "[Total/Max/Avg]WriteSpeedHour: " << TotalAvgSpeedHour << "/" << MaxAvgSpeedHour << "/" << TotalAvgSpeedHour / NumActiveParts;} - H3() {str << "[Total/Max/Avg]WriteSpeedDay: " << TotalAvgSpeedDay << "/" << MaxAvgSpeedDay << "/" << TotalAvgSpeedDay / NumActiveParts;} - } - - UL_CLASS("nav nav-tabs") { - LI_CLASS("active") { - str << "<a href=\"#main\" data-toggle=\"tab\">partitions</a>"; - } - for (auto& pp : ClientsInfo) { - LI() { - str << "<a href=\"#client_" << Base64Encode(pp.first) << "\" data-toggle=\"tab\">" << NPersQueue::ConvertOldConsumerName(pp.first) << "</a>"; - } - } - } - DIV_CLASS("tab-content") { - DIV_CLASS_ID("tab-pane fade in active", "main") { - TABLE_SORTABLE_CLASS("table") { - TABLEHEAD() { - TABLER() { - TABLEH() {str << "partition";} - TABLEH() {str << "group";} - TABLEH() {str << "tabletId";} - } - } - TABLEBODY() { - for (auto& p : PartitionsInfo) { - TABLER() { - TABLED() { str << p.first;} - TABLED() { str << p.second.GroupId;} - TABLED() { str << p.second.TabletId;} - } - } - } - } - } - for (auto& p : ClientsInfo) { - DIV_CLASS_ID("tab-pane fade", "client_" + Base64Encode(p.first)) { - TABLE_SORTABLE_CLASS("table") { - TABLEHEAD() { - TABLER() { - TABLEH() {str << "partition";} - TABLEH() {str << "group";} - TABLEH() {str << "tabletId";} - TABLEH() {str << "state";} - TABLEH() {str << "session";} - } - } - TABLEBODY() { - for (auto& ci : p.second.ClientGroupsInfo) { - for (auto& pp : ci.second.PartitionsInfo) { - TABLER() { - TABLED() { str << pp.first;} - TABLED() { str << ci.second.Group;} - TABLED() { str << pp.second.TabletId;} - TABLED() { str << (ui32)pp.second.State;} - auto it = ci.second.SessionsInfo.find(std::make_pair(pp.second.Session, ci.second.RandomNumber)); - Y_VERIFY((it == ci.second.SessionsInfo.end()) == (pp.second.State == EPS_FREE)); - TABLED() { str << (pp.second.State != EPS_FREE ? it->second.Session : "");} - } - } - } - } - } - - TABLE_SORTABLE_CLASS("table") { - TABLEHEAD() { - TABLER() { - TABLEH() {str << "session";} - TABLEH() {str << "group";} - TABLEH() {str << "suspended partitions";} - TABLEH() {str << "active partitions";} - TABLEH() {str << "total partitions";} - } - } - TABLEBODY() { - - for (auto& ci : p.second.ClientGroupsInfo) { - for (auto& pp : ci.second.SessionsInfo) { - TABLER() { - TABLED() { str << pp.second.Session;} - TABLED() { str << ci.second.Group;} - TABLED() { str << pp.second.NumSuspended;} - TABLED() { str << pp.second.NumActive - pp.second.NumSuspended;} - TABLED() { str << (pp.second.NumActive);} - } - } - TABLER() { - TABLED() { str << "FREE";} - TABLED() { str << ci.second.Group;} - TABLED() { str << 0;} - TABLED() { str << ci.second.FreePartitions.size();} - TABLED() { str << ci.second.FreePartitions.size();} - } - } - } - } - } - } - } - } - return str.Str(); -} - - -void TPersQueueReadBalancer::HandleOnInit(TEvPersQueue::TEvUpdateBalancerConfig::TPtr &ev, const TActorContext&) { - - UpdateEvents.push_back(ev->Release().Release()); -} - -void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvGetPartitionIdForWrite::TPtr &ev, const TActorContext &ctx) { - NextPartitionIdForWrite = (NextPartitionIdForWrite + 1) % TotalGroups; //TODO: change here when there will be more than 1 partition in partition_group. + NIceDb::TUpdate<Schema::Data::NextPartitionId>(Self->NextPartitionId), + NIceDb::TUpdate<Schema::Data::Config>(config)); + for (auto& p : DeletedPartitions) { + db.Table<Schema::Partitions>().Key(p).Delete(); + } + for (auto& p : NewPartitions) { + db.Table<Schema::Partitions>().Key(p.first).Update( + NIceDb::TUpdate<Schema::Partitions::TabletId>(p.second.TabletId)); + } + for (auto & p : NewGroups) { + db.Table<Schema::Groups>().Key(p.first, p.second).Update(); + } + for (auto& p : NewTablets) { + db.Table<Schema::Tablets>().Key(p.first).Update( + NIceDb::TUpdate<Schema::Tablets::Owner>(p.second.Owner), + NIceDb::TUpdate<Schema::Tablets::Idx>(p.second.Idx)); + } + return true; +} + +void TPersQueueReadBalancer::TTxWrite::Complete(const TActorContext &ctx) { + for (auto& actor : Self->WaitingResponse) { + THolder<TEvPersQueue::TEvUpdateConfigResponse> res{new TEvPersQueue::TEvUpdateConfigResponse}; + res->Record.SetStatus(NKikimrPQ::OK); + res->Record.SetTxId(Self->TxId); + res->Record.SetOrigin(Self->TabletID()); + ctx.Send(actor, res.Release()); + } + Self->WaitingResponse.clear(); + + Self->NoGroupsInBase = false; + + Self->Inited = true; + Self->InitDone(ctx); +} + + +bool TPersQueueReadBalancer::OnRenderAppHtmlPage(NMon::TEvRemoteHttpInfo::TPtr ev, const TActorContext& ctx) { + if (!ev) { + return true; + } + + TString str = GenerateStat(); + ctx.Send(ev->Sender, new NMon::TEvRemoteHttpInfoRes(str)); + return true; +} + +TString TPersQueueReadBalancer::GenerateStat() { + TStringStream str; + HTML(str) { + H2() {str << "PersQueueReadBalancer Tablet";} + H3() {str << "Topic: " << Topic;} + H3() {str << "Generation: " << Generation;} + H3() {str << "Inited: " << Inited;} + H3() {str << "ActivePipes: " << PipesInfo.size();} + if (Inited) { + H3() {str << "Active partitions: " << NumActiveParts;} + H3() {str << "[Total/Max/Avg]WriteSpeedSec: " << TotalAvgSpeedSec << "/" << MaxAvgSpeedSec << "/" << TotalAvgSpeedSec / NumActiveParts;} + H3() {str << "[Total/Max/Avg]WriteSpeedMin: " << TotalAvgSpeedMin << "/" << MaxAvgSpeedMin << "/" << TotalAvgSpeedMin / NumActiveParts;} + H3() {str << "[Total/Max/Avg]WriteSpeedHour: " << TotalAvgSpeedHour << "/" << MaxAvgSpeedHour << "/" << TotalAvgSpeedHour / NumActiveParts;} + H3() {str << "[Total/Max/Avg]WriteSpeedDay: " << TotalAvgSpeedDay << "/" << MaxAvgSpeedDay << "/" << TotalAvgSpeedDay / NumActiveParts;} + } + + UL_CLASS("nav nav-tabs") { + LI_CLASS("active") { + str << "<a href=\"#main\" data-toggle=\"tab\">partitions</a>"; + } + for (auto& pp : ClientsInfo) { + LI() { + str << "<a href=\"#client_" << Base64Encode(pp.first) << "\" data-toggle=\"tab\">" << NPersQueue::ConvertOldConsumerName(pp.first) << "</a>"; + } + } + } + DIV_CLASS("tab-content") { + DIV_CLASS_ID("tab-pane fade in active", "main") { + TABLE_SORTABLE_CLASS("table") { + TABLEHEAD() { + TABLER() { + TABLEH() {str << "partition";} + TABLEH() {str << "group";} + TABLEH() {str << "tabletId";} + } + } + TABLEBODY() { + for (auto& p : PartitionsInfo) { + TABLER() { + TABLED() { str << p.first;} + TABLED() { str << p.second.GroupId;} + TABLED() { str << p.second.TabletId;} + } + } + } + } + } + for (auto& p : ClientsInfo) { + DIV_CLASS_ID("tab-pane fade", "client_" + Base64Encode(p.first)) { + TABLE_SORTABLE_CLASS("table") { + TABLEHEAD() { + TABLER() { + TABLEH() {str << "partition";} + TABLEH() {str << "group";} + TABLEH() {str << "tabletId";} + TABLEH() {str << "state";} + TABLEH() {str << "session";} + } + } + TABLEBODY() { + for (auto& ci : p.second.ClientGroupsInfo) { + for (auto& pp : ci.second.PartitionsInfo) { + TABLER() { + TABLED() { str << pp.first;} + TABLED() { str << ci.second.Group;} + TABLED() { str << pp.second.TabletId;} + TABLED() { str << (ui32)pp.second.State;} + auto it = ci.second.SessionsInfo.find(std::make_pair(pp.second.Session, ci.second.RandomNumber)); + Y_VERIFY((it == ci.second.SessionsInfo.end()) == (pp.second.State == EPS_FREE)); + TABLED() { str << (pp.second.State != EPS_FREE ? it->second.Session : "");} + } + } + } + } + } + + TABLE_SORTABLE_CLASS("table") { + TABLEHEAD() { + TABLER() { + TABLEH() {str << "session";} + TABLEH() {str << "group";} + TABLEH() {str << "suspended partitions";} + TABLEH() {str << "active partitions";} + TABLEH() {str << "total partitions";} + } + } + TABLEBODY() { + + for (auto& ci : p.second.ClientGroupsInfo) { + for (auto& pp : ci.second.SessionsInfo) { + TABLER() { + TABLED() { str << pp.second.Session;} + TABLED() { str << ci.second.Group;} + TABLED() { str << pp.second.NumSuspended;} + TABLED() { str << pp.second.NumActive - pp.second.NumSuspended;} + TABLED() { str << (pp.second.NumActive);} + } + } + TABLER() { + TABLED() { str << "FREE";} + TABLED() { str << ci.second.Group;} + TABLED() { str << 0;} + TABLED() { str << ci.second.FreePartitions.size();} + TABLED() { str << ci.second.FreePartitions.size();} + } + } + } + } + } + } + } + } + return str.Str(); +} + + +void TPersQueueReadBalancer::HandleOnInit(TEvPersQueue::TEvUpdateBalancerConfig::TPtr &ev, const TActorContext&) { + + UpdateEvents.push_back(ev->Release().Release()); +} + +void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvGetPartitionIdForWrite::TPtr &ev, const TActorContext &ctx) { + NextPartitionIdForWrite = (NextPartitionIdForWrite + 1) % TotalGroups; //TODO: change here when there will be more than 1 partition in partition_group. THolder<TEvPersQueue::TEvGetPartitionIdForWriteResponse> response = MakeHolder<TEvPersQueue::TEvGetPartitionIdForWriteResponse>(); - response->Record.SetPartitionId(NextPartitionIdForWrite); - ctx.Send(ev->Sender, response.Release()); - if (NextPartitionIdForWrite == StartPartitionIdForWrite) { // randomize next cycle - StartPartitionIdForWrite = NextPartitionIdForWrite = rand() % TotalGroups; - } -} - - + response->Record.SetPartitionId(NextPartitionIdForWrite); + ctx.Send(ev->Sender, response.Release()); + if (NextPartitionIdForWrite == StartPartitionIdForWrite) { // randomize next cycle + StartPartitionIdForWrite = NextPartitionIdForWrite = rand() % TotalGroups; + } +} + + void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvCheckACL::TPtr &ev, const TActorContext &ctx) { - - if (!AppData(ctx)->PQConfig.GetCheckACL()) { - RespondWithACL(ev, NKikimrPQ::EAccess::ALLOWED, "", ctx); - return; - } - + + if (!AppData(ctx)->PQConfig.GetCheckACL()) { + RespondWithACL(ev, NKikimrPQ::EAccess::ALLOWED, "", ctx); + return; + } + if (ctx.Now() > LastACLUpdate + ACL_EXPIRATION_TIMEOUT || Topic.empty()) { //Topic.empty is only for tests - WaitingACLRequests.push_back(ev); - return; - } - + WaitingACLRequests.push_back(ev); + return; + } + auto& record = ev->Get()->Record; - - if (record.GetToken().empty()) { - if (record.GetOperation() == NKikimrPQ::EOperation::WRITE_OP && TabletConfig.GetRequireAuthWrite() || - record.GetOperation() == NKikimrPQ::EOperation::READ_OP && TabletConfig.GetRequireAuthRead()) { - RespondWithACL(ev, NKikimrPQ::EAccess::DENIED, TStringBuilder() << "topic " << Topic << " requires authentication", ctx); - } else { - RespondWithACL(ev, NKikimrPQ::EAccess::ALLOWED, "", ctx); + + if (record.GetToken().empty()) { + if (record.GetOperation() == NKikimrPQ::EOperation::WRITE_OP && TabletConfig.GetRequireAuthWrite() || + record.GetOperation() == NKikimrPQ::EOperation::READ_OP && TabletConfig.GetRequireAuthRead()) { + RespondWithACL(ev, NKikimrPQ::EAccess::DENIED, TStringBuilder() << "topic " << Topic << " requires authentication", ctx); + } else { + RespondWithACL(ev, NKikimrPQ::EAccess::ALLOWED, "", ctx); } - return; + return; } - - NACLib::TUserToken token(record.GetToken()); - CheckACL(ev, token, ctx); + + NACLib::TUserToken token(record.GetToken()); + CheckACL(ev, token, ctx); } - + void TPersQueueReadBalancer::RespondWithACL( const TEvPersQueue::TEvCheckACL::TPtr &request, const NKikimrPQ::EAccess &access, @@ -356,8 +356,8 @@ void TPersQueueReadBalancer::RespondWithACL( void TPersQueueReadBalancer::CheckACL(const TEvPersQueue::TEvCheckACL::TPtr &request, const NACLib::TUserToken& token, const TActorContext &ctx) { NACLib::EAccessRights rights = NACLib::EAccessRights::UpdateRow; - const auto& record = request->Get()->Record; - switch(record.GetOperation()) { + const auto& record = request->Get()->Record; + switch(record.GetOperation()) { case NKikimrPQ::EOperation::READ_OP: rights = NACLib::EAccessRights::SelectRow; break; @@ -366,56 +366,56 @@ void TPersQueueReadBalancer::CheckACL(const TEvPersQueue::TEvCheckACL::TPtr &req break; }; - TString user = record.HasUser() ? record.GetUser() : ""; - - if (record.GetOperation() == NKikimrPQ::EOperation::READ_OP) { + TString user = record.HasUser() ? record.GetUser() : ""; + + if (record.GetOperation() == NKikimrPQ::EOperation::READ_OP) { if (!Consumers.contains(user)) { - RespondWithACL(request, NKikimrPQ::EAccess::DENIED, TStringBuilder() << "no read rule provided for consumer '" << NPersQueue::ConvertOldConsumerName(user) << "' that allows to read topic from cluster '" - << NPersQueue::GetDC(Topic) <<"'; may be there is read rule with mode all-original only and you are reading with mirrored topics. Change read-rule to mirror-to-<cluster> or options of reading process.", ctx); - return; - } - } - if (ACL.CheckAccess(rights, token)) { - RespondWithACL(request, NKikimrPQ::EAccess::ALLOWED, "", ctx); + RespondWithACL(request, NKikimrPQ::EAccess::DENIED, TStringBuilder() << "no read rule provided for consumer '" << NPersQueue::ConvertOldConsumerName(user) << "' that allows to read topic from cluster '" + << NPersQueue::GetDC(Topic) <<"'; may be there is read rule with mode all-original only and you are reading with mirrored topics. Change read-rule to mirror-to-<cluster> or options of reading process.", ctx); + return; + } + } + if (ACL.CheckAccess(rights, token)) { + RespondWithACL(request, NKikimrPQ::EAccess::ALLOWED, "", ctx); } else { - RespondWithACL(request, NKikimrPQ::EAccess::DENIED, TStringBuilder() << "access denied for consumer '" << NPersQueue::ConvertOldConsumerName(user) << "' : no " << (rights == NACLib::EAccessRights::SelectRow ? "ReadTopic" : "WriteTopic") << " permission" , ctx); + RespondWithACL(request, NKikimrPQ::EAccess::DENIED, TStringBuilder() << "access denied for consumer '" << NPersQueue::ConvertOldConsumerName(user) << "' : no " << (rights == NACLib::EAccessRights::SelectRow ? "ReadTopic" : "WriteTopic") << " permission" , ctx); + } +} + +void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvWakeupClient::TPtr &ev, const TActorContext& ctx) { + auto jt = ClientsInfo.find(ev->Get()->Client); + if (jt == ClientsInfo.end()) + return; + auto it = jt->second.ClientGroupsInfo.find(ev->Get()->Group); + if (it != jt->second.ClientGroupsInfo.end()) { + it->second.WakeupScheduled = false; + it->second.Balance(ctx); } } -void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvWakeupClient::TPtr &ev, const TActorContext& ctx) { - auto jt = ClientsInfo.find(ev->Get()->Client); - if (jt == ClientsInfo.end()) - return; - auto it = jt->second.ClientGroupsInfo.find(ev->Get()->Group); - if (it != jt->second.ClientGroupsInfo.end()) { - it->second.WakeupScheduled = false; - it->second.Balance(ctx); - } -} - -void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvDescribe::TPtr &ev, const TActorContext& ctx) { +void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvDescribe::TPtr &ev, const TActorContext& ctx) { if (ctx.Now() > LastACLUpdate + ACL_EXPIRATION_TIMEOUT || Topic.empty()) { //Topic.empty is only for tests - WaitingDescribeRequests.push_back(ev); - return; - } else { - THolder<TEvPersQueue::TEvDescribeResponse> res{new TEvPersQueue::TEvDescribeResponse}; - res->Record.MutableConfig()->CopyFrom(TabletConfig); - res->Record.SetVersion(Version); - res->Record.SetTopicName(Topic); - res->Record.SetPartitionPerTablet(MaxPartsPerTablet); - res->Record.SetSchemeShardId(SchemeShardId); - res->Record.SetBalancerTabletId(TabletID()); - res->Record.SetSecurityObject(ACL.SerializeAsString()); - for (auto& parts : PartitionsInfo) { - auto p = res->Record.AddPartitions(); - p->SetPartition(parts.first); - p->SetTabletId(parts.second.TabletId); - } - ctx.Send(ev->Sender, res.Release()); - } -} - - + WaitingDescribeRequests.push_back(ev); + return; + } else { + THolder<TEvPersQueue::TEvDescribeResponse> res{new TEvPersQueue::TEvDescribeResponse}; + res->Record.MutableConfig()->CopyFrom(TabletConfig); + res->Record.SetVersion(Version); + res->Record.SetTopicName(Topic); + res->Record.SetPartitionPerTablet(MaxPartsPerTablet); + res->Record.SetSchemeShardId(SchemeShardId); + res->Record.SetBalancerTabletId(TabletID()); + res->Record.SetSecurityObject(ACL.SerializeAsString()); + for (auto& parts : PartitionsInfo) { + auto p = res->Record.AddPartitions(); + p->SetPartition(parts.first); + p->SetTabletId(parts.second.TabletId); + } + ctx.Send(ev->Sender, res.Release()); + } +} + + void TPersQueueReadBalancer::Handle(TEvents::TEvPoisonPill &ev, const TActorContext& ctx) { Y_UNUSED(ev); Y_UNUSED(ctx); @@ -423,253 +423,253 @@ void TPersQueueReadBalancer::Handle(TEvents::TEvPoisonPill &ev, const TActorCont ctx.Send(Tablet(), new TEvents::TEvPoisonPill); } - -void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvUpdateBalancerConfig::TPtr &ev, const TActorContext& ctx) { - auto& record = ev->Get()->Record; - if ((int)record.GetVersion() < Version && Inited) { - THolder<TEvPersQueue::TEvUpdateConfigResponse> res{new TEvPersQueue::TEvUpdateConfigResponse}; - res->Record.SetStatus(NKikimrPQ::ERROR_BAD_VERSION); - res->Record.SetTxId(record.GetTxId()); - res->Record.SetOrigin(TabletID()); - ctx.Send(ev->Sender, res.Release()); + +void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvUpdateBalancerConfig::TPtr &ev, const TActorContext& ctx) { + auto& record = ev->Get()->Record; + if ((int)record.GetVersion() < Version && Inited) { + THolder<TEvPersQueue::TEvUpdateConfigResponse> res{new TEvPersQueue::TEvUpdateConfigResponse}; + res->Record.SetStatus(NKikimrPQ::ERROR_BAD_VERSION); + res->Record.SetTxId(record.GetTxId()); + res->Record.SetOrigin(TabletID()); + ctx.Send(ev->Sender, res.Release()); + return; + } + + if ((int)record.GetVersion() == Version) { + if (!WaitingResponse.empty()) { //got transaction infly + WaitingResponse.push_back(ev->Sender); + } else { //version already applied + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, "BALANCER Topic " << Topic << "Tablet " << TabletID() + << " Config already applied version " << record.GetVersion() << " actor " << ev->Sender + << " txId " << record.GetTxId()); + THolder<TEvPersQueue::TEvUpdateConfigResponse> res{new TEvPersQueue::TEvUpdateConfigResponse}; + res->Record.SetStatus(NKikimrPQ::OK); + res->Record.SetTxId(record.GetTxId()); + res->Record.SetOrigin(TabletID()); + ctx.Send(ev->Sender, res.Release()); + } return; - } - - if ((int)record.GetVersion() == Version) { - if (!WaitingResponse.empty()) { //got transaction infly - WaitingResponse.push_back(ev->Sender); - } else { //version already applied - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, "BALANCER Topic " << Topic << "Tablet " << TabletID() - << " Config already applied version " << record.GetVersion() << " actor " << ev->Sender - << " txId " << record.GetTxId()); - THolder<TEvPersQueue::TEvUpdateConfigResponse> res{new TEvPersQueue::TEvUpdateConfigResponse}; - res->Record.SetStatus(NKikimrPQ::OK); - res->Record.SetTxId(record.GetTxId()); - res->Record.SetOrigin(TabletID()); - ctx.Send(ev->Sender, res.Release()); - } - return; - } - - if ((int)record.GetVersion() > Version && !WaitingResponse.empty()) { //old transaction is not done yet - THolder<TEvPersQueue::TEvUpdateConfigResponse> res{new TEvPersQueue::TEvUpdateConfigResponse}; - res->Record.SetStatus(NKikimrPQ::ERROR_UPDATE_IN_PROGRESS); - res->Record.SetTxId(ev->Get()->Record.GetTxId()); - res->Record.SetOrigin(TabletID()); - ctx.Send(ev->Sender, res.Release()); - } - WaitingResponse.push_back(ev->Sender); - - Version = record.GetVersion(); - MaxPartsPerTablet = record.GetPartitionPerTablet(); - PathId = record.GetPathId(); - Topic = record.GetTopicName(); - Path = record.GetPath(); - TxId = record.GetTxId(); - TabletConfig = record.GetTabletConfig(); + } + + if ((int)record.GetVersion() > Version && !WaitingResponse.empty()) { //old transaction is not done yet + THolder<TEvPersQueue::TEvUpdateConfigResponse> res{new TEvPersQueue::TEvUpdateConfigResponse}; + res->Record.SetStatus(NKikimrPQ::ERROR_UPDATE_IN_PROGRESS); + res->Record.SetTxId(ev->Get()->Record.GetTxId()); + res->Record.SetOrigin(TabletID()); + ctx.Send(ev->Sender, res.Release()); + } + WaitingResponse.push_back(ev->Sender); + + Version = record.GetVersion(); + MaxPartsPerTablet = record.GetPartitionPerTablet(); + PathId = record.GetPathId(); + Topic = record.GetTopicName(); + Path = record.GetPath(); + TxId = record.GetTxId(); + TabletConfig = record.GetTabletConfig(); SchemeShardId = record.GetSchemeShardId(); - TotalGroups = record.HasTotalGroupCount() ? record.GetTotalGroupCount() : 0; - ui32 prevNextPartitionId = NextPartitionId; - NextPartitionId = record.HasNextPartitionId() ? record.GetNextPartitionId() : 0; + TotalGroups = record.HasTotalGroupCount() ? record.GetTotalGroupCount() : 0; + ui32 prevNextPartitionId = NextPartitionId; + NextPartitionId = record.HasNextPartitionId() ? record.GetNextPartitionId() : 0; THashMap<ui32, TPartitionInfo> partitionsInfo; - - Consumers.clear(); - for (const auto& rr : TabletConfig.GetReadRules()) { - Consumers.insert(rr); - } - - TVector<std::pair<ui32, TPartInfo>> newPartitions; + + Consumers.clear(); + for (const auto& rr : TabletConfig.GetReadRules()) { + Consumers.insert(rr); + } + + TVector<std::pair<ui32, TPartInfo>> newPartitions; TVector<ui32> deletedPartitions; - TVector<std::pair<ui64, TTabletInfo>> newTablets; - TVector<std::pair<ui32, ui32>> newGroups; - - for (auto& p : record.GetTablets()) { - auto it = TabletsInfo.find(p.GetTabletId()); - if (it == TabletsInfo.end()) { - TTabletInfo info{p.GetOwner(), p.GetIdx()}; - TabletsInfo[p.GetTabletId()] = info; - newTablets.push_back(std::make_pair(p.GetTabletId(), info)); - } - } - - ui32 prevGroups = GroupsInfo.size(); - - for (auto& p : record.GetPartitions()) { - auto it = PartitionsInfo.find(p.GetPartition()); - ui32 group = p.HasGroup() ? p.GetGroup() : p.GetPartition() + 1; - Y_VERIFY(group > 0); - - if (NoGroupsInBase) { - Y_VERIFY(group <= TotalGroups || TotalGroups == 0); - newGroups.push_back(std::make_pair(group, p.GetPartition())); - } - if (it == PartitionsInfo.end()) { - Y_VERIFY(group <= TotalGroups && group > prevGroups || TotalGroups == 0); - Y_VERIFY(p.GetPartition() >= prevNextPartitionId && p.GetPartition() < NextPartitionId || NextPartitionId == 0); + TVector<std::pair<ui64, TTabletInfo>> newTablets; + TVector<std::pair<ui32, ui32>> newGroups; + + for (auto& p : record.GetTablets()) { + auto it = TabletsInfo.find(p.GetTabletId()); + if (it == TabletsInfo.end()) { + TTabletInfo info{p.GetOwner(), p.GetIdx()}; + TabletsInfo[p.GetTabletId()] = info; + newTablets.push_back(std::make_pair(p.GetTabletId(), info)); + } + } + + ui32 prevGroups = GroupsInfo.size(); + + for (auto& p : record.GetPartitions()) { + auto it = PartitionsInfo.find(p.GetPartition()); + ui32 group = p.HasGroup() ? p.GetGroup() : p.GetPartition() + 1; + Y_VERIFY(group > 0); + + if (NoGroupsInBase) { + Y_VERIFY(group <= TotalGroups || TotalGroups == 0); + newGroups.push_back(std::make_pair(group, p.GetPartition())); + } + if (it == PartitionsInfo.end()) { + Y_VERIFY(group <= TotalGroups && group > prevGroups || TotalGroups == 0); + Y_VERIFY(p.GetPartition() >= prevNextPartitionId && p.GetPartition() < NextPartitionId || NextPartitionId == 0); partitionsInfo[p.GetPartition()] = {p.GetTabletId(), EPS_FREE, TActorId(), group}; - newPartitions.push_back(std::make_pair(p.GetPartition(), TPartInfo{p.GetTabletId(), group})); - if (!NoGroupsInBase) - newGroups.push_back(std::make_pair(group, p.GetPartition())); - GroupsInfo[group].push_back(p.GetPartition()); - ++NumActiveParts; - } else { //group is already defined - Y_VERIFY(it->second.GroupId == group); - partitionsInfo[p.GetPartition()] = it->second; - } - } - - if (TotalGroups == 0) { - NextPartitionId = TotalGroups = GroupsInfo.size(); - } - - Y_VERIFY(GroupsInfo.size() == TotalGroups); - - for (auto& p : PartitionsInfo) { - if (partitionsInfo.find(p.first) == partitionsInfo.end()) { - Y_FAIL("deleting of partitions is not fully supported yet"); - deletedPartitions.push_back(p.first); - } - } - PartitionsInfo = partitionsInfo; - - for (auto& p : ClientsInfo) { - auto mainGroup = p.second.ClientGroupsInfo.find(0); - for (auto& part : newPartitions) { - ui32 group = part.second.Group; - auto it = p.second.SessionsWithGroup ? p.second.ClientGroupsInfo.find(group) : mainGroup; - if (it == p.second.ClientGroupsInfo.end()) { - Y_VERIFY(p.second.SessionsWithGroup); - p.second.AddGroup(group); - it = p.second.ClientGroupsInfo.find(group); - } - it->second.FreePartitions.push_back(part.first); + newPartitions.push_back(std::make_pair(p.GetPartition(), TPartInfo{p.GetTabletId(), group})); + if (!NoGroupsInBase) + newGroups.push_back(std::make_pair(group, p.GetPartition())); + GroupsInfo[group].push_back(p.GetPartition()); + ++NumActiveParts; + } else { //group is already defined + Y_VERIFY(it->second.GroupId == group); + partitionsInfo[p.GetPartition()] = it->second; + } + } + + if (TotalGroups == 0) { + NextPartitionId = TotalGroups = GroupsInfo.size(); + } + + Y_VERIFY(GroupsInfo.size() == TotalGroups); + + for (auto& p : PartitionsInfo) { + if (partitionsInfo.find(p.first) == partitionsInfo.end()) { + Y_FAIL("deleting of partitions is not fully supported yet"); + deletedPartitions.push_back(p.first); + } + } + PartitionsInfo = partitionsInfo; + + for (auto& p : ClientsInfo) { + auto mainGroup = p.second.ClientGroupsInfo.find(0); + for (auto& part : newPartitions) { + ui32 group = part.second.Group; + auto it = p.second.SessionsWithGroup ? p.second.ClientGroupsInfo.find(group) : mainGroup; + if (it == p.second.ClientGroupsInfo.end()) { + Y_VERIFY(p.second.SessionsWithGroup); + p.second.AddGroup(group); + it = p.second.ClientGroupsInfo.find(group); + } + it->second.FreePartitions.push_back(part.first); it->second.PartitionsInfo[part.first] = {part.second.TabletId, EPS_FREE, TActorId(), group}; - it->second.ScheduleBalance(ctx); - } - } - RebuildStructs(); - - Execute(new TTxWrite(this, std::move(deletedPartitions), std::move(newPartitions), std::move(newTablets), std::move(newGroups)), ctx); -} - - -TStringBuilder TPersQueueReadBalancer::GetPrefix() const { - return TStringBuilder() << "tablet " << TabletID() << " topic " << Topic << " "; -} - -TStringBuilder TPersQueueReadBalancer::TClientGroupInfo::GetPrefix() const { - return TStringBuilder() << "tablet " << TabletId << " topic " << Topic << " "; -} - -TStringBuilder TPersQueueReadBalancer::TClientInfo::GetPrefix() const { - return TStringBuilder() << "tablet " << TabletId << " topic " << Topic << " "; -} - -void TPersQueueReadBalancer::Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev, const TActorContext& ctx) -{ - auto it = PipesInfo.find(ev->Get()->ClientId); - - LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "pipe " << ev->Get()->ClientId << " disconnected; active server actors: " - << (it != PipesInfo.end() ? it->second.ServerActors : -1)); - - if (it != PipesInfo.end()) { - if (--(it->second.ServerActors) > 0) - return; - if (!it->second.Session.empty()) { - LOG_NOTICE_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "pipe " << ev->Get()->ClientId << " client " << it->second.ClientId << " disconnected session " << it->second.Session); - UnregisterSession(it->first, ctx); - } else { - LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "pipe " << ev->Get()->ClientId << " disconnected no session"); - PipesInfo.erase(it); - } - } -} - -void TPersQueueReadBalancer::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) -{ - RestartPipe(ev->Get()->TabletId, ctx); + it->second.ScheduleBalance(ctx); + } + } + RebuildStructs(); + + Execute(new TTxWrite(this, std::move(deletedPartitions), std::move(newPartitions), std::move(newTablets), std::move(newGroups)), ctx); +} + + +TStringBuilder TPersQueueReadBalancer::GetPrefix() const { + return TStringBuilder() << "tablet " << TabletID() << " topic " << Topic << " "; +} + +TStringBuilder TPersQueueReadBalancer::TClientGroupInfo::GetPrefix() const { + return TStringBuilder() << "tablet " << TabletId << " topic " << Topic << " "; +} + +TStringBuilder TPersQueueReadBalancer::TClientInfo::GetPrefix() const { + return TStringBuilder() << "tablet " << TabletId << " topic " << Topic << " "; +} + +void TPersQueueReadBalancer::Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev, const TActorContext& ctx) +{ + auto it = PipesInfo.find(ev->Get()->ClientId); + + LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "pipe " << ev->Get()->ClientId << " disconnected; active server actors: " + << (it != PipesInfo.end() ? it->second.ServerActors : -1)); + + if (it != PipesInfo.end()) { + if (--(it->second.ServerActors) > 0) + return; + if (!it->second.Session.empty()) { + LOG_NOTICE_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "pipe " << ev->Get()->ClientId << " client " << it->second.ClientId << " disconnected session " << it->second.Session); + UnregisterSession(it->first, ctx); + } else { + LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "pipe " << ev->Get()->ClientId << " disconnected no session"); + PipesInfo.erase(it); + } + } +} + +void TPersQueueReadBalancer::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) +{ + RestartPipe(ev->Get()->TabletId, ctx); RequestTabletIfNeeded(ev->Get()->TabletId, ctx); -} - - -void TPersQueueReadBalancer::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) -{ - if (ev->Get()->Status != NKikimrProto::OK) { - RestartPipe(ev->Get()->TabletId, ctx); +} + + +void TPersQueueReadBalancer::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) +{ + if (ev->Get()->Status != NKikimrProto::OK) { + RestartPipe(ev->Get()->TabletId, ctx); RequestTabletIfNeeded(ev->Get()->TabletId, ctx); - } -} - -void TPersQueueReadBalancer::RestartPipe(const ui64 tabletId, const TActorContext& ctx) -{ - auto it = TabletPipes.find(tabletId); - if (it != TabletPipes.end()) { - NTabletPipe::CloseClient(ctx, it->second); - TabletPipes.erase(it); - } -} - - + } +} + +void TPersQueueReadBalancer::RestartPipe(const ui64 tabletId, const TActorContext& ctx) +{ + auto it = TabletPipes.find(tabletId); + if (it != TabletPipes.end()) { + NTabletPipe::CloseClient(ctx, it->second); + TabletPipes.erase(it); + } +} + + void TPersQueueReadBalancer::RequestTabletIfNeeded(const ui64 tabletId, const TActorContext& ctx) -{ +{ if ((tabletId == SchemeShardId && !WaitingForACL) || (tabletId != SchemeShardId && !WaitingForStat.contains(tabletId))) - return; - - auto it = TabletPipes.find(tabletId); + return; + + auto it = TabletPipes.find(tabletId); TActorId pipeClient; - if (it == TabletPipes.end()) { - NTabletPipe::TClientConfig clientConfig; + if (it == TabletPipes.end()) { + NTabletPipe::TClientConfig clientConfig; pipeClient = ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, tabletId, clientConfig)); - TabletPipes[tabletId] = pipeClient; - } else { - pipeClient = it->second; - } + TabletPipes[tabletId] = pipeClient; + } else { + pipeClient = it->second; + } if (tabletId == SchemeShardId) { NTabletPipe::SendData(ctx, pipeClient, new NSchemeShard::TEvSchemeShard::TEvDescribeScheme(tabletId, PathId)); } else { NTabletPipe::SendData(ctx, pipeClient, new TEvPersQueue::TEvStatus()); } -} - - -void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvStatusResponse::TPtr& ev, const TActorContext& ctx) -{ - const auto& record = ev->Get()->Record; - ui64 tabletId = record.GetTabletId(); - bool res = WaitingForStat.erase(tabletId); - if (!res) //ignore if already processed - return; - for (const auto& partRes : record.GetPartResult()) { - TotalAvgSpeedSec += partRes.GetAvgWriteSpeedPerSec(); - MaxAvgSpeedSec = Max<ui64>(MaxAvgSpeedSec, partRes.GetAvgWriteSpeedPerSec()); - TotalAvgSpeedMin += partRes.GetAvgWriteSpeedPerMin(); - MaxAvgSpeedMin = Max<ui64>(MaxAvgSpeedMin, partRes.GetAvgWriteSpeedPerMin()); - TotalAvgSpeedHour += partRes.GetAvgWriteSpeedPerHour(); - MaxAvgSpeedHour = Max<ui64>(MaxAvgSpeedHour, partRes.GetAvgWriteSpeedPerHour()); - TotalAvgSpeedDay += partRes.GetAvgWriteSpeedPerDay(); - MaxAvgSpeedDay = Max<ui64>(MaxAvgSpeedDay, partRes.GetAvgWriteSpeedPerDay()); - } - if (WaitingForStat.empty()) { - CheckStat(ctx); - } -} - -void TPersQueueReadBalancer::AnswerWaitingRequests(const TActorContext& ctx) { - TVector<TEvPersQueue::TEvCheckACL::TPtr> ww; - ww.swap(WaitingACLRequests); - for (auto& r : ww) { - Handle(r, ctx); - } - - TVector<TEvPersQueue::TEvDescribe::TPtr> dr; - dr.swap(WaitingDescribeRequests); - for (auto& r : dr) { - Handle(r, ctx); - } - -} - +} + + +void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvStatusResponse::TPtr& ev, const TActorContext& ctx) +{ + const auto& record = ev->Get()->Record; + ui64 tabletId = record.GetTabletId(); + bool res = WaitingForStat.erase(tabletId); + if (!res) //ignore if already processed + return; + for (const auto& partRes : record.GetPartResult()) { + TotalAvgSpeedSec += partRes.GetAvgWriteSpeedPerSec(); + MaxAvgSpeedSec = Max<ui64>(MaxAvgSpeedSec, partRes.GetAvgWriteSpeedPerSec()); + TotalAvgSpeedMin += partRes.GetAvgWriteSpeedPerMin(); + MaxAvgSpeedMin = Max<ui64>(MaxAvgSpeedMin, partRes.GetAvgWriteSpeedPerMin()); + TotalAvgSpeedHour += partRes.GetAvgWriteSpeedPerHour(); + MaxAvgSpeedHour = Max<ui64>(MaxAvgSpeedHour, partRes.GetAvgWriteSpeedPerHour()); + TotalAvgSpeedDay += partRes.GetAvgWriteSpeedPerDay(); + MaxAvgSpeedDay = Max<ui64>(MaxAvgSpeedDay, partRes.GetAvgWriteSpeedPerDay()); + } + if (WaitingForStat.empty()) { + CheckStat(ctx); + } +} + +void TPersQueueReadBalancer::AnswerWaitingRequests(const TActorContext& ctx) { + TVector<TEvPersQueue::TEvCheckACL::TPtr> ww; + ww.swap(WaitingACLRequests); + for (auto& r : ww) { + Handle(r, ctx); + } + + TVector<TEvPersQueue::TEvDescribe::TPtr> dr; + dr.swap(WaitingDescribeRequests); + for (auto& r : dr) { + Handle(r, ctx); + } + +} + void TPersQueueReadBalancer::Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev, const TActorContext& ctx) { Y_UNUSED(ctx); if (!WaitingForACL) //ignore if already processed @@ -679,38 +679,38 @@ void TPersQueueReadBalancer::Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSch if (record.GetStatus() == NKikimrScheme::EStatus::StatusSuccess) { ACL.Clear(); Y_PROTOBUF_SUPPRESS_NODISCARD ACL.MutableACL()->ParseFromString(record.GetPathDescription().GetSelf().GetEffectiveACL()); - LastACLUpdate = ctx.Now(); + LastACLUpdate = ctx.Now(); ctx.Schedule(TDuration::Seconds(AppData(ctx)->PQConfig.GetBalancerMetadataRetryTimeoutSec()), new TEvPersQueue::TEvUpdateACL()); - - AnswerWaitingRequests(ctx); + + AnswerWaitingRequests(ctx); } else { - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "couldn't receive ACL due to " << record.GetStatus()); + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "couldn't receive ACL due to " << record.GetStatus()); ctx.Schedule(ACL_ERROR_RETRY_TIMEOUT, new TEvPersQueue::TEvUpdateACL()); } } -void TPersQueueReadBalancer::CheckStat(const TActorContext& ctx) { - Y_UNUSED(ctx); - //TODO: Deside about changing number of partitions and send request to SchemeShard - //TODO: make AlterTopic request via TX_PROXY -} - -void TPersQueueReadBalancer::GetStat(const TActorContext& ctx) { - if (!WaitingForStat.empty()) //if there is request infly - return; - TotalAvgSpeedSec = MaxAvgSpeedSec = 0; - TotalAvgSpeedMin = MaxAvgSpeedMin = 0; - TotalAvgSpeedHour = MaxAvgSpeedHour = 0; - TotalAvgSpeedDay = MaxAvgSpeedDay = 0; - for (auto& p : PartitionsInfo) { - const ui64& tabletId = p.second.TabletId; - bool res = WaitingForStat.insert(tabletId).second; - if (!res) //already asked stat - continue; +void TPersQueueReadBalancer::CheckStat(const TActorContext& ctx) { + Y_UNUSED(ctx); + //TODO: Deside about changing number of partitions and send request to SchemeShard + //TODO: make AlterTopic request via TX_PROXY +} + +void TPersQueueReadBalancer::GetStat(const TActorContext& ctx) { + if (!WaitingForStat.empty()) //if there is request infly + return; + TotalAvgSpeedSec = MaxAvgSpeedSec = 0; + TotalAvgSpeedMin = MaxAvgSpeedMin = 0; + TotalAvgSpeedHour = MaxAvgSpeedHour = 0; + TotalAvgSpeedDay = MaxAvgSpeedDay = 0; + for (auto& p : PartitionsInfo) { + const ui64& tabletId = p.second.TabletId; + bool res = WaitingForStat.insert(tabletId).second; + if (!res) //already asked stat + continue; RequestTabletIfNeeded(tabletId, ctx); - } -} - + } +} + void TPersQueueReadBalancer::GetACL(const TActorContext& ctx) { if (WaitingForACL) // if there is request infly return; @@ -722,55 +722,55 @@ void TPersQueueReadBalancer::GetACL(const TActorContext& ctx) { } } -void TPersQueueReadBalancer::Handle(TEvTabletPipe::TEvServerConnected::TPtr& ev, const TActorContext& ctx) -{ +void TPersQueueReadBalancer::Handle(TEvTabletPipe::TEvServerConnected::TPtr& ev, const TActorContext& ctx) +{ const TActorId& sender = ev->Get()->ClientId; - auto it = PipesInfo.find(sender); - if (it == PipesInfo.end()) { + auto it = PipesInfo.find(sender); + if (it == PipesInfo.end()) { PipesInfo.insert({sender, {"", "", TActorId(), false, 1}}); - } else { - it->second.ServerActors++; - } - LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "pipe " << sender << " connected; active server actors: " << PipesInfo[sender].ServerActors); -} - -TPersQueueReadBalancer::TClientGroupInfo& TPersQueueReadBalancer::TClientInfo::AddGroup(const ui32 group) { - TClientGroupInfo& clientInfo = ClientGroupsInfo[group]; - clientInfo.Group = group; - clientInfo.ClientId = ClientId; - clientInfo.Topic = Topic; - clientInfo.TabletId = TabletId; + } else { + it->second.ServerActors++; + } + LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "pipe " << sender << " connected; active server actors: " << PipesInfo[sender].ServerActors); +} + +TPersQueueReadBalancer::TClientGroupInfo& TPersQueueReadBalancer::TClientInfo::AddGroup(const ui32 group) { + TClientGroupInfo& clientInfo = ClientGroupsInfo[group]; + clientInfo.Group = group; + clientInfo.ClientId = ClientId; + clientInfo.Topic = Topic; + clientInfo.TabletId = TabletId; clientInfo.Path = Path; - clientInfo.Generation = Generation; - clientInfo.Step = &Step; - - clientInfo.RandomNumber = TAppData::RandomProvider->GenRand64(); - return clientInfo; -} - -void TPersQueueReadBalancer::TClientInfo::FillEmptyGroup(const ui32 group, const THashMap<ui32, TPartitionInfo>& partitionsInfo) { - auto& clientInfo = AddGroup(group); - - for (auto& p : partitionsInfo) { - if (p.second.GroupId == group || group == 0) { //check group - clientInfo.PartitionsInfo.insert(p); - clientInfo.FreePartitions.push_back(p.first); - } - } -} - -void TPersQueueReadBalancer::TClientInfo::AddSession(const ui32 group, const THashMap<ui32, TPartitionInfo>& partitionsInfo, + clientInfo.Generation = Generation; + clientInfo.Step = &Step; + + clientInfo.RandomNumber = TAppData::RandomProvider->GenRand64(); + return clientInfo; +} + +void TPersQueueReadBalancer::TClientInfo::FillEmptyGroup(const ui32 group, const THashMap<ui32, TPartitionInfo>& partitionsInfo) { + auto& clientInfo = AddGroup(group); + + for (auto& p : partitionsInfo) { + if (p.second.GroupId == group || group == 0) { //check group + clientInfo.PartitionsInfo.insert(p); + clientInfo.FreePartitions.push_back(p.first); + } + } +} + +void TPersQueueReadBalancer::TClientInfo::AddSession(const ui32 group, const THashMap<ui32, TPartitionInfo>& partitionsInfo, const TActorId& sender, const NKikimrPQ::TRegisterReadSession& record) { - + TActorId pipe = ActorIdFromProto(record.GetPipeClient()); - - Y_VERIFY(pipe); - - if (ClientGroupsInfo.find(group) == ClientGroupsInfo.end()) { - FillEmptyGroup(group, partitionsInfo); - } - - auto it = ClientGroupsInfo.find(group); + + Y_VERIFY(pipe); + + if (ClientGroupsInfo.find(group) == ClientGroupsInfo.end()) { + FillEmptyGroup(group, partitionsInfo); + } + + auto it = ClientGroupsInfo.find(group); it->second.SessionsInfo.insert({ std::make_pair(pipe, it->second.RandomNumber), TClientGroupInfo::TSessionInfo( @@ -779,406 +779,406 @@ void TPersQueueReadBalancer::TClientInfo::AddSession(const ui32 group, const THa sender.NodeId(), TAppData::TimeProvider->Now() ) }); -} - - -void TPersQueueReadBalancer::HandleOnInit(TEvPersQueue::TEvRegisterReadSession::TPtr& ev, const TActorContext&) -{ - Y_FAIL(""); - RegisterEvents.push_back(ev->Release().Release()); -} - - -void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvRegisterReadSession::TPtr& ev, const TActorContext& ctx) -{ - const auto& record = ev->Get()->Record; - +} + + +void TPersQueueReadBalancer::HandleOnInit(TEvPersQueue::TEvRegisterReadSession::TPtr& ev, const TActorContext&) +{ + Y_FAIL(""); + RegisterEvents.push_back(ev->Release().Release()); +} + + +void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvRegisterReadSession::TPtr& ev, const TActorContext& ctx) +{ + const auto& record = ev->Get()->Record; + TActorId pipe = ActorIdFromProto(record.GetPipeClient()); - LOG_NOTICE_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, "client " << record.GetClientId() << " register session for pipe " << pipe << " session " << record.GetSession()); - - Y_VERIFY(!record.GetSession().empty()); - Y_VERIFY(!record.GetClientId().empty()); - - Y_VERIFY(pipe); - - //TODO: check here that pipe with clientPipe=sender is still connected - - auto jt = PipesInfo.find(pipe); - if (jt == PipesInfo.end()) { - LOG_CRIT_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "client " << record.GetClientId() << " pipe " << pipe - << " is not connected and got register session request for session " << record.GetSession()); - return; - } - - TVector<ui32> groups; + LOG_NOTICE_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, "client " << record.GetClientId() << " register session for pipe " << pipe << " session " << record.GetSession()); + + Y_VERIFY(!record.GetSession().empty()); + Y_VERIFY(!record.GetClientId().empty()); + + Y_VERIFY(pipe); + + //TODO: check here that pipe with clientPipe=sender is still connected + + auto jt = PipesInfo.find(pipe); + if (jt == PipesInfo.end()) { + LOG_CRIT_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "client " << record.GetClientId() << " pipe " << pipe + << " is not connected and got register session request for session " << record.GetSession()); + return; + } + + TVector<ui32> groups; groups.reserve(record.GroupsSize()); - for (auto& group : record.GetGroups()) { - groups.push_back(group); - if (groups.back() == 0 || groups.back() > TotalGroups) { - THolder<TEvPersQueue::TEvError> response(new TEvPersQueue::TEvError); - response->Record.SetCode(NPersQueue::NErrorCode::BAD_REQUEST); - response->Record.SetDescription(TStringBuilder() << "no group " << groups.back() << " in topic " << Topic); - ctx.Send(ev->Sender, response.Release()); - return; - } - } - - jt->second = {record.GetClientId(), record.GetSession(), ev->Sender, !groups.empty(), jt->second.ServerActors}; - - auto it = ClientsInfo.find(record.GetClientId()); - if (it == ClientsInfo.end()) { - auto p = ClientsInfo.insert({record.GetClientId(), TClientInfo{}}); - Y_VERIFY(p.second); - it = p.first; - it->second.ClientId = record.GetClientId(); - it->second.Topic = Topic; - it->second.TabletId = TabletID(); + for (auto& group : record.GetGroups()) { + groups.push_back(group); + if (groups.back() == 0 || groups.back() > TotalGroups) { + THolder<TEvPersQueue::TEvError> response(new TEvPersQueue::TEvError); + response->Record.SetCode(NPersQueue::NErrorCode::BAD_REQUEST); + response->Record.SetDescription(TStringBuilder() << "no group " << groups.back() << " in topic " << Topic); + ctx.Send(ev->Sender, response.Release()); + return; + } + } + + jt->second = {record.GetClientId(), record.GetSession(), ev->Sender, !groups.empty(), jt->second.ServerActors}; + + auto it = ClientsInfo.find(record.GetClientId()); + if (it == ClientsInfo.end()) { + auto p = ClientsInfo.insert({record.GetClientId(), TClientInfo{}}); + Y_VERIFY(p.second); + it = p.first; + it->second.ClientId = record.GetClientId(); + it->second.Topic = Topic; + it->second.TabletId = TabletID(); it->second.Path = Path; - it->second.Generation = Generation; - it->second.Step = 0; - } - if (!groups.empty()) { - ++it->second.SessionsWithGroup; - } - - if (it->second.SessionsWithGroup > 0 && groups.empty()) { + it->second.Generation = Generation; + it->second.Step = 0; + } + if (!groups.empty()) { + ++it->second.SessionsWithGroup; + } + + if (it->second.SessionsWithGroup > 0 && groups.empty()) { groups.reserve(TotalGroups); - for (ui32 i = 1; i <= TotalGroups; ++i) { - groups.push_back(i); - } - } - - if (!groups.empty()) { - auto jt = it->second.ClientGroupsInfo.find(0); - if (jt != it->second.ClientGroupsInfo.end()) { - it->second.KillGroup(0, ctx); - } - for (auto g : groups) { - it->second.AddSession(g, PartitionsInfo, ev->Sender, record); - } - for (ui32 group = 1; group <= TotalGroups; ++group) { - if (it->second.ClientGroupsInfo.find(group) == it->second.ClientGroupsInfo.end()) { - it->second.FillEmptyGroup(group, PartitionsInfo); - } - } - } else { - it->second.AddSession(0, PartitionsInfo, ev->Sender, record); - Y_VERIFY(it->second.ClientGroupsInfo.size() == 1); - } - RegisterSession(pipe, ctx); -} - - -void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvGetReadSessionsInfo::TPtr& ev, const TActorContext& ctx) -{ - const auto& record = ev->Get()->Record; - auto it = ClientsInfo.find(record.GetClientId()); - THolder<TEvPersQueue::TEvReadSessionsInfoResponse> response(new TEvPersQueue::TEvReadSessionsInfoResponse()); - - response->Record.SetTabletId(TabletID()); - - if (it != ClientsInfo.end()) { - for (auto& c : it->second.ClientGroupsInfo) { - for (auto& p : c.second.PartitionsInfo) { - auto pi = response->Record.AddPartitionInfo(); - pi->SetPartition(p.first); - if (p.second.State == EPS_ACTIVE) { - auto jt = c.second.SessionsInfo.find(std::make_pair(p.second.Session, c.second.RandomNumber)); - Y_VERIFY(jt != c.second.SessionsInfo.end()); - pi->SetClientNode(jt->second.ClientNode); - pi->SetProxyNodeId(jt->second.ProxyNodeId); - pi->SetSession(jt->second.Session); + for (ui32 i = 1; i <= TotalGroups; ++i) { + groups.push_back(i); + } + } + + if (!groups.empty()) { + auto jt = it->second.ClientGroupsInfo.find(0); + if (jt != it->second.ClientGroupsInfo.end()) { + it->second.KillGroup(0, ctx); + } + for (auto g : groups) { + it->second.AddSession(g, PartitionsInfo, ev->Sender, record); + } + for (ui32 group = 1; group <= TotalGroups; ++group) { + if (it->second.ClientGroupsInfo.find(group) == it->second.ClientGroupsInfo.end()) { + it->second.FillEmptyGroup(group, PartitionsInfo); + } + } + } else { + it->second.AddSession(0, PartitionsInfo, ev->Sender, record); + Y_VERIFY(it->second.ClientGroupsInfo.size() == 1); + } + RegisterSession(pipe, ctx); +} + + +void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvGetReadSessionsInfo::TPtr& ev, const TActorContext& ctx) +{ + const auto& record = ev->Get()->Record; + auto it = ClientsInfo.find(record.GetClientId()); + THolder<TEvPersQueue::TEvReadSessionsInfoResponse> response(new TEvPersQueue::TEvReadSessionsInfoResponse()); + + response->Record.SetTabletId(TabletID()); + + if (it != ClientsInfo.end()) { + for (auto& c : it->second.ClientGroupsInfo) { + for (auto& p : c.second.PartitionsInfo) { + auto pi = response->Record.AddPartitionInfo(); + pi->SetPartition(p.first); + if (p.second.State == EPS_ACTIVE) { + auto jt = c.second.SessionsInfo.find(std::make_pair(p.second.Session, c.second.RandomNumber)); + Y_VERIFY(jt != c.second.SessionsInfo.end()); + pi->SetClientNode(jt->second.ClientNode); + pi->SetProxyNodeId(jt->second.ProxyNodeId); + pi->SetSession(jt->second.Session); pi->SetTimestamp(jt->second.Timestamp.Seconds()); - } else { - pi->SetClientNode(""); - pi->SetProxyNodeId(0); - pi->SetSession(""); - pi->SetTimestamp(0); - } - } - for (auto& s : c.second.SessionsInfo) { - auto si = response->Record.AddReadSessions(); - si->SetSession(s.second.Session); - + } else { + pi->SetClientNode(""); + pi->SetProxyNodeId(0); + pi->SetSession(""); + pi->SetTimestamp(0); + } + } + for (auto& s : c.second.SessionsInfo) { + auto si = response->Record.AddReadSessions(); + si->SetSession(s.second.Session); + ActorIdToProto(s.second.Sender, si->MutableSessionActor()); - } - } - } - ctx.Send(ev->Sender, response.Release()); -} - - -void TPersQueueReadBalancer::TClientInfo::KillGroup(const ui32 group, const TActorContext& ctx) { - Y_VERIFY(group == 0); - auto it = ClientGroupsInfo.find(group); - Y_VERIFY(it != ClientGroupsInfo.end()); - for (auto& s : it->second.SessionsInfo) { - THolder<TEvPersQueue::TEvError> response(new TEvPersQueue::TEvError); - response->Record.SetCode(NPersQueue::NErrorCode::ERROR); - response->Record.SetDescription(TStringBuilder() << "there are new sessions with group, old session without group will be killed - recreate it, please"); - ctx.Send(s.second.Sender, response.Release()); - LOG_NOTICE_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() <<"client " << ClientId << " kill session pipe " << s.first.first << " session " << s.second.Session); - } - ClientGroupsInfo.erase(it); -} - -void TPersQueueReadBalancer::TClientInfo::MergeGroups(const TActorContext& ctx) { - Y_VERIFY(ClientGroupsInfo.find(0) == ClientGroupsInfo.end()); - - LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "client " << ClientId << " merge groups"); - - auto& clientInfo = AddGroup(0); - - ui32 numSessions = 0; - ui32 numGroups = 0; - - for (auto it = ClientGroupsInfo.begin(); it != ClientGroupsInfo.end();) { - auto jt = it++; - if (jt->first == 0) { - continue; - } - ++numGroups; - for (auto& pi : jt->second.PartitionsInfo) { - bool res = clientInfo.PartitionsInfo.insert(pi).second; - Y_VERIFY(res); - } - for (auto& si : jt->second.SessionsInfo) { - auto key = si.first; - key.second = clientInfo.RandomNumber; - auto it = clientInfo.SessionsInfo.find(key); - if (it == clientInfo.SessionsInfo.end()) { - clientInfo.SessionsInfo.insert(std::make_pair(key, si.second)); //there must be all sessions in all groups - } else { - it->second.NumActive += si.second.NumActive; - it->second.NumSuspended += si.second.NumSuspended; - } - ++numSessions; - } - for (auto& fp : jt->second.FreePartitions) { - clientInfo.FreePartitions.push_back(fp); - } - ClientGroupsInfo.erase(jt); - } - Y_VERIFY(clientInfo.SessionsInfo.size() * numGroups == numSessions); - Y_VERIFY(ClientGroupsInfo.size() == 1); - clientInfo.ScheduleBalance(ctx); - -} - -void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvPartitionReleased::TPtr& ev, const TActorContext& ctx) -{ - const auto& record = ev->Get()->Record; + } + } + } + ctx.Send(ev->Sender, response.Release()); +} + + +void TPersQueueReadBalancer::TClientInfo::KillGroup(const ui32 group, const TActorContext& ctx) { + Y_VERIFY(group == 0); + auto it = ClientGroupsInfo.find(group); + Y_VERIFY(it != ClientGroupsInfo.end()); + for (auto& s : it->second.SessionsInfo) { + THolder<TEvPersQueue::TEvError> response(new TEvPersQueue::TEvError); + response->Record.SetCode(NPersQueue::NErrorCode::ERROR); + response->Record.SetDescription(TStringBuilder() << "there are new sessions with group, old session without group will be killed - recreate it, please"); + ctx.Send(s.second.Sender, response.Release()); + LOG_NOTICE_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() <<"client " << ClientId << " kill session pipe " << s.first.first << " session " << s.second.Session); + } + ClientGroupsInfo.erase(it); +} + +void TPersQueueReadBalancer::TClientInfo::MergeGroups(const TActorContext& ctx) { + Y_VERIFY(ClientGroupsInfo.find(0) == ClientGroupsInfo.end()); + + LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "client " << ClientId << " merge groups"); + + auto& clientInfo = AddGroup(0); + + ui32 numSessions = 0; + ui32 numGroups = 0; + + for (auto it = ClientGroupsInfo.begin(); it != ClientGroupsInfo.end();) { + auto jt = it++; + if (jt->first == 0) { + continue; + } + ++numGroups; + for (auto& pi : jt->second.PartitionsInfo) { + bool res = clientInfo.PartitionsInfo.insert(pi).second; + Y_VERIFY(res); + } + for (auto& si : jt->second.SessionsInfo) { + auto key = si.first; + key.second = clientInfo.RandomNumber; + auto it = clientInfo.SessionsInfo.find(key); + if (it == clientInfo.SessionsInfo.end()) { + clientInfo.SessionsInfo.insert(std::make_pair(key, si.second)); //there must be all sessions in all groups + } else { + it->second.NumActive += si.second.NumActive; + it->second.NumSuspended += si.second.NumSuspended; + } + ++numSessions; + } + for (auto& fp : jt->second.FreePartitions) { + clientInfo.FreePartitions.push_back(fp); + } + ClientGroupsInfo.erase(jt); + } + Y_VERIFY(clientInfo.SessionsInfo.size() * numGroups == numSessions); + Y_VERIFY(ClientGroupsInfo.size() == 1); + clientInfo.ScheduleBalance(ctx); + +} + +void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvPartitionReleased::TPtr& ev, const TActorContext& ctx) +{ + const auto& record = ev->Get()->Record; TActorId sender = ActorIdFromProto(record.GetPipeClient()); - const TString& clientId = record.GetClientId(); - - auto pit = PartitionsInfo.find(record.GetPartition()); - if (pit == PartitionsInfo.end()) { - LOG_CRIT_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "client " << record.GetClientId() << " pipe " << sender << " got deleted partition " << record); - return; - } - - ui32 group = pit->second.GroupId; - Y_VERIFY(group > 0); - - LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "client " << record.GetClientId() << " released partition from pipe " << sender - << " session " << record.GetSession() << " partition " << record.GetPartition() << " group " << group); - - auto it = ClientsInfo.find(clientId); - if (it == ClientsInfo.end()) { - LOG_CRIT_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "client " << record.GetClientId() << " pipe " << sender - << " is not connected adn got release partitions request for session " << record.GetSession()); - return; - } - if (!it->second.SessionsWithGroup) { - group = 0; - } - auto cit = it->second.ClientGroupsInfo.find(group); - if (cit == it->second.ClientGroupsInfo.end()) { - LOG_CRIT_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "client " << record.GetClientId() << " pipe " << sender - << " is not connected and got release partitions request for session " << record.GetSession()); - return; - } - - auto jt = cit->second.PartitionsInfo.find(record.GetPartition()); - - auto kt = cit->second.SessionsInfo.find(std::make_pair(sender, cit->second.RandomNumber)); - if (kt == cit->second.SessionsInfo.end()) { //already dead session - return; - } - Y_VERIFY(kt != cit->second.SessionsInfo.end()); - Y_VERIFY(jt != cit->second.PartitionsInfo.end()); + const TString& clientId = record.GetClientId(); + + auto pit = PartitionsInfo.find(record.GetPartition()); + if (pit == PartitionsInfo.end()) { + LOG_CRIT_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "client " << record.GetClientId() << " pipe " << sender << " got deleted partition " << record); + return; + } + + ui32 group = pit->second.GroupId; + Y_VERIFY(group > 0); + + LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "client " << record.GetClientId() << " released partition from pipe " << sender + << " session " << record.GetSession() << " partition " << record.GetPartition() << " group " << group); + + auto it = ClientsInfo.find(clientId); + if (it == ClientsInfo.end()) { + LOG_CRIT_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "client " << record.GetClientId() << " pipe " << sender + << " is not connected adn got release partitions request for session " << record.GetSession()); + return; + } + if (!it->second.SessionsWithGroup) { + group = 0; + } + auto cit = it->second.ClientGroupsInfo.find(group); + if (cit == it->second.ClientGroupsInfo.end()) { + LOG_CRIT_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "client " << record.GetClientId() << " pipe " << sender + << " is not connected and got release partitions request for session " << record.GetSession()); + return; + } + + auto jt = cit->second.PartitionsInfo.find(record.GetPartition()); + + auto kt = cit->second.SessionsInfo.find(std::make_pair(sender, cit->second.RandomNumber)); + if (kt == cit->second.SessionsInfo.end()) { //already dead session + return; + } + Y_VERIFY(kt != cit->second.SessionsInfo.end()); + Y_VERIFY(jt != cit->second.PartitionsInfo.end()); jt->second.Session = TActorId(); - jt->second.State = EPS_FREE; - cit->second.FreePartitions.push_back(jt->first); - - --kt->second.NumActive; - --kt->second.NumSuspended; - - cit->second.ScheduleBalance(ctx); -} - - - -void TPersQueueReadBalancer::RebuildStructs() { - //TODO : bug here in case of deleting number of partitions - //TODO : track session with smallest and biggest number of (active but not suspended partitions -} - + jt->second.State = EPS_FREE; + cit->second.FreePartitions.push_back(jt->first); + + --kt->second.NumActive; + --kt->second.NumSuspended; + + cit->second.ScheduleBalance(ctx); +} + + + +void TPersQueueReadBalancer::RebuildStructs() { + //TODO : bug here in case of deleting number of partitions + //TODO : track session with smallest and biggest number of (active but not suspended partitions +} + void TPersQueueReadBalancer::RegisterSession(const TActorId& pipe, const TActorContext& ctx) -{ - //TODO : change structs for only this session, not all client - auto it = PipesInfo.find(pipe); - Y_VERIFY(it != PipesInfo.end()); - auto jt = ClientsInfo.find(it->second.ClientId); - Y_VERIFY(jt != ClientsInfo.end()); - for (auto& c : jt->second.ClientGroupsInfo) { - c.second.ScheduleBalance(ctx); - } -} - +{ + //TODO : change structs for only this session, not all client + auto it = PipesInfo.find(pipe); + Y_VERIFY(it != PipesInfo.end()); + auto jt = ClientsInfo.find(it->second.ClientId); + Y_VERIFY(jt != ClientsInfo.end()); + for (auto& c : jt->second.ClientGroupsInfo) { + c.second.ScheduleBalance(ctx); + } +} + void TPersQueueReadBalancer::UnregisterSession(const TActorId& pipe, const TActorContext& ctx) -{ - //TODO : change structs for only this session - auto it = PipesInfo.find(pipe); - Y_VERIFY(it != PipesInfo.end()); - const TString& clientId = it->second.ClientId; - auto jt = ClientsInfo.find(clientId); - Y_VERIFY(jt != ClientsInfo.end()); - for (auto& c : jt->second.ClientGroupsInfo) { - for (auto& p : c.second.PartitionsInfo) { //TODO: reverse map - if (p.second.Session == pipe) { +{ + //TODO : change structs for only this session + auto it = PipesInfo.find(pipe); + Y_VERIFY(it != PipesInfo.end()); + const TString& clientId = it->second.ClientId; + auto jt = ClientsInfo.find(clientId); + Y_VERIFY(jt != ClientsInfo.end()); + for (auto& c : jt->second.ClientGroupsInfo) { + for (auto& p : c.second.PartitionsInfo) { //TODO: reverse map + if (p.second.Session == pipe) { p.second.Session = TActorId(); - p.second.State = EPS_FREE; - c.second.FreePartitions.push_back(p.first); - } - } - bool res = c.second.SessionsInfo.erase(std::make_pair(pipe, c.second.RandomNumber)); - if (res) - c.second.ScheduleBalance(ctx); - } - if (it->second.WithGroups && --jt->second.SessionsWithGroup == 0) { - jt->second.MergeGroups(ctx); - } - - PipesInfo.erase(pipe); -} - -void TPersQueueReadBalancer::TClientGroupInfo::ScheduleBalance(const TActorContext& ctx) { - if (WakeupScheduled) - return; - WakeupScheduled = true; - ctx.Send(ctx.SelfID, new TEvPersQueue::TEvWakeupClient(ClientId, Group)); -} - - -void TPersQueueReadBalancer::TClientGroupInfo::Balance(const TActorContext& ctx) { - - //TODO: use filled structs - ui32 total = PartitionsInfo.size(); - ui32 sessionsCount = SessionsInfo.size(); - if (sessionsCount == 0) - return; //no sessions, no problems - - //FreePartitions and PipeInfo[].NumActive are consistent - ui32 desired = total / sessionsCount; - - ui32 allowPlusOne = total % sessionsCount; - ui32 cur = allowPlusOne; - //request partitions from sessions if needed - for (auto& p : SessionsInfo) { - ui32 realDesired = (cur > 0) ? desired + 1 : desired; - if (cur > 0) - --cur; - if (p.second.NumActive <= realDesired + p.second.NumSuspended) { - continue; - } else { - ui32 canRequest = 0; - Y_VERIFY(p.second.NumActive > realDesired + p.second.NumSuspended); - canRequest = p.second.NumActive - realDesired - p.second.NumSuspended; - Y_VERIFY(canRequest > 0); - ReleasePartition(p.first.first, Group, canRequest, ctx); - } - } - - //give free partitions to starving sessions - if (FreePartitions.empty()) - return; - cur = allowPlusOne; - for (auto& p : SessionsInfo) { - ui32 realDesired = (cur > 0) ? desired + 1 : desired; - if (cur > 0) - --cur; - if( p.second.NumActive >= realDesired) continue; - ui32 req = realDesired - p.second.NumActive; - while (req > 0) { - --req; - Y_VERIFY(!FreePartitions.empty()); - LockPartition(p.first.first, FreePartitions.front(), ctx); - FreePartitions.pop_front(); - if (FreePartitions.empty()) - return; - } - Y_VERIFY(p.second.NumActive >= desired && p.second.NumActive <= desired + 1); - } - Y_VERIFY(FreePartitions.empty()); -} - - + p.second.State = EPS_FREE; + c.second.FreePartitions.push_back(p.first); + } + } + bool res = c.second.SessionsInfo.erase(std::make_pair(pipe, c.second.RandomNumber)); + if (res) + c.second.ScheduleBalance(ctx); + } + if (it->second.WithGroups && --jt->second.SessionsWithGroup == 0) { + jt->second.MergeGroups(ctx); + } + + PipesInfo.erase(pipe); +} + +void TPersQueueReadBalancer::TClientGroupInfo::ScheduleBalance(const TActorContext& ctx) { + if (WakeupScheduled) + return; + WakeupScheduled = true; + ctx.Send(ctx.SelfID, new TEvPersQueue::TEvWakeupClient(ClientId, Group)); +} + + +void TPersQueueReadBalancer::TClientGroupInfo::Balance(const TActorContext& ctx) { + + //TODO: use filled structs + ui32 total = PartitionsInfo.size(); + ui32 sessionsCount = SessionsInfo.size(); + if (sessionsCount == 0) + return; //no sessions, no problems + + //FreePartitions and PipeInfo[].NumActive are consistent + ui32 desired = total / sessionsCount; + + ui32 allowPlusOne = total % sessionsCount; + ui32 cur = allowPlusOne; + //request partitions from sessions if needed + for (auto& p : SessionsInfo) { + ui32 realDesired = (cur > 0) ? desired + 1 : desired; + if (cur > 0) + --cur; + if (p.second.NumActive <= realDesired + p.second.NumSuspended) { + continue; + } else { + ui32 canRequest = 0; + Y_VERIFY(p.second.NumActive > realDesired + p.second.NumSuspended); + canRequest = p.second.NumActive - realDesired - p.second.NumSuspended; + Y_VERIFY(canRequest > 0); + ReleasePartition(p.first.first, Group, canRequest, ctx); + } + } + + //give free partitions to starving sessions + if (FreePartitions.empty()) + return; + cur = allowPlusOne; + for (auto& p : SessionsInfo) { + ui32 realDesired = (cur > 0) ? desired + 1 : desired; + if (cur > 0) + --cur; + if( p.second.NumActive >= realDesired) continue; + ui32 req = realDesired - p.second.NumActive; + while (req > 0) { + --req; + Y_VERIFY(!FreePartitions.empty()); + LockPartition(p.first.first, FreePartitions.front(), ctx); + FreePartitions.pop_front(); + if (FreePartitions.empty()) + return; + } + Y_VERIFY(p.second.NumActive >= desired && p.second.NumActive <= desired + 1); + } + Y_VERIFY(FreePartitions.empty()); +} + + void TPersQueueReadBalancer::TClientGroupInfo::LockPartition(const TActorId pipe, ui32 partition, const TActorContext& ctx) { - - auto jt = SessionsInfo.find(std::make_pair(pipe, RandomNumber)); - Y_VERIFY(jt != SessionsInfo.end()); - - auto& pipeInfo = jt->second; - - auto it = PartitionsInfo.find(partition); - Y_VERIFY(it != PartitionsInfo.end()); - it->second.Session = pipe; - it->second.State = EPS_ACTIVE; - ++pipeInfo.NumActive; - //TODO:rebuild structs - - THolder<TEvPersQueue::TEvLockPartition> res{new TEvPersQueue::TEvLockPartition}; - res->Record.SetSession(pipeInfo.Session); - res->Record.SetPartition(partition); - res->Record.SetTopic(Topic); + + auto jt = SessionsInfo.find(std::make_pair(pipe, RandomNumber)); + Y_VERIFY(jt != SessionsInfo.end()); + + auto& pipeInfo = jt->second; + + auto it = PartitionsInfo.find(partition); + Y_VERIFY(it != PartitionsInfo.end()); + it->second.Session = pipe; + it->second.State = EPS_ACTIVE; + ++pipeInfo.NumActive; + //TODO:rebuild structs + + THolder<TEvPersQueue::TEvLockPartition> res{new TEvPersQueue::TEvLockPartition}; + res->Record.SetSession(pipeInfo.Session); + res->Record.SetPartition(partition); + res->Record.SetTopic(Topic); res->Record.SetPath(Path); - res->Record.SetGeneration(Generation); - res->Record.SetStep(++(*Step)); - res->Record.SetClientId(ClientId); + res->Record.SetGeneration(Generation); + res->Record.SetStep(++(*Step)); + res->Record.SetClientId(ClientId); ActorIdToProto(pipe, res->Record.MutablePipeClient()); - res->Record.SetTabletId(PartitionsInfo[partition].TabletId); - - LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "client " << ClientId << " lock partition for pipe " - << pipe << " session " << pipeInfo.Session << " partition " << partition << " generation " << Generation << " step " << *Step); - - ctx.Send(pipeInfo.Sender, res.Release()); -} - + res->Record.SetTabletId(PartitionsInfo[partition].TabletId); + + LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "client " << ClientId << " lock partition for pipe " + << pipe << " session " << pipeInfo.Session << " partition " << partition << " generation " << Generation << " step " << *Step); + + ctx.Send(pipeInfo.Sender, res.Release()); +} + void TPersQueueReadBalancer::TClientGroupInfo::ReleasePartition(const TActorId pipe, const ui32 group, const ui32 count, const TActorContext& ctx) { - - auto it = SessionsInfo.find(std::make_pair(pipe, RandomNumber)); - Y_VERIFY(it != SessionsInfo.end()); - auto& sessionInfo = it->second; - - sessionInfo.NumSuspended += count; - - THolder<TEvPersQueue::TEvReleasePartition> res{new TEvPersQueue::TEvReleasePartition}; - res->Record.SetSession(sessionInfo.Session); - res->Record.SetTopic(Topic); + + auto it = SessionsInfo.find(std::make_pair(pipe, RandomNumber)); + Y_VERIFY(it != SessionsInfo.end()); + auto& sessionInfo = it->second; + + sessionInfo.NumSuspended += count; + + THolder<TEvPersQueue::TEvReleasePartition> res{new TEvPersQueue::TEvReleasePartition}; + res->Record.SetSession(sessionInfo.Session); + res->Record.SetTopic(Topic); res->Record.SetPath(Path); - res->Record.SetGeneration(Generation); - res->Record.SetClientId(ClientId); - res->Record.SetCount(count); - res->Record.SetGroup(group); + res->Record.SetGeneration(Generation); + res->Record.SetClientId(ClientId); + res->Record.SetCount(count); + res->Record.SetGroup(group); ActorIdToProto(pipe, res->Record.MutablePipeClient()); - - LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "client " << ClientId << " release partition group " << group - << " for pipe " << pipe << " session " << sessionInfo.Session); - - ctx.Send(sessionInfo.Sender, res.Release()); -} - - -} -} + + LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "client " << ClientId << " release partition group " << group + << " for pipe " << pipe << " session " << sessionInfo.Session); + + ctx.Send(sessionInfo.Sender, res.Release()); +} + + +} +} diff --git a/ydb/core/persqueue/read_balancer.h b/ydb/core/persqueue/read_balancer.h index 64d29515e0..6a07edcb78 100644 --- a/ydb/core/persqueue/read_balancer.h +++ b/ydb/core/persqueue/read_balancer.h @@ -1,7 +1,7 @@ -#pragma once - -#include <util/system/hp_timer.h> - +#pragma once + +#include <util/system/hp_timer.h> + #include <ydb/core/tablet_flat/tablet_flat_executed.h> #include <ydb/core/base/tablet_pipe.h> #include <ydb/core/base/appdata.h> @@ -12,517 +12,517 @@ #include <ydb/core/tx/schemeshard/schemeshard.h> #include <ydb/core/engine/minikql/flat_local_tx_factory.h> #include <ydb/library/persqueue/topic_parser/topic_parser.h> - -namespace NKikimr { -namespace NPQ { - -using namespace NTabletFlatExecutor; - -class TMetricsTimeKeeper { -public: - TMetricsTimeKeeper(NMetrics::TResourceMetrics* metrics, const TActorContext& ctx) - : Ctx(ctx) - , Metrics(metrics) - {} - - ~TMetricsTimeKeeper() { - ui64 counter = ui64(CpuTimer.PassedReset() * 1000000.); - if (counter && Metrics) { - Metrics->CPU.Increment(counter); - Metrics->TryUpdate(Ctx); - } - } - -private: - const TActorContext& Ctx; - NMetrics::TResourceMetrics *Metrics; - THPTimer CpuTimer; -}; - - -enum EPartitionState { - StateRegular = 0, - StateWaitingFromSS, -}; - -class TPersQueueReadBalancer : public TActor<TPersQueueReadBalancer>, public TTabletExecutedFlat { - - struct Schema : NIceDb::Schema { - struct Data : Table<32> { - struct Key : Column<32, NScheme::NTypeIds::Uint32> {}; - struct PathId : Column<33, NScheme::NTypeIds::Uint64> {}; - struct Topic : Column<34, NScheme::NTypeIds::Utf8> {}; - struct Path : Column<35, NScheme::NTypeIds::Utf8> {}; - struct Version : Column<36, NScheme::NTypeIds::Uint32> {}; - struct Config : Column<40, NScheme::NTypeIds::Utf8> {}; - struct MaxPartsPerTablet : Column<41, NScheme::NTypeIds::Uint32> {}; + +namespace NKikimr { +namespace NPQ { + +using namespace NTabletFlatExecutor; + +class TMetricsTimeKeeper { +public: + TMetricsTimeKeeper(NMetrics::TResourceMetrics* metrics, const TActorContext& ctx) + : Ctx(ctx) + , Metrics(metrics) + {} + + ~TMetricsTimeKeeper() { + ui64 counter = ui64(CpuTimer.PassedReset() * 1000000.); + if (counter && Metrics) { + Metrics->CPU.Increment(counter); + Metrics->TryUpdate(Ctx); + } + } + +private: + const TActorContext& Ctx; + NMetrics::TResourceMetrics *Metrics; + THPTimer CpuTimer; +}; + + +enum EPartitionState { + StateRegular = 0, + StateWaitingFromSS, +}; + +class TPersQueueReadBalancer : public TActor<TPersQueueReadBalancer>, public TTabletExecutedFlat { + + struct Schema : NIceDb::Schema { + struct Data : Table<32> { + struct Key : Column<32, NScheme::NTypeIds::Uint32> {}; + struct PathId : Column<33, NScheme::NTypeIds::Uint64> {}; + struct Topic : Column<34, NScheme::NTypeIds::Utf8> {}; + struct Path : Column<35, NScheme::NTypeIds::Utf8> {}; + struct Version : Column<36, NScheme::NTypeIds::Uint32> {}; + struct Config : Column<40, NScheme::NTypeIds::Utf8> {}; + struct MaxPartsPerTablet : Column<41, NScheme::NTypeIds::Uint32> {}; struct SchemeShardId : Column<42, NScheme::NTypeIds::Uint64> {}; - struct NextPartitionId : Column<43, NScheme::NTypeIds::Uint64> {}; - - using TKey = TableKey<Key>; - using TColumns = TableColumns<Key, PathId, Topic, Path, Version, Config, MaxPartsPerTablet, SchemeShardId, NextPartitionId>; - }; - - struct Partitions : Table<33> { - struct Partition : Column<32, NScheme::NTypeIds::Uint32> {}; - struct TabletId : Column<33, NScheme::NTypeIds::Uint64> {}; - - struct State : Column<34, NScheme::NTypeIds::Uint32> {}; - - using TKey = TableKey<Partition>; - using TColumns = TableColumns<Partition, TabletId, State>; - }; - - struct Groups : Table<34> { - struct GroupId : Column<32, NScheme::NTypeIds::Uint32> {}; - struct Partition : Column<33, NScheme::NTypeIds::Uint32> {}; - - using TKey = TableKey<GroupId, Partition>; - using TColumns = TableColumns<GroupId, Partition>; - }; - - struct Tablets : Table<35> { - struct Owner : Column<32, NScheme::NTypeIds::Uint64> {}; - struct Idx : Column<33, NScheme::NTypeIds::Uint64> {}; - struct TabletId : Column<34, NScheme::NTypeIds::Uint64> {}; - - using TKey = TableKey<TabletId>; - using TColumns = TableColumns<Owner, Idx, TabletId>; - }; - - struct Operations : Table<36> { - struct Idx : Column<33, NScheme::NTypeIds::Uint64> {}; - struct State : Column<34, NScheme::NTypeIds::Utf8> {}; //serialzed protobuf - - using TKey = TableKey<Idx>; - using TColumns = TableColumns<Idx, State>; - }; - - using TTables = SchemaTables<Data, Partitions, Groups, Tablets, Operations>; - }; - - + struct NextPartitionId : Column<43, NScheme::NTypeIds::Uint64> {}; + + using TKey = TableKey<Key>; + using TColumns = TableColumns<Key, PathId, Topic, Path, Version, Config, MaxPartsPerTablet, SchemeShardId, NextPartitionId>; + }; + + struct Partitions : Table<33> { + struct Partition : Column<32, NScheme::NTypeIds::Uint32> {}; + struct TabletId : Column<33, NScheme::NTypeIds::Uint64> {}; + + struct State : Column<34, NScheme::NTypeIds::Uint32> {}; + + using TKey = TableKey<Partition>; + using TColumns = TableColumns<Partition, TabletId, State>; + }; + + struct Groups : Table<34> { + struct GroupId : Column<32, NScheme::NTypeIds::Uint32> {}; + struct Partition : Column<33, NScheme::NTypeIds::Uint32> {}; + + using TKey = TableKey<GroupId, Partition>; + using TColumns = TableColumns<GroupId, Partition>; + }; + + struct Tablets : Table<35> { + struct Owner : Column<32, NScheme::NTypeIds::Uint64> {}; + struct Idx : Column<33, NScheme::NTypeIds::Uint64> {}; + struct TabletId : Column<34, NScheme::NTypeIds::Uint64> {}; + + using TKey = TableKey<TabletId>; + using TColumns = TableColumns<Owner, Idx, TabletId>; + }; + + struct Operations : Table<36> { + struct Idx : Column<33, NScheme::NTypeIds::Uint64> {}; + struct State : Column<34, NScheme::NTypeIds::Utf8> {}; //serialzed protobuf + + using TKey = TableKey<Idx>; + using TColumns = TableColumns<Idx, State>; + }; + + using TTables = SchemaTables<Data, Partitions, Groups, Tablets, Operations>; + }; + + struct TTxPreInit : public ITransaction { - TPersQueueReadBalancer * const Self; - - TTxPreInit(TPersQueueReadBalancer *self) - : Self(self) - {} - - bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; - - void Complete(const TActorContext& ctx) override; - }; - - friend struct TTxPreInit; - - + TPersQueueReadBalancer * const Self; + + TTxPreInit(TPersQueueReadBalancer *self) + : Self(self) + {} + + bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; + + void Complete(const TActorContext& ctx) override; + }; + + friend struct TTxPreInit; + + struct TTxInit : public ITransaction { - TPersQueueReadBalancer * const Self; - - TTxInit(TPersQueueReadBalancer *self) - : Self(self) - {} - - bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; - - void Complete(const TActorContext& ctx) override; - }; - - friend struct TTxInit; - - struct TPartInfo { - ui64 TabletId; - ui32 Group; - TPartInfo(const ui64 tabletId, const ui32 group) - : TabletId(tabletId) - , Group(group) - {} - }; - - struct TTabletInfo { - ui64 Owner; - ui64 Idx; - }; - + TPersQueueReadBalancer * const Self; + + TTxInit(TPersQueueReadBalancer *self) + : Self(self) + {} + + bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; + + void Complete(const TActorContext& ctx) override; + }; + + friend struct TTxInit; + + struct TPartInfo { + ui64 TabletId; + ui32 Group; + TPartInfo(const ui64 tabletId, const ui32 group) + : TabletId(tabletId) + , Group(group) + {} + }; + + struct TTabletInfo { + ui64 Owner; + ui64 Idx; + }; + struct TTxWrite : public ITransaction { - TPersQueueReadBalancer * const Self; + TPersQueueReadBalancer * const Self; TVector<ui32> DeletedPartitions; - TVector<std::pair<ui32, TPartInfo>> NewPartitions; - TVector<std::pair<ui64, TTabletInfo>> NewTablets; - TVector<std::pair<ui32, ui32>> NewGroups; - - TTxWrite(TPersQueueReadBalancer *self, TVector<ui32>&& deletedPartitions, TVector<std::pair<ui32, TPartInfo>>&& newPartitions, - TVector<std::pair<ui64, TTabletInfo>>&& newTablets, TVector<std::pair<ui32, ui32>>&& newGroups) - : Self(self) - , DeletedPartitions(std::move(deletedPartitions)) - , NewPartitions(std::move(newPartitions)) - , NewTablets(std::move(newTablets)) - , NewGroups(std::move(newGroups)) - {} - - bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; - - void Complete(const TActorContext &ctx) override; - }; - - - friend struct TTxWrite; - - void Handle(TEvents::TEvPoisonPill::TPtr&, const TActorContext &ctx) { - Become(&TThis::StateBroken); - ctx.Send(Tablet(), new TEvents::TEvPoisonPill); - } - - void HandleWakeup(TEvents::TEvWakeup::TPtr&, const TActorContext &ctx) { - GetStat(ctx); //TODO: do it only on signals from outerspace right now - - ctx.Schedule(TDuration::Seconds(30), new TEvents::TEvWakeup()); //TODO: remove it - } - + TVector<std::pair<ui32, TPartInfo>> NewPartitions; + TVector<std::pair<ui64, TTabletInfo>> NewTablets; + TVector<std::pair<ui32, ui32>> NewGroups; + + TTxWrite(TPersQueueReadBalancer *self, TVector<ui32>&& deletedPartitions, TVector<std::pair<ui32, TPartInfo>>&& newPartitions, + TVector<std::pair<ui64, TTabletInfo>>&& newTablets, TVector<std::pair<ui32, ui32>>&& newGroups) + : Self(self) + , DeletedPartitions(std::move(deletedPartitions)) + , NewPartitions(std::move(newPartitions)) + , NewTablets(std::move(newTablets)) + , NewGroups(std::move(newGroups)) + {} + + bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; + + void Complete(const TActorContext &ctx) override; + }; + + + friend struct TTxWrite; + + void Handle(TEvents::TEvPoisonPill::TPtr&, const TActorContext &ctx) { + Become(&TThis::StateBroken); + ctx.Send(Tablet(), new TEvents::TEvPoisonPill); + } + + void HandleWakeup(TEvents::TEvWakeup::TPtr&, const TActorContext &ctx) { + GetStat(ctx); //TODO: do it only on signals from outerspace right now + + ctx.Schedule(TDuration::Seconds(30), new TEvents::TEvWakeup()); //TODO: remove it + } + void HandleUpdateACL(TEvPersQueue::TEvUpdateACL::TPtr&, const TActorContext &ctx) { GetACL(ctx); } - - void Die(const TActorContext& ctx) override { - for (auto& pipe : TabletPipes) { - NTabletPipe::CloseClient(ctx, pipe.second); - } - TabletPipes.clear(); - TActor<TPersQueueReadBalancer>::Die(ctx); - } - - void OnActivateExecutor(const TActorContext &ctx) override { - ResourceMetrics = Executor()->GetResourceMetrics(); - Become(&TThis::StateWork); + + void Die(const TActorContext& ctx) override { + for (auto& pipe : TabletPipes) { + NTabletPipe::CloseClient(ctx, pipe.second); + } + TabletPipes.clear(); + TActor<TPersQueueReadBalancer>::Die(ctx); + } + + void OnActivateExecutor(const TActorContext &ctx) override { + ResourceMetrics = Executor()->GetResourceMetrics(); + Become(&TThis::StateWork); if (Executor()->GetStats().IsFollower) Y_FAIL("is follower works well with Balancer?"); - else - Execute(new TTxPreInit(this), ctx); - } - - void OnDetach(const TActorContext &ctx) override { - Die(ctx); - } - - void OnTabletDead(TEvTablet::TEvTabletDead::TPtr&, const TActorContext &ctx) override { - Die(ctx); - } - - void DefaultSignalTabletActive(const TActorContext &ctx) override { - Y_UNUSED(ctx); //TODO: this is signal that tablet is ready for work - } - - void InitDone(const TActorContext &ctx) { - - StartPartitionIdForWrite = NextPartitionIdForWrite = rand() % TotalGroups; - - TStringBuilder s; - s << "BALANCER INIT DONE for " << Topic << ": "; - for (auto& p : PartitionsInfo) { - s << "(" << p.first << ", " << p.second.TabletId << ") "; - } - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, s); - for (auto& p : ClientsInfo) { - for (auto& c : p.second.ClientGroupsInfo) { - c.second.Balance(ctx); - } - } - - for (auto &ev : UpdateEvents) { - ctx.Send(ctx.SelfID, ev.Release()); - } - UpdateEvents.clear(); - - for (auto &ev : RegisterEvents) { - ctx.Send(ctx.SelfID, ev.Release()); - } - RegisterEvents.clear(); - - ctx.Schedule(TDuration::Seconds(30), new TEvents::TEvWakeup()); //TODO: remove it + else + Execute(new TTxPreInit(this), ctx); + } + + void OnDetach(const TActorContext &ctx) override { + Die(ctx); + } + + void OnTabletDead(TEvTablet::TEvTabletDead::TPtr&, const TActorContext &ctx) override { + Die(ctx); + } + + void DefaultSignalTabletActive(const TActorContext &ctx) override { + Y_UNUSED(ctx); //TODO: this is signal that tablet is ready for work + } + + void InitDone(const TActorContext &ctx) { + + StartPartitionIdForWrite = NextPartitionIdForWrite = rand() % TotalGroups; + + TStringBuilder s; + s << "BALANCER INIT DONE for " << Topic << ": "; + for (auto& p : PartitionsInfo) { + s << "(" << p.first << ", " << p.second.TabletId << ") "; + } + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, s); + for (auto& p : ClientsInfo) { + for (auto& c : p.second.ClientGroupsInfo) { + c.second.Balance(ctx); + } + } + + for (auto &ev : UpdateEvents) { + ctx.Send(ctx.SelfID, ev.Release()); + } + UpdateEvents.clear(); + + for (auto &ev : RegisterEvents) { + ctx.Send(ctx.SelfID, ev.Release()); + } + RegisterEvents.clear(); + + ctx.Schedule(TDuration::Seconds(30), new TEvents::TEvWakeup()); //TODO: remove it ctx.Send(ctx.SelfID, new TEvPersQueue::TEvUpdateACL()); - } - - bool OnRenderAppHtmlPage(NMon::TEvRemoteHttpInfo::TPtr ev, const TActorContext& ctx) override; - TString GenerateStat(); - - void Handle(TEvPersQueue::TEvWakeupClient::TPtr &ev, const TActorContext& ctx); - void Handle(TEvPersQueue::TEvDescribe::TPtr &ev, const TActorContext& ctx); - - void HandleOnInit(TEvPersQueue::TEvUpdateBalancerConfig::TPtr &ev, const TActorContext& ctx); - void Handle(TEvPersQueue::TEvUpdateBalancerConfig::TPtr &ev, const TActorContext& ctx); - - void HandleOnInit(TEvPersQueue::TEvRegisterReadSession::TPtr &ev, const TActorContext& ctx); - void Handle(TEvPersQueue::TEvRegisterReadSession::TPtr &ev, const TActorContext& ctx); - - void Handle(TEvPersQueue::TEvGetReadSessionsInfo::TPtr &ev, const TActorContext& ctx); + } + + bool OnRenderAppHtmlPage(NMon::TEvRemoteHttpInfo::TPtr ev, const TActorContext& ctx) override; + TString GenerateStat(); + + void Handle(TEvPersQueue::TEvWakeupClient::TPtr &ev, const TActorContext& ctx); + void Handle(TEvPersQueue::TEvDescribe::TPtr &ev, const TActorContext& ctx); + + void HandleOnInit(TEvPersQueue::TEvUpdateBalancerConfig::TPtr &ev, const TActorContext& ctx); + void Handle(TEvPersQueue::TEvUpdateBalancerConfig::TPtr &ev, const TActorContext& ctx); + + void HandleOnInit(TEvPersQueue::TEvRegisterReadSession::TPtr &ev, const TActorContext& ctx); + void Handle(TEvPersQueue::TEvRegisterReadSession::TPtr &ev, const TActorContext& ctx); + + void Handle(TEvPersQueue::TEvGetReadSessionsInfo::TPtr &ev, const TActorContext& ctx); void Handle(TEvPersQueue::TEvCheckACL::TPtr&, const TActorContext&); - void Handle(TEvPersQueue::TEvGetPartitionIdForWrite::TPtr&, const TActorContext&); - - void Handle(TEvTabletPipe::TEvServerConnected::TPtr& ev, const TActorContext&); - void Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev, const TActorContext&); - - void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext&); - void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext&); - - TStringBuilder GetPrefix() const; - + void Handle(TEvPersQueue::TEvGetPartitionIdForWrite::TPtr&, const TActorContext&); + + void Handle(TEvTabletPipe::TEvServerConnected::TPtr& ev, const TActorContext&); + void Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev, const TActorContext&); + + void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext&); + void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext&); + + TStringBuilder GetPrefix() const; + void RequestTabletIfNeeded(const ui64 tabletId, const TActorContext&); - void RestartPipe(const ui64 tabletId, const TActorContext&); - void CheckStat(const TActorContext&); + void RestartPipe(const ui64 tabletId, const TActorContext&); + void CheckStat(const TActorContext&); void RespondWithACL( const TEvPersQueue::TEvCheckACL::TPtr &request, const NKikimrPQ::EAccess &access, const TString &error, const TActorContext &ctx); void CheckACL(const TEvPersQueue::TEvCheckACL::TPtr &request, const NACLib::TUserToken& token, const TActorContext &ctx); - void GetStat(const TActorContext&); + void GetStat(const TActorContext&); void GetACL(const TActorContext&); - void AnswerWaitingRequests(const TActorContext& ctx); - - void Handle(TEvPersQueue::TEvPartitionReleased::TPtr& ev, const TActorContext& ctx); + void AnswerWaitingRequests(const TActorContext& ctx); + + void Handle(TEvPersQueue::TEvPartitionReleased::TPtr& ev, const TActorContext& ctx); void Handle(TEvents::TEvPoisonPill &ev, const TActorContext& ctx); - - void Handle(TEvPersQueue::TEvStatusResponse::TPtr& ev, const TActorContext& ctx); + + void Handle(TEvPersQueue::TEvStatusResponse::TPtr& ev, const TActorContext& ctx); void Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev, const TActorContext& ctx); - + void RegisterSession(const TActorId& pipe, const TActorContext& ctx); - struct TPipeInfo; + struct TPipeInfo; void UnregisterSession(const TActorId& pipe, const TActorContext& ctx); - void RebuildStructs(); - - bool Inited; - ui64 PathId; - TString Topic; - TString Path; - ui32 Generation; - int Version; - ui32 MaxPartsPerTablet; + void RebuildStructs(); + + bool Inited; + ui64 PathId; + TString Topic; + TString Path; + ui32 Generation; + int Version; + ui32 MaxPartsPerTablet; ui64 SchemeShardId; - NKikimrPQ::TPQTabletConfig TabletConfig; + NKikimrPQ::TPQTabletConfig TabletConfig; NACLib::TSecurityObject ACL; TInstant LastACLUpdate; - - THashSet<TString> Consumers; - - ui64 TxId; - ui32 NumActiveParts; - + + THashSet<TString> Consumers; + + ui64 TxId; + ui32 NumActiveParts; + TVector<TActorId> WaitingResponse; - TVector<TEvPersQueue::TEvCheckACL::TPtr> WaitingACLRequests; - TVector<TEvPersQueue::TEvDescribe::TPtr> WaitingDescribeRequests; - - struct TPipeInfo { - TString ClientId; - TString Session; + TVector<TEvPersQueue::TEvCheckACL::TPtr> WaitingACLRequests; + TVector<TEvPersQueue::TEvDescribe::TPtr> WaitingDescribeRequests; + + struct TPipeInfo { + TString ClientId; + TString Session; TActorId Sender; - bool WithGroups; - ui32 ServerActors; - }; - - enum EPartitionState { - EPS_FREE = 0, - EPS_ACTIVE = 1 - }; - - struct TPartitionInfo { - ui64 TabletId; - EPartitionState State; + bool WithGroups; + ui32 ServerActors; + }; + + enum EPartitionState { + EPS_FREE = 0, + EPS_ACTIVE = 1 + }; + + struct TPartitionInfo { + ui64 TabletId; + EPartitionState State; TActorId Session; - ui32 GroupId; - }; - - struct TClientGroupInfo { - struct TSessionInfo { + ui32 GroupId; + }; + + struct TClientGroupInfo { + struct TSessionInfo { TSessionInfo(const TString& session, const TActorId sender, const TString& clientNode, ui32 proxyNodeId, TInstant ts) - : Session(session) - , Sender(sender) - , NumSuspended(0) - , NumActive(0) - , ClientNode(clientNode) - , ProxyNodeId(proxyNodeId) - , Timestamp(ts) - {} - - TString Session; + : Session(session) + , Sender(sender) + , NumSuspended(0) + , NumActive(0) + , ClientNode(clientNode) + , ProxyNodeId(proxyNodeId) + , Timestamp(ts) + {} + + TString Session; TActorId Sender; - ui32 NumSuspended; - ui32 NumActive; - - TString ClientNode; - ui32 ProxyNodeId; + ui32 NumSuspended; + ui32 NumActive; + + TString ClientNode; + ui32 ProxyNodeId; TInstant Timestamp; - }; - - TString ClientId; - TString Topic; - ui64 TabletId; + }; + + TString ClientId; + TString Topic; + ui64 TabletId; TString Path; - ui32 Generation = 0; - ui64 RandomNumber = 0; - ui32* Step = nullptr; - - ui32 Group = 0; - - THashMap<ui32, TPartitionInfo> PartitionsInfo; // partitionId -> info - std::deque<ui32> FreePartitions; + ui32 Generation = 0; + ui64 RandomNumber = 0; + ui32* Step = nullptr; + + ui32 Group = 0; + + THashMap<ui32, TPartitionInfo> PartitionsInfo; // partitionId -> info + std::deque<ui32> FreePartitions; THashMap<std::pair<TActorId, ui64>, TSessionInfo> SessionsInfo; //map from ActorID and random value - need for reordering sessions in different topics - - void ScheduleBalance(const TActorContext& ctx); - void Balance(const TActorContext& ctx); + + void ScheduleBalance(const TActorContext& ctx); + void Balance(const TActorContext& ctx); void LockPartition(const TActorId pipe, ui32 partition, const TActorContext& ctx); void ReleasePartition(const TActorId pipe, const ui32 group, const ui32 count, const TActorContext& ctx); - TStringBuilder GetPrefix() const; - - bool WakeupScheduled = false; - }; - + TStringBuilder GetPrefix() const; + + bool WakeupScheduled = false; + }; + THashMap<ui32, TPartitionInfo> PartitionsInfo; - THashMap<ui32, TVector<ui32>> GroupsInfo; - - THashMap<ui64, TTabletInfo> TabletsInfo; - ui64 MaxIdx; - - ui32 NextPartitionId; - ui32 NextPartitionIdForWrite; - ui32 StartPartitionIdForWrite; - ui32 TotalGroups; - bool NoGroupsInBase; - - struct TClientInfo { - THashMap<ui32, TClientGroupInfo> ClientGroupsInfo; //map from group to info - ui32 SessionsWithGroup = 0; - - TString ClientId; - TString Topic; - ui64 TabletId; + THashMap<ui32, TVector<ui32>> GroupsInfo; + + THashMap<ui64, TTabletInfo> TabletsInfo; + ui64 MaxIdx; + + ui32 NextPartitionId; + ui32 NextPartitionIdForWrite; + ui32 StartPartitionIdForWrite; + ui32 TotalGroups; + bool NoGroupsInBase; + + struct TClientInfo { + THashMap<ui32, TClientGroupInfo> ClientGroupsInfo; //map from group to info + ui32 SessionsWithGroup = 0; + + TString ClientId; + TString Topic; + ui64 TabletId; TString Path; - ui32 Generation = 0; - ui32 Step = 0; - - void KillGroup(const ui32 group, const TActorContext& ctx); - void MergeGroups(const TActorContext& ctx); - TClientGroupInfo& AddGroup(const ui32 group); - void FillEmptyGroup(const ui32 group, const THashMap<ui32, TPartitionInfo>& partitionsInfo); - void AddSession(const ui32 group, const THashMap<ui32, TPartitionInfo>& partitionsInfo, + ui32 Generation = 0; + ui32 Step = 0; + + void KillGroup(const ui32 group, const TActorContext& ctx); + void MergeGroups(const TActorContext& ctx); + TClientGroupInfo& AddGroup(const ui32 group); + void FillEmptyGroup(const ui32 group, const THashMap<ui32, TPartitionInfo>& partitionsInfo); + void AddSession(const ui32 group, const THashMap<ui32, TPartitionInfo>& partitionsInfo, const TActorId& sender, const NKikimrPQ::TRegisterReadSession& record); - TStringBuilder GetPrefix() const; - - }; - - THashMap<TString, TClientInfo> ClientsInfo; //map from userId -> to info - + TStringBuilder GetPrefix() const; + + }; + + THashMap<TString, TClientInfo> ClientsInfo; //map from userId -> to info + THashMap<TActorId, TPipeInfo> PipesInfo; - - NMetrics::TResourceMetrics *ResourceMetrics; - + + NMetrics::TResourceMetrics *ResourceMetrics; + THashMap<ui64, TActorId> TabletPipes; - + THashSet<ui64> WaitingForStat; bool WaitingForACL; - ui64 TotalAvgSpeedSec; - ui64 MaxAvgSpeedSec; - ui64 TotalAvgSpeedMin; - ui64 MaxAvgSpeedMin; - ui64 TotalAvgSpeedHour; - ui64 MaxAvgSpeedHour; - ui64 TotalAvgSpeedDay; - ui64 MaxAvgSpeedDay; - - std::deque<TAutoPtr<TEvPersQueue::TEvRegisterReadSession>> RegisterEvents; - std::deque<TAutoPtr<TEvPersQueue::TEvPersQueue::TEvUpdateBalancerConfig>> UpdateEvents; -public: + ui64 TotalAvgSpeedSec; + ui64 MaxAvgSpeedSec; + ui64 TotalAvgSpeedMin; + ui64 MaxAvgSpeedMin; + ui64 TotalAvgSpeedHour; + ui64 MaxAvgSpeedHour; + ui64 TotalAvgSpeedDay; + ui64 MaxAvgSpeedDay; + + std::deque<TAutoPtr<TEvPersQueue::TEvRegisterReadSession>> RegisterEvents; + std::deque<TAutoPtr<TEvPersQueue::TEvPersQueue::TEvUpdateBalancerConfig>> UpdateEvents; +public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::PERSQUEUE_READ_BALANCER_ACTOR; - } - + } + TPersQueueReadBalancer(const TActorId &tablet, TTabletStorageInfo *info) - : TActor(&TThis::StateInit) + : TActor(&TThis::StateInit) , TTabletExecutedFlat(info, tablet, new NMiniKQL::TMiniKQLFactory) - , Inited(false) - , PathId(0) - , Generation(0) - , Version(-1) - , MaxPartsPerTablet(0) + , Inited(false) + , PathId(0) + , Generation(0) + , Version(-1) + , MaxPartsPerTablet(0) , SchemeShardId(0) , LastACLUpdate(TInstant::Zero()) - , TxId(0) - , NumActiveParts(0) - , MaxIdx(0) - , NextPartitionId(0) - , NextPartitionIdForWrite(0) - , StartPartitionIdForWrite(0) - , TotalGroups(0) - , NoGroupsInBase(true) - , ResourceMetrics(nullptr) + , TxId(0) + , NumActiveParts(0) + , MaxIdx(0) + , NextPartitionId(0) + , NextPartitionIdForWrite(0) + , StartPartitionIdForWrite(0) + , TotalGroups(0) + , NoGroupsInBase(true) + , ResourceMetrics(nullptr) , WaitingForACL(false) - , TotalAvgSpeedSec(0) - , MaxAvgSpeedSec(0) - , TotalAvgSpeedMin(0) - , MaxAvgSpeedMin(0) - , TotalAvgSpeedHour(0) - , MaxAvgSpeedHour(0) - , TotalAvgSpeedDay(0) - , MaxAvgSpeedDay(0) - {} - - STFUNC(StateInit) { - TMetricsTimeKeeper keeper(ResourceMetrics, ctx); - - switch (ev->GetTypeRewrite()) { + , TotalAvgSpeedSec(0) + , MaxAvgSpeedSec(0) + , TotalAvgSpeedMin(0) + , MaxAvgSpeedMin(0) + , TotalAvgSpeedHour(0) + , MaxAvgSpeedHour(0) + , TotalAvgSpeedDay(0) + , MaxAvgSpeedDay(0) + {} + + STFUNC(StateInit) { + TMetricsTimeKeeper keeper(ResourceMetrics, ctx); + + switch (ev->GetTypeRewrite()) { HFunc(TEvents::TEvPoisonPill, Handle); - HFunc(TEvPersQueue::TEvUpdateBalancerConfig, HandleOnInit); - HFunc(TEvPersQueue::TEvWakeupClient, Handle); - HFunc(TEvPersQueue::TEvDescribe, Handle); - HFunc(TEvPersQueue::TEvRegisterReadSession, HandleOnInit); - HFunc(TEvPersQueue::TEvGetReadSessionsInfo, Handle); - HFunc(TEvTabletPipe::TEvServerConnected, Handle); - HFunc(TEvTabletPipe::TEvServerDisconnected, Handle); - HFunc(TEvPersQueue::TEvCheckACL, Handle); - HFunc(TEvPersQueue::TEvGetPartitionIdForWrite, Handle); - default: - StateInitImpl(ev, ctx); - break; - } - } - - STFUNC(StateWork) { - TMetricsTimeKeeper keeper(ResourceMetrics, ctx); - - switch (ev->GetTypeRewrite()) { - HFunc(TEvents::TEvPoisonPill, Handle); - HFunc(TEvents::TEvWakeup, HandleWakeup); + HFunc(TEvPersQueue::TEvUpdateBalancerConfig, HandleOnInit); + HFunc(TEvPersQueue::TEvWakeupClient, Handle); + HFunc(TEvPersQueue::TEvDescribe, Handle); + HFunc(TEvPersQueue::TEvRegisterReadSession, HandleOnInit); + HFunc(TEvPersQueue::TEvGetReadSessionsInfo, Handle); + HFunc(TEvTabletPipe::TEvServerConnected, Handle); + HFunc(TEvTabletPipe::TEvServerDisconnected, Handle); + HFunc(TEvPersQueue::TEvCheckACL, Handle); + HFunc(TEvPersQueue::TEvGetPartitionIdForWrite, Handle); + default: + StateInitImpl(ev, ctx); + break; + } + } + + STFUNC(StateWork) { + TMetricsTimeKeeper keeper(ResourceMetrics, ctx); + + switch (ev->GetTypeRewrite()) { + HFunc(TEvents::TEvPoisonPill, Handle); + HFunc(TEvents::TEvWakeup, HandleWakeup); HFunc(TEvPersQueue::TEvUpdateACL, HandleUpdateACL); HFunc(TEvPersQueue::TEvCheckACL, Handle); - HFunc(TEvPersQueue::TEvGetPartitionIdForWrite, Handle); - HFunc(TEvPersQueue::TEvUpdateBalancerConfig, Handle); - HFunc(TEvPersQueue::TEvWakeupClient, Handle); - HFunc(TEvPersQueue::TEvDescribe, Handle); - HFunc(TEvPersQueue::TEvRegisterReadSession, Handle); - HFunc(TEvPersQueue::TEvGetReadSessionsInfo, Handle); - HFunc(TEvPersQueue::TEvPartitionReleased, Handle); - HFunc(TEvTabletPipe::TEvServerConnected, Handle); - HFunc(TEvTabletPipe::TEvServerDisconnected, Handle); - HFunc(TEvTabletPipe::TEvClientConnected, Handle); - HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); - HFunc(TEvPersQueue::TEvStatusResponse, Handle); + HFunc(TEvPersQueue::TEvGetPartitionIdForWrite, Handle); + HFunc(TEvPersQueue::TEvUpdateBalancerConfig, Handle); + HFunc(TEvPersQueue::TEvWakeupClient, Handle); + HFunc(TEvPersQueue::TEvDescribe, Handle); + HFunc(TEvPersQueue::TEvRegisterReadSession, Handle); + HFunc(TEvPersQueue::TEvGetReadSessionsInfo, Handle); + HFunc(TEvPersQueue::TEvPartitionReleased, Handle); + HFunc(TEvTabletPipe::TEvServerConnected, Handle); + HFunc(TEvTabletPipe::TEvServerDisconnected, Handle); + HFunc(TEvTabletPipe::TEvClientConnected, Handle); + HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); + HFunc(TEvPersQueue::TEvStatusResponse, Handle); HFunc(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult, Handle); - - default: - HandleDefaultEvents(ev, ctx); - break; - } - } - - STFUNC(StateBroken) { - TMetricsTimeKeeper keeper(ResourceMetrics, ctx); - - switch (ev->GetTypeRewrite()) { - HFunc(TEvTablet::TEvTabletDead, HandleTabletDead) - } - } - -}; - -} -} + + default: + HandleDefaultEvents(ev, ctx); + break; + } + } + + STFUNC(StateBroken) { + TMetricsTimeKeeper keeper(ResourceMetrics, ctx); + + switch (ev->GetTypeRewrite()) { + HFunc(TEvTablet::TEvTabletDead, HandleTabletDead) + } + } + +}; + +} +} diff --git a/ydb/core/persqueue/subscriber.cpp b/ydb/core/persqueue/subscriber.cpp index e884714153..641c566a1c 100644 --- a/ydb/core/persqueue/subscriber.cpp +++ b/ydb/core/persqueue/subscriber.cpp @@ -1,84 +1,84 @@ -#include "subscriber.h" -#include "user_info.h" +#include "subscriber.h" +#include "user_info.h" #include <ydb/core/protos/counters_pq.pb.h> - -namespace NKikimr { -namespace NPQ { - -TSubscriberLogic::TSubscriberLogic() -{} - -TMaybe<TReadInfo> TSubscriberLogic::ForgetSubscription(const ui64 cookie) -{ - auto it = ReadInfo.find(cookie); - if (it == ReadInfo.end()) //already answered - return TMaybe<TReadInfo>(); - TReadInfo res(std::move(it->second)); - ReadInfo.erase(it); - return res; -} - -void TSubscriberLogic::AddSubscription(TReadInfo&& info, const ui64 cookie) -{ - Y_VERIFY(WaitingReads.empty() || WaitingReads.back().Offset == info.Offset); - info.IsSubscription = true; - WaitingReads.push_back({info.Offset, cookie}); - bool res = ReadInfo.insert({cookie, std::move(info)}).second; - Y_VERIFY(res); -} - + +namespace NKikimr { +namespace NPQ { + +TSubscriberLogic::TSubscriberLogic() +{} + +TMaybe<TReadInfo> TSubscriberLogic::ForgetSubscription(const ui64 cookie) +{ + auto it = ReadInfo.find(cookie); + if (it == ReadInfo.end()) //already answered + return TMaybe<TReadInfo>(); + TReadInfo res(std::move(it->second)); + ReadInfo.erase(it); + return res; +} + +void TSubscriberLogic::AddSubscription(TReadInfo&& info, const ui64 cookie) +{ + Y_VERIFY(WaitingReads.empty() || WaitingReads.back().Offset == info.Offset); + info.IsSubscription = true; + WaitingReads.push_back({info.Offset, cookie}); + bool res = ReadInfo.insert({cookie, std::move(info)}).second; + Y_VERIFY(res); +} + TVector<std::pair<TReadInfo, ui64>> TSubscriberLogic::CompleteSubscriptions(const ui64 endOffset) -{ +{ TVector<std::pair<TReadInfo, ui64>> res; - while (!WaitingReads.empty()) { - const ui64& offset = WaitingReads.front().Offset; - const ui64& cookie = WaitingReads.front().Cookie; - if (offset >= endOffset) - break; - auto it = ReadInfo.find(cookie); - if (it != ReadInfo.end()) { - it->second.Timestamp = TAppData::TimeProvider->Now(); - Y_VERIFY(it->second.Offset == offset); - res.emplace_back(std::move(it->second), it->first); - ReadInfo.erase(it); - } - WaitingReads.pop_front(); - } - return res; -} - - + while (!WaitingReads.empty()) { + const ui64& offset = WaitingReads.front().Offset; + const ui64& cookie = WaitingReads.front().Cookie; + if (offset >= endOffset) + break; + auto it = ReadInfo.find(cookie); + if (it != ReadInfo.end()) { + it->second.Timestamp = TAppData::TimeProvider->Now(); + Y_VERIFY(it->second.Offset == offset); + res.emplace_back(std::move(it->second), it->first); + ReadInfo.erase(it); + } + WaitingReads.pop_front(); + } + return res; +} + + TSubscriber::TSubscriber(const ui32 partition, TTabletCountersBase& counters, const TActorId& tablet) - : Subscriber() - , Partition(partition) - , Counters(counters) - , Tablet(tablet) -{} - + : Subscriber() + , Partition(partition) + , Counters(counters) + , Tablet(tablet) +{} + TMaybe<TReadInfo> TSubscriber::OnTimeout(TEvPQ::TEvReadTimeout::TPtr& ev) { - TMaybe<TReadInfo> res = Subscriber.ForgetSubscription(ev->Get()->Cookie); + TMaybe<TReadInfo> res = Subscriber.ForgetSubscription(ev->Get()->Cookie); if (res) { Counters.Cumulative()[COUNTER_PQ_READ_SUBSCRIPTION_TIMEOUT].Increment(1); } - return res; -} - -void TSubscriber::AddSubscription(TReadInfo&& info, const ui32 timeout, const ui64 cookie, const TActorContext& ctx) { - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "waiting read cookie " << cookie << " partition " << Partition << " user " << info.User - << " offset " << info.Offset << " count " << info.Count << " size " << info.Size << " timeout " << timeout); - Subscriber.AddSubscription(std::move(info), cookie); - if (timeout == 0) - ctx.Send(ctx.SelfID, new TEvPQ::TEvReadTimeout(cookie)); - else - ctx.Schedule(TDuration::MilliSeconds(timeout), new TEvPQ::TEvReadTimeout(cookie)); -} - + return res; +} + +void TSubscriber::AddSubscription(TReadInfo&& info, const ui32 timeout, const ui64 cookie, const TActorContext& ctx) { + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "waiting read cookie " << cookie << " partition " << Partition << " user " << info.User + << " offset " << info.Offset << " count " << info.Count << " size " << info.Size << " timeout " << timeout); + Subscriber.AddSubscription(std::move(info), cookie); + if (timeout == 0) + ctx.Send(ctx.SelfID, new TEvPQ::TEvReadTimeout(cookie)); + else + ctx.Schedule(TDuration::MilliSeconds(timeout), new TEvPQ::TEvReadTimeout(cookie)); +} + TVector<std::pair<TReadInfo, ui64>> TSubscriber::GetReads(const ui64 endOffset) { - auto res = Subscriber.CompleteSubscriptions(endOffset); - Counters.Cumulative()[COUNTER_PQ_READ_SUBSCRIPTION_OK].Increment(res.size()); - return res; -} - -}// NPQ -}// NKikimr - + auto res = Subscriber.CompleteSubscriptions(endOffset); + Counters.Cumulative()[COUNTER_PQ_READ_SUBSCRIPTION_OK].Increment(res.size()); + return res; +} + +}// NPQ +}// NKikimr + diff --git a/ydb/core/persqueue/subscriber.h b/ydb/core/persqueue/subscriber.h index b603d30adf..7bbfe41e1f 100644 --- a/ydb/core/persqueue/subscriber.h +++ b/ydb/core/persqueue/subscriber.h @@ -1,41 +1,41 @@ -#pragma once +#pragma once -#include "header.h" -#include "blob.h" +#include "header.h" +#include "blob.h" #include <ydb/core/tablet/tablet_counters.h> #include <ydb/core/base/appdata.h> #include <ydb/core/persqueue/events/internal.h> - -namespace NKikimr { -namespace NPQ { - -struct TUserInfo; - + +namespace NKikimr { +namespace NPQ { + +struct TUserInfo; + struct TReadAnswer { ui64 Size; THolder<IEventBase> Event; }; -struct TReadInfo { +struct TReadInfo { TString User; - TString ClientDC; - ui64 Offset; - ui16 PartNo; - ui32 Count; - ui32 Size; - ui64 Destination; - TInstant Timestamp; + TString ClientDC; + ui64 Offset; + ui16 PartNo; + ui32 Count; + ui32 Size; + ui64 Destination; + TInstant Timestamp; ui64 ReadTimestampMs; TDuration WaitQuotaTime; - - bool IsSubscription; - + + bool IsSubscription; + TVector<TRequestedBlob> Blobs; //offset, count, value - ui64 CachedOffset; //offset of head can be bigger than last databody offset + ui64 CachedOffset; //offset of head can be bigger than last databody offset TVector<TClientBlob> Cached; //records from head - - TReadInfo() = delete; + + TReadInfo() = delete; TReadInfo( const TString& user, const TString& clientDC, @@ -47,20 +47,20 @@ struct TReadInfo { ui64 readTimestampMs, TDuration waitQuotaTime ) - : User(user) - , ClientDC(clientDC) - , Offset(offset) - , PartNo(partNo) - , Count(count) - , Size(size) - , Destination(dst) - , Timestamp(TAppData::TimeProvider->Now()) + : User(user) + , ClientDC(clientDC) + , Offset(offset) + , PartNo(partNo) + , Count(count) + , Size(size) + , Destination(dst) + , Timestamp(TAppData::TimeProvider->Now()) , ReadTimestampMs(readTimestampMs) , WaitQuotaTime(waitQuotaTime) - , IsSubscription(false) - , CachedOffset(0) - {} - + , IsSubscription(false) + , CachedOffset(0) + {} + TReadAnswer FormAnswer( const TActorContext& ctx, const TEvPQ::TEvBlobResponse& response, @@ -80,56 +80,56 @@ struct TReadInfo { const ui64 sizeLag ) { TEvPQ::TEvBlobResponse response(0, TVector<TRequestedBlob>()); - return FormAnswer(ctx, response, endOffset, partition, ui, dst, sizeLag); - } -}; - -struct TOffsetCookie { - ui64 Offset; - ui64 Cookie; -}; - - -class TSubscriberLogic : public TNonCopyable { -public: - TSubscriberLogic(); - - //will store and return cookie - void AddSubscription(TReadInfo&& info, const ui64 cookie); - - //forget on timeout - TMaybe<TReadInfo> ForgetSubscription(const ui64 cookie); - - //form TReadInfo::Cached with new data and return ready reads + return FormAnswer(ctx, response, endOffset, partition, ui, dst, sizeLag); + } +}; + +struct TOffsetCookie { + ui64 Offset; + ui64 Cookie; +}; + + +class TSubscriberLogic : public TNonCopyable { +public: + TSubscriberLogic(); + + //will store and return cookie + void AddSubscription(TReadInfo&& info, const ui64 cookie); + + //forget on timeout + TMaybe<TReadInfo> ForgetSubscription(const ui64 cookie); + + //form TReadInfo::Cached with new data and return ready reads TVector<std::pair<TReadInfo, ui64>> CompleteSubscriptions(const ui64 endOffset); -private: +private: THashMap<ui64, TReadInfo> ReadInfo; // cookie -> {...} - std::deque<TOffsetCookie> WaitingReads; -}; - - -class TSubscriber : public TNonCopyable { -public: + std::deque<TOffsetCookie> WaitingReads; +}; + + +class TSubscriber : public TNonCopyable { +public: TSubscriber(const ui32 partition, TTabletCountersBase& counters, const TActorId& tablet); - - //will wait for new data or timeout for this read and set timer for timeout ms - void AddSubscription(TReadInfo&& info, const ui32 timeout, const ui64 cookie, const TActorContext& ctx); - - //handle of timeout for some read + + //will wait for new data or timeout for this read and set timer for timeout ms + void AddSubscription(TReadInfo&& info, const ui32 timeout, const ui64 cookie, const TActorContext& ctx); + + //handle of timeout for some read TMaybe<TReadInfo> OnTimeout(TEvPQ::TEvReadTimeout::TPtr& ev); - - //get completed subscriptions + + //get completed subscriptions TVector<std::pair<TReadInfo, ui64>> GetReads(const ui64 endOffsets); - -private: - TSubscriberLogic Subscriber; - const ui32 Partition; - TTabletCountersBase& Counters; + +private: + TSubscriberLogic Subscriber; + const ui32 Partition; + TTabletCountersBase& Counters; TActorId Tablet; -}; - - - - -}// NPQ -}// NKikimr +}; + + + + +}// NPQ +}// NKikimr diff --git a/ydb/core/persqueue/type_decoders.h b/ydb/core/persqueue/type_decoders.h index 03290d71dc..30a8846923 100644 --- a/ydb/core/persqueue/type_decoders.h +++ b/ydb/core/persqueue/type_decoders.h @@ -172,7 +172,7 @@ public: } TAutoPtr<IChunkIterator> MakeIterator() const override { - return new TChunkIterator(Data, Mask.Size()); + return new TChunkIterator(Data, Mask.Size()); } TDataRef GetValue(size_t index) const override { @@ -186,27 +186,27 @@ public: private: class TChunkIterator : public IChunkIterator { public: - TChunkIterator(const TDataRef& data, const size_t maskSize) + TChunkIterator(const TDataRef& data, const size_t maskSize) : Current(data.Data() + sizeof(TCodecSig)) - , End(data.End() - maskSize) + , End(data.End() - maskSize) , MaskIter(data) { } TDataRef Next() override { const char* data = Current; Current += Size; - Y_VERIFY(Current <= End); + Y_VERIFY(Current <= End); return MaskIter.Next() ? TDataRef(data, Size) : TDataRef(); } TDataRef Peek() const override { - Y_VERIFY(Current + Size <= End); + Y_VERIFY(Current + Size <= End); return MaskIter.IsNotNull() ? TDataRef(Current, Size) : TDataRef(); } private: const char* Current; - const char* End; + const char* End; TDecoderMaskIterator<IsNullable> MaskIter; }; @@ -234,7 +234,7 @@ public: } TAutoPtr<IChunkIterator> MakeIterator() const override { - return new TChunkIterator(Data, Sizes, Data.End() - Mask.Size()); + return new TChunkIterator(Data, Sizes, Data.End() - Mask.Size()); } TDataRef GetValue(size_t index) const override { @@ -249,9 +249,9 @@ public: private: class TChunkIterator : public IChunkIterator { public: - TChunkIterator(const TDataRef& data, const ui32* sizes, const char* end) + TChunkIterator(const TDataRef& data, const ui32* sizes, const char* end) : Current(data.Data() + sizeof(TCodecSig)) - , End(end) + , End(end) , LastOffset(0) , CurrentOffset(sizes) , MaskIter(data) @@ -266,7 +266,7 @@ private: const char* data = Current; Current += size; - Y_VERIFY(data + size <= End); + Y_VERIFY(data + size <= End); return TDataRef(data, size); } ++CurrentOffset; @@ -274,16 +274,16 @@ private: } TDataRef Peek() const override { - if (MaskIter.IsNotNull()) { + if (MaskIter.IsNotNull()) { Y_VERIFY(Current + ReadUnaligned<ui32>(CurrentOffset) - LastOffset <= End); return TDataRef(Current, ReadUnaligned<ui32>(CurrentOffset) - LastOffset); - } + } return TDataRef(); } private: const char* Current; - const char* End; + const char* End; ui32 LastOffset; const ui32* CurrentOffset; TDecoderMaskIterator<IsNullable> MaskIter; @@ -302,17 +302,17 @@ class TVarIntValueDecoder { public: using TType = TIntType; - inline TType Peek(const char* data, const char* end) const { + inline TType Peek(const char* data, const char* end) const { i64 value; - auto bytes = in_long(value, data); - Y_VERIFY(data + bytes <= end); + auto bytes = in_long(value, data); + Y_VERIFY(data + bytes <= end); return value; } - inline size_t Load(const char* data, const char* end, TType& value) const { + inline size_t Load(const char* data, const char* end, TType& value) const { i64 loaded = 0; auto bytes = in_long(loaded, data); - Y_VERIFY(data + bytes <= end); + Y_VERIFY(data + bytes <= end); value = loaded; return bytes; } @@ -324,17 +324,17 @@ public: using TType = TIntType; using TUnsigned = std::make_unsigned_t<TType>; - inline TType Peek(const char* data, const char* end) const { + inline TType Peek(const char* data, const char* end) const { i64 value; - auto bytes = in_long(value, data); - Y_VERIFY(data + bytes <= end); + auto bytes = in_long(value, data); + Y_VERIFY(data + bytes <= end); return ZigZagDecode(static_cast<TUnsigned>(value)); } - inline size_t Load(const char* data, const char* end, TType& value) const { + inline size_t Load(const char* data, const char* end, TType& value) const { i64 loaded = 0; auto bytes = in_long(loaded, data); - Y_VERIFY(data + bytes <= end); + Y_VERIFY(data + bytes <= end); value = ZigZagDecode(static_cast<TUnsigned>(loaded)); return bytes; } @@ -345,14 +345,14 @@ class TDeltaValueDecoder : public TValueDecoder { public: using TType = typename TValueDecoder::TType; - inline TType Peek(const char* data, const char* end) const { + inline TType Peek(const char* data, const char* end) const { TType value; - TValueDecoder::Load(data, end, value); + TValueDecoder::Load(data, end, value); return Rev ? Last - value : Last + value; } - inline size_t Load(const char* data, const char* end, TType& value) { - auto bytes = TValueDecoder::Load(data, end, value); + inline size_t Load(const char* data, const char* end, TType& value) { + auto bytes = TValueDecoder::Load(data, end, value); if (Rev) Last -= value; else @@ -380,7 +380,7 @@ public: } TAutoPtr<IChunkIterator> MakeIterator() const override { - return new TChunkIterator(Data, Mask.Size()); + return new TChunkIterator(Data, Mask.Size()); } TDataRef GetValue(size_t index) const override { @@ -398,7 +398,7 @@ private: TType value; for (size_t count = index + 1 - Cache.size(); count; --count) if (MaskIter.Next()) { - Fetched += ValueDecoder.Load(Fetched, Data.End() - Mask.Size(), value); + Fetched += ValueDecoder.Load(Fetched, Data.End() - Mask.Size(), value); Cache.push_back(value); } else { Cache.push_back(0); @@ -413,16 +413,16 @@ private: private: class TChunkIterator : public IChunkIterator { public: - TChunkIterator(const TDataRef& data, const size_t maskSize) + TChunkIterator(const TDataRef& data, const size_t maskSize) : Current(data.Data() + sizeof(TCodecSig)) - , End(data.End() - maskSize) + , End(data.End() - maskSize) , MaskIter(data) { } TDataRef Next() override { if (MaskIter.Next()) { TType value; - Current += ValueDecoder.Load(Current, End, value); + Current += ValueDecoder.Load(Current, End, value); return TDataRef((const char*)&value, sizeof(value), true); } return TDataRef(); @@ -430,7 +430,7 @@ private: TDataRef Peek() const override { if (MaskIter.IsNotNull()) { - const TType value = ValueDecoder.Peek(Current, End); + const TType value = ValueDecoder.Peek(Current, End); return TDataRef((const char*)&value, sizeof(value), true); } return TDataRef(); @@ -438,7 +438,7 @@ private: private: const char* Current; - const char* End; + const char* End; TDecoderMaskIterator<IsNullable> MaskIter; TValueDecoder ValueDecoder; }; diff --git a/ydb/core/persqueue/user_info.cpp b/ydb/core/persqueue/user_info.cpp index 6d3674c7b3..26811765e2 100644 --- a/ydb/core/persqueue/user_info.cpp +++ b/ydb/core/persqueue/user_info.cpp @@ -33,8 +33,8 @@ TUsersInfoStorage::TUsersInfoStorage( ui64 tabletId, const TString& topicName, ui32 partition, - const TTabletCountersBase& counters, - const NKikimrPQ::TPQTabletConfig& config, + const TTabletCountersBase& counters, + const NKikimrPQ::TPQTabletConfig& config, const TString& cloudId, const TString& dbId, const TString& folderId @@ -43,7 +43,7 @@ TUsersInfoStorage::TUsersInfoStorage( , TabletId(tabletId) , TopicName(topicName) , Partition(partition) - , Config(config) + , Config(config) , CloudId(cloudId) , DbId(dbId) , FolderId(folderId) @@ -56,7 +56,7 @@ void TUsersInfoStorage::Init(TActorId tabletActor, TActorId partitionActor) { Y_VERIFY(!PartitionActor); TabletActor = tabletActor; PartitionActor = partitionActor; - + for (auto& userInfoPair : UsersInfo) { auto& userInfo = userInfoPair.second; Y_VERIFY(!userInfo.ReadSpeedLimiter); @@ -69,11 +69,11 @@ void TUsersInfoStorage::ParseDeprecated(const TString& key, const TString& data, Y_VERIFY(key[TKeyPrefix::MarkPosition()] == TKeyPrefix::MarkUserDeprecated); TString user = key.substr(TKeyPrefix::MarkedSize()); - TUserInfo* userInfo = GetIfExists(user); - if (userInfo && userInfo->Parsed) { - return; - } - + TUserInfo* userInfo = GetIfExists(user); + if (userInfo && userInfo->Parsed) { + return; + } + ui64 offset = 0; ui32 gen = 0; ui32 step = 0; @@ -82,7 +82,7 @@ void TUsersInfoStorage::ParseDeprecated(const TString& key, const TString& data, Y_VERIFY(offset <= (ui64)Max<i64>(), "Offset is too big: %" PRIu64, offset); if (!userInfo) { - Create(ctx, user, 0, false, session, gen, step, static_cast<i64>(offset), 0, TInstant::Zero()); + Create(ctx, user, 0, false, session, gen, step, static_cast<i64>(offset), 0, TInstant::Zero()); } else { userInfo->Session = session; userInfo->Generation = gen; @@ -108,7 +108,7 @@ void TUsersInfoStorage::Parse(const TString& key, const TString& data, const TAc TUserInfo* userInfo = GetIfExists(user); if (!userInfo) { Create( - ctx, user, userData.GetReadRuleGeneration(), false, userData.GetSession(), + ctx, user, userData.GetReadRuleGeneration(), false, userData.GetSession(), userData.GetGeneration(), userData.GetStep(), offset, userData.GetOffsetRewindSum(), TInstant::Zero() ); } else { @@ -117,25 +117,25 @@ void TUsersInfoStorage::Parse(const TString& key, const TString& data, const TAc userInfo->Step = userData.GetStep(); userInfo->Offset = offset; userInfo->ReadOffsetRewindSum = userData.GetOffsetRewindSum(); - userInfo->ReadRuleGeneration = userData.GetReadRuleGeneration(); + userInfo->ReadRuleGeneration = userData.GetReadRuleGeneration(); } - userInfo = GetIfExists(user); - Y_VERIFY(userInfo); - userInfo->Parsed = true; + userInfo = GetIfExists(user); + Y_VERIFY(userInfo); + userInfo->Parsed = true; +} + +void TUsersInfoStorage::Remove(const TString& user, const TActorContext& ctx) { + auto it = UsersInfo.find(user); + Y_VERIFY(it != UsersInfo.end()); + it->second.Clear(ctx); + UsersInfo.erase(it); } -void TUsersInfoStorage::Remove(const TString& user, const TActorContext& ctx) { - auto it = UsersInfo.find(user); - Y_VERIFY(it != UsersInfo.end()); - it->second.Clear(ctx); - UsersInfo.erase(it); -} - TUserInfo& TUsersInfoStorage::GetOrCreate(const TString& user, const TActorContext& ctx) { Y_VERIFY(!user.empty()); auto it = UsersInfo.find(user); if (it == UsersInfo.end()) { - return Create(ctx, user, 0, false, "", 0, 0, 0, 0, TInstant::Zero()); + return Create(ctx, user, 0, false, "", 0, 0, 0, 0, TInstant::Zero()); } return it->second; } @@ -150,20 +150,20 @@ THashMap<TString, TUserInfo>& TUsersInfoStorage::GetAll() { } TUserInfo& TUsersInfoStorage::Create( - const TActorContext& ctx, const TString& user, const ui64 readRuleGeneration, bool important, const TString& session, + const TActorContext& ctx, const TString& user, const ui64 readRuleGeneration, bool important, const TString& session, ui32 gen, ui32 step, i64 offset, ui64 readOffsetRewindSum, TInstant readFromTimestamp ) { - + ui64 burst = 1'000'000'000, speed = 1'000'000'000; - if (AppData(ctx)->PQConfig.GetQuotingConfig().GetPartitionReadQuotaIsTwiceWriteQuota()) { - burst = Config.GetPartitionConfig().GetBurstSize() * 2; - speed = Config.GetPartitionConfig().GetWriteSpeedInBytesPerSecond() * 2; - } + if (AppData(ctx)->PQConfig.GetQuotingConfig().GetPartitionReadQuotaIsTwiceWriteQuota()) { + burst = Config.GetPartitionConfig().GetBurstSize() * 2; + speed = Config.GetPartitionConfig().GetWriteSpeedInBytesPerSecond() * 2; + } auto result = UsersInfo.emplace( std::piecewise_construct, std::forward_as_tuple(user), - std::forward_as_tuple(ctx, CreateReadSpeedLimiter(user), user, readRuleGeneration, important, TopicName, Partition, session, - gen, step, offset, readOffsetRewindSum, DCId, readFromTimestamp, CloudId, DbId, FolderId, burst, speed) + std::forward_as_tuple(ctx, CreateReadSpeedLimiter(user), user, readRuleGeneration, important, TopicName, Partition, session, + gen, step, offset, readOffsetRewindSum, DCId, readFromTimestamp, CloudId, DbId, FolderId, burst, speed) ); Y_VERIFY(result.second); return result.first->second; @@ -171,17 +171,17 @@ TUserInfo& TUsersInfoStorage::Create( void TUsersInfoStorage::Clear(const TActorContext& ctx) { for (auto& userInfoPair : UsersInfo) { - userInfoPair.second.Clear(ctx); + userInfoPair.second.Clear(ctx); + } + UsersInfo.clear(); +} + +void TUserInfo::Clear(const TActorContext& ctx) { + if (ReadSpeedLimiter) { + ctx.Send(ReadSpeedLimiter->Actor, new TEvents::TEvPoisonPill()); } - UsersInfo.clear(); } -void TUserInfo::Clear(const TActorContext& ctx) { - if (ReadSpeedLimiter) { - ctx.Send(ReadSpeedLimiter->Actor, new TEvents::TEvPoisonPill()); - } -} - THolder<TReadSpeedLimiterHolder> TUsersInfoStorage::CreateReadSpeedLimiter(const TString& user) const { const auto& quotingConfig = AppData()->PQConfig.GetQuotingConfig(); if (TabletActor && quotingConfig.GetEnableQuoting() && quotingConfig.GetEnableReadQuoting()) { diff --git a/ydb/core/persqueue/user_info.h b/ydb/core/persqueue/user_info.h index 4ab03f1069..258e947c46 100644 --- a/ydb/core/persqueue/user_info.h +++ b/ydb/core/persqueue/user_info.h @@ -1,8 +1,8 @@ -#pragma once - -#include "working_time_counter.h" -#include "subscriber.h" -#include "percentile_counter.h" +#pragma once + +#include "working_time_counter.h" +#include "subscriber.h" +#include "percentile_counter.h" #include "read_speed_limiter.h" #include <ydb/core/base/counters.h> @@ -10,14 +10,14 @@ #include <ydb/core/protos/pqconfig.pb.h> #include <ydb/core/tablet/tablet_counters_protobuf.h> #include <ydb/library/persqueue/topic_parser/topic_parser.h> - + #include <library/cpp/sliding_window/sliding_window.h> #include <util/generic/set.h> -namespace NKikimr { -namespace NPQ { - +namespace NKikimr { +namespace NPQ { + namespace NDeprecatedUserData { // [offset:64bit][generation:32bit][step:32bit][session:other bytes] TBuffer Serialize(ui64 offset, ui32 gen, ui32 step, const TString& session); @@ -27,195 +27,195 @@ namespace NDeprecatedUserData { static const ui32 MAX_USER_TS_CACHE_SIZE = 10'000; static const ui64 MIN_TIMESTAMP_MS = 1'000'000'000'000ll; // around 2002 year static const TString CLIENTID_TO_READ_INTERNALLY = "$without_consumer"; - -typedef TProtobufTabletLabeledCounters<EClientLabeledCounters_descriptor> TUserLabeledCounters; - - -class TQuotaTracker { - - class TAvgTracker { - public: - TAvgTracker(ui64 duration) - : Duration(duration) - , Sum(0) - { - Y_VERIFY(duration > 0); - } - - void Update(i64 value, i64 ts) - { - Values.push_back(std::make_pair(value, ts)); - i64 newStart = ts - Duration; - if (Values.size() > 1) { - Sum += GetSum(Values.size() - 2); + +typedef TProtobufTabletLabeledCounters<EClientLabeledCounters_descriptor> TUserLabeledCounters; + + +class TQuotaTracker { + + class TAvgTracker { + public: + TAvgTracker(ui64 duration) + : Duration(duration) + , Sum(0) + { + Y_VERIFY(duration > 0); + } + + void Update(i64 value, i64 ts) + { + Values.push_back(std::make_pair(value, ts)); + i64 newStart = ts - Duration; + if (Values.size() > 1) { + Sum += GetSum(Values.size() - 2); Y_VERIFY(Values.back().second >= Values.back().second); - } - while (Values.size() > 2 && newStart > Values[1].second) { - Sum -= GetSum(0); - Values.pop_front(); - } - } - - ui64 GetAvg() { - return (Values.size() > 1 && Values.back().second > Values.front().second) - ? Max<i64>(0, Sum / (Values.back().second - Values.front().second)) - : 0; - } - - private: - - i64 GetSum(ui32 pos) { - Y_VERIFY(pos + 1 < Values.size()); - return (Values[pos + 1].first + Values[pos].first) * (Values[pos + 1].second - Values[pos].second) / 2; - } - - private: - ui64 Duration; - i64 Sum; - std::deque<std::pair<i64, i64>> Values; - }; - - -public: - TQuotaTracker(const ui64 maxBurst, const ui64 speedPerSecond, const TInstant& timestamp) - : AvailableSize(maxBurst) - , SpeedPerSecond(speedPerSecond) - , LastUpdateTime(timestamp) - , MaxBurst(maxBurst) + } + while (Values.size() > 2 && newStart > Values[1].second) { + Sum -= GetSum(0); + Values.pop_front(); + } + } + + ui64 GetAvg() { + return (Values.size() > 1 && Values.back().second > Values.front().second) + ? Max<i64>(0, Sum / (Values.back().second - Values.front().second)) + : 0; + } + + private: + + i64 GetSum(ui32 pos) { + Y_VERIFY(pos + 1 < Values.size()); + return (Values[pos + 1].first + Values[pos].first) * (Values[pos + 1].second - Values[pos].second) / 2; + } + + private: + ui64 Duration; + i64 Sum; + std::deque<std::pair<i64, i64>> Values; + }; + + +public: + TQuotaTracker(const ui64 maxBurst, const ui64 speedPerSecond, const TInstant& timestamp) + : AvailableSize(maxBurst) + , SpeedPerSecond(speedPerSecond) + , LastUpdateTime(timestamp) + , MaxBurst(maxBurst) , AvgMin(60'000) //avg avail in bytes per sec for last minute - , AvgSec(1000) //avg avail in bytes per sec - , QuotedTime(0) - {} - - ui64 GetQuotedTime() const { - return QuotedTime; - } - - void UpdateConfig(const ui64 maxBurst, const ui64 speedPerSecond) - { - SpeedPerSecond = speedPerSecond; - MaxBurst = maxBurst; - AvailableSize = maxBurst; - } - - void Update(const TInstant& timestamp); - - bool CanExaust() const { - return AvailableSize > 0; - } - - void Exaust(const ui64 size, const TInstant& timestamp) { - Update(timestamp); - AvailableSize -= (i64)size; - Update(timestamp); - } - - ui64 GetAvailableAvgSec(const TInstant& timestamp) { - Update(timestamp); - return AvgSec.GetAvg(); - - } - - ui64 GetAvailableAvgMin(const TInstant& timestamp) { - Update(timestamp); - return AvgMin.GetAvg(); - } - - ui64 GetTotalSpeed() const { - return SpeedPerSecond; - } - -private: - i64 AvailableSize; - ui64 SpeedPerSecond; - TInstant LastUpdateTime; - ui64 MaxBurst; - - TAvgTracker AvgMin; - TAvgTracker AvgSec; - - ui64 QuotedTime; -}; - + , AvgSec(1000) //avg avail in bytes per sec + , QuotedTime(0) + {} + + ui64 GetQuotedTime() const { + return QuotedTime; + } + + void UpdateConfig(const ui64 maxBurst, const ui64 speedPerSecond) + { + SpeedPerSecond = speedPerSecond; + MaxBurst = maxBurst; + AvailableSize = maxBurst; + } + + void Update(const TInstant& timestamp); + + bool CanExaust() const { + return AvailableSize > 0; + } + + void Exaust(const ui64 size, const TInstant& timestamp) { + Update(timestamp); + AvailableSize -= (i64)size; + Update(timestamp); + } + + ui64 GetAvailableAvgSec(const TInstant& timestamp) { + Update(timestamp); + return AvgSec.GetAvg(); + + } + + ui64 GetAvailableAvgMin(const TInstant& timestamp) { + Update(timestamp); + return AvgMin.GetAvg(); + } + + ui64 GetTotalSpeed() const { + return SpeedPerSecond; + } + +private: + i64 AvailableSize; + ui64 SpeedPerSecond; + TInstant LastUpdateTime; + ui64 MaxBurst; + + TAvgTracker AvgMin; + TAvgTracker AvgSec; + + ui64 QuotedTime; +}; + struct TReadSpeedLimiterHolder { TReadSpeedLimiterHolder(const TActorId& actor, const TTabletCountersBase& baseline) : Actor(actor) { Baseline.Populate(baseline); } - + TActorId Actor; TTabletCountersBase Baseline; }; -struct TUserInfo { +struct TUserInfo { THolder<TReadSpeedLimiterHolder> ReadSpeedLimiter; TString Session = ""; - ui32 Generation = 0; - ui32 Step = 0; - i64 Offset = 0; + ui32 Generation = 0; + ui32 Step = 0; + i64 Offset = 0; TInstant WriteTimestamp; TInstant CreateTimestamp; TInstant ReadTimestamp; - bool ActualTimestamps = false; - - i64 ReadOffset = -1; + bool ActualTimestamps = false; + + i64 ReadOffset = -1; TInstant ReadWriteTimestamp; TInstant ReadCreateTimestamp; ui64 ReadOffsetRewindSum = 0; - - bool ReadScheduled = false; - - //cache is used for storing WriteTime;CreateTime for offsets. - //When client will commit to new position, timestamps for this offset could be in cache - not insane client should read data before commit + + bool ReadScheduled = false; + + //cache is used for storing WriteTime;CreateTime for offsets. + //When client will commit to new position, timestamps for this offset could be in cache - not insane client should read data before commit std::deque<std::pair<ui64, std::pair<TInstant, TInstant>>> Cache; - - bool Important = false; + + bool Important = false; TInstant ReadFromTimestamp; - bool HasReadRule = false; - TUserLabeledCounters LabeledCounters; - TString User; - ui64 ReadRuleGeneration = 0; - TString Topic; - - std::deque<TSimpleSharedPtr<TEvPQ::TEvSetClientInfo>> UserActs; - - std::deque<std::pair<TReadInfo, ui64>> ReadRequests; - - TQuotaTracker ReadQuota; - - TWorkingTimeCounter Counter; - NKikimr::NPQ::TMultiCounter BytesRead; - NKikimr::NPQ::TMultiCounter MsgsRead; - TMap<TString, NKikimr::NPQ::TMultiCounter> BytesReadFromDC; - - ui32 ActiveReads; - ui32 Subscriptions; - i64 EndOffset; - + bool HasReadRule = false; + TUserLabeledCounters LabeledCounters; + TString User; + ui64 ReadRuleGeneration = 0; + TString Topic; + + std::deque<TSimpleSharedPtr<TEvPQ::TEvSetClientInfo>> UserActs; + + std::deque<std::pair<TReadInfo, ui64>> ReadRequests; + + TQuotaTracker ReadQuota; + + TWorkingTimeCounter Counter; + NKikimr::NPQ::TMultiCounter BytesRead; + NKikimr::NPQ::TMultiCounter MsgsRead; + TMap<TString, NKikimr::NPQ::TMultiCounter> BytesReadFromDC; + + ui32 ActiveReads; + ui32 Subscriptions; + i64 EndOffset; + TVector<NSlidingWindow::TSlidingWindow<NSlidingWindow::TSumOperation<ui64>>> AvgReadBytes; - - NSlidingWindow::TSlidingWindow<NSlidingWindow::TMaxOperation<ui64>> WriteLagMs; - + + NSlidingWindow::TSlidingWindow<NSlidingWindow::TMaxOperation<ui64>> WriteLagMs; + std::shared_ptr<TPercentileCounter> ReadTimeLag; bool DoExternalRead = false; - - bool WriteInProgress = false; - - bool Parsed = false; - - TUserInfo(THolder<TReadSpeedLimiterHolder> readSpeedLimiter, const TString& user, - const ui64 readRuleGeneration, bool important, const TString& topic, - const ui32 partition, bool doExternalRead, + + bool WriteInProgress = false; + + bool Parsed = false; + + TUserInfo(THolder<TReadSpeedLimiterHolder> readSpeedLimiter, const TString& user, + const ui64 readRuleGeneration, bool important, const TString& topic, + const ui32 partition, bool doExternalRead, ui64 burst = 1'000'000'000, ui64 speed = 1'000'000'000) : ReadSpeedLimiter(std::move(readSpeedLimiter)) , Important(important) , LabeledCounters(user + "/" + (important ? "1" : "0") + "/" + topic, partition) , User(user) - , ReadRuleGeneration(readRuleGeneration) + , ReadRuleGeneration(readRuleGeneration) , Topic(topic) - , ReadQuota(burst, speed, TAppData::TimeProvider->Now()) + , ReadQuota(burst, speed, TAppData::TimeProvider->Now()) , Counter(nullptr) , BytesRead() , MsgsRead() @@ -226,102 +226,102 @@ struct TUserInfo { , DoExternalRead(doExternalRead) { } - - void ForgetSubscription(const TInstant& now) { - if(Subscriptions > 0) - --Subscriptions; - UpdateReadingTimeAndState(now); - } - - void UpdateReadingState() { - Counter.UpdateState(Subscriptions > 0 || ActiveReads > 0 || ReadRequests.size() > 0); //no data for read or got read requests from client - } - + + void ForgetSubscription(const TInstant& now) { + if(Subscriptions > 0) + --Subscriptions; + UpdateReadingTimeAndState(now); + } + + void UpdateReadingState() { + Counter.UpdateState(Subscriptions > 0 || ActiveReads > 0 || ReadRequests.size() > 0); //no data for read or got read requests from client + } + void UpdateReadingTimeAndState(TInstant now) { - Counter.UpdateWorkingTime(now); - UpdateReadingState(); - - if (EndOffset == GetReadOffset()) { //no data to read, so emulate client empty reads - WriteLagMs.Update(0, now); - } - if (Subscriptions > 0) { + Counter.UpdateWorkingTime(now); + UpdateReadingState(); + + if (EndOffset == GetReadOffset()) { //no data to read, so emulate client empty reads + WriteLagMs.Update(0, now); + } + if (Subscriptions > 0) { ReadTimestamp = now; - } - } - + } + } + void ReadDone(const TActorContext& ctx, const TInstant& now, ui64 readSize, ui32 readCount, const TString& clientDC) { - if (BytesRead && !clientDC.empty()) { + if (BytesRead && !clientDC.empty()) { if (BytesRead) BytesRead.Inc(readSize); if (MsgsRead) MsgsRead.Inc(readCount); - auto it = BytesReadFromDC.find(clientDC); - if (it == BytesReadFromDC.end()) { - auto pos = Topic.find("--"); - if (pos != TString::npos) { - auto labels = GetLabels(clientDC, Topic.substr(pos + 2)); - if (!labels.empty()) { - labels.pop_back(); - } + auto it = BytesReadFromDC.find(clientDC); + if (it == BytesReadFromDC.end()) { + auto pos = Topic.find("--"); + if (pos != TString::npos) { + auto labels = GetLabels(clientDC, Topic.substr(pos + 2)); + if (!labels.empty()) { + labels.pop_back(); + } it = BytesReadFromDC.emplace(clientDC, TMultiCounter(GetServiceCounters(AppData(ctx)->Counters, "pqproxy|readSession"), labels, {{"ClientDC", clientDC}, {"Client", User}, {"ConsumerPath", NPersQueue::ConvertOldConsumerName(User, ctx)}}, {"BytesReadFromDC"}, true)).first; - } - } - if (it != BytesReadFromDC.end()) - it->second.Inc(readSize); - } - ReadQuota.Exaust(readSize, now); - for (auto& avg : AvgReadBytes) { + } + } + if (it != BytesReadFromDC.end()) + it->second.Inc(readSize); + } + ReadQuota.Exaust(readSize, now); + for (auto& avg : AvgReadBytes) { avg.Update(readSize, now); - } - Y_VERIFY(ActiveReads > 0); - --ActiveReads; - UpdateReadingTimeAndState(now); + } + Y_VERIFY(ActiveReads > 0); + --ActiveReads; + UpdateReadingTimeAndState(now); ReadTimestamp = now; - } - + } + TUserInfo( const TActorContext& ctx, THolder<TReadSpeedLimiterHolder> readSpeedLimiter, const TString& user, - const ui64 readRuleGeneration, const bool important, const TString& topic, const ui32 partition, const TString &session, + const ui64 readRuleGeneration, const bool important, const TString& topic, const ui32 partition, const TString &session, ui32 gen, ui32 step, i64 offset, const ui64 readOffsetRewindSum, const TString& dcId, TInstant readFromTimestamp, const TString& cloudId, const TString& dbId, const TString& folderId, ui64 burst = 1'000'000'000, ui64 speed = 1'000'000'000 ) : ReadSpeedLimiter(std::move(readSpeedLimiter)) , Session(session) - , Generation(gen) - , Step(step) - , Offset(offset) + , Generation(gen) + , Step(step) + , Offset(offset) , WriteTimestamp(TAppData::TimeProvider->Now()) , CreateTimestamp(TAppData::TimeProvider->Now()) , ReadTimestamp(TAppData::TimeProvider->Now()) - , ActualTimestamps(false) - , ReadOffset(-1) + , ActualTimestamps(false) + , ReadOffset(-1) , ReadWriteTimestamp(TAppData::TimeProvider->Now()) , ReadCreateTimestamp(TAppData::TimeProvider->Now()) , ReadOffsetRewindSum(readOffsetRewindSum) - , ReadScheduled(false) - , Important(important) + , ReadScheduled(false) + , Important(important) , ReadFromTimestamp(readFromTimestamp) - , HasReadRule(false) - , LabeledCounters(user + "/" +(important ? "1" : "0") + "/" + topic, partition) - , User(user) - , ReadRuleGeneration(readRuleGeneration) - , Topic(topic) - , ReadQuota(burst, speed, TAppData::TimeProvider->Now()) - , Counter(nullptr) - , ActiveReads(0) - , Subscriptions(0) - , EndOffset(0) + , HasReadRule(false) + , LabeledCounters(user + "/" +(important ? "1" : "0") + "/" + topic, partition) + , User(user) + , ReadRuleGeneration(readRuleGeneration) + , Topic(topic) + , ReadQuota(burst, speed, TAppData::TimeProvider->Now()) + , Counter(nullptr) + , ActiveReads(0) + , Subscriptions(0) + , EndOffset(0) , AvgReadBytes{{TDuration::Seconds(1), 1000}, {TDuration::Minutes(1), 1000}, {TDuration::Hours(1), 2000}, {TDuration::Days(1), 2000}} - , WriteLagMs(TDuration::Minutes(1), 100) - { + , WriteLagMs(TDuration::Minutes(1), 100) + { if (AppData(ctx)->Counters) { if (AppData()->PQConfig.GetTopicsAreFirstClassCitizen()) { SetupStreamCounters(ctx, dcId, ToString<ui32>(partition), topic, cloudId, dbId, folderId); @@ -330,9 +330,9 @@ struct TUserInfo { return; SetupTopicCounters(ctx, dcId, ToString<ui32>(partition), topic); } - } - } - + } + } + void SetupStreamCounters(const TActorContext& ctx, const TString& dcId, const TString& partition, const TString& topic, const TString& cloudId, const TString& dbId, const TString& folderId) { @@ -409,76 +409,76 @@ struct TUserInfo { } - void SetQuota(const ui64 maxBurst, const ui64 speed) { - ReadQuota.UpdateConfig(maxBurst, speed); - } - - void Clear(const TActorContext& ctx); - + void SetQuota(const ui64 maxBurst, const ui64 speed) { + ReadQuota.UpdateConfig(maxBurst, speed); + } + + void Clear(const TActorContext& ctx); + void UpdateReadOffset(const i64 offset, TInstant writeTimestamp, TInstant createTimestamp, TInstant now) { - ReadOffset = offset; + ReadOffset = offset; ReadWriteTimestamp = writeTimestamp; ReadCreateTimestamp = createTimestamp; WriteLagMs.Update((ReadWriteTimestamp - ReadCreateTimestamp).MilliSeconds(), ReadWriteTimestamp); - if (Subscriptions > 0) { + if (Subscriptions > 0) { ReadTimestamp = now; - } - } - + } + } + void AddTimestampToCache(const ui64 offset, TInstant writeTimestamp, TInstant createTimestamp, bool isUserRead, TInstant now) - { - if ((ui64)Max<i64>(Offset, 0) == offset) { + { + if ((ui64)Max<i64>(Offset, 0) == offset) { WriteTimestamp = writeTimestamp; CreateTimestamp = createTimestamp; - ActualTimestamps = true; - if (ReadOffset == -1) { + ActualTimestamps = true; + if (ReadOffset == -1) { UpdateReadOffset(offset, writeTimestamp, createTimestamp, now); - } - } - if (isUserRead) { + } + } + if (isUserRead) { UpdateReadOffset(offset, writeTimestamp, createTimestamp, now); - if (ReadTimeLag) { + if (ReadTimeLag) { ReadTimeLag->IncFor((now - createTimestamp).MilliSeconds(), 1); - } - } - if (!Cache.empty() && Cache.back().first >= offset) //already got data in cache - return; + } + } + if (!Cache.empty() && Cache.back().first >= offset) //already got data in cache + return; Cache.push_back(std::make_pair(offset, std::make_pair(writeTimestamp, createTimestamp))); - if (Cache.size() > MAX_USER_TS_CACHE_SIZE) - Cache.pop_front(); - } - - bool UpdateTimestampFromCache() - { - while (!Cache.empty() && (i64)Cache.front().first < Offset) { - Cache.pop_front(); - } - if (!Cache.empty() && Cache.front().first == (ui64)Max<i64>(Offset, 0)) { + if (Cache.size() > MAX_USER_TS_CACHE_SIZE) + Cache.pop_front(); + } + + bool UpdateTimestampFromCache() + { + while (!Cache.empty() && (i64)Cache.front().first < Offset) { + Cache.pop_front(); + } + if (!Cache.empty() && Cache.front().first == (ui64)Max<i64>(Offset, 0)) { WriteTimestamp = Cache.front().second.first; CreateTimestamp = Cache.front().second.second; - ActualTimestamps = true; - if (ReadOffset == -1) { + ActualTimestamps = true; + if (ReadOffset == -1) { UpdateReadOffset(Offset - 1, Cache.front().second.first, Cache.front().second.second, TAppData::TimeProvider->Now()); - } - return true; - } - return false; - } - - void SetImportant(bool important) - { - Important = important; - LabeledCounters.SetGroup(User + "/" + (important ? "1" : "0") + "/" + Topic); - } - + } + return true; + } + return false; + } + + void SetImportant(bool important) + { + Important = important; + LabeledCounters.SetGroup(User + "/" + (important ? "1" : "0") + "/" + Topic); + } + i64 GetReadOffset() const { - return ReadOffset == -1 ? Offset : (ReadOffset + 1); //+1 because we want to track first not readed offset + return ReadOffset == -1 ? Offset : (ReadOffset + 1); //+1 because we want to track first not readed offset } TInstant GetReadTimestamp() const { - return ReadTimestamp; - } - + return ReadTimestamp; + } + TInstant GetWriteTimestamp() const { return Offset == EndOffset ? TAppData::TimeProvider->Now() : WriteTimestamp; } @@ -490,25 +490,25 @@ struct TUserInfo { TInstant GetReadWriteTimestamp() const { TInstant ts = ReadOffset == -1 ? WriteTimestamp : ReadWriteTimestamp; ts = GetReadOffset() >= EndOffset ? TAppData::TimeProvider->Now() : ts; - return ts; + return ts; + } + + ui64 GetWriteLagMs() const { + return WriteLagMs.GetValue(); } - ui64 GetWriteLagMs() const { - return WriteLagMs.GetValue(); - } - TInstant GetReadCreateTimestamp() const { TInstant ts = ReadOffset == -1 ? CreateTimestamp : ReadCreateTimestamp; ts = GetReadOffset() >= EndOffset ? TAppData::TimeProvider->Now() : ts; - return ts; + return ts; } - -}; - + +}; + class TUsersInfoStorage { public: TUsersInfoStorage(TString dcId, ui64 tabletId, const TString& topicName, ui32 partition, - const TTabletCountersBase& counters, const NKikimrPQ::TPQTabletConfig& config, + const TTabletCountersBase& counters, const NKikimrPQ::TPQTabletConfig& config, const TString& CloudId, const TString& DbId, const TString& FolderId); void Init(TActorId tabletActor, TActorId partitionActor); @@ -519,21 +519,21 @@ public: TUserInfo& GetOrCreate(const TString& user, const TActorContext& ctx); TUserInfo* GetIfExists(const TString& user); - void UpdateConfig(const NKikimrPQ::TPQTabletConfig& config) { - Config = config; - } - + void UpdateConfig(const NKikimrPQ::TPQTabletConfig& config) { + Config = config; + } + THashMap<TString, TUserInfo>& GetAll(); TUserInfo& Create( - const TActorContext& ctx, const TString& user, const ui64 readRuleGeneration, bool important, const TString &session, + const TActorContext& ctx, const TString& user, const ui64 readRuleGeneration, bool important, const TString &session, ui32 gen, ui32 step, i64 offset, ui64 readOffsetRewindSum, TInstant readFromTimestamp ); - + void Clear(const TActorContext& ctx); - void Remove(const TString& user, const TActorContext& ctx); - + void Remove(const TString& user, const TActorContext& ctx); + private: THolder<TReadSpeedLimiterHolder> CreateReadSpeedLimiter(const TString& user) const; @@ -548,12 +548,12 @@ private: TMaybe<TActorId> TabletActor; TMaybe<TActorId> PartitionActor; - NKikimrPQ::TPQTabletConfig Config; + NKikimrPQ::TPQTabletConfig Config; TString CloudId; TString DbId; TString FolderId; }; -} //NPQ -} //NKikimr +} //NPQ +} //NKikimr diff --git a/ydb/core/persqueue/ut/ya.make b/ydb/core/persqueue/ut/ya.make index 4093b34666..7cbb0b9496 100644 --- a/ydb/core/persqueue/ut/ya.make +++ b/ydb/core/persqueue/ut/ya.make @@ -1,12 +1,12 @@ UNITTEST_FOR(ydb/core/persqueue) - + OWNER( alexnick g:kikimr g:logbroker ) - -FORK_SUBTESTS() + +FORK_SUBTESTS() SPLIT_FACTOR(40) @@ -19,24 +19,24 @@ ELSE() TIMEOUT(600) ENDIF() -PEERDIR( +PEERDIR( library/cpp/getopt library/cpp/regex/pcre library/cpp/svnversion ydb/core/testlib ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils -) - +) + YQL_LAST_ABI_VERSION() -SRCS( +SRCS( internals_ut.cpp mirrorer_ut.cpp - pq_ut.cpp + pq_ut.cpp type_codecs_ut.cpp - pq_ut.h + pq_ut.h sourceid_ut.cpp user_info_ut.cpp -) - -END() +) + +END() diff --git a/ydb/core/persqueue/ut_slow/ya.make b/ydb/core/persqueue/ut_slow/ya.make index 412f37ac49..9e018d205b 100644 --- a/ydb/core/persqueue/ut_slow/ya.make +++ b/ydb/core/persqueue/ut_slow/ya.make @@ -1,33 +1,33 @@ UNITTEST_FOR(ydb/core/persqueue) - + OWNER( alexnick g:kikimr g:logbroker ) - -FORK_SUBTESTS() -SPLIT_FACTOR(20) +FORK_SUBTESTS() + +SPLIT_FACTOR(20) -SIZE(LARGE) +SIZE(LARGE) -TAG(ya:fat) +TAG(ya:fat) -TIMEOUT(3600) - -PEERDIR( +TIMEOUT(3600) + +PEERDIR( library/cpp/getopt library/cpp/regex/pcre library/cpp/svnversion ydb/core/testlib -) - +) + YQL_LAST_ABI_VERSION() -SRCS( - pq_ut_slow.cpp - pq_ut.h -) - -END() +SRCS( + pq_ut_slow.cpp + pq_ut.h +) + +END() diff --git a/ydb/core/persqueue/working_time_counter.h b/ydb/core/persqueue/working_time_counter.h index 0c454cbc8e..4f4d81d263 100644 --- a/ydb/core/persqueue/working_time_counter.h +++ b/ydb/core/persqueue/working_time_counter.h @@ -1,41 +1,41 @@ -#pragma once - +#pragma once + #include <ydb/core/protos/counters_pq.pb.h> - + #include <library/cpp/monlib/dynamic_counters/counters.h> -namespace NKikimr { -namespace NPQ { - +namespace NKikimr { +namespace NPQ { + -class TWorkingTimeCounter { -private: - bool IsInWorkingState; +class TWorkingTimeCounter { +private: + bool IsInWorkingState; NMonitoring::TDynamicCounters::TCounterPtr WorkingTimeMicroSec; - TInstant LastUpdateTimestamp; -public: - - TWorkingTimeCounter(NMonitoring::TDynamicCounters::TCounterPtr counter) - : IsInWorkingState(false) + TInstant LastUpdateTimestamp; +public: + + TWorkingTimeCounter(NMonitoring::TDynamicCounters::TCounterPtr counter) + : IsInWorkingState(false) , WorkingTimeMicroSec(counter) - {} - - void UpdateState(bool state) { - IsInWorkingState = state; - } - + {} + + void UpdateState(bool state) { + IsInWorkingState = state; + } + void UpdateWorkingTime(const TInstant now) { if (!WorkingTimeMicroSec) //no counter - return; - if (IsInWorkingState && LastUpdateTimestamp > TInstant::Zero()) { + return; + if (IsInWorkingState && LastUpdateTimestamp > TInstant::Zero()) { TDuration res = now - LastUpdateTimestamp; (*WorkingTimeMicroSec) += res.MicroSeconds(); LastUpdateTimestamp += res; - } else { - LastUpdateTimestamp = now; - } - } - + } else { + LastUpdateTimestamp = now; + } + } + void SetCounter(NMonitoring::TDynamicCounterPtr counter, const TVector<std::pair<TString, TString>>& subgroups, const std::tuple<TString, TString, bool>& expiring) { @@ -46,8 +46,8 @@ public: WorkingTimeMicroSec = counter->GetExpiringNamedCounter(std::get<0>(expiring), std::get<1>(expiring), std::get<2>(expiring)); - } -}; - -} //NPQ -} //NKikimr + } +}; + +} //NPQ +} //NKikimr diff --git a/ydb/core/persqueue/ya.make b/ydb/core/persqueue/ya.make index 7276fb46ab..50e89676ef 100644 --- a/ydb/core/persqueue/ya.make +++ b/ydb/core/persqueue/ya.make @@ -1,38 +1,38 @@ -LIBRARY() - -OWNER( - alexnick +LIBRARY() + +OWNER( + alexnick g:kikimr g:logbroker -) - -SRCS( +) + +SRCS( cluster_tracker.cpp blob.cpp event_helpers.cpp header.cpp percentile_counter.cpp - pq.cpp + pq.cpp pq_database.cpp - pq_impl.cpp + pq_impl.cpp sourceid.cpp mirrorer.cpp mirrorer.h ownerinfo.cpp - partition.cpp + partition.cpp pq_l2_cache.cpp - read_balancer.cpp + read_balancer.cpp read_speed_limiter.cpp subscriber.cpp type_codecs_defs.cpp user_info.cpp write_meta.cpp - actor_persqueue_client_iface.h -) - + actor_persqueue_client_iface.h +) + GENERATE_ENUM_SERIALIZATION(sourceid.h) -PEERDIR( +PEERDIR( library/cpp/actors/core library/cpp/html/pcdata library/cpp/json @@ -50,9 +50,9 @@ PEERDIR( ydb/library/persqueue/topic_parser ydb/public/lib/base ydb/public/sdk/cpp/client/ydb_persqueue_core -) - -END() +) + +END() RECURSE_FOR_TESTS( ut diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto index 98db6fc857..d64169d4fc 100644 --- a/ydb/core/protos/config.proto +++ b/ydb/core/protos/config.proto @@ -343,7 +343,7 @@ message TBootstrap { optional uint64 ProxySchemeCacheNodes = 2; optional uint64 ProxySchemeCacheDistNodes = 3; optional NKikimrTablet.TCompactionBroker CompactionBroker = 4; - optional NKikimrNodeLimits.TNodeLimitsConfig NodeLimits = 5; + optional NKikimrNodeLimits.TNodeLimitsConfig NodeLimits = 5; optional NKikimrResourceBroker.TResourceBrokerConfig ResourceBroker = 6; optional NKikimrSharedCache.TSharedCacheConfig SharedCacheConfig = 7; repeated NKikimrSchemeOp.TResourceProfile ResourceProfiles = 8; diff --git a/ydb/core/protos/counters.proto b/ydb/core/protos/counters.proto index e0ded87179..05a8c93d90 100644 --- a/ydb/core/protos/counters.proto +++ b/ydb/core/protos/counters.proto @@ -22,37 +22,37 @@ message TTxTypeOptions { optional string Name = 1; } -message TLabeledCounterOptions { - enum ECounterType { - CT_SIMPLE = 1; - CT_TIMELAG = 2; - CT_DERIV = 3; - } - - enum EAggregateFunc { //must be consistent with TTabletLabeledCountersBase::EAggreagateFunc - EAF_MAX = 1; - EAF_MIN = 2; - EAF_SUM = 3; - } - - optional string Name = 1; - optional EAggregateFunc AggrFunc = 2; - optional ECounterType Type = 3 [default = CT_SIMPLE]; -} - -message TLabeledCounterGroupNamesOptions { - repeated string Names = 1; -} - +message TLabeledCounterOptions { + enum ECounterType { + CT_SIMPLE = 1; + CT_TIMELAG = 2; + CT_DERIV = 3; + } + + enum EAggregateFunc { //must be consistent with TTabletLabeledCountersBase::EAggreagateFunc + EAF_MAX = 1; + EAF_MIN = 2; + EAF_SUM = 3; + } + + optional string Name = 1; + optional EAggregateFunc AggrFunc = 2; + optional ECounterType Type = 3 [default = CT_SIMPLE]; +} + +message TLabeledCounterGroupNamesOptions { + repeated string Names = 1; +} + extend google.protobuf.EnumValueOptions { optional TCounterOptions CounterOpts = 56672; optional TTxTypeOptions TxTypeOpts = 56673; - optional TLabeledCounterOptions LabeledCounterOpts = 56676; + optional TLabeledCounterOptions LabeledCounterOpts = 56676; } extend google.protobuf.EnumOptions { optional TCounterOptions GlobalCounterOpts = 56674; - optional TLabeledCounterGroupNamesOptions GlobalGroupNamesOpts = 56677; + optional TLabeledCounterGroupNamesOptions GlobalGroupNamesOpts = 56677; } extend google.protobuf.FileOptions { diff --git a/ydb/core/protos/counters_pq.proto b/ydb/core/protos/counters_pq.proto index 2ef5833ca4..7233758135 100644 --- a/ydb/core/protos/counters_pq.proto +++ b/ydb/core/protos/counters_pq.proto @@ -1,236 +1,236 @@ import "ydb/core/protos/counters.proto"; - -package NKikimr.NPQ; - -option java_package = "ru.yandex.kikimr.proto"; - -option (TabletTypeName) = "PQ"; // Used as prefix for all counters - -enum ECumulativeCounters { - COUNTER_PQ_BAD_REQUEST = 0 [(CounterOpts) = {Name: "BadRequest"}]; - COUNTER_PQ_INITIALIZING = 1 [(CounterOpts) = {Name: "Initializing"}]; - - COUNTER_PQ_CREATE_SESSION_OK = 2 [(CounterOpts) = {Name: "CreateSessionOk"}]; - COUNTER_PQ_CREATE_SESSION_ERROR = 3 [(CounterOpts) = {Name: "CreateSessionError"}]; - - COUNTER_PQ_DELETE_SESSION_OK = 4 [(CounterOpts) = {Name: "DeleteSessionOk"}]; - COUNTER_PQ_DELETE_SESSION_ERROR = 5 [(CounterOpts) = {Name: "DeleteSessionError"}]; - - COUNTER_PQ_SET_CLIENT_OFFSET_OK = 6 [(CounterOpts) = {Name: "SetClientOffsetOk"}]; - COUNTER_PQ_SET_CLIENT_OFFSET_ERROR = 7 [(CounterOpts) = {Name: "SetClientOffsetError"}]; - - COUNTER_PQ_GET_CLIENT_OFFSET_OK = 8 [(CounterOpts) = {Name: "GetClientOffsetOk"}]; - COUNTER_PQ_GET_CLIENT_OFFSET_ERROR = 9 [(CounterOpts) = {Name: "GetClientOffsetError"}]; - - COUNTER_PQ_READ_OK = 10 [(CounterOpts) = {Name: "ReadOk"}]; - COUNTER_PQ_READ_ERROR = 11 [(CounterOpts) = {Name: "ReadError"}]; - COUNTER_PQ_READ_ERROR_NO_SESSION = 12 [(CounterOpts) = {Name: "ReadErrorNoSession"}]; - COUNTER_PQ_READ_ERROR_SMALL_OFFSET = 13 [(CounterOpts) = {Name: "ReadErrorSmallOffset"}]; - COUNTER_PQ_READ_ERROR_BIG_OFFSET = 14 [(CounterOpts) = {Name: "ReadErrorBigOffset"}]; - COUNTER_PQ_READ_ERROR_IN_PROGRESS = 15 [(CounterOpts) = {Name: "ReadErrorInProgress"}]; - COUNTER_PQ_READ_HEAD_ONLY_OK = 16 [(CounterOpts) = {Name: "ReadHeadOnlyOk"}]; - COUNTER_PQ_READ_SUBSCRIPTION_OK = 17 [(CounterOpts) = {Name: "ReadSubscriptionOk"}]; - COUNTER_PQ_READ_SUBSCRIPTION_ERROR = 18 [(CounterOpts) = {Name: "ReadSubscriptionError"}]; - COUNTER_PQ_READ_SUBSCRIPTION_TIMEOUT = 19 [(CounterOpts) = {Name: "ReadSubscriptionTimeout"}]; - COUNTER_PQ_READ_BYTES = 20 [(CounterOpts) = {Name: "BytesRead"}]; - - COUNTER_PQ_WRITE_BYTES_OK = 21 [(CounterOpts) = {Name: "BytesWriteOk"}]; - COUNTER_PQ_WRITE_BYTES_ALREADY = 22 [(CounterOpts) = {Name: "BytesWriteAlready"}]; - COUNTER_PQ_WRITE_BYTES_ERROR = 23 [(CounterOpts) = {Name: "BytesWriteError"}]; - - COUNTER_PQ_WRITE_OK = 24 [(CounterOpts) = {Name: "WriteOk"}]; - COUNTER_PQ_WRITE_ALREADY = 25 [(CounterOpts) = {Name: "WriteAlready"}]; - COUNTER_PQ_WRITE_ERROR = 26 [(CounterOpts) = {Name: "WriteError"}]; - - COUNTER_PQ_WRITE_CYCLE_BYTES_TOTAL = 27 [(CounterOpts) = {Name: "BytesWriteCycleTotal"}]; - - COUNTER_PQ_WRITE_TIMESTAMP_CACHE_HIT = 28 [(CounterOpts) = {Name: "TimestampWriteCacheHit"}]; - COUNTER_PQ_WRITE_TIMESTAMP_CACHE_MISS = 29 [(CounterOpts) = {Name: "TimestampWriteCacheMiss"}]; - COUNTER_PQ_WRITE_TIMESTAMP_ERROR = 30 [(CounterOpts) = {Name: "TimestampWriteReadError"}]; - COUNTER_PQ_WRITE_TIMESTAMP_OFFSET_IS_LOST = 31 [(CounterOpts) = {Name: "TimestampWriteOffsetIsLost"}]; - - COUNTER_PQ_SID_CREATED = 32 [(CounterOpts) = {Name: "SourceIdCreated"}]; - - COUNTER_PQ_WRITE_BYTES_SMALL_OFFSET = 33 [(CounterOpts) = {Name: "BytesWriteSmallOffset"}]; - COUNTER_PQ_WRITE_SMALL_OFFSET = 34 [(CounterOpts) = {Name: "WriteSmallOffset"}]; - - COUNTER_PQ_TABLET_CPU_USAGE = 35 [(CounterOpts) = {Name: "CPUUsage"}]; + +package NKikimr.NPQ; + +option java_package = "ru.yandex.kikimr.proto"; + +option (TabletTypeName) = "PQ"; // Used as prefix for all counters + +enum ECumulativeCounters { + COUNTER_PQ_BAD_REQUEST = 0 [(CounterOpts) = {Name: "BadRequest"}]; + COUNTER_PQ_INITIALIZING = 1 [(CounterOpts) = {Name: "Initializing"}]; + + COUNTER_PQ_CREATE_SESSION_OK = 2 [(CounterOpts) = {Name: "CreateSessionOk"}]; + COUNTER_PQ_CREATE_SESSION_ERROR = 3 [(CounterOpts) = {Name: "CreateSessionError"}]; + + COUNTER_PQ_DELETE_SESSION_OK = 4 [(CounterOpts) = {Name: "DeleteSessionOk"}]; + COUNTER_PQ_DELETE_SESSION_ERROR = 5 [(CounterOpts) = {Name: "DeleteSessionError"}]; + + COUNTER_PQ_SET_CLIENT_OFFSET_OK = 6 [(CounterOpts) = {Name: "SetClientOffsetOk"}]; + COUNTER_PQ_SET_CLIENT_OFFSET_ERROR = 7 [(CounterOpts) = {Name: "SetClientOffsetError"}]; + + COUNTER_PQ_GET_CLIENT_OFFSET_OK = 8 [(CounterOpts) = {Name: "GetClientOffsetOk"}]; + COUNTER_PQ_GET_CLIENT_OFFSET_ERROR = 9 [(CounterOpts) = {Name: "GetClientOffsetError"}]; + + COUNTER_PQ_READ_OK = 10 [(CounterOpts) = {Name: "ReadOk"}]; + COUNTER_PQ_READ_ERROR = 11 [(CounterOpts) = {Name: "ReadError"}]; + COUNTER_PQ_READ_ERROR_NO_SESSION = 12 [(CounterOpts) = {Name: "ReadErrorNoSession"}]; + COUNTER_PQ_READ_ERROR_SMALL_OFFSET = 13 [(CounterOpts) = {Name: "ReadErrorSmallOffset"}]; + COUNTER_PQ_READ_ERROR_BIG_OFFSET = 14 [(CounterOpts) = {Name: "ReadErrorBigOffset"}]; + COUNTER_PQ_READ_ERROR_IN_PROGRESS = 15 [(CounterOpts) = {Name: "ReadErrorInProgress"}]; + COUNTER_PQ_READ_HEAD_ONLY_OK = 16 [(CounterOpts) = {Name: "ReadHeadOnlyOk"}]; + COUNTER_PQ_READ_SUBSCRIPTION_OK = 17 [(CounterOpts) = {Name: "ReadSubscriptionOk"}]; + COUNTER_PQ_READ_SUBSCRIPTION_ERROR = 18 [(CounterOpts) = {Name: "ReadSubscriptionError"}]; + COUNTER_PQ_READ_SUBSCRIPTION_TIMEOUT = 19 [(CounterOpts) = {Name: "ReadSubscriptionTimeout"}]; + COUNTER_PQ_READ_BYTES = 20 [(CounterOpts) = {Name: "BytesRead"}]; + + COUNTER_PQ_WRITE_BYTES_OK = 21 [(CounterOpts) = {Name: "BytesWriteOk"}]; + COUNTER_PQ_WRITE_BYTES_ALREADY = 22 [(CounterOpts) = {Name: "BytesWriteAlready"}]; + COUNTER_PQ_WRITE_BYTES_ERROR = 23 [(CounterOpts) = {Name: "BytesWriteError"}]; + + COUNTER_PQ_WRITE_OK = 24 [(CounterOpts) = {Name: "WriteOk"}]; + COUNTER_PQ_WRITE_ALREADY = 25 [(CounterOpts) = {Name: "WriteAlready"}]; + COUNTER_PQ_WRITE_ERROR = 26 [(CounterOpts) = {Name: "WriteError"}]; + + COUNTER_PQ_WRITE_CYCLE_BYTES_TOTAL = 27 [(CounterOpts) = {Name: "BytesWriteCycleTotal"}]; + + COUNTER_PQ_WRITE_TIMESTAMP_CACHE_HIT = 28 [(CounterOpts) = {Name: "TimestampWriteCacheHit"}]; + COUNTER_PQ_WRITE_TIMESTAMP_CACHE_MISS = 29 [(CounterOpts) = {Name: "TimestampWriteCacheMiss"}]; + COUNTER_PQ_WRITE_TIMESTAMP_ERROR = 30 [(CounterOpts) = {Name: "TimestampWriteReadError"}]; + COUNTER_PQ_WRITE_TIMESTAMP_OFFSET_IS_LOST = 31 [(CounterOpts) = {Name: "TimestampWriteOffsetIsLost"}]; + + COUNTER_PQ_SID_CREATED = 32 [(CounterOpts) = {Name: "SourceIdCreated"}]; + + COUNTER_PQ_WRITE_BYTES_SMALL_OFFSET = 33 [(CounterOpts) = {Name: "BytesWriteSmallOffset"}]; + COUNTER_PQ_WRITE_SMALL_OFFSET = 34 [(CounterOpts) = {Name: "WriteSmallOffset"}]; + + COUNTER_PQ_TABLET_CPU_USAGE = 35 [(CounterOpts) = {Name: "CPUUsage"}]; COUNTER_PQ_TABLET_NETWORK_BYTES_USAGE = 36 [(CounterOpts) = {Name: "NetworkUsage"}]; -} - -enum ESimpleCounters { - COUNTER_PQ_TABLET_CACHE_SIZE = 0 [(CounterOpts) = {Name: "TabletCacheSizeBytes"}]; - COUNTER_PQ_TABLET_CACHE_COUNT = 1 [(CounterOpts) = {Name: "TabletCacheSizeBlobs"}]; +} + +enum ESimpleCounters { + COUNTER_PQ_TABLET_CACHE_SIZE = 0 [(CounterOpts) = {Name: "TabletCacheSizeBytes"}]; + COUNTER_PQ_TABLET_CACHE_COUNT = 1 [(CounterOpts) = {Name: "TabletCacheSizeBlobs"}]; COUNTER_PQ_TABLET_CACHED_ON_READ = 2 [(CounterOpts) = {Name: "TabletCachedOnRead"}]; COUNTER_PQ_TABLET_CACHED_ON_WRATE = 3 [(CounterOpts) = {Name: "TabletCachedOnWrite"}]; - COUNTER_PQ_TABLET_RESERVED_BYTES_SIZE = 4 [(CounterOpts) = {Name: "ReservedBytesSize"}]; - - COUNTER_PQ_TABLET_OPENED_PIPES = 5 [(CounterOpts) = {Name: "OpenedPipes"}]; - COUNTER_PQ_TABLET_INFLIGHT = 6 [(CounterOpts) = {Name: "RequestsInflight"}]; - -} - -enum EPercentileCounters { - option (GlobalCounterOpts) = { - Ranges { Value: 0 Name: "0 ms" } - Ranges { Value: 50 Name: "50 ms" } - Ranges { Value: 100 Name: "100 ms" } - Ranges { Value: 200 Name: "200 ms" } - Ranges { Value: 300 Name: "300 ms" } - Ranges { Value: 400 Name: "400 ms" } - Ranges { Value: 500 Name: "500 ms" } - Ranges { Value: 750 Name: "750 ms" } - Ranges { Value: 1000 Name: "1000 ms" } - Ranges { Value: 1500 Name: "1500 ms" } - Ranges { Value: 2000 Name: "2000 ms" } - Ranges { Value: 5000 Name: "5000 ms" } - Ranges { Value: 10000 Name: "10000 ms" } - Ranges { Value: 20000 Name: "20000 ms" } - Ranges { Value: 30000 Name: "30000 ms" } - Ranges { Value: 60000 Name: "60000 ms" } - Ranges { Value: 120000 Name: "120000 ms" } - }; - - COUNTER_LATENCY_PQ_GET_MAX_SEQ_NO = 0 [(CounterOpts) = {Name: "LatencyGetMaxSeqNo"}]; - COUNTER_LATENCY_PQ_DELETE_SESSION = 1 [(CounterOpts) = {Name: "LatencyDeleteSession"}]; - COUNTER_LATENCY_PQ_CREATE_SESSION = 2 [(CounterOpts) = {Name: "LatencyCreateSession"}]; - COUNTER_LATENCY_PQ_SET_OFFSET = 3 [(CounterOpts) = {Name: "LatencySetOffset"}]; - COUNTER_LATENCY_PQ_GET_OFFSET = 4 [(CounterOpts) = {Name: "LatencyGetOffset"}]; - COUNTER_LATENCY_PQ_WRITE = 5 [(CounterOpts) = {Name: "LatencyWrite"}]; - COUNTER_LATENCY_PQ_READ = 6 [(CounterOpts) = {Name: "LatencyRead"}]; - COUNTER_LATENCY_PQ_READ_OK = 7 [(CounterOpts) = {Name: "LatencyReadOk"}]; - COUNTER_LATENCY_PQ_READ_ERROR = 8 [(CounterOpts) = {Name: "LatencyReadError"}]; - COUNTER_LATENCY_PQ_READ_HEAD_ONLY = 9 [(CounterOpts) = {Name: "LatencyReadHeadOnly"}]; - COUNTER_LATENCY_PQ_GET_OWNERSHIP = 10 [(CounterOpts) = {Name: "LatencyGetOwnership"}]; - - COUNTER_LATENCY_PQ_WRITE_CYCLE = 11 [(CounterOpts) = {Name: "LatencyWriteCycle"}]; - COUNTER_LATENCY_PQ_INIT = 12 [(CounterOpts) = {Name: "LatencyInit"}]; - - COUNTER_PQ_WRITE_CYCLE_BYTES = 13 [(CounterOpts) = {Name: "WriteCycleBytes" - Ranges { Value: 0 Name: "0 Kb" } - Ranges { Value: 102400 Name: "100 Kb" } - Ranges { Value: 204800 Name: "200 Kb" } - Ranges { Value: 524288 Name: "512 Kb" } - Ranges { Value: 1048576 Name: "1 Mb" } - Ranges { Value: 2097152 Name: "2 Mb" } - Ranges { Value: 4194304 Name: "4 Mb" } - Ranges { Value: 6291456 Name: "6 Mb" } - Ranges { Value: 8388608 Name: "8 Mb" } - } - ]; - - COUNTER_PQ_WRITE_NEW_BYTES = 14 [(CounterOpts) = {Name: "WriteNewBytes" - Ranges { Value: 0 Name: "0 Kb" } - Ranges { Value: 102400 Name: "100 Kb" } - Ranges { Value: 204800 Name: "200 Kb" } - Ranges { Value: 524288 Name: "512 Kb" } - Ranges { Value: 1048576 Name: "1 Mb" } - Ranges { Value: 2097152 Name: "2 Mb" } - Ranges { Value: 4194304 Name: "4 Mb" } - Ranges { Value: 6291456 Name: "6 Mb" } - Ranges { Value: 8388608 Name: "8 Mb" } - } - ]; - - COUNTER_LATENCY_PQ_RECEIVE_QUEUE = 15 [(CounterOpts) = {Name: "LatencyReciveQueue"}]; - COUNTER_LATENCY_PQ_RESERVE_BYTES = 16 [(CounterOpts) = {Name: "LatencyReserveBytes"}]; + COUNTER_PQ_TABLET_RESERVED_BYTES_SIZE = 4 [(CounterOpts) = {Name: "ReservedBytesSize"}]; + + COUNTER_PQ_TABLET_OPENED_PIPES = 5 [(CounterOpts) = {Name: "OpenedPipes"}]; + COUNTER_PQ_TABLET_INFLIGHT = 6 [(CounterOpts) = {Name: "RequestsInflight"}]; + +} + +enum EPercentileCounters { + option (GlobalCounterOpts) = { + Ranges { Value: 0 Name: "0 ms" } + Ranges { Value: 50 Name: "50 ms" } + Ranges { Value: 100 Name: "100 ms" } + Ranges { Value: 200 Name: "200 ms" } + Ranges { Value: 300 Name: "300 ms" } + Ranges { Value: 400 Name: "400 ms" } + Ranges { Value: 500 Name: "500 ms" } + Ranges { Value: 750 Name: "750 ms" } + Ranges { Value: 1000 Name: "1000 ms" } + Ranges { Value: 1500 Name: "1500 ms" } + Ranges { Value: 2000 Name: "2000 ms" } + Ranges { Value: 5000 Name: "5000 ms" } + Ranges { Value: 10000 Name: "10000 ms" } + Ranges { Value: 20000 Name: "20000 ms" } + Ranges { Value: 30000 Name: "30000 ms" } + Ranges { Value: 60000 Name: "60000 ms" } + Ranges { Value: 120000 Name: "120000 ms" } + }; + + COUNTER_LATENCY_PQ_GET_MAX_SEQ_NO = 0 [(CounterOpts) = {Name: "LatencyGetMaxSeqNo"}]; + COUNTER_LATENCY_PQ_DELETE_SESSION = 1 [(CounterOpts) = {Name: "LatencyDeleteSession"}]; + COUNTER_LATENCY_PQ_CREATE_SESSION = 2 [(CounterOpts) = {Name: "LatencyCreateSession"}]; + COUNTER_LATENCY_PQ_SET_OFFSET = 3 [(CounterOpts) = {Name: "LatencySetOffset"}]; + COUNTER_LATENCY_PQ_GET_OFFSET = 4 [(CounterOpts) = {Name: "LatencyGetOffset"}]; + COUNTER_LATENCY_PQ_WRITE = 5 [(CounterOpts) = {Name: "LatencyWrite"}]; + COUNTER_LATENCY_PQ_READ = 6 [(CounterOpts) = {Name: "LatencyRead"}]; + COUNTER_LATENCY_PQ_READ_OK = 7 [(CounterOpts) = {Name: "LatencyReadOk"}]; + COUNTER_LATENCY_PQ_READ_ERROR = 8 [(CounterOpts) = {Name: "LatencyReadError"}]; + COUNTER_LATENCY_PQ_READ_HEAD_ONLY = 9 [(CounterOpts) = {Name: "LatencyReadHeadOnly"}]; + COUNTER_LATENCY_PQ_GET_OWNERSHIP = 10 [(CounterOpts) = {Name: "LatencyGetOwnership"}]; + + COUNTER_LATENCY_PQ_WRITE_CYCLE = 11 [(CounterOpts) = {Name: "LatencyWriteCycle"}]; + COUNTER_LATENCY_PQ_INIT = 12 [(CounterOpts) = {Name: "LatencyInit"}]; + + COUNTER_PQ_WRITE_CYCLE_BYTES = 13 [(CounterOpts) = {Name: "WriteCycleBytes" + Ranges { Value: 0 Name: "0 Kb" } + Ranges { Value: 102400 Name: "100 Kb" } + Ranges { Value: 204800 Name: "200 Kb" } + Ranges { Value: 524288 Name: "512 Kb" } + Ranges { Value: 1048576 Name: "1 Mb" } + Ranges { Value: 2097152 Name: "2 Mb" } + Ranges { Value: 4194304 Name: "4 Mb" } + Ranges { Value: 6291456 Name: "6 Mb" } + Ranges { Value: 8388608 Name: "8 Mb" } + } + ]; + + COUNTER_PQ_WRITE_NEW_BYTES = 14 [(CounterOpts) = {Name: "WriteNewBytes" + Ranges { Value: 0 Name: "0 Kb" } + Ranges { Value: 102400 Name: "100 Kb" } + Ranges { Value: 204800 Name: "200 Kb" } + Ranges { Value: 524288 Name: "512 Kb" } + Ranges { Value: 1048576 Name: "1 Mb" } + Ranges { Value: 2097152 Name: "2 Mb" } + Ranges { Value: 4194304 Name: "4 Mb" } + Ranges { Value: 6291456 Name: "6 Mb" } + Ranges { Value: 8388608 Name: "8 Mb" } + } + ]; + + COUNTER_LATENCY_PQ_RECEIVE_QUEUE = 15 [(CounterOpts) = {Name: "LatencyReciveQueue"}]; + COUNTER_LATENCY_PQ_RESERVE_BYTES = 16 [(CounterOpts) = {Name: "LatencyReserveBytes"}]; COUNTER_LATENCY_PQ_REGISTER_MESSAGE_GROUP = 17 [(CounterOpts) = {Name: "LatencyRegisterMessageGroup"}]; COUNTER_LATENCY_PQ_DEREGISTER_MESSAGE_GROUP = 18 [(CounterOpts) = {Name: "LatencyDeregisterMessageGroup"}]; COUNTER_LATENCY_PQ_SPLIT_MESSAGE_GROUP = 19 [(CounterOpts) = {Name: "LatencySplitMessageGroup"}]; -} - - -enum EClientLabeledCounters { - option (GlobalGroupNamesOpts) = { - Names: "client" - Names: "important" - Names: "topic" - }; - - METRIC_COMMIT_WRITE_TIME = 0 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByCommitted" AggrFunc : EAF_MIN Type : CT_TIMELAG}]; - METRIC_COMMIT_CREATE_TIME = 1 [(LabeledCounterOpts) = {Name: "CreateTimeLagMsByCommitted" AggrFunc : EAF_MIN Type : CT_TIMELAG}]; - METRIC_COMMIT_MESSAGE_LAG = 2 [(LabeledCounterOpts) = {Name: "MessageLagByCommitted" AggrFunc : EAF_MAX}]; - METRIC_COMMIT_SIZE_LAG = 3 [(LabeledCounterOpts) = {Name: "SizeLagByCommitted" AggrFunc : EAF_MAX}]; - METRIC_READ_WRITE_TIME = 4 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByLastReadOld" AggrFunc : EAF_MIN Type : CT_TIMELAG}]; - METRIC_READ_TOTAL_TIME = 5 [(LabeledCounterOpts) = {Name: "TotalTimeLagMsByLastRead" AggrFunc : EAF_MAX}]; - METRIC_READ_MESSAGE_LAG = 6 [(LabeledCounterOpts) = {Name: "MessageLagByLastRead" AggrFunc : EAF_MAX}]; - METRIC_READ_SIZE_LAG = 7 [(LabeledCounterOpts) = {Name: "SizeLagByLastRead" AggrFunc : EAF_MAX}]; - METRIC_USER_PARTITIONS = 8 [(LabeledCounterOpts) = {Name: "UserPartitionsAnswered" AggrFunc : EAF_SUM}]; - METRIC_READ_TOTAL_MESSAGE_LAG = 9 [(LabeledCounterOpts) = {Name: "TotalMessageLagByLastRead" AggrFunc : EAF_SUM}]; - METRIC_READ_TOTAL_SIZE_LAG = 10 [(LabeledCounterOpts) = {Name: "TotalSizeLagByLastRead" AggrFunc : EAF_SUM}]; - METRIC_MIN_READ_QUOTA_BYTES_AVAIL_SEC = 11 [(LabeledCounterOpts) = {Name: "ReadBytesAvailAvgSec" AggrFunc : EAF_MIN}]; - METRIC_MIN_READ_QUOTA_BYTES_AVAIL_MIN = 12 [(LabeledCounterOpts) = {Name: "ReadBytesAvailAvgMin" AggrFunc : EAF_MIN}]; - - METRIC_READ_OFFSET_REWIND_SUM = 13 [(LabeledCounterOpts) = {Name: "ReadOffsetRewindSum" AggrFunc : EAF_SUM Type : CT_DERIV}]; - - METRIC_TOTAL_READ_SPEED_1 = 14 [(LabeledCounterOpts) = {Name: "ReadBytesPerSec" AggrFunc : EAF_SUM}]; - METRIC_MAX_READ_SPEED_1 = 15 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerSec" AggrFunc : EAF_MAX}]; - METRIC_TOTAL_READ_SPEED_2 = 16 [(LabeledCounterOpts) = {Name: "ReadBytesPerMin" AggrFunc : EAF_SUM}]; - METRIC_MAX_READ_SPEED_2 = 17 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerMin" AggrFunc : EAF_MAX}]; - METRIC_TOTAL_READ_SPEED_3 = 18 [(LabeledCounterOpts) = {Name: "ReadBytesPerHour" AggrFunc : EAF_SUM}]; - METRIC_MAX_READ_SPEED_3 = 19 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerHour" AggrFunc : EAF_MAX}]; - METRIC_TOTAL_READ_SPEED_4 = 20 [(LabeledCounterOpts) = {Name: "ReadBytesPerDay" AggrFunc : EAF_SUM}]; - METRIC_MAX_READ_SPEED_4 = 21 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerDay" AggrFunc : EAF_MAX}]; - - METRIC_READ_QUOTA_BYTES = 22 [(LabeledCounterOpts) = {Name: "ReadBytesQuota" AggrFunc : EAF_MIN}]; - - METRIC_READ_TIME_LAG = 23 [(LabeledCounterOpts) = {Name: "ReadTimeLagMs" AggrFunc : EAF_MAX}]; - METRIC_WRITE_TIME_LAG = 24 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByLastRead" AggrFunc : EAF_MAX}]; - METRIC_LAST_READ_TIME = 25 [(LabeledCounterOpts) = {Name: "TimeSinceLastReadMs" AggrFunc : EAF_MIN Type : CT_TIMELAG}]; - - METRIC_READ_QUOTA_USAGE = 26 [(LabeledCounterOpts) = {Name: "PartitionMaxReadQuotaUsage" AggrFunc : EAF_MAX}]; - - METRIC_READ_OFFSET_REWIND_TOTAL = 27 [(LabeledCounterOpts) = {Name: "ReadOffsetRewindTotal" AggrFunc : EAF_SUM}]; - -} - - - -enum EPartitionLabeledCounters { - option (GlobalGroupNamesOpts) = { - Names: "topic" - }; - - METRIC_LIFE_TIME = 0 [(LabeledCounterOpts) = {Name: "PartitionLifeTimeMs" AggrFunc : EAF_MAX Type : CT_TIMELAG}]; - METRIC_INIT_TIME = 1 [(LabeledCounterOpts) = {Name: "PartitionInitTimeMs" AggrFunc : EAF_MAX}]; - METRIC_PARTITIONS = 2 [(LabeledCounterOpts) = {Name: "PartitionsAnswered" AggrFunc : EAF_SUM}]; - METRIC_NUM_SIDS = 3 [(LabeledCounterOpts) = {Name: "SourceIdCount" AggrFunc : EAF_SUM}]; - METRIC_MAX_NUM_SIDS = 4 [(LabeledCounterOpts) = {Name: "SourceIdMaxCount" AggrFunc : EAF_MAX}]; - METRIC_GAPS_COUNT = 5 [(LabeledCounterOpts) = {Name: "GapsCount" AggrFunc : EAF_SUM}]; - METRIC_MAX_GAPS_COUNT = 6 [(LabeledCounterOpts) = {Name: "GapsMaxCount" AggrFunc : EAF_MAX}]; - METRIC_GAPS_SIZE = 7 [(LabeledCounterOpts) = {Name: "GapsSize" AggrFunc : EAF_SUM}]; - METRIC_MAX_GAPS_SIZE = 8 [(LabeledCounterOpts) = {Name: "GapsMaxSize" AggrFunc : EAF_MAX}]; - METRIC_MIN_WRITE_QUOTA_BYTES_AVAIL_SEC = 9 [(LabeledCounterOpts) = {Name: "WriteBytesAvailAvgSec" AggrFunc : EAF_MIN}]; - METRIC_MIN_WRITE_QUOTA_BYTES_AVAIL_MIN = 10 [(LabeledCounterOpts) = {Name: "WriteBytesAvailAvgMin" AggrFunc : EAF_MIN}]; - METRIC_TOTAL_WRITE_SPEED_1 = 11 [(LabeledCounterOpts) = {Name: "WriteBytesPerSec" AggrFunc : EAF_SUM}]; - METRIC_MAX_WRITE_SPEED_1 = 12 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerSec" AggrFunc : EAF_MAX}]; - METRIC_TOTAL_WRITE_SPEED_2 = 13 [(LabeledCounterOpts) = {Name: "WriteBytesPerMin" AggrFunc : EAF_SUM}]; - METRIC_MAX_WRITE_SPEED_2 = 14 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerMin" AggrFunc : EAF_MAX}]; - METRIC_TOTAL_WRITE_SPEED_3 = 15 [(LabeledCounterOpts) = {Name: "WriteBytesPerHour" AggrFunc : EAF_SUM}]; - METRIC_MAX_WRITE_SPEED_3 = 16 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerHour" AggrFunc : EAF_MAX}]; - METRIC_TOTAL_WRITE_SPEED_4 = 17 [(LabeledCounterOpts) = {Name: "WriteBytesPerDay" AggrFunc : EAF_SUM}]; - METRIC_MAX_WRITE_SPEED_4 = 18 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerDay" AggrFunc : EAF_MAX}]; - - METRIC_TOTAL_PART_SIZE = 19 [(LabeledCounterOpts) = {Name: "TotalPartSize" AggrFunc : EAF_SUM}]; - METRIC_MAX_PART_SIZE = 20 [(LabeledCounterOpts) = {Name: "MaxPartSize" AggrFunc : EAF_MAX}]; - - METRIC_WRITE_QUOTA_BYTES = 21 [(LabeledCounterOpts) = {Name: "WriteBytesQuota" AggrFunc : EAF_MIN}]; - - METRIC_WRITE_TIME_LAG_MS = 22 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByLastWrite" AggrFunc : EAF_MAX}]; - METRIC_LAST_WRITE_TIME = 23 [(LabeledCounterOpts) = {Name: "TimeSinceLastWriteMs" AggrFunc : EAF_MIN Type : CT_TIMELAG}]; - - METRIC_TOTAL_QUOTA_SPEED_1 = 24 [(LabeledCounterOpts) = {Name: "QuotaBytesPerSec" AggrFunc : EAF_SUM}]; - METRIC_MAX_QUOTA_SPEED_1 = 25 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerSec" AggrFunc : EAF_MAX}]; - METRIC_TOTAL_QUOTA_SPEED_2 = 26 [(LabeledCounterOpts) = {Name: "QuotaBytesPerMin" AggrFunc : EAF_SUM}]; - METRIC_MAX_QUOTA_SPEED_2 = 27 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerMin" AggrFunc : EAF_MAX}]; - METRIC_TOTAL_QUOTA_SPEED_3 = 28 [(LabeledCounterOpts) = {Name: "QuotaBytesPerHour" AggrFunc : EAF_SUM}]; - METRIC_MAX_QUOTA_SPEED_3 = 29 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerHour" AggrFunc : EAF_MAX}]; - METRIC_TOTAL_QUOTA_SPEED_4 = 30 [(LabeledCounterOpts) = {Name: "QuotaBytesPerDay" AggrFunc : EAF_SUM}]; - METRIC_MAX_QUOTA_SPEED_4 = 31 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerDay" AggrFunc : EAF_MAX}]; - - METRIC_WRITE_QUOTA_USAGE = 32 [(LabeledCounterOpts) = {Name: "PartitionMaxWriteQuotaUsage" AggrFunc : EAF_MAX}]; - +} + + +enum EClientLabeledCounters { + option (GlobalGroupNamesOpts) = { + Names: "client" + Names: "important" + Names: "topic" + }; + + METRIC_COMMIT_WRITE_TIME = 0 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByCommitted" AggrFunc : EAF_MIN Type : CT_TIMELAG}]; + METRIC_COMMIT_CREATE_TIME = 1 [(LabeledCounterOpts) = {Name: "CreateTimeLagMsByCommitted" AggrFunc : EAF_MIN Type : CT_TIMELAG}]; + METRIC_COMMIT_MESSAGE_LAG = 2 [(LabeledCounterOpts) = {Name: "MessageLagByCommitted" AggrFunc : EAF_MAX}]; + METRIC_COMMIT_SIZE_LAG = 3 [(LabeledCounterOpts) = {Name: "SizeLagByCommitted" AggrFunc : EAF_MAX}]; + METRIC_READ_WRITE_TIME = 4 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByLastReadOld" AggrFunc : EAF_MIN Type : CT_TIMELAG}]; + METRIC_READ_TOTAL_TIME = 5 [(LabeledCounterOpts) = {Name: "TotalTimeLagMsByLastRead" AggrFunc : EAF_MAX}]; + METRIC_READ_MESSAGE_LAG = 6 [(LabeledCounterOpts) = {Name: "MessageLagByLastRead" AggrFunc : EAF_MAX}]; + METRIC_READ_SIZE_LAG = 7 [(LabeledCounterOpts) = {Name: "SizeLagByLastRead" AggrFunc : EAF_MAX}]; + METRIC_USER_PARTITIONS = 8 [(LabeledCounterOpts) = {Name: "UserPartitionsAnswered" AggrFunc : EAF_SUM}]; + METRIC_READ_TOTAL_MESSAGE_LAG = 9 [(LabeledCounterOpts) = {Name: "TotalMessageLagByLastRead" AggrFunc : EAF_SUM}]; + METRIC_READ_TOTAL_SIZE_LAG = 10 [(LabeledCounterOpts) = {Name: "TotalSizeLagByLastRead" AggrFunc : EAF_SUM}]; + METRIC_MIN_READ_QUOTA_BYTES_AVAIL_SEC = 11 [(LabeledCounterOpts) = {Name: "ReadBytesAvailAvgSec" AggrFunc : EAF_MIN}]; + METRIC_MIN_READ_QUOTA_BYTES_AVAIL_MIN = 12 [(LabeledCounterOpts) = {Name: "ReadBytesAvailAvgMin" AggrFunc : EAF_MIN}]; + + METRIC_READ_OFFSET_REWIND_SUM = 13 [(LabeledCounterOpts) = {Name: "ReadOffsetRewindSum" AggrFunc : EAF_SUM Type : CT_DERIV}]; + + METRIC_TOTAL_READ_SPEED_1 = 14 [(LabeledCounterOpts) = {Name: "ReadBytesPerSec" AggrFunc : EAF_SUM}]; + METRIC_MAX_READ_SPEED_1 = 15 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerSec" AggrFunc : EAF_MAX}]; + METRIC_TOTAL_READ_SPEED_2 = 16 [(LabeledCounterOpts) = {Name: "ReadBytesPerMin" AggrFunc : EAF_SUM}]; + METRIC_MAX_READ_SPEED_2 = 17 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerMin" AggrFunc : EAF_MAX}]; + METRIC_TOTAL_READ_SPEED_3 = 18 [(LabeledCounterOpts) = {Name: "ReadBytesPerHour" AggrFunc : EAF_SUM}]; + METRIC_MAX_READ_SPEED_3 = 19 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerHour" AggrFunc : EAF_MAX}]; + METRIC_TOTAL_READ_SPEED_4 = 20 [(LabeledCounterOpts) = {Name: "ReadBytesPerDay" AggrFunc : EAF_SUM}]; + METRIC_MAX_READ_SPEED_4 = 21 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerDay" AggrFunc : EAF_MAX}]; + + METRIC_READ_QUOTA_BYTES = 22 [(LabeledCounterOpts) = {Name: "ReadBytesQuota" AggrFunc : EAF_MIN}]; + + METRIC_READ_TIME_LAG = 23 [(LabeledCounterOpts) = {Name: "ReadTimeLagMs" AggrFunc : EAF_MAX}]; + METRIC_WRITE_TIME_LAG = 24 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByLastRead" AggrFunc : EAF_MAX}]; + METRIC_LAST_READ_TIME = 25 [(LabeledCounterOpts) = {Name: "TimeSinceLastReadMs" AggrFunc : EAF_MIN Type : CT_TIMELAG}]; + + METRIC_READ_QUOTA_USAGE = 26 [(LabeledCounterOpts) = {Name: "PartitionMaxReadQuotaUsage" AggrFunc : EAF_MAX}]; + + METRIC_READ_OFFSET_REWIND_TOTAL = 27 [(LabeledCounterOpts) = {Name: "ReadOffsetRewindTotal" AggrFunc : EAF_SUM}]; + +} + + + +enum EPartitionLabeledCounters { + option (GlobalGroupNamesOpts) = { + Names: "topic" + }; + + METRIC_LIFE_TIME = 0 [(LabeledCounterOpts) = {Name: "PartitionLifeTimeMs" AggrFunc : EAF_MAX Type : CT_TIMELAG}]; + METRIC_INIT_TIME = 1 [(LabeledCounterOpts) = {Name: "PartitionInitTimeMs" AggrFunc : EAF_MAX}]; + METRIC_PARTITIONS = 2 [(LabeledCounterOpts) = {Name: "PartitionsAnswered" AggrFunc : EAF_SUM}]; + METRIC_NUM_SIDS = 3 [(LabeledCounterOpts) = {Name: "SourceIdCount" AggrFunc : EAF_SUM}]; + METRIC_MAX_NUM_SIDS = 4 [(LabeledCounterOpts) = {Name: "SourceIdMaxCount" AggrFunc : EAF_MAX}]; + METRIC_GAPS_COUNT = 5 [(LabeledCounterOpts) = {Name: "GapsCount" AggrFunc : EAF_SUM}]; + METRIC_MAX_GAPS_COUNT = 6 [(LabeledCounterOpts) = {Name: "GapsMaxCount" AggrFunc : EAF_MAX}]; + METRIC_GAPS_SIZE = 7 [(LabeledCounterOpts) = {Name: "GapsSize" AggrFunc : EAF_SUM}]; + METRIC_MAX_GAPS_SIZE = 8 [(LabeledCounterOpts) = {Name: "GapsMaxSize" AggrFunc : EAF_MAX}]; + METRIC_MIN_WRITE_QUOTA_BYTES_AVAIL_SEC = 9 [(LabeledCounterOpts) = {Name: "WriteBytesAvailAvgSec" AggrFunc : EAF_MIN}]; + METRIC_MIN_WRITE_QUOTA_BYTES_AVAIL_MIN = 10 [(LabeledCounterOpts) = {Name: "WriteBytesAvailAvgMin" AggrFunc : EAF_MIN}]; + METRIC_TOTAL_WRITE_SPEED_1 = 11 [(LabeledCounterOpts) = {Name: "WriteBytesPerSec" AggrFunc : EAF_SUM}]; + METRIC_MAX_WRITE_SPEED_1 = 12 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerSec" AggrFunc : EAF_MAX}]; + METRIC_TOTAL_WRITE_SPEED_2 = 13 [(LabeledCounterOpts) = {Name: "WriteBytesPerMin" AggrFunc : EAF_SUM}]; + METRIC_MAX_WRITE_SPEED_2 = 14 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerMin" AggrFunc : EAF_MAX}]; + METRIC_TOTAL_WRITE_SPEED_3 = 15 [(LabeledCounterOpts) = {Name: "WriteBytesPerHour" AggrFunc : EAF_SUM}]; + METRIC_MAX_WRITE_SPEED_3 = 16 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerHour" AggrFunc : EAF_MAX}]; + METRIC_TOTAL_WRITE_SPEED_4 = 17 [(LabeledCounterOpts) = {Name: "WriteBytesPerDay" AggrFunc : EAF_SUM}]; + METRIC_MAX_WRITE_SPEED_4 = 18 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerDay" AggrFunc : EAF_MAX}]; + + METRIC_TOTAL_PART_SIZE = 19 [(LabeledCounterOpts) = {Name: "TotalPartSize" AggrFunc : EAF_SUM}]; + METRIC_MAX_PART_SIZE = 20 [(LabeledCounterOpts) = {Name: "MaxPartSize" AggrFunc : EAF_MAX}]; + + METRIC_WRITE_QUOTA_BYTES = 21 [(LabeledCounterOpts) = {Name: "WriteBytesQuota" AggrFunc : EAF_MIN}]; + + METRIC_WRITE_TIME_LAG_MS = 22 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByLastWrite" AggrFunc : EAF_MAX}]; + METRIC_LAST_WRITE_TIME = 23 [(LabeledCounterOpts) = {Name: "TimeSinceLastWriteMs" AggrFunc : EAF_MIN Type : CT_TIMELAG}]; + + METRIC_TOTAL_QUOTA_SPEED_1 = 24 [(LabeledCounterOpts) = {Name: "QuotaBytesPerSec" AggrFunc : EAF_SUM}]; + METRIC_MAX_QUOTA_SPEED_1 = 25 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerSec" AggrFunc : EAF_MAX}]; + METRIC_TOTAL_QUOTA_SPEED_2 = 26 [(LabeledCounterOpts) = {Name: "QuotaBytesPerMin" AggrFunc : EAF_SUM}]; + METRIC_MAX_QUOTA_SPEED_2 = 27 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerMin" AggrFunc : EAF_MAX}]; + METRIC_TOTAL_QUOTA_SPEED_3 = 28 [(LabeledCounterOpts) = {Name: "QuotaBytesPerHour" AggrFunc : EAF_SUM}]; + METRIC_MAX_QUOTA_SPEED_3 = 29 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerHour" AggrFunc : EAF_MAX}]; + METRIC_TOTAL_QUOTA_SPEED_4 = 30 [(LabeledCounterOpts) = {Name: "QuotaBytesPerDay" AggrFunc : EAF_SUM}]; + METRIC_MAX_QUOTA_SPEED_4 = 31 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerDay" AggrFunc : EAF_MAX}]; + + METRIC_WRITE_QUOTA_USAGE = 32 [(LabeledCounterOpts) = {Name: "PartitionMaxWriteQuotaUsage" AggrFunc : EAF_MAX}]; + METRIC_MIN_SID_LIFETIME = 33 [(LabeledCounterOpts) = {Name: "SourceIdMinLifetimeMs" AggrFunc : EAF_MIN}]; -} - +} + diff --git a/ydb/core/protos/counters_pq_labeled.proto b/ydb/core/protos/counters_pq_labeled.proto index 4460b1b388..d67890bf27 100644 --- a/ydb/core/protos/counters_pq_labeled.proto +++ b/ydb/core/protos/counters_pq_labeled.proto @@ -1,100 +1,100 @@ import "ydb/core/protos/counters.proto"; - -package NKikimr.NPQ; - -option java_package = "ru.yandex.kikimr.proto"; - - -enum EClientLabeledCounters { - option (GlobalGroupNamesOpts) = { - Names: "Client" - Names: "Important" - Names: "OriginDC" - Names: "Producer" - Names: "Topic" - }; - - METRIC_COMMIT_WRITE_TIME = 0 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByCommitted" AggrFunc : EAF_MIN Type : CT_TIMELAG}]; - METRIC_COMMIT_CREATE_TIME = 1 [(LabeledCounterOpts) = {Name: "CreateTimeLagMsByCommitted" AggrFunc : EAF_MIN Type : CT_TIMELAG}]; - METRIC_COMMIT_MESSAGE_LAG = 2 [(LabeledCounterOpts) = {Name: "MessageLagByCommitted" AggrFunc : EAF_MAX}]; - METRIC_COMMIT_SIZE_LAG = 3 [(LabeledCounterOpts) = {Name: "SizeLagByCommitted" AggrFunc : EAF_MAX}]; - METRIC_READ_WRITE_TIME = 4 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByLastReadOld" AggrFunc : EAF_MIN Type : CT_TIMELAG}]; - METRIC_READ_TOTAL_TIME = 5 [(LabeledCounterOpts) = {Name: "TotalTimeLagMsByLastRead" AggrFunc : EAF_MAX}]; - METRIC_READ_MESSAGE_LAG = 6 [(LabeledCounterOpts) = {Name: "MessageLagByLastRead" AggrFunc : EAF_MAX}]; - METRIC_READ_SIZE_LAG = 7 [(LabeledCounterOpts) = {Name: "SizeLagByLastRead" AggrFunc : EAF_MAX}]; - METRIC_USER_PARTITIONS = 8 [(LabeledCounterOpts) = {Name: "UserPartitionsAnswered" AggrFunc : EAF_SUM}]; - METRIC_READ_TOTAL_MESSAGE_LAG = 9 [(LabeledCounterOpts) = {Name: "TotalMessageLagByLastRead" AggrFunc : EAF_SUM}]; - METRIC_READ_TOTAL_SIZE_LAG = 10 [(LabeledCounterOpts) = {Name: "TotalSizeLagByLastRead" AggrFunc : EAF_SUM}]; - METRIC_MIN_READ_QUOTA_BYTES_AVAIL_SEC = 11 [(LabeledCounterOpts) = {Name: "ReadBytesAvailAvgSec" AggrFunc : EAF_MIN}]; - METRIC_MIN_READ_QUOTA_BYTES_AVAIL_MIN = 12 [(LabeledCounterOpts) = {Name: "ReadBytesAvailAvgMin" AggrFunc : EAF_MIN}]; - - METRIC_READ_OFFSET_REWIND_SUM = 13 [(LabeledCounterOpts) = {Name: "ReadOffsetRewindSum" AggrFunc : EAF_SUM Type : CT_DERIV}]; - - METRIC_TOTAL_READ_SPEED_1 = 14 [(LabeledCounterOpts) = {Name: "ReadBytesPerSec" AggrFunc : EAF_SUM}]; - METRIC_MAX_READ_SPEED_1 = 15 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerSec" AggrFunc : EAF_MAX}]; - METRIC_TOTAL_READ_SPEED_2 = 16 [(LabeledCounterOpts) = {Name: "ReadBytesPerMin" AggrFunc : EAF_SUM}]; - METRIC_MAX_READ_SPEED_2 = 17 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerMin" AggrFunc : EAF_MAX}]; - METRIC_TOTAL_READ_SPEED_3 = 18 [(LabeledCounterOpts) = {Name: "ReadBytesPerHour" AggrFunc : EAF_SUM}]; - METRIC_MAX_READ_SPEED_3 = 19 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerHour" AggrFunc : EAF_MAX}]; - METRIC_TOTAL_READ_SPEED_4 = 20 [(LabeledCounterOpts) = {Name: "ReadBytesPerDay" AggrFunc : EAF_SUM}]; - METRIC_MAX_READ_SPEED_4 = 21 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerDay" AggrFunc : EAF_MAX}]; - - METRIC_READ_QUOTA_BYTES = 22 [(LabeledCounterOpts) = {Name: "ReadBytesQuota" AggrFunc : EAF_MIN}]; - - METRIC_READ_TIME_LAG = 23 [(LabeledCounterOpts) = {Name: "ReadTimeLagMs" AggrFunc : EAF_MAX}]; - METRIC_WRITE_TIME_LAG = 24 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByLastRead" AggrFunc : EAF_MAX}]; - METRIC_LAST_READ_TIME = 25 [(LabeledCounterOpts) = {Name: "TimeSinceLastReadMs" AggrFunc : EAF_MIN Type : CT_TIMELAG}]; - + +package NKikimr.NPQ; + +option java_package = "ru.yandex.kikimr.proto"; + + +enum EClientLabeledCounters { + option (GlobalGroupNamesOpts) = { + Names: "Client" + Names: "Important" + Names: "OriginDC" + Names: "Producer" + Names: "Topic" + }; + + METRIC_COMMIT_WRITE_TIME = 0 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByCommitted" AggrFunc : EAF_MIN Type : CT_TIMELAG}]; + METRIC_COMMIT_CREATE_TIME = 1 [(LabeledCounterOpts) = {Name: "CreateTimeLagMsByCommitted" AggrFunc : EAF_MIN Type : CT_TIMELAG}]; + METRIC_COMMIT_MESSAGE_LAG = 2 [(LabeledCounterOpts) = {Name: "MessageLagByCommitted" AggrFunc : EAF_MAX}]; + METRIC_COMMIT_SIZE_LAG = 3 [(LabeledCounterOpts) = {Name: "SizeLagByCommitted" AggrFunc : EAF_MAX}]; + METRIC_READ_WRITE_TIME = 4 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByLastReadOld" AggrFunc : EAF_MIN Type : CT_TIMELAG}]; + METRIC_READ_TOTAL_TIME = 5 [(LabeledCounterOpts) = {Name: "TotalTimeLagMsByLastRead" AggrFunc : EAF_MAX}]; + METRIC_READ_MESSAGE_LAG = 6 [(LabeledCounterOpts) = {Name: "MessageLagByLastRead" AggrFunc : EAF_MAX}]; + METRIC_READ_SIZE_LAG = 7 [(LabeledCounterOpts) = {Name: "SizeLagByLastRead" AggrFunc : EAF_MAX}]; + METRIC_USER_PARTITIONS = 8 [(LabeledCounterOpts) = {Name: "UserPartitionsAnswered" AggrFunc : EAF_SUM}]; + METRIC_READ_TOTAL_MESSAGE_LAG = 9 [(LabeledCounterOpts) = {Name: "TotalMessageLagByLastRead" AggrFunc : EAF_SUM}]; + METRIC_READ_TOTAL_SIZE_LAG = 10 [(LabeledCounterOpts) = {Name: "TotalSizeLagByLastRead" AggrFunc : EAF_SUM}]; + METRIC_MIN_READ_QUOTA_BYTES_AVAIL_SEC = 11 [(LabeledCounterOpts) = {Name: "ReadBytesAvailAvgSec" AggrFunc : EAF_MIN}]; + METRIC_MIN_READ_QUOTA_BYTES_AVAIL_MIN = 12 [(LabeledCounterOpts) = {Name: "ReadBytesAvailAvgMin" AggrFunc : EAF_MIN}]; + + METRIC_READ_OFFSET_REWIND_SUM = 13 [(LabeledCounterOpts) = {Name: "ReadOffsetRewindSum" AggrFunc : EAF_SUM Type : CT_DERIV}]; + + METRIC_TOTAL_READ_SPEED_1 = 14 [(LabeledCounterOpts) = {Name: "ReadBytesPerSec" AggrFunc : EAF_SUM}]; + METRIC_MAX_READ_SPEED_1 = 15 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerSec" AggrFunc : EAF_MAX}]; + METRIC_TOTAL_READ_SPEED_2 = 16 [(LabeledCounterOpts) = {Name: "ReadBytesPerMin" AggrFunc : EAF_SUM}]; + METRIC_MAX_READ_SPEED_2 = 17 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerMin" AggrFunc : EAF_MAX}]; + METRIC_TOTAL_READ_SPEED_3 = 18 [(LabeledCounterOpts) = {Name: "ReadBytesPerHour" AggrFunc : EAF_SUM}]; + METRIC_MAX_READ_SPEED_3 = 19 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerHour" AggrFunc : EAF_MAX}]; + METRIC_TOTAL_READ_SPEED_4 = 20 [(LabeledCounterOpts) = {Name: "ReadBytesPerDay" AggrFunc : EAF_SUM}]; + METRIC_MAX_READ_SPEED_4 = 21 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerDay" AggrFunc : EAF_MAX}]; + + METRIC_READ_QUOTA_BYTES = 22 [(LabeledCounterOpts) = {Name: "ReadBytesQuota" AggrFunc : EAF_MIN}]; + + METRIC_READ_TIME_LAG = 23 [(LabeledCounterOpts) = {Name: "ReadTimeLagMs" AggrFunc : EAF_MAX}]; + METRIC_WRITE_TIME_LAG = 24 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByLastRead" AggrFunc : EAF_MAX}]; + METRIC_LAST_READ_TIME = 25 [(LabeledCounterOpts) = {Name: "TimeSinceLastReadMs" AggrFunc : EAF_MIN Type : CT_TIMELAG}]; + METRIC_READ_QUOTA_USAGE = 26 [(LabeledCounterOpts) = {Name: "PartitionMaxReadQuotaUsage" AggrFunc : EAF_MAX}]; -} - - - -enum EPartitionLabeledCounters { - option (GlobalGroupNamesOpts) = { - Names: "OriginDC" - Names: "Producer" - Names: "Topic" - }; - - METRIC_LIFE_TIME = 0 [(LabeledCounterOpts) = {Name: "PartitionLifeTimeMs" AggrFunc : EAF_MAX Type : CT_TIMELAG}]; - METRIC_INIT_TIME = 1 [(LabeledCounterOpts) = {Name: "PartitionInitTimeMs" AggrFunc : EAF_MAX}]; - METRIC_PARTITIONS = 2 [(LabeledCounterOpts) = {Name: "PartitionsAnswered" AggrFunc : EAF_SUM}]; - METRIC_NUM_SIDS = 3 [(LabeledCounterOpts) = {Name: "SourceIdCount" AggrFunc : EAF_SUM}]; - METRIC_MAX_NUM_SIDS = 4 [(LabeledCounterOpts) = {Name: "SourceIdMaxCount" AggrFunc : EAF_MAX}]; - METRIC_GAPS_COUNT = 5 [(LabeledCounterOpts) = {Name: "GapsCount" AggrFunc : EAF_SUM}]; - METRIC_MAX_GAPS_COUNT = 6 [(LabeledCounterOpts) = {Name: "GapsMaxCount" AggrFunc : EAF_MAX}]; - METRIC_GAPS_SIZE = 7 [(LabeledCounterOpts) = {Name: "GapsSize" AggrFunc : EAF_SUM}]; - METRIC_MAX_GAPS_SIZE = 8 [(LabeledCounterOpts) = {Name: "GapsMaxSize" AggrFunc : EAF_MAX}]; - METRIC_MIN_WRITE_QUOTA_BYTES_AVAIL_SEC = 9 [(LabeledCounterOpts) = {Name: "WriteBytesAvailAvgSec" AggrFunc : EAF_MIN}]; - METRIC_MIN_WRITE_QUOTA_BYTES_AVAIL_MIN = 10 [(LabeledCounterOpts) = {Name: "WriteBytesAvailAvgMin" AggrFunc : EAF_MIN}]; - METRIC_TOTAL_WRITE_SPEED_1 = 11 [(LabeledCounterOpts) = {Name: "WriteBytesPerSec" AggrFunc : EAF_SUM}]; - METRIC_MAX_WRITE_SPEED_1 = 12 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerSec" AggrFunc : EAF_MAX}]; - METRIC_TOTAL_WRITE_SPEED_2 = 13 [(LabeledCounterOpts) = {Name: "WriteBytesPerMin" AggrFunc : EAF_SUM}]; - METRIC_MAX_WRITE_SPEED_2 = 14 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerMin" AggrFunc : EAF_MAX}]; - METRIC_TOTAL_WRITE_SPEED_3 = 15 [(LabeledCounterOpts) = {Name: "WriteBytesPerHour" AggrFunc : EAF_SUM}]; - METRIC_MAX_WRITE_SPEED_3 = 16 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerHour" AggrFunc : EAF_MAX}]; - METRIC_TOTAL_WRITE_SPEED_4 = 17 [(LabeledCounterOpts) = {Name: "WriteBytesPerDay" AggrFunc : EAF_SUM}]; - METRIC_MAX_WRITE_SPEED_4 = 18 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerDay" AggrFunc : EAF_MAX}]; - - METRIC_TOTAL_PART_SIZE = 19 [(LabeledCounterOpts) = {Name: "TotalPartSize" AggrFunc : EAF_SUM}]; - METRIC_MAX_PART_SIZE = 20 [(LabeledCounterOpts) = {Name: "MaxPartSize" AggrFunc : EAF_MAX}]; - - METRIC_WRITE_QUOTA_BYTES = 21 [(LabeledCounterOpts) = {Name: "WriteBytesQuota" AggrFunc : EAF_MIN}]; - - METRIC_WRITE_TIME_LAG_MS = 22 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByLastWrite" AggrFunc : EAF_MAX}]; - METRIC_LAST_WRITE_TIME = 23 [(LabeledCounterOpts) = {Name: "TimeSinceLastWriteMs" AggrFunc : EAF_MIN Type : CT_TIMELAG}]; - - METRIC_TOTAL_QUOTA_SPEED_1 = 24 [(LabeledCounterOpts) = {Name: "QuotaBytesPerSec" AggrFunc : EAF_SUM}]; - METRIC_MAX_QUOTA_SPEED_1 = 25 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerSec" AggrFunc : EAF_MAX}]; - METRIC_TOTAL_QUOTA_SPEED_2 = 26 [(LabeledCounterOpts) = {Name: "QuotaBytesPerMin" AggrFunc : EAF_SUM}]; - METRIC_MAX_QUOTA_SPEED_2 = 27 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerMin" AggrFunc : EAF_MAX}]; - METRIC_TOTAL_QUOTA_SPEED_3 = 28 [(LabeledCounterOpts) = {Name: "QuotaBytesPerHour" AggrFunc : EAF_SUM}]; - METRIC_MAX_QUOTA_SPEED_3 = 29 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerHour" AggrFunc : EAF_MAX}]; - METRIC_TOTAL_QUOTA_SPEED_4 = 30 [(LabeledCounterOpts) = {Name: "QuotaBytesPerDay" AggrFunc : EAF_SUM}]; - METRIC_MAX_QUOTA_SPEED_4 = 31 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerDay" AggrFunc : EAF_MAX}]; - +} + + + +enum EPartitionLabeledCounters { + option (GlobalGroupNamesOpts) = { + Names: "OriginDC" + Names: "Producer" + Names: "Topic" + }; + + METRIC_LIFE_TIME = 0 [(LabeledCounterOpts) = {Name: "PartitionLifeTimeMs" AggrFunc : EAF_MAX Type : CT_TIMELAG}]; + METRIC_INIT_TIME = 1 [(LabeledCounterOpts) = {Name: "PartitionInitTimeMs" AggrFunc : EAF_MAX}]; + METRIC_PARTITIONS = 2 [(LabeledCounterOpts) = {Name: "PartitionsAnswered" AggrFunc : EAF_SUM}]; + METRIC_NUM_SIDS = 3 [(LabeledCounterOpts) = {Name: "SourceIdCount" AggrFunc : EAF_SUM}]; + METRIC_MAX_NUM_SIDS = 4 [(LabeledCounterOpts) = {Name: "SourceIdMaxCount" AggrFunc : EAF_MAX}]; + METRIC_GAPS_COUNT = 5 [(LabeledCounterOpts) = {Name: "GapsCount" AggrFunc : EAF_SUM}]; + METRIC_MAX_GAPS_COUNT = 6 [(LabeledCounterOpts) = {Name: "GapsMaxCount" AggrFunc : EAF_MAX}]; + METRIC_GAPS_SIZE = 7 [(LabeledCounterOpts) = {Name: "GapsSize" AggrFunc : EAF_SUM}]; + METRIC_MAX_GAPS_SIZE = 8 [(LabeledCounterOpts) = {Name: "GapsMaxSize" AggrFunc : EAF_MAX}]; + METRIC_MIN_WRITE_QUOTA_BYTES_AVAIL_SEC = 9 [(LabeledCounterOpts) = {Name: "WriteBytesAvailAvgSec" AggrFunc : EAF_MIN}]; + METRIC_MIN_WRITE_QUOTA_BYTES_AVAIL_MIN = 10 [(LabeledCounterOpts) = {Name: "WriteBytesAvailAvgMin" AggrFunc : EAF_MIN}]; + METRIC_TOTAL_WRITE_SPEED_1 = 11 [(LabeledCounterOpts) = {Name: "WriteBytesPerSec" AggrFunc : EAF_SUM}]; + METRIC_MAX_WRITE_SPEED_1 = 12 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerSec" AggrFunc : EAF_MAX}]; + METRIC_TOTAL_WRITE_SPEED_2 = 13 [(LabeledCounterOpts) = {Name: "WriteBytesPerMin" AggrFunc : EAF_SUM}]; + METRIC_MAX_WRITE_SPEED_2 = 14 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerMin" AggrFunc : EAF_MAX}]; + METRIC_TOTAL_WRITE_SPEED_3 = 15 [(LabeledCounterOpts) = {Name: "WriteBytesPerHour" AggrFunc : EAF_SUM}]; + METRIC_MAX_WRITE_SPEED_3 = 16 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerHour" AggrFunc : EAF_MAX}]; + METRIC_TOTAL_WRITE_SPEED_4 = 17 [(LabeledCounterOpts) = {Name: "WriteBytesPerDay" AggrFunc : EAF_SUM}]; + METRIC_MAX_WRITE_SPEED_4 = 18 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerDay" AggrFunc : EAF_MAX}]; + + METRIC_TOTAL_PART_SIZE = 19 [(LabeledCounterOpts) = {Name: "TotalPartSize" AggrFunc : EAF_SUM}]; + METRIC_MAX_PART_SIZE = 20 [(LabeledCounterOpts) = {Name: "MaxPartSize" AggrFunc : EAF_MAX}]; + + METRIC_WRITE_QUOTA_BYTES = 21 [(LabeledCounterOpts) = {Name: "WriteBytesQuota" AggrFunc : EAF_MIN}]; + + METRIC_WRITE_TIME_LAG_MS = 22 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByLastWrite" AggrFunc : EAF_MAX}]; + METRIC_LAST_WRITE_TIME = 23 [(LabeledCounterOpts) = {Name: "TimeSinceLastWriteMs" AggrFunc : EAF_MIN Type : CT_TIMELAG}]; + + METRIC_TOTAL_QUOTA_SPEED_1 = 24 [(LabeledCounterOpts) = {Name: "QuotaBytesPerSec" AggrFunc : EAF_SUM}]; + METRIC_MAX_QUOTA_SPEED_1 = 25 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerSec" AggrFunc : EAF_MAX}]; + METRIC_TOTAL_QUOTA_SPEED_2 = 26 [(LabeledCounterOpts) = {Name: "QuotaBytesPerMin" AggrFunc : EAF_SUM}]; + METRIC_MAX_QUOTA_SPEED_2 = 27 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerMin" AggrFunc : EAF_MAX}]; + METRIC_TOTAL_QUOTA_SPEED_3 = 28 [(LabeledCounterOpts) = {Name: "QuotaBytesPerHour" AggrFunc : EAF_SUM}]; + METRIC_MAX_QUOTA_SPEED_3 = 29 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerHour" AggrFunc : EAF_MAX}]; + METRIC_TOTAL_QUOTA_SPEED_4 = 30 [(LabeledCounterOpts) = {Name: "QuotaBytesPerDay" AggrFunc : EAF_SUM}]; + METRIC_MAX_QUOTA_SPEED_4 = 31 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerDay" AggrFunc : EAF_MAX}]; + METRIC_WRITE_QUOTA_USAGE = 32 [(LabeledCounterOpts) = {Name: "PartitionMaxWriteQuotaUsage" AggrFunc : EAF_MAX}]; - -} - + +} + diff --git a/ydb/core/protos/counters_schemeshard.proto b/ydb/core/protos/counters_schemeshard.proto index 5d31c6778c..a839915c98 100644 --- a/ydb/core/protos/counters_schemeshard.proto +++ b/ydb/core/protos/counters_schemeshard.proto @@ -16,7 +16,7 @@ enum ESimpleCounters { COUNTER_TABLE_SHARD_ACTIVE_COUNT = 5 [(CounterOpts) = {Name: "TableShardsActive"}]; COUNTER_TABLE_SHARD_INACTIVE_COUNT = 6 [(CounterOpts) = {Name: "TableShardsInactive"}]; COUNTER_PQ_SHARD_COUNT = 7 [(CounterOpts) = {Name: "PqGroupShards"}]; - COUNTER_PQ_RB_SHARD_COUNT = 8 [(CounterOpts) = {Name: "PqReadBalancerShards"}]; + COUNTER_PQ_RB_SHARD_COUNT = 8 [(CounterOpts) = {Name: "PqReadBalancerShards"}]; COUNTER_SUB_DOMAIN_COUNT = 9 [(CounterOpts) = {Name: "SubDomains"}]; COUNTER_SUB_DOMAIN_COORDINATOR_COUNT = 10 [(CounterOpts) = {Name: "SubDomainsCoordinators"}]; COUNTER_SUB_DOMAIN_MEDIATOR_COUNT = 11 [(CounterOpts) = {Name: "SubDomainsMediators"}]; @@ -133,15 +133,15 @@ enum ESimpleCounters { COUNTER_IN_FLIGHT_OPS_TxCreateSequence = 109 [(CounterOpts) = {Name: "InFlightOps/CreateSequence"}]; COUNTER_IN_FLIGHT_OPS_TxAlterSequence = 110 [(CounterOpts) = {Name: "InFlightOps/AlterSequence"}]; COUNTER_IN_FLIGHT_OPS_TxDropSequence = 111 [(CounterOpts) = {Name: "InFlightOps/DropSequence"}]; - - COUNTER_STREAM_SHARDS_COUNT = 112 [(CounterOpts) = {Name: "StreamShardsCount"}]; - COUNTER_STREAM_SHARDS_QUOTA = 113 [(CounterOpts) = {Name: "StreamShardsQuota"}]; + + COUNTER_STREAM_SHARDS_COUNT = 112 [(CounterOpts) = {Name: "StreamShardsCount"}]; + COUNTER_STREAM_SHARDS_QUOTA = 113 [(CounterOpts) = {Name: "StreamShardsQuota"}]; COUNTER_BACKGROUND_COMPACTION_QUEUE_SIZE = 114 [(CounterOpts) = {Name: "BackgroundCompactionQueueSize"}]; - - COUNTER_STREAM_RESERVED_THROUGHPUT = 115 [(CounterOpts) = {Name: "StreamReservedThroughput"}]; - COUNTER_STREAM_RESERVED_STORAGE = 116 [(CounterOpts) = {Name: "StreamReservedStorage"}]; - COUNTER_STREAM_RESERVED_STORAGE_QUOTA = 117 [(CounterOpts) = {Name: "StreamReservedStorageQuota"}]; + + COUNTER_STREAM_RESERVED_THROUGHPUT = 115 [(CounterOpts) = {Name: "StreamReservedThroughput"}]; + COUNTER_STREAM_RESERVED_STORAGE = 116 [(CounterOpts) = {Name: "StreamReservedStorage"}]; + COUNTER_STREAM_RESERVED_STORAGE_QUOTA = 117 [(CounterOpts) = {Name: "StreamReservedStorageQuota"}]; COUNTER_REPLICATION_COUNT = 118 [(CounterOpts) = {Name: "Replications"}]; COUNTER_REPLICATION_CONTROLLER_COUNT = 119 [(CounterOpts) = {Name: "ReplicationControllers"}]; diff --git a/ydb/core/protos/flat_scheme_op.proto b/ydb/core/protos/flat_scheme_op.proto index 71fcbd5541..33e598c1c2 100644 --- a/ydb/core/protos/flat_scheme_op.proto +++ b/ydb/core/protos/flat_scheme_op.proto @@ -896,19 +896,19 @@ message TPersQueueGroupDescription { optional string Name = 1; // mandatory optional uint64 PathId = 2; - optional uint32 TotalGroupCount = 3; // mandatory - message TPartitionToAdd { - optional uint32 PartitionId = 1; - optional uint32 GroupId = 2; - } - repeated TPartitionToAdd PartitionsToAdd = 9; - repeated uint32 PartitionsToDelete = 10; - optional uint32 NextPartitionId = 11; + optional uint32 TotalGroupCount = 3; // mandatory + message TPartitionToAdd { + optional uint32 PartitionId = 1; + optional uint32 GroupId = 2; + } + repeated TPartitionToAdd PartitionsToAdd = 9; + repeated uint32 PartitionsToDelete = 10; + optional uint32 NextPartitionId = 11; optional uint32 PartitionPerTablet = 4; // default = 10 optional NKikimrPQ.TPQTabletConfig PQTabletConfig = 5; // mandatory repeated TPartition Partitions = 6; // do not set optional uint64 AlterVersion = 7; - optional uint64 BalancerTabletID = 8; + optional uint64 BalancerTabletID = 8; // Can be passed upon creation. // PQTabletConfig.PartitionKeySchema must be set. @@ -1395,8 +1395,8 @@ message TDirEntry { optional uint64 PathVersion = 13; optional EPathSubType PathSubType = 14; optional TPathVersion Version = 15; - - optional uint64 BalancerTabletID = 999; //temporary optimization for old PQ read/write protocol. Must be removed later + + optional uint64 BalancerTabletID = 999; //temporary optimization for old PQ read/write protocol. Must be removed later } // Describes single partition (range or point) of a table diff --git a/ydb/core/protos/flat_tx_scheme.proto b/ydb/core/protos/flat_tx_scheme.proto index ad92ba703c..dc06523e5c 100644 --- a/ydb/core/protos/flat_tx_scheme.proto +++ b/ydb/core/protos/flat_tx_scheme.proto @@ -36,9 +36,9 @@ enum EStatus { StatusPreconditionFailed = 14; StatusRedirectDomain = 15; StatusQuotaExceeded = 16; - StatusResourceExhausted = 17; + StatusResourceExhausted = 17; StatusReserved18 = 18; - StatusReserved19 = 19; + StatusReserved19 = 19; // when adding a new status and keeping parse compatibility with the old version // rename existing reserved status to desired one, and add new reserved status to // the end of reserved statuses @@ -141,8 +141,8 @@ message TSchemeLimits { optional uint64 MaxPathElementLength = 12; optional string ExtraPathSymbolsAllowed = 13; - - optional uint64 MaxPQPartitions = 14; + + optional uint64 MaxPQPartitions = 14; } message TEvInitTenantSchemeShard { diff --git a/ydb/core/protos/grpc.proto b/ydb/core/protos/grpc.proto index e2d9acff4b..797cc2de82 100644 --- a/ydb/core/protos/grpc.proto +++ b/ydb/core/protos/grpc.proto @@ -1,11 +1,11 @@ -syntax = "proto3"; - +syntax = "proto3"; + package NKikimrClient; - + import "ydb/core/protos/msgbus.proto"; import "ydb/core/protos/msgbus_kv.proto"; import "ydb/core/protos/msgbus_pq.proto"; - + service TGRpcServer { // TODO @@ -29,14 +29,14 @@ service TGRpcServer { rpc WhoAmI(TWhoAmI) returns (TResponse); ///////////////////////////////////////////////////////////////////////////////////////////////// - // CHOOSE PROXY INTERFACE - ///////////////////////////////////////////////////////////////////////////////////////////////// - rpc ChooseProxy(TChooseProxyRequest) returns (TResponse); - - ///////////////////////////////////////////////////////////////////////////////////////////////// + // CHOOSE PROXY INTERFACE + ///////////////////////////////////////////////////////////////////////////////////////////////// + rpc ChooseProxy(TChooseProxyRequest) returns (TResponse); + + ///////////////////////////////////////////////////////////////////////////////////////////////// // PERSISTENT QUEUE CLIENT INTERFACE ///////////////////////////////////////////////////////////////////////////////////////////////// - + rpc PersQueueRequest(TPersQueueRequest) returns (TResponse); ///////////////////////////////////////////////////////////////////////////////////////////////// @@ -113,4 +113,4 @@ service TGRpcServer { rpc DbSchema(TJSON) returns (TJSON); rpc DbOperation(TJSON) returns (TJSON); rpc DbBatch(TJSON) returns (TJSON); -} +} diff --git a/ydb/core/protos/grpc_pq_old.proto b/ydb/core/protos/grpc_pq_old.proto index cf20a2e000..ed273e1404 100755 --- a/ydb/core/protos/grpc_pq_old.proto +++ b/ydb/core/protos/grpc_pq_old.proto @@ -1,40 +1,40 @@ -package NKikimrPQClient; - -message TKeyValue { - optional string Key = 1; - optional string Value = 2; -} - +package NKikimrPQClient; + +message TKeyValue { + optional string Key = 1; + optional string Value = 2; +} + message TMapType { - repeated TKeyValue Items = 1; -} - -message TDataChunk { - message TSessionHeader { - optional string Server = 1; - optional string File = 2; - optional string Ident = 3; - optional string LogType = 4; - } - - optional TSessionHeader Meta = 124; - - // fileid in key - optional uint64 SeqNo = 1; - optional uint64 CreateTime = 3; // chunk creation time in ms - - optional bytes Ip = 8; - - enum EChunkType { - REGULAR = 0; - GROW = 1; - } - - optional EChunkType ChunkType = 9 [default = REGULAR]; - + repeated TKeyValue Items = 1; +} + +message TDataChunk { + message TSessionHeader { + optional string Server = 1; + optional string File = 2; + optional string Ident = 3; + optional string LogType = 4; + } + + optional TSessionHeader Meta = 124; + + // fileid in key + optional uint64 SeqNo = 1; + optional uint64 CreateTime = 3; // chunk creation time in ms + + optional bytes Ip = 8; + + enum EChunkType { + REGULAR = 0; + GROW = 1; + } + + optional EChunkType ChunkType = 9 [default = REGULAR]; + optional int64 Codec = 10; - + optional TMapType ExtraFields = 126; - - optional bytes Data = 127; // ~ 64K -} + + optional bytes Data = 127; // ~ 64K +} diff --git a/ydb/core/protos/grpc_status_proxy.proto b/ydb/core/protos/grpc_status_proxy.proto index cd9f31bf29..e1cc0dc7ee 100644 --- a/ydb/core/protos/grpc_status_proxy.proto +++ b/ydb/core/protos/grpc_status_proxy.proto @@ -1,10 +1,10 @@ -syntax = "proto3"; - -package NKikimrGRpcProxy; - -message TEvGetStatusRequest { -} - -message TEvGetStatusResponse { - uint64 Weight = 1; //value between 0 and 1000000; 100000 means not allowed -}
\ No newline at end of file +syntax = "proto3"; + +package NKikimrGRpcProxy; + +message TEvGetStatusRequest { +} + +message TEvGetStatusResponse { + uint64 Weight = 1; //value between 0 and 1000000; 100000 means not allowed +}
\ No newline at end of file diff --git a/ydb/core/protos/msgbus.proto b/ydb/core/protos/msgbus.proto index eca4cceaaa..df7cda5980 100644 --- a/ydb/core/protos/msgbus.proto +++ b/ydb/core/protos/msgbus.proto @@ -188,10 +188,10 @@ message TResponse { optional TFlatTxId FlatTxId = 700; // TPersQueueRequest - optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 800; // TODO: rename to something more PQ-specific + optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 800; // TODO: rename to something more PQ-specific optional TPersQueuePartitionResponse PartitionResponse = 801; optional TPersQueueMetaResponse MetaResponse = 802; - optional TPersQueueFetchResponse FetchResponse = 803; + optional TPersQueueFetchResponse FetchResponse = 803; // TKeyValueRequest optional uint64 Cookie = 900; @@ -238,11 +238,11 @@ message TResponse { optional NKikimrBlobStorage.TConfigResponse BlobStorageConfigResponse = 1030; optional TResolveNodeResponse ResolveNodeResponse = 1040; - - // TChooseProxyRequest - optional string ProxyName = 1050; - optional uint64 ProxyCookie = 1051; - + + // TChooseProxyRequest + optional string ProxyName = 1050; + optional uint64 ProxyCookie = 1051; + // TWhoAmI optional string UserName = 1100; repeated string Groups = 1101; @@ -498,12 +498,12 @@ message TJSON { optional TJsonSettings JsonSettings = 3; }; -message TChooseProxyRequest { +message TChooseProxyRequest { optional uint32 DataCenterNum = 1; optional string DataCenter = 3; - optional bool PreferLocalProxy = 2 [default = false]; -} - + optional bool PreferLocalProxy = 2 [default = false]; +} + message TWhoAmI { optional bool ReturnGroups = 1; optional string SecurityToken = 5; diff --git a/ydb/core/protos/msgbus_kv.proto b/ydb/core/protos/msgbus_kv.proto index ce830ee42c..87c65e5176 100644 --- a/ydb/core/protos/msgbus_kv.proto +++ b/ydb/core/protos/msgbus_kv.proto @@ -6,14 +6,14 @@ message TKeyValueRequest { enum EStorageChannel { MAIN = 0; EXTRA = 1; - EXTRA2 = 2; + EXTRA2 = 2; EXTRA3 = 3; EXTRA4 = 4; EXTRA5 = 5; - EXTRA6 = 6; - EXTRA7 = 7; - EXTRA8 = 8; - EXTRA9 = 9; + EXTRA6 = 6; + EXTRA7 = 7; + EXTRA8 = 8; + EXTRA9 = 9; INLINE = 65535; } diff --git a/ydb/core/protos/msgbus_pq.proto b/ydb/core/protos/msgbus_pq.proto index 2cba582a59..849b292383 100644 --- a/ydb/core/protos/msgbus_pq.proto +++ b/ydb/core/protos/msgbus_pq.proto @@ -1,95 +1,95 @@ import "ydb/core/protos/pqconfig.proto"; import "ydb/public/api/protos/draft/persqueue_error_codes.proto"; import "library/cpp/actors/protos/actors.proto"; - + package NKikimrClient; -option java_package = "ru.yandex.kikimr.proto"; - - -message TPersQueuePartitionRequest { - message TCmdRead { - optional string ClientId = 1; // mandatory - optional string SessionId = 2; // if not set, then read without session - optional int64 Offset = 3; // mandatory - optional int32 PartNo = 7 [default = 0]; //if not set then treat as 0 - optional int32 Count = 4; //optional - optional int32 Bytes = 5; //optional, max value = 25Mb - optional int32 TimeoutMs = 6; //ms, default = 1000 +option java_package = "ru.yandex.kikimr.proto"; + + +message TPersQueuePartitionRequest { + message TCmdRead { + optional string ClientId = 1; // mandatory + optional string SessionId = 2; // if not set, then read without session + optional int64 Offset = 3; // mandatory + optional int32 PartNo = 7 [default = 0]; //if not set then treat as 0 + optional int32 Count = 4; //optional + optional int32 Bytes = 5; //optional, max value = 25Mb + optional int32 TimeoutMs = 6; //ms, default = 1000 optional int32 MaxTimeLagMs = 8; // optional, default = infinity, why we use int instead of uint? - optional uint64 ReadTimestampMs = 9; //optional, default = 0 - - optional bool MirrorerRequest = 10 [default = false]; - - optional string ClientDC = 11; + optional uint64 ReadTimestampMs = 9; //optional, default = 0 + + optional bool MirrorerRequest = 10 [default = false]; + + optional string ClientDC = 11; optional string PartitionKey = 12; optional string ExplicitHash = 13; optional bool ExternalOperation = 14 [default = false]; - } - - message TCmdCreateSession { - optional string ClientId = 1; - optional string SessionId = 2; - optional uint64 Generation = 3; - optional uint64 Step = 4; - } - - message TCmdDeleteSession { - optional string ClientId = 1; // mandatory - optional string SessionId = 2; // optional - } - - message TCmdSetClientOffset { - optional string ClientId = 1; // mandatory - optional int64 Offset = 2; // mandatory - optional string SessionId = 4; // if not set, then no checks - - optional bool MirrorerRequest = 10 [default = false]; - } - - message TCmdGetClientOffset { - optional string ClientId = 1; // mandatory - } - - message TCmdWrite { - optional bytes SourceId = 1; //mandatory - optional int64 SeqNo = 2; //mandatory - optional bytes Data = 4; //mandatory - optional int32 PartNo = 5; //fill it for multi-part message - optional int32 TotalParts = 6; //fill it for first part of multi-part message - optional int32 TotalSize = 7; // fill it for first part of multi-part message - optional int64 CreateTimeMS = 8; //mandatory + } + + message TCmdCreateSession { + optional string ClientId = 1; + optional string SessionId = 2; + optional uint64 Generation = 3; + optional uint64 Step = 4; + } + + message TCmdDeleteSession { + optional string ClientId = 1; // mandatory + optional string SessionId = 2; // optional + } + + message TCmdSetClientOffset { + optional string ClientId = 1; // mandatory + optional int64 Offset = 2; // mandatory + optional string SessionId = 4; // if not set, then no checks + + optional bool MirrorerRequest = 10 [default = false]; + } + + message TCmdGetClientOffset { + optional string ClientId = 1; // mandatory + } + + message TCmdWrite { + optional bytes SourceId = 1; //mandatory + optional int64 SeqNo = 2; //mandatory + optional bytes Data = 4; //mandatory + optional int32 PartNo = 5; //fill it for multi-part message + optional int32 TotalParts = 6; //fill it for first part of multi-part message + optional int32 TotalSize = 7; // fill it for first part of multi-part message + optional int64 CreateTimeMS = 8; //mandatory optional bool DisableDeduplication = 9 [ default = false ]; - optional int64 WriteTimeMS = 10; //for mirroring only - optional int32 UncompressedSize = 12; //fill it for all parts - - optional string ClientDC = 11; + optional int64 WriteTimeMS = 10; //for mirroring only + optional int32 UncompressedSize = 12; //fill it for all parts + + optional string ClientDC = 11; optional string PartitionKey = 13; optional bytes ExplicitHash = 14; optional bool ExternalOperation = 15 [ default = false ]; - } - - message TCmdUpdateWriteTimestamp { - optional int64 WriteTimeMS = 1; //for mirroring only - } - - message TCmdGetMaxSeqNo { - repeated bytes SourceId = 2; //list of sourceIds to request - } - - message TCmdGetOwnership { // get write ownership for partition - optional string Owner = 1 [default = "default"]; - optional bool Force = 2 [ default = true]; - } - - message TCmdReserveBytes { - optional uint32 Size = 1; - optional bool LastRequest = 2; // append size to last request's reservation - } - + } + + message TCmdUpdateWriteTimestamp { + optional int64 WriteTimeMS = 1; //for mirroring only + } + + message TCmdGetMaxSeqNo { + repeated bytes SourceId = 2; //list of sourceIds to request + } + + message TCmdGetOwnership { // get write ownership for partition + optional string Owner = 1 [default = "default"]; + optional bool Force = 2 [ default = true]; + } + + message TCmdReserveBytes { + optional uint32 Size = 1; + optional bool LastRequest = 2; // append size to last request's reservation + } + message TCmdRegisterMessageGroup { // Id of message group (SourceId) optional bytes Id = 1; @@ -111,336 +111,336 @@ message TPersQueuePartitionRequest { repeated TCmdRegisterMessageGroup RegisterGroups = 2; } - optional string Topic = 1; //mandatory for request for partitions - optional int32 Partition = 2; //mandatory for request for partitions - optional string OwnerCookie = 3; //mandatory for write - optional int64 MessageNo = 12; //mandatory for write - optional int64 CmdWriteOffset = 13; //optional - - repeated TCmdWrite CmdWrite = 4; - optional TCmdGetMaxSeqNo CmdGetMaxSeqNo = 5; - optional TCmdDeleteSession CmdDeleteSession = 6; - optional TCmdCreateSession CmdCreateSession = 7; - optional TCmdRead CmdRead = 8; - optional TCmdSetClientOffset CmdSetClientOffset = 9; - optional TCmdGetClientOffset CmdGetClientOffset = 10; - optional TCmdGetOwnership CmdGetOwnership = 11; - optional TCmdReserveBytes CmdReserveBytes = 14; + optional string Topic = 1; //mandatory for request for partitions + optional int32 Partition = 2; //mandatory for request for partitions + optional string OwnerCookie = 3; //mandatory for write + optional int64 MessageNo = 12; //mandatory for write + optional int64 CmdWriteOffset = 13; //optional + + repeated TCmdWrite CmdWrite = 4; + optional TCmdGetMaxSeqNo CmdGetMaxSeqNo = 5; + optional TCmdDeleteSession CmdDeleteSession = 6; + optional TCmdCreateSession CmdCreateSession = 7; + optional TCmdRead CmdRead = 8; + optional TCmdSetClientOffset CmdSetClientOffset = 9; + optional TCmdGetClientOffset CmdGetClientOffset = 10; + optional TCmdGetOwnership CmdGetOwnership = 11; + optional TCmdReserveBytes CmdReserveBytes = 14; optional TCmdRegisterMessageGroup CmdRegisterMessageGroup = 20; optional TCmdDeregisterMessageGroup CmdDeregisterMessageGroup = 21; optional TCmdSplitMessageGroup CmdSplitMessageGroup = 22; - + optional NActorsProto.TActorId PipeClient = 15; - - optional uint64 Cookie = 16; //client cookie to be returned in response - - optional TCmdUpdateWriteTimestamp CmdUpdateWriteTimestamp = 17; - + + optional uint64 Cookie = 16; //client cookie to be returned in response + + optional TCmdUpdateWriteTimestamp CmdUpdateWriteTimestamp = 17; + optional bool IsDirectWrite = 18 [default = false]; optional uint64 PutUnitsSize = 19; -} - -message TPersQueueMetaRequest { - message TCmdCreateTopic { - optional string Topic = 1; //mandatory, topic name - optional int32 NumPartitions = 2; //mandatory, not zero - optional NKikimrPQ.TPQTabletConfig Config = 3; - optional int32 NumPartitionsPerTablet = 4 [default = 5]; - } - message TCmdChangeTopic { - optional string Topic = 1; //mandatory - optional int32 NumPartitions = 2; //mandatory, not zero - optional NKikimrPQ.TPQTabletConfig Config = 3; // if not set then config is not changed - } - - message TCmdDeleteTopic { +} + +message TPersQueueMetaRequest { + message TCmdCreateTopic { + optional string Topic = 1; //mandatory, topic name + optional int32 NumPartitions = 2; //mandatory, not zero + optional NKikimrPQ.TPQTabletConfig Config = 3; + optional int32 NumPartitionsPerTablet = 4 [default = 5]; + } + message TCmdChangeTopic { + optional string Topic = 1; //mandatory + optional int32 NumPartitions = 2; //mandatory, not zero + optional NKikimrPQ.TPQTabletConfig Config = 3; // if not set then config is not changed + } + + message TCmdDeleteTopic { optional string Topic = 1; //mandatory - } - - message TCmdGetTopicMetadata { - repeated string Topic = 1; //if not set - describe all topics - } - - message TTopicRequest { - optional string Topic = 1; //must be set - repeated int32 Partition = 2; // if empty - return info for all partitions - } - - message TCmdGetPartitionLocations { - repeated TTopicRequest TopicRequest = 1; //if not set - describe all topics - optional string Host = 3; // if set - filter answer for partitions only from this host - } - - message TCmdGetPartitionOffsets { - repeated TTopicRequest TopicRequest = 1; //if not set - describe all topics - optional string ClientId = 2; //if presented then client offset for this user is reported too - } - - message TCmdGetPartitionStatus { - repeated TTopicRequest TopicRequest = 1; //if not set - describe all topics - optional string ClientId = 2; //if not presended then sum of metrics for all clients - } - - message TCmdGetReadSessionsInfo { - optional string ClientId = 1; //manadatory - repeated string Topic = 2; //mandatory - } - - optional TCmdCreateTopic CmdCreateTopic = 1; - optional TCmdChangeTopic CmdChangeTopic = 2; - optional TCmdDeleteTopic CmdDeleteTopic = 3; - optional TCmdGetTopicMetadata CmdGetTopicMetadata = 4; - optional TCmdGetPartitionLocations CmdGetPartitionLocations = 5; - optional TCmdGetPartitionOffsets CmdGetPartitionOffsets = 6; - optional TCmdGetPartitionStatus CmdGetPartitionStatus = 7; - optional TCmdGetReadSessionsInfo CmdGetReadSessionsInfo = 8; -} - - -message TPersQueueFetchRequest { - message TPartitionInfo { - optional string Topic = 1; // must be set - optional int32 Partition = 2; // must be set - optional int64 Offset = 3; // must be set - optional int32 MaxBytes = 4; // must be set + } + + message TCmdGetTopicMetadata { + repeated string Topic = 1; //if not set - describe all topics + } + + message TTopicRequest { + optional string Topic = 1; //must be set + repeated int32 Partition = 2; // if empty - return info for all partitions + } + + message TCmdGetPartitionLocations { + repeated TTopicRequest TopicRequest = 1; //if not set - describe all topics + optional string Host = 3; // if set - filter answer for partitions only from this host + } + + message TCmdGetPartitionOffsets { + repeated TTopicRequest TopicRequest = 1; //if not set - describe all topics + optional string ClientId = 2; //if presented then client offset for this user is reported too + } + + message TCmdGetPartitionStatus { + repeated TTopicRequest TopicRequest = 1; //if not set - describe all topics + optional string ClientId = 2; //if not presended then sum of metrics for all clients + } + + message TCmdGetReadSessionsInfo { + optional string ClientId = 1; //manadatory + repeated string Topic = 2; //mandatory + } + + optional TCmdCreateTopic CmdCreateTopic = 1; + optional TCmdChangeTopic CmdChangeTopic = 2; + optional TCmdDeleteTopic CmdDeleteTopic = 3; + optional TCmdGetTopicMetadata CmdGetTopicMetadata = 4; + optional TCmdGetPartitionLocations CmdGetPartitionLocations = 5; + optional TCmdGetPartitionOffsets CmdGetPartitionOffsets = 6; + optional TCmdGetPartitionStatus CmdGetPartitionStatus = 7; + optional TCmdGetReadSessionsInfo CmdGetReadSessionsInfo = 8; +} + + +message TPersQueueFetchRequest { + message TPartitionInfo { + optional string Topic = 1; // must be set + optional int32 Partition = 2; // must be set + optional int64 Offset = 3; // must be set + optional int32 MaxBytes = 4; // must be set optional uint64 ReadTimestampMs = 5; //optional, default = 0 - } - repeated TPartitionInfo Partition = 1; - optional int32 TotalMaxBytes = 2; //must be set - optional int32 WaitMs = 3 [default = 0]; - optional string ClientId = 4; //must be set - - optional bool MirrorerRequest = 5 [default = false]; -} - - -message TPersQueueRequest { - optional TPersQueuePartitionRequest PartitionRequest = 1; - optional TPersQueueMetaRequest MetaRequest = 2; - optional TPersQueueFetchRequest FetchRequest = 3; - - //only one from data, meta or fetch request must be set. + } + repeated TPartitionInfo Partition = 1; + optional int32 TotalMaxBytes = 2; //must be set + optional int32 WaitMs = 3 [default = 0]; + optional string ClientId = 4; //must be set + + optional bool MirrorerRequest = 5 [default = false]; +} + + +message TPersQueueRequest { + optional TPersQueuePartitionRequest PartitionRequest = 1; + optional TPersQueueMetaRequest MetaRequest = 2; + optional TPersQueueFetchRequest FetchRequest = 3; + + //only one from data, meta or fetch request must be set. optional string SecurityToken = 5; optional string Ticket = 6; //if set, check for acl - - optional string RequestId = 100; //for logging -} - - -message TPersQueueMetaResponse { - message TCmdGetPartitionOffsetsResult { - message TTopicResult { - optional string Topic = 1; - repeated NKikimrPQ.TOffsetsResponse.TPartResult PartitionResult = 2; - - optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 3; - optional string ErrorReason = 4; //filled if ErrorCode is not OK - // add read rule version here - } - repeated TTopicResult TopicResult = 1; - } - - message TCmdGetTopicMetadataResult { - message TTopicInfo { - optional string Topic = 1; //mandatory - optional int32 NumPartitions = 2; //mandatory - optional NKikimrPQ.TPQTabletConfig Config = 3; //mandatory - - optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 4; - optional string ErrorReason = 5; //filled if ErrorCode is not OK - - } - repeated TTopicInfo TopicInfo = 2; - } - message TCmdGetPartitionLocationsResult { - message TTopicResult { - message TPartitionLocation { - optional int32 Partition = 1; // mandatory - optional string Host = 2; //mandatory - optional int32 HostId = 3; //mandatory, internal id of node - optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 6; - optional string ErrorReason = 7; //filled if ErrorCode is not OK - } - - optional string Topic = 1; //mandatory - repeated TPartitionLocation PartitionLocation = 4; - - optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 5; - optional string ErrorReason = 6; //filled if ErrorCode is not OK - - } - repeated TTopicResult TopicResult = 2; - } - - message TCmdGetPartitionStatusResult { - message TTopicResult { - optional string Topic = 1; //mandatory - repeated NKikimrPQ.TStatusResponse.TPartResult PartitionResult = 2; - optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 3; - optional string ErrorReason = 4; //filled if ErrorCode is not OK - } - repeated TTopicResult TopicResult = 2; - } - - message TCmdGetReadSessionsInfoResult { - message TPartitionResult { - optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 700; - optional string ErrorReason = 701; //filled if ErrorCode is not OK - - optional uint32 Partition = 1; - optional uint64 ClientOffset = 2; - optional uint64 StartOffset = 3; - optional uint64 EndOffset = 4; - optional uint64 MessageLag = 5; - optional uint64 TimeLag = 7; - optional string Session = 8; - optional string ClientNode = 9; - optional string ProxyNode = 10; - optional string TabletNode = 11; - optional string Timestamp = 12; - optional uint64 ClientReadOffset = 13; - optional uint64 ReadTimeLag = 14; + + optional string RequestId = 100; //for logging +} + + +message TPersQueueMetaResponse { + message TCmdGetPartitionOffsetsResult { + message TTopicResult { + optional string Topic = 1; + repeated NKikimrPQ.TOffsetsResponse.TPartResult PartitionResult = 2; + + optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 3; + optional string ErrorReason = 4; //filled if ErrorCode is not OK + // add read rule version here + } + repeated TTopicResult TopicResult = 1; + } + + message TCmdGetTopicMetadataResult { + message TTopicInfo { + optional string Topic = 1; //mandatory + optional int32 NumPartitions = 2; //mandatory + optional NKikimrPQ.TPQTabletConfig Config = 3; //mandatory + + optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 4; + optional string ErrorReason = 5; //filled if ErrorCode is not OK + + } + repeated TTopicInfo TopicInfo = 2; + } + message TCmdGetPartitionLocationsResult { + message TTopicResult { + message TPartitionLocation { + optional int32 Partition = 1; // mandatory + optional string Host = 2; //mandatory + optional int32 HostId = 3; //mandatory, internal id of node + optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 6; + optional string ErrorReason = 7; //filled if ErrorCode is not OK + } + + optional string Topic = 1; //mandatory + repeated TPartitionLocation PartitionLocation = 4; + + optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 5; + optional string ErrorReason = 6; //filled if ErrorCode is not OK + + } + repeated TTopicResult TopicResult = 2; + } + + message TCmdGetPartitionStatusResult { + message TTopicResult { + optional string Topic = 1; //mandatory + repeated NKikimrPQ.TStatusResponse.TPartResult PartitionResult = 2; + optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 3; + optional string ErrorReason = 4; //filled if ErrorCode is not OK + } + repeated TTopicResult TopicResult = 2; + } + + message TCmdGetReadSessionsInfoResult { + message TPartitionResult { + optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 700; + optional string ErrorReason = 701; //filled if ErrorCode is not OK + + optional uint32 Partition = 1; + optional uint64 ClientOffset = 2; + optional uint64 StartOffset = 3; + optional uint64 EndOffset = 4; + optional uint64 MessageLag = 5; + optional uint64 TimeLag = 7; + optional string Session = 8; + optional string ClientNode = 9; + optional string ProxyNode = 10; + optional string TabletNode = 11; + optional string Timestamp = 12; + optional uint64 ClientReadOffset = 13; + optional uint64 ReadTimeLag = 14; optional uint32 TabletNodeId = 15; - } - - message TTopicResult { - optional string Topic = 1; //mandatory - repeated TPartitionResult PartitionResult = 2; - - optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 3; - optional string ErrorReason = 4; //filled if ErrorCode is not OK - - // add read rule version here - - } - repeated TTopicResult TopicResult = 2; - - message TSessionResult { - optional string Session = 1; - - message TPartitionResult { - optional string Topic = 1; - optional uint64 Partition = 2; - - repeated uint64 NextCommits = 16; - optional uint64 LastReadId = 17; - optional uint64 ReadIdCommitted = 18; - optional uint64 AssignId = 19; - - optional uint64 Timestamp = 20; - } - optional uint64 Timestamp = 5; - - optional string ClientNode = 6; - optional string ProxyNode = 7; - - optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 8; - optional string ErrorReason = 9; //filled if ErrorCode is not OK - - repeated TPartitionResult PartitionResult = 10; - } - repeated TSessionResult SessionResult = 3; - } - - optional TCmdGetPartitionOffsetsResult CmdGetPartitionOffsetsResult = 1; - optional TCmdGetTopicMetadataResult CmdGetTopicMetadataResult = 2; - optional TCmdGetPartitionLocationsResult CmdGetPartitionLocationsResult = 3; - optional TCmdGetPartitionStatusResult CmdGetPartitionStatusResult = 4; - optional TCmdGetReadSessionsInfoResult CmdGetReadSessionsInfoResult = 5; - -} - - -message TCmdReadResult { - message TResult { - optional uint64 Offset = 1; - optional bytes Data = 2; - optional bytes SourceId = 3; - optional uint64 SeqNo = 4; - - optional uint32 PartNo = 5; //will be filled for multi-parted message - optional uint32 TotalParts = 6; //will be filled for multi-parted message - optional uint32 TotalSize = 7; //will be filled for PartNo=0 TotalParts > 1 - - optional uint64 WriteTimestampMS = 8; - optional uint64 CreateTimestampMS = 9; - optional uint32 UncompressedSize = 10; + } + + message TTopicResult { + optional string Topic = 1; //mandatory + repeated TPartitionResult PartitionResult = 2; + + optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 3; + optional string ErrorReason = 4; //filled if ErrorCode is not OK + + // add read rule version here + + } + repeated TTopicResult TopicResult = 2; + + message TSessionResult { + optional string Session = 1; + + message TPartitionResult { + optional string Topic = 1; + optional uint64 Partition = 2; + + repeated uint64 NextCommits = 16; + optional uint64 LastReadId = 17; + optional uint64 ReadIdCommitted = 18; + optional uint64 AssignId = 19; + + optional uint64 Timestamp = 20; + } + optional uint64 Timestamp = 5; + + optional string ClientNode = 6; + optional string ProxyNode = 7; + + optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 8; + optional string ErrorReason = 9; //filled if ErrorCode is not OK + + repeated TPartitionResult PartitionResult = 10; + } + repeated TSessionResult SessionResult = 3; + } + + optional TCmdGetPartitionOffsetsResult CmdGetPartitionOffsetsResult = 1; + optional TCmdGetTopicMetadataResult CmdGetTopicMetadataResult = 2; + optional TCmdGetPartitionLocationsResult CmdGetPartitionLocationsResult = 3; + optional TCmdGetPartitionStatusResult CmdGetPartitionStatusResult = 4; + optional TCmdGetReadSessionsInfoResult CmdGetReadSessionsInfoResult = 5; + +} + + +message TCmdReadResult { + message TResult { + optional uint64 Offset = 1; + optional bytes Data = 2; + optional bytes SourceId = 3; + optional uint64 SeqNo = 4; + + optional uint32 PartNo = 5; //will be filled for multi-parted message + optional uint32 TotalParts = 6; //will be filled for multi-parted message + optional uint32 TotalSize = 7; //will be filled for PartNo=0 TotalParts > 1 + + optional uint64 WriteTimestampMS = 8; + optional uint64 CreateTimestampMS = 9; + optional uint32 UncompressedSize = 10; optional string PartitionKey = 11; optional bytes ExplicitHash = 12; - } - optional uint64 MaxOffset = 2; - repeated TResult Result = 3; - optional uint32 BlobsFromDisk = 4; - optional uint32 BlobsFromCache = 5; - optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 6; //filled for FetchRequest if there is partition error - optional string ErrorReason = 7; //filled for FetchRequest if there is partition error - optional uint64 BlobsCachedSize = 8; - optional uint64 SizeLag = 9; - optional uint64 RealReadOffset = 10; + } + optional uint64 MaxOffset = 2; + repeated TResult Result = 3; + optional uint32 BlobsFromDisk = 4; + optional uint32 BlobsFromCache = 5; + optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 6; //filled for FetchRequest if there is partition error + optional string ErrorReason = 7; //filled for FetchRequest if there is partition error + optional uint64 BlobsCachedSize = 8; + optional uint64 SizeLag = 9; + optional uint64 RealReadOffset = 10; optional uint64 WaitQuotaTimeMs = 11; -} - - -message TPersQueueFetchResponse { - message TPartResult { - optional string Topic = 1; - optional int32 Partition = 2; - optional TCmdReadResult ReadResult = 3; - } - - repeated TPartResult PartResult = 1; -} - - -message TPersQueuePartitionResponse { - message TCmdWriteResult { - optional bool AlreadyWritten = 1; //true if record already presented in partition - optional bytes SourceId = 2; - optional int64 SeqNo = 3; - optional int64 Offset = 4; //offset of this record or last for this sourceId if AlreadyWritten is true - optional int32 PartNo = 5; - optional int64 WriteTimestampMS = 6; - optional int64 MaxSeqNo = 7; //last seqNo is set when AlreadyWritten is true - - //write stat - optional uint32 PartitionQuotedTimeMs = 8; +} + + +message TPersQueueFetchResponse { + message TPartResult { + optional string Topic = 1; + optional int32 Partition = 2; + optional TCmdReadResult ReadResult = 3; + } + + repeated TPartResult PartResult = 1; +} + + +message TPersQueuePartitionResponse { + message TCmdWriteResult { + optional bool AlreadyWritten = 1; //true if record already presented in partition + optional bytes SourceId = 2; + optional int64 SeqNo = 3; + optional int64 Offset = 4; //offset of this record or last for this sourceId if AlreadyWritten is true + optional int32 PartNo = 5; + optional int64 WriteTimestampMS = 6; + optional int64 MaxSeqNo = 7; //last seqNo is set when AlreadyWritten is true + + //write stat + optional uint32 PartitionQuotedTimeMs = 8; optional uint32 TopicQuotedTimeMs = 11; - optional uint32 TotalTimeInPartitionQueueMs = 9; - optional uint32 WriteTimeMs = 10; - } - - message TCmdGetMaxSeqNoResult { - message TSourceIdInfo { - optional bytes SourceId = 1; //mandatory - optional int64 SeqNo = 2; // will not be set if no records with such sourceId in partition - optional int64 Offset = 3; //the same - optional int64 WriteTimestampMS = 4; //the same + optional uint32 TotalTimeInPartitionQueueMs = 9; + optional uint32 WriteTimeMs = 10; + } + + message TCmdGetMaxSeqNoResult { + message TSourceIdInfo { + optional bytes SourceId = 1; //mandatory + optional int64 SeqNo = 2; // will not be set if no records with such sourceId in partition + optional int64 Offset = 3; //the same + optional int64 WriteTimestampMS = 4; //the same optional bool Explicit = 5; optional NKikimrPQ.TMessageGroupInfo.EState State = 6; - } - repeated TSourceIdInfo SourceIdInfo = 2; - } - - message TCmdGetClientOffsetResult { - optional uint64 Offset = 2; // will not be set if no client offset is known - optional uint64 EndOffset = 3; - optional uint64 WriteTimestampMS = 5; //kikimr Write Timestamp of record on Offset (next to be readed record); is not set if no such record exists (no lag) - optional uint64 CreateTimestampMS = 6; //create Timestamp of record on Offset (next to be readed record); is not set if no such record exists (no lag) - optional uint64 SizeLag = 7; - optional uint64 WriteTimestampEstimateMS = 8; - - } - - message TCmdGetOwnershipResult { - optional string OwnerCookie = 1; - } - - repeated TCmdWriteResult CmdWriteResult = 1; - optional TCmdGetMaxSeqNoResult CmdGetMaxSeqNoResult = 2; - optional TCmdReadResult CmdReadResult = 3; - optional TCmdGetClientOffsetResult CmdGetClientOffsetResult = 4; - optional TCmdGetOwnershipResult CmdGetOwnershipResult = 5; - - optional uint64 Cookie = 6; -} + } + repeated TSourceIdInfo SourceIdInfo = 2; + } + + message TCmdGetClientOffsetResult { + optional uint64 Offset = 2; // will not be set if no client offset is known + optional uint64 EndOffset = 3; + optional uint64 WriteTimestampMS = 5; //kikimr Write Timestamp of record on Offset (next to be readed record); is not set if no such record exists (no lag) + optional uint64 CreateTimestampMS = 6; //create Timestamp of record on Offset (next to be readed record); is not set if no such record exists (no lag) + optional uint64 SizeLag = 7; + optional uint64 WriteTimestampEstimateMS = 8; + + } + + message TCmdGetOwnershipResult { + optional string OwnerCookie = 1; + } + + repeated TCmdWriteResult CmdWriteResult = 1; + optional TCmdGetMaxSeqNoResult CmdGetMaxSeqNoResult = 2; + optional TCmdReadResult CmdReadResult = 3; + optional TCmdGetClientOffsetResult CmdGetClientOffsetResult = 4; + optional TCmdGetOwnershipResult CmdGetOwnershipResult = 5; + + optional uint64 Cookie = 6; +} diff --git a/ydb/core/protos/netclassifier.proto b/ydb/core/protos/netclassifier.proto index 65f2f5fabf..0c71eae7bf 100644 --- a/ydb/core/protos/netclassifier.proto +++ b/ydb/core/protos/netclassifier.proto @@ -3,15 +3,15 @@ package NKikimrNetClassifier; option java_package = "ru.yandex.kikimr.proto"; message TNetClassifierUpdaterConfig { - enum EFormat { - TSV = 0; - NETBOX = 1; - } - + enum EFormat { + TSV = 0; + NETBOX = 1; + } + optional string NetDataSourceUrl = 1; optional uint32 RetryIntervalSeconds = 2; optional uint32 NetDataUpdateIntervalSeconds = 3; - optional EFormat Format = 4 [default = TSV]; + optional EFormat Format = 4 [default = TSV]; repeated string NetBoxTags = 5; } diff --git a/ydb/core/protos/node_limits.proto b/ydb/core/protos/node_limits.proto index 0691e56f80..5aaf8c6fda 100644 --- a/ydb/core/protos/node_limits.proto +++ b/ydb/core/protos/node_limits.proto @@ -1,12 +1,12 @@ -package NKikimrNodeLimits; -option java_package = "ru.yandex.kikimr.proto"; - -message TNodeLimitsConfig { - message TPersQueueNodeConfig { - optional uint64 SharedCacheSizeMb = 1 [default = 8192]; - optional uint32 CacheKeepTimeSec = 2 [default = 10]; - } - - optional TPersQueueNodeConfig PersQueueNodeConfig = 1; -} - +package NKikimrNodeLimits; +option java_package = "ru.yandex.kikimr.proto"; + +message TNodeLimitsConfig { + message TPersQueueNodeConfig { + optional uint64 SharedCacheSizeMb = 1 [default = 8192]; + optional uint32 CacheKeepTimeSec = 2 [default = 10]; + } + + optional TPersQueueNodeConfig PersQueueNodeConfig = 1; +} + diff --git a/ydb/core/protos/pqconfig.proto b/ydb/core/protos/pqconfig.proto index c7a97924c0..7c85927449 100644 --- a/ydb/core/protos/pqconfig.proto +++ b/ydb/core/protos/pqconfig.proto @@ -8,54 +8,54 @@ import "ydb/core/protos/netclassifier.proto"; import "ydb/core/protos/services.proto"; import "library/cpp/actors/protos/actors.proto"; - -package NKikimrPQ; -option java_package = "ru.yandex.kikimr.proto"; - -message TPartitionMeta { - optional uint64 StartOffset = 1; - optional uint64 EndOffset = 2; -} - +package NKikimrPQ; +option java_package = "ru.yandex.kikimr.proto"; + + +message TPartitionMeta { + optional uint64 StartOffset = 1; + optional uint64 EndOffset = 2; +} + message TPQConfig { optional uint32 ACLRetryTimeoutSec = 1 [default = 300]; - optional uint32 BalancerMetadataRetryTimeoutSec = 2 [default = 240]; - optional uint32 MaxBlobsPerLevel = 3 [default = 64]; // will produce 8mb blobs at last level - //32 => 1mb blobs at last level - optional uint32 MaxBlobSize = 4 [default = 8388608]; //8mb + optional uint32 BalancerMetadataRetryTimeoutSec = 2 [default = 240]; + optional uint32 MaxBlobsPerLevel = 3 [default = 64]; // will produce 8mb blobs at last level + //32 => 1mb blobs at last level + optional uint32 MaxBlobSize = 4 [default = 8388608]; //8mb optional uint32 ClustersUpdateTimeoutSec = 5 [default = 30]; optional bool Enabled = 6 [default = false]; // Enable PQ proxies optional uint32 MetaCacheTimeoutSec = 7 [default = 30]; - optional uint32 MaxReadCookies = 8 [default = 100000]; - - optional bool CheckACL = 9 [default = false]; - + optional uint32 MaxReadCookies = 8 [default = 100000]; + + optional bool CheckACL = 9 [default = false]; + optional uint32 SourceIdCleanupPeriodSec = 10 [default = 60]; // 24 hours // TODO: What is '24 hours'? Default is 60 seconds. optional uint32 SourceIdMaxLifetimeSec = 11 [default = 1382400]; // 16 days - optional uint32 SourceIdTotalShardsCount = 12 [default = 131072]; - - optional NKikimrClient.TKeyValueRequest.ETactic Tactic = 13 [default = MAX_THROUGHPUT]; - - optional bool RequireCredentialsInNewProtocol = 14 [default = false]; + optional uint32 SourceIdTotalShardsCount = 12 [default = 131072]; + + optional NKikimrClient.TKeyValueRequest.ETactic Tactic = 13 [default = MAX_THROUGHPUT]; + + optional bool RequireCredentialsInNewProtocol = 14 [default = false]; optional string ClusterTablePath = 15 [default = "/Root/PQ/Config/V2/Cluster"]; optional string VersionTablePath = 16 [default = "/Root/PQ/Config/V2/Versions"]; optional uint32 ClustersUpdateTimeoutOnErrorSec = 17 [default = 1]; - optional uint32 WriteInitLatencyBigMs = 19 [default = 900]; - optional uint32 ReadInitLatencyBigMs = 20 [default = 900]; - optional uint32 CommitLatencyBigMs = 21 [default = 900]; - optional uint32 WriteLatencyBigMs = 22 [default = 550]; - optional uint32 ReadLatencyBigMs = 23 [default = 550]; - optional uint32 ReadLatencyFromDiskBigMs = 28 [default = 1000]; - - + optional uint32 WriteInitLatencyBigMs = 19 [default = 900]; + optional uint32 ReadInitLatencyBigMs = 20 [default = 900]; + optional uint32 CommitLatencyBigMs = 21 [default = 900]; + optional uint32 WriteLatencyBigMs = 22 [default = 550]; + optional uint32 ReadLatencyBigMs = 23 [default = 550]; + optional uint32 ReadLatencyFromDiskBigMs = 28 [default = 1000]; + + message TQuotingConfig { optional bool EnableQuoting = 1; optional string QuotersDirectoryPath = 2 [default = "/Root/PersQueue/System/Quoters"]; @@ -69,22 +69,22 @@ message TPQConfig { } optional bool EnableReadQuoting = 4 [default = false]; optional uint64 ReadCreditBytes = 5 [default = 100000]; - - optional uint64 QuotaWaitDurationMs = 6 [default = 0]; // 0 means infinity - - optional bool PartitionReadQuotaIsTwiceWriteQuota = 7 [default = false]; - + + optional uint64 QuotaWaitDurationMs = 6 [default = 0]; // 0 means infinity + + optional bool PartitionReadQuotaIsTwiceWriteQuota = 7 [default = false]; + } optional TQuotingConfig QuotingConfig = 18; - + // Time duration that we wait before we consider remote cluster enabled for load balancing purposes optional uint32 RemoteClusterEnabledDelaySec = 24 [default = 300]; // 5 minutes optional uint32 CloseClientSessionWithEnabledRemotePreferredClusterDelaySec = 25 [default = 300]; // 5 minutes - - optional bool RoundRobinPartitionMapping = 26 [default = true]; - - optional string Root = 27 [default = "/Root/PQ"]; - + + optional bool RoundRobinPartitionMapping = 26 [default = true]; + + optional string Root = 27 [default = "/Root/PQ"]; + message TPQLibSettings { optional uint32 ThreadsCount = 1 [default = 1]; optional uint32 CompressionPoolThreads = 2 [default = 1]; @@ -97,13 +97,13 @@ message TPQConfig { } optional TMirrorConfig MirrorConfig = 29; - - optional uint64 MinWriteLatencyMs = 30 [default = 0]; - - repeated TChannelProfile ChannelProfiles = 31; - + + optional uint64 MinWriteLatencyMs = 30 [default = 0]; + + repeated TChannelProfile ChannelProfiles = 31; + optional bool TopicsAreFirstClassCitizen = 32 [default = false]; - + optional string SourceIdTablePath = 33 [default = "/Root/PQ/SourceIdMeta2"]; repeated uint32 ValidWriteSpeedLimitsKbPerSec = 34; @@ -123,9 +123,9 @@ message TPQConfig { } optional TBillingMeteringConfig BillingMeteringConfig = 35; - - optional NKikimrNodeLimits.TNodeLimitsConfig.TPersQueueNodeConfig PersQueueNodeConfig = 36; - + + optional NKikimrNodeLimits.TNodeLimitsConfig.TPersQueueNodeConfig PersQueueNodeConfig = 36; + optional bool EnableProtoSourceIdInfo = 37 [default = false]; optional string Database = 38; @@ -133,7 +133,7 @@ message TPQConfig { message TClientServiceType { optional string Name = 1 [default = "data-transfer"]; optional string ReadPricingName = 2; - optional uint32 MaxReadRulesCountPerTopic = 3 [default = 0]; // 0 means no limit. If you want to allow only zero count, then do not specify ClientServiceType. + optional uint32 MaxReadRulesCountPerTopic = 3 [default = 0]; // 0 means no limit. If you want to allow only zero count, then do not specify ClientServiceType. } repeated TClientServiceType ClientServiceType = 39; @@ -177,39 +177,39 @@ message TMirrorPartitionConfig { optional bool SyncWriteTime = 10 [default = false]; } -message TPartitionConfig { +message TPartitionConfig { optional int32 MaxCountInPartition = 1 [default = 10000000]; - optional int64 MaxSizeInPartition = 2 [default = 1099511627776]; - optional int32 LifetimeSeconds = 3; //mandatory, must be set + optional int64 MaxSizeInPartition = 2 [default = 1099511627776]; + optional int32 LifetimeSeconds = 3; //mandatory, must be set // List of ClientIds, for which we don't delete data until they are read by these clients - repeated string ImportantClientId = 4; //can be empty - optional uint32 LowWatermark = 5 [default = 6291456]; //6Mb, compact blobs if they at least this big. + repeated string ImportantClientId = 4; //can be empty + optional uint32 LowWatermark = 5 [default = 6291456]; //6Mb, compact blobs if they at least this big. optional uint32 SourceIdLifetimeSeconds = 6 [ default = 1382400]; //16 days optional uint32 SourceIdMaxCounts = 31 [default = 6000000]; // Maximum number of stored sourceId records in partition // default - generate 5 new source id each second during 14 days - - optional uint64 WriteSpeedInBytesPerSecond = 7 [default = 50000000]; - optional uint64 BurstSize = 8 [default = 50000000]; - - message TReadQuota { - optional string ClientId = 1; - optional uint64 SpeedInBytesPerSecond = 2; - optional uint64 BurstSize = 3; - } - - repeated TReadQuota ReadQuota = 11; - optional uint64 MaxWriteInflightSize = 9 [default = 30000000]; //near 30mb - optional uint64 BorderWriteInflightSize = 12 [default = 10000000]; //near 10mb - - optional uint32 NumChannels = 10 [default = 10]; - - optional uint32 TotalPartitions = 13 [default = 1]; + + optional uint64 WriteSpeedInBytesPerSecond = 7 [default = 50000000]; + optional uint64 BurstSize = 8 [default = 50000000]; + + message TReadQuota { + optional string ClientId = 1; + optional uint64 SpeedInBytesPerSecond = 2; + optional uint64 BurstSize = 3; + } + + repeated TReadQuota ReadQuota = 11; + optional uint64 MaxWriteInflightSize = 9 [default = 30000000]; //near 30mb + optional uint64 BorderWriteInflightSize = 12 [default = 10000000]; //near 10mb + + optional uint32 NumChannels = 10 [default = 10]; + + optional uint32 TotalPartitions = 13 [default = 1]; repeated TChannelProfile ExplicitChannelProfiles = 14; optional TMirrorPartitionConfig MirrorFrom = 15; -}; - +}; + message TPartitionKeyRange { // Inclusive left border. Emptiness means -inf. optional bytes FromBound = 1; @@ -224,41 +224,41 @@ message TMessageGroup { optional TPartitionKeyRange KeyRange = 2; } -message TPQTabletConfig { - optional uint64 CacheSize = 1 [default = 104857600]; //100Mb, per tablet - optional TPartitionConfig PartitionConfig = 2; //mandatory +message TPQTabletConfig { + optional uint64 CacheSize = 1 [default = 104857600]; //100Mb, per tablet + optional TPartitionConfig PartitionConfig = 2; //mandatory repeated uint32 PartitionIds = 3; //will be filled by schemeshard, don't touch. Deprecated by Partitions (id: 31) optional string TopicName = 4; // also filled by schemeshard - optional uint32 Version = 5; //also filled by schemeshard - optional bool LocalDC = 6 [default = false]; + optional uint32 Version = 5; //also filled by schemeshard + optional bool LocalDC = 6 [default = false]; optional bool RequireAuthWrite = 7 [default = false]; optional bool RequireAuthRead = 8 [default = false]; optional string Producer = 9; optional string Ident = 10; optional string Topic = 11; optional string DC = 12; - - // ReadRules, ReadTopicTimestampMs, ReadRuleVersions, ConsumerFormatVersions and ConsumersCodecs form a consumer data array stored by columns - repeated string ReadRules = 13; - repeated uint64 ReadFromTimestampsMs = 14; - repeated uint64 ConsumerFormatVersions = 15; - message TCodecs { - repeated int64 Ids = 1; - repeated string Codecs = 2; - } - repeated TCodecs ConsumerCodecs = 16; + + // ReadRules, ReadTopicTimestampMs, ReadRuleVersions, ConsumerFormatVersions and ConsumersCodecs form a consumer data array stored by columns + repeated string ReadRules = 13; + repeated uint64 ReadFromTimestampsMs = 14; + repeated uint64 ConsumerFormatVersions = 15; + message TCodecs { + repeated int64 Ids = 1; + repeated string Codecs = 2; + } + repeated TCodecs ConsumerCodecs = 16; repeated string ReadRuleServiceTypes = 17; - - optional uint64 FormatVersion = 20; - optional TCodecs Codecs = 21; - - repeated uint64 ReadRuleVersions = 22; - repeated uint64 ReadRuleGenerations = 32; - + + optional uint64 FormatVersion = 20; + optional TCodecs Codecs = 21; + + repeated uint64 ReadRuleVersions = 22; + repeated uint64 ReadRuleGenerations = 32; + optional string TopicPath = 23; - - optional uint64 AbcId = 24; - optional string AbcSlug = 25; + + optional uint64 AbcId = 24; + optional string AbcSlug = 25; optional string YcCloudId = 26 [default = ""]; optional string YcFolderId = 27 [default = ""]; @@ -277,7 +277,7 @@ message TPQTabletConfig { } repeated TPartition Partitions = 31; // filled by schemeshard } - + message TMessageGroupInfo { enum EState { STATE_UNKNOWN = 0; @@ -293,80 +293,80 @@ message TMessageGroupInfo { optional TPartitionKeyRange KeyRange = 6; optional EState State = 7; } - + message TBootstrapConfig { repeated TMessageGroup ExplicitMessageGroups = 1; } -message TUpdateConfig { - optional uint64 TxId = 1; - optional TPQTabletConfig TabletConfig = 2; +message TUpdateConfig { + optional uint64 TxId = 1; + optional TPQTabletConfig TabletConfig = 2; optional TBootstrapConfig BootstrapConfig = 3; // passed only upon creation -} - -message TUpdateBalancerConfig { //for schemeshard use only - optional uint64 TxId = 1; - optional uint64 PathId = 2; - optional string TopicName = 3; - optional string Path = 4; - optional uint32 Version = 5; - - optional uint32 TotalGroupCount = 11; - optional uint32 NextPartitionId = 12; - - optional TPQTabletConfig TabletConfig = 7; - optional uint32 PartitionPerTablet = 8; +} + +message TUpdateBalancerConfig { //for schemeshard use only + optional uint64 TxId = 1; + optional uint64 PathId = 2; + optional string TopicName = 3; + optional string Path = 4; + optional uint32 Version = 5; + + optional uint32 TotalGroupCount = 11; + optional uint32 NextPartitionId = 12; + + optional TPQTabletConfig TabletConfig = 7; + optional uint32 PartitionPerTablet = 8; optional uint64 SchemeShardId = 9; - message TPartition { - optional uint32 Partition = 1; - optional uint64 TabletId = 2; - optional uint32 Group = 3; - } - - repeated TPartition Partitions = 6; - - message TTablet { - optional uint64 TabletId = 1; - optional uint64 Owner = 2; - optional uint64 Idx = 3; - } - - repeated TTablet Tablets = 10; -} - -message TDescribe { -} - -message TDescribeResponse { - optional string TopicName = 1; - optional uint32 Version = 2; - optional TPQTabletConfig Config = 3; - optional uint32 PartitionPerTablet = 4; + message TPartition { + optional uint32 Partition = 1; + optional uint64 TabletId = 2; + optional uint32 Group = 3; + } + + repeated TPartition Partitions = 6; + + message TTablet { + optional uint64 TabletId = 1; + optional uint64 Owner = 2; + optional uint64 Idx = 3; + } + + repeated TTablet Tablets = 10; +} + +message TDescribe { +} + +message TDescribeResponse { + optional string TopicName = 1; + optional uint32 Version = 2; + optional TPQTabletConfig Config = 3; + optional uint32 PartitionPerTablet = 4; optional uint64 SchemeShardId = 6; - optional uint64 BalancerTabletId = 7; - - optional bytes SecurityObject = 8; //NACLibProto.TSecurityObject - - message TPartition { - optional uint32 Partition = 1; - optional uint64 TabletId = 2; - } - - repeated TPartition Partitions = 5; -} - + optional uint64 BalancerTabletId = 7; + + optional bytes SecurityObject = 8; //NACLibProto.TSecurityObject + + message TPartition { + optional uint32 Partition = 1; + optional uint64 TabletId = 2; + } + + repeated TPartition Partitions = 5; +} + message TCheckACL { optional EOperation Operation = 2; optional NPersQueueCommon.Credentials Auth = 4; //leaved for compatibility - optional string User = 5; - optional bytes Token = 6; + optional string User = 5; + optional bytes Token = 6; } message TCheckACLResponse { optional EAccess Access = 1; optional string Topic = 2; optional string Path = 3; - + optional string Error = 4; } @@ -381,104 +381,104 @@ enum EAccess { UNKNOWN = 3; } -message TGetPartitionIdForWrite { -} - -message TGetPartitionIdForWriteResponse { - optional uint64 PartitionId = 1; -} - -message TRegisterReadSession { - optional string Session = 1; +message TGetPartitionIdForWrite { +} + +message TGetPartitionIdForWriteResponse { + optional uint64 PartitionId = 1; +} + +message TRegisterReadSession { + optional string Session = 1; optional NActorsProto.TActorId PipeClient = 2; - optional string ClientId = 3; - optional string ClientNode = 4; - - repeated uint32 Groups = 5; -} - -message TGetReadSessionsInfo { - optional string ClientId = 1; -} - - -message TReadSessionStatus { //request to PQ_READ_PROXY -} - - -message TReadSessionStatusResponse { - message TPartitionStatus { - repeated uint64 NextCommits = 1; - optional uint64 LastReadId = 2; - optional uint64 ReadIdCommitted = 3; - optional uint64 AssignId = 4; - optional string Topic = 5; - optional uint64 Partition = 7; - optional uint64 TimestampMs = 8; - } - - repeated TPartitionStatus Partition = 8; - - optional string Session = 4; - - optional uint64 Timestamp = 5; - - optional string ClientNode = 6; - optional uint32 ProxyNodeId = 7; - -} - - -message TReadSessionsInfoResponse { - message TPartitionInfo { - optional uint32 Partition = 1; - optional string ClientNode = 2; - optional uint32 ProxyNodeId = 3; - optional string Session = 4; - optional uint64 Timestamp = 5; - } - repeated TPartitionInfo PartitionInfo = 1; - optional uint64 TabletId = 2; - - message TReadSessionInfo { + optional string ClientId = 3; + optional string ClientNode = 4; + + repeated uint32 Groups = 5; +} + +message TGetReadSessionsInfo { + optional string ClientId = 1; +} + + +message TReadSessionStatus { //request to PQ_READ_PROXY +} + + +message TReadSessionStatusResponse { + message TPartitionStatus { + repeated uint64 NextCommits = 1; + optional uint64 LastReadId = 2; + optional uint64 ReadIdCommitted = 3; + optional uint64 AssignId = 4; + optional string Topic = 5; + optional uint64 Partition = 7; + optional uint64 TimestampMs = 8; + } + + repeated TPartitionStatus Partition = 8; + + optional string Session = 4; + + optional uint64 Timestamp = 5; + + optional string ClientNode = 6; + optional uint32 ProxyNodeId = 7; + +} + + +message TReadSessionsInfoResponse { + message TPartitionInfo { + optional uint32 Partition = 1; + optional string ClientNode = 2; + optional uint32 ProxyNodeId = 3; + optional string Session = 4; + optional uint64 Timestamp = 5; + } + repeated TPartitionInfo PartitionInfo = 1; + optional uint64 TabletId = 2; + + message TReadSessionInfo { optional NActorsProto.TActorId SessionActor = 1; - optional string Session = 2; - - } - repeated TReadSessionInfo ReadSessions = 3; -} - -message TLockPartition { - optional uint32 Partition = 1; - optional uint64 TabletId = 2; - optional string Topic = 3; - optional uint32 Generation = 4; - optional uint32 Step = 5; - optional string Session = 6; - optional string ClientId = 7; + optional string Session = 2; + + } + repeated TReadSessionInfo ReadSessions = 3; +} + +message TLockPartition { + optional uint32 Partition = 1; + optional uint64 TabletId = 2; + optional string Topic = 3; + optional uint32 Generation = 4; + optional uint32 Step = 5; + optional string Session = 6; + optional string ClientId = 7; optional NActorsProto.TActorId PipeClient = 8; optional string Path = 9; -} - -message TReleasePartition { - optional string Topic = 1; - optional uint64 Generation = 2; - optional string Session = 3; - optional string ClientId = 4; - optional uint32 Count = 5; +} + +message TReleasePartition { + optional string Topic = 1; + optional uint64 Generation = 2; + optional string Session = 3; + optional string ClientId = 4; + optional uint32 Count = 5; optional NActorsProto.TActorId PipeClient = 6; - optional uint32 Group = 7; + optional uint32 Group = 7; optional string Path = 8; -} - -message TPartitionReleased { - optional uint32 Partition = 1; - optional string Topic = 2; - optional string Session = 3; +} + +message TPartitionReleased { + optional uint32 Partition = 1; + optional string Topic = 2; + optional string Session = 3; optional NActorsProto.TActorId PipeClient = 4; - optional string ClientId = 5; -} - + optional string ClientId = 5; +} + enum ETabletState { ENormal = 0; EDropped = 1; @@ -500,160 +500,160 @@ message TTabletState { optional ETabletState State = 1; } -enum EStatus{ - OK = 0; - ERROR = 1; - ERROR_BAD_VERSION = 2; - ERROR_UPDATE_IN_PROGRESS = 3; -} - -message TUpdateConfigResponse { - optional uint64 TxId = 1; - optional uint64 Origin = 2; - optional EStatus Status = 3; -} - -message TOffsets { - optional string ClientId = 1; -} - -message TOffsetsResponse { - message TPartResult { - optional int32 Partition = 1; - optional int64 StartOffset = 2; - optional int64 EndOffset = 3; - optional int64 ClientOffset = 4; //Presented only if clientId is specified in request - optional int64 WriteTimestampMS = 5; //Presented only if clientId is set and Wtime is computed(can be not computed for some time after commit) - optional int64 CreateTimestampMS = 6; //the same - - optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 7; - optional string ErrorReason = 8; //filled if ErrorCode is not OK - - optional int64 ClientReadOffset = 9; //Presented only if clientId is specified in request - optional int64 ReadWriteTimestampMS = 10; //Presented only if clientId is set and Wtime is computed(can be not computed for some time after commit) - optional int64 ReadCreateTimestampMS = 11; //the same - - optional uint64 WriteTimestampEstimateMS = 12; - } - - optional uint64 TabletId = 1; - repeated TPartResult PartResult = 2; -} - -message TStatus { - optional string ClientId = 1; -} - -message TClientPosition { - optional int64 Offset = 1; - optional uint64 Size = 2; - optional uint64 WriteTimestamp = 3; - optional uint64 CreateTimestamp = 4; -} - -message TClientInfo { - optional string ClientId = 1; - optional TClientPosition WritePosition = 2; - optional TClientPosition ReadPosition = 3; - - optional uint64 WriteLagMs = 4; - optional uint64 ReadLagMs = 5; - optional uint64 LastReadTimestampMs = 8; - optional uint64 TotalLagMs = 9; -} - -message TStatusResponse { - enum EStatus { - STATUS_OK = 0; - STATUS_UNKNOWN = 1; - STATUS_INITIALIZING = 2; - STATUS_PARTITION_IS_FULL = 3; - STATUS_DISK_IS_FULL = 4; - } - +enum EStatus{ + OK = 0; + ERROR = 1; + ERROR_BAD_VERSION = 2; + ERROR_UPDATE_IN_PROGRESS = 3; +} + +message TUpdateConfigResponse { + optional uint64 TxId = 1; + optional uint64 Origin = 2; + optional EStatus Status = 3; +} + +message TOffsets { + optional string ClientId = 1; +} + +message TOffsetsResponse { + message TPartResult { + optional int32 Partition = 1; + optional int64 StartOffset = 2; + optional int64 EndOffset = 3; + optional int64 ClientOffset = 4; //Presented only if clientId is specified in request + optional int64 WriteTimestampMS = 5; //Presented only if clientId is set and Wtime is computed(can be not computed for some time after commit) + optional int64 CreateTimestampMS = 6; //the same + + optional NPersQueue.NErrorCode.EErrorCode ErrorCode = 7; + optional string ErrorReason = 8; //filled if ErrorCode is not OK + + optional int64 ClientReadOffset = 9; //Presented only if clientId is specified in request + optional int64 ReadWriteTimestampMS = 10; //Presented only if clientId is set and Wtime is computed(can be not computed for some time after commit) + optional int64 ReadCreateTimestampMS = 11; //the same + + optional uint64 WriteTimestampEstimateMS = 12; + } + + optional uint64 TabletId = 1; + repeated TPartResult PartResult = 2; +} + +message TStatus { + optional string ClientId = 1; +} + +message TClientPosition { + optional int64 Offset = 1; + optional uint64 Size = 2; + optional uint64 WriteTimestamp = 3; + optional uint64 CreateTimestamp = 4; +} + +message TClientInfo { + optional string ClientId = 1; + optional TClientPosition WritePosition = 2; + optional TClientPosition ReadPosition = 3; + + optional uint64 WriteLagMs = 4; + optional uint64 ReadLagMs = 5; + optional uint64 LastReadTimestampMs = 8; + optional uint64 TotalLagMs = 9; +} + +message TStatusResponse { + enum EStatus { + STATUS_OK = 0; + STATUS_UNKNOWN = 1; + STATUS_INITIALIZING = 2; + STATUS_PARTITION_IS_FULL = 3; + STATUS_DISK_IS_FULL = 4; + } + message TErrorMessage { optional uint64 Timestamp = 1; optional NKikimrServices.EServiceKikimr Service = 2; optional string Message = 3; } - message TPartResult { - optional int32 Partition = 1; // mandatory - optional EStatus Status = 2; - optional int32 LastInitDurationSeconds = 3; - optional int32 CreationTimestamp = 4; - optional int32 GapCount = 5; - optional int64 GapSize = 6; - optional int64 AvgWriteSpeedPerSec = 7; - optional int64 AvgWriteSpeedPerMin = 8; - optional int64 AvgWriteSpeedPerHour = 9; - optional int64 AvgWriteSpeedPerDay = 10; - - //if no clientId specified and request then sum there speed - optional int64 AvgReadSpeedPerSec = 11; - optional int64 AvgReadSpeedPerMin = 12; - optional int64 AvgReadSpeedPerHour = 13; - optional int64 AvgReadSpeedPerDay = 14; - - optional int64 ReadBytesQuota = 15; - optional int64 WriteBytesQuota = 16; - - optional int64 PartitionSize = 17; - - optional TClientInfo LagsInfo = 18; //just for one client! - - optional int64 StartOffset = 19; - optional int64 EndOffset = 20; - - optional int64 LastWriteTimestampMs = 21; - optional int64 WriteLagMs = 22; - - optional int64 AvgQuotaSpeedPerSec = 23; - optional int64 AvgQuotaSpeedPerMin = 24; - optional int64 AvgQuotaSpeedPerHour = 25; - optional int64 AvgQuotaSpeedPerDay = 26; - - optional int64 SourceIdCount = 27; - optional int64 SourceIdRetentionPeriodSec = 28; + message TPartResult { + optional int32 Partition = 1; // mandatory + optional EStatus Status = 2; + optional int32 LastInitDurationSeconds = 3; + optional int32 CreationTimestamp = 4; + optional int32 GapCount = 5; + optional int64 GapSize = 6; + optional int64 AvgWriteSpeedPerSec = 7; + optional int64 AvgWriteSpeedPerMin = 8; + optional int64 AvgWriteSpeedPerHour = 9; + optional int64 AvgWriteSpeedPerDay = 10; + + //if no clientId specified and request then sum there speed + optional int64 AvgReadSpeedPerSec = 11; + optional int64 AvgReadSpeedPerMin = 12; + optional int64 AvgReadSpeedPerHour = 13; + optional int64 AvgReadSpeedPerDay = 14; + + optional int64 ReadBytesQuota = 15; + optional int64 WriteBytesQuota = 16; + + optional int64 PartitionSize = 17; + + optional TClientInfo LagsInfo = 18; //just for one client! + + optional int64 StartOffset = 19; + optional int64 EndOffset = 20; + + optional int64 LastWriteTimestampMs = 21; + optional int64 WriteLagMs = 22; + + optional int64 AvgQuotaSpeedPerSec = 23; + optional int64 AvgQuotaSpeedPerMin = 24; + optional int64 AvgQuotaSpeedPerHour = 25; + optional int64 AvgQuotaSpeedPerDay = 26; + + optional int64 SourceIdCount = 27; + optional int64 SourceIdRetentionPeriodSec = 28; repeated TErrorMessage Errors = 29; - } - - optional uint64 TabletId = 1; - repeated TPartResult PartResult = 2; -} - -message THasDataInfo { - optional int32 Partition = 1; - optional uint64 Offset = 2; - optional uint64 Deadline = 3; //in ms + } + + optional uint64 TabletId = 1; + repeated TPartResult PartResult = 2; +} + +message THasDataInfo { + optional int32 Partition = 1; + optional uint64 Offset = 2; + optional uint64 Deadline = 3; //in ms optional NActorsProto.TActorId Sender = 4; - optional uint64 Cookie = 5; - - optional string ClientId = 6; -} - -message THasDataInfoResponse { //signal - optional uint64 EndOffset = 1; - optional uint64 Cookie = 2; - optional uint64 SizeLag = 3; - optional uint64 WriteTimestampEstimateMS = 4; -} - -message TBatchHeader { - optional uint64 Offset = 1; - optional uint32 PartNo = 2; - optional uint32 Count = 3; - optional uint32 InternalPartsCount = 4; - optional uint32 UnpackedSize = 5; - optional uint32 PayloadSize = 6; - enum EPayloadFormat { - EUncompressed = 0; - ECompressed = 1; - } - optional uint32 Format = 7; //for EPayloadFormat + optional uint64 Cookie = 5; + + optional string ClientId = 6; +} + +message THasDataInfoResponse { //signal + optional uint64 EndOffset = 1; + optional uint64 Cookie = 2; + optional uint64 SizeLag = 3; + optional uint64 WriteTimestampEstimateMS = 4; +} + +message TBatchHeader { + optional uint64 Offset = 1; + optional uint32 PartNo = 2; + optional uint32 Count = 3; + optional uint32 InternalPartsCount = 4; + optional uint32 UnpackedSize = 5; + optional uint32 PayloadSize = 6; + enum EPayloadFormat { + EUncompressed = 0; + ECompressed = 1; + } + optional uint32 Format = 7; //for EPayloadFormat optional bool HasKinesis = 8; -} +} message TUserInfo { optional uint64 Offset = 1; @@ -661,7 +661,7 @@ message TUserInfo { optional uint32 Step = 3; optional string Session = 4; optional uint64 OffsetRewindSum = 5; - optional uint64 ReadRuleGeneration = 6; + optional uint64 ReadRuleGeneration = 6; } message TPartitionClientInfo { diff --git a/ydb/core/protos/serverless_proxy_config.proto b/ydb/core/protos/serverless_proxy_config.proto index 3bdcd5706b..43ed3c331d 100644 --- a/ydb/core/protos/serverless_proxy_config.proto +++ b/ydb/core/protos/serverless_proxy_config.proto @@ -10,10 +10,10 @@ message TServerlessProxyConfig { } optional THttpServerConfig HttpServerConfig = 1; - - optional TGRpcConfig GRpcConfig = 9; - - repeated string YandexCloudServiceRegion = 2; + + optional TGRpcConfig GRpcConfig = 9; + + repeated string YandexCloudServiceRegion = 2; repeated TDatabaseEndpoints KnownEndpoints = 3; @@ -25,9 +25,9 @@ message TServerlessProxyConfig { optional string DatabaseServiceEndpoint = 7; optional bool TestMode = 8; - optional uint32 DatabaseListRefreshPeriodSeconds = 10 [default = 10]; - - optional TLogConfig LogConfig = 11; - + optional uint32 DatabaseListRefreshPeriodSeconds = 10 [default = 10]; + + optional TLogConfig LogConfig = 11; + } diff --git a/ydb/core/protos/services.proto b/ydb/core/protos/services.proto index 488a37017d..c17c8a7dc3 100644 --- a/ydb/core/protos/services.proto +++ b/ydb/core/protos/services.proto @@ -89,7 +89,7 @@ enum EServiceKikimr { TABLET_SAUSAGECACHE = 314; TABLET_FLATEX = 315; TABLET_FLATBOOT = 316; - TABLET_AGGREGATOR = 317; + TABLET_AGGREGATOR = 317; TABLET_OPS_HOST = 666; OPS_COMPACT = 667; OPS_BACKUP = 668; @@ -158,19 +158,19 @@ enum EServiceKikimr { KEYVALUE = 420; WILSON = 430; - - // PERSQUEUE section - PERSQUEUE = 440; + + // PERSQUEUE section + PERSQUEUE = 440; PQ_METACACHE = 441; - PQ_READ_PROXY = 442; - PQ_WRITE_PROXY = 443; + PQ_READ_PROXY = 442; + PQ_WRITE_PROXY = 443; PQ_MIRRORER = 446; PQ_READ_SPEED_LIMITER = 447; - PERSQUEUE_READ_BALANCER = 448; - - CHOOSE_PROXY = 444; + PERSQUEUE_READ_BALANCER = 448; + + CHOOSE_PROXY = 444; LB_CONFIG_MANAGER = 445; - + TOKEN_BUILDER = 450; TICKET_PARSER = 455; BLACKBOX_VALIDATOR = 460; @@ -190,7 +190,7 @@ enum EServiceKikimr { // HEALTH section HEALTH = 500; - MEMORY_PROFILER = 510; + MEMORY_PROFILER = 510; DATASHARD_BACKUP = 522; diff --git a/ydb/core/protos/subdomains.proto b/ydb/core/protos/subdomains.proto index 54c6deeebb..5d665ff381 100644 --- a/ydb/core/protos/subdomains.proto +++ b/ydb/core/protos/subdomains.proto @@ -71,9 +71,9 @@ message TDomainDescription { optional TDiskSpaceUsage DiskSpaceUsage = 11; // TODO: temp for SLYDB-95 optional TSchemeQuotas DeclaredSchemeQuotas = 12; - - optional uint64 PQPartitionsInside = 13; - optional uint64 PQPartitionsLimit = 14; + + optional uint64 PQPartitionsInside = 13; + optional uint64 PQPartitionsLimit = 14; optional Ydb.Cms.DatabaseQuotas DatabaseQuotas = 15; optional TDomainState DomainState = 16; diff --git a/ydb/core/protos/tablet_counters_aggregator.proto b/ydb/core/protos/tablet_counters_aggregator.proto index d60d442141..b19d5a89da 100644 --- a/ydb/core/protos/tablet_counters_aggregator.proto +++ b/ydb/core/protos/tablet_counters_aggregator.proto @@ -4,8 +4,8 @@ import "ydb/core/protos/tablet.proto"; package NKikimrTabletCountersAggregator; option java_package = "ru.yandex.kikimr.proto"; -option cc_enable_arenas = true; - +option cc_enable_arenas = true; + message TTabletCounters { repeated uint64 SimpleCounters = 1; repeated uint64 CumulativeCounters = 2; @@ -24,33 +24,33 @@ message TEvTabletCountersRequest { message TEvTabletCountersResponse { repeated TTabletCountersInfo CountersInfo = 1; } - -message TEvTabletLabeledCountersRequest { + +message TEvTabletLabeledCountersRequest { optional NKikimrTabletBase.TTabletTypes.EType TabletType = 1; - optional string Group = 2; + optional string Group = 2; optional uint64 LabeledCounterId = 3; // Version 1 optional uint32 Version = 4 [default = 1]; -} - -message TTabletLabeledCounter { - optional uint64 Value = 1; - optional uint64 Id = 2; +} + +message TTabletLabeledCounter { + optional uint64 Value = 1; + optional uint64 Id = 2; optional string Name = 3; // Version 1 - optional NKikimr.TLabeledCounterOptions.EAggregateFunc AggregateFunc = 4; - optional NKikimr.TLabeledCounterOptions.ECounterType Type = 5; + optional NKikimr.TLabeledCounterOptions.EAggregateFunc AggregateFunc = 4; + optional NKikimr.TLabeledCounterOptions.ECounterType Type = 5; optional uint32 NameId = 6; // Version 2 -} - -message TTabletLabeledCounters { - optional string Group = 1; +} + +message TTabletLabeledCounters { + optional string Group = 1; optional string GroupNames = 2; // Version 1 - repeated TTabletLabeledCounter LabeledCounter = 3; - - optional string Delimiter = 4; -} - -message TEvTabletLabeledCountersResponse { - repeated TTabletLabeledCounters LabeledCountersByGroup = 1; - repeated uint32 Nodes = 2; + repeated TTabletLabeledCounter LabeledCounter = 3; + + optional string Delimiter = 4; +} + +message TEvTabletLabeledCountersResponse { + repeated TTabletLabeledCounters LabeledCountersByGroup = 1; + repeated uint32 Nodes = 2; repeated string CounterNames = 3; // Version 2 -} +} diff --git a/ydb/core/protos/tablet_database.proto b/ydb/core/protos/tablet_database.proto index 289159fb91..a6658688a6 100644 --- a/ydb/core/protos/tablet_database.proto +++ b/ydb/core/protos/tablet_database.proto @@ -12,7 +12,7 @@ message TExecutorSettings { optional uint32 TabletChannelIndex = 4; optional bool KeepInMemory = 5; } - + message TCompactionPolicy { message TGenerationPolicy { optional uint32 GenerationId = 1; diff --git a/ydb/core/protos/ya.make b/ydb/core/protos/ya.make index 2f0cad1a61..70bb65514c 100644 --- a/ydb/core/protos/ya.make +++ b/ydb/core/protos/ya.make @@ -1,6 +1,6 @@ PROTO_LIBRARY() -GRPC() +GRPC() OWNER( fomichev @@ -45,7 +45,7 @@ SRCS( counters_kesus.proto counters_keyvalue.proto counters_olapshard.proto - counters_pq.proto + counters_pq.proto counters_replication.proto counters_schemeshard.proto counters_sequenceshard.proto @@ -77,10 +77,10 @@ SRCS( msgbus.proto msgbus_health.proto msgbus_kv.proto - msgbus_pq.proto + msgbus_pq.proto netclassifier.proto node_broker.proto - node_limits.proto + node_limits.proto profiler.proto query_stats.proto replication.proto @@ -118,12 +118,12 @@ SRCS( tx_scheme.proto tx_sequenceshard.proto pdiskfit.proto - pqconfig.proto + pqconfig.proto auth.proto key.proto - grpc.proto - grpc_pq_old.proto - grpc_status_proxy.proto + grpc.proto + grpc_pq_old.proto + grpc_status_proxy.proto ydb_result_set_old.proto ydb_table_impl.proto scheme_board.proto diff --git a/ydb/core/tablet/tablet_counters.cpp b/ydb/core/tablet/tablet_counters.cpp index a60bd67543..b2ef0d4ad7 100644 --- a/ydb/core/tablet/tablet_counters.cpp +++ b/ydb/core/tablet/tablet_counters.cpp @@ -143,109 +143,109 @@ void TTabletCountersBase::OutputProto(NKikimrTabletBase::TTabletCountersBase& op } } -//////////////////////////////////////////// -/// The TTabletLabeledCountersBase class -//////////////////////////////////////////// - -//////////////////////////////////////////// -// private -//////////////////////////////////////////// -TTabletLabeledCountersBase::TTabletLabeledCountersBase(const TTabletLabeledCountersBase& rp) - : TTabletLabeledCountersBase() -{ - *this = rp; -} - -//////////////////////////////////////////// -TTabletLabeledCountersBase& -TTabletLabeledCountersBase::operator = (const TTabletLabeledCountersBase& rp) { - if (&rp == this) - return *this; - - if (!HasCounters()) { - Counters.Reset(rp.Counters); - Ids.Reset(rp.Ids); - MetaInfo = rp.MetaInfo; - Types = rp.Types; - GroupNames = rp.GroupNames; - AggregateFunc = rp.AggregateFunc; - } else { - Counters.SetTo(rp.Counters); - Ids.SetTo(rp.Ids); - } - Group = rp.Group; - Drop = rp.Drop; - return *this; -} - +//////////////////////////////////////////// +/// The TTabletLabeledCountersBase class +//////////////////////////////////////////// + +//////////////////////////////////////////// +// private +//////////////////////////////////////////// +TTabletLabeledCountersBase::TTabletLabeledCountersBase(const TTabletLabeledCountersBase& rp) + : TTabletLabeledCountersBase() +{ + *this = rp; +} + +//////////////////////////////////////////// +TTabletLabeledCountersBase& +TTabletLabeledCountersBase::operator = (const TTabletLabeledCountersBase& rp) { + if (&rp == this) + return *this; + + if (!HasCounters()) { + Counters.Reset(rp.Counters); + Ids.Reset(rp.Ids); + MetaInfo = rp.MetaInfo; + Types = rp.Types; + GroupNames = rp.GroupNames; + AggregateFunc = rp.AggregateFunc; + } else { + Counters.SetTo(rp.Counters); + Ids.SetTo(rp.Ids); + } + Group = rp.Group; + Drop = rp.Drop; + return *this; +} + void TTabletLabeledCountersBase::OutputHtml(IOutputStream &os) const { HTML(os) { DIV_CLASS("row") { DIV_CLASS("col-md-12") {H3() {os << Group; }} - + } DIV_CLASS("row") { - for (ui32 i = 0, e = Counters.Size(); i < e; ++i) { + for (ui32 i = 0, e = Counters.Size(); i < e; ++i) { if (MetaInfo[i]) { DIV_CLASS("col-md-3") {Counters[i].OutputHtml(os, MetaInfo[i]);} DIV_CLASS("col-md-3") {Ids[i].OutputHtml(os, "id");} } - } + } + } + } +} + +void TTabletLabeledCountersBase::AggregateWith(const TTabletLabeledCountersBase& rp) { + if (!HasCounters()) { + *this = rp; + return; + } + if (rp.Counters.Size() != Counters.Size()) //do not merge different versions of counters; this can be on rolling update + return; + for (ui32 i = 0, e = Counters.Size(); i < e; ++i) { + if (AggregateFunc[i] != rp.AggregateFunc[i]) //do not merge different versions of counters + return; + switch (AggregateFunc[i]) { + case EAF_MIN: + if (Counters[i].Get() > rp.Counters[i].Get()) { + Counters[i].Set(rp.Counters[i].Get()); + Ids[i].Set(rp.Ids[i].Get()); + } + break; + case EAF_MAX: + if (Counters[i].Get() < rp.Counters[i].Get()) { + Counters[i].Set(rp.Counters[i].Get()); + Ids[i].Set(rp.Ids[i].Get()); + } + break; + case EAF_SUM: + Counters[i].Add(rp.GetCounters()[i].Get()); + Ids[i].Set(0); + break; + default: + Y_FAIL("unknown aggregate func"); } } -} - -void TTabletLabeledCountersBase::AggregateWith(const TTabletLabeledCountersBase& rp) { - if (!HasCounters()) { - *this = rp; - return; - } - if (rp.Counters.Size() != Counters.Size()) //do not merge different versions of counters; this can be on rolling update - return; - for (ui32 i = 0, e = Counters.Size(); i < e; ++i) { - if (AggregateFunc[i] != rp.AggregateFunc[i]) //do not merge different versions of counters - return; - switch (AggregateFunc[i]) { - case EAF_MIN: - if (Counters[i].Get() > rp.Counters[i].Get()) { - Counters[i].Set(rp.Counters[i].Get()); - Ids[i].Set(rp.Ids[i].Get()); - } - break; - case EAF_MAX: - if (Counters[i].Get() < rp.Counters[i].Get()) { - Counters[i].Set(rp.Counters[i].Get()); - Ids[i].Set(rp.Ids[i].Get()); - } - break; - case EAF_SUM: - Counters[i].Add(rp.GetCounters()[i].Get()); - Ids[i].Set(0); - break; - default: - Y_FAIL("unknown aggregate func"); - } - } - Drop = Drop || rp.Drop; -} - + Drop = Drop || rp.Drop; +} + IOutputStream& operator <<(IOutputStream& out, const TTabletLabeledCountersBase::EAggregateFunc& func) { - switch(func) { - case TTabletLabeledCountersBase::EAF_MIN: - out << "EAF_MIN"; - break; - case TTabletLabeledCountersBase::EAF_MAX: - out << "EAF_MAX"; - break; - case TTabletLabeledCountersBase::EAF_SUM: - out << "EAF_SUM"; - break; - default: - out << (ui32)func; - } - return out; -} - - + switch(func) { + case TTabletLabeledCountersBase::EAF_MIN: + out << "EAF_MIN"; + break; + case TTabletLabeledCountersBase::EAF_MAX: + out << "EAF_MAX"; + break; + case TTabletLabeledCountersBase::EAF_SUM: + out << "EAF_SUM"; + break; + default: + out << (ui32)func; + } + return out; +} + + } // end of NKikimr namespace diff --git a/ydb/core/tablet/tablet_counters.h b/ydb/core/tablet/tablet_counters.h index 86a9db2c3e..a8cc27e9ed 100644 --- a/ydb/core/tablet/tablet_counters.h +++ b/ydb/core/tablet/tablet_counters.h @@ -295,29 +295,29 @@ private: template <typename T> class TCountersArray : TNonCopyable { friend class TTabletCountersBase; - friend class TTabletLabeledCountersBase; + friend class TTabletLabeledCountersBase; public: typedef std::shared_ptr<T> TCountersHolder; // TCountersArray(ui32 countersQnt) : CountersQnt(countersQnt) - , CountersHolder(nullptr) + , CountersHolder(nullptr) , Counters(nullptr) { - if (CountersQnt) { + if (CountersQnt) { CountersHolder.reset(new T[CountersQnt](), &CheckedArrayDelete<T>); Counters = CountersHolder.get(); - } - } - - //not owning constructor - can refer to part of other counters - TCountersArray(TCountersHolder& countersHolder, T* counters, const ui32 countersQnt) - : CountersQnt(countersQnt) - , CountersHolder(countersHolder) - , Counters(counters) - { - } - + } + } + + //not owning constructor - can refer to part of other counters + TCountersArray(TCountersHolder& countersHolder, T* counters, const ui32 countersQnt) + : CountersQnt(countersQnt) + , CountersHolder(countersHolder) + , Counters(counters) + { + } + ~TCountersArray() { Counters = nullptr; @@ -352,10 +352,10 @@ private: Counters = nullptr; CountersQnt = rp.CountersQnt; - if (CountersQnt) { + if (CountersQnt) { CountersHolder.reset(new T[CountersQnt](), &CheckedArrayDelete<T>); Counters = CountersHolder.get(); - } + } for (ui32 i = 0, e = CountersQnt; i < e; ++i) { Counters[i].Initialize(rp.Counters[i]); @@ -389,7 +389,7 @@ private: // ui32 CountersQnt; - TCountersHolder CountersHolder; + TCountersHolder CountersHolder; T* Counters; }; @@ -422,24 +422,24 @@ public: , PercentileCountersMetaInfo(percentileCountersMetaInfo) {} - //Constructor only for access of other counters. Lifetime of class constructed this way must not exceed lifetime of existed one. - TTabletCountersBase(const ui32 simpleOffset, const ui32 cumulativeOffset, const ui32 percentileOffset, TTabletCountersBase* counters) - : SimpleCounters(counters->Simple().CountersHolder, counters->Simple().Counters + simpleOffset, - counters->Simple().Size() - simpleOffset) - , CumulativeCounters(counters->Cumulative().CountersHolder, counters->Cumulative().Counters + cumulativeOffset, - counters->Cumulative().Size() - cumulativeOffset) - , PercentileCounters(counters->Percentile().CountersHolder, counters->Percentile().Counters + percentileOffset, - counters->Percentile().Size() - percentileOffset) - , SimpleCountersMetaInfo(counters->SimpleCountersMetaInfo + simpleOffset) - , CumulativeCountersMetaInfo(counters->CumulativeCountersMetaInfo + cumulativeOffset) - , PercentileCountersMetaInfo(counters->PercentileCountersMetaInfo + percentileOffset) - - { - Y_VERIFY_DEBUG(counters->Simple().Size() > simpleOffset); - Y_VERIFY_DEBUG(counters->Cumulative().Size() > cumulativeOffset); - Y_VERIFY_DEBUG(counters->Percentile().Size() > percentileOffset); - } - + //Constructor only for access of other counters. Lifetime of class constructed this way must not exceed lifetime of existed one. + TTabletCountersBase(const ui32 simpleOffset, const ui32 cumulativeOffset, const ui32 percentileOffset, TTabletCountersBase* counters) + : SimpleCounters(counters->Simple().CountersHolder, counters->Simple().Counters + simpleOffset, + counters->Simple().Size() - simpleOffset) + , CumulativeCounters(counters->Cumulative().CountersHolder, counters->Cumulative().Counters + cumulativeOffset, + counters->Cumulative().Size() - cumulativeOffset) + , PercentileCounters(counters->Percentile().CountersHolder, counters->Percentile().Counters + percentileOffset, + counters->Percentile().Size() - percentileOffset) + , SimpleCountersMetaInfo(counters->SimpleCountersMetaInfo + simpleOffset) + , CumulativeCountersMetaInfo(counters->CumulativeCountersMetaInfo + cumulativeOffset) + , PercentileCountersMetaInfo(counters->PercentileCountersMetaInfo + percentileOffset) + + { + Y_VERIFY_DEBUG(counters->Simple().Size() > simpleOffset); + Y_VERIFY_DEBUG(counters->Cumulative().Size() > cumulativeOffset); + Y_VERIFY_DEBUG(counters->Percentile().Size() > percentileOffset); + } + virtual ~TTabletCountersBase() {} @@ -525,148 +525,148 @@ private: const char* const * PercentileCountersMetaInfo; }; - -//////////////////////////////////////////// -/// The TTabletLabeledCountersBase class -//////////////////////////////////////////// - -//labeled counters are aggregated by Label across all tablets -//Id - identificator of tablet or whatever you want -//You can have severel counters for different labels inside one tablet - -class TTabletLabeledCountersBase { -public: - // - enum EAggregateFunc { - EAF_MAX = 1, - EAF_MIN = 2, - EAF_SUM = 3 - }; - - TTabletLabeledCountersBase() - : Counters(0) - , Ids(0) - , MetaInfo(nullptr) - , Types(nullptr) - , AggregateFunc(nullptr) - , Group("") - , GroupNames(nullptr) - , Drop(false) - {} - - //metaInfo - counters names - //types - NKikimr::TLabeledCounterOptions::ECounterType - //aggrFuncs - EAggreagteFunc-s casted to ui8 - //group - '/' separated list of group-values (user1/topic1/...) - //groupNames - groups names (clientId,topic,...) - //id - id for this user counter groups (tabletID or whatever, if there is several concurrent labeledCounters-generators inside one tablet) - TTabletLabeledCountersBase(ui32 countersQnt, - const char* const * metaInfo, - const ui8* types, - const ui8* aggrFunc, + +//////////////////////////////////////////// +/// The TTabletLabeledCountersBase class +//////////////////////////////////////////// + +//labeled counters are aggregated by Label across all tablets +//Id - identificator of tablet or whatever you want +//You can have severel counters for different labels inside one tablet + +class TTabletLabeledCountersBase { +public: + // + enum EAggregateFunc { + EAF_MAX = 1, + EAF_MIN = 2, + EAF_SUM = 3 + }; + + TTabletLabeledCountersBase() + : Counters(0) + , Ids(0) + , MetaInfo(nullptr) + , Types(nullptr) + , AggregateFunc(nullptr) + , Group("") + , GroupNames(nullptr) + , Drop(false) + {} + + //metaInfo - counters names + //types - NKikimr::TLabeledCounterOptions::ECounterType + //aggrFuncs - EAggreagteFunc-s casted to ui8 + //group - '/' separated list of group-values (user1/topic1/...) + //groupNames - groups names (clientId,topic,...) + //id - id for this user counter groups (tabletID or whatever, if there is several concurrent labeledCounters-generators inside one tablet) + TTabletLabeledCountersBase(ui32 countersQnt, + const char* const * metaInfo, + const ui8* types, + const ui8* aggrFunc, const TString& group, const char* const * groupNames, const ui64 id) - : Counters(countersQnt) - , Ids(countersQnt) - , MetaInfo(metaInfo) - , Types(types) - , AggregateFunc(aggrFunc) - , Group(group) - , GroupNames(groupNames) - , Drop(false) - { - for (ui32 i = 0; i < countersQnt; ++i) - Ids[i].Set(id); - } - - virtual ~TTabletLabeledCountersBase() - {} - - bool HasCounters() const { - return (bool)Counters; - } - - // counters - TCountersArray<TTabletSimpleCounter>& GetCounters() { - return Counters; - } - - const TCountersArray<TTabletSimpleCounter>& GetCounters() const { - return Counters; - } - + : Counters(countersQnt) + , Ids(countersQnt) + , MetaInfo(metaInfo) + , Types(types) + , AggregateFunc(aggrFunc) + , Group(group) + , GroupNames(groupNames) + , Drop(false) + { + for (ui32 i = 0; i < countersQnt; ++i) + Ids[i].Set(id); + } + + virtual ~TTabletLabeledCountersBase() + {} + + bool HasCounters() const { + return (bool)Counters; + } + + // counters + TCountersArray<TTabletSimpleCounter>& GetCounters() { + return Counters; + } + + const TCountersArray<TTabletSimpleCounter>& GetCounters() const { + return Counters; + } + const TString& GetGroup() const { - return Group; - } - - - void SetGroup(const TString& group) { - Group = group; - } - - const TCountersArray<TTabletSimpleCounter>& GetIds() const { - return Ids; - } - - TCountersArray<TTabletSimpleCounter>& GetIds() { - return Ids; - } - - ui8 GetCounterType(ui32 index) const { - return Types[index]; - } - - const ui8* GetTypes() const { - return Types; - } - - const char * const * GetNames() const { - return MetaInfo; - } - - const ui8* GetAggrFuncs() const { - return AggregateFunc; - } - + return Group; + } + + + void SetGroup(const TString& group) { + Group = group; + } + + const TCountersArray<TTabletSimpleCounter>& GetIds() const { + return Ids; + } + + TCountersArray<TTabletSimpleCounter>& GetIds() { + return Ids; + } + + ui8 GetCounterType(ui32 index) const { + return Types[index]; + } + + const ui8* GetTypes() const { + return Types; + } + + const char * const * GetNames() const { + return MetaInfo; + } + + const ui8* GetAggrFuncs() const { + return AggregateFunc; + } + void OutputHtml(IOutputStream &os) const; - - // - const char* GetCounterName(ui32 index) const { - return MetaInfo[index]; - } - - const char* GetGroupName(ui32 index) const { - return GroupNames[index]; - } - - void SetDrop() { - Drop = true; - } - - bool GetDrop() const { - return Drop; - } - - //Counters will be filled with aggragated value by AggregateFunc, Ids will be filled with id from user counters with winning value - void AggregateWith(const TTabletLabeledCountersBase& rp); - - TTabletLabeledCountersBase(const TTabletLabeledCountersBase&); - TTabletLabeledCountersBase& operator = (const TTabletLabeledCountersBase&); - -private: - // - TCountersArray<TTabletSimpleCounter> Counters; - TCountersArray<TTabletSimpleCounter> Ids; - const char* const * MetaInfo; - const ui8* Types; - const ui8* AggregateFunc; + + // + const char* GetCounterName(ui32 index) const { + return MetaInfo[index]; + } + + const char* GetGroupName(ui32 index) const { + return GroupNames[index]; + } + + void SetDrop() { + Drop = true; + } + + bool GetDrop() const { + return Drop; + } + + //Counters will be filled with aggragated value by AggregateFunc, Ids will be filled with id from user counters with winning value + void AggregateWith(const TTabletLabeledCountersBase& rp); + + TTabletLabeledCountersBase(const TTabletLabeledCountersBase&); + TTabletLabeledCountersBase& operator = (const TTabletLabeledCountersBase&); + +private: + // + TCountersArray<TTabletSimpleCounter> Counters; + TCountersArray<TTabletSimpleCounter> Ids; + const char* const * MetaInfo; + const ui8* Types; + const ui8* AggregateFunc; TString Group; - const char* const * GroupNames; - bool Drop; -}; - - + const char* const * GroupNames; + bool Drop; +}; + + IOutputStream& operator <<(IOutputStream& out, const TTabletLabeledCountersBase::EAggregateFunc& func); - - + + } // end of NKikimr diff --git a/ydb/core/tablet/tablet_counters_aggregator.cpp b/ydb/core/tablet/tablet_counters_aggregator.cpp index eaa73cc965..1885b26e09 100644 --- a/ydb/core/tablet/tablet_counters_aggregator.cpp +++ b/ydb/core/tablet/tablet_counters_aggregator.cpp @@ -20,7 +20,7 @@ #include <library/cpp/monlib/dynamic_counters/encode.h> #include <util/generic/xrange.h> -#include <util/string/vector.h> +#include <util/string/vector.h> #include <util/string/split.h> #ifdef _darwin_ @@ -55,9 +55,9 @@ bool IsHistogramAggregateSimpleName(TStringBuf name) { //////////////////////////////////////////// namespace { -const ui32 WAKEUP_TIMEOUT_SECONDS = 4; - - +const ui32 WAKEUP_TIMEOUT_SECONDS = 4; + + //////////////////////////////////////////// using TCountersVector = TVector<NMonitoring::TDynamicCounters::TCounterPtr>; @@ -146,34 +146,34 @@ public: void SetValue(ui64 tabletID, ui32 counterIndex, ui64 value, TTabletTypes::EType tabletType) { Y_VERIFY(counterIndex < CountersByTabletID.size(), "inconsistent counters for tablet type %s", TTabletTypes::TypeToStr(tabletType)); - auto it = CountersByTabletID[counterIndex].find(tabletID); - if (it != CountersByTabletID[counterIndex].end()) { - if (it->second != value) { - ChangedCounters[counterIndex] = true; - it->second = value; - } - } else { - CountersByTabletID[counterIndex].insert(std::make_pair(tabletID, value)); - ChangedCounters[counterIndex] = true; - } + auto it = CountersByTabletID[counterIndex].find(tabletID); + if (it != CountersByTabletID[counterIndex].end()) { + if (it->second != value) { + ChangedCounters[counterIndex] = true; + it->second = value; + } + } else { + CountersByTabletID[counterIndex].insert(std::make_pair(tabletID, value)); + ChangedCounters[counterIndex] = true; + } } void ForgetTablet(ui64 tabletId) { for (ui32 idx : xrange(CountersByTabletID.size())) { auto &counters = CountersByTabletID[idx]; counters.erase(tabletId); - ChangedCounters[idx] = true; + ChangedCounters[idx] = true; + } + } + + void RecalcAll() { + for (ui32 idx : xrange(CountersByTabletID.size())) { + if (ChangedCounters[idx]) + Recalc(idx); + ChangedCounters[idx] = false; } } - void RecalcAll() { - for (ui32 idx : xrange(CountersByTabletID.size())) { - if (ChangedCounters[idx]) - Recalc(idx); - ChangedCounters[idx] = false; - } - } - private: // NMonitoring::TDynamicCounterPtr CounterGroup; @@ -313,7 +313,7 @@ private: struct TTabletLabeledCountersResponseContext { NKikimrTabletCountersAggregator::TEvTabletLabeledCountersResponse& Response; THashMap<TStringBuf, ui32> NamesToId; - + TTabletLabeledCountersResponseContext(NKikimrTabletCountersAggregator::TEvTabletLabeledCountersResponse& response) : Response(response) {} @@ -330,70 +330,70 @@ struct TTabletLabeledCountersResponseContext { } }; -class TAggregatedLabeledCounters { -public: - // +class TAggregatedLabeledCounters { +public: + // TAggregatedLabeledCounters(ui32 count, const ui8* aggrFunc, const char * const * names, const ui8* types, const TString& groupNames) - : AggrFunc(aggrFunc) - , Names(names) - , GroupNames(groupNames) - , Types(types) - , AggrCounters(count, 0) - , Ids(count, 0) - , Changed(false) - , CountersByTabletID(count) - { - } - - void SetValue(ui64 tabletID, ui32 counterIndex, ui64 value, ui64 id) { - - CountersByTabletID[counterIndex][tabletID] = std::make_pair(value, id); - Changed = true; - } - - bool ForgetTablet(ui64 tabletId) { - for (ui32 idx : xrange(CountersByTabletID.size())) { - auto &counters = CountersByTabletID[idx]; - counters.erase(tabletId); - } - Changed = true; - return CountersByTabletID.size() == 0 || CountersByTabletID[0].size() == 0; - } - - ui32 Size() const { - return AggrCounters.size(); - } - - ui64 GetValue(ui32 index) const { - return AggrCounters[index]; - } - - ui64 GetId(ui32 index) const { - return Ids[index]; - } - + : AggrFunc(aggrFunc) + , Names(names) + , GroupNames(groupNames) + , Types(types) + , AggrCounters(count, 0) + , Ids(count, 0) + , Changed(false) + , CountersByTabletID(count) + { + } + + void SetValue(ui64 tabletID, ui32 counterIndex, ui64 value, ui64 id) { + + CountersByTabletID[counterIndex][tabletID] = std::make_pair(value, id); + Changed = true; + } + + bool ForgetTablet(ui64 tabletId) { + for (ui32 idx : xrange(CountersByTabletID.size())) { + auto &counters = CountersByTabletID[idx]; + counters.erase(tabletId); + } + Changed = true; + return CountersByTabletID.size() == 0 || CountersByTabletID[0].size() == 0; + } + + ui32 Size() const { + return AggrCounters.size(); + } + + ui64 GetValue(ui32 index) const { + return AggrCounters[index]; + } + + ui64 GetId(ui32 index) const { + return Ids[index]; + } + void FillGetRequestV1(NKikimrTabletCountersAggregator::TTabletLabeledCounters& labeledCounters, const TString& group, ui32 start, ui32 end) const { - if (Changed) { - for (ui32 idx : xrange(CountersByTabletID.size())) { - Recalc(idx); - } - Changed = false; - } - Y_VERIFY(end >= start); - Y_VERIFY(end <= Size()); - labeledCounters.SetGroupNames(GroupNames); - labeledCounters.SetGroup(group); - labeledCounters.SetDelimiter("/"); //TODO: change here to "|" - for (ui32 i = start; i < end; ++i) { - auto& labeledCounter = *labeledCounters.AddLabeledCounter(); - labeledCounter.SetValue(GetValue(i)); - labeledCounter.SetId(GetId(i)); - labeledCounter.SetName(Names[i]); - labeledCounter.SetAggregateFunc(NKikimr::TLabeledCounterOptions::EAggregateFunc(AggrFunc[i])); - labeledCounter.SetType(NKikimr::TLabeledCounterOptions::ECounterType(Types[i])); - } - } - + if (Changed) { + for (ui32 idx : xrange(CountersByTabletID.size())) { + Recalc(idx); + } + Changed = false; + } + Y_VERIFY(end >= start); + Y_VERIFY(end <= Size()); + labeledCounters.SetGroupNames(GroupNames); + labeledCounters.SetGroup(group); + labeledCounters.SetDelimiter("/"); //TODO: change here to "|" + for (ui32 i = start; i < end; ++i) { + auto& labeledCounter = *labeledCounters.AddLabeledCounter(); + labeledCounter.SetValue(GetValue(i)); + labeledCounter.SetId(GetId(i)); + labeledCounter.SetName(Names[i]); + labeledCounter.SetAggregateFunc(NKikimr::TLabeledCounterOptions::EAggregateFunc(AggrFunc[i])); + labeledCounter.SetType(NKikimr::TLabeledCounterOptions::ECounterType(Types[i])); + } + } + void FillGetRequestV2(TTabletLabeledCountersResponseContext& context, const TString& group) const { if (Changed) { for (ui32 idx : xrange(CountersByTabletID.size())) { @@ -403,7 +403,7 @@ public: } auto& labeledCounters = *context.Response.AddLabeledCountersByGroup(); labeledCounters.SetGroup(group); - labeledCounters.SetDelimiter("/"); //TODO: change here to "|" + labeledCounters.SetDelimiter("/"); //TODO: change here to "|" for (ui32 i = 0; i < Size(); ++i) { auto& labeledCounter = *labeledCounters.AddLabeledCounter(); labeledCounter.SetValue(GetValue(i)); @@ -412,58 +412,58 @@ public: labeledCounter.SetType(NKikimr::TLabeledCounterOptions::ECounterType(Types[i])); } } - -private: - // - NMonitoring::TDynamicCounterPtr CounterGroup; - const ui8* AggrFunc; - const char* const * Names; + +private: + // + NMonitoring::TDynamicCounterPtr CounterGroup; + const ui8* AggrFunc; + const char* const * Names; TString GroupNames; - const ui8* Types; - + const ui8* Types; + mutable TVector<ui64> AggrCounters; mutable TVector<ui64> Ids; - mutable bool Changed; - + mutable bool Changed; + using TCountersByTabletIDMap = THashMap<ui64, std::pair<ui64, ui64>>; //second pair is for counter and id TVector<TCountersByTabletIDMap> CountersByTabletID; - -private: - void Recalc(ui32 idx) const { - Y_VERIFY(idx < Ids.size()); - auto &counters = CountersByTabletID[idx]; - ui8 aggrFunc = AggrFunc[idx]; - std::pair<ui64, ui64> aggrVal{0,0}; - ui64 cntCount = counters.size(); - - Y_VERIFY(cntCount > 0); - if (aggrFunc == TTabletLabeledCountersBase::EAF_MIN) - aggrVal = counters.begin()->second; - - for (auto&& t : counters) { - const std::pair<ui64, ui64>& tValue = t.second; - switch (aggrFunc) { - case TTabletLabeledCountersBase::EAF_MIN: - aggrVal = Min(tValue, aggrVal); - break; - case TTabletLabeledCountersBase::EAF_MAX: - aggrVal = Max(tValue, aggrVal); - break; - case TTabletLabeledCountersBase::EAF_SUM: - aggrVal.first += tValue.first; - break; - default: - Y_FAIL("bad aggrFunc value"); - }; - } - AggrCounters[idx] = aggrVal.first; - Ids[idx] = aggrVal.second; - } -}; - - - + +private: + void Recalc(ui32 idx) const { + Y_VERIFY(idx < Ids.size()); + auto &counters = CountersByTabletID[idx]; + ui8 aggrFunc = AggrFunc[idx]; + std::pair<ui64, ui64> aggrVal{0,0}; + ui64 cntCount = counters.size(); + + Y_VERIFY(cntCount > 0); + if (aggrFunc == TTabletLabeledCountersBase::EAF_MIN) + aggrVal = counters.begin()->second; + + for (auto&& t : counters) { + const std::pair<ui64, ui64>& tValue = t.second; + switch (aggrFunc) { + case TTabletLabeledCountersBase::EAF_MIN: + aggrVal = Min(tValue, aggrVal); + break; + case TTabletLabeledCountersBase::EAF_MAX: + aggrVal = Max(tValue, aggrVal); + break; + case TTabletLabeledCountersBase::EAF_SUM: + aggrVal.first += tValue.first; + break; + default: + Y_FAIL("bad aggrFunc value"); + }; + } + AggrCounters[idx] = aggrVal.first; + Ids[idx] = aggrVal.second; + } +}; + + + } //////////////////////////////////////////// @@ -517,42 +517,42 @@ public: } void ApplyLabeledCounters(ui64 tabletID, TTabletTypes::EType tabletType, const TTabletLabeledCountersBase* labeledCounters) { - - auto iterTabletType = LabeledCountersByTabletTypeAndGroup.find(std::make_pair(tabletType, labeledCounters->GetGroup())); - - if (labeledCounters->GetDrop() ) { - if (iterTabletType != LabeledCountersByTabletTypeAndGroup.end()) { - LabeledCountersByTabletTypeAndGroup.erase(iterTabletType); - } - return; - } - - - if (iterTabletType == LabeledCountersByTabletTypeAndGroup.end()) { + + auto iterTabletType = LabeledCountersByTabletTypeAndGroup.find(std::make_pair(tabletType, labeledCounters->GetGroup())); + + if (labeledCounters->GetDrop() ) { + if (iterTabletType != LabeledCountersByTabletTypeAndGroup.end()) { + LabeledCountersByTabletTypeAndGroup.erase(iterTabletType); + } + return; + } + + + if (iterTabletType == LabeledCountersByTabletTypeAndGroup.end()) { TString tabletTypeStr = TTabletTypes::TypeToStr(tabletType); TString groupNames; TVector<TString> rr; StringSplitter(labeledCounters->GetGroup()).Split('/').SkipEmpty().Collect(&rr); // TODO: change here to "|" - for (ui32 i = 0; i < rr.size(); ++i) { - if (i > 0) - groupNames += '/'; - groupNames += labeledCounters->GetGroupName(i); - } + for (ui32 i = 0; i < rr.size(); ++i) { + if (i > 0) + groupNames += '/'; + groupNames += labeledCounters->GetGroupName(i); + } iterTabletType = LabeledCountersByTabletTypeAndGroup.emplace( - std::make_pair(tabletType, labeledCounters->GetGroup()), - new TAggregatedLabeledCounters(labeledCounters->GetCounters().Size(), labeledCounters->GetAggrFuncs(), - labeledCounters->GetNames(), labeledCounters->GetTypes(), groupNames) + std::make_pair(tabletType, labeledCounters->GetGroup()), + new TAggregatedLabeledCounters(labeledCounters->GetCounters().Size(), labeledCounters->GetAggrFuncs(), + labeledCounters->GetNames(), labeledCounters->GetTypes(), groupNames) ).first; - - } - - for (ui32 i = 0, e = labeledCounters->GetCounters().Size(); i < e; ++i) { - const ui64& value = labeledCounters->GetCounters()[i].Get(); - const ui64& id = labeledCounters->GetIds()[i].Get(); - iterTabletType->second->SetValue(tabletID, i, value, id); - } - } - + + } + + for (ui32 i = 0, e = labeledCounters->GetCounters().Size(); i < e; ++i) { + const ui64& value = labeledCounters->GetCounters()[i].Get(); + const ui64& id = labeledCounters->GetIds()[i].Get(); + iterTabletType->second->SetValue(tabletID, i, value, id); + } + } + void ForgetTablet(ui64 tabletID, TTabletTypes::EType tabletType, TPathId tenantPathId) { AllTypes.Forget(tabletID); // and now erase from every other path @@ -564,18 +564,18 @@ public: if (auto itPath = CountersByPathId.find(tenantPathId); itPath != CountersByPathId.end()) { itPath->second->Forget(tabletID, tabletType); } - //and from all labeledCounters that could have this tablet + //and from all labeledCounters that could have this tablet auto iterTabletTypeAndGroup = LabeledCountersByTabletTypeAndGroup.lower_bound(std::make_pair(tabletType, TString())); - for (; iterTabletTypeAndGroup != LabeledCountersByTabletTypeAndGroup.end() && iterTabletTypeAndGroup->first.first == tabletType; ) { - bool empty = iterTabletTypeAndGroup->second->ForgetTablet(tabletID); - if (empty) { - iterTabletTypeAndGroup = LabeledCountersByTabletTypeAndGroup.erase(iterTabletTypeAndGroup); - } else { - ++iterTabletTypeAndGroup; - } - } - - QuietTabletCounters.erase(tabletID); + for (; iterTabletTypeAndGroup != LabeledCountersByTabletTypeAndGroup.end() && iterTabletTypeAndGroup->first.first == tabletType; ) { + bool empty = iterTabletTypeAndGroup->second->ForgetTablet(tabletID); + if (empty) { + iterTabletTypeAndGroup = LabeledCountersByTabletTypeAndGroup.erase(iterTabletTypeAndGroup); + } else { + ++iterTabletTypeAndGroup; + } + } + + QuietTabletCounters.erase(tabletID); TString tabletIdStr = Sprintf("%" PRIu64, tabletID); Counters->RemoveSubgroup("tabletid", tabletIdStr.data()); @@ -643,19 +643,19 @@ public: } } - void QueryLabeledCounters(const NKikimrTabletCountersAggregator::TEvTabletLabeledCountersRequest& request, NKikimrTabletCountersAggregator::TEvTabletLabeledCountersResponse& response, const TActorContext& ctx) { - - LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "got request v" << request.GetVersion()); - + void QueryLabeledCounters(const NKikimrTabletCountersAggregator::TEvTabletLabeledCountersRequest& request, NKikimrTabletCountersAggregator::TEvTabletLabeledCountersResponse& response, const TActorContext& ctx) { + + LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "got request v" << request.GetVersion()); + TString group = request.HasGroup() ? request.GetGroup() : ""; TTabletTypes::EType tabletType = request.GetTabletType(); - ui32 cc = 0; - + ui32 cc = 0; + if (request.GetVersion() == 1) { auto iter = LabeledCountersByTabletTypeAndGroup.lower_bound(std::make_pair(tabletType, group)); for (; iter != LabeledCountersByTabletTypeAndGroup.end() && iter->first.first == tabletType && (group.empty() || iter->first.second == group); ++iter) { - + ui32 s = 0, e = iter->second->Size(); if (request.HasLabeledCounterId()) { s = request.GetLabeledCounterId(); @@ -667,8 +667,8 @@ public: iter->second->FillGetRequestV1(labeledCountersByGroup, iter->first.second, s, e); ++cc; - } - } else if (request.GetVersion() >= 2) { + } + } else if (request.GetVersion() >= 2) { TTabletLabeledCountersResponseContext context(response); auto iter = LabeledCountersByTabletTypeAndGroup.lower_bound({tabletType, TString()}); for (; iter != LabeledCountersByTabletTypeAndGroup.end() @@ -678,22 +678,22 @@ public: } ++cc; } - } - LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "request processed, " << cc << " groups processed"); - } - - void RecalcAll() { - AllTypes.RecalcAll(); - for (auto& c : CountersByTabletType) { - c.second->RecalcAll(); - } + } + LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "request processed, " << cc << " groups processed"); + } + + void RecalcAll() { + AllTypes.RecalcAll(); + for (auto& c : CountersByTabletType) { + c.second->RecalcAll(); + } if (YdbCounters) { YdbCounters->Initialize(Counters, CountersByTabletType); YdbCounters->Transform(); } - } - + } + void RemoveTabletsByPathId(TPathId pathId) { CountersByPathId.erase(pathId); } @@ -743,15 +743,15 @@ private: } } - void RecalcAll() { + void RecalcAll() { if (TabletExecutorCounters.IsInitialized) { TabletExecutorCounters.RecalcAll(); } if (TabletAppCounters.IsInitialized) { TabletAppCounters.RecalcAll(); } - } - + } + // db counters bool IsInitialized() const { @@ -928,7 +928,7 @@ private: const ui64 value = counters->Simple()[i].Get(); if (DoAggregateSimpleCounters) { AggregatedSimpleCounters.SetValue(tabletID, offset, value, tabletType); - } else { + } else { Y_VERIFY(offset < SimpleCounters.size(), "inconsistent counters for tablet type %s", TTabletTypes::TypeToStr(tabletType)); *SimpleCounters[offset] = value; } @@ -1015,16 +1015,16 @@ private: x = 0; } } - - void RecalcAll() { + + void RecalcAll() { if (DoAggregateSimpleCounters) { - AggregatedSimpleCounters.RecalcAll(); + AggregatedSimpleCounters.RecalcAll(); } if (DoAggregateCumulativeCounters) { AggregatedCumulativeCounters.RecalcAll(); } - } - + } + template <bool IsSaving> void Convert(NKikimrSysView::TDbCounters& sumCounters, NKikimrSysView::TDbCounters& maxCounters) @@ -1212,13 +1212,13 @@ private: TCounterPtr DatashardSizeBytes; TCounterPtr ResourcesStorageUsedBytes; TCounterPtr ResourcesStorageLimitBytes; - TCounterPtr ResourcesStreamUsedShards; - TCounterPtr ResourcesStreamLimitShards; - //TCounterPtr ResourcesStreamUsedShardsPercents; - TCounterPtr ResourcesStreamReservedThroughput; - TCounterPtr ResourcesStreamReservedStorage; - TCounterPtr ResourcesStreamReservedStorageLimit; - + TCounterPtr ResourcesStreamUsedShards; + TCounterPtr ResourcesStreamLimitShards; + //TCounterPtr ResourcesStreamUsedShardsPercents; + TCounterPtr ResourcesStreamReservedThroughput; + TCounterPtr ResourcesStreamReservedStorage; + TCounterPtr ResourcesStreamReservedStorageLimit; + THistogramPtr ShardCpuUtilization; TCounterPtr RowUpdates; @@ -1240,13 +1240,13 @@ private: TCounterPtr DiskSpaceTablesTotalBytes; TCounterPtr DiskSpaceSoftQuotaBytes; - TCounterPtr StreamShardsCount; - TCounterPtr StreamShardsQuota; - TCounterPtr StreamReservedThroughput; - TCounterPtr StreamReservedStorage; - TCounterPtr StreamReservedStorageLimit; - - + TCounterPtr StreamShardsCount; + TCounterPtr StreamShardsQuota; + TCounterPtr StreamReservedThroughput; + TCounterPtr StreamReservedStorage; + TCounterPtr StreamReservedStorageLimit; + + public: explicit TYdbTabletCounters(const NMonitoring::TDynamicCounterPtr& ydbGroup) { WriteRowCount = ydbGroup->GetNamedCounter("name", @@ -1280,23 +1280,23 @@ private: ResourcesStorageLimitBytes = ydbGroup->GetNamedCounter("name", "resources.storage.limit_bytes", false); - ResourcesStreamUsedShards = ydbGroup->GetNamedCounter("name", - "resources.stream.used_shards", false); - ResourcesStreamLimitShards = ydbGroup->GetNamedCounter("name", - "resources.stream.limit_shards", false); - - //ResourcesStreamUsedShardsPercents = ydbGroup->GetNamedCounter("name", - // "resources.stream.used_shards_percents", false); - - ResourcesStreamReservedThroughput = ydbGroup->GetNamedCounter("name", - "resources.stream.throughput.limit_bytes_per_second", false); - - ResourcesStreamReservedStorage = ydbGroup->GetNamedCounter("name", - "resources.stream.storage.reserved_bytes", false); - - ResourcesStreamReservedStorageLimit = ydbGroup->GetNamedCounter("name", - "resources.stream.storage.limit_bytes", false); - + ResourcesStreamUsedShards = ydbGroup->GetNamedCounter("name", + "resources.stream.used_shards", false); + ResourcesStreamLimitShards = ydbGroup->GetNamedCounter("name", + "resources.stream.limit_shards", false); + + //ResourcesStreamUsedShardsPercents = ydbGroup->GetNamedCounter("name", + // "resources.stream.used_shards_percents", false); + + ResourcesStreamReservedThroughput = ydbGroup->GetNamedCounter("name", + "resources.stream.throughput.limit_bytes_per_second", false); + + ResourcesStreamReservedStorage = ydbGroup->GetNamedCounter("name", + "resources.stream.storage.reserved_bytes", false); + + ResourcesStreamReservedStorageLimit = ydbGroup->GetNamedCounter("name", + "resources.stream.storage.limit_bytes", false); + ShardCpuUtilization = ydbGroup->GetNamedHistogram("name", "table.datashard.used_core_percents", NMonitoring::LinearHistogram(12, 0, 10), false); }; @@ -1341,13 +1341,13 @@ private: DiskSpaceTablesTotalBytes = appGroup->GetCounter("SUM(SchemeShard/DiskSpaceTablesTotalBytes)"); DiskSpaceSoftQuotaBytes = appGroup->GetCounter("SUM(SchemeShard/DiskSpaceSoftQuotaBytes)"); - - StreamShardsCount = appGroup->GetCounter("SUM(SchemeShard/StreamShardsCount)"); - StreamShardsQuota = appGroup->GetCounter("SUM(SchemeShard/StreamShardsQuota)"); - StreamReservedThroughput = appGroup->GetCounter("SUM(SchemeShard/StreamReservedThroughput)"); - StreamReservedStorage = appGroup->GetCounter("SUM(SchemeShard/StreamReservedStorage)"); - StreamReservedStorageLimit = appGroup->GetCounter("SUM(SchemeShard/StreamReservedStorageQuota)"); - + + StreamShardsCount = appGroup->GetCounter("SUM(SchemeShard/StreamShardsCount)"); + StreamShardsQuota = appGroup->GetCounter("SUM(SchemeShard/StreamShardsQuota)"); + StreamReservedThroughput = appGroup->GetCounter("SUM(SchemeShard/StreamReservedThroughput)"); + StreamReservedStorage = appGroup->GetCounter("SUM(SchemeShard/StreamReservedStorage)"); + StreamReservedStorageLimit = appGroup->GetCounter("SUM(SchemeShard/StreamReservedStorageQuota)"); + } } @@ -1374,19 +1374,19 @@ private: if (DiskSpaceTablesTotalBytes) { ResourcesStorageUsedBytes->Set(DiskSpaceTablesTotalBytes->Val()); ResourcesStorageLimitBytes->Set(DiskSpaceSoftQuotaBytes->Val()); - - auto quota = StreamShardsQuota->Val(); - ResourcesStreamUsedShards->Set(StreamShardsCount->Val()); - ResourcesStreamLimitShards->Set(quota); - /*if (quota > 0) { - ResourcesStreamUsedShardsPercents->Set(StreamShardsCount->Val() * 100.0 / (quota + 0.0)); - } else { - ResourcesStreamUsedShardsPercents->Set(0.0); - }*/ - - ResourcesStreamReservedThroughput->Set(StreamReservedThroughput->Val()); - ResourcesStreamReservedStorage->Set(StreamReservedStorage->Val()); - ResourcesStreamReservedStorageLimit->Set(StreamReservedStorageLimit->Val()); + + auto quota = StreamShardsQuota->Val(); + ResourcesStreamUsedShards->Set(StreamShardsCount->Val()); + ResourcesStreamLimitShards->Set(quota); + /*if (quota > 0) { + ResourcesStreamUsedShardsPercents->Set(StreamShardsCount->Val() * 100.0 / (quota + 0.0)); + } else { + ResourcesStreamUsedShardsPercents->Set(0.0); + }*/ + + ResourcesStreamReservedThroughput->Set(StreamReservedThroughput->Val()); + ResourcesStreamReservedStorage->Set(StreamReservedStorage->Val()); + ResourcesStreamReservedStorageLimit->Set(StreamReservedStorageLimit->Val()); } } @@ -1531,7 +1531,7 @@ private: typedef TMap<TTabletTypes::EType, THolder<TTabletCountersBase>> TAppCountersByTabletType; typedef TMap<std::pair<TTabletTypes::EType, TString>, TAutoPtr<TAggregatedLabeledCounters> > TLabeledCountersByTabletTypeAndGroup; - + TCountersByTabletType CountersByTabletType; TCountersByPathId CountersByPathId; TActorId DbWatcherActorId; @@ -1539,7 +1539,7 @@ private: TYdbTabletCountersPtr YdbCounters; - TLabeledCountersByTabletTypeAndGroup LabeledCountersByTabletTypeAndGroup; + TLabeledCountersByTabletTypeAndGroup LabeledCountersByTabletTypeAndGroup; THashMap<ui64, std::pair<TAutoPtr<TTabletCountersBase>, TAutoPtr<TTabletCountersBase>>> QuietTabletCounters; }; @@ -1577,11 +1577,11 @@ private: void HandleWork(TEvTabletCounters::TEvTabletAddCounters::TPtr &ev, const TActorContext &ctx); void HandleWork(TEvTabletCounters::TEvTabletCountersForgetTablet::TPtr &ev, const TActorContext &ctx); void HandleWork(TEvTabletCounters::TEvTabletCountersRequest::TPtr &ev, const TActorContext &ctx); - void HandleWork(TEvTabletCounters::TEvTabletAddLabeledCounters::TPtr &ev, const TActorContext &ctx); - void HandleWork(TEvTabletCounters::TEvTabletLabeledCountersRequest::TPtr &ev, const TActorContext &ctx); - void HandleWork(TEvTabletCounters::TEvTabletLabeledCountersResponse::TPtr &ev, const TActorContext &ctx);//from cluster aggregator - void HandleWork(NMon::TEvHttpInfo::TPtr& ev, const TActorContext &ctx); - void HandleWakeup(const TActorContext &ctx); + void HandleWork(TEvTabletCounters::TEvTabletAddLabeledCounters::TPtr &ev, const TActorContext &ctx); + void HandleWork(TEvTabletCounters::TEvTabletLabeledCountersRequest::TPtr &ev, const TActorContext &ctx); + void HandleWork(TEvTabletCounters::TEvTabletLabeledCountersResponse::TPtr &ev, const TActorContext &ctx);//from cluster aggregator + void HandleWork(NMon::TEvHttpInfo::TPtr& ev, const TActorContext &ctx); + void HandleWakeup(const TActorContext &ctx); void HandleWork(TEvTabletCounters::TEvRemoveDatabase::TPtr& ev); // @@ -1617,18 +1617,18 @@ TTabletCountersAggregatorActor::Bootstrap(const TActorContext &ctx) { } TabletMon = new TTabletMon(appData->Counters, Follower, DbWatcherActorId); - auto mon = appData->Mon; + auto mon = appData->Mon; if (mon) { if (!Follower) mon->RegisterActorPage(nullptr, "labeledcounters", "Labeled Counters", false, TlsActivationContext->ExecutorThread.ActorSystem, SelfId(), false); else mon->RegisterActorPage(nullptr, "followercounters", "Follower Counters", false, TlsActivationContext->ExecutorThread.ActorSystem, SelfId(), false); } - - ctx.Schedule(TDuration::Seconds(WAKEUP_TIMEOUT_SECONDS), new TEvents::TEvWakeup()); + + ctx.Schedule(TDuration::Seconds(WAKEUP_TIMEOUT_SECONDS), new TEvents::TEvWakeup()); } - + //////////////////////////////////////////// void TTabletCountersAggregatorActor::HandleWork(TEvTabletCounters::TEvTabletAddCounters::TPtr &ev, const TActorContext &ctx) { @@ -1639,16 +1639,16 @@ TTabletCountersAggregatorActor::HandleWork(TEvTabletCounters::TEvTabletAddCounte //////////////////////////////////////////// void -TTabletCountersAggregatorActor::HandleWork(TEvTabletCounters::TEvTabletAddLabeledCounters::TPtr &ev, const TActorContext &ctx) { - TEvTabletCounters::TEvTabletAddLabeledCounters* msg = ev->Get(); - LOG_DEBUG_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "got labeledCounters from tablet " << msg->TabletID); - TabletMon->ApplyLabeledCounters(msg->TabletID, msg->TabletType, msg->LabeledCounters.Get()); - - TabletTypeOfReceivedLabeledCounters.insert(msg->TabletType); -} - -//////////////////////////////////////////// -void +TTabletCountersAggregatorActor::HandleWork(TEvTabletCounters::TEvTabletAddLabeledCounters::TPtr &ev, const TActorContext &ctx) { + TEvTabletCounters::TEvTabletAddLabeledCounters* msg = ev->Get(); + LOG_DEBUG_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "got labeledCounters from tablet " << msg->TabletID); + TabletMon->ApplyLabeledCounters(msg->TabletID, msg->TabletType, msg->LabeledCounters.Get()); + + TabletTypeOfReceivedLabeledCounters.insert(msg->TabletType); +} + +//////////////////////////////////////////// +void TTabletCountersAggregatorActor::HandleWork(TEvTabletCounters::TEvTabletCountersForgetTablet::TPtr &ev, const TActorContext &ctx) { Y_UNUSED(ctx); TEvTabletCounters::TEvTabletCountersForgetTablet* msg = ev->Get(); @@ -1665,169 +1665,169 @@ TTabletCountersAggregatorActor::HandleWork(TEvTabletCounters::TEvTabletCountersR } //////////////////////////////////////////// -void -TTabletCountersAggregatorActor::HandleWork(TEvTabletCounters::TEvTabletLabeledCountersRequest::TPtr &ev, const TActorContext &ctx) { - TEvTabletCounters::TEvTabletLabeledCountersRequest* msg = ev->Get(); - TAutoPtr<TEvTabletCounters::TEvTabletLabeledCountersResponse> resp = new TEvTabletCounters::TEvTabletLabeledCountersResponse(); - TabletMon->QueryLabeledCounters(msg->Record, resp->Record, ctx); - ctx.Send(ev->Sender, resp.Release(), 0, ev->Cookie); -} - -//////////////////////////////////////////// -void -TTabletCountersAggregatorActor::HandleWork(TEvTabletCounters::TEvTabletLabeledCountersResponse::TPtr &ev, const TActorContext &ctx) { - auto& response = ev->Get()->Record; - - auto it = HttpRequestHandlers.find(ev->Sender); - if (it == HttpRequestHandlers.end()) - return; +void +TTabletCountersAggregatorActor::HandleWork(TEvTabletCounters::TEvTabletLabeledCountersRequest::TPtr &ev, const TActorContext &ctx) { + TEvTabletCounters::TEvTabletLabeledCountersRequest* msg = ev->Get(); + TAutoPtr<TEvTabletCounters::TEvTabletLabeledCountersResponse> resp = new TEvTabletCounters::TEvTabletLabeledCountersResponse(); + TabletMon->QueryLabeledCounters(msg->Record, resp->Record, ctx); + ctx.Send(ev->Sender, resp.Release(), 0, ev->Cookie); +} + +//////////////////////////////////////////// +void +TTabletCountersAggregatorActor::HandleWork(TEvTabletCounters::TEvTabletLabeledCountersResponse::TPtr &ev, const TActorContext &ctx) { + auto& response = ev->Get()->Record; + + auto it = HttpRequestHandlers.find(ev->Sender); + if (it == HttpRequestHandlers.end()) + return; TString html; - TStringOutput oss(html); - NMonitoring::TDynamicCounters counters; + TStringOutput oss(html); + NMonitoring::TDynamicCounters counters; const auto& params = it->second.second->Request.GetParams(); TString reqTabletType = params.Get("type"); - - auto mainGroup = counters.GetSubgroup("user_counters", reqTabletType); - - bool parsePQTopic = params.Has("parse_pq"); - ui32 version = 0; - if (parsePQTopic) { - TryFromString(params.Get("parse_pq"), version); - } - - for (ui32 i = 0; i < response.LabeledCountersByGroupSize(); ++i) { - const auto ucByGroup = response.GetLabeledCountersByGroup(i); + + auto mainGroup = counters.GetSubgroup("user_counters", reqTabletType); + + bool parsePQTopic = params.Has("parse_pq"); + ui32 version = 0; + if (parsePQTopic) { + TryFromString(params.Get("parse_pq"), version); + } + + for (ui32 i = 0; i < response.LabeledCountersByGroupSize(); ++i) { + const auto ucByGroup = response.GetLabeledCountersByGroup(i); TVector<TString> groups; TVector<TString> groupNames; - Y_VERIFY(ucByGroup.GetDelimiter() == "/"); + Y_VERIFY(ucByGroup.GetDelimiter() == "/"); StringSplitter(ucByGroup.GetGroup()).Split('/').SkipEmpty().Collect(&groups); - - if (parsePQTopic) { - bool skip = false; - for (ui32 j = 0; j < groups.size(); ++j) { - if (groups[j] == "total") { - skip = true; - break; - } - } - if (skip) - continue; - } - + + if (parsePQTopic) { + bool skip = false; + for (ui32 j = 0; j < groups.size(); ++j) { + if (groups[j] == "total") { + skip = true; + break; + } + } + if (skip) + continue; + } + StringSplitter(ucByGroup.GetGroupNames()).Split('/').SkipEmpty().Collect(&groupNames); - Y_VERIFY(groups.size() == groupNames.size(), "%s and %s", ucByGroup.GetGroup().c_str(), ucByGroup.GetGroupNames().c_str()); - auto group = mainGroup; - for (ui32 j = 0; j < groups.size(); ++j) { - if (parsePQTopic) { - groupNames[j] = TString(1, toupper(groupNames[j][0])) + groupNames[j].substr(1); - if (groupNames[j] == "Topic") { - if (NPersQueue::CorrectName(groups[j])) { - TString dc = to_title(NPersQueue::GetDC(groups[j])); - TString producer = NPersQueue::GetProducer(groups[j]); - TString topic = NPersQueue::GetRealTopic(groups[j]); - group = group->GetSubgroup("OriginDC", dc); - if (version > 1) { - topic = NPersQueue::GetTopicPath(groups[j]); - producer = NPersQueue::GetAccount(groups[j]); - group = group->GetSubgroup("Account", producer); - group = group->GetSubgroup("TopicPath", topic); - } else { - group = group->GetSubgroup("Producer", producer); - group = group->GetSubgroup("Topic", topic); - } - } else { - if (version > 1) { - group = group->GetSubgroup("OriginDC", "unknown"); - group = group->GetSubgroup("Account", "unknown"); - group = group->GetSubgroup("TopicPath", groups[j]); - } else { - group = group->GetSubgroup("OriginDC", "unknown"); - group = group->GetSubgroup("Producer", "unknown"); - group = group->GetSubgroup("Topic", groups[j]); - } - } - continue; - } - if (groupNames[j] == "Client") { + Y_VERIFY(groups.size() == groupNames.size(), "%s and %s", ucByGroup.GetGroup().c_str(), ucByGroup.GetGroupNames().c_str()); + auto group = mainGroup; + for (ui32 j = 0; j < groups.size(); ++j) { + if (parsePQTopic) { + groupNames[j] = TString(1, toupper(groupNames[j][0])) + groupNames[j].substr(1); + if (groupNames[j] == "Topic") { + if (NPersQueue::CorrectName(groups[j])) { + TString dc = to_title(NPersQueue::GetDC(groups[j])); + TString producer = NPersQueue::GetProducer(groups[j]); + TString topic = NPersQueue::GetRealTopic(groups[j]); + group = group->GetSubgroup("OriginDC", dc); + if (version > 1) { + topic = NPersQueue::GetTopicPath(groups[j]); + producer = NPersQueue::GetAccount(groups[j]); + group = group->GetSubgroup("Account", producer); + group = group->GetSubgroup("TopicPath", topic); + } else { + group = group->GetSubgroup("Producer", producer); + group = group->GetSubgroup("Topic", topic); + } + } else { + if (version > 1) { + group = group->GetSubgroup("OriginDC", "unknown"); + group = group->GetSubgroup("Account", "unknown"); + group = group->GetSubgroup("TopicPath", groups[j]); + } else { + group = group->GetSubgroup("OriginDC", "unknown"); + group = group->GetSubgroup("Producer", "unknown"); + group = group->GetSubgroup("Topic", groups[j]); + } + } + continue; + } + if (groupNames[j] == "Client") { group = group->GetSubgroup("ConsumerPath", NPersQueue::ConvertOldConsumerName(groups[j], ctx)); - continue; - } - } - group = group->GetSubgroup(groupNames[j], groups[j]); - } - for (ui32 j = 0; j < ucByGroup.LabeledCounterSize(); ++j) { - const auto& uc = ucByGroup.GetLabeledCounter(j); - bool deriv = (uc.GetType() == TLabeledCounterOptions::CT_DERIV); - TString counterName = uc.GetName(); - if (parsePQTopic && counterName.StartsWith("PQ/")) { - counterName = counterName.substr(3); - } - *group->GetCounter(counterName, deriv).Get() = uc.GetValue(); - } - } - - bool json = params.Has("json"); - bool spack = params.Has("spack"); - if (json) { - oss << NMonitoring::HTTPOKJSON; + continue; + } + } + group = group->GetSubgroup(groupNames[j], groups[j]); + } + for (ui32 j = 0; j < ucByGroup.LabeledCounterSize(); ++j) { + const auto& uc = ucByGroup.GetLabeledCounter(j); + bool deriv = (uc.GetType() == TLabeledCounterOptions::CT_DERIV); + TString counterName = uc.GetName(); + if (parsePQTopic && counterName.StartsWith("PQ/")) { + counterName = counterName.substr(3); + } + *group->GetCounter(counterName, deriv).Get() = uc.GetValue(); + } + } + + bool json = params.Has("json"); + bool spack = params.Has("spack"); + if (json) { + oss << NMonitoring::HTTPOKJSON; auto encoder = NMonitoring::CreateEncoder(&oss, NMonitoring::EFormat::JSON); counters.Accept(TString(), TString(), *encoder); - } else if (spack) { - oss.Write(NMonitoring::HTTPOKSPACK); + } else if (spack) { + oss.Write(NMonitoring::HTTPOKSPACK); auto encoder = NMonitoring::CreateEncoder(&oss, NMonitoring::EFormat::SPACK); - counters.Accept(TString(), TString(), *encoder); - } else { - counters.OutputHtml(oss); - } - oss.Flush(); - ctx.Send(it->second.first, new NMon::TEvHttpInfoRes(html, 0, (json || spack) ? NMon::TEvHttpInfoRes::Custom : NMon::TEvHttpInfoRes::Html)); - HttpRequestHandlers.erase(it); -} - - -//////////////////////////////////////////// + counters.Accept(TString(), TString(), *encoder); + } else { + counters.OutputHtml(oss); + } + oss.Flush(); + ctx.Send(it->second.first, new NMon::TEvHttpInfoRes(html, 0, (json || spack) ? NMon::TEvHttpInfoRes::Custom : NMon::TEvHttpInfoRes::Html)); + HttpRequestHandlers.erase(it); +} + + +//////////////////////////////////////////// void TTabletCountersAggregatorActor::HandleWork(TEvTabletCounters::TEvRemoveDatabase::TPtr& ev) { TabletMon->RemoveTabletsByPathId(ev->Get()->PathId); } //////////////////////////////////////////// -void -TTabletCountersAggregatorActor::HandleWork(NMon::TEvHttpInfo::TPtr &ev, const TActorContext &ctx) { - +void +TTabletCountersAggregatorActor::HandleWork(NMon::TEvHttpInfo::TPtr &ev, const TActorContext &ctx) { + TString reqTabletType = ev->Get()->Request.GetParams().Get("type"); - ui32 workers = 0; + ui32 workers = 0; TryFromString(ev->Get()->Request.GetParams().Get("workers"), workers); - for (ui32 tabletType = 0; tabletType < TTabletTypes::USER_TYPE_START; ++tabletType) { + for (ui32 tabletType = 0; tabletType < TTabletTypes::USER_TYPE_START; ++tabletType) { if (!NKikimrTabletBase::TTabletTypes::EType_IsValid(tabletType)) continue; TString tabletTypeStr = TTabletTypes::TypeToStr((TTabletTypes::EType)tabletType); - if (tabletTypeStr == reqTabletType) { + if (tabletTypeStr == reqTabletType) { TActorId handler = CreateClusterLabeledCountersAggregator(ctx.SelfID, (TTabletTypes::EType)tabletType, ctx, 1, "", workers); - HttpRequestHandlers.insert(std::make_pair(handler, std::make_pair(ev->Sender, ev->Release()))); - return; - } - } - //reaching this point means that this is unknow tablet type, response with nothing + HttpRequestHandlers.insert(std::make_pair(handler, std::make_pair(ev->Sender, ev->Release()))); + return; + } + } + //reaching this point means that this is unknow tablet type, response with nothing TString html; - for (const auto& tabletType: TabletTypeOfReceivedLabeledCounters) { + for (const auto& tabletType: TabletTypeOfReceivedLabeledCounters) { TString tabletTypeStr = TTabletTypes::TypeToStr((TTabletTypes::EType)tabletType); - html += "<a href=\"?type=" + tabletTypeStr + "\">" + tabletTypeStr + " labeled counters</a><br>"; - html += "<a href=\"?type=" + tabletTypeStr + "&json=1\">" + tabletTypeStr + " labeled counters as json</a><br>"; - html += "<a href=\"?type=" + tabletTypeStr + "&spack=1\">" + tabletTypeStr + " labeled counters as spack</a><br>"; - } - ctx.Send(ev->Sender, new NMon::TEvHttpInfoRes(html)); -} - -//////////////////////////////////////////// -void -TTabletCountersAggregatorActor::HandleWakeup(const TActorContext &ctx) { - - TabletMon->RecalcAll(); - ctx.Schedule(TDuration::Seconds(WAKEUP_TIMEOUT_SECONDS), new TEvents::TEvWakeup()); -} - -//////////////////////////////////////////// + html += "<a href=\"?type=" + tabletTypeStr + "\">" + tabletTypeStr + " labeled counters</a><br>"; + html += "<a href=\"?type=" + tabletTypeStr + "&json=1\">" + tabletTypeStr + " labeled counters as json</a><br>"; + html += "<a href=\"?type=" + tabletTypeStr + "&spack=1\">" + tabletTypeStr + " labeled counters as spack</a><br>"; + } + ctx.Send(ev->Sender, new NMon::TEvHttpInfoRes(html)); +} + +//////////////////////////////////////////// +void +TTabletCountersAggregatorActor::HandleWakeup(const TActorContext &ctx) { + + TabletMon->RecalcAll(); + ctx.Schedule(TDuration::Seconds(WAKEUP_TIMEOUT_SECONDS), new TEvents::TEvWakeup()); +} + +//////////////////////////////////////////// /// public state functions //////////////////////////////////////////// STFUNC(TTabletCountersAggregatorActor::StateWork) { @@ -1836,21 +1836,21 @@ STFUNC(TTabletCountersAggregatorActor::StateWork) { HFunc(TEvTabletCounters::TEvTabletAddCounters, HandleWork); HFunc(TEvTabletCounters::TEvTabletCountersForgetTablet, HandleWork); HFunc(TEvTabletCounters::TEvTabletCountersRequest, HandleWork); - HFunc(TEvTabletCounters::TEvTabletAddLabeledCounters, HandleWork); - HFunc(TEvTabletCounters::TEvTabletLabeledCountersRequest, HandleWork); - HFunc(TEvTabletCounters::TEvTabletLabeledCountersResponse, HandleWork); //from cluster aggregator, for http requests + HFunc(TEvTabletCounters::TEvTabletAddLabeledCounters, HandleWork); + HFunc(TEvTabletCounters::TEvTabletLabeledCountersRequest, HandleWork); + HFunc(TEvTabletCounters::TEvTabletLabeledCountersResponse, HandleWork); //from cluster aggregator, for http requests hFunc(TEvTabletCounters::TEvRemoveDatabase, HandleWork); - HFunc(NMon::TEvHttpInfo, HandleWork); - CFunc(TEvents::TSystem::Wakeup, HandleWakeup); - + HFunc(NMon::TEvHttpInfo, HandleWork); + CFunc(TEvents::TSystem::Wakeup, HandleWakeup); + // HFunc(TEvents::TEvPoisonPill, Handle); // we do not need PoisonPill for the actor } } //////////////////////////////////////////// - -static ui32 AGGREGATOR_TIMEOUT_SECONDS = 60; - + +static ui32 AGGREGATOR_TIMEOUT_SECONDS = 60; + IActor* CreateTabletCountersAggregator(bool follower) { return new TTabletCountersAggregatorActor(follower); @@ -1861,306 +1861,306 @@ void TabletCountersForgetTablet(ui64 tabletId, TTabletTypes::EType tabletType, T identity.Send(countersAggregator, new TEvTabletCounters::TEvTabletCountersForgetTablet(tabletId, tabletType, tenantPathId)); } -/////////////////////////////////////////// - -TString ReplaceDelimiter(const TString& groups) { - TString res; - for (const auto& c : groups) { - if (c == '|') { - res += '/'; - } else { - res += c; - } - } - return res; -} - -void PreProcessResponse(TEvTabletCounters::TEvTabletLabeledCountersResponse* response) { - auto& record = response->Record; - for (auto & lc : (*record.MutableLabeledCountersByGroup())) { - if (lc.GetDelimiter() == "/") continue; - if (lc.GetDelimiter() == "|") { //convert - TString group = ReplaceDelimiter(lc.GetGroup()); - lc.SetGroup(group); - if (lc.HasGroupNames()) { - TString groupNames = ReplaceDelimiter(lc.GetGroupNames()); - lc.SetGroupNames(groupNames); - } - } - lc.SetDelimiter("/"); - } -} - - +/////////////////////////////////////////// + +TString ReplaceDelimiter(const TString& groups) { + TString res; + for (const auto& c : groups) { + if (c == '|') { + res += '/'; + } else { + res += c; + } + } + return res; +} + +void PreProcessResponse(TEvTabletCounters::TEvTabletLabeledCountersResponse* response) { + auto& record = response->Record; + for (auto & lc : (*record.MutableLabeledCountersByGroup())) { + if (lc.GetDelimiter() == "/") continue; + if (lc.GetDelimiter() == "|") { //convert + TString group = ReplaceDelimiter(lc.GetGroup()); + lc.SetGroup(group); + if (lc.HasGroupNames()) { + TString groupNames = ReplaceDelimiter(lc.GetGroupNames()); + lc.SetGroupNames(groupNames); + } + } + lc.SetDelimiter("/"); + } +} + + class TClusterLabeledCountersAggregatorActorV1 : public TActorBootstrapped<TClusterLabeledCountersAggregatorActorV1> { using TBase = TActorBootstrapped<TClusterLabeledCountersAggregatorActorV1>; TActorId Initiator; - TTabletTypes::EType TabletType; - ui32 NodesRequested; - ui32 NodesReceived; + TTabletTypes::EType TabletType; + ui32 NodesRequested; + ui32 NodesReceived; TVector<ui32> Nodes; THashMap<ui32, TAutoPtr<TEvTabletCounters::TEvTabletLabeledCountersResponse>> PerNodeResponse; - ui32 NumWorkers; - ui32 WorkerId; - - -public: + ui32 NumWorkers; + ui32 WorkerId; + + +public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::TABLET_COUNTERS_AGGREGATOR; - } - - // + } + + // TClusterLabeledCountersAggregatorActorV1(const TActorId& parentActor, const TTabletTypes::EType tabletType, ui32 numWorkers = 0, ui32 workerId = 0) - : Initiator(parentActor) - , TabletType(tabletType) - , NodesRequested(0) - , NodesReceived(0) - , NumWorkers(numWorkers) - , WorkerId(workerId) //if NumWorkers is zero, then WorkerId is treat as desired number of workers - { - } - - void SendRequest(ui32 nodeId, const TActorContext &ctx) { + : Initiator(parentActor) + , TabletType(tabletType) + , NodesRequested(0) + , NodesReceived(0) + , NumWorkers(numWorkers) + , WorkerId(workerId) //if NumWorkers is zero, then WorkerId is treat as desired number of workers + { + } + + void SendRequest(ui32 nodeId, const TActorContext &ctx) { TActorId aggregatorServiceId = MakeTabletCountersAggregatorID(nodeId); - TAutoPtr<TEvTabletCounters::TEvTabletLabeledCountersRequest> request(new TEvTabletCounters::TEvTabletLabeledCountersRequest()); - request->Record.SetTabletType(TabletType); + TAutoPtr<TEvTabletCounters::TEvTabletLabeledCountersRequest> request(new TEvTabletCounters::TEvTabletLabeledCountersRequest()); + request->Record.SetTabletType(TabletType); request->Record.SetVersion(1); - ctx.Send(aggregatorServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - Nodes.emplace_back(nodeId); - LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator actor request to node " << nodeId << " " << ctx.SelfID); - ++NodesRequested; - } - - void Die(const TActorContext& ctx) override { - for (const ui32 node : Nodes) { + ctx.Send(aggregatorServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); + Nodes.emplace_back(nodeId); + LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator actor request to node " << nodeId << " " << ctx.SelfID); + ++NodesRequested; + } + + void Die(const TActorContext& ctx) override { + for (const ui32 node : Nodes) { ctx.Send(TActivationContext::InterconnectProxy(node), new TEvents::TEvUnsubscribe()); - } - TBase::Die(ctx); - } + } + TBase::Die(ctx); + } - void Bootstrap(const TActorContext& ctx) { - if (NumWorkers > 0) { + void Bootstrap(const TActorContext& ctx) { + if (NumWorkers > 0) { const TActorId nameserviceId = GetNameserviceActorId(); - ctx.Send(nameserviceId, new TEvInterconnect::TEvListNodes()); - TBase::Become(&TThis::StateRequestedBrowse); - ctx.Schedule(TDuration::Seconds(AGGREGATOR_TIMEOUT_SECONDS), new TEvents::TEvWakeup()); - LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator new request V1 Initiator " << Initiator << " self " << ctx.SelfID << " worker " << WorkerId); - } else { - LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator new request V1 " << ctx.SelfID); - for (ui32 i = 0; i < WorkerId; ++i) { - ctx.Register(new TClusterLabeledCountersAggregatorActorV1(ctx.SelfID, TabletType, WorkerId, i)); - } - NodesRequested = WorkerId; - TBase::Become(&TThis::StateRequested); - } - } - - STFUNC(StateRequestedBrowse) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvInterconnect::TEvNodesInfo, HandleBrowse); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - STFUNC(StateRequested) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvTabletCounters::TEvTabletLabeledCountersResponse, HandleResponse); - HFunc(TEvents::TEvUndelivered, Undelivered); - HFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void HandleBrowse(TEvInterconnect::TEvNodesInfo::TPtr &ev, const TActorContext &ctx) { - const TEvInterconnect::TEvNodesInfo* nodesInfo = ev->Get(); - Y_VERIFY(!nodesInfo->Nodes.empty()); - Nodes.reserve(nodesInfo->Nodes.size()); - ui32 i = 0; - for (const auto& ni : nodesInfo->Nodes) { - ++i; - if (i % NumWorkers == WorkerId) { - SendRequest(ni.NodeId, ctx); - } - } - if (NodesRequested > 0) { - TBase::Become(&TThis::StateRequested); - } else { - ReplyAndDie(ctx); - } - } - - void Undelivered(TEvents::TEvUndelivered::TPtr &ev, const TActorContext &ctx) { - ui32 nodeId = ev.Get()->Cookie; - LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator actor undelivered node " << nodeId << " " << ctx.SelfID); - if (PerNodeResponse.emplace(nodeId, nullptr).second) { - NodeResponseReceived(ctx); - } - } - - void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &ev, const TActorContext &ctx) { - ui32 nodeId = ev->Get()->NodeId; - LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator actor disconnected node " << nodeId << " " << ctx.SelfID); - if (PerNodeResponse.emplace(nodeId, nullptr).second) { - NodeResponseReceived(ctx); - } - } - - void HandleResponse(TEvTabletCounters::TEvTabletLabeledCountersResponse::TPtr &ev, const TActorContext &ctx) { - ui64 nodeId = ev.Get()->Cookie; - LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator actor got response node " << nodeId << " " << ctx.SelfID); - PreProcessResponse(ev->Get()); - PerNodeResponse[nodeId] = ev->Release(); - NodeResponseReceived(ctx); - } - - void NodeResponseReceived(const TActorContext &ctx) { - ++NodesReceived; - if (NodesReceived >= NodesRequested) { - ReplyAndDie(ctx); - } - } - - void HandleTimeout(const TActorContext &ctx) { - LOG_DEBUG_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator actor got TIMEOUT"); - ReplyAndDie(ctx); - } - - - void ReplyAndDie(const TActorContext& ctx) { - TAutoPtr<TEvTabletCounters::TEvTabletLabeledCountersResponse> response(new TEvTabletCounters::TEvTabletLabeledCountersResponse); - - LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator all answers recieved - replying " << ctx.SelfID); - + ctx.Send(nameserviceId, new TEvInterconnect::TEvListNodes()); + TBase::Become(&TThis::StateRequestedBrowse); + ctx.Schedule(TDuration::Seconds(AGGREGATOR_TIMEOUT_SECONDS), new TEvents::TEvWakeup()); + LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator new request V1 Initiator " << Initiator << " self " << ctx.SelfID << " worker " << WorkerId); + } else { + LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator new request V1 " << ctx.SelfID); + for (ui32 i = 0; i < WorkerId; ++i) { + ctx.Register(new TClusterLabeledCountersAggregatorActorV1(ctx.SelfID, TabletType, WorkerId, i)); + } + NodesRequested = WorkerId; + TBase::Become(&TThis::StateRequested); + } + } + + STFUNC(StateRequestedBrowse) { + switch (ev->GetTypeRewrite()) { + HFunc(TEvInterconnect::TEvNodesInfo, HandleBrowse); + CFunc(TEvents::TSystem::Wakeup, HandleTimeout); + } + } + + STFUNC(StateRequested) { + switch (ev->GetTypeRewrite()) { + HFunc(TEvTabletCounters::TEvTabletLabeledCountersResponse, HandleResponse); + HFunc(TEvents::TEvUndelivered, Undelivered); + HFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); + CFunc(TEvents::TSystem::Wakeup, HandleTimeout); + } + } + + void HandleBrowse(TEvInterconnect::TEvNodesInfo::TPtr &ev, const TActorContext &ctx) { + const TEvInterconnect::TEvNodesInfo* nodesInfo = ev->Get(); + Y_VERIFY(!nodesInfo->Nodes.empty()); + Nodes.reserve(nodesInfo->Nodes.size()); + ui32 i = 0; + for (const auto& ni : nodesInfo->Nodes) { + ++i; + if (i % NumWorkers == WorkerId) { + SendRequest(ni.NodeId, ctx); + } + } + if (NodesRequested > 0) { + TBase::Become(&TThis::StateRequested); + } else { + ReplyAndDie(ctx); + } + } + + void Undelivered(TEvents::TEvUndelivered::TPtr &ev, const TActorContext &ctx) { + ui32 nodeId = ev.Get()->Cookie; + LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator actor undelivered node " << nodeId << " " << ctx.SelfID); + if (PerNodeResponse.emplace(nodeId, nullptr).second) { + NodeResponseReceived(ctx); + } + } + + void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &ev, const TActorContext &ctx) { + ui32 nodeId = ev->Get()->NodeId; + LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator actor disconnected node " << nodeId << " " << ctx.SelfID); + if (PerNodeResponse.emplace(nodeId, nullptr).second) { + NodeResponseReceived(ctx); + } + } + + void HandleResponse(TEvTabletCounters::TEvTabletLabeledCountersResponse::TPtr &ev, const TActorContext &ctx) { + ui64 nodeId = ev.Get()->Cookie; + LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator actor got response node " << nodeId << " " << ctx.SelfID); + PreProcessResponse(ev->Get()); + PerNodeResponse[nodeId] = ev->Release(); + NodeResponseReceived(ctx); + } + + void NodeResponseReceived(const TActorContext &ctx) { + ++NodesReceived; + if (NodesReceived >= NodesRequested) { + ReplyAndDie(ctx); + } + } + + void HandleTimeout(const TActorContext &ctx) { + LOG_DEBUG_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator actor got TIMEOUT"); + ReplyAndDie(ctx); + } + + + void ReplyAndDie(const TActorContext& ctx) { + TAutoPtr<TEvTabletCounters::TEvTabletLabeledCountersResponse> response(new TEvTabletCounters::TEvTabletLabeledCountersResponse); + + LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator all answers recieved - replying " << ctx.SelfID); + TVector<ui8> types; TVector<ui8> aggrFuncs; TVector<const char*> names; - ui32 metaInfoCount = 0; + ui32 metaInfoCount = 0; THashMap<TString, TAutoPtr<TTabletLabeledCountersBase>> groupsToLabeledCounter; THashMap<TString, std::pair<ui32,ui32>> startPos; THashMap<TString, TString> groupToNames; - for (auto& resp : PerNodeResponse) { - if (!resp.second) - continue; - for (ui32 i = 0; i < resp.second->Record.LabeledCountersByGroupSize(); ++i) { - const auto& labeledCounterByGroup = resp.second->Record.GetLabeledCountersByGroup(i); + for (auto& resp : PerNodeResponse) { + if (!resp.second) + continue; + for (ui32 i = 0; i < resp.second->Record.LabeledCountersByGroupSize(); ++i) { + const auto& labeledCounterByGroup = resp.second->Record.GetLabeledCountersByGroup(i); const TString& group = labeledCounterByGroup.GetGroup(); - if (startPos.find(group) != startPos.end()) - continue; - ui32 count = labeledCounterByGroup.LabeledCounterSize(); - for (ui32 j = 0; j < count; ++j) { - const auto& labeledCounter = labeledCounterByGroup.GetLabeledCounter(j); - names.push_back(labeledCounter.GetName().c_str()); - aggrFuncs.push_back(labeledCounter.GetAggregateFunc()); - types.push_back(labeledCounter.GetType()); - } - startPos[group] = std::make_pair(metaInfoCount, count); - groupToNames[group] = labeledCounterByGroup.GetGroupNames(); - metaInfoCount += count; - } - } - for (auto& resp : PerNodeResponse) { - if (!resp.second) - continue; - //TODO: labeledCounterByGroup must have as key group + groupToNames[group] - in case of aggregation (changing group to total/total/total) - // keys may be equal for different metrics types - for (ui32 i = 0; i < resp.second->Record.LabeledCountersByGroupSize(); ++i) { - const auto& labeledCounterByGroup = resp.second->Record.GetLabeledCountersByGroup(i); - const TString& originalGroup = labeledCounterByGroup.GetGroup(); - ui32 count = Min<ui32>(startPos[originalGroup].second, labeledCounterByGroup.LabeledCounterSize()); - ui32 sp = startPos[originalGroup].first; - TAutoPtr<TTabletLabeledCountersBase> labeledCounters(new TTabletLabeledCountersBase(count, names.begin() + sp, - types.begin() + sp, aggrFuncs.begin() + sp, - originalGroup, nullptr, 0)); - for (ui32 j = 0; j < count; ++j) { - const auto& labeledCounter = labeledCounterByGroup.GetLabeledCounter(j); - labeledCounters->GetCounters()[j] = labeledCounter.GetValue(); - labeledCounters->GetIds()[j] = labeledCounter.GetId(); - } - - TVector<TString> aggrGroups; - TVector<TString> groupParts, groupParts2; + if (startPos.find(group) != startPos.end()) + continue; + ui32 count = labeledCounterByGroup.LabeledCounterSize(); + for (ui32 j = 0; j < count; ++j) { + const auto& labeledCounter = labeledCounterByGroup.GetLabeledCounter(j); + names.push_back(labeledCounter.GetName().c_str()); + aggrFuncs.push_back(labeledCounter.GetAggregateFunc()); + types.push_back(labeledCounter.GetType()); + } + startPos[group] = std::make_pair(metaInfoCount, count); + groupToNames[group] = labeledCounterByGroup.GetGroupNames(); + metaInfoCount += count; + } + } + for (auto& resp : PerNodeResponse) { + if (!resp.second) + continue; + //TODO: labeledCounterByGroup must have as key group + groupToNames[group] - in case of aggregation (changing group to total/total/total) + // keys may be equal for different metrics types + for (ui32 i = 0; i < resp.second->Record.LabeledCountersByGroupSize(); ++i) { + const auto& labeledCounterByGroup = resp.second->Record.GetLabeledCountersByGroup(i); + const TString& originalGroup = labeledCounterByGroup.GetGroup(); + ui32 count = Min<ui32>(startPos[originalGroup].second, labeledCounterByGroup.LabeledCounterSize()); + ui32 sp = startPos[originalGroup].first; + TAutoPtr<TTabletLabeledCountersBase> labeledCounters(new TTabletLabeledCountersBase(count, names.begin() + sp, + types.begin() + sp, aggrFuncs.begin() + sp, + originalGroup, nullptr, 0)); + for (ui32 j = 0; j < count; ++j) { + const auto& labeledCounter = labeledCounterByGroup.GetLabeledCounter(j); + labeledCounters->GetCounters()[j] = labeledCounter.GetValue(); + labeledCounters->GetIds()[j] = labeledCounter.GetId(); + } + + TVector<TString> aggrGroups; + TVector<TString> groupParts, groupParts2; StringSplitter(originalGroup).Split('/').SkipEmpty().Collect(&groupParts); - Y_VERIFY(groupParts.size() > 0); - groupParts2 = groupParts; - ui32 changePos = groupParts.size(); - TString group = originalGroup; - do { //repors for user/topic user/total; total;total - auto& aggr = groupsToLabeledCounter[group]; - groupToNames[group] = groupToNames[originalGroup]; - if (aggr == nullptr) { - aggr = new TTabletLabeledCountersBase(*labeledCounters); - } else { - aggr->AggregateWith(*labeledCounters); - } - if (changePos > 0) { - --changePos; - groupParts[changePos] = "total"; - group = ""; - for (auto& g: groupParts) { - if (!group.empty()) group += '/'; - group += g; - } - } else - break; - } while (true); - for (changePos = 0; changePos + 1 < groupParts.size(); ++changePos) { - groupParts2[changePos] = "total"; - group = ""; - for (auto& g: groupParts2) { - if (!group.empty()) group += '/'; - group += g; - } - auto& aggr = groupsToLabeledCounter[group]; - groupToNames[group] = groupToNames[originalGroup]; - if (aggr == nullptr) { - aggr = new TTabletLabeledCountersBase(*labeledCounters); - } else { - aggr->AggregateWith(*labeledCounters); - } - } - - } - response->Record.AddNodes(resp.first); - } - ui32 numGroups = 0, numCounters = 0; - for (auto& g : groupsToLabeledCounter) { - auto& labeledCounterByGroup = *response->Record.AddLabeledCountersByGroup(); - labeledCounterByGroup.SetGroup(g.first); - labeledCounterByGroup.SetGroupNames(groupToNames[g.first]); - labeledCounterByGroup.SetDelimiter("/"); //TODO: change to "|" - ++numGroups; - for (ui32 i = 0; i < g.second->GetCounters().Size(); ++i) { - if (g.second->GetTypes()[i] == TLabeledCounterOptions::CT_TIMELAG && g.second->GetCounters()[i].Get() == Max<i64>()) { //this means no data, do not report it - continue; - } - ++numCounters; - auto& labeledCounter = *labeledCounterByGroup.AddLabeledCounter(); - switch(g.second->GetTypes()[i]) { - case TLabeledCounterOptions::CT_SIMPLE: - case TLabeledCounterOptions::CT_DERIV: - labeledCounter.SetValue(g.second->GetCounters()[i].Get()); - break; - case TLabeledCounterOptions::CT_TIMELAG: { - ui64 milliSeconds = TAppData::TimeProvider->Now().MilliSeconds(); - labeledCounter.SetValue(milliSeconds > g.second->GetCounters()[i].Get() ? milliSeconds - g.second->GetCounters()[i].Get() : 0); - break; - } - default: - Y_FAIL("unknown type"); - } - labeledCounter.SetAggregateFunc(NKikimr::TLabeledCounterOptions::EAggregateFunc(g.second->GetAggrFuncs()[i])); - labeledCounter.SetType(NKikimr::TLabeledCounterOptions::ECounterType(g.second->GetTypes()[i])); - labeledCounter.SetId(g.second->GetIds()[i].Get()); - labeledCounter.SetName(g.second->GetCounterName(i)); - } - } - LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator request processed - got " << numGroups << " groups and " << numCounters << " counters " << ctx.SelfID << " Initiator " << Initiator); - ui64 cookie = NumWorkers ? WorkerId : 0; - ctx.Send(Initiator, response.Release(), 0, cookie); - TBase::Die(ctx); - } -}; - + Y_VERIFY(groupParts.size() > 0); + groupParts2 = groupParts; + ui32 changePos = groupParts.size(); + TString group = originalGroup; + do { //repors for user/topic user/total; total;total + auto& aggr = groupsToLabeledCounter[group]; + groupToNames[group] = groupToNames[originalGroup]; + if (aggr == nullptr) { + aggr = new TTabletLabeledCountersBase(*labeledCounters); + } else { + aggr->AggregateWith(*labeledCounters); + } + if (changePos > 0) { + --changePos; + groupParts[changePos] = "total"; + group = ""; + for (auto& g: groupParts) { + if (!group.empty()) group += '/'; + group += g; + } + } else + break; + } while (true); + for (changePos = 0; changePos + 1 < groupParts.size(); ++changePos) { + groupParts2[changePos] = "total"; + group = ""; + for (auto& g: groupParts2) { + if (!group.empty()) group += '/'; + group += g; + } + auto& aggr = groupsToLabeledCounter[group]; + groupToNames[group] = groupToNames[originalGroup]; + if (aggr == nullptr) { + aggr = new TTabletLabeledCountersBase(*labeledCounters); + } else { + aggr->AggregateWith(*labeledCounters); + } + } + + } + response->Record.AddNodes(resp.first); + } + ui32 numGroups = 0, numCounters = 0; + for (auto& g : groupsToLabeledCounter) { + auto& labeledCounterByGroup = *response->Record.AddLabeledCountersByGroup(); + labeledCounterByGroup.SetGroup(g.first); + labeledCounterByGroup.SetGroupNames(groupToNames[g.first]); + labeledCounterByGroup.SetDelimiter("/"); //TODO: change to "|" + ++numGroups; + for (ui32 i = 0; i < g.second->GetCounters().Size(); ++i) { + if (g.second->GetTypes()[i] == TLabeledCounterOptions::CT_TIMELAG && g.second->GetCounters()[i].Get() == Max<i64>()) { //this means no data, do not report it + continue; + } + ++numCounters; + auto& labeledCounter = *labeledCounterByGroup.AddLabeledCounter(); + switch(g.second->GetTypes()[i]) { + case TLabeledCounterOptions::CT_SIMPLE: + case TLabeledCounterOptions::CT_DERIV: + labeledCounter.SetValue(g.second->GetCounters()[i].Get()); + break; + case TLabeledCounterOptions::CT_TIMELAG: { + ui64 milliSeconds = TAppData::TimeProvider->Now().MilliSeconds(); + labeledCounter.SetValue(milliSeconds > g.second->GetCounters()[i].Get() ? milliSeconds - g.second->GetCounters()[i].Get() : 0); + break; + } + default: + Y_FAIL("unknown type"); + } + labeledCounter.SetAggregateFunc(NKikimr::TLabeledCounterOptions::EAggregateFunc(g.second->GetAggrFuncs()[i])); + labeledCounter.SetType(NKikimr::TLabeledCounterOptions::ECounterType(g.second->GetTypes()[i])); + labeledCounter.SetId(g.second->GetIds()[i].Get()); + labeledCounter.SetName(g.second->GetCounterName(i)); + } + } + LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator request processed - got " << numGroups << " groups and " << numCounters << " counters " << ctx.SelfID << " Initiator " << Initiator); + ui64 cookie = NumWorkers ? WorkerId : 0; + ctx.Send(Initiator, response.Release(), 0, cookie); + TBase::Die(ctx); + } +}; + class TClusterLabeledCountersAggregatorActorV2 : public TActorBootstrapped<TClusterLabeledCountersAggregatorActorV2> { using TBase = TActorBootstrapped<TClusterLabeledCountersAggregatorActorV2>; TActorId Initiator; @@ -2174,9 +2174,9 @@ class TClusterLabeledCountersAggregatorActorV2 : public TActorBootstrapped<TClus THashMap<TString, NKikimrTabletCountersAggregator::TTabletLabeledCounters*> IndexTabletLabeledCounters; THashMap<std::pair<NKikimrTabletCountersAggregator::TTabletLabeledCounters*, ui32>, NKikimrTabletCountersAggregator::TTabletLabeledCounter*> IndexTabletLabeledCounter; TString Group; - ui32 NumWorkers; - ui32 WorkerId; - bool NewFormat; + ui32 NumWorkers; + ui32 WorkerId; + bool NewFormat; public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { @@ -2192,9 +2192,9 @@ public: , Response(new TEvTabletCounters::TEvTabletLabeledCountersResponse()) , ResponseContext(Response->Record) , Group(group) - , NumWorkers(numWorkers) - , WorkerId(workerId) - , NewFormat(newFormat) + , NumWorkers(numWorkers) + , WorkerId(workerId) + , NewFormat(newFormat) {} void SendRequest(ui32 nodeId, const TActorContext &ctx) { @@ -2204,11 +2204,11 @@ public: request->Record.SetTabletType(TabletType); if (!Group.empty()) { request->Record.SetGroup(Group); - + } ctx.Send(aggregatorServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); Nodes.emplace_back(nodeId); - LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator actor request to node " << nodeId << " " << ctx.SelfID); + LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator actor request to node " << nodeId << " " << ctx.SelfID); ++NodesRequested; } @@ -2219,7 +2219,7 @@ public: } NKikimrTabletCountersAggregator::TTabletLabeledCounters* counters = Response->Record.AddLabeledCountersByGroup(); counters->SetGroup(group); - counters->SetDelimiter("/"); //TODO:change to "|" + counters->SetDelimiter("/"); //TODO:change to "|" IndexTabletLabeledCounters.emplace(group, counters); return counters; } @@ -2242,8 +2242,8 @@ public: NKikimr::TLabeledCounterOptions::EAggregateFunc func(source.GetAggregateFunc()); if (type == TLabeledCounterOptions::CT_TIMELAG) { type = TLabeledCounterOptions::CT_SIMPLE; - auto now = TInstant::Now().MilliSeconds(); - value = now > value ? now - value : 0; + auto now = TInstant::Now().MilliSeconds(); + value = now > value ? now - value : 0; switch (func) { case NKikimr::TLabeledCounterOptions::EAF_MIN: func = NKikimr::TLabeledCounterOptions::EAF_MAX; @@ -2303,20 +2303,20 @@ public: } void Bootstrap(const TActorContext& ctx) { - if (NumWorkers > 0) { + if (NumWorkers > 0) { const TActorId nameserviceId = GetNameserviceActorId(); - ctx.Send(nameserviceId, new TEvInterconnect::TEvListNodes()); - TBase::Become(&TThis::StateRequestedBrowse); - ctx.Schedule(TDuration::Seconds(AGGREGATOR_TIMEOUT_SECONDS), new TEvents::TEvWakeup()); - LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator new request V2 Initiator " << Initiator << " self " << ctx.SelfID << " worker " << WorkerId); - } else { - LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator new request V2 " << ctx.SelfID); - for (ui32 i = 0; i < WorkerId; ++i) { - ctx.Register(new TClusterLabeledCountersAggregatorActorV2(ctx.SelfID, TabletType, Group, WorkerId, i)); - } - NodesRequested = WorkerId; - TBase::Become(&TThis::StateRequested); - } + ctx.Send(nameserviceId, new TEvInterconnect::TEvListNodes()); + TBase::Become(&TThis::StateRequestedBrowse); + ctx.Schedule(TDuration::Seconds(AGGREGATOR_TIMEOUT_SECONDS), new TEvents::TEvWakeup()); + LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator new request V2 Initiator " << Initiator << " self " << ctx.SelfID << " worker " << WorkerId); + } else { + LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator new request V2 " << ctx.SelfID); + for (ui32 i = 0; i < WorkerId; ++i) { + ctx.Register(new TClusterLabeledCountersAggregatorActorV2(ctx.SelfID, TabletType, Group, WorkerId, i)); + } + NodesRequested = WorkerId; + TBase::Become(&TThis::StateRequested); + } } STFUNC(StateRequestedBrowse) { @@ -2339,12 +2339,12 @@ public: const TEvInterconnect::TEvNodesInfo* nodesInfo = ev->Get(); Y_VERIFY(!nodesInfo->Nodes.empty()); Nodes.reserve(nodesInfo->Nodes.size()); - ui32 i = 0; + ui32 i = 0; for (const auto& ni : nodesInfo->Nodes) { - ++i; - if (i % NumWorkers == WorkerId) { - SendRequest(ni.NodeId, ctx); - } + ++i; + if (i % NumWorkers == WorkerId) { + SendRequest(ni.NodeId, ctx); + } } if (NodesRequested > 0) { TBase::Become(&TThis::StateRequested); @@ -2355,7 +2355,7 @@ public: void Undelivered(TEvents::TEvUndelivered::TPtr &ev, const TActorContext &ctx) { ui32 nodeId = ev.Get()->Cookie; - LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator actor undelivered node " << nodeId << " " << ctx.SelfID); + LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator actor undelivered node " << nodeId << " " << ctx.SelfID); if (PerNodeResponse.emplace(nodeId, nullptr).second) { NodeResponseReceived(ctx); } @@ -2363,7 +2363,7 @@ public: void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &ev, const TActorContext &ctx) { ui32 nodeId = ev->Get()->NodeId; - LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator actor disconnected node " << nodeId << " " << ctx.SelfID); + LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator actor disconnected node " << nodeId << " " << ctx.SelfID); if (PerNodeResponse.emplace(nodeId, nullptr).second) { NodeResponseReceived(ctx); } @@ -2371,10 +2371,10 @@ public: void HandleResponse(TEvTabletCounters::TEvTabletLabeledCountersResponse::TPtr &ev, const TActorContext &ctx) { ui64 nodeId = ev.Get()->Cookie; - LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator actor got response node " << nodeId << " " << ctx.SelfID); - PreProcessResponse(ev->Get()); + LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator actor got response node " << nodeId << " " << ctx.SelfID); + PreProcessResponse(ev->Get()); auto it = PerNodeResponse.emplace(nodeId, ev->Release().Release()); - LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator actor merged response node " << nodeId << " " << ctx.SelfID); + LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator actor merged response node " << nodeId << " " << ctx.SelfID); if (it.second) { Merge(it.first->second->Record); NodeResponseReceived(ctx); @@ -2394,41 +2394,41 @@ public: } void ReplyAndDie(const TActorContext& ctx) { - LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator request processed " << ctx.SelfID << " Initiator " << Initiator); - ui64 cookie = NumWorkers ? WorkerId : 0; - if (NewFormat) { - for (auto& counters : *Response->Record.MutableLabeledCountersByGroup()) { - TVector<TString> groups; + LOG_INFO_S(ctx, NKikimrServices::TABLET_AGGREGATOR, "aggregator request processed " << ctx.SelfID << " Initiator " << Initiator); + ui64 cookie = NumWorkers ? WorkerId : 0; + if (NewFormat) { + for (auto& counters : *Response->Record.MutableLabeledCountersByGroup()) { + TVector<TString> groups; StringSplitter(counters.GetGroup()).SplitByString(counters.GetDelimiter()).SkipEmpty().Collect(&groups); - TStringBuf ff; - TString topic = ""; - TString dc = ""; - TString res; - if (groups.size() == 1) { //topic case - ff = groups[0]; - } else if (groups.size() == 3) { //client important topic + TStringBuf ff; + TString topic = ""; + TString dc = ""; + TString res; + if (groups.size() == 1) { //topic case + ff = groups[0]; + } else if (groups.size() == 3) { //client important topic res = NPersQueue::ConvertOldConsumerName(groups[0], ctx) + "|" + groups[1] + "|"; - ff = groups[2]; - } else { - continue; - } - if (ff.empty()) - continue; - TStringBuf tmp(ff.NextTok('.')); - if (tmp != "rt3") - continue; - dc = TString(ff.NextTok("--")); - if (dc.empty()) - continue; - if (ff.empty()) - continue; - topic = NPersQueue::ConvertOldTopicName(TString(ff)); - res += topic + "|" + dc; - counters.SetGroup(res); - counters.SetDelimiter("|"); - } - } - ctx.Send(Initiator, Response.Release(), 0, cookie); + ff = groups[2]; + } else { + continue; + } + if (ff.empty()) + continue; + TStringBuf tmp(ff.NextTok('.')); + if (tmp != "rt3") + continue; + dc = TString(ff.NextTok("--")); + if (dc.empty()) + continue; + if (ff.empty()) + continue; + topic = NPersQueue::ConvertOldTopicName(TString(ff)); + res += topic + "|" + dc; + counters.SetGroup(res); + counters.SetDelimiter("|"); + } + } + ctx.Send(Initiator, Response.Release(), 0, cookie); TBase::Die(ctx); } }; @@ -2436,21 +2436,21 @@ public: IActor* CreateClusterLabeledCountersAggregatorActor(const TActorId& parentActor, TTabletTypes::EType tabletType, ui32 version, const TString& group, const ui32 totalWorkersCount) { switch (version) { case 1: - return new TClusterLabeledCountersAggregatorActorV1(parentActor, tabletType, totalWorkersCount == 0 ? 1 : 0, totalWorkersCount); + return new TClusterLabeledCountersAggregatorActorV1(parentActor, tabletType, totalWorkersCount == 0 ? 1 : 0, totalWorkersCount); case 2: - return new TClusterLabeledCountersAggregatorActorV2(parentActor, tabletType, group, totalWorkersCount == 0 ? 1 : 0, totalWorkersCount); - case 3: //new format - return new TClusterLabeledCountersAggregatorActorV2(parentActor, tabletType, group, totalWorkersCount == 0 ? 1 : 0, totalWorkersCount, true); + return new TClusterLabeledCountersAggregatorActorV2(parentActor, tabletType, group, totalWorkersCount == 0 ? 1 : 0, totalWorkersCount); + case 3: //new format + return new TClusterLabeledCountersAggregatorActorV2(parentActor, tabletType, group, totalWorkersCount == 0 ? 1 : 0, totalWorkersCount, true); } return nullptr; -} - - +} + + TActorId CreateClusterLabeledCountersAggregator(const TActorId& parentActor, TTabletTypes::EType tabletType, const TActorContext& ctx, ui32 version, const TString& group, const ui32 totalWorkersCount) { - return ctx.Register(CreateClusterLabeledCountersAggregatorActor(parentActor, tabletType, version, group, totalWorkersCount), TMailboxType::HTSwap, AppData(ctx)->BatchPoolId); -} - - - - -} + return ctx.Register(CreateClusterLabeledCountersAggregatorActor(parentActor, tabletType, version, group, totalWorkersCount), TMailboxType::HTSwap, AppData(ctx)->BatchPoolId); +} + + + + +} diff --git a/ydb/core/tablet/tablet_counters_aggregator.h b/ydb/core/tablet/tablet_counters_aggregator.h index 9a20c24ee6..db350d41cb 100644 --- a/ydb/core/tablet/tablet_counters_aggregator.h +++ b/ydb/core/tablet/tablet_counters_aggregator.h @@ -18,9 +18,9 @@ namespace NKikimr { //////////////////////////////////////////// TActorId MakeTabletCountersAggregatorID(ui32 node, bool follower = false); - -static const ui32 WORKERS_COUNT = 0; - + +static const ui32 WORKERS_COUNT = 0; + //////////////////////////////////////////// struct TEvTabletCounters { // @@ -30,9 +30,9 @@ struct TEvTabletCounters { EvTabletCountersForgetTablet, EvTabletCountersRequest, EvTabletCountersResponse, - EvTabletAddLabeledCounters, - EvTabletLabeledCountersRequest, - EvTabletLabeledCountersResponse, + EvTabletAddLabeledCounters, + EvTabletLabeledCountersRequest, + EvTabletLabeledCountersResponse, EvRemoveDatabase, EvEnd }; @@ -62,20 +62,20 @@ struct TEvTabletCounters { {} }; - struct TEvTabletAddLabeledCounters : public TEventLocal<TEvTabletAddLabeledCounters, EvTabletAddLabeledCounters> { - // - const ui64 TabletID; + struct TEvTabletAddLabeledCounters : public TEventLocal<TEvTabletAddLabeledCounters, EvTabletAddLabeledCounters> { + // + const ui64 TabletID; const TTabletTypes::EType TabletType; - TAutoPtr<TTabletLabeledCountersBase> LabeledCounters; - TIntrusivePtr<TInFlightCookie> InFlightCounter; // Used to detect when previous event has been consumed by the aggregator + TAutoPtr<TTabletLabeledCountersBase> LabeledCounters; + TIntrusivePtr<TInFlightCookie> InFlightCounter; // Used to detect when previous event has been consumed by the aggregator TEvTabletAddLabeledCounters(TIntrusivePtr<TInFlightCookie> inFlightCounter, ui64 tabletID, TTabletTypes::EType tabletType, TAutoPtr<TTabletLabeledCountersBase> labeledCounters) - : TabletID(tabletID) - , TabletType(tabletType) - , LabeledCounters(labeledCounters) - , InFlightCounter(inFlightCounter) - {} - }; - + : TabletID(tabletID) + , TabletType(tabletType) + , LabeledCounters(labeledCounters) + , InFlightCounter(inFlightCounter) + {} + }; + // struct TEvTabletCountersForgetTablet : public TEventLocal<TEvTabletCountersForgetTablet, EvTabletCountersForgetTablet> { // @@ -97,16 +97,16 @@ struct TEvTabletCounters { struct TEvTabletCountersResponse : public TEventPB<TEvTabletCountersResponse, NKikimrTabletCountersAggregator::TEvTabletCountersResponse, EvTabletCountersResponse> { }; - // - struct TEvTabletLabeledCountersRequest : public TEventPB<TEvTabletLabeledCountersRequest, NKikimrTabletCountersAggregator::TEvTabletLabeledCountersRequest, EvTabletLabeledCountersRequest> { - }; - - struct TEvTabletLabeledCountersResponse : public TEventPB<TEvTabletLabeledCountersResponse, NKikimrTabletCountersAggregator::TEvTabletLabeledCountersResponse, EvTabletLabeledCountersResponse> { - }; - + // + struct TEvTabletLabeledCountersRequest : public TEventPB<TEvTabletLabeledCountersRequest, NKikimrTabletCountersAggregator::TEvTabletLabeledCountersRequest, EvTabletLabeledCountersRequest> { + }; + + struct TEvTabletLabeledCountersResponse : public TEventPB<TEvTabletLabeledCountersResponse, NKikimrTabletCountersAggregator::TEvTabletLabeledCountersResponse, EvTabletLabeledCountersResponse> { + }; + struct TEvRemoveDatabase : public TEventLocal<TEvRemoveDatabase, EvRemoveDatabase> { const TPathId PathId; - + explicit TEvRemoveDatabase(TPathId pathId) : PathId(pathId) {} @@ -129,21 +129,21 @@ TIntrusivePtr<NSysView::IDbCounters> CreateTabletDbCounters( //////////////////////////////////////////// IActor* CreateTabletCountersAggregator(bool follower); - -//////////////////////////////////////////// -//will create actor that aggregate LabeledCounters from all nodes and reports them as TEvTabletLabeledCountersResponse to parentActor + +//////////////////////////////////////////// +//will create actor that aggregate LabeledCounters from all nodes and reports them as TEvTabletLabeledCountersResponse to parentActor TActorId CreateClusterLabeledCountersAggregator( const TActorId& parentActor, TTabletTypes::EType tabletType, const TActorContext& ctx, ui32 version = 1, - const TString& group = TString(), const ui32 TotalWorkersCount = WORKERS_COUNT); - + const TString& group = TString(), const ui32 TotalWorkersCount = WORKERS_COUNT); + IActor* CreateClusterLabeledCountersAggregatorActor( const TActorId& parentActor, TTabletTypes::EType tabletType, ui32 version = 1, - const TString& group = TString(), const ui32 TotalWorkersCount = WORKERS_COUNT); - + const TString& group = TString(), const ui32 TotalWorkersCount = WORKERS_COUNT); + } // end of the NKikimr namespace diff --git a/ydb/core/tablet/tablet_counters_aggregator_ut.cpp b/ydb/core/tablet/tablet_counters_aggregator_ut.cpp index abcda5f15b..707ffb977e 100644 --- a/ydb/core/tablet/tablet_counters_aggregator_ut.cpp +++ b/ydb/core/tablet/tablet_counters_aggregator_ut.cpp @@ -8,156 +8,156 @@ namespace NKikimr { using namespace NActors; -void TestHeavy(const ui32 v, ui32 numWorkers) { +void TestHeavy(const ui32 v, ui32 numWorkers) { + + TInstant t(Now()); - TInstant t(Now()); - TVector<TActorId> cc; TActorId aggregatorId; - TTestBasicRuntime runtime(1); - constexpr int NODES = 10; - constexpr int GROUPS = 1000; - constexpr int VALUES = 20; - - runtime.Initialize(TAppPrepare().Unwrap()); + TTestBasicRuntime runtime(1); + constexpr int NODES = 10; + constexpr int GROUPS = 1000; + constexpr int VALUES = 20; + + runtime.Initialize(TAppPrepare().Unwrap()); TActorId edge = runtime.AllocateEdgeActor(); - - runtime.SetLogPriority(NKikimrServices::TABLET_AGGREGATOR, NActors::NLog::PRI_DEBUG); - - IActor* aggregator = CreateClusterLabeledCountersAggregatorActor(edge, TTabletTypes::PersQueue, v, TString(), numWorkers); - aggregatorId = runtime.Register(aggregator); - - if (numWorkers == 0) { - cc.push_back(aggregatorId); - ++numWorkers; - } - + + runtime.SetLogPriority(NKikimrServices::TABLET_AGGREGATOR, NActors::NLog::PRI_DEBUG); + + IActor* aggregator = CreateClusterLabeledCountersAggregatorActor(edge, TTabletTypes::PersQueue, v, TString(), numWorkers); + aggregatorId = runtime.Register(aggregator); + + if (numWorkers == 0) { + cc.push_back(aggregatorId); + ++numWorkers; + } + runtime.SetRegistrationObserverFunc([&cc, &aggregatorId](TTestActorRuntimeBase& runtime, const TActorId& parentId, const TActorId& actorId) { - TTestActorRuntime::DefaultRegistrationObserver(runtime, parentId, actorId); - if (parentId == aggregatorId) { - cc.push_back(actorId); - } - }); - - TDispatchOptions options; - options.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, numWorkers); - runtime.DispatchEvents(options); - for (const auto& a : cc) { + TTestActorRuntime::DefaultRegistrationObserver(runtime, parentId, actorId); + if (parentId == aggregatorId) { + cc.push_back(actorId); + } + }); + + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, numWorkers); + runtime.DispatchEvents(options); + for (const auto& a : cc) { THolder<TEvInterconnect::TEvNodesInfo> nodesInfo = MakeHolder<TEvInterconnect::TEvNodesInfo>(); - for (auto i = 1; i <= NODES; ++i) { + for (auto i = 1; i <= NODES; ++i) { nodesInfo->Nodes.emplace_back(TEvInterconnect::TNodeInfo(i, "::", "localhost", "localhost", 1234, TNodeLocation())); - } - runtime.Send(new NActors::IEventHandle(a, edge, nodesInfo.Release()), 0, true); - } - - for (auto i = 1; i <= NODES; ++i) { + } + runtime.Send(new NActors::IEventHandle(a, edge, nodesInfo.Release()), 0, true); + } + + for (auto i = 1; i <= NODES; ++i) { THolder<TEvTabletCounters::TEvTabletLabeledCountersResponse> response = MakeHolder<TEvTabletCounters::TEvTabletLabeledCountersResponse>(); - for (auto k = 0; k < GROUPS; ++k) { - char delim = (k % 2 == 0) ? '/' : '|'; - auto& group1 = *response->Record.AddLabeledCountersByGroup(); - group1.SetGroup(Sprintf("group%d%c%d", i, delim, k)); - group1.SetGroupNames(Sprintf("A%cB", delim)); - if (k % 4 != 0) - group1.SetDelimiter(TStringBuilder() << delim); - for (auto j = 0; j < VALUES; ++j) { - auto& counter1 = *group1.AddLabeledCounter(); - counter1.SetName(Sprintf("value%d", j)); - counter1.SetValue(13); - counter1.SetType(TLabeledCounterOptions::CT_SIMPLE); - counter1.SetAggregateFunc(TLabeledCounterOptions::EAF_SUM); - } - } - Cerr << "Sending message to " << cc[i % numWorkers] << " from " << aggregatorId << " id " << i << "\n"; - runtime.Send(new NActors::IEventHandle(cc[i % numWorkers], aggregatorId, response.Release(), 0, i), 0, true); - } - { - TDispatchOptions options; - options.FinalEvents.emplace_back(TEvInterconnect::EvNodesInfo, numWorkers); - runtime.DispatchEvents(options, TDuration::Seconds(1)); - } - - THolder<TEvTabletCounters::TEvTabletLabeledCountersResponse> response = runtime.GrabEdgeEvent<TEvTabletCounters::TEvTabletLabeledCountersResponse>(); - - UNIT_ASSERT(response != nullptr); - UNIT_ASSERT_VALUES_EQUAL(response->Record.LabeledCountersByGroupSize(), NODES * GROUPS); - - Cerr << "TEST " << v << " " << numWorkers << " duration " << TInstant::Now() - t << "\n"; -} - - + for (auto k = 0; k < GROUPS; ++k) { + char delim = (k % 2 == 0) ? '/' : '|'; + auto& group1 = *response->Record.AddLabeledCountersByGroup(); + group1.SetGroup(Sprintf("group%d%c%d", i, delim, k)); + group1.SetGroupNames(Sprintf("A%cB", delim)); + if (k % 4 != 0) + group1.SetDelimiter(TStringBuilder() << delim); + for (auto j = 0; j < VALUES; ++j) { + auto& counter1 = *group1.AddLabeledCounter(); + counter1.SetName(Sprintf("value%d", j)); + counter1.SetValue(13); + counter1.SetType(TLabeledCounterOptions::CT_SIMPLE); + counter1.SetAggregateFunc(TLabeledCounterOptions::EAF_SUM); + } + } + Cerr << "Sending message to " << cc[i % numWorkers] << " from " << aggregatorId << " id " << i << "\n"; + runtime.Send(new NActors::IEventHandle(cc[i % numWorkers], aggregatorId, response.Release(), 0, i), 0, true); + } + { + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvInterconnect::EvNodesInfo, numWorkers); + runtime.DispatchEvents(options, TDuration::Seconds(1)); + } + + THolder<TEvTabletCounters::TEvTabletLabeledCountersResponse> response = runtime.GrabEdgeEvent<TEvTabletCounters::TEvTabletLabeledCountersResponse>(); + + UNIT_ASSERT(response != nullptr); + UNIT_ASSERT_VALUES_EQUAL(response->Record.LabeledCountersByGroupSize(), NODES * GROUPS); + + Cerr << "TEST " << v << " " << numWorkers << " duration " << TInstant::Now() - t << "\n"; +} + + Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { Y_UNIT_TEST(SimpleAggregation) { TVector<TActorId> cc; TActorId aggregatorId; - + TTestBasicRuntime runtime(1); runtime.Initialize(TAppPrepare().Unwrap()); TActorId edge = runtime.AllocateEdgeActor(); - IActor* aggregator = CreateClusterLabeledCountersAggregatorActor(edge, TTabletTypes::PersQueue, 2, TString(), 3); - aggregatorId = runtime.Register(aggregator); + IActor* aggregator = CreateClusterLabeledCountersAggregatorActor(edge, TTabletTypes::PersQueue, 2, TString(), 3); + aggregatorId = runtime.Register(aggregator); runtime.SetRegistrationObserverFunc([&cc, &aggregatorId](TTestActorRuntimeBase& runtime, const TActorId& parentId, const TActorId& actorId) { - TTestActorRuntime::DefaultRegistrationObserver(runtime, parentId, actorId); - if (parentId == aggregatorId) { - cc.push_back(actorId); - } - }); - - TDispatchOptions options; - options.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, 1); - runtime.DispatchEvents(options); - for (const auto& a : cc) { + TTestActorRuntime::DefaultRegistrationObserver(runtime, parentId, actorId); + if (parentId == aggregatorId) { + cc.push_back(actorId); + } + }); + + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, 1); + runtime.DispatchEvents(options); + for (const auto& a : cc) { THolder<TEvInterconnect::TEvNodesInfo> nodesInfo = MakeHolder<TEvInterconnect::TEvNodesInfo>(); nodesInfo->Nodes.emplace_back(TEvInterconnect::TNodeInfo(1, "::", "localhost", "localhost", 1234, TNodeLocation())); nodesInfo->Nodes.emplace_back(TEvInterconnect::TNodeInfo(2, "::", "localhost", "localhost", 1234, TNodeLocation())); nodesInfo->Nodes.emplace_back(TEvInterconnect::TNodeInfo(3, "::", "localhost", "localhost", 1234, TNodeLocation())); - runtime.Send(new NActors::IEventHandle(a, edge, nodesInfo.Release()), 0, true); - } - + runtime.Send(new NActors::IEventHandle(a, edge, nodesInfo.Release()), 0, true); + } + { THolder<TEvTabletCounters::TEvTabletLabeledCountersResponse> response = MakeHolder<TEvTabletCounters::TEvTabletLabeledCountersResponse>(); auto& group1 = *response->Record.AddLabeledCountersByGroup(); - group1.SetGroup("group1|group2"); - group1.SetGroupNames("AAA|BBB"); - group1.SetDelimiter("|"); + group1.SetGroup("group1|group2"); + group1.SetGroupNames("AAA|BBB"); + group1.SetDelimiter("|"); auto& counter1 = *group1.AddLabeledCounter(); counter1.SetName("value1"); counter1.SetValue(13); counter1.SetType(TLabeledCounterOptions::CT_SIMPLE); counter1.SetAggregateFunc(TLabeledCounterOptions::EAF_SUM); - runtime.Send(new NActors::IEventHandle(cc[0], edge, response.Release(), 0, 1), 0, true); + runtime.Send(new NActors::IEventHandle(cc[0], edge, response.Release(), 0, 1), 0, true); } { THolder<TEvTabletCounters::TEvTabletLabeledCountersResponse> response = MakeHolder<TEvTabletCounters::TEvTabletLabeledCountersResponse>(); response->Record.AddCounterNames("value1"); auto& group1 = *response->Record.AddLabeledCountersByGroup(); - group1.SetGroup("group1|group2"); - group1.SetGroupNames("AAA|BBB"); - group1.SetDelimiter("|"); + group1.SetGroup("group1|group2"); + group1.SetGroupNames("AAA|BBB"); + group1.SetDelimiter("|"); auto& counter1 = *group1.AddLabeledCounter(); counter1.SetNameId(0); counter1.SetValue(13); counter1.SetType(TLabeledCounterOptions::CT_SIMPLE); counter1.SetAggregateFunc(TLabeledCounterOptions::EAF_SUM); - runtime.Send(new NActors::IEventHandle(cc[1], edge, response.Release(), 0, 2), 0, true); + runtime.Send(new NActors::IEventHandle(cc[1], edge, response.Release(), 0, 2), 0, true); } { THolder<TEvTabletCounters::TEvTabletLabeledCountersResponse> response = MakeHolder<TEvTabletCounters::TEvTabletLabeledCountersResponse>(); response->Record.AddCounterNames("value1"); auto& group1 = *response->Record.AddLabeledCountersByGroup(); - group1.SetGroup("group1|group2"); - group1.SetGroupNames("AAA|BBB"); - group1.SetDelimiter("|"); + group1.SetGroup("group1|group2"); + group1.SetGroupNames("AAA|BBB"); + group1.SetDelimiter("|"); auto& counter1 = *group1.AddLabeledCounter(); counter1.SetNameId(0); counter1.SetValue(13); counter1.SetType(TLabeledCounterOptions::CT_SIMPLE); counter1.SetAggregateFunc(TLabeledCounterOptions::EAF_SUM); - runtime.Send(new NActors::IEventHandle(cc[2], edge, response.Release(), 0, 3), 0, true); + runtime.Send(new NActors::IEventHandle(cc[2], edge, response.Release(), 0, 3), 0, true); } runtime.DispatchEvents(); @@ -168,7 +168,7 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { UNIT_ASSERT(response != nullptr); UNIT_ASSERT_VALUES_EQUAL(response->Record.LabeledCountersByGroupSize(), 1); const auto& group1 = response->Record.GetLabeledCountersByGroup(0); - UNIT_ASSERT_VALUES_EQUAL(group1.GetGroup(), "group1/group2"); + UNIT_ASSERT_VALUES_EQUAL(group1.GetGroup(), "group1/group2"); UNIT_ASSERT_VALUES_EQUAL(group1.LabeledCounterSize(), 1); UNIT_ASSERT_VALUES_EQUAL(group1.LabeledCounterSize(), 1); const auto& counter1 = group1.GetLabeledCounter(0); @@ -176,90 +176,90 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { UNIT_ASSERT_VALUES_EQUAL(counter1.GetValue(), 39); } - + Y_UNIT_TEST(HeavyAggregation) { - TestHeavy(2, 10); - TestHeavy(2, 20); - TestHeavy(2, 1); - TestHeavy(2, 0); - } + TestHeavy(2, 10); + TestHeavy(2, 20); + TestHeavy(2, 1); + TestHeavy(2, 0); + } - Y_UNIT_TEST(Version3Aggregation) { + Y_UNIT_TEST(Version3Aggregation) { TVector<TActorId> cc; TActorId aggregatorId; - - TTestBasicRuntime runtime(1); - - runtime.Initialize(TAppPrepare().Unwrap()); + + TTestBasicRuntime runtime(1); + + runtime.Initialize(TAppPrepare().Unwrap()); TActorId edge = runtime.AllocateEdgeActor(); - - IActor* aggregator = CreateClusterLabeledCountersAggregatorActor(edge, TTabletTypes::PersQueue, 3, "rt3.*--*,cons*/*/rt.*--*", 3); - aggregatorId = runtime.Register(aggregator); - + + IActor* aggregator = CreateClusterLabeledCountersAggregatorActor(edge, TTabletTypes::PersQueue, 3, "rt3.*--*,cons*/*/rt.*--*", 3); + aggregatorId = runtime.Register(aggregator); + runtime.SetRegistrationObserverFunc([&cc, &aggregatorId](TTestActorRuntimeBase& runtime, const TActorId& parentId, const TActorId& actorId) { - TTestActorRuntime::DefaultRegistrationObserver(runtime, parentId, actorId); - if (parentId == aggregatorId) { - cc.push_back(actorId); - } - }); - - TDispatchOptions options; - options.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, 1); - runtime.DispatchEvents(options); - for (const auto& a : cc) { + TTestActorRuntime::DefaultRegistrationObserver(runtime, parentId, actorId); + if (parentId == aggregatorId) { + cc.push_back(actorId); + } + }); + + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, 1); + runtime.DispatchEvents(options); + for (const auto& a : cc) { THolder<TEvInterconnect::TEvNodesInfo> nodesInfo = MakeHolder<TEvInterconnect::TEvNodesInfo>(); nodesInfo->Nodes.emplace_back(TEvInterconnect::TNodeInfo(1, "::", "localhost", "localhost", 1234, TNodeLocation())); nodesInfo->Nodes.emplace_back(TEvInterconnect::TNodeInfo(2, "::", "localhost", "localhost", 1234, TNodeLocation())); nodesInfo->Nodes.emplace_back(TEvInterconnect::TNodeInfo(3, "::", "localhost", "localhost", 1234, TNodeLocation())); - runtime.Send(new NActors::IEventHandle(a, edge, nodesInfo.Release()), 0, true); - } - - { + runtime.Send(new NActors::IEventHandle(a, edge, nodesInfo.Release()), 0, true); + } + + { THolder<TEvTabletCounters::TEvTabletLabeledCountersResponse> response = MakeHolder<TEvTabletCounters::TEvTabletLabeledCountersResponse>(); - auto& group1 = *response->Record.AddLabeledCountersByGroup(); - group1.SetGroup("rt3.man--aba@caba--daba"); - group1.SetGroupNames("topic"); - group1.SetDelimiter("/"); - auto& counter1 = *group1.AddLabeledCounter(); - counter1.SetName("value1"); - counter1.SetValue(13); - counter1.SetType(TLabeledCounterOptions::CT_SIMPLE); - counter1.SetAggregateFunc(TLabeledCounterOptions::EAF_SUM); - runtime.Send(new NActors::IEventHandle(cc[0], edge, response.Release(), 0, 1), 0, true); - } - - { + auto& group1 = *response->Record.AddLabeledCountersByGroup(); + group1.SetGroup("rt3.man--aba@caba--daba"); + group1.SetGroupNames("topic"); + group1.SetDelimiter("/"); + auto& counter1 = *group1.AddLabeledCounter(); + counter1.SetName("value1"); + counter1.SetValue(13); + counter1.SetType(TLabeledCounterOptions::CT_SIMPLE); + counter1.SetAggregateFunc(TLabeledCounterOptions::EAF_SUM); + runtime.Send(new NActors::IEventHandle(cc[0], edge, response.Release(), 0, 1), 0, true); + } + + { THolder<TEvTabletCounters::TEvTabletLabeledCountersResponse> response = MakeHolder<TEvTabletCounters::TEvTabletLabeledCountersResponse>(); - response->Record.AddCounterNames("value1"); - auto& group1 = *response->Record.AddLabeledCountersByGroup(); - group1.SetGroup("cons@aaa/1/rt3.man--aba@caba--daba"); - group1.SetGroupNames("consumer/important/topic"); - group1.SetDelimiter("/"); - auto& counter1 = *group1.AddLabeledCounter(); - counter1.SetNameId(0); - counter1.SetValue(13); - counter1.SetType(TLabeledCounterOptions::CT_SIMPLE); - counter1.SetAggregateFunc(TLabeledCounterOptions::EAF_SUM); - runtime.Send(new NActors::IEventHandle(cc[1], edge, response.Release(), 0, 2), 0, true); - } - - runtime.DispatchEvents(); - THolder<TEvTabletCounters::TEvTabletLabeledCountersResponse> response = runtime.GrabEdgeEvent<TEvTabletCounters::TEvTabletLabeledCountersResponse>(); -#ifndef NDEBUG - Cerr << response->Record.DebugString() << Endl; -#endif - UNIT_ASSERT(response != nullptr); - Cerr << response->Record; - UNIT_ASSERT_VALUES_EQUAL(response->Record.LabeledCountersByGroupSize(), 2); - const auto& group1 = response->Record.GetLabeledCountersByGroup(1); - const auto& group2 = response->Record.GetLabeledCountersByGroup(0); - TVector<TString> res = {group1.GetGroup(), group2.GetGroup()}; - std::sort(res.begin(), res.end()); - - UNIT_ASSERT_VALUES_EQUAL(res[0], "aba/caba/daba|man"); - UNIT_ASSERT_VALUES_EQUAL(res[1], "cons/aaa|1|aba/caba/daba|man"); - } - + response->Record.AddCounterNames("value1"); + auto& group1 = *response->Record.AddLabeledCountersByGroup(); + group1.SetGroup("cons@aaa/1/rt3.man--aba@caba--daba"); + group1.SetGroupNames("consumer/important/topic"); + group1.SetDelimiter("/"); + auto& counter1 = *group1.AddLabeledCounter(); + counter1.SetNameId(0); + counter1.SetValue(13); + counter1.SetType(TLabeledCounterOptions::CT_SIMPLE); + counter1.SetAggregateFunc(TLabeledCounterOptions::EAF_SUM); + runtime.Send(new NActors::IEventHandle(cc[1], edge, response.Release(), 0, 2), 0, true); + } + + runtime.DispatchEvents(); + THolder<TEvTabletCounters::TEvTabletLabeledCountersResponse> response = runtime.GrabEdgeEvent<TEvTabletCounters::TEvTabletLabeledCountersResponse>(); +#ifndef NDEBUG + Cerr << response->Record.DebugString() << Endl; +#endif + UNIT_ASSERT(response != nullptr); + Cerr << response->Record; + UNIT_ASSERT_VALUES_EQUAL(response->Record.LabeledCountersByGroupSize(), 2); + const auto& group1 = response->Record.GetLabeledCountersByGroup(1); + const auto& group2 = response->Record.GetLabeledCountersByGroup(0); + TVector<TString> res = {group1.GetGroup(), group2.GetGroup()}; + std::sort(res.begin(), res.end()); + + UNIT_ASSERT_VALUES_EQUAL(res[0], "aba/caba/daba|man"); + UNIT_ASSERT_VALUES_EQUAL(res[1], "cons/aaa|1|aba/caba/daba|man"); + } + } } diff --git a/ydb/core/tablet/tablet_counters_protobuf.h b/ydb/core/tablet/tablet_counters_protobuf.h index c3fb397db5..11d3a334de 100644 --- a/ydb/core/tablet/tablet_counters_protobuf.h +++ b/ydb/core/tablet/tablet_counters_protobuf.h @@ -3,27 +3,27 @@ #include "tablet_counters.h" #include "tablet_counters_aggregator.h" #include <ydb/core/tablet_flat/defs.h> -#include <util/string/vector.h> +#include <util/string/vector.h> #include <util/string/split.h> namespace NKikimr { namespace NAux { -// Class that incapsulates protobuf options parsing for app counters -template <const NProtoBuf::EnumDescriptor* AppCountersDesc()> -struct TAppParsedOpts { +// Class that incapsulates protobuf options parsing for app counters +template <const NProtoBuf::EnumDescriptor* AppCountersDesc()> +struct TAppParsedOpts { public: const size_t Size; -protected: +protected: TVector<TString> NamesStrings; TVector<const char*> Names; TVector<TVector<TTabletPercentileCounter::TRangeDef>> Ranges; TVector<TTabletPercentileCounter::TRangeDef> AppGlobalRanges; TVector<bool> Integral; public: - explicit TAppParsedOpts(const size_t diff = 0) - : Size(AppCountersDesc()->value_count() + diff) + explicit TAppParsedOpts(const size_t diff = 0) + : Size(AppCountersDesc()->value_count() + diff) { const NProtoBuf::EnumDescriptor* appDesc = AppCountersDesc(); NamesStrings.reserve(Size); @@ -56,86 +56,86 @@ public: Integral.push_back(co.GetIntegral()); } - // Make plain strings out of Strokas to fullfil interface of TTabletCountersBase + // Make plain strings out of Strokas to fullfil interface of TTabletCountersBase for (const TString& s : NamesStrings) { Names.push_back(s.empty() ? nullptr : s.c_str()); - } - - // Parse protobuf options for enums itself - AppGlobalRanges = ParseRanges(appDesc->options().GetExtension(GlobalCounterOpts)); - } - virtual ~TAppParsedOpts() - {} - - const char* const * GetNames() const - { - return Names.begin(); - } - + } + + // Parse protobuf options for enums itself + AppGlobalRanges = ParseRanges(appDesc->options().GetExtension(GlobalCounterOpts)); + } + virtual ~TAppParsedOpts() + {} + + const char* const * GetNames() const + { + return Names.begin(); + } + virtual const TVector<TTabletPercentileCounter::TRangeDef>& GetRanges(size_t idx) const - { - Y_VERIFY(idx < Size); - if (!Ranges[idx].empty()) { - return Ranges[idx]; - } else { - if (!AppGlobalRanges.empty()) - return AppGlobalRanges; - } + { + Y_VERIFY(idx < Size); + if (!Ranges[idx].empty()) { + return Ranges[idx]; + } else { + if (!AppGlobalRanges.empty()) + return AppGlobalRanges; + } Y_FAIL("Ranges for percentile counter '%s' are not defined", AppCountersDesc()->value(idx)->full_name().c_str()); - } - + } + virtual bool GetIntegral(size_t idx) const { Y_VERIFY(idx < Size); return Integral[idx]; } -protected: +protected: TString GetFilePrefix(const NProtoBuf::FileDescriptor* desc) { - if (desc->options().HasExtension(TabletTypeName)) { - return desc->options().GetExtension(TabletTypeName) + "/"; - } else { + if (desc->options().HasExtension(TabletTypeName)) { + return desc->options().GetExtension(TabletTypeName) + "/"; + } else { return TString(); - } - } - + } + } + TVector<TTabletPercentileCounter::TRangeDef> ParseRanges(const TCounterOptions& co) - { + { TVector<TTabletPercentileCounter::TRangeDef> ranges; - ranges.reserve(co.RangesSize()); - for (size_t j = 0; j < co.RangesSize(); j++) { - const TRange& r = co.GetRanges(j); + ranges.reserve(co.RangesSize()); + for (size_t j = 0; j < co.RangesSize(); j++) { + const TRange& r = co.GetRanges(j); ranges.push_back(TTabletPercentileCounter::TRangeDef{r.GetValue(), r.GetName().c_str()}); - } - return ranges; - } -}; - -// Class that incapsulates protobuf options parsing for tx types and app counters -template <const NProtoBuf::EnumDescriptor* AppCountersDesc(), - const NProtoBuf::EnumDescriptor* TxCountersDesc(), - const NProtoBuf::EnumDescriptor* TxTypesDesc()> -struct TParsedOpts : public TAppParsedOpts<AppCountersDesc> { -typedef TAppParsedOpts<AppCountersDesc> TBase; -public: - const size_t TxOffset; - const size_t TxCountersSize; - using TBase::Size; -private: - using TBase::NamesStrings; - using TBase::Names; - using TBase::Ranges; + } + return ranges; + } +}; + +// Class that incapsulates protobuf options parsing for tx types and app counters +template <const NProtoBuf::EnumDescriptor* AppCountersDesc(), + const NProtoBuf::EnumDescriptor* TxCountersDesc(), + const NProtoBuf::EnumDescriptor* TxTypesDesc()> +struct TParsedOpts : public TAppParsedOpts<AppCountersDesc> { +typedef TAppParsedOpts<AppCountersDesc> TBase; +public: + const size_t TxOffset; + const size_t TxCountersSize; + using TBase::Size; +private: + using TBase::NamesStrings; + using TBase::Names; + using TBase::Ranges; using TBase::Integral; - using TBase::AppGlobalRanges; + using TBase::AppGlobalRanges; TVector<TTabletPercentileCounter::TRangeDef> TxGlobalRanges; -public: - TParsedOpts() - : TAppParsedOpts<AppCountersDesc>(TxCountersDesc()->value_count() * TxTypesDesc()->value_count()) - , TxOffset(AppCountersDesc()->value_count()) - , TxCountersSize(TxCountersDesc()->value_count()) - { +public: + TParsedOpts() + : TAppParsedOpts<AppCountersDesc>(TxCountersDesc()->value_count() * TxTypesDesc()->value_count()) + , TxOffset(AppCountersDesc()->value_count()) + , TxCountersSize(TxCountersDesc()->value_count()) + { const NProtoBuf::EnumDescriptor* txDesc = TxCountersDesc(); const NProtoBuf::EnumDescriptor* typesDesc = TxTypesDesc(); - + // Parse protobuf options for enum values for tx counters // Create a group of tx counters for each tx type for (int j = 0; j < typesDesc->value_count(); j++) { @@ -161,23 +161,23 @@ public: Y_VERIFY(!co.GetName().empty(), "counter '%s' number (%d) has an empty name", v->full_name().c_str(), v->number()); TVector<TTabletPercentileCounter::TRangeDef> ranges = TBase::ParseRanges(co); - NamesStrings.push_back(TBase::GetFilePrefix(typesDesc->file()) + txPrefix + co.GetName()); - Ranges.push_back(TBase::ParseRanges(co)); + NamesStrings.push_back(TBase::GetFilePrefix(typesDesc->file()) + txPrefix + co.GetName()); + Ranges.push_back(TBase::ParseRanges(co)); Integral.push_back(co.GetIntegral()); } } // Make plain strings out of Strokas to fullfil interface of TTabletCountersBase - for (size_t i = TxOffset; i < Size; ++i) { + for (size_t i = TxOffset; i < Size; ++i) { const TString& s = NamesStrings[i]; Names.push_back(s.empty() ? nullptr : s.c_str()); } // Parse protobuf options for enums itself - TxGlobalRanges = TBase::ParseRanges(txDesc->options().GetExtension(GlobalCounterOpts)); + TxGlobalRanges = TBase::ParseRanges(txDesc->options().GetExtension(GlobalCounterOpts)); } - virtual ~TParsedOpts() - {} + virtual ~TParsedOpts() + {} virtual const TVector<TTabletPercentileCounter::TRangeDef>& GetRanges(size_t idx) const { @@ -199,44 +199,44 @@ public: Y_FAIL("Ranges for percentile counter '%s' are not defined", TxCountersDesc()->value(idx2)->full_name().c_str()); } } -}; +}; + - -template <class T1, class T2> -struct TParsedOptsPair { +template <class T1, class T2> +struct TParsedOptsPair { private: - T1 Opts1; - T2 Opts2; + T1 Opts1; + T2 Opts2; TVector<const char*> Names; -public: - const size_t Size; -public: - TParsedOptsPair() - : Opts1() - , Opts2() - , Size(Opts1.Size + Opts2.Size) - { - Names.reserve(Size); - for (size_t i = 0; i < Opts1.Size; ++i) { - Names.push_back(Opts1.GetNames()[i]); +public: + const size_t Size; +public: + TParsedOptsPair() + : Opts1() + , Opts2() + , Size(Opts1.Size + Opts2.Size) + { + Names.reserve(Size); + for (size_t i = 0; i < Opts1.Size; ++i) { + Names.push_back(Opts1.GetNames()[i]); + } + for (size_t i = 0; i < Opts2.Size; ++i) { + Names.push_back(Opts2.GetNames()[i]); } - for (size_t i = 0; i < Opts2.Size; ++i) { - Names.push_back(Opts2.GetNames()[i]); - } } - const char* const * GetNames() const + const char* const * GetNames() const { - return Names.begin(); + return Names.begin(); } - + const TVector<TTabletPercentileCounter::TRangeDef>& GetRanges(size_t idx) const - { - Y_VERIFY(idx < Size); - if (idx < Opts1.Size) - return Opts1.GetRanges(idx); - return Opts2.GetRanges(idx - Opts1.Size); - } + { + Y_VERIFY(idx < Size); + if (idx < Opts1.Size) + return Opts1.GetRanges(idx); + return Opts2.GetRanges(idx - Opts1.Size); + } }; template <const NProtoBuf::EnumDescriptor* AppCountersDesc(), @@ -246,117 +246,117 @@ TParsedOpts<AppCountersDesc, TxCountersDesc, TxTypesDesc>* GetOpts() { // Use singleton to avoid thread-safety issues and parse enum descriptor once return Singleton<TParsedOpts<AppCountersDesc, TxCountersDesc, TxTypesDesc>>(); } - -template <const NProtoBuf::EnumDescriptor* AppCountersDesc()> -TAppParsedOpts<AppCountersDesc>* GetAppOpts() { - // Use singleton to avoid thread-safety issues and parse enum descriptor once - return Singleton<TAppParsedOpts<AppCountersDesc>>(); -} - -template <class T1, class T2> -TParsedOptsPair<T1,T2>* GetOptsPair() { - // Use singleton to avoid thread-safety issues and parse enum descriptor once - return Singleton<TParsedOptsPair<T1,T2>>(); - -} - - -// Class that incapsulates protobuf options parsing for user counters -template <const NProtoBuf::EnumDescriptor* LabeledCountersDesc()> -struct TLabeledCounterParsedOpts { -public: - const size_t Size; -protected: + +template <const NProtoBuf::EnumDescriptor* AppCountersDesc()> +TAppParsedOpts<AppCountersDesc>* GetAppOpts() { + // Use singleton to avoid thread-safety issues and parse enum descriptor once + return Singleton<TAppParsedOpts<AppCountersDesc>>(); +} + +template <class T1, class T2> +TParsedOptsPair<T1,T2>* GetOptsPair() { + // Use singleton to avoid thread-safety issues and parse enum descriptor once + return Singleton<TParsedOptsPair<T1,T2>>(); + +} + + +// Class that incapsulates protobuf options parsing for user counters +template <const NProtoBuf::EnumDescriptor* LabeledCountersDesc()> +struct TLabeledCounterParsedOpts { +public: + const size_t Size; +protected: TVector<TString> NamesStrings; TVector<const char*> Names; TVector<ui8> AggregateFuncs; TVector<ui8> Types; TVector<TString> GroupNamesStrings; TVector<const char*> GroupNames; -public: - explicit TLabeledCounterParsedOpts() - : Size(LabeledCountersDesc()->value_count()) - { +public: + explicit TLabeledCounterParsedOpts() + : Size(LabeledCountersDesc()->value_count()) + { const NProtoBuf::EnumDescriptor* labeledCounterDesc = LabeledCountersDesc(); - NamesStrings.reserve(Size); - Names.reserve(Size); - AggregateFuncs.reserve(Size); - Types.reserve(Size); - - // Parse protobuf options for enum values for app counters - for (ui32 i = 0; i < Size; ++i) { - const NProtoBuf::EnumValueDescriptor* vdesc = labeledCounterDesc->value(i); - Y_VERIFY(vdesc->number() == vdesc->index(), "counter '%s' number (%d) != index (%d)", + NamesStrings.reserve(Size); + Names.reserve(Size); + AggregateFuncs.reserve(Size); + Types.reserve(Size); + + // Parse protobuf options for enum values for app counters + for (ui32 i = 0; i < Size; ++i) { + const NProtoBuf::EnumValueDescriptor* vdesc = labeledCounterDesc->value(i); + Y_VERIFY(vdesc->number() == vdesc->index(), "counter '%s' number (%d) != index (%d)", vdesc->full_name().data(), vdesc->number(), vdesc->index()); - const TLabeledCounterOptions& co = vdesc->options().GetExtension(LabeledCounterOpts); - - NamesStrings.push_back(GetFilePrefix(labeledCounterDesc->file()) + co.GetName()); - AggregateFuncs.push_back(co.GetAggrFunc()); - Types.push_back(co.GetType()); - } - - // Make plain strings out of Strokas to fullfil interface of TTabletCountersBase + const TLabeledCounterOptions& co = vdesc->options().GetExtension(LabeledCounterOpts); + + NamesStrings.push_back(GetFilePrefix(labeledCounterDesc->file()) + co.GetName()); + AggregateFuncs.push_back(co.GetAggrFunc()); + Types.push_back(co.GetType()); + } + + // Make plain strings out of Strokas to fullfil interface of TTabletCountersBase for (const TString& s : NamesStrings) { Names.push_back(s.data()); - } - - //parse types for counter groups; - const TLabeledCounterGroupNamesOptions& gn = labeledCounterDesc->options().GetExtension(GlobalGroupNamesOpts); - ui32 size = gn.NamesSize(); - GroupNamesStrings.reserve(size); - GroupNames.reserve(size); - for (ui32 i = 0; i < size; ++i) { - GroupNamesStrings.push_back(gn.GetNames(i)); - } - + } + + //parse types for counter groups; + const TLabeledCounterGroupNamesOptions& gn = labeledCounterDesc->options().GetExtension(GlobalGroupNamesOpts); + ui32 size = gn.NamesSize(); + GroupNamesStrings.reserve(size); + GroupNames.reserve(size); + for (ui32 i = 0; i < size; ++i) { + GroupNamesStrings.push_back(gn.GetNames(i)); + } + for (const TString& s : GroupNamesStrings) { GroupNames.push_back(s.data()); - } - } - virtual ~TLabeledCounterParsedOpts() - {} - - const char* const * GetNames() const - { - return Names.begin(); - } - - const ui8* GetCounterTypes() const - { - return Types.begin(); - } - - const char* const * GetGroupNames() const - { - return GroupNames.begin(); - } - - size_t GetGroupNamesSize() const - { - return GroupNames.size(); - } - - const ui8* GetAggregateFuncs() const - { - return AggregateFuncs.begin(); - } - -protected: + } + } + virtual ~TLabeledCounterParsedOpts() + {} + + const char* const * GetNames() const + { + return Names.begin(); + } + + const ui8* GetCounterTypes() const + { + return Types.begin(); + } + + const char* const * GetGroupNames() const + { + return GroupNames.begin(); + } + + size_t GetGroupNamesSize() const + { + return GroupNames.size(); + } + + const ui8* GetAggregateFuncs() const + { + return AggregateFuncs.begin(); + } + +protected: TString GetFilePrefix(const NProtoBuf::FileDescriptor* desc) { - if (desc->options().HasExtension(TabletTypeName)) { - return desc->options().GetExtension(TabletTypeName) + "/"; - } else { + if (desc->options().HasExtension(TabletTypeName)) { + return desc->options().GetExtension(TabletTypeName) + "/"; + } else { return TString(); - } - } -}; - -template <const NProtoBuf::EnumDescriptor* LabeledCountersDesc()> -TLabeledCounterParsedOpts<LabeledCountersDesc>* GetLabeledCounterOpts() { - // Use singleton to avoid thread-safety issues and parse enum descriptor once - return Singleton<TLabeledCounterParsedOpts<LabeledCountersDesc>>(); -} - + } + } +}; + +template <const NProtoBuf::EnumDescriptor* LabeledCountersDesc()> +TLabeledCounterParsedOpts<LabeledCountersDesc>* GetLabeledCounterOpts() { + // Use singleton to avoid thread-safety issues and parse enum descriptor once + return Singleton<TLabeledCounterParsedOpts<LabeledCountersDesc>>(); +} + } // NAux // Base class for all tablet counters classes with tx type counters @@ -421,44 +421,44 @@ template <const NProtoBuf::EnumDescriptor* SimpleDesc(), const NProtoBuf::EnumDescriptor* PercentileDesc(), const NProtoBuf::EnumDescriptor* TxTypeDesc()> class TProtobufTabletCounters : public TTabletCountersWithTxTypes { -public: - typedef NAux::TParsedOpts<SimpleDesc, ETxTypeSimpleCounters_descriptor, TxTypeDesc> TSimpleOpts; - typedef NAux::TParsedOpts<CumulativeDesc, ETxTypeCumulativeCounters_descriptor, TxTypeDesc> TCumulativeOpts; - typedef NAux::TParsedOpts<PercentileDesc, ETxTypePercentileCounters_descriptor, TxTypeDesc> TPercentileOpts; - - static TSimpleOpts* SimpleOpts() { +public: + typedef NAux::TParsedOpts<SimpleDesc, ETxTypeSimpleCounters_descriptor, TxTypeDesc> TSimpleOpts; + typedef NAux::TParsedOpts<CumulativeDesc, ETxTypeCumulativeCounters_descriptor, TxTypeDesc> TCumulativeOpts; + typedef NAux::TParsedOpts<PercentileDesc, ETxTypePercentileCounters_descriptor, TxTypeDesc> TPercentileOpts; + + static TSimpleOpts* SimpleOpts() { return NAux::GetOpts<SimpleDesc, ETxTypeSimpleCounters_descriptor, TxTypeDesc>(); } - static TCumulativeOpts* CumulativeOpts() { + static TCumulativeOpts* CumulativeOpts() { return NAux::GetOpts<CumulativeDesc, ETxTypeCumulativeCounters_descriptor, TxTypeDesc>(); } - static TPercentileOpts* PercentileOpts() { + static TPercentileOpts* PercentileOpts() { return NAux::GetOpts<PercentileDesc, ETxTypePercentileCounters_descriptor, TxTypeDesc>(); } - + TProtobufTabletCounters() : TTabletCountersWithTxTypes( SimpleOpts()->Size, CumulativeOpts()->Size, PercentileOpts()->Size, SimpleOpts()->GetNames(), CumulativeOpts()->GetNames(), PercentileOpts()->GetNames() ) { - FillOffsets(); - InitCounters(); - } - - //constructor from external counters - TProtobufTabletCounters(const ui32 simpleOffset, const ui32 cumulativeOffset, const ui32 percentileOffset, TTabletCountersBase* counters) - : TTabletCountersWithTxTypes(simpleOffset, cumulativeOffset, percentileOffset, counters) - { - FillOffsets(); - InitCounters(); - } - -private: - void FillOffsets() - { + FillOffsets(); + InitCounters(); + } + + //constructor from external counters + TProtobufTabletCounters(const ui32 simpleOffset, const ui32 cumulativeOffset, const ui32 percentileOffset, TTabletCountersBase* counters) + : TTabletCountersWithTxTypes(simpleOffset, cumulativeOffset, percentileOffset, counters) + { + FillOffsets(); + InitCounters(); + } + +private: + void FillOffsets() + { // Initialize stuff for counter addressing Size[CT_SIMPLE] = SimpleOpts()->Size; TxOffset[CT_SIMPLE] = SimpleOpts()->TxOffset; @@ -469,10 +469,10 @@ private: Size[CT_PERCENTILE] = PercentileOpts()->Size; TxOffset[CT_PERCENTILE] = PercentileOpts()->TxOffset; TxCountersSize[CT_PERCENTILE] = PercentileOpts()->TxCountersSize; - } + } - void InitCounters() - { + void InitCounters() + { // Initialize percentile counters const auto* opts = PercentileOpts(); for (size_t i = 0; i < opts->Size; i++) { @@ -485,157 +485,157 @@ private: } }; -// Tablet counters with app counters (SimpleDesc, CumulativeDesc, PercentileDesc) only -template <const NProtoBuf::EnumDescriptor* SimpleDesc(), - const NProtoBuf::EnumDescriptor* CumulativeDesc(), - const NProtoBuf::EnumDescriptor* PercentileDesc()> -class TAppProtobufTabletCounters : public TTabletCountersBase { -public: - typedef NAux::TAppParsedOpts<SimpleDesc> TSimpleOpts; - typedef NAux::TAppParsedOpts<CumulativeDesc> TCumulativeOpts; - typedef NAux::TAppParsedOpts<PercentileDesc> TPercentileOpts; - - static TSimpleOpts* SimpleOpts() { - return NAux::GetAppOpts<SimpleDesc>(); - } - - static TCumulativeOpts* CumulativeOpts() { - return NAux::GetAppOpts<CumulativeDesc>(); - } - - static TPercentileOpts* PercentileOpts() { - return NAux::GetAppOpts<PercentileDesc>(); - } - - TAppProtobufTabletCounters() - : TTabletCountersBase( - SimpleOpts()->Size, CumulativeOpts()->Size, PercentileOpts()->Size, - SimpleOpts()->GetNames(), CumulativeOpts()->GetNames(), PercentileOpts()->GetNames() - ) - { - InitCounters(); - } - - //constructor from external counters - TAppProtobufTabletCounters(const ui32 simpleOffset, const ui32 cumulativeOffset, const ui32 percentileOffset, TTabletCountersBase* counters) - : TTabletCountersBase(simpleOffset, cumulativeOffset, percentileOffset, counters) - { - InitCounters(); - } - -private: - void InitCounters() - { - // Initialize percentile counters - const auto* opts = PercentileOpts(); - for (size_t i = 0; i < opts->Size; i++) { +// Tablet counters with app counters (SimpleDesc, CumulativeDesc, PercentileDesc) only +template <const NProtoBuf::EnumDescriptor* SimpleDesc(), + const NProtoBuf::EnumDescriptor* CumulativeDesc(), + const NProtoBuf::EnumDescriptor* PercentileDesc()> +class TAppProtobufTabletCounters : public TTabletCountersBase { +public: + typedef NAux::TAppParsedOpts<SimpleDesc> TSimpleOpts; + typedef NAux::TAppParsedOpts<CumulativeDesc> TCumulativeOpts; + typedef NAux::TAppParsedOpts<PercentileDesc> TPercentileOpts; + + static TSimpleOpts* SimpleOpts() { + return NAux::GetAppOpts<SimpleDesc>(); + } + + static TCumulativeOpts* CumulativeOpts() { + return NAux::GetAppOpts<CumulativeDesc>(); + } + + static TPercentileOpts* PercentileOpts() { + return NAux::GetAppOpts<PercentileDesc>(); + } + + TAppProtobufTabletCounters() + : TTabletCountersBase( + SimpleOpts()->Size, CumulativeOpts()->Size, PercentileOpts()->Size, + SimpleOpts()->GetNames(), CumulativeOpts()->GetNames(), PercentileOpts()->GetNames() + ) + { + InitCounters(); + } + + //constructor from external counters + TAppProtobufTabletCounters(const ui32 simpleOffset, const ui32 cumulativeOffset, const ui32 percentileOffset, TTabletCountersBase* counters) + : TTabletCountersBase(simpleOffset, cumulativeOffset, percentileOffset, counters) + { + InitCounters(); + } + +private: + void InitCounters() + { + // Initialize percentile counters + const auto* opts = PercentileOpts(); + for (size_t i = 0; i < opts->Size; i++) { if (!opts->GetNames()[i]) { continue; } - const auto& vec = opts->GetRanges(i); + const auto& vec = opts->GetRanges(i); Percentile()[i].Initialize(vec.size(), vec.begin(), opts->GetIntegral(i)); - } - } -}; - - -// Will store all counters for both types in T1 and itself. It's mean that -// FirstTabletCounters will be of type T1, but as base class (TTabletCountersBase) will contail ALL COUNTERS from T1 and T2. -// T1 and T2 can be obtained with GetFirstTabletCounters and GetSecondTabletCounters() methods, and counters can be changed separetly. -// T1 object and TProtobufTabletCountersPair itself will contail all couters with all changes. -// Of course, T1 and T2 are not thread safe - they must be accessed only from one thread both. -// You can construct Pair<T1, Pair<T2,T3>> and so on if you need it. -template <class T1, class T2> -class TProtobufTabletCountersPair : public TTabletCountersBase { -private: - TAutoPtr<T1> FirstTabletCounters; - TAutoPtr<T2> SecondTabletCounters; - -public: - typedef NAux::TParsedOptsPair<typename T1::TSimpleOpts, typename T2::TSimpleOpts> TSimpleOpts; - typedef NAux::TParsedOptsPair<typename T1::TCumulativeOpts, typename T2::TCumulativeOpts> TCumulativeOpts; - typedef NAux::TParsedOptsPair<typename T1::TPercentileOpts, typename T2::TPercentileOpts> TPercentileOpts; - - - static TSimpleOpts* SimpleOpts() { - return NAux::GetOptsPair<typename T1::TSimpleOpts, typename T2::TSimpleOpts>(); - } - - static TCumulativeOpts* CumulativeOpts() { - return NAux::GetOptsPair<typename T1::TCumulativeOpts, typename T2::TCumulativeOpts>(); - } - - static TPercentileOpts* PercentileOpts() { - return NAux::GetOptsPair<typename T1::TPercentileOpts, typename T2::TPercentileOpts>(); - } - - TProtobufTabletCountersPair() - : TTabletCountersBase( - SimpleOpts()->Size, CumulativeOpts()->Size, PercentileOpts()->Size, - SimpleOpts()->GetNames(), CumulativeOpts()->GetNames(), PercentileOpts()->GetNames() - ) - , FirstTabletCounters(new T1(0, 0, 0, dynamic_cast<TTabletCountersBase*>(this))) - , SecondTabletCounters(new T2(T1::SimpleOpts()->Size, T1::CumulativeOpts()->Size, T1::PercentileOpts()->Size, - dynamic_cast<TTabletCountersBase*>(this))) - { - } - - //constructor from external counters - TProtobufTabletCountersPair(const ui32 simpleOffset, const ui32 cumulativeOffset, const ui32 percentileOffset, TTabletCountersBase* counters) - : TTabletCountersBase(simpleOffset, cumulativeOffset, percentileOffset, counters) - , FirstTabletCounters(new T1(0, 0, 0, dynamic_cast<TTabletCountersBase*>(this))) - , SecondTabletCounters(new T2(T1::SimpleOpts()->Size, T1::CumulativeOpts()->Size, T1::PercentileOpts()->Size, - dynamic_cast<TTabletCountersBase*>(this))) - { - } - - - TAutoPtr<T1>& GetFirstTabletCounters() - { - return FirstTabletCounters; - } - - const TAutoPtr<T1>& GetFirstTabletCounters() const - { - return FirstTabletCounters; - } - - TAutoPtr<T2>& GetSecondTabletCounters() - { - return SecondTabletCounters; - } - - const TAutoPtr<T2>& GetSecondTabletCounters() const - { - return SecondTabletCounters; - } -}; - - - -// Tablet app user counters -template <const NProtoBuf::EnumDescriptor* SimpleDesc()> -class TProtobufTabletLabeledCounters : public TTabletLabeledCountersBase { -public: - typedef NAux::TLabeledCounterParsedOpts<SimpleDesc> TLabeledCounterOpts; - - static TLabeledCounterOpts* SimpleOpts() { - return NAux::GetLabeledCounterOpts<SimpleDesc>(); - } - + } + } +}; + + +// Will store all counters for both types in T1 and itself. It's mean that +// FirstTabletCounters will be of type T1, but as base class (TTabletCountersBase) will contail ALL COUNTERS from T1 and T2. +// T1 and T2 can be obtained with GetFirstTabletCounters and GetSecondTabletCounters() methods, and counters can be changed separetly. +// T1 object and TProtobufTabletCountersPair itself will contail all couters with all changes. +// Of course, T1 and T2 are not thread safe - they must be accessed only from one thread both. +// You can construct Pair<T1, Pair<T2,T3>> and so on if you need it. +template <class T1, class T2> +class TProtobufTabletCountersPair : public TTabletCountersBase { +private: + TAutoPtr<T1> FirstTabletCounters; + TAutoPtr<T2> SecondTabletCounters; + +public: + typedef NAux::TParsedOptsPair<typename T1::TSimpleOpts, typename T2::TSimpleOpts> TSimpleOpts; + typedef NAux::TParsedOptsPair<typename T1::TCumulativeOpts, typename T2::TCumulativeOpts> TCumulativeOpts; + typedef NAux::TParsedOptsPair<typename T1::TPercentileOpts, typename T2::TPercentileOpts> TPercentileOpts; + + + static TSimpleOpts* SimpleOpts() { + return NAux::GetOptsPair<typename T1::TSimpleOpts, typename T2::TSimpleOpts>(); + } + + static TCumulativeOpts* CumulativeOpts() { + return NAux::GetOptsPair<typename T1::TCumulativeOpts, typename T2::TCumulativeOpts>(); + } + + static TPercentileOpts* PercentileOpts() { + return NAux::GetOptsPair<typename T1::TPercentileOpts, typename T2::TPercentileOpts>(); + } + + TProtobufTabletCountersPair() + : TTabletCountersBase( + SimpleOpts()->Size, CumulativeOpts()->Size, PercentileOpts()->Size, + SimpleOpts()->GetNames(), CumulativeOpts()->GetNames(), PercentileOpts()->GetNames() + ) + , FirstTabletCounters(new T1(0, 0, 0, dynamic_cast<TTabletCountersBase*>(this))) + , SecondTabletCounters(new T2(T1::SimpleOpts()->Size, T1::CumulativeOpts()->Size, T1::PercentileOpts()->Size, + dynamic_cast<TTabletCountersBase*>(this))) + { + } + + //constructor from external counters + TProtobufTabletCountersPair(const ui32 simpleOffset, const ui32 cumulativeOffset, const ui32 percentileOffset, TTabletCountersBase* counters) + : TTabletCountersBase(simpleOffset, cumulativeOffset, percentileOffset, counters) + , FirstTabletCounters(new T1(0, 0, 0, dynamic_cast<TTabletCountersBase*>(this))) + , SecondTabletCounters(new T2(T1::SimpleOpts()->Size, T1::CumulativeOpts()->Size, T1::PercentileOpts()->Size, + dynamic_cast<TTabletCountersBase*>(this))) + { + } + + + TAutoPtr<T1>& GetFirstTabletCounters() + { + return FirstTabletCounters; + } + + const TAutoPtr<T1>& GetFirstTabletCounters() const + { + return FirstTabletCounters; + } + + TAutoPtr<T2>& GetSecondTabletCounters() + { + return SecondTabletCounters; + } + + const TAutoPtr<T2>& GetSecondTabletCounters() const + { + return SecondTabletCounters; + } +}; + + + +// Tablet app user counters +template <const NProtoBuf::EnumDescriptor* SimpleDesc()> +class TProtobufTabletLabeledCounters : public TTabletLabeledCountersBase { +public: + typedef NAux::TLabeledCounterParsedOpts<SimpleDesc> TLabeledCounterOpts; + + static TLabeledCounterOpts* SimpleOpts() { + return NAux::GetLabeledCounterOpts<SimpleDesc>(); + } + TProtobufTabletLabeledCounters(const TString& group, const ui64 id) - : TTabletLabeledCountersBase( - SimpleOpts()->Size, SimpleOpts()->GetNames(), SimpleOpts()->GetCounterTypes(), - SimpleOpts()->GetAggregateFuncs(), group, SimpleOpts()->GetGroupNames(), id) - { + : TTabletLabeledCountersBase( + SimpleOpts()->Size, SimpleOpts()->GetNames(), SimpleOpts()->GetCounterTypes(), + SimpleOpts()->GetAggregateFuncs(), group, SimpleOpts()->GetGroupNames(), id) + { TVector<TString> groups; StringSplitter(group).Split('/').SkipEmpty().Collect(&groups); //TODO: change here to "|" - Y_VERIFY(SimpleOpts()->GetGroupNamesSize() == groups.size()); - } - -private: -}; - - - + Y_VERIFY(SimpleOpts()->GetGroupNamesSize() == groups.size()); + } + +private: +}; + + + } // end of NKikimr diff --git a/ydb/core/testlib/tablet_helpers.cpp b/ydb/core/testlib/tablet_helpers.cpp index 91810c9847..a853040b93 100644 --- a/ydb/core/testlib/tablet_helpers.cpp +++ b/ydb/core/testlib/tablet_helpers.cpp @@ -1166,9 +1166,9 @@ namespace NKikimr { bootstrapperActorId = Boot(ctx, type, &CreateKeyValueFlat, DataGroupErasure); } else if (type == defaultTabletTypes.ColumnShard) { bootstrapperActorId = Boot(ctx, type, &CreateColumnShard, DataGroupErasure); - } else if (type == defaultTabletTypes.PersQueue) { + } else if (type == defaultTabletTypes.PersQueue) { bootstrapperActorId = Boot(ctx, type, &CreatePersQueue, DataGroupErasure); - } else if (type == defaultTabletTypes.PersQueueReadBalancer) { + } else if (type == defaultTabletTypes.PersQueueReadBalancer) { bootstrapperActorId = Boot(ctx, type, &CreatePersQueueReadBalancer, DataGroupErasure); } else if (type == defaultTabletTypes.Coordinator) { bootstrapperActorId = Boot(ctx, type, &CreateFlatTxCoordinator, DataGroupErasure); diff --git a/ydb/core/testlib/test_client.cpp b/ydb/core/testlib/test_client.cpp index bc4485ea20..d4907c26f1 100644 --- a/ydb/core/testlib/test_client.cpp +++ b/ydb/core/testlib/test_client.cpp @@ -257,7 +257,7 @@ namespace Tests { GRpcServerRootCounters = MakeIntrusive<NMonitoring::TDynamicCounters>(); auto& counters = GRpcServerRootCounters; - + auto& appData = Runtime->GetAppData(); // Setup discovery for typically used services on the node @@ -282,8 +282,8 @@ namespace Tests { auto future = grpcService->Prepare( system, NMsgBusProxy::CreatePersQueueMetaCacheV2Id(), - NMsgBusProxy::CreateMsgBusProxyId(), - counters + NMsgBusProxy::CreateMsgBusProxyId(), + counters ); auto startCb = [grpcService] (NThreading::TFuture<void> result) { if (result.HasException()) { @@ -1556,19 +1556,19 @@ namespace Tests { return (NMsgBusProxy::EResponseStatus)response.GetStatus(); } - NMsgBusProxy::EResponseStatus TClient::DeleteTopic(const TString& parent, const TString& name) { - TAutoPtr<NMsgBusProxy::TBusSchemeOperation> request(new NMsgBusProxy::TBusSchemeOperation()); - auto *op = request->Record.MutableTransaction()->MutableModifyScheme(); + NMsgBusProxy::EResponseStatus TClient::DeleteTopic(const TString& parent, const TString& name) { + TAutoPtr<NMsgBusProxy::TBusSchemeOperation> request(new NMsgBusProxy::TBusSchemeOperation()); + auto *op = request->Record.MutableTransaction()->MutableModifyScheme(); op->SetOperationType(NKikimrSchemeOp::EOperationType::ESchemeOpDropPersQueueGroup); - op->SetWorkingDir(parent); - op->MutableDrop()->SetName(name); - TAutoPtr<NBus::TBusMessage> reply; - NBus::EMessageStatus status = SendAndWaitCompletion(request.Release(), reply); - UNIT_ASSERT_VALUES_EQUAL(status, NBus::MESSAGE_OK); - const NKikimrClient::TResponse &response = dynamic_cast<NMsgBusProxy::TBusResponse *>(reply.Get())->Record; - return (NMsgBusProxy::EResponseStatus)response.GetStatus(); - } - + op->SetWorkingDir(parent); + op->MutableDrop()->SetName(name); + TAutoPtr<NBus::TBusMessage> reply; + NBus::EMessageStatus status = SendAndWaitCompletion(request.Release(), reply); + UNIT_ASSERT_VALUES_EQUAL(status, NBus::MESSAGE_OK); + const NKikimrClient::TResponse &response = dynamic_cast<NMsgBusProxy::TBusResponse *>(reply.Get())->Record; + return (NMsgBusProxy::EResponseStatus)response.GetStatus(); + } + TAutoPtr<NMsgBusProxy::TBusResponse> TClient::TryDropPersQueueGroup(const TString& parent, const TString& name) { TAutoPtr<NMsgBusProxy::TBusSchemeOperation> request(new NMsgBusProxy::TBusSchemeOperation()); auto * op = request->Record.MutableTransaction()->MutableModifyScheme(); @@ -2026,7 +2026,7 @@ namespace Tests { TString res = SendTabletMonQuery(runtime, hive, TString("/app?page=SetDown&node=") + ToString(nodeId) + "&down=" + (up ? "0" : "1")); if (!res.empty() && !res.Contains("Error")) return res; - + } UNIT_ASSERT_C(false, "Failed to mark node in hive"); return TString(); diff --git a/ydb/core/testlib/test_client.h b/ydb/core/testlib/test_client.h index c8e6f8c40c..2064752ab4 100644 --- a/ydb/core/testlib/test_client.h +++ b/ydb/core/testlib/test_client.h @@ -381,7 +381,7 @@ namespace Tests { NMsgBusProxy::EResponseStatus CreateOlapTable(const TString& parent, const NKikimrSchemeOp::TColumnTableDescription& table); NMsgBusProxy::EResponseStatus CreateSolomon(const TString& parent, const TString& name, ui32 parts = 4, ui32 channelProfile = 0); NMsgBusProxy::EResponseStatus StoreTableBackup(const TString& parent, const NKikimrSchemeOp::TBackupTask& task); - NMsgBusProxy::EResponseStatus DeleteTopic(const TString& parent, const TString& name); + NMsgBusProxy::EResponseStatus DeleteTopic(const TString& parent, const TString& name); TAutoPtr<NMsgBusProxy::TBusResponse> TryDropPersQueueGroup(const TString& parent, const TString& name); TAutoPtr<NMsgBusProxy::TBusResponse> Ls(const TString& path); static TPathVersion ExtractPathVersion(const TAutoPtr<NMsgBusProxy::TBusResponse>& describe); @@ -439,7 +439,7 @@ namespace Tests { THolder<NKesus::TEvKesus::TEvGetConfigResult> GetKesusConfig(TTestActorRuntime* runtime, const TString& kesusPath); - protected: + protected: template <class TMsg> TString PrintResult(NBus::TBusMessage* msg, size_t maxSz = 1000) { auto res = dynamic_cast<TMsg*>(msg); diff --git a/ydb/core/testlib/test_pq_client.h b/ydb/core/testlib/test_pq_client.h index 82e345b483..f059923250 100644 --- a/ydb/core/testlib/test_pq_client.h +++ b/ydb/core/testlib/test_pq_client.h @@ -15,59 +15,59 @@ #include <library/cpp/testing/unittest/registar.h> #include <util/string/printf.h> -#include <util/system/tempfile.h> +#include <util/system/tempfile.h> namespace NKikimr { namespace NPersQueueTests { -using namespace NNetClassifier; -using namespace NKikimr::Tests; - - -inline Tests::TServerSettings PQSettings(ui16 port, ui32 nodesCount = 2, bool roundrobin = true, const TString& yql_timeout = "10", const THolder<TTempFileHandle>& netDataFile = nullptr) { +using namespace NNetClassifier; +using namespace NKikimr::Tests; + + +inline Tests::TServerSettings PQSettings(ui16 port, ui32 nodesCount = 2, bool roundrobin = true, const TString& yql_timeout = "10", const THolder<TTempFileHandle>& netDataFile = nullptr) { NKikimrPQ::TPQConfig pqConfig; - NKikimrProto::TAuthConfig authConfig; - authConfig.SetUseBlackBox(false); - authConfig.SetUseAccessService(false); - authConfig.SetUseAccessServiceTLS(false); - authConfig.SetUseStaff(false); - authConfig.MutableTVMConfig()->SetEnabled(true); - authConfig.MutableTVMConfig()->SetServiceTVMId(10); + NKikimrProto::TAuthConfig authConfig; + authConfig.SetUseBlackBox(false); + authConfig.SetUseAccessService(false); + authConfig.SetUseAccessServiceTLS(false); + authConfig.SetUseStaff(false); + authConfig.MutableTVMConfig()->SetEnabled(true); + authConfig.MutableTVMConfig()->SetServiceTVMId(10); authConfig.MutableTVMConfig()->SetPublicKeys(NTvmAuth::NUnittest::TVMKNIFE_PUBLIC_KEYS); - authConfig.MutableTVMConfig()->SetUpdatePublicKeys(false); - pqConfig.SetRoundRobinPartitionMapping(roundrobin); - const TString query = R"___( - DECLARE $userNameHint AS Utf8; DECLARE $uid AS Uint64; - SELECT DISTINCT(name) FROM (SELECT name FROM [/Root/PQ/Config/V2/Producer] WHERE tvmClientId = YQL::ToString($uid) AND ($userNameHint = name OR $userNameHint = "") - UNION ALL SELECT name FROM [/Root/PQ/Config/V2/Consumer] WHERE tvmClientId = YQL::ToString($uid) AND ($userNameHint = name OR $userNameHint = "")); - )___"; - - authConfig.MutableUserRegistryConfig()->SetQuery(query); - + authConfig.MutableTVMConfig()->SetUpdatePublicKeys(false); + pqConfig.SetRoundRobinPartitionMapping(roundrobin); + const TString query = R"___( + DECLARE $userNameHint AS Utf8; DECLARE $uid AS Uint64; + SELECT DISTINCT(name) FROM (SELECT name FROM [/Root/PQ/Config/V2/Producer] WHERE tvmClientId = YQL::ToString($uid) AND ($userNameHint = name OR $userNameHint = "") + UNION ALL SELECT name FROM [/Root/PQ/Config/V2/Consumer] WHERE tvmClientId = YQL::ToString($uid) AND ($userNameHint = name OR $userNameHint = "")); + )___"; + + authConfig.MutableUserRegistryConfig()->SetQuery(query); + pqConfig.SetEnabled(true); pqConfig.SetMaxReadCookies(10); - for (int i = 0; i < 12; ++i) { - auto profile = pqConfig.AddChannelProfiles(); - Y_UNUSED(profile); - profile->SetPoolKind("test"); - } - - Tests::TServerSettings settings(port, authConfig, pqConfig); + for (int i = 0; i < 12; ++i) { + auto profile = pqConfig.AddChannelProfiles(); + Y_UNUSED(profile); + profile->SetPoolKind("test"); + } + + Tests::TServerSettings settings(port, authConfig, pqConfig); settings.SetDomainName("Root").SetNodeCount(nodesCount); - TVector<NKikimrKqp::TKqpSetting> kqpSettings; - NKikimrKqp::TKqpSetting kqpSetting; - kqpSetting.SetName("_KqpQueryTimeout"); - - kqpSetting.SetValue(yql_timeout); - kqpSettings.push_back(kqpSetting); - settings.SetKqpSettings(kqpSettings); + TVector<NKikimrKqp::TKqpSetting> kqpSettings; + NKikimrKqp::TKqpSetting kqpSetting; + kqpSetting.SetName("_KqpQueryTimeout"); + + kqpSetting.SetValue(yql_timeout); + kqpSettings.push_back(kqpSetting); + settings.SetKqpSettings(kqpSettings); settings.PQClusterDiscoveryConfig.SetEnabled(true); settings.PQClusterDiscoveryConfig.SetTimedCountersUpdateIntervalSeconds(1); - if (netDataFile) - settings.NetClassifierConfig.SetNetDataFilePath(netDataFile->Name()); - + if (netDataFile) + settings.NetClassifierConfig.SetNetDataFilePath(netDataFile->Name()); + return settings; } @@ -117,9 +117,9 @@ struct TRequestCreatePQ { TString User; ui64 ReadSpeed; - TVector<TString> ReadRules; - TVector<TString> Important; - + TVector<TString> ReadRules; + TVector<TString> Important; + std::optional<NKikimrPQ::TMirrorPartitionConfig> MirrorFrom; ui64 SourceIdMaxCount; @@ -138,29 +138,29 @@ struct TRequestCreatePQ { config->MutablePartitionConfig()->SetSourceIdMaxCounts(SourceIdMaxCount); config->MutablePartitionConfig()->SetLowWatermark(LowWatermark); - config->SetLocalDC(true); - - auto codec = config->MutableCodecs(); - codec->AddIds(0); - codec->AddCodecs("raw"); - codec->AddIds(1); - codec->AddCodecs("gzip"); - codec->AddIds(2); - codec->AddCodecs("lzop"); - - for (auto& i : Important) { - config->MutablePartitionConfig()->AddImportantClientId(i); - } - + config->SetLocalDC(true); + + auto codec = config->MutableCodecs(); + codec->AddIds(0); + codec->AddCodecs("raw"); + codec->AddIds(1); + codec->AddCodecs("gzip"); + codec->AddIds(2); + codec->AddCodecs("lzop"); + + for (auto& i : Important) { + config->MutablePartitionConfig()->AddImportantClientId(i); + } + config->MutablePartitionConfig()->SetWriteSpeedInBytesPerSecond(WriteSpeed); config->MutablePartitionConfig()->SetBurstSize(WriteSpeed); - for (auto& rr : ReadRules) { - config->AddReadRules(rr); + for (auto& rr : ReadRules) { + config->AddReadRules(rr); config->AddReadFromTimestampsMs(0); config->AddConsumerFormatVersions(0); config->AddReadRuleVersions(0); config->AddConsumerCodecs()->AddIds(0); - } + } if (!ReadRules.empty()) { config->SetRequireAuthRead(true); } @@ -524,8 +524,8 @@ public: , Kikimr(GetClientConfig()) { auto driverConfig = NYdb::TDriverConfig() - .SetEndpoint(TStringBuilder() << "localhost:" << GRpcPort) - .SetLog(CreateLogBackend("cerr", ELogPriority::TLOG_DEBUG)); + .SetEndpoint(TStringBuilder() << "localhost:" << GRpcPort) + .SetLog(CreateLogBackend("cerr", ELogPriority::TLOG_DEBUG)); if (databaseName) { driverConfig.SetDatabase(*databaseName); } @@ -558,10 +558,10 @@ public: MkDir("/Root", "PQ"); } - NYdb::TDriver* GetDriver() { - return Driver.Get(); - } - + NYdb::TDriver* GetDriver() { + return Driver.Get(); + } + void InitSourceIds(const TString& path = DEFAULT_SRC_IDS_PATH) { TFsPath fsPath(path); CreateTable(fsPath.Dirname(), @@ -676,35 +676,35 @@ public: return info; } - void InitUserRegistry() { - MkDir("/Root/PQ", "Config"); - MkDir("/Root/PQ/Config", "V2"); - + void InitUserRegistry() { + MkDir("/Root/PQ", "Config"); + MkDir("/Root/PQ/Config", "V2"); + RunYqlSchemeQuery(R"___( - CREATE TABLE [/Root/PQ/Config/V2/Consumer] ( - name Utf8, - tvmClientId Utf8, - PRIMARY KEY (name) - ); - CREATE TABLE [/Root/PQ/Config/V2/Producer] ( - name Utf8, - tvmClientId Utf8, - PRIMARY KEY (name) - ); + CREATE TABLE [/Root/PQ/Config/V2/Consumer] ( + name Utf8, + tvmClientId Utf8, + PRIMARY KEY (name) + ); + CREATE TABLE [/Root/PQ/Config/V2/Producer] ( + name Utf8, + tvmClientId Utf8, + PRIMARY KEY (name) + ); )___"); - + RunYqlDataQuery(R"___( - UPSERT INTO [/Root/PQ/Config/V2/Consumer] (name, tvmClientId) VALUES - ("user1", "1"), - ("user2", "1"), - ("user5", "1"), - ("user3", "2"); - UPSERT INTO [/Root/PQ/Config/V2/Producer] (name, tvmClientId) VALUES + UPSERT INTO [/Root/PQ/Config/V2/Consumer] (name, tvmClientId) VALUES + ("user1", "1"), + ("user2", "1"), + ("user5", "1"), + ("user3", "2"); + UPSERT INTO [/Root/PQ/Config/V2/Producer] (name, tvmClientId) VALUES ("user4", "2"), ("topic1", "1"); )___"); - } - + } + void UpdateDC(const TString& name, bool local, bool enabled) { const TString query = Sprintf( R"___( @@ -781,25 +781,25 @@ public: } - void RestartPartitionTablets(TTestActorRuntime* runtime, const TString& topic) { - TAutoPtr<NMsgBusProxy::TBusResponse> res = Ls("/Root/PQ/" + topic); - Cerr << res->Record << "\n"; - const auto& pq = res->Record.GetPathDescription().GetPersQueueGroup(); - THashSet<ui64> tablets; - for (ui32 i = 0; i < pq.PartitionsSize(); ++i) { - tablets.insert(pq.GetPartitions(i).GetTabletId()); - } + void RestartPartitionTablets(TTestActorRuntime* runtime, const TString& topic) { + TAutoPtr<NMsgBusProxy::TBusResponse> res = Ls("/Root/PQ/" + topic); + Cerr << res->Record << "\n"; + const auto& pq = res->Record.GetPathDescription().GetPersQueueGroup(); + THashSet<ui64> tablets; + for (ui32 i = 0; i < pq.PartitionsSize(); ++i) { + tablets.insert(pq.GetPartitions(i).GetTabletId()); + } TActorId sender = runtime->AllocateEdgeActor(); - for (auto & tablet : tablets) { + for (auto & tablet : tablets) { ForwardToTablet(*runtime, tablet, sender, new TEvents::TEvPoisonPill(), 0); - TDispatchOptions options; - try { - runtime->DispatchEvents(options); - } catch (TEmptyEventQueueException&) { - } - } - } - + TDispatchOptions options; + try { + runtime->DispatchEvents(options); + } catch (TEmptyEventQueueException&) { + } + } + } + bool TopicDeleted(const TString& name) { TAutoPtr<NMsgBusProxy::TBusPersQueue> request(new NMsgBusProxy::TBusPersQueue); auto req = request->Record.MutableMetaRequest()->MutableCmdGetTopicMetadata(); @@ -826,7 +826,7 @@ public: TAutoPtr<NBus::TBusMessage>& reply, ui64 maxPrintSize = 0) { NBus::EMessageStatus status = SyncCall(request, reply); TString msgStr; - UNIT_ASSERT_VALUES_EQUAL(status, NBus::MESSAGE_OK); + UNIT_ASSERT_VALUES_EQUAL(status, NBus::MESSAGE_OK); if (maxPrintSize) { msgStr = PrintResult<NMsgBusProxy::TBusResponse>(reply.Get(), maxPrintSize); } else { @@ -836,31 +836,31 @@ public: return dynamic_cast<NMsgBusProxy::TBusResponse*>(reply.Get()); } - void CreateConsumer(const TString& oldName) { + void CreateConsumer(const TString& oldName) { auto name = NPersQueue::ConvertOldConsumerName(oldName); RunYqlSchemeQuery("CREATE TABLE [/Root/PQ/" + name + "] (" + "Topic Utf8, Partition Uint32, Offset Uint64, PRIMARY KEY (Topic,Partition) );"); - } + } - void GrantConsumerAccess(const TString& oldName, const TString& subj) { - NACLib::TDiffACL acl; - acl.AddAccess(NACLib::EAccessType::Allow, NACLib::ReadAttributes, subj); - acl.AddAccess(NACLib::EAccessType::Allow, NACLib::WriteAttributes, subj); + void GrantConsumerAccess(const TString& oldName, const TString& subj) { + NACLib::TDiffACL acl; + acl.AddAccess(NACLib::EAccessType::Allow, NACLib::ReadAttributes, subj); + acl.AddAccess(NACLib::EAccessType::Allow, NACLib::WriteAttributes, subj); auto name = NPersQueue::ConvertOldConsumerName(oldName); - auto pos = name.rfind("/"); - Y_VERIFY(pos != TString::npos); - auto pref = "/Root/PQ/" + name.substr(0, pos); - ModifyACL(pref, name.substr(pos + 1), acl.SerializeAsString()); - } - - - void CreateTopicNoLegacy(const TString& name, ui32 partsCount, bool doWait = true, bool canWrite = true) { + auto pos = name.rfind("/"); + Y_VERIFY(pos != TString::npos); + auto pref = "/Root/PQ/" + name.substr(0, pos); + ModifyACL(pref, name.substr(pos + 1), acl.SerializeAsString()); + } + + + void CreateTopicNoLegacy(const TString& name, ui32 partsCount, bool doWait = true, bool canWrite = true) { TString path = name; if (UseConfigTables) { path = TStringBuilder() << "/Root/PQ/" << name; } auto pqClient = NYdb::NPersQueue::TPersQueueClient(*Driver); - auto settings = NYdb::NPersQueue::TCreateTopicSettings().PartitionsCount(partsCount).ClientWriteDisabled(!canWrite); + auto settings = NYdb::NPersQueue::TCreateTopicSettings().PartitionsCount(partsCount).ClientWriteDisabled(!canWrite); Cerr << "===Create topic: " << path << Endl; auto res = pqClient.CreateTopic(path, settings); //ToDo - hack, cannot avoid legacy compat yet as PQv1 still uses RequestProcessor from core/client/server @@ -869,26 +869,26 @@ public: } if (doWait) { res.Wait(); - Cerr << "Create topic result: " << res.GetValue().IsSuccess() << " " << res.GetValue().GetIssues().ToString() << "\n"; - UNIT_ASSERT(res.GetValue().IsSuccess()); + Cerr << "Create topic result: " << res.GetValue().IsSuccess() << " " << res.GetValue().GetIssues().ToString() << "\n"; + UNIT_ASSERT(res.GetValue().IsSuccess()); } } - void WaitTopicInit(const TString& topic) { - auto pqClient = NYdb::NPersQueue::TPersQueueClient(*Driver); - do { - auto writer = pqClient.CreateWriteSession(NYdb::NPersQueue::TWriteSessionSettings().Path(topic) - .MessageGroupId("src").ClusterDiscoveryMode(NYdb::NPersQueue::EClusterDiscoveryMode::Off)); - auto ev = *(writer->GetEvent(true)); - if (std::holds_alternative<NYdb::NPersQueue::TWriteSessionEvent::TReadyToAcceptEvent>(ev)) - break; - if (std::holds_alternative<NYdb::NPersQueue::TSessionClosedEvent>(ev)) { - Cerr << std::get<NYdb::NPersQueue::TSessionClosedEvent>(ev).DebugString() << "\n"; - } - Sleep(TDuration::MilliSeconds(100)); - } while (true); - } - + void WaitTopicInit(const TString& topic) { + auto pqClient = NYdb::NPersQueue::TPersQueueClient(*Driver); + do { + auto writer = pqClient.CreateWriteSession(NYdb::NPersQueue::TWriteSessionSettings().Path(topic) + .MessageGroupId("src").ClusterDiscoveryMode(NYdb::NPersQueue::EClusterDiscoveryMode::Off)); + auto ev = *(writer->GetEvent(true)); + if (std::holds_alternative<NYdb::NPersQueue::TWriteSessionEvent::TReadyToAcceptEvent>(ev)) + break; + if (std::holds_alternative<NYdb::NPersQueue::TSessionClosedEvent>(ev)) { + Cerr << std::get<NYdb::NPersQueue::TSessionClosedEvent>(ev).DebugString() << "\n"; + } + Sleep(TDuration::MilliSeconds(100)); + } while (true); + } + void CreateTopic( const TRequestCreatePQ& createRequest, bool doWait = true @@ -1003,7 +1003,7 @@ public: return; } - void DeleteTopic2(const TString& name, NPersQueue::NErrorCode::EErrorCode expectedStatus = NPersQueue::NErrorCode::OK, bool waitForTopicDeletion = true) { + void DeleteTopic2(const TString& name, NPersQueue::NErrorCode::EErrorCode expectedStatus = NPersQueue::NErrorCode::OK, bool waitForTopicDeletion = true) { Y_VERIFY(name.StartsWith("rt3.")); THolder<NMsgBusProxy::TBusPersQueue> request = TRequestDeletePQ{name}.GetRequest(); @@ -1028,7 +1028,7 @@ public: } RemoveTopic(name); const TInstant start = TInstant::Now(); - while (waitForTopicDeletion && !TopicDeleted(name)) { + while (waitForTopicDeletion && !TopicDeleted(name)) { Sleep(TDuration::MilliSeconds(50)); UNIT_ASSERT(TInstant::Now() - start < ::DEFAULT_DISPATCH_TIMEOUT); } @@ -1107,7 +1107,7 @@ public: response.CopyFrom(busResponse->Record); } - Cerr << response << "\n"; + Cerr << response << "\n"; UNIT_ASSERT_VALUES_EQUAL_C((NMsgBusProxy::EResponseStatus)response.GetStatus(), expectedStatus, "proxy failure"); if (expectedStatus == NMsgBusProxy::MSTATUS_OK) { @@ -1235,7 +1235,7 @@ public: UNIT_ASSERT_VALUES_EQUAL(clientOffsetCount, hasClientOffset); } - NKikimrClient::TResponse GetClientInfo(const TVector<TString>& topics, const TString& user, bool ok, const TVector<TString>& badTopics = {}) { + NKikimrClient::TResponse GetClientInfo(const TVector<TString>& topics, const TString& user, bool ok, const TVector<TString>& badTopics = {}) { THolder<NMsgBusProxy::TBusPersQueue> request = TRequestGetClientInfo().GetRequest(topics, user); Cerr << "Request: " << request->Record << Endl; @@ -1245,24 +1245,24 @@ public: Cerr << "Response: " << response->Record << "\n"; UNIT_ASSERT_VALUES_EQUAL_C((NMsgBusProxy::EResponseStatus)response->Record.GetStatus(), ok ? NMsgBusProxy::MSTATUS_OK : NMsgBusProxy::MSTATUS_ERROR, "proxy failure"); - THashSet<TString> good; - THashSet<TString> bad; - for (auto& t : badTopics) { - bad.insert(t); - } - for (auto& t : topics) { - if (!bad.contains(t)) { - good.insert(t); - } - } - for (auto& tt : response->Record.GetMetaResponse().GetCmdGetReadSessionsInfoResult().GetTopicResult()) { - const auto& topic = tt.GetTopic(); - if (bad.contains(topic)) { - UNIT_ASSERT(tt.GetErrorCode() != (ui32)NPersQueue::NErrorCode::OK); - } else { - UNIT_ASSERT(tt.GetErrorCode() == (ui32)NPersQueue::NErrorCode::OK); - } - } + THashSet<TString> good; + THashSet<TString> bad; + for (auto& t : badTopics) { + bad.insert(t); + } + for (auto& t : topics) { + if (!bad.contains(t)) { + good.insert(t); + } + } + for (auto& tt : response->Record.GetMetaResponse().GetCmdGetReadSessionsInfoResult().GetTopicResult()) { + const auto& topic = tt.GetTopic(); + if (bad.contains(topic)) { + UNIT_ASSERT(tt.GetErrorCode() != (ui32)NPersQueue::NErrorCode::OK); + } else { + UNIT_ASSERT(tt.GetErrorCode() == (ui32)NPersQueue::NErrorCode::OK); + } + } return response->Record; } @@ -1315,8 +1315,8 @@ public: for (ui32 i = 0; i < res.TopicResultSize(); ++i) { auto t = res.GetTopicResult(i); - if (t.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) - doRetry = true; + if (t.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) + doRetry = true; for (ui32 pi = 0; pi < t.PartitionLocationSize(); ++pi) { if (!t.GetPartitionLocation(pi).HasHostId()) { // Retry until the requested partiotions are successfully resolved @@ -1335,36 +1335,36 @@ public: return nodeIds; } - NKikimrClient::TPersQueueMetaResponse::TCmdGetTopicMetadataResult DescribeTopic(const TVector<TString>& topics, bool error = false) { + NKikimrClient::TPersQueueMetaResponse::TCmdGetTopicMetadataResult DescribeTopic(const TVector<TString>& topics, bool error = false) { THolder<NMsgBusProxy::TBusPersQueue> request = TRequestDescribePQ().GetRequest(topics); TAutoPtr<NBus::TBusMessage> reply; const NMsgBusProxy::TBusResponse* response = SendAndGetReply(request.Release(), reply); UNIT_ASSERT(response); - if ((NMsgBusProxy::EResponseStatus)response->Record.GetStatus() != NMsgBusProxy::MSTATUS_OK) { - UNIT_ASSERT(error); - return {}; - } - - UNIT_ASSERT_VALUES_EQUAL_C((NMsgBusProxy::EResponseStatus)response->Record.GetStatus(), NMsgBusProxy::MSTATUS_OK, + if ((NMsgBusProxy::EResponseStatus)response->Record.GetStatus() != NMsgBusProxy::MSTATUS_OK) { + UNIT_ASSERT(error); + return {}; + } + + UNIT_ASSERT_VALUES_EQUAL_C((NMsgBusProxy::EResponseStatus)response->Record.GetStatus(), NMsgBusProxy::MSTATUS_OK, "proxy failure"); auto res = response->Record.GetMetaResponse().GetCmdGetTopicMetadataResult(); - - UNIT_ASSERT(topics.size() <= res.TopicInfoSize()); - for (ui32 i = 0; i < res.TopicInfoSize(); ++i) { - const auto& topicInfo = res.GetTopicInfo(i); - if (error) { - UNIT_ASSERT(topicInfo.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING); - } else { + + UNIT_ASSERT(topics.size() <= res.TopicInfoSize()); + for (ui32 i = 0; i < res.TopicInfoSize(); ++i) { + const auto& topicInfo = res.GetTopicInfo(i); + if (error) { + UNIT_ASSERT(topicInfo.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING); + } else { UNIT_ASSERT(topicInfo.GetNumPartitions() > 0 || topicInfo.GetErrorCode() != (ui32)NPersQueue::NErrorCode::OK); UNIT_ASSERT(topicInfo.GetConfig().HasPartitionConfig() || topicInfo.GetErrorCode() != (ui32)NPersQueue::NErrorCode::OK); } - ui32 j = 0; - for (; j < topics.size() && topics[j] != topicInfo.GetTopic(); ++j); - UNIT_ASSERT(j == 0 || j != topics.size()); + ui32 j = 0; + for (; j < topics.size() && topics[j] != topicInfo.GetTopic(); ++j); + UNIT_ASSERT(j == 0 || j != topics.size()); } - return res; + return res; } void TestCase(const TVector<std::pair<TString, TVector<ui32>>>& topicsAndParts, ui32 resCount, ui32 hasClientOffset, bool ok) { diff --git a/ydb/core/tx/schemeshard/schemeshard__init.cpp b/ydb/core/tx/schemeshard/schemeshard__init.cpp index 1683e1f87b..650506549e 100644 --- a/ydb/core/tx/schemeshard/schemeshard__init.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__init.cpp @@ -1480,7 +1480,7 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> { rootLimits.MaxConsistentCopyTargets = row.GetValueOrDefault<Schema::SubDomains::ConsistentCopyingTargetsLimit>(rootLimits.MaxConsistentCopyTargets); rootLimits.MaxPathElementLength = row.GetValueOrDefault<Schema::SubDomains::PathElementLength>(rootLimits.MaxPathElementLength); rootLimits.ExtraPathSymbolsAllowed = row.GetValueOrDefault<Schema::SubDomains::ExtraPathSymbolsAllowed>(rootLimits.ExtraPathSymbolsAllowed); - rootLimits.MaxPQPartitions = row.GetValueOrDefault<Schema::SubDomains::PQPartitionsLimit>(rootLimits.MaxPQPartitions); + rootLimits.MaxPQPartitions = row.GetValueOrDefault<Schema::SubDomains::PQPartitionsLimit>(rootLimits.MaxPQPartitions); } TSubDomainInfo::TPtr rootDomainInfo = new TSubDomainInfo(version, Self->RootPathId()); @@ -1538,8 +1538,8 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> { limits.MaxConsistentCopyTargets = rowset.GetValueOrDefault<Schema::SubDomains::ConsistentCopyingTargetsLimit>(limits.MaxConsistentCopyTargets); limits.MaxPathElementLength = rowset.GetValueOrDefault<Schema::SubDomains::PathElementLength>(limits.MaxPathElementLength); limits.ExtraPathSymbolsAllowed = rowset.GetValueOrDefault<Schema::SubDomains::ExtraPathSymbolsAllowed>(limits.ExtraPathSymbolsAllowed); - limits.MaxPQPartitions = rowset.GetValueOrDefault<Schema::SubDomains::PQPartitionsLimit>(limits.MaxPQPartitions); - + limits.MaxPQPartitions = rowset.GetValueOrDefault<Schema::SubDomains::PQPartitionsLimit>(limits.MaxPQPartitions); + domainInfo->SetSchemeLimits(limits); if (rowset.HaveValue<Schema::SubDomains::DeclaredSchemeQuotas>()) { @@ -2230,9 +2230,9 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> { pqGroup->TabletConfig = rowset.GetValue<Schema::PersQueueGroups::TabletConfig>(); pqGroup->MaxPartsPerTablet = rowset.GetValue<Schema::PersQueueGroups::MaxPQPerShard>(); pqGroup->AlterVersion = rowset.GetValue<Schema::PersQueueGroups::AlterVersion>(); - pqGroup->NextPartitionId = rowset.GetValueOrDefault<Schema::PersQueueGroups::NextPartitionId>(0); - pqGroup->TotalGroupCount = rowset.GetValueOrDefault<Schema::PersQueueGroups::TotalGroupCount>(0); - + pqGroup->NextPartitionId = rowset.GetValueOrDefault<Schema::PersQueueGroups::NextPartitionId>(0); + pqGroup->TotalGroupCount = rowset.GetValueOrDefault<Schema::PersQueueGroups::TotalGroupCount>(0); + const bool ok = pqGroup->FillKeySchema(pqGroup->TabletConfig); Y_VERIFY(ok); @@ -2263,7 +2263,7 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> { TLocalPathId localPathId = rowset.GetValue<Schema::PersQueues::PathId>(); TPathId pathId(selfId, localPathId); pqInfo.PqId = rowset.GetValue<Schema::PersQueues::PqId>(); - pqInfo.GroupId = rowset.GetValueOrDefault<Schema::PersQueues::GroupId>(pqInfo.PqId + 1); + pqInfo.GroupId = rowset.GetValueOrDefault<Schema::PersQueues::GroupId>(pqInfo.PqId + 1); TLocalShardIdx localShardIdx = rowset.GetValue<Schema::PersQueues::ShardIdx>(); TShardIdx shardIdx = Self->MakeLocalId(localShardIdx); pqInfo.AlterVersion = rowset.GetValue<Schema::PersQueues::AlterVersion>(); @@ -2284,16 +2284,16 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> { pqInfo.KeyRange->ToBound = rowset.GetValue<Schema::PersQueues::RangeEnd>(); } - auto it = Self->PersQueueGroups.find(pathId); + auto it = Self->PersQueueGroups.find(pathId); Y_VERIFY(it != Self->PersQueueGroups.end()); Y_VERIFY(it->second); TPersQueueGroupInfo::TPtr pqGroup = it->second; if (pqInfo.AlterVersion <= pqGroup->AlterVersion) ++pqGroup->TotalPartitionCount; - if (pqInfo.PqId >= pqGroup->NextPartitionId) { - pqGroup->NextPartitionId = pqInfo.PqId + 1; - pqGroup->TotalGroupCount = pqInfo.PqId + 1; - } + if (pqInfo.PqId >= pqGroup->NextPartitionId) { + pqGroup->NextPartitionId = pqInfo.PqId + 1; + pqGroup->TotalGroupCount = pqInfo.PqId + 1; + } TPQShardInfo::TPtr& pqShard = pqGroup->Shards[shardIdx]; if (!pqShard) { @@ -2306,40 +2306,40 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> { } } - // Read PersQueue groups' alters - { - auto rowset = db.Table<Schema::PersQueueGroupAlters>().Range().Select(); - if (!rowset.IsReady()) - return false; - while (!rowset.EndOfSet()) { + // Read PersQueue groups' alters + { + auto rowset = db.Table<Schema::PersQueueGroupAlters>().Range().Select(); + if (!rowset.IsReady()) + return false; + while (!rowset.EndOfSet()) { TLocalPathId localPathId = rowset.GetValue<Schema::PersQueueGroupAlters::PathId>(); TPathId pathId(selfId, localPathId); - - TPersQueueGroupInfo::TPtr alterData = new TPersQueueGroupInfo(); - alterData->TabletConfig = rowset.GetValue<Schema::PersQueueGroupAlters::TabletConfig>(); - alterData->MaxPartsPerTablet = rowset.GetValue<Schema::PersQueueGroupAlters::MaxPQPerShard>(); - alterData->AlterVersion = rowset.GetValue<Schema::PersQueueGroupAlters::AlterVersion>(); - alterData->TotalGroupCount = rowset.GetValue<Schema::PersQueueGroupAlters::TotalGroupCount>(); - alterData->NextPartitionId = rowset.GetValueOrDefault<Schema::PersQueueGroupAlters::NextPartitionId>(alterData->TotalGroupCount); + + TPersQueueGroupInfo::TPtr alterData = new TPersQueueGroupInfo(); + alterData->TabletConfig = rowset.GetValue<Schema::PersQueueGroupAlters::TabletConfig>(); + alterData->MaxPartsPerTablet = rowset.GetValue<Schema::PersQueueGroupAlters::MaxPQPerShard>(); + alterData->AlterVersion = rowset.GetValue<Schema::PersQueueGroupAlters::AlterVersion>(); + alterData->TotalGroupCount = rowset.GetValue<Schema::PersQueueGroupAlters::TotalGroupCount>(); + alterData->NextPartitionId = rowset.GetValueOrDefault<Schema::PersQueueGroupAlters::NextPartitionId>(alterData->TotalGroupCount); alterData->BootstrapConfig = rowset.GetValue<Schema::PersQueueGroupAlters::BootstrapConfig>(); - + const bool ok = alterData->FillKeySchema(alterData->TabletConfig); Y_VERIFY(ok); - auto it = Self->PersQueueGroups.find(pathId); - Y_VERIFY(it != Self->PersQueueGroups.end()); - - alterData->TotalPartitionCount = it->second->GetTotalPartitionCountWithAlter(); - alterData->BalancerTabletID = it->second->BalancerTabletID; - alterData->BalancerShardIdx = it->second->BalancerShardIdx; - it->second->AlterData = alterData; - - if (!rowset.Next()) - return false; - } - } - - + auto it = Self->PersQueueGroups.find(pathId); + Y_VERIFY(it != Self->PersQueueGroups.end()); + + alterData->TotalPartitionCount = it->second->GetTotalPartitionCountWithAlter(); + alterData->BalancerTabletID = it->second->BalancerTabletID; + alterData->BalancerShardIdx = it->second->BalancerShardIdx; + it->second->AlterData = alterData; + + if (!rowset.Next()) + return false; + } + } + + // Read RTMR volumes { auto rowset = db.Table<Schema::RtmrVolumes>().Range().Select(); @@ -3675,8 +3675,8 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> { Self->TabletCounters->Simple()[COUNTER_PQ_SHARD_COUNT].Add(1); break; case ETabletType::PersQueueReadBalancer: - Self->TabletCounters->Simple()[COUNTER_PQ_RB_SHARD_COUNT].Add(1); - break; + Self->TabletCounters->Simple()[COUNTER_PQ_RB_SHARD_COUNT].Add(1); + break; case ETabletType::BlockStoreVolume: Self->TabletCounters->Simple()[COUNTER_BLOCKSTORE_VOLUME_SHARD_COUNT].Add(1); break; @@ -3759,22 +3759,22 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> { if (path->IsPQGroup()) { auto pqGroup = Self->PersQueueGroups.at(path->PathId); auto delta = pqGroup->AlterData ? pqGroup->AlterData->TotalGroupCount : pqGroup->TotalGroupCount; - auto tabletConfig = pqGroup->AlterData ? (pqGroup->AlterData->TabletConfig.empty() ? pqGroup->TabletConfig : pqGroup->AlterData->TabletConfig) - : pqGroup->TabletConfig; - NKikimrPQ::TPQTabletConfig config; - Y_VERIFY(!tabletConfig.empty()); - bool parseOk = ParseFromStringNoSizeLimit(config, tabletConfig); - Y_VERIFY(parseOk); - - ui64 throughput = ((ui64)delta) * config.GetPartitionConfig().GetWriteSpeedInBytesPerSecond(); - ui64 storage = throughput * config.GetPartitionConfig().GetLifetimeSeconds(); - + auto tabletConfig = pqGroup->AlterData ? (pqGroup->AlterData->TabletConfig.empty() ? pqGroup->TabletConfig : pqGroup->AlterData->TabletConfig) + : pqGroup->TabletConfig; + NKikimrPQ::TPQTabletConfig config; + Y_VERIFY(!tabletConfig.empty()); + bool parseOk = ParseFromStringNoSizeLimit(config, tabletConfig); + Y_VERIFY(parseOk); + + ui64 throughput = ((ui64)delta) * config.GetPartitionConfig().GetWriteSpeedInBytesPerSecond(); + ui64 storage = throughput * config.GetPartitionConfig().GetLifetimeSeconds(); + inclusivedomainInfo->IncPQPartitionsInside(delta); - inclusivedomainInfo->IncPQReservedStorage(storage); - + inclusivedomainInfo->IncPQReservedStorage(storage); + Self->TabletCounters->Simple()[COUNTER_STREAM_SHARDS_COUNT].Add(delta); - Self->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_THROUGHPUT].Add(throughput); - Self->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_STORAGE].Add(storage); + Self->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_THROUGHPUT].Add(throughput); + Self->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_STORAGE].Add(storage); } if (path->PlannedToDrop()) { diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_pq.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_pq.cpp index c405955f77..f7a47459b9 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_pq.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_pq.cpp @@ -445,14 +445,14 @@ public: return result; } - NKikimrPQ::TPQTabletConfig tabletConfig, newTabletConfig; + NKikimrPQ::TPQTabletConfig tabletConfig, newTabletConfig; if (!pqGroup->TabletConfig.empty()) { bool parseOk = ParseFromStringNoSizeLimit(tabletConfig, pqGroup->TabletConfig); Y_VERIFY(parseOk, "Previously serialized pq tablet config cannot be parsed"); } - newTabletConfig = tabletConfig; + newTabletConfig = tabletConfig; + - TPersQueueGroupInfo::TPtr alterData = ParseParams(context, &newTabletConfig, alter, errStr); if (!alterData) { result->SetError(NKikimrScheme::StatusInvalidParameter, errStr); @@ -512,45 +512,45 @@ public: if (alterData->ExpectedShardCount() > pqGroup->ShardCount()) { shardsToCreate += alterData->ExpectedShardCount() - pqGroup->ShardCount(); } - ui64 partitionsToCreate = alterData->PartitionsToAdd.size(); - - if (alterData->TotalGroupCount > TSchemeShard::MaxPQGroupPartitionsCount) { - errStr = TStringBuilder() - << "Invalid partition count specified: " << alterData->TotalGroupCount - << " vs " << TSchemeShard::MaxPQGroupPartitionsCount; - result->SetError(NKikimrScheme::StatusInvalidParameter, errStr); - return result; - } - if ((ui32)newTabletConfig.GetPartitionConfig().GetWriteSpeedInBytesPerSecond() > TSchemeShard::MaxPQWriteSpeedPerPartition) { - errStr = TStringBuilder() - << "Invalid write speed per second in partition specified: " << newTabletConfig.GetPartitionConfig().GetWriteSpeedInBytesPerSecond() - << " vs " << TSchemeShard::MaxPQWriteSpeedPerPartition; - result->SetError(NKikimrScheme::StatusInvalidParameter, errStr); - return result; - } - - if ((ui32)newTabletConfig.GetPartitionConfig().GetLifetimeSeconds() > TSchemeShard::MaxPQLifetimeSeconds) { - errStr = TStringBuilder() - << "Invalid retention period specified: " << newTabletConfig.GetPartitionConfig().GetLifetimeSeconds() - << " vs " << TSchemeShard::MaxPQLifetimeSeconds; - result->SetError(NKikimrScheme::StatusInvalidParameter, errStr); - return result; - } - - ui64 newThroughput = ((ui64)(newTabletConfig.GetPartitionConfig().GetWriteSpeedInBytesPerSecond())) * (alterData->TotalGroupCount); - ui64 oldThroughput = ((ui64)(tabletConfig.GetPartitionConfig().GetWriteSpeedInBytesPerSecond())) * (pqGroup->TotalGroupCount); - ui64 newStorage = newThroughput * newTabletConfig.GetPartitionConfig().GetLifetimeSeconds(); - ui64 oldStorage = oldThroughput * tabletConfig.GetPartitionConfig().GetLifetimeSeconds(); - - ui64 storageToReserve = newStorage > oldStorage ? newStorage - oldStorage : 0; - + ui64 partitionsToCreate = alterData->PartitionsToAdd.size(); + + if (alterData->TotalGroupCount > TSchemeShard::MaxPQGroupPartitionsCount) { + errStr = TStringBuilder() + << "Invalid partition count specified: " << alterData->TotalGroupCount + << " vs " << TSchemeShard::MaxPQGroupPartitionsCount; + result->SetError(NKikimrScheme::StatusInvalidParameter, errStr); + return result; + } + if ((ui32)newTabletConfig.GetPartitionConfig().GetWriteSpeedInBytesPerSecond() > TSchemeShard::MaxPQWriteSpeedPerPartition) { + errStr = TStringBuilder() + << "Invalid write speed per second in partition specified: " << newTabletConfig.GetPartitionConfig().GetWriteSpeedInBytesPerSecond() + << " vs " << TSchemeShard::MaxPQWriteSpeedPerPartition; + result->SetError(NKikimrScheme::StatusInvalidParameter, errStr); + return result; + } + + if ((ui32)newTabletConfig.GetPartitionConfig().GetLifetimeSeconds() > TSchemeShard::MaxPQLifetimeSeconds) { + errStr = TStringBuilder() + << "Invalid retention period specified: " << newTabletConfig.GetPartitionConfig().GetLifetimeSeconds() + << " vs " << TSchemeShard::MaxPQLifetimeSeconds; + result->SetError(NKikimrScheme::StatusInvalidParameter, errStr); + return result; + } + + ui64 newThroughput = ((ui64)(newTabletConfig.GetPartitionConfig().GetWriteSpeedInBytesPerSecond())) * (alterData->TotalGroupCount); + ui64 oldThroughput = ((ui64)(tabletConfig.GetPartitionConfig().GetWriteSpeedInBytesPerSecond())) * (pqGroup->TotalGroupCount); + ui64 newStorage = newThroughput * newTabletConfig.GetPartitionConfig().GetLifetimeSeconds(); + ui64 oldStorage = oldThroughput * tabletConfig.GetPartitionConfig().GetLifetimeSeconds(); + + ui64 storageToReserve = newStorage > oldStorage ? newStorage - oldStorage : 0; + { TPath::TChecker checks = path.Check(); checks .ShardsLimit(shardsToCreate) - .PathShardsLimit(shardsToCreate) - .PQPartitionsLimit(partitionsToCreate) - .PQReservedStorageLimit(storageToReserve); + .PathShardsLimit(shardsToCreate) + .PQPartitionsLimit(partitionsToCreate) + .PQReservedStorageLimit(storageToReserve); if (!checks) { TString explain = TStringBuilder() << "path fail checks" @@ -579,7 +579,7 @@ public: // This channel bindings are for PersQueue shards. They either use // explicit channel profiles, or reuse channel profile above. - const auto& partConfig = newTabletConfig.GetPartitionConfig(); + const auto& partConfig = newTabletConfig.GetPartitionConfig(); TChannelsBindings pqChannelsBinding; if (partConfig.ExplicitChannelProfilesSize() > 0) { // N.B. no validation necessary at this step @@ -614,17 +614,17 @@ public: context.OnComplete.PublishToSchemeBoard(OperationId, path.Base()->PathId); path.DomainInfo()->AddInternalShards(txState); - path.DomainInfo()->IncPQPartitionsInside(partitionsToCreate); - path.DomainInfo()->UpdatePQReservedStorage(oldStorage, newStorage); - - - context.SS->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_THROUGHPUT].Add(newThroughput); - context.SS->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_THROUGHPUT].Sub(oldThroughput); - - context.SS->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_STORAGE].Add(newStorage); - context.SS->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_STORAGE].Sub(oldStorage); - - context.SS->TabletCounters->Simple()[COUNTER_STREAM_SHARDS_COUNT].Add(partitionsToCreate); + path.DomainInfo()->IncPQPartitionsInside(partitionsToCreate); + path.DomainInfo()->UpdatePQReservedStorage(oldStorage, newStorage); + + + context.SS->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_THROUGHPUT].Add(newThroughput); + context.SS->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_THROUGHPUT].Sub(oldThroughput); + + context.SS->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_STORAGE].Add(newStorage); + context.SS->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_STORAGE].Sub(oldStorage); + + context.SS->TabletCounters->Simple()[COUNTER_STREAM_SHARDS_COUNT].Add(partitionsToCreate); path.Base()->IncShardsInside(shardsToCreate); State = NextState(); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_pq.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_pq.cpp index 09c7ad75c3..992fd3175c 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_pq.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_pq.cpp @@ -40,7 +40,7 @@ TPersQueueGroupInfo::TPtr CreatePersQueueGroup(TOperationContext& context, } if (partitionCount == 0 || partitionCount > TSchemeShard::MaxPQGroupPartitionsCount) { - status = NKikimrScheme::StatusInvalidParameter; + status = NKikimrScheme::StatusInvalidParameter; errStr = Sprintf("Invalid total partition count specified: %u", partitionCount); return nullptr; } @@ -51,22 +51,22 @@ TPersQueueGroupInfo::TPtr CreatePersQueueGroup(TOperationContext& context, return nullptr; } - if ((ui32)op.GetPQTabletConfig().GetPartitionConfig().GetWriteSpeedInBytesPerSecond() > TSchemeShard::MaxPQWriteSpeedPerPartition) { - status = NKikimrScheme::StatusInvalidParameter; - errStr = TStringBuilder() - << "Invalid write speed per second in partition specified: " << op.GetPQTabletConfig().GetPartitionConfig().GetWriteSpeedInBytesPerSecond() - << " vs " << TSchemeShard::MaxPQWriteSpeedPerPartition; - return nullptr; - } - - if ((ui32)op.GetPQTabletConfig().GetPartitionConfig().GetLifetimeSeconds() > TSchemeShard::MaxPQLifetimeSeconds) { - status = NKikimrScheme::StatusInvalidParameter; - errStr = TStringBuilder() - << "Invalid retention period specified: " << op.GetPQTabletConfig().GetPartitionConfig().GetLifetimeSeconds() - << " vs " << TSchemeShard::MaxPQLifetimeSeconds; - return nullptr; - } - + if ((ui32)op.GetPQTabletConfig().GetPartitionConfig().GetWriteSpeedInBytesPerSecond() > TSchemeShard::MaxPQWriteSpeedPerPartition) { + status = NKikimrScheme::StatusInvalidParameter; + errStr = TStringBuilder() + << "Invalid write speed per second in partition specified: " << op.GetPQTabletConfig().GetPartitionConfig().GetWriteSpeedInBytesPerSecond() + << " vs " << TSchemeShard::MaxPQWriteSpeedPerPartition; + return nullptr; + } + + if ((ui32)op.GetPQTabletConfig().GetPartitionConfig().GetLifetimeSeconds() > TSchemeShard::MaxPQLifetimeSeconds) { + status = NKikimrScheme::StatusInvalidParameter; + errStr = TStringBuilder() + << "Invalid retention period specified: " << op.GetPQTabletConfig().GetPartitionConfig().GetLifetimeSeconds() + << " vs " << TSchemeShard::MaxPQLifetimeSeconds; + return nullptr; + } + if (op.GetPQTabletConfig().PartitionKeySchemaSize()) { if (op.PartitionBoundariesSize() != (partitionCount - 1)) { status = NKikimrScheme::StatusInvalidParameter; @@ -385,25 +385,25 @@ public: } const ui64 shardsToCreate = pqGroup->ExpectedShardCount() + 1; - const ui64 partitionsToCreate = pqGroup->TotalPartitionCount; - - auto tabletConfig = pqGroup->TabletConfig; - NKikimrPQ::TPQTabletConfig config; - Y_VERIFY(!tabletConfig.empty()); - - bool parseOk = ParseFromStringNoSizeLimit(config, tabletConfig); - Y_VERIFY(parseOk); - - ui64 throughput = ((ui64)partitionsToCreate) * config.GetPartitionConfig().GetWriteSpeedInBytesPerSecond(); - ui64 storage = throughput * config.GetPartitionConfig().GetLifetimeSeconds(); - ui64 storageToReserve = storage; + const ui64 partitionsToCreate = pqGroup->TotalPartitionCount; + + auto tabletConfig = pqGroup->TabletConfig; + NKikimrPQ::TPQTabletConfig config; + Y_VERIFY(!tabletConfig.empty()); + + bool parseOk = ParseFromStringNoSizeLimit(config, tabletConfig); + Y_VERIFY(parseOk); + + ui64 throughput = ((ui64)partitionsToCreate) * config.GetPartitionConfig().GetWriteSpeedInBytesPerSecond(); + ui64 storage = throughput * config.GetPartitionConfig().GetLifetimeSeconds(); + ui64 storageToReserve = storage; { NSchemeShard::TPath::TChecker checks = dstPath.Check(); checks .ShardsLimit(shardsToCreate) - .PathShardsLimit(shardsToCreate) - .PQPartitionsLimit(partitionsToCreate) - .PQReservedStorageLimit(storageToReserve); + .PathShardsLimit(shardsToCreate) + .PQPartitionsLimit(partitionsToCreate) + .PQReservedStorageLimit(storageToReserve); if (!checks) { TString explain = TStringBuilder() << "dst path fail checks" @@ -539,14 +539,14 @@ public: dstPath.DomainInfo()->IncPathsInside(); dstPath.DomainInfo()->AddInternalShards(txState); - dstPath.DomainInfo()->IncPQPartitionsInside(partitionsToCreate); - dstPath.DomainInfo()->IncPQReservedStorage(storageToReserve); - - context.SS->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_THROUGHPUT].Add(throughput); - context.SS->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_STORAGE].Add(storage); - - context.SS->TabletCounters->Simple()[COUNTER_STREAM_SHARDS_COUNT].Add(partitionsToCreate); - + dstPath.DomainInfo()->IncPQPartitionsInside(partitionsToCreate); + dstPath.DomainInfo()->IncPQReservedStorage(storageToReserve); + + context.SS->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_THROUGHPUT].Add(throughput); + context.SS->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_STORAGE].Add(storage); + + context.SS->TabletCounters->Simple()[COUNTER_STREAM_SHARDS_COUNT].Add(partitionsToCreate); + dstPath.Base()->IncShardsInside(shardsToCreate); parentPath.Base()->IncAliveChildren(); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_drop_pq.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_drop_pq.cpp index a0ed89bd1f..c338b4c358 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_drop_pq.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_drop_pq.cpp @@ -197,9 +197,9 @@ public: Y_VERIFY(!path->Dropped()); path->SetDropped(step, OperationId.GetTxId()); context.SS->PersistDropStep(db, pathId, step, OperationId); - TPersQueueGroupInfo::TPtr pqGroup = context.SS->PersQueueGroups.at(pathId); - Y_VERIFY(pqGroup); - + TPersQueueGroupInfo::TPtr pqGroup = context.SS->PersQueueGroups.at(pathId); + Y_VERIFY(pqGroup); + // KIKIMR-13173 // Repeat it here for a waile, delete it from TDeleteParts after // Initiate asynchonous deletion of all shards @@ -207,25 +207,25 @@ public: context.OnComplete.DeleteShard(shard.Idx); } - auto tabletConfig = pqGroup->TabletConfig; - NKikimrPQ::TPQTabletConfig config; - Y_VERIFY(!tabletConfig.empty()); - bool parseOk = ParseFromStringNoSizeLimit(config, tabletConfig); - Y_VERIFY(parseOk); - - ui64 throughput = ((ui64)pqGroup->TotalPartitionCount) * config.GetPartitionConfig().GetWriteSpeedInBytesPerSecond(); - ui64 storage = throughput * config.GetPartitionConfig().GetLifetimeSeconds(); - - auto domainInfo = context.SS->ResolveDomainInfo(pathId); - domainInfo->DecPathsInside(); - domainInfo->DecPQPartitionsInside(pqGroup->TotalPartitionCount); - domainInfo->DecPQReservedStorage(storage); - - context.SS->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_THROUGHPUT].Sub(throughput); - context.SS->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_STORAGE].Sub(storage); - + auto tabletConfig = pqGroup->TabletConfig; + NKikimrPQ::TPQTabletConfig config; + Y_VERIFY(!tabletConfig.empty()); + bool parseOk = ParseFromStringNoSizeLimit(config, tabletConfig); + Y_VERIFY(parseOk); + + ui64 throughput = ((ui64)pqGroup->TotalPartitionCount) * config.GetPartitionConfig().GetWriteSpeedInBytesPerSecond(); + ui64 storage = throughput * config.GetPartitionConfig().GetLifetimeSeconds(); + + auto domainInfo = context.SS->ResolveDomainInfo(pathId); + domainInfo->DecPathsInside(); + domainInfo->DecPQPartitionsInside(pqGroup->TotalPartitionCount); + domainInfo->DecPQReservedStorage(storage); + + context.SS->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_THROUGHPUT].Sub(throughput); + context.SS->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_STORAGE].Sub(storage); + context.SS->TabletCounters->Simple()[COUNTER_STREAM_SHARDS_COUNT].Sub(pqGroup->TotalPartitionCount); - + parentDir->DecAliveChildren(); if (!AppData()->DisableSchemeShardCleanupOnDropForTest) { diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.cpp b/ydb/core/tx/schemeshard/schemeshard_impl.cpp index d47b7c32cf..0715543a22 100644 --- a/ydb/core/tx/schemeshard/schemeshard_impl.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_impl.cpp @@ -1644,9 +1644,9 @@ void TSchemeShard::PersistSchemeLimit(NIceDb::TNiceDb &db, const TPathId &pathId NIceDb::TUpdate<Schema::SubDomains::AclByteSizeLimit> (subDomain.GetSchemeLimits().MaxAclBytesSize), NIceDb::TUpdate<Schema::SubDomains::ConsistentCopyingTargetsLimit> (subDomain.GetSchemeLimits().MaxConsistentCopyTargets), NIceDb::TUpdate<Schema::SubDomains::PathElementLength> (subDomain.GetSchemeLimits().MaxPathElementLength), - NIceDb::TUpdate<Schema::SubDomains::ExtraPathSymbolsAllowed> (subDomain.GetSchemeLimits().ExtraPathSymbolsAllowed), - NIceDb::TUpdate<Schema::SubDomains::PQPartitionsLimit> (subDomain.GetSchemeLimits().MaxPQPartitions) - ); + NIceDb::TUpdate<Schema::SubDomains::ExtraPathSymbolsAllowed> (subDomain.GetSchemeLimits().ExtraPathSymbolsAllowed), + NIceDb::TUpdate<Schema::SubDomains::PQPartitionsLimit> (subDomain.GetSchemeLimits().MaxPQPartitions) + ); } void TSchemeShard::PersistStoragePools(NIceDb::TNiceDb& db, const TPathId& pathId, const TSubDomainInfo& subDomain) { @@ -2282,9 +2282,9 @@ void TSchemeShard::PersistPersQueueGroup(NIceDb::TNiceDb& db, TPathId pathId, co db.Table<Schema::PersQueueGroups>().Key(pathId.LocalPathId).Update( NIceDb::TUpdate<Schema::PersQueueGroups::TabletConfig>(pqGroup->TabletConfig), NIceDb::TUpdate<Schema::PersQueueGroups::MaxPQPerShard>(pqGroup->MaxPartsPerTablet), - NIceDb::TUpdate<Schema::PersQueueGroups::AlterVersion>(pqGroup->AlterVersion), - NIceDb::TUpdate<Schema::PersQueueGroups::TotalGroupCount>(pqGroup->TotalGroupCount), - NIceDb::TUpdate<Schema::PersQueueGroups::NextPartitionId>(pqGroup->NextPartitionId)); + NIceDb::TUpdate<Schema::PersQueueGroups::AlterVersion>(pqGroup->AlterVersion), + NIceDb::TUpdate<Schema::PersQueueGroups::TotalGroupCount>(pqGroup->TotalGroupCount), + NIceDb::TUpdate<Schema::PersQueueGroups::NextPartitionId>(pqGroup->NextPartitionId)); } void TSchemeShard::PersistRemovePersQueueGroup(NIceDb::TNiceDb& db, TPathId pathId) { @@ -2318,7 +2318,7 @@ void TSchemeShard::PersistAddPersQueueGroupAlter(NIceDb::TNiceDb& db, TPathId pa NIceDb::TUpdate<Schema::PersQueueGroupAlters::TabletConfig>(alterData->TabletConfig), NIceDb::TUpdate<Schema::PersQueueGroupAlters::MaxPQPerShard>(alterData->MaxPartsPerTablet), NIceDb::TUpdate<Schema::PersQueueGroupAlters::AlterVersion>(alterData->AlterVersion), - NIceDb::TUpdate<Schema::PersQueueGroupAlters::TotalGroupCount>(alterData->TotalGroupCount), + NIceDb::TUpdate<Schema::PersQueueGroupAlters::TotalGroupCount>(alterData->TotalGroupCount), NIceDb::TUpdate<Schema::PersQueueGroupAlters::NextPartitionId>(alterData->NextPartitionId), NIceDb::TUpdate<Schema::PersQueueGroupAlters::BootstrapConfig>(alterData->BootstrapConfig)); } @@ -6046,21 +6046,21 @@ void TSchemeShard::Handle(NConsole::TEvConsole::TEvConfigNotificationRequest::TP } void TSchemeShard::ChangeStreamShardsCount(i64 delta) { - TabletCounters->Simple()[COUNTER_STREAM_SHARDS_COUNT].Add(delta); -} - + TabletCounters->Simple()[COUNTER_STREAM_SHARDS_COUNT].Add(delta); +} + void TSchemeShard::ChangeStreamShardsQuota(i64 delta) { - TabletCounters->Simple()[COUNTER_STREAM_SHARDS_QUOTA].Add(delta); -} - -void TSchemeShard::ChangeStreamReservedStorageCount(i64 delta) { - TabletCounters->Simple()[COUNTER_STREAM_RESERVED_STORAGE].Add(delta); -} - -void TSchemeShard::ChangeStreamReservedStorageQuota(i64 delta) { - TabletCounters->Simple()[COUNTER_STREAM_RESERVED_STORAGE_QUOTA].Add(delta); -} - + TabletCounters->Simple()[COUNTER_STREAM_SHARDS_QUOTA].Add(delta); +} + +void TSchemeShard::ChangeStreamReservedStorageCount(i64 delta) { + TabletCounters->Simple()[COUNTER_STREAM_RESERVED_STORAGE].Add(delta); +} + +void TSchemeShard::ChangeStreamReservedStorageQuota(i64 delta) { + TabletCounters->Simple()[COUNTER_STREAM_RESERVED_STORAGE_QUOTA].Add(delta); +} + void TSchemeShard::ChangeDiskSpaceTablesDataBytes(i64 delta) { TabletCounters->Simple()[COUNTER_DISK_SPACE_TABLES_DATA_BYTES].Add(delta); } diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.h b/ydb/core/tx/schemeshard/schemeshard_impl.h index 09626cad37..e1a1d08b9a 100644 --- a/ydb/core/tx/schemeshard/schemeshard_impl.h +++ b/ydb/core/tx/schemeshard/schemeshard_impl.h @@ -60,7 +60,7 @@ extern const ui64 NEW_TABLE_ALTER_VERSION; class TSchemeShard : public TActor<TSchemeShard> , public NTabletFlatExecutor::TTabletExecutedFlat - , public IQuotaCounters + , public IQuotaCounters { private: class TPipeClientFactory : public NTabletPipe::IClientFactory { @@ -102,9 +102,9 @@ public: static constexpr ui32 DefaultPQTabletPartitionsCount = 1; static constexpr ui32 MaxPQTabletPartitionsCount = 1000; static constexpr ui32 MaxPQGroupTabletsCount = 10*1000; - static constexpr ui32 MaxPQGroupPartitionsCount = 20*1000; - static constexpr ui32 MaxPQWriteSpeedPerPartition = 50*1024*1024; - static constexpr ui32 MaxPQLifetimeSeconds = 31 * 86400; + static constexpr ui32 MaxPQGroupPartitionsCount = 20*1000; + static constexpr ui32 MaxPQWriteSpeedPerPartition = 50*1024*1024; + static constexpr ui32 MaxPQLifetimeSeconds = 31 * 86400; static constexpr ui32 PublishChunkSize = 1000; static const TSchemeLimits DefaultLimits; @@ -195,7 +195,7 @@ public: TAutoPtr<NTabletPipe::IClientCache> PipeClientCache; TPipeTracker PipeTracker; - TCompactionStarter CompactionStarter; + TCompactionStarter CompactionStarter; TCompactionQueue* CompactionQueue = nullptr; bool EnableBackgroundCompaction = false; bool EnableBackgroundCompactionServerless = false; @@ -1049,17 +1049,17 @@ public: // } //NIndexBuilder public: - void ChangeStreamShardsCount(i64 delta) override; - void ChangeStreamShardsQuota(i64 delta) override; - void ChangeStreamReservedStorageCount(i64 delta) override; - void ChangeStreamReservedStorageQuota(i64 delta) override; + void ChangeStreamShardsCount(i64 delta) override; + void ChangeStreamShardsQuota(i64 delta) override; + void ChangeStreamReservedStorageCount(i64 delta) override; + void ChangeStreamReservedStorageQuota(i64 delta) override; void ChangeDiskSpaceTablesDataBytes(i64 delta) override; void ChangeDiskSpaceTablesIndexBytes(i64 delta) override; void ChangeDiskSpaceTablesTotalBytes(i64 delta) override; void ChangeDiskSpaceQuotaExceeded(i64 delta) override; void ChangeDiskSpaceHardQuotaBytes(i64 delta) override; void ChangeDiskSpaceSoftQuotaBytes(i64 delta) override; - + NLogin::TLoginProvider LoginProvider; private: diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index 79074ad4d4..2e5710c3d3 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -977,8 +977,8 @@ struct TPersQueueGroupInfo : TSimpleRefCount<TPersQueueGroupInfo> { }; }; - ui64 TotalGroupCount = 0; - ui64 TotalPartitionCount = 0; + ui64 TotalGroupCount = 0; + ui64 TotalPartitionCount = 0; ui32 NextPartitionId = 0; THashSet<TPartitionToAdd, TPartitionToAdd::THash> PartitionsToAdd; THashSet<ui32> PartitionsToDelete; @@ -1117,11 +1117,11 @@ struct TSchemeQuotas : public TVector<TSchemeQuota> { mutable size_t LastKnownSize = 0; }; -struct IQuotaCounters { - virtual void ChangeStreamShardsCount(i64 delta) = 0; - virtual void ChangeStreamShardsQuota(i64 delta) = 0; - virtual void ChangeStreamReservedStorageQuota(i64 delta) = 0; - virtual void ChangeStreamReservedStorageCount(i64 delta) = 0; +struct IQuotaCounters { + virtual void ChangeStreamShardsCount(i64 delta) = 0; + virtual void ChangeStreamShardsQuota(i64 delta) = 0; + virtual void ChangeStreamReservedStorageQuota(i64 delta) = 0; + virtual void ChangeStreamReservedStorageCount(i64 delta) = 0; virtual void ChangeDiskSpaceTablesDataBytes(i64 delta) = 0; virtual void ChangeDiskSpaceTablesIndexBytes(i64 delta) = 0; virtual void ChangeDiskSpaceTablesTotalBytes(i64 delta) = 0; @@ -1289,54 +1289,54 @@ struct TSubDomainInfo: TSimpleRefCount<TSubDomainInfo> { PathsInsideCount -= delta; } - ui64 GetPQPartitionsInside() const { - return PQPartitionsInsideCount; - } - - void SetPQPartitionsInside(ui64 val) { - PQPartitionsInsideCount = val; - } - - void IncPQPartitionsInside(ui64 delta = 1) { - Y_VERIFY(Max<ui64>() - PQPartitionsInsideCount >= delta); - PQPartitionsInsideCount += delta; - } - - void DecPQPartitionsInside(ui64 delta = 1) { + ui64 GetPQPartitionsInside() const { + return PQPartitionsInsideCount; + } + + void SetPQPartitionsInside(ui64 val) { + PQPartitionsInsideCount = val; + } + + void IncPQPartitionsInside(ui64 delta = 1) { + Y_VERIFY(Max<ui64>() - PQPartitionsInsideCount >= delta); + PQPartitionsInsideCount += delta; + } + + void DecPQPartitionsInside(ui64 delta = 1) { Y_VERIFY_S(PQPartitionsInsideCount >= delta, "PQPartitionsInsideCount: " << PQPartitionsInsideCount << " delta: " << delta); PQPartitionsInsideCount -= delta; - } - - ui64 GetPQReservedStorage() const { - return PQReservedStorage; - } - - void SetPQReservedStorage(ui64 val) { - PQReservedStorage = val; - } - - void IncPQReservedStorage(ui64 delta = 1) { - Y_VERIFY(Max<ui64>() - PQReservedStorage >= delta); - PQReservedStorage += delta; - } - - void DecPQReservedStorage(ui64 delta = 1) { - Y_VERIFY_S(PQReservedStorage >= delta, "PQReservedStorage: " << PQReservedStorage << " delta: " << delta); - PQReservedStorage -= delta; - } - - void UpdatePQReservedStorage(ui64 oldStorage, ui64 newStorage) { - if (oldStorage == newStorage) - return; - DecPQReservedStorage(oldStorage); - IncPQReservedStorage(newStorage); - } - + } + + ui64 GetPQReservedStorage() const { + return PQReservedStorage; + } + + void SetPQReservedStorage(ui64 val) { + PQReservedStorage = val; + } + + void IncPQReservedStorage(ui64 delta = 1) { + Y_VERIFY(Max<ui64>() - PQReservedStorage >= delta); + PQReservedStorage += delta; + } + + void DecPQReservedStorage(ui64 delta = 1) { + Y_VERIFY_S(PQReservedStorage >= delta, "PQReservedStorage: " << PQReservedStorage << " delta: " << delta); + PQReservedStorage -= delta; + } + + void UpdatePQReservedStorage(ui64 oldStorage, ui64 newStorage) { + if (oldStorage == newStorage) + return; + DecPQReservedStorage(oldStorage); + IncPQReservedStorage(newStorage); + } + ui64 GetShardsInside() const { return InternalShards.size(); } - void ActualizeAlterData(const THashMap<TShardIdx, TShardInfo>& allShards, TInstant now, bool isExternal, IQuotaCounters* counters) { + void ActualizeAlterData(const THashMap<TShardIdx, TShardInfo>& allShards, TInstant now, bool isExternal, IQuotaCounters* counters) { Y_VERIFY(AlterData); AlterData->SetPathsInside(GetPathsInside()); @@ -1364,19 +1364,19 @@ struct TSubDomainInfo: TSimpleRefCount<TSubDomainInfo> { AlterData->CheckDiskSpaceQuotas(counters); CountDiskSpaceQuotas(counters, GetDiskSpaceQuotas(), AlterData->GetDiskSpaceQuotas()); - CountStreamShardsQuota(counters, GetStreamShardsQuota(), AlterData->GetStreamShardsQuota()); - CountStreamReservedStorageQuota(counters, GetStreamReservedStorageQuota(), AlterData->GetStreamReservedStorageQuota()); - - } - - ui64 GetStreamShardsQuota() const { - return DatabaseQuotas ? DatabaseQuotas->data_stream_shards_quota() : 0; - } - - ui64 GetStreamReservedStorageQuota() const { - return DatabaseQuotas ? DatabaseQuotas->data_stream_reserved_storage_quota() : 0; - } - + CountStreamShardsQuota(counters, GetStreamShardsQuota(), AlterData->GetStreamShardsQuota()); + CountStreamReservedStorageQuota(counters, GetStreamReservedStorageQuota(), AlterData->GetStreamReservedStorageQuota()); + + } + + ui64 GetStreamShardsQuota() const { + return DatabaseQuotas ? DatabaseQuotas->data_stream_shards_quota() : 0; + } + + ui64 GetStreamReservedStorageQuota() const { + return DatabaseQuotas ? DatabaseQuotas->data_stream_reserved_storage_quota() : 0; + } + TDuration GetTtlMinRunInterval() const { static constexpr auto TtlMinRunInterval = TDuration::Minutes(15); @@ -1406,7 +1406,7 @@ struct TSubDomainInfo: TSimpleRefCount<TSubDomainInfo> { return TDiskSpaceQuotas{ hardQuota, softQuota }; } - static void CountDiskSpaceQuotas(IQuotaCounters* counters, const TDiskSpaceQuotas& quotas) { + static void CountDiskSpaceQuotas(IQuotaCounters* counters, const TDiskSpaceQuotas& quotas) { if (quotas.HardQuota != 0) { counters->ChangeDiskSpaceHardQuotaBytes(quotas.HardQuota); } @@ -1415,7 +1415,7 @@ struct TSubDomainInfo: TSimpleRefCount<TSubDomainInfo> { } } - static void CountDiskSpaceQuotas(IQuotaCounters* counters, const TDiskSpaceQuotas& prev, const TDiskSpaceQuotas& next) { + static void CountDiskSpaceQuotas(IQuotaCounters* counters, const TDiskSpaceQuotas& prev, const TDiskSpaceQuotas& next) { i64 hardDelta = i64(next.HardQuota) - i64(prev.HardQuota); if (hardDelta != 0) { counters->ChangeDiskSpaceHardQuotaBytes(hardDelta); @@ -1426,30 +1426,30 @@ struct TSubDomainInfo: TSimpleRefCount<TSubDomainInfo> { } } - static void CountStreamShardsQuota(IQuotaCounters* counters, const i64 delta) { - counters->ChangeStreamShardsQuota(delta); - } - - static void CountStreamReservedStorageQuota(IQuotaCounters* counters, const i64 delta) { - counters->ChangeStreamReservedStorageQuota(delta); - } - - static void CountStreamShardsQuota(IQuotaCounters* counters, const i64& prev, const i64& next) { - counters->ChangeStreamShardsQuota(next - prev); - } - - static void CountStreamReservedStorageQuota(IQuotaCounters* counters, const i64& prev, const i64& next) { - counters->ChangeStreamReservedStorageQuota(next - prev); - } - - + static void CountStreamShardsQuota(IQuotaCounters* counters, const i64 delta) { + counters->ChangeStreamShardsQuota(delta); + } + + static void CountStreamReservedStorageQuota(IQuotaCounters* counters, const i64 delta) { + counters->ChangeStreamReservedStorageQuota(delta); + } + + static void CountStreamShardsQuota(IQuotaCounters* counters, const i64& prev, const i64& next) { + counters->ChangeStreamShardsQuota(next - prev); + } + + static void CountStreamReservedStorageQuota(IQuotaCounters* counters, const i64& prev, const i64& next) { + counters->ChangeStreamReservedStorageQuota(next - prev); + } + + /** * Checks current disk usage against disk quotas * * Returns true when DiskQuotaExceeded value has changed and needs to be * persisted and pushed to scheme board. */ - bool CheckDiskSpaceQuotas(IQuotaCounters* counters) { + bool CheckDiskSpaceQuotas(IQuotaCounters* counters) { auto quotas = GetDiskSpaceQuotas(); if (!quotas) { if (DiskQuotaExceeded) { @@ -1628,7 +1628,7 @@ struct TSubDomainInfo: TSimpleRefCount<TSubDomainInfo> { CoordinatorSelector = new TCoordinators(ProcessingParams); } - void AggrDiskSpaceUsage(IQuotaCounters* counters, const TTableInfo::TPartitionStats& newAggr, const TTableInfo::TPartitionStats& oldAggr = TTableInfo::TPartitionStats()) { + void AggrDiskSpaceUsage(IQuotaCounters* counters, const TTableInfo::TPartitionStats& newAggr, const TTableInfo::TPartitionStats& oldAggr = TTableInfo::TPartitionStats()) { DiskSpaceUsage.Tables.DataSize += (newAggr.DataSize - oldAggr.DataSize); counters->ChangeDiskSpaceTablesDataBytes(newAggr.DataSize - oldAggr.DataSize); @@ -1661,17 +1661,17 @@ struct TSubDomainInfo: TSimpleRefCount<TSubDomainInfo> { DatabaseQuotas.ConstructInPlace(databaseQuotas); } - void SetDatabaseQuotas(const Ydb::Cms::DatabaseQuotas& databaseQuotas, IQuotaCounters* counters) { + void SetDatabaseQuotas(const Ydb::Cms::DatabaseQuotas& databaseQuotas, IQuotaCounters* counters) { auto prev = GetDiskSpaceQuotas(); - auto prevs = GetStreamShardsQuota(); - auto prevrs = GetStreamReservedStorageQuota(); + auto prevs = GetStreamShardsQuota(); + auto prevrs = GetStreamReservedStorageQuota(); DatabaseQuotas.ConstructInPlace(databaseQuotas); auto next = GetDiskSpaceQuotas(); - auto nexts = GetStreamShardsQuota(); - auto nextrs = GetStreamReservedStorageQuota(); + auto nexts = GetStreamShardsQuota(); + auto nextrs = GetStreamReservedStorageQuota(); CountDiskSpaceQuotas(counters, prev, next); - CountStreamShardsQuota(counters, prevs, nexts); - CountStreamReservedStorageQuota(counters, prevrs, nextrs); + CountStreamShardsQuota(counters, prevs, nexts); + CountStreamReservedStorageQuota(counters, prevrs, nextrs); } void ApplyDeclaredSchemeQuotas(const NKikimrSubDomains::TSchemeQuotas& declaredSchemeQuotas, TInstant now) { @@ -1799,9 +1799,9 @@ private: THashSet<TShardIdx> SequenceShards; THashSet<TShardIdx> ReplicationControllers; - ui64 PQPartitionsInsideCount = 0; - ui64 PQReservedStorage = 0; - + ui64 PQPartitionsInsideCount = 0; + ui64 PQReservedStorage = 0; + TPathId ResourcesDomainId; TTabletId SharedHive = InvalidTabletId; diff --git a/ydb/core/tx/schemeshard/schemeshard_path.cpp b/ydb/core/tx/schemeshard/schemeshard_path.cpp index c1ff975446..7535ee673c 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_path.cpp @@ -9,7 +9,7 @@ namespace NKikimr { namespace NSchemeShard { static constexpr ui64 MaxPQStorage = Max<ui64>() / 2; - + TPath::TChecker::TChecker(const TPath &path) : Path(path) , Failed(false) @@ -853,58 +853,58 @@ const TPath::TChecker& TPath::TChecker::ShardsLimit(ui64 delta, TPath::TChecker: return *this; } -const TPath::TChecker& TPath::TChecker::PQPartitionsLimit(ui64 delta, TPath::TChecker::EStatus status) const { - if (Failed) { - return *this; - } - - TSubDomainInfo::TPtr domainInfo = Path.DomainInfo(); - - if (!delta || domainInfo->GetPQPartitionsInside() + delta <= domainInfo->GetSchemeLimits().MaxPQPartitions && (!domainInfo->GetDatabaseQuotas() - || !domainInfo->GetDatabaseQuotas()->data_stream_shards_quota() - || domainInfo->GetPQPartitionsInside() + delta <= domainInfo->GetDatabaseQuotas()->data_stream_shards_quota())) { - return *this; - } - - Failed = true; - Status = status; - Explain << "data stream shards count has reached maximum value in the domain" - << ", data stream shards limit for domain: " << (domainInfo->GetDatabaseQuotas() ? domainInfo->GetDatabaseQuotas()->data_stream_shards_quota() : 0) << "(" << domainInfo->GetSchemeLimits().MaxPQPartitions << ")" - << ", data stream shards count inside domain: " << domainInfo->GetPQPartitionsInside() +const TPath::TChecker& TPath::TChecker::PQPartitionsLimit(ui64 delta, TPath::TChecker::EStatus status) const { + if (Failed) { + return *this; + } + + TSubDomainInfo::TPtr domainInfo = Path.DomainInfo(); + + if (!delta || domainInfo->GetPQPartitionsInside() + delta <= domainInfo->GetSchemeLimits().MaxPQPartitions && (!domainInfo->GetDatabaseQuotas() + || !domainInfo->GetDatabaseQuotas()->data_stream_shards_quota() + || domainInfo->GetPQPartitionsInside() + delta <= domainInfo->GetDatabaseQuotas()->data_stream_shards_quota())) { + return *this; + } + + Failed = true; + Status = status; + Explain << "data stream shards count has reached maximum value in the domain" + << ", data stream shards limit for domain: " << (domainInfo->GetDatabaseQuotas() ? domainInfo->GetDatabaseQuotas()->data_stream_shards_quota() : 0) << "(" << domainInfo->GetSchemeLimits().MaxPQPartitions << ")" + << ", data stream shards count inside domain: " << domainInfo->GetPQPartitionsInside() << ", intention to create new data stream shards: " << delta; - return *this; -} - -const TPath::TChecker& TPath::TChecker::PQReservedStorageLimit(ui64 delta, TPath::TChecker::EStatus status) const { - if (Failed) { - return *this; - } - - TSubDomainInfo::TPtr domainInfo = Path.DomainInfo(); - - if (!delta || !domainInfo->GetDatabaseQuotas() - || !domainInfo->GetDatabaseQuotas()->data_stream_reserved_storage_quota() - || domainInfo->GetPQReservedStorage() + delta <= domainInfo->GetDatabaseQuotas()->data_stream_reserved_storage_quota()) { - - if (domainInfo->GetPQReservedStorage() + delta <= MaxPQStorage) { - return *this; - } - } - - Failed = true; - Status = status; - Explain << "data stream reserved storage size has reached maximum value in the domain" - << ", data stream reserved storage size limit for domain: " - << (domainInfo->GetDatabaseQuotas() && domainInfo->GetDatabaseQuotas()->data_stream_reserved_storage_quota() - ? domainInfo->GetDatabaseQuotas()->data_stream_reserved_storage_quota() - : MaxPQStorage) << " bytes" - << ", data stream reserved storage size inside domain: " << domainInfo->GetPQReservedStorage() << " bytes" + return *this; +} + +const TPath::TChecker& TPath::TChecker::PQReservedStorageLimit(ui64 delta, TPath::TChecker::EStatus status) const { + if (Failed) { + return *this; + } + + TSubDomainInfo::TPtr domainInfo = Path.DomainInfo(); + + if (!delta || !domainInfo->GetDatabaseQuotas() + || !domainInfo->GetDatabaseQuotas()->data_stream_reserved_storage_quota() + || domainInfo->GetPQReservedStorage() + delta <= domainInfo->GetDatabaseQuotas()->data_stream_reserved_storage_quota()) { + + if (domainInfo->GetPQReservedStorage() + delta <= MaxPQStorage) { + return *this; + } + } + + Failed = true; + Status = status; + Explain << "data stream reserved storage size has reached maximum value in the domain" + << ", data stream reserved storage size limit for domain: " + << (domainInfo->GetDatabaseQuotas() && domainInfo->GetDatabaseQuotas()->data_stream_reserved_storage_quota() + ? domainInfo->GetDatabaseQuotas()->data_stream_reserved_storage_quota() + : MaxPQStorage) << " bytes" + << ", data stream reserved storage size inside domain: " << domainInfo->GetPQReservedStorage() << " bytes" << ", intention to reserve more storage for : " << delta << " bytes"; - return *this; -} - - - + return *this; +} + + + const TPath::TChecker& TPath::TChecker::PathShardsLimit(ui64 delta, TPath::TChecker::EStatus status) const { if (Failed) { return *this; @@ -1236,7 +1236,7 @@ TPath TPath::ResolveWithInactive(TOperationId opId, const TString path, TSchemeS TPath nullPrefix{ss}; auto pathParts = SplitPath(path); - int headSubTxId = opId.GetSubTxId() - 1; + int headSubTxId = opId.GetSubTxId() - 1; while (headSubTxId >= 0) { auto headOpId = TOperationId(opId.GetTxId(), headSubTxId); TTxState* txState = ss->FindTx(headOpId); diff --git a/ydb/core/tx/schemeshard/schemeshard_path.h b/ydb/core/tx/schemeshard/schemeshard_path.h index 1274317cdf..fb33c1cdd9 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path.h +++ b/ydb/core/tx/schemeshard/schemeshard_path.h @@ -78,14 +78,14 @@ public: const TChecker& FailOnExist(TPathElement::EPathType expectedType, bool acceptAlreadyExist) const; const TChecker& IsValidLeafName(EStatus status = EStatus::StatusSchemeError) const; const TChecker& DepthLimit(ui64 delta = 0, EStatus status = EStatus::StatusSchemeError) const; - const TChecker& PathsLimit(ui64 delta = 1, EStatus status = EStatus::StatusResourceExhausted) const; - const TChecker& DirChildrenLimit(ui64 delta = 1, EStatus status = EStatus::StatusResourceExhausted) const; - const TChecker& ShardsLimit(ui64 delta = 1, EStatus status = EStatus::StatusResourceExhausted) const; - const TChecker& PathShardsLimit(ui64 delta = 1, EStatus status = EStatus::StatusResourceExhausted) const; + const TChecker& PathsLimit(ui64 delta = 1, EStatus status = EStatus::StatusResourceExhausted) const; + const TChecker& DirChildrenLimit(ui64 delta = 1, EStatus status = EStatus::StatusResourceExhausted) const; + const TChecker& ShardsLimit(ui64 delta = 1, EStatus status = EStatus::StatusResourceExhausted) const; + const TChecker& PathShardsLimit(ui64 delta = 1, EStatus status = EStatus::StatusResourceExhausted) const; const TChecker& NotChildren(EStatus status = EStatus::StatusInvalidParameter) const; const TChecker& IsValidACL(const TString& acl, EStatus status = EStatus::StatusInvalidParameter) const; - const TChecker& PQPartitionsLimit(ui64 delta = 1, EStatus status = EStatus::StatusResourceExhausted) const; - const TChecker& PQReservedStorageLimit(ui64 delta = 1, EStatus status = EStatus::StatusResourceExhausted) const; + const TChecker& PQPartitionsLimit(ui64 delta = 1, EStatus status = EStatus::StatusResourceExhausted) const; + const TChecker& PQReservedStorageLimit(ui64 delta = 1, EStatus status = EStatus::StatusResourceExhausted) const; }; public: diff --git a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp index 7d88b02ca4..3789d3af1b 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp @@ -619,8 +619,8 @@ void TPathDescriber::DescribeDomainRoot(TPathElement::TPtr pathEl) { entry->SetPathsLimit(subDomainInfo->GetSchemeLimits().MaxPaths); entry->SetShardsInside(subDomainInfo->GetShardsInside()); entry->SetShardsLimit(subDomainInfo->GetSchemeLimits().MaxShards); - entry->SetPQPartitionsInside(subDomainInfo->GetPQPartitionsInside()); - entry->SetPQPartitionsLimit(subDomainInfo->GetSchemeLimits().MaxPQPartitions); + entry->SetPQPartitionsInside(subDomainInfo->GetPQPartitionsInside()); + entry->SetPQPartitionsLimit(subDomainInfo->GetSchemeLimits().MaxPQPartitions); NKikimrSubDomains::TDomainKey *resourcesKey = entry->MutableResourcesDomainKey(); resourcesKey->SetSchemeShard(subDomainInfo->GetResourcesDomainId().OwnerId); diff --git a/ydb/core/tx/schemeshard/schemeshard_schema.h b/ydb/core/tx/schemeshard/schemeshard_schema.h index 6eadff3f34..f293136fe8 100644 --- a/ydb/core/tx/schemeshard/schemeshard_schema.h +++ b/ydb/core/tx/schemeshard/schemeshard_schema.h @@ -714,7 +714,7 @@ struct Schema : NIceDb::Schema { struct ResourcesDomainLocalPathId : Column<19, NScheme::NTypeIds::Uint64> { using Type = TLocalPathId; static constexpr Type Default = InvalidLocalPathId; }; struct SharedHiveId : Column<20, NScheme::NTypeIds::Uint64> { using Type = TTabletId; static constexpr Type Default = InvalidTabletId; }; struct DeclaredSchemeQuotas : Column<21, NScheme::NTypeIds::String> {}; - struct PQPartitionsLimit : Column<22, NScheme::NTypeIds::Uint64> {}; + struct PQPartitionsLimit : Column<22, NScheme::NTypeIds::Uint64> {}; struct DatabaseQuotas : Column<23, NScheme::NTypeIds::String> {}; struct StateVersion : Column<24, NScheme::NTypeIds::Uint64> {}; struct DiskQuotaExceeded : Column<25, NScheme::NTypeIds::Bool> {}; @@ -742,7 +742,7 @@ struct Schema : NIceDb::Schema { ResourcesDomainOwnerPathId, ResourcesDomainLocalPathId, SharedHiveId, - DeclaredSchemeQuotas, + DeclaredSchemeQuotas, PQPartitionsLimit, DatabaseQuotas, StateVersion, diff --git a/ydb/core/tx/schemeshard/schemeshard_types.h b/ydb/core/tx/schemeshard/schemeshard_types.h index 7482d33e09..a60df6317d 100644 --- a/ydb/core/tx/schemeshard/schemeshard_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_types.h @@ -28,9 +28,9 @@ struct TSchemeLimits { ui64 MaxShardsInPath = 35*1000; // In each path in database ui64 MaxConsistentCopyTargets = 1000; - // pq group - ui64 MaxPQPartitions = 1000000; - + // pq group + ui64 MaxPQPartitions = 1000000; + TSchemeLimits() = default; explicit TSchemeLimits(const NKikimrScheme::TSchemeLimits& proto); diff --git a/ydb/core/tx/schemeshard/ut_base.cpp b/ydb/core/tx/schemeshard/ut_base.cpp index cbac94dac4..a76dcf9d5b 100644 --- a/ydb/core/tx/schemeshard/ut_base.cpp +++ b/ydb/core/tx/schemeshard/ut_base.cpp @@ -5930,12 +5930,12 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { AsyncCreatePQGroup(runtime, ++txId, "/MyRoot", ""); AsyncCreatePQGroup(runtime, ++txId, "/MyRoot/DirA", "Name: \"PQGroup_2\"" - "TotalGroupCount: 10 " + "TotalGroupCount: 10 " "PartitionPerTablet: 10 " "PQTabletConfig: {PartitionConfig { LifetimeSeconds : 10}}"); AsyncCreatePQGroup(runtime, ++txId, "/MyRoot/DirA", "Name: \"PQGroup_3\"" - "TotalGroupCount: 10 " + "TotalGroupCount: 10 " "PartitionPerTablet: 3 " "PQTabletConfig: {PartitionConfig { LifetimeSeconds : 10}}"); TestModificationResult(runtime, txId-3, NKikimrScheme::StatusAccepted); @@ -5955,7 +5955,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { TestCreatePQGroup(runtime, ++txId, "/MyRoot/DirA", "Name: \"PQGroup_1\"" - "TotalGroupCount: 100 " + "TotalGroupCount: 100 " "PartitionPerTablet: 10 " "PQTabletConfig: {PartitionConfig { LifetimeSeconds : 10}}" ); @@ -5986,7 +5986,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { TestCreatePQGroup(runtime, ++txId, "/MyRoot", "Name: \"PQGroup\"" - "TotalGroupCount: 4 " + "TotalGroupCount: 4 " "PartitionPerTablet: 3 " "PQTabletConfig: {PartitionConfig { LifetimeSeconds : 10}}"); env.TestWaitNotification(runtime, txId); @@ -5998,12 +5998,12 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { // invalid params TestAlterPQGroup(runtime, ++txId, "/MyRoot", "Name: \"PQGroup\"" - "TotalGroupCount: 3 " + "TotalGroupCount: 3 " "PartitionPerTablet: 3 ", {NKikimrScheme::StatusInvalidParameter}); TestAlterPQGroup(runtime, ++txId, "/MyRoot", "Name: \"PQGroup\"" - "TotalGroupCount: 4 " + "TotalGroupCount: 4 " "PartitionPerTablet: 2 ", {NKikimrScheme::StatusInvalidParameter}); @@ -6012,7 +6012,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { // same sizes - reconfig TestAlterPQGroup(runtime, ++txId, "/MyRoot", "Name: \"PQGroup\"" - "PartitionPerTablet: 3 "); // do not change TotalGroupCount + "PartitionPerTablet: 3 "); // do not change TotalGroupCount env.TestWaitNotification(runtime, txId); TestDescribeResult(DescribePath(runtime, "/MyRoot/PQGroup", true), @@ -6022,7 +6022,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { TestAlterPQGroup(runtime, ++txId, "/MyRoot", "Name: \"PQGroup\"" - "TotalGroupCount: 6 " + "TotalGroupCount: 6 " "PartitionPerTablet: 3 " "PQTabletConfig: {PartitionConfig { LifetimeSeconds : 42}}"); env.TestWaitNotification(runtime, txId); @@ -6034,7 +6034,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { TestAlterPQGroup(runtime, ++txId, "/MyRoot", "Name: \"PQGroup\"" - "TotalGroupCount: 8 "); // do not change PartitionPerTablet + "TotalGroupCount: 8 "); // do not change PartitionPerTablet env.TestWaitNotification(runtime, txId); TestDescribeResult(DescribePath(runtime, "/MyRoot/PQGroup", true), @@ -6044,7 +6044,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { TestAlterPQGroup(runtime, ++txId, "/MyRoot", "Name: \"PQGroup\"" - "TotalGroupCount: 8 " + "TotalGroupCount: 8 " "PartitionPerTablet: 4 "); env.TestWaitNotification(runtime, txId); @@ -6055,7 +6055,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { TestAlterPQGroup(runtime, ++txId, "/MyRoot", "Name: \"PQGroup\"" - "TotalGroupCount: 14 " + "TotalGroupCount: 14 " "PartitionPerTablet: 4 "); env.TestWaitNotification(runtime, txId); @@ -6067,7 +6067,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { // Alter + Alter + reboot TestAlterPQGroup(runtime, ++txId, "/MyRoot", "Name: \"PQGroup\"" - "TotalGroupCount: 400 " + "TotalGroupCount: 400 " "PartitionPerTablet: 10 "); TestAlterPQGroup(runtime, ++txId, "/MyRoot", "Name: \"PQGroup\"", {NKikimrScheme::StatusMultipleModifications}); @@ -6339,17 +6339,17 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { "KeyColumnNames: [\"RowId\"]"; TString pqGroupConfig = "Name: \"DropMeBaby\"" - "TotalGroupCount: 100 " + "TotalGroupCount: 100 " "PartitionPerTablet: 10 " "PQTabletConfig: {PartitionConfig { LifetimeSeconds : 10}}"; TString pqGroupConfig1 = "Name: \"DropMeBaby\"" - "TotalGroupCount: 2 " + "TotalGroupCount: 2 " "PartitionPerTablet: 1 " "PQTabletConfig: {PartitionConfig { LifetimeSeconds : 10}}"; TString pqGroupAlter = "Name: \"DropMeBaby\"" - "TotalGroupCount: 3 "; + "TotalGroupCount: 3 "; TestMkDir(runtime, ++txId, "/MyRoot", "Ops"); @@ -6456,12 +6456,12 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { ui64 txId = 1000; TString pqGroupConfig = "Name: \"Isolda\"" - "TotalGroupCount: 40 " + "TotalGroupCount: 40 " "PartitionPerTablet: 10 " "PQTabletConfig: {PartitionConfig { LifetimeSeconds : 10}}"; TString pqGroupAlter = "Name: \"Isolda\"" - "TotalGroupCount: 100 "; + "TotalGroupCount: 100 "; Cdbg << "Create + Drop + Create" << Endl; AsyncCreatePQGroup(runtime, ++txId, "/MyRoot", pqGroupConfig); @@ -6609,17 +6609,17 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { const TString pqPath = basePath + "/Isolda"; TString pqGroupConfig = "Name: \"Isolda\"" - "TotalGroupCount: 1 " + "TotalGroupCount: 1 " "PartitionPerTablet: 10 " "PQTabletConfig: {PartitionConfig { LifetimeSeconds : 10}}"; TString pqGroupBigConfig = "Name: \"Isolda\"" - "TotalGroupCount: 1000 " + "TotalGroupCount: 1000 " "PartitionPerTablet: 10 " "PQTabletConfig: {PartitionConfig { LifetimeSeconds : 10}}"; TString pqGroupBigAlter = "Name: \"Isolda\"" - "TotalGroupCount: 1000 "; + "TotalGroupCount: 1000 "; AsyncMkDir(runtime, ++txId, "/MyRoot", "A"); AsyncMkDir(runtime, ++txId, "/MyRoot/A", "B"); @@ -7826,7 +7826,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { TestDescribeResult(DescribePath(runtime, "/MyRoot/BSVolume"), {NLs::CheckMountToken("BSVolume", "Owner124")}); - // AssignVolume using TokenVersion + // AssignVolume using TokenVersion TestAssignBlockStoreVolume(runtime, ++txId, "/MyRoot", "BSVolume", "Owner125", 2); TestDescribeResult(DescribePath(runtime, "/MyRoot/BSVolume"), diff --git a/ydb/core/tx/schemeshard/ut_extsubdomain.cpp b/ydb/core/tx/schemeshard/ut_extsubdomain.cpp index 9d9d0980c2..0fa6a27bce 100644 --- a/ydb/core/tx/schemeshard/ut_extsubdomain.cpp +++ b/ydb/core/tx/schemeshard/ut_extsubdomain.cpp @@ -243,7 +243,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { env.TestWaitNotification(runtime, {txId, txId - 1}); TestAlterSubDomain(runtime, ++txId, "/MyRoot", - "Name: \"USER_0\" " + "Name: \"USER_0\" " "StoragePools { " " Name: \"pool-1\" " " Kind: \"hdd\" " diff --git a/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp b/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp index 063da75a0f..6f999b268b 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp @@ -1342,7 +1342,7 @@ namespace NSchemeShardUT_Private { SetSchemeshardSchemaLimits(runtime, limits, TTestTxConfig::SchemeShard); } - + TString EscapedDoubleQoute(const TString src) { auto result = src; @@ -1376,14 +1376,14 @@ namespace NSchemeShardUT_Private { (let consCopy '('ConsistentCopyingTargetsLimit (Uint64 '%lu))) (let maxPathLength '('PathElementLength (Uint64 '%lu))) (let extraSymbols '('ExtraPathSymbolsAllowed (Utf8 '"%s"))) - (let pqPartitions '('PQPartitionsLimit (Uint64 '%lu))) - (let ret (AsList (UpdateRow 'SubDomains key '(depth paths child acl columns colName keyCols indices shards pathShards consCopy maxPathLength extraSymbols pqPartitions)))) + (let pqPartitions '('PQPartitionsLimit (Uint64 '%lu))) + (let ret (AsList (UpdateRow 'SubDomains key '(depth paths child acl columns colName keyCols indices shards pathShards consCopy maxPathLength extraSymbols pqPartitions)))) (return ret) ) )", domainId, limits.MaxDepth, limits.MaxPaths, limits.MaxChildrenInDir, limits.MaxAclBytesSize, limits.MaxTableColumns, limits.MaxTableColumnNameLength, limits.MaxTableKeyColumns, limits.MaxTableIndices, limits.MaxShards, limits.MaxShardsInPath, limits.MaxConsistentCopyTargets, - limits.MaxPathElementLength, escapedStr.c_str(), limits.MaxPQPartitions); + limits.MaxPathElementLength, escapedStr.c_str(), limits.MaxPQPartitions); Cdbg << prog << "\n"; NKikimrProto::EReplyStatus status = LocalMiniKQL(runtime, schemeShard, prog, result, err); Cdbg << result << "\n"; @@ -1393,37 +1393,37 @@ namespace NSchemeShardUT_Private { RebootTablet(runtime, schemeShard, sender); } - void SetSchemeshardDatabaseQuotas(TTestActorRuntime& runtime, Ydb::Cms::DatabaseQuotas databaseQuotas, ui64 domainId) { - + void SetSchemeshardDatabaseQuotas(TTestActorRuntime& runtime, Ydb::Cms::DatabaseQuotas databaseQuotas, ui64 domainId) { + SetSchemeshardDatabaseQuotas(runtime, databaseQuotas, domainId, TTestTxConfig::SchemeShard); - } - - void SetSchemeshardDatabaseQuotas(TTestActorRuntime& runtime, Ydb::Cms::DatabaseQuotas databaseQuotas, ui64 domainId, ui64 schemeShard) { - NKikimrMiniKQL::TResult result; - TString err; - - TString serialized; - Y_VERIFY(databaseQuotas.SerializeToString(&serialized)); - TString prog = Sprintf(R"( - ( - (let key '('('PathId (Uint64 '%lu)))) # RootPathId - (let quotas '('DatabaseQuotas (String '%s))) - (let ret (AsList (UpdateRow 'SubDomains key '(quotas)))) - (return ret) - ) - )", domainId, serialized.c_str()); - Cdbg << prog << "\n"; + } + + void SetSchemeshardDatabaseQuotas(TTestActorRuntime& runtime, Ydb::Cms::DatabaseQuotas databaseQuotas, ui64 domainId, ui64 schemeShard) { + NKikimrMiniKQL::TResult result; + TString err; + + TString serialized; + Y_VERIFY(databaseQuotas.SerializeToString(&serialized)); + TString prog = Sprintf(R"( + ( + (let key '('('PathId (Uint64 '%lu)))) # RootPathId + (let quotas '('DatabaseQuotas (String '%s))) + (let ret (AsList (UpdateRow 'SubDomains key '(quotas)))) + (return ret) + ) + )", domainId, serialized.c_str()); + Cdbg << prog << "\n"; NKikimrProto::EReplyStatus status = LocalMiniKQL(runtime, schemeShard, prog, result, err); - - Cdbg << result << "\n"; + + Cdbg << result << "\n"; UNIT_ASSERT_VALUES_EQUAL(status, NKikimrProto::EReplyStatus::OK); - - TActorId sender = runtime.AllocateEdgeActor(); + + TActorId sender = runtime.AllocateEdgeActor(); RebootTablet(runtime, schemeShard, sender); - - } - - + + } + + NKikimrSchemeOp::TTableDescription GetDatashardSchema(TTestActorRuntime& runtime, ui64 tabletId, ui64 tid) { NKikimrMiniKQL::TResult result; TString err; diff --git a/ydb/core/tx/schemeshard/ut_helpers/helpers.h b/ydb/core/tx/schemeshard/ut_helpers/helpers.h index 8601f5d4cd..6b12c25534 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/helpers.h +++ b/ydb/core/tx/schemeshard/ut_helpers/helpers.h @@ -345,8 +345,8 @@ namespace NSchemeShardUT_Private { void SetSchemeshardReadOnlyMode(TTestActorRuntime& runtime, bool isReadOnly); void SetSchemeshardSchemaLimits(TTestActorRuntime& runtime, NSchemeShard::TSchemeLimits limits); void SetSchemeshardSchemaLimits(TTestActorRuntime& runtime, NSchemeShard::TSchemeLimits limits, ui64 schemeShard); - void SetSchemeshardDatabaseQuotas(TTestActorRuntime& runtime, Ydb::Cms::DatabaseQuotas databaseQuotas, ui64 domainId); - void SetSchemeshardDatabaseQuotas(TTestActorRuntime& runtime, Ydb::Cms::DatabaseQuotas databaseQuotas, ui64 dimainId, ui64 schemeShard); + void SetSchemeshardDatabaseQuotas(TTestActorRuntime& runtime, Ydb::Cms::DatabaseQuotas databaseQuotas, ui64 domainId); + void SetSchemeshardDatabaseQuotas(TTestActorRuntime& runtime, Ydb::Cms::DatabaseQuotas databaseQuotas, ui64 dimainId, ui64 schemeShard); NKikimrSchemeOp::TTableDescription GetDatashardSchema(TTestActorRuntime& runtime, ui64 tabletId, ui64 tid); diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp index bdbf259c8d..c5d6c5a973 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp @@ -542,25 +542,25 @@ TCheckFunc PathsInsideDomain(ui64 count) { }; } -TCheckFunc PQPartitionsInsideDomain(ui64 count) { +TCheckFunc PQPartitionsInsideDomain(ui64 count) { return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) { UNIT_ASSERT_C(IsGoodDomainStatus(record.GetStatus()), "Unexpected status: " << record.GetStatus()); - const auto& pathDescr = record.GetPathDescription(); - const auto& domain = pathDescr.GetDomainDescription(); - const auto& curCount = domain.GetPQPartitionsInside(); - - UNIT_ASSERT_EQUAL_C(curCount, count, - "pq partitions inside domain count mistmach, domain with id " << domain.GetDomainKey().GetPathId() << - " has count " << curCount << - " but expected " << count); - }; -} - + const auto& pathDescr = record.GetPathDescription(); + const auto& domain = pathDescr.GetDomainDescription(); + const auto& curCount = domain.GetPQPartitionsInside(); + + UNIT_ASSERT_EQUAL_C(curCount, count, + "pq partitions inside domain count mistmach, domain with id " << domain.GetDomainKey().GetPathId() << + " has count " << curCount << + " but expected " << count); + }; +} + TCheckFunc PathsInsideDomainOneOf(TSet<ui64> variants) { return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) { UNIT_ASSERT_C(IsGoodDomainStatus(record.GetStatus()), "Unexpected status: " << record.GetStatus()); - + const auto& pathDescr = record.GetPathDescription(); const auto& domain = pathDescr.GetDomainDescription(); const auto& curCount = domain.GetPathsInside(); @@ -602,29 +602,29 @@ TCheckFunc ShardsInsideDomainOneOf(TSet<ui64> variants) { }; } -TCheckFunc DomainLimitsIs(ui64 maxPaths, ui64 maxShards, ui64 maxPQPartitions) { +TCheckFunc DomainLimitsIs(ui64 maxPaths, ui64 maxShards, ui64 maxPQPartitions) { return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) { UNIT_ASSERT_VALUES_EQUAL(record.GetStatus(), NKikimrScheme::StatusSuccess); const auto& pathDescr = record.GetPathDescription(); const auto& domain = pathDescr.GetDomainDescription(); const auto& pathLimit = domain.GetPathsLimit(); const auto& shardsLimit = domain.GetShardsLimit(); - const auto& pqPartitionsLimit = domain.GetPQPartitionsLimit(); + const auto& pqPartitionsLimit = domain.GetPQPartitionsLimit(); UNIT_ASSERT_C(pathLimit == maxPaths, "paths limit mistmach, domain with id " << domain.GetDomainKey().GetPathId() << - " has limit " << pathLimit << + " has limit " << pathLimit << " but expected " << maxPaths); UNIT_ASSERT_C(shardsLimit == maxShards, "shards limit mistmach, domain with id " << domain.GetDomainKey().GetPathId() << - " has limit " << shardsLimit << + " has limit " << shardsLimit << " but expected " << maxShards); - - UNIT_ASSERT_C(!maxPQPartitions || pqPartitionsLimit == maxPQPartitions, - "pq partitions limit mistmach, domain with id " << domain.GetDomainKey().GetPathId() << - " has limit " << pqPartitionsLimit << - " but expected " << maxPQPartitions); + + UNIT_ASSERT_C(!maxPQPartitions || pqPartitionsLimit == maxPQPartitions, + "pq partitions limit mistmach, domain with id " << domain.GetDomainKey().GetPathId() << + " has limit " << pqPartitionsLimit << + " but expected " << maxPQPartitions); }; } @@ -959,21 +959,21 @@ TCheckFunc KesusConfigIs(ui64 self_check_period_millis, ui64 session_grace_perio }; } -TCheckFunc DatabaseQuotas(ui64 dataStreamShards) { +TCheckFunc DatabaseQuotas(ui64 dataStreamShards) { return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) { UNIT_ASSERT_C(IsGoodDomainStatus(record.GetStatus()), "Unexpected status: " << record.GetStatus()); - const auto& pathDescr = record.GetPathDescription(); - const auto& domain = pathDescr.GetDomainDescription(); + const auto& pathDescr = record.GetPathDescription(); + const auto& domain = pathDescr.GetDomainDescription(); const auto count = domain.GetDatabaseQuotas().data_stream_shards_quota(); - - UNIT_ASSERT_C(count == dataStreamShards, - "data stream shards inside domain count mistmach, domain with id " << domain.GetDomainKey().GetPathId() << - " has data stream shards " << count << - " but expected " << dataStreamShards); - }; -} - + + UNIT_ASSERT_C(count == dataStreamShards, + "data stream shards inside domain count mistmach, domain with id " << domain.GetDomainKey().GetPathId() << + " has data stream shards " << count << + " but expected " << dataStreamShards); + }; +} + TCheckFunc PartitionKeys(TVector<TString> lastShardKeys) { return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) { const auto& pathDescr = record.GetPathDescription(); diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h index d031d52d46..33022e4257 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h +++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h @@ -33,11 +33,11 @@ namespace NLs { TCheckFunc PathVersionOneOf(TSet<ui64> versions); TCheckFunc PathsInsideDomain(ui64 count); - TCheckFunc PQPartitionsInsideDomain(ui64 count); + TCheckFunc PQPartitionsInsideDomain(ui64 count); TCheckFunc PathsInsideDomainOneOf(TSet<ui64> variants); TCheckFunc ShardsInsideDomain(ui64 count); TCheckFunc ShardsInsideDomainOneOf(TSet<ui64> variants); - TCheckFunc DomainLimitsIs(ui64 maxPaths, ui64 maxShards, ui64 maxPQPartitions = 0); + TCheckFunc DomainLimitsIs(ui64 maxPaths, ui64 maxShards, ui64 maxPQPartitions = 0); TCheckFunc FreezeStateEqual(NKikimrSchemeOp::EFreezeState expectedState); @@ -125,8 +125,8 @@ namespace NLs { TCheckFunc HasNotEffectiveRight(const TString& right); TCheckFunc KesusConfigIs(ui64 self_check_period_millis, ui64 session_grace_period_millis); - TCheckFunc DatabaseQuotas(ui64 dataStreamShards); - + TCheckFunc DatabaseQuotas(ui64 dataStreamShards); + template<class TCheck> void PerformAllChecks(const NKikimrScheme::TEvDescribeSchemeResult& result, TCheck&& check) { check(result); diff --git a/ydb/core/tx/schemeshard/ut_pq_reboots.cpp b/ydb/core/tx/schemeshard/ut_pq_reboots.cpp index da24126bb9..8627855297 100644 --- a/ydb/core/tx/schemeshard/ut_pq_reboots.cpp +++ b/ydb/core/tx/schemeshard/ut_pq_reboots.cpp @@ -34,18 +34,18 @@ Y_UNIT_TEST_SUITE(TPqGroupTestReboots) { TInactiveZone inactive(activeZone); TestDescribeResult(DescribePath(runtime, "/MyRoot/DirA/PQGroup_2"), {NLs::Finished, - NLs::PathVersionEqual(2), - NLs::PQPartitionsInsideDomain(10)}); + NLs::PathVersionEqual(2), + NLs::PQPartitionsInsideDomain(10)}); } - - TestCreatePQGroup(runtime, ++t.TxId, "/MyRoot/DirA/NotExistingDir", - "Name: \"PQGroup_2\"" - "TotalGroupCount: 10 " - "PartitionPerTablet: 10 " - "PQTabletConfig: {PartitionConfig { LifetimeSeconds : 10}}", - {ESts::StatusPathDoesNotExist} - ); - + + TestCreatePQGroup(runtime, ++t.TxId, "/MyRoot/DirA/NotExistingDir", + "Name: \"PQGroup_2\"" + "TotalGroupCount: 10 " + "PartitionPerTablet: 10 " + "PQTabletConfig: {PartitionConfig { LifetimeSeconds : 10}}", + {ESts::StatusPathDoesNotExist} + ); + }); } @@ -64,8 +64,8 @@ Y_UNIT_TEST_SUITE(TPqGroupTestReboots) { TInactiveZone inactive(activeZone); TestDescribeResult(DescribePath(runtime, "/MyRoot/DirA/PQGroup_2"), {NLs::Finished, - NLs::PathVersionEqual(2), - NLs::PQPartitionsInsideDomain(2)}); + NLs::PathVersionEqual(2), + NLs::PQPartitionsInsideDomain(2)}); } }); } @@ -95,7 +95,7 @@ Y_UNIT_TEST_SUITE(TPqGroupTestReboots) { pqVer = TestDescribeResult(DescribePath(runtime, "/MyRoot/DirA/PQGroup", true), {NLs::Finished, NLs::CheckPartCount("PQGroup", 10, 10, 1, 10), - NLs::PQPartitionsInsideDomain(10), + NLs::PQPartitionsInsideDomain(10), NLs::PathVersionEqual(2)}); auto numChannels = runtime.GetAppData().ChannelProfiles->Profiles[0].Channels.size(); @@ -126,7 +126,7 @@ Y_UNIT_TEST_SUITE(TPqGroupTestReboots) { pqVer = TestDescribeResult(DescribePath(runtime, "/MyRoot/DirA/PQGroup", true), {NLs::Finished, NLs::CheckPartCount("PQGroup", 10, 10, 1, 10), - NLs::PQPartitionsInsideDomain(10), + NLs::PQPartitionsInsideDomain(10), NLs::PathVersionEqual(3)}); { @@ -168,7 +168,7 @@ Y_UNIT_TEST_SUITE(TPqGroupTestReboots) { pqVer = TestDescribeResult(DescribePath(runtime, "/MyRoot/DirA/PQGroup", true), {NLs::Finished, NLs::CheckPartCount("PQGroup", 10, 10, 1, 10), - NLs::PQPartitionsInsideDomain(10), + NLs::PQPartitionsInsideDomain(10), NLs::PathVersionEqual(2)}); } @@ -193,8 +193,8 @@ Y_UNIT_TEST_SUITE(TPqGroupTestReboots) { TestDescribeResult(DescribePath(runtime, "/MyRoot/DirA/PQGroup", true), {NLs::Finished, NLs::CheckPartCount("PQGroup", 11, 11, 1, 11), - NLs::PathVersionEqual(3), - NLs::PQPartitionsInsideDomain(11)}); + NLs::PathVersionEqual(3), + NLs::PQPartitionsInsideDomain(11)}); } }); } @@ -215,8 +215,8 @@ Y_UNIT_TEST_SUITE(TPqGroupTestReboots) { TestDescribeResult(DescribePath(runtime, "/MyRoot/DirA/PQGroup_2"), {NLs::Finished, - NLs::PathVersionEqual(2), - NLs::PQPartitionsInsideDomain(2)}); + NLs::PathVersionEqual(2), + NLs::PQPartitionsInsideDomain(2)}); AsyncAlterPQGroup(runtime, t.TxId++, "/MyRoot/DirA", "Name: \"PQGroup_2\"" @@ -228,8 +228,8 @@ Y_UNIT_TEST_SUITE(TPqGroupTestReboots) { t.TestEnv->TestWaitNotification(runtime, t.TxId-1); TestDescribeResult(DescribePath(runtime, "/MyRoot/DirA/PQGroup_2"), {NLs::Finished, - NLs::PathVersionEqual(3), - NLs::PQPartitionsInsideDomain(8)}); + NLs::PathVersionEqual(3), + NLs::PQPartitionsInsideDomain(8)}); AsyncAlterPQGroup(runtime, t.TxId++, "/MyRoot/DirA", "Name: \"PQGroup_2\"" @@ -241,8 +241,8 @@ Y_UNIT_TEST_SUITE(TPqGroupTestReboots) { t.TestEnv->TestWaitNotification(runtime, t.TxId-1); TestDescribeResult(DescribePath(runtime, "/MyRoot/DirA/PQGroup_2"), {NLs::Finished, - NLs::PathVersionEqual(4), - NLs::PQPartitionsInsideDomain(20)}); + NLs::PathVersionEqual(4), + NLs::PQPartitionsInsideDomain(20)}); activeZone = false; }); @@ -277,8 +277,8 @@ Y_UNIT_TEST_SUITE(TPqGroupTestReboots) { NLs::PathVersionEqual(7), NLs::ChildrenCount(0), NLs::PathsInsideDomain(1), - NLs::ShardsInsideDomainOneOf({0, 1, 2, 3}), - NLs::PQPartitionsInsideDomain(0)}); + NLs::ShardsInsideDomainOneOf({0, 1, 2, 3}), + NLs::PQPartitionsInsideDomain(0)}); }); } @@ -299,8 +299,8 @@ Y_UNIT_TEST_SUITE(TPqGroupTestReboots) { {NLs::PathExist, NLs::ChildrenCount(1), NLs::PathsInsideDomain(1), - NLs::ShardsInsideDomainOneOf({0, 1, 2, 3}), - }); + NLs::ShardsInsideDomainOneOf({0, 1, 2, 3}), + }); }); } @@ -362,16 +362,16 @@ Y_UNIT_TEST_SUITE(TPqGroupTestReboots) { TestDescribeResult(DescribePath(runtime, "/MyRoot/Isolda"), {NLs::PathExist, NLs::Finished, - NLs::PathVersionEqual(2), - NLs::PQPartitionsInsideDomain(4)}); + NLs::PathVersionEqual(2), + NLs::PQPartitionsInsideDomain(4)}); TestAlterPQGroup(runtime, txId++, "/MyRoot", pqGroupAlter); t.TestEnv->TestWaitNotification(runtime, txId-1); TestDescribeResult(DescribePath(runtime, "/MyRoot/Isolda"), {NLs::PathExist, NLs::Finished, - NLs::PathVersionEqual(3), - NLs::PQPartitionsInsideDomain(5)}); + NLs::PathVersionEqual(3), + NLs::PQPartitionsInsideDomain(5)}); TestAlterPQGroup(runtime, txId++, "/MyRoot", pqGroupAlter2); t.TestEnv->TestWaitNotification(runtime, txId-1); @@ -379,18 +379,18 @@ Y_UNIT_TEST_SUITE(TPqGroupTestReboots) { {NLs::PathExist, NLs::Finished, NLs::PathVersionEqual(4), - NLs::CheckPartCount("Isolda", 8, 2, 4, 8), - NLs::PQPartitionsInsideDomain(8)}); + NLs::CheckPartCount("Isolda", 8, 2, 4, 8), + NLs::PQPartitionsInsideDomain(8)}); TestDropPQGroup(runtime, txId++, "/MyRoot", "Isolda", {ESts::StatusAccepted}); t.TestEnv->TestWaitNotification(runtime, txId-1); TestDescribeResult(DescribePath(runtime, "/MyRoot/Isolda"), {NLs::PathNotExist}); - - TestDescribeResult(DescribePath(runtime, "/MyRoot", true), - {NLs::PathExist, - NLs::PQPartitionsInsideDomain(0)}); - + + TestDescribeResult(DescribePath(runtime, "/MyRoot", true), + {NLs::PathExist, + NLs::PQPartitionsInsideDomain(0)}); + }, true); } diff --git a/ydb/core/tx/schemeshard/ut_subdomain.cpp b/ydb/core/tx/schemeshard/ut_subdomain.cpp index 41afc3b5b2..d0d30bf7da 100644 --- a/ydb/core/tx/schemeshard/ut_subdomain.cpp +++ b/ydb/core/tx/schemeshard/ut_subdomain.cpp @@ -1910,8 +1910,8 @@ Y_UNIT_TEST_SUITE(TSchemeShardSubDomainTest) { TSchemeLimits lowLimits; lowLimits.MaxPaths = 3; lowLimits.MaxShards = 3; - lowLimits.MaxPQPartitions = 300; - + lowLimits.MaxPQPartitions = 300; + SetSchemeshardSchemaLimits(runtime, lowLimits); TestDescribeResult(DescribePath(runtime, "/MyRoot"), @@ -1923,26 +1923,26 @@ Y_UNIT_TEST_SUITE(TSchemeShardSubDomainTest) { "Coordinators: 1 " "Mediators: 1 " "TimeCastBucketsPerMediator: 2 " - "Name: \"USER_0\"" - " DatabaseQuotas {" - " data_stream_shards_quota: 3" - "}"); + "Name: \"USER_0\"" + " DatabaseQuotas {" + " data_stream_shards_quota: 3" + "}"); env.TestWaitNotification(runtime, 100); TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), {NLs::PathExist , NLs::PathVersionEqual(3) - , NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards, lowLimits.MaxPQPartitions) + , NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards, lowLimits.MaxPQPartitions) , NLs::PathsInsideDomain(0) - , NLs::ShardsInsideDomain(2) - , NLs::DatabaseQuotas(3)}); + , NLs::ShardsInsideDomain(2) + , NLs::DatabaseQuotas(3)}); TestDescribeResult(DescribePath(runtime, "/MyRoot"), {NLs::PathExist - , NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards, lowLimits.MaxPQPartitions) + , NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards, lowLimits.MaxPQPartitions) , NLs::PathsInsideDomain(1) - , NLs::ShardsInsideDomain(0) - , NLs::DatabaseQuotas(0)}); + , NLs::ShardsInsideDomain(0) + , NLs::DatabaseQuotas(0)}); } Y_UNIT_TEST(SchemeLimitsRejects) { @@ -1960,14 +1960,14 @@ Y_UNIT_TEST_SUITE(TSchemeShardSubDomainTest) { lowLimits.MaxTableKeyColumns = 1; lowLimits.MaxShards = 6; lowLimits.MaxShardsInPath = 4; - lowLimits.MaxPQPartitions = 20; - - + lowLimits.MaxPQPartitions = 20; + + //lowLimits.ExtraPathSymbolsAllowed = "!\"#$%&'()*+,-.:;<=>?@[\\]^_`{|}~"; SetSchemeshardSchemaLimits(runtime, lowLimits); TestDescribeResult(DescribePath(runtime, "/MyRoot"), {NLs::PathExist, - NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards, lowLimits.MaxPQPartitions)}); + NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards, lowLimits.MaxPQPartitions)}); //create subdomain { @@ -1976,20 +1976,20 @@ Y_UNIT_TEST_SUITE(TSchemeShardSubDomainTest) { "Coordinators: 1 " "Mediators: 1 " "TimeCastBucketsPerMediator: 2 " - "Name: \"USER_0\"" - " DatabaseQuotas {" - " data_stream_shards_quota: 2" - " data_stream_reserved_storage_quota: 200000" - "}"); - + "Name: \"USER_0\"" + " DatabaseQuotas {" + " data_stream_shards_quota: 2" + " data_stream_reserved_storage_quota: 200000" + "}"); + env.TestWaitNotification(runtime, txId - 1); TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), {NLs::PathExist, NLs::PathVersionEqual(3), - NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards, lowLimits.MaxPQPartitions), + NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards, lowLimits.MaxPQPartitions), NLs::PathsInsideDomain(0), - NLs::ShardsInsideDomain(2), - NLs::DatabaseQuotas(2)}); + NLs::ShardsInsideDomain(2), + NLs::DatabaseQuotas(2)}); TestDescribeResult(DescribePath(runtime, "/MyRoot"), {NLs::PathExist, NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards), @@ -2017,10 +2017,10 @@ Y_UNIT_TEST_SUITE(TSchemeShardSubDomainTest) { TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), {NLs::PathExist, NLs::PathVersionEqual(5), - NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards, lowLimits.MaxPQPartitions), + NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards, lowLimits.MaxPQPartitions), NLs::PathsInsideDomain(3), - NLs::ShardsInsideDomain(2), - NLs::DatabaseQuotas(2)}); + NLs::ShardsInsideDomain(2), + NLs::DatabaseQuotas(2)}); } //clean @@ -2069,20 +2069,20 @@ Y_UNIT_TEST_SUITE(TSchemeShardSubDomainTest) { )", {NKikimrScheme::StatusResourceExhausted}); } - //clean + //clean { auto dirVer = TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0/1")); TestForceDropUnsafe(runtime, txId++, dirVer.PathId.LocalPathId); env.TestWaitNotification(runtime, txId - 1); TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), {NLs::PathExist, - NLs::PathVersionEqual(10), + NLs::PathVersionEqual(10), NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards), NLs::PathsInsideDomain(0), - NLs::ShardsInsideDomain(2)}); + NLs::ShardsInsideDomain(2)}); } - + //create tables, paths shards limit { TestMkDir(runtime, txId++, "/MyRoot/USER_0", "1"); @@ -2178,7 +2178,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardSubDomainTest) { env.TestWaitNotification(runtime, txId - 1); TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), {NLs::PathExist, - NLs::PathVersionEqual(14), + NLs::PathVersionEqual(14), NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards), NLs::PathsInsideDomain(0), NLs::ShardsInsideDomain(2)}); @@ -2224,7 +2224,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardSubDomainTest) { env.TestWaitNotification(runtime, txId - 1); TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), {NLs::PathExist, - NLs::PathVersionEqual(18), + NLs::PathVersionEqual(18), NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards), NLs::PathsInsideDomain(0), NLs::ShardsInsideDomain(2)}); @@ -2254,7 +2254,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardSubDomainTest) { env.TestWaitNotification(runtime, txId - 1); TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), {NLs::PathExist, - NLs::PathVersionEqual(24), + NLs::PathVersionEqual(24), NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards), NLs::PathsInsideDomain(0), NLs::ShardsInsideDomain(2)}); @@ -2301,104 +2301,104 @@ Y_UNIT_TEST_SUITE(TSchemeShardSubDomainTest) { env.TestWaitNotification(runtime, txId - 1); TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), {NLs::PathExist, - NLs::PathVersionEqual(28), + NLs::PathVersionEqual(28), NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards), NLs::PathsInsideDomain(0), NLs::ShardsInsideDomain(2)}); } - - //databaseQuotas limits - { - // Stream shards(partitions) limit is 2. Trying to create 3. - TestCreatePQGroup(runtime, txId++, "/MyRoot/USER_0/", R"( - Name: "Isolda" - TotalGroupCount: 3 - PartitionPerTablet: 2 - PQTabletConfig: {PartitionConfig { LifetimeSeconds : 10 WriteSpeedInBytesPerSecond : 1000}} + + //databaseQuotas limits + { + // Stream shards(partitions) limit is 2. Trying to create 3. + TestCreatePQGroup(runtime, txId++, "/MyRoot/USER_0/", R"( + Name: "Isolda" + TotalGroupCount: 3 + PartitionPerTablet: 2 + PQTabletConfig: {PartitionConfig { LifetimeSeconds : 10 WriteSpeedInBytesPerSecond : 1000}} + )", {NKikimrScheme::StatusResourceExhausted}); + + env.TestWaitNotification(runtime, txId - 1); + + TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), + {NLs::PathExist, + NLs::PathVersionEqual(28), + NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards), + NLs::PathsInsideDomain(0), + NLs::ShardsInsideDomain(2)}); + + // Stream reserved storage limit is 200000. Trying to reserve 200001. + TestCreatePQGroup(runtime, txId++, "/MyRoot/USER_0/", R"( + Name: "Isolda" + TotalGroupCount: 1 + PartitionPerTablet: 2 + PQTabletConfig: {PartitionConfig { LifetimeSeconds : 1 WriteSpeedInBytesPerSecond : 200001}} + )", {NKikimrScheme::StatusResourceExhausted}); + + env.TestWaitNotification(runtime, txId - 1); + + TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), + {NLs::PathExist, + NLs::PathVersionEqual(28), + NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards), + NLs::PathsInsideDomain(0), + NLs::ShardsInsideDomain(2)}); + + // Stream reserved storage limit is 200000. Trying to reserve 100000 - fit in it! + + TestCreatePQGroup(runtime, txId++, "/MyRoot/USER_0/", R"( + Name: "Isolda" + TotalGroupCount: 1 + PartitionPerTablet: 1 + PQTabletConfig: {PartitionConfig { LifetimeSeconds : 1 WriteSpeedInBytesPerSecond : 100000}} + )"); + + env.TestWaitNotification(runtime, txId - 1); + + TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), + {NLs::PathExist, + NLs::PathVersionEqual(30), + NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards), + NLs::PathsInsideDomain(1), + NLs::ShardsInsideDomain(4)}); + + // Stream reserved storage limit is 200000. Trying to reserve 200000 - fit in it! + TestAlterPQGroup(runtime, txId++, "/MyRoot/USER_0/", R"( + Name: "Isolda" + TotalGroupCount: 2 + PartitionPerTablet: 1 + PQTabletConfig: {PartitionConfig { LifetimeSeconds : 1 WriteSpeedInBytesPerSecond : 100000}} + )"); + + env.TestWaitNotification(runtime, txId - 1); + + TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), + {NLs::PathExist, + NLs::PathVersionEqual(30), + NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards), + NLs::PathsInsideDomain(1), + NLs::ShardsInsideDomain(5)}); + + // Stream reserved storage limit is 200000. Trying to reserve 20002 - do not fit in it! + TestAlterPQGroup(runtime, txId++, "/MyRoot/USER_0/", R"( + Name: "Isolda" + TotalGroupCount: 2 + PartitionPerTablet: 1 + PQTabletConfig: {PartitionConfig { LifetimeSeconds : 1 WriteSpeedInBytesPerSecond : 100001}} )", {NKikimrScheme::StatusResourceExhausted}); - - env.TestWaitNotification(runtime, txId - 1); - - TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), - {NLs::PathExist, - NLs::PathVersionEqual(28), - NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards), - NLs::PathsInsideDomain(0), - NLs::ShardsInsideDomain(2)}); - - // Stream reserved storage limit is 200000. Trying to reserve 200001. - TestCreatePQGroup(runtime, txId++, "/MyRoot/USER_0/", R"( - Name: "Isolda" - TotalGroupCount: 1 - PartitionPerTablet: 2 - PQTabletConfig: {PartitionConfig { LifetimeSeconds : 1 WriteSpeedInBytesPerSecond : 200001}} - )", {NKikimrScheme::StatusResourceExhausted}); - - env.TestWaitNotification(runtime, txId - 1); - - TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), - {NLs::PathExist, - NLs::PathVersionEqual(28), - NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards), - NLs::PathsInsideDomain(0), - NLs::ShardsInsideDomain(2)}); - - // Stream reserved storage limit is 200000. Trying to reserve 100000 - fit in it! - - TestCreatePQGroup(runtime, txId++, "/MyRoot/USER_0/", R"( - Name: "Isolda" - TotalGroupCount: 1 - PartitionPerTablet: 1 - PQTabletConfig: {PartitionConfig { LifetimeSeconds : 1 WriteSpeedInBytesPerSecond : 100000}} - )"); - - env.TestWaitNotification(runtime, txId - 1); - - TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), - {NLs::PathExist, - NLs::PathVersionEqual(30), - NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards), - NLs::PathsInsideDomain(1), - NLs::ShardsInsideDomain(4)}); - - // Stream reserved storage limit is 200000. Trying to reserve 200000 - fit in it! - TestAlterPQGroup(runtime, txId++, "/MyRoot/USER_0/", R"( - Name: "Isolda" - TotalGroupCount: 2 - PartitionPerTablet: 1 - PQTabletConfig: {PartitionConfig { LifetimeSeconds : 1 WriteSpeedInBytesPerSecond : 100000}} - )"); - - env.TestWaitNotification(runtime, txId - 1); - - TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), - {NLs::PathExist, - NLs::PathVersionEqual(30), - NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards), - NLs::PathsInsideDomain(1), - NLs::ShardsInsideDomain(5)}); - - // Stream reserved storage limit is 200000. Trying to reserve 20002 - do not fit in it! - TestAlterPQGroup(runtime, txId++, "/MyRoot/USER_0/", R"( - Name: "Isolda" - TotalGroupCount: 2 - PartitionPerTablet: 1 - PQTabletConfig: {PartitionConfig { LifetimeSeconds : 1 WriteSpeedInBytesPerSecond : 100001}} - )", {NKikimrScheme::StatusResourceExhausted}); - - env.TestWaitNotification(runtime, txId - 1); - - TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), - {NLs::PathExist, - NLs::PathVersionEqual(30), - NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards), - NLs::PathsInsideDomain(1), - NLs::ShardsInsideDomain(5)}); - - - } - + + env.TestWaitNotification(runtime, txId - 1); + + TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), + {NLs::PathExist, + NLs::PathVersionEqual(30), + NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards), + NLs::PathsInsideDomain(1), + NLs::ShardsInsideDomain(5)}); + + + } + //clear subdomain { TestDescribeResult(DescribePath(runtime, "/MyRoot"), @@ -2977,134 +2977,134 @@ Y_UNIT_TEST_SUITE(TSchemeShardSubDomainTest) { {LsCheckDiskQuotaExceeded(false)}); } - Y_UNIT_TEST(SchemeDatabaseQuotaRejects) { - TTestBasicRuntime runtime; - TTestEnv env(runtime); - ui64 txId = 100; - - TestDescribeResult(DescribePath(runtime, "/MyRoot"), - {NLs::PathExist}); - - // Create subdomain. - { - TestCreateSubDomain(runtime, txId++, "/MyRoot", - "PlanResolution: 50 " - "Coordinators: 1 " - "Mediators: 1 " - "TimeCastBucketsPerMediator: 2 " - "Name: \"USER_0\"" - " DatabaseQuotas {" - " data_stream_shards_quota: 2" - " data_stream_reserved_storage_quota: 200000" - "}"); - - env.TestWaitNotification(runtime, txId - 1); - TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), - {NLs::PathExist, - NLs::PathVersionEqual(3), - NLs::PathsInsideDomain(0), - NLs::ShardsInsideDomain(2), - NLs::DatabaseQuotas(2)}); - TestDescribeResult(DescribePath(runtime, "/MyRoot"), - {NLs::PathExist, - NLs::PathsInsideDomain(1), - NLs::ShardsInsideDomain(0)}); - } - - - { - // Stream shards(partitions) limit is 2. Trying to create 3. - TestCreatePQGroup(runtime, txId++, "/MyRoot/USER_0/", R"( - Name: "Isolda" - TotalGroupCount: 3 - PartitionPerTablet: 2 - PQTabletConfig: {PartitionConfig { LifetimeSeconds : 10 WriteSpeedInBytesPerSecond : 1000}} - )", {NKikimrScheme::StatusResourceExhausted}); - - env.TestWaitNotification(runtime, txId - 1); - - TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), - {NLs::PathExist, - NLs::PathsInsideDomain(0), - NLs::ShardsInsideDomain(2)}); - - // Stream reserved storage limit is 200000. Trying to reserve 200001. - TestCreatePQGroup(runtime, txId++, "/MyRoot/USER_0/", R"( - Name: "Isolda" - TotalGroupCount: 1 - PartitionPerTablet: 2 - PQTabletConfig: {PartitionConfig { LifetimeSeconds : 1 WriteSpeedInBytesPerSecond : 200001}} - )", {NKikimrScheme::StatusResourceExhausted}); - - env.TestWaitNotification(runtime, txId - 1); - - TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), - {NLs::PathExist, - NLs::PathsInsideDomain(0), - NLs::ShardsInsideDomain(2)}); - - // Stream reserved storage limit is 200000. Trying to reserve 100000 - fit in it! - - TestCreatePQGroup(runtime, txId++, "/MyRoot/USER_0/", R"( - Name: "Isolda" - TotalGroupCount: 1 - PartitionPerTablet: 1 - PQTabletConfig: {PartitionConfig { LifetimeSeconds : 1 WriteSpeedInBytesPerSecond : 100000}} - )"); - - env.TestWaitNotification(runtime, txId - 1); - - TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), - {NLs::PathExist, - NLs::PathsInsideDomain(1), - NLs::ShardsInsideDomain(4)}); - - // Stream reserved storage limit is 200000. Trying to reserve 200000 - fit in it! - TestAlterPQGroup(runtime, txId++, "/MyRoot/USER_0/", R"( - Name: "Isolda" - TotalGroupCount: 2 - PartitionPerTablet: 1 - PQTabletConfig: {PartitionConfig { LifetimeSeconds : 1 WriteSpeedInBytesPerSecond : 100000}} - )"); - - env.TestWaitNotification(runtime, txId - 1); - - TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), - {NLs::PathExist, - NLs::PathsInsideDomain(1), - NLs::ShardsInsideDomain(5)}); - - // Stream reserved storage limit is 200000. Trying to reserve 20002 - do not fit in it! - TestAlterPQGroup(runtime, txId++, "/MyRoot/USER_0/", R"( - Name: "Isolda" - TotalGroupCount: 2 - PartitionPerTablet: 1 - PQTabletConfig: {PartitionConfig { LifetimeSeconds : 1 WriteSpeedInBytesPerSecond : 100001}} - )", {NKikimrScheme::StatusResourceExhausted}); - - env.TestWaitNotification(runtime, txId - 1); - - TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), - {NLs::PathExist, - NLs::PathsInsideDomain(1), - NLs::ShardsInsideDomain(5)}); - - - } - - //clear subdomain - { - TestDescribeResult(DescribePath(runtime, "/MyRoot"), - {NLs::PathExist, - NLs::PathsInsideDomain(1), - NLs::ShardsInsideDomain(0)}); - TestForceDropSubDomain(runtime, txId++, "/MyRoot", "USER_0"); - env.TestWaitNotification(runtime, txId - 1); - TestDescribeResult(DescribePath(runtime, "/MyRoot"), - {NLs::PathExist, - NLs::PathsInsideDomain(0), - NLs::ShardsInsideDomain(0)}); - } - } + Y_UNIT_TEST(SchemeDatabaseQuotaRejects) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestDescribeResult(DescribePath(runtime, "/MyRoot"), + {NLs::PathExist}); + + // Create subdomain. + { + TestCreateSubDomain(runtime, txId++, "/MyRoot", + "PlanResolution: 50 " + "Coordinators: 1 " + "Mediators: 1 " + "TimeCastBucketsPerMediator: 2 " + "Name: \"USER_0\"" + " DatabaseQuotas {" + " data_stream_shards_quota: 2" + " data_stream_reserved_storage_quota: 200000" + "}"); + + env.TestWaitNotification(runtime, txId - 1); + TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), + {NLs::PathExist, + NLs::PathVersionEqual(3), + NLs::PathsInsideDomain(0), + NLs::ShardsInsideDomain(2), + NLs::DatabaseQuotas(2)}); + TestDescribeResult(DescribePath(runtime, "/MyRoot"), + {NLs::PathExist, + NLs::PathsInsideDomain(1), + NLs::ShardsInsideDomain(0)}); + } + + + { + // Stream shards(partitions) limit is 2. Trying to create 3. + TestCreatePQGroup(runtime, txId++, "/MyRoot/USER_0/", R"( + Name: "Isolda" + TotalGroupCount: 3 + PartitionPerTablet: 2 + PQTabletConfig: {PartitionConfig { LifetimeSeconds : 10 WriteSpeedInBytesPerSecond : 1000}} + )", {NKikimrScheme::StatusResourceExhausted}); + + env.TestWaitNotification(runtime, txId - 1); + + TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), + {NLs::PathExist, + NLs::PathsInsideDomain(0), + NLs::ShardsInsideDomain(2)}); + + // Stream reserved storage limit is 200000. Trying to reserve 200001. + TestCreatePQGroup(runtime, txId++, "/MyRoot/USER_0/", R"( + Name: "Isolda" + TotalGroupCount: 1 + PartitionPerTablet: 2 + PQTabletConfig: {PartitionConfig { LifetimeSeconds : 1 WriteSpeedInBytesPerSecond : 200001}} + )", {NKikimrScheme::StatusResourceExhausted}); + + env.TestWaitNotification(runtime, txId - 1); + + TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), + {NLs::PathExist, + NLs::PathsInsideDomain(0), + NLs::ShardsInsideDomain(2)}); + + // Stream reserved storage limit is 200000. Trying to reserve 100000 - fit in it! + + TestCreatePQGroup(runtime, txId++, "/MyRoot/USER_0/", R"( + Name: "Isolda" + TotalGroupCount: 1 + PartitionPerTablet: 1 + PQTabletConfig: {PartitionConfig { LifetimeSeconds : 1 WriteSpeedInBytesPerSecond : 100000}} + )"); + + env.TestWaitNotification(runtime, txId - 1); + + TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), + {NLs::PathExist, + NLs::PathsInsideDomain(1), + NLs::ShardsInsideDomain(4)}); + + // Stream reserved storage limit is 200000. Trying to reserve 200000 - fit in it! + TestAlterPQGroup(runtime, txId++, "/MyRoot/USER_0/", R"( + Name: "Isolda" + TotalGroupCount: 2 + PartitionPerTablet: 1 + PQTabletConfig: {PartitionConfig { LifetimeSeconds : 1 WriteSpeedInBytesPerSecond : 100000}} + )"); + + env.TestWaitNotification(runtime, txId - 1); + + TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), + {NLs::PathExist, + NLs::PathsInsideDomain(1), + NLs::ShardsInsideDomain(5)}); + + // Stream reserved storage limit is 200000. Trying to reserve 20002 - do not fit in it! + TestAlterPQGroup(runtime, txId++, "/MyRoot/USER_0/", R"( + Name: "Isolda" + TotalGroupCount: 2 + PartitionPerTablet: 1 + PQTabletConfig: {PartitionConfig { LifetimeSeconds : 1 WriteSpeedInBytesPerSecond : 100001}} + )", {NKikimrScheme::StatusResourceExhausted}); + + env.TestWaitNotification(runtime, txId - 1); + + TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), + {NLs::PathExist, + NLs::PathsInsideDomain(1), + NLs::ShardsInsideDomain(5)}); + + + } + + //clear subdomain + { + TestDescribeResult(DescribePath(runtime, "/MyRoot"), + {NLs::PathExist, + NLs::PathsInsideDomain(1), + NLs::ShardsInsideDomain(0)}); + TestForceDropSubDomain(runtime, txId++, "/MyRoot", "USER_0"); + env.TestWaitNotification(runtime, txId - 1); + TestDescribeResult(DescribePath(runtime, "/MyRoot"), + {NLs::PathExist, + NLs::PathsInsideDomain(0), + NLs::ShardsInsideDomain(0)}); + } + } } - + diff --git a/ydb/core/viewer/browse_pq.h b/ydb/core/viewer/browse_pq.h index a065c1392f..3e3b32a21e 100644 --- a/ydb/core/viewer/browse_pq.h +++ b/ydb/core/viewer/browse_pq.h @@ -383,8 +383,8 @@ public: if (pbPathDescription.HasPersQueueGroup()) { const auto& pbPersQueueGroup(pbPathDescription.GetPersQueueGroup()); NKikimrViewer::TMetaCommonInfo& pbCommon = *metaInfo.MutableCommon(); - if (pbPersQueueGroup.PartitionsSize()) { - pbCommon.SetPartitions(pbPersQueueGroup.PartitionsSize()); + if (pbPersQueueGroup.PartitionsSize()) { + pbCommon.SetPartitions(pbPersQueueGroup.PartitionsSize()); } if (pbPersQueueGroup.HasPQTabletConfig()) { const auto& pbPQTabletConfig(pbPersQueueGroup.GetPQTabletConfig()); diff --git a/ydb/core/viewer/content/viewer.js b/ydb/core/viewer/content/viewer.js index dfb069cf39..1108f42278 100644 --- a/ydb/core/viewer/content/viewer.js +++ b/ydb/core/viewer/content/viewer.js @@ -2676,15 +2676,15 @@ function onTopicNodeComplete(result, obj) { var panelSchema = $("#panel-schema").find(".panel-body").get(0); panelSchema.innerHTML = "<table class='proplist'><table>"; var tab = $(panelSchema).find("table").get(0); - for (var i = 0; i < result.LabeledCountersByGroup.length; ++i) { - var labeledCountersByGroup = result.LabeledCountersByGroup[i]; + for (var i = 0; i < result.LabeledCountersByGroup.length; ++i) { + var labeledCountersByGroup = result.LabeledCountersByGroup[i]; var row = tab.insertRow(); - row.insertCell(-1).innerHTML = labeledCountersByGroup.Group; - for (var idx = 0; idx < labeledCountersByGroup.LabeledCounter.length; ++idx) { - var row = tab.insertRow(); - row.insertCell(-1).innerHTML = labeledCountersByGroup.LabeledCounter[idx].Name; - row.insertCell(-1).innerHTML = labeledCountersByGroup.LabeledCounter[idx].Value; - } + row.insertCell(-1).innerHTML = labeledCountersByGroup.Group; + for (var idx = 0; idx < labeledCountersByGroup.LabeledCounter.length; ++idx) { + var row = tab.insertRow(); + row.insertCell(-1).innerHTML = labeledCountersByGroup.LabeledCounter[idx].Name; + row.insertCell(-1).innerHTML = labeledCountersByGroup.LabeledCounter[idx].Value; + } } } diff --git a/ydb/core/viewer/json_labeledcounters.h b/ydb/core/viewer/json_labeledcounters.h index 4c5635a109..6fce18b32b 100644 --- a/ydb/core/viewer/json_labeledcounters.h +++ b/ydb/core/viewer/json_labeledcounters.h @@ -21,9 +21,9 @@ class TJsonLabeledCounters : public TActorBootstrapped<TJsonLabeledCounters> { TJsonSettings JsonSettings; TString Groups; TString GroupNames; - TString Topic; - TString Consumer; - TString DC; + TString Topic; + TString Consumer; + TString DC; TVector<TString> Counters; ui32 Version = 1; ui32 Timeout = 0; @@ -44,29 +44,29 @@ public: JsonSettings.UI64AsString = !FromStringWithDefault<bool>(params.Get("ui64"), false); Timeout = FromStringWithDefault<ui32>(params.Get("timeout"), 10000); Groups = params.Get("group"); - Topic = NPersQueue::ConvertNewTopicName(params.Get("topic")); - if (Topic.empty()) - Topic = "*"; + Topic = NPersQueue::ConvertNewTopicName(params.Get("topic")); + if (Topic.empty()) + Topic = "*"; Consumer = NPersQueue::ConvertNewConsumerName(params.Get("consumer"), ctx); - DC = params.Get("dc"); - if (DC.empty()) - DC = "*"; + DC = params.Get("dc"); + if (DC.empty()) + DC = "*"; GroupNames = params.Get("group_names"); Split(params.Get("counters"), ",", Counters); Version = FromStringWithDefault<ui32>(params.Get("version"), Version); Sort(Counters); - if (Version >= 3) { - TString topic = "rt3." + DC + "--" + Topic; - if (!Consumer.empty()) { - Groups = Consumer + "/*/" + topic; - if (Topic != "*") { - Groups += "," + topic; - } - } else { - Groups = topic; - } - } - CreateClusterLabeledCountersAggregator(ctx.SelfID, TTabletTypes::PERSQUEUE, ctx, Version, Version >= 2 ? Groups : TString()); + if (Version >= 3) { + TString topic = "rt3." + DC + "--" + Topic; + if (!Consumer.empty()) { + Groups = Consumer + "/*/" + topic; + if (Topic != "*") { + Groups += "," + topic; + } + } else { + Groups = topic; + } + } + CreateClusterLabeledCountersAggregator(ctx.SelfID, TTabletTypes::PERSQUEUE, ctx, Version, Version >= 2 ? Groups : TString()); Become(&TThis::StateRequestedTopicInfo, ctx, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); } @@ -104,7 +104,7 @@ public: } } } - } else if (Version >= 2) { + } else if (Version >= 2) { const NKikimrTabletCountersAggregator::TEvTabletLabeledCountersResponse& source(ev->Get()->Record); TVector<TMaybe<ui32>> counterNamesMapping; counterNamesMapping.reserve(source.CounterNamesSize()); diff --git a/ydb/core/viewer/json_pqconsumerinfo.h b/ydb/core/viewer/json_pqconsumerinfo.h index b75da8aa60..b98dfc2632 100644 --- a/ydb/core/viewer/json_pqconsumerinfo.h +++ b/ydb/core/viewer/json_pqconsumerinfo.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include <library/cpp/actors/core/actor_bootstrapped.h> #include <library/cpp/actors/core/mon.h> #include <ydb/core/base/tablet_pipe.h> @@ -7,55 +7,55 @@ #include <ydb/core/client/server/msgbus_server_persqueue.h> #include <ydb/core/tx/tx_proxy/proxy.h> #include <ydb/library/persqueue/topic_parser/topic_parser.h> -#include "viewer.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonPQConsumerInfo : public TActorBootstrapped<TJsonPQConsumerInfo> { - using TBase = TActorBootstrapped<TJsonPQConsumerInfo>; +#include "viewer.h" + +namespace NKikimr { +namespace NViewer { + +using namespace NActors; + +class TJsonPQConsumerInfo : public TActorBootstrapped<TJsonPQConsumerInfo> { + using TBase = TActorBootstrapped<TJsonPQConsumerInfo>; IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - NKikimrClient::TResponse Result; - TJsonSettings JsonSettings; - TString Topic; - TString Client; - TString DC; + NMon::TEvHttpInfo::TPtr Event; + NKikimrClient::TResponse Result; + TJsonSettings JsonSettings; + TString Topic; + TString Client; + TString DC; ui32 Version = 0; ui32 Timeout = 0; ui32 Requests = 0; ui32 Responses = 0; - -public: + +public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::VIEWER_HANDLER; - } - + } + TJsonPQConsumerInfo(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) : Viewer(viewer) , Event(ev) - {} - - void Bootstrap(const TActorContext& ctx) { + {} + + void Bootstrap(const TActorContext& ctx) { const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault<bool>(params.Get("enums"), false); - JsonSettings.UI64AsString = !FromStringWithDefault<bool>(params.Get("ui64"), false); - Timeout = FromStringWithDefault<ui32>(params.Get("timeout"), 10000); - Topic = params.Get("topic"); - Version = FromStringWithDefault<ui32>(params.Get("version"), 0); - DC = params.Get("dc"); - //TODO: make here list of topics - Client = params.Get("client"); - if (Version >= 3) { - Topic = "rt3." + DC + "--" + NPersQueue::ConvertNewTopicName(Topic); + JsonSettings.EnumAsNumbers = !FromStringWithDefault<bool>(params.Get("enums"), false); + JsonSettings.UI64AsString = !FromStringWithDefault<bool>(params.Get("ui64"), false); + Timeout = FromStringWithDefault<ui32>(params.Get("timeout"), 10000); + Topic = params.Get("topic"); + Version = FromStringWithDefault<ui32>(params.Get("version"), 0); + DC = params.Get("dc"); + //TODO: make here list of topics + Client = params.Get("client"); + if (Version >= 3) { + Topic = "rt3." + DC + "--" + NPersQueue::ConvertNewTopicName(Topic); Client = NPersQueue::ConvertNewConsumerName(Client, ctx); - } else { - size_t pos = Topic.rfind('/'); - if (pos != TString::npos) { - Topic = Topic.substr(pos + 1); - } + } else { + size_t pos = Topic.rfind('/'); + if (pos != TString::npos) { + Topic = Topic.substr(pos + 1); + } } { NKikimrClient::TPersQueueRequest request; @@ -71,52 +71,52 @@ public: ctx.Register(NMsgBusProxy::CreateActorServerPersQueue(ctx.SelfID, request, NMsgBusProxy::CreatePersQueueMetaCacheV2Id(), nullptr)); ++Requests; } - Become(&TThis::StateRequestedTopicInfo, ctx, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void Die(const TActorContext& ctx) override { - TBase::Die(ctx); - } - - STFUNC(StateRequestedTopicInfo) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvPersQueue::TEvResponse, Handle); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvPersQueue::TEvResponse::TPtr &ev, const TActorContext &ctx) { + Become(&TThis::StateRequestedTopicInfo, ctx, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); + } + + void Die(const TActorContext& ctx) override { + TBase::Die(ctx); + } + + STFUNC(StateRequestedTopicInfo) { + switch (ev->GetTypeRewrite()) { + HFunc(TEvPersQueue::TEvResponse, Handle); + CFunc(TEvents::TSystem::Wakeup, HandleTimeout); + } + } + + void Handle(TEvPersQueue::TEvResponse::TPtr &ev, const TActorContext &ctx) { Result.MergeFrom(ev->Get()->Record); if (++Responses == Requests) { ReplyAndDie(ctx); } - } - - void ReplyAndDie(const TActorContext &ctx) { - TStringStream json; + } + + void ReplyAndDie(const TActorContext &ctx) { + TStringStream json; TProtoToJson::ProtoToJson(json, Result.GetMetaResponse(), JsonSettings); ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON() + json.Str(), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void HandleTimeout(const TActorContext &ctx) { + Die(ctx); + } + + void HandleTimeout(const TActorContext &ctx) { ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } -}; - -template <> -struct TJsonRequestSchema<TJsonPQConsumerInfo> { - static TString GetSchema() { - TStringStream stream; + Die(ctx); + } +}; + +template <> +struct TJsonRequestSchema<TJsonPQConsumerInfo> { + static TString GetSchema() { + TStringStream stream; TProtoToJson::ProtoToJsonSchema<NKikimrClient::TPersQueueMetaResponse>(stream); - return stream.Str(); - } -}; - -template <> -struct TJsonRequestParameters<TJsonPQConsumerInfo> { - static TString GetParameters() { + return stream.Str(); + } +}; + +template <> +struct TJsonRequestParameters<TJsonPQConsumerInfo> { + static TString GetParameters() { return R"___([{"name":"topic","in":"query","description":"topic name","required":true,"type":"string"}, {"name":"dc","in":"query","description":"dc name (required with version >= 3)","required":false,"type":"string", "default":""}, {"name":"version","in":"query","description":"query version","required":false,"type":"integer", "default":"0"}, @@ -124,22 +124,22 @@ struct TJsonRequestParameters<TJsonPQConsumerInfo> { {"name":"enums","in":"query","description":"convert enums to strings","required":false,"type":"boolean","default":false}, {"name":"ui64","in":"query","description":"return ui64 as number","required":false,"type":"boolean","default":false}, {"name":"timeout","in":"query","description":"timeout in ms","required":false,"type":"integer","default":10000}])___"; - } -}; - -template <> -struct TJsonRequestSummary<TJsonPQConsumerInfo> { - static TString GetSummary() { + } +}; + +template <> +struct TJsonRequestSummary<TJsonPQConsumerInfo> { + static TString GetSummary() { return "\"Consumer-topic metrics\""; - } -}; - -template <> -struct TJsonRequestDescription<TJsonPQConsumerInfo> { - static TString GetDescription() { + } +}; + +template <> +struct TJsonRequestDescription<TJsonPQConsumerInfo> { + static TString GetDescription() { return "\"Returns consumer-topic metrics\""; - } -}; - -} -} + } +}; + +} +} diff --git a/ydb/core/viewer/json_topicinfo.h b/ydb/core/viewer/json_topicinfo.h index 6065d33cbb..cbf42ee727 100644 --- a/ydb/core/viewer/json_topicinfo.h +++ b/ydb/core/viewer/json_topicinfo.h @@ -1,108 +1,108 @@ -#pragma once +#pragma once #include <library/cpp/actors/core/actor_bootstrapped.h> #include <library/cpp/actors/core/mon.h> #include <ydb/core/base/tablet_pipe.h> #include <ydb/core/protos/services.pb.h> #include <ydb/core/tablet/tablet_counters_aggregator.h> #include <ydb/core/tx/tx_proxy/proxy.h> -#include "viewer.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonTopicInfo : public TActorBootstrapped<TJsonTopicInfo> { - using TBase = TActorBootstrapped<TJsonTopicInfo>; +#include "viewer.h" + +namespace NKikimr { +namespace NViewer { + +using namespace NActors; + +class TJsonTopicInfo : public TActorBootstrapped<TJsonTopicInfo> { + using TBase = TActorBootstrapped<TJsonTopicInfo>; IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - NKikimrTabletCountersAggregator::TEvTabletLabeledCountersResponse TopicInfoResult; - TJsonSettings JsonSettings; + NMon::TEvHttpInfo::TPtr Event; + NKikimrTabletCountersAggregator::TEvTabletLabeledCountersResponse TopicInfoResult; + TJsonSettings JsonSettings; TString Topic; TString Client; TString GroupNames; bool ShowAll = false; ui32 Timeout = 0; - -public: + +public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::VIEWER_HANDLER; - } - + } + TJsonTopicInfo(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) : Viewer(viewer) , Event(ev) - {} - - void Bootstrap(const TActorContext& ctx) { + {} + + void Bootstrap(const TActorContext& ctx) { const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault<bool>(params.Get("enums"), false); - JsonSettings.UI64AsString = !FromStringWithDefault<bool>(params.Get("ui64"), false); - Timeout = FromStringWithDefault<ui32>(params.Get("timeout"), 10000); - Topic = params.Get("path"); - Client = params.Has("client") ? params.Get("client") : "total"; + JsonSettings.EnumAsNumbers = !FromStringWithDefault<bool>(params.Get("enums"), false); + JsonSettings.UI64AsString = !FromStringWithDefault<bool>(params.Get("ui64"), false); + Timeout = FromStringWithDefault<ui32>(params.Get("timeout"), 10000); + Topic = params.Get("path"); + Client = params.Has("client") ? params.Get("client") : "total"; GroupNames = params.Get("group_names"); - ShowAll = FromStringWithDefault<bool>(params.Get("all"), false); - size_t pos = Topic.rfind('/'); + ShowAll = FromStringWithDefault<bool>(params.Get("all"), false); + size_t pos = Topic.rfind('/'); if (pos != TString::npos) - Topic = Topic.substr(pos + 1); - //proxy is not used - CreateClusterLabeledCountersAggregator(ctx.SelfID, TTabletTypes::PERSQUEUE, ctx); - - Become(&TThis::StateRequestedTopicInfo, ctx, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void Die(const TActorContext& ctx) override { - TBase::Die(ctx); - } - - STFUNC(StateRequestedTopicInfo) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvTabletCounters::TEvTabletLabeledCountersResponse, Handle); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvTabletCounters::TEvTabletLabeledCountersResponse::TPtr &ev, const TActorContext &ctx) { + Topic = Topic.substr(pos + 1); + //proxy is not used + CreateClusterLabeledCountersAggregator(ctx.SelfID, TTabletTypes::PERSQUEUE, ctx); + + Become(&TThis::StateRequestedTopicInfo, ctx, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); + } + + void Die(const TActorContext& ctx) override { + TBase::Die(ctx); + } + + STFUNC(StateRequestedTopicInfo) { + switch (ev->GetTypeRewrite()) { + HFunc(TEvTabletCounters::TEvTabletLabeledCountersResponse, Handle); + CFunc(TEvents::TSystem::Wakeup, HandleTimeout); + } + } + + void Handle(TEvTabletCounters::TEvTabletLabeledCountersResponse::TPtr &ev, const TActorContext &ctx) { TString groupPrefix = Client + "/"; TString groupSuffix = "/" + Topic; - for (ui32 i = 0; i < ev->Get()->Record.LabeledCountersByGroupSize(); ++i) { - const auto& uc = ev->Get()->Record.GetLabeledCountersByGroup(i); + for (ui32 i = 0; i < ev->Get()->Record.LabeledCountersByGroupSize(); ++i) { + const auto& uc = ev->Get()->Record.GetLabeledCountersByGroup(i); const TString& group(uc.GetGroup()); if (ShowAll || (group.StartsWith(groupPrefix) && group.EndsWith(groupSuffix)) || uc.GetGroup() == Topic || uc.GetGroupNames() == GroupNames) { - TopicInfoResult.AddLabeledCountersByGroup()->CopyFrom(uc); - } - } - ReplyAndDie(ctx); - } - - void ReplyAndDie(const TActorContext &ctx) { - TStringStream json; + TopicInfoResult.AddLabeledCountersByGroup()->CopyFrom(uc); + } + } + ReplyAndDie(ctx); + } + + void ReplyAndDie(const TActorContext &ctx) { + TStringStream json; TProtoToJson::ProtoToJson(json, TopicInfoResult, JsonSettings); ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON() + json.Str(), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void HandleTimeout(const TActorContext &ctx) { + Die(ctx); + } + + void HandleTimeout(const TActorContext &ctx) { ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } -}; - -template <> -struct TJsonRequestSchema<TJsonTopicInfo> { + Die(ctx); + } +}; + +template <> +struct TJsonRequestSchema<TJsonTopicInfo> { static TString GetSchema() { - TStringStream stream; + TStringStream stream; TProtoToJson::ProtoToJsonSchema<TEvTabletCounters::TEvTabletLabeledCountersResponse::ProtoRecordType>(stream); - return stream.Str(); - } -}; - -template <> -struct TJsonRequestParameters<TJsonTopicInfo> { + return stream.Str(); + } +}; + +template <> +struct TJsonRequestParameters<TJsonTopicInfo> { static TString GetParameters() { return R"___([{"name":"path","in":"query","description":"schema path","required":true,"type":"string"}, {"name":"client","in":"query","description":"client name","required":false,"type":"string","default":"total"}, @@ -110,22 +110,22 @@ struct TJsonRequestParameters<TJsonTopicInfo> { {"name":"all","in":"query","description":"return all topics and all clients","required":false,"type":"boolean","default":false}, {"name":"ui64","in":"query","description":"return ui64 as number","required":false,"type":"boolean"}, {"name":"timeout","in":"query","description":"timeout in ms","required":false,"type":"integer",default:10000}])___"; - } -}; - -template <> -struct TJsonRequestSummary<TJsonTopicInfo> { + } +}; + +template <> +struct TJsonRequestSummary<TJsonTopicInfo> { static TString GetSummary() { return "\"PQ topic information\""; - } -}; - -template <> -struct TJsonRequestDescription<TJsonTopicInfo> { + } +}; + +template <> +struct TJsonRequestDescription<TJsonTopicInfo> { static TString GetDescription() { return "\"Information about PQ topic\""; - } -}; - -} -} + } +}; + +} +} diff --git a/ydb/core/viewer/viewer.cpp b/ydb/core/viewer/viewer.cpp index c564d14c70..f1124115bf 100644 --- a/ydb/core/viewer/viewer.cpp +++ b/ydb/core/viewer/viewer.cpp @@ -29,8 +29,8 @@ #include "json_bscontrollerinfo.h" #include "json_config.h" #include "json_counters.h" -#include "json_topicinfo.h" -#include "json_pqconsumerinfo.h" +#include "json_topicinfo.h" +#include "json_pqconsumerinfo.h" #include "json_tabletcounters.h" #include "json_storage.h" #include "json_metainfo.h" @@ -188,8 +188,8 @@ public: JsonHandlers["/json/bscontrollerinfo"] = new TJsonHandler<TJsonBSControllerInfo>; JsonHandlers["/json/config"] = new TJsonHandler<TJsonConfig>; JsonHandlers["/json/counters"] = new TJsonHandler<TJsonCounters>; - JsonHandlers["/json/topicinfo"] = new TJsonHandler<TJsonTopicInfo>; - JsonHandlers["/json/pqconsumerinfo"] = new TJsonHandler<TJsonPQConsumerInfo>; + JsonHandlers["/json/topicinfo"] = new TJsonHandler<TJsonTopicInfo>; + JsonHandlers["/json/pqconsumerinfo"] = new TJsonHandler<TJsonPQConsumerInfo>; JsonHandlers["/json/tabletcounters"] = new TJsonHandler<TJsonTabletCounters>; JsonHandlers["/json/storage"] = new TJsonHandler<TJsonStorage>; JsonHandlers["/json/metainfo"] = new TJsonHandler<TJsonMetaInfo>; diff --git a/ydb/core/viewer/ya.make b/ydb/core/viewer/ya.make index 176f7697bc..214e3b6726 100644 --- a/ydb/core/viewer/ya.make +++ b/ydb/core/viewer/ya.make @@ -40,8 +40,8 @@ SRCS( json_tabletinfo.h json_tenants.h json_tenantinfo.h - json_topicinfo.h - json_pqconsumerinfo.h + json_topicinfo.h + json_pqconsumerinfo.h json_vdiskinfo.h json_wb_req.h json_whoami.h diff --git a/ydb/core/ymq/actor/events.h b/ydb/core/ymq/actor/events.h index 3f7c20a26f..9fff62c7cf 100644 --- a/ydb/core/ymq/actor/events.h +++ b/ydb/core/ymq/actor/events.h @@ -203,8 +203,8 @@ struct TSqsEvents { }; struct TEvClearQueueAttributesCache : public NActors::TEventLocal<TEvClearQueueAttributesCache, EvClearQueueAttributesCache> { - }; - + }; + struct TEvExecute : public NActors::TEventLocal<TEvExecute, EvExecute> { /// Query sender TActorId Sender; diff --git a/ydb/core/ymq/actor/index_events_processor.cpp b/ydb/core/ymq/actor/index_events_processor.cpp index 07c696dd1e..646e06420e 100644 --- a/ydb/core/ymq/actor/index_events_processor.cpp +++ b/ydb/core/ymq/actor/index_events_processor.cpp @@ -490,4 +490,4 @@ void IEventsWriterWrapper::Close() { } } -}; // namespace NKikimr::NSQS +}; // namespace NKikimr::NSQS diff --git a/ydb/core/ymq/actor/index_events_processor.h b/ydb/core/ymq/actor/index_events_processor.h index eaa3ef7d61..a8ae462064 100644 --- a/ydb/core/ymq/actor/index_events_processor.h +++ b/ydb/core/ymq/actor/index_events_processor.h @@ -11,7 +11,7 @@ -namespace NKikimr::NSQS { +namespace NKikimr::NSQS { @@ -145,4 +145,4 @@ private: }; -} // namespace NKikimr::NSQS +} // namespace NKikimr::NSQS diff --git a/ydb/core/ymq/actor/purge.cpp b/ydb/core/ymq/actor/purge.cpp index 4e9aca59d0..aff713d784 100644 --- a/ydb/core/ymq/actor/purge.cpp +++ b/ydb/core/ymq/actor/purge.cpp @@ -57,7 +57,7 @@ void TPurgeActor::MakeGetRetentionOffsetRequest(const ui64 shardId, TShard* shar shard->TargetBoundary = shard->BoundaryPurged; } }; - + TExecutorBuilder(SelfId(), RequestId_) .User(QueuePath_.UserName) .Queue(QueuePath_.QueueName) diff --git a/ydb/core/ymq/actor/service.cpp b/ydb/core/ymq/actor/service.cpp index b2b8aa8cf8..34c9a4ae46 100644 --- a/ydb/core/ymq/actor/service.cpp +++ b/ydb/core/ymq/actor/service.cpp @@ -330,9 +330,9 @@ void TSqsService::Bootstrap() { } auto factory = AppData()->SqsAuthFactory; - Y_VERIFY(factory); + Y_VERIFY(factory); - driverConfig.SetCredentialsProviderFactory(factory->CreateCredentialsProviderFactory(Cfg())); + driverConfig.SetCredentialsProviderFactory(factory->CreateCredentialsProviderFactory(Cfg())); YcSearchEventsConfig.Driver = MakeHolder<NYdb::TDriver>(driverConfig); MakeAndRegisterYcEventsProcessor(); @@ -1299,11 +1299,11 @@ void TSqsService::MakeAndRegisterYcEventsProcessor() { auto factory = AppData()->SqsEventsWriterFactory; Y_VERIFY(factory); - Register(new TSearchEventsProcessor( - root, YcSearchEventsConfig.ReindexInterval, YcSearchEventsConfig.RescanInterval, - MakeSimpleShared<NYdb::NTable::TTableClient>(*YcSearchEventsConfig.Driver), + Register(new TSearchEventsProcessor( + root, YcSearchEventsConfig.ReindexInterval, YcSearchEventsConfig.RescanInterval, + MakeSimpleShared<NYdb::NTable::TTableClient>(*YcSearchEventsConfig.Driver), factory->CreateEventsWriter(Cfg(), GetSqsServiceCounters(AppData()->Counters, "yc_unified_agent")) - )); + )); } // //IActor* CreateSqsService(const TYcSearchEventsConfig& ycSearchEventsConfig) { diff --git a/ydb/core/ymq/base/events_writer_iface.h b/ydb/core/ymq/base/events_writer_iface.h index c215d57161..4fcd6ab50a 100644 --- a/ydb/core/ymq/base/events_writer_iface.h +++ b/ydb/core/ymq/base/events_writer_iface.h @@ -1,34 +1,34 @@ -#pragma once - +#pragma once + #include <ydb/library/aclib/aclib.h> - + #include <ydb/core/protos/config.pb.h> -#include <library/cpp/monlib/dynamic_counters/counters.h> - -namespace NKikimr::NSQS { - -class IEventsWriterWrapper : public TAtomicRefCount<IEventsWriterWrapper> { -public: - virtual void Write(const TString& data) = 0; - virtual ~IEventsWriterWrapper() { - Close(); - }; - - void Close(); - - using TPtr = TIntrusivePtr<IEventsWriterWrapper>; -protected: - virtual void CloseImpl() = 0; -private: - bool Closed = false; -}; - - +#include <library/cpp/monlib/dynamic_counters/counters.h> + +namespace NKikimr::NSQS { + +class IEventsWriterWrapper : public TAtomicRefCount<IEventsWriterWrapper> { +public: + virtual void Write(const TString& data) = 0; + virtual ~IEventsWriterWrapper() { + Close(); + }; + + void Close(); + + using TPtr = TIntrusivePtr<IEventsWriterWrapper>; +protected: + virtual void CloseImpl() = 0; +private: + bool Closed = false; +}; + + class IEventsWriterFactory { -public: - virtual IEventsWriterWrapper::TPtr CreateEventsWriter(const NKikimrConfig::TSqsConfig& config, const NMonitoring::TDynamicCounterPtr& counters) const = 0; +public: + virtual IEventsWriterWrapper::TPtr CreateEventsWriter(const NKikimrConfig::TSqsConfig& config, const NMonitoring::TDynamicCounterPtr& counters) const = 0; virtual ~IEventsWriterFactory() - {} -}; - -} // namespace NKikimr::NSQS + {} +}; + +} // namespace NKikimr::NSQS diff --git a/ydb/core/ymq/base/ya.make b/ydb/core/ymq/base/ya.make index 19b622affb..14cd4e72a1 100644 --- a/ydb/core/ymq/base/ya.make +++ b/ydb/core/ymq/base/ya.make @@ -16,7 +16,7 @@ SRCS( queue_attributes.cpp queue_id.cpp secure_protobuf_printer.cpp - events_writer_iface.h + events_writer_iface.h ) GENERATE_ENUM_SERIALIZATION(query_id.h) diff --git a/ydb/core/ymq/http/http.cpp b/ydb/core/ymq/http/http.cpp index 9cd503d094..5d0963bfb2 100644 --- a/ydb/core/ymq/http/http.cpp +++ b/ydb/core/ymq/http/http.cpp @@ -153,7 +153,7 @@ void THttpRequest::WriteResponse(const TReplyParams& replyParams, const TSqsHttp httpResponse.SetContent(response.Body, response.ContentType); } - if (Parent_->Config.GetYandexCloudMode() && !IsPrivateRequest_) { + if (Parent_->Config.GetYandexCloudMode() && !IsPrivateRequest_) { // Send request attributes to the metering actor auto reportRequestAttributes = MakeHolder<TSqsEvents::TEvReportProcessedRequestAttributes>(); @@ -571,7 +571,7 @@ bool THttpRequest::SetupRequest() { AppData(Parent_->ActorSystem_)->SqsAuthFactory->RegisterAuthActor( *Parent_->ActorSystem_, std::move(data)); - + return true; } diff --git a/ydb/library/aclib/aclib.h b/ydb/library/aclib/aclib.h index e83dad85b4..655ca4b669 100644 --- a/ydb/library/aclib/aclib.h +++ b/ydb/library/aclib/aclib.h @@ -28,10 +28,10 @@ enum EAccessRights : ui32 { // bitmask GrantAccessRights = 0x00002000, // grant access rights (only own access rights) WriteUserAttributes = 0x00004000, // modify user attributes / KV ConnectDatabase = 0x00008000, // any type of request to DB - ReadStream = 0x00010000, // reading streams - WriteStream = 0x00020000, // writing streams - ReadTopic = 0x00040000, // reading topics - WritTopic = 0x00080000, // writing topics + ReadStream = 0x00010000, // reading streams + WriteStream = 0x00020000, // writing streams + ReadTopic = 0x00040000, // reading topics + WritTopic = 0x00080000, // writing topics GenericRead = SelectRow | ReadAttributes | DescribeSchema, GenericWrite = UpdateRow | EraseRow | WriteAttributes | CreateDirectory | CreateTable | CreateQueue | RemoveSchema | AlterSchema | WriteUserAttributes, diff --git a/ydb/library/http_proxy/authorization/ut/auth_helpers_ut.cpp b/ydb/library/http_proxy/authorization/ut/auth_helpers_ut.cpp index 5c4b9b030c..0fb753120a 100644 --- a/ydb/library/http_proxy/authorization/ut/auth_helpers_ut.cpp +++ b/ydb/library/http_proxy/authorization/ut/auth_helpers_ut.cpp @@ -1,4 +1,4 @@ -#include <ydb/library/http_proxy/authorization/auth_helpers.h> +#include <ydb/library/http_proxy/authorization/auth_helpers.h> #include <ydb/library/http_proxy/error/error.h> diff --git a/ydb/library/http_proxy/authorization/ut/signature_ut.cpp b/ydb/library/http_proxy/authorization/ut/signature_ut.cpp index 9e7798a0f4..062ac52146 100644 --- a/ydb/library/http_proxy/authorization/ut/signature_ut.cpp +++ b/ydb/library/http_proxy/authorization/ut/signature_ut.cpp @@ -1,4 +1,4 @@ -#include <ydb/library/http_proxy/authorization/signature.h> +#include <ydb/library/http_proxy/authorization/signature.h> #include <library/cpp/testing/unittest/registar.h> #include <ydb/library/http_proxy/error/error.h> diff --git a/ydb/library/http_proxy/authorization/ut/ya.make b/ydb/library/http_proxy/authorization/ut/ya.make index e44b49c881..4a127f6e8b 100644 --- a/ydb/library/http_proxy/authorization/ut/ya.make +++ b/ydb/library/http_proxy/authorization/ut/ya.make @@ -3,15 +3,15 @@ OWNER( g:sqs ) -UNITTEST_FOR(ydb/library/http_proxy/authorization) +UNITTEST_FOR(ydb/library/http_proxy/authorization) PEERDIR( ydb/library/http_proxy/error ) SRCS( - auth_helpers_ut.cpp - signature_ut.cpp + auth_helpers_ut.cpp + signature_ut.cpp ) END() diff --git a/ydb/library/persqueue/topic_parser/topic_parser.cpp b/ydb/library/persqueue/topic_parser/topic_parser.cpp index 66ae79c158..a9598b25b2 100644 --- a/ydb/library/persqueue/topic_parser/topic_parser.cpp +++ b/ydb/library/persqueue/topic_parser/topic_parser.cpp @@ -1,19 +1,19 @@ -#include "topic_parser.h" - +#include "topic_parser.h" + #include <ydb/core/base/appdata.h> #include <util/folder/path.h> -namespace NPersQueue { - +namespace NPersQueue { + namespace { TString FullPath(const TMaybe<TString> &database, const TString &path) { - if (database.Defined() && !path.StartsWith(*database) && !path.Contains('\0')) { - try { - return (TFsPath(*database) / path).GetPath(); - } catch(...) { - return path; - } + if (database.Defined() && !path.StartsWith(*database) && !path.Contains('\0')) { + try { + return (TFsPath(*database) / path).GetPath(); + } catch(...) { + return path; + } } else { return path; } @@ -67,7 +67,7 @@ TString NormalizeFullPath(const TString& fullPath) { return fullPath; } } - + TTopicsListController::TTopicsListController( const std::shared_ptr<TTopicNamesConverterFactory>& converterFactory, bool haveClusters, const TVector<TString>& clusters, const TString& localCluster @@ -118,5 +118,5 @@ TConverterFactoryPtr TTopicsListController::GetConverterFactory() const { return ConverterFactory; }; -} // namespace NPersQueue +} // namespace NPersQueue diff --git a/ydb/library/persqueue/topic_parser/topic_parser.h b/ydb/library/persqueue/topic_parser/topic_parser.h index 18c1f1f8dc..eb5a69cc37 100644 --- a/ydb/library/persqueue/topic_parser/topic_parser.h +++ b/ydb/library/persqueue/topic_parser/topic_parser.h @@ -1,21 +1,21 @@ -#pragma once - +#pragma once + #include <library/cpp/actors/core/actor.h> -#include <util/generic/string.h> +#include <util/generic/string.h> #include <util/generic/hash.h> #include <util/string/builder.h> #include <ydb/core/base/path.h> #include <ydb/library/persqueue/topic_parser_public/topic_parser.h> -namespace NPersQueue { - +namespace NPersQueue { + TString GetFullTopicPath(const NActors::TActorContext& ctx, TMaybe<TString> database, const TString& topicPath); TString ConvertNewConsumerName(const TString& consumer, const NActors::TActorContext& ctx); TString ConvertOldConsumerName(const TString& consumer, const NActors::TActorContext& ctx); TString MakeConsumerPath(const TString& consumer); - - + + #define CHECK_SET_VALID(cond, reason, statement) \ if (!(cond)) { \ Valid = false; \ @@ -350,5 +350,5 @@ private: TString NormalizeFullPath(const TString& fullPath); -} // namespace NPersQueue +} // namespace NPersQueue diff --git a/ydb/library/persqueue/topic_parser_public/topic_parser.cpp b/ydb/library/persqueue/topic_parser_public/topic_parser.cpp index 43419a952d..a6ad5809a2 100644 --- a/ydb/library/persqueue/topic_parser_public/topic_parser.cpp +++ b/ydb/library/persqueue/topic_parser_public/topic_parser.cpp @@ -1,152 +1,152 @@ -#include "topic_parser.h" - -#include <util/folder/path.h> - -namespace NPersQueue { - -bool CorrectName(const TString& topic) { - if (!topic.StartsWith("rt3.")) - return false; - auto pos = topic.find("--"); - if (pos == TString::npos || pos == 4) //dc is empty - return false; - pos += 2; //skip "--" - if (pos == topic.size()) // no real topic - return false; - auto pos2 = topic.find("--", pos); - if (pos2 == TString::npos) - return true; - if (pos2 == pos || pos2 + 2 == topic.size()) //producer or topic is empty - return false; - return true; -} - -TString GetDC(const TString& topic) { - if (!CorrectName(topic)) - return "unknown"; - auto pos = topic.find("--"); - Y_VERIFY(pos != TString::npos); - Y_VERIFY(pos > 4); //length of "rt3." - auto res = topic.substr(4, pos - 4); - return res; -} - -TString GetRealTopic(const TString& topic) { - if (!CorrectName(topic)) - return topic; - auto pos = topic.find("--"); - Y_VERIFY(pos != TString::npos); - Y_VERIFY(topic.size() > pos + 2); - return topic.substr(pos + 2); -} - -TString GetTopicPath(const TString& topic) { - return ConvertOldTopicName(GetRealTopic(topic)); -} - -TString GetAccount(const TString& topic) { - auto res = GetTopicPath(topic); - return res.substr(0, res.find("/")); -} - -TString GetProducer(const TString& topic) { - if (!CorrectName(topic)) - return "unknown"; - auto res = GetRealTopic(topic); - return res.substr(0, res.find("--")); -} - -TString ConvertNewTopicName(const TString& topic) { - TString t = NormalizePath(topic); - auto pos = t.rfind("/"); - if (pos == TString::npos) - return t; - TStringBuilder res; - for (ui32 i = 0; i < pos; ++i) { - if (t[i] == '/') res << '@'; - else res << t[i]; - } - res << "--"; - res << t.substr(pos + 1); - return res; -} - - -TString ConvertOldTopicName(const TString& topic) { - auto pos = topic.rfind("--"); - if (pos == TString::npos) - return topic; - TStringBuilder res; - for (ui32 i = 0; i < pos; ++i) { - if (topic[i] == '@') res << '/'; - else res << topic[i]; - } - res << "/"; - res << topic.substr(pos + 2); - return res; -} - -TString BuildFullTopicName(const TString& topicPath, const TString& topicDC) { - return "rt3." + topicDC + "--" + ConvertNewTopicName(topicPath); -} - -TString ConvertOldProducerName(const TString& producer) { - TStringBuilder res; - for (ui32 i = 0; i < producer.size(); ++i) { - if (producer[i] == '@') res << "/"; - else res << producer[i]; - } - return res; -} - - -TString NormalizePath(const TString& path) { - size_t st = 0; - size_t end = path.size(); - if (path.StartsWith("/")) st = 1; - if (path.EndsWith("/") && end > st) end--; - return path.substr(st, end - st); -} - - -TString ConvertNewConsumerName(const TString& consumer) { - TStringBuilder res; - ui32 pos = 0; - TString c = NormalizePath(consumer); - if (c.StartsWith("shared/")) - pos = 7; - for (ui32 i = pos; i < c.size(); ++i) { - if (c[i] == '/') res << "@"; - else res << c[i]; - } - return res; -} - -TString ConvertNewProducerName(const TString& producer) { - TStringBuilder res; - for (ui32 i = 0; i < producer.size(); ++i) { - if (producer[i] == '/') res << "@"; - else res << producer[i]; - } - return res; -} - - -TString ConvertOldConsumerName(const TString& consumer) { - TStringBuilder res; - bool shared = true; - for (ui32 i = 0; i < consumer.size(); ++i) { - if (consumer[i] == '@') { - res << "/"; - shared = false; - } else { - res << consumer[i]; - } - } - if (shared) - return TStringBuilder() << "shared/" << res; - return res; -} - - -} // namespace NPersQueue +#include "topic_parser.h" + +#include <util/folder/path.h> + +namespace NPersQueue { + +bool CorrectName(const TString& topic) { + if (!topic.StartsWith("rt3.")) + return false; + auto pos = topic.find("--"); + if (pos == TString::npos || pos == 4) //dc is empty + return false; + pos += 2; //skip "--" + if (pos == topic.size()) // no real topic + return false; + auto pos2 = topic.find("--", pos); + if (pos2 == TString::npos) + return true; + if (pos2 == pos || pos2 + 2 == topic.size()) //producer or topic is empty + return false; + return true; +} + +TString GetDC(const TString& topic) { + if (!CorrectName(topic)) + return "unknown"; + auto pos = topic.find("--"); + Y_VERIFY(pos != TString::npos); + Y_VERIFY(pos > 4); //length of "rt3." + auto res = topic.substr(4, pos - 4); + return res; +} + +TString GetRealTopic(const TString& topic) { + if (!CorrectName(topic)) + return topic; + auto pos = topic.find("--"); + Y_VERIFY(pos != TString::npos); + Y_VERIFY(topic.size() > pos + 2); + return topic.substr(pos + 2); +} + +TString GetTopicPath(const TString& topic) { + return ConvertOldTopicName(GetRealTopic(topic)); +} + +TString GetAccount(const TString& topic) { + auto res = GetTopicPath(topic); + return res.substr(0, res.find("/")); +} + +TString GetProducer(const TString& topic) { + if (!CorrectName(topic)) + return "unknown"; + auto res = GetRealTopic(topic); + return res.substr(0, res.find("--")); +} + +TString ConvertNewTopicName(const TString& topic) { + TString t = NormalizePath(topic); + auto pos = t.rfind("/"); + if (pos == TString::npos) + return t; + TStringBuilder res; + for (ui32 i = 0; i < pos; ++i) { + if (t[i] == '/') res << '@'; + else res << t[i]; + } + res << "--"; + res << t.substr(pos + 1); + return res; +} + + +TString ConvertOldTopicName(const TString& topic) { + auto pos = topic.rfind("--"); + if (pos == TString::npos) + return topic; + TStringBuilder res; + for (ui32 i = 0; i < pos; ++i) { + if (topic[i] == '@') res << '/'; + else res << topic[i]; + } + res << "/"; + res << topic.substr(pos + 2); + return res; +} + +TString BuildFullTopicName(const TString& topicPath, const TString& topicDC) { + return "rt3." + topicDC + "--" + ConvertNewTopicName(topicPath); +} + +TString ConvertOldProducerName(const TString& producer) { + TStringBuilder res; + for (ui32 i = 0; i < producer.size(); ++i) { + if (producer[i] == '@') res << "/"; + else res << producer[i]; + } + return res; +} + + +TString NormalizePath(const TString& path) { + size_t st = 0; + size_t end = path.size(); + if (path.StartsWith("/")) st = 1; + if (path.EndsWith("/") && end > st) end--; + return path.substr(st, end - st); +} + + +TString ConvertNewConsumerName(const TString& consumer) { + TStringBuilder res; + ui32 pos = 0; + TString c = NormalizePath(consumer); + if (c.StartsWith("shared/")) + pos = 7; + for (ui32 i = pos; i < c.size(); ++i) { + if (c[i] == '/') res << "@"; + else res << c[i]; + } + return res; +} + +TString ConvertNewProducerName(const TString& producer) { + TStringBuilder res; + for (ui32 i = 0; i < producer.size(); ++i) { + if (producer[i] == '/') res << "@"; + else res << producer[i]; + } + return res; +} + + +TString ConvertOldConsumerName(const TString& consumer) { + TStringBuilder res; + bool shared = true; + for (ui32 i = 0; i < consumer.size(); ++i) { + if (consumer[i] == '@') { + res << "/"; + shared = false; + } else { + res << consumer[i]; + } + } + if (shared) + return TStringBuilder() << "shared/" << res; + return res; +} + + +} // namespace NPersQueue diff --git a/ydb/library/persqueue/topic_parser_public/topic_parser.h b/ydb/library/persqueue/topic_parser_public/topic_parser.h index 3c27a4bfcd..2a5a438722 100644 --- a/ydb/library/persqueue/topic_parser_public/topic_parser.h +++ b/ydb/library/persqueue/topic_parser_public/topic_parser.h @@ -1,33 +1,33 @@ -#pragma once - -#include <util/generic/string.h> -#include <util/string/builder.h> - -namespace NPersQueue { - -TString GetDC(const TString& topic); - -TString GetRealTopic(const TString& topic); - -TString BuildFullTopicName(const TString& topicPath, const TString& topicDC); - -TString GetProducer(const TString& topic); -TString GetAccount(const TString& topic); -TString GetTopicPath(const TString& topic); - -TString NormalizePath(const TString& path); - -bool CorrectName(const TString& topic); - -TString ConvertNewTopicName(const TString& topic); - -TString ConvertNewConsumerName(const TString& consumer); -TString ConvertNewProducerName(const TString& consumer); - - -TString ConvertOldTopicName(const TString& topic); -TString ConvertOldProducerName(const TString& producer); -TString ConvertOldConsumerName(const TString& consumer); - - -} // namespace NPersQueue +#pragma once + +#include <util/generic/string.h> +#include <util/string/builder.h> + +namespace NPersQueue { + +TString GetDC(const TString& topic); + +TString GetRealTopic(const TString& topic); + +TString BuildFullTopicName(const TString& topicPath, const TString& topicDC); + +TString GetProducer(const TString& topic); +TString GetAccount(const TString& topic); +TString GetTopicPath(const TString& topic); + +TString NormalizePath(const TString& path); + +bool CorrectName(const TString& topic); + +TString ConvertNewTopicName(const TString& topic); + +TString ConvertNewConsumerName(const TString& consumer); +TString ConvertNewProducerName(const TString& consumer); + + +TString ConvertOldTopicName(const TString& topic); +TString ConvertOldProducerName(const TString& producer); +TString ConvertOldConsumerName(const TString& consumer); + + +} // namespace NPersQueue diff --git a/ydb/library/persqueue/topic_parser_public/ya.make b/ydb/library/persqueue/topic_parser_public/ya.make index b0bd7de2a4..14b7a3d77e 100644 --- a/ydb/library/persqueue/topic_parser_public/ya.make +++ b/ydb/library/persqueue/topic_parser_public/ya.make @@ -1,14 +1,14 @@ -OWNER( - galaxycrab - g:kikimr - g:logbroker -) - -LIBRARY() - -SRCS( - topic_parser.h - topic_parser.cpp -) +OWNER( + galaxycrab + g:kikimr + g:logbroker +) -END() +LIBRARY() + +SRCS( + topic_parser.h + topic_parser.cpp +) + +END() diff --git a/ydb/library/yql/minikql/mkql_program_builder.cpp b/ydb/library/yql/minikql/mkql_program_builder.cpp index 1f9f4c6618..47ae6e4c8f 100644 --- a/ydb/library/yql/minikql/mkql_program_builder.cpp +++ b/ydb/library/yql/minikql/mkql_program_builder.cpp @@ -2301,11 +2301,11 @@ TRuntimeNode TProgramBuilder::BitNot(TRuntimeNode data) { return Invoke(__func__, data.GetStaticType(), args); } -TRuntimeNode TProgramBuilder::CountBits(TRuntimeNode data) { +TRuntimeNode TProgramBuilder::CountBits(TRuntimeNode data) { const std::array<TRuntimeNode, 1> args = {{ data }}; return Invoke(__func__, data.GetStaticType(), args); -} - +} + TRuntimeNode TProgramBuilder::BitAnd(TRuntimeNode data1, TRuntimeNode data2) { const std::array<TRuntimeNode, 2> args = {{ data1, data2 }}; return Invoke(__func__, BuildArithmeticCommonType(data1.GetStaticType(), data2.GetStaticType()), args); diff --git a/ydb/library/yql/minikql/mkql_program_builder.h b/ydb/library/yql/minikql/mkql_program_builder.h index 523fc54a97..10f1ad7ccf 100644 --- a/ydb/library/yql/minikql/mkql_program_builder.h +++ b/ydb/library/yql/minikql/mkql_program_builder.h @@ -521,7 +521,7 @@ public: //-- bit logical functions TRuntimeNode BitNot(TRuntimeNode data); - TRuntimeNode CountBits(TRuntimeNode data); + TRuntimeNode CountBits(TRuntimeNode data); TRuntimeNode BitAnd(TRuntimeNode data1, TRuntimeNode data2); TRuntimeNode BitOr(TRuntimeNode data1, TRuntimeNode data2); TRuntimeNode BitXor(TRuntimeNode data1, TRuntimeNode data2); diff --git a/ydb/public/api/grpc/draft/ya.make b/ydb/public/api/grpc/draft/ya.make index 1220a37e67..f63be96521 100644 --- a/ydb/public/api/grpc/draft/ya.make +++ b/ydb/public/api/grpc/draft/ya.make @@ -14,7 +14,7 @@ OWNER( SRCS( dummy.proto ydb_clickhouse_internal_v1.proto - ydb_persqueue_v1.proto + ydb_persqueue_v1.proto ydb_datastreams_v1.proto ydb_experimental_v1.proto ydb_s3_internal_v1.proto diff --git a/ydb/public/api/grpc/draft/ydb_persqueue_v1.proto b/ydb/public/api/grpc/draft/ydb_persqueue_v1.proto index 9c78b1281b..873ee38b3c 100644 --- a/ydb/public/api/grpc/draft/ydb_persqueue_v1.proto +++ b/ydb/public/api/grpc/draft/ydb_persqueue_v1.proto @@ -1,98 +1,98 @@ -syntax = "proto3"; +syntax = "proto3"; option cc_enable_arenas = true; - -package Ydb.PersQueue.V1; - + +package Ydb.PersQueue.V1; + option java_package = "com.yandex.ydb.persqueue.v1"; - + import "ydb/public/api/protos/ydb_persqueue_cluster_discovery.proto"; import "ydb/public/api/protos/ydb_persqueue_v1.proto"; - -service PersQueueService { - - /** - * Creates Write Session - * Pipeline: - * client server - * Init(Topic, SourceId, ...) - * ----------------> - * Init(Partition, MaxSeqNo, ...) - * <---------------- - * write(data1, seqNo1) - * ----------------> - * write(data2, seqNo2) - * ----------------> - * ack(seqNo1, offset1, ...) - * <---------------- - * write(data3, seqNo3) - * ----------------> - * ack(seqNo2, offset2, ...) - * <---------------- - * issue(description, ...) - * <---------------- - */ - + +service PersQueueService { + + /** + * Creates Write Session + * Pipeline: + * client server + * Init(Topic, SourceId, ...) + * ----------------> + * Init(Partition, MaxSeqNo, ...) + * <---------------- + * write(data1, seqNo1) + * ----------------> + * write(data2, seqNo2) + * ----------------> + * ack(seqNo1, offset1, ...) + * <---------------- + * write(data3, seqNo3) + * ----------------> + * ack(seqNo2, offset2, ...) + * <---------------- + * issue(description, ...) + * <---------------- + */ + rpc StreamingWrite(stream StreamingWriteClientMessage) returns (stream StreamingWriteServerMessage); - - /** - * Creates Read Session - * Pipeline: - * client server - * Init(Topics, ClientId, ...) - * ----------------> - * Init(SessionId) - * <---------------- - * read1 - * ----------------> - * read2 - * ----------------> - * assign(Topic1, Cluster, Partition1, ...) - assigns and releases are optional - * <---------------- - * assign(Topic2, Clutster, Partition2, ...) - * <---------------- - * start_read(Topic1, Partition1, ...) - client must respond to assign request with this message. Only after this client will start recieving messages from this partition - * ----------------> - * release(Topic1, Partition1, ...) - * <---------------- - * released(Topic1, Partition1, ...) - only after released server will give this parittion to other session. - * ----------------> - * start_read(Topic2, Partition2, ...) - client must respond to assign request with this message. Only after this client will start recieving messages from this partition - * ----------------> - * read data(data, ...) - * <---------------- - * commit(cookie1) - * ----------------> - * committed(cookie1) - * <---------------- - * issue(description, ...) - * <---------------- - */ + + /** + * Creates Read Session + * Pipeline: + * client server + * Init(Topics, ClientId, ...) + * ----------------> + * Init(SessionId) + * <---------------- + * read1 + * ----------------> + * read2 + * ----------------> + * assign(Topic1, Cluster, Partition1, ...) - assigns and releases are optional + * <---------------- + * assign(Topic2, Clutster, Partition2, ...) + * <---------------- + * start_read(Topic1, Partition1, ...) - client must respond to assign request with this message. Only after this client will start recieving messages from this partition + * ----------------> + * release(Topic1, Partition1, ...) + * <---------------- + * released(Topic1, Partition1, ...) - only after released server will give this parittion to other session. + * ----------------> + * start_read(Topic2, Partition2, ...) - client must respond to assign request with this message. Only after this client will start recieving messages from this partition + * ----------------> + * read data(data, ...) + * <---------------- + * commit(cookie1) + * ----------------> + * committed(cookie1) + * <---------------- + * issue(description, ...) + * <---------------- + */ rpc MigrationStreamingRead(stream MigrationStreamingReadClientMessage) returns (stream MigrationStreamingReadServerMessage); - - // Get information about reading - rpc GetReadSessionsInfo(ReadInfoRequest) returns (ReadInfoResponse); - - - /* - * Describe topic command. - */ - rpc DescribeTopic(DescribeTopicRequest) returns (DescribeTopicResponse); - - - /* - * Drop topic command. - */ - rpc DropTopic(DropTopicRequest) returns (DropTopicResponse); - - /* - * Create topic command. - */ - rpc CreateTopic(CreateTopicRequest) returns (CreateTopicResponse); - - /* - * Alter topic command. - */ - rpc AlterTopic(AlterTopicRequest) returns (AlterTopicResponse); + + // Get information about reading + rpc GetReadSessionsInfo(ReadInfoRequest) returns (ReadInfoResponse); + + + /* + * Describe topic command. + */ + rpc DescribeTopic(DescribeTopicRequest) returns (DescribeTopicResponse); + + + /* + * Drop topic command. + */ + rpc DropTopic(DropTopicRequest) returns (DropTopicResponse); + + /* + * Create topic command. + */ + rpc CreateTopic(CreateTopicRequest) returns (CreateTopicResponse); + + /* + * Alter topic command. + */ + rpc AlterTopic(AlterTopicRequest) returns (AlterTopicResponse); /* * Add read rule command. @@ -104,7 +104,7 @@ service PersQueueService { */ rpc RemoveReadRule(RemoveReadRuleRequest) returns (RemoveReadRuleResponse); } - + service ClusterDiscoveryService { // Get PQ clusters which are eligible for the specified Write or Read Sessions rpc DiscoverClusters(Ydb.PersQueue.ClusterDiscovery.DiscoverClustersRequest) returns (Ydb.PersQueue.ClusterDiscovery.DiscoverClustersResponse); diff --git a/ydb/public/api/protos/draft/datastreams.proto b/ydb/public/api/protos/draft/datastreams.proto index 28021116b3..5c1354288a 100644 --- a/ydb/public/api/protos/draft/datastreams.proto +++ b/ydb/public/api/protos/draft/datastreams.proto @@ -24,8 +24,8 @@ extend google.protobuf.FieldOptions { enum EncryptionType { ENCRYPTION_UNDEFINED = 0; - NONE = 1; - KMS = 2; + NONE = 1; + KMS = 2; } message EnhancedMetrics { @@ -122,7 +122,7 @@ message Shard { // Id of the shard's parent string parent_shard_id = 3 [(FieldTransformer) = TRANSFORM_EMPTY_TO_NOTHING]; // The range of possible sequence numbers for the shard - SequenceNumberRange sequence_number_range = 4; + SequenceNumberRange sequence_number_range = 4; // Unique id of the shard within stream string shard_id = 5; } @@ -149,15 +149,15 @@ message ConsumerDescription { enum ShardIteratorType { SHARD_ITERATOR_UNDEFINED = 0; // Start reading exactly from the position denoted by a sequence number - AT_SEQUENCE_NUMBER = 1; + AT_SEQUENCE_NUMBER = 1; // Start reading right after the specified position - AFTER_SEQUENCE_NUMBER = 2; + AFTER_SEQUENCE_NUMBER = 2; // Start reading from the position denoted by a specific timestamp - AT_TIMESTAMP = 3; + AT_TIMESTAMP = 3; // Start reading from the oldest record - TRIM_HORIZON = 4; + TRIM_HORIZON = 4; // Start reading latest records - LATEST = 5; + LATEST = 5; } // Parameters used to filter out the response of ListShards @@ -182,7 +182,7 @@ message ShardFilter { string shard_id = 1; // Can only be used if AT_TIMESTAMP or FROM_TIMESTAMP are specified. int64 timestamp = 2 [(FieldTransformer) = TRANSFORM_DOUBLE_S_TO_INT_MS]; - ShardFilterType type = 3; + ShardFilterType type = 3; } // Represents starting position in the stream from which to start reading @@ -295,8 +295,8 @@ message ListStreamsRequest { string exclusive_start_stream_name = 2; // Max number of entries to return int32 limit = 3; - // Will make recurse listing if set to true. Otherwise will return only streams from root directory. - bool recurse = 4; + // Will make recurse listing if set to true. Otherwise will return only streams from root directory. + bool recurse = 4; } message ListStreamsResponse { @@ -412,7 +412,7 @@ message DescribeStreamResponse { } message DescribeStreamResult { - StreamDescription stream_description = 1; + StreamDescription stream_description = 1; } message RegisterStreamConsumerRequest { @@ -478,7 +478,7 @@ message PutRecordsResultEntry { string error_message = 2 [(FieldTransformer) = TRANSFORM_EMPTY_TO_NOTHING]; string error_code = 3 [(FieldTransformer) = TRANSFORM_EMPTY_TO_NOTHING]; string sequence_number = 4; - string shard_id = 5; + string shard_id = 5; } message GetRecordsRequest { @@ -529,7 +529,7 @@ message PutRecordResult { // Unique id of the record within shard string sequence_number = 2; // Id of the shard were the data was inserted - string shard_id = 3; + string shard_id = 3; } message PutRecordsRequest { diff --git a/ydb/public/api/protos/draft/persqueue_error_codes.proto b/ydb/public/api/protos/draft/persqueue_error_codes.proto index 00f2f8e2ab..8b2a098631 100644 --- a/ydb/public/api/protos/draft/persqueue_error_codes.proto +++ b/ydb/public/api/protos/draft/persqueue_error_codes.proto @@ -1,44 +1,44 @@ -syntax = "proto3"; - -package NPersQueue.NErrorCode; +syntax = "proto3"; + +package NPersQueue.NErrorCode; option java_package = "com.yandex.ydb.persqueue"; - -enum EErrorCode { - OK = 0; - INITIALIZING = 1; - OVERLOAD = 2; - BAD_REQUEST = 3; - WRONG_COOKIE = 4; + +enum EErrorCode { + OK = 0; + INITIALIZING = 1; + OVERLOAD = 2; + BAD_REQUEST = 3; + WRONG_COOKIE = 4; SOURCEID_DELETED = 24; - - WRITE_ERROR_PARTITION_IS_FULL = 5; - WRITE_ERROR_DISK_IS_FULL = 15; + + WRITE_ERROR_PARTITION_IS_FULL = 5; + WRITE_ERROR_DISK_IS_FULL = 15; WRITE_ERROR_BAD_OFFSET = 19; - - CREATE_SESSION_ALREADY_LOCKED = 6; - DELETE_SESSION_NO_SESSION = 7; - - READ_ERROR_IN_PROGRESS = 8; - READ_ERROR_NO_SESSION = 9; - READ_TIMEOUT = 10; // TODO: move to pqlib codes - READ_ERROR_TOO_SMALL_OFFSET = 11; - READ_ERROR_TOO_BIG_OFFSET = 12; - - SET_OFFSET_ERROR_COMMIT_TO_FUTURE = 13; - - TABLET_IS_DROPPED = 14; - - READ_NOT_DONE = 16; - - UNKNOWN_TOPIC = 17; - + + CREATE_SESSION_ALREADY_LOCKED = 6; + DELETE_SESSION_NO_SESSION = 7; + + READ_ERROR_IN_PROGRESS = 8; + READ_ERROR_NO_SESSION = 9; + READ_TIMEOUT = 10; // TODO: move to pqlib codes + READ_ERROR_TOO_SMALL_OFFSET = 11; + READ_ERROR_TOO_BIG_OFFSET = 12; + + SET_OFFSET_ERROR_COMMIT_TO_FUTURE = 13; + + TABLET_IS_DROPPED = 14; + + READ_NOT_DONE = 16; + + UNKNOWN_TOPIC = 17; + ACCESS_DENIED = 18; CLUSTER_DISABLED = 20; - WRONG_PARTITION_NUMBER = 21; - - CREATE_TIMEOUT = 22; // TODO: move to pqlib codes - IDLE_TIMEOUT = 23; // TODO: move to pqlib codes - - ERROR = 100; -} + WRONG_PARTITION_NUMBER = 21; + + CREATE_TIMEOUT = 22; // TODO: move to pqlib codes + IDLE_TIMEOUT = 23; // TODO: move to pqlib codes + + ERROR = 100; +} diff --git a/ydb/public/api/protos/persqueue_error_codes_v1.proto b/ydb/public/api/protos/persqueue_error_codes_v1.proto index 0059fdd92a..c6658305c2 100644 --- a/ydb/public/api/protos/persqueue_error_codes_v1.proto +++ b/ydb/public/api/protos/persqueue_error_codes_v1.proto @@ -1,43 +1,43 @@ -syntax = "proto3"; - -package Ydb.PersQueue.ErrorCode; +syntax = "proto3"; + +package Ydb.PersQueue.ErrorCode; option java_package = "com.yandex.ydb.persqueue.errorcode"; - -//500000..500999 -enum ErrorCode { - - OK = 0; - - INITIALIZING = 500001; - OVERLOAD = 500002; - BAD_REQUEST = 500003; - WRONG_COOKIE = 500004; + +//500000..500999 +enum ErrorCode { + + OK = 0; + + INITIALIZING = 500001; + OVERLOAD = 500002; + BAD_REQUEST = 500003; + WRONG_COOKIE = 500004; SOURCEID_DELETED = 500024; - - WRITE_ERROR_PARTITION_IS_FULL = 500005; - WRITE_ERROR_DISK_IS_FULL = 500015; - WRITE_ERROR_BAD_OFFSET = 500019; - - CREATE_SESSION_ALREADY_LOCKED = 500006; - DELETE_SESSION_NO_SESSION = 500007; - - READ_ERROR_IN_PROGRESS = 500008; - READ_ERROR_NO_SESSION = 500009; - READ_ERROR_TOO_SMALL_OFFSET = 500011; - READ_ERROR_TOO_BIG_OFFSET = 500012; - - SET_OFFSET_ERROR_COMMIT_TO_FUTURE = 500013; - - TABLET_IS_DROPPED = 500014; - - READ_NOT_DONE = 500016; - - UNKNOWN_TOPIC = 500017; - - ACCESS_DENIED = 500018; - CLUSTER_DISABLED = 500020; - - WRONG_PARTITION_NUMBER = 500021; + + WRITE_ERROR_PARTITION_IS_FULL = 500005; + WRITE_ERROR_DISK_IS_FULL = 500015; + WRITE_ERROR_BAD_OFFSET = 500019; + + CREATE_SESSION_ALREADY_LOCKED = 500006; + DELETE_SESSION_NO_SESSION = 500007; + + READ_ERROR_IN_PROGRESS = 500008; + READ_ERROR_NO_SESSION = 500009; + READ_ERROR_TOO_SMALL_OFFSET = 500011; + READ_ERROR_TOO_BIG_OFFSET = 500012; + + SET_OFFSET_ERROR_COMMIT_TO_FUTURE = 500013; + + TABLET_IS_DROPPED = 500014; + + READ_NOT_DONE = 500016; + + UNKNOWN_TOPIC = 500017; + + ACCESS_DENIED = 500018; + CLUSTER_DISABLED = 500020; + + WRONG_PARTITION_NUMBER = 500021; PREFERRED_CLUSTER_MISMATCHED = 500022; - ERROR = 500100; -} + ERROR = 500100; +} diff --git a/ydb/public/api/protos/ya.make b/ydb/public/api/protos/ya.make index d77ed88032..8a156403a3 100644 --- a/ydb/public/api/protos/ya.make +++ b/ydb/public/api/protos/ya.make @@ -16,13 +16,13 @@ PEERDIR( SRCS( draft/datastreams.proto draft/persqueue_common.proto - draft/persqueue_error_codes.proto + draft/persqueue_error_codes.proto draft/ydb_long_tx.proto draft/ydb_logstore.proto draft/yq_private.proto - persqueue_error_codes_v1.proto + persqueue_error_codes_v1.proto ydb_auth.proto - ydb_persqueue_v1.proto + ydb_persqueue_v1.proto ydb_persqueue_cluster_discovery.proto ydb_clickhouse_internal.proto ydb_cms.proto @@ -48,12 +48,12 @@ SRCS( ) CPP_PROTO_PLUGIN0(validation ydb/core/grpc_services/validation) - + # .pb.h are only available in C++ variant of PROTO_LIBRARY IF (MODULE_TAG == "CPP_PROTO") GENERATE_ENUM_SERIALIZATION(draft/persqueue_common.pb.h) GENERATE_ENUM_SERIALIZATION(ydb_persqueue_cluster_discovery.pb.h) - GENERATE_ENUM_SERIALIZATION(draft/datastreams.pb.h) + GENERATE_ENUM_SERIALIZATION(draft/datastreams.pb.h) ENDIF() EXCLUDE_TAGS(GO_PROTO) diff --git a/ydb/public/api/protos/ydb_cms.proto b/ydb/public/api/protos/ydb_cms.proto index c0b8169a2e..1a569c7081 100644 --- a/ydb/public/api/protos/ydb_cms.proto +++ b/ydb/public/api/protos/ydb_cms.proto @@ -86,11 +86,11 @@ message DatabaseQuotas { // again. This is useful to help avoid database from rapidly entering and // exiting from the overloaded state. uint64 data_size_soft_quota = 2; - - // A maximum count of shards in all data streams. - uint64 data_stream_shards_quota = 3; - // A maximum storage that will be reserved for all data stream shards. - uint64 data_stream_reserved_storage_quota = 5; + + // A maximum count of shards in all data streams. + uint64 data_stream_shards_quota = 3; + // A maximum storage that will be reserved for all data stream shards. + uint64 data_stream_reserved_storage_quota = 5; // A minimum value of `TtlSettings.run_interval_seconds` that can be specified. // Default is 1800 (15 minutes). uint32 ttl_min_run_internal_seconds = 4; diff --git a/ydb/public/api/protos/ydb_persqueue_v1.proto b/ydb/public/api/protos/ydb_persqueue_v1.proto index 00421fa1e4..93a7fb6c79 100644 --- a/ydb/public/api/protos/ydb_persqueue_v1.proto +++ b/ydb/public/api/protos/ydb_persqueue_v1.proto @@ -1,16 +1,16 @@ -syntax = "proto3"; +syntax = "proto3"; import "ydb/public/api/protos/ydb_operation.proto"; import "ydb/public/api/protos/ydb_scheme.proto"; import "ydb/public/api/protos/ydb_status_codes.proto"; import "ydb/public/api/protos/ydb_issue_message.proto"; import "ydb/public/api/protos/annotations/validation.proto"; - + package Ydb.PersQueue.V1; - + option java_package = "com.yandex.ydb.persqueue"; -option cc_enable_arenas = true; - +option cc_enable_arenas = true; + // NOTE: // * We use 'ms' suffix instead of google.protobuf.Timestamp and google.protobuf.Duration in order to utilize // packed encoding ('message' types can't be packed encoded). In non-repeated fields we use 'ms' for consistency. @@ -26,44 +26,44 @@ enum Codec { message SessionMetaValue { map<string, string> value = 1; -} - +} + /** * Represents range [start_offset, end_offset). */ message OffsetsRange { int64 start_offset = 1; int64 end_offset = 2; -} - -/** +} + +/** * Request for write session. Contains one of: * InitRequest - handshake request. * WriteRequest - portion of data to be written. * UpdateTokenRequest - user credentials if update is needed. - */ + */ message StreamingWriteClientMessage { oneof client_message { InitRequest init_request = 1; WriteRequest write_request = 2; UpdateTokenRequest update_token_request = 3; } - - // Handshake request that must be sent to server first. + + // Handshake request that must be sent to server first. message InitRequest { - // Path of topic to write to. + // Path of topic to write to. string topic = 1; // message group identifier of client data stream a.k.a. sourceId. string message_group_id = 2; // Some user metadata attached to this write session. map<string, string> session_meta = 3; - // Partition group to write to. + // Partition group to write to. // Zero means any group. int64 partition_group_id = 4; - + int64 max_supported_block_format_version = 5; - + string session_id = 100; // 0 for first init message and incremental value for connect retries. Used for server logging. int64 connection_attempt = 101; @@ -75,8 +75,8 @@ message StreamingWriteClientMessage { // Sanity check option. When no writing activity is done in idle_timeout_sec seconds, then session will be destroyed. Zero means infinity. int64 idle_timeout_ms = 200; - } - + } + // Represents portion of client messages. message WriteRequest { // Sequence numbers of messages in order that client will provide to server. @@ -110,46 +110,46 @@ message StreamingWriteClientMessage { // In block format version 0 each byte contains only block codec identifier repeated bytes blocks_headers = 10; repeated bytes blocks_data = 11; - } - + } + // In-session reauthentication and reauthorization, lets user increase session lifetime. You should wait for 'update_token_response' before sending next 'update_token_request'. message UpdateTokenRequest { string token = 1; } -} - -/** +} + +/** * Response for write session. Contains one of: * InitResponse - correct handshake response. * BatchWriteResponse - acknowledgment of storing client messages. * UpdateTokenResponse - acknowledgment of reauthentication and reauthorization. - */ + */ message StreamingWriteServerMessage { oneof server_message { InitResponse init_response = 3; BatchWriteResponse batch_write_response = 4; UpdateTokenResponse update_token_response = 5; } - + // Server status of response. Ydb.StatusIds.StatusCode status = 1; // Issues if any. repeated Ydb.Issue.IssueMessage issues = 2; - // Response for handshake. + // Response for handshake. message InitResponse { // Last persisted message's sequence number for this message group. int64 last_sequence_number = 1; - // Unique identifier of write session. Used for debug purposes. - string session_id = 2; - // Path of topic that matched for this write session. Used for debug purposes, will be the same as in Init request from client. + // Unique identifier of write session. Used for debug purposes. + string session_id = 2; + // Path of topic that matched for this write session. Used for debug purposes, will be the same as in Init request from client. string topic = 3; - // Write session is established to this cluster. Client data will be in instance of topic in this cluster. - string cluster = 4; + // Write session is established to this cluster. Client data will be in instance of topic in this cluster. + string cluster = 4; // Identifier of partition that is matched for this write session. int64 partition_id = 5; - + // Block (see StreamingWriteClientMessage.WriteRequest.blocks_data) format version supported by server or configured for a topic. Client must write data only with them. int64 block_format_version = 6; // Client can only use compression codecs from this set to write messages to topic, session will be closed with BAD_REQUEST otherwise. @@ -163,8 +163,8 @@ message StreamingWriteServerMessage { // Opaque blob, used for fast reconnects. bytes connection_meta = 9; - } - + } + // Message that represents acknowledgment for sequence of client messages. This sequence is persisted together so write statistics is for messages batch. message BatchWriteResponse { // Sequence numbers of persisted client messages. @@ -185,58 +185,58 @@ message StreamingWriteServerMessage { message UpdateTokenResponse { } - // Message with write statistics. + // Message with write statistics. message WriteStatistics { - // Time spent in persisting of data. + // Time spent in persisting of data. int64 persist_duration_ms = 1; - // Time spent in queue before persisting. + // Time spent in queue before persisting. int64 queued_in_partition_duration_ms = 2; - // Time spent awaiting for partition write quota. + // Time spent awaiting for partition write quota. int64 throttled_on_partition_duration_ms = 3; // Time spent awaiting for topic write quota. int64 throttled_on_topic_duration_ms = 4; - } + } } - - + + message Path { // Path of object (topic/consumer). string path = 1; } - + message KeyValue { string key = 1; string value = 2; -} - -/** - * Single read parameters for server. - */ -message ReadParams { - // Max messages to give to client in one read request. - uint32 max_read_messages_count = 1; - // Max size in bytes to give to client in one read request. - uint32 max_read_size = 2; -} - -/** - * Message that is used for addressing read for commiting. - */ -message CommitCookie { - // Assign identitifier of assigned partition from which read was done. - uint64 assign_id = 1; - // Incremental identifier of concrete partition read batch. - uint64 partition_cookie = 2; -} - -message CommitOffsetRange { - uint64 assign_id = 1; - uint64 start_offset = 2; - uint64 end_offset = 3; -} - +} + +/** + * Single read parameters for server. + */ +message ReadParams { + // Max messages to give to client in one read request. + uint32 max_read_messages_count = 1; + // Max size in bytes to give to client in one read request. + uint32 max_read_size = 2; +} + +/** + * Message that is used for addressing read for commiting. + */ +message CommitCookie { + // Assign identitifier of assigned partition from which read was done. + uint64 assign_id = 1; + // Incremental identifier of concrete partition read batch. + uint64 partition_cookie = 2; +} + +message CommitOffsetRange { + uint64 assign_id = 1; + uint64 start_offset = 2; + uint64 end_offset = 3; +} + // TODO: replace with it actual protocol client message -/** +/** * Request for read session. Contains one of: * InitRequest - handshake request. * ReadRequest - request for data. @@ -657,14 +657,14 @@ message PartitionStream { } /** - * Request for read session. Contains one of : - * Init - handshake request. - * Read - request for data. - * Commit - request for commit of some read data. - * Start_read - signal for server that client is ready to get data from partition. - * Released - signal for server that client finished working with partition. Must be sent only after corresponding Release request from server. - */ - + * Request for read session. Contains one of : + * Init - handshake request. + * Read - request for data. + * Commit - request for commit of some read data. + * Start_read - signal for server that client is ready to get data from partition. + * Released - signal for server that client finished working with partition. Must be sent only after corresponding Release request from server. + */ + message MigrationStreamingReadClientMessage { message TopicReadSettings { // Topic path. @@ -676,31 +676,31 @@ message MigrationStreamingReadClientMessage { int64 start_from_written_at_ms = 3; } - // Handshake request. + // Handshake request. message InitRequest { // Message that describes topic to read. - // Topics that will be read by this session. + // Topics that will be read by this session. repeated TopicReadSettings topics_read_settings = 1; - // Flag that indicates reading only of original topics in cluster or all including mirrored. - bool read_only_original = 2; + // Flag that indicates reading only of original topics in cluster or all including mirrored. + bool read_only_original = 2; // Path of consumer that is used for reading by this session. string consumer = 3; - + // Skip all messages that has write timestamp smaller than now - max_time_lag_ms. int64 max_lag_duration_ms = 4; // Read data only after this timestamp from all topics. int64 start_from_written_at_ms = 5; - + // Maximum block format version supported by the client. Server will asses this parameter and return actual data blocks version in // StreamingReadServerMessage.InitResponse.block_format_version_by_topic (and StreamingReadServerMessage.AddTopicResponse.block_format_version) // or error if client will not be able to read data. int64 max_supported_block_format_version = 6; - + // Maximal size of client cache for message_group_id, ip and meta, per partition. // There is separate caches for each partition partition streams. // There is separate caches for message group identifiers, ip and meta inside one partition partition stream. int64 max_meta_cache_size = 10; - + // State of client read session. Could be provided to server for retries. message State { message PartitionStreamState { @@ -742,103 +742,103 @@ message MigrationStreamingReadClientMessage { // TODO: remove after refactoring // Single read request params. ReadParams read_params = 42; - - bool ranges_mode = 442; + + bool ranges_mode = 442; //////////////////////////////////////////////////////////////////////////////////////////////////////////// - } - - // Request of single read. - message Read { - } - - message StartRead { - // Topic path of partition. - Path topic = 1; - // Cluster of topic instance. - string cluster = 2; - // Partition identifier. Explicit only for debug purposes. - uint64 partition = 3; - - // Assign identifier of Assign request from server. Used for mathing Assign requests from server with StartRead responses from client. - uint64 assign_id = 5; - - // Start reading from partition offset that is not less than read_offset. - // ReadParams.max_time_lag_ms and ReadParams.read_timestamp_ms could lead to skip of more messages. - // The same with actual committed offset. Regardless of set read_offset server will return data from maximal offset from read_offset, actual committed offset - // and offsets calculated from ReadParams.max_time_lag_ms and ReadParams.read_timestamp_ms. - uint64 read_offset = 6; - // All messages with offset less than commit_offset are processed by client. Server will commit this position if this is not done yet. - uint64 commit_offset = 7; - - // This option will enable sanity check on server for read_offset. Server will verify that read_offset is no less that actual committed offset. - // If verification will fail then server will kill this read session and client will find out error in reading logic. - // If client is not setting read_offset, sanity check will fail so do not set verify_read_offset if you not setting correct read_offset. - bool verify_read_offset = 8; //if true then check that committed position is <= ReadOffset; otherwise it means error in client logic - - } - // Signal for server that client finished working with this partition. Must be sent only after corresponding Release request from server. - // Server will give this partition to other read session only after Released signal. - message Released { - // Topic path of partition. - Path topic = 1; - // Cluster of topic instance. - string cluster = 2; - // Partition identifier. Explicit only for debug purposes. - uint64 partition = 3; - - // Assign identifier of Assign request from server. Used for mathing Assign requests from server with Released responses from client. - uint64 assign_id = 5; - } - - // Signal for server that client processed some read data. - message Commit { - // Partition read cookies that indicates processed data. - repeated CommitCookie cookies = 1; - - repeated CommitOffsetRange offset_ranges = 2; - } - - message Status { - // Topic path of partition. - Path topic = 1; - // Cluster of topic instance. - string cluster = 2; - // Partition identifier. Explicit only for debug purposes. - uint64 partition = 3; - - // Assign identifier of Assign request from server. Used for mathing Assign requests from server with Released responses from client. - uint64 assign_id = 5; - - } - - oneof request { + } + + // Request of single read. + message Read { + } + + message StartRead { + // Topic path of partition. + Path topic = 1; + // Cluster of topic instance. + string cluster = 2; + // Partition identifier. Explicit only for debug purposes. + uint64 partition = 3; + + // Assign identifier of Assign request from server. Used for mathing Assign requests from server with StartRead responses from client. + uint64 assign_id = 5; + + // Start reading from partition offset that is not less than read_offset. + // ReadParams.max_time_lag_ms and ReadParams.read_timestamp_ms could lead to skip of more messages. + // The same with actual committed offset. Regardless of set read_offset server will return data from maximal offset from read_offset, actual committed offset + // and offsets calculated from ReadParams.max_time_lag_ms and ReadParams.read_timestamp_ms. + uint64 read_offset = 6; + // All messages with offset less than commit_offset are processed by client. Server will commit this position if this is not done yet. + uint64 commit_offset = 7; + + // This option will enable sanity check on server for read_offset. Server will verify that read_offset is no less that actual committed offset. + // If verification will fail then server will kill this read session and client will find out error in reading logic. + // If client is not setting read_offset, sanity check will fail so do not set verify_read_offset if you not setting correct read_offset. + bool verify_read_offset = 8; //if true then check that committed position is <= ReadOffset; otherwise it means error in client logic + + } + // Signal for server that client finished working with this partition. Must be sent only after corresponding Release request from server. + // Server will give this partition to other read session only after Released signal. + message Released { + // Topic path of partition. + Path topic = 1; + // Cluster of topic instance. + string cluster = 2; + // Partition identifier. Explicit only for debug purposes. + uint64 partition = 3; + + // Assign identifier of Assign request from server. Used for mathing Assign requests from server with Released responses from client. + uint64 assign_id = 5; + } + + // Signal for server that client processed some read data. + message Commit { + // Partition read cookies that indicates processed data. + repeated CommitCookie cookies = 1; + + repeated CommitOffsetRange offset_ranges = 2; + } + + message Status { + // Topic path of partition. + Path topic = 1; + // Cluster of topic instance. + string cluster = 2; + // Partition identifier. Explicit only for debug purposes. + uint64 partition = 3; + + // Assign identifier of Assign request from server. Used for mathing Assign requests from server with Released responses from client. + uint64 assign_id = 5; + + } + + oneof request { InitRequest init_request = 1; - Read read = 2; - StartRead start_read = 3; - Commit commit = 4; - Released released = 5; - Status status = 6; - } - - // User credentials if update is needed or empty string. - bytes token = 20; -} - - - -/** - * Response for read session. Contains one of : - * Inited - handshake response from server. - * Batched_data - result of single read. - * Committed - acknowledgment for commit. - * Assigned - signal from server for assigning of partition. - * Release - signal from server for releasing of partition. - */ - + Read read = 2; + StartRead start_read = 3; + Commit commit = 4; + Released released = 5; + Status status = 6; + } + + // User credentials if update is needed or empty string. + bytes token = 20; +} + + + +/** + * Response for read session. Contains one of : + * Inited - handshake response from server. + * Batched_data - result of single read. + * Committed - acknowledgment for commit. + * Assigned - signal from server for assigning of partition. + * Release - signal from server for releasing of partition. + */ + message MigrationStreamingReadServerMessage { - // Handshake response. + // Handshake response. message InitResponse { - // Read session identifier for debug purposes. + // Read session identifier for debug purposes. string session_id = 1; // Block format version of data client will receive from topics. map<string, int64> block_format_version_by_topic = 2; @@ -846,268 +846,268 @@ message MigrationStreamingReadServerMessage { // Choosed maximan cache size by server. // Client must use cache of this size. Could change on retries - reduce size of cache in this case. int64 max_meta_cache_size = 10; - } - - // Signal that partition is assigned to this read session. Client must react on this signal by sending StartRead when ready. - message Assigned { - // Partition's topic path. - Path topic = 1; - // Topic's instance cluster name. - string cluster = 2; - // Partition identifier. topic:cluster:partition is unique addressing of partition. - uint64 partition = 3; - - // Assign idenfier. Is not unique between diffrent partitions. Used for matching Assigned request from server and StartRead response from client. - uint64 assign_id = 5; - - // Actual read offset. Equeal to last committed offset. - uint64 read_offset = 6; - // Offset of first not existing message in partition at this time. - uint64 end_offset = 7; - } - - // Partition release request from server. - message Release { - // Partition's topic path. - Path topic = 1; - // Topic's instance cluster name. - string cluster = 2; - // Partition identifier. topic:cluster:partition is unique addressing of partition. - uint64 partition = 3; - - // Assign idenfier. Used for matching Assigned and Release requests from server. - uint64 assign_id = 5; - - // If False then server is waiting for Released signal from client before giving of this partition for other read session. - // If True then server gives partition for other session right now. All futher commits for this partition has no effect. Server is not waiting for Released signal. - bool forceful_release = 6; - // Last known committed offset. - uint64 commit_offset = 7; - } - - // Acknowledgement for commits. - message Committed { - // List of cookies that correspond to commit of processing read data. - repeated CommitCookie cookies = 1; - - repeated CommitOffsetRange offset_ranges = 2; - - } - - // Readed data. - message DataBatch { - // One client message representation. - message MessageData { - // Partition offset in partition that assigned for message. - uint64 offset = 1; //unique value for clientside deduplication - Topic:Cluster:Partition:Offset - // Sequence number that provided with message on write from client. - uint64 seq_no = 2; - // Timestamp of creation of message provided on write from client. - uint64 create_timestamp_ms = 3; - // Codec that is used for data compressing. + } + + // Signal that partition is assigned to this read session. Client must react on this signal by sending StartRead when ready. + message Assigned { + // Partition's topic path. + Path topic = 1; + // Topic's instance cluster name. + string cluster = 2; + // Partition identifier. topic:cluster:partition is unique addressing of partition. + uint64 partition = 3; + + // Assign idenfier. Is not unique between diffrent partitions. Used for matching Assigned request from server and StartRead response from client. + uint64 assign_id = 5; + + // Actual read offset. Equeal to last committed offset. + uint64 read_offset = 6; + // Offset of first not existing message in partition at this time. + uint64 end_offset = 7; + } + + // Partition release request from server. + message Release { + // Partition's topic path. + Path topic = 1; + // Topic's instance cluster name. + string cluster = 2; + // Partition identifier. topic:cluster:partition is unique addressing of partition. + uint64 partition = 3; + + // Assign idenfier. Used for matching Assigned and Release requests from server. + uint64 assign_id = 5; + + // If False then server is waiting for Released signal from client before giving of this partition for other read session. + // If True then server gives partition for other session right now. All futher commits for this partition has no effect. Server is not waiting for Released signal. + bool forceful_release = 6; + // Last known committed offset. + uint64 commit_offset = 7; + } + + // Acknowledgement for commits. + message Committed { + // List of cookies that correspond to commit of processing read data. + repeated CommitCookie cookies = 1; + + repeated CommitOffsetRange offset_ranges = 2; + + } + + // Readed data. + message DataBatch { + // One client message representation. + message MessageData { + // Partition offset in partition that assigned for message. + uint64 offset = 1; //unique value for clientside deduplication - Topic:Cluster:Partition:Offset + // Sequence number that provided with message on write from client. + uint64 seq_no = 2; + // Timestamp of creation of message provided on write from client. + uint64 create_timestamp_ms = 3; + // Codec that is used for data compressing. Codec codec = 4; - // Compressed client message body. - bytes data = 5; - // Uncompressed size of client message body. - uint64 uncompressed_size = 6; + // Compressed client message body. + bytes data = 5; + // Uncompressed size of client message body. + uint64 uncompressed_size = 6; // kinesis data string partition_key = 7; bytes explicit_hash = 8; - } - - // Representation of sequence of client messages from one write session. - message Batch { - // Source identifier provided by client for this batch of client messages. - bytes source_id = 2; - // Client metadata attached to write session, the same for all messages in batch. - repeated KeyValue extra_fields = 3; - // Persist timestamp on server for batch. - uint64 write_timestamp_ms = 4; - // Peer address of node that created write session. - string ip = 5; - - // List of client messages. - repeated MessageData message_data = 1; - } - - // Representation of sequence of messages from one partition. - message PartitionData { - // Partition's topic path. - Path topic = 1; - // Topic's instance cluster name. - string cluster = 2; - // Partition identifier. topic:cluster:partition is unique addressing for partition. - uint64 partition = 3; - - // Client messages, divided by write sessions. - repeated Batch batches = 4; - - // Cookie for addressing this partition messages batch for committing. - CommitCookie cookie = 5; - - // Old formatted topic name with cluster inside. - string deprecated_topic = 10; - } - - // Client messages, divided by partitions. - repeated PartitionData partition_data = 1; - } - - // Response for status requst. - message PartitionStatus { - // Partition's topic path. - Path topic = 1; - // Topic's instance cluster name. - string cluster = 2; - // Partition identifier. topic:cluster:partition is unique addressing of partition. - uint64 partition = 3; - - // Assign idenfier. Used for matching Assigned and Release requests from server. - uint64 assign_id = 5; - - uint64 committed_offset = 6; - uint64 end_offset = 7; - uint64 write_watermark_ms = 8; - } - Ydb.StatusIds.StatusCode status = 1; - - repeated Ydb.Issue.IssueMessage issues = 2; - - oneof response { + } + + // Representation of sequence of client messages from one write session. + message Batch { + // Source identifier provided by client for this batch of client messages. + bytes source_id = 2; + // Client metadata attached to write session, the same for all messages in batch. + repeated KeyValue extra_fields = 3; + // Persist timestamp on server for batch. + uint64 write_timestamp_ms = 4; + // Peer address of node that created write session. + string ip = 5; + + // List of client messages. + repeated MessageData message_data = 1; + } + + // Representation of sequence of messages from one partition. + message PartitionData { + // Partition's topic path. + Path topic = 1; + // Topic's instance cluster name. + string cluster = 2; + // Partition identifier. topic:cluster:partition is unique addressing for partition. + uint64 partition = 3; + + // Client messages, divided by write sessions. + repeated Batch batches = 4; + + // Cookie for addressing this partition messages batch for committing. + CommitCookie cookie = 5; + + // Old formatted topic name with cluster inside. + string deprecated_topic = 10; + } + + // Client messages, divided by partitions. + repeated PartitionData partition_data = 1; + } + + // Response for status requst. + message PartitionStatus { + // Partition's topic path. + Path topic = 1; + // Topic's instance cluster name. + string cluster = 2; + // Partition identifier. topic:cluster:partition is unique addressing of partition. + uint64 partition = 3; + + // Assign idenfier. Used for matching Assigned and Release requests from server. + uint64 assign_id = 5; + + uint64 committed_offset = 6; + uint64 end_offset = 7; + uint64 write_watermark_ms = 8; + } + Ydb.StatusIds.StatusCode status = 1; + + repeated Ydb.Issue.IssueMessage issues = 2; + + oneof response { InitResponse init_response = 3; - DataBatch data_batch = 4; - Assigned assigned = 5; - Release release = 6; - Committed committed = 7; - PartitionStatus partition_status = 8; - } -} - -/** - * Reading information request sent from client to server. - */ - -message ReadInfoRequest { - Ydb.Operations.OperationParams operation_params = 1; - // List of topics that are beeing read. - repeated Path topics = 2; - // If get_only_original == false then return info about mirrored topics too. - bool get_only_original = 3; - // Consumer path that is reading specified topics. - Path consumer = 4; -} - - -/** - * Reading information response sent from server to client. - */ - -message ReadInfoResponse { - // Result of request will be inside operation. - Ydb.Operations.Operation operation = 1; -} - -/** - * Reading information message that will be inside ReadInfoResponse.operation. - */ - -message ReadInfoResult { - // Message containing information about concrete topic reading. - message TopicInfo { - // Message containing information about concrete topic's partition reading. - message PartitionInfo { - // Patition identifier inside topic. - uint64 partition = 1; - - // Request status of partition. - Ydb.StatusIds.StatusCode status = 2; - // Issues if any. - repeated Ydb.Issue.IssueMessage issues = 3; - - // Offset of first message in partition. - uint64 start_offset = 4; - // Offset of next not yet existing message in partition. - uint64 end_offset = 5; - - // Offset of consumer committed message a.k.a. first not processed message. - // If commit_offset == end_offset then all messages from partition are processed. - uint64 commit_offset = 6; - // Consumer lag in time between committed and last messages in partition. - uint64 commit_time_lag_ms = 7; - - // Offset of first not read message by consumer from this partition. - // read_offset can be bigger that committed_offset - consumer could read some messages but not yet commit them. - uint64 read_offset = 8; - // Consumer lag in time between read and last messages in partition. - uint64 read_time_lag_ms = 9; - - // Session identifier that locked and reading this partition right now. - string session_id = 10; - // Ip if node that created reading this session. - string client_node = 11; - // Host name of proxy node that processing this reading session. - string proxy_node = 12; + DataBatch data_batch = 4; + Assigned assigned = 5; + Release release = 6; + Committed committed = 7; + PartitionStatus partition_status = 8; + } +} + +/** + * Reading information request sent from client to server. + */ + +message ReadInfoRequest { + Ydb.Operations.OperationParams operation_params = 1; + // List of topics that are beeing read. + repeated Path topics = 2; + // If get_only_original == false then return info about mirrored topics too. + bool get_only_original = 3; + // Consumer path that is reading specified topics. + Path consumer = 4; +} + + +/** + * Reading information response sent from server to client. + */ + +message ReadInfoResponse { + // Result of request will be inside operation. + Ydb.Operations.Operation operation = 1; +} + +/** + * Reading information message that will be inside ReadInfoResponse.operation. + */ + +message ReadInfoResult { + // Message containing information about concrete topic reading. + message TopicInfo { + // Message containing information about concrete topic's partition reading. + message PartitionInfo { + // Patition identifier inside topic. + uint64 partition = 1; + + // Request status of partition. + Ydb.StatusIds.StatusCode status = 2; + // Issues if any. + repeated Ydb.Issue.IssueMessage issues = 3; + + // Offset of first message in partition. + uint64 start_offset = 4; + // Offset of next not yet existing message in partition. + uint64 end_offset = 5; + + // Offset of consumer committed message a.k.a. first not processed message. + // If commit_offset == end_offset then all messages from partition are processed. + uint64 commit_offset = 6; + // Consumer lag in time between committed and last messages in partition. + uint64 commit_time_lag_ms = 7; + + // Offset of first not read message by consumer from this partition. + // read_offset can be bigger that committed_offset - consumer could read some messages but not yet commit them. + uint64 read_offset = 8; + // Consumer lag in time between read and last messages in partition. + uint64 read_time_lag_ms = 9; + + // Session identifier that locked and reading this partition right now. + string session_id = 10; + // Ip if node that created reading this session. + string client_node = 11; + // Host name of proxy node that processing this reading session. + string proxy_node = 12; // Host name of node where partition leader is running. - string tablet_node = 13; - - // Assign identifier of actual partition assignment. - uint64 assign_id = 14; - // Timestamp of assignment. - uint64 assign_timestamp_ms = 15; - // Cookie of last performed read in session. - uint64 last_read_cookie = 16; - // Cookie upto whitch commits done. - uint64 committed_read_cookie = 17; - // Cookie that client wants to commit, but server is waiting for committed_read_cookie + 1. - repeated uint64 out_of_order_read_cookies_to_commit = 18; - } - // Topic path. - Path topic = 1; - // Topic original cluster. - string cluster = 2; - - // Status of whole topic. - Ydb.StatusIds.StatusCode status = 3; - // Issues if any. - repeated Ydb.Issue.IssueMessage issues = 4; - - // Reading info for partitions of this topic. - repeated PartitionInfo partitions = 5; - } - - // List of topics info. - repeated TopicInfo topics = 1; -} - - -/** - * Drop topic request sent from client to server. - */ - -message DropTopicRequest { - Ydb.Operations.OperationParams operation_params = 1; - // Topic path. - string path = 2; -} - - -/** - * Drop topic response sent from server to client. If topic is not existed then response status will be "SCHEME_ERROR". - */ - -message DropTopicResponse { - // Result of request will be inside operation. - Ydb.Operations.Operation operation = 1; -} - -/** - * Drop topic result message that will be inside DropTopicResponse.operation. - */ - -message DropTopicResult { -} - -/** + string tablet_node = 13; + + // Assign identifier of actual partition assignment. + uint64 assign_id = 14; + // Timestamp of assignment. + uint64 assign_timestamp_ms = 15; + // Cookie of last performed read in session. + uint64 last_read_cookie = 16; + // Cookie upto whitch commits done. + uint64 committed_read_cookie = 17; + // Cookie that client wants to commit, but server is waiting for committed_read_cookie + 1. + repeated uint64 out_of_order_read_cookies_to_commit = 18; + } + // Topic path. + Path topic = 1; + // Topic original cluster. + string cluster = 2; + + // Status of whole topic. + Ydb.StatusIds.StatusCode status = 3; + // Issues if any. + repeated Ydb.Issue.IssueMessage issues = 4; + + // Reading info for partitions of this topic. + repeated PartitionInfo partitions = 5; + } + + // List of topics info. + repeated TopicInfo topics = 1; +} + + +/** + * Drop topic request sent from client to server. + */ + +message DropTopicRequest { + Ydb.Operations.OperationParams operation_params = 1; + // Topic path. + string path = 2; +} + + +/** + * Drop topic response sent from server to client. If topic is not existed then response status will be "SCHEME_ERROR". + */ + +message DropTopicResponse { + // Result of request will be inside operation. + Ydb.Operations.Operation operation = 1; +} + +/** + * Drop topic result message that will be inside DropTopicResponse.operation. + */ + +message DropTopicResult { +} + +/** * Credentials settings */ @@ -1124,66 +1124,66 @@ message Credentials { } /** - * Message for describing topic internals. - */ - -message TopicSettings { - enum Format { - FORMAT_UNSPECIFIED = 0; - FORMAT_BASE = 1; - } - - // How many partitions in topic. Must less than database limit. Default limit - 10. + * Message for describing topic internals. + */ + +message TopicSettings { + enum Format { + FORMAT_UNSPECIFIED = 0; + FORMAT_BASE = 1; + } + + // How many partitions in topic. Must less than database limit. Default limit - 10. int32 partitions_count = 1 [(value) = "> 0"]; - // How long data in partition should be stored. Must be greater than 0 and less than limit for this database. Default limit - 36 hours. + // How long data in partition should be stored. Must be greater than 0 and less than limit for this database. Default limit - 36 hours. int64 retention_period_ms = 2 [(value) = "> 0"]; // How long last written seqno for message group should be stored. Must be greater then retention_period_ms and less then limit for this database. Default limit - 16 days. int64 message_group_seqno_retention_period_ms = 12 [(value) = ">= 0"]; // How many last written seqno for various message groups should be stored per partition. Must be less than limit for this database. Default limit - 6*10^6 values. int64 max_partition_message_groups_seqno_stored = 13 [(value) = ">= 0"]; - // Max format version that is allowed for writers. Must be value from enum FormatVersion. - // Writes with greater format version are forbiden. - Format supported_format = 3; - // List of allowed codecs for writers. - // Writes with codec not from this list are forbiden. - repeated Codec supported_codecs = 4 [(size).le = 100]; - // Max storage usage for each topic's partition. Must be less than database limit. Default limit - 130 GB. + // Max format version that is allowed for writers. Must be value from enum FormatVersion. + // Writes with greater format version are forbiden. + Format supported_format = 3; + // List of allowed codecs for writers. + // Writes with codec not from this list are forbiden. + repeated Codec supported_codecs = 4 [(size).le = 100]; + // Max storage usage for each topic's partition. Must be less than database limit. Default limit - 130 GB. int64 max_partition_storage_size = 5 [(value) = ">= 0"]; - // Partition write speed in bytes per second. Must be less than database limit. Default limit - 1 MB/s. + // Partition write speed in bytes per second. Must be less than database limit. Default limit - 1 MB/s. int64 max_partition_write_speed = 6 [(value) = ">= 0"]; - // Burst size for write in partition, in bytes. Must be less than database limit. Default limit - 1 MB. + // Burst size for write in partition, in bytes. Must be less than database limit. Default limit - 1 MB. int64 max_partition_write_burst = 7 [(value) = ">= 0"]; - - // Disallows client writes. Used for mirrored topics in federation. - bool client_write_disabled = 8; - // Message for read rules description. - message ReadRule { - // For what consumer this read rule is. Must be valid not empty consumer name. - // Is key for read rules. There could be only one read rule with corresponding consumer name. - string consumer_name = 1 [(required) = true]; - // Flag that this consumer is important. - bool important = 2; - // All messages with smaller timestamp of write will be skipped. + + // Disallows client writes. Used for mirrored topics in federation. + bool client_write_disabled = 8; + // Message for read rules description. + message ReadRule { + // For what consumer this read rule is. Must be valid not empty consumer name. + // Is key for read rules. There could be only one read rule with corresponding consumer name. + string consumer_name = 1 [(required) = true]; + // Flag that this consumer is important. + bool important = 2; + // All messages with smaller timestamp of write will be skipped. int64 starting_message_timestamp_ms = 3 [(value) = ">= 0"]; - // Max format version that is supported by this consumer. - // supported_format on topic must not be greater. - Format supported_format = 4; - // List of supported codecs by this consumer. - // supported_codecs on topic must be contained inside this list. - repeated Codec supported_codecs = 5 [(size).le = 100]; - - // Read rule version. Any non-negative integer. + // Max format version that is supported by this consumer. + // supported_format on topic must not be greater. + Format supported_format = 4; + // List of supported codecs by this consumer. + // supported_codecs on topic must be contained inside this list. + repeated Codec supported_codecs = 5 [(size).le = 100]; + + // Read rule version. Any non-negative integer. int64 version = 6 [(value) = ">= 0"]; // Client service type. string service_type = 7; - } - - // List of consumer read rules for this topic. - repeated ReadRule read_rules = 9 [(size).le = 3000]; - - // User and server attributes of topic. Server attributes starts from "_" and will be validated by server. - map<string, string> attributes = 10; + } + + // List of consumer read rules for this topic. + repeated ReadRule read_rules = 9 [(size).le = 3000]; + + // User and server attributes of topic. Server attributes starts from "_" and will be validated by server. + map<string, string> attributes = 10; // Message for remote mirror rule description. message RemoteMirrorRule { @@ -1202,66 +1202,66 @@ message TopicSettings { } // remote mirror rule for this topic. RemoteMirrorRule remote_mirror_rule = 11; -} - -/** - * Create topic request sent from client to server. - */ - -message CreateTopicRequest { - Ydb.Operations.OperationParams operation_params = 1; - // Topic path. - string path = 2 [(required) = true]; - // Topic settings. - TopicSettings settings = 4; -} - - -/** - * Create topic response sent from server to client. If topic is already exists then response status will be "ALREADY_EXISTS". - */ - -message CreateTopicResponse { - // Result of request will be inside operation. - Ydb.Operations.Operation operation = 1; -} - -/** - * Create topic result message that will be inside CreateTopicResponse.operation. - */ - -message CreateTopicResult { -} - -/** - * Update existing topic request sent from client to server. - */ - -message AlterTopicRequest { - Ydb.Operations.OperationParams operation_params = 1; - // Topic path. - string path = 2 [(required) = true]; - // New topic settings to be set. All options inside should be set despite same value. - TopicSettings settings = 4; -} - - -/** - * Update topic response sent from server to client. - */ - -message AlterTopicResponse { - // Result of request will be inside operation. - Ydb.Operations.Operation operation = 1; -} - -/** - * Update topic result message that will be inside UpdateTopicResponse.operation. - */ -message AlterTopicResult { -} - -/** +} + +/** + * Create topic request sent from client to server. + */ + +message CreateTopicRequest { + Ydb.Operations.OperationParams operation_params = 1; + // Topic path. + string path = 2 [(required) = true]; + // Topic settings. + TopicSettings settings = 4; +} + + +/** + * Create topic response sent from server to client. If topic is already exists then response status will be "ALREADY_EXISTS". + */ + +message CreateTopicResponse { + // Result of request will be inside operation. + Ydb.Operations.Operation operation = 1; +} + +/** + * Create topic result message that will be inside CreateTopicResponse.operation. + */ + +message CreateTopicResult { +} + +/** + * Update existing topic request sent from client to server. + */ + +message AlterTopicRequest { + Ydb.Operations.OperationParams operation_params = 1; + // Topic path. + string path = 2 [(required) = true]; + // New topic settings to be set. All options inside should be set despite same value. + TopicSettings settings = 4; +} + + +/** + * Update topic response sent from server to client. + */ + +message AlterTopicResponse { + // Result of request will be inside operation. + Ydb.Operations.Operation operation = 1; +} + +/** + * Update topic result message that will be inside UpdateTopicResponse.operation. + */ +message AlterTopicResult { +} + +/** * Add read rules for existing topic request. */ message AddReadRuleRequest { @@ -1314,34 +1314,34 @@ message RemoveReadRuleResult { /** - * Describe topic request sent from client to server. - */ - -message DescribeTopicRequest { - Ydb.Operations.OperationParams operation_params = 1; - // Topic path. - string path = 2 [(required) = true]; -} - - -/** - * Describe topic response sent from server to client. If topic is not existed then response status will be "SCHEME_ERROR". - */ - -message DescribeTopicResponse { - // Result of request will be inside operation. - Ydb.Operations.Operation operation = 1; -} - -/** - * Describe topic result message that will be inside DescribeTopicResponse.operation. - */ - -message DescribeTopicResult { - // Topic path. - Ydb.Scheme.Entry self = 1; - // Settings of topic. - TopicSettings settings = 2; - -} + * Describe topic request sent from client to server. + */ + +message DescribeTopicRequest { + Ydb.Operations.OperationParams operation_params = 1; + // Topic path. + string path = 2 [(required) = true]; +} + + +/** + * Describe topic response sent from server to client. If topic is not existed then response status will be "SCHEME_ERROR". + */ + +message DescribeTopicResponse { + // Result of request will be inside operation. + Ydb.Operations.Operation operation = 1; +} + +/** + * Describe topic result message that will be inside DescribeTopicResponse.operation. + */ + +message DescribeTopicResult { + // Topic path. + Ydb.Scheme.Entry self = 1; + // Settings of topic. + TopicSettings settings = 2; + +} diff --git a/ydb/public/lib/base/msgbus.h b/ydb/public/lib/base/msgbus.h index 6f43406d72..f11bd9771f 100644 --- a/ydb/public/lib/base/msgbus.h +++ b/ydb/public/lib/base/msgbus.h @@ -48,11 +48,11 @@ enum { MTYPE_CLIENT_FLAT_TX_STATUS_REQUEST = 10434, MTYPE_CLIENT_OLD_FLAT_DESCRIBE_REQUEST = 10435, // deprecated MTYPE_CLIENT_OLD_FLAT_DESCRIBE_RESPONSE = 10436, // deprecated - MTYPE_CLIENT_CREATE_TABLET = 10437, + MTYPE_CLIENT_CREATE_TABLET = 10437, MTYPE_CLIENT_LOAD_REQUEST = 10438, MTYPE_CLIENT_LOAD_RESPONSE = 10439, MTYPE_CLIENT_DIRECT_REQUEST_JOB_EXECUTION_STATUS = 10440, // deprecated - MTYPE_CLIENT_PERSQUEUE = 10441, + MTYPE_CLIENT_PERSQUEUE = 10441, MTYPE_CLIENT_DB_SCHEMA = 10443, MTYPE_CLIENT_DB_OPERATION = 10444, MTYPE_CLIENT_DB_RESPONSE = 10445, @@ -76,7 +76,7 @@ enum { MTYPE_CLIENT_CMS_REQUEST = 10465, MTYPE_CLIENT_CMS_RESPONSE = 10466, MTYPE_CLIENT_RESOURCE_BROKER_SET_CONFIG = 10467, - MTYPE_CLIENT_CHOOSE_PROXY = 10468, + MTYPE_CLIENT_CHOOSE_PROXY = 10468, MTYPE_CLIENT_SQS_REQUEST = 10469, MTYPE_CLIENT_SQS_RESPONSE = 10470, MTYPE_CLIENT_WHOAMI = 10471, @@ -140,7 +140,7 @@ struct TBusNodeRegistrationRequest : TBusMessage<TBusNodeRegistrationRequest, NK struct TBusNodeRegistrationResponse : TBusMessage<TBusNodeRegistrationResponse, NKikimrClient::TNodeRegistrationResponse, MTYPE_CLIENT_NODE_REGISTRATION_RESPONSE> {}; struct TBusCmsRequest : TBusMessage<TBusCmsRequest, NKikimrClient::TCmsRequest, MTYPE_CLIENT_CMS_REQUEST> {}; struct TBusCmsResponse : TBusMessage<TBusCmsResponse, NKikimrClient::TCmsResponse, MTYPE_CLIENT_CMS_RESPONSE> {}; -struct TBusChooseProxy : TBusMessage<TBusChooseProxy, NKikimrClient::TChooseProxyRequest, MTYPE_CLIENT_CHOOSE_PROXY> {}; +struct TBusChooseProxy : TBusMessage<TBusChooseProxy, NKikimrClient::TChooseProxyRequest, MTYPE_CLIENT_CHOOSE_PROXY> {}; struct TBusSqsRequest : TBusMessage<TBusSqsRequest, NKikimrClient::TSqsRequest, MTYPE_CLIENT_SQS_REQUEST> {}; struct TBusSqsResponse : TBusMessage<TBusSqsResponse, NKikimrClient::TSqsResponse, MTYPE_CLIENT_SQS_RESPONSE> {}; struct TBusWhoAmI : TBusMessage<TBusWhoAmI, NKikimrClient::TWhoAmI, MTYPE_CLIENT_WHOAMI> {}; @@ -210,7 +210,7 @@ public: RegisterType(new TBusKeyValue); RegisterType(new TBusOldKeyValue); RegisterType(new TBusKeyValueResponse); - RegisterType(new TBusPersQueue); + RegisterType(new TBusPersQueue); RegisterType(new TBusTabletKillRequest); RegisterType(new TBusTabletStateRequest); RegisterType(new TBusTabletCountersRequest); @@ -237,7 +237,7 @@ public: RegisterType(new TBusNodeRegistrationResponse); RegisterType(new TBusCmsRequest); RegisterType(new TBusCmsResponse); - RegisterType(new TBusChooseProxy); + RegisterType(new TBusChooseProxy); RegisterType(new TBusWhoAmI); RegisterType(new TBusStreamRequest); RegisterType(new TBusS3ListingRequest); diff --git a/ydb/public/lib/jwt/jwt.cpp b/ydb/public/lib/jwt/jwt.cpp index 5de14ef3ec..cbf77c46ff 100644 --- a/ydb/public/lib/jwt/jwt.cpp +++ b/ydb/public/lib/jwt/jwt.cpp @@ -1,58 +1,58 @@ -#undef __STDC_FORMAT_MACROS -#include <contrib/libs/jwt-cpp/jwt.h> - -#include "jwt.h" - -#include <library/cpp/json/json_reader.h> -#include <util/string/builder.h> - -namespace NYdb { - -TStringType MakeSignedJwt(const TJwtParams& params, const TDuration& lifetime) { - // this constant works on all envs (internal IAM, preprod cloud, prod cloud) - // according to potamus@, it's recommended audience - static const TStringType AUDIENCE{"https://iam.api.cloud.yandex.net/iam/v1/tokens"}; - const auto now = std::chrono::system_clock::now(); - const auto expire = now + std::chrono::milliseconds(lifetime.MilliSeconds()); - const auto token = jwt::create() - .set_key_id(params.KeyId) - .set_issuer(params.AccountId) - .set_issued_at(now) - .set_audience(AUDIENCE) - .set_expires_at(expire) - .sign(jwt::algorithm::ps256(params.PubKey, params.PrivKey)); - return TStringType{token}; -} - -TJwtParams ParseJwtParams(const TStringType& jsonParamsStr) { - NJson::TJsonValue json; - NJson::ReadJsonTree(jsonParamsStr, &json, true); - auto map = json.GetMap(); - TJwtParams result; - auto iter = map.find("id"); - if (iter == map.end()) - ythrow yexception() << "doesn't have \"id\" key"; - result.KeyId = iter->second.GetString(); - iter = map.find("service_account_id"); - if (iter == map.end()) { - iter = map.find("user_account_id"); - if (iter == map.end()) { - ythrow yexception() << "doesn't have \"service_account_id\" nor \"user_account_id\" key"; - } - } else if (map.find("user_account_id") != map.end()) { - ythrow yexception() << "both \"service_account_id\" and \"user_account_id\" keys are provided"; - } - result.AccountId = iter->second.GetString(); - iter = map.find("public_key"); - if (iter == map.end()) - ythrow yexception() << "doesn't have \"public_key\" key"; - result.PubKey = iter->second.GetString(); - iter = map.find("private_key"); - if (iter == map.end()) - ythrow yexception() << "doesn't have \"private_key\" key"; - result.PrivKey = iter->second.GetString(); - return result; -} - -} // namespace NYdb - +#undef __STDC_FORMAT_MACROS +#include <contrib/libs/jwt-cpp/jwt.h> + +#include "jwt.h" + +#include <library/cpp/json/json_reader.h> +#include <util/string/builder.h> + +namespace NYdb { + +TStringType MakeSignedJwt(const TJwtParams& params, const TDuration& lifetime) { + // this constant works on all envs (internal IAM, preprod cloud, prod cloud) + // according to potamus@, it's recommended audience + static const TStringType AUDIENCE{"https://iam.api.cloud.yandex.net/iam/v1/tokens"}; + const auto now = std::chrono::system_clock::now(); + const auto expire = now + std::chrono::milliseconds(lifetime.MilliSeconds()); + const auto token = jwt::create() + .set_key_id(params.KeyId) + .set_issuer(params.AccountId) + .set_issued_at(now) + .set_audience(AUDIENCE) + .set_expires_at(expire) + .sign(jwt::algorithm::ps256(params.PubKey, params.PrivKey)); + return TStringType{token}; +} + +TJwtParams ParseJwtParams(const TStringType& jsonParamsStr) { + NJson::TJsonValue json; + NJson::ReadJsonTree(jsonParamsStr, &json, true); + auto map = json.GetMap(); + TJwtParams result; + auto iter = map.find("id"); + if (iter == map.end()) + ythrow yexception() << "doesn't have \"id\" key"; + result.KeyId = iter->second.GetString(); + iter = map.find("service_account_id"); + if (iter == map.end()) { + iter = map.find("user_account_id"); + if (iter == map.end()) { + ythrow yexception() << "doesn't have \"service_account_id\" nor \"user_account_id\" key"; + } + } else if (map.find("user_account_id") != map.end()) { + ythrow yexception() << "both \"service_account_id\" and \"user_account_id\" keys are provided"; + } + result.AccountId = iter->second.GetString(); + iter = map.find("public_key"); + if (iter == map.end()) + ythrow yexception() << "doesn't have \"public_key\" key"; + result.PubKey = iter->second.GetString(); + iter = map.find("private_key"); + if (iter == map.end()) + ythrow yexception() << "doesn't have \"private_key\" key"; + result.PrivKey = iter->second.GetString(); + return result; +} + +} // namespace NYdb + diff --git a/ydb/public/lib/jwt/jwt.h b/ydb/public/lib/jwt/jwt.h index 46d0f9a90b..6d60e43cfe 100644 --- a/ydb/public/lib/jwt/jwt.h +++ b/ydb/public/lib/jwt/jwt.h @@ -1,21 +1,21 @@ -#pragma once - +#pragma once + #include <ydb/public/sdk/cpp/client/impl/ydb_internal/common/type_switcher.h> -#include <util/datetime/base.h> - -namespace NYdb { - -struct TJwtParams { - TStringType PrivKey; - TStringType PubKey; - TStringType AccountId; - TStringType KeyId; -}; - -TJwtParams ParseJwtParams(const TStringType& jsonParamsStr); -TStringType MakeSignedJwt( - const TJwtParams& params, - const TDuration& lifetime = TDuration::Hours(1) -); - -} // namespace NYdb +#include <util/datetime/base.h> + +namespace NYdb { + +struct TJwtParams { + TStringType PrivKey; + TStringType PubKey; + TStringType AccountId; + TStringType KeyId; +}; + +TJwtParams ParseJwtParams(const TStringType& jsonParamsStr); +TStringType MakeSignedJwt( + const TJwtParams& params, + const TDuration& lifetime = TDuration::Hours(1) +); + +} // namespace NYdb diff --git a/ydb/public/lib/jwt/ya.make b/ydb/public/lib/jwt/ya.make index 1aeb73f65d..f902b7223a 100644 --- a/ydb/public/lib/jwt/ya.make +++ b/ydb/public/lib/jwt/ya.make @@ -1,20 +1,20 @@ -LIBRARY() - -OWNER( +LIBRARY() + +OWNER( alexnick - komels - g:kikimr -) - -SRCS( - jwt.cpp - jwt.h -) - -PEERDIR( + komels + g:kikimr +) + +SRCS( + jwt.cpp + jwt.h +) + +PEERDIR( contrib/libs/jwt-cpp - library/cpp/json + library/cpp/json ydb/public/sdk/cpp/client/impl/ydb_internal/common -) - -END() +) + +END() diff --git a/ydb/public/lib/ydb_cli/commands/ydb_service_stream.cpp b/ydb/public/lib/ydb_cli/commands/ydb_service_stream.cpp index 5b167d3238..24b478f881 100644 --- a/ydb/public/lib/ydb_cli/commands/ydb_service_stream.cpp +++ b/ydb/public/lib/ydb_cli/commands/ydb_service_stream.cpp @@ -265,4 +265,4 @@ namespace NYdb::NConsoleClient { ThrowOnError(status); return EXIT_SUCCESS; } -}// namespace NYdb::NConsoleClient +}// namespace NYdb::NConsoleClient diff --git a/ydb/public/lib/ydb_cli/commands/ydb_service_stream.h b/ydb/public/lib/ydb_cli/commands/ydb_service_stream.h index 01eb34393f..f16d3b39b7 100644 --- a/ydb/public/lib/ydb_cli/commands/ydb_service_stream.h +++ b/ydb/public/lib/ydb_cli/commands/ydb_service_stream.h @@ -87,4 +87,4 @@ namespace NYdb::NConsoleClient { private: TString ConsumerName_; }; -}// namespace NYdb::NConsoleClient +}// namespace NYdb::NConsoleClient diff --git a/ydb/public/sdk/cpp/client/ydb_datastreams/datastreams.cpp b/ydb/public/sdk/cpp/client/ydb_datastreams/datastreams.cpp index e257529536..f8297f4323 100644 --- a/ydb/public/sdk/cpp/client/ydb_datastreams/datastreams.cpp +++ b/ydb/public/sdk/cpp/client/ydb_datastreams/datastreams.cpp @@ -83,7 +83,7 @@ namespace NYdb::NDataStreams::V1 { [&](Ydb::DataStreams::V1::ListStreamsRequest& req) { req.set_exclusive_start_stream_name(settings.ExclusiveStartStreamName_); req.set_limit(settings.Limit_); - req.set_recurse(settings.Recurse_); + req.set_recurse(settings.Recurse_); }); } diff --git a/ydb/public/sdk/cpp/client/ydb_datastreams/datastreams.h b/ydb/public/sdk/cpp/client/ydb_datastreams/datastreams.h index 84a878283a..783f51080d 100644 --- a/ydb/public/sdk/cpp/client/ydb_datastreams/datastreams.h +++ b/ydb/public/sdk/cpp/client/ydb_datastreams/datastreams.h @@ -102,7 +102,7 @@ namespace NYdb::NDataStreams::V1 { struct TListStreamsSettings : public NYdb::TOperationRequestSettings<TListStreamsSettings> { FLUENT_SETTING(ui32, Limit); FLUENT_SETTING(TString, ExclusiveStartStreamName); - FLUENT_SETTING_DEFAULT(bool, Recurse, false); + FLUENT_SETTING_DEFAULT(bool, Recurse, false); }; struct TDeleteStreamSettings : public NYdb::TOperationRequestSettings<TDeleteStreamSettings> { FLUENT_SETTING_DEFAULT(bool, EnforceConsumerDeletion, false); diff --git a/ydb/public/sdk/cpp/client/ydb_driver/driver.cpp b/ydb/public/sdk/cpp/client/ydb_driver/driver.cpp index 2ca4b965ff..de17c6e679 100644 --- a/ydb/public/sdk/cpp/client/ydb_driver/driver.cpp +++ b/ydb/public/sdk/cpp/client/ydb_driver/driver.cpp @@ -42,7 +42,7 @@ public: bool GetDrinOnDtors() const override { return DrainOnDtors; } TBalancingSettings GetBalancingSettings() const override { return BalancingSettings; } TDuration GetGRpcKeepAliveTimeout() const override { return GRpcKeepAliveTimeout; } - bool GetGRpcKeepAlivePermitWithoutCalls() const override { return GRpcKeepAlivePermitWithoutCalls; } + bool GetGRpcKeepAlivePermitWithoutCalls() const override { return GRpcKeepAlivePermitWithoutCalls; } TDuration GetSocketIdleTimeout() const override { return SocketIdleTimeout; } ui64 GetMemoryQuota() const override { return MemoryQuota; } const TLog& GetLog() const override { return Log; } @@ -67,7 +67,7 @@ public: bool DrainOnDtors = true; TBalancingSettings BalancingSettings = TBalancingSettings{EBalancingPolicy::UsePreferableLocation, TStringType()}; TDuration GRpcKeepAliveTimeout; - bool GRpcKeepAlivePermitWithoutCalls = false; + bool GRpcKeepAlivePermitWithoutCalls = false; TDuration SocketIdleTimeout = TDuration::Minutes(6); ui64 MemoryQuota = 0; TLog Log; // Null by default. @@ -162,11 +162,11 @@ TDriverConfig& TDriverConfig::SetGRpcKeepAliveTimeout(TDuration timeout) { return *this; } -TDriverConfig& TDriverConfig::SetGRpcKeepAlivePermitWithoutCalls(bool permitWithoutCalls) { - Impl_->GRpcKeepAlivePermitWithoutCalls = permitWithoutCalls; - return *this; -} - +TDriverConfig& TDriverConfig::SetGRpcKeepAlivePermitWithoutCalls(bool permitWithoutCalls) { + Impl_->GRpcKeepAlivePermitWithoutCalls = permitWithoutCalls; + return *this; +} + TDriverConfig& TDriverConfig::SetSocketIdleTimeout(TDuration timeout) { Impl_->SocketIdleTimeout = timeout; return *this; diff --git a/ydb/public/sdk/cpp/client/ydb_driver/driver.h b/ydb/public/sdk/cpp/client/ydb_driver/driver.h index 3f25f8eea8..39c91d8852 100644 --- a/ydb/public/sdk/cpp/client/ydb_driver/driver.h +++ b/ydb/public/sdk/cpp/client/ydb_driver/driver.h @@ -96,14 +96,14 @@ public: //! even with fast network //! default: disabled TDriverConfig& SetGRpcKeepAliveTimeout(TDuration timeout); - TDriverConfig& SetGRpcKeepAlivePermitWithoutCalls(bool permitWithoutCalls); + TDriverConfig& SetGRpcKeepAlivePermitWithoutCalls(bool permitWithoutCalls); //! Set inactive socket timeout. //! Used to close connections, that were inactive for given time. //! Closes unused connections every 1/10 of timeout, so deletion time is approximate. //! Use TDuration::Max() to disable. //! default: 6 minutes TDriverConfig& SetSocketIdleTimeout(TDuration timeout); - + //! Log backend. TDriverConfig& SetLog(THolder<TLogBackend> log); private: diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/common.h b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/common.h index efc758ef85..7287b69894 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/common.h +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/common.h @@ -18,8 +18,8 @@ void Cancel(NGrpc::IQueueClientContextPtr& context); NYql::TIssues MakeIssueWithSubIssues(const TString& description, const NYql::TIssues& subissues); -TString IssuesSingleLineString(const NYql::TIssues& issues); - +TString IssuesSingleLineString(const NYql::TIssues& issues); + size_t CalcDataSize(const TReadSessionEvent::TEvent& event); template <class TMessage> @@ -184,8 +184,8 @@ std::shared_ptr<ISessionConnectionProcessorFactory<TRequest, TResponse>> return std::make_shared<TSessionConnectionProcessorFactory<TService, TRequest, TResponse>>(rpc, std::move(connections), std::move(dbState)); } - - + + template <class TEvent_> struct TBaseEventInfo { using TEvent = TEvent_; @@ -205,66 +205,66 @@ struct TBaseEventInfo { {} }; - -class ISignalable { -public: - ISignalable() = default; - virtual ~ISignalable() {} - virtual void Signal() = 0; -}; - -// Waiter on queue. -// Future or GetEvent call -class TWaiter { -public: - TWaiter() = default; - - TWaiter(const TWaiter&) = delete; - TWaiter& operator=(const TWaiter&) = delete; - TWaiter(TWaiter&&) = default; - TWaiter& operator=(TWaiter&&) = default; - - TWaiter(NThreading::TPromise<void>&& promise, ISignalable* self) - : Promise(promise) - , Future(promise.Initialized() ? Promise.GetFuture() : NThreading::TFuture<void>()) - , Self(self) - { - } - - void Signal() { - if (Self) { - Self->Signal(); - } - if (Promise.Initialized() && !Promise.HasValue()) { - Promise.SetValue(); - } - } - - bool Valid() const { - if (!Future.Initialized()) return false; - return !Promise.Initialized() || Promise.GetFuture().StateId() == Future.StateId(); - } - - NThreading::TPromise<void> ExtractPromise() { - NThreading::TPromise<void> promise; - Y_VERIFY(!promise.Initialized()); - std::swap(Promise, promise); - return promise; - } - - NThreading::TFuture<void> GetFuture() { - Y_VERIFY(Future.Initialized()); - return Future; - } - -private: - NThreading::TPromise<void> Promise; - NThreading::TFuture<void> Future; - ISignalable* Self = nullptr; -}; - - - + +class ISignalable { +public: + ISignalable() = default; + virtual ~ISignalable() {} + virtual void Signal() = 0; +}; + +// Waiter on queue. +// Future or GetEvent call +class TWaiter { +public: + TWaiter() = default; + + TWaiter(const TWaiter&) = delete; + TWaiter& operator=(const TWaiter&) = delete; + TWaiter(TWaiter&&) = default; + TWaiter& operator=(TWaiter&&) = default; + + TWaiter(NThreading::TPromise<void>&& promise, ISignalable* self) + : Promise(promise) + , Future(promise.Initialized() ? Promise.GetFuture() : NThreading::TFuture<void>()) + , Self(self) + { + } + + void Signal() { + if (Self) { + Self->Signal(); + } + if (Promise.Initialized() && !Promise.HasValue()) { + Promise.SetValue(); + } + } + + bool Valid() const { + if (!Future.Initialized()) return false; + return !Promise.Initialized() || Promise.GetFuture().StateId() == Future.StateId(); + } + + NThreading::TPromise<void> ExtractPromise() { + NThreading::TPromise<void> promise; + Y_VERIFY(!promise.Initialized()); + std::swap(Promise, promise); + return promise; + } + + NThreading::TFuture<void> GetFuture() { + Y_VERIFY(Future.Initialized()); + return Future; + } + +private: + NThreading::TPromise<void> Promise; + NThreading::TFuture<void> Future; + ISignalable* Self = nullptr; +}; + + + // Class that is responsible for: // - events queue; // - signalling futures that wait for events; @@ -272,7 +272,7 @@ private: // - waking up waiters. // Thread safe. template <class TSettings_, class TEvent_, class TEventInfo_ = TBaseEventInfo<TEvent_>> -class TBaseSessionEventsQueue : public ISignalable { +class TBaseSessionEventsQueue : public ISignalable { protected: using TSelf = TBaseSessionEventsQueue<TSettings_, TEvent_, TEventInfo_>; using TSettings = TSettings_; @@ -316,7 +316,7 @@ protected: }); } - virtual void Post(const IExecutor::TPtr& executor, IExecutor::TFunction&& f) { + virtual void Post(const IExecutor::TPtr& executor, IExecutor::TFunction&& f) { executor->Post(std::move(f)); } @@ -324,28 +324,28 @@ protected: TEventInfo& EventInfo; }; - + public: TBaseSessionEventsQueue(const TSettings& settings) : Settings(settings) - , Waiter(NThreading::NewPromise<void>(), this) + , Waiter(NThreading::NewPromise<void>(), this) {} virtual ~TBaseSessionEventsQueue() = default; - - void Signal() override { - CondVar.Signal(); - } - + + void Signal() override { + CondVar.Signal(); + } + protected: virtual bool HasEventsImpl() const { // Assumes that we're under lock. return !Events.empty() || CloseEvent; } TWaiter PopWaiterImpl() { // Assumes that we're under lock. - TWaiter waiter(Waiter.ExtractPromise(), this); - return std::move(waiter); + TWaiter waiter(Waiter.ExtractPromise(), this); + return std::move(waiter); } void WaitEventsImpl() { // Assumes that we're under lock. Posteffect: HasEventsImpl() is true. @@ -354,28 +354,28 @@ protected: } } - void RenewWaiterImpl() { - if (Events.empty() && Waiter.GetFuture().HasValue()) { - Waiter = TWaiter(NThreading::NewPromise<void>(), this); - } - } - + void RenewWaiterImpl() { + if (Events.empty() && Waiter.GetFuture().HasValue()) { + Waiter = TWaiter(NThreading::NewPromise<void>(), this); + } + } + public: NThreading::TFuture<void> WaitEvent() { with_lock (Mutex) { if (HasEventsImpl()) { return NThreading::MakeFuture(); // Signalled } else { - Y_VERIFY(Waiter.Valid()); - auto res = Waiter.GetFuture(); - return res; + Y_VERIFY(Waiter.Valid()); + auto res = Waiter.GetFuture(); + return res; } } } protected: const TSettings& Settings; - TWaiter Waiter; + TWaiter Waiter; std::queue<TEventInfo> Events; TCondVar CondVar; TMutex Mutex; diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/persqueue.cpp b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/persqueue.cpp index ac1a6e6803..a22ed23cea 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/persqueue.cpp +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/persqueue.cpp @@ -89,10 +89,10 @@ TDescribeTopicResult::TTopicSettings::TTopicSettings(const Ydb::PersQueue::V1::T MaxPartitionWriteSpeed_ = settings.max_partition_write_speed(); MaxPartitionWriteBurst_ = settings.max_partition_write_burst(); ClientWriteDisabled_ = settings.client_write_disabled(); - AllowUnauthenticatedRead_ = AllowUnauthenticatedWrite_ = false; - AbcId_ = 0; - AbcSlug_ = ""; - + AllowUnauthenticatedRead_ = AllowUnauthenticatedWrite_ = false; + AbcId_ = 0; + AbcSlug_ = ""; + for (auto& pair : settings.attributes()) { if (pair.first == "_partitions_per_tablet") { PartitionsPerTablet_ = FromString<ui32>(pair.second); @@ -100,10 +100,10 @@ TDescribeTopicResult::TTopicSettings::TTopicSettings(const Ydb::PersQueue::V1::T AllowUnauthenticatedRead_ = FromString<bool>(pair.second); } else if (pair.first == "_allow_unauthenticated_write") { AllowUnauthenticatedWrite_ = FromString<bool>(pair.second); - } else if (pair.first == "_abc_id") { - AbcId_ = FromString<ui32>(pair.second); - } else if (pair.first == "_abc_slug") { - AbcSlug_ = pair.second; + } else if (pair.first == "_abc_id") { + AbcId_ = FromString<ui32>(pair.second); + } else if (pair.first == "_abc_slug") { + AbcSlug_ = pair.second; } } for (const auto& readRule : settings.read_rules()) { diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/persqueue_impl.cpp b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/persqueue_impl.cpp index 70f7d6b981..bd466fd6f3 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/persqueue_impl.cpp +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/persqueue_impl.cpp @@ -1,4 +1,4 @@ -#include "persqueue_impl.h" +#include "persqueue_impl.h" #include "read_session.h" #include "write_session.h" @@ -26,7 +26,7 @@ std::shared_ptr<IWriteSession> TPersQueueClient::TImpl::CreateWriteSession( const TWriteSessionSettings& settings ) { TMaybe<TWriteSessionSettings> maybeSettings; - if (!settings.CompressionExecutor_ || !settings.EventHandlers_.HandlersExecutor_ || !settings.ClusterDiscoveryMode_) { + if (!settings.CompressionExecutor_ || !settings.EventHandlers_.HandlersExecutor_ || !settings.ClusterDiscoveryMode_) { maybeSettings = settings; with_lock (Lock) { if (!settings.CompressionExecutor_) { @@ -35,9 +35,9 @@ std::shared_ptr<IWriteSession> TPersQueueClient::TImpl::CreateWriteSession( if (!settings.EventHandlers_.HandlersExecutor_) { maybeSettings->EventHandlers_.HandlersExecutor(Settings.DefaultHandlersExecutor_); } - if (!settings.ClusterDiscoveryMode_) { - maybeSettings->ClusterDiscoveryMode(Settings.ClusterDiscoveryMode_); - } + if (!settings.ClusterDiscoveryMode_) { + maybeSettings->ClusterDiscoveryMode(Settings.ClusterDiscoveryMode_); + } } } auto session = std::make_shared<TWriteSession>( diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/persqueue_impl.h b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/persqueue_impl.h index dc68409897..57e9dd43bf 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/persqueue_impl.h +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/persqueue_impl.h @@ -64,8 +64,8 @@ public: (*props.mutable_attributes())["_partitions_per_tablet"] = TStringBuilder() << settings.PartitionsPerTablet_; (*props.mutable_attributes())["_allow_unauthenticated_read"] = settings.AllowUnauthenticatedRead_ ? "true" : "false"; (*props.mutable_attributes())["_allow_unauthenticated_write"] = settings.AllowUnauthenticatedWrite_ ? "true" : "false"; - if (settings.AbcId_) (*props.mutable_attributes())["_abc_id"] = TStringBuilder() << *settings.AbcId_; - if (settings.AbcSlug_) (*props.mutable_attributes())["_abc_slug"] = TStringBuilder() << *settings.AbcSlug_; + if (settings.AbcId_) (*props.mutable_attributes())["_abc_id"] = TStringBuilder() << *settings.AbcId_; + if (settings.AbcSlug_) (*props.mutable_attributes())["_abc_slug"] = TStringBuilder() << *settings.AbcSlug_; for (const auto& readRule : settings.ReadRules_) { diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/read_session.cpp b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/read_session.cpp index b5c75d9631..e7dd0a87e5 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/read_session.cpp +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/read_session.cpp @@ -1,4 +1,4 @@ -#include "persqueue_impl.h" +#include "persqueue_impl.h" #include "read_session.h" #include "common.h" @@ -12,7 +12,7 @@ #include <util/generic/utility.h> #include <util/generic/yexception.h> #include <util/stream/mem.h> -#include <util/system/env.h> +#include <util/system/env.h> #include <variant> @@ -20,8 +20,8 @@ namespace NYdb::NPersQueue { static const TString DRIVER_IS_STOPPING_DESCRIPTION = "Driver is stopping"; -static const bool RangesMode = !GetEnv("PQ_OFFSET_RANGES_MODE").empty(); - +static const bool RangesMode = !GetEnv("PQ_OFFSET_RANGES_MODE").empty(); + std::pair<ui64, ui64> GetMessageOffsetRange(const TReadSessionEvent::TDataReceivedEvent& dataReceivedEvent, ui64 index) { if (dataReceivedEvent.IsCompressedMessages()) { const auto& msg = dataReceivedEvent.GetCompressedMessages()[index]; @@ -100,11 +100,11 @@ void TReadSession::Start() { } Log << TLOG_INFO << "Starting read session"; - if (Settings.DisableClusterDiscovery_) { - ProceedWithoutClusterDiscovery(); - } else { - StartClusterDiscovery(); - } + if (Settings.DisableClusterDiscovery_) { + ProceedWithoutClusterDiscovery(); + } else { + StartClusterDiscovery(); + } } bool TReadSession::ValidateSettings() { @@ -166,61 +166,61 @@ void TReadSession::StartClusterDiscovery() { /*ClientTimeout_*/TDuration::Seconds(5)); // TODO: make client timeout setting } - -void TReadSession::ProceedWithoutClusterDiscovery() { - TDeferredActions deferred; - with_lock (Lock) { - if (Aborting) { - return; - } - - TString normalizedName = "null"; - THashMap<TString, TClusterSessionInfo>::iterator clusterSessionInfoIter; - clusterSessionInfoIter = ClusterSessions.emplace(normalizedName, normalizedName).first; - TClusterSessionInfo& clusterSessionInfo = clusterSessionInfoIter->second; - clusterSessionInfo.ClusterEndpoint = DbDriverState->DiscoveryEndpoint; + +void TReadSession::ProceedWithoutClusterDiscovery() { + TDeferredActions deferred; + with_lock (Lock) { + if (Aborting) { + return; + } + + TString normalizedName = "null"; + THashMap<TString, TClusterSessionInfo>::iterator clusterSessionInfoIter; + clusterSessionInfoIter = ClusterSessions.emplace(normalizedName, normalizedName).first; + TClusterSessionInfo& clusterSessionInfo = clusterSessionInfoIter->second; + clusterSessionInfo.ClusterEndpoint = DbDriverState->DiscoveryEndpoint; clusterSessionInfo.Topics = Settings.Topics_; - CreateClusterSessionsImpl(); - } - ScheduleDumpCountersToLog(); -} - -void TReadSession::CreateClusterSessionsImpl() { - TDeferredActions deferred; - // Create cluster sessions. - ui64 partitionStreamIdStart = 1; - const size_t clusterSessionsCount = ClusterSessions.size(); - for (auto& [clusterName, clusterSessionInfo] : ClusterSessions) { - TReadSessionSettings sessionSettings = Settings; - sessionSettings.Topics_ = clusterSessionInfo.Topics; - if (sessionSettings.MaxMemoryUsageBytes_ > clusterSessionsCount && sessionSettings.MaxMemoryUsageBytes_ != std::numeric_limits<size_t>::max()) { - sessionSettings.MaxMemoryUsageBytes_ /= clusterSessionsCount; - } - Log << TLOG_DEBUG << "Starting session to cluster " << clusterName << " (" << clusterSessionInfo.ClusterEndpoint << ")"; - auto subclient = Client->GetClientForEndpoint(clusterSessionInfo.ClusterEndpoint); - auto context = subclient->CreateContext(); - if (!context) { - AbortImpl(EStatus::ABORTED, DRIVER_IS_STOPPING_DESCRIPTION, deferred); - return; - } - TStringBuilder logPrefix; - logPrefix << GetDatabaseLogPrefix(DbDriverState->Database) << "[" << SessionId << "] [" << clusterName << "] "; - TLog log = Log; - log.SetFormatter(GetPrefixLogFormatter(logPrefix)); - clusterSessionInfo.Session = - std::make_shared<TSingleClusterReadSessionImpl>( - sessionSettings, - clusterName, - log, - subclient->CreateReadSessionConnectionProcessorFactory(), - EventsQueue, - ErrorHandler, - context, - partitionStreamIdStart++, clusterSessionsCount); - clusterSessionInfo.Session->Start(); - } -} - + CreateClusterSessionsImpl(); + } + ScheduleDumpCountersToLog(); +} + +void TReadSession::CreateClusterSessionsImpl() { + TDeferredActions deferred; + // Create cluster sessions. + ui64 partitionStreamIdStart = 1; + const size_t clusterSessionsCount = ClusterSessions.size(); + for (auto& [clusterName, clusterSessionInfo] : ClusterSessions) { + TReadSessionSettings sessionSettings = Settings; + sessionSettings.Topics_ = clusterSessionInfo.Topics; + if (sessionSettings.MaxMemoryUsageBytes_ > clusterSessionsCount && sessionSettings.MaxMemoryUsageBytes_ != std::numeric_limits<size_t>::max()) { + sessionSettings.MaxMemoryUsageBytes_ /= clusterSessionsCount; + } + Log << TLOG_DEBUG << "Starting session to cluster " << clusterName << " (" << clusterSessionInfo.ClusterEndpoint << ")"; + auto subclient = Client->GetClientForEndpoint(clusterSessionInfo.ClusterEndpoint); + auto context = subclient->CreateContext(); + if (!context) { + AbortImpl(EStatus::ABORTED, DRIVER_IS_STOPPING_DESCRIPTION, deferred); + return; + } + TStringBuilder logPrefix; + logPrefix << GetDatabaseLogPrefix(DbDriverState->Database) << "[" << SessionId << "] [" << clusterName << "] "; + TLog log = Log; + log.SetFormatter(GetPrefixLogFormatter(logPrefix)); + clusterSessionInfo.Session = + std::make_shared<TSingleClusterReadSessionImpl>( + sessionSettings, + clusterName, + log, + subclient->CreateReadSessionConnectionProcessorFactory(), + EventsQueue, + ErrorHandler, + context, + partitionStreamIdStart++, clusterSessionsCount); + clusterSessionInfo.Session->Start(); + } +} + void TReadSession::OnClusterDiscovery(const TStatus& status, const Ydb::PersQueue::ClusterDiscovery::DiscoverClustersResult& result) { TDeferredActions deferred; with_lock (Lock) { @@ -313,7 +313,7 @@ void TReadSession::OnClusterDiscovery(const TStatus& status, const Ydb::PersQueu return; } - CreateClusterSessionsImpl(); + CreateClusterSessionsImpl(); } ScheduleDumpCountersToLog(); } @@ -340,10 +340,10 @@ void TReadSession::RestartClusterDiscoveryImpl(TDuration delay, TDeferredActions bool TReadSession::Close(TDuration timeout) { Log << TLOG_INFO << "Closing read session. Close timeout: " << timeout; - with_lock (Lock) { - Cancel(ClusterDiscoveryDelayContext); - Cancel(DumpCountersContext); - } + with_lock (Lock) { + Cancel(ClusterDiscoveryDelayContext); + Cancel(DumpCountersContext); + } // Log final counters. DumpCountersToLog(); @@ -424,7 +424,7 @@ bool TReadSession::Close(TDuration timeout) { void TReadSession::AbortImpl(TSessionClosedEvent&& closeEvent, TDeferredActions& deferred) { if (!Aborting) { Aborting = true; - Log << TLOG_NOTICE << "Aborting read session. Description: " << closeEvent.DebugString(); + Log << TLOG_NOTICE << "Aborting read session. Description: " << closeEvent.DebugString(); Cancel(ClusterDiscoveryDelayContext); Cancel(DumpCountersContext); for (auto& [cluster, sessionInfo] : ClusterSessions) { @@ -603,9 +603,9 @@ void TReadSession::DumpCountersToLog(size_t timeNumber) { } void TReadSession::ScheduleDumpCountersToLog(size_t timeNumber) { - with_lock(Lock) { - DumpCountersContext = Connections->CreateContext(); - } + with_lock(Lock) { + DumpCountersContext = Connections->CreateContext(); + } if (DumpCountersContext) { auto callback = [self = weak_from_this(), timeNumber](bool ok) { if (ok) { @@ -630,23 +630,23 @@ TLog TPartitionStreamImpl::GetLog() const { } void TPartitionStreamImpl::Commit(ui64 startOffset, ui64 endOffset) { - std::vector<std::pair<ui64, ui64>> toCommit; + std::vector<std::pair<ui64, ui64>> toCommit; if (auto sessionShared = Session.lock()) { - Y_VERIFY(endOffset > startOffset); - with_lock(sessionShared->Lock) { - if (!AddToCommitRanges(startOffset, endOffset, true)) // Add range for real commit always. - return; - - Y_VERIFY(!Commits.Empty()); - for (auto c : Commits) { - if (c.first >= endOffset) break; // Commit only gaps before client range. - toCommit.emplace_back(c); - } - Commits.EraseInterval(0, endOffset); // Drop only committed ranges; - } - for (auto range: toCommit) { - sessionShared->Commit(this, range.first, range.second); - } + Y_VERIFY(endOffset > startOffset); + with_lock(sessionShared->Lock) { + if (!AddToCommitRanges(startOffset, endOffset, true)) // Add range for real commit always. + return; + + Y_VERIFY(!Commits.Empty()); + for (auto c : Commits) { + if (c.first >= endOffset) break; // Commit only gaps before client range. + toCommit.emplace_back(c); + } + Commits.EraseInterval(0, endOffset); // Drop only committed ranges; + } + for (auto range: toCommit) { + sessionShared->Commit(this, range.first, range.second); + } } } @@ -678,8 +678,8 @@ void TPartitionStreamImpl::ResumeReading() { void TPartitionStreamImpl::SignalReadyEvents(TReadSessionEventsQueue* queue, TDeferredActions& deferred) { for (auto& event : EventsQueue) { - event.Signal(this, queue, deferred); - + event.Signal(this, queue, deferred); + if (!event.IsReady()) { break; } @@ -864,7 +864,7 @@ void TSingleClusterReadSessionImpl::InitImpl(TDeferredActions& deferred) { // As Log << TLOG_DEBUG << "Successfully connected. Initializing session"; Ydb::PersQueue::V1::MigrationStreamingReadClientMessage req; auto& init = *req.mutable_init_request(); - init.set_ranges_mode(RangesMode); + init.set_ranges_mode(RangesMode); for (const TTopicReadSettings& topic : Settings.Topics_) { auto* topicSettings = init.add_topics_read_settings(); topicSettings->set_topic(topic.Path_); @@ -907,7 +907,7 @@ void TSingleClusterReadSessionImpl::ContinueReadingDataImpl() { // Assumes that } bool TSingleClusterReadSessionImpl::IsActualPartitionStreamImpl(const TPartitionStreamImpl* partitionStream) { // Assumes that we're under lock. - auto actualPartitionStreamIt = PartitionStreams.find(partitionStream->GetAssignId()); + auto actualPartitionStreamIt = PartitionStreams.find(partitionStream->GetAssignId()); return actualPartitionStreamIt != PartitionStreams.end() && actualPartitionStreamIt->second->GetPartitionStreamId() == partitionStream->GetPartitionStreamId(); } @@ -958,7 +958,7 @@ void TSingleClusterReadSessionImpl::ConfirmPartitionStreamDestroy(TPartitionStre } CookieMapping.RemoveMapping(partitionStream->GetPartitionStreamId()); - PartitionStreams.erase(partitionStream->GetAssignId()); + PartitionStreams.erase(partitionStream->GetAssignId()); EventsQueue->PushEvent({partitionStream, weak_from_this(), TReadSessionEvent::TPartitionStreamClosedEvent(partitionStream, TReadSessionEvent::TPartitionStreamClosedEvent::EReason::DestroyConfirmedByUser)}, deferred); Ydb::PersQueue::V1::MigrationStreamingReadClientMessage req; @@ -980,21 +980,21 @@ void TSingleClusterReadSessionImpl::Commit(const TPartitionStreamImpl* partition } Ydb::PersQueue::V1::MigrationStreamingReadClientMessage req; bool hasSomethingToCommit = false; - if (RangesMode) { - hasSomethingToCommit = true; - auto* range = req.mutable_commit()->add_offset_ranges(); - range->set_assign_id(partitionStream->GetAssignId()); - range->set_start_offset(startOffset); - range->set_end_offset(endOffset); - } else { - for (ui64 offset = startOffset; offset < endOffset; ++offset) { - TPartitionCookieMapping::TCookie::TPtr cookie = CookieMapping.CommitOffset(partitionStream->GetPartitionStreamId(), offset); - if (cookie) { - hasSomethingToCommit = true; - auto* cookieInfo = req.mutable_commit()->add_cookies(); - cookieInfo->set_assign_id(partitionStream->GetAssignId()); - cookieInfo->set_partition_cookie(cookie->Cookie); - } + if (RangesMode) { + hasSomethingToCommit = true; + auto* range = req.mutable_commit()->add_offset_ranges(); + range->set_assign_id(partitionStream->GetAssignId()); + range->set_start_offset(startOffset); + range->set_end_offset(endOffset); + } else { + for (ui64 offset = startOffset; offset < endOffset; ++offset) { + TPartitionCookieMapping::TCookie::TPtr cookie = CookieMapping.CommitOffset(partitionStream->GetPartitionStreamId(), offset); + if (cookie) { + hasSomethingToCommit = true; + auto* cookieInfo = req.mutable_commit()->add_cookies(); + cookieInfo->set_assign_id(partitionStream->GetAssignId()); + cookieInfo->set_partition_cookie(cookie->Cookie); + } } } if (hasSomethingToCommit) { @@ -1026,7 +1026,7 @@ void TSingleClusterReadSessionImpl::OnUserRetrievedEvent(const TReadSessionEvent const i64 bytesCount = static_cast<i64>(CalcDataSize(event)); Y_ASSERT(bytesCount >= 0); - if (!std::get_if<TReadSessionEvent::TDataReceivedEvent>(&event)) { // Event is not data event. + if (!std::get_if<TReadSessionEvent::TDataReceivedEvent>(&event)) { // Event is not data event. return; } @@ -1050,16 +1050,16 @@ void TSingleClusterReadSessionImpl::WriteToProcessorImpl(Ydb::PersQueue::V1::Mig } } -bool TSingleClusterReadSessionImpl::HasCommitsInflightImpl() const { - for (const auto& [id, partitionStream] : PartitionStreams) { - if (partitionStream->HasCommitsInflight()) - return true; - } - return false; -} - +bool TSingleClusterReadSessionImpl::HasCommitsInflightImpl() const { + for (const auto& [id, partitionStream] : PartitionStreams) { + if (partitionStream->HasCommitsInflight()) + return true; + } + return false; +} + void TSingleClusterReadSessionImpl::ReadFromProcessorImpl(TDeferredActions& deferred) { // Assumes that we're under lock. - if (Closing && !HasCommitsInflightImpl()) { + if (Closing && !HasCommitsInflightImpl()) { Processor->Cancel(); CallCloseCallbackImpl(); return; @@ -1154,13 +1154,13 @@ void TSingleClusterReadSessionImpl::OnReadDoneImpl(Ydb::PersQueue::V1::Migration } UpdateMemoryUsageStatisticsImpl(); for (Ydb::PersQueue::V1::MigrationStreamingReadServerMessage::DataBatch::PartitionData& partitionData : *msg.mutable_partition_data()) { - auto partitionStreamIt = PartitionStreams.find(partitionData.cookie().assign_id()); + auto partitionStreamIt = PartitionStreams.find(partitionData.cookie().assign_id()); if (partitionStreamIt == PartitionStreams.end()) { ++*Settings.Counters_->Errors; BreakConnectionAndReconnectImpl(EStatus::INTERNAL_ERROR, TStringBuilder() << "Got unexpected partition stream data message. Topic: " << partitionData.topic() - << ". Partition: " << partitionData.partition() << " AssignId: " << partitionData.cookie().assign_id(), + << ". Partition: " << partitionData.partition() << " AssignId: " << partitionData.cookie().assign_id(), deferred); return; } @@ -1170,21 +1170,21 @@ void TSingleClusterReadSessionImpl::OnReadDoneImpl(Ydb::PersQueue::V1::Migration ui64 firstOffset = std::numeric_limits<ui64>::max(); ui64 currentOffset = std::numeric_limits<ui64>::max(); - ui64 desiredOffset = partitionStream->GetFirstNotReadOffset(); + ui64 desiredOffset = partitionStream->GetFirstNotReadOffset(); for (const Ydb::PersQueue::V1::MigrationStreamingReadServerMessage::DataBatch::Batch& batch : partitionData.batches()) { // Validate messages. for (const Ydb::PersQueue::V1::MigrationStreamingReadServerMessage::DataBatch::MessageData& messageData : batch.message_data()) { // Check offsets continuity. - if (messageData.offset() != desiredOffset) { - bool res = partitionStream->AddToCommitRanges(desiredOffset, messageData.offset(), RangesMode); - Y_VERIFY(res); - } - - if (firstOffset == std::numeric_limits<ui64>::max()) { + if (messageData.offset() != desiredOffset) { + bool res = partitionStream->AddToCommitRanges(desiredOffset, messageData.offset(), RangesMode); + Y_VERIFY(res); + } + + if (firstOffset == std::numeric_limits<ui64>::max()) { firstOffset = messageData.offset(); } currentOffset = messageData.offset(); - desiredOffset = currentOffset + 1; + desiredOffset = currentOffset + 1; partitionStream->UpdateMaxReadOffset(currentOffset); const i64 messageSize = static_cast<i64>(messageData.data().size()); CompressedDataSize += messageSize; @@ -1193,17 +1193,17 @@ void TSingleClusterReadSessionImpl::OnReadDoneImpl(Ydb::PersQueue::V1::Migration ++*Settings.Counters_->MessagesInflight; } } - if (firstOffset == std::numeric_limits<ui64>::max()) { - BreakConnectionAndReconnectImpl(EStatus::INTERNAL_ERROR, - TStringBuilder() << "Got empty data message. Topic: " - << partitionData.topic() - << ". Partition: " << partitionData.partition() - << " message: " << msg, - deferred); - return; - } - cookie->SetOffsetRange(std::make_pair(firstOffset, desiredOffset)); - partitionStream->SetFirstNotReadOffset(desiredOffset); + if (firstOffset == std::numeric_limits<ui64>::max()) { + BreakConnectionAndReconnectImpl(EStatus::INTERNAL_ERROR, + TStringBuilder() << "Got empty data message. Topic: " + << partitionData.topic() + << ". Partition: " << partitionData.partition() + << " message: " << msg, + deferred); + return; + } + cookie->SetOffsetRange(std::make_pair(firstOffset, desiredOffset)); + partitionStream->SetFirstNotReadOffset(desiredOffset); if (!CookieMapping.AddMapping(cookie)) { BreakConnectionAndReconnectImpl(EStatus::INTERNAL_ERROR, TStringBuilder() << "Got unexpected data message. Topic: " @@ -1229,16 +1229,16 @@ void TSingleClusterReadSessionImpl::OnReadDoneImpl(Ydb::PersQueue::V1::Migration auto partitionStream = MakeIntrusive<TPartitionStreamImpl>(NextPartitionStreamId, msg.topic().path(), msg.cluster(), - msg.partition() + 1, // Group. + msg.partition() + 1, // Group. msg.partition(), // Partition. msg.assign_id(), - msg.read_offset(), + msg.read_offset(), weak_from_this(), ErrorHandler); NextPartitionStreamId += PartitionStreamIdStep; // Renew partition stream. - TIntrusivePtr<TPartitionStreamImpl>& currentPartitionStream = PartitionStreams[partitionStream->GetAssignId()]; + TIntrusivePtr<TPartitionStreamImpl>& currentPartitionStream = PartitionStreams[partitionStream->GetAssignId()]; if (currentPartitionStream) { CookieMapping.RemoveMapping(currentPartitionStream->GetPartitionStreamId()); EventsQueue->PushEvent({currentPartitionStream, weak_from_this(), TReadSessionEvent::TPartitionStreamClosedEvent(currentPartitionStream, TReadSessionEvent::TPartitionStreamClosedEvent::EReason::Lost)}, deferred); @@ -1250,13 +1250,13 @@ void TSingleClusterReadSessionImpl::OnReadDoneImpl(Ydb::PersQueue::V1::Migration } void TSingleClusterReadSessionImpl::OnReadDoneImpl(Ydb::PersQueue::V1::MigrationStreamingReadServerMessage::Release&& msg, TDeferredActions& deferred) { // Assumes that we're under lock. - auto partitionStreamIt = PartitionStreams.find(msg.assign_id()); - if (partitionStreamIt == PartitionStreams.end()) { + auto partitionStreamIt = PartitionStreams.find(msg.assign_id()); + if (partitionStreamIt == PartitionStreams.end()) { return; } TIntrusivePtr<TPartitionStreamImpl> partitionStream = partitionStreamIt->second; if (msg.forceful_release()) { - PartitionStreams.erase(msg.assign_id()); + PartitionStreams.erase(msg.assign_id()); CookieMapping.RemoveMapping(partitionStream->GetPartitionStreamId()); EventsQueue->PushEvent({partitionStream, weak_from_this(), TReadSessionEvent::TPartitionStreamClosedEvent(partitionStream, TReadSessionEvent::TPartitionStreamClosedEvent::EReason::Lost)}, deferred); } else { @@ -1265,35 +1265,35 @@ void TSingleClusterReadSessionImpl::OnReadDoneImpl(Ydb::PersQueue::V1::Migration } void TSingleClusterReadSessionImpl::OnReadDoneImpl(Ydb::PersQueue::V1::MigrationStreamingReadServerMessage::Committed&& msg, TDeferredActions& deferred) { // Assumes that we're under lock. - - Log << TLOG_DEBUG << "Committed response: " << msg; - - TMap<ui64, TIntrusivePtr<TPartitionStreamImpl>> partitionStreams; + + Log << TLOG_DEBUG << "Committed response: " << msg; + + TMap<ui64, TIntrusivePtr<TPartitionStreamImpl>> partitionStreams; for (const Ydb::PersQueue::V1::CommitCookie& cookieProto : msg.cookies()) { TPartitionCookieMapping::TCookie::TPtr cookie = CookieMapping.RetrieveCommittedCookie(cookieProto); if (cookie) { - cookie->PartitionStream->UpdateMaxCommittedOffset(cookie->OffsetRange.second); - partitionStreams[cookie->PartitionStream->GetPartitionStreamId()] = cookie->PartitionStream; + cookie->PartitionStream->UpdateMaxCommittedOffset(cookie->OffsetRange.second); + partitionStreams[cookie->PartitionStream->GetPartitionStreamId()] = cookie->PartitionStream; } } - for (auto& [id, partitionStream] : partitionStreams) { - EventsQueue->PushEvent({partitionStream, weak_from_this(), TReadSessionEvent::TCommitAcknowledgementEvent(partitionStream, partitionStream->GetMaxCommittedOffset())}, deferred); - } - - for (const auto& rangeProto : msg.offset_ranges()) { - auto partitionStreamIt = PartitionStreams.find(rangeProto.assign_id()); - if (partitionStreamIt != PartitionStreams.end()) { - auto partitionStream = partitionStreamIt->second; - partitionStream->UpdateMaxCommittedOffset(rangeProto.end_offset()); - EventsQueue->PushEvent({partitionStream, weak_from_this(), TReadSessionEvent::TCommitAcknowledgementEvent(partitionStream, rangeProto.end_offset())}, deferred); - } - } - + for (auto& [id, partitionStream] : partitionStreams) { + EventsQueue->PushEvent({partitionStream, weak_from_this(), TReadSessionEvent::TCommitAcknowledgementEvent(partitionStream, partitionStream->GetMaxCommittedOffset())}, deferred); + } + + for (const auto& rangeProto : msg.offset_ranges()) { + auto partitionStreamIt = PartitionStreams.find(rangeProto.assign_id()); + if (partitionStreamIt != PartitionStreams.end()) { + auto partitionStream = partitionStreamIt->second; + partitionStream->UpdateMaxCommittedOffset(rangeProto.end_offset()); + EventsQueue->PushEvent({partitionStream, weak_from_this(), TReadSessionEvent::TCommitAcknowledgementEvent(partitionStream, rangeProto.end_offset())}, deferred); + } + } + } void TSingleClusterReadSessionImpl::OnReadDoneImpl(Ydb::PersQueue::V1::MigrationStreamingReadServerMessage::PartitionStatus&& msg, TDeferredActions& deferred) { // Assumes that we're under lock. - auto partitionStreamIt = PartitionStreams.find(msg.assign_id()); - if (partitionStreamIt == PartitionStreams.end()) { + auto partitionStreamIt = PartitionStreams.find(msg.assign_id()); + if (partitionStreamIt == PartitionStreams.end()) { return; } EventsQueue->PushEvent( @@ -1358,9 +1358,9 @@ void TSingleClusterReadSessionImpl::OnDataDecompressed(i64 sourceSize, i64 estim DecompressedDataSize += decompressedSize - estimatedDecompressedSize; constexpr double weight = 0.6; AverageCompressionRatio = weight * static_cast<double>(decompressedSize) / static_cast<double>(sourceSize) + (1 - weight) * AverageCompressionRatio; - if (Aborting) { - return; - } + if (Aborting) { + return; + } ContinueReadingDataImpl(); StartDecompressionTasksImpl(deferred); } @@ -1404,7 +1404,7 @@ void TSingleClusterReadSessionImpl::Close(std::function<void()> callback) { if (!Processor) { CallCloseCallbackImpl(); } else { - if (!HasCommitsInflightImpl()) { + if (!HasCommitsInflightImpl()) { Processor->Cancel(); CallCloseCallbackImpl(); } @@ -1548,9 +1548,9 @@ bool TSingleClusterReadSessionImpl::TPartitionCookieMapping::HasUnacknowledgedCo return CommitInflight != 0; } -TReadSessionEvent::TCreatePartitionStreamEvent::TCreatePartitionStreamEvent(TPartitionStream::TPtr partitionStream, ui64 committedOffset, ui64 endOffset) +TReadSessionEvent::TCreatePartitionStreamEvent::TCreatePartitionStreamEvent(TPartitionStream::TPtr partitionStream, ui64 committedOffset, ui64 endOffset) : PartitionStream(std::move(partitionStream)) - , CommittedOffset(committedOffset) + , CommittedOffset(committedOffset) , EndOffset(endOffset) { } @@ -1598,7 +1598,7 @@ TReadSessionEvent::TDataReceivedEvent::TDataReceivedEvent(TVector<TMessage> mess void TReadSessionEvent::TDataReceivedEvent::Commit() { for (auto [from, to] : OffsetRanges) { - static_cast<TPartitionStreamImpl*>(PartitionStream.Get())->Commit(from, to); + static_cast<TPartitionStreamImpl*>(PartitionStream.Get())->Commit(from, to); } } @@ -1638,7 +1638,7 @@ TString TReadSessionEvent::TCommitAcknowledgementEvent::DebugString() const { TString TReadSessionEvent::TCreatePartitionStreamEvent::DebugString() const { return TStringBuilder() << "CreatePartitionStream { PartitionStreamId: " << GetPartitionStream()->GetPartitionStreamId() << " PartitionId: " << GetPartitionStream()->GetPartitionId() - << " CommittedOffset: " << GetCommittedOffset() + << " CommittedOffset: " << GetCommittedOffset() << " EndOffset: " << GetEndOffset() << " }"; } @@ -1778,9 +1778,9 @@ void TReadSessionEventsQueue::PushEvent(TReadSessionEventInfo eventInfo, TDeferr } with_lock (Mutex) { - auto partitionStream = eventInfo.PartitionStream; + auto partitionStream = eventInfo.PartitionStream; eventInfo.MoveToPartitionStream(); - SignalReadyEventsImpl(partitionStream.Get(), deferred); + SignalReadyEventsImpl(partitionStream.Get(), deferred); } } @@ -1807,22 +1807,22 @@ TMaybe<TReadSessionEventsQueue::TEventInfo> TReadSessionEventsQueue::GetDataEven TVector<TReadSessionEvent::TDataReceivedEvent::TMessage> messages; TVector<TReadSessionEvent::TDataReceivedEvent::TCompressedMessage> compressedMessages; TIntrusivePtr<TPartitionStreamImpl> partitionStream = srcDataEventInfo.PartitionStream; - bool messageExtracted = false; + bool messageExtracted = false; while (srcDataEventInfo.HasReadyUnreadData() && *maxByteSize > 0) { const bool hasMoreUnpackedData = srcDataEventInfo.TakeData(&messages, &compressedMessages, maxByteSize); if (!hasMoreUnpackedData) { const bool messageIsFullyRead = !srcDataEventInfo.HasMoreData(); if (messageIsFullyRead) { partitionStream->PopEvent(); - messageExtracted = true; + messageExtracted = true; break; } } } - if (!messageExtracted) { - partitionStream->TopEvent().Signalled = false; - } - + if (!messageExtracted) { + partitionStream->TopEvent().Signalled = false; + } + if (messages.empty() && compressedMessages.empty()) { return Nothing(); } @@ -2026,7 +2026,7 @@ i64 TDataDecompressionInfo::StartDecompressionTasks(const IExecutor::TPtr& execu task = TDecompressionTask(this, partitionStream, &ReadyThresholds.back()); } } - if (task.AddedMessagesCount() > 0) { + if (task.AddedMessagesCount() > 0) { session->OnCreateNewDecompressionTask(); deferred.DeferStartExecutorTask(executor, std::move(task)); } else { @@ -2130,11 +2130,11 @@ TDataDecompressionInfo::TDecompressionTask::TDecompressionTask(TDataDecompressio { } -// Forward delcaration -namespace NCompressionDetails { - extern TString Decompress(const Ydb::PersQueue::V1::MigrationStreamingReadServerMessage::DataBatch::MessageData& data); -} - +// Forward delcaration +namespace NCompressionDetails { + extern TString Decompress(const Ydb::PersQueue::V1::MigrationStreamingReadServerMessage::DataBatch::MessageData& data); +} + void TDataDecompressionInfo::TDecompressionTask::operator()() { ui64 minOffset = Max<ui64>(); ui64 maxOffset = 0; @@ -2175,18 +2175,18 @@ void TDataDecompressionInfo::TDecompressionTask::operator()() { } } if (auto session = Parent->Session.lock()) { - session->GetLog() << TLOG_DEBUG << "Decompression task done. Partition: " << partition << " (" << minOffset << "-" << maxOffset << ")"; + session->GetLog() << TLOG_DEBUG << "Decompression task done. Partition: " << partition << " (" << minOffset << "-" << maxOffset << ")"; } Y_ASSERT(dataProcessed == SourceDataSize); std::shared_ptr<TSingleClusterReadSessionImpl> session = Parent->Session.lock(); - - if (session) { - session->OnDataDecompressed(SourceDataSize, EstimatedDecompressedSize, DecompressedSize, messagesProcessed); - } - - Parent->SourceDataNotProcessed -= dataProcessed; + + if (session) { + session->OnDataDecompressed(SourceDataSize, EstimatedDecompressedSize, DecompressedSize, messagesProcessed); + } + + Parent->SourceDataNotProcessed -= dataProcessed; Ready->Ready = true; - + if (session) { session->GetEventsQueue()->SignalReadyEvents(PartitionStream.Get()); } @@ -2194,7 +2194,7 @@ void TDataDecompressionInfo::TDecompressionTask::operator()() { void TRawPartitionStreamEvent::Signal(TPartitionStreamImpl* partitionStream, TReadSessionEventsQueue* queue, TDeferredActions& deferred) { if (!Signalled) { - Signalled = true; + Signalled = true; queue->SignalEventImpl(partitionStream, deferred); } } @@ -2240,8 +2240,8 @@ void TDeferredActions::DeferReconnection(std::shared_ptr<TSingleClusterReadSessi ReconnectionStatus = std::move(status); } -void TDeferredActions::DeferSignalWaiter(TWaiter&& waiter) { - Waiters.emplace_back(std::move(waiter)); +void TDeferredActions::DeferSignalWaiter(TWaiter&& waiter) { + Waiters.emplace_back(std::move(waiter)); } void TDeferredActions::DoActions() { @@ -2249,7 +2249,7 @@ void TDeferredActions::DoActions() { StartExecutorTasks(); AbortSession(); Reconnect(); - SignalWaiters(); + SignalWaiters(); } void TDeferredActions::Read() { @@ -2282,9 +2282,9 @@ void TDeferredActions::Reconnect() { } } -void TDeferredActions::SignalWaiters() { - for (auto& w : Waiters) { - w.Signal(); +void TDeferredActions::SignalWaiters() { + for (auto& w : Waiters) { + w.Signal(); } } @@ -2312,7 +2312,7 @@ public: // It will be removed from set by specifying proper right border. auto firstMessageOffsets = GetMessageOffsetRange(event, 0); auto lastMessageOffsets = GetMessageOffsetRange(event, event.GetMessagesCount() - 1); - + offsetSet.InsertInterval(firstMessageOffsets.first, lastMessageOffsets.second); if (CommitAfterProcessing) { @@ -2426,10 +2426,10 @@ TReadSessionSettings::TEventHandlers& TReadSessionSettings::TEventHandlers::Simp class TDeferredCommit::TImpl { public: - + void Add(const TPartitionStream::TPtr& partitionStream, ui64 startOffset, ui64 endOffset); void Add(const TPartitionStream::TPtr& partitionStream, ui64 offset); - + void Add(const TReadSessionEvent::TDataReceivedEvent::TMessage& message); void Add(const TReadSessionEvent::TDataReceivedEvent& dataReceivedEvent); @@ -2466,8 +2466,8 @@ void TDeferredCommit::Add(const TPartitionStream::TPtr& partitionStream, ui64 of GET_IMPL()->Add(partitionStream, offset); } -void TDeferredCommit::Add(const TReadSessionEvent::TDataReceivedEvent::TMessage& message) { - GET_IMPL()->Add(message); +void TDeferredCommit::Add(const TReadSessionEvent::TDataReceivedEvent::TMessage& message) { + GET_IMPL()->Add(message); } void TDeferredCommit::Add(const TReadSessionEvent::TDataReceivedEvent& dataReceivedEvent) { @@ -2482,9 +2482,9 @@ void TDeferredCommit::Commit() { } } -void TDeferredCommit::TImpl::Add(const TReadSessionEvent::TDataReceivedEvent::TMessage& message) { - Y_ASSERT(message.GetPartitionStream()); - Add(message.GetPartitionStream(), message.GetOffset()); +void TDeferredCommit::TImpl::Add(const TReadSessionEvent::TDataReceivedEvent::TMessage& message) { + Y_ASSERT(message.GetPartitionStream()); + Add(message.GetPartitionStream(), message.GetOffset()); } void TDeferredCommit::TImpl::Add(const TPartitionStream::TPtr& partitionStream, TDisjointIntervalTree<ui64>& offsetSet, ui64 startOffset, ui64 endOffset) { @@ -2534,7 +2534,7 @@ void TDeferredCommit::TImpl::Add(const TReadSessionEvent::TDataReceivedEvent& da void TDeferredCommit::TImpl::Commit() { for (auto&& [partitionStream, offsetRanges] : Offsets) { for (auto&& [startOffset, endOffset] : offsetRanges) { - static_cast<TPartitionStreamImpl*>(partitionStream.Get())->Commit(startOffset, endOffset); + static_cast<TPartitionStreamImpl*>(partitionStream.Get())->Commit(startOffset, endOffset); } } Offsets.clear(); diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/read_session.h b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/read_session.h index 9642b67d3c..b5b874954f 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/read_session.h +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/read_session.h @@ -7,8 +7,8 @@ #include <ydb/public/api/grpc/draft/ydb_persqueue_v1.grpc.pb.h> #include <ydb/public/sdk/cpp/client/ydb_persqueue_core/persqueue.h> -#include <library/cpp/containers/disjoint_interval_tree/disjoint_interval_tree.h> - +#include <library/cpp/containers/disjoint_interval_tree/disjoint_interval_tree.h> + #include <util/digest/numeric.h> #include <util/generic/hash.h> #include <util/system/condvar.h> @@ -62,7 +62,7 @@ public: void DeferAbortSession(const IErrorHandler::TPtr& errorHandler, EStatus statusCode, const TString& message); void DeferAbortSession(const IErrorHandler::TPtr& errorHandler, TPlainStatus&& status); void DeferReconnection(std::shared_ptr<TSingleClusterReadSessionImpl> session, const IErrorHandler::TPtr& errorHandler, TPlainStatus&& status); - void DeferSignalWaiter(TWaiter&& waiter); + void DeferSignalWaiter(TWaiter&& waiter); private: void DoActions(); @@ -71,7 +71,7 @@ private: void StartExecutorTasks(); void AbortSession(); void Reconnect(); - void SignalWaiters(); + void SignalWaiters(); private: // Read. @@ -86,8 +86,8 @@ private: IErrorHandler::TPtr ErrorHandler; TMaybe<TSessionClosedEvent> SessionClosedEvent; - // Waiters. - std::vector<TWaiter> Waiters; + // Waiters. + std::vector<TWaiter> Waiters; // Reconnection. std::shared_ptr<TSingleClusterReadSessionImpl> Session; @@ -188,9 +188,9 @@ private: size_t AddedDataSize() const { return SourceDataSize; } - size_t AddedMessagesCount() const { - return Messages.size(); - } + size_t AddedMessagesCount() const { + return Messages.size(); + } private: TDataDecompressionInfo* Parent; @@ -341,8 +341,8 @@ struct TRawPartitionStreamEvent { void Signal(TPartitionStreamImpl* partitionStream, TReadSessionEventsQueue* queue, TDeferredActions& deferred); }; - - + + class TPartitionStreamImpl : public TPartitionStream { public: struct TKey { // Hash<TKey> is defined later in this file. @@ -364,12 +364,12 @@ public: ui64 partitionGroupId, ui64 partitionId, ui64 assignId, - ui64 readOffset, + ui64 readOffset, std::weak_ptr<TSingleClusterReadSessionImpl> parentSession, IErrorHandler::TPtr errorHandler) : Key{topicPath, cluster, partitionId} , AssignId(assignId) - , FirstNotReadOffset(readOffset) + , FirstNotReadOffset(readOffset) , Session(std::move(parentSession)) , ErrorHandler(std::move(errorHandler)) { @@ -378,20 +378,20 @@ public: Cluster = std::move(cluster); PartitionGroupId = partitionGroupId; PartitionId = partitionId; - MaxCommittedOffset = readOffset; + MaxCommittedOffset = readOffset; } ~TPartitionStreamImpl(); - ui64 GetFirstNotReadOffset() const { - return FirstNotReadOffset; - } - - void SetFirstNotReadOffset(const ui64 offset) { - FirstNotReadOffset = offset; - } - - void Commit(ui64 startOffset, ui64 endOffset) /*override*/; + ui64 GetFirstNotReadOffset() const { + return FirstNotReadOffset; + } + + void SetFirstNotReadOffset(const ui64 offset) { + FirstNotReadOffset = offset; + } + + void Commit(ui64 startOffset, ui64 endOffset) /*override*/; void RequestStatus() override; void ConfirmCreate(TMaybe<ui64> readOffset, TMaybe<ui64> commitOffset); @@ -472,264 +472,264 @@ public: void UpdateMaxCommittedOffset(ui64 offset) { if (offset > MaxCommittedOffset) { - ClientCommits.EraseInterval(MaxCommittedOffset, offset); + ClientCommits.EraseInterval(MaxCommittedOffset, offset); MaxCommittedOffset = offset; } } - bool HasCommitsInflight() const { - if (ClientCommits.Empty()) - return false; - auto range = *ClientCommits.begin(); - if (range.first > MaxCommittedOffset) - return false; - // Here we got first range that can be committed by server. - // If offset to commit is from same position - then nothing is inflight. - if (!Commits.Empty() && Commits.begin()->first == range.first) - return false; - return true; - } - - bool AddToCommitRanges(const ui64 startOffset, const ui64 endOffset, bool rangesMode) { - if (ClientCommits.Intersects(startOffset, endOffset) || startOffset < MaxCommittedOffset) { + bool HasCommitsInflight() const { + if (ClientCommits.Empty()) + return false; + auto range = *ClientCommits.begin(); + if (range.first > MaxCommittedOffset) + return false; + // Here we got first range that can be committed by server. + // If offset to commit is from same position - then nothing is inflight. + if (!Commits.Empty() && Commits.begin()->first == range.first) + return false; + return true; + } + + bool AddToCommitRanges(const ui64 startOffset, const ui64 endOffset, bool rangesMode) { + if (ClientCommits.Intersects(startOffset, endOffset) || startOffset < MaxCommittedOffset) { ThrowFatalError(TStringBuilder() << "Invalid offset range [" << startOffset << ", " << endOffset << ") : range must start from " - << MaxCommittedOffset << " or has some offsets that are committed already. Partition stream id: " << PartitionStreamId << Endl); - return false; - } - if (rangesMode) { // Otherwise no need to send it to server. - Y_VERIFY(!Commits.Intersects(startOffset, endOffset)); - Commits.InsertInterval(startOffset, endOffset); - } - ClientCommits.InsertInterval(startOffset, endOffset); - return true; - } - - + << MaxCommittedOffset << " or has some offsets that are committed already. Partition stream id: " << PartitionStreamId << Endl); + return false; + } + if (rangesMode) { // Otherwise no need to send it to server. + Y_VERIFY(!Commits.Intersects(startOffset, endOffset)); + Commits.InsertInterval(startOffset, endOffset); + } + ClientCommits.InsertInterval(startOffset, endOffset); + return true; + } + + private: const TKey Key; ui64 AssignId; - ui64 FirstNotReadOffset; + ui64 FirstNotReadOffset; std::weak_ptr<TSingleClusterReadSessionImpl> Session; IErrorHandler::TPtr ErrorHandler; std::deque<TRawPartitionStreamEvent> EventsQueue; size_t DataDecompressionEventsCount = 0; ui64 MaxReadOffset = 0; ui64 MaxCommittedOffset = 0; - - TDisjointIntervalTree<ui64> Commits; - TDisjointIntervalTree<ui64> ClientCommits; + + TDisjointIntervalTree<ui64> Commits; + TDisjointIntervalTree<ui64> ClientCommits; +}; + + +class TReadSessionEventsQueue : public TBaseSessionEventsQueue<TReadSessionSettings, TReadSessionEvent::TEvent, TReadSessionEventInfo> { + using TParent = TBaseSessionEventsQueue<TReadSessionSettings, TReadSessionEvent::TEvent, TReadSessionEventInfo>; + +public: + explicit TReadSessionEventsQueue(const TSettings& settings, std::weak_ptr<IUserRetrievedEventCallback> session); + + TMaybe<TEventInfo> GetDataEventImpl(TEventInfo& srcDataEventInfo, size_t* maxByteSize); // Assumes that we're under lock. + + TMaybe<TEventInfo> TryGetEventImpl(size_t* maxByteSize) { // Assumes that we're under lock. + Y_ASSERT(HasEventsImpl()); + TVector<TReadSessionEvent::TDataReceivedEvent::TMessage> messages; + if (!Events.empty()) { + TEventInfo event = std::move(Events.front()); + Events.pop(); + RenewWaiterImpl(); + auto partitionStream = event.PartitionStream; + + if (!partitionStream->HasEvents()) { + Y_FAIL("can't be here - got events in global queue, but nothing in partition queue"); + return Nothing(); + } + + if (partitionStream->TopEvent().IsDataEvent()) { + return GetDataEventImpl(event, maxByteSize); + } + + event = TReadSessionEventInfo(partitionStream.Get(), event.Session, partitionStream->TopEvent().GetEvent()); + partitionStream->PopEvent(); + return event; + } + + Y_ASSERT(CloseEvent); + return TEventInfo(*CloseEvent, Session); + } + + TMaybe<TEventInfo> GetEventImpl(size_t* maxByteSize) { // Assumes that we're under lock and that the event queue has events. + do { + TMaybe<TEventInfo> result = TryGetEventImpl(maxByteSize); // We could have read all the data in current message previous time. + if (result) { + return result; + } + } while (HasEventsImpl()); + return Nothing(); + } + + TVector<TEvent> GetEvents(bool block = false, TMaybe<size_t> maxEventsCount = Nothing(), size_t maxByteSize = std::numeric_limits<size_t>::max()) { + TVector<TEventInfo> eventInfos; + const size_t maxCount = maxEventsCount ? *maxEventsCount : std::numeric_limits<size_t>::max(); + TDeferredActions deferred; + std::vector<TIntrusivePtr<TPartitionStreamImpl>> partitionStreamsForSignalling; + with_lock (Mutex) { + eventInfos.reserve(Min(Events.size() + CloseEvent.Defined(), maxCount)); + do { + if (block) { + WaitEventsImpl(); + } + + ApplyCallbacksToReadyEventsImpl(deferred); + + while (HasEventsImpl() && eventInfos.size() < maxCount && maxByteSize > 0) { + TMaybe<TEventInfo> event = GetEventImpl(&maxByteSize); + if (event) { + const TIntrusivePtr<TPartitionStreamImpl> partitionStreamForSignalling = event->IsDataEvent() ? event->PartitionStream : nullptr; + eventInfos.emplace_back(std::move(*event)); + if (eventInfos.back().IsSessionClosedEvent()) { + break; + } + if (partitionStreamForSignalling) { + partitionStreamsForSignalling.emplace_back(std::move(partitionStreamForSignalling)); + } + } + } + } while (block && (eventInfos.empty() || eventInfos.back().IsSessionClosedEvent())); + ApplyCallbacksToReadyEventsImpl(deferred); + for (const auto& partitionStreamForSignalling : partitionStreamsForSignalling) { + SignalReadyEventsImpl(partitionStreamForSignalling.Get(), deferred); + } + } + + TVector<TEvent> result; + result.reserve(eventInfos.size()); + for (TEventInfo& eventInfo : eventInfos) { + eventInfo.OnUserRetrievedEvent(); + result.emplace_back(std::move(eventInfo.GetEvent())); + } + return result; + } + + TMaybe<TEvent> GetEvent(bool block = false, size_t maxByteSize = std::numeric_limits<size_t>::max()) { + TMaybe<TEventInfo> eventInfo; + TDeferredActions deferred; + with_lock (Mutex) { + TIntrusivePtr<TPartitionStreamImpl> partitionStreamForSignalling; + do { + if (block) { + WaitEventsImpl(); + } + + const bool appliedCallbacks = ApplyCallbacksToReadyEventsImpl(deferred); + + if (HasEventsImpl()) { + eventInfo = GetEventImpl(&maxByteSize); + if (eventInfo && eventInfo->IsDataEvent()) { + partitionStreamForSignalling = eventInfo->PartitionStream; + } + } else if (!appliedCallbacks) { + return Nothing(); + } + } while (block && !eventInfo); + ApplyCallbacksToReadyEventsImpl(deferred); + if (partitionStreamForSignalling) { + SignalReadyEventsImpl(partitionStreamForSignalling.Get(), deferred); + } + } + if (eventInfo) { + eventInfo->OnUserRetrievedEvent(); + return std::move(eventInfo->Event); + } else { + return Nothing(); + } + } + + void Close(const TSessionClosedEvent& event, TDeferredActions& deferred) { + TWaiter waiter; + with_lock (Mutex) { + CloseEvent = event; + Closed = true; + waiter = TWaiter(Waiter.ExtractPromise(), this); + } + + TEventInfo info(event); + ApplyHandler(info, deferred); + + waiter.Signal(); + } + + bool HasCallbackForNextEventImpl() const; + bool ApplyCallbacksToReadyEventsImpl(TDeferredActions& deferred); + + // Push usual event. + void PushEvent(TReadSessionEventInfo eventInfo, TDeferredActions& deferred); + + // Push data event. + TDataDecompressionInfo* PushDataEvent(TIntrusivePtr<TPartitionStreamImpl> partitionStream, Ydb::PersQueue::V1::MigrationStreamingReadServerMessage::DataBatch::PartitionData&& msg); + + void SignalEventImpl(TIntrusivePtr<TPartitionStreamImpl> partitionStream, TDeferredActions& deferred); // Assumes that we're under lock. + + void SignalReadyEvents(TPartitionStreamImpl* partitionStream); + + void SignalReadyEventsImpl(TPartitionStreamImpl* partitionStream, TDeferredActions& deferred); // Assumes that we're under lock. + + void SignalWaiterImpl(TDeferredActions& deferred) { + TWaiter waiter = PopWaiterImpl(); + deferred.DeferSignalWaiter(std::move(waiter)); // No effect if waiter is empty. + } + + void ClearAllEvents(); + +private: + struct THandlersVisitor : public TParent::TBaseHandlersVisitor { + THandlersVisitor(const TSettings& settings, TEventInfo& eventInfo, TDeferredActions& deferred) + : TBaseHandlersVisitor(settings, eventInfo) + , Deferred(deferred) + {} + +#define DECLARE_HANDLER(type, handler, answer) \ + bool operator()(type&) { \ + if (PushHandler<type>( \ + std::move(EventInfo), \ + Settings.EventHandlers_.handler, \ + Settings.EventHandlers_.CommonHandler_)) { \ + return answer; \ + } \ + return false; \ + } \ + /**/ + + DECLARE_HANDLER(TReadSessionEvent::TDataReceivedEvent, DataReceivedHandler_, true); + DECLARE_HANDLER(TReadSessionEvent::TCommitAcknowledgementEvent, CommitAcknowledgementHandler_, true); + DECLARE_HANDLER(TReadSessionEvent::TCreatePartitionStreamEvent, CreatePartitionStreamHandler_, true); + DECLARE_HANDLER(TReadSessionEvent::TDestroyPartitionStreamEvent, DestroyPartitionStreamHandler_, true); + DECLARE_HANDLER(TReadSessionEvent::TPartitionStreamStatusEvent, PartitionStreamStatusHandler_, true); + DECLARE_HANDLER(TReadSessionEvent::TPartitionStreamClosedEvent, PartitionStreamClosedHandler_, true); + DECLARE_HANDLER(TSessionClosedEvent, SessionClosedHandler_, false); // Not applied +#undef DECLARE_HANDLER + + bool Visit() { + return std::visit(*this, EventInfo.GetEvent()); + } + + void Post(const IExecutor::TPtr& executor, IExecutor::TFunction&& f) { + Deferred.DeferStartExecutorTask(executor, std::move(f)); + } + + TDeferredActions& Deferred; + }; + + bool ApplyHandler(TEventInfo& eventInfo, TDeferredActions& deferred) { + THandlersVisitor visitor(Settings, eventInfo, deferred); + return visitor.Visit(); + } + +private: + bool HasEventCallbacks; + std::weak_ptr<IUserRetrievedEventCallback> Session; }; - -class TReadSessionEventsQueue : public TBaseSessionEventsQueue<TReadSessionSettings, TReadSessionEvent::TEvent, TReadSessionEventInfo> { - using TParent = TBaseSessionEventsQueue<TReadSessionSettings, TReadSessionEvent::TEvent, TReadSessionEventInfo>; - -public: - explicit TReadSessionEventsQueue(const TSettings& settings, std::weak_ptr<IUserRetrievedEventCallback> session); - - TMaybe<TEventInfo> GetDataEventImpl(TEventInfo& srcDataEventInfo, size_t* maxByteSize); // Assumes that we're under lock. - - TMaybe<TEventInfo> TryGetEventImpl(size_t* maxByteSize) { // Assumes that we're under lock. - Y_ASSERT(HasEventsImpl()); - TVector<TReadSessionEvent::TDataReceivedEvent::TMessage> messages; - if (!Events.empty()) { - TEventInfo event = std::move(Events.front()); - Events.pop(); - RenewWaiterImpl(); - auto partitionStream = event.PartitionStream; - - if (!partitionStream->HasEvents()) { - Y_FAIL("can't be here - got events in global queue, but nothing in partition queue"); - return Nothing(); - } - - if (partitionStream->TopEvent().IsDataEvent()) { - return GetDataEventImpl(event, maxByteSize); - } - - event = TReadSessionEventInfo(partitionStream.Get(), event.Session, partitionStream->TopEvent().GetEvent()); - partitionStream->PopEvent(); - return event; - } - - Y_ASSERT(CloseEvent); - return TEventInfo(*CloseEvent, Session); - } - - TMaybe<TEventInfo> GetEventImpl(size_t* maxByteSize) { // Assumes that we're under lock and that the event queue has events. - do { - TMaybe<TEventInfo> result = TryGetEventImpl(maxByteSize); // We could have read all the data in current message previous time. - if (result) { - return result; - } - } while (HasEventsImpl()); - return Nothing(); - } - - TVector<TEvent> GetEvents(bool block = false, TMaybe<size_t> maxEventsCount = Nothing(), size_t maxByteSize = std::numeric_limits<size_t>::max()) { - TVector<TEventInfo> eventInfos; - const size_t maxCount = maxEventsCount ? *maxEventsCount : std::numeric_limits<size_t>::max(); - TDeferredActions deferred; - std::vector<TIntrusivePtr<TPartitionStreamImpl>> partitionStreamsForSignalling; - with_lock (Mutex) { - eventInfos.reserve(Min(Events.size() + CloseEvent.Defined(), maxCount)); - do { - if (block) { - WaitEventsImpl(); - } - - ApplyCallbacksToReadyEventsImpl(deferred); - - while (HasEventsImpl() && eventInfos.size() < maxCount && maxByteSize > 0) { - TMaybe<TEventInfo> event = GetEventImpl(&maxByteSize); - if (event) { - const TIntrusivePtr<TPartitionStreamImpl> partitionStreamForSignalling = event->IsDataEvent() ? event->PartitionStream : nullptr; - eventInfos.emplace_back(std::move(*event)); - if (eventInfos.back().IsSessionClosedEvent()) { - break; - } - if (partitionStreamForSignalling) { - partitionStreamsForSignalling.emplace_back(std::move(partitionStreamForSignalling)); - } - } - } - } while (block && (eventInfos.empty() || eventInfos.back().IsSessionClosedEvent())); - ApplyCallbacksToReadyEventsImpl(deferred); - for (const auto& partitionStreamForSignalling : partitionStreamsForSignalling) { - SignalReadyEventsImpl(partitionStreamForSignalling.Get(), deferred); - } - } - - TVector<TEvent> result; - result.reserve(eventInfos.size()); - for (TEventInfo& eventInfo : eventInfos) { - eventInfo.OnUserRetrievedEvent(); - result.emplace_back(std::move(eventInfo.GetEvent())); - } - return result; - } - - TMaybe<TEvent> GetEvent(bool block = false, size_t maxByteSize = std::numeric_limits<size_t>::max()) { - TMaybe<TEventInfo> eventInfo; - TDeferredActions deferred; - with_lock (Mutex) { - TIntrusivePtr<TPartitionStreamImpl> partitionStreamForSignalling; - do { - if (block) { - WaitEventsImpl(); - } - - const bool appliedCallbacks = ApplyCallbacksToReadyEventsImpl(deferred); - - if (HasEventsImpl()) { - eventInfo = GetEventImpl(&maxByteSize); - if (eventInfo && eventInfo->IsDataEvent()) { - partitionStreamForSignalling = eventInfo->PartitionStream; - } - } else if (!appliedCallbacks) { - return Nothing(); - } - } while (block && !eventInfo); - ApplyCallbacksToReadyEventsImpl(deferred); - if (partitionStreamForSignalling) { - SignalReadyEventsImpl(partitionStreamForSignalling.Get(), deferred); - } - } - if (eventInfo) { - eventInfo->OnUserRetrievedEvent(); - return std::move(eventInfo->Event); - } else { - return Nothing(); - } - } - - void Close(const TSessionClosedEvent& event, TDeferredActions& deferred) { - TWaiter waiter; - with_lock (Mutex) { - CloseEvent = event; - Closed = true; - waiter = TWaiter(Waiter.ExtractPromise(), this); - } - - TEventInfo info(event); - ApplyHandler(info, deferred); - - waiter.Signal(); - } - - bool HasCallbackForNextEventImpl() const; - bool ApplyCallbacksToReadyEventsImpl(TDeferredActions& deferred); - - // Push usual event. - void PushEvent(TReadSessionEventInfo eventInfo, TDeferredActions& deferred); - - // Push data event. - TDataDecompressionInfo* PushDataEvent(TIntrusivePtr<TPartitionStreamImpl> partitionStream, Ydb::PersQueue::V1::MigrationStreamingReadServerMessage::DataBatch::PartitionData&& msg); - - void SignalEventImpl(TIntrusivePtr<TPartitionStreamImpl> partitionStream, TDeferredActions& deferred); // Assumes that we're under lock. - - void SignalReadyEvents(TPartitionStreamImpl* partitionStream); - - void SignalReadyEventsImpl(TPartitionStreamImpl* partitionStream, TDeferredActions& deferred); // Assumes that we're under lock. - - void SignalWaiterImpl(TDeferredActions& deferred) { - TWaiter waiter = PopWaiterImpl(); - deferred.DeferSignalWaiter(std::move(waiter)); // No effect if waiter is empty. - } - - void ClearAllEvents(); - -private: - struct THandlersVisitor : public TParent::TBaseHandlersVisitor { - THandlersVisitor(const TSettings& settings, TEventInfo& eventInfo, TDeferredActions& deferred) - : TBaseHandlersVisitor(settings, eventInfo) - , Deferred(deferred) - {} - -#define DECLARE_HANDLER(type, handler, answer) \ - bool operator()(type&) { \ - if (PushHandler<type>( \ - std::move(EventInfo), \ - Settings.EventHandlers_.handler, \ - Settings.EventHandlers_.CommonHandler_)) { \ - return answer; \ - } \ - return false; \ - } \ - /**/ - - DECLARE_HANDLER(TReadSessionEvent::TDataReceivedEvent, DataReceivedHandler_, true); - DECLARE_HANDLER(TReadSessionEvent::TCommitAcknowledgementEvent, CommitAcknowledgementHandler_, true); - DECLARE_HANDLER(TReadSessionEvent::TCreatePartitionStreamEvent, CreatePartitionStreamHandler_, true); - DECLARE_HANDLER(TReadSessionEvent::TDestroyPartitionStreamEvent, DestroyPartitionStreamHandler_, true); - DECLARE_HANDLER(TReadSessionEvent::TPartitionStreamStatusEvent, PartitionStreamStatusHandler_, true); - DECLARE_HANDLER(TReadSessionEvent::TPartitionStreamClosedEvent, PartitionStreamClosedHandler_, true); - DECLARE_HANDLER(TSessionClosedEvent, SessionClosedHandler_, false); // Not applied -#undef DECLARE_HANDLER - - bool Visit() { - return std::visit(*this, EventInfo.GetEvent()); - } - - void Post(const IExecutor::TPtr& executor, IExecutor::TFunction&& f) { - Deferred.DeferStartExecutorTask(executor, std::move(f)); - } - - TDeferredActions& Deferred; - }; - - bool ApplyHandler(TEventInfo& eventInfo, TDeferredActions& deferred) { - THandlersVisitor visitor(Settings, eventInfo, deferred); - return visitor.Visit(); - } - -private: - bool HasEventCallbacks; - std::weak_ptr<IUserRetrievedEventCallback> Session; -}; - - - + + } // namespace NYdb::NPersQueue template <> @@ -755,8 +755,8 @@ public: using TPtr = std::shared_ptr<TSingleClusterReadSessionImpl>; using IProcessor = IReadSessionConnectionProcessorFactory::IProcessor; - friend class TPartitionStreamImpl; - + friend class TPartitionStreamImpl; + TSingleClusterReadSessionImpl( const TReadSessionSettings& settings, const TString& clusterName, @@ -823,8 +823,8 @@ private: BreakConnectionAndReconnectImpl(TPlainStatus(statusCode, message), deferred); } - bool HasCommitsInflightImpl() const; - + bool HasCommitsInflightImpl() const; + void OnConnectTimeout(const NGrpc::IQueueClientContextPtr& connectTimeoutContext); void OnConnect(TPlainStatus&&, typename IProcessor::TPtr&&, const NGrpc::IQueueClientContextPtr& connectContext); void DestroyAllPartitionStreamsImpl(TDeferredActions& deferred); // Destroy all streams before setting new connection // Assumes that we're under lock. @@ -981,7 +981,7 @@ private: bool WaitingReadResponse = false; std::shared_ptr<Ydb::PersQueue::V1::MigrationStreamingReadServerMessage> ServerMessage; // Server message to write server response to. - THashMap<ui64, TIntrusivePtr<TPartitionStreamImpl>> PartitionStreams; // assignId -> Partition stream. + THashMap<ui64, TIntrusivePtr<TPartitionStreamImpl>> PartitionStreams; // assignId -> Partition stream. TPartitionCookieMapping CookieMapping; std::deque<TDecompressionQueueItem> DecompressionQueue; bool DataReadingSuspended = false; @@ -1071,11 +1071,11 @@ private: Ydb::PersQueue::ClusterDiscovery::DiscoverClustersRequest MakeClusterDiscoveryRequest() const; void StartClusterDiscovery(); void OnClusterDiscovery(const TStatus& status, const Ydb::PersQueue::ClusterDiscovery::DiscoverClustersResult& result); - void ProceedWithoutClusterDiscovery(); + void ProceedWithoutClusterDiscovery(); void RestartClusterDiscoveryImpl(TDuration delay, TDeferredActions& deferred); - void CreateClusterSessionsImpl(); + void CreateClusterSessionsImpl(); + - // Shutdown. void Abort(EStatus statusCode, NYql::TIssues&& issues); void Abort(EStatus statusCode, const TString& message); diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/write_session.cpp b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/write_session.cpp index 94b30d1404..af4e08b979 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/write_session.cpp +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/write_session.cpp @@ -1,21 +1,21 @@ #include "write_session.h" #include <ydb/public/sdk/cpp/client/ydb_persqueue_core/persqueue.h> -#include <library/cpp/string_utils/url/url.h> - -#include <util/generic/store_policy.h> -#include <util/generic/utility.h> +#include <library/cpp/string_utils/url/url.h> + +#include <util/generic/store_policy.h> +#include <util/generic/utility.h> #include <util/stream/buffer.h> - + namespace NYdb::NPersQueue { using NMonitoring::TDynamicCounterPtr; using TCounterPtr = NMonitoring::TDynamicCounters::TCounterPtr; - -const TDuration UPDATE_TOKEN_PERIOD = TDuration::Hours(1); - -namespace NCompressionDetails { - THolder<IOutputStream> CreateCoder(ECodec codec, TBuffer& result, int quality); + +const TDuration UPDATE_TOKEN_PERIOD = TDuration::Hours(1); + +namespace NCompressionDetails { + THolder<IOutputStream> CreateCoder(ECodec codec, TBuffer& result, int quality); } #define HISTOGRAM_SETUP NMonitoring::ExplicitHistogram({0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100}) @@ -45,9 +45,9 @@ TWriteSession::TWriteSession( , Client(std::move(client)) , Connections(std::move(connections)) , DbDriverState(std::move(dbDriverState)) - , PrevToken(DbDriverState->CredentialsProvider ? DbDriverState->CredentialsProvider->GetAuthInfo() : "") + , PrevToken(DbDriverState->CredentialsProvider ? DbDriverState->CredentialsProvider->GetAuthInfo() : "") , EventsQueue(std::make_shared<TWriteSessionEventsQueue>(Settings)) - , InitSeqNoPromise(NThreading::NewPromise<ui64>()) + , InitSeqNoPromise(NThreading::NewPromise<ui64>()) , WakeupInterval( Settings.BatchFlushInterval_.GetOrElse(TDuration::Zero()) ? std::min(Settings.BatchFlushInterval_.GetOrElse(TDuration::Seconds(1)) / 5, TDuration::MilliSeconds(100)) @@ -77,18 +77,18 @@ void TWriteSession::Start(const TDuration& delay) { InitWriter(); } Started = true; - - DoCdsRequest(delay); + + DoCdsRequest(delay); } // Only called under lock TWriteSession::THandleResult TWriteSession::RestartImpl(const TPlainStatus& status) { THandleResult result; if (AtomicGet(Aborting)) { - DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Write session is aborting and will not restart"; + DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Write session is aborting and will not restart"; return result; } - DbDriverState->Log << TLOG_INFO << LogPrefix() << "Got error. Status: " << status.Status << ". Description: " << IssuesSingleLineString(status.Issues); + DbDriverState->Log << TLOG_INFO << LogPrefix() << "Got error. Status: " << status.Status << ". Description: " << IssuesSingleLineString(status.Issues); SessionEstablished = false; TMaybe<TDuration> nextDelay = TDuration::Zero(); if (!RetryState) { @@ -99,44 +99,44 @@ TWriteSession::THandleResult TWriteSession::RestartImpl(const TPlainStatus& stat if (nextDelay) { result.StartDelay = *nextDelay; result.DoRestart = true; - DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Write session will restart in " << result.StartDelay.MilliSeconds() << " ms"; + DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Write session will restart in " << result.StartDelay.MilliSeconds() << " ms"; ResetForRetryImpl(); } else { - DbDriverState->Log << TLOG_INFO << LogPrefix() << "Write session will not restart after a fatal error"; + DbDriverState->Log << TLOG_INFO << LogPrefix() << "Write session will not restart after a fatal error"; result.DoStop = true; CheckHandleResultImpl(result); } return result; } -bool IsFederation(const TString& endpoint) { - TStringBuf host = GetHost(endpoint); - return host == "logbroker.yandex.net" || host == "logbroker-prestable.yandex.net"; -} - +bool IsFederation(const TString& endpoint) { + TStringBuf host = GetHost(endpoint); + return host == "logbroker.yandex.net" || host == "logbroker-prestable.yandex.net"; +} + void TWriteSession::DoCdsRequest(TDuration delay) { - DbDriverState->Log << TLOG_INFO << LogPrefix() << "Write session: Do CDS request"; - auto weakThis = weak_from_this(); + DbDriverState->Log << TLOG_INFO << LogPrefix() << "Write session: Do CDS request"; + auto weakThis = weak_from_this(); if ( Settings.ClusterDiscoveryMode_ == EClusterDiscoveryMode::Off || (Settings.ClusterDiscoveryMode_ == EClusterDiscoveryMode::Auto && !IsFederation(DbDriverState->DiscoveryEndpoint)) ) { - DoConnect(delay, DbDriverState->DiscoveryEndpoint); - return; - } - - auto extractor = [weakThis] + DoConnect(delay, DbDriverState->DiscoveryEndpoint); + return; + } + + auto extractor = [weakThis] (google::protobuf::Any* any, TPlainStatus status) mutable { Ydb::PersQueue::ClusterDiscovery::DiscoverClustersResult result; if (any) { any->UnpackTo(&result); } TStatus st(std::move(status)); - if (auto sharedThis = weakThis.lock()) { - sharedThis->OnCdsResponse(st, result); - } + if (auto sharedThis = weakThis.lock()) { + sharedThis->OnCdsResponse(st, result); + } }; Ydb::PersQueue::ClusterDiscovery::DiscoverClustersRequest req; @@ -148,12 +148,12 @@ void TWriteSession::DoCdsRequest(TDuration delay) { if (Settings.PreferredCluster_.Defined()) params->set_preferred_cluster_name(*Settings.PreferredCluster_); - auto weakConnections = std::weak_ptr<TGRpcConnectionsImpl>(Connections); - DbDriverState->Log << TLOG_INFO << LogPrefix() << "Do schedule cds request after " << delay.MilliSeconds() << " ms\n"; - auto cdsRequestCall = [req_=std::move(req), extr=std::move(extractor), weakConnections, dbState=DbDriverState, settings=Settings]() mutable { - if (auto connections = weakConnections.lock()) { - dbState->Log << TLOG_INFO << "MessageGroupId [" << settings.MessageGroupId_ << "] Running cds request ms\n"; - connections->RunDeferred<Ydb::PersQueue::V1::ClusterDiscoveryService, + auto weakConnections = std::weak_ptr<TGRpcConnectionsImpl>(Connections); + DbDriverState->Log << TLOG_INFO << LogPrefix() << "Do schedule cds request after " << delay.MilliSeconds() << " ms\n"; + auto cdsRequestCall = [req_=std::move(req), extr=std::move(extractor), weakConnections, dbState=DbDriverState, settings=Settings]() mutable { + if (auto connections = weakConnections.lock()) { + dbState->Log << TLOG_INFO << "MessageGroupId [" << settings.MessageGroupId_ << "] Running cds request ms\n"; + connections->RunDeferred<Ydb::PersQueue::V1::ClusterDiscoveryService, Ydb::PersQueue::ClusterDiscovery::DiscoverClustersRequest, Ydb::PersQueue::ClusterDiscovery::DiscoverClustersResponse>( std::move(req_), @@ -162,8 +162,8 @@ void TWriteSession::DoCdsRequest(TDuration delay) { dbState, INITIAL_DEFERRED_CALL_DELAY, TRpcRequestSettings::Make(settings), - settings.ConnectTimeout_); // TODO: make client timeout setting - } + settings.ConnectTimeout_); // TODO: make client timeout setting + } }; Connections->ScheduleOneTimeTask(std::move(cdsRequestCall), delay); } @@ -171,7 +171,7 @@ void TWriteSession::DoCdsRequest(TDuration delay) { void TWriteSession::OnCdsResponse( TStatus& status, const Ydb::PersQueue::ClusterDiscovery::DiscoverClustersResult& result ) { - DbDriverState->Log << TLOG_INFO << LogPrefix() << "Got CDS response: \n" << result.ShortDebugString(); + DbDriverState->Log << TLOG_INFO << LogPrefix() << "Got CDS response: \n" << result.ShortDebugString(); TString endpoint, name; THandleResult handleResult; if (!status.IsSuccess()) { @@ -189,17 +189,17 @@ void TWriteSession::OnCdsResponse( EStatus errorStatus = EStatus::INTERNAL_ERROR; with_lock (Lock) { const Ydb::PersQueue::ClusterDiscovery::WriteSessionClusters& wsClusters = result.write_sessions_clusters(0); - bool isFirst = true; - + bool isFirst = true; + for (const auto& clusterInfo : wsClusters.clusters()) { TString normalizedName = clusterInfo.name(); normalizedName.to_lower(); - if(isFirst) { - isFirst = false; - PreferredClusterByCDS = clusterInfo.name(); - } - + if(isFirst) { + isFirst = false; + PreferredClusterByCDS = clusterInfo.name(); + } + if (!clusterInfo.available()) { if (TargetCluster && TargetCluster == normalizedName) { errorStatus = EStatus::UNAVAILABLE; @@ -247,7 +247,7 @@ void TWriteSession::OnCdsResponse( } CurrentCluster = name; } - DoConnect(TDuration::Zero(), endpoint); + DoConnect(TDuration::Zero(), endpoint); } @@ -264,21 +264,21 @@ void TWriteSession::InitWriter() { // No Lock, very initial start - no race yet } // Client method NThreading::TFuture<ui64> TWriteSession::GetInitSeqNo() { - if (Settings.ValidateSeqNo_) { + if (Settings.ValidateSeqNo_) { if (AutoSeqNoMode.Defined() && *AutoSeqNoMode) { - DbDriverState->Log << TLOG_ERR << LogPrefix() << "Cannot call GetInitSeqNo in Auto SeqNo mode"; + DbDriverState->Log << TLOG_ERR << LogPrefix() << "Cannot call GetInitSeqNo in Auto SeqNo mode"; ThrowFatalError("Cannot call GetInitSeqNo in Auto SeqNo mode"); } - else - AutoSeqNoMode = false; - } + else + AutoSeqNoMode = false; + } return InitSeqNoPromise.GetFuture(); } -TString DebugString(const TWriteSessionEvent::TEvent& event) { - return std::visit([](const auto& ev) { return ev.DebugString(); }, event); -} - +TString DebugString(const TWriteSessionEvent::TEvent& event) { + return std::visit([](const auto& ev) { return ev.DebugString(); }, event); +} + // Client method TMaybe<TWriteSessionEvent::TEvent> TWriteSession::GetEvent(bool block) { return EventsQueue->GetEvent(block); @@ -301,16 +301,16 @@ ui64 TWriteSession::GetNextSeqNoImpl(const TMaybe<ui64>& seqNo) { } } if (seqNo.Defined()) { - if (*AutoSeqNoMode) { - DbDriverState->Log << TLOG_ERR << LogPrefix() << "Cannot call write() with defined SeqNo on WriteSession running in auto-seqNo mode"; + if (*AutoSeqNoMode) { + DbDriverState->Log << TLOG_ERR << LogPrefix() << "Cannot call write() with defined SeqNo on WriteSession running in auto-seqNo mode"; ThrowFatalError( "Cannot call write() with defined SeqNo on WriteSession running in auto-seqNo mode" ); } else { seqNoValue = *seqNo; } - } else if (!(*AutoSeqNoMode)) { - DbDriverState->Log << TLOG_ERR << LogPrefix() << "Cannot call write() without defined SeqNo on WriteSession running in manual-seqNo mode"; + } else if (!(*AutoSeqNoMode)) { + DbDriverState->Log << TLOG_ERR << LogPrefix() << "Cannot call write() without defined SeqNo on WriteSession running in manual-seqNo mode"; ThrowFatalError( "Cannot call write() without defined SeqNo on WriteSession running in manual-seqNo mode" ); @@ -335,14 +335,14 @@ NThreading::TFuture<void> TWriteSession::WaitEvent() { } // Client method. -void TWriteSession::WriteInternal( - TContinuationToken&&, TStringBuf data, TMaybe<ECodec> codec, ui32 originalSize, TMaybe<ui64> seqNo, TMaybe<TInstant> createTimestamp - ) { +void TWriteSession::WriteInternal( + TContinuationToken&&, TStringBuf data, TMaybe<ECodec> codec, ui32 originalSize, TMaybe<ui64> seqNo, TMaybe<TInstant> createTimestamp + ) { TInstant createdAtValue = createTimestamp.Defined() ? *createTimestamp : TInstant::Now(); bool readyToAccept = false; size_t bufferSize = data.size(); with_lock(Lock) { - CurrentBatch.Add(GetNextSeqNoImpl(seqNo), createdAtValue, data, codec, originalSize); + CurrentBatch.Add(GetNextSeqNoImpl(seqNo), createdAtValue, data, codec, originalSize); FlushWriteIfRequiredImpl(); readyToAccept = OnMemoryUsageChangedImpl(bufferSize).NowOk; @@ -352,20 +352,20 @@ void TWriteSession::WriteInternal( } } -// Client method. -void TWriteSession::WriteEncoded( - TContinuationToken&& token, TStringBuf data, ECodec codec, ui32 originalSize, TMaybe<ui64> seqNo, TMaybe<TInstant> createTimestamp - ) { - WriteInternal(std::move(token), data, codec, originalSize, seqNo, createTimestamp); -} - -void TWriteSession::Write( - TContinuationToken&& token, TStringBuf data, TMaybe<ui64> seqNo, TMaybe<TInstant> createTimestamp - ) { - WriteInternal(std::move(token), data, {}, 0, seqNo, createTimestamp); -} - - +// Client method. +void TWriteSession::WriteEncoded( + TContinuationToken&& token, TStringBuf data, ECodec codec, ui32 originalSize, TMaybe<ui64> seqNo, TMaybe<TInstant> createTimestamp + ) { + WriteInternal(std::move(token), data, codec, originalSize, seqNo, createTimestamp); +} + +void TWriteSession::Write( + TContinuationToken&& token, TStringBuf data, TMaybe<ui64> seqNo, TMaybe<TInstant> createTimestamp + ) { + WriteInternal(std::move(token), data, {}, 0, seqNo, createTimestamp); +} + + // Only called under lock. TWriteSession::THandleResult TWriteSession::OnErrorImpl(NYdb::TPlainStatus&& status) { (*Counters->Errors)++; @@ -377,14 +377,14 @@ TWriteSession::THandleResult TWriteSession::OnErrorImpl(NYdb::TPlainStatus&& sta } // No lock -void TWriteSession::DoConnect(const TDuration& delay, const TString& endpoint) { - DbDriverState->Log << TLOG_INFO << LogPrefix() << "Start write session. Will connect to endpoint: " << endpoint; +void TWriteSession::DoConnect(const TDuration& delay, const TString& endpoint) { + DbDriverState->Log << TLOG_INFO << LogPrefix() << "Start write session. Will connect to endpoint: " << endpoint; NGrpc::IQueueClientContextPtr prevConnectContext; NGrpc::IQueueClientContextPtr prevConnectTimeoutContext; NGrpc::IQueueClientContextPtr prevConnectDelayContext; NGrpc::IQueueClientContextPtr connectContext = nullptr; - NGrpc::IQueueClientContextPtr connectDelayContext = nullptr; + NGrpc::IQueueClientContextPtr connectDelayContext = nullptr; NGrpc::IQueueClientContextPtr connectTimeoutContext = nullptr; TRpcRequestSettings reqSettings; std::shared_ptr<IWriteSessionConnectionProcessorFactory> connectionFactory; @@ -398,15 +398,15 @@ void TWriteSession::DoConnect(const TDuration& delay, const TString& endpoint) { ClientContext = std::move(clientContext); ServerMessage = std::make_shared<TServerMessage>(); - if (!ClientContext) { - AbortImpl(); - // Grpc and WriteSession is closing right now. - return; - } - + if (!ClientContext) { + AbortImpl(); + // Grpc and WriteSession is closing right now. + return; + } + connectContext = ClientContext->CreateContext(); - if (delay) - connectDelayContext = ClientContext->CreateContext(); + if (delay) + connectDelayContext = ClientContext->CreateContext(); connectTimeoutContext = ClientContext->CreateContext(); // Previous operations contexts. @@ -414,14 +414,14 @@ void TWriteSession::DoConnect(const TDuration& delay, const TString& endpoint) { // Set new context prevConnectContext = std::exchange(ConnectContext, connectContext); prevConnectTimeoutContext = std::exchange(ConnectTimeoutContext, connectTimeoutContext); - prevConnectDelayContext = std::exchange(ConnectDelayContext, connectDelayContext); + prevConnectDelayContext = std::exchange(ConnectDelayContext, connectDelayContext); Y_ASSERT(ConnectContext); Y_ASSERT(ConnectTimeoutContext); // Cancel previous operations. Cancel(prevConnectContext); - if (prevConnectDelayContext) - Cancel(prevConnectDelayContext); + if (prevConnectDelayContext) + Cancel(prevConnectDelayContext); Cancel(prevConnectTimeoutContext); Y_ASSERT(connectContext); Y_ASSERT(connectTimeoutContext); @@ -449,15 +449,15 @@ void TWriteSession::DoConnect(const TDuration& delay, const TString& endpoint) { std::move(connectContext), TDuration::Seconds(30) /* connect timeout */, // TODO: make connect timeout setting. std::move(connectTimeoutContext), - std::move(connectTimeoutCallback), - delay, - std::move(connectDelayContext) + std::move(connectTimeoutCallback), + delay, + std::move(connectDelayContext) ); } // RPC callback. void TWriteSession::OnConnectTimeout(const NGrpc::IQueueClientContextPtr& connectTimeoutContext) { - DbDriverState->Log << TLOG_ERR << LogPrefix() << "Write session: connect timeout"; + DbDriverState->Log << TLOG_ERR << LogPrefix() << "Write session: connect timeout"; THandleResult handleResult; with_lock (Lock) { if (ConnectTimeoutContext == connectTimeoutContext) { @@ -531,12 +531,12 @@ void TWriteSession::InitImpl() { init->set_partition_group_id(*Settings.PartitionGroupId_); } init->set_max_supported_block_format_version(0); - init->set_preferred_cluster(PreferredClusterByCDS); + init->set_preferred_cluster(PreferredClusterByCDS); for (const auto& attr : Settings.Meta_.Fields) { (*init->mutable_session_meta())[attr.first] = attr.second; } - DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Write session: send init request: "<< req.ShortDebugString(); + DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Write session: send init request: "<< req.ShortDebugString(); WriteToProcessorImpl(std::move(req)); } @@ -635,33 +635,33 @@ void TWriteSession::OnReadDone(NGrpc::TGrpcStatus&& grpcStatus, size_t connectio ProcessHandleResult(processResult.HandleResult); } -TString TWriteSession::LogPrefix() const { - return TStringBuilder() << "MessageGroupId [" << Settings.MessageGroupId_ << "] SessionId [" << SessionId << "] "; -} - -TString TWriteSessionEvent::TAcksEvent::DebugString() const { - TStringBuilder res; - res << "AcksEvent:"; - for (auto& ack : Acks) { - res << " { seqNo : " << ack.SeqNo << ", State : " << ack.State; - if (ack.Details) { - res << ", offset : " << ack.Details->Offset << ", partitionId : " << ack.Details->PartitionId; - } - res << " }"; - } - if (!Acks.empty() && Acks.back().Stat) { - auto& stat = Acks.back().Stat; - res << " write stat: Write time " << stat->WriteTime << " total time in partition queue " << stat->TotalTimeInPartitionQueue - << " partition quoted time " << stat->PartitionQuotedTime << " topic quoted time " << stat->TopicQuotedTime; - } - return res; -} - -TString TWriteSessionEvent::TReadyToAcceptEvent::DebugString() const { - return "ReadyToAcceptEvent"; -} - - +TString TWriteSession::LogPrefix() const { + return TStringBuilder() << "MessageGroupId [" << Settings.MessageGroupId_ << "] SessionId [" << SessionId << "] "; +} + +TString TWriteSessionEvent::TAcksEvent::DebugString() const { + TStringBuilder res; + res << "AcksEvent:"; + for (auto& ack : Acks) { + res << " { seqNo : " << ack.SeqNo << ", State : " << ack.State; + if (ack.Details) { + res << ", offset : " << ack.Details->Offset << ", partitionId : " << ack.Details->PartitionId; + } + res << " }"; + } + if (!Acks.empty() && Acks.back().Stat) { + auto& stat = Acks.back().Stat; + res << " write stat: Write time " << stat->WriteTime << " total time in partition queue " << stat->TotalTimeInPartitionQueue + << " partition quoted time " << stat->PartitionQuotedTime << " topic quoted time " << stat->TopicQuotedTime; + } + return res; +} + +TString TWriteSessionEvent::TReadyToAcceptEvent::DebugString() const { + return "ReadyToAcceptEvent"; +} + + TWriteSession::TProcessSrvMessageResult TWriteSession::ProcessServerMessageImpl() { TProcessSrvMessageResult result; switch (ServerMessage->GetServerMessageCase()) { @@ -676,7 +676,7 @@ TWriteSession::TProcessSrvMessageResult TWriteSession::ProcessServerMessageImpl( } case TServerMessage::kInitResponse: { const auto& initResponse = ServerMessage->init_response(); - DbDriverState->Log << TLOG_INFO << LogPrefix() << "Write session established. Init response: " << initResponse.ShortDebugString(); + DbDriverState->Log << TLOG_INFO << LogPrefix() << "Write session established. Init response: " << initResponse.ShortDebugString(); SessionId = initResponse.session_id(); PartitionId = initResponse.partition_id(); ui64 newLastSeqNo = initResponse.last_sequence_number(); @@ -702,14 +702,14 @@ TWriteSession::TProcessSrvMessageResult TWriteSession::ProcessServerMessageImpl( case TServerMessage::kBatchWriteResponse: { TWriteSessionEvent::TAcksEvent acksEvent; const auto& batchWriteResponse = ServerMessage->batch_write_response(); - DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Write session got write response: " << batchWriteResponse.ShortDebugString(); - TWriteStat::TPtr writeStat = new TWriteStat{}; - const auto& stat = batchWriteResponse.write_statistics(); - writeStat->WriteTime = TDuration::MilliSeconds(stat.persist_duration_ms()); - writeStat->TotalTimeInPartitionQueue = TDuration::MilliSeconds(stat.queued_in_partition_duration_ms()); - writeStat->PartitionQuotedTime = TDuration::MilliSeconds(stat.throttled_on_partition_duration_ms()); - writeStat->TopicQuotedTime = TDuration::MilliSeconds(stat.throttled_on_topic_duration_ms()); - + DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Write session got write response: " << batchWriteResponse.ShortDebugString(); + TWriteStat::TPtr writeStat = new TWriteStat{}; + const auto& stat = batchWriteResponse.write_statistics(); + writeStat->WriteTime = TDuration::MilliSeconds(stat.persist_duration_ms()); + writeStat->TotalTimeInPartitionQueue = TDuration::MilliSeconds(stat.queued_in_partition_duration_ms()); + writeStat->PartitionQuotedTime = TDuration::MilliSeconds(stat.throttled_on_partition_duration_ms()); + writeStat->TopicQuotedTime = TDuration::MilliSeconds(stat.throttled_on_topic_duration_ms()); + for (size_t messageIndex = 0, endIndex = batchWriteResponse.sequence_numbers_size(); messageIndex != endIndex; ++messageIndex) { // TODO: Fill writer statistics ui64 sequenceNumber = batchWriteResponse.sequence_numbers(messageIndex); @@ -722,7 +722,7 @@ TWriteSession::TProcessSrvMessageResult TWriteSession::ProcessServerMessageImpl( static_cast<ui64>(batchWriteResponse.offsets(messageIndex)), PartitionId, }, - writeStat, + writeStat, }); if (CleanupOnAcknowledged(sequenceNumber - SeqNoShift)) { @@ -734,9 +734,9 @@ TWriteSession::TProcessSrvMessageResult TWriteSession::ProcessServerMessageImpl( break; } case TServerMessage::kUpdateTokenResponse: { - UpdateTokenInProgress = false; - DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Write session: token updated successfully"; - UpdateTokenIfNeededImpl(); + UpdateTokenInProgress = false; + DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Write session: token updated successfully"; + UpdateTokenIfNeededImpl(); break; } } @@ -745,7 +745,7 @@ TWriteSession::TProcessSrvMessageResult TWriteSession::ProcessServerMessageImpl( bool TWriteSession::CleanupOnAcknowledged(ui64 sequenceNumber) { bool result = false; - DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Write session: acknoledged message " << sequenceNumber; + DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Write session: acknoledged message " << sequenceNumber; UpdateTimedCountersImpl(); const auto& sentFront = SentOriginalMessages.front(); ui64 size = 0; @@ -797,18 +797,18 @@ TMemoryUsageChange TWriteSession::OnMemoryUsageChangedImpl(i64 diff) { bool nowOk = MemoryUsage <= Settings.MaxMemoryUsage_; if (wasOk != nowOk) { if (wasOk) { - DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Estimated memory usage " << MemoryUsage << "[B] reached maximum (" << Settings.MaxMemoryUsage_ << "[B])"; + DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Estimated memory usage " << MemoryUsage << "[B] reached maximum (" << Settings.MaxMemoryUsage_ << "[B])"; } else { - DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Estimated memory usage got back to normal " << MemoryUsage << "[B]"; + DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Estimated memory usage got back to normal " << MemoryUsage << "[B]"; } } return {wasOk, nowOk}; } -TBuffer CompressBuffer(TVector<TStringBuf>& data, ECodec codec, i32 level) { +TBuffer CompressBuffer(TVector<TStringBuf>& data, ECodec codec, i32 level) { TBuffer result; - THolder<IOutputStream> coder = NCompressionDetails::CreateCoder(codec, result, level); + THolder<IOutputStream> coder = NCompressionDetails::CreateCoder(codec, result, level); for (auto& buffer : data) { coder->Write(buffer.data(), buffer.size()); } @@ -817,31 +817,31 @@ TBuffer CompressBuffer(TVector<TStringBuf>& data, ECodec codec, i32 level) { } // May call OnCompressed with sync executor. No external lock. -void TWriteSession::CompressImpl(TBlock&& block_) { +void TWriteSession::CompressImpl(TBlock&& block_) { auto weakThis = weak_from_this(); - bool isSyncCompression = !CompressionExecutor->IsAsync(); - Y_VERIFY(block_.Valid); + bool isSyncCompression = !CompressionExecutor->IsAsync(); + Y_VERIFY(block_.Valid); - std::shared_ptr<TBlock> blockPtr(std::make_shared<TBlock>()); - blockPtr->Move(block_); - auto lambda = [weakThis, codec = Settings.Codec_, level = Settings.CompressionLevel_, - isSyncCompression, blockPtr]() mutable + std::shared_ptr<TBlock> blockPtr(std::make_shared<TBlock>()); + blockPtr->Move(block_); + auto lambda = [weakThis, codec = Settings.Codec_, level = Settings.CompressionLevel_, + isSyncCompression, blockPtr]() mutable { if (auto sharedThis = weakThis.lock()) { - Y_VERIFY(!blockPtr->Compressed); - + Y_VERIFY(!blockPtr->Compressed); + auto compressedData = CompressBuffer( - blockPtr->OriginalDataRefs, codec, level + blockPtr->OriginalDataRefs, codec, level ); Y_VERIFY(!compressedData.Empty()); - blockPtr->Data = std::move(compressedData); - blockPtr->Compressed = true; - blockPtr->CodecID = GetCodecId(sharedThis->Settings.Codec_); - sharedThis->OnCompressed(std::move(*blockPtr), isSyncCompression); + blockPtr->Data = std::move(compressedData); + blockPtr->Compressed = true; + blockPtr->CodecID = GetCodecId(sharedThis->Settings.Codec_); + sharedThis->OnCompressed(std::move(*blockPtr), isSyncCompression); } - }; - - CompressionExecutor->Post(lambda); + }; + + CompressionExecutor->Post(lambda); } void TWriteSession::OnCompressed(TBlock&& block, bool isSyncCompression) { @@ -861,12 +861,12 @@ void TWriteSession::OnCompressed(TBlock&& block, bool isSyncCompression) { //Called under lock or synchronously if compression is sync TMemoryUsageChange TWriteSession::OnCompressedImpl(TBlock&& block) { UpdateTimedCountersImpl(); - Y_VERIFY(block.Valid); + Y_VERIFY(block.Valid); auto memoryUsage = OnMemoryUsageChangedImpl(static_cast<i64>(block.Data.size()) - block.OriginalMemoryUsage); (*Counters->BytesInflightUncompressed) -= block.OriginalSize; (*Counters->BytesInflightCompressed) += block.Data.size(); - PackedMessagesToSend.emplace(std::move(block)); + PackedMessagesToSend.emplace(std::move(block)); SendImpl(); return memoryUsage; } @@ -877,18 +877,18 @@ void TWriteSession::ResetForRetryImpl() { const size_t totalPackedMessages = PackedMessagesToSend.size() + SentPackedMessage.size(); const size_t totalOriginalMessages = OriginalMessagesToSend.size() + SentOriginalMessages.size(); while (!SentPackedMessage.empty()) { - PackedMessagesToSend.emplace(std::move(SentPackedMessage.front())); + PackedMessagesToSend.emplace(std::move(SentPackedMessage.front())); SentPackedMessage.pop(); } ui64 minSeqNo = PackedMessagesToSend.empty() ? LastSeqNo + 1 : PackedMessagesToSend.top().Offset; - std::queue<TOriginalMessage> freshOriginalMessagesToSend; - OriginalMessagesToSend.swap(freshOriginalMessagesToSend); + std::queue<TOriginalMessage> freshOriginalMessagesToSend; + OriginalMessagesToSend.swap(freshOriginalMessagesToSend); while (!SentOriginalMessages.empty()) { - OriginalMessagesToSend.emplace(std::move(SentOriginalMessages.front())); + OriginalMessagesToSend.emplace(std::move(SentOriginalMessages.front())); SentOriginalMessages.pop(); } while (!freshOriginalMessagesToSend.empty()) { - OriginalMessagesToSend.emplace(std::move(freshOriginalMessagesToSend.front())); + OriginalMessagesToSend.emplace(std::move(freshOriginalMessagesToSend.front())); freshOriginalMessagesToSend.pop(); } if (!OriginalMessagesToSend.empty() && OriginalMessagesToSend.front().SeqNo < minSeqNo) @@ -907,7 +907,7 @@ void TWriteSession::FlushWriteIfRequiredImpl() { || CurrentBatch.CurrentSize >= Settings.BatchFlushSizeBytes_.GetOrElse(0) || CurrentBatch.CurrentSize >= MaxBlockSize || CurrentBatch.Messages.size() >= MaxBlockMessageCount - || CurrentBatch.HasCodec() + || CurrentBatch.HasCodec() ) { WriteBatchImpl(); return; @@ -918,12 +918,12 @@ void TWriteSession::FlushWriteIfRequiredImpl() { // Involves compression, but still called under lock. size_t TWriteSession::WriteBatchImpl() { - DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "write " << CurrentBatch.Messages.size() << " messages with seqNo from " + DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "write " << CurrentBatch.Messages.size() << " messages with seqNo from " << CurrentBatch.Messages.begin()->SeqNo << " to " << CurrentBatch.Messages.back().SeqNo; Y_VERIFY(CurrentBatch.Messages.size() <= MaxBlockMessageCount); - const bool skipCompression = Settings.Codec_ == ECodec::RAW || CurrentBatch.HasCodec(); + const bool skipCompression = Settings.Codec_ == ECodec::RAW || CurrentBatch.HasCodec(); if (!skipCompression && Settings.CompressionExecutor_->IsAsync()) { MessagesAcquired += static_cast<ui64>(CurrentBatch.Acquire()); } @@ -944,12 +944,12 @@ size_t TWriteSession::WriteBatchImpl() { block.OriginalSize += datum.size(); block.OriginalMemoryUsage = CurrentBatch.Data.size(); block.OriginalDataRefs.emplace_back(datum); - if (CurrentBatch.Messages[i].Codec.Defined()) { - Y_VERIFY(CurrentBatch.Messages.size() == 1); - block.CodecID = GetCodecId(*CurrentBatch.Messages[i].Codec); - block.OriginalSize = CurrentBatch.Messages[i].OriginalSize; - block.Compressed = false; - } + if (CurrentBatch.Messages[i].Codec.Defined()) { + Y_VERIFY(CurrentBatch.Messages.size() == 1); + block.CodecID = GetCodecId(*CurrentBatch.Messages[i].Codec); + block.OriginalSize = CurrentBatch.Messages[i].OriginalSize; + block.Compressed = false; + } size += datum.size(); UpdateTimedCountersImpl(); (*Counters->BytesInflightUncompressed) += datum.size(); @@ -958,7 +958,7 @@ size_t TWriteSession::WriteBatchImpl() { } block.Data = std::move(CurrentBatch.Data); if (skipCompression) { - PackedMessagesToSend.emplace(std::move(block)); + PackedMessagesToSend.emplace(std::move(block)); } else { CompressImpl(std::move(block)); } @@ -978,8 +978,8 @@ bool TWriteSession::IsReadyToSendNextImpl() const { if (!SessionEstablished) { return false; } - if (Aborting) - return false; + if (Aborting) + return false; if (PackedMessagesToSend.empty()) { return false; } @@ -989,26 +989,26 @@ bool TWriteSession::IsReadyToSendNextImpl() const { return PackedMessagesToSend.top().Offset == OriginalMessagesToSend.front().SeqNo; } - -void TWriteSession::UpdateTokenIfNeededImpl() { - DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Write session: try to update token"; - - if (!DbDriverState->CredentialsProvider || UpdateTokenInProgress) - return; - TClientMessage clientMessage; - auto* updateRequest = clientMessage.mutable_update_token_request(); - auto token = DbDriverState->CredentialsProvider->GetAuthInfo(); - if (token == PrevToken) - return; - UpdateTokenInProgress = true; - updateRequest->set_token(token); - PrevToken = token; - - DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Write session: updating token"; - - Processor->Write(std::move(clientMessage)); -} - + +void TWriteSession::UpdateTokenIfNeededImpl() { + DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Write session: try to update token"; + + if (!DbDriverState->CredentialsProvider || UpdateTokenInProgress) + return; + TClientMessage clientMessage; + auto* updateRequest = clientMessage.mutable_update_token_request(); + auto token = DbDriverState->CredentialsProvider->GetAuthInfo(); + if (token == PrevToken) + return; + UpdateTokenInProgress = true; + updateRequest->set_token(token); + PrevToken = token; + + DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Write session: updating token"; + + Processor->Write(std::move(clientMessage)); +} + void TWriteSession::SendImpl() { // External cycle splits ready blocks into multiple gRPC messages. Current gRPC message size hard limit is 64MiB while(IsReadyToSendNextImpl()) { @@ -1018,8 +1018,8 @@ void TWriteSession::SendImpl() { // Sent blocks while we can without messages reordering while (IsReadyToSendNextImpl() && clientMessage.ByteSizeLong() < GetMaxGrpcMessageSize()) { - const auto& block = PackedMessagesToSend.top(); - Y_VERIFY(block.Valid); + const auto& block = PackedMessagesToSend.top(); + Y_VERIFY(block.Valid); for (size_t i = 0; i != block.MessageCount; ++i) { Y_VERIFY(!OriginalMessagesToSend.empty()); @@ -1030,7 +1030,7 @@ void TWriteSession::SendImpl() { writeRequest->add_message_sizes(message.Size); writeRequest->add_created_at_ms(message.CreatedAt.MilliSeconds()); - SentOriginalMessages.emplace(std::move(message)); + SentOriginalMessages.emplace(std::move(message)); OriginalMessagesToSend.pop(); } @@ -1047,13 +1047,13 @@ void TWriteSession::SendImpl() { } } - TBlock moveBlock; - moveBlock.Move(block); - SentPackedMessage.emplace(std::move(moveBlock)); + TBlock moveBlock; + moveBlock.Move(block); + SentPackedMessage.emplace(std::move(moveBlock)); PackedMessagesToSend.pop(); } - UpdateTokenIfNeededImpl(); - DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Send " << writeRequest->sequence_numbers_size() << " message(s) (" + UpdateTokenIfNeededImpl(); + DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Send " << writeRequest->sequence_numbers_size() << " message(s) (" << OriginalMessagesToSend.size() << " left), first sequence number is " << writeRequest->sequence_numbers(0); Processor->Write(std::move(clientMessage)); @@ -1063,8 +1063,8 @@ void TWriteSession::SendImpl() { // Client method, no Lock bool TWriteSession::Close(TDuration closeTimeout) { if (AtomicGet(Aborting)) - return false; - DbDriverState->Log << TLOG_INFO << LogPrefix() << "Write session: close. Timeout = " << closeTimeout.MilliSeconds() << " ms"; + return false; + DbDriverState->Log << TLOG_INFO << LogPrefix() << "Write session: close. Timeout = " << closeTimeout.MilliSeconds() << " ms"; auto startTime = TInstant::Now(); auto remaining = closeTimeout; bool ready = false; @@ -1074,8 +1074,8 @@ bool TWriteSession::Close(TDuration closeTimeout) { if (OriginalMessagesToSend.empty() && SentOriginalMessages.empty()) { ready = true; } - if (AtomicGet(Aborting)) - break; + if (AtomicGet(Aborting)) + break; } if (ready) { break; @@ -1084,20 +1084,20 @@ bool TWriteSession::Close(TDuration closeTimeout) { Sleep(Min(TDuration::MilliSeconds(100), remaining)); } with_lock(Lock) { - ready = (OriginalMessagesToSend.empty() && SentOriginalMessages.empty()) && !AtomicGet(Aborting); + ready = (OriginalMessagesToSend.empty() && SentOriginalMessages.empty()) && !AtomicGet(Aborting); } - with_lock(Lock) { - CloseImpl(EStatus::SUCCESS, NYql::TIssues{}); + with_lock(Lock) { + CloseImpl(EStatus::SUCCESS, NYql::TIssues{}); needSetSeqNoValue = !InitSeqNoSetDone && (InitSeqNoSetDone = true); - } + } if (needSetSeqNoValue) { InitSeqNoPromise.SetException("session closed"); } - if (ready) { - DbDriverState->Log << TLOG_INFO << LogPrefix() << "Write session: gracefully shut down, all writes complete"; - } else { - DbDriverState->Log << TLOG_WARNING << LogPrefix() << "Write session: could not confirm all writes in time or session aborted, perform hard shutdown"; - } + if (ready) { + DbDriverState->Log << TLOG_INFO << LogPrefix() << "Write session: gracefully shut down, all writes complete"; + } else { + DbDriverState->Log << TLOG_WARNING << LogPrefix() << "Write session: could not confirm all writes in time or session aborted, perform hard shutdown"; + } return ready; } @@ -1123,17 +1123,17 @@ void TWriteSession::HandleWakeUpImpl() { sharedThis->EventsQueue->PushEvent(TWriteSessionEvent::TReadyToAcceptEvent{TContinuationToken{}}); } }; - if (TInstant::Now() - LastTokenUpdate > UPDATE_TOKEN_PERIOD) { - LastTokenUpdate = TInstant::Now(); - UpdateTokenIfNeededImpl(); - } + if (TInstant::Now() - LastTokenUpdate > UPDATE_TOKEN_PERIOD) { + LastTokenUpdate = TInstant::Now(); + UpdateTokenIfNeededImpl(); + } const auto flushAfter = CurrentBatch.StartedAt == TInstant::Zero() ? WakeupInterval : WakeupInterval - Min(Now() - CurrentBatch.StartedAt, WakeupInterval); Connections->ScheduleCallback(flushAfter, std::move(callback)); } - + void TWriteSession::UpdateTimedCountersImpl() { auto now = TInstant::Now(); auto delta = (now - LastCountersUpdateTs).MilliSeconds(); @@ -1153,7 +1153,7 @@ void TWriteSession::UpdateTimedCountersImpl() { << Counters->counter->Val() \ /**/ - DbDriverState->Log << TLOG_INFO << LogPrefix() + DbDriverState->Log << TLOG_INFO << LogPrefix() << "Counters: {" LOG_COUNTER(Errors) LOG_COUNTER(CurrentSessionLifetimeMs) @@ -1170,9 +1170,9 @@ void TWriteSession::UpdateTimedCountersImpl() { } } -void TWriteSession::AbortImpl() { +void TWriteSession::AbortImpl() { if (!AtomicGet(Aborting)) { - DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Write session: aborting"; + DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Write session: aborting"; AtomicSet(Aborting, 1); Cancel(ConnectContext); Cancel(ConnectTimeoutContext); @@ -1184,7 +1184,7 @@ void TWriteSession::AbortImpl() { } void TWriteSession::CloseImpl(EStatus statusCode, NYql::TIssues&& issues) { - DbDriverState->Log << TLOG_INFO << LogPrefix() << "Write session will now close"; + DbDriverState->Log << TLOG_INFO << LogPrefix() << "Write session will now close"; EventsQueue->Close(TSessionClosedEvent(statusCode, std::move(issues))); AbortImpl(); } @@ -1196,18 +1196,18 @@ void TWriteSession::CloseImpl(EStatus statusCode, const TString& message) { } void TWriteSession::CloseImpl(TPlainStatus&& status) { - DbDriverState->Log << TLOG_INFO << LogPrefix() << "Write session will now close"; + DbDriverState->Log << TLOG_INFO << LogPrefix() << "Write session will now close"; EventsQueue->Close(TSessionClosedEvent(std::move(status))); AbortImpl(); } TWriteSession::~TWriteSession() { - DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Write session: destroy"; + DbDriverState->Log << TLOG_DEBUG << LogPrefix() << "Write session: destroy"; bool needClose = false; with_lock(Lock) { if (!AtomicGet(Aborting)) { CloseImpl(EStatus::SUCCESS, NYql::TIssues{}); - + needClose = !InitSeqNoSetDone && (InitSeqNoSetDone = true); } } @@ -1240,9 +1240,9 @@ ui64 TSimpleBlockingWriteSession::GetInitSeqNo() { bool TSimpleBlockingWriteSession::Write( TStringBuf data, TMaybe<ui64> seqNo, TMaybe<TInstant> createTimestamp, const TDuration& blockTimeout ) { - if (!IsAlive()) - return false; - + if (!IsAlive()) + return false; + auto continuationToken = WaitForToken(blockTimeout); if (continuationToken.Defined()) { Writer->Write(std::move(*continuationToken), std::move(data), seqNo, createTimestamp); @@ -1262,9 +1262,9 @@ TMaybe<TContinuationToken> TSimpleBlockingWriteSession::WaitForToken(const TDura ContinueTokens.pop(); } } - if (!IsAlive()) - return Nothing(); - + if (!IsAlive()) + return Nothing(); + if (token.Defined()) { return std::move(*token); } @@ -1295,7 +1295,7 @@ void TSimpleBlockingWriteSession::HandleAck(TWriteSessionEvent::TAcksEvent& even void TSimpleBlockingWriteSession::HandleReady(TWriteSessionEvent::TReadyToAcceptEvent& event) { with_lock(Lock) { - ContinueTokens.emplace(std::move(event.ContinuationToken)); + ContinueTokens.emplace(std::move(event.ContinuationToken)); } } void TSimpleBlockingWriteSession::HandleClosed(const TSessionClosedEvent&) { diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/write_session.h b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/write_session.h index b7c5bbfd11..324550bb02 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/write_session.h +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/write_session.h @@ -1,12 +1,12 @@ #pragma once #include "common.h" -#include "persqueue_impl.h" +#include "persqueue_impl.h" #include <ydb/public/sdk/cpp/client/ydb_persqueue_core/persqueue.h> -#include <util/generic/buffer.h> - +#include <util/generic/buffer.h> + namespace NYdb::NPersQueue { @@ -85,17 +85,17 @@ public: } void Close(const TSessionClosedEvent& event) { - TWaiter waiter; + TWaiter waiter; with_lock (Mutex) { CloseEvent = event; Closed = true; - waiter = TWaiter(Waiter.ExtractPromise(), this); + waiter = TWaiter(Waiter.ExtractPromise(), this); } TEventInfo info(event); ApplyHandler(info); - waiter.Signal(); + waiter.Signal(); } private: @@ -131,7 +131,7 @@ private: if (!Events.empty()) { TEventInfo event = std::move(Events.front()); Events.pop(); - RenewWaiterImpl(); + RenewWaiterImpl(); return event; } Y_ASSERT(CloseEvent); @@ -165,14 +165,14 @@ private: ui64 SeqNo; TInstant CreatedAt; TStringBuf DataRef; - TMaybe<ECodec> Codec; - ui32 OriginalSize; // only for coded messages - TMessage(ui64 seqNo, const TInstant& createdAt, TStringBuf data, TMaybe<ECodec> codec = {}, ui32 originalSize = 0) + TMaybe<ECodec> Codec; + ui32 OriginalSize; // only for coded messages + TMessage(ui64 seqNo, const TInstant& createdAt, TStringBuf data, TMaybe<ECodec> codec = {}, ui32 originalSize = 0) : SeqNo(seqNo) , CreatedAt(createdAt) , DataRef(data) - , Codec(codec) - , OriginalSize(originalSize) + , Codec(codec) + , OriginalSize(originalSize) {} }; @@ -183,18 +183,18 @@ private: TInstant StartedAt = TInstant::Zero(); bool Acquired = false; bool FlushRequested = false; - void Add(ui64 seqNo, const TInstant& createdAt, TStringBuf data, TMaybe<ECodec> codec, ui32 originalSize) { + void Add(ui64 seqNo, const TInstant& createdAt, TStringBuf data, TMaybe<ECodec> codec, ui32 originalSize) { if (StartedAt == TInstant::Zero()) StartedAt = TInstant::Now(); - CurrentSize += codec ? originalSize : data.size(); - Messages.emplace_back(seqNo, createdAt, data, codec, originalSize); + CurrentSize += codec ? originalSize : data.size(); + Messages.emplace_back(seqNo, createdAt, data, codec, originalSize); Acquired = false; } - bool HasCodec() const { - return Messages.empty() ? false : Messages.front().Codec.Defined(); - } - + bool HasCodec() const { + return Messages.empty() ? false : Messages.front().Codec.Defined(); + } + bool Acquire() { if (Acquired || Messages.empty()) return false; @@ -226,30 +226,30 @@ private: size_t OriginalSize = 0; size_t OriginalMemoryUsage = 0; TString CodecID = GetCodecId(ECodec::RAW); - mutable TVector<TStringBuf> OriginalDataRefs; - mutable TBuffer Data; + mutable TVector<TStringBuf> OriginalDataRefs; + mutable TBuffer Data; bool Compressed = false; - mutable bool Valid = true; - - TBlock& operator=(TBlock&&) = default; - TBlock(TBlock&&) = default; - TBlock() = default; - - //For taking ownership by copying from const object, f.e. lambda -> std::function, priority_queue - void Move(const TBlock& rhs) { - Offset = rhs.Offset; - MessageCount = rhs.MessageCount; - PartNumber = rhs.PartNumber; - OriginalSize = rhs.OriginalSize; - OriginalMemoryUsage = rhs.OriginalMemoryUsage; - CodecID = rhs.CodecID; - OriginalDataRefs.swap(rhs.OriginalDataRefs); - Data.Swap(rhs.Data); - Compressed = rhs.Compressed; - - rhs.Data.Clear(); - rhs.OriginalDataRefs.clear(); - } + mutable bool Valid = true; + + TBlock& operator=(TBlock&&) = default; + TBlock(TBlock&&) = default; + TBlock() = default; + + //For taking ownership by copying from const object, f.e. lambda -> std::function, priority_queue + void Move(const TBlock& rhs) { + Offset = rhs.Offset; + MessageCount = rhs.MessageCount; + PartNumber = rhs.PartNumber; + OriginalSize = rhs.OriginalSize; + OriginalMemoryUsage = rhs.OriginalMemoryUsage; + CodecID = rhs.CodecID; + OriginalDataRefs.swap(rhs.OriginalDataRefs); + Data.Swap(rhs.Data); + Compressed = rhs.Compressed; + + rhs.Data.Clear(); + rhs.OriginalDataRefs.clear(); + } }; struct TOriginalMessage { @@ -299,10 +299,10 @@ public: void Write(TContinuationToken&& continuationToken, TStringBuf data, TMaybe<ui64> seqNo = Nothing(), TMaybe<TInstant> createTimestamp = Nothing()) override; - void WriteEncoded(TContinuationToken&& continuationToken, TStringBuf data, ECodec codec, ui32 originalSize, - TMaybe<ui64> seqNo = Nothing(), TMaybe<TInstant> createTimestamp = Nothing()) override; - - + void WriteEncoded(TContinuationToken&& continuationToken, TStringBuf data, ECodec codec, ui32 originalSize, + TMaybe<ui64> seqNo = Nothing(), TMaybe<TInstant> createTimestamp = Nothing()) override; + + NThreading::TFuture<void> WaitEvent() override; // Empty maybe - block till all work is done. Otherwise block at most at closeTimeout duration. @@ -313,14 +313,14 @@ public: ~TWriteSession(); // will not call close - destroy everything without acks private: - - TString LogPrefix() const; - - void UpdateTokenIfNeededImpl(); - - void WriteInternal(TContinuationToken&& continuationToken, TStringBuf data, TMaybe<ECodec> codec, ui32 originalSize, - TMaybe<ui64> seqNo = Nothing(), TMaybe<TInstant> createTimestamp = Nothing()); - + + TString LogPrefix() const; + + void UpdateTokenIfNeededImpl(); + + void WriteInternal(TContinuationToken&& continuationToken, TStringBuf data, TMaybe<ECodec> codec, ui32 originalSize, + TMaybe<ui64> seqNo = Nothing(), TMaybe<TInstant> createTimestamp = Nothing()); + void FlushWriteIfRequiredImpl(); size_t WriteBatchImpl(); void Start(const TDuration& delay); @@ -333,7 +333,7 @@ private: void OnConnectTimeout(const NGrpc::IQueueClientContextPtr& connectTimeoutContext); void ResetForRetryImpl(); THandleResult RestartImpl(const TPlainStatus& status); - void DoConnect(const TDuration& delay, const TString& endpoint); + void DoConnect(const TDuration& delay, const TString& endpoint); void InitImpl(); void ReadFromProcessor(); // Assumes that we're under lock. void WriteToProcessorImpl(TClientMessage&& req); // Assumes that we're under lock. @@ -351,7 +351,7 @@ private: bool IsReadyToSendNextImpl() const; ui64 GetNextSeqNoImpl(const TMaybe<ui64>& seqNo); void SendImpl(); - void AbortImpl(); + void AbortImpl(); void CloseImpl(EStatus statusCode, NYql::TIssues&& issues); void CloseImpl(EStatus statusCode, const TString& message); void CloseImpl(TPlainStatus&& status); @@ -372,12 +372,12 @@ private: TString InitialCluster; TString CurrentCluster; bool OnSeqNoShift = false; - TString PreferredClusterByCDS; + TString PreferredClusterByCDS; std::shared_ptr<IWriteSessionConnectionProcessorFactory> ConnectionFactory; TDbDriverStatePtr DbDriverState; - TStringType PrevToken; - bool UpdateTokenInProgress = false; - TInstant LastTokenUpdate = TInstant::Zero(); + TStringType PrevToken; + bool UpdateTokenInProgress = false; + TInstant LastTokenUpdate = TInstant::Zero(); std::shared_ptr<TWriteSessionEventsQueue> EventsQueue; NGrpc::IQueueClientContextPtr ClientContext; // Common client context. NGrpc::IQueueClientContextPtr ConnectContext; @@ -397,11 +397,11 @@ private: TMessageBatch CurrentBatch; - std::queue<TOriginalMessage> OriginalMessagesToSend; - std::priority_queue<TBlock, std::vector<TBlock>, Greater> PackedMessagesToSend; + std::queue<TOriginalMessage> OriginalMessagesToSend; + std::priority_queue<TBlock, std::vector<TBlock>, Greater> PackedMessagesToSend; //! Messages that are sent but yet not acknowledged - std::queue<TOriginalMessage> SentOriginalMessages; - std::queue<TBlock> SentPackedMessage; + std::queue<TOriginalMessage> SentOriginalMessages; + std::queue<TBlock> SentPackedMessage; const size_t MaxBlockSize = std::numeric_limits<size_t>::max(); const size_t MaxBlockMessageCount = 1; //!< Max message count that can be packed into a single block. In block version 0 is equal to 1 for compatibility @@ -464,7 +464,7 @@ private: void HandleClosed(const TSessionClosedEvent&); TAdaptiveLock Lock; - std::queue<TContinuationToken> ContinueTokens; + std::queue<TContinuationToken> ContinueTokens; bool Closed = false; }; diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/ya.make b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/ya.make index 225fec6818..21c92fa949 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/ya.make +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/ya.make @@ -11,7 +11,7 @@ SRCS( write_session.cpp read_session.cpp persqueue.cpp - persqueue_impl.cpp + persqueue_impl.cpp ) PEERDIR( diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/persqueue.h b/ydb/public/sdk/cpp/client/ydb_persqueue_core/persqueue.h index 280b119104..ee9aa75a0a 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/persqueue.h +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/persqueue.h @@ -121,9 +121,9 @@ struct TDescribeTopicResult : public TStatus { GETTER(bool, AllowUnauthenticatedWrite); GETTER(bool, AllowUnauthenticatedRead); GETTER(ui32, PartitionsPerTablet); - GETTER(ui32, AbcId); - GETTER(TString, AbcSlug); - + GETTER(ui32, AbcId); + GETTER(TString, AbcSlug); + const TVector<TReadRule>& ReadRules() const { return ReadRules_; } @@ -144,8 +144,8 @@ struct TDescribeTopicResult : public TStatus { bool AllowUnauthenticatedRead_; bool AllowUnauthenticatedWrite_; ui32 PartitionsPerTablet_; - ui32 AbcId_; - TString AbcSlug_; + ui32 AbcId_; + TString AbcSlug_; TVector<TReadRule> ReadRules_; TMaybe<TRemoteMirrorRule> RemoteMirrorRule_; }; @@ -241,9 +241,9 @@ struct TTopicSettings : public TOperationRequestSettings<TDerived> { FLUENT_SETTING_DEFAULT(ui32, PartitionsPerTablet, 2); - FLUENT_SETTING_OPTIONAL(ui32, AbcId); - FLUENT_SETTING_OPTIONAL(TString, AbcSlug); - + FLUENT_SETTING_OPTIONAL(ui32, AbcId); + FLUENT_SETTING_OPTIONAL(TString, AbcSlug); + FLUENT_SETTING_DEFAULT(TVector<TReadRuleSettings>, ReadRules, {}); FLUENT_SETTING_OPTIONAL(TRemoteMirrorRuleSettings, RemoteMirrorRule); @@ -261,8 +261,8 @@ struct TTopicSettings : public TOperationRequestSettings<TDerived> { MaxPartitionWriteBurst_ = settings.MaxPartitionWriteBurst(); ClientWriteDisabled_ = settings.ClientWriteDisabled(); PartitionsPerTablet_ = settings.PartitionsPerTablet(); - if (settings.AbcId()) AbcId_ = settings.AbcId(); - if (!settings.AbcSlug().empty()) AbcSlug_ = settings.AbcSlug(); + if (settings.AbcId()) AbcId_ = settings.AbcId(); + if (!settings.AbcSlug().empty()) AbcSlug_ = settings.AbcSlug(); AllowUnauthenticatedRead_ = settings.AllowUnauthenticatedRead(); AllowUnauthenticatedWrite_ = settings.AllowUnauthenticatedWrite(); @@ -336,12 +336,12 @@ struct TWriteStat : public TThrRefBase { //}; -enum class EClusterDiscoveryMode { - Auto = 0, // enables cluster discovery only for hostname "logbroker.yandex.net" and "logbroker-prestable.yandex.net" - On, - Off -}; - +enum class EClusterDiscoveryMode { + Auto = 0, // enables cluster discovery only for hostname "logbroker.yandex.net" and "logbroker-prestable.yandex.net" + On, + Off +}; + class TContinuationToken : public TMoveOnly { friend class TWriteSession; private: @@ -418,7 +418,7 @@ struct TPartitionStream : public TThrRefBase { using TPtr = TIntrusivePtr<TPartitionStream>; -public: +public: //! Temporary stop receiving data from this partition stream. // virtual void StopReading() = 0; // Not implemented yet. @@ -468,7 +468,7 @@ protected: ui64 PartitionId; }; - + //! Events for read session. struct TReadSessionEvent { @@ -694,8 +694,8 @@ struct TReadSessionEvent { //! Committed offset. //! This means that from now the first available //! message offset in current partition - //! for current consumer is this offset. - //! All messages before are committed and futher never be available. + //! for current consumer is this offset. + //! All messages before are committed and futher never be available. ui64 GetCommittedOffset() const { return CommittedOffset; } @@ -711,15 +711,15 @@ struct TReadSessionEvent { //! Server request for creating partition stream. struct TCreatePartitionStreamEvent { - explicit TCreatePartitionStreamEvent(TPartitionStream::TPtr, ui64 committedOffset, ui64 endOffset); + explicit TCreatePartitionStreamEvent(TPartitionStream::TPtr, ui64 committedOffset, ui64 endOffset); const TPartitionStream::TPtr& GetPartitionStream() const { return PartitionStream; } - //! Current committed offset in partition stream. - ui64 GetCommittedOffset() const { - return CommittedOffset; + //! Current committed offset in partition stream. + ui64 GetCommittedOffset() const { + return CommittedOffset; } //! Offset of first not existing message in partition stream. @@ -736,7 +736,7 @@ struct TReadSessionEvent { private: TPartitionStream::TPtr PartitionStream; - ui64 CommittedOffset; + ui64 CommittedOffset; ui64 EndOffset; }; @@ -878,7 +878,7 @@ private: //! Event debug string. TString DebugString(const TReadSessionEvent::TEvent& event); - + //! Retry policy. //! Calculates delay before next retry. //! Has several default implementations: @@ -1001,7 +1001,7 @@ struct TWriteSessionEvent { TMaybe<TWrittenMessageDetails> Details; //! Write stats from server. See TWriteStat. nullptr for DISCARDED event. TWriteStat::TPtr Stat; - + }; struct TAcksEvent { @@ -1009,26 +1009,26 @@ struct TWriteSessionEvent { //! Acks for messages from one WriteBatch request could be emitted as several TAcksEvents - //! they are provided to client as soon as possible. TVector<TWriteAck> Acks; - - TString DebugString() const; - + + TString DebugString() const; + }; //! Indicates that a writer is ready to accept new message(s). //! Continuation token should be kept and then used in write methods. struct TReadyToAcceptEvent { TContinuationToken ContinuationToken; - - TString DebugString() const; - + + TString DebugString() const; + }; using TEvent = std::variant<TAcksEvent, TReadyToAcceptEvent, TSessionClosedEvent>; }; -//! Event debug string. -TString DebugString(const TWriteSessionEvent::TEvent& event); - +//! Event debug string. +TString DebugString(const TWriteSessionEvent::TEvent& event); + using TSessionClosedHandler = std::function<void(const TSessionClosedEvent&)>; //! Settings for write session. @@ -1081,11 +1081,11 @@ struct TWriteSessionSettings : public TRequestSettings<TWriteSessionSettings> { FLUENT_SETTING(IRetryPolicy::TPtr, RetryPolicy); //! User metadata that may be attached to write session. - TWriteSessionSettings& AppendSessionMeta(const TString& key, const TString& value) { - Meta_.Fields[key] = value; - return *this; - }; - + TWriteSessionSettings& AppendSessionMeta(const TString& key, const TString& value) { + Meta_.Fields[key] = value; + return *this; + }; + NYdb::NPersQueue::TWriteSessionMeta Meta_; //! Writer will accumulate messages until reaching up to BatchFlushSize bytes @@ -1097,7 +1097,7 @@ struct TWriteSessionSettings : public TRequestSettings<TWriteSessionSettings> { FLUENT_SETTING_OPTIONAL(TDuration, BatchFlushInterval); FLUENT_SETTING_OPTIONAL(ui64, BatchFlushSizeBytes); - FLUENT_SETTING_DEFAULT(TDuration, ConnectTimeout, TDuration::Seconds(30)); + FLUENT_SETTING_DEFAULT(TDuration, ConnectTimeout, TDuration::Seconds(30)); FLUENT_SETTING_OPTIONAL(TWriterCounters::TPtr, Counters); @@ -1138,13 +1138,13 @@ struct TWriteSessionSettings : public TRequestSettings<TWriteSessionSettings> { //! Event handlers. FLUENT_SETTING(TEventHandlers, EventHandlers); - - //! Enables validation of SeqNo. If enabled, then writer will check writing with seqNo and without it and throws exception. - FLUENT_SETTING_DEFAULT(bool, ValidateSeqNo, true); - - //! Manages cluster discovery mode. - FLUENT_SETTING_OPTIONAL(EClusterDiscoveryMode, ClusterDiscoveryMode); - + + //! Enables validation of SeqNo. If enabled, then writer will check writing with seqNo and without it and throws exception. + FLUENT_SETTING_DEFAULT(bool, ValidateSeqNo, true); + + //! Manages cluster discovery mode. + FLUENT_SETTING_OPTIONAL(EClusterDiscoveryMode, ClusterDiscoveryMode); + }; //! Read settings for single topic. @@ -1276,9 +1276,9 @@ struct TReadSessionSettings : public TRequestSettings<TReadSessionSettings> { } //! Disable Clusters discovery. ReadMirrored/ReadOriginal/ReadAll will not have any effect - //! if this option is true. - FLUENT_SETTING_DEFAULT(bool, DisableClusterDiscovery, false); - + //! if this option is true. + FLUENT_SETTING_DEFAULT(bool, DisableClusterDiscovery, false); + //! Maximum memory usage for read session. FLUENT_SETTING_DEFAULT(size_t, MaxMemoryUsageBytes, 100 * 1024 * 1024); @@ -1324,8 +1324,8 @@ struct TReadSessionSettings : public TRequestSettings<TReadSessionSettings> { //! 3. If ReadOnlyOriginal is false and one cluster is specified read will be done from all topic instances (mirrored and original) in one cluster. //! Use ReadMirrored() function for this variant. FLUENT_SETTING_VECTOR(TString, Clusters); - - FLUENT_SETTING_DEFAULT(TDuration, ConnectTimeout, TDuration::Seconds(30)); + + FLUENT_SETTING_DEFAULT(TDuration, ConnectTimeout, TDuration::Seconds(30)); }; //! Simple write session. Does not need event handlers. Does not provide Events, ContinuationTokens, write Acks. @@ -1377,12 +1377,12 @@ public: //! continuationToken - a token earlier provided to client with ReadyToAccept event. virtual void Write(TContinuationToken&& continuationToken, TStringBuf data, TMaybe<ui64> seqNo = Nothing(), TMaybe<TInstant> createTimestamp = Nothing()) = 0; - //! Write single message that is already coded by codec. Codec from settings does not apply to this message. - //! continuationToken - a token earlier provided to client with ReadyToAccept event. - //! originalSize - size of unpacked message - virtual void WriteEncoded(TContinuationToken&& continuationToken, TStringBuf data, ECodec codec, ui32 originalSize, TMaybe<ui64> seqNo = Nothing(), TMaybe<TInstant> createTimestamp = Nothing()) = 0; - - + //! Write single message that is already coded by codec. Codec from settings does not apply to this message. + //! continuationToken - a token earlier provided to client with ReadyToAccept event. + //! originalSize - size of unpacked message + virtual void WriteEncoded(TContinuationToken&& continuationToken, TStringBuf data, ECodec codec, ui32 originalSize, TMaybe<ui64> seqNo = Nothing(), TMaybe<TInstant> createTimestamp = Nothing()) = 0; + + //! Wait for all writes to complete (no more that closeTimeout()), than close. Empty maybe - means infinite timeout. //! return - true if all writes were completed and acked. false if timeout was reached and some writes were aborted. virtual bool Close(TDuration closeTimeout = TDuration::Max()) = 0; @@ -1452,16 +1452,16 @@ public: }; struct TPersQueueClientSettings : public TCommonClientSettingsBase<TPersQueueClientSettings> { - using TSelf = TPersQueueClientSettings; - + using TSelf = TPersQueueClientSettings; + //! Default executor for compression tasks. FLUENT_SETTING_DEFAULT(IExecutor::TPtr, DefaultCompressionExecutor, CreateThreadPoolExecutor(2)); //! Default executor for callbacks. FLUENT_SETTING_DEFAULT(IExecutor::TPtr, DefaultHandlersExecutor, CreateThreadPoolExecutor(1)); - - //! Manages cluster discovery mode. - FLUENT_SETTING_DEFAULT(EClusterDiscoveryMode, ClusterDiscoveryMode, EClusterDiscoveryMode::On); + + //! Manages cluster discovery mode. + FLUENT_SETTING_DEFAULT(EClusterDiscoveryMode, ClusterDiscoveryMode, EClusterDiscoveryMode::On); }; // PersQueue client. diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/basic_usage_ut.cpp b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/basic_usage_ut.cpp index 1ddd6364b6..7e0ac17404 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/basic_usage_ut.cpp +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/basic_usage_ut.cpp @@ -9,8 +9,8 @@ #include <util/generic/queue.h> #include <util/string/join.h> #include <util/system/event.h> -#include <util/stream/zlib.h> -#include <util/stream/str.h> +#include <util/stream/zlib.h> +#include <util/stream/str.h> #include <atomic> @@ -29,7 +29,7 @@ Y_UNIT_TEST_SUITE(BasicUsage) { using TWriteCallable = std::function<TFuture<TWriteResult>(TString& data, ui64 sequenceNumber, TInstant createdAt)>; - void WriteAndReadAndCommitRandomMessages(TPersQueueYdbSdkTestSetup* setup, TWriteCallable write, bool disableClusterDiscovery = false) { + void WriteAndReadAndCommitRandomMessages(TPersQueueYdbSdkTestSetup* setup, TWriteCallable write, bool disableClusterDiscovery = false) { auto log = setup->GetLog(); const TInstant start = TInstant::Now(); TVector<TString> messages; @@ -59,7 +59,7 @@ Y_UNIT_TEST_SUITE(BasicUsage) { log.Write(TLOG_INFO, "All messages are written"); std::shared_ptr<IReadSession> readSession = setup->GetPersQueueClient().CreateReadSession( - setup->GetReadSessionSettings().DisableClusterDiscovery(disableClusterDiscovery) + setup->GetReadSessionSettings().DisableClusterDiscovery(disableClusterDiscovery) ); // auto isStarted = consumer->Start().ExtractValueSync(); // AssertStreamingMessageCase(TReadResponse::kInit, isStarted.Response); @@ -68,7 +68,7 @@ Y_UNIT_TEST_SUITE(BasicUsage) { TMaybe<ui32> committedOffset; ui32 previousOffset = 0; bool closed = false; - while ((readMessageCount < messageCount || committedOffset <= previousOffset) && !closed) { + while ((readMessageCount < messageCount || committedOffset <= previousOffset) && !closed) { Cerr << "Get event on client\n"; auto event = *readSession->GetEvent(true); std::visit(TOverloaded { @@ -123,12 +123,12 @@ Y_UNIT_TEST_SUITE(BasicUsage) { << previousOffset << ")"; } - UNIT_ASSERT_VALUES_EQUAL(previousOffset + 1, committedOffset); + UNIT_ASSERT_VALUES_EQUAL(previousOffset + 1, committedOffset); UNIT_ASSERT_VALUES_EQUAL(readMessageCount, messageCount); log.Write(TLOG_INFO, Sprintf("Time took to write and read %u messages, %u [MiB] in total is %lu [s]", messageCount, (totalSize / 1024 / 1024), (TInstant::Now() - start).Seconds())); } - + void SimpleWriteAndValidateData( TPersQueueYdbSdkTestSetup* setup, TWriteSessionSettings& writeSettings, ui64 count, TMaybe<bool> shouldCaptureData = Nothing() @@ -137,7 +137,7 @@ Y_UNIT_TEST_SUITE(BasicUsage) { auto session = client.CreateSimpleBlockingWriteSession(writeSettings); TString messageBase = "message-"; TVector<TString> sentMessages; - + for (auto i = 0u; i < count; i++) { sentMessages.emplace_back(messageBase * (i+1) + ToString(i)); auto res = session->Write(sentMessages.back()); @@ -218,23 +218,23 @@ Y_UNIT_TEST_SUITE(BasicUsage) { WriteAndReadAndCommitRandomMessages(setup.get(), std::move(clientWrite)); } - Y_UNIT_TEST(TWriteSession_WriteAndReadAndCommitRandomMessagesNoClusterDiscovery) { - auto setup = std::make_shared<TPersQueueYdbSdkTestSetup>(TEST_CASE_NAME); - auto log = setup->GetLog(); - TYDBClientEventLoop clientEventLoop{setup}; - - TAutoEvent messagesWrittenToBuffer; - auto clientWrite = [&](TString& message, ui64 sequenceNumber, TInstant createdAt) { - auto promise = NewPromise<TWriteResult>(); - //log << TLOG_INFO << "Enqueue message with sequence number " << sequenceNumber; - clientEventLoop.MessageBuffer.Enqueue(TAcknowledgableMessage{message, sequenceNumber, createdAt, promise}); - messagesWrittenToBuffer.Signal(); - return promise.GetFuture(); - }; - - WriteAndReadAndCommitRandomMessages(setup.get(), std::move(clientWrite), true); - } - + Y_UNIT_TEST(TWriteSession_WriteAndReadAndCommitRandomMessagesNoClusterDiscovery) { + auto setup = std::make_shared<TPersQueueYdbSdkTestSetup>(TEST_CASE_NAME); + auto log = setup->GetLog(); + TYDBClientEventLoop clientEventLoop{setup}; + + TAutoEvent messagesWrittenToBuffer; + auto clientWrite = [&](TString& message, ui64 sequenceNumber, TInstant createdAt) { + auto promise = NewPromise<TWriteResult>(); + //log << TLOG_INFO << "Enqueue message with sequence number " << sequenceNumber; + clientEventLoop.MessageBuffer.Enqueue(TAcknowledgableMessage{message, sequenceNumber, createdAt, promise}); + messagesWrittenToBuffer.Signal(); + return promise.GetFuture(); + }; + + WriteAndReadAndCommitRandomMessages(setup.get(), std::move(clientWrite), true); + } + Y_UNIT_TEST(TSimpleWriteSession_AutoSeqNo_BasicUsage) { auto setup = std::make_shared<TPersQueueYdbSdkTestSetup>(TEST_CASE_NAME); auto& client = setup->GetPersQueueClient(); @@ -255,8 +255,8 @@ Y_UNIT_TEST_SUITE(BasicUsage) { UNIT_ASSERT(res); } - - + + Y_UNIT_TEST(TWriteSession_AutoBatching) { // ToDo: Re-enable once batching takes more than 1 message at once return; @@ -278,107 +278,107 @@ Y_UNIT_TEST_SUITE(BasicUsage) { } WaitMessagesAcked(writer, 1, seqNo); } - - Y_UNIT_TEST(TWriteSession_WriteEncoded) { - auto setup = std::make_shared<TPersQueueYdbSdkTestSetup>(TEST_CASE_NAME); - auto& client = setup->GetPersQueueClient(); - auto settings = setup->GetWriteSessionSettings(); - size_t batchSize = 100000000; - settings.BatchFlushInterval(TDuration::Seconds(1000)); // Batch on size, not on time. - settings.BatchFlushSizeBytes(batchSize); - auto writer = client.CreateWriteSession(settings); - TString message = "message"; - TString packed; - { - TStringOutput so(packed); - TZLibCompress oss(&so, ZLib::GZip, 6); - oss << message; - } - - Cerr << message << " " << packed << "\n"; - - { - auto event = *writer->GetEvent(true); - UNIT_ASSERT(!writer->WaitEvent().Wait(TDuration::Seconds(1))); - auto ev = writer->WaitEvent(); - UNIT_ASSERT(std::holds_alternative<TWriteSessionEvent::TReadyToAcceptEvent>(event)); - auto continueToken = std::move(std::get<TWriteSessionEvent::TReadyToAcceptEvent>(event).ContinuationToken); - writer->Write(std::move(continueToken), message); - UNIT_ASSERT(ev.Wait(TDuration::Seconds(1))); - } - { - auto event = *writer->GetEvent(true); - UNIT_ASSERT(std::holds_alternative<TWriteSessionEvent::TReadyToAcceptEvent>(event)); - auto continueToken = std::move(std::get<TWriteSessionEvent::TReadyToAcceptEvent>(event).ContinuationToken); - writer->Write(std::move(continueToken), ""); - } - { - auto event = *writer->GetEvent(true); - UNIT_ASSERT(std::holds_alternative<TWriteSessionEvent::TReadyToAcceptEvent>(event)); - auto continueToken = std::move(std::get<TWriteSessionEvent::TReadyToAcceptEvent>(event).ContinuationToken); - writer->WriteEncoded(std::move(continueToken), packed, ECodec::GZIP, message.size()); - } - - ui32 acks = 0, tokens = 0; - while(acks < 4 || tokens < 2) { - auto event = *writer->GetEvent(true); - if (std::holds_alternative<TWriteSessionEvent::TAcksEvent>(event)) acks += std::get<TWriteSessionEvent::TAcksEvent>(event).Acks.size(); - if (std::holds_alternative<TWriteSessionEvent::TReadyToAcceptEvent>(event)) { - if (tokens == 0) { - auto continueToken = std::move(std::get<TWriteSessionEvent::TReadyToAcceptEvent>(event).ContinuationToken); - writer->WriteEncoded(std::move(continueToken), "", ECodec::RAW, 0); - } - ++tokens; - } - Cerr << "GOT EVENT " << acks << " " << tokens << "\n"; - - } - UNIT_ASSERT(!writer->WaitEvent().Wait(TDuration::Seconds(5))); - - UNIT_ASSERT_VALUES_EQUAL(acks, 4); - UNIT_ASSERT_VALUES_EQUAL(tokens, 2); - - std::shared_ptr<IReadSession> readSession = setup->GetPersQueueClient().CreateReadSession( - setup->GetReadSessionSettings().DisableClusterDiscovery(true) - ); - ui32 readMessageCount = 0; - while (readMessageCount < 4) { - Cerr << "Get event on client\n"; - auto event = *readSession->GetEvent(true); - std::visit(TOverloaded { - [&](TReadSessionEvent::TDataReceivedEvent& event) { - for (auto& message: event.GetMessages()) { - TString sourceId = message.GetMessageGroupId(); - ui32 seqNo = message.GetSeqNo(); - UNIT_ASSERT_VALUES_EQUAL(readMessageCount + 1, seqNo); - ++readMessageCount; - UNIT_ASSERT_VALUES_EQUAL(message.GetData(), (seqNo % 2) == 1 ? "message" : ""); - } - }, - [&](TReadSessionEvent::TCommitAcknowledgementEvent&) { - UNIT_FAIL("no commits in test"); - }, - [&](TReadSessionEvent::TCreatePartitionStreamEvent& event) { - event.Confirm(); - }, - [&](TReadSessionEvent::TDestroyPartitionStreamEvent& event) { - event.Confirm(); - }, - [&](TReadSessionEvent::TPartitionStreamStatusEvent&) { - UNIT_FAIL("Test does not support lock sessions yet"); - }, - [&](TReadSessionEvent::TPartitionStreamClosedEvent&) { - UNIT_FAIL("Test does not support lock sessions yet"); - }, - [&](TSessionClosedEvent&) { - UNIT_FAIL("Session closed"); - } - - }, event); - } - } - - + + Y_UNIT_TEST(TWriteSession_WriteEncoded) { + auto setup = std::make_shared<TPersQueueYdbSdkTestSetup>(TEST_CASE_NAME); + auto& client = setup->GetPersQueueClient(); + auto settings = setup->GetWriteSessionSettings(); + size_t batchSize = 100000000; + settings.BatchFlushInterval(TDuration::Seconds(1000)); // Batch on size, not on time. + settings.BatchFlushSizeBytes(batchSize); + auto writer = client.CreateWriteSession(settings); + TString message = "message"; + TString packed; + { + TStringOutput so(packed); + TZLibCompress oss(&so, ZLib::GZip, 6); + oss << message; + } + + Cerr << message << " " << packed << "\n"; + + { + auto event = *writer->GetEvent(true); + UNIT_ASSERT(!writer->WaitEvent().Wait(TDuration::Seconds(1))); + auto ev = writer->WaitEvent(); + UNIT_ASSERT(std::holds_alternative<TWriteSessionEvent::TReadyToAcceptEvent>(event)); + auto continueToken = std::move(std::get<TWriteSessionEvent::TReadyToAcceptEvent>(event).ContinuationToken); + writer->Write(std::move(continueToken), message); + UNIT_ASSERT(ev.Wait(TDuration::Seconds(1))); + } + { + auto event = *writer->GetEvent(true); + UNIT_ASSERT(std::holds_alternative<TWriteSessionEvent::TReadyToAcceptEvent>(event)); + auto continueToken = std::move(std::get<TWriteSessionEvent::TReadyToAcceptEvent>(event).ContinuationToken); + writer->Write(std::move(continueToken), ""); + } + { + auto event = *writer->GetEvent(true); + UNIT_ASSERT(std::holds_alternative<TWriteSessionEvent::TReadyToAcceptEvent>(event)); + auto continueToken = std::move(std::get<TWriteSessionEvent::TReadyToAcceptEvent>(event).ContinuationToken); + writer->WriteEncoded(std::move(continueToken), packed, ECodec::GZIP, message.size()); + } + + ui32 acks = 0, tokens = 0; + while(acks < 4 || tokens < 2) { + auto event = *writer->GetEvent(true); + if (std::holds_alternative<TWriteSessionEvent::TAcksEvent>(event)) acks += std::get<TWriteSessionEvent::TAcksEvent>(event).Acks.size(); + if (std::holds_alternative<TWriteSessionEvent::TReadyToAcceptEvent>(event)) { + if (tokens == 0) { + auto continueToken = std::move(std::get<TWriteSessionEvent::TReadyToAcceptEvent>(event).ContinuationToken); + writer->WriteEncoded(std::move(continueToken), "", ECodec::RAW, 0); + } + ++tokens; + } + Cerr << "GOT EVENT " << acks << " " << tokens << "\n"; + + } + UNIT_ASSERT(!writer->WaitEvent().Wait(TDuration::Seconds(5))); + + UNIT_ASSERT_VALUES_EQUAL(acks, 4); + UNIT_ASSERT_VALUES_EQUAL(tokens, 2); + + std::shared_ptr<IReadSession> readSession = setup->GetPersQueueClient().CreateReadSession( + setup->GetReadSessionSettings().DisableClusterDiscovery(true) + ); + ui32 readMessageCount = 0; + while (readMessageCount < 4) { + Cerr << "Get event on client\n"; + auto event = *readSession->GetEvent(true); + std::visit(TOverloaded { + [&](TReadSessionEvent::TDataReceivedEvent& event) { + for (auto& message: event.GetMessages()) { + TString sourceId = message.GetMessageGroupId(); + ui32 seqNo = message.GetSeqNo(); + UNIT_ASSERT_VALUES_EQUAL(readMessageCount + 1, seqNo); + ++readMessageCount; + UNIT_ASSERT_VALUES_EQUAL(message.GetData(), (seqNo % 2) == 1 ? "message" : ""); + } + }, + [&](TReadSessionEvent::TCommitAcknowledgementEvent&) { + UNIT_FAIL("no commits in test"); + }, + [&](TReadSessionEvent::TCreatePartitionStreamEvent& event) { + event.Confirm(); + }, + [&](TReadSessionEvent::TDestroyPartitionStreamEvent& event) { + event.Confirm(); + }, + [&](TReadSessionEvent::TPartitionStreamStatusEvent&) { + UNIT_FAIL("Test does not support lock sessions yet"); + }, + [&](TReadSessionEvent::TPartitionStreamClosedEvent&) { + UNIT_FAIL("Test does not support lock sessions yet"); + }, + [&](TSessionClosedEvent&) { + UNIT_FAIL("Session closed"); + } + + }, event); + } + } + + Y_UNIT_TEST(TWriteSession_BatchingProducesContinueTokens) { // ToDo: Re-enable once batching takes more than 1 message at once return; diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/read_session_ut.cpp b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/read_session_ut.cpp index 01e5fb4873..9b2367e3f3 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/read_session_ut.cpp +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/read_session_ut.cpp @@ -294,11 +294,11 @@ struct TMockReadSessionProcessor : public TMockProcessorFactory<Ydb::PersQueue:: return Message(offset, Compress(sourceData, codec), codec, seqNo, createTimestamp); } - TServerReadInfo& BrokenCompressMessage(ui64 offset, const TString& sourceData, Ydb::PersQueue::V1::Codec codec = Ydb::PersQueue::V1::CODEC_GZIP, ui64 seqNo = 1, TInstant createTimestamp = TInstant::MilliSeconds(42)) { - return Message(offset, "broken_header_" + Compress(sourceData, codec), codec, seqNo, createTimestamp); - } - - + TServerReadInfo& BrokenCompressMessage(ui64 offset, const TString& sourceData, Ydb::PersQueue::V1::Codec codec = Ydb::PersQueue::V1::CODEC_GZIP, ui64 seqNo = 1, TInstant createTimestamp = TInstant::MilliSeconds(42)) { + return Message(offset, "broken_header_" + Compress(sourceData, codec), codec, seqNo, createTimestamp); + } + + TServerReadInfo& PartitionStreamStatus(ui64 committedOffset, ui64 endOffset, TInstant writeWatermark, const TString& topic = "TestTopic", const TString& cluster = "TestCluster", const ui64 partition = 1, const ui64 assignId = 1) { auto* req = Response.mutable_partition_status(); req->mutable_topic()->set_path(topic); @@ -487,7 +487,7 @@ public: std::shared_ptr<TReadSessionEventsQueue> GetEventsQueue(); ::IExecutor::TPtr GetDefaultExecutor(); - void SuccessfulInit(bool flag = true); + void SuccessfulInit(bool flag = true); TPartitionStream::TPtr CreatePartitionStream(const TString& topic = "TestTopic", const TString& cluster = "TestCluster", ui64 partition = 1, ui64 assignId = 1); // Assertions. @@ -582,10 +582,10 @@ TReadSessionImplTestSetup::TReadSessionImplTestSetup() { .Counters(MakeIntrusive<NYdb::NPersQueue::TReaderCounters>(MakeIntrusive<NMonitoring::TDynamicCounters>())); Log.SetFormatter(GetPrefixLogFormatter("")); - - Mock::AllowLeak(MockProcessor.Get()); - Mock::AllowLeak(MockProcessorFactory.get()); - Mock::AllowLeak(MockErrorHandler.Get()); + + Mock::AllowLeak(MockProcessor.Get()); + Mock::AllowLeak(MockProcessorFactory.get()); + Mock::AllowLeak(MockErrorHandler.Get()); } TReadSessionImplTestSetup::~TReadSessionImplTestSetup() noexcept(false) { @@ -637,11 +637,11 @@ std::shared_ptr<TReadSessionEventsQueue> TReadSessionImplTestSetup::GetEventsQue return EventsQueue; } -void TReadSessionImplTestSetup::SuccessfulInit(bool hasInitRequest) { +void TReadSessionImplTestSetup::SuccessfulInit(bool hasInitRequest) { EXPECT_CALL(*MockProcessorFactory, OnCreateProcessor(1)) .WillOnce([&](){ MockProcessorFactory->CreateProcessor(MockProcessor); }); - if (hasInitRequest) - EXPECT_CALL(*MockProcessor, OnInitRequest(_)); + if (hasInitRequest) + EXPECT_CALL(*MockProcessor, OnInitRequest(_)); MockProcessor->AddServerResponse(TMockReadSessionProcessor::TServerReadInfo().InitResponse("123-session-id-321")); GetSession()->Start(); MockProcessorFactory->Wait(); @@ -676,14 +676,14 @@ Y_UNIT_TEST_SUITE(PersQueueSdkReadSessionTest) { } std::shared_ptr<IReadSession> session = setup.GetPersQueueClient().CreateReadSession(settings); - TDeferredCommit dc; + TDeferredCommit dc; // Event 1: create partition stream. { TMaybe<TReadSessionEvent::TEvent> event = session->GetEvent(true); UNIT_ASSERT(event); UNIT_ASSERT_EVENT_TYPE(*event, TReadSessionEvent::TCreatePartitionStreamEvent); std::get<TReadSessionEvent::TCreatePartitionStreamEvent>(*event).Confirm(); - Cerr << "create event " << DebugString(*event) << Endl; + Cerr << "create event " << DebugString(*event) << Endl; } // Event 2: data. { @@ -695,46 +695,46 @@ Y_UNIT_TEST_SUITE(PersQueueSdkReadSessionTest) { for (auto& msg : dataEvent.GetMessages()) { UNIT_ASSERT(msg.GetData() == "message1" || msg.GetData() == "message2"); } - Cerr << "data event " << DebugString(*event) << Endl; + Cerr << "data event " << DebugString(*event) << Endl; if (commit) { - dc.Add(dataEvent); + dc.Add(dataEvent); + } + } + setup.WriteToTopic({"message3"}); + // Event 3: data. + { + TMaybe<TReadSessionEvent::TEvent> event = session->GetEvent(true); + UNIT_ASSERT(event); + UNIT_ASSERT_EVENT_TYPE(*event, TReadSessionEvent::TDataReceivedEvent); + TReadSessionEvent::TDataReceivedEvent& dataEvent = std::get<TReadSessionEvent::TDataReceivedEvent>(*event); + UNIT_ASSERT_VALUES_EQUAL(dataEvent.GetMessages().size(), 1); + for (auto& msg : dataEvent.GetMessages()) { + UNIT_ASSERT(msg.GetData() == "message3"); } + Cerr << "data event " << DebugString(*event) << Endl; + + dataEvent.Commit(); // Commit right now! } - setup.WriteToTopic({"message3"}); - // Event 3: data. - { - TMaybe<TReadSessionEvent::TEvent> event = session->GetEvent(true); - UNIT_ASSERT(event); - UNIT_ASSERT_EVENT_TYPE(*event, TReadSessionEvent::TDataReceivedEvent); - TReadSessionEvent::TDataReceivedEvent& dataEvent = std::get<TReadSessionEvent::TDataReceivedEvent>(*event); - UNIT_ASSERT_VALUES_EQUAL(dataEvent.GetMessages().size(), 1); - for (auto& msg : dataEvent.GetMessages()) { - UNIT_ASSERT(msg.GetData() == "message3"); - } - Cerr << "data event " << DebugString(*event) << Endl; - - dataEvent.Commit(); // Commit right now! - } - - dc.Commit(); - + + dc.Commit(); + if (close) { session->Close(TDuration::Seconds(30)); } - // Event 4: commit ack. + // Event 4: commit ack. if (commit) { TMaybe<TReadSessionEvent::TEvent> event = session->GetEvent(!close); // Event is expected to be already in queue if closed. UNIT_ASSERT(event); - Cerr << "commit ack event " << DebugString(*event) << Endl; - UNIT_ASSERT(std::holds_alternative<TReadSessionEvent::TCommitAcknowledgementEvent>(*event)); + Cerr << "commit ack event " << DebugString(*event) << Endl; + UNIT_ASSERT(std::holds_alternative<TReadSessionEvent::TCommitAcknowledgementEvent>(*event)); } if (close) { TMaybe<TReadSessionEvent::TEvent> event = session->GetEvent(false); UNIT_ASSERT(event); - Cerr << "close event " << DebugString(*event) << Endl; - UNIT_ASSERT(std::holds_alternative<TSessionClosedEvent>(*event)); + Cerr << "close event " << DebugString(*event) << Endl; + UNIT_ASSERT(std::holds_alternative<TSessionClosedEvent>(*event)); UNIT_ASSERT_STRING_CONTAINS(DebugString(*event), "Session was gracefully closed"); } } @@ -1190,18 +1190,18 @@ Y_UNIT_TEST_SUITE(ReadSessionImplTest) { setup.AssertNoEvents(); } - Y_UNIT_TEST(BrokenCompressedData) { - TReadSessionImplTestSetup setup; - setup.Settings.DecompressionExecutor(new TReorderingExecutor(1)); - setup.SuccessfulInit(); - TPartitionStream::TPtr stream = setup.CreatePartitionStream(); - setup.MockProcessor->AddServerResponse(TMockReadSessionProcessor::TServerReadInfo() - .PartitionData(1) - .Batch("src_id") + Y_UNIT_TEST(BrokenCompressedData) { + TReadSessionImplTestSetup setup; + setup.Settings.DecompressionExecutor(new TReorderingExecutor(1)); + setup.SuccessfulInit(); + TPartitionStream::TPtr stream = setup.CreatePartitionStream(); + setup.MockProcessor->AddServerResponse(TMockReadSessionProcessor::TServerReadInfo() + .PartitionData(1) + .Batch("src_id") .BrokenCompressMessage(1, "message") .CompressMessage(2, "message2") .CompressMessage(3, "message3")); - + // Exception was passed during decompression. { TMaybe<TReadSessionEvent::TEvent> event = setup.EventsQueue->GetEvent(true); @@ -1214,19 +1214,19 @@ Y_UNIT_TEST_SUITE(ReadSessionImplTest) { UNIT_ASSERT_VALUES_EQUAL(dataEvent.GetMessages()[1].GetData(), "message2"); UNIT_ASSERT_VALUES_EQUAL(dataEvent.GetMessages()[2].GetData(), "message3"); } - - setup.AssertNoEvents(); - } - + + setup.AssertNoEvents(); + } + void DecompressImpl(Ydb::PersQueue::V1::Codec codec, const TString& data = "msg", ::IExecutor::TPtr executor = nullptr) { TReadSessionImplTestSetup setup; if (executor) { setup.Settings.DecompressionExecutor(executor); } - setup.SuccessfulInit(false); + setup.SuccessfulInit(false); TPartitionStream::TPtr stream = setup.CreatePartitionStream(); - - EXPECT_CALL(*setup.MockProcessor, OnReadRequest(_)); + + EXPECT_CALL(*setup.MockProcessor, OnReadRequest(_)); setup.MockProcessor->AddServerResponse(TMockReadSessionProcessor::TServerReadInfo() .PartitionData(1) .Batch("src_id") @@ -1308,18 +1308,18 @@ Y_UNIT_TEST_SUITE(ReadSessionImplTest) { ui64 offset = 1; ui64 seqNo = 42; THashSet<ui64> committedCookies; - THashSet<ui64> committedOffsets; + THashSet<ui64> committedOffsets; EXPECT_CALL(*setup.MockProcessor, OnCommitRequest(_)) - .WillRepeatedly(Invoke([&committedCookies, &committedOffsets](const Ydb::PersQueue::V1::MigrationStreamingReadClientMessage::Commit& req) { + .WillRepeatedly(Invoke([&committedCookies, &committedOffsets](const Ydb::PersQueue::V1::MigrationStreamingReadClientMessage::Commit& req) { for (const auto& commit : req.cookies()) { committedCookies.insert(commit.partition_cookie()); } - for (const auto& range : req.offset_ranges()) { - Cerr << "GOT RANGE " << range.start_offset() << " " << range.end_offset() << "\n"; - for (ui64 i = range.start_offset(); i < range.end_offset(); ++i) { - committedOffsets.insert(i); - } - } + for (const auto& range : req.offset_ranges()) { + Cerr << "GOT RANGE " << range.start_offset() << " " << range.end_offset() << "\n"; + for (ui64 i = range.start_offset(); i < range.end_offset(); ++i) { + committedOffsets.insert(i); + } + } })); for (ui64 i = 1; i <= serverBatchesCount; ++i) { @@ -1368,16 +1368,16 @@ Y_UNIT_TEST_SUITE(ReadSessionImplTest) { } dataEvent.Commit(); } - if (committedOffsets.empty()) { - UNIT_ASSERT_VALUES_EQUAL(committedCookies.size(), serverBatchesCount); - for (ui64 i = 1; i <= serverBatchesCount; ++i) { - UNIT_ASSERT(committedCookies.contains(i)); - } - } else { - UNIT_ASSERT_VALUES_EQUAL(committedOffsets.size(), batches * messagesInBatch + 1); - for (ui64 i = 0; i <= batches * messagesInBatch; ++i) { - UNIT_ASSERT(committedOffsets.contains(i)); - } + if (committedOffsets.empty()) { + UNIT_ASSERT_VALUES_EQUAL(committedCookies.size(), serverBatchesCount); + for (ui64 i = 1; i <= serverBatchesCount; ++i) { + UNIT_ASSERT(committedCookies.contains(i)); + } + } else { + UNIT_ASSERT_VALUES_EQUAL(committedOffsets.size(), batches * messagesInBatch + 1); + for (ui64 i = 0; i <= batches * messagesInBatch; ++i) { + UNIT_ASSERT(committedOffsets.contains(i)); + } } UNIT_ASSERT_VALUES_EQUAL(executor->GetTasksAdded(), expectedTasks); @@ -1451,13 +1451,13 @@ Y_UNIT_TEST_SUITE(ReadSessionImplTest) { Y_UNIT_TEST(UnpackBigBatchWithTwoPartitions) { TReadSessionImplTestSetup setup; - + setup.Settings.MaxMemoryUsageBytes(5000); setup.SuccessfulInit(); TPartitionStream::TPtr stream1 = setup.CreatePartitionStream("TestTopic", "TestCluster", 1, 1); TPartitionStream::TPtr stream2 = setup.CreatePartitionStream("TestTopic", "TestCluster", 2, 2); - + const TString messageData = GenerateMessageData(100); const TString compressedMessageData = Compress(messageData); ui64 offset = 1; @@ -1568,7 +1568,7 @@ Y_UNIT_TEST_SUITE(ReadSessionImplTest) { bool has2 = false; EXPECT_CALL(*setup.MockProcessor, OnCommitRequest(_)) .WillRepeatedly(Invoke([&](const Ydb::PersQueue::V1::MigrationStreamingReadClientMessage::Commit& req) { - Cerr << "Got commit req " << req << "\n"; + Cerr << "Got commit req " << req << "\n"; for (const auto& commit : req.cookies()) { if (commit.partition_cookie() == 1) { has1 = true; @@ -1578,12 +1578,12 @@ Y_UNIT_TEST_SUITE(ReadSessionImplTest) { UNIT_ASSERT(false); } } - for (const auto& range : req.offset_ranges()) { - Cerr << "RANGE " << range.start_offset() << " " << range.end_offset() << "\n"; - if (range.start_offset() == 10 && range.end_offset() == 12) has1 = true; - else if (range.start_offset() == 0 && range.end_offset() == 10) has2 = true; - else UNIT_ASSERT(false); - } + for (const auto& range : req.offset_ranges()) { + Cerr << "RANGE " << range.start_offset() << " " << range.end_offset() << "\n"; + if (range.start_offset() == 10 && range.end_offset() == 12) has1 = true; + else if (range.start_offset() == 0 && range.end_offset() == 10) has2 = true; + else UNIT_ASSERT(false); + } })); for (int i = 0; i < 2; ++i) { @@ -1616,43 +1616,43 @@ Y_UNIT_TEST_SUITE(ReadSessionImplTest) { TReadSessionEvent::TDataReceivedEvent& dataEvent = std::get<TReadSessionEvent::TDataReceivedEvent>(*event); // First time. - dataEvent.GetMessages()[0].Commit(); + dataEvent.GetMessages()[0].Commit(); UNIT_ASSERT_EXCEPTION(dataEvent.GetMessages()[0].Commit(), NYdb::TContractViolation); } - Y_UNIT_TEST(DataReceivedCallbackReal) { - NYdb::NPersQueue::NTests::TPersQueueYdbSdkTestSetup setup("ReadSession"); - auto settings = setup.GetReadSessionSettings(); - - auto calledPromise = NThreading::NewPromise<void>(); - int time = 0; - - settings.EventHandlers_.SimpleDataHandlers([&](TReadSessionEvent::TDataReceivedEvent& event) { - for (auto& message: event.GetMessages()) { - ++time; - Cerr << "GOT MESSAGE: " << message.DebugString(true) << "\n"; - UNIT_ASSERT_VALUES_EQUAL(message.GetData(), TStringBuilder() << "message" << time); - if (time == 3) { - calledPromise.SetValue(); - } - } - UNIT_ASSERT(time <= 3); - }, true); - - std::shared_ptr<IReadSession> session = setup.GetPersQueueClient().CreateReadSession(settings); - - UNIT_ASSERT(!calledPromise.GetFuture().Wait(TDuration::Seconds(5))); - - setup.WriteToTopic({"message1"}, false); - setup.WriteToTopic({"message2"}, false); - Sleep(TDuration::Seconds(1)); - setup.WriteToTopic({"message3"}, false); - - calledPromise.GetFuture().Wait(); - Sleep(TDuration::Seconds(10)); - } - + Y_UNIT_TEST(DataReceivedCallbackReal) { + NYdb::NPersQueue::NTests::TPersQueueYdbSdkTestSetup setup("ReadSession"); + auto settings = setup.GetReadSessionSettings(); + + auto calledPromise = NThreading::NewPromise<void>(); + int time = 0; + + settings.EventHandlers_.SimpleDataHandlers([&](TReadSessionEvent::TDataReceivedEvent& event) { + for (auto& message: event.GetMessages()) { + ++time; + Cerr << "GOT MESSAGE: " << message.DebugString(true) << "\n"; + UNIT_ASSERT_VALUES_EQUAL(message.GetData(), TStringBuilder() << "message" << time); + if (time == 3) { + calledPromise.SetValue(); + } + } + UNIT_ASSERT(time <= 3); + }, true); + + std::shared_ptr<IReadSession> session = setup.GetPersQueueClient().CreateReadSession(settings); + + UNIT_ASSERT(!calledPromise.GetFuture().Wait(TDuration::Seconds(5))); + + setup.WriteToTopic({"message1"}, false); + setup.WriteToTopic({"message2"}, false); + Sleep(TDuration::Seconds(1)); + setup.WriteToTopic({"message3"}, false); + + calledPromise.GetFuture().Wait(); + Sleep(TDuration::Seconds(10)); + } + Y_UNIT_TEST(DataReceivedCallback) { TReadSessionImplTestSetup setup; setup.Settings.DecompressionExecutor(MakeIntrusive<TReorderingExecutor>(2ull)); @@ -1770,9 +1770,9 @@ Y_UNIT_TEST_SUITE(ReadSessionImplTest) { auto commitCalled = std::make_shared<NThreading::TPromise<void>>(NThreading::NewPromise<void>()); auto commitCalledFuture = commitCalled->GetFuture(); - - Mock::AllowLeak(setup.MockProcessor.Get()); - + + Mock::AllowLeak(setup.MockProcessor.Get()); + EXPECT_CALL(*setup.MockProcessor, OnCommitRequest(_)) .WillOnce([=](){ commitCalled->SetValue(); }); @@ -1810,15 +1810,15 @@ Y_UNIT_TEST_SUITE(ReadSessionImplTest) { if (!withCommit && withGracefulRelease) { UNIT_ASSERT(!destroyCalledFuture.Wait(TDuration::MilliSeconds(100))); - (*dataReceivedEvent)->Commit(); + (*dataReceivedEvent)->Commit(); UNIT_ASSERT(!destroyCalledFuture.Wait(TDuration::MilliSeconds(100))); } if (withCommit) { commitCalledFuture.Wait(); - } if (withCommit || withGracefulRelease) { - setup.MockProcessor->AddServerResponse(TMockReadSessionProcessor::TServerReadInfo() - .CommitAcknowledgement(1)); + } if (withCommit || withGracefulRelease) { + setup.MockProcessor->AddServerResponse(TMockReadSessionProcessor::TServerReadInfo() + .CommitAcknowledgement(1)); } destroyCalledFuture.Wait(); diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/retry_policy_ut.cpp b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/retry_policy_ut.cpp index e116f8bfd4..50d050e973 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/retry_policy_ut.cpp +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/retry_policy_ut.cpp @@ -17,31 +17,31 @@ Y_UNIT_TEST_SUITE(RetryPolicy) { helper.Write(true); helper.Policy->Initialized(); // Thus ignoring possible early retries on "cluster initializing" auto doBreakDown = [&] () { - helper.Policy->ExpectBreakDown(); - NThreading::TPromise<void> retriesPromise = NThreading::NewPromise(); - Cerr << "WAIT for retries...\n"; - helper.Policy->WaitForRetries(30, retriesPromise); - Cerr << "KICK tablets\n"; - helper.Setup->KickTablets(); - + helper.Policy->ExpectBreakDown(); + NThreading::TPromise<void> retriesPromise = NThreading::NewPromise(); + Cerr << "WAIT for retries...\n"; + helper.Policy->WaitForRetries(30, retriesPromise); + Cerr << "KICK tablets\n"; + helper.Setup->KickTablets(); + auto f1 = helper.Write(false); auto f2 = helper.Write(); - + auto retriesFuture = retriesPromise.GetFuture(); retriesFuture.Wait(); - Cerr << "WAIT for retries done\n"; - + Cerr << "WAIT for retries done\n"; + NThreading::TPromise<void> repairPromise = NThreading::NewPromise(); auto repairFuture = repairPromise.GetFuture(); helper.Policy->WaitForRepair(repairPromise); - - - Cerr << "ALLOW tablets\n"; - helper.Setup->AllowTablets(); - - Cerr << "WAIT for repair\n"; - repairFuture.Wait(); - Cerr << "REPAIR done\n"; + + + Cerr << "ALLOW tablets\n"; + helper.Setup->AllowTablets(); + + Cerr << "WAIT for repair\n"; + repairFuture.Wait(); + Cerr << "REPAIR done\n"; f1.Wait(); f2.Wait(); helper.Write(true); @@ -111,7 +111,7 @@ Y_UNIT_TEST_SUITE(RetryPolicy) { setup2.AddDataCenter("dc1", *setup1, true); setup1->AddDataCenter("dc2", setup2, true); setup1->Start(); - setup2.Start(false); + setup2.Start(false); Cerr << "=== Start session 1\n"; auto helper = MakeHolder<TYdbPqWriterTestHelper>("", nullptr, TString(), setup1); helper->Write(true); @@ -121,15 +121,15 @@ Y_UNIT_TEST_SUITE(RetryPolicy) { auto waitForReconnect = [&](bool enable) { Cerr << "=== Expect breakdown\n"; retryPolicy->ExpectBreakDown(); - - NThreading::TPromise<void> retriesPromise = NThreading::NewPromise(); - auto retriesFuture = retriesPromise.GetFuture(); - retryPolicy->WaitForRetries(1, retriesPromise); - - NThreading::TPromise<void> repairPromise = NThreading::NewPromise(); - auto repairFuture = repairPromise.GetFuture(); - retryPolicy->WaitForRepair(repairPromise); - + + NThreading::TPromise<void> retriesPromise = NThreading::NewPromise(); + auto retriesFuture = retriesPromise.GetFuture(); + retryPolicy->WaitForRetries(1, retriesPromise); + + NThreading::TPromise<void> repairPromise = NThreading::NewPromise(); + auto repairFuture = repairPromise.GetFuture(); + retryPolicy->WaitForRepair(repairPromise); + if (enable) { Cerr << "===Enabled DC1\n"; setup1->EnableDataCenter("dc1"); @@ -139,10 +139,10 @@ Y_UNIT_TEST_SUITE(RetryPolicy) { setup1->DisableDataCenter("dc1"); setup2.DisableDataCenter("dc1"); } - Sleep(TDuration::Seconds(5)); - - retriesFuture.Wait(); - repairFuture.Wait(); + Sleep(TDuration::Seconds(5)); + + retriesFuture.Wait(); + repairFuture.Wait(); }; Cerr << "===Wait for 1st reconnect\n"; waitForReconnect(false); @@ -154,7 +154,7 @@ Y_UNIT_TEST_SUITE(RetryPolicy) { auto setup1 = std::make_shared<TPersQueueYdbSdkTestSetup>(TEST_CASE_NAME, false); SDKTestSetup setup2("SeqNoShift_Dc2", false); setup2.SetSingleDataCenter("dc2"); - setup2.AddDataCenter("dc1", *setup1, true); + setup2.AddDataCenter("dc1", *setup1, true); setup2.Start(); setup1->AddDataCenter("dc2", setup2, true); setup1->Start(); @@ -231,7 +231,7 @@ Y_UNIT_TEST_SUITE(RetryPolicy) { auto CheckSeqNo = [&] (const TString& dcName, ui64 expectedSeqNo) { settings.PreferredCluster(dcName); settings.AllowFallbackToOtherClusters(false); - settings.RetryPolicy(nullptr); //switch to default policy; + settings.RetryPolicy(nullptr); //switch to default policy; auto writer = client.CreateWriteSession(settings); auto seqNo = writer->GetInitSeqNo().GetValueSync(); UNIT_ASSERT_VALUES_EQUAL(seqNo, expectedSeqNo); @@ -348,21 +348,21 @@ Y_UNIT_TEST_SUITE(RetryPolicy) { auto& client = setup->GetPersQueueClient(); auto writer = client.CreateWriteSession(settings); auto event = *writer->GetEvent(true); - Cerr << NYdb::NPersQueue::DebugString(event) << "\n"; - UNIT_ASSERT(std::holds_alternative<TWriteSessionEvent::TReadyToAcceptEvent>(event)); + Cerr << NYdb::NPersQueue::DebugString(event) << "\n"; + UNIT_ASSERT(std::holds_alternative<TWriteSessionEvent::TReadyToAcceptEvent>(event)); auto continueToken = std::move(std::get<TWriteSessionEvent::TReadyToAcceptEvent>(event).ContinuationToken); TString message = "1234567890"; ui64 seqNo = 0; - setup->KickTablets(); + setup->KickTablets(); writer->Write(std::move(continueToken), message, ++seqNo); retryPolicy->ExpectBreakDown(); retryPolicy->WaitForRetriesSync(3); - while (seqNo < 10) { - auto event = *writer->GetEvent(true); - Cerr << NYdb::NPersQueue::DebugString(event) << "\n"; - UNIT_ASSERT(std::holds_alternative<TWriteSessionEvent::TReadyToAcceptEvent>(event)); + while (seqNo < 10) { + auto event = *writer->GetEvent(true); + Cerr << NYdb::NPersQueue::DebugString(event) << "\n"; + UNIT_ASSERT(std::holds_alternative<TWriteSessionEvent::TReadyToAcceptEvent>(event)); writer->Write( - std::move(std::get<TWriteSessionEvent::TReadyToAcceptEvent>(event).ContinuationToken), + std::move(std::get<TWriteSessionEvent::TReadyToAcceptEvent>(event).ContinuationToken), message, ++seqNo ); } diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils.h b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils.h index 476d53cdf2..7fb219757b 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils.h +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils.h @@ -36,7 +36,7 @@ public: NYdb::TDriverConfig cfg; cfg.SetEndpoint(TStringBuilder() << "localhost:" << Server.GrpcPort); cfg.SetDatabase("/Root"); - cfg.SetLog(CreateLogBackend("cerr", ELogPriority::TLOG_DEBUG)); + cfg.SetLog(CreateLogBackend("cerr", ELogPriority::TLOG_DEBUG)); Driver = MakeHolder<NYdb::TDriver>(cfg); } return *Driver; @@ -61,9 +61,9 @@ public: NYdb::NPersQueue::TWriteSessionSettings GetWriteSessionSettings() { TWriteSessionSettings settings; - settings - .Path(GetTestTopic()) - .MessageGroupId(GetTestMessageGroupId()); + settings + .Path(GetTestTopic()) + .MessageGroupId(GetTestMessageGroupId()); return settings; } }; diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/data_plane_helpers.cpp b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/data_plane_helpers.cpp index 31f21d2265..3cf6e7f8b8 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/data_plane_helpers.cpp +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/data_plane_helpers.cpp @@ -2,93 +2,93 @@ namespace NKikimr::NPersQueueTests { - using namespace NYdb::NPersQueue; + using namespace NYdb::NPersQueue; - std::shared_ptr<NYdb::NPersQueue::IWriteSession> CreateWriter( - NYdb::TDriver& driver, - const NYdb::NPersQueue::TWriteSessionSettings& settings, - std::shared_ptr<NYdb::ICredentialsProviderFactory> creds + std::shared_ptr<NYdb::NPersQueue::IWriteSession> CreateWriter( + NYdb::TDriver& driver, + const NYdb::NPersQueue::TWriteSessionSettings& settings, + std::shared_ptr<NYdb::ICredentialsProviderFactory> creds ) { - TPersQueueClientSettings clientSettings; - if (creds) clientSettings.CredentialsProviderFactory(creds); - return TPersQueueClient(driver, clientSettings).CreateWriteSession(TWriteSessionSettings(settings).ClusterDiscoveryMode(EClusterDiscoveryMode::Off)); + TPersQueueClientSettings clientSettings; + if (creds) clientSettings.CredentialsProviderFactory(creds); + return TPersQueueClient(driver, clientSettings).CreateWriteSession(TWriteSessionSettings(settings).ClusterDiscoveryMode(EClusterDiscoveryMode::Off)); } - std::shared_ptr<NYdb::NPersQueue::IWriteSession> CreateWriter( - NYdb::TDriver& driver, - const TString& topic, - const TString& sourceId, - std::optional<ui32> partitionGroup, - std::optional<TString> codec, - std::optional<bool> reconnectOnFailure, - std::shared_ptr<NYdb::ICredentialsProviderFactory> creds + std::shared_ptr<NYdb::NPersQueue::IWriteSession> CreateWriter( + NYdb::TDriver& driver, + const TString& topic, + const TString& sourceId, + std::optional<ui32> partitionGroup, + std::optional<TString> codec, + std::optional<bool> reconnectOnFailure, + std::shared_ptr<NYdb::ICredentialsProviderFactory> creds ) { - auto settings = TWriteSessionSettings().Path(topic).MessageGroupId(sourceId); - if (partitionGroup) settings.PartitionGroupId(*partitionGroup); - settings.RetryPolicy((reconnectOnFailure && *reconnectOnFailure) ? IRetryPolicy::GetDefaultPolicy() : IRetryPolicy::GetNoRetryPolicy()); - if (codec) { - if (*codec == "raw") - settings.Codec(ECodec::RAW); - if (*codec == "zstd") - settings.Codec(ECodec::ZSTD); - if (*codec == "lzop") - settings.Codec(ECodec::LZOP); - } - return CreateWriter(driver, settings, creds); + auto settings = TWriteSessionSettings().Path(topic).MessageGroupId(sourceId); + if (partitionGroup) settings.PartitionGroupId(*partitionGroup); + settings.RetryPolicy((reconnectOnFailure && *reconnectOnFailure) ? IRetryPolicy::GetDefaultPolicy() : IRetryPolicy::GetNoRetryPolicy()); + if (codec) { + if (*codec == "raw") + settings.Codec(ECodec::RAW); + if (*codec == "zstd") + settings.Codec(ECodec::ZSTD); + if (*codec == "lzop") + settings.Codec(ECodec::LZOP); + } + return CreateWriter(driver, settings, creds); + } + + std::shared_ptr<NYdb::NPersQueue::ISimpleBlockingWriteSession> CreateSimpleWriter( + NYdb::TDriver& driver, + const NYdb::NPersQueue::TWriteSessionSettings& settings + ) { + return TPersQueueClient(driver).CreateSimpleBlockingWriteSession(TWriteSessionSettings(settings).ClusterDiscoveryMode(EClusterDiscoveryMode::Off)); } - std::shared_ptr<NYdb::NPersQueue::ISimpleBlockingWriteSession> CreateSimpleWriter( - NYdb::TDriver& driver, - const NYdb::NPersQueue::TWriteSessionSettings& settings + std::shared_ptr<NYdb::NPersQueue::ISimpleBlockingWriteSession> CreateSimpleWriter( + NYdb::TDriver& driver, + const TString& topic, + const TString& sourceId, + std::optional<ui32> partitionGroup, + std::optional<TString> codec, + std::optional<bool> reconnectOnFailure ) { - return TPersQueueClient(driver).CreateSimpleBlockingWriteSession(TWriteSessionSettings(settings).ClusterDiscoveryMode(EClusterDiscoveryMode::Off)); + Y_UNUSED(codec); + auto settings = TWriteSessionSettings().Path(topic).MessageGroupId(sourceId); + if (partitionGroup) settings.PartitionGroupId(*partitionGroup); + settings.RetryPolicy((reconnectOnFailure && *reconnectOnFailure) ? IRetryPolicy::GetDefaultPolicy() : IRetryPolicy::GetNoRetryPolicy()); + return CreateSimpleWriter(driver, settings); } - std::shared_ptr<NYdb::NPersQueue::ISimpleBlockingWriteSession> CreateSimpleWriter( - NYdb::TDriver& driver, - const TString& topic, - const TString& sourceId, - std::optional<ui32> partitionGroup, - std::optional<TString> codec, - std::optional<bool> reconnectOnFailure + std::shared_ptr<NYdb::NPersQueue::IReadSession> CreateReader( + NYdb::TDriver& driver, + const NYdb::NPersQueue::TReadSessionSettings& settings, + std::shared_ptr<NYdb::ICredentialsProviderFactory> creds ) { - Y_UNUSED(codec); - auto settings = TWriteSessionSettings().Path(topic).MessageGroupId(sourceId); - if (partitionGroup) settings.PartitionGroupId(*partitionGroup); - settings.RetryPolicy((reconnectOnFailure && *reconnectOnFailure) ? IRetryPolicy::GetDefaultPolicy() : IRetryPolicy::GetNoRetryPolicy()); - return CreateSimpleWriter(driver, settings); + TPersQueueClientSettings clientSettings; + if (creds) clientSettings.CredentialsProviderFactory(creds); + return TPersQueueClient(driver, clientSettings).CreateReadSession(TReadSessionSettings(settings).DisableClusterDiscovery(true)); } - std::shared_ptr<NYdb::NPersQueue::IReadSession> CreateReader( - NYdb::TDriver& driver, - const NYdb::NPersQueue::TReadSessionSettings& settings, - std::shared_ptr<NYdb::ICredentialsProviderFactory> creds - ) { - TPersQueueClientSettings clientSettings; - if (creds) clientSettings.CredentialsProviderFactory(creds); - return TPersQueueClient(driver, clientSettings).CreateReadSession(TReadSessionSettings(settings).DisableClusterDiscovery(true)); - } - - TMaybe<TReadSessionEvent::TDataReceivedEvent> GetNextMessageSkipAssignment(std::shared_ptr<IReadSession>& reader, TDuration timeout) { + TMaybe<TReadSessionEvent::TDataReceivedEvent> GetNextMessageSkipAssignment(std::shared_ptr<IReadSession>& reader, TDuration timeout) { while (true) { - auto future = reader->WaitEvent(); - future.Wait(timeout); - - TMaybe<NYdb::NPersQueue::TReadSessionEvent::TEvent> event = reader->GetEvent(false, 1); - if (!event) - return {}; - if (auto dataEvent = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TDataReceivedEvent>(&*event)) { - return *dataEvent; - } else if (auto* createPartitionStreamEvent = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TCreatePartitionStreamEvent>(&*event)) { - createPartitionStreamEvent->Confirm(); - } else if (auto* destroyPartitionStreamEvent = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TDestroyPartitionStreamEvent>(&*event)) { - destroyPartitionStreamEvent->Confirm(); - } else if (auto* closeSessionEvent = std::get_if<NYdb::NPersQueue::TSessionClosedEvent>(&*event)) { - return {}; + auto future = reader->WaitEvent(); + future.Wait(timeout); + + TMaybe<NYdb::NPersQueue::TReadSessionEvent::TEvent> event = reader->GetEvent(false, 1); + if (!event) + return {}; + if (auto dataEvent = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TDataReceivedEvent>(&*event)) { + return *dataEvent; + } else if (auto* createPartitionStreamEvent = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TCreatePartitionStreamEvent>(&*event)) { + createPartitionStreamEvent->Confirm(); + } else if (auto* destroyPartitionStreamEvent = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TDestroyPartitionStreamEvent>(&*event)) { + destroyPartitionStreamEvent->Confirm(); + } else if (auto* closeSessionEvent = std::get_if<NYdb::NPersQueue::TSessionClosedEvent>(&*event)) { + return {}; } } - return {}; + return {}; } - -} + +} diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/data_plane_helpers.h b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/data_plane_helpers.h index 9901837081..8868ce64b2 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/data_plane_helpers.h +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/data_plane_helpers.h @@ -6,43 +6,43 @@ namespace NKikimr::NPersQueueTests { - std::shared_ptr<NYdb::NPersQueue::IWriteSession> CreateWriter( - NYdb::TDriver& driver, - const NYdb::NPersQueue::TWriteSessionSettings& settings, - std::shared_ptr<NYdb::ICredentialsProviderFactory> creds = nullptr + std::shared_ptr<NYdb::NPersQueue::IWriteSession> CreateWriter( + NYdb::TDriver& driver, + const NYdb::NPersQueue::TWriteSessionSettings& settings, + std::shared_ptr<NYdb::ICredentialsProviderFactory> creds = nullptr ); - std::shared_ptr<NYdb::NPersQueue::IWriteSession> CreateWriter( - NYdb::TDriver& driver, - const TString& topic, - const TString& sourceId, - std::optional<ui32> partitionGroup = {}, - std::optional<TString> codec = {}, - std::optional<bool> reconnectOnFailure = {}, - std::shared_ptr<NYdb::ICredentialsProviderFactory> creds = nullptr + std::shared_ptr<NYdb::NPersQueue::IWriteSession> CreateWriter( + NYdb::TDriver& driver, + const TString& topic, + const TString& sourceId, + std::optional<ui32> partitionGroup = {}, + std::optional<TString> codec = {}, + std::optional<bool> reconnectOnFailure = {}, + std::shared_ptr<NYdb::ICredentialsProviderFactory> creds = nullptr ); - std::shared_ptr<NYdb::NPersQueue::ISimpleBlockingWriteSession> CreateSimpleWriter( - NYdb::TDriver& driver, - const NYdb::NPersQueue::TWriteSessionSettings& settings + std::shared_ptr<NYdb::NPersQueue::ISimpleBlockingWriteSession> CreateSimpleWriter( + NYdb::TDriver& driver, + const NYdb::NPersQueue::TWriteSessionSettings& settings ); - std::shared_ptr<NYdb::NPersQueue::ISimpleBlockingWriteSession> CreateSimpleWriter( - NYdb::TDriver& driver, - const TString& topic, - const TString& sourceId, - std::optional<ui32> partitionGroup = {}, - std::optional<TString> codec = {}, - std::optional<bool> reconnectOnFailure = {} + std::shared_ptr<NYdb::NPersQueue::ISimpleBlockingWriteSession> CreateSimpleWriter( + NYdb::TDriver& driver, + const TString& topic, + const TString& sourceId, + std::optional<ui32> partitionGroup = {}, + std::optional<TString> codec = {}, + std::optional<bool> reconnectOnFailure = {} ); - std::shared_ptr<NYdb::NPersQueue::IReadSession> CreateReader( - NYdb::TDriver& driver, - const NYdb::NPersQueue::TReadSessionSettings& settings, - std::shared_ptr<NYdb::ICredentialsProviderFactory> creds = nullptr - - ); + std::shared_ptr<NYdb::NPersQueue::IReadSession> CreateReader( + NYdb::TDriver& driver, + const NYdb::NPersQueue::TReadSessionSettings& settings, + std::shared_ptr<NYdb::ICredentialsProviderFactory> creds = nullptr - TMaybe<NYdb::NPersQueue::TReadSessionEvent::TDataReceivedEvent> GetNextMessageSkipAssignment(std::shared_ptr<NYdb::NPersQueue::IReadSession>& reader, TDuration timeout = TDuration::Max()); - -} + ); + + TMaybe<NYdb::NPersQueue::TReadSessionEvent::TDataReceivedEvent> GetNextMessageSkipAssignment(std::shared_ptr<NYdb::NPersQueue::IReadSession>& reader, TDuration timeout = TDuration::Max()); + +} diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/sdk_test_setup.h b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/sdk_test_setup.h index aff1162993..677eb8c03d 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/sdk_test_setup.h +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/sdk_test_setup.h @@ -7,7 +7,7 @@ #define TEST_CASE_NAME (this->Name_) namespace NPersQueue { - + class SDKTestSetup { protected: TString TestCaseName; @@ -41,7 +41,7 @@ public: Server.ServerSettings.PQConfig.SetEnabled(true); Server.ServerSettings.PQConfig.SetRemoteClusterEnabledDelaySec(1); Server.ServerSettings.PQConfig.SetCloseClientSessionWithEnabledRemotePreferredClusterDelaySec(1); - Server.ServerSettings.PQClusterDiscoveryConfig.SetEnabled(true); + Server.ServerSettings.PQClusterDiscoveryConfig.SetEnabled(true); SetNetDataViaFile("::1/128\t" + GetLocalCluster()); auto seed = TInstant::Now().MicroSeconds(); @@ -50,9 +50,9 @@ public: std::srand(seed); } - void Start(bool waitInit = true, bool addBrokenDatacenter = false) { + void Start(bool waitInit = true, bool addBrokenDatacenter = false) { Server.StartServer(false); - //Server.EnableLogs({NKikimrServices::PQ_WRITE_PROXY, NKikimrServices::PQ_READ_PROXY}); + //Server.EnableLogs({NKikimrServices::PQ_WRITE_PROXY, NKikimrServices::PQ_READ_PROXY}); Server.AnnoyingClient->InitRoot(); if (DataCenters.empty()) { THashMap<TString, NKikimr::NPersQueueTests::TPQTestClusterInfo> dataCenters; @@ -66,9 +66,9 @@ public: } Server.AnnoyingClient->InitSourceIds(); CreateTopic(GetTestTopic(), GetLocalCluster()); - if (waitInit) { - Server.WaitInit(GetTestTopic()); - } + if (waitInit) { + Server.WaitInit(GetTestTopic()); + } } TString GetTestTopic() const { @@ -114,20 +114,20 @@ public: UNIT_ASSERT_C(!initResponse.Response.HasError(), "Failed to start: " << initResponse.Response); } - void WriteToTopic(const TVector<TString>& data, bool compress = true) { + void WriteToTopic(const TVector<TString>& data, bool compress = true) { - auto client = NYdb::NPersQueue::TPersQueueClient(*(Server.AnnoyingClient->GetDriver())); - NYdb::NPersQueue::TWriteSessionSettings settings; - settings.Path(GetTestTopic()).MessageGroupId(GetTestMessageGroupId()); - if (!compress) settings.Codec(NYdb::NPersQueue::ECodec::RAW); - auto writer = client.CreateSimpleBlockingWriteSession(settings); + auto client = NYdb::NPersQueue::TPersQueueClient(*(Server.AnnoyingClient->GetDriver())); + NYdb::NPersQueue::TWriteSessionSettings settings; + settings.Path(GetTestTopic()).MessageGroupId(GetTestMessageGroupId()); + if (!compress) settings.Codec(NYdb::NPersQueue::ECodec::RAW); + auto writer = client.CreateSimpleBlockingWriteSession(settings); for (const TString& d : data) { Log << TLOG_INFO << "WriteToTopic: " << d; - auto res = writer->Write(d); - UNIT_ASSERT(res); + auto res = writer->Write(d); + UNIT_ASSERT(res); } - writer->Close(); + writer->Close(); } void SetSingleDataCenter(const TString& name = "dc1") { @@ -176,15 +176,15 @@ public: } void KickTablets() { - for (ui32 i = 0; i < Server.CleverServer->StaticNodes() + Server.CleverServer->DynamicNodes(); i++) { + for (ui32 i = 0; i < Server.CleverServer->StaticNodes() + Server.CleverServer->DynamicNodes(); i++) { Server.AnnoyingClient->MarkNodeInHive(Server.CleverServer->GetRuntime(), i, false); - } - for (ui32 i = 0; i < Server.CleverServer->StaticNodes() + Server.CleverServer->DynamicNodes(); i++) { + } + for (ui32 i = 0; i < Server.CleverServer->StaticNodes() + Server.CleverServer->DynamicNodes(); i++) { Server.AnnoyingClient->KickNodeInHive(Server.CleverServer->GetRuntime(), i); } } void AllowTablets() { - for (ui32 i = 0; i < Server.CleverServer->StaticNodes() + Server.CleverServer->DynamicNodes(); i++) { + for (ui32 i = 0; i < Server.CleverServer->StaticNodes() + Server.CleverServer->DynamicNodes(); i++) { Server.AnnoyingClient->MarkNodeInHive(Server.CleverServer->GetRuntime(), i, true); } } diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/test_server.h b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/test_server.h index 8eb897e1cc..250ce03620 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/test_server.h +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/test_server.h @@ -76,7 +76,7 @@ public: } void WaitInit(const TString& topic) { - AnnoyingClient->WaitTopicInit(topic); + AnnoyingClient->WaitTopicInit(topic); } bool PrepareNetDataFile(const TString& content = "::1/128\tdc1") { @@ -89,10 +89,10 @@ public: return true; } - void UpdateDC(const TString& name, bool local, bool enabled) { - AnnoyingClient->UpdateDC(name, local, enabled); - } - + void UpdateDC(const TString& name, bool local, bool enabled) { + AnnoyingClient->UpdateDC(name, local, enabled); + } + public: TSimpleSharedPtr<TPortManager> PortManager; ui16 Port; diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/test_utils.h b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/test_utils.h index a5a83cef79..f5841d6277 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/test_utils.h +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/test_utils.h @@ -5,7 +5,7 @@ #include <util/generic/overloaded.h> #include <library/cpp/testing/unittest/registar.h> -#include "sdk_test_setup.h" +#include "sdk_test_setup.h" namespace NPersQueue { diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/ya.make b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/ya.make index 2938d3ff24..b5f051599c 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/ya.make +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/ya.make @@ -1,26 +1,26 @@ -LIBRARY() - -OWNER(g:logbroker) - -SRCS( +LIBRARY() + +OWNER(g:logbroker) + +SRCS( data_plane_helpers.cpp - sdk_test_setup.h - test_utils.h - test_server.h - test_server.cpp -) - -PEERDIR( - library/cpp/grpc/server - library/cpp/testing/unittest + sdk_test_setup.h + test_utils.h + test_server.h + test_server.cpp +) + +PEERDIR( + library/cpp/grpc/server + library/cpp/testing/unittest ydb/core/testlib ydb/library/persqueue/topic_parser_public ydb/public/sdk/cpp/client/ydb_driver ydb/public/sdk/cpp/client/ydb_persqueue_core ydb/public/sdk/cpp/client/ydb_persqueue_public ydb/public/sdk/cpp/client/ydb_table -) - -YQL_LAST_ABI_VERSION() - -END() +) + +YQL_LAST_ABI_VERSION() + +END() diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/with_offset_ranges_mode_ut/ya.make b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/with_offset_ranges_mode_ut/ya.make index 5d3957876e..fc27259e6e 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/with_offset_ranges_mode_ut/ya.make +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/with_offset_ranges_mode_ut/ya.make @@ -1,45 +1,45 @@ -UNITTEST() - -OWNER( +UNITTEST() + +OWNER( g:kikimr g:logbroker -) - -IF (SANITIZER_TYPE) - TIMEOUT(1200) - SIZE(LARGE) - TAG(ya:fat) -ELSE() - TIMEOUT(600) - SIZE(MEDIUM) -ENDIF() - -FORK_SUBTESTS() - -PEERDIR( - library/cpp/testing/gmock_in_unittest +) + +IF (SANITIZER_TYPE) + TIMEOUT(1200) + SIZE(LARGE) + TAG(ya:fat) +ELSE() + TIMEOUT(600) + SIZE(MEDIUM) +ENDIF() + +FORK_SUBTESTS() + +PEERDIR( + library/cpp/testing/gmock_in_unittest ydb/public/lib/json_value ydb/public/lib/yson_value ydb/public/sdk/cpp/client/ydb_driver ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils -) - -YQL_LAST_ABI_VERSION() - -ENV(PQ_OFFSET_RANGES_MODE="1") - +) + +YQL_LAST_ABI_VERSION() + +ENV(PQ_OFFSET_RANGES_MODE="1") + SRCDIR( ydb/public/sdk/cpp/client/ydb_persqueue_core/ut ydb/public/sdk/cpp/client/ydb_persqueue_core ) - -SRCS( - common_ut.cpp - read_session_ut.cpp - basic_usage_ut.cpp - compress_executor_ut.cpp - retry_policy_ut.cpp + +SRCS( + common_ut.cpp + read_session_ut.cpp + basic_usage_ut.cpp + compress_executor_ut.cpp + retry_policy_ut.cpp ut_utils.cpp -) - -END() +) + +END() diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ya.make b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ya.make index 36bee0f023..d19d4f6b2e 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/ya.make +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/ya.make @@ -8,7 +8,7 @@ OWNER( GENERATE_ENUM_SERIALIZATION(ydb/public/sdk/cpp/client/ydb_persqueue_core/persqueue.h) SRCS( - persqueue.h + persqueue.h proto_accessor.cpp ) diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_public/codecs/codecs.cpp b/ydb/public/sdk/cpp/client/ydb_persqueue_public/codecs/codecs.cpp index 8da9935278..9c3bcbd25c 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_public/codecs/codecs.cpp +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_public/codecs/codecs.cpp @@ -1,72 +1,72 @@ -#include <library/cpp/streams/zstd/zstd.h> -#include <util/stream/buffer.h> -#include <util/stream/zlib.h> -#include <util/stream/mem.h> - -#include "codecs.h" - -namespace NYdb::NPersQueue { -namespace NCompressionDetails { - -using TInputStreamVariant = std::variant<std::monostate, TZLibDecompress, TZstdDecompress>; - -IInputStream* CreateDecompressorStream(TInputStreamVariant& inputStreamStorage, Ydb::PersQueue::V1::Codec codec, IInputStream* origin) { - switch (codec) { - case Ydb::PersQueue::V1::CODEC_GZIP: - return &inputStreamStorage.emplace<TZLibDecompress>(origin); - case Ydb::PersQueue::V1::CODEC_LZOP: - throw yexception() << "LZO codec is disabled"; - case Ydb::PersQueue::V1::CODEC_ZSTD: - return &inputStreamStorage.emplace<TZstdDecompress>(origin); - default: - //case Ydb::PersQueue::V1::CODEC_RAW: - //case Ydb::PersQueue::V1::CODEC_UNSPECIFIED: +#include <library/cpp/streams/zstd/zstd.h> +#include <util/stream/buffer.h> +#include <util/stream/zlib.h> +#include <util/stream/mem.h> + +#include "codecs.h" + +namespace NYdb::NPersQueue { +namespace NCompressionDetails { + +using TInputStreamVariant = std::variant<std::monostate, TZLibDecompress, TZstdDecompress>; + +IInputStream* CreateDecompressorStream(TInputStreamVariant& inputStreamStorage, Ydb::PersQueue::V1::Codec codec, IInputStream* origin) { + switch (codec) { + case Ydb::PersQueue::V1::CODEC_GZIP: + return &inputStreamStorage.emplace<TZLibDecompress>(origin); + case Ydb::PersQueue::V1::CODEC_LZOP: + throw yexception() << "LZO codec is disabled"; + case Ydb::PersQueue::V1::CODEC_ZSTD: + return &inputStreamStorage.emplace<TZstdDecompress>(origin); + default: + //case Ydb::PersQueue::V1::CODEC_RAW: + //case Ydb::PersQueue::V1::CODEC_UNSPECIFIED: throw yexception() << "unsupported codec value : " << ui64(codec); - } -} - -TString Decompress(const Ydb::PersQueue::V1::MigrationStreamingReadServerMessage::DataBatch::MessageData& data) { - TMemoryInput input(data.data().data(), data.data().size()); - TString result; - TStringOutput resultOutput(result); - TInputStreamVariant inputStreamStorage; - TransferData(CreateDecompressorStream(inputStreamStorage, data.codec(), &input), &resultOutput); - return result; -} - - -class TZLibToStringCompressor: private TEmbedPolicy<TBufferOutput>, public TZLibCompress { -public: - TZLibToStringCompressor(TBuffer& dst, ZLib::StreamType type, size_t quality) - : TEmbedPolicy<TBufferOutput>(dst) - , TZLibCompress(TEmbedPolicy::Ptr(), type, quality) - { - } -}; - -class TZstdToStringCompressor: private TEmbedPolicy<TBufferOutput>, public TZstdCompress { -public: - TZstdToStringCompressor(TBuffer& dst, int quality) - : TEmbedPolicy<TBufferOutput>(dst) - , TZstdCompress(TEmbedPolicy::Ptr(), quality) - { - } -}; - -THolder<IOutputStream> CreateCoder(ECodec codec, TBuffer& result, int quality) { - switch (codec) { - case ECodec::GZIP: - return MakeHolder<TZLibToStringCompressor>(result, ZLib::GZip, quality >= 0 ? quality : 6); - case ECodec::LZOP: - throw yexception() << "LZO codec is disabled"; - case ECodec::ZSTD: - return MakeHolder<TZstdToStringCompressor>(result, quality); - default: - Y_FAIL("NOT IMPLEMENTED CODEC TYPE"); - } -} - - -} // namespace NDecompressionDetails - -} // namespace NYdb::NPersQueue + } +} + +TString Decompress(const Ydb::PersQueue::V1::MigrationStreamingReadServerMessage::DataBatch::MessageData& data) { + TMemoryInput input(data.data().data(), data.data().size()); + TString result; + TStringOutput resultOutput(result); + TInputStreamVariant inputStreamStorage; + TransferData(CreateDecompressorStream(inputStreamStorage, data.codec(), &input), &resultOutput); + return result; +} + + +class TZLibToStringCompressor: private TEmbedPolicy<TBufferOutput>, public TZLibCompress { +public: + TZLibToStringCompressor(TBuffer& dst, ZLib::StreamType type, size_t quality) + : TEmbedPolicy<TBufferOutput>(dst) + , TZLibCompress(TEmbedPolicy::Ptr(), type, quality) + { + } +}; + +class TZstdToStringCompressor: private TEmbedPolicy<TBufferOutput>, public TZstdCompress { +public: + TZstdToStringCompressor(TBuffer& dst, int quality) + : TEmbedPolicy<TBufferOutput>(dst) + , TZstdCompress(TEmbedPolicy::Ptr(), quality) + { + } +}; + +THolder<IOutputStream> CreateCoder(ECodec codec, TBuffer& result, int quality) { + switch (codec) { + case ECodec::GZIP: + return MakeHolder<TZLibToStringCompressor>(result, ZLib::GZip, quality >= 0 ? quality : 6); + case ECodec::LZOP: + throw yexception() << "LZO codec is disabled"; + case ECodec::ZSTD: + return MakeHolder<TZstdToStringCompressor>(result, quality); + default: + Y_FAIL("NOT IMPLEMENTED CODEC TYPE"); + } +} + + +} // namespace NDecompressionDetails + +} // namespace NYdb::NPersQueue diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_public/codecs/codecs.h b/ydb/public/sdk/cpp/client/ydb_persqueue_public/codecs/codecs.h index d4b83b311f..e3fc717765 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_public/codecs/codecs.h +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_public/codecs/codecs.h @@ -1,17 +1,17 @@ -#pragma once -#include <util/stream/output.h> +#pragma once +#include <util/stream/output.h> #include <ydb/public/api/protos/ydb_persqueue_v1.pb.h> #include <ydb/public/sdk/cpp/client/ydb_persqueue_core/persqueue.h> - - -namespace NYdb::NPersQueue { -namespace NCompressionDetails { - -extern TString Decompress(const Ydb::PersQueue::V1::MigrationStreamingReadServerMessage::DataBatch::MessageData& data); - -THolder<IOutputStream> CreateCoder(ECodec codec, TBuffer& result, int quality); - -} // namespace NDecompressionDetails - -} // namespace NYdb::NPersQueue - + + +namespace NYdb::NPersQueue { +namespace NCompressionDetails { + +extern TString Decompress(const Ydb::PersQueue::V1::MigrationStreamingReadServerMessage::DataBatch::MessageData& data); + +THolder<IOutputStream> CreateCoder(ECodec codec, TBuffer& result, int quality); + +} // namespace NDecompressionDetails + +} // namespace NYdb::NPersQueue + diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_public/codecs/ya.make b/ydb/public/sdk/cpp/client/ydb_persqueue_public/codecs/ya.make index d4cd3b0463..b6aca2ee17 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_public/codecs/ya.make +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_public/codecs/ya.make @@ -1,22 +1,22 @@ -LIBRARY() - -OWNER( +LIBRARY() + +OWNER( g:kikimr - g:logbroker -) - -SRCS( - codecs.h - codecs.cpp -) - -PEERDIR( - library/cpp/streams/zstd + g:logbroker +) + +SRCS( + codecs.h + codecs.cpp +) + +PEERDIR( + library/cpp/streams/zstd ydb/library/yql/public/issue/protos ydb/public/api/grpc/draft ydb/public/api/protos -) - -PROVIDES(pq_codecs_2) - -END() +) + +PROVIDES(pq_codecs_2) + +END() diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_public/persqueue.h b/ydb/public/sdk/cpp/client/ydb_persqueue_public/persqueue.h index 7063b982de..44ba01d94f 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_public/persqueue.h +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_public/persqueue.h @@ -1,2 +1,2 @@ -#pragma once +#pragma once #include <ydb/public/sdk/cpp/client/ydb_persqueue_core/persqueue.h> diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_public/ya.make b/ydb/public/sdk/cpp/client/ydb_persqueue_public/ya.make index 0717c82559..25272c7127 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_public/ya.make +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_public/ya.make @@ -1,17 +1,17 @@ -LIBRARY() - -OWNER( +LIBRARY() + +OWNER( g:kikimr - g:logbroker -) - -SRCS( - persqueue.h -) - -PEERDIR( + g:logbroker +) + +SRCS( + persqueue.h +) + +PEERDIR( ydb/public/sdk/cpp/client/ydb_persqueue_core ydb/public/sdk/cpp/client/ydb_persqueue_public/codecs -) - -END() +) + +END() diff --git a/ydb/public/sdk/cpp/client/ydb_table/table.h b/ydb/public/sdk/cpp/client/ydb_table/table.h index 4797f25efd..42d21d2fec 100644 --- a/ydb/public/sdk/cpp/client/ydb_table/table.h +++ b/ydb/public/sdk/cpp/client/ydb_table/table.h @@ -1627,10 +1627,10 @@ public: TAsyncCommitTransactionResult Commit(const TCommitTxSettings& settings = TCommitTxSettings()); TAsyncStatus Rollback(const TRollbackTxSettings& settings = TRollbackTxSettings()); - TSession GetSession() const { - return Session_; - } - + TSession GetSession() const { + return Session_; + } + private: TTransaction(const TSession& session, const TString& txId); diff --git a/ydb/services/datastreams/datastreams_proxy.cpp b/ydb/services/datastreams/datastreams_proxy.cpp index f0c4128d81..03a6d06213 100644 --- a/ydb/services/datastreams/datastreams_proxy.cpp +++ b/ydb/services/datastreams/datastreams_proxy.cpp @@ -102,9 +102,9 @@ namespace NKikimr::NDataStreams::V1 { class TCreateStreamActor : public TPQGrpcSchemaBase<TCreateStreamActor, NKikimr::NGRpcService::TEvDataStreamsCreateStreamRequest> { using TBase = TPQGrpcSchemaBase<TCreateStreamActor, TEvDataStreamsCreateStreamRequest>; - TActorId NewSchemeCache; + TActorId NewSchemeCache; public: - TCreateStreamActor(NKikimr::NGRpcService::TEvDataStreamsCreateStreamRequest* request, TActorId newSchemeCache); + TCreateStreamActor(NKikimr::NGRpcService::TEvDataStreamsCreateStreamRequest* request, TActorId newSchemeCache); ~TCreateStreamActor() = default; void Bootstrap(const NActors::TActorContext& ctx); @@ -115,17 +115,17 @@ namespace NKikimr::NDataStreams::V1 { void Handle(TEvTxUserProxy::TEvProposeTransactionStatus::TPtr& ev, const TActorContext& ctx); }; - - TCreateStreamActor::TCreateStreamActor(NKikimr::NGRpcService::TEvDataStreamsCreateStreamRequest* request, TActorId newSchemeCache) + + TCreateStreamActor::TCreateStreamActor(NKikimr::NGRpcService::TEvDataStreamsCreateStreamRequest* request, TActorId newSchemeCache) : TBase(request, request->GetProtoRequest()->stream_name()) - , NewSchemeCache(newSchemeCache) + , NewSchemeCache(newSchemeCache) { - Y_UNUSED(NewSchemeCache); + Y_UNUSED(NewSchemeCache); } void TCreateStreamActor::Bootstrap(const NActors::TActorContext& ctx) { TBase::Bootstrap(ctx); - SendProposeRequest(ctx); + SendProposeRequest(ctx); Become(&TCreateStreamActor::StateWork); } @@ -133,7 +133,7 @@ namespace NKikimr::NDataStreams::V1 { Y_UNUSED(ev); Y_UNUSED(ctx); } - + void TCreateStreamActor::FillProposeRequest(TEvTxUserProxy::TEvProposeTransaction& proposal, const TActorContext& ctx, const TString& workingDir, const TString& name) @@ -144,13 +144,13 @@ namespace NKikimr::NDataStreams::V1 { topicSettings.set_partitions_count(GetProtoRequest()->shard_count()); topicSettings.set_retention_period_ms(RetentionPeriod(GetProtoRequest()->retention_period_hours()).MilliSeconds()); topicSettings.set_supported_format(Ydb::PersQueue::V1::TopicSettings::FORMAT_BASE); - topicSettings.add_supported_codecs(Ydb::PersQueue::V1::CODEC_RAW); + topicSettings.add_supported_codecs(Ydb::PersQueue::V1::CODEC_RAW); topicSettings.set_max_partition_write_speed(PartitionWriteSpeedInBytesPerSec(GetProtoRequest()->write_quota_kb_per_sec())); - topicSettings.set_max_partition_write_burst(PartitionWriteSpeedInBytesPerSec(GetProtoRequest()->write_quota_kb_per_sec())); + topicSettings.set_max_partition_write_burst(PartitionWriteSpeedInBytesPerSec(GetProtoRequest()->write_quota_kb_per_sec())); - if (workingDir != proposal.Record.GetDatabaseName() && !proposal.Record.GetDatabaseName().empty()) { - return ReplyWithError(Ydb::StatusIds::BAD_REQUEST, Ydb::PersQueue::ErrorCode::BAD_REQUEST, "streams can be created only at database root", ctx); - } + if (workingDir != proposal.Record.GetDatabaseName() && !proposal.Record.GetDatabaseName().empty()) { + return ReplyWithError(Ydb::StatusIds::BAD_REQUEST, Ydb::PersQueue::ErrorCode::BAD_REQUEST, "streams can be created only at database root", ctx); + } TString error; auto status = NKikimr::NGRpcProxy::V1::FillProposeRequestImpl(name, topicSettings, modifyScheme, ctx, false, error); modifyScheme.SetWorkingDir(workingDir); @@ -377,7 +377,7 @@ namespace NKikimr::NDataStreams::V1 { return ReplyWithError(Ydb::StatusIds::BAD_REQUEST, Ydb::PersQueue::ErrorCode::BAD_REQUEST, error, ctx); } groupConfig.MutablePQTabletConfig()->MutablePartitionConfig()->SetWriteSpeedInBytesPerSecond(GetProtoRequest()->write_quota_kb_per_sec() * 1024LL); - groupConfig.MutablePQTabletConfig()->MutablePartitionConfig()->SetBurstSize(GetProtoRequest()->write_quota_kb_per_sec() * 1024LL); + groupConfig.MutablePQTabletConfig()->MutablePartitionConfig()->SetBurstSize(GetProtoRequest()->write_quota_kb_per_sec() * 1024LL); } //----------------------------------------------------------------------------------------------------------- @@ -432,44 +432,44 @@ namespace NKikimr::NDataStreams::V1 { void StateWork(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx); void HandleCacheNavigateResponse(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx); - - void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) { - if (ev->Get()->Status != NKikimrProto::EReplyStatus::OK) { - ReplyWithError(Ydb::StatusIds::SCHEME_ERROR, Ydb::PersQueue::ErrorCode::ERROR, - TStringBuilder() << "Cannot connect to tablet " << ev->Get()->TabletId, ctx); - } - } - - void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) { - ReplyWithError(Ydb::StatusIds::SCHEME_ERROR, Ydb::PersQueue::ErrorCode::ERROR, - TStringBuilder() << "Cannot connect to tablet " << ev->Get()->TabletId, ctx); - } - - void Handle(TEvPersQueue::TEvOffsetsResponse::TPtr& ev, const TActorContext& ctx) { - for (auto& part : ev->Get()->Record.GetPartResult()) { - StartEndOffsetsPerPartition[part.GetPartition()] = std::make_pair<ui64, ui64>(part.GetStartOffset(), part.GetEndOffset()); - } - if (--RequestsInfly == 0) { - ReplyAndDie(ctx); - } - } - + + void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) { + if (ev->Get()->Status != NKikimrProto::EReplyStatus::OK) { + ReplyWithError(Ydb::StatusIds::SCHEME_ERROR, Ydb::PersQueue::ErrorCode::ERROR, + TStringBuilder() << "Cannot connect to tablet " << ev->Get()->TabletId, ctx); + } + } + + void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) { + ReplyWithError(Ydb::StatusIds::SCHEME_ERROR, Ydb::PersQueue::ErrorCode::ERROR, + TStringBuilder() << "Cannot connect to tablet " << ev->Get()->TabletId, ctx); + } + + void Handle(TEvPersQueue::TEvOffsetsResponse::TPtr& ev, const TActorContext& ctx) { + for (auto& part : ev->Get()->Record.GetPartResult()) { + StartEndOffsetsPerPartition[part.GetPartition()] = std::make_pair<ui64, ui64>(part.GetStartOffset(), part.GetEndOffset()); + } + if (--RequestsInfly == 0) { + ReplyAndDie(ctx); + } + } + void Die(const TActorContext& ctx) override { - //close all pipes - for (auto& pipe : Pipes) { - NTabletPipe::CloseClient(ctx, pipe); - } - TBase::Die(ctx); - } - - private: - void ReplyAndDie(const TActorContext& ctx); - + //close all pipes + for (auto& pipe : Pipes) { + NTabletPipe::CloseClient(ctx, pipe); + } + TBase::Die(ctx); + } + + private: + void ReplyAndDie(const TActorContext& ctx); + NKikimrSchemeOp::TDirEntry SelfInfo; NKikimrSchemeOp::TPersQueueGroupDescription PQGroup; - std::vector<TActorId> Pipes; - ui32 RequestsInfly = 0; - std::map<ui64, std::pair<ui64, ui64>> StartEndOffsetsPerPartition; + std::vector<TActorId> Pipes; + ui32 RequestsInfly = 0; + std::map<ui64, std::pair<ui64, ui64>> StartEndOffsetsPerPartition; }; TDescribeStreamActor::TDescribeStreamActor(NKikimr::NGRpcService::TEvDataStreamsDescribeStreamRequest* request) @@ -485,9 +485,9 @@ namespace NKikimr::NDataStreams::V1 { void TDescribeStreamActor::StateWork(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx) { switch (ev->GetTypeRewrite()) { - HFunc(TEvPersQueue::TEvOffsetsResponse, Handle); - HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); - HFunc(TEvTabletPipe::TEvClientConnected, Handle); + HFunc(TEvPersQueue::TEvOffsetsResponse, Handle); + HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); + HFunc(TEvTabletPipe::TEvClientConnected, Handle); default: TBase::StateWork(ev, ctx); } } @@ -497,11 +497,11 @@ namespace NKikimr::NDataStreams::V1 { Y_VERIFY(result->ResultSet.size() == 1); // describe only one topic const auto& response = result->ResultSet.front(); const TString path = JoinSeq("/", response.Path); - + if (ReplyIfNotTopic(ev, ctx)) { return; } - + Y_VERIFY(response.PQGroupInfo); PQGroup = response.PQGroupInfo->Description; @@ -532,61 +532,61 @@ namespace NKikimr::NDataStreams::V1 { } } - void TDescribeStreamActor::ReplyAndDie(const TActorContext& ctx) { - Ydb::DataStreams::V1::DescribeStreamResult result; - + void TDescribeStreamActor::ReplyAndDie(const TActorContext& ctx) { + Ydb::DataStreams::V1::DescribeStreamResult result; + auto& pqConfig = PQGroup.GetPQTabletConfig(); - ui32 writeSpeed = pqConfig.GetPartitionConfig().GetWriteSpeedInBytesPerSecond() / 1024; - auto& description = *result.mutable_stream_description(); - description.set_stream_name(GetProtoRequest()->stream_name()); - ui32 retentionPeriodHours = TInstant::Seconds(pqConfig.GetPartitionConfig().GetLifetimeSeconds()).Hours(); - description.set_retention_period_hours(retentionPeriodHours); - description.set_write_quota_kb_per_sec(writeSpeed); + ui32 writeSpeed = pqConfig.GetPartitionConfig().GetWriteSpeedInBytesPerSecond() / 1024; + auto& description = *result.mutable_stream_description(); + description.set_stream_name(GetProtoRequest()->stream_name()); + ui32 retentionPeriodHours = TInstant::Seconds(pqConfig.GetPartitionConfig().GetLifetimeSeconds()).Hours(); + description.set_retention_period_hours(retentionPeriodHours); + description.set_write_quota_kb_per_sec(writeSpeed); if (SelfInfo.GetCreateFinished()) { description.set_stream_status(Ydb::DataStreams::V1::StreamDescription::ACTIVE); } else { description.set_stream_status(Ydb::DataStreams::V1::StreamDescription::CREATING); - } - - bool startShardFound = GetProtoRequest()->exclusive_start_shard_id().empty(); - description.set_has_more_shards(false); - + } + + bool startShardFound = GetProtoRequest()->exclusive_start_shard_id().empty(); + description.set_has_more_shards(false); + description.set_owner(SelfInfo.GetOwner()); description.set_stream_creation_timestamp(TInstant::MilliSeconds(SelfInfo.GetCreateStep()).Seconds()); - - int limit = GetProtoRequest()->limit() == 0 ? 100 : GetProtoRequest()->limit(); - + + int limit = GetProtoRequest()->limit() == 0 ? 100 : GetProtoRequest()->limit(); + for (uint32_t i = 0; i < (uint32_t)PQGroup.GetPartitions().size(); ++i) { ui32 partitionId = PQGroup.GetPartitions(i).GetPartitionId(); - TString shardName = GetShardName(partitionId); - if (shardName == GetProtoRequest()->exclusive_start_shard_id()) { - startShardFound = true; - } else if (startShardFound) { - if (description.shards_size() >= limit) { - description.set_has_more_shards(true); - break; - } else { - auto* shard = description.add_shards(); - shard->set_shard_id(shardName); - auto* rangeProto = shard->mutable_hash_key_range(); + TString shardName = GetShardName(partitionId); + if (shardName == GetProtoRequest()->exclusive_start_shard_id()) { + startShardFound = true; + } else if (startShardFound) { + if (description.shards_size() >= limit) { + description.set_has_more_shards(true); + break; + } else { + auto* shard = description.add_shards(); + shard->set_shard_id(shardName); + auto* rangeProto = shard->mutable_hash_key_range(); auto range = RangeFromShardNumber(partitionId, PQGroup.GetPartitions().size()); - rangeProto->set_starting_hash_key(Uint128ToDecimalString(range.Start)); - rangeProto->set_ending_hash_key(Uint128ToDecimalString(range.End)); - auto it = StartEndOffsetsPerPartition.find(partitionId); - if (it != StartEndOffsetsPerPartition.end()) { - auto* rangeProto = shard->mutable_sequence_number_range(); - rangeProto->set_starting_sequence_number(TStringBuilder() << it->second.first); - } - } - } - } - if (!startShardFound) { - return ReplyWithResult(Ydb::StatusIds::BAD_REQUEST, ctx); - } - return ReplyWithResult(Ydb::StatusIds::SUCCESS, result, ctx); - } - - + rangeProto->set_starting_hash_key(Uint128ToDecimalString(range.Start)); + rangeProto->set_ending_hash_key(Uint128ToDecimalString(range.End)); + auto it = StartEndOffsetsPerPartition.find(partitionId); + if (it != StartEndOffsetsPerPartition.end()) { + auto* rangeProto = shard->mutable_sequence_number_range(); + rangeProto->set_starting_sequence_number(TStringBuilder() << it->second.first); + } + } + } + } + if (!startShardFound) { + return ReplyWithResult(Ydb::StatusIds::BAD_REQUEST, ctx); + } + return ReplyWithResult(Ydb::StatusIds::SUCCESS, result, ctx); + } + + //----------------------------------------------------------------------------------- class TListStreamsActor : public TRpcSchemeRequestActor<TListStreamsActor, NKikimr::NGRpcService::TEvDataStreamsListStreamsRequest> { @@ -606,13 +606,13 @@ namespace NKikimr::NDataStreams::V1 { void SendPendingRequests(const TActorContext& ctx); void SendResponse(const TActorContext& ctx); - void ReplyWithError(Ydb::StatusIds::StatusCode status, Ydb::PersQueue::ErrorCode::ErrorCode pqStatus, - const TString& messageText, const NActors::TActorContext& ctx) { - this->Request_->RaiseIssue(FillIssue(messageText, pqStatus)); - this->Request_->ReplyWithYdbStatus(status); - this->Die(ctx); - } - + void ReplyWithError(Ydb::StatusIds::StatusCode status, Ydb::PersQueue::ErrorCode::ErrorCode pqStatus, + const TString& messageText, const NActors::TActorContext& ctx) { + this->Request_->RaiseIssue(FillIssue(messageText, pqStatus)); + this->Request_->ReplyWithYdbStatus(status); + this->Die(ctx); + } + private: static constexpr ui32 MAX_IN_FLIGHT = 5; @@ -631,17 +631,17 @@ namespace NKikimr::NDataStreams::V1 { void TListStreamsActor::Bootstrap(const NActors::TActorContext& ctx) { TBase::Bootstrap(ctx); if (!Request_->GetDatabaseName()) { - return ReplyWithError(Ydb::StatusIds::BAD_REQUEST,Ydb::PersQueue::ErrorCode::BAD_REQUEST, - "Request without dabase is forbiden", ctx); - } - - if (this->Request_->GetInternalToken().empty()) { - if (AppData(ctx)->PQConfig.GetRequireCredentialsInNewProtocol()) { - return ReplyWithError(Ydb::StatusIds::UNAUTHORIZED, Ydb::PersQueue::ErrorCode::ACCESS_DENIED, - "Unauthenticated access is forbidden, please provide credentials", ctx); - } - } - + return ReplyWithError(Ydb::StatusIds::BAD_REQUEST,Ydb::PersQueue::ErrorCode::BAD_REQUEST, + "Request without dabase is forbiden", ctx); + } + + if (this->Request_->GetInternalToken().empty()) { + if (AppData(ctx)->PQConfig.GetRequireCredentialsInNewProtocol()) { + return ReplyWithError(Ydb::StatusIds::UNAUTHORIZED, Ydb::PersQueue::ErrorCode::ACCESS_DENIED, + "Unauthenticated access is forbidden, please provide credentials", ctx); + } + } + SendNavigateRequest(ctx, *Request_->GetDatabaseName()); Become(&TListStreamsActor::StateWork); } @@ -693,10 +693,10 @@ namespace NKikimr::NDataStreams::V1 { NSchemeCache::TSchemeCacheNavigate::TEntry entry; entry.Path = NKikimr::SplitPath(path); - if (!this->Request_->GetInternalToken().empty()) { - schemeCacheRequest->UserToken = new NACLib::TUserToken(this->Request_->GetInternalToken()); - } - + if (!this->Request_->GetInternalToken().empty()) { + schemeCacheRequest->UserToken = new NACLib::TUserToken(this->Request_->GetInternalToken()); + } + entry.Operation = NSchemeCache::TSchemeCacheNavigate::OpList; schemeCacheRequest->ResultSet.emplace_back(entry); WaitingList.push_back(std::make_unique<TEvTxProxySchemeCache::TEvNavigateKeySet>(schemeCacheRequest.release())); @@ -721,9 +721,9 @@ namespace NKikimr::NDataStreams::V1 { TString childFullPath = JoinPath({JoinPath(entry.Path), child.Name}); switch (child.Kind) { case NSchemeCache::TSchemeCacheNavigate::EKind::KindPath: - if (GetProtoRequest()->recurse()) { - SendNavigateRequest(ctx, childFullPath); - } + if (GetProtoRequest()->recurse()) { + SendNavigateRequest(ctx, childFullPath); + } break; case NSchemeCache::TSchemeCacheNavigate::EKind::KindTopic: Topics.push_back(childFullPath); @@ -823,7 +823,7 @@ namespace NKikimr::NDataStreams::V1 { const auto& streamReadRulesNames = pqGroupDescription.GetPQTabletConfig().GetReadRules(); const auto& streamReadRulesReadFromTimestamps = pqGroupDescription.GetPQTabletConfig().GetReadFromTimestampsMs(); const auto alreadyRead = NextToken.GetAlreadyRead(); - + if (alreadyRead > (ui32)streamReadRulesNames.size()) { return ReplyWithError(Ydb::StatusIds::BAD_REQUEST, Ydb::PersQueue::ErrorCode::ERROR, TStringBuilder() << "Provided next_token is malformed - " << @@ -912,16 +912,16 @@ namespace NKikimr::NDataStreams::V1 { if (readRule.version() == 0) { readRule.set_version(selfInfo.GetVersion().GetPQVersion()); } - auto serviceTypes = GetSupportedClientServiceTypes(ctx); - TString error = AddReadRuleToConfig(pqConfig, readRule, serviceTypes, ctx); - bool hasDuplicates = false; + auto serviceTypes = GetSupportedClientServiceTypes(ctx); + TString error = AddReadRuleToConfig(pqConfig, readRule, serviceTypes, ctx); + bool hasDuplicates = false; if (error.Empty()) { - hasDuplicates = CheckReadRulesConfig(*pqConfig, serviceTypes, error); + hasDuplicates = CheckReadRulesConfig(*pqConfig, serviceTypes, error); } if (!error.Empty()) { return ReplyWithError(hasDuplicates ? Ydb::StatusIds::ALREADY_EXISTS : Ydb::StatusIds::BAD_REQUEST, - hasDuplicates ? Ydb::PersQueue::ErrorCode::OK : Ydb::PersQueue::ErrorCode::BAD_REQUEST, error, ctx); + hasDuplicates ? Ydb::PersQueue::ErrorCode::OK : Ydb::PersQueue::ErrorCode::BAD_REQUEST, error, ctx); } } @@ -1693,7 +1693,7 @@ namespace NKikimr::NDataStreams::V1 { } void TDataStreamsService::Handle(NKikimr::NGRpcService::TEvDataStreamsCreateStreamRequest::TPtr& ev, const TActorContext& ctx) { - ctx.Register(new TCreateStreamActor(ev->Release().Release(), NewSchemeCache)); + ctx.Register(new TCreateStreamActor(ev->Release().Release(), NewSchemeCache)); } void TDataStreamsService::Handle(NKikimr::NGRpcService::TEvDataStreamsDeleteStreamRequest::TPtr& ev, const TActorContext& ctx) { diff --git a/ydb/services/datastreams/datastreams_ut.cpp b/ydb/services/datastreams/datastreams_ut.cpp index 23f0149d51..2ede2a2d05 100644 --- a/ydb/services/datastreams/datastreams_ut.cpp +++ b/ydb/services/datastreams/datastreams_ut.cpp @@ -11,7 +11,7 @@ #include <ydb/public/api/grpc/draft/ydb_datastreams_v1.grpc.pb.h> #include <library/cpp/json/json_reader.h> -#include <library/cpp/digest/md5/md5.h> +#include <library/cpp/digest/md5/md5.h> #include <random> @@ -19,7 +19,7 @@ using namespace NYdb; using namespace NYdb::NTable; using namespace NKikimr::NPersQueueTests; -using namespace NKikimr::NDataStreams::V1; +using namespace NKikimr::NDataStreams::V1; namespace YDS_V1 = Ydb::DataStreams::V1; namespace NYDS_V1 = NYdb::NDataStreams::V1; struct WithSslAndAuth : TKikimrTestSettings { @@ -37,23 +37,23 @@ public: appConfig.MutablePQConfig()->SetTopicsAreFirstClassCitizen(true); appConfig.MutablePQConfig()->SetEnabled(true); appConfig.MutablePQConfig()->SetMetaCacheRefreshIntervalMilliSeconds(30000); - appConfig.MutablePQConfig()->MutableQuotingConfig()->SetEnableQuoting(true); - appConfig.MutablePQConfig()->MutableQuotingConfig()->SetQuotaWaitDurationMs(300); - appConfig.MutablePQConfig()->MutableQuotingConfig()->SetPartitionReadQuotaIsTwiceWriteQuota(true); + appConfig.MutablePQConfig()->MutableQuotingConfig()->SetEnableQuoting(true); + appConfig.MutablePQConfig()->MutableQuotingConfig()->SetQuotaWaitDurationMs(300); + appConfig.MutablePQConfig()->MutableQuotingConfig()->SetPartitionReadQuotaIsTwiceWriteQuota(true); appConfig.MutablePQConfig()->MutableBillingMeteringConfig()->SetEnabled(true); appConfig.MutablePQConfig()->MutableBillingMeteringConfig()->SetFlushIntervalSec(1); appConfig.MutablePQConfig()->AddClientServiceType()->SetName("data-streams"); MeteringFile = MakeHolder<TTempFileHandle>("meteringData.txt"); appConfig.MutableMeteringConfig()->SetMeteringFilePath(MeteringFile->Name()); - + if (secure) { appConfig.MutablePQConfig()->SetRequireCredentialsInNewProtocol(true); } KikimrServer = std::make_unique<TKikimr>(std::move(appConfig)); ui16 grpc = KikimrServer->GetPort(); TString location = TStringBuilder() << "localhost:" << grpc; - auto driverConfig = TDriverConfig().SetEndpoint(location).SetLog(CreateLogBackend("cerr", TLOG_DEBUG)); + auto driverConfig = TDriverConfig().SetEndpoint(location).SetLog(CreateLogBackend("cerr", TLOG_DEBUG)); if (secure) { driverConfig.UseSecureConnection(NYdbSslTestData::CaCrt); } else { @@ -72,13 +72,13 @@ public: auto result = schemeClient.ModifyPermissions("/Root", NYdb::NScheme::TModifyPermissionsSettings().AddGrantPermissions(permissions) ).ExtractValueSync(); - Cerr << result.GetIssues().ToString() << "\n"; - UNIT_ASSERT(result.IsSuccess()); + Cerr << result.GetIssues().ToString() << "\n"; + UNIT_ASSERT(result.IsSuccess()); } - - TClient client(*(KikimrServer->ServerSettings)); - UNIT_ASSERT_VALUES_EQUAL(NMsgBusProxy::MSTATUS_OK, - client.AlterUserAttributes("/", "Root", {{"folder_id", "somefolder"},{"cloud_id", "somecloud"}, {"database_id", "root"}})); + + TClient client(*(KikimrServer->ServerSettings)); + UNIT_ASSERT_VALUES_EQUAL(NMsgBusProxy::MSTATUS_OK, + client.AlterUserAttributes("/", "Root", {{"folder_id", "somefolder"},{"cloud_id", "somecloud"}, {"database_id", "root"}})); } public: @@ -92,43 +92,43 @@ public: using TInsecureDatastreamsTestServer = TDatastreamsTestServer<TKikimrWithGrpcAndRootSchema, false>; using TSecureDatastreamsTestServer = TDatastreamsTestServer<TKikimrWithGrpcAndRootSchemaSecure, true>; -void CheckMeteringFile(TTempFileHandle* meteringFile, const TString& streamPath) { - Sleep(TDuration::Seconds(1)); - meteringFile->Flush(); - meteringFile->Close(); - auto input = TFileInput(TFile(meteringFile->Name(), RdOnly | OpenExisting)); - ui64 totalLines = 0; - TString line; - while(input.ReadLine(line)) { - totalLines++; - Cerr << "Got line from metering file data: '" << line << "'" << Endl; - NJson::TJsonValue json; - NJson::ReadJsonTree(line, &json, true); - auto& map = json.GetMap(); - UNIT_ASSERT(map.contains("cloud_id")); - UNIT_ASSERT(map.contains("folder_id")); - UNIT_ASSERT(map.contains("resource_id")); - UNIT_ASSERT(map.find("resource_id")->second.GetString() == streamPath); - UNIT_ASSERT(map.contains("labels")); - UNIT_ASSERT_VALUES_EQUAL(map.find("labels")->second.GetMap().size(), 2); - UNIT_ASSERT(map.contains("tags")); - UNIT_ASSERT(map.contains("source_id")); - UNIT_ASSERT(map.contains("source_wt")); - UNIT_ASSERT(map.find("cloud_id")->second.GetString() == "somecloud"); - UNIT_ASSERT(map.find("folder_id")->second.GetString() == "somefolder"); +void CheckMeteringFile(TTempFileHandle* meteringFile, const TString& streamPath) { + Sleep(TDuration::Seconds(1)); + meteringFile->Flush(); + meteringFile->Close(); + auto input = TFileInput(TFile(meteringFile->Name(), RdOnly | OpenExisting)); + ui64 totalLines = 0; + TString line; + while(input.ReadLine(line)) { + totalLines++; + Cerr << "Got line from metering file data: '" << line << "'" << Endl; + NJson::TJsonValue json; + NJson::ReadJsonTree(line, &json, true); + auto& map = json.GetMap(); + UNIT_ASSERT(map.contains("cloud_id")); + UNIT_ASSERT(map.contains("folder_id")); + UNIT_ASSERT(map.contains("resource_id")); + UNIT_ASSERT(map.find("resource_id")->second.GetString() == streamPath); + UNIT_ASSERT(map.contains("labels")); + UNIT_ASSERT_VALUES_EQUAL(map.find("labels")->second.GetMap().size(), 2); + UNIT_ASSERT(map.contains("tags")); + UNIT_ASSERT(map.contains("source_id")); + UNIT_ASSERT(map.contains("source_wt")); + UNIT_ASSERT(map.find("cloud_id")->second.GetString() == "somecloud"); + UNIT_ASSERT(map.find("folder_id")->second.GetString() == "somefolder"); UNIT_ASSERT(map.find("resource_id")->second.GetString() == streamPath); - auto& tags = map.find("tags")->second.GetMap(); - if (!tags.empty()) { - UNIT_ASSERT_VALUES_EQUAL(tags.size(), 3); - } - UNIT_ASSERT(map.contains("usage")); - auto& usage = map.find("usage")->second.GetMap(); - UNIT_ASSERT(usage.find("quantity")->second.GetInteger() >= 0); - } - UNIT_ASSERT(totalLines >= 2); -} - - + auto& tags = map.find("tags")->second.GetMap(); + if (!tags.empty()) { + UNIT_ASSERT_VALUES_EQUAL(tags.size(), 3); + } + UNIT_ASSERT(map.contains("usage")); + auto& usage = map.find("usage")->second.GetMap(); + UNIT_ASSERT(usage.find("quantity")->second.GetInteger() >= 0); + } + UNIT_ASSERT(totalLines >= 2); +} + + #define Y_UNIT_TEST_NAME this->Name_; Y_UNIT_TEST_SUITE(DataStreams) { @@ -147,16 +147,16 @@ Y_UNIT_TEST_SUITE(DataStreams) { auto result = testServer.DataStreamsClient->CreateStream(streamName, NYDS_V1::TCreateStreamSettings().ShardCount(3)).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); - if (result.GetStatus() != EStatus::SUCCESS) { - result.GetIssues().PrintTo(Cerr); - } + if (result.GetStatus() != EStatus::SUCCESS) { + result.GetIssues().PrintTo(Cerr); + } UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); } { auto result = testServer.DataStreamsClient->DescribeStream(streamName).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); - UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); + UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().stream_status(), YDS_V1::StreamDescription::ACTIVE); @@ -164,21 +164,21 @@ Y_UNIT_TEST_SUITE(DataStreams) { UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().write_quota_kb_per_sec(), 1024); UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().retention_period_hours(), 24); - UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().shards().size(), 3); - UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().shards(0).sequence_number_range().starting_sequence_number(), "0"); - UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().shards(0).hash_key_range().starting_hash_key(), "0"); - UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().shards(0).hash_key_range().ending_hash_key(), "113427455640312821154458202477256070484"); - UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().shards(1).hash_key_range().starting_hash_key(), "113427455640312821154458202477256070485"); - UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().shards(1).hash_key_range().ending_hash_key(), "226854911280625642308916404954512140969"); - UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().shards(2).hash_key_range().starting_hash_key(), "226854911280625642308916404954512140970"); - UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().shards(2).hash_key_range().ending_hash_key(), "340282366920938463463374607431768211455"); - } - + UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().shards().size(), 3); + UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().shards(0).sequence_number_range().starting_sequence_number(), "0"); + UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().shards(0).hash_key_range().starting_hash_key(), "0"); + UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().shards(0).hash_key_range().ending_hash_key(), "113427455640312821154458202477256070484"); + UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().shards(1).hash_key_range().starting_hash_key(), "113427455640312821154458202477256070485"); + UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().shards(1).hash_key_range().ending_hash_key(), "226854911280625642308916404954512140969"); + UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().shards(2).hash_key_range().starting_hash_key(), "226854911280625642308916404954512140970"); + UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().shards(2).hash_key_range().ending_hash_key(), "340282366920938463463374607431768211455"); + } + { auto result = testServer.DataStreamsClient->DescribeStreamSummary(streamName).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); - + UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description_summary().stream_status(), YDS_V1::StreamDescription::ACTIVE); UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description_summary().stream_name(), streamName); @@ -187,24 +187,24 @@ Y_UNIT_TEST_SUITE(DataStreams) { UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description_summary().open_shard_count(), 3); } - { + { auto result = testServer.DataStreamsClient->CreateStream("testfolder/" + streamName).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); - UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::BAD_REQUEST); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::BAD_REQUEST); } - { // for metering purposes + { // for metering purposes std::vector<NYDS_V1::TDataRecord> records; - for (ui32 i = 1; i <= 30; ++i) { - TString data = Sprintf("%04u", i); - records.push_back({data, data, ""}); - } + for (ui32 i = 1; i <= 30; ++i) { + TString data = Sprintf("%04u", i); + records.push_back({data, data, ""}); + } auto result = testServer.DataStreamsClient->PutRecords(streamName, records).ExtractValueSync(); - Cerr << result.GetResult().DebugString() << Endl; - UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); - UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); - } - + Cerr << result.GetResult().DebugString() << Endl; + UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); + } + { auto result = testServer.DataStreamsClient->ListStreams().ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); @@ -228,7 +228,7 @@ Y_UNIT_TEST_SUITE(DataStreams) { UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); } - + // now when stream is created delete should work fine { auto result = testServer.DataStreamsClient->DeleteStream(streamName).ExtractValueSync(); @@ -271,7 +271,7 @@ Y_UNIT_TEST_SUITE(DataStreams) { for (ui32 streamIdx = 0; streamIdx < 5; streamIdx++) { TStringBuilder streamNameX = TStringBuilder() << folderIdx << streamName << streamIdx; auto result = testServer.DataStreamsClient->CreateStream(streamNameX, NYDS_V1::TCreateStreamSettings().ShardCount(10)).ExtractValueSync(); - Cerr << result.GetIssues().ToString() << "\n"; + Cerr << result.GetIssues().ToString() << "\n"; UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); } @@ -411,11 +411,11 @@ Y_UNIT_TEST_SUITE(DataStreams) { auto result = testServer.DataStreamsClient->DescribeStream(streamName).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); - UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().shards_size(), 20); - UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().retention_period_hours(), 5); - UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().write_quota_kb_per_sec(), 128); - UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().owner(), "user@builtin"); - UNIT_ASSERT(result.GetResult().stream_description().stream_creation_timestamp() > 0); + UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().shards_size(), 20); + UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().retention_period_hours(), 5); + UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().write_quota_kb_per_sec(), 128); + UNIT_ASSERT_VALUES_EQUAL(result.GetResult().stream_description().owner(), "user@builtin"); + UNIT_ASSERT(result.GetResult().stream_description().stream_creation_timestamp() > 0); } } @@ -476,34 +476,34 @@ Y_UNIT_TEST_SUITE(DataStreams) { { TString exclusiveStartShardId; THashSet<TString> describedShards; - for (int i = 0; i < 8; i += 2) { + for (int i = 0; i < 8; i += 2) { auto result = testServer.DataStreamsClient->DescribeStream(streamName, NYDS_V1::TDescribeStreamSettings() - .Limit(2) + .Limit(2) .ExclusiveStartShardId(exclusiveStartShardId) ).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); - auto& description = result.GetResult().stream_description(); - UNIT_ASSERT_VALUES_EQUAL(description.shards().size(), 2); + auto& description = result.GetResult().stream_description(); + UNIT_ASSERT_VALUES_EQUAL(description.shards().size(), 2); UNIT_ASSERT_VALUES_EQUAL(description.has_more_shards(), true); for (const auto& shard : description.shards()) { describedShards.insert(shard.shard_id()); } - exclusiveStartShardId = description.shards(1).shard_id(); + exclusiveStartShardId = description.shards(1).shard_id(); } { auto result = testServer.DataStreamsClient->DescribeStream(streamName, NYDS_V1::TDescribeStreamSettings() - .Limit(2) + .Limit(2) .ExclusiveStartShardId( exclusiveStartShardId) ).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); - auto &description = result.GetResult().stream_description(); - UNIT_ASSERT_VALUES_EQUAL(description.shards().size(), 1); + auto &description = result.GetResult().stream_description(); + UNIT_ASSERT_VALUES_EQUAL(description.shards().size(), 1); UNIT_ASSERT_VALUES_EQUAL(description.has_more_shards(), false); for (const auto& shard : description.shards()) { describedShards.insert(shard.shard_id()); @@ -511,7 +511,7 @@ Y_UNIT_TEST_SUITE(DataStreams) { } // check for total number of shards - UNIT_ASSERT_EQUAL(describedShards.size(), 9); + UNIT_ASSERT_EQUAL(describedShards.size(), 9); } } @@ -536,17 +536,17 @@ Y_UNIT_TEST_SUITE(DataStreams) { } kikimr->GetRuntime()->SetLogPriority(NKikimrServices::PQ_READ_PROXY, NLog::EPriority::PRI_DEBUG); kikimr->GetRuntime()->SetLogPriority(NKikimrServices::PQ_WRITE_PROXY, NLog::EPriority::PRI_DEBUG); - + NYDS_V1::TDataStreamsClient client(*driver, TCommonClientSettings().AuthToken("user2@builtin")); - - TString dataStr = "9876543210"; - + + TString dataStr = "9876543210"; + auto putRecordResult = client.PutRecord("/Root/" + streamName, {dataStr, dataStr, dataStr}).ExtractValueSync(); - Cerr << putRecordResult.GetResult().DebugString() << Endl; - - UNIT_ASSERT_VALUES_EQUAL(putRecordResult.IsTransportError(), false); - UNIT_ASSERT_VALUES_EQUAL(putRecordResult.GetStatus(), EStatus::SUCCESS); - + Cerr << putRecordResult.GetResult().DebugString() << Endl; + + UNIT_ASSERT_VALUES_EQUAL(putRecordResult.IsTransportError(), false); + UNIT_ASSERT_VALUES_EQUAL(putRecordResult.GetStatus(), EStatus::SUCCESS); + { std::vector<NYDS_V1::TDataRecord> records; for (ui32 i = 1; i <= 30; ++i) { @@ -561,32 +561,32 @@ Y_UNIT_TEST_SUITE(DataStreams) { NYdb::NPersQueue::TPersQueueClient pqClient(*driver); - { + { auto result = testServer.DataStreamsClient->RegisterStreamConsumer(streamName, "user1", NYDS_V1::TRegisterStreamConsumerSettings()).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); UNIT_ASSERT_VALUES_EQUAL(result.GetResult().consumer().consumer_name(), "user1"); UNIT_ASSERT_VALUES_EQUAL(result.GetResult().consumer().consumer_status(), YDS_V1::ConsumerDescription_ConsumerStatus_ACTIVE); - } - + } + auto session = pqClient.CreateReadSession(NYdb::NPersQueue::TReadSessionSettings() - .ConsumerName("user1") + .ConsumerName("user1") .DisableClusterDiscovery(true) .AppendTopics(NYdb::NPersQueue::TTopicReadSettings().Path("/Root/" + streamName))); ui32 readCount = 0; while (readCount < 31) { auto event = session->GetEvent(true); - + if (auto* dataReceivedEvent = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TDataReceivedEvent>(&*event)) { for (const auto& item : dataReceivedEvent->GetMessages()) { - Cerr << item.DebugString(true) << Endl; + Cerr << item.DebugString(true) << Endl; UNIT_ASSERT_VALUES_EQUAL(item.GetData(), item.GetPartitionKey()); - auto hashKey = item.GetExplicitHash().empty() ? HexBytesToDecimal(MD5::Calc(item.GetPartitionKey())) : BytesToDecimal(item.GetExplicitHash()); - UNIT_ASSERT_VALUES_EQUAL(NKikimr::NDataStreams::V1::ShardFromDecimal(hashKey, 5), item.GetPartitionStream()->GetPartitionId()); - UNIT_ASSERT(!item.GetIp().empty()); + auto hashKey = item.GetExplicitHash().empty() ? HexBytesToDecimal(MD5::Calc(item.GetPartitionKey())) : BytesToDecimal(item.GetExplicitHash()); + UNIT_ASSERT_VALUES_EQUAL(NKikimr::NDataStreams::V1::ShardFromDecimal(hashKey, 5), item.GetPartitionStream()->GetPartitionId()); + UNIT_ASSERT(!item.GetIp().empty()); if (item.GetData() == dataStr) { - UNIT_ASSERT_VALUES_EQUAL(item.GetExplicitHash(), dataStr); + UNIT_ASSERT_VALUES_EQUAL(item.GetExplicitHash(), dataStr); } readCount++; } @@ -669,91 +669,91 @@ Y_UNIT_TEST_SUITE(DataStreams) { {"", shortEnoughKey, shortEnoughExplicitHash}}).ExtractValueSync(); UNIT_ASSERT(result.IsSuccess()); UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); - + result = client.PutRecords(streamName, {{"", shortEnoughKey, "0"}, {"", shortEnoughKey, "0"}}).ExtractValueSync(); - UNIT_ASSERT(result.IsSuccess()); - UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); - - UNIT_ASSERT(result.IsSuccess()); - UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); - + UNIT_ASSERT(result.IsSuccess()); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); + + UNIT_ASSERT(result.IsSuccess()); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); + kikimr->GetRuntime()->SetLogPriority(NKikimrServices::PQ_READ_PROXY, NLog::EPriority::PRI_INFO); kikimr->GetRuntime()->SetLogPriority(NKikimrServices::PERSQUEUE, NLog::EPriority::PRI_INFO); - - { + + { std::vector<NYDS_V1::TDataRecord> records; - TString data = TString(1024*1024, 'a'); - records.push_back({data, "key", ""}); - records.push_back({data, "key", ""}); - records.push_back({data, "key", ""}); - records.push_back({data, "key", ""}); - - Cerr << "First put records\n"; + TString data = TString(1024*1024, 'a'); + records.push_back({data, "key", ""}); + records.push_back({data, "key", ""}); + records.push_back({data, "key", ""}); + records.push_back({data, "key", ""}); + + Cerr << "First put records\n"; auto result = client.PutRecords(streamPath, records).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); - if (result.GetStatus() != EStatus::SUCCESS) { - result.GetIssues().PrintTo(Cerr); - } - UNIT_ASSERT_VALUES_EQUAL(result.GetResult().failed_record_count(), 0); - Cerr << result.GetResult().DebugString() << Endl; - Cerr << "Second put records\n"; - + UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); + if (result.GetStatus() != EStatus::SUCCESS) { + result.GetIssues().PrintTo(Cerr); + } + UNIT_ASSERT_VALUES_EQUAL(result.GetResult().failed_record_count(), 0); + Cerr << result.GetResult().DebugString() << Endl; + Cerr << "Second put records\n"; + result = client.PutRecords(streamPath, records).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); - if (result.GetStatus() != EStatus::SUCCESS) { - result.GetIssues().PrintTo(Cerr); - } - Cerr << result.GetResult().DebugString() << Endl; - UNIT_ASSERT_VALUES_EQUAL(result.GetResult().failed_record_count(), 4); - UNIT_ASSERT_VALUES_EQUAL(result.GetResult().records(0).error_code(), "ProvisionedThroughputExceededException"); - - Sleep(TDuration::Seconds(4)); - - Cerr << "Third put records\n"; + UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); + if (result.GetStatus() != EStatus::SUCCESS) { + result.GetIssues().PrintTo(Cerr); + } + Cerr << result.GetResult().DebugString() << Endl; + UNIT_ASSERT_VALUES_EQUAL(result.GetResult().failed_record_count(), 4); + UNIT_ASSERT_VALUES_EQUAL(result.GetResult().records(0).error_code(), "ProvisionedThroughputExceededException"); + + Sleep(TDuration::Seconds(4)); + + Cerr << "Third put records\n"; result = client.PutRecords(streamPath, records).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); - if (result.GetStatus() != EStatus::SUCCESS) { - result.GetIssues().PrintTo(Cerr); - } - UNIT_ASSERT_VALUES_EQUAL(result.GetResult().failed_record_count(), 0); - Cerr << result.GetResult().DebugString() << Endl; - - } - + UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); + if (result.GetStatus() != EStatus::SUCCESS) { + result.GetIssues().PrintTo(Cerr); + } + UNIT_ASSERT_VALUES_EQUAL(result.GetResult().failed_record_count(), 0); + Cerr << result.GetResult().DebugString() << Endl; + + } + NYdb::NPersQueue::TPersQueueClient pqClient(*driver); - - { - NYdb::NPersQueue::TAddReadRuleSettings addReadRuleSettings; - addReadRuleSettings.ReadRule(NYdb::NPersQueue::TReadRuleSettings().Version(1).ConsumerName("user1")); + + { + NYdb::NPersQueue::TAddReadRuleSettings addReadRuleSettings; + addReadRuleSettings.ReadRule(NYdb::NPersQueue::TReadRuleSettings().Version(1).ConsumerName("user1")); auto res = pqClient.AddReadRule(streamPath, addReadRuleSettings); - res.Wait(); - UNIT_ASSERT(res.GetValue().IsSuccess()); - } - auto session = pqClient.CreateReadSession(NYdb::NPersQueue::TReadSessionSettings() - .ConsumerName("user1") - .DisableClusterDiscovery(true) - .RetryPolicy(NYdb::NPersQueue::IRetryPolicy::GetNoRetryPolicy()) + res.Wait(); + UNIT_ASSERT(res.GetValue().IsSuccess()); + } + auto session = pqClient.CreateReadSession(NYdb::NPersQueue::TReadSessionSettings() + .ConsumerName("user1") + .DisableClusterDiscovery(true) + .RetryPolicy(NYdb::NPersQueue::IRetryPolicy::GetNoRetryPolicy()) .AppendTopics(NYdb::NPersQueue::TTopicReadSettings().Path(streamPath))); - ui32 readCount = 0; - while (readCount < 14) { - auto event = session->GetEvent(true); - - if (auto* dataReceivedEvent = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TDataReceivedEvent>(&*event)) { - for (const auto& item : dataReceivedEvent->GetMessages()) { - Cout << "GOT MESSAGE: " << item.DebugString(false) << Endl; - readCount++; - } - } else if (auto* createPartitionStreamEvent = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TCreatePartitionStreamEvent>(&*event)) { - createPartitionStreamEvent->Confirm(); - } else if (auto* destroyPartitionStreamEvent = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TDestroyPartitionStreamEvent>(&*event)) { - destroyPartitionStreamEvent->Confirm(); - } else if (auto* closeSessionEvent = std::get_if<NYdb::NPersQueue::TSessionClosedEvent>(&*event)) { - break; - } - } - UNIT_ASSERT_VALUES_EQUAL(readCount, 14); + ui32 readCount = 0; + while (readCount < 14) { + auto event = session->GetEvent(true); + + if (auto* dataReceivedEvent = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TDataReceivedEvent>(&*event)) { + for (const auto& item : dataReceivedEvent->GetMessages()) { + Cout << "GOT MESSAGE: " << item.DebugString(false) << Endl; + readCount++; + } + } else if (auto* createPartitionStreamEvent = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TCreatePartitionStreamEvent>(&*event)) { + createPartitionStreamEvent->Confirm(); + } else if (auto* destroyPartitionStreamEvent = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TDestroyPartitionStreamEvent>(&*event)) { + destroyPartitionStreamEvent->Confirm(); + } else if (auto* closeSessionEvent = std::get_if<NYdb::NPersQueue::TSessionClosedEvent>(&*event)) { + break; + } + } + UNIT_ASSERT_VALUES_EQUAL(readCount, 14); } Y_UNIT_TEST(TestPutRecords) { @@ -765,7 +765,7 @@ Y_UNIT_TEST_SUITE(DataStreams) { auto result = testServer.DataStreamsClient->CreateStream(streamPath, NYDS_V1::TCreateStreamSettings().ShardCount(5)).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); } NYDS_V1::TDataStreamsClient client(*driver, TCommonClientSettings().AuthToken("user2@builtin")); @@ -795,7 +795,7 @@ Y_UNIT_TEST_SUITE(DataStreams) { UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); Cerr << "PutRecordsResponse = " << result.GetResult().DebugString() << Endl; UNIT_ASSERT_VALUES_EQUAL(result.GetResult().failed_record_count(), 0); - UNIT_ASSERT_VALUES_EQUAL(result.GetResult().records_size(), records.size()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResult().records_size(), records.size()); UNIT_ASSERT_VALUES_EQUAL(result.GetResult().encryption_type(), YDS_V1::EncryptionType::NONE); TString dataStr = "9876543210"; @@ -803,11 +803,11 @@ Y_UNIT_TEST_SUITE(DataStreams) { UNIT_ASSERT_VALUES_EQUAL(putRecordResult.IsTransportError(), false); UNIT_ASSERT_VALUES_EQUAL(putRecordResult.GetStatus(), EStatus::SUCCESS); Cerr << "PutRecord response = " << putRecordResult.GetResult().DebugString() << Endl; - UNIT_ASSERT_VALUES_EQUAL(putRecordResult.GetResult().shard_id(), "shard-000004"); + UNIT_ASSERT_VALUES_EQUAL(putRecordResult.GetResult().shard_id(), "shard-000004"); UNIT_ASSERT_VALUES_EQUAL(putRecordResult.GetResult().sequence_number(), "7"); UNIT_ASSERT_VALUES_EQUAL(putRecordResult.GetResult().encryption_type(), YDS_V1::EncryptionType::NONE); - + } } @@ -815,64 +815,64 @@ Y_UNIT_TEST_SUITE(DataStreams) { TInsecureDatastreamsTestServer testServer; const TString streamName = TStringBuilder() << "stream_" << Y_UNIT_TEST_NAME; SET_YDS_LOCALS; - { + { auto result = testServer.DataStreamsClient->CreateStream(streamName, NYDS_V1::TCreateStreamSettings().ShardCount(5)).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); - UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); - } + UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); + } kikimr->GetRuntime()->SetLogPriority(NKikimrServices::PQ_READ_PROXY, NLog::EPriority::PRI_DEBUG); kikimr->GetRuntime()->SetLogPriority(NKikimrServices::PQ_WRITE_PROXY, NLog::EPriority::PRI_DEBUG); - + NYDS_V1::TDataStreamsClient client(*driver, TCommonClientSettings().AuthToken("user2@builtin")); - - + + auto putRecordResult = client.PutRecord("/Root/" + streamName, {"", "key", ""}).ExtractValueSync(); - Cerr << putRecordResult.GetResult().DebugString() << Endl; - - UNIT_ASSERT_VALUES_EQUAL(putRecordResult.IsTransportError(), false); - UNIT_ASSERT_VALUES_EQUAL(putRecordResult.GetStatus(), EStatus::SUCCESS); - UNIT_ASSERT_VALUES_EQUAL(putRecordResult.GetResult().sequence_number(), "0"); - + Cerr << putRecordResult.GetResult().DebugString() << Endl; + + UNIT_ASSERT_VALUES_EQUAL(putRecordResult.IsTransportError(), false); + UNIT_ASSERT_VALUES_EQUAL(putRecordResult.GetStatus(), EStatus::SUCCESS); + UNIT_ASSERT_VALUES_EQUAL(putRecordResult.GetResult().sequence_number(), "0"); + NYdb::NPersQueue::TPersQueueClient pqClient(*driver); - - { + + { auto result = testServer.DataStreamsClient->RegisterStreamConsumer(streamName, "user1", NYDS_V1::TRegisterStreamConsumerSettings()).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); UNIT_ASSERT_VALUES_EQUAL(result.GetResult().consumer().consumer_name(), "user1"); UNIT_ASSERT_VALUES_EQUAL(result.GetResult().consumer().consumer_status(), YDS_V1::ConsumerDescription_ConsumerStatus_ACTIVE); - } - - auto session = pqClient.CreateReadSession(NYdb::NPersQueue::TReadSessionSettings() - .ConsumerName("user1") - .DisableClusterDiscovery(true) + } + + auto session = pqClient.CreateReadSession(NYdb::NPersQueue::TReadSessionSettings() + .ConsumerName("user1") + .DisableClusterDiscovery(true) .AppendTopics(NYdb::NPersQueue::TTopicReadSettings().Path("/Root/" + streamName))); - while (true) { - auto event = session->GetEvent(true); - - if (auto* dataReceivedEvent = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TDataReceivedEvent>(&*event)) { - for (const auto& item : dataReceivedEvent->GetMessages()) { - Cerr << item.DebugString(true) << Endl; - UNIT_ASSERT_VALUES_EQUAL(item.GetData(), ""); - UNIT_ASSERT_VALUES_EQUAL(item.GetPartitionKey(), "key"); - } - UNIT_ASSERT_VALUES_EQUAL(dataReceivedEvent->GetMessages().size(), 1); - break; - } else if (auto* createPartitionStreamEvent = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TCreatePartitionStreamEvent>(&*event)) { - createPartitionStreamEvent->Confirm(); - } else if (auto* destroyPartitionStreamEvent = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TDestroyPartitionStreamEvent>(&*event)) { - destroyPartitionStreamEvent->Confirm(); - } else if (auto* closeSessionEvent = std::get_if<NYdb::NPersQueue::TSessionClosedEvent>(&*event)) { - UNIT_ASSERT(false); - break; - } else { - Y_FAIL("not a data!"); - } - } - } - + while (true) { + auto event = session->GetEvent(true); + + if (auto* dataReceivedEvent = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TDataReceivedEvent>(&*event)) { + for (const auto& item : dataReceivedEvent->GetMessages()) { + Cerr << item.DebugString(true) << Endl; + UNIT_ASSERT_VALUES_EQUAL(item.GetData(), ""); + UNIT_ASSERT_VALUES_EQUAL(item.GetPartitionKey(), "key"); + } + UNIT_ASSERT_VALUES_EQUAL(dataReceivedEvent->GetMessages().size(), 1); + break; + } else if (auto* createPartitionStreamEvent = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TCreatePartitionStreamEvent>(&*event)) { + createPartitionStreamEvent->Confirm(); + } else if (auto* destroyPartitionStreamEvent = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TDestroyPartitionStreamEvent>(&*event)) { + destroyPartitionStreamEvent->Confirm(); + } else if (auto* closeSessionEvent = std::get_if<NYdb::NPersQueue::TSessionClosedEvent>(&*event)) { + UNIT_ASSERT(false); + break; + } else { + Y_FAIL("not a data!"); + } + } + } + Y_UNIT_TEST(TestListStreamConsumers) { TInsecureDatastreamsTestServer testServer; const TString streamName = TStringBuilder() << "stream_" << Y_UNIT_TEST_NAME; @@ -883,7 +883,7 @@ Y_UNIT_TEST_SUITE(DataStreams) { UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); } - + // List stream consumers -> OK { auto result = testServer.DataStreamsClient->ListStreamConsumers(streamName, @@ -892,7 +892,7 @@ Y_UNIT_TEST_SUITE(DataStreams) { UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); UNIT_ASSERT_VALUES_EQUAL(result.GetResult().consumers().size(), 0); } - + // List stream consumers more than allowed -> get BAD_REQUEST { auto result = testServer.DataStreamsClient->ListStreamConsumers(streamName, diff --git a/ydb/services/datastreams/put_records_actor.h b/ydb/services/datastreams/put_records_actor.h index 26548ebd57..8f06a99157 100644 --- a/ydb/services/datastreams/put_records_actor.h +++ b/ydb/services/datastreams/put_records_actor.h @@ -17,29 +17,29 @@ namespace NKikimr::NDataStreams::V1 { - + struct TPutRecordsItem { TString Data; TString Key; TString ExplicitHash; - TString Ip; + TString Ip; }; - TString GetSerializedData(const TPutRecordsItem& item) { - NKikimrPQClient::TDataChunk proto; - - proto.SetIp(item.Ip); - proto.SetCodec(0); // NPersQueue::CODEC_RAW - proto.SetData(item.Data); - - TString str; - bool res = proto.SerializeToString(&str); - Y_VERIFY(res); - return str; - } - - + TString GetSerializedData(const TPutRecordsItem& item) { + NKikimrPQClient::TDataChunk proto; + + proto.SetIp(item.Ip); + proto.SetCodec(0); // NPersQueue::CODEC_RAW + proto.SetData(item.Data); + + TString str; + bool res = proto.SerializeToString(&str); + Y_VERIFY(res); + return str; + } + + class TDatastreamsPartitionActor : public TActorBootstrapped<TDatastreamsPartitionActor> { public: using TBase = TActorBootstrapped<TDatastreamsPartitionActor>; @@ -101,9 +101,9 @@ namespace NKikimr::NDataStreams::V1 { ui64 totalSize = 0; for (const auto& item : DataToWrite) { auto w = request.MutablePartitionRequest()->AddCmdWrite(); - w->SetData(GetSerializedData(item)); + w->SetData(GetSerializedData(item)); w->SetPartitionKey(item.Key); - w->SetExplicitHash(item.ExplicitHash); + w->SetExplicitHash(item.ExplicitHash); w->SetDisableDeduplication(true); w->SetCreateTimeMS(TInstant::Now().MilliSeconds()); w->SetUncompressedSize(item.Data.size()); @@ -158,12 +158,12 @@ namespace NKikimr::NDataStreams::V1 { Die(ctx); } - void Die(const TActorContext& ctx) override { - if (PipeClient) - NTabletPipe::CloseClient(ctx, PipeClient); - TBase::Die(ctx); - } - + void Die(const TActorContext& ctx) override { + if (PipeClient) + NTabletPipe::CloseClient(ctx, PipeClient); + TBase::Die(ctx); + } + private: NActors::TActorId ParentId; ui64 TabletId = 0; @@ -183,9 +183,9 @@ namespace NKikimr::NDataStreams::V1 { if (putRecordsRequest.records_size() > 500) { return TStringBuilder() << "Too many records in a single PutRecords request: " << putRecordsRequest.records_size() << " > 500"; } - ui64 totalSize = 0; + ui64 totalSize = 0; for (const auto& record : putRecordsRequest.records()) { - totalSize += record.partition_key().size() + record.data().size(); + totalSize += record.partition_key().size() + record.data().size(); if (record.partition_key().empty()) { return "Empty partition key"; } @@ -199,10 +199,10 @@ namespace NKikimr::NDataStreams::V1 { return TStringBuilder() << record.explicit_hash_key() << " is not a valid 128 bit decimal"; } } - if (totalSize > 5 * 1024 * 1024) { - return TStringBuilder() << "Total size of PutRecords request of " << totalSize << " bytes exceed limit of " << (5*1024*1024) << " bytes"; - - } + if (totalSize > 5 * 1024 * 1024) { + return TStringBuilder() << "Total size of PutRecords request of " << totalSize << " bytes exceed limit of " << (5*1024*1024) << " bytes"; + + } return ""; } } @@ -232,8 +232,8 @@ namespace NKikimr::NDataStreams::V1 { NActors::TActorId NewSchemeCache; Ydb::DataStreams::V1::PutRecordsResult PutRecordsResult; - TString Ip; - + TString Ip; + void SendNavigateRequest(const TActorContext &ctx); void Handle(NDataStreams::V1::TEvDataStreams::TEvPartitionActorResult::TPtr& ev, const TActorContext& ctx); @@ -253,7 +253,7 @@ namespace NKikimr::NDataStreams::V1 { TPutRecordsActorBase<TDerived, TProto>::TPutRecordsActorBase(TProto* request, NActors::TActorId newSchemeCache) : TBase(request, request->GetProtoRequest()->stream_name()) , NewSchemeCache(std::move(newSchemeCache)) - , Ip(request->GetPeerName()) + , Ip(request->GetPeerName()) { Y_ENSURE(request); } @@ -261,9 +261,9 @@ namespace NKikimr::NDataStreams::V1 { template<class TDerived, class TProto> void TPutRecordsActorBase<TDerived, TProto>::Bootstrap(const NActors::TActorContext& ctx) { TString error = CheckRequestIsValid(static_cast<TDerived*>(this)->GetPutRecordsRequest()); - + if (!error.empty()) { - return this->ReplyWithError(Ydb::StatusIds::BAD_REQUEST, Ydb::PersQueue::ErrorCode::BAD_REQUEST, error, ctx); + return this->ReplyWithError(Ydb::StatusIds::BAD_REQUEST, Ydb::PersQueue::ErrorCode::BAD_REQUEST, error, ctx); } if (this->Request_->GetInternalToken().empty()) { @@ -293,7 +293,7 @@ namespace NKikimr::NDataStreams::V1 { if (TBase::ReplyIfNotTopic(ev, ctx)) { return; } - + const NSchemeCache::TSchemeCacheNavigate* navigate = ev->Get()->Request.Get(); auto topicInfo = navigate->ResultSet.begin(); if (AppData(ctx)->PQConfig.GetRequireCredentialsInNewProtocol()) { @@ -315,10 +315,10 @@ namespace NKikimr::NDataStreams::V1 { AddRecord(items, totalShardsCount, i); } - for (auto& partition : pqDescription.GetPartitions()) { - auto part = partition.GetPartitionId(); - if (items[part].empty()) continue; - PartitionToActor[part].ActorId = ctx.Register( + for (auto& partition : pqDescription.GetPartitions()) { + auto part = partition.GetPartitionId(); + if (items[part].empty()) continue; + PartitionToActor[part].ActorId = ctx.Register( new TDatastreamsPartitionActor(ctx.SelfID, partition.GetTabletId(), part, this->GetTopicPath(ctx), std::move(items[part])) ); } @@ -332,13 +332,13 @@ namespace NKikimr::NDataStreams::V1 { } } - TString GetErrorText(const NPersQueue::NErrorCode::EErrorCode errorCode) { - if (errorCode == NPersQueue::NErrorCode::OVERLOAD) - return "ProvisionedThroughputExceededException"; - return "InternalFailure"; - //TODO: other codes https://docs.aws.amazon.com/kinesis/latest/APIReference/CommonErrors.html - } - + TString GetErrorText(const NPersQueue::NErrorCode::EErrorCode errorCode) { + if (errorCode == NPersQueue::NErrorCode::OVERLOAD) + return "ProvisionedThroughputExceededException"; + return "InternalFailure"; + //TODO: other codes https://docs.aws.amazon.com/kinesis/latest/APIReference/CommonErrors.html + } + template<class TDerived, class TProto> void TPutRecordsActorBase<TDerived, TProto>::Handle(NDataStreams::V1::TEvDataStreams::TEvPartitionActorResult::TPtr& ev, const TActorContext& ctx) { auto it = PartitionToActor.find(ev->Get()->PartitionId); @@ -347,15 +347,15 @@ namespace NKikimr::NDataStreams::V1 { PutRecordsResult.set_failed_record_count( PutRecordsResult.failed_record_count() + it->second.RecordIndexes.size()); } - PutRecordsResult.set_encryption_type(Ydb::DataStreams::V1::EncryptionType::NONE); + PutRecordsResult.set_encryption_type(Ydb::DataStreams::V1::EncryptionType::NONE); for (ui32 i = 0; i < it->second.RecordIndexes.size(); ++i) { ui32 index = it->second.RecordIndexes[i]; auto& record = *PutRecordsResult.mutable_records(index); if (ev->Get()->ErrorText.Defined()) { record.set_error_message(*ev->Get()->ErrorText); - record.set_error_code(GetErrorText(*(ev->Get()->ErrorCode))); //TODO: Throttling exception sometimes + record.set_error_code(GetErrorText(*(ev->Get()->ErrorCode))); //TODO: Throttling exception sometimes } else { - record.set_shard_id(GetShardName(it->first)); + record.set_shard_id(GetShardName(it->first)); record.set_sequence_number(Sprintf("%lu", ev->Get()->CurrentOffset + i)); } } @@ -369,13 +369,13 @@ namespace NKikimr::NDataStreams::V1 { const auto& record = static_cast<TDerived*>(this)->GetPutRecordsRequest().records(index); ui32 shard = 0; if (record.explicit_hash_key().empty()) { - auto hashKey = HexBytesToDecimal(MD5::Calc(record.partition_key())); + auto hashKey = HexBytesToDecimal(MD5::Calc(record.partition_key())); shard = ShardFromDecimal(hashKey, totalShardsCount); } else { - auto hashKey = BytesToDecimal(record.explicit_hash_key()); - shard = ShardFromDecimal(hashKey, totalShardsCount); + auto hashKey = BytesToDecimal(record.explicit_hash_key()); + shard = ShardFromDecimal(hashKey, totalShardsCount); } - items[shard].push_back(TPutRecordsItem{record.data(), record.partition_key(), record.explicit_hash_key(), Ip}); + items[shard].push_back(TPutRecordsItem{record.data(), record.partition_key(), record.explicit_hash_key(), Ip}); PartitionToActor[shard].RecordIndexes.push_back(index); } @@ -432,16 +432,16 @@ namespace NKikimr::NDataStreams::V1 { if (putRecordsResult.failed_record_count() == 0) { result.set_sequence_number(putRecordsResult.records(0).sequence_number()); - result.set_shard_id(putRecordsResult.records(0).shard_id()); - result.set_encryption_type(Ydb::DataStreams::V1::EncryptionType::NONE); - return ReplyWithResult(Ydb::StatusIds::SUCCESS, result, ctx); + result.set_shard_id(putRecordsResult.records(0).shard_id()); + result.set_encryption_type(Ydb::DataStreams::V1::EncryptionType::NONE); + return ReplyWithResult(Ydb::StatusIds::SUCCESS, result, ctx); } else { - if (putRecordsResult.records(0).error_code() == "ProvisionedThroughputExceededException") { - return ReplyWithResult(Ydb::StatusIds::OVERLOADED, ctx); - } - //TODO: other codes - access denied and so on - return ReplyWithResult(Ydb::StatusIds::INTERNAL_ERROR, ctx); + if (putRecordsResult.records(0).error_code() == "ProvisionedThroughputExceededException") { + return ReplyWithResult(Ydb::StatusIds::OVERLOADED, ctx); + } + //TODO: other codes - access denied and so on + return ReplyWithResult(Ydb::StatusIds::INTERNAL_ERROR, ctx); } } -} +} diff --git a/ydb/services/lib/actors/pq_schema_actor.cpp b/ydb/services/lib/actors/pq_schema_actor.cpp index 1b2bdb8cba..f48e398488 100644 --- a/ydb/services/lib/actors/pq_schema_actor.cpp +++ b/ydb/services/lib/actors/pq_schema_actor.cpp @@ -20,18 +20,18 @@ namespace NKikimr::NGRpcProxy::V1 { constexpr i32 MAX_SUPPORTED_CODECS_COUNT = 100; - TClientServiceTypes GetSupportedClientServiceTypes(const TActorContext& ctx) { - TClientServiceTypes serviceTypes; + TClientServiceTypes GetSupportedClientServiceTypes(const TActorContext& ctx) { + TClientServiceTypes serviceTypes; const auto& pqConfig = AppData(ctx)->PQConfig; - ui32 count = pqConfig.GetDefaultClientServiceType().GetMaxReadRulesCountPerTopic(); - if (count == 0) count = Max<ui32>(); - TString name = pqConfig.GetDefaultClientServiceType().GetName(); - serviceTypes.insert({name, {name, count}}); + ui32 count = pqConfig.GetDefaultClientServiceType().GetMaxReadRulesCountPerTopic(); + if (count == 0) count = Max<ui32>(); + TString name = pqConfig.GetDefaultClientServiceType().GetName(); + serviceTypes.insert({name, {name, count}}); for (const auto& serviceType : pqConfig.GetClientServiceType()) { - ui32 count = serviceType.GetMaxReadRulesCountPerTopic(); - if (count == 0) count = Max<ui32>(); - TString name = serviceType.GetName(); - serviceTypes.insert({name, {name, count}}); + ui32 count = serviceType.GetMaxReadRulesCountPerTopic(); + if (count == 0) count = Max<ui32>(); + TString name = serviceType.GetName(); + serviceTypes.insert({name, {name, count}}); } return serviceTypes; } @@ -57,20 +57,20 @@ namespace NKikimr::NGRpcProxy::V1 { TString AddReadRuleToConfig( NKikimrPQ::TPQTabletConfig* config, const Ydb::PersQueue::V1::TopicSettings::ReadRule& rr, - const TClientServiceTypes& supportedClientServiceTypes, + const TClientServiceTypes& supportedClientServiceTypes, const TActorContext& ctx ) { auto consumerName = NPersQueue::ConvertNewConsumerName(rr.consumer_name(), ctx); - if(consumerName.find("/") != TString::npos || consumerName.find("|") != TString::npos) { - return TStringBuilder() << "consumer '" << rr.consumer_name() << "' has illegal symbols"; - } + if(consumerName.find("/") != TString::npos || consumerName.find("|") != TString::npos) { + return TStringBuilder() << "consumer '" << rr.consumer_name() << "' has illegal symbols"; + } { TString migrationError = ReadRuleServiceTypeMigration(config, ctx); if (migrationError) { return migrationError; } } - + config->AddReadRules(consumerName); if (rr.starting_message_timestamp_ms() < 0) { @@ -103,7 +103,7 @@ namespace NKikimr::NGRpcProxy::V1 { config->MutablePartitionConfig()->AddImportantClientId(consumerName); if (!rr.service_type().empty()) { - if (!supportedClientServiceTypes.contains(rr.service_type())) { + if (!supportedClientServiceTypes.contains(rr.service_type())) { return TStringBuilder() << "Unknown read rule service type '" << rr.service_type() << "' for consumer '" << rr.consumer_name() << "'"; } @@ -162,7 +162,7 @@ namespace NKikimr::NGRpcProxy::V1 { config->AddReadRuleServiceTypes(originalConfig.GetReadRuleServiceTypes(i)); } else { if (pqConfig.GetDisallowDefaultClientServiceType()) { - return TStringBuilder() << "service type cannot be empty for consumer '" + return TStringBuilder() << "service type cannot be empty for consumer '" << originalConfig.GetReadRules(i) << "'"; } config->AddReadRuleServiceTypes(pqConfig.GetDefaultClientServiceType().GetName()); @@ -176,15 +176,15 @@ namespace NKikimr::NGRpcProxy::V1 { return ""; } - bool CheckReadRulesConfig(const NKikimrPQ::TPQTabletConfig& config, const TClientServiceTypes& supportedClientServiceTypes, - TString& error) { - - if (config.GetReadRules().size() > MAX_READ_RULES_COUNT) { - error = TStringBuilder() << "read rules count cannot be more than " - << MAX_SUPPORTED_CODECS_COUNT << ", provided " << config.GetReadRules().size(); - return false; - } - + bool CheckReadRulesConfig(const NKikimrPQ::TPQTabletConfig& config, const TClientServiceTypes& supportedClientServiceTypes, + TString& error) { + + if (config.GetReadRules().size() > MAX_READ_RULES_COUNT) { + error = TStringBuilder() << "read rules count cannot be more than " + << MAX_SUPPORTED_CODECS_COUNT << ", provided " << config.GetReadRules().size(); + return false; + } + THashSet<TString> readRuleConsumers; for (auto consumerName : config.GetReadRules()) { if (readRuleConsumers.find(consumerName) != readRuleConsumers.end()) { @@ -193,21 +193,21 @@ namespace NKikimr::NGRpcProxy::V1 { } readRuleConsumers.insert(consumerName); } - - for (const auto& t : supportedClientServiceTypes) { - - auto type = t.first; - auto count = std::count_if(config.GetReadRuleServiceTypes().begin(), config.GetReadRuleServiceTypes().end(), - [type](const TString& cType){ - return type == cType; - }); - auto limit = t.second.MaxCount; - if (count > limit) { - error = TStringBuilder() << "Count of consumers with service type '" << type << "' is limited for " << limit << " for stream\n"; - return false; - } - } - + + for (const auto& t : supportedClientServiceTypes) { + + auto type = t.first; + auto count = std::count_if(config.GetReadRuleServiceTypes().begin(), config.GetReadRuleServiceTypes().end(), + [type](const TString& cType){ + return type == cType; + }); + auto limit = t.second.MaxCount; + if (count > limit) { + error = TStringBuilder() << "Count of consumers with service type '" << type << "' is limited for " << limit << " for stream\n"; + return false; + } + } + return false; } @@ -266,15 +266,15 @@ namespace NKikimr::NGRpcProxy::V1 { error = TStringBuilder() << "Attirbute allow_unauthenticated_write is " << pair.second << ", which is not bool"; return Ydb::StatusIds::BAD_REQUEST; } - } else if (pair.first == "_abc_slug") { - config->SetAbcSlug(pair.second); - } else if (pair.first == "_abc_id") { - try { - config->SetAbcId(!FromString<ui32>(pair.second)); - } catch(...) { - error = TStringBuilder() << "Attirbute abc_id is " << pair.second << ", which is not integer"; - return Ydb::StatusIds::BAD_REQUEST; - } + } else if (pair.first == "_abc_slug") { + config->SetAbcSlug(pair.second); + } else if (pair.first == "_abc_id") { + try { + config->SetAbcId(!FromString<ui32>(pair.second)); + } catch(...) { + error = TStringBuilder() << "Attirbute abc_id is " << pair.second << ", which is not integer"; + return Ydb::StatusIds::BAD_REQUEST; + } } else { error = TStringBuilder() << "Attirbute " << pair.first << " is not supported"; return Ydb::StatusIds::BAD_REQUEST; @@ -481,8 +481,8 @@ namespace NKikimr::NGRpcProxy::V1 { } } - CheckReadRulesConfig(*config, supportedClientServiceTypes, error); - return error.empty() ? Ydb::StatusIds::SUCCESS : Ydb::StatusIds::BAD_REQUEST; + CheckReadRulesConfig(*config, supportedClientServiceTypes, error); + return error.empty() ? Ydb::StatusIds::SUCCESS : Ydb::StatusIds::BAD_REQUEST; } -} +} diff --git a/ydb/services/lib/actors/pq_schema_actor.h b/ydb/services/lib/actors/pq_schema_actor.h index dacbab7bd7..1be2e7fa24 100644 --- a/ydb/services/lib/actors/pq_schema_actor.h +++ b/ydb/services/lib/actors/pq_schema_actor.h @@ -21,25 +21,25 @@ namespace NKikimr::NGRpcProxy::V1 { TString& error ); - struct TClientServiceType { - TString Name; - ui32 MaxCount; - }; - typedef std::map<TString, TClientServiceType> TClientServiceTypes; - TClientServiceTypes GetSupportedClientServiceTypes(const TActorContext& ctx); + struct TClientServiceType { + TString Name; + ui32 MaxCount; + }; + typedef std::map<TString, TClientServiceType> TClientServiceTypes; + TClientServiceTypes GetSupportedClientServiceTypes(const TActorContext& ctx); - // Returns true if have duplicated read rules - bool CheckReadRulesConfig(const NKikimrPQ::TPQTabletConfig& config, const TClientServiceTypes& supportedReadRuleServiceTypes, TString& error); + // Returns true if have duplicated read rules + bool CheckReadRulesConfig(const NKikimrPQ::TPQTabletConfig& config, const TClientServiceTypes& supportedReadRuleServiceTypes, TString& error); TString AddReadRuleToConfig( NKikimrPQ::TPQTabletConfig *config, const Ydb::PersQueue::V1::TopicSettings::ReadRule& rr, - const TClientServiceTypes& supportedReadRuleServiceTypes, + const TClientServiceTypes& supportedReadRuleServiceTypes, const TActorContext& ctx ); TString RemoveReadRuleFromConfig( NKikimrPQ::TPQTabletConfig *config, - const NKikimrPQ::TPQTabletConfig& originalConfig, + const NKikimrPQ::TPQTabletConfig& originalConfig, const TString& consumerName, const TActorContext& ctx ); @@ -107,13 +107,13 @@ namespace NKikimr::NGRpcProxy::V1 { PrepareTopicPath(ctx); auto navigateRequest = std::make_unique<NSchemeCache::TSchemeCacheNavigate>(); navigateRequest->DatabaseName = CanonizePath(this->Request_->GetDatabaseName().GetOrElse("")); - + NSchemeCache::TSchemeCacheNavigate::TEntry entry; entry.Path = NKikimr::SplitPath(TopicPath); entry.SyncVersion = true; entry.Operation = NSchemeCache::TSchemeCacheNavigate::OpTopic; navigateRequest->ResultSet.emplace_back(entry); - + if (this->Request_->GetInternalToken().empty()) { if (AppData(ctx)->PQConfig.GetRequireCredentialsInNewProtocol()) { return ReplyWithError(Ydb::StatusIds::UNAUTHORIZED, Ydb::PersQueue::ErrorCode::ACCESS_DENIED, @@ -126,7 +126,7 @@ namespace NKikimr::NGRpcProxy::V1 { ctx.Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(navigateRequest.release())); } } - + bool ReplyIfNotTopic(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx) { const NSchemeCache::TSchemeCacheNavigate* result = ev->Get()->Request.Get(); Y_VERIFY(result->ResultSet.size() == 1); diff --git a/ydb/services/lib/sharding/sharding.cpp b/ydb/services/lib/sharding/sharding.cpp index b01d8523fe..ce0e5a1c0a 100644 --- a/ydb/services/lib/sharding/sharding.cpp +++ b/ydb/services/lib/sharding/sharding.cpp @@ -1,6 +1,6 @@ #include "sharding.h" -#include <vector> +#include <vector> #include <util/generic/maybe.h> #include <util/generic/yexception.h> #include <util/string/printf.h> @@ -9,23 +9,23 @@ namespace NKikimr::NDataStreams::V1 { namespace { - NYql::NDecimal::TUint128 Uint128FromString(const TString& bytes, ui32 base = 10) { - Y_VERIFY(base == 10 || base == 16); - NYql::NDecimal::TUint128 x = 1; - NYql::NDecimal::TUint128 res = 0; - for (auto it = bytes.rbegin(); it != bytes.rend(); ++it) { - if (!((*it >= '0' && *it <='9') || (*it >= 'a' && *it <= 'z') || (*it >= 'A' && *it <= 'Z'))) - ythrow yexception() << "invalid character '" << *it << "'"; - ui32 v = (*it >= '0' && *it <= '9') ? (*it - '0') : (( *it >= 'a' && *it <= 'z') ? (*it - 'a' + 10) : (*it - 'A' + 10)); - if (v >= base) - ythrow yexception() << "string is not valid Uint128"; - res += x * v; - x = base == 16 ? x << 4 : x * 10; + NYql::NDecimal::TUint128 Uint128FromString(const TString& bytes, ui32 base = 10) { + Y_VERIFY(base == 10 || base == 16); + NYql::NDecimal::TUint128 x = 1; + NYql::NDecimal::TUint128 res = 0; + for (auto it = bytes.rbegin(); it != bytes.rend(); ++it) { + if (!((*it >= '0' && *it <='9') || (*it >= 'a' && *it <= 'z') || (*it >= 'A' && *it <= 'Z'))) + ythrow yexception() << "invalid character '" << *it << "'"; + ui32 v = (*it >= '0' && *it <= '9') ? (*it - '0') : (( *it >= 'a' && *it <= 'z') ? (*it - 'a' + 10) : (*it - 'A' + 10)); + if (v >= base) + ythrow yexception() << "string is not valid Uint128"; + res += x * v; + x = base == 16 ? x << 4 : x * 10; } - return res; + return res; } - ui32 ShardFromUint128(NYql::NDecimal::TUint128 value, NYql::NDecimal::TUint128 totalShardsCount) { + ui32 ShardFromUint128(NYql::NDecimal::TUint128 value, NYql::NDecimal::TUint128 totalShardsCount) { NYql::NDecimal::TUint128 max = -1; NYql::NDecimal::TUint128 sliceSize = max / totalShardsCount; NYql::NDecimal::TUint128 shard = value / sliceSize; @@ -35,61 +35,61 @@ namespace NKikimr::NDataStreams::V1 { } - TString Uint128ToDecimalString(NYql::NDecimal::TUint128 value, const NYql::NDecimal::TUint128& base) { - std::vector<char> result; - while (value != 0) { - result.push_back((char)('0' + ui32(value % base))); - value = value / base; - } - std::reverse(result.begin(), result.end()); - return result.size() > 0 ? TString(result.begin(), result.end()) : "0"; - } - + TString Uint128ToDecimalString(NYql::NDecimal::TUint128 value, const NYql::NDecimal::TUint128& base) { + std::vector<char> result; + while (value != 0) { + result.push_back((char)('0' + ui32(value % base))); + value = value / base; + } + std::reverse(result.begin(), result.end()); + return result.size() > 0 ? TString(result.begin(), result.end()) : "0"; + } + TString GetShardName(ui32 index) { return Sprintf("shard-%06d", index); } - NYql::NDecimal::TUint128 BytesToDecimal(const TString& bytes) { - return Uint128FromString(bytes, 10); + NYql::NDecimal::TUint128 BytesToDecimal(const TString& bytes) { + return Uint128FromString(bytes, 10); } - bool IsValidDecimal(const TString& bytes) { - if (bytes.empty()) - return false; - if (bytes.size() > 1 && (bytes[0] < '1' || bytes[0] > '9')) - return false; + bool IsValidDecimal(const TString& bytes) { + if (bytes.empty()) + return false; + if (bytes.size() > 1 && (bytes[0] < '1' || bytes[0] > '9')) + return false; static const TString UI128_MAX = "340282366920938463463374607431768211455"; - if (bytes.size() > UI128_MAX.size() || (bytes.size() == UI128_MAX.size() && bytes > UI128_MAX)) { + if (bytes.size() > UI128_MAX.size() || (bytes.size() == UI128_MAX.size() && bytes > UI128_MAX)) { return false; } - for (auto& c : bytes) { - if (c < '0' || c > '9') - return false; - } - return true; + for (auto& c : bytes) { + if (c < '0' || c > '9') + return false; + } + return true; + } + + NYql::NDecimal::TUint128 HexBytesToDecimal(const TString& hex) { + return Uint128FromString(hex, 16); } - NYql::NDecimal::TUint128 HexBytesToDecimal(const TString& hex) { - return Uint128FromString(hex, 16); + ui32 ShardFromDecimal(const NYql::NDecimal::TUint128& decimal, ui32 totalShardsCount) { + return ShardFromUint128(decimal, totalShardsCount); } - ui32 ShardFromDecimal(const NYql::NDecimal::TUint128& decimal, ui32 totalShardsCount) { - return ShardFromUint128(decimal, totalShardsCount); - } - THashKeyRange RangeFromShardNumber(ui32 shardNumber, ui32 shardCount) { Y_ENSURE(shardNumber < shardCount); NYql::NDecimal::TUint128 max = -1; if (shardCount == 1) { - return {0, max}; + return {0, max}; } NYql::NDecimal::TUint128 slice = max / shardCount; NYql::NDecimal::TUint128 left = NYql::NDecimal::TUint128(shardNumber) * slice; NYql::NDecimal::TUint128 right = shardNumber + 1 == shardCount ? max : NYql::NDecimal::TUint128(shardNumber + 1) * slice - NYql::NDecimal::TUint128(1); - return {left, right}; + return {left, right}; } } diff --git a/ydb/services/lib/sharding/sharding.h b/ydb/services/lib/sharding/sharding.h index 8b2e5c2fdb..257ee8d2be 100644 --- a/ydb/services/lib/sharding/sharding.h +++ b/ydb/services/lib/sharding/sharding.h @@ -1,21 +1,21 @@ #pragma once #include <ydb/library/yql/public/decimal/yql_decimal.h> - + #include <util/generic/string.h> namespace NKikimr::NDataStreams::V1 { struct THashKeyRange { - NYql::NDecimal::TUint128 Start; - NYql::NDecimal::TUint128 End; + NYql::NDecimal::TUint128 Start; + NYql::NDecimal::TUint128 End; }; THashKeyRange RangeFromShardNumber(ui32 shardNumber, ui32 shardCount); TString GetShardName(ui32 index); - NYql::NDecimal::TUint128 HexBytesToDecimal(const TString &hex); - ui32 ShardFromDecimal(const NYql::NDecimal::TUint128 &decimal, ui32 totalShardsCount); - NYql::NDecimal::TUint128 BytesToDecimal(const TString &bytes); - bool IsValidDecimal(const TString& bytes); - TString Uint128ToDecimalString(NYql::NDecimal::TUint128 decimal, const NYql::NDecimal::TUint128& base = 10); -} + NYql::NDecimal::TUint128 HexBytesToDecimal(const TString &hex); + ui32 ShardFromDecimal(const NYql::NDecimal::TUint128 &decimal, ui32 totalShardsCount); + NYql::NDecimal::TUint128 BytesToDecimal(const TString &bytes); + bool IsValidDecimal(const TString& bytes); + TString Uint128ToDecimalString(NYql::NDecimal::TUint128 decimal, const NYql::NDecimal::TUint128& base = 10); +} diff --git a/ydb/services/persqueue_cluster_discovery/cluster_discovery_service.cpp b/ydb/services/persqueue_cluster_discovery/cluster_discovery_service.cpp index d9a707aa11..aabd11d4ca 100644 --- a/ydb/services/persqueue_cluster_discovery/cluster_discovery_service.cpp +++ b/ydb/services/persqueue_cluster_discovery/cluster_discovery_service.cpp @@ -76,16 +76,16 @@ private: return "NULL"; } - bool IsHealthy(bool useLocalEnabled = false) const { - bool isLocalEnabled = false; - if (ClustersList) { - for (const auto& cluster : ClustersList->Clusters) { - if (cluster.IsLocal) - isLocalEnabled = cluster.IsEnabled; - } - - } - return ClustersList && DatacenterClassifier && (isLocalEnabled || !useLocalEnabled); + bool IsHealthy(bool useLocalEnabled = false) const { + bool isLocalEnabled = false; + if (ClustersList) { + for (const auto& cluster : ClustersList->Clusters) { + if (cluster.IsLocal) + isLocalEnabled = cluster.IsEnabled; + } + + } + return ClustersList && DatacenterClassifier && (isLocalEnabled || !useLocalEnabled); } TString MakeReport() const { @@ -121,12 +121,12 @@ private: TStringBuilder responseContent; if (path.EndsWith("/health")) { - static const char HTTPNOTAVAIL_H[] = "HTTP/1.1 418 I'm a teapot\r\nConnection: Close\r\n\r\nDiscovery service is disabled on the node\r\n"; - responseContent << (IsHealthy() ? NMonitoring::HTTPOKTEXT : HTTPNOTAVAIL_H) << "Service statuses: 200 - OK, 418 - DISABLED"; - } else if (path.EndsWith("/ping")) { - static const char HTTPNOTAVAIL_P[] = "HTTP/1.1 418 I'm a teapot\r\nConnection: Close\r\n\r\nDiscovery service is disabled on the node and local cluster is disabled\r\n"; - responseContent << (IsHealthy(true) ? NMonitoring::HTTPOKTEXT : HTTPNOTAVAIL_P) << "Service statuses: 200 - OK, 418 - DISABLED"; - } else{ + static const char HTTPNOTAVAIL_H[] = "HTTP/1.1 418 I'm a teapot\r\nConnection: Close\r\n\r\nDiscovery service is disabled on the node\r\n"; + responseContent << (IsHealthy() ? NMonitoring::HTTPOKTEXT : HTTPNOTAVAIL_H) << "Service statuses: 200 - OK, 418 - DISABLED"; + } else if (path.EndsWith("/ping")) { + static const char HTTPNOTAVAIL_P[] = "HTTP/1.1 418 I'm a teapot\r\nConnection: Close\r\n\r\nDiscovery service is disabled on the node and local cluster is disabled\r\n"; + responseContent << (IsHealthy(true) ? NMonitoring::HTTPOKTEXT : HTTPNOTAVAIL_P) << "Service statuses: 200 - OK, 418 - DISABLED"; + } else{ responseContent << NMonitoring::HTTPOKTEXT << MakeReport(); } diff --git a/ydb/services/persqueue_cluster_discovery/cluster_discovery_service_ut.cpp b/ydb/services/persqueue_cluster_discovery/cluster_discovery_service_ut.cpp index a506ed63b8..34721e262d 100644 --- a/ydb/services/persqueue_cluster_discovery/cluster_discovery_service_ut.cpp +++ b/ydb/services/persqueue_cluster_discovery/cluster_discovery_service_ut.cpp @@ -475,7 +475,7 @@ Y_UNIT_TEST_SUITE(TPQCDTest) { ClusterInfo c1Info; c1Info.set_name("dc1"); - c1Info.set_endpoint("localhost"); + c1Info.set_endpoint("localhost"); c1Info.set_available(true); ClusterInfo c2Info; diff --git a/ydb/services/persqueue_v1/grpc_pq_actor.h b/ydb/services/persqueue_v1/grpc_pq_actor.h index 22e0703ad3..65c8aca1db 100644 --- a/ydb/services/persqueue_v1/grpc_pq_actor.h +++ b/ydb/services/persqueue_v1/grpc_pq_actor.h @@ -6,19 +6,19 @@ #include <ydb/core/grpc_services/rpc_deferrable.h> #include <ydb/core/grpc_services/rpc_calls.h> - + #include <ydb/core/client/server/msgbus_server_pq_metacache.h> #include <ydb/core/client/server/msgbus_server_persqueue.h> - + #include <ydb/core/base/events.h> #include <ydb/core/tx/scheme_cache/scheme_cache.h> #include <ydb/core/protos/grpc_pq_old.pb.h> #include <ydb/core/protos/pqconfig.pb.h> - -#include <library/cpp/containers/disjoint_interval_tree/disjoint_interval_tree.h> - +#include <library/cpp/containers/disjoint_interval_tree/disjoint_interval_tree.h> + + #include <ydb/core/persqueue/events/global.h> #include <ydb/core/persqueue/writer/writer.h> #include <ydb/core/persqueue/percentile_counter.h> @@ -27,127 +27,127 @@ #include <ydb/core/tx/tx_proxy/proxy.h> #include <ydb/public/lib/base/msgbus_status.h> #include <ydb/core/kqp/kqp.h> - + #include <ydb/library/persqueue/topic_parser/topic_parser.h> - + #include <ydb/services/lib/actors/pq_schema_actor.h> #include <ydb/services/lib/actors/type_definitions.h> - + #include <util/generic/guid.h> #include <util/system/compiler.h> namespace NKikimr::NGRpcProxy::V1 { -using namespace Ydb; - -using namespace NKikimr::NGRpcService; - -PersQueue::ErrorCode::ErrorCode ConvertOldCode(const NPersQueue::NErrorCode::EErrorCode code); - -void FillIssue(Ydb::Issue::IssueMessage* issue, const PersQueue::ErrorCode::ErrorCode errorCode, const TString& errorReason); - -Ydb::StatusIds::StatusCode ConvertPersQueueInternalCodeToStatus(const PersQueue::ErrorCode::ErrorCode code); - -const TString& TopicPrefix(const TActorContext& ctx); - -static const TDuration CHECK_ACL_DELAY = TDuration::Minutes(5); - +using namespace Ydb; + +using namespace NKikimr::NGRpcService; + +PersQueue::ErrorCode::ErrorCode ConvertOldCode(const NPersQueue::NErrorCode::EErrorCode code); + +void FillIssue(Ydb::Issue::IssueMessage* issue, const PersQueue::ErrorCode::ErrorCode errorCode, const TString& errorReason); + +Ydb::StatusIds::StatusCode ConvertPersQueueInternalCodeToStatus(const PersQueue::ErrorCode::ErrorCode code); + +const TString& TopicPrefix(const TActorContext& ctx); + +static const TDuration CHECK_ACL_DELAY = TDuration::Minutes(5); + // Codec ID size in bytes constexpr ui32 CODEC_ID_SIZE = 1; - + template<typename TItem0, typename... TItems> bool AllEqual(const TItem0& item0, const TItems&... items) { return ((items == item0) && ... && true); } - -static inline bool InternalErrorCode(PersQueue::ErrorCode::ErrorCode errorCode) { - switch(errorCode) { - case PersQueue::ErrorCode::UNKNOWN_TOPIC: - case PersQueue::ErrorCode::ERROR: - case PersQueue::ErrorCode::INITIALIZING: - case PersQueue::ErrorCode::OVERLOAD: - case PersQueue::ErrorCode::WRITE_ERROR_DISK_IS_FULL: - return true; - default: - return false; - } - return false; -} - -struct TPartitionId { + +static inline bool InternalErrorCode(PersQueue::ErrorCode::ErrorCode errorCode) { + switch(errorCode) { + case PersQueue::ErrorCode::UNKNOWN_TOPIC: + case PersQueue::ErrorCode::ERROR: + case PersQueue::ErrorCode::INITIALIZING: + case PersQueue::ErrorCode::OVERLOAD: + case PersQueue::ErrorCode::WRITE_ERROR_DISK_IS_FULL: + return true; + default: + return false; + } + return false; +} + +struct TPartitionId { NPersQueue::TConverterPtr TopicConverter; - ui64 Partition; - ui64 AssignId; - - bool operator < (const TPartitionId& rhs) const { + ui64 Partition; + ui64 AssignId; + + bool operator < (const TPartitionId& rhs) const { return std::make_tuple(AssignId, Partition, TopicConverter->GetClientsideName()) < std::make_tuple(rhs.AssignId, rhs.Partition, rhs.TopicConverter->GetClientsideName()); - } -}; - - - -IOutputStream& operator <<(IOutputStream& out, const TPartitionId& partId); - -struct TCommitCookie { - ui64 AssignId; - ui64 Cookie; -}; - - -struct TEvPQProxy { + } +}; + + + +IOutputStream& operator <<(IOutputStream& out, const TPartitionId& partId); + +struct TCommitCookie { + ui64 AssignId; + ui64 Cookie; +}; + + +struct TEvPQProxy { enum EEv { EvWriteInit = EventSpaceBegin(TKikimrEvents::ES_PQ_PROXY_NEW), // TODO: Replace 'NEW' with version or something EvWrite, - EvDone, - EvReadInit, - EvRead, - EvCloseSession, - EvPartitionReady, - EvReadResponse, - EvCommitCookie, - EvCommitDone, - EvStartRead, - EvReleasePartition, - EvReleased, - EvPartitionReleased, - EvLockPartition, - EvRestartPipe, - EvDieCommand, - EvPartitionStatus, - EvAuth, - EvReadSessionStatus, - EvReadSessionStatusResponse, - EvAuthResultOk, - EvUpdateClusters, - EvQueryCompiled, - EvSessionDead, + EvDone, + EvReadInit, + EvRead, + EvCloseSession, + EvPartitionReady, + EvReadResponse, + EvCommitCookie, + EvCommitDone, + EvStartRead, + EvReleasePartition, + EvReleased, + EvPartitionReleased, + EvLockPartition, + EvRestartPipe, + EvDieCommand, + EvPartitionStatus, + EvAuth, + EvReadSessionStatus, + EvReadSessionStatusResponse, + EvAuthResultOk, + EvUpdateClusters, + EvQueryCompiled, + EvSessionDead, EvSessionSetPreferredCluster, - EvScheduleUpdateClusters, - EvDeadlineExceeded, - EvGetStatus, - EvUpdateToken, - EvCommitRange, - EvEnd + EvScheduleUpdateClusters, + EvDeadlineExceeded, + EvGetStatus, + EvUpdateToken, + EvCommitRange, + EvEnd }; - - struct TEvReadSessionStatus : public TEventPB<TEvReadSessionStatus, NKikimrPQ::TReadSessionStatus, EvReadSessionStatus> { - }; - - struct TEvReadSessionStatusResponse : public TEventPB<TEvReadSessionStatusResponse, NKikimrPQ::TReadSessionStatusResponse, EvReadSessionStatusResponse> { - }; - - - struct TEvAuthResultOk : public NActors::TEventLocal<TEvAuthResultOk, EvAuthResultOk> { + + struct TEvReadSessionStatus : public TEventPB<TEvReadSessionStatus, NKikimrPQ::TReadSessionStatus, EvReadSessionStatus> { + }; + + struct TEvReadSessionStatusResponse : public TEventPB<TEvReadSessionStatusResponse, NKikimrPQ::TReadSessionStatusResponse, EvReadSessionStatusResponse> { + }; + + + struct TEvAuthResultOk : public NActors::TEventLocal<TEvAuthResultOk, EvAuthResultOk> { TEvAuthResultOk(const TTopicTabletsPairs&& topicAndTablets) - : TopicAndTablets(std::move(topicAndTablets)) - { } - + : TopicAndTablets(std::move(topicAndTablets)) + { } + TTopicTabletsPairs TopicAndTablets; - }; - + }; + struct TEvSessionSetPreferredCluster : public NActors::TEventLocal<TEvSessionSetPreferredCluster, EvSessionSetPreferredCluster> { TEvSessionSetPreferredCluster(const ui64 cookie, const TString& preferredCluster) : Cookie(cookie) @@ -156,94 +156,94 @@ struct TEvPQProxy { const ui64 Cookie; const TString PreferredCluster; }; - - struct TEvSessionDead : public NActors::TEventLocal<TEvSessionDead, EvSessionDead> { - TEvSessionDead(const ui64 cookie) - : Cookie(cookie) - { } - - const ui64 Cookie; - }; - - struct TEvScheduleUpdateClusters : public NActors::TEventLocal<TEvScheduleUpdateClusters, EvScheduleUpdateClusters> { - TEvScheduleUpdateClusters() - { } - }; - - - struct TEvUpdateClusters : public NActors::TEventLocal<TEvUpdateClusters, EvUpdateClusters> { - TEvUpdateClusters(const TString& localCluster, bool enabled, const TVector<TString>& clusters) - : LocalCluster(localCluster) - , Enabled(enabled) - , Clusters(clusters) - { } - - const TString LocalCluster; - const bool Enabled; - const TVector<TString> Clusters; - }; - - struct TEvQueryCompiled : public NActors::TEventLocal<TEvQueryCompiled, EvQueryCompiled> { - TEvQueryCompiled(const TString& selectQ, const TString& updateQ, const TString& deleteQ) - : SelectQ(selectQ) - , UpdateQ(updateQ) - , DeleteQ(deleteQ) - { } - - const TString SelectQ, UpdateQ, DeleteQ; - }; - - - - struct TEvWriteInit : public NActors::TEventLocal<TEvWriteInit, EvWriteInit> { + + struct TEvSessionDead : public NActors::TEventLocal<TEvSessionDead, EvSessionDead> { + TEvSessionDead(const ui64 cookie) + : Cookie(cookie) + { } + + const ui64 Cookie; + }; + + struct TEvScheduleUpdateClusters : public NActors::TEventLocal<TEvScheduleUpdateClusters, EvScheduleUpdateClusters> { + TEvScheduleUpdateClusters() + { } + }; + + + struct TEvUpdateClusters : public NActors::TEventLocal<TEvUpdateClusters, EvUpdateClusters> { + TEvUpdateClusters(const TString& localCluster, bool enabled, const TVector<TString>& clusters) + : LocalCluster(localCluster) + , Enabled(enabled) + , Clusters(clusters) + { } + + const TString LocalCluster; + const bool Enabled; + const TVector<TString> Clusters; + }; + + struct TEvQueryCompiled : public NActors::TEventLocal<TEvQueryCompiled, EvQueryCompiled> { + TEvQueryCompiled(const TString& selectQ, const TString& updateQ, const TString& deleteQ) + : SelectQ(selectQ) + , UpdateQ(updateQ) + , DeleteQ(deleteQ) + { } + + const TString SelectQ, UpdateQ, DeleteQ; + }; + + + + struct TEvWriteInit : public NActors::TEventLocal<TEvWriteInit, EvWriteInit> { TEvWriteInit(PersQueue::V1::StreamingWriteClientMessage&& req, const TString& peerName) - : Request(std::move(req)) - , PeerName(peerName) + : Request(std::move(req)) + , PeerName(peerName) { } PersQueue::V1::StreamingWriteClientMessage Request; - TString PeerName; + TString PeerName; }; struct TEvWrite : public NActors::TEventLocal<TEvWrite, EvWrite> { explicit TEvWrite(PersQueue::V1::StreamingWriteClientMessage&& req) - : Request(std::move(req)) + : Request(std::move(req)) { } PersQueue::V1::StreamingWriteClientMessage Request; }; - struct TEvDone : public NActors::TEventLocal<TEvDone, EvDone> { - TEvDone() - { } - }; - - struct TEvReadInit : public NActors::TEventLocal<TEvReadInit, EvReadInit> { + struct TEvDone : public NActors::TEventLocal<TEvDone, EvDone> { + TEvDone() + { } + }; + + struct TEvReadInit : public NActors::TEventLocal<TEvReadInit, EvReadInit> { TEvReadInit(const PersQueue::V1::MigrationStreamingReadClientMessage& req, const TString& peerName) - : Request(req) - , PeerName(peerName) - { } - + : Request(req) + , PeerName(peerName) + { } + PersQueue::V1::MigrationStreamingReadClientMessage Request; - TString PeerName; - }; - - struct TEvRead : public NActors::TEventLocal<TEvRead, EvRead> { - explicit TEvRead(const TString& guid = CreateGuidAsString(), ui64 maxCount = 0, ui64 maxSize = 0, ui64 maxTimeLagMs = 0, ui64 readTimestampMs = 0) - : Guid(guid) - , MaxCount(maxCount) - , MaxSize(maxSize) - , MaxTimeLagMs(maxTimeLagMs) - , ReadTimestampMs(readTimestampMs) - { } - + TString PeerName; + }; + + struct TEvRead : public NActors::TEventLocal<TEvRead, EvRead> { + explicit TEvRead(const TString& guid = CreateGuidAsString(), ui64 maxCount = 0, ui64 maxSize = 0, ui64 maxTimeLagMs = 0, ui64 readTimestampMs = 0) + : Guid(guid) + , MaxCount(maxCount) + , MaxSize(maxSize) + , MaxTimeLagMs(maxTimeLagMs) + , ReadTimestampMs(readTimestampMs) + { } + const TString Guid; - ui64 MaxCount; - ui64 MaxSize; - ui64 MaxTimeLagMs; - ui64 ReadTimestampMs; - }; - + ui64 MaxCount; + ui64 MaxSize; + ui64 MaxTimeLagMs; + ui64 ReadTimestampMs; + }; + struct TEvUpdateToken : public NActors::TEventLocal<TEvUpdateToken, EvUpdateToken> { explicit TEvUpdateToken(PersQueue::V1::StreamingWriteClientMessage&& req) : Request(std::move(req)) @@ -252,211 +252,211 @@ struct TEvPQProxy { PersQueue::V1::StreamingWriteClientMessage Request; }; - struct TEvCloseSession : public NActors::TEventLocal<TEvCloseSession, EvCloseSession> { - TEvCloseSession(const TString& reason, const PersQueue::ErrorCode::ErrorCode errorCode) - : Reason(reason) - , ErrorCode(errorCode) - { } - - const TString Reason; - PersQueue::ErrorCode::ErrorCode ErrorCode; - }; - - struct TEvPartitionReady : public NActors::TEventLocal<TEvPartitionReady, EvPartitionReady> { - TEvPartitionReady(const TPartitionId& partition, const ui64 wTime, const ui64 sizeLag, - const ui64 readOffset, const ui64 endOffset) - : Partition(partition) - , WTime(wTime) - , SizeLag(sizeLag) - , ReadOffset(readOffset) - , EndOffset(endOffset) - { } - - const TPartitionId Partition; - ui64 WTime; - ui64 SizeLag; - ui64 ReadOffset; - ui64 EndOffset; - }; - - struct TEvReadResponse : public NActors::TEventLocal<TEvReadResponse, EvReadResponse> { + struct TEvCloseSession : public NActors::TEventLocal<TEvCloseSession, EvCloseSession> { + TEvCloseSession(const TString& reason, const PersQueue::ErrorCode::ErrorCode errorCode) + : Reason(reason) + , ErrorCode(errorCode) + { } + + const TString Reason; + PersQueue::ErrorCode::ErrorCode ErrorCode; + }; + + struct TEvPartitionReady : public NActors::TEventLocal<TEvPartitionReady, EvPartitionReady> { + TEvPartitionReady(const TPartitionId& partition, const ui64 wTime, const ui64 sizeLag, + const ui64 readOffset, const ui64 endOffset) + : Partition(partition) + , WTime(wTime) + , SizeLag(sizeLag) + , ReadOffset(readOffset) + , EndOffset(endOffset) + { } + + const TPartitionId Partition; + ui64 WTime; + ui64 SizeLag; + ui64 ReadOffset; + ui64 EndOffset; + }; + + struct TEvReadResponse : public NActors::TEventLocal<TEvReadResponse, EvReadResponse> { explicit TEvReadResponse(PersQueue::V1::MigrationStreamingReadServerMessage&& resp, ui64 nextReadOffset, bool fromDisk, TDuration waitQuotaTime) - : Response(std::move(resp)) + : Response(std::move(resp)) , NextReadOffset(nextReadOffset) - , FromDisk(fromDisk) + , FromDisk(fromDisk) , WaitQuotaTime(waitQuotaTime) - { } - + { } + PersQueue::V1::MigrationStreamingReadServerMessage Response; ui64 NextReadOffset; - bool FromDisk; + bool FromDisk; TDuration WaitQuotaTime; - }; - - struct TCommitCookie { - TVector<ui64> Cookies; - }; - - struct TCommitRange { - TVector<std::pair<ui64, ui64>> Ranges; - }; - - struct TEvCommitCookie : public NActors::TEventLocal<TEvCommitCookie, EvCommitCookie> { - explicit TEvCommitCookie(const ui64 assignId, const TCommitCookie&& commitInfo) - : AssignId(assignId) - , CommitInfo(std::move(commitInfo)) - { } - - ui64 AssignId; - TCommitCookie CommitInfo; - }; - - struct TEvCommitRange : public NActors::TEventLocal<TEvCommitRange, EvCommitRange> { - explicit TEvCommitRange(const ui64 assignId, const TCommitRange&& commitInfo) - : AssignId(assignId) - , CommitInfo(std::move(commitInfo)) - { } - - ui64 AssignId; - TCommitRange CommitInfo; - }; - - - - - struct TEvAuth : public NActors::TEventLocal<TEvAuth, EvAuth> { - TEvAuth(const TString& auth) - : Auth(auth) - { } - - TString Auth; - }; - - struct TEvStartRead : public NActors::TEventLocal<TEvStartRead, EvStartRead> { - TEvStartRead(const TPartitionId& partition, ui64 readOffset, ui64 commitOffset, bool verifyReadOffset) - : Partition(partition) - , ReadOffset(readOffset) - , CommitOffset(commitOffset) - , VerifyReadOffset(verifyReadOffset) + }; + + struct TCommitCookie { + TVector<ui64> Cookies; + }; + + struct TCommitRange { + TVector<std::pair<ui64, ui64>> Ranges; + }; + + struct TEvCommitCookie : public NActors::TEventLocal<TEvCommitCookie, EvCommitCookie> { + explicit TEvCommitCookie(const ui64 assignId, const TCommitCookie&& commitInfo) + : AssignId(assignId) + , CommitInfo(std::move(commitInfo)) + { } + + ui64 AssignId; + TCommitCookie CommitInfo; + }; + + struct TEvCommitRange : public NActors::TEventLocal<TEvCommitRange, EvCommitRange> { + explicit TEvCommitRange(const ui64 assignId, const TCommitRange&& commitInfo) + : AssignId(assignId) + , CommitInfo(std::move(commitInfo)) + { } + + ui64 AssignId; + TCommitRange CommitInfo; + }; + + + + + struct TEvAuth : public NActors::TEventLocal<TEvAuth, EvAuth> { + TEvAuth(const TString& auth) + : Auth(auth) + { } + + TString Auth; + }; + + struct TEvStartRead : public NActors::TEventLocal<TEvStartRead, EvStartRead> { + TEvStartRead(const TPartitionId& partition, ui64 readOffset, ui64 commitOffset, bool verifyReadOffset) + : Partition(partition) + , ReadOffset(readOffset) + , CommitOffset(commitOffset) + , VerifyReadOffset(verifyReadOffset) , Generation(0) - { } - - const TPartitionId Partition; - ui64 ReadOffset; - ui64 CommitOffset; - bool VerifyReadOffset; - ui64 Generation; - }; - - struct TEvReleased : public NActors::TEventLocal<TEvReleased, EvReleased> { - TEvReleased(const TPartitionId& partition) - : Partition(partition) - { } - - const TPartitionId Partition; - }; - - struct TEvGetStatus : public NActors::TEventLocal<TEvGetStatus, EvGetStatus> { - TEvGetStatus(const TPartitionId& partition) - : Partition(partition) - { } - - const TPartitionId Partition; - }; - - - struct TEvCommitDone : public NActors::TEventLocal<TEvCommitDone, EvCommitDone> { - explicit TEvCommitDone(const ui64 assignId, const ui64 startCookie, const ui64 lastCookie, const ui64 offset) - : AssignId(assignId) - , StartCookie(startCookie) - , LastCookie(lastCookie) - , Offset(offset) - { } - - ui64 AssignId; - ui64 StartCookie; - ui64 LastCookie; - ui64 Offset; - }; - - struct TEvReleasePartition : public NActors::TEventLocal<TEvReleasePartition, EvReleasePartition> { - TEvReleasePartition() - { } - }; - - struct TEvLockPartition : public NActors::TEventLocal<TEvLockPartition, EvLockPartition> { - explicit TEvLockPartition(const ui64 readOffset, const ui64 commitOffset, bool verifyReadOffset, bool startReading) - : ReadOffset(readOffset) - , CommitOffset(commitOffset) - , VerifyReadOffset(verifyReadOffset) - , StartReading(startReading) - { } - - ui64 ReadOffset; - ui64 CommitOffset; - bool VerifyReadOffset; - bool StartReading; - }; - - - struct TEvPartitionReleased : public NActors::TEventLocal<TEvPartitionReleased, EvPartitionReleased> { - TEvPartitionReleased(const TPartitionId& partition) - : Partition(partition) - { } - TPartitionId Partition; - }; - - - struct TEvRestartPipe : public NActors::TEventLocal<TEvRestartPipe, EvRestartPipe> { - TEvRestartPipe() - { } - }; - - struct TEvDeadlineExceeded : public NActors::TEventLocal<TEvDeadlineExceeded, EvDeadlineExceeded> { - TEvDeadlineExceeded(ui64 cookie) - : Cookie(cookie) - { } - - ui64 Cookie; - }; - - - struct TEvDieCommand : public NActors::TEventLocal<TEvDieCommand, EvDieCommand> { - TEvDieCommand(const TString& reason, const PersQueue::ErrorCode::ErrorCode errorCode) - : Reason(reason) + { } + + const TPartitionId Partition; + ui64 ReadOffset; + ui64 CommitOffset; + bool VerifyReadOffset; + ui64 Generation; + }; + + struct TEvReleased : public NActors::TEventLocal<TEvReleased, EvReleased> { + TEvReleased(const TPartitionId& partition) + : Partition(partition) + { } + + const TPartitionId Partition; + }; + + struct TEvGetStatus : public NActors::TEventLocal<TEvGetStatus, EvGetStatus> { + TEvGetStatus(const TPartitionId& partition) + : Partition(partition) + { } + + const TPartitionId Partition; + }; + + + struct TEvCommitDone : public NActors::TEventLocal<TEvCommitDone, EvCommitDone> { + explicit TEvCommitDone(const ui64 assignId, const ui64 startCookie, const ui64 lastCookie, const ui64 offset) + : AssignId(assignId) + , StartCookie(startCookie) + , LastCookie(lastCookie) + , Offset(offset) + { } + + ui64 AssignId; + ui64 StartCookie; + ui64 LastCookie; + ui64 Offset; + }; + + struct TEvReleasePartition : public NActors::TEventLocal<TEvReleasePartition, EvReleasePartition> { + TEvReleasePartition() + { } + }; + + struct TEvLockPartition : public NActors::TEventLocal<TEvLockPartition, EvLockPartition> { + explicit TEvLockPartition(const ui64 readOffset, const ui64 commitOffset, bool verifyReadOffset, bool startReading) + : ReadOffset(readOffset) + , CommitOffset(commitOffset) + , VerifyReadOffset(verifyReadOffset) + , StartReading(startReading) + { } + + ui64 ReadOffset; + ui64 CommitOffset; + bool VerifyReadOffset; + bool StartReading; + }; + + + struct TEvPartitionReleased : public NActors::TEventLocal<TEvPartitionReleased, EvPartitionReleased> { + TEvPartitionReleased(const TPartitionId& partition) + : Partition(partition) + { } + TPartitionId Partition; + }; + + + struct TEvRestartPipe : public NActors::TEventLocal<TEvRestartPipe, EvRestartPipe> { + TEvRestartPipe() + { } + }; + + struct TEvDeadlineExceeded : public NActors::TEventLocal<TEvDeadlineExceeded, EvDeadlineExceeded> { + TEvDeadlineExceeded(ui64 cookie) + : Cookie(cookie) + { } + + ui64 Cookie; + }; + + + struct TEvDieCommand : public NActors::TEventLocal<TEvDieCommand, EvDieCommand> { + TEvDieCommand(const TString& reason, const PersQueue::ErrorCode::ErrorCode errorCode) + : Reason(reason) , ErrorCode(errorCode) - { } - - TString Reason; - PersQueue::ErrorCode::ErrorCode ErrorCode; - }; - - struct TEvPartitionStatus : public NActors::TEventLocal<TEvPartitionStatus, EvPartitionStatus> { - TEvPartitionStatus(const TPartitionId& partition, const ui64 offset, const ui64 endOffset, const ui64 writeTimestampEstimateMs, bool init = true) - : Partition(partition) - , Offset(offset) - , EndOffset(endOffset) - , WriteTimestampEstimateMs(writeTimestampEstimateMs) - , Init(init) - { } - - TPartitionId Partition; - ui64 Offset; - ui64 EndOffset; - ui64 WriteTimestampEstimateMs; - bool Init; - }; - + { } + + TString Reason; + PersQueue::ErrorCode::ErrorCode ErrorCode; + }; + + struct TEvPartitionStatus : public NActors::TEventLocal<TEvPartitionStatus, EvPartitionStatus> { + TEvPartitionStatus(const TPartitionId& partition, const ui64 offset, const ui64 endOffset, const ui64 writeTimestampEstimateMs, bool init = true) + : Partition(partition) + , Offset(offset) + , EndOffset(endOffset) + , WriteTimestampEstimateMs(writeTimestampEstimateMs) + , Init(init) + { } + + TPartitionId Partition; + ui64 Offset; + ui64 EndOffset; + ui64 WriteTimestampEstimateMs; + bool Init; + }; + }; - - -/// WRITE ACTOR + + +/// WRITE ACTOR class TWriteSessionActor : public NActors::TActorBootstrapped<TWriteSessionActor> { using IContext = NGRpcServer::IGRpcStreamingContext<PersQueue::V1::StreamingWriteClientMessage, PersQueue::V1::StreamingWriteServerMessage>; using TEvDescribeTopicsResponse = NMsgBusProxy::NPqMetaCacheV2::TEvPqNewMetaCache::TEvDescribeTopicsResponse; using TEvDescribeTopicsRequest = NMsgBusProxy::NPqMetaCacheV2::TEvPqNewMetaCache::TEvDescribeTopicsRequest; - + public: TWriteSessionActor(NKikimr::NGRpcService::TEvStreamPQWriteRequest* request, const ui64 cookie, const NActors::TActorId& schemeCache, const NActors::TActorId& newSchemeCache, @@ -470,127 +470,127 @@ public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::FRONT_PQ_WRITE; } private: - STFUNC(StateFunc) { + STFUNC(StateFunc) { switch (ev->GetTypeRewrite()) { - CFunc(NActors::TEvents::TSystem::Wakeup, HandleWakeup); - - HFunc(IContext::TEvReadFinished, Handle); - HFunc(IContext::TEvWriteFinished, Handle); - CFunc(IContext::TEvNotifiedWhenDone::EventType, HandleDone); - HFunc(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse, Handle); - - HFunc(TEvPQProxy::TEvDieCommand, HandlePoison) - HFunc(TEvPQProxy::TEvWriteInit, Handle) - HFunc(TEvPQProxy::TEvWrite, Handle) + CFunc(NActors::TEvents::TSystem::Wakeup, HandleWakeup); + + HFunc(IContext::TEvReadFinished, Handle); + HFunc(IContext::TEvWriteFinished, Handle); + CFunc(IContext::TEvNotifiedWhenDone::EventType, HandleDone); + HFunc(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse, Handle); + + HFunc(TEvPQProxy::TEvDieCommand, HandlePoison) + HFunc(TEvPQProxy::TEvWriteInit, Handle) + HFunc(TEvPQProxy::TEvWrite, Handle) HFunc(TEvPQProxy::TEvUpdateToken, Handle) - HFunc(TEvPQProxy::TEvDone, Handle) - HFunc(TEvPersQueue::TEvGetPartitionIdForWriteResponse, Handle) - + HFunc(TEvPQProxy::TEvDone, Handle) + HFunc(TEvPersQueue::TEvGetPartitionIdForWriteResponse, Handle) + HFunc(TEvDescribeTopicsResponse, Handle) - + HFunc(NPQ::TEvPartitionWriter::TEvInitResult, Handle); HFunc(NPQ::TEvPartitionWriter::TEvWriteAccepted, Handle); HFunc(NPQ::TEvPartitionWriter::TEvWriteResponse, Handle); HFunc(NPQ::TEvPartitionWriter::TEvDisconnected, Handle); - HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); - HFunc(TEvTabletPipe::TEvClientConnected, Handle); - - HFunc(NKqp::TEvKqp::TEvQueryResponse, Handle); - HFunc(NKqp::TEvKqp::TEvProcessResponse, Handle); - + HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); + HFunc(TEvTabletPipe::TEvClientConnected, Handle); + + HFunc(NKqp::TEvKqp::TEvQueryResponse, Handle); + HFunc(NKqp::TEvKqp::TEvProcessResponse, Handle); + default: break; }; } - - void Handle(IContext::TEvReadFinished::TPtr& ev, const TActorContext &ctx); - void Handle(IContext::TEvWriteFinished::TPtr& ev, const TActorContext &ctx); - void HandleDone(const TActorContext &ctx); - - void Handle(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse::TPtr& ev, const TActorContext &ctx); - - - void Handle(NKqp::TEvKqp::TEvQueryResponse::TPtr &ev, const TActorContext &ctx); - void Handle(NKqp::TEvKqp::TEvProcessResponse::TPtr &ev, const TActorContext &ctx); - - void CheckACL(const TActorContext& ctx); + + void Handle(IContext::TEvReadFinished::TPtr& ev, const TActorContext &ctx); + void Handle(IContext::TEvWriteFinished::TPtr& ev, const TActorContext &ctx); + void HandleDone(const TActorContext &ctx); + + void Handle(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse::TPtr& ev, const TActorContext &ctx); + + + void Handle(NKqp::TEvKqp::TEvQueryResponse::TPtr &ev, const TActorContext &ctx); + void Handle(NKqp::TEvKqp::TEvProcessResponse::TPtr &ev, const TActorContext &ctx); + + void CheckACL(const TActorContext& ctx); // Requests fresh ACL from 'SchemeCache' void InitCheckSchema(const TActorContext& ctx, bool needWaitSchema = false); - void Handle(TEvPQProxy::TEvWriteInit::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvWrite::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvWriteInit::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvWrite::TPtr& ev, const NActors::TActorContext& ctx); void Handle(TEvPQProxy::TEvUpdateToken::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvDone::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvDone::TPtr& ev, const NActors::TActorContext& ctx); void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx); void Handle(TEvDescribeTopicsResponse::TPtr& ev, const TActorContext& ctx); - void LogSession(const TActorContext& ctx); - - void DiscoverPartition(const NActors::TActorContext& ctx); - void UpdatePartition(const NActors::TActorContext& ctx); - void RequestNextPartition(const NActors::TActorContext& ctx); - - void ProceedPartition(const ui32 partition, const NActors::TActorContext& ctx); + void LogSession(const TActorContext& ctx); + + void DiscoverPartition(const NActors::TActorContext& ctx); + void UpdatePartition(const NActors::TActorContext& ctx); + void RequestNextPartition(const NActors::TActorContext& ctx); + + void ProceedPartition(const ui32 partition, const NActors::TActorContext& ctx); THolder<NKqp::TEvKqp::TEvQueryRequest> MakeUpdateSourceIdMetadataRequest(const NActors::TActorContext& ctx); void InitCheckACL(const TActorContext& ctx); - + void Handle(NPQ::TEvPartitionWriter::TEvInitResult::TPtr& ev, const TActorContext& ctx); void Handle(NPQ::TEvPartitionWriter::TEvWriteAccepted::TPtr& ev, const TActorContext& ctx); void Handle(NPQ::TEvPartitionWriter::TEvWriteResponse::TPtr& ev, const TActorContext& ctx); void Handle(NPQ::TEvPartitionWriter::TEvDisconnected::TPtr& ev, const TActorContext& ctx); - void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPersQueue::TEvGetPartitionIdForWriteResponse::TPtr& ev, const NActors::TActorContext& ctx); - - void HandlePoison(TEvPQProxy::TEvDieCommand::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPersQueue::TEvGetPartitionIdForWriteResponse::TPtr& ev, const NActors::TActorContext& ctx); + + void HandlePoison(TEvPQProxy::TEvDieCommand::TPtr& ev, const NActors::TActorContext& ctx); void HandleWakeup(const NActors::TActorContext& ctx); - void CloseSession(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode errorCode, const NActors::TActorContext& ctx); - - void CheckFinish(const NActors::TActorContext& ctx); - - void GenerateNextWriteRequest(const NActors::TActorContext& ctx); - + void CloseSession(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode errorCode, const NActors::TActorContext& ctx); + + void CheckFinish(const NActors::TActorContext& ctx); + + void GenerateNextWriteRequest(const NActors::TActorContext& ctx); + void SetupCounters(); void SetupCounters(const TString& cloudId, const TString& dbId, const TString& folderId); - + private: - std::unique_ptr<NKikimr::NGRpcService::TEvStreamPQWriteRequest> Request; - - enum EState { - ES_CREATED = 1, - ES_WAIT_SCHEME_1 = 2, - ES_WAIT_SCHEME_2 = 3, - ES_WAIT_TABLE_REQUEST_1 = 4, - ES_WAIT_NEXT_PARTITION = 5, - ES_WAIT_TABLE_REQUEST_2 = 6, - ES_WAIT_TABLE_REQUEST_3 = 7, + std::unique_ptr<NKikimr::NGRpcService::TEvStreamPQWriteRequest> Request; + + enum EState { + ES_CREATED = 1, + ES_WAIT_SCHEME_1 = 2, + ES_WAIT_SCHEME_2 = 3, + ES_WAIT_TABLE_REQUEST_1 = 4, + ES_WAIT_NEXT_PARTITION = 5, + ES_WAIT_TABLE_REQUEST_2 = 6, + ES_WAIT_TABLE_REQUEST_3 = 7, ES_WAIT_WRITER_INIT = 8, ES_INITED = 9 - //TODO: filter - }; - - EState State; + //TODO: filter + }; + + EState State; TActorId SchemeCache; TActorId NewSchemeCache; TActorId Writer; - - TString PeerName; - ui64 Cookie; - + + TString PeerName; + ui64 Cookie; + NPersQueue::TTopicsListController TopicsController; NPersQueue::TConverterPtr TopicConverter; - ui32 Partition; - ui32 PreferedPartition; + ui32 Partition; + ui32 PreferedPartition; // 'SourceId' is called 'MessageGroupId' since gRPC data plane API v1 TString SourceId; // TODO: Replace with 'MessageGroupId' everywhere - TString EscapedSourceId; + TString EscapedSourceId; ui32 Hash = 0; - - TString OwnerCookie; - TString UserAgent; - - ui32 NumReserveBytesRequests; - + + TString OwnerCookie; + TString UserAgent; + + ui32 NumReserveBytesRequests; + THolder<TAclWrapper> ACL; struct TWriteRequestBatchInfo: public TSimpleRefCount<TWriteRequestBatchInfo> { @@ -607,561 +607,561 @@ private: }; // Nonprocessed source client requests - std::deque<THolder<TEvPQProxy::TEvWrite>> Writes; - + std::deque<THolder<TEvPQProxy::TEvWrite>> Writes; + // Formed, but not sent, batch requests to partition actor std::deque<TWriteRequestBatchInfo::TPtr> FormedWrites; - + // Requests that is already sent to partition actor std::deque<TWriteRequestBatchInfo::TPtr> SentMessages; - bool WritesDone; - + bool WritesDone; + THashMap<ui32, ui64> PartitionToTablet; - - TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; - - NKikimr::NPQ::TMultiCounter BytesInflight; - NKikimr::NPQ::TMultiCounter BytesInflightTotal; - - ui64 BytesInflight_; - ui64 BytesInflightTotal_; - - bool NextRequestInited; - - NKikimr::NPQ::TMultiCounter SessionsCreated; - NKikimr::NPQ::TMultiCounter SessionsActive; - - NKikimr::NPQ::TMultiCounter Errors; - - ui64 NextRequestCookie; - - TIntrusivePtr<NACLib::TUserToken> Token; - TString Auth; + + TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; + + NKikimr::NPQ::TMultiCounter BytesInflight; + NKikimr::NPQ::TMultiCounter BytesInflightTotal; + + ui64 BytesInflight_; + ui64 BytesInflightTotal_; + + bool NextRequestInited; + + NKikimr::NPQ::TMultiCounter SessionsCreated; + NKikimr::NPQ::TMultiCounter SessionsActive; + + NKikimr::NPQ::TMultiCounter Errors; + + ui64 NextRequestCookie; + + TIntrusivePtr<NACLib::TUserToken> Token; + TString Auth; // Got 'update_token_request', authentication or authorization in progress or 'update_token_response' is not sent yet. Only single 'update_token_request' is allowed inflight bool UpdateTokenInProgress; bool UpdateTokenAuthenticated; - bool ACLCheckInProgress; - bool FirstACLCheck; - bool RequestNotChecked; - TInstant LastACLCheckTimestamp; - TInstant LogSessionDeadline; - + bool ACLCheckInProgress; + bool FirstACLCheck; + bool RequestNotChecked; + TInstant LastACLCheckTimestamp; + TInstant LogSessionDeadline; + ui64 BalancerTabletId; TActorId PipeToBalancer; // PQ tablet configuration that we get at the time of session initialization NKikimrPQ::TPQTabletConfig InitialPQTabletConfig; - - NKikimrPQClient::TDataChunk InitMeta; + + NKikimrPQClient::TDataChunk InitMeta; TString LocalDC; - TString ClientDC; + TString ClientDC; TString SelectSourceIdQuery; TString UpdateSourceIdQuery; TInstant LastSourceIdUpdate; ui64 SourceIdCreateTime; bool SourceIdUpdateInfly; - - TVector<NPQ::TLabelsInfo> Aggr; - NKikimr::NPQ::TMultiCounter SLITotal; - NKikimr::NPQ::TMultiCounter SLIErrors; - TInstant StartTime; - NKikimr::NPQ::TPercentileCounter InitLatency; - NKikimr::NPQ::TMultiCounter SLIBigLatency; + + TVector<NPQ::TLabelsInfo> Aggr; + NKikimr::NPQ::TMultiCounter SLITotal; + NKikimr::NPQ::TMultiCounter SLIErrors; + TInstant StartTime; + NKikimr::NPQ::TPercentileCounter InitLatency; + NKikimr::NPQ::TMultiCounter SLIBigLatency; }; - -class TReadInitAndAuthActor : public NActors::TActorBootstrapped<TReadInitAndAuthActor> { + +class TReadInitAndAuthActor : public NActors::TActorBootstrapped<TReadInitAndAuthActor> { using TEvDescribeTopicsResponse = NMsgBusProxy::NPqMetaCacheV2::TEvPqNewMetaCache::TEvDescribeTopicsResponse; using TEvDescribeTopicsRequest = NMsgBusProxy::NPqMetaCacheV2::TEvPqNewMetaCache::TEvDescribeTopicsRequest; -public: +public: TReadInitAndAuthActor(const TActorContext& ctx, const TActorId& parentId, const TString& clientId, const ui64 cookie, const TString& session, const NActors::TActorId& schemeCache, const NActors::TActorId& newSchemeCache, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, TIntrusivePtr<NACLib::TUserToken> token, const NPersQueue::TTopicsToConverter& topics); - ~TReadInitAndAuthActor(); - - void Bootstrap(const NActors::TActorContext& ctx); - void Die(const NActors::TActorContext& ctx) override; - - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::FRONT_PQ_READ; } - -private: - + ~TReadInitAndAuthActor(); + + void Bootstrap(const NActors::TActorContext& ctx); + void Die(const NActors::TActorContext& ctx) override; + + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::FRONT_PQ_READ; } + +private: + STRICT_STFUNC(StateFunc, HFunc(TEvDescribeTopicsResponse, HandleTopicsDescribeResponse) HFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, HandleClientSchemeCacheResponse) HFunc(NActors::TEvents::TEvPoisonPill, HandlePoison) ); - - void HandlePoison(NActors::TEvents::TEvPoisonPill::TPtr&, const NActors::TActorContext& ctx) { - Die(ctx); - } - - void CloseSession(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode code, const TActorContext& ctx); - - + + void HandlePoison(NActors::TEvents::TEvPoisonPill::TPtr&, const NActors::TActorContext& ctx) { + Die(ctx); + } + + void CloseSession(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode code, const TActorContext& ctx); + + bool ProcessTopicSchemeCacheResponse(const NSchemeCache::TSchemeCacheNavigate::TEntry& entry, THashMap<TString, TTopicHolder>::iterator topicsIter, const TActorContext& ctx); void HandleClientSchemeCacheResponse(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx); void SendCacheNavigateRequest(const TActorContext& ctx, const TString& path); - + void HandleTopicsDescribeResponse(TEvDescribeTopicsResponse::TPtr& ev, const NActors::TActorContext& ctx); - void FinishInitialization(const NActors::TActorContext& ctx); + void FinishInitialization(const NActors::TActorContext& ctx); bool CheckTopicACL(const NSchemeCache::TSchemeCacheNavigate::TEntry& entry, const TString& topic, const TActorContext& ctx); void CheckClientACL(const TActorContext& ctx); - + bool CheckACLPermissionsForNavigate(const TIntrusivePtr<TSecurityObject>& secObject, const TString& path, NACLib::EAccessRights rights, const TString& errorTextWhenAccessDenied, const TActorContext& ctx); - -private: + +private: const TActorId ParentId; - const ui64 Cookie; - const TString Session; - + const ui64 Cookie; + const TString Session; + const TActorId MetaCacheId; const TActorId NewSchemeCache; - - const TString ClientId; - const TString ClientPath; - - TIntrusivePtr<NACLib::TUserToken> Token; - - THashMap<TString, TTopicHolder> Topics; // topic -> info - - TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; + + const TString ClientId; + const TString ClientPath; + + TIntrusivePtr<NACLib::TUserToken> Token; + + THashMap<TString, TTopicHolder> Topics; // topic -> info + + TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; bool DoCheckACL; -}; - - -class TReadSessionActor : public TActorBootstrapped<TReadSessionActor> { +}; + + +class TReadSessionActor : public TActorBootstrapped<TReadSessionActor> { using IContext = NGRpcServer::IGRpcStreamingContext<PersQueue::V1::MigrationStreamingReadClientMessage, PersQueue::V1::MigrationStreamingReadServerMessage>; -public: +public: TReadSessionActor(NKikimr::NGRpcService::TEvStreamPQReadRequest* request, const ui64 cookie, const NActors::TActorId& schemeCache, const NActors::TActorId& newSchemeCache, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const TMaybe<TString> clientDC, const NPersQueue::TTopicsListController& topicsHandler); - ~TReadSessionActor(); - - void Bootstrap(const NActors::TActorContext& ctx); - - void Die(const NActors::TActorContext& ctx) override; - + ~TReadSessionActor(); + + void Bootstrap(const NActors::TActorContext& ctx); + + void Die(const NActors::TActorContext& ctx) override; + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::FRONT_PQ_READ; } - - - struct TTopicCounters { - NKikimr::NPQ::TMultiCounter PartitionsLocked; - NKikimr::NPQ::TMultiCounter PartitionsReleased; - NKikimr::NPQ::TMultiCounter PartitionsToBeReleased; - NKikimr::NPQ::TMultiCounter PartitionsToBeLocked; - NKikimr::NPQ::TMultiCounter PartitionsInfly; - NKikimr::NPQ::TMultiCounter Errors; - NKikimr::NPQ::TMultiCounter Commits; - NKikimr::NPQ::TMultiCounter WaitsForData; - - NKikimr::NPQ::TPercentileCounter CommitLatency; - NKikimr::NPQ::TMultiCounter SLIBigLatency; - NKikimr::NPQ::TMultiCounter SLITotal; - }; - -private: - STFUNC(StateFunc) { - switch (ev->GetTypeRewrite()) { - CFunc(NActors::TEvents::TSystem::Wakeup, HandleWakeup) - - HFunc(IContext::TEvReadFinished, Handle); - HFunc(IContext::TEvWriteFinished, Handle); - CFunc(IContext::TEvNotifiedWhenDone::EventType, HandleDone); - HFunc(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse, Handle); - - HFunc(TEvPQProxy::TEvAuthResultOk, Handle); // form auth actor - - HFunc(TEvPQProxy::TEvDieCommand, HandlePoison) + + + struct TTopicCounters { + NKikimr::NPQ::TMultiCounter PartitionsLocked; + NKikimr::NPQ::TMultiCounter PartitionsReleased; + NKikimr::NPQ::TMultiCounter PartitionsToBeReleased; + NKikimr::NPQ::TMultiCounter PartitionsToBeLocked; + NKikimr::NPQ::TMultiCounter PartitionsInfly; + NKikimr::NPQ::TMultiCounter Errors; + NKikimr::NPQ::TMultiCounter Commits; + NKikimr::NPQ::TMultiCounter WaitsForData; + + NKikimr::NPQ::TPercentileCounter CommitLatency; + NKikimr::NPQ::TMultiCounter SLIBigLatency; + NKikimr::NPQ::TMultiCounter SLITotal; + }; + +private: + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + CFunc(NActors::TEvents::TSystem::Wakeup, HandleWakeup) + + HFunc(IContext::TEvReadFinished, Handle); + HFunc(IContext::TEvWriteFinished, Handle); + CFunc(IContext::TEvNotifiedWhenDone::EventType, HandleDone); + HFunc(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse, Handle); + + HFunc(TEvPQProxy::TEvAuthResultOk, Handle); // form auth actor + + HFunc(TEvPQProxy::TEvDieCommand, HandlePoison) HFunc(TEvPQProxy::TEvReadInit, Handle) //from gRPC - HFunc(TEvPQProxy::TEvReadSessionStatus, Handle) // from read sessions info builder proxy + HFunc(TEvPQProxy::TEvReadSessionStatus, Handle) // from read sessions info builder proxy HFunc(TEvPQProxy::TEvRead, Handle) //from gRPC HFunc(TEvPQProxy::TEvDone, Handle) //from gRPC HFunc(TEvPQProxy::TEvCloseSession, Handle) //from partitionActor HFunc(TEvPQProxy::TEvPartitionReady, Handle) //from partitionActor - HFunc(TEvPQProxy::TEvPartitionReleased, Handle) //from partitionActor - - HFunc(TEvPQProxy::TEvReadResponse, Handle) //from partitionActor - HFunc(TEvPQProxy::TEvCommitCookie, Handle) //from gRPC - HFunc(TEvPQProxy::TEvCommitRange, Handle) //from gRPC - HFunc(TEvPQProxy::TEvStartRead, Handle) //from gRPC - HFunc(TEvPQProxy::TEvReleased, Handle) //from gRPC - HFunc(TEvPQProxy::TEvGetStatus, Handle) //from gRPC - HFunc(TEvPQProxy::TEvAuth, Handle) //from gRPC - - HFunc(TEvPQProxy::TEvCommitDone, Handle) //from PartitionActor - HFunc(TEvPQProxy::TEvPartitionStatus, Handle) //from partitionActor - - HFunc(TEvPersQueue::TEvLockPartition, Handle) //from Balancer - HFunc(TEvPersQueue::TEvReleasePartition, Handle) //from Balancer - HFunc(TEvPersQueue::TEvError, Handle) //from Balancer - - HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); - HFunc(TEvTabletPipe::TEvClientConnected, Handle); - - default: - break; - }; - } - - bool WriteResponse(PersQueue::V1::MigrationStreamingReadServerMessage&& response, bool finish = false); - - void Handle(IContext::TEvReadFinished::TPtr& ev, const TActorContext &ctx); - void Handle(IContext::TEvWriteFinished::TPtr& ev, const TActorContext &ctx); - void HandleDone(const TActorContext &ctx); - - void Handle(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse::TPtr& ev, const TActorContext &ctx); - - - void Handle(TEvPQProxy::TEvReadInit::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvReadSessionStatus::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvRead::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvReadResponse::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvDone::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvCloseSession::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvPartitionReady::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvPartitionReleased::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvCommitCookie::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvCommitRange::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvStartRead::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvReleased::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvGetStatus::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvAuth::TPtr& ev, const NActors::TActorContext& ctx); - void ProcessAuth(const TString& auth, const TActorContext& ctx); - void Handle(TEvPQProxy::TEvCommitDone::TPtr& ev, const NActors::TActorContext& ctx); - - void Handle(TEvPQProxy::TEvPartitionStatus::TPtr& ev, const NActors::TActorContext& ctx); - - void Handle(TEvPersQueue::TEvLockPartition::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPersQueue::TEvReleasePartition::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPersQueue::TEvError::TPtr& ev, const NActors::TActorContext& ctx); - - void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const NActors::TActorContext& ctx); + HFunc(TEvPQProxy::TEvPartitionReleased, Handle) //from partitionActor + + HFunc(TEvPQProxy::TEvReadResponse, Handle) //from partitionActor + HFunc(TEvPQProxy::TEvCommitCookie, Handle) //from gRPC + HFunc(TEvPQProxy::TEvCommitRange, Handle) //from gRPC + HFunc(TEvPQProxy::TEvStartRead, Handle) //from gRPC + HFunc(TEvPQProxy::TEvReleased, Handle) //from gRPC + HFunc(TEvPQProxy::TEvGetStatus, Handle) //from gRPC + HFunc(TEvPQProxy::TEvAuth, Handle) //from gRPC + + HFunc(TEvPQProxy::TEvCommitDone, Handle) //from PartitionActor + HFunc(TEvPQProxy::TEvPartitionStatus, Handle) //from partitionActor + + HFunc(TEvPersQueue::TEvLockPartition, Handle) //from Balancer + HFunc(TEvPersQueue::TEvReleasePartition, Handle) //from Balancer + HFunc(TEvPersQueue::TEvError, Handle) //from Balancer + + HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); + HFunc(TEvTabletPipe::TEvClientConnected, Handle); + + default: + break; + }; + } + + bool WriteResponse(PersQueue::V1::MigrationStreamingReadServerMessage&& response, bool finish = false); + + void Handle(IContext::TEvReadFinished::TPtr& ev, const TActorContext &ctx); + void Handle(IContext::TEvWriteFinished::TPtr& ev, const TActorContext &ctx); + void HandleDone(const TActorContext &ctx); + + void Handle(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse::TPtr& ev, const TActorContext &ctx); + + + void Handle(TEvPQProxy::TEvReadInit::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvReadSessionStatus::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvRead::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvReadResponse::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvDone::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvCloseSession::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvPartitionReady::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvPartitionReleased::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvCommitCookie::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvCommitRange::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvStartRead::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvReleased::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvGetStatus::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvAuth::TPtr& ev, const NActors::TActorContext& ctx); + void ProcessAuth(const TString& auth, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvCommitDone::TPtr& ev, const NActors::TActorContext& ctx); + + void Handle(TEvPQProxy::TEvPartitionStatus::TPtr& ev, const NActors::TActorContext& ctx); + + void Handle(TEvPersQueue::TEvLockPartition::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPersQueue::TEvReleasePartition::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPersQueue::TEvError::TPtr& ev, const NActors::TActorContext& ctx); + + void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const NActors::TActorContext& ctx); [[nodiscard]] bool ProcessBalancerDead(const ui64 tabletId, const NActors::TActorContext& ctx); // returns false if actor died - - void HandlePoison(TEvPQProxy::TEvDieCommand::TPtr& ev, const NActors::TActorContext& ctx); - void HandleWakeup(const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvAuthResultOk::TPtr& ev, const NActors::TActorContext& ctx); - + + void HandlePoison(TEvPQProxy::TEvDieCommand::TPtr& ev, const NActors::TActorContext& ctx); + void HandleWakeup(const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvAuthResultOk::TPtr& ev, const NActors::TActorContext& ctx); + void CloseSession(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode errorCode, const NActors::TActorContext& ctx); - - void SetupCounters(); - void SetupTopicCounters(const TString& topic); + + void SetupCounters(); + void SetupTopicCounters(const TString& topic); void SetupTopicCounters(const TString& topic, const TString& cloudId, const TString& dbId, const TString& folderId); - - void ProcessReads(const NActors::TActorContext& ctx); // returns false if actor died + + void ProcessReads(const NActors::TActorContext& ctx); // returns false if actor died struct TFormedReadResponse; - void ProcessAnswer(const NActors::TActorContext& ctx, TIntrusivePtr<TFormedReadResponse> formedResponse); // returns false if actor died - - void RegisterSessions(const NActors::TActorContext& ctx); + void ProcessAnswer(const NActors::TActorContext& ctx, TIntrusivePtr<TFormedReadResponse> formedResponse); // returns false if actor died + + void RegisterSessions(const NActors::TActorContext& ctx); void RegisterSession(const TActorId& pipe, const TString& topic, const TVector<ui32>& groups, const TActorContext& ctx); - - struct TPartitionActorInfo; - void DropPartition(THashMap<ui64, TPartitionActorInfo>::iterator it, const TActorContext& ctx); - + + struct TPartitionActorInfo; + void DropPartition(THashMap<ui64, TPartitionActorInfo>::iterator it, const TActorContext& ctx); + bool ActualPartitionActor(const TActorId& part); - void ReleasePartition(const THashMap<ui64, TPartitionActorInfo>::iterator& it, - bool couldBeReads, const TActorContext& ctx); // returns false if actor died - - void SendReleaseSignalToClient(const THashMap<ui64, TPartitionActorInfo>::iterator& it, bool kill, const TActorContext& ctx); - - void InformBalancerAboutRelease(const THashMap<ui64, TPartitionActorInfo>::iterator& it, const TActorContext& ctx); - + void ReleasePartition(const THashMap<ui64, TPartitionActorInfo>::iterator& it, + bool couldBeReads, const TActorContext& ctx); // returns false if actor died + + void SendReleaseSignalToClient(const THashMap<ui64, TPartitionActorInfo>::iterator& it, bool kill, const TActorContext& ctx); + + void InformBalancerAboutRelease(const THashMap<ui64, TPartitionActorInfo>::iterator& it, const TActorContext& ctx); + static ui32 NormalizeMaxReadMessagesCount(ui32 sourceValue); static ui32 NormalizeMaxReadSize(ui32 sourceValue); -private: - std::unique_ptr<NKikimr::NGRpcService::TEvStreamPQReadRequest> Request; - - const TString ClientDC; - - const TInstant StartTimestamp; - +private: + std::unique_ptr<NKikimr::NGRpcService::TEvStreamPQReadRequest> Request; + + const TString ClientDC; + + const TInstant StartTimestamp; + TActorId SchemeCache; TActorId NewSchemeCache; - + TActorId AuthInitActor; - TIntrusivePtr<NACLib::TUserToken> Token; - - TString ClientId; - TString ClientPath; - TString Session; - TString PeerName; - - bool CommitsDisabled; - bool BalancersInitStarted; - - bool InitDone; - bool RangesMode = false; - + TIntrusivePtr<NACLib::TUserToken> Token; + + TString ClientId; + TString ClientPath; + TString Session; + TString PeerName; + + bool CommitsDisabled; + bool BalancersInitStarted; + + bool InitDone; + bool RangesMode = false; + ui32 MaxReadMessagesCount; ui32 MaxReadSize; ui32 MaxTimeLagMs; ui64 ReadTimestampMs; - - TString Auth; - - bool ForceACLCheck; - bool RequestNotChecked; - TInstant LastACLCheckTimestamp; - - struct TPartitionActorInfo { + + TString Auth; + + bool ForceACLCheck; + bool RequestNotChecked; + TInstant LastACLCheckTimestamp; + + struct TPartitionActorInfo { TActorId Actor; - const TPartitionId Partition; - std::deque<ui64> Commits; + const TPartitionId Partition; + std::deque<ui64> Commits; bool Reading; - bool Releasing; - bool Released; + bool Releasing; + bool Released; bool LockSent; - bool ReleaseSent; - - ui64 ReadIdToResponse; - ui64 ReadIdCommitted; - TSet<ui64> NextCommits; - TDisjointIntervalTree<ui64> NextRanges; - - ui64 Offset; - - TInstant AssignTimestamp; - + bool ReleaseSent; + + ui64 ReadIdToResponse; + ui64 ReadIdCommitted; + TSet<ui64> NextCommits; + TDisjointIntervalTree<ui64> NextRanges; + + ui64 Offset; + + TInstant AssignTimestamp; + TPartitionActorInfo(const TActorId& actor, const TPartitionId& partition, const TActorContext& ctx) - : Actor(actor) - , Partition(partition) + : Actor(actor) + , Partition(partition) , Reading(false) - , Releasing(false) - , Released(false) + , Releasing(false) + , Released(false) , LockSent(false) - , ReleaseSent(false) - , ReadIdToResponse(1) - , ReadIdCommitted(0) - , Offset(0) - , AssignTimestamp(ctx.Now()) - { } - - void MakeCommit(const TActorContext& ctx); - }; - - + , ReleaseSent(false) + , ReadIdToResponse(1) + , ReadIdCommitted(0) + , Offset(0) + , AssignTimestamp(ctx.Now()) + { } + + void MakeCommit(const TActorContext& ctx); + }; + + THashSet<TActorId> ActualPartitionActors; - THashMap<ui64, std::pair<ui32, ui64>> BalancerGeneration; - ui64 NextAssignId; - THashMap<ui64, TPartitionActorInfo> Partitions; //assignId -> info - + THashMap<ui64, std::pair<ui32, ui64>> BalancerGeneration; + ui64 NextAssignId; + THashMap<ui64, TPartitionActorInfo> Partitions; //assignId -> info + THashMap<TString, TTopicHolder> Topics; // topic -> info THashMap<TString, NPersQueue::TConverterPtr> FullPathToConverter; // PrimaryFullPath -> Converter, for balancer replies matching THashSet<TString> TopicsToResolve; - THashMap<TString, TVector<ui32>> TopicGroups; - THashMap<TString, ui64> ReadFromTimestamp; - - bool ReadOnlyLocal; - TDuration CommitInterval; - - struct TPartitionInfo { - ui64 AssignId; - ui64 WTime; - ui64 SizeLag; - ui64 MsgLag; - bool operator < (const TPartitionInfo& rhs) const { - return std::tie(WTime, AssignId) < std::tie(rhs.WTime, rhs.AssignId); - } - }; - + THashMap<TString, TVector<ui32>> TopicGroups; + THashMap<TString, ui64> ReadFromTimestamp; + + bool ReadOnlyLocal; + TDuration CommitInterval; + + struct TPartitionInfo { + ui64 AssignId; + ui64 WTime; + ui64 SizeLag; + ui64 MsgLag; + bool operator < (const TPartitionInfo& rhs) const { + return std::tie(WTime, AssignId) < std::tie(rhs.WTime, rhs.AssignId); + } + }; + TSet<TPartitionInfo> AvailablePartitions; struct TFormedReadResponse: public TSimpleRefCount<TFormedReadResponse> { using TPtr = TIntrusivePtr<TFormedReadResponse>; - TFormedReadResponse(const TString& guid, const TInstant start) + TFormedReadResponse(const TString& guid, const TInstant start) : Guid(guid) - , Start(start) - , FromDisk(false) + , Start(start) + , FromDisk(false) { } PersQueue::V1::MigrationStreamingReadServerMessage Response; - ui32 RequestsInfly = 0; - i64 ByteSize = 0; - ui64 RequestedBytes = 0; - + ui32 RequestsInfly = 0; + i64 ByteSize = 0; + ui64 RequestedBytes = 0; + //returns byteSize diff i64 ApplyResponse(PersQueue::V1::MigrationStreamingReadServerMessage&& resp); - + THashSet<TActorId> PartitionsTookPartInRead; - TSet<TPartitionId> PartitionsTookPartInControlMessages; + TSet<TPartitionId> PartitionsTookPartInControlMessages; TSet<TPartitionInfo> PartitionsBecameAvailable; // Partitions that became available during this read request execution. - + // These partitions are bringed back to AvailablePartitions after reply to this read request. const TString Guid; - TInstant Start; - bool FromDisk; + TInstant Start; + bool FromDisk; TDuration WaitQuotaTime; - }; - + }; + THashMap<TActorId, TFormedReadResponse::TPtr> PartitionToReadResponse; // Partition actor -> TFormedReadResponse answer that has this partition. // PartitionsTookPartInRead in formed read response contain this actor id. - - struct TControlMessages { + + struct TControlMessages { TVector<PersQueue::V1::MigrationStreamingReadServerMessage> ControlMessages; - ui32 Infly = 0; - }; - - TMap<TPartitionId, TControlMessages> PartitionToControlMessages; - - - std::deque<THolder<TEvPQProxy::TEvRead>> Reads; - - ui64 Cookie; - - struct TCommitInfo { - ui64 StartReadId; - ui32 Partitions; - }; - - TMap<ui64, TCommitInfo> Commits; //readid->TCommitInfo - - TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; - - NMonitoring::TDynamicCounters::TCounterPtr SessionsCreated; - NMonitoring::TDynamicCounters::TCounterPtr SessionsActive; - - NMonitoring::TDynamicCounters::TCounterPtr Errors; - NMonitoring::TDynamicCounters::TCounterPtr PipeReconnects; - NMonitoring::TDynamicCounters::TCounterPtr BytesInflight; - ui64 BytesInflight_; - ui64 RequestedBytes; - ui32 ReadsInfly; - std::queue<ui64> ActiveWrites; - - NKikimr::NPQ::TPercentileCounter PartsPerSession; - + ui32 Infly = 0; + }; + + TMap<TPartitionId, TControlMessages> PartitionToControlMessages; + + + std::deque<THolder<TEvPQProxy::TEvRead>> Reads; + + ui64 Cookie; + + struct TCommitInfo { + ui64 StartReadId; + ui32 Partitions; + }; + + TMap<ui64, TCommitInfo> Commits; //readid->TCommitInfo + + TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; + + NMonitoring::TDynamicCounters::TCounterPtr SessionsCreated; + NMonitoring::TDynamicCounters::TCounterPtr SessionsActive; + + NMonitoring::TDynamicCounters::TCounterPtr Errors; + NMonitoring::TDynamicCounters::TCounterPtr PipeReconnects; + NMonitoring::TDynamicCounters::TCounterPtr BytesInflight; + ui64 BytesInflight_; + ui64 RequestedBytes; + ui32 ReadsInfly; + std::queue<ui64> ActiveWrites; + + NKikimr::NPQ::TPercentileCounter PartsPerSession; + THashMap<TString, TTopicCounters> TopicCounters; - THashMap<TString, ui32> NumPartitionsFromTopic; - - TVector<NPQ::TLabelsInfo> Aggr; - NKikimr::NPQ::TMultiCounter SLITotal; - NKikimr::NPQ::TMultiCounter SLIErrors; - TInstant StartTime; - NKikimr::NPQ::TPercentileCounter InitLatency; - NKikimr::NPQ::TPercentileCounter ReadLatency; - NKikimr::NPQ::TPercentileCounter ReadLatencyFromDisk; - NKikimr::NPQ::TPercentileCounter CommitLatency; - NKikimr::NPQ::TMultiCounter SLIBigLatency; - NKikimr::NPQ::TMultiCounter SLIBigReadLatency; - NKikimr::NPQ::TMultiCounter ReadsTotal; + THashMap<TString, ui32> NumPartitionsFromTopic; + + TVector<NPQ::TLabelsInfo> Aggr; + NKikimr::NPQ::TMultiCounter SLITotal; + NKikimr::NPQ::TMultiCounter SLIErrors; + TInstant StartTime; + NKikimr::NPQ::TPercentileCounter InitLatency; + NKikimr::NPQ::TPercentileCounter ReadLatency; + NKikimr::NPQ::TPercentileCounter ReadLatencyFromDisk; + NKikimr::NPQ::TPercentileCounter CommitLatency; + NKikimr::NPQ::TMultiCounter SLIBigLatency; + NKikimr::NPQ::TMultiCounter SLIBigReadLatency; + NKikimr::NPQ::TMultiCounter ReadsTotal; NPersQueue::TTopicsListController TopicsHandler; -}; - - -class TReadInfoActor : public TRpcOperationRequestActor<TReadInfoActor, TEvPQReadInfoRequest> { -using TBase = TRpcOperationRequestActor<TReadInfoActor, TEvPQReadInfoRequest>; -public: +}; + + +class TReadInfoActor : public TRpcOperationRequestActor<TReadInfoActor, TEvPQReadInfoRequest> { +using TBase = TRpcOperationRequestActor<TReadInfoActor, TEvPQReadInfoRequest>; +public: TReadInfoActor( NKikimr::NGRpcService::TEvPQReadInfoRequest* request, const NPersQueue::TTopicsListController& topicsHandler, const NActors::TActorId& schemeCache, const NActors::TActorId& newSchemeCache, TIntrusivePtr<NMonitoring::TDynamicCounters> counters ); - ~TReadInfoActor(); - - void Bootstrap(const NActors::TActorContext& ctx); - - - + ~TReadInfoActor(); + + void Bootstrap(const NActors::TActorContext& ctx); + + + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::PQ_META_REQUEST_PROCESSOR; } - - bool HasCancelOperation() { - return false; - } - -private: - - void Die(const NActors::TActorContext& ctx) override; - - STFUNC(StateFunc) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvPQProxy::TEvAuthResultOk, Handle); // form auth actor - HFunc(TEvPQProxy::TEvCloseSession, Handle) //from auth actor - - HFunc(TEvPersQueue::TEvResponse, Handle); - default: - break; - }; - } - - void Handle(TEvPQProxy::TEvCloseSession::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvAuthResultOk::TPtr& ev, const NActors::TActorContext& ctx); - - void Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorContext& ctx); - - void AnswerError(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode errorCode, const NActors::TActorContext& ctx); - void ProcessAnswers(const TActorContext& ctx); - -private: + + bool HasCancelOperation() { + return false; + } + +private: + + void Die(const NActors::TActorContext& ctx) override; + + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + HFunc(TEvPQProxy::TEvAuthResultOk, Handle); // form auth actor + HFunc(TEvPQProxy::TEvCloseSession, Handle) //from auth actor + + HFunc(TEvPersQueue::TEvResponse, Handle); + default: + break; + }; + } + + void Handle(TEvPQProxy::TEvCloseSession::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvAuthResultOk::TPtr& ev, const NActors::TActorContext& ctx); + + void Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorContext& ctx); + + void AnswerError(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode errorCode, const NActors::TActorContext& ctx); + void ProcessAnswers(const TActorContext& ctx); + +private: TActorId SchemeCache; TActorId NewSchemeCache; - + TActorId AuthInitActor; - + TTopicTabletsPairs TopicAndTablets; - - TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; - - TString ClientId; + + TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; + + TString ClientId; NPersQueue::TTopicsListController TopicsHandler; -}; - - +}; + + class TDropTopicActor : public TPQGrpcSchemaBase<TDropTopicActor, NKikimr::NGRpcService::TEvPQDropTopicRequest> { using TBase = TPQGrpcSchemaBase<TDropTopicActor, TEvPQDropTopicRequest>; public: - TDropTopicActor(NKikimr::NGRpcService::TEvPQDropTopicRequest* request); - ~TDropTopicActor() = default; - + TDropTopicActor(NKikimr::NGRpcService::TEvPQDropTopicRequest* request); + ~TDropTopicActor() = default; + void FillProposeRequest(TEvTxUserProxy::TEvProposeTransaction& proposal, const TActorContext& ctx, const TString& workingDir, const TString& name); - - void Bootstrap(const NActors::TActorContext& ctx); + + void Bootstrap(const NActors::TActorContext& ctx); void HandleCacheNavigateResponse(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx){ Y_UNUSED(ev); Y_UNUSED(ctx); } -}; - +}; + class TDescribeTopicActor : public TPQGrpcSchemaBase<TDescribeTopicActor, NKikimr::NGRpcService::TEvPQDescribeTopicRequest> { using TBase = TPQGrpcSchemaBase<TDescribeTopicActor, TEvPQDescribeTopicRequest>; - -public: - TDescribeTopicActor(NKikimr::NGRpcService::TEvPQDescribeTopicRequest* request); - ~TDescribeTopicActor() = default; - - void StateWork(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx); - - void Bootstrap(const NActors::TActorContext& ctx); - + +public: + TDescribeTopicActor(NKikimr::NGRpcService::TEvPQDescribeTopicRequest* request); + ~TDescribeTopicActor() = default; + + void StateWork(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx); + + void Bootstrap(const NActors::TActorContext& ctx); + void HandleCacheNavigateResponse(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx); -}; - - +}; + + class TAddReadRuleActor : public TUpdateSchemeActor<TAddReadRuleActor, TEvPQAddReadRuleRequest> { using TBase = TUpdateSchemeActor<TAddReadRuleActor, TEvPQAddReadRuleRequest>; - + public: TAddReadRuleActor(NKikimr::NGRpcService::TEvPQAddReadRuleRequest *request); - + void Bootstrap(const NActors::TActorContext &ctx); void ModifyPersqueueConfig(const TActorContext& ctx, NKikimrSchemeOp::TPersQueueGroupDescription& groupConfig, @@ -1172,7 +1172,7 @@ public: class TRemoveReadRuleActor : public TUpdateSchemeActor<TRemoveReadRuleActor, TEvPQRemoveReadRuleRequest> { using TBase = TUpdateSchemeActor<TRemoveReadRuleActor, TEvPQRemoveReadRuleRequest>; -public: +public: TRemoveReadRuleActor(NKikimr::NGRpcService::TEvPQRemoveReadRuleRequest* request); void Bootstrap(const NActors::TActorContext &ctx); @@ -1187,36 +1187,36 @@ class TCreateTopicActor : public TPQGrpcSchemaBase<TCreateTopicActor, NKikimr::N using TBase = TPQGrpcSchemaBase<TCreateTopicActor, TEvPQCreateTopicRequest>; public: - TCreateTopicActor(NKikimr::NGRpcService::TEvPQCreateTopicRequest* request, const TString& localCluster, const TVector<TString>& clusters); - ~TCreateTopicActor() = default; - + TCreateTopicActor(NKikimr::NGRpcService::TEvPQCreateTopicRequest* request, const TString& localCluster, const TVector<TString>& clusters); + ~TCreateTopicActor() = default; + void FillProposeRequest(TEvTxUserProxy::TEvProposeTransaction& proposal, const TActorContext& ctx, const TString& workingDir, const TString& name); - - void Bootstrap(const NActors::TActorContext& ctx); - + + void Bootstrap(const NActors::TActorContext& ctx); + void HandleCacheNavigateResponse(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx){ Y_UNUSED(ev); Y_UNUSED(ctx); } -private: - TString LocalCluster; - TVector<TString> Clusters; -}; - +private: + TString LocalCluster; + TVector<TString> Clusters; +}; + class TAlterTopicActor : public TPQGrpcSchemaBase<TAlterTopicActor, NKikimr::NGRpcService::TEvPQAlterTopicRequest> { using TBase = TPQGrpcSchemaBase<TAlterTopicActor, TEvPQAlterTopicRequest>; - -public: - TAlterTopicActor(NKikimr::NGRpcService::TEvPQAlterTopicRequest* request); - ~TAlterTopicActor() = default; - + +public: + TAlterTopicActor(NKikimr::NGRpcService::TEvPQAlterTopicRequest* request); + ~TAlterTopicActor() = default; + void FillProposeRequest(TEvTxUserProxy::TEvProposeTransaction& proposal, const TActorContext& ctx, const TString& workingDir, const TString& name); - - void Bootstrap(const NActors::TActorContext& ctx); + + void Bootstrap(const NActors::TActorContext& ctx); void HandleCacheNavigateResponse(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx){ Y_UNUSED(ev); Y_UNUSED(ctx); } -}; - - - +}; + + + } diff --git a/ydb/services/persqueue_v1/grpc_pq_read.cpp b/ydb/services/persqueue_v1/grpc_pq_read.cpp index ccaeca0f65..6b7e72fdd0 100644 --- a/ydb/services/persqueue_v1/grpc_pq_read.cpp +++ b/ydb/services/persqueue_v1/grpc_pq_read.cpp @@ -1,79 +1,79 @@ -#include "grpc_pq_read.h" -#include "grpc_pq_actor.h" - +#include "grpc_pq_read.h" +#include "grpc_pq_actor.h" + #include <ydb/core/grpc_services/grpc_helper.h> #include <ydb/core/tx/scheme_board/cache.h> - + #include <algorithm> -using namespace NActors; -using namespace NKikimrClient; - -using grpc::Status; - -namespace NKikimr { -namespace NGRpcProxy { -namespace V1 { - -/////////////////////////////////////////////////////////////////////////////// - +using namespace NActors; +using namespace NKikimrClient; + +using grpc::Status; + +namespace NKikimr { +namespace NGRpcProxy { +namespace V1 { + +/////////////////////////////////////////////////////////////////////////////// + using namespace PersQueue::V1; - - - + + + IActor* CreatePQReadService(const TActorId& schemeCache, const TActorId& newSchemeCache, - TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const ui32 maxSessions) { + TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const ui32 maxSessions) { return new TPQReadService(schemeCache, newSchemeCache, counters, maxSessions); -} - - - +} + + + TPQReadService::TPQReadService(const TActorId& schemeCache, const TActorId& newSchemeCache, - TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const ui32 maxSessions) - : SchemeCache(schemeCache) + TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const ui32 maxSessions) + : SchemeCache(schemeCache) , NewSchemeCache(newSchemeCache) - , Counters(counters) - , MaxSessions(maxSessions) - , LocalCluster("") -{ -} - - -void TPQReadService::Bootstrap(const TActorContext& ctx) { + , Counters(counters) + , MaxSessions(maxSessions) + , LocalCluster("") +{ +} + + +void TPQReadService::Bootstrap(const TActorContext& ctx) { HaveClusters = !AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen(); // ToDo[migration] - proper condition if (HaveClusters) { ctx.Send(NPQ::NClusterTracker::MakeClusterTrackerID(), new NPQ::NClusterTracker::TEvClusterTracker::TEvSubscribe); } - ctx.Send(NNetClassifier::MakeNetClassifierID(), new NNetClassifier::TEvNetClassifier::TEvSubscribe); + ctx.Send(NNetClassifier::MakeNetClassifierID(), new NNetClassifier::TEvNetClassifier::TEvSubscribe); TopicConverterFactory = std::make_shared<NPersQueue::TTopicNamesConverterFactory>( AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen(), AppData(ctx)->PQConfig.GetRoot() ); TopicsHandler = std::make_unique<NPersQueue::TTopicsListController>( TopicConverterFactory, HaveClusters, Clusters, LocalCluster ); - Become(&TThis::StateFunc); -} - - -ui64 TPQReadService::NextCookie() { - return ++LastCookie; -} - - -void TPQReadService::Handle(NNetClassifier::TEvNetClassifier::TEvClassifierUpdate::TPtr& ev, const TActorContext& ctx) { - if (!DatacenterClassifier) { - for (auto it = Sessions.begin(); it != Sessions.end(); ++it) { - ctx.Send(it->second, new TEvPQProxy::TEvDieCommand("datacenter classifier initialized, restart session please", PersQueue::ErrorCode::INITIALIZING)); - } - } - - DatacenterClassifier = ev->Get()->Classifier; -} - + Become(&TThis::StateFunc); +} + + +ui64 TPQReadService::NextCookie() { + return ++LastCookie; +} + + +void TPQReadService::Handle(NNetClassifier::TEvNetClassifier::TEvClassifierUpdate::TPtr& ev, const TActorContext& ctx) { + if (!DatacenterClassifier) { + for (auto it = Sessions.begin(); it != Sessions.end(); ++it) { + ctx.Send(it->second, new TEvPQProxy::TEvDieCommand("datacenter classifier initialized, restart session please", PersQueue::ErrorCode::INITIALIZING)); + } + } + + DatacenterClassifier = ev->Get()->Classifier; +} + void TPQReadService::Handle(NPQ::NClusterTracker::TEvClusterTracker::TEvClustersUpdate::TPtr& ev) { Y_VERIFY(ev->Get()->ClustersList); - + Y_VERIFY(ev->Get()->ClustersList->Clusters.size()); const auto& clusters = ev->Get()->ClustersList->Clusters; @@ -90,95 +90,95 @@ void TPQReadService::Handle(NPQ::NClusterTracker::TEvClusterTracker::TEvClusters Clusters[i] = clusters[i].Name; } TopicsHandler->UpdateClusters(Clusters, LocalCluster); -} - - -void TPQReadService::Handle(TEvPQProxy::TEvSessionDead::TPtr& ev, const TActorContext&) { - Sessions.erase(ev->Get()->Cookie); -} - - +} + + +void TPQReadService::Handle(TEvPQProxy::TEvSessionDead::TPtr& ev, const TActorContext&) { + Sessions.erase(ev->Get()->Cookie); +} + + MigrationStreamingReadServerMessage FillReadResponse(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode code) { MigrationStreamingReadServerMessage res; - FillIssue(res.add_issues(), code, errorReason); - res.set_status(ConvertPersQueueInternalCodeToStatus(code)); - return res; -} - -google::protobuf::RepeatedPtrField<Ydb::Issue::IssueMessage> FillInfoResponse(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode code) { - google::protobuf::RepeatedPtrField<Ydb::Issue::IssueMessage> res; - FillIssue(res.Add(), code, errorReason); - return res; -} - - -void TPQReadService::Handle(NKikimr::NGRpcService::TEvStreamPQReadRequest::TPtr& ev, const TActorContext& ctx) { - - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "new grpc connection"); - - if (TooMuchSessions()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, "new grpc connection failed - too much sessions"); - ev->Get()->GetStreamCtx()->Attach(ctx.SelfID); - ev->Get()->GetStreamCtx()->WriteAndFinish(FillReadResponse("proxy overloaded", PersQueue::ErrorCode::OVERLOAD), grpc::Status::OK); //CANCELLED - return; - } + FillIssue(res.add_issues(), code, errorReason); + res.set_status(ConvertPersQueueInternalCodeToStatus(code)); + return res; +} + +google::protobuf::RepeatedPtrField<Ydb::Issue::IssueMessage> FillInfoResponse(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode code) { + google::protobuf::RepeatedPtrField<Ydb::Issue::IssueMessage> res; + FillIssue(res.Add(), code, errorReason); + return res; +} + + +void TPQReadService::Handle(NKikimr::NGRpcService::TEvStreamPQReadRequest::TPtr& ev, const TActorContext& ctx) { + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "new grpc connection"); + + if (TooMuchSessions()) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, "new grpc connection failed - too much sessions"); + ev->Get()->GetStreamCtx()->Attach(ctx.SelfID); + ev->Get()->GetStreamCtx()->WriteAndFinish(FillReadResponse("proxy overloaded", PersQueue::ErrorCode::OVERLOAD), grpc::Status::OK); //CANCELLED + return; + } if (HaveClusters && (Clusters.empty() || LocalCluster.empty())) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, "new grpc connection failed - cluster is not known yet"); - - ev->Get()->GetStreamCtx()->Attach(ctx.SelfID); - ev->Get()->GetStreamCtx()->WriteAndFinish(FillReadResponse("cluster initializing", PersQueue::ErrorCode::INITIALIZING), grpc::Status::OK); //CANCELLED - // TODO: Inc SLI Errors - return; - } else { - const ui64 cookie = NextCookie(); - - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "new session created cookie " << cookie); - - auto ip = ev->Get()->GetStreamCtx()->GetPeerName(); - + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, "new grpc connection failed - cluster is not known yet"); + + ev->Get()->GetStreamCtx()->Attach(ctx.SelfID); + ev->Get()->GetStreamCtx()->WriteAndFinish(FillReadResponse("cluster initializing", PersQueue::ErrorCode::INITIALIZING), grpc::Status::OK); //CANCELLED + // TODO: Inc SLI Errors + return; + } else { + const ui64 cookie = NextCookie(); + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "new session created cookie " << cookie); + + auto ip = ev->Get()->GetStreamCtx()->GetPeerName(); + TActorId worker = ctx.Register(new TReadSessionActor( ev->Release().Release(), cookie, SchemeCache, NewSchemeCache, Counters, DatacenterClassifier ? DatacenterClassifier->ClassifyAddress(NAddressClassifier::ExtractAddress(ip)) : "unknown", *TopicsHandler )); - - Sessions[cookie] = worker; - } -} - - -void TPQReadService::Handle(NKikimr::NGRpcService::TEvPQReadInfoRequest::TPtr& ev, const TActorContext& ctx) { - - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "new read info request"); - - if (Clusters.empty() || LocalCluster.empty()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, "new read info request failed - cluster is not known yet"); - - ev->Get()->SendResult(ConvertPersQueueInternalCodeToStatus(PersQueue::ErrorCode::INITIALIZING), FillInfoResponse("cluster initializing", PersQueue::ErrorCode::INITIALIZING)); //CANCELLED - return; - } else { + + Sessions[cookie] = worker; + } +} + + +void TPQReadService::Handle(NKikimr::NGRpcService::TEvPQReadInfoRequest::TPtr& ev, const TActorContext& ctx) { + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "new read info request"); + + if (Clusters.empty() || LocalCluster.empty()) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, "new read info request failed - cluster is not known yet"); + + ev->Get()->SendResult(ConvertPersQueueInternalCodeToStatus(PersQueue::ErrorCode::INITIALIZING), FillInfoResponse("cluster initializing", PersQueue::ErrorCode::INITIALIZING)); //CANCELLED + return; + } else { //ctx.Register(new TReadInfoActor(ev->Release().Release(), Clusters, LocalCluster, SchemeCache, NewSchemeCache, Counters)); ctx.Register(new TReadInfoActor(ev->Release().Release(), *TopicsHandler, SchemeCache, NewSchemeCache, Counters)); - } -} - - - -bool TPQReadService::TooMuchSessions() { - return Sessions.size() >= MaxSessions; -} - - -} -} -} - - -void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvStreamPQReadRequest::TPtr& ev, const TActorContext& ctx) { - ctx.Send(NKikimr::NGRpcProxy::V1::GetPQReadServiceActorID(), ev->Release().Release()); -} - -void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvPQReadInfoRequest::TPtr& ev, const TActorContext& ctx) { - ctx.Send(NKikimr::NGRpcProxy::V1::GetPQReadServiceActorID(), ev->Release().Release()); -} + } +} + + + +bool TPQReadService::TooMuchSessions() { + return Sessions.size() >= MaxSessions; +} + + +} +} +} + + +void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvStreamPQReadRequest::TPtr& ev, const TActorContext& ctx) { + ctx.Send(NKikimr::NGRpcProxy::V1::GetPQReadServiceActorID(), ev->Release().Release()); +} + +void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvPQReadInfoRequest::TPtr& ev, const TActorContext& ctx) { + ctx.Send(NKikimr::NGRpcProxy::V1::GetPQReadServiceActorID(), ev->Release().Release()); +} diff --git a/ydb/services/persqueue_v1/grpc_pq_read.h b/ydb/services/persqueue_v1/grpc_pq_read.h index fa3ba7f73c..558063ed13 100644 --- a/ydb/services/persqueue_v1/grpc_pq_read.h +++ b/ydb/services/persqueue_v1/grpc_pq_read.h @@ -1,85 +1,85 @@ -#pragma once - -#include "grpc_pq_actor.h" -#include "persqueue.h" - +#pragma once + +#include "grpc_pq_actor.h" +#include "persqueue.h" + #include <ydb/core/client/server/grpc_base.h> #include <ydb/core/persqueue/cluster_tracker.h> #include <ydb/core/mind/address_classification/net_classifier.h> - + #include <library/cpp/actors/core/actorsystem.h> - -#include <util/generic/hash.h> -#include <util/system/mutex.h> - - -namespace NKikimr { -namespace NGRpcProxy { -namespace V1 { - - - + +#include <util/generic/hash.h> +#include <util/system/mutex.h> + + +namespace NKikimr { +namespace NGRpcProxy { +namespace V1 { + + + inline TActorId GetPQReadServiceActorID() { return TActorId(0, "PQReadSvc"); -} - +} + IActor* CreatePQReadService(const NActors::TActorId& schemeCache, const NActors::TActorId& newSchemeCache, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const ui32 maxSessions); - -class TPQReadService : public NActors::TActorBootstrapped<TPQReadService> { -public: + +class TPQReadService : public NActors::TActorBootstrapped<TPQReadService> { +public: TPQReadService(const NActors::TActorId& schemeCache, const NActors::TActorId& newSchemeCache, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const ui32 maxSessions); - - ~TPQReadService() - {} - - void Bootstrap(const TActorContext& ctx); - -private: - ui64 NextCookie(); - - bool TooMuchSessions(); - TString AvailableLocalCluster(); - - STFUNC(StateFunc) { - switch (ev->GetTypeRewrite()) { - HFunc(NKikimr::NGRpcService::TEvStreamPQReadRequest, Handle); - HFunc(NKikimr::NGRpcService::TEvPQReadInfoRequest, Handle); + + ~TPQReadService() + {} + + void Bootstrap(const TActorContext& ctx); + +private: + ui64 NextCookie(); + + bool TooMuchSessions(); + TString AvailableLocalCluster(); + + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + HFunc(NKikimr::NGRpcService::TEvStreamPQReadRequest, Handle); + HFunc(NKikimr::NGRpcService::TEvPQReadInfoRequest, Handle); hFunc(NPQ::NClusterTracker::TEvClusterTracker::TEvClustersUpdate, Handle); - HFunc(NNetClassifier::TEvNetClassifier::TEvClassifierUpdate, Handle); - HFunc(TEvPQProxy::TEvSessionDead, Handle); - } - } - -private: - void Handle(NKikimr::NGRpcService::TEvStreamPQReadRequest::TPtr& ev, const TActorContext& ctx); - void Handle(NKikimr::NGRpcService::TEvPQReadInfoRequest::TPtr& ev, const TActorContext& ctx); + HFunc(NNetClassifier::TEvNetClassifier::TEvClassifierUpdate, Handle); + HFunc(TEvPQProxy::TEvSessionDead, Handle); + } + } + +private: + void Handle(NKikimr::NGRpcService::TEvStreamPQReadRequest::TPtr& ev, const TActorContext& ctx); + void Handle(NKikimr::NGRpcService::TEvPQReadInfoRequest::TPtr& ev, const TActorContext& ctx); void Handle(NPQ::NClusterTracker::TEvClusterTracker::TEvClustersUpdate::TPtr& ev); - void Handle(NNetClassifier::TEvNetClassifier::TEvClassifierUpdate::TPtr& ev, const TActorContext& ctx); - - void Handle(TEvPQProxy::TEvSessionDead::TPtr& ev, const TActorContext& ctx); - + void Handle(NNetClassifier::TEvNetClassifier::TEvClassifierUpdate::TPtr& ev, const TActorContext& ctx); + + void Handle(TEvPQProxy::TEvSessionDead::TPtr& ev, const TActorContext& ctx); + NActors::TActorId SchemeCache; NActors::TActorId NewSchemeCache; - - TAtomic LastCookie = 0; - + + TAtomic LastCookie = 0; + THashMap<ui64, TActorId> Sessions; - - TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; - - ui32 MaxSessions; - TVector<TString> Clusters; - TString LocalCluster; - - NAddressClassifier::TLabeledAddressClassifier::TConstPtr DatacenterClassifier; // Detects client's datacenter by IP. May be null + + TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; + + ui32 MaxSessions; + TVector<TString> Clusters; + TString LocalCluster; + + NAddressClassifier::TLabeledAddressClassifier::TConstPtr DatacenterClassifier; // Detects client's datacenter by IP. May be null std::shared_ptr<NPersQueue::TTopicNamesConverterFactory> TopicConverterFactory; std::unique_ptr<NPersQueue::TTopicsListController> TopicsHandler; bool HaveClusters; -}; +}; + - -} -} -} +} +} +} diff --git a/ydb/services/persqueue_v1/grpc_pq_read_actor.cpp b/ydb/services/persqueue_v1/grpc_pq_read_actor.cpp index 9ce019deda..654edfcfcd 100644 --- a/ydb/services/persqueue_v1/grpc_pq_read_actor.cpp +++ b/ydb/services/persqueue_v1/grpc_pq_read_actor.cpp @@ -1,6 +1,6 @@ -#include "grpc_pq_actor.h" -#include "grpc_pq_read.h" - +#include "grpc_pq_actor.h" +#include "grpc_pq_read.h" + #include <ydb/core/base/path.h> #include <ydb/core/client/server/msgbus_server_persqueue.h> #include <ydb/core/client/server/msgbus_server_pq_read_session_info.h> @@ -10,34 +10,34 @@ #include <ydb/core/persqueue/write_meta.h> #include <ydb/core/persqueue/writer/source_id_encoding.h> #include <ydb/library/persqueue/topic_parser/topic_parser.h> - - + + #include <library/cpp/actors/core/log.h> #include <library/cpp/actors/interconnect/interconnect.h> #include <library/cpp/protobuf/util/repeated_field_utils.h> -#include <library/cpp/containers/disjoint_interval_tree/disjoint_interval_tree.h> - -#include <util/string/strip.h> -#include <util/charset/utf8.h> - +#include <library/cpp/containers/disjoint_interval_tree/disjoint_interval_tree.h> + +#include <util/string/strip.h> +#include <util/charset/utf8.h> + #include <algorithm> -using namespace NActors; -using namespace NKikimrClient; - -namespace NKikimr { - -using namespace NMsgBusProxy; - -namespace NGRpcProxy { -namespace V1 { - +using namespace NActors; +using namespace NKikimrClient; + +namespace NKikimr { + +using namespace NMsgBusProxy; + +namespace NGRpcProxy { +namespace V1 { + using namespace PersQueue::V1; - -#define PQ_LOG_PREFIX "session cookie " << Cookie << " consumer " << ClientPath << " session " << Session - - -//11 tries = 10,23 seconds, then each try for 5 seconds , so 21 retries will take near 1 min + +#define PQ_LOG_PREFIX "session cookie " << Cookie << " consumer " << ClientPath << " session " << Session + + +//11 tries = 10,23 seconds, then each try for 5 seconds , so 21 retries will take near 1 min static const NTabletPipe::TClientRetryPolicy RetryPolicyForPipes = { .RetryLimitCount = 21, .MinRetryTime = TDuration::MilliSeconds(10), @@ -45,278 +45,278 @@ static const NTabletPipe::TClientRetryPolicy RetryPolicyForPipes = { .BackoffMultiplier = 2, .DoFirstRetryInstantly = true }; - -static const ui64 MAX_INFLY_BYTES = 25 * 1024 * 1024; -static const ui32 MAX_INFLY_READS = 10; - -static const TDuration READ_TIMEOUT_DURATION = TDuration::Seconds(1); - -static const TDuration WAIT_DATA = TDuration::Seconds(10); -static const TDuration PREWAIT_DATA = TDuration::Seconds(9); -static const TDuration WAIT_DELTA = TDuration::MilliSeconds(500); - -static const ui64 INIT_COOKIE = Max<ui64>(); //some identifier - -static const ui32 MAX_PIPE_RESTARTS = 100; //after 100 restarts without progress kill session -static const ui32 RESTART_PIPE_DELAY_MS = 100; - -static const ui64 MAX_READ_SIZE = 100 << 20; //100mb; - -static const ui32 MAX_COMMITS_INFLY = 3; - + +static const ui64 MAX_INFLY_BYTES = 25 * 1024 * 1024; +static const ui32 MAX_INFLY_READS = 10; + +static const TDuration READ_TIMEOUT_DURATION = TDuration::Seconds(1); + +static const TDuration WAIT_DATA = TDuration::Seconds(10); +static const TDuration PREWAIT_DATA = TDuration::Seconds(9); +static const TDuration WAIT_DELTA = TDuration::MilliSeconds(500); + +static const ui64 INIT_COOKIE = Max<ui64>(); //some identifier + +static const ui32 MAX_PIPE_RESTARTS = 100; //after 100 restarts without progress kill session +static const ui32 RESTART_PIPE_DELAY_MS = 100; + +static const ui64 MAX_READ_SIZE = 100 << 20; //100mb; + +static const ui32 MAX_COMMITS_INFLY = 3; + static const double LAG_GROW_MULTIPLIER = 1.2; //assume that 20% more data arrived to partitions - - -//TODO: add here tracking of bytes in/out - - -IOutputStream& operator <<(IOutputStream& out, const TPartitionId& partId) { + + +//TODO: add here tracking of bytes in/out + + +IOutputStream& operator <<(IOutputStream& out, const TPartitionId& partId) { out << "TopicId: " << partId.TopicConverter->GetClientsideName() << ":" << partId.Partition << "(assignId:" << partId.AssignId << ")"; - return out; -} - -struct TOffsetInfo { - // find by read id - bool operator<(ui64 readId) const { - return ReadId < readId; - } - - friend bool operator<(ui64 readId, const TOffsetInfo& info) { - return readId < info.ReadId; - } - - - ui64 ReadId = 0; - ui64 Offset = 0; -}; - - + return out; +} + +struct TOffsetInfo { + // find by read id + bool operator<(ui64 readId) const { + return ReadId < readId; + } + + friend bool operator<(ui64 readId, const TOffsetInfo& info) { + return readId < info.ReadId; + } + + + ui64 ReadId = 0; + ui64 Offset = 0; +}; + + bool RemoveEmptyMessages(MigrationStreamingReadServerMessage::DataBatch& data) { auto batchRemover = [&](MigrationStreamingReadServerMessage::DataBatch::Batch& batch) -> bool { - return batch.message_data_size() == 0; - }; + return batch.message_data_size() == 0; + }; auto partitionDataRemover = [&](MigrationStreamingReadServerMessage::DataBatch::PartitionData& partition) -> bool { - NProtoBuf::RemoveRepeatedFieldItemIf(partition.mutable_batches(), batchRemover); - return partition.batches_size() == 0; - }; - NProtoBuf::RemoveRepeatedFieldItemIf(data.mutable_partition_data(), partitionDataRemover); - return !data.partition_data().empty(); -} - - -class TPartitionActor : public NActors::TActorBootstrapped<TPartitionActor> { -public: + NProtoBuf::RemoveRepeatedFieldItemIf(partition.mutable_batches(), batchRemover); + return partition.batches_size() == 0; + }; + NProtoBuf::RemoveRepeatedFieldItemIf(data.mutable_partition_data(), partitionDataRemover); + return !data.partition_data().empty(); +} + + +class TPartitionActor : public NActors::TActorBootstrapped<TPartitionActor> { +public: TPartitionActor(const TActorId& parentId, const TString& clientId, const TString& clientPath, const ui64 cookie, const TString& session, const TPartitionId& partition, ui32 generation, ui32 step, - const ui64 tabletID, const TReadSessionActor::TTopicCounters& counters, const bool commitsDisabled, const TString& clientDC); - ~TPartitionActor(); - - void Bootstrap(const NActors::TActorContext& ctx); - void Die(const NActors::TActorContext& ctx) override; - - + const ui64 tabletID, const TReadSessionActor::TTopicCounters& counters, const bool commitsDisabled, const TString& clientDC); + ~TPartitionActor(); + + void Bootstrap(const NActors::TActorContext& ctx); + void Die(const NActors::TActorContext& ctx) override; + + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::FRONT_PQ_PARTITION; } -private: - STFUNC(StateFunc) { - switch (ev->GetTypeRewrite()) { - CFunc(NActors::TEvents::TSystem::Wakeup, HandleWakeup) - HFunc(TEvPQProxy::TEvDeadlineExceeded, Handle) - - HFunc(NActors::TEvents::TEvPoisonPill, HandlePoison) - HFunc(TEvPQProxy::TEvRead, Handle) - HFunc(TEvPQProxy::TEvCommitCookie, Handle) - HFunc(TEvPQProxy::TEvCommitRange, Handle) - HFunc(TEvPQProxy::TEvReleasePartition, Handle) - HFunc(TEvPQProxy::TEvLockPartition, Handle) - HFunc(TEvPQProxy::TEvGetStatus, Handle) - HFunc(TEvPQProxy::TEvRestartPipe, Handle) - - HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); - HFunc(TEvTabletPipe::TEvClientConnected, Handle); - HFunc(TEvPersQueue::TEvResponse, Handle); - HFunc(TEvPersQueue::TEvHasDataInfoResponse, Handle); - default: - break; - }; - } - - - void Handle(TEvPQProxy::TEvReleasePartition::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvLockPartition::TPtr& ev, const NActors::TActorContext& ctx); +private: + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + CFunc(NActors::TEvents::TSystem::Wakeup, HandleWakeup) + HFunc(TEvPQProxy::TEvDeadlineExceeded, Handle) + + HFunc(NActors::TEvents::TEvPoisonPill, HandlePoison) + HFunc(TEvPQProxy::TEvRead, Handle) + HFunc(TEvPQProxy::TEvCommitCookie, Handle) + HFunc(TEvPQProxy::TEvCommitRange, Handle) + HFunc(TEvPQProxy::TEvReleasePartition, Handle) + HFunc(TEvPQProxy::TEvLockPartition, Handle) + HFunc(TEvPQProxy::TEvGetStatus, Handle) + HFunc(TEvPQProxy::TEvRestartPipe, Handle) + + HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); + HFunc(TEvTabletPipe::TEvClientConnected, Handle); + HFunc(TEvPersQueue::TEvResponse, Handle); + HFunc(TEvPersQueue::TEvHasDataInfoResponse, Handle); + default: + break; + }; + } + + + void Handle(TEvPQProxy::TEvReleasePartition::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvLockPartition::TPtr& ev, const NActors::TActorContext& ctx); void Handle(TEvPQProxy::TEvGetStatus:: TPtr& ev, const NActors::TActorContext& ctx); - - void Handle(TEvPQProxy::TEvDeadlineExceeded::TPtr& ev, const NActors::TActorContext& ctx); - - void Handle(TEvPQProxy::TEvRead::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvCommitCookie::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvCommitRange::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(const TEvPQProxy::TEvRestartPipe::TPtr&, const NActors::TActorContext& ctx); - - void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPersQueue::TEvResponse::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPersQueue::TEvHasDataInfoResponse::TPtr& ev, const NActors::TActorContext& ctx); - - void HandlePoison(NActors::TEvents::TEvPoisonPill::TPtr& ev, const NActors::TActorContext& ctx); - void HandleWakeup(const NActors::TActorContext& ctx); - - void CheckRelease(const NActors::TActorContext& ctx); - void InitLockPartition(const NActors::TActorContext& ctx); - void InitStartReading(const NActors::TActorContext& ctx); - - void RestartPipe(const NActors::TActorContext& ctx, const TString& reason, const NPersQueue::NErrorCode::EErrorCode errorCode); - void WaitDataInPartition(const NActors::TActorContext& ctx); - void SendCommit(const ui64 readId, const ui64 offset, const TActorContext& ctx); - void MakeCommit(const TActorContext& ctx); - - -private: + + void Handle(TEvPQProxy::TEvDeadlineExceeded::TPtr& ev, const NActors::TActorContext& ctx); + + void Handle(TEvPQProxy::TEvRead::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvCommitCookie::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvCommitRange::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(const TEvPQProxy::TEvRestartPipe::TPtr&, const NActors::TActorContext& ctx); + + void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPersQueue::TEvResponse::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPersQueue::TEvHasDataInfoResponse::TPtr& ev, const NActors::TActorContext& ctx); + + void HandlePoison(NActors::TEvents::TEvPoisonPill::TPtr& ev, const NActors::TActorContext& ctx); + void HandleWakeup(const NActors::TActorContext& ctx); + + void CheckRelease(const NActors::TActorContext& ctx); + void InitLockPartition(const NActors::TActorContext& ctx); + void InitStartReading(const NActors::TActorContext& ctx); + + void RestartPipe(const NActors::TActorContext& ctx, const TString& reason, const NPersQueue::NErrorCode::EErrorCode errorCode); + void WaitDataInPartition(const NActors::TActorContext& ctx); + void SendCommit(const ui64 readId, const ui64 offset, const TActorContext& ctx); + void MakeCommit(const TActorContext& ctx); + + +private: const TActorId ParentId; - const TString ClientId; - const TString ClientPath; - const ui64 Cookie; - const TString Session; - const TString ClientDC; - - const TPartitionId Partition; - const ui32 Generation; - const ui32 Step; - - const ui64 TabletID; - - ui64 ReadOffset; - ui64 ClientReadOffset; - ui64 ClientCommitOffset; - bool ClientVerifyReadOffset; - ui64 CommittedOffset; - ui64 WriteTimestampEstimateMs; - - ui64 ReadIdToResponse; - ui64 ReadIdCommitted; - TSet<ui64> NextCommits; - TDisjointIntervalTree<ui64> NextRanges; - - std::deque<TOffsetInfo> Offsets; - - ui64 WTime; - bool InitDone; - bool StartReading; - bool AllPrepareInited; - bool FirstInit; + const TString ClientId; + const TString ClientPath; + const ui64 Cookie; + const TString Session; + const TString ClientDC; + + const TPartitionId Partition; + const ui32 Generation; + const ui32 Step; + + const ui64 TabletID; + + ui64 ReadOffset; + ui64 ClientReadOffset; + ui64 ClientCommitOffset; + bool ClientVerifyReadOffset; + ui64 CommittedOffset; + ui64 WriteTimestampEstimateMs; + + ui64 ReadIdToResponse; + ui64 ReadIdCommitted; + TSet<ui64> NextCommits; + TDisjointIntervalTree<ui64> NextRanges; + + std::deque<TOffsetInfo> Offsets; + + ui64 WTime; + bool InitDone; + bool StartReading; + bool AllPrepareInited; + bool FirstInit; TActorId PipeClient; - ui32 PipeGeneration; - bool RequestInfly; - NKikimrClient::TPersQueueRequest CurrentRequest; - - ui64 EndOffset; - ui64 SizeLag; - + ui32 PipeGeneration; + bool RequestInfly; + NKikimrClient::TPersQueueRequest CurrentRequest; + + ui64 EndOffset; + ui64 SizeLag; + TString ReadGuid; // empty if not reading - bool NeedRelease; - bool Released; - - std::set<ui64> WaitDataInfly; - ui64 WaitDataCookie; - bool WaitForData; - - bool LockCounted; - struct TCommitInfo { - ui64 StartReadId; - ui64 Offset; - TInstant StartTime; - }; - - std::deque<std::pair<ui64, TCommitInfo>> CommitsInfly; //ReadId, Offset - - TReadSessionActor::TTopicCounters Counters; - - bool CommitsDisabled; - ui64 CommitCookie; -}; - - + bool NeedRelease; + bool Released; + + std::set<ui64> WaitDataInfly; + ui64 WaitDataCookie; + bool WaitForData; + + bool LockCounted; + struct TCommitInfo { + ui64 StartReadId; + ui64 Offset; + TInstant StartTime; + }; + + std::deque<std::pair<ui64, TCommitInfo>> CommitsInfly; //ReadId, Offset + + TReadSessionActor::TTopicCounters Counters; + + bool CommitsDisabled; + ui64 CommitCookie; +}; + + TReadSessionActor::TReadSessionActor( NKikimr::NGRpcService::TEvStreamPQReadRequest* request, const ui64 cookie, const TActorId& schemeCache, const TActorId& newSchemeCache, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const TMaybe<TString> clientDC, const NPersQueue::TTopicsListController& topicsHandler ) - : Request(request) - , ClientDC(clientDC ? *clientDC : "other") - , StartTimestamp(TInstant::Now()) - , SchemeCache(schemeCache) - , NewSchemeCache(newSchemeCache) - , AuthInitActor() - , ClientId() - , ClientPath() - , Session() - , CommitsDisabled(false) - , BalancersInitStarted(false) - , InitDone(false) + : Request(request) + , ClientDC(clientDC ? *clientDC : "other") + , StartTimestamp(TInstant::Now()) + , SchemeCache(schemeCache) + , NewSchemeCache(newSchemeCache) + , AuthInitActor() + , ClientId() + , ClientPath() + , Session() + , CommitsDisabled(false) + , BalancersInitStarted(false) + , InitDone(false) , MaxReadMessagesCount(0) , MaxReadSize(0) , MaxTimeLagMs(0) , ReadTimestampMs(0) - , ForceACLCheck(false) - , RequestNotChecked(true) - , LastACLCheckTimestamp(TInstant::Zero()) - , NextAssignId(1) - , ReadOnlyLocal(false) - , Cookie(cookie) - , Counters(counters) - , BytesInflight_(0) - , RequestedBytes(0) - , ReadsInfly(0) + , ForceACLCheck(false) + , RequestNotChecked(true) + , LastACLCheckTimestamp(TInstant::Zero()) + , NextAssignId(1) + , ReadOnlyLocal(false) + , Cookie(cookie) + , Counters(counters) + , BytesInflight_(0) + , RequestedBytes(0) + , ReadsInfly(0) , TopicsHandler(topicsHandler) -{ - Y_ASSERT(Request); - ++(*GetServiceCounters(Counters, "pqproxy|readSession")->GetCounter("SessionsCreatedTotal", true)); -} - - -TReadSessionActor::~TReadSessionActor() = default; - - -void TReadSessionActor::Bootstrap(const TActorContext& ctx) { - Y_VERIFY(Request); - Request->GetStreamCtx()->Attach(ctx.SelfID); - if (!Request->GetStreamCtx()->Read()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); - Die(ctx); - return; - } +{ + Y_ASSERT(Request); + ++(*GetServiceCounters(Counters, "pqproxy|readSession")->GetCounter("SessionsCreatedTotal", true)); +} + + +TReadSessionActor::~TReadSessionActor() = default; + + +void TReadSessionActor::Bootstrap(const TActorContext& ctx) { + Y_VERIFY(Request); + Request->GetStreamCtx()->Attach(ctx.SelfID); + if (!Request->GetStreamCtx()->Read()) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); + Die(ctx); + return; + } //auto& pqConfig = AppData(ctx)->PQConfig; //bool noDcMode = !pqConfig.GetTopicsAreFirstClassCitizen(); // ToDo[migration] - add multicluster mode //ConverterFactory = MakeHolder<NPersQueue::TTopicNamesConverterFactory>(noDcMode, pqConfig.GetRoot()); - StartTime = ctx.Now(); - - Become(&TThis::StateFunc); -} - -void TReadSessionActor::HandleDone(const TActorContext& ctx) { - - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc closed"); - Die(ctx); -} - - -void TReadSessionActor::Handle(IContext::TEvReadFinished::TPtr& ev, const TActorContext& ctx) { - - auto& request = ev->Get()->Record; - auto token = request.token(); - request.set_token(""); - - if (!token.empty()) { //TODO refreshtoken here - ctx.Send(ctx.SelfID, new TEvPQProxy::TEvAuth(token)); - } - - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read done: success: " << ev->Get()->Success << " data: " << request); - - if (!ev->Get()->Success) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed"); - ctx.Send(ctx.SelfID, new TEvPQProxy::TEvDone()); - return; - } + StartTime = ctx.Now(); + + Become(&TThis::StateFunc); +} + +void TReadSessionActor::HandleDone(const TActorContext& ctx) { + + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc closed"); + Die(ctx); +} + + +void TReadSessionActor::Handle(IContext::TEvReadFinished::TPtr& ev, const TActorContext& ctx) { + + auto& request = ev->Get()->Record; + auto token = request.token(); + request.set_token(""); + + if (!token.empty()) { //TODO refreshtoken here + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvAuth(token)); + } + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read done: success: " << ev->Get()->Success << " data: " << request); + + if (!ev->Get()->Success) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed"); + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvDone()); + return; + } auto converterFactory = TopicsHandler.GetConverterFactory(); auto MakePartitionId = [&](auto& request) { auto converter = converterFactory->MakeTopicNameConverter( @@ -332,388 +332,388 @@ void TReadSessionActor::Handle(IContext::TEvReadFinished::TPtr& ev, const TActor const ui64 assignId = request.assign_id(); return TPartitionId{converter, partition, assignId}; }; - switch (request.request_case()) { + switch (request.request_case()) { case MigrationStreamingReadClientMessage::kInitRequest: { - ctx.Send(ctx.SelfID, new TEvPQProxy::TEvReadInit(request, Request->GetStreamCtx()->GetPeerName())); - break; - } + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvReadInit(request, Request->GetStreamCtx()->GetPeerName())); + break; + } case MigrationStreamingReadClientMessage::kStatus: { //const auto& req = request.status(); ctx.Send(ctx.SelfID, new TEvPQProxy::TEvGetStatus(MakePartitionId(request.status()))); - if (!Request->GetStreamCtx()->Read()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); - Die(ctx); - return; - } - break; - - } + if (!Request->GetStreamCtx()->Read()) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); + Die(ctx); + return; + } + break; + + } case MigrationStreamingReadClientMessage::kRead: { - ctx.Send(ctx.SelfID, new TEvPQProxy::TEvRead()); // Proto read message have no parameters - break; - } + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvRead()); // Proto read message have no parameters + break; + } case MigrationStreamingReadClientMessage::kReleased: { //const auto& req = request.released(); ctx.Send(ctx.SelfID, new TEvPQProxy::TEvReleased(MakePartitionId(request.released()))); - if (!Request->GetStreamCtx()->Read()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); - Die(ctx); - return; - } - break; - - } + if (!Request->GetStreamCtx()->Read()) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); + Die(ctx); + return; + } + break; + + } case MigrationStreamingReadClientMessage::kStartRead: { - const auto& req = request.start_read(); + const auto& req = request.start_read(); + + const ui64 readOffset = req.read_offset(); + const ui64 commitOffset = req.commit_offset(); + const bool verifyReadOffset = req.verify_read_offset(); - const ui64 readOffset = req.read_offset(); - const ui64 commitOffset = req.commit_offset(); - const bool verifyReadOffset = req.verify_read_offset(); - ctx.Send(ctx.SelfID, new TEvPQProxy::TEvStartRead(MakePartitionId(request.start_read()), readOffset, commitOffset, verifyReadOffset)); - if (!Request->GetStreamCtx()->Read()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); - Die(ctx); - return; - } - break; - } + if (!Request->GetStreamCtx()->Read()) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); + Die(ctx); + return; + } + break; + } case MigrationStreamingReadClientMessage::kCommit: { - const auto& req = request.commit(); - - if (!req.cookies_size() && !RangesMode) { - CloseSession(TStringBuilder() << "can't commit without cookies", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; - } - if (RangesMode && !req.offset_ranges_size()) { - CloseSession(TStringBuilder() << "can't commit without offsets", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; - - } - - THashMap<ui64, TEvPQProxy::TCommitCookie> commitCookie; - THashMap<ui64, TEvPQProxy::TCommitRange> commitRange; - - for (auto& c: req.cookies()) { - commitCookie[c.assign_id()].Cookies.push_back(c.partition_cookie()); - } - for (auto& c: req.offset_ranges()) { - commitRange[c.assign_id()].Ranges.push_back(std::make_pair(c.start_offset(), c.end_offset())); - } - - for (auto& c : commitCookie) { - ctx.Send(ctx.SelfID, new TEvPQProxy::TEvCommitCookie(c.first, std::move(c.second))); - } - - for (auto& c : commitRange) { - ctx.Send(ctx.SelfID, new TEvPQProxy::TEvCommitRange(c.first, std::move(c.second))); - } - - if (!Request->GetStreamCtx()->Read()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); - Die(ctx); - return; - } - break; - } - - default: { - CloseSession(TStringBuilder() << "unsupported request", PersQueue::ErrorCode::BAD_REQUEST, ctx); - break; - } - } -} - - -void TReadSessionActor::Handle(IContext::TEvWriteFinished::TPtr& ev, const TActorContext& ctx) { - if (!ev->Get()->Success) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); - Die(ctx); - } - Y_VERIFY(!ActiveWrites.empty()); - ui64 sz = ActiveWrites.front(); - ActiveWrites.pop(); - Y_VERIFY(BytesInflight_ >= sz); - BytesInflight_ -= sz; - if (BytesInflight) (*BytesInflight) -= sz; - - ProcessReads(ctx); -} - - -void TReadSessionActor::Die(const TActorContext& ctx) { - - ctx.Send(AuthInitActor, new TEvents::TEvPoisonPill()); - - for (auto& p : Partitions) { - ctx.Send(p.second.Actor, new TEvents::TEvPoisonPill()); - - if (!p.second.Released) { + const auto& req = request.commit(); + + if (!req.cookies_size() && !RangesMode) { + CloseSession(TStringBuilder() << "can't commit without cookies", PersQueue::ErrorCode::BAD_REQUEST, ctx); + return; + } + if (RangesMode && !req.offset_ranges_size()) { + CloseSession(TStringBuilder() << "can't commit without offsets", PersQueue::ErrorCode::BAD_REQUEST, ctx); + return; + + } + + THashMap<ui64, TEvPQProxy::TCommitCookie> commitCookie; + THashMap<ui64, TEvPQProxy::TCommitRange> commitRange; + + for (auto& c: req.cookies()) { + commitCookie[c.assign_id()].Cookies.push_back(c.partition_cookie()); + } + for (auto& c: req.offset_ranges()) { + commitRange[c.assign_id()].Ranges.push_back(std::make_pair(c.start_offset(), c.end_offset())); + } + + for (auto& c : commitCookie) { + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvCommitCookie(c.first, std::move(c.second))); + } + + for (auto& c : commitRange) { + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvCommitRange(c.first, std::move(c.second))); + } + + if (!Request->GetStreamCtx()->Read()) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); + Die(ctx); + return; + } + break; + } + + default: { + CloseSession(TStringBuilder() << "unsupported request", PersQueue::ErrorCode::BAD_REQUEST, ctx); + break; + } + } +} + + +void TReadSessionActor::Handle(IContext::TEvWriteFinished::TPtr& ev, const TActorContext& ctx) { + if (!ev->Get()->Success) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); + Die(ctx); + } + Y_VERIFY(!ActiveWrites.empty()); + ui64 sz = ActiveWrites.front(); + ActiveWrites.pop(); + Y_VERIFY(BytesInflight_ >= sz); + BytesInflight_ -= sz; + if (BytesInflight) (*BytesInflight) -= sz; + + ProcessReads(ctx); +} + + +void TReadSessionActor::Die(const TActorContext& ctx) { + + ctx.Send(AuthInitActor, new TEvents::TEvPoisonPill()); + + for (auto& p : Partitions) { + ctx.Send(p.second.Actor, new TEvents::TEvPoisonPill()); + + if (!p.second.Released) { auto it = TopicCounters.find(p.second.Partition.TopicConverter->GetClientsideName()); - Y_VERIFY(it != TopicCounters.end()); - it->second.PartitionsInfly.Dec(); - it->second.PartitionsReleased.Inc(); - if (p.second.Releasing) - it->second.PartitionsToBeReleased.Dec(); - } - } - - for (auto& t : Topics) { - if (t.second.PipeClient) - NTabletPipe::CloseClient(ctx, t.second.PipeClient); - } - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " is DEAD"); - - if (SessionsActive) { - --(*SessionsActive); - } - if (BytesInflight) { - (*BytesInflight) -= BytesInflight_; - } - if (SessionsActive) { //PartsPerSession is inited too - PartsPerSession.DecFor(Partitions.size(), 1); - } - - ctx.Send(GetPQReadServiceActorID(), new TEvPQProxy::TEvSessionDead(Cookie)); - - TActorBootstrapped<TReadSessionActor>::Die(ctx); -} - -void TReadSessionActor::Handle(TEvPQProxy::TEvDone::TPtr&, const TActorContext& ctx) { - CloseSession(TStringBuilder() << "Reads done signal - closing everything", PersQueue::ErrorCode::OK, ctx); -} - -void TReadSessionActor::Handle(TEvPQProxy::TEvCommitCookie::TPtr& ev, const TActorContext& ctx) { - RequestNotChecked = true; - - if (CommitsDisabled) { - CloseSession("commits in session are disabled by client option", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; - } - const ui64& assignId = ev->Get()->AssignId; - auto it = Partitions.find(assignId); - if (it == Partitions.end()) //stale commit - ignore it - return; - - for (auto& c : ev->Get()->CommitInfo.Cookies) { - if(RangesMode) { - CloseSession("Commits cookies in ranges commit mode is illegal", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; - } - it->second.NextCommits.insert(c); - } - - ctx.Send(it->second.Actor, new TEvPQProxy::TEvCommitCookie(ev->Get()->AssignId, std::move(ev->Get()->CommitInfo))); -} - -void TReadSessionActor::Handle(TEvPQProxy::TEvCommitRange::TPtr& ev, const TActorContext& ctx) { - RequestNotChecked = true; - - if (CommitsDisabled) { - CloseSession("commits in session are disabled by client option", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; - } - const ui64& assignId = ev->Get()->AssignId; - auto it = Partitions.find(assignId); - if (it == Partitions.end()) //stale commit - ignore it - return; - - for (auto& c : ev->Get()->CommitInfo.Ranges) { - if(!RangesMode) { - CloseSession("Commits ranges in cookies commit mode is illegal", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; - } - if (c.first >= c.second || it->second.NextRanges.Intersects(c.first, c.second) || c.first < it->second.Offset) { - CloseSession(TStringBuilder() << "Offsets range [" << c.first << ", " << c.second << ") has already committed offsets, double committing is forbiden; or incorrect", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; - - } - it->second.NextRanges.InsertInterval(c.first, c.second); - } - - ctx.Send(it->second.Actor, new TEvPQProxy::TEvCommitRange(ev->Get()->AssignId, std::move(ev->Get()->CommitInfo))); -} - -void TReadSessionActor::Handle(TEvPQProxy::TEvAuth::TPtr& ev, const TActorContext& ctx) { - ProcessAuth(ev->Get()->Auth, ctx); -} - -void TReadSessionActor::Handle(TEvPQProxy::TEvStartRead::TPtr& ev, const TActorContext& ctx) { - RequestNotChecked = true; - - auto it = Partitions.find(ev->Get()->Partition.AssignId); - if (it == Partitions.end()) { - return; - } - - if (it == Partitions.end() || it->second.Releasing) { - //do nothing - already released partition - LOG_WARN_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got NOTACTUAL StartRead from client for " << ev->Get()->Partition - << " at offset " << ev->Get()->ReadOffset); - return; - } - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got StartRead from client for " << ev->Get()->Partition - << " at readOffset " << ev->Get()->ReadOffset << " commitOffset " << ev->Get()->CommitOffset); - - //proxy request to partition - allow initing - //TODO: add here VerifyReadOffset too and check it againts Committed position - ctx.Send(it->second.Actor, new TEvPQProxy::TEvLockPartition(ev->Get()->ReadOffset, ev->Get()->CommitOffset, ev->Get()->VerifyReadOffset, true)); -} - -void TReadSessionActor::Handle(TEvPQProxy::TEvReleased::TPtr& ev, const TActorContext& ctx) { - RequestNotChecked = true; - - auto it = Partitions.find(ev->Get()->Partition.AssignId); - if (it == Partitions.end()) { - return; - } - if (!it->second.Releasing) { - CloseSession(TStringBuilder() << "Release of partition that is not requested for release is forbiden for " << it->second.Partition, PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; - - } - Y_VERIFY(it->second.LockSent); - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got Released from client for " << ev->Get()->Partition); - - ReleasePartition(it, true, ctx); -} - -void TReadSessionActor::Handle(TEvPQProxy::TEvGetStatus::TPtr& ev, const TActorContext& ctx) { - - auto it = Partitions.find(ev->Get()->Partition.AssignId); - if (it == Partitions.end()) { - // Ignore request - client asking status after releasing of partition. - return; - } - if (!it->second.Releasing) { - // Ignore request - client asking status after releasing of partition. - return; - - } - ctx.Send(it->second.Actor, new TEvPQProxy::TEvGetStatus(ev->Get()->Partition)); -} - - - -void TReadSessionActor::DropPartition(THashMap<ui64, TPartitionActorInfo>::iterator it, const TActorContext& ctx) { - ctx.Send(it->second.Actor, new TEvents::TEvPoisonPill()); - bool res = ActualPartitionActors.erase(it->second.Actor); - Y_VERIFY(res); - + Y_VERIFY(it != TopicCounters.end()); + it->second.PartitionsInfly.Dec(); + it->second.PartitionsReleased.Inc(); + if (p.second.Releasing) + it->second.PartitionsToBeReleased.Dec(); + } + } + + for (auto& t : Topics) { + if (t.second.PipeClient) + NTabletPipe::CloseClient(ctx, t.second.PipeClient); + } + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " is DEAD"); + + if (SessionsActive) { + --(*SessionsActive); + } + if (BytesInflight) { + (*BytesInflight) -= BytesInflight_; + } + if (SessionsActive) { //PartsPerSession is inited too + PartsPerSession.DecFor(Partitions.size(), 1); + } + + ctx.Send(GetPQReadServiceActorID(), new TEvPQProxy::TEvSessionDead(Cookie)); + + TActorBootstrapped<TReadSessionActor>::Die(ctx); +} + +void TReadSessionActor::Handle(TEvPQProxy::TEvDone::TPtr&, const TActorContext& ctx) { + CloseSession(TStringBuilder() << "Reads done signal - closing everything", PersQueue::ErrorCode::OK, ctx); +} + +void TReadSessionActor::Handle(TEvPQProxy::TEvCommitCookie::TPtr& ev, const TActorContext& ctx) { + RequestNotChecked = true; + + if (CommitsDisabled) { + CloseSession("commits in session are disabled by client option", PersQueue::ErrorCode::BAD_REQUEST, ctx); + return; + } + const ui64& assignId = ev->Get()->AssignId; + auto it = Partitions.find(assignId); + if (it == Partitions.end()) //stale commit - ignore it + return; + + for (auto& c : ev->Get()->CommitInfo.Cookies) { + if(RangesMode) { + CloseSession("Commits cookies in ranges commit mode is illegal", PersQueue::ErrorCode::BAD_REQUEST, ctx); + return; + } + it->second.NextCommits.insert(c); + } + + ctx.Send(it->second.Actor, new TEvPQProxy::TEvCommitCookie(ev->Get()->AssignId, std::move(ev->Get()->CommitInfo))); +} + +void TReadSessionActor::Handle(TEvPQProxy::TEvCommitRange::TPtr& ev, const TActorContext& ctx) { + RequestNotChecked = true; + + if (CommitsDisabled) { + CloseSession("commits in session are disabled by client option", PersQueue::ErrorCode::BAD_REQUEST, ctx); + return; + } + const ui64& assignId = ev->Get()->AssignId; + auto it = Partitions.find(assignId); + if (it == Partitions.end()) //stale commit - ignore it + return; + + for (auto& c : ev->Get()->CommitInfo.Ranges) { + if(!RangesMode) { + CloseSession("Commits ranges in cookies commit mode is illegal", PersQueue::ErrorCode::BAD_REQUEST, ctx); + return; + } + if (c.first >= c.second || it->second.NextRanges.Intersects(c.first, c.second) || c.first < it->second.Offset) { + CloseSession(TStringBuilder() << "Offsets range [" << c.first << ", " << c.second << ") has already committed offsets, double committing is forbiden; or incorrect", PersQueue::ErrorCode::BAD_REQUEST, ctx); + return; + + } + it->second.NextRanges.InsertInterval(c.first, c.second); + } + + ctx.Send(it->second.Actor, new TEvPQProxy::TEvCommitRange(ev->Get()->AssignId, std::move(ev->Get()->CommitInfo))); +} + +void TReadSessionActor::Handle(TEvPQProxy::TEvAuth::TPtr& ev, const TActorContext& ctx) { + ProcessAuth(ev->Get()->Auth, ctx); +} + +void TReadSessionActor::Handle(TEvPQProxy::TEvStartRead::TPtr& ev, const TActorContext& ctx) { + RequestNotChecked = true; + + auto it = Partitions.find(ev->Get()->Partition.AssignId); + if (it == Partitions.end()) { + return; + } + + if (it == Partitions.end() || it->second.Releasing) { + //do nothing - already released partition + LOG_WARN_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got NOTACTUAL StartRead from client for " << ev->Get()->Partition + << " at offset " << ev->Get()->ReadOffset); + return; + } + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got StartRead from client for " << ev->Get()->Partition + << " at readOffset " << ev->Get()->ReadOffset << " commitOffset " << ev->Get()->CommitOffset); + + //proxy request to partition - allow initing + //TODO: add here VerifyReadOffset too and check it againts Committed position + ctx.Send(it->second.Actor, new TEvPQProxy::TEvLockPartition(ev->Get()->ReadOffset, ev->Get()->CommitOffset, ev->Get()->VerifyReadOffset, true)); +} + +void TReadSessionActor::Handle(TEvPQProxy::TEvReleased::TPtr& ev, const TActorContext& ctx) { + RequestNotChecked = true; + + auto it = Partitions.find(ev->Get()->Partition.AssignId); + if (it == Partitions.end()) { + return; + } + if (!it->second.Releasing) { + CloseSession(TStringBuilder() << "Release of partition that is not requested for release is forbiden for " << it->second.Partition, PersQueue::ErrorCode::BAD_REQUEST, ctx); + return; + + } + Y_VERIFY(it->second.LockSent); + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got Released from client for " << ev->Get()->Partition); + + ReleasePartition(it, true, ctx); +} + +void TReadSessionActor::Handle(TEvPQProxy::TEvGetStatus::TPtr& ev, const TActorContext& ctx) { + + auto it = Partitions.find(ev->Get()->Partition.AssignId); + if (it == Partitions.end()) { + // Ignore request - client asking status after releasing of partition. + return; + } + if (!it->second.Releasing) { + // Ignore request - client asking status after releasing of partition. + return; + + } + ctx.Send(it->second.Actor, new TEvPQProxy::TEvGetStatus(ev->Get()->Partition)); +} + + + +void TReadSessionActor::DropPartition(THashMap<ui64, TPartitionActorInfo>::iterator it, const TActorContext& ctx) { + ctx.Send(it->second.Actor, new TEvents::TEvPoisonPill()); + bool res = ActualPartitionActors.erase(it->second.Actor); + Y_VERIFY(res); + if (--NumPartitionsFromTopic[it->second.Partition.TopicConverter->GetClientsideName()] == 0) { bool res = TopicCounters.erase(it->second.Partition.TopicConverter->GetClientsideName()); - Y_VERIFY(res); - } - - PartsPerSession.DecFor(Partitions.size(), 1); - BalancerGeneration.erase(it->first); - Partitions.erase(it); - PartsPerSession.IncFor(Partitions.size(), 1); -} - - -void TReadSessionActor::Handle(TEvPQProxy::TEvCommitDone::TPtr& ev, const TActorContext& ctx) { - - Y_VERIFY(!CommitsDisabled); - - if (!ActualPartitionActor(ev->Sender)) - return; - - ui64 assignId = ev->Get()->AssignId; - - auto it = Partitions.find(assignId); - Y_VERIFY(it != Partitions.end()); - Y_VERIFY(it->second.Offset < ev->Get()->Offset); - it->second.NextRanges.EraseInterval(it->second.Offset, ev->Get()->Offset); - - - if (ev->Get()->StartCookie == Max<ui64>()) //means commit at start - return; - + Y_VERIFY(res); + } + + PartsPerSession.DecFor(Partitions.size(), 1); + BalancerGeneration.erase(it->first); + Partitions.erase(it); + PartsPerSession.IncFor(Partitions.size(), 1); +} + + +void TReadSessionActor::Handle(TEvPQProxy::TEvCommitDone::TPtr& ev, const TActorContext& ctx) { + + Y_VERIFY(!CommitsDisabled); + + if (!ActualPartitionActor(ev->Sender)) + return; + + ui64 assignId = ev->Get()->AssignId; + + auto it = Partitions.find(assignId); + Y_VERIFY(it != Partitions.end()); + Y_VERIFY(it->second.Offset < ev->Get()->Offset); + it->second.NextRanges.EraseInterval(it->second.Offset, ev->Get()->Offset); + + + if (ev->Get()->StartCookie == Max<ui64>()) //means commit at start + return; + MigrationStreamingReadServerMessage result; - result.set_status(Ydb::StatusIds::SUCCESS); - if (!RangesMode) { - for (ui64 i = ev->Get()->StartCookie; i <= ev->Get()->LastCookie; ++i) { - auto c = result.mutable_committed()->add_cookies(); - c->set_partition_cookie(i); - c->set_assign_id(assignId); - it->second.NextCommits.erase(i); - it->second.ReadIdCommitted = i; - } - } else { - auto c = result.mutable_committed()->add_offset_ranges(); - c->set_assign_id(assignId); - c->set_start_offset(it->second.Offset); - c->set_end_offset(ev->Get()->Offset); - } - - it->second.Offset = ev->Get()->Offset; - - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " replying for commits from assignId " << assignId << " from " << ev->Get()->StartCookie << " to " << ev->Get()->LastCookie << " to offset " << it->second.Offset); - if (!WriteResponse(std::move(result))) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); - Die(ctx); - return; - } -} - - -void TReadSessionActor::Handle(TEvPQProxy::TEvReadSessionStatus::TPtr& ev, const TActorContext& ctx) { - THolder<TEvPQProxy::TEvReadSessionStatusResponse> result(new TEvPQProxy::TEvReadSessionStatusResponse()); - for (auto& p : Partitions) { - auto part = result->Record.AddPartition(); + result.set_status(Ydb::StatusIds::SUCCESS); + if (!RangesMode) { + for (ui64 i = ev->Get()->StartCookie; i <= ev->Get()->LastCookie; ++i) { + auto c = result.mutable_committed()->add_cookies(); + c->set_partition_cookie(i); + c->set_assign_id(assignId); + it->second.NextCommits.erase(i); + it->second.ReadIdCommitted = i; + } + } else { + auto c = result.mutable_committed()->add_offset_ranges(); + c->set_assign_id(assignId); + c->set_start_offset(it->second.Offset); + c->set_end_offset(ev->Get()->Offset); + } + + it->second.Offset = ev->Get()->Offset; + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " replying for commits from assignId " << assignId << " from " << ev->Get()->StartCookie << " to " << ev->Get()->LastCookie << " to offset " << it->second.Offset); + if (!WriteResponse(std::move(result))) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); + Die(ctx); + return; + } +} + + +void TReadSessionActor::Handle(TEvPQProxy::TEvReadSessionStatus::TPtr& ev, const TActorContext& ctx) { + THolder<TEvPQProxy::TEvReadSessionStatusResponse> result(new TEvPQProxy::TEvReadSessionStatusResponse()); + for (auto& p : Partitions) { + auto part = result->Record.AddPartition(); part->SetTopic(p.second.Partition.TopicConverter->GetPrimaryPath()); - part->SetPartition(p.second.Partition.Partition); - part->SetAssignId(p.second.Partition.AssignId); - for (auto& c : p.second.NextCommits) { - part->AddNextCommits(c); - } - part->SetReadIdCommitted(p.second.ReadIdCommitted); - part->SetLastReadId(p.second.ReadIdToResponse - 1); - part->SetTimestampMs(p.second.AssignTimestamp.MilliSeconds()); - } - result->Record.SetSession(Session); - result->Record.SetTimestamp(StartTimestamp.MilliSeconds()); - - result->Record.SetClientNode(PeerName); - result->Record.SetProxyNodeId(ctx.SelfID.NodeId()); - - ctx.Send(ev->Sender, result.Release()); -} - -void TReadSessionActor::Handle(TEvPQProxy::TEvReadInit::TPtr& ev, const TActorContext& ctx) { - - THolder<TEvPQProxy::TEvReadInit> event(ev->Release()); - - if (!Topics.empty()) { - //answer error - CloseSession("got second init request", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; - } - + part->SetPartition(p.second.Partition.Partition); + part->SetAssignId(p.second.Partition.AssignId); + for (auto& c : p.second.NextCommits) { + part->AddNextCommits(c); + } + part->SetReadIdCommitted(p.second.ReadIdCommitted); + part->SetLastReadId(p.second.ReadIdToResponse - 1); + part->SetTimestampMs(p.second.AssignTimestamp.MilliSeconds()); + } + result->Record.SetSession(Session); + result->Record.SetTimestamp(StartTimestamp.MilliSeconds()); + + result->Record.SetClientNode(PeerName); + result->Record.SetProxyNodeId(ctx.SelfID.NodeId()); + + ctx.Send(ev->Sender, result.Release()); +} + +void TReadSessionActor::Handle(TEvPQProxy::TEvReadInit::TPtr& ev, const TActorContext& ctx) { + + THolder<TEvPQProxy::TEvReadInit> event(ev->Release()); + + if (!Topics.empty()) { + //answer error + CloseSession("got second init request", PersQueue::ErrorCode::BAD_REQUEST, ctx); + return; + } + const auto& init = event->Request.init_request(); - + if (!init.topics_read_settings_size()) { - CloseSession("no topics in init request", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; - } - + CloseSession("no topics in init request", PersQueue::ErrorCode::BAD_REQUEST, ctx); + return; + } + if (init.consumer().empty()) { - CloseSession("no consumer in init request", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; - } - + CloseSession("no consumer in init request", PersQueue::ErrorCode::BAD_REQUEST, ctx); + return; + } + ClientId = NPersQueue::ConvertNewConsumerName(init.consumer(), ctx); ClientPath = init.consumer(); - - TStringBuilder session; - session << ClientPath << "_" << ctx.SelfID.NodeId() << "_" << Cookie << "_" << TAppData::RandomProvider->GenRand64() << "_v1"; - Session = session; + + TStringBuilder session; + session << ClientPath << "_" << ctx.SelfID.NodeId() << "_" << Cookie << "_" << TAppData::RandomProvider->GenRand64() << "_v1"; + Session = session; CommitsDisabled = false; - RangesMode = init.ranges_mode(); - - MaxReadMessagesCount = NormalizeMaxReadMessagesCount(init.read_params().max_read_messages_count()); - MaxReadSize = NormalizeMaxReadSize(init.read_params().max_read_size()); + RangesMode = init.ranges_mode(); + + MaxReadMessagesCount = NormalizeMaxReadMessagesCount(init.read_params().max_read_messages_count()); + MaxReadSize = NormalizeMaxReadSize(init.read_params().max_read_size()); if (init.max_lag_duration_ms() < 0) { CloseSession("max_lag_duration_ms must be nonnegative number", PersQueue::ErrorCode::BAD_REQUEST, ctx); return; @@ -725,19 +725,19 @@ void TReadSessionActor::Handle(TEvPQProxy::TEvReadInit::TPtr& ev, const TActorCo } ReadTimestampMs = static_cast<ui64>(init.start_from_written_at_ms()); - PeerName = event->PeerName; - - ReadOnlyLocal = init.read_only_original(); - + PeerName = event->PeerName; + + ReadOnlyLocal = init.read_only_original(); + for (const auto& topic : init.topics_read_settings()) { auto converter = TopicsHandler.GetConverterFactory()->MakeTopicNameConverter( topic.topic(), TString(), Request->GetDatabaseName().GetOrElse(TString()) ); const auto topicName = converter->GetModernName(); if (topicName.empty()) { - CloseSession("empty topic in init request", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; - } + CloseSession("empty topic in init request", PersQueue::ErrorCode::BAD_REQUEST, ctx); + return; + } if (topic.start_from_written_at_ms() < 0) { CloseSession("start_from_written_at_ms must be nonnegative number", PersQueue::ErrorCode::BAD_REQUEST, ctx); return; @@ -753,99 +753,99 @@ void TReadSessionActor::Handle(TEvPQProxy::TEvReadInit::TPtr& ev, const TActorCo return; } TopicGroups[topicName].push_back(static_cast<ui32>(pg)); - } + } ReadFromTimestamp[topicName] = topic.start_from_written_at_ms(); - } - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " init: " << event->Request << " from " << PeerName); - - SetupCounters(); - - if (Request->GetInternalToken().empty()) { - if (AppData(ctx)->PQConfig.GetRequireCredentialsInNewProtocol()) { - CloseSession("Unauthenticated access is forbidden, please provide credentials", PersQueue::ErrorCode::ACCESS_DENIED, ctx); - return; - } - } else { - Y_VERIFY(Request->GetYdbToken()); - Auth = *(Request->GetYdbToken()); - Token = new NACLib::TUserToken(Request->GetInternalToken()); - } + } + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " init: " << event->Request << " from " << PeerName); + + SetupCounters(); + + if (Request->GetInternalToken().empty()) { + if (AppData(ctx)->PQConfig.GetRequireCredentialsInNewProtocol()) { + CloseSession("Unauthenticated access is forbidden, please provide credentials", PersQueue::ErrorCode::ACCESS_DENIED, ctx); + return; + } + } else { + Y_VERIFY(Request->GetYdbToken()); + Auth = *(Request->GetYdbToken()); + Token = new NACLib::TUserToken(Request->GetInternalToken()); + } AuthInitActor = ctx.Register(new TReadInitAndAuthActor( ctx, ctx.SelfID, ClientId, Cookie, Session, SchemeCache, NewSchemeCache, Counters, Token, TopicsHandler.GetReadTopicsList(TopicsToResolve, ReadOnlyLocal, Request->GetDatabaseName().GetOrElse(TString())) )); - - - auto subGroup = GetServiceCounters(Counters, "pqproxy|SLI"); - Aggr = {{{{"Account", ClientPath.substr(0, ClientPath.find("/"))}}, {"total"}}}; - + + + auto subGroup = GetServiceCounters(Counters, "pqproxy|SLI"); + Aggr = {{{{"Account", ClientPath.substr(0, ClientPath.find("/"))}}, {"total"}}}; + SLIErrors = NKikimr::NPQ::TMultiCounter(subGroup, Aggr, {}, {"RequestsError"}, true, "sensor", false); SLITotal = NKikimr::NPQ::TMultiCounter(subGroup, Aggr, {}, {"RequestsTotal"}, true, "sensor", false); - SLITotal.Inc(); - + SLITotal.Inc(); + } - + void TReadSessionActor::RegisterSession(const TActorId& pipe, const TString& topic, const TVector<ui32>& groups, const TActorContext& ctx) -{ - - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " register session to " << topic); - THolder<TEvPersQueue::TEvRegisterReadSession> request; - request.Reset(new TEvPersQueue::TEvRegisterReadSession); - auto& req = request->Record; - req.SetSession(Session); - req.SetClientNode(PeerName); +{ + + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " register session to " << topic); + THolder<TEvPersQueue::TEvRegisterReadSession> request; + request.Reset(new TEvPersQueue::TEvRegisterReadSession); + auto& req = request->Record; + req.SetSession(Session); + req.SetClientNode(PeerName); ActorIdToProto(pipe, req.MutablePipeClient()); - req.SetClientId(ClientId); - - for (ui32 i = 0; i < groups.size(); ++i) { - req.AddGroups(groups[i]); - } - - NTabletPipe::SendData(ctx, pipe, request.Release()); -} - -void TReadSessionActor::RegisterSessions(const TActorContext& ctx) { - InitDone = true; - + req.SetClientId(ClientId); + + for (ui32 i = 0; i < groups.size(); ++i) { + req.AddGroups(groups[i]); + } + + NTabletPipe::SendData(ctx, pipe, request.Release()); +} + +void TReadSessionActor::RegisterSessions(const TActorContext& ctx) { + InitDone = true; + for (auto& t : Topics) { auto& topic = t.first; - RegisterSession(t.second.PipeClient, topic, t.second.Groups, ctx); + RegisterSession(t.second.PipeClient, topic, t.second.Groups, ctx); NumPartitionsFromTopic[t.second.TopicNameConverter->GetClientsideName()] = 0; - } -} - - -void TReadSessionActor::SetupCounters() -{ - auto subGroup = GetServiceCounters(Counters, "pqproxy|readSession")->GetSubgroup("Client", ClientId)->GetSubgroup("ConsumerPath", ClientPath); - SessionsCreated = subGroup->GetExpiringCounter("SessionsCreated", true); - SessionsActive = subGroup->GetExpiringCounter("SessionsActive", false); - Errors = subGroup->GetExpiringCounter("Errors", true); - PipeReconnects = subGroup->GetExpiringCounter("PipeReconnects", true); - - BytesInflight = subGroup->GetExpiringCounter("BytesInflight", false); - - PartsPerSession = NKikimr::NPQ::TPercentileCounter(subGroup->GetSubgroup("sensor", "PartsPerSession"), {}, {}, "Count", + } +} + + +void TReadSessionActor::SetupCounters() +{ + auto subGroup = GetServiceCounters(Counters, "pqproxy|readSession")->GetSubgroup("Client", ClientId)->GetSubgroup("ConsumerPath", ClientPath); + SessionsCreated = subGroup->GetExpiringCounter("SessionsCreated", true); + SessionsActive = subGroup->GetExpiringCounter("SessionsActive", false); + Errors = subGroup->GetExpiringCounter("Errors", true); + PipeReconnects = subGroup->GetExpiringCounter("PipeReconnects", true); + + BytesInflight = subGroup->GetExpiringCounter("BytesInflight", false); + + PartsPerSession = NKikimr::NPQ::TPercentileCounter(subGroup->GetSubgroup("sensor", "PartsPerSession"), {}, {}, "Count", TVector<std::pair<ui64, TString>>{{1, "1"}, {2, "2"}, {5, "5"}, - {10, "10"}, {20, "20"}, {50, "50"}, {70, "70"}, - {100, "100"}, {150, "150"}, {300,"300"}, {99999999, "99999999"}}, false); - - ++(*SessionsCreated); - ++(*SessionsActive); - PartsPerSession.IncFor(Partitions.size(), 1); //for 0 - -} - - -void TReadSessionActor::SetupTopicCounters(const TString& topic) -{ - auto& topicCounters = TopicCounters[topic]; + {10, "10"}, {20, "20"}, {50, "50"}, {70, "70"}, + {100, "100"}, {150, "150"}, {300,"300"}, {99999999, "99999999"}}, false); + + ++(*SessionsCreated); + ++(*SessionsActive); + PartsPerSession.IncFor(Partitions.size(), 1); //for 0 + +} + + +void TReadSessionActor::SetupTopicCounters(const TString& topic) +{ + auto& topicCounters = TopicCounters[topic]; auto subGroup = GetServiceCounters(Counters, "pqproxy|readSession"); -//client/consumerPath Account/Producer OriginDC Topic/TopicPath - TVector<NPQ::TLabelsInfo> aggr = NKikimr::NPQ::GetLabels(topic); - TVector<std::pair<TString, TString>> cons = {{"Client", ClientId}, {"ConsumerPath", ClientPath}}; - +//client/consumerPath Account/Producer OriginDC Topic/TopicPath + TVector<NPQ::TLabelsInfo> aggr = NKikimr::NPQ::GetLabels(topic); + TVector<std::pair<TString, TString>> cons = {{"Client", ClientId}, {"ConsumerPath", ClientPath}}; + topicCounters.PartitionsLocked = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"PartitionsLocked"}, true); topicCounters.PartitionsReleased = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"PartitionsReleased"}, true); topicCounters.PartitionsToBeReleased = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"PartitionsToBeReleased"}, false); @@ -854,12 +854,12 @@ void TReadSessionActor::SetupTopicCounters(const TString& topic) topicCounters.Errors = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"PartitionsErrors"}, true); topicCounters.Commits = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"Commits"}, true); topicCounters.WaitsForData = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"WaitsForData"}, true); - - topicCounters.CommitLatency = CommitLatency; - topicCounters.SLIBigLatency = SLIBigLatency; - topicCounters.SLITotal = SLITotal; -} - + + topicCounters.CommitLatency = CommitLatency; + topicCounters.SLIBigLatency = SLIBigLatency; + topicCounters.SLITotal = SLITotal; +} + void TReadSessionActor::SetupTopicCounters(const TString& topic, const TString& cloudId, const TString& dbId, const TString& folderId) { @@ -883,106 +883,106 @@ void TReadSessionActor::SetupTopicCounters(const TString& topic, const TString& topicCounters.SLITotal = SLITotal; } -void TReadSessionActor::Handle(TEvPQProxy::TEvAuthResultOk::TPtr& ev, const TActorContext& ctx) { - - LastACLCheckTimestamp = ctx.Now(); - +void TReadSessionActor::Handle(TEvPQProxy::TEvAuthResultOk::TPtr& ev, const TActorContext& ctx) { + + LastACLCheckTimestamp = ctx.Now(); + LOG_INFO_S( ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " auth ok, got " << ev->Get()->TopicAndTablets.size() << " topics, init done " << InitDone ); - + AuthInitActor = TActorId(); - - if (!InitDone) { - ui32 initBorder = AppData(ctx)->PQConfig.GetReadInitLatencyBigMs(); - ui32 readBorder = AppData(ctx)->PQConfig.GetReadLatencyBigMs(); - ui32 readBorderFromDisk = AppData(ctx)->PQConfig.GetReadLatencyFromDiskBigMs(); - - auto subGroup = GetServiceCounters(Counters, "pqproxy|SLI"); - InitLatency = NKikimr::NPQ::CreateSLIDurationCounter(subGroup, Aggr, "ReadInit", initBorder, {100, 200, 500, 1000, 1500, 2000, 5000, 10000, 30000, 99999999}); - CommitLatency = NKikimr::NPQ::CreateSLIDurationCounter(subGroup, Aggr, "Commit", AppData(ctx)->PQConfig.GetCommitLatencyBigMs(), {100, 200, 500, 1000, 1500, 2000, 5000, 10000, 30000, 99999999}); + + if (!InitDone) { + ui32 initBorder = AppData(ctx)->PQConfig.GetReadInitLatencyBigMs(); + ui32 readBorder = AppData(ctx)->PQConfig.GetReadLatencyBigMs(); + ui32 readBorderFromDisk = AppData(ctx)->PQConfig.GetReadLatencyFromDiskBigMs(); + + auto subGroup = GetServiceCounters(Counters, "pqproxy|SLI"); + InitLatency = NKikimr::NPQ::CreateSLIDurationCounter(subGroup, Aggr, "ReadInit", initBorder, {100, 200, 500, 1000, 1500, 2000, 5000, 10000, 30000, 99999999}); + CommitLatency = NKikimr::NPQ::CreateSLIDurationCounter(subGroup, Aggr, "Commit", AppData(ctx)->PQConfig.GetCommitLatencyBigMs(), {100, 200, 500, 1000, 1500, 2000, 5000, 10000, 30000, 99999999}); SLIBigLatency = NKikimr::NPQ::TMultiCounter(subGroup, Aggr, {}, {"RequestsBigLatency"}, true, "sensor", false); - ReadLatency = NKikimr::NPQ::CreateSLIDurationCounter(subGroup, Aggr, "Read", readBorder, {100, 200, 500, 1000, 1500, 2000, 5000, 10000, 30000, 99999999}); - ReadLatencyFromDisk = NKikimr::NPQ::CreateSLIDurationCounter(subGroup, Aggr, "ReadFromDisk", readBorderFromDisk, {100, 200, 500, 1000, 1500, 2000, 5000, 10000, 30000, 99999999}); + ReadLatency = NKikimr::NPQ::CreateSLIDurationCounter(subGroup, Aggr, "Read", readBorder, {100, 200, 500, 1000, 1500, 2000, 5000, 10000, 30000, 99999999}); + ReadLatencyFromDisk = NKikimr::NPQ::CreateSLIDurationCounter(subGroup, Aggr, "ReadFromDisk", readBorderFromDisk, {100, 200, 500, 1000, 1500, 2000, 5000, 10000, 30000, 99999999}); SLIBigReadLatency = NKikimr::NPQ::TMultiCounter(subGroup, Aggr, {}, {"ReadBigLatency"}, true, "sensor", false); ReadsTotal = NKikimr::NPQ::TMultiCounter(subGroup, Aggr, {}, {"ReadsTotal"}, true, "sensor", false); - - ui32 initDurationMs = (ctx.Now() - StartTime).MilliSeconds(); - InitLatency.IncFor(initDurationMs, 1); - if (initDurationMs >= initBorder) { - SLIBigLatency.Inc(); - } - + + ui32 initDurationMs = (ctx.Now() - StartTime).MilliSeconds(); + InitLatency.IncFor(initDurationMs, 1); + if (initDurationMs >= initBorder) { + SLIBigLatency.Inc(); + } + MigrationStreamingReadServerMessage result; - result.set_status(Ydb::StatusIds::SUCCESS); - + result.set_status(Ydb::StatusIds::SUCCESS); + result.mutable_init_response()->set_session_id(Session); - if (!WriteResponse(std::move(result))) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); - Die(ctx); - return; - } - - if (!Request->GetStreamCtx()->Read()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); - Die(ctx); - return; - } - - - Y_VERIFY(!BalancersInitStarted); - BalancersInitStarted = true; - + if (!WriteResponse(std::move(result))) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); + Die(ctx); + return; + } + + if (!Request->GetStreamCtx()->Read()) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); + Die(ctx); + return; + } + + + Y_VERIFY(!BalancersInitStarted); + BalancersInitStarted = true; + for (auto& t : ev->Get()->TopicAndTablets) { // ToDo - return something from Init and Auth Actor (Full Path - ?) auto& topicHolder = Topics[t.TopicNameConverter->GetClientsideName()]; topicHolder.TabletID = t.TabletID; topicHolder.TopicNameConverter = t.TopicNameConverter; FullPathToConverter[t.TopicNameConverter->GetPrimaryPath()] = t.TopicNameConverter; - } - - for (auto& t : Topics) { - NTabletPipe::TClientConfig clientConfig; - - clientConfig.CheckAliveness = false; - - clientConfig.RetryPolicy = RetryPolicyForPipes; + } + + for (auto& t : Topics) { + NTabletPipe::TClientConfig clientConfig; + + clientConfig.CheckAliveness = false; + + clientConfig.RetryPolicy = RetryPolicyForPipes; t.second.PipeClient = ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, t.second.TabletID, clientConfig)); auto it = TopicGroups.find(t.second.TopicNameConverter->GetModernName()); - if (it != TopicGroups.end()) { - t.second.Groups = it->second; - } - } - - RegisterSessions(ctx); - - ctx.Schedule(CHECK_ACL_DELAY, new TEvents::TEvWakeup()); - } else { - for (auto& t : ev->Get()->TopicAndTablets) { + if (it != TopicGroups.end()) { + t.second.Groups = it->second; + } + } + + RegisterSessions(ctx); + + ctx.Schedule(CHECK_ACL_DELAY, new TEvents::TEvWakeup()); + } else { + for (auto& t : ev->Get()->TopicAndTablets) { if (Topics.find(t.TopicNameConverter->GetClientsideName()) == Topics.end()) { CloseSession( TStringBuilder() << "list of topics changed - new topic '" << t.TopicNameConverter->GetClientsideName() << "' found", PersQueue::ErrorCode::BAD_REQUEST, ctx ); - return; - } - } - } -} - - -void TReadSessionActor::Handle(TEvPersQueue::TEvLockPartition::TPtr& ev, const TActorContext& ctx) { - - auto& record = ev->Get()->Record; - Y_VERIFY(record.GetSession() == Session); - Y_VERIFY(record.GetClientId() == ClientId); - + return; + } + } + } +} + + +void TReadSessionActor::Handle(TEvPersQueue::TEvLockPartition::TPtr& ev, const TActorContext& ctx) { + + auto& record = ev->Get()->Record; + Y_VERIFY(record.GetSession() == Session); + Y_VERIFY(record.GetClientId() == ClientId); + TActorId pipe = ActorIdFromProto(record.GetPipeClient()); auto converterIter = FullPathToConverter.find(NPersQueue::NormalizeFullPath(record.GetPath())); - + if (converterIter.IsEnd()) { LOG_DEBUG_S( ctx, NKikimrServices::PQ_READ_PROXY, @@ -992,325 +992,325 @@ void TReadSessionActor::Handle(TEvPersQueue::TEvLockPartition::TPtr& ev, const T } const auto& topic = converterIter->second->GetClientsideName(); auto jt = Topics.find(topic); // ToDo - Check - if (jt == Topics.end() || pipe != jt->second.PipeClient) { //this is message from old version of pipe + if (jt == Topics.end() || pipe != jt->second.PipeClient) { //this is message from old version of pipe LOG_ALERT_S( ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " ignored ev lock for topic = " << topic << " path recognized, but topic is unknown, this is unexpected" ); - return; - } - + return; + } + if (NumPartitionsFromTopic[topic]++ == 0) { if (AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { SetupTopicCounters(topic, jt->second.CloudId, jt->second.DbId, jt->second.FolderId); } else { SetupTopicCounters(topic); } - } - + } + auto it = TopicCounters.find(topic); - Y_VERIFY(it != TopicCounters.end()); - - ui64 assignId = NextAssignId++; - BalancerGeneration[assignId] = {record.GetGeneration(), record.GetStep()}; + Y_VERIFY(it != TopicCounters.end()); + + ui64 assignId = NextAssignId++; + BalancerGeneration[assignId] = {record.GetGeneration(), record.GetStep()}; TPartitionId partitionId{converterIter->second, record.GetPartition(), assignId}; - - IActor* partitionActor = new TPartitionActor(ctx.SelfID, ClientId, ClientPath, Cookie, Session, partitionId, record.GetGeneration(), - record.GetStep(), record.GetTabletId(), it->second, CommitsDisabled, ClientDC); - + + IActor* partitionActor = new TPartitionActor(ctx.SelfID, ClientId, ClientPath, Cookie, Session, partitionId, record.GetGeneration(), + record.GetStep(), record.GetTabletId(), it->second, CommitsDisabled, ClientDC); + TActorId actorId = ctx.Register(partitionActor); - PartsPerSession.DecFor(Partitions.size(), 1); - Y_VERIFY(record.GetGeneration() > 0); - auto pp = Partitions.insert(std::make_pair(assignId, TPartitionActorInfo{actorId, partitionId, ctx})); - Y_VERIFY(pp.second); - PartsPerSession.IncFor(Partitions.size(), 1); - - bool res = ActualPartitionActors.insert(actorId).second; - Y_VERIFY(res); - - it->second.PartitionsLocked.Inc(); - it->second.PartitionsInfly.Inc(); - - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " Assign: " << record); - + PartsPerSession.DecFor(Partitions.size(), 1); + Y_VERIFY(record.GetGeneration() > 0); + auto pp = Partitions.insert(std::make_pair(assignId, TPartitionActorInfo{actorId, partitionId, ctx})); + Y_VERIFY(pp.second); + PartsPerSession.IncFor(Partitions.size(), 1); + + bool res = ActualPartitionActors.insert(actorId).second; + Y_VERIFY(res); + + it->second.PartitionsLocked.Inc(); + it->second.PartitionsInfly.Inc(); + + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " Assign: " << record); + ctx.Send(actorId, new TEvPQProxy::TEvLockPartition(0, 0, false, false)); -} - -void TReadSessionActor::Handle(TEvPQProxy::TEvPartitionStatus::TPtr& ev, const TActorContext& ctx) { - if (!ActualPartitionActor(ev->Sender)) - return; - - auto it = Partitions.find(ev->Get()->Partition.AssignId); - Y_VERIFY(it != Partitions.end()); - Y_VERIFY(!it->second.Releasing); // if releasing and no lock sent yet - then server must already release partition - - if (ev->Get()->Init) { - Y_VERIFY(!it->second.LockSent); - - it->second.LockSent = true; - it->second.Offset = ev->Get()->Offset; - +} + +void TReadSessionActor::Handle(TEvPQProxy::TEvPartitionStatus::TPtr& ev, const TActorContext& ctx) { + if (!ActualPartitionActor(ev->Sender)) + return; + + auto it = Partitions.find(ev->Get()->Partition.AssignId); + Y_VERIFY(it != Partitions.end()); + Y_VERIFY(!it->second.Releasing); // if releasing and no lock sent yet - then server must already release partition + + if (ev->Get()->Init) { + Y_VERIFY(!it->second.LockSent); + + it->second.LockSent = true; + it->second.Offset = ev->Get()->Offset; + MigrationStreamingReadServerMessage result; - result.set_status(Ydb::StatusIds::SUCCESS); - + result.set_status(Ydb::StatusIds::SUCCESS); + result.mutable_assigned()->mutable_topic()->set_path(ev->Get()->Partition.TopicConverter->GetModernName()); result.mutable_assigned()->set_cluster(ev->Get()->Partition.TopicConverter->GetCluster()); - result.mutable_assigned()->set_partition(ev->Get()->Partition.Partition); - result.mutable_assigned()->set_assign_id(it->first); - - result.mutable_assigned()->set_read_offset(ev->Get()->Offset); - result.mutable_assigned()->set_end_offset(ev->Get()->EndOffset); - - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " sending to client create partition stream event"); - - auto pp = it->second.Partition; - pp.AssignId = 0; - auto jt = PartitionToControlMessages.find(pp); - if (jt == PartitionToControlMessages.end()) { - if (!WriteResponse(std::move(result))) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); - Die(ctx); - return; - } - } else { - Y_VERIFY(jt->second.Infly); - jt->second.ControlMessages.push_back(result); - } - } else { - Y_VERIFY(it->second.LockSent); - + result.mutable_assigned()->set_partition(ev->Get()->Partition.Partition); + result.mutable_assigned()->set_assign_id(it->first); + + result.mutable_assigned()->set_read_offset(ev->Get()->Offset); + result.mutable_assigned()->set_end_offset(ev->Get()->EndOffset); + + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " sending to client create partition stream event"); + + auto pp = it->second.Partition; + pp.AssignId = 0; + auto jt = PartitionToControlMessages.find(pp); + if (jt == PartitionToControlMessages.end()) { + if (!WriteResponse(std::move(result))) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); + Die(ctx); + return; + } + } else { + Y_VERIFY(jt->second.Infly); + jt->second.ControlMessages.push_back(result); + } + } else { + Y_VERIFY(it->second.LockSent); + MigrationStreamingReadServerMessage result; - result.set_status(Ydb::StatusIds::SUCCESS); - + result.set_status(Ydb::StatusIds::SUCCESS); + result.mutable_partition_status()->mutable_topic()->set_path(ev->Get()->Partition.TopicConverter->GetModernName()); result.mutable_partition_status()->set_cluster(ev->Get()->Partition.TopicConverter->GetCluster()); - result.mutable_partition_status()->set_partition(ev->Get()->Partition.Partition); - result.mutable_partition_status()->set_assign_id(it->first); - - result.mutable_partition_status()->set_committed_offset(ev->Get()->Offset); - result.mutable_partition_status()->set_end_offset(ev->Get()->EndOffset); - result.mutable_partition_status()->set_write_watermark_ms(ev->Get()->WriteTimestampEstimateMs); - - auto pp = it->second.Partition; - pp.AssignId = 0; - auto jt = PartitionToControlMessages.find(pp); - if (jt == PartitionToControlMessages.end()) { - if (!WriteResponse(std::move(result))) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); - Die(ctx); - return; - } - } else { - Y_VERIFY(jt->second.Infly); - jt->second.ControlMessages.push_back(result); - } - } -} - -void TReadSessionActor::Handle(TEvPersQueue::TEvError::TPtr& ev, const TActorContext& ctx) { - CloseSession(ev->Get()->Record.GetDescription(), ConvertOldCode(ev->Get()->Record.GetCode()), ctx); -} - - -void TReadSessionActor::SendReleaseSignalToClient(const THashMap<ui64, TPartitionActorInfo>::iterator& it, bool kill, const TActorContext& ctx) -{ + result.mutable_partition_status()->set_partition(ev->Get()->Partition.Partition); + result.mutable_partition_status()->set_assign_id(it->first); + + result.mutable_partition_status()->set_committed_offset(ev->Get()->Offset); + result.mutable_partition_status()->set_end_offset(ev->Get()->EndOffset); + result.mutable_partition_status()->set_write_watermark_ms(ev->Get()->WriteTimestampEstimateMs); + + auto pp = it->second.Partition; + pp.AssignId = 0; + auto jt = PartitionToControlMessages.find(pp); + if (jt == PartitionToControlMessages.end()) { + if (!WriteResponse(std::move(result))) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); + Die(ctx); + return; + } + } else { + Y_VERIFY(jt->second.Infly); + jt->second.ControlMessages.push_back(result); + } + } +} + +void TReadSessionActor::Handle(TEvPersQueue::TEvError::TPtr& ev, const TActorContext& ctx) { + CloseSession(ev->Get()->Record.GetDescription(), ConvertOldCode(ev->Get()->Record.GetCode()), ctx); +} + + +void TReadSessionActor::SendReleaseSignalToClient(const THashMap<ui64, TPartitionActorInfo>::iterator& it, bool kill, const TActorContext& ctx) +{ MigrationStreamingReadServerMessage result; - result.set_status(Ydb::StatusIds::SUCCESS); - + result.set_status(Ydb::StatusIds::SUCCESS); + result.mutable_release()->mutable_topic()->set_path(it->second.Partition.TopicConverter->GetModernName()); result.mutable_release()->set_cluster(it->second.Partition.TopicConverter->GetCluster()); - result.mutable_release()->set_partition(it->second.Partition.Partition); - result.mutable_release()->set_assign_id(it->second.Partition.AssignId); - result.mutable_release()->set_forceful_release(kill); - result.mutable_release()->set_commit_offset(it->second.Offset); - - auto pp = it->second.Partition; - pp.AssignId = 0; - auto jt = PartitionToControlMessages.find(pp); - if (jt == PartitionToControlMessages.end()) { - if (!WriteResponse(std::move(result))) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); - Die(ctx); - return; - } - } else { - Y_VERIFY(jt->second.Infly); - jt->second.ControlMessages.push_back(result); - } - Y_VERIFY(it->second.LockSent); - it->second.ReleaseSent = true; -} - - -void TReadSessionActor::Handle(TEvPersQueue::TEvReleasePartition::TPtr& ev, const TActorContext& ctx) { - auto& record = ev->Get()->Record; - Y_VERIFY(record.GetSession() == Session); - Y_VERIFY(record.GetClientId() == ClientId); + result.mutable_release()->set_partition(it->second.Partition.Partition); + result.mutable_release()->set_assign_id(it->second.Partition.AssignId); + result.mutable_release()->set_forceful_release(kill); + result.mutable_release()->set_commit_offset(it->second.Offset); + + auto pp = it->second.Partition; + pp.AssignId = 0; + auto jt = PartitionToControlMessages.find(pp); + if (jt == PartitionToControlMessages.end()) { + if (!WriteResponse(std::move(result))) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); + Die(ctx); + return; + } + } else { + Y_VERIFY(jt->second.Infly); + jt->second.ControlMessages.push_back(result); + } + Y_VERIFY(it->second.LockSent); + it->second.ReleaseSent = true; +} + + +void TReadSessionActor::Handle(TEvPersQueue::TEvReleasePartition::TPtr& ev, const TActorContext& ctx) { + auto& record = ev->Get()->Record; + Y_VERIFY(record.GetSession() == Session); + Y_VERIFY(record.GetClientId() == ClientId); TString topicPath = NPersQueue::NormalizeFullPath(record.GetPath()); - - ui32 group = record.HasGroup() ? record.GetGroup() : 0; + + ui32 group = record.HasGroup() ? record.GetGroup() : 0; auto pathIter = FullPathToConverter.find(topicPath); Y_VERIFY(!pathIter.IsEnd()); auto it = Topics.find(pathIter->second->GetClientsideName()); Y_VERIFY(!it.IsEnd()); auto& converter = it->second.TopicNameConverter; - + TActorId pipe = ActorIdFromProto(record.GetPipeClient()); - - if (pipe != it->second.PipeClient) { //this is message from old version of pipe - return; - } - - for (ui32 c = 0; c < record.GetCount(); ++c) { - Y_VERIFY(!Partitions.empty()); - + + if (pipe != it->second.PipeClient) { //this is message from old version of pipe + return; + } + + for (ui32 c = 0; c < record.GetCount(); ++c) { + Y_VERIFY(!Partitions.empty()); + TActorId actorId = TActorId{}; - auto jt = Partitions.begin(); - ui32 i = 0; - for (auto it = Partitions.begin(); it != Partitions.end(); ++it) { + auto jt = Partitions.begin(); + ui32 i = 0; + for (auto it = Partitions.begin(); it != Partitions.end(); ++it) { if (it->second.Partition.TopicConverter->GetPrimaryPath() == converter->GetPrimaryPath() && !it->second.Releasing && (group == 0 || it->second.Partition.Partition + 1 == group) ) { - ++i; - if (rand() % i == 0) { //will lead to 1/n probability for each of n partitions - actorId = it->second.Actor; - jt = it; - } - } - } - Y_VERIFY(actorId); - - { + ++i; + if (rand() % i == 0) { //will lead to 1/n probability for each of n partitions + actorId = it->second.Actor; + jt = it; + } + } + } + Y_VERIFY(actorId); + + { auto it = TopicCounters.find(converter->GetClientsideName()); - Y_VERIFY(it != TopicCounters.end()); - it->second.PartitionsToBeReleased.Inc(); - } - - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " releasing " << jt->second.Partition); - jt->second.Releasing = true; + Y_VERIFY(it != TopicCounters.end()); + it->second.PartitionsToBeReleased.Inc(); + } + + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " releasing " << jt->second.Partition); + jt->second.Releasing = true; if (!jt->second.LockSent) { //no lock yet - can release silently ReleasePartition(jt, true, ctx); - } else { + } else { SendReleaseSignalToClient(jt, false, ctx); - } - } -} - - -void TReadSessionActor::Handle(TEvPQProxy::TEvPartitionReleased::TPtr& ev, const TActorContext& ctx) { - if (!ActualPartitionActor(ev->Sender)) - return; - - const auto assignId = ev->Get()->Partition.AssignId; - - auto it = Partitions.find(assignId); - Y_VERIFY(it != Partitions.end()); - Y_VERIFY(it->second.Releasing); - - ReleasePartition(it, false, ctx); //no reads could be here - this is release from partition -} - -void TReadSessionActor::InformBalancerAboutRelease(const THashMap<ui64, TPartitionActorInfo>::iterator& it, const TActorContext& ctx) { - - THolder<TEvPersQueue::TEvPartitionReleased> request; - request.Reset(new TEvPersQueue::TEvPartitionReleased); - auto& req = request->Record; - + } + } +} + + +void TReadSessionActor::Handle(TEvPQProxy::TEvPartitionReleased::TPtr& ev, const TActorContext& ctx) { + if (!ActualPartitionActor(ev->Sender)) + return; + + const auto assignId = ev->Get()->Partition.AssignId; + + auto it = Partitions.find(assignId); + Y_VERIFY(it != Partitions.end()); + Y_VERIFY(it->second.Releasing); + + ReleasePartition(it, false, ctx); //no reads could be here - this is release from partition +} + +void TReadSessionActor::InformBalancerAboutRelease(const THashMap<ui64, TPartitionActorInfo>::iterator& it, const TActorContext& ctx) { + + THolder<TEvPersQueue::TEvPartitionReleased> request; + request.Reset(new TEvPersQueue::TEvPartitionReleased); + auto& req = request->Record; + const auto& converter = it->second.Partition.TopicConverter; auto jt = Topics.find(converter->GetClientsideName()); - Y_VERIFY(jt != Topics.end()); - - req.SetSession(Session); + Y_VERIFY(jt != Topics.end()); + + req.SetSession(Session); ActorIdToProto(jt->second.PipeClient, req.MutablePipeClient()); - req.SetClientId(ClientId); + req.SetClientId(ClientId); req.SetTopic(converter->GetPrimaryPath()); - req.SetPartition(it->second.Partition.Partition); - - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " released: " << it->second.Partition); - - NTabletPipe::SendData(ctx, jt->second.PipeClient, request.Release()); -} - - -void TReadSessionActor::CloseSession(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode errorCode, const NActors::TActorContext& ctx) { - - if (errorCode != PersQueue::ErrorCode::OK) { - if (InternalErrorCode(errorCode)) { - SLIErrors.Inc(); - } - if (Errors) { - ++(*Errors); - } else { - ++(*GetServiceCounters(Counters, "pqproxy|readSession")->GetCounter("Errors", true)); - } - + req.SetPartition(it->second.Partition.Partition); + + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " released: " << it->second.Partition); + + NTabletPipe::SendData(ctx, jt->second.PipeClient, request.Release()); +} + + +void TReadSessionActor::CloseSession(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode errorCode, const NActors::TActorContext& ctx) { + + if (errorCode != PersQueue::ErrorCode::OK) { + if (InternalErrorCode(errorCode)) { + SLIErrors.Inc(); + } + if (Errors) { + ++(*Errors); + } else { + ++(*GetServiceCounters(Counters, "pqproxy|readSession")->GetCounter("Errors", true)); + } + MigrationStreamingReadServerMessage result; - result.set_status(ConvertPersQueueInternalCodeToStatus(errorCode)); - - FillIssue(result.add_issues(), errorCode, errorReason); - - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " closed with error reason: " << errorReason); - - if (!WriteResponse(std::move(result), true)) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); - Die(ctx); - return; - } - } else { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " closed"); - if (!Request->GetStreamCtx()->Finish(std::move(grpc::Status::OK))) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc double finish failed"); - Die(ctx); - return; - } - - } - - Die(ctx); -} - - -void TReadSessionActor::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) { - TEvTabletPipe::TEvClientConnected *msg = ev->Get(); - if (msg->Status != NKikimrProto::OK) { - if (msg->Dead) { - CloseSession(TStringBuilder() << "one of topics is deleted, tablet " << msg->TabletId, PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; - } - //TODO: remove it - CloseSession(TStringBuilder() << "unable to connect to one of topics, tablet " << msg->TabletId, PersQueue::ErrorCode::ERROR, ctx); - return; - + result.set_status(ConvertPersQueueInternalCodeToStatus(errorCode)); + + FillIssue(result.add_issues(), errorCode, errorReason); + + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " closed with error reason: " << errorReason); + + if (!WriteResponse(std::move(result), true)) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); + Die(ctx); + return; + } + } else { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " closed"); + if (!Request->GetStreamCtx()->Finish(std::move(grpc::Status::OK))) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc double finish failed"); + Die(ctx); + return; + } + + } + + Die(ctx); +} + + +void TReadSessionActor::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) { + TEvTabletPipe::TEvClientConnected *msg = ev->Get(); + if (msg->Status != NKikimrProto::OK) { + if (msg->Dead) { + CloseSession(TStringBuilder() << "one of topics is deleted, tablet " << msg->TabletId, PersQueue::ErrorCode::BAD_REQUEST, ctx); + return; + } + //TODO: remove it + CloseSession(TStringBuilder() << "unable to connect to one of topics, tablet " << msg->TabletId, PersQueue::ErrorCode::ERROR, ctx); + return; + #if 0 const bool isAlive = ProcessBalancerDead(msg->TabletId, ctx); // returns false if actor died Y_UNUSED(isAlive); - return; + return; #endif - } -} - + } +} + bool TReadSessionActor::ActualPartitionActor(const TActorId& part) { return ActualPartitionActors.contains(part); -} - - -void TReadSessionActor::ReleasePartition(const THashMap<ui64, TPartitionActorInfo>::iterator& it, - bool couldBeReads, const TActorContext& ctx) -{ - { +} + + +void TReadSessionActor::ReleasePartition(const THashMap<ui64, TPartitionActorInfo>::iterator& it, + bool couldBeReads, const TActorContext& ctx) +{ + { auto jt = TopicCounters.find(it->second.Partition.TopicConverter->GetClientsideName()); - Y_VERIFY(jt != TopicCounters.end()); - jt->second.PartitionsReleased.Inc(); - jt->second.PartitionsInfly.Dec(); - if (!it->second.Released && it->second.Releasing) { - jt->second.PartitionsToBeReleased.Dec(); - } - } - + Y_VERIFY(jt != TopicCounters.end()); + jt->second.PartitionsReleased.Inc(); + jt->second.PartitionsInfly.Dec(); + if (!it->second.Released && it->second.Releasing) { + jt->second.PartitionsToBeReleased.Dec(); + } + } + Y_VERIFY(couldBeReads || !it->second.Reading); - //process reads + //process reads TFormedReadResponse::TPtr formedResponseToAnswer; if (it->second.Reading) { const auto readIt = PartitionToReadResponse.find(it->second.Actor); @@ -1318,551 +1318,551 @@ void TReadSessionActor::ReleasePartition(const THashMap<ui64, TPartitionActorInf if (--readIt->second->RequestsInfly == 0) { formedResponseToAnswer = readIt->second; } - } - - InformBalancerAboutRelease(it, ctx); - - it->second.Released = true; //to force drop - DropPartition(it, ctx); //partition will be dropped - + } + + InformBalancerAboutRelease(it, ctx); + + it->second.Released = true; //to force drop + DropPartition(it, ctx); //partition will be dropped + if (formedResponseToAnswer) { - ProcessAnswer(ctx, formedResponseToAnswer); // returns false if actor died + ProcessAnswer(ctx, formedResponseToAnswer); // returns false if actor died } -} - - +} + + bool TReadSessionActor::ProcessBalancerDead(const ui64 tablet, const TActorContext& ctx) { - for (auto& t : Topics) { - if (t.second.TabletID == tablet) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " balancer for topic " << t.first << " is dead, restarting all from this topic"); - - //Drop all partitions from this topic - for (auto it = Partitions.begin(); it != Partitions.end();) { + for (auto& t : Topics) { + if (t.second.TabletID == tablet) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " balancer for topic " << t.first << " is dead, restarting all from this topic"); + + //Drop all partitions from this topic + for (auto it = Partitions.begin(); it != Partitions.end();) { if (it->second.Partition.TopicConverter->GetClientsideName() == t.first) { //partition from this topic - // kill actor - auto jt = it; - ++it; + // kill actor + auto jt = it; + ++it; if (jt->second.LockSent) { - SendReleaseSignalToClient(jt, true, ctx); + SendReleaseSignalToClient(jt, true, ctx); } - ReleasePartition(jt, true, ctx); - } else { - ++it; - } - } - - //reconnect pipe - NTabletPipe::TClientConfig clientConfig; - clientConfig.CheckAliveness = false; - clientConfig.RetryPolicy = RetryPolicyForPipes; + ReleasePartition(jt, true, ctx); + } else { + ++it; + } + } + + //reconnect pipe + NTabletPipe::TClientConfig clientConfig; + clientConfig.CheckAliveness = false; + clientConfig.RetryPolicy = RetryPolicyForPipes; t.second.PipeClient = ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, t.second.TabletID, clientConfig)); - if (InitDone) { - ++(*PipeReconnects); - ++(*Errors); - - RegisterSession(t.second.PipeClient, t.first, t.second.Groups, ctx); - } - } - } + if (InitDone) { + ++(*PipeReconnects); + ++(*Errors); + + RegisterSession(t.second.PipeClient, t.first, t.second.Groups, ctx); + } + } + } return true; -} - - -void TReadSessionActor::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) { +} + + +void TReadSessionActor::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) { const bool isAlive = ProcessBalancerDead(ev->Get()->TabletId, ctx); // returns false if actor died Y_UNUSED(isAlive); -} - -void TReadSessionActor::Handle(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse::TPtr &ev , const TActorContext& ctx) { - if (ev->Get()->Authenticated && !ev->Get()->InternalToken.empty()) { - Token = new NACLib::TUserToken(ev->Get()->InternalToken); - ForceACLCheck = true; - } else { - Request->ReplyUnauthenticated("refreshed token is invalid"); - Die(ctx); - } -} - -void TReadSessionActor::ProcessAuth(const TString& auth, const TActorContext& ctx) { - if (!auth.empty() && auth != Auth) { - Auth = auth; - Request->RefreshToken(auth, ctx, ctx.SelfID); - } -} - -void TReadSessionActor::Handle(TEvPQProxy::TEvRead::TPtr& ev, const TActorContext& ctx) { - RequestNotChecked = true; - - THolder<TEvPQProxy::TEvRead> event(ev->Release()); - - if (!Request->GetStreamCtx()->Read()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); - Die(ctx); - return; - } - - - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got read request with guid: " << event->Guid); - - Reads.emplace_back(event.Release()); - - ProcessReads(ctx); -} - - +} + +void TReadSessionActor::Handle(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse::TPtr &ev , const TActorContext& ctx) { + if (ev->Get()->Authenticated && !ev->Get()->InternalToken.empty()) { + Token = new NACLib::TUserToken(ev->Get()->InternalToken); + ForceACLCheck = true; + } else { + Request->ReplyUnauthenticated("refreshed token is invalid"); + Die(ctx); + } +} + +void TReadSessionActor::ProcessAuth(const TString& auth, const TActorContext& ctx) { + if (!auth.empty() && auth != Auth) { + Auth = auth; + Request->RefreshToken(auth, ctx, ctx.SelfID); + } +} + +void TReadSessionActor::Handle(TEvPQProxy::TEvRead::TPtr& ev, const TActorContext& ctx) { + RequestNotChecked = true; + + THolder<TEvPQProxy::TEvRead> event(ev->Release()); + + if (!Request->GetStreamCtx()->Read()) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); + Die(ctx); + return; + } + + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got read request with guid: " << event->Guid); + + Reads.emplace_back(event.Release()); + + ProcessReads(ctx); +} + + i64 TReadSessionActor::TFormedReadResponse::ApplyResponse(MigrationStreamingReadServerMessage&& resp) { - Y_VERIFY(resp.data_batch().partition_data_size() == 1); - Response.set_status(Ydb::StatusIds::SUCCESS); - - Response.mutable_data_batch()->add_partition_data()->Swap(resp.mutable_data_batch()->mutable_partition_data(0)); - i64 prev = Response.ByteSize(); - std::swap<i64>(prev, ByteSize); - return ByteSize - prev; -} - -void TReadSessionActor::Handle(TEvPQProxy::TEvReadResponse::TPtr& ev, const TActorContext& ctx) { + Y_VERIFY(resp.data_batch().partition_data_size() == 1); + Response.set_status(Ydb::StatusIds::SUCCESS); + + Response.mutable_data_batch()->add_partition_data()->Swap(resp.mutable_data_batch()->mutable_partition_data(0)); + i64 prev = Response.ByteSize(); + std::swap<i64>(prev, ByteSize); + return ByteSize - prev; +} + +void TReadSessionActor::Handle(TEvPQProxy::TEvReadResponse::TPtr& ev, const TActorContext& ctx) { TActorId sender = ev->Sender; if (!ActualPartitionActor(sender)) - return; - - THolder<TEvPQProxy::TEvReadResponse> event(ev->Release()); - - Y_VERIFY(event->Response.data_batch().partition_data_size() == 1); - const ui64 partitionCookie = event->Response.data_batch().partition_data(0).cookie().partition_cookie(); - Y_VERIFY(partitionCookie != 0); // cookie is assigned - const ui64 assignId = event->Response.data_batch().partition_data(0).cookie().assign_id(); - const auto partitionIt = Partitions.find(assignId); + return; + + THolder<TEvPQProxy::TEvReadResponse> event(ev->Release()); + + Y_VERIFY(event->Response.data_batch().partition_data_size() == 1); + const ui64 partitionCookie = event->Response.data_batch().partition_data(0).cookie().partition_cookie(); + Y_VERIFY(partitionCookie != 0); // cookie is assigned + const ui64 assignId = event->Response.data_batch().partition_data(0).cookie().assign_id(); + const auto partitionIt = Partitions.find(assignId); Y_VERIFY(partitionIt != Partitions.end()); Y_VERIFY(partitionIt->second.Reading); partitionIt->second.Reading = false; - - partitionIt->second.ReadIdToResponse = partitionCookie + 1; - + + partitionIt->second.ReadIdToResponse = partitionCookie + 1; + auto it = PartitionToReadResponse.find(sender); Y_VERIFY(it != PartitionToReadResponse.end()); TFormedReadResponse::TPtr formedResponse = it->second; LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " read done guid " << formedResponse->Guid - << partitionIt->second.Partition - << " size " << event->Response.ByteSize()); - + << partitionIt->second.Partition + << " size " << event->Response.ByteSize()); + const i64 diff = formedResponse->ApplyResponse(std::move(event->Response)); - if (event->FromDisk) { - formedResponse->FromDisk = true; - } + if (event->FromDisk) { + formedResponse->FromDisk = true; + } formedResponse->WaitQuotaTime = Max(formedResponse->WaitQuotaTime, event->WaitQuotaTime); --formedResponse->RequestsInfly; - - BytesInflight_ += diff; - (*BytesInflight) += diff; - + + BytesInflight_ += diff; + (*BytesInflight) += diff; + if (formedResponse->RequestsInfly == 0) { - ProcessAnswer(ctx, formedResponse); - } -} - -bool TReadSessionActor::WriteResponse(PersQueue::V1::MigrationStreamingReadServerMessage&& response, bool finish) { - ui64 sz = response.ByteSize(); - ActiveWrites.push(sz); - BytesInflight_ += sz; - if (BytesInflight) (*BytesInflight) += sz; - - return finish ? Request->GetStreamCtx()->WriteAndFinish(std::move(response), grpc::Status::OK) : Request->GetStreamCtx()->Write(std::move(response)); -} - -void TReadSessionActor::ProcessAnswer(const TActorContext& ctx, TFormedReadResponse::TPtr formedResponse) { + ProcessAnswer(ctx, formedResponse); + } +} + +bool TReadSessionActor::WriteResponse(PersQueue::V1::MigrationStreamingReadServerMessage&& response, bool finish) { + ui64 sz = response.ByteSize(); + ActiveWrites.push(sz); + BytesInflight_ += sz; + if (BytesInflight) (*BytesInflight) += sz; + + return finish ? Request->GetStreamCtx()->WriteAndFinish(std::move(response), grpc::Status::OK) : Request->GetStreamCtx()->Write(std::move(response)); +} + +void TReadSessionActor::ProcessAnswer(const TActorContext& ctx, TFormedReadResponse::TPtr formedResponse) { ui32 readDurationMs = (ctx.Now() - formedResponse->Start - formedResponse->WaitQuotaTime).MilliSeconds(); - if (formedResponse->FromDisk) { - ReadLatencyFromDisk.IncFor(readDurationMs, 1); - } else { - ReadLatency.IncFor(readDurationMs, 1); - } - if (readDurationMs >= (formedResponse->FromDisk ? AppData(ctx)->PQConfig.GetReadLatencyFromDiskBigMs() : AppData(ctx)->PQConfig.GetReadLatencyBigMs())) { - SLIBigReadLatency.Inc(); - } - + if (formedResponse->FromDisk) { + ReadLatencyFromDisk.IncFor(readDurationMs, 1); + } else { + ReadLatency.IncFor(readDurationMs, 1); + } + if (readDurationMs >= (formedResponse->FromDisk ? AppData(ctx)->PQConfig.GetReadLatencyFromDiskBigMs() : AppData(ctx)->PQConfig.GetReadLatencyBigMs())) { + SLIBigReadLatency.Inc(); + } + Y_VERIFY(formedResponse->RequestsInfly == 0); const ui64 diff = formedResponse->Response.ByteSize(); - const bool hasMessages = RemoveEmptyMessages(*formedResponse->Response.mutable_data_batch()); + const bool hasMessages = RemoveEmptyMessages(*formedResponse->Response.mutable_data_batch()); if (hasMessages) { - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " response to read " << formedResponse->Guid); - - if (!WriteResponse(std::move(formedResponse->Response))) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); - Die(ctx); - return; - } + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " response to read " << formedResponse->Guid); + + if (!WriteResponse(std::move(formedResponse->Response))) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); + Die(ctx); + return; + } } else { LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " empty read result " << formedResponse->Guid << ", start new reading"); } - + BytesInflight_ -= diff; (*BytesInflight) -= diff; - - for (auto& pp : formedResponse->PartitionsTookPartInControlMessages) { - auto it = PartitionToControlMessages.find(pp); - Y_VERIFY(it != PartitionToControlMessages.end()); - if (--it->second.Infly == 0) { - for (auto& r : it->second.ControlMessages) { - if (!WriteResponse(std::move(r))) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); - Die(ctx); - return; - } - } - PartitionToControlMessages.erase(it); - } - } - + + for (auto& pp : formedResponse->PartitionsTookPartInControlMessages) { + auto it = PartitionToControlMessages.find(pp); + Y_VERIFY(it != PartitionToControlMessages.end()); + if (--it->second.Infly == 0) { + for (auto& r : it->second.ControlMessages) { + if (!WriteResponse(std::move(r))) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); + Die(ctx); + return; + } + } + PartitionToControlMessages.erase(it); + } + } + for (const TActorId& p : formedResponse->PartitionsTookPartInRead) { PartitionToReadResponse.erase(p); } - RequestedBytes -= formedResponse->RequestedBytes; - ReadsInfly--; - + RequestedBytes -= formedResponse->RequestedBytes; + ReadsInfly--; + // Bring back available partitions. // If some partition was removed from partitions container, it is not bad because it will be checked during read processing. AvailablePartitions.insert(formedResponse->PartitionsBecameAvailable.begin(), formedResponse->PartitionsBecameAvailable.end()); - if (!hasMessages) { + if (!hasMessages) { // process new read MigrationStreamingReadClientMessage req; - req.mutable_read(); - Reads.emplace_back(new TEvPQProxy::TEvRead(formedResponse->Guid)); // Start new reading request with the same guid + req.mutable_read(); + Reads.emplace_back(new TEvPQProxy::TEvRead(formedResponse->Guid)); // Start new reading request with the same guid } - ProcessReads(ctx); // returns false if actor died -} - -void TReadSessionActor::Handle(TEvPQProxy::TEvCloseSession::TPtr& ev, const TActorContext& ctx) { - CloseSession(ev->Get()->Reason, ev->Get()->ErrorCode, ctx); -} - + ProcessReads(ctx); // returns false if actor died +} + +void TReadSessionActor::Handle(TEvPQProxy::TEvCloseSession::TPtr& ev, const TActorContext& ctx) { + CloseSession(ev->Get()->Reason, ev->Get()->ErrorCode, ctx); +} + ui32 TReadSessionActor::NormalizeMaxReadMessagesCount(ui32 sourceValue) { ui32 count = Min<ui32>(sourceValue, Max<i32>()); if (count == 0) { - count = Max<i32>(); + count = Max<i32>(); } return count; } - + ui32 TReadSessionActor::NormalizeMaxReadSize(ui32 sourceValue) { - ui32 size = Min<ui32>(sourceValue, MAX_READ_SIZE); + ui32 size = Min<ui32>(sourceValue, MAX_READ_SIZE); if (size == 0) { size = MAX_READ_SIZE; } return size; } -void TReadSessionActor::ProcessReads(const TActorContext& ctx) { - while (!Reads.empty() && BytesInflight_ + RequestedBytes < MAX_INFLY_BYTES && ReadsInfly < MAX_INFLY_READS) { +void TReadSessionActor::ProcessReads(const TActorContext& ctx) { + while (!Reads.empty() && BytesInflight_ + RequestedBytes < MAX_INFLY_BYTES && ReadsInfly < MAX_INFLY_READS) { ui32 count = MaxReadMessagesCount; ui64 size = MaxReadSize; - ui32 partitionsAsked = 0; - - TFormedReadResponse::TPtr formedResponse = new TFormedReadResponse(Reads.front()->Guid, ctx.Now()); - while (!AvailablePartitions.empty()) { - auto part = *AvailablePartitions.begin(); - AvailablePartitions.erase(AvailablePartitions.begin()); - - auto it = Partitions.find(part.AssignId); - if (it == Partitions.end() || it->second.Releasing) { //this is already released partition - continue; - } - //add this partition to reading - ++partitionsAsked; - + ui32 partitionsAsked = 0; + + TFormedReadResponse::TPtr formedResponse = new TFormedReadResponse(Reads.front()->Guid, ctx.Now()); + while (!AvailablePartitions.empty()) { + auto part = *AvailablePartitions.begin(); + AvailablePartitions.erase(AvailablePartitions.begin()); + + auto it = Partitions.find(part.AssignId); + if (it == Partitions.end() || it->second.Releasing) { //this is already released partition + continue; + } + //add this partition to reading + ++partitionsAsked; + const ui32 ccount = Min<ui32>(part.MsgLag * LAG_GROW_MULTIPLIER, count); - count -= ccount; - const ui64 csize = (ui64)Min<double>(part.SizeLag * LAG_GROW_MULTIPLIER, size); - size -= csize; - Y_VERIFY(csize < Max<i32>()); - + count -= ccount; + const ui64 csize = (ui64)Min<double>(part.SizeLag * LAG_GROW_MULTIPLIER, size); + size -= csize; + Y_VERIFY(csize < Max<i32>()); + auto jt = ReadFromTimestamp.find(it->second.Partition.TopicConverter->GetModernName()); - Y_VERIFY(jt != ReadFromTimestamp.end()); - ui64 readTimestampMs = Max(ReadTimestampMs, jt->second); - - TAutoPtr<TEvPQProxy::TEvRead> read = new TEvPQProxy::TEvRead(Reads.front()->Guid, ccount, csize, MaxTimeLagMs, readTimestampMs); - - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX - << " performing read request with guid " << read->Guid - << " from " << it->second.Partition << " count " << ccount << " size " << csize + Y_VERIFY(jt != ReadFromTimestamp.end()); + ui64 readTimestampMs = Max(ReadTimestampMs, jt->second); + + TAutoPtr<TEvPQProxy::TEvRead> read = new TEvPQProxy::TEvRead(Reads.front()->Guid, ccount, csize, MaxTimeLagMs, readTimestampMs); + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX + << " performing read request with guid " << read->Guid + << " from " << it->second.Partition << " count " << ccount << " size " << csize << " partitionsAsked " << partitionsAsked << " maxTimeLag " << MaxTimeLagMs << "ms"); - - + + Y_VERIFY(!it->second.Reading); it->second.Reading = true; formedResponse->PartitionsTookPartInRead.insert(it->second.Actor); - auto pp = it->second.Partition; - pp.AssignId = 0; - PartitionToControlMessages[pp].Infly++; - bool res = formedResponse->PartitionsTookPartInControlMessages.insert(pp).second; - Y_VERIFY(res); - - RequestedBytes += csize; - formedResponse->RequestedBytes += csize; - - ctx.Send(it->second.Actor, read.Release()); + auto pp = it->second.Partition; + pp.AssignId = 0; + PartitionToControlMessages[pp].Infly++; + bool res = formedResponse->PartitionsTookPartInControlMessages.insert(pp).second; + Y_VERIFY(res); + + RequestedBytes += csize; + formedResponse->RequestedBytes += csize; + + ctx.Send(it->second.Actor, read.Release()); const auto insertResult = PartitionToReadResponse.insert(std::make_pair(it->second.Actor, formedResponse)); Y_VERIFY(insertResult.second); - + if (count == 0 || size == 0) - break; - } - if (partitionsAsked == 0) - break; - ReadsTotal.Inc(); + break; + } + if (partitionsAsked == 0) + break; + ReadsTotal.Inc(); formedResponse->RequestsInfly = partitionsAsked; - - ReadsInfly++; - + + ReadsInfly++; + i64 diff = formedResponse->Response.ByteSize(); - BytesInflight_ += diff; + BytesInflight_ += diff; formedResponse->ByteSize = diff; - (*BytesInflight) += diff; - Reads.pop_front(); - } -} - - -void TReadSessionActor::Handle(TEvPQProxy::TEvPartitionReady::TPtr& ev, const TActorContext& ctx) { - - if (!ActualPartitionActor(ev->Sender)) - return; - - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << ev->Get()->Partition - << " ready for read with readOffset " - << ev->Get()->ReadOffset << " endOffset " << ev->Get()->EndOffset << " WTime " - << ev->Get()->WTime << " sizeLag " << ev->Get()->SizeLag); - + (*BytesInflight) += diff; + Reads.pop_front(); + } +} + + +void TReadSessionActor::Handle(TEvPQProxy::TEvPartitionReady::TPtr& ev, const TActorContext& ctx) { + + if (!ActualPartitionActor(ev->Sender)) + return; + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << ev->Get()->Partition + << " ready for read with readOffset " + << ev->Get()->ReadOffset << " endOffset " << ev->Get()->EndOffset << " WTime " + << ev->Get()->WTime << " sizeLag " << ev->Get()->SizeLag); + const auto it = PartitionToReadResponse.find(ev->Sender); // check whether this partition is taking part in read response auto& container = it != PartitionToReadResponse.end() ? it->second->PartitionsBecameAvailable : AvailablePartitions; - auto res = container.insert(TPartitionInfo{ev->Get()->Partition.AssignId, ev->Get()->WTime, ev->Get()->SizeLag, - ev->Get()->EndOffset - ev->Get()->ReadOffset}); - Y_VERIFY(res.second); - ProcessReads(ctx); -} - - -void TReadSessionActor::HandlePoison(TEvPQProxy::TEvDieCommand::TPtr& ev, const TActorContext& ctx) { + auto res = container.insert(TPartitionInfo{ev->Get()->Partition.AssignId, ev->Get()->WTime, ev->Get()->SizeLag, + ev->Get()->EndOffset - ev->Get()->ReadOffset}); + Y_VERIFY(res.second); + ProcessReads(ctx); +} + + +void TReadSessionActor::HandlePoison(TEvPQProxy::TEvDieCommand::TPtr& ev, const TActorContext& ctx) { CloseSession(ev->Get()->Reason, ev->Get()->ErrorCode, ctx); -} - - -void TReadSessionActor::HandleWakeup(const TActorContext& ctx) { - ctx.Schedule(CHECK_ACL_DELAY, new TEvents::TEvWakeup()); - if (Token && !AuthInitActor && (ForceACLCheck || (ctx.Now() - LastACLCheckTimestamp > TDuration::Seconds(AppData(ctx)->PQConfig.GetACLRetryTimeoutSec()) && RequestNotChecked))) { - ForceACLCheck = false; - RequestNotChecked = false; - Y_VERIFY(!AuthInitActor); - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " checking auth because of timeout"); - +} + + +void TReadSessionActor::HandleWakeup(const TActorContext& ctx) { + ctx.Schedule(CHECK_ACL_DELAY, new TEvents::TEvWakeup()); + if (Token && !AuthInitActor && (ForceACLCheck || (ctx.Now() - LastACLCheckTimestamp > TDuration::Seconds(AppData(ctx)->PQConfig.GetACLRetryTimeoutSec()) && RequestNotChecked))) { + ForceACLCheck = false; + RequestNotChecked = false; + Y_VERIFY(!AuthInitActor); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " checking auth because of timeout"); + AuthInitActor = ctx.Register(new TReadInitAndAuthActor( ctx, ctx.SelfID, ClientId, Cookie, Session, SchemeCache, NewSchemeCache, Counters, Token, TopicsHandler.GetReadTopicsList(TopicsToResolve, ReadOnlyLocal, Request->GetDatabaseName().GetOrElse(TString())) )); - } -} - - -////////////////// PARTITION ACTOR - + } +} + + +////////////////// PARTITION ACTOR + TPartitionActor::TPartitionActor(const TActorId& parentId, const TString& clientId, const TString& clientPath, const ui64 cookie, const TString& session, - const TPartitionId& partition, const ui32 generation, const ui32 step, const ui64 tabletID, - const TReadSessionActor::TTopicCounters& counters, bool commitsDisabled, const TString& clientDC) - : ParentId(parentId) - , ClientId(clientId) - , ClientPath(clientPath) - , Cookie(cookie) - , Session(session) - , ClientDC(clientDC) - , Partition(partition) - , Generation(generation) - , Step(step) - , TabletID(tabletID) - , ReadOffset(0) - , ClientReadOffset(0) - , ClientCommitOffset(0) - , ClientVerifyReadOffset(false) - , CommittedOffset(0) - , WriteTimestampEstimateMs(0) - , ReadIdToResponse(1) - , ReadIdCommitted(0) - , WTime(0) - , InitDone(false) - , StartReading(false) - , AllPrepareInited(false) - , FirstInit(true) - , PipeClient() - , PipeGeneration(0) - , RequestInfly(false) - , EndOffset(0) - , SizeLag(0) - , NeedRelease(false) - , Released(false) - , WaitDataCookie(0) - , WaitForData(false) - , LockCounted(false) - , Counters(counters) - , CommitsDisabled(commitsDisabled) - , CommitCookie(1) -{ -} - - -void TPartitionActor::MakeCommit(const TActorContext& ctx) { - ui64 offset = ClientReadOffset; - if (CommitsDisabled) - return; - if (CommitsInfly.size() > MAX_COMMITS_INFLY) - return; - - //Ranges mode - if (!NextRanges.Empty() && NextRanges.Min() == ClientCommitOffset) { - auto first = NextRanges.begin(); - offset = first->second; - NextRanges.EraseInterval(first->first, first->second); - - ClientCommitOffset = offset; - ++CommitCookie; - CommitsInfly.push_back(std::pair<ui64, TCommitInfo>(CommitCookie, {CommitCookie, offset, ctx.Now()})); - Counters.SLITotal.Inc(); - - if (PipeClient) //if not then pipe will be recreated soon and SendCommit will be done - SendCommit(CommitCookie, offset, ctx); - return; - } - - //Now commits by cookies. - ui64 readId = ReadIdCommitted; - auto it = NextCommits.begin(); - if (it != NextCommits.end() && *it == 0) { //commit of readed in prev session data - NextCommits.erase(NextCommits.begin()); - if (ClientReadOffset <= ClientCommitOffset) { - ctx.Send(ParentId, new TEvPQProxy::TEvCommitDone(Partition.AssignId, 0, 0, CommittedOffset)); - } else { - ClientCommitOffset = ClientReadOffset; - CommitsInfly.push_back(std::pair<ui64, TCommitInfo>(0, {0, ClientReadOffset, ctx.Now()})); - Counters.SLITotal.Inc(); - if (PipeClient) //if not then pipe will be recreated soon and SendCommit will be done - SendCommit(0, ClientReadOffset, ctx); - } - MakeCommit(ctx); - return; - } - for (;it != NextCommits.end() && (*it) == readId + 1; ++it) { - ++readId; - } - if (readId == ReadIdCommitted) - return; - NextCommits.erase(NextCommits.begin(), it); - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " commit request from " << ReadIdCommitted + 1 << " to " << readId << " in " << Partition); - - ui64 startReadId = ReadIdCommitted + 1; - - ReadIdCommitted = readId; - - auto jt = Offsets.begin(); - while(jt != Offsets.end() && jt->ReadId != readId) ++jt; - Y_VERIFY(jt != Offsets.end()); - - offset = Max(offset, jt->Offset); - - Offsets.erase(Offsets.begin(), ++jt); - - Y_VERIFY(offset > ClientCommitOffset); - - ClientCommitOffset = offset; - CommitsInfly.push_back(std::pair<ui64, TCommitInfo>(readId, {startReadId, offset, ctx.Now()})); - Counters.SLITotal.Inc(); - - if (PipeClient) //if not then pipe will be recreated soon and SendCommit will be done - SendCommit(readId, offset, ctx); -} - -TPartitionActor::~TPartitionActor() = default; - - -void TPartitionActor::Bootstrap(const TActorContext&) { - Become(&TThis::StateFunc); -} - - -void TPartitionActor::CheckRelease(const TActorContext& ctx) { - const bool hasUncommittedData = ReadOffset > ClientCommitOffset && ReadOffset > ClientReadOffset; //TODO: remove ReadOffset > ClientReadOffset - otherwise wait for commit with cookie(0) - if (NeedRelease) { - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition - << " checking release readOffset " << ReadOffset << " committedOffset " << CommittedOffset << " ReadGuid " << ReadGuid - << " CommitsInfly.size " << CommitsInfly.size() << " Released " << Released); - } - - if (NeedRelease && (ReadGuid.empty() && CommitsInfly.empty() && !hasUncommittedData && !Released)) { - Released = true; - ctx.Send(ParentId, new TEvPQProxy::TEvPartitionReleased(Partition)); - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition - << " check release done - releasing; readOffset " << ReadOffset << " committedOffset " << CommittedOffset << " ReadGuid " << ReadGuid - << " CommitsInfly.size " << CommitsInfly.size() << " Released " << Released); - - } -} - - -void TPartitionActor::SendCommit(const ui64 readId, const ui64 offset, const TActorContext& ctx) { - NKikimrClient::TPersQueueRequest request; + const TPartitionId& partition, const ui32 generation, const ui32 step, const ui64 tabletID, + const TReadSessionActor::TTopicCounters& counters, bool commitsDisabled, const TString& clientDC) + : ParentId(parentId) + , ClientId(clientId) + , ClientPath(clientPath) + , Cookie(cookie) + , Session(session) + , ClientDC(clientDC) + , Partition(partition) + , Generation(generation) + , Step(step) + , TabletID(tabletID) + , ReadOffset(0) + , ClientReadOffset(0) + , ClientCommitOffset(0) + , ClientVerifyReadOffset(false) + , CommittedOffset(0) + , WriteTimestampEstimateMs(0) + , ReadIdToResponse(1) + , ReadIdCommitted(0) + , WTime(0) + , InitDone(false) + , StartReading(false) + , AllPrepareInited(false) + , FirstInit(true) + , PipeClient() + , PipeGeneration(0) + , RequestInfly(false) + , EndOffset(0) + , SizeLag(0) + , NeedRelease(false) + , Released(false) + , WaitDataCookie(0) + , WaitForData(false) + , LockCounted(false) + , Counters(counters) + , CommitsDisabled(commitsDisabled) + , CommitCookie(1) +{ +} + + +void TPartitionActor::MakeCommit(const TActorContext& ctx) { + ui64 offset = ClientReadOffset; + if (CommitsDisabled) + return; + if (CommitsInfly.size() > MAX_COMMITS_INFLY) + return; + + //Ranges mode + if (!NextRanges.Empty() && NextRanges.Min() == ClientCommitOffset) { + auto first = NextRanges.begin(); + offset = first->second; + NextRanges.EraseInterval(first->first, first->second); + + ClientCommitOffset = offset; + ++CommitCookie; + CommitsInfly.push_back(std::pair<ui64, TCommitInfo>(CommitCookie, {CommitCookie, offset, ctx.Now()})); + Counters.SLITotal.Inc(); + + if (PipeClient) //if not then pipe will be recreated soon and SendCommit will be done + SendCommit(CommitCookie, offset, ctx); + return; + } + + //Now commits by cookies. + ui64 readId = ReadIdCommitted; + auto it = NextCommits.begin(); + if (it != NextCommits.end() && *it == 0) { //commit of readed in prev session data + NextCommits.erase(NextCommits.begin()); + if (ClientReadOffset <= ClientCommitOffset) { + ctx.Send(ParentId, new TEvPQProxy::TEvCommitDone(Partition.AssignId, 0, 0, CommittedOffset)); + } else { + ClientCommitOffset = ClientReadOffset; + CommitsInfly.push_back(std::pair<ui64, TCommitInfo>(0, {0, ClientReadOffset, ctx.Now()})); + Counters.SLITotal.Inc(); + if (PipeClient) //if not then pipe will be recreated soon and SendCommit will be done + SendCommit(0, ClientReadOffset, ctx); + } + MakeCommit(ctx); + return; + } + for (;it != NextCommits.end() && (*it) == readId + 1; ++it) { + ++readId; + } + if (readId == ReadIdCommitted) + return; + NextCommits.erase(NextCommits.begin(), it); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " commit request from " << ReadIdCommitted + 1 << " to " << readId << " in " << Partition); + + ui64 startReadId = ReadIdCommitted + 1; + + ReadIdCommitted = readId; + + auto jt = Offsets.begin(); + while(jt != Offsets.end() && jt->ReadId != readId) ++jt; + Y_VERIFY(jt != Offsets.end()); + + offset = Max(offset, jt->Offset); + + Offsets.erase(Offsets.begin(), ++jt); + + Y_VERIFY(offset > ClientCommitOffset); + + ClientCommitOffset = offset; + CommitsInfly.push_back(std::pair<ui64, TCommitInfo>(readId, {startReadId, offset, ctx.Now()})); + Counters.SLITotal.Inc(); + + if (PipeClient) //if not then pipe will be recreated soon and SendCommit will be done + SendCommit(readId, offset, ctx); +} + +TPartitionActor::~TPartitionActor() = default; + + +void TPartitionActor::Bootstrap(const TActorContext&) { + Become(&TThis::StateFunc); +} + + +void TPartitionActor::CheckRelease(const TActorContext& ctx) { + const bool hasUncommittedData = ReadOffset > ClientCommitOffset && ReadOffset > ClientReadOffset; //TODO: remove ReadOffset > ClientReadOffset - otherwise wait for commit with cookie(0) + if (NeedRelease) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + << " checking release readOffset " << ReadOffset << " committedOffset " << CommittedOffset << " ReadGuid " << ReadGuid + << " CommitsInfly.size " << CommitsInfly.size() << " Released " << Released); + } + + if (NeedRelease && (ReadGuid.empty() && CommitsInfly.empty() && !hasUncommittedData && !Released)) { + Released = true; + ctx.Send(ParentId, new TEvPQProxy::TEvPartitionReleased(Partition)); + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + << " check release done - releasing; readOffset " << ReadOffset << " committedOffset " << CommittedOffset << " ReadGuid " << ReadGuid + << " CommitsInfly.size " << CommitsInfly.size() << " Released " << Released); + + } +} + + +void TPartitionActor::SendCommit(const ui64 readId, const ui64 offset, const TActorContext& ctx) { + NKikimrClient::TPersQueueRequest request; request.MutablePartitionRequest()->SetTopic(Partition.TopicConverter->GetPrimaryPath()); - request.MutablePartitionRequest()->SetPartition(Partition.Partition); - request.MutablePartitionRequest()->SetCookie(readId); - - Y_VERIFY(PipeClient); - + request.MutablePartitionRequest()->SetPartition(Partition.Partition); + request.MutablePartitionRequest()->SetCookie(readId); + + Y_VERIFY(PipeClient); + ActorIdToProto(PipeClient, request.MutablePartitionRequest()->MutablePipeClient()); - auto commit = request.MutablePartitionRequest()->MutableCmdSetClientOffset(); - commit->SetClientId(ClientId); - commit->SetOffset(offset); - Y_VERIFY(!Session.empty()); - commit->SetSessionId(Session); - - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition - << " committing to position " << offset << " prev " << CommittedOffset - << " end " << EndOffset << " by cookie " << readId); - - TAutoPtr<TEvPersQueue::TEvRequest> req(new TEvPersQueue::TEvRequest); - req->Record.Swap(&request); - - NTabletPipe::SendData(ctx, PipeClient, req.Release()); -} - -void TPartitionActor::RestartPipe(const TActorContext& ctx, const TString& reason, const NPersQueue::NErrorCode::EErrorCode errorCode) { - - if (!PipeClient) - return; - - Counters.Errors.Inc(); - - NTabletPipe::CloseClient(ctx, PipeClient); + auto commit = request.MutablePartitionRequest()->MutableCmdSetClientOffset(); + commit->SetClientId(ClientId); + commit->SetOffset(offset); + Y_VERIFY(!Session.empty()); + commit->SetSessionId(Session); + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + << " committing to position " << offset << " prev " << CommittedOffset + << " end " << EndOffset << " by cookie " << readId); + + TAutoPtr<TEvPersQueue::TEvRequest> req(new TEvPersQueue::TEvRequest); + req->Record.Swap(&request); + + NTabletPipe::SendData(ctx, PipeClient, req.Release()); +} + +void TPartitionActor::RestartPipe(const TActorContext& ctx, const TString& reason, const NPersQueue::NErrorCode::EErrorCode errorCode) { + + if (!PipeClient) + return; + + Counters.Errors.Inc(); + + NTabletPipe::CloseClient(ctx, PipeClient); PipeClient = TActorId{}; - if (errorCode != NPersQueue::NErrorCode::OVERLOAD) - ++PipeGeneration; - - if (PipeGeneration == MAX_PIPE_RESTARTS) { - ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("too much attempts to restart pipe", PersQueue::ErrorCode::ERROR)); - return; - } - - ctx.Schedule(TDuration::MilliSeconds(RESTART_PIPE_DELAY_MS), new TEvPQProxy::TEvRestartPipe()); - - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition - << " schedule pipe restart attempt " << PipeGeneration << " reason: " << reason); -} - - -void TPartitionActor::Handle(const TEvPQProxy::TEvRestartPipe::TPtr&, const TActorContext& ctx) { - - Y_VERIFY(!PipeClient); - - NTabletPipe::TClientConfig clientConfig; + if (errorCode != NPersQueue::NErrorCode::OVERLOAD) + ++PipeGeneration; + + if (PipeGeneration == MAX_PIPE_RESTARTS) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("too much attempts to restart pipe", PersQueue::ErrorCode::ERROR)); + return; + } + + ctx.Schedule(TDuration::MilliSeconds(RESTART_PIPE_DELAY_MS), new TEvPQProxy::TEvRestartPipe()); + + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + << " schedule pipe restart attempt " << PipeGeneration << " reason: " << reason); +} + + +void TPartitionActor::Handle(const TEvPQProxy::TEvRestartPipe::TPtr&, const TActorContext& ctx) { + + Y_VERIFY(!PipeClient); + + NTabletPipe::TClientConfig clientConfig; clientConfig.RetryPolicy = { .RetryLimitCount = 6, .MinRetryTime = TDuration::MilliSeconds(10), @@ -1871,61 +1871,61 @@ void TPartitionActor::Handle(const TEvPQProxy::TEvRestartPipe::TPtr&, const TAct .DoFirstRetryInstantly = true }; PipeClient = ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, TabletID, clientConfig)); - Y_VERIFY(TabletID); - - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition - << " pipe restart attempt " << PipeGeneration << " RequestInfly " << RequestInfly << " ReadOffset " << ReadOffset << " EndOffset " << EndOffset - << " InitDone " << InitDone << " WaitForData " << WaitForData); - - if (RequestInfly) { //got read infly - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition - << " resend " << CurrentRequest); - - TAutoPtr<TEvPersQueue::TEvRequest> event(new TEvPersQueue::TEvRequest); - event->Record = CurrentRequest; - + Y_VERIFY(TabletID); + + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + << " pipe restart attempt " << PipeGeneration << " RequestInfly " << RequestInfly << " ReadOffset " << ReadOffset << " EndOffset " << EndOffset + << " InitDone " << InitDone << " WaitForData " << WaitForData); + + if (RequestInfly) { //got read infly + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + << " resend " << CurrentRequest); + + TAutoPtr<TEvPersQueue::TEvRequest> event(new TEvPersQueue::TEvRequest); + event->Record = CurrentRequest; + ActorIdToProto(PipeClient, event->Record.MutablePartitionRequest()->MutablePipeClient()); - - NTabletPipe::SendData(ctx, PipeClient, event.Release()); - } - if (InitDone) { - for (auto& c : CommitsInfly) { //resend all commits - if (c.second.Offset != Max<ui64>()) - SendCommit(c.first, c.second.Offset, ctx); - } - if (WaitForData) { //resend wait-for-data requests - WaitDataInfly.clear(); - WaitDataInPartition(ctx); - } - } -} - + + NTabletPipe::SendData(ctx, PipeClient, event.Release()); + } + if (InitDone) { + for (auto& c : CommitsInfly) { //resend all commits + if (c.second.Offset != Max<ui64>()) + SendCommit(c.first, c.second.Offset, ctx); + } + if (WaitForData) { //resend wait-for-data requests + WaitDataInfly.clear(); + WaitDataInPartition(ctx); + } + } +} + bool FillBatchedData(MigrationStreamingReadServerMessage::DataBatch * data, const NKikimrClient::TCmdReadResult& res, const TPartitionId& Partition, ui64 ReadIdToResponse, ui64& ReadOffset, ui64& WTime, ui64 EndOffset, const TActorContext& ctx) { - - auto* partitionData = data->add_partition_data(); + + auto* partitionData = data->add_partition_data(); partitionData->mutable_topic()->set_path(Partition.TopicConverter->GetModernName()); partitionData->set_cluster(Partition.TopicConverter->GetCluster()); - partitionData->set_partition(Partition.Partition); + partitionData->set_partition(Partition.Partition); partitionData->set_deprecated_topic(Partition.TopicConverter->GetFullLegacyName()); - partitionData->mutable_cookie()->set_assign_id(Partition.AssignId); - partitionData->mutable_cookie()->set_partition_cookie(ReadIdToResponse); - - bool hasOffset = false; - bool hasData = false; - + partitionData->mutable_cookie()->set_assign_id(Partition.AssignId); + partitionData->mutable_cookie()->set_partition_cookie(ReadIdToResponse); + + bool hasOffset = false; + bool hasData = false; + MigrationStreamingReadServerMessage::DataBatch::Batch* currentBatch = nullptr; - for (ui32 i = 0; i < res.ResultSize(); ++i) { - const auto& r = res.GetResult(i); - WTime = r.GetWriteTimestampMS(); - Y_VERIFY(r.GetOffset() >= ReadOffset); - ReadOffset = r.GetOffset() + 1; - hasOffset = true; - - auto proto(GetDeserializedData(r.GetData())); - if (proto.GetChunkType() != NKikimrPQClient::TDataChunk::REGULAR) { - continue; //TODO - no such chunks must be on prod - } - + for (ui32 i = 0; i < res.ResultSize(); ++i) { + const auto& r = res.GetResult(i); + WTime = r.GetWriteTimestampMS(); + Y_VERIFY(r.GetOffset() >= ReadOffset); + ReadOffset = r.GetOffset() + 1; + hasOffset = true; + + auto proto(GetDeserializedData(r.GetData())); + if (proto.GetChunkType() != NKikimrPQClient::TDataChunk::REGULAR) { + continue; //TODO - no such chunks must be on prod + } + TString sourceId; if (!r.GetSourceId().empty()) { if (!NPQ::NSourceIdEncoding::IsValidEncoded(r.GetSourceId())) { @@ -1933,238 +1933,238 @@ bool FillBatchedData(MigrationStreamingReadServerMessage::DataBatch * data, cons << " offset " << r.GetOffset() << " seqNo " << r.GetSeqNo() << " sourceId '" << r.GetSourceId() << "'"); } sourceId = NPQ::NSourceIdEncoding::Decode(r.GetSourceId()); - } - - if (!currentBatch || currentBatch->write_timestamp_ms() != r.GetWriteTimestampMS() || currentBatch->source_id() != sourceId) { - // If write time and source id are the same, the rest fields will be the same too. - currentBatch = partitionData->add_batches(); - currentBatch->set_write_timestamp_ms(r.GetWriteTimestampMS()); - currentBatch->set_source_id(sourceId); - - if (proto.HasMeta()) { - const auto& header = proto.GetMeta(); - if (header.HasServer()) { - auto* item = currentBatch->add_extra_fields(); - item->set_key("server"); - item->set_value(header.GetServer()); - } - if (header.HasFile()) { - auto* item = currentBatch->add_extra_fields(); - item->set_key("file"); - item->set_value(header.GetFile()); - } - if (header.HasIdent()) { - auto* item = currentBatch->add_extra_fields(); - item->set_key("ident"); - item->set_value(header.GetIdent()); - } - if (header.HasLogType()) { - auto* item = currentBatch->add_extra_fields(); - item->set_key("logtype"); - item->set_value(header.GetLogType()); - } - } - - if (proto.HasExtraFields()) { - const auto& map = proto.GetExtraFields(); - for (const auto& kv : map.GetItems()) { - auto* item = currentBatch->add_extra_fields(); - item->set_key(kv.GetKey()); - item->set_value(kv.GetValue()); - } - } - - if (proto.HasIp() && IsUtf(proto.GetIp())) { - currentBatch->set_ip(proto.GetIp()); - } - } - - auto* message = currentBatch->add_message_data(); - message->set_seq_no(r.GetSeqNo()); - message->set_create_timestamp_ms(r.GetCreateTimestampMS()); - message->set_offset(r.GetOffset()); - + } + + if (!currentBatch || currentBatch->write_timestamp_ms() != r.GetWriteTimestampMS() || currentBatch->source_id() != sourceId) { + // If write time and source id are the same, the rest fields will be the same too. + currentBatch = partitionData->add_batches(); + currentBatch->set_write_timestamp_ms(r.GetWriteTimestampMS()); + currentBatch->set_source_id(sourceId); + + if (proto.HasMeta()) { + const auto& header = proto.GetMeta(); + if (header.HasServer()) { + auto* item = currentBatch->add_extra_fields(); + item->set_key("server"); + item->set_value(header.GetServer()); + } + if (header.HasFile()) { + auto* item = currentBatch->add_extra_fields(); + item->set_key("file"); + item->set_value(header.GetFile()); + } + if (header.HasIdent()) { + auto* item = currentBatch->add_extra_fields(); + item->set_key("ident"); + item->set_value(header.GetIdent()); + } + if (header.HasLogType()) { + auto* item = currentBatch->add_extra_fields(); + item->set_key("logtype"); + item->set_value(header.GetLogType()); + } + } + + if (proto.HasExtraFields()) { + const auto& map = proto.GetExtraFields(); + for (const auto& kv : map.GetItems()) { + auto* item = currentBatch->add_extra_fields(); + item->set_key(kv.GetKey()); + item->set_value(kv.GetValue()); + } + } + + if (proto.HasIp() && IsUtf(proto.GetIp())) { + currentBatch->set_ip(proto.GetIp()); + } + } + + auto* message = currentBatch->add_message_data(); + message->set_seq_no(r.GetSeqNo()); + message->set_create_timestamp_ms(r.GetCreateTimestampMS()); + message->set_offset(r.GetOffset()); + message->set_explicit_hash(r.GetExplicitHash()); message->set_partition_key(r.GetPartitionKey()); - if (proto.HasCodec()) { + if (proto.HasCodec()) { message->set_codec(NPQ::ToV1Codec((NPersQueueCommon::ECodec)proto.GetCodec())); - } - message->set_uncompressed_size(r.GetUncompressedSize()); - message->set_data(proto.GetData()); - hasData = true; - } - - const ui64 realReadOffset = res.HasRealReadOffset() ? res.GetRealReadOffset() : 0; - - if (!hasOffset) { //no data could be read from partition at offset ReadOffset - no data in partition at all??? - ReadOffset = Min(Max(ReadOffset + 1, realReadOffset + 1), EndOffset); - } - return hasData; -} - - -void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorContext& ctx) { - - if (ev->Get()->Record.HasErrorCode() && ev->Get()->Record.GetErrorCode() != NPersQueue::NErrorCode::OK) { + } + message->set_uncompressed_size(r.GetUncompressedSize()); + message->set_data(proto.GetData()); + hasData = true; + } + + const ui64 realReadOffset = res.HasRealReadOffset() ? res.GetRealReadOffset() : 0; + + if (!hasOffset) { //no data could be read from partition at offset ReadOffset - no data in partition at all??? + ReadOffset = Min(Max(ReadOffset + 1, realReadOffset + 1), EndOffset); + } + return hasData; +} + + +void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorContext& ctx) { + + if (ev->Get()->Record.HasErrorCode() && ev->Get()->Record.GetErrorCode() != NPersQueue::NErrorCode::OK) { const auto errorCode = ev->Get()->Record.GetErrorCode(); if (errorCode == NPersQueue::NErrorCode::WRONG_COOKIE || errorCode == NPersQueue::NErrorCode::BAD_REQUEST) { - Counters.Errors.Inc(); - ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("status is not ok: " + ev->Get()->Record.GetErrorReason(), ConvertOldCode(ev->Get()->Record.GetErrorCode()))); - } else { - RestartPipe(ctx, TStringBuilder() << "status is not ok. Code: " << EErrorCode_Name(errorCode) << ". Reason: " << ev->Get()->Record.GetErrorReason(), errorCode); - } - return; - } - - if (ev->Get()->Record.GetStatus() != NMsgBusProxy::MSTATUS_OK) { //this is incorrect answer, die - Y_VERIFY(!ev->Get()->Record.HasErrorCode()); - Counters.Errors.Inc(); - ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("status is not ok: " + ev->Get()->Record.GetErrorReason(), PersQueue::ErrorCode::ERROR)); - return; - } - if (!ev->Get()->Record.HasPartitionResponse()) { //this is incorrect answer, die - Counters.Errors.Inc(); - ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("empty partition in response", PersQueue::ErrorCode::ERROR)); - return; - } - - const auto& result = ev->Get()->Record.GetPartitionResponse(); - - if (!result.HasCookie()) { //this is incorrect answer, die - Counters.Errors.Inc(); - ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("no cookie in response", PersQueue::ErrorCode::ERROR)); - return; - } - - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition - << " initDone " << InitDone << " event " << result); - - - if (!InitDone) { - if (result.GetCookie() != INIT_COOKIE) { - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition - << " unwaited response in init with cookie " << result.GetCookie()); - return; - } - Y_VERIFY(RequestInfly); - CurrentRequest.Clear(); - RequestInfly = false; - - Y_VERIFY(result.HasCmdGetClientOffsetResult()); - const auto& resp = result.GetCmdGetClientOffsetResult(); - Y_VERIFY(resp.HasEndOffset()); - EndOffset = resp.GetEndOffset(); - SizeLag = resp.GetSizeLag(); - WriteTimestampEstimateMs = resp.GetWriteTimestampEstimateMS(); - + Counters.Errors.Inc(); + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("status is not ok: " + ev->Get()->Record.GetErrorReason(), ConvertOldCode(ev->Get()->Record.GetErrorCode()))); + } else { + RestartPipe(ctx, TStringBuilder() << "status is not ok. Code: " << EErrorCode_Name(errorCode) << ". Reason: " << ev->Get()->Record.GetErrorReason(), errorCode); + } + return; + } + + if (ev->Get()->Record.GetStatus() != NMsgBusProxy::MSTATUS_OK) { //this is incorrect answer, die + Y_VERIFY(!ev->Get()->Record.HasErrorCode()); + Counters.Errors.Inc(); + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("status is not ok: " + ev->Get()->Record.GetErrorReason(), PersQueue::ErrorCode::ERROR)); + return; + } + if (!ev->Get()->Record.HasPartitionResponse()) { //this is incorrect answer, die + Counters.Errors.Inc(); + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("empty partition in response", PersQueue::ErrorCode::ERROR)); + return; + } + + const auto& result = ev->Get()->Record.GetPartitionResponse(); + + if (!result.HasCookie()) { //this is incorrect answer, die + Counters.Errors.Inc(); + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("no cookie in response", PersQueue::ErrorCode::ERROR)); + return; + } + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + << " initDone " << InitDone << " event " << result); + + + if (!InitDone) { + if (result.GetCookie() != INIT_COOKIE) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + << " unwaited response in init with cookie " << result.GetCookie()); + return; + } + Y_VERIFY(RequestInfly); + CurrentRequest.Clear(); + RequestInfly = false; + + Y_VERIFY(result.HasCmdGetClientOffsetResult()); + const auto& resp = result.GetCmdGetClientOffsetResult(); + Y_VERIFY(resp.HasEndOffset()); + EndOffset = resp.GetEndOffset(); + SizeLag = resp.GetSizeLag(); + WriteTimestampEstimateMs = resp.GetWriteTimestampEstimateMS(); + ClientCommitOffset = ReadOffset = CommittedOffset = resp.HasOffset() ? resp.GetOffset() : 0; - Y_VERIFY(EndOffset >= CommittedOffset); - - if (resp.HasWriteTimestampMS()) - WTime = resp.GetWriteTimestampMS(); - - InitDone = true; - PipeGeneration = 0; //reset tries counter - all ok - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " INIT DONE " << Partition - << " EndOffset " << EndOffset << " readOffset " << ReadOffset << " committedOffset " << CommittedOffset); - - - if (!StartReading) { - ctx.Send(ParentId, new TEvPQProxy::TEvPartitionStatus(Partition, CommittedOffset, EndOffset, WriteTimestampEstimateMs)); - } else { - InitStartReading(ctx); - } - return; - } - - if (!result.HasCmdReadResult()) { //this is commit response - if (CommitsInfly.empty()) { - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition - << " unwaited commit-response with cookie " << result.GetCookie() << "; waiting for nothing"); - return; - } - ui64 readId = CommitsInfly.front().first; - - if (result.GetCookie() != readId) { - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition - << " unwaited commit-response with cookie " << result.GetCookie() << "; waiting for " << readId); - return; - } - - Counters.Commits.Inc(); - - ui32 commitDurationMs = (ctx.Now() - CommitsInfly.front().second.StartTime).MilliSeconds(); - Counters.CommitLatency.IncFor(commitDurationMs, 1); - if (commitDurationMs >= AppData(ctx)->PQConfig.GetCommitLatencyBigMs()) { - Counters.SLIBigLatency.Inc(); - } - - CommittedOffset = CommitsInfly.front().second.Offset; - ui64 startReadId = CommitsInfly.front().second.StartReadId; - ctx.Send(ParentId, new TEvPQProxy::TEvCommitDone(Partition.AssignId, startReadId, readId, CommittedOffset)); - - CommitsInfly.pop_front(); - - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition - << " commit done to position " << CommittedOffset << " endOffset " << EndOffset << " with cookie " << readId); - - CheckRelease(ctx); - PipeGeneration = 0; //reset tries counter - all ok - MakeCommit(ctx); - return; - } - - //This is read - Y_VERIFY(result.HasCmdReadResult()); - const auto& res = result.GetCmdReadResult(); - - if (result.GetCookie() != (ui64)ReadOffset) { - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + Y_VERIFY(EndOffset >= CommittedOffset); + + if (resp.HasWriteTimestampMS()) + WTime = resp.GetWriteTimestampMS(); + + InitDone = true; + PipeGeneration = 0; //reset tries counter - all ok + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " INIT DONE " << Partition + << " EndOffset " << EndOffset << " readOffset " << ReadOffset << " committedOffset " << CommittedOffset); + + + if (!StartReading) { + ctx.Send(ParentId, new TEvPQProxy::TEvPartitionStatus(Partition, CommittedOffset, EndOffset, WriteTimestampEstimateMs)); + } else { + InitStartReading(ctx); + } + return; + } + + if (!result.HasCmdReadResult()) { //this is commit response + if (CommitsInfly.empty()) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + << " unwaited commit-response with cookie " << result.GetCookie() << "; waiting for nothing"); + return; + } + ui64 readId = CommitsInfly.front().first; + + if (result.GetCookie() != readId) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + << " unwaited commit-response with cookie " << result.GetCookie() << "; waiting for " << readId); + return; + } + + Counters.Commits.Inc(); + + ui32 commitDurationMs = (ctx.Now() - CommitsInfly.front().second.StartTime).MilliSeconds(); + Counters.CommitLatency.IncFor(commitDurationMs, 1); + if (commitDurationMs >= AppData(ctx)->PQConfig.GetCommitLatencyBigMs()) { + Counters.SLIBigLatency.Inc(); + } + + CommittedOffset = CommitsInfly.front().second.Offset; + ui64 startReadId = CommitsInfly.front().second.StartReadId; + ctx.Send(ParentId, new TEvPQProxy::TEvCommitDone(Partition.AssignId, startReadId, readId, CommittedOffset)); + + CommitsInfly.pop_front(); + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + << " commit done to position " << CommittedOffset << " endOffset " << EndOffset << " with cookie " << readId); + + CheckRelease(ctx); + PipeGeneration = 0; //reset tries counter - all ok + MakeCommit(ctx); + return; + } + + //This is read + Y_VERIFY(result.HasCmdReadResult()); + const auto& res = result.GetCmdReadResult(); + + if (result.GetCookie() != (ui64)ReadOffset) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition << " unwaited read-response with cookie " << result.GetCookie() << "; waiting for " << ReadOffset << "; current read guid is " << ReadGuid); - return; - } - - Y_VERIFY(res.HasMaxOffset()); - EndOffset = res.GetMaxOffset(); - SizeLag = res.GetSizeLag(); - + return; + } + + Y_VERIFY(res.HasMaxOffset()); + EndOffset = res.GetMaxOffset(); + SizeLag = res.GetSizeLag(); + MigrationStreamingReadServerMessage response; - response.set_status(Ydb::StatusIds::SUCCESS); - - auto* data = response.mutable_data_batch(); - bool hasData = FillBatchedData(data, res, Partition, ReadIdToResponse, ReadOffset, WTime, EndOffset, ctx); - WriteTimestampEstimateMs = Max(WriteTimestampEstimateMs, WTime); - - if (!CommitsDisabled) { - Offsets.push_back({ReadIdToResponse, ReadOffset}); - } - - if (Offsets.size() >= AppData(ctx)->PQConfig.GetMaxReadCookies() + 10) { - ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() << "got more than " << AppData(ctx)->PQConfig.GetMaxReadCookies() << " uncommitted reads", PersQueue::ErrorCode::BAD_REQUEST)); - return; - } - - CurrentRequest.Clear(); - RequestInfly = false; - - Y_VERIFY(!WaitForData); - - if (EndOffset > ReadOffset) { - ctx.Send(ParentId, new TEvPQProxy::TEvPartitionReady(Partition, WTime, SizeLag, ReadOffset, EndOffset)); - } else { - WaitForData = true; - if (PipeClient) //pipe will be recreated soon - WaitDataInPartition(ctx); - } - - if (hasData) { - ++ReadIdToResponse; - } - - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " after read state " << Partition - << " EndOffset " << EndOffset << " ReadOffset " << ReadOffset << " ReadGuid " << ReadGuid << " has messages " << hasData); - + response.set_status(Ydb::StatusIds::SUCCESS); + + auto* data = response.mutable_data_batch(); + bool hasData = FillBatchedData(data, res, Partition, ReadIdToResponse, ReadOffset, WTime, EndOffset, ctx); + WriteTimestampEstimateMs = Max(WriteTimestampEstimateMs, WTime); + + if (!CommitsDisabled) { + Offsets.push_back({ReadIdToResponse, ReadOffset}); + } + + if (Offsets.size() >= AppData(ctx)->PQConfig.GetMaxReadCookies() + 10) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() << "got more than " << AppData(ctx)->PQConfig.GetMaxReadCookies() << " uncommitted reads", PersQueue::ErrorCode::BAD_REQUEST)); + return; + } + + CurrentRequest.Clear(); + RequestInfly = false; + + Y_VERIFY(!WaitForData); + + if (EndOffset > ReadOffset) { + ctx.Send(ParentId, new TEvPQProxy::TEvPartitionReady(Partition, WTime, SizeLag, ReadOffset, EndOffset)); + } else { + WaitForData = true; + if (PipeClient) //pipe will be recreated soon + WaitDataInPartition(ctx); + } + + if (hasData) { + ++ReadIdToResponse; + } + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " after read state " << Partition + << " EndOffset " << EndOffset << " ReadOffset " << ReadOffset << " ReadGuid " << ReadGuid << " has messages " << hasData); + ReadGuid = TString(); auto readResponse = MakeHolder<TEvPQProxy::TEvReadResponse>( std::move(response), @@ -2173,140 +2173,140 @@ void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorCo TDuration::MilliSeconds(res.GetWaitQuotaTimeMs()) ); ctx.Send(ParentId, readResponse.Release()); - CheckRelease(ctx); - - PipeGeneration = 0; //reset tries counter - all ok -} - - -void TPartitionActor::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) { - TEvTabletPipe::TEvClientConnected *msg = ev->Get(); - - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition - << " pipe restart attempt " << PipeGeneration << " pipe creation result: " << msg->Status); - - if (msg->Status != NKikimrProto::OK) { - RestartPipe(ctx, TStringBuilder() << "pipe to tablet is dead " << msg->TabletId, NPersQueue::NErrorCode::ERROR); - return; - } -} - -void TPartitionActor::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) { - RestartPipe(ctx, TStringBuilder() << "pipe to tablet is dead " << ev->Get()->TabletId, NPersQueue::NErrorCode::ERROR); -} - - -void TPartitionActor::Handle(TEvPQProxy::TEvReleasePartition::TPtr&, const TActorContext& ctx) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " (partition)releasing " << Partition << " ReadOffset " << ReadOffset << " ClientCommitOffset " << ClientCommitOffset - << " CommittedOffst " << CommittedOffset); - NeedRelease = true; - CheckRelease(ctx); -} - - -void TPartitionActor::Handle(TEvPQProxy::TEvGetStatus::TPtr&, const TActorContext& ctx) { - ctx.Send(ParentId, new TEvPQProxy::TEvPartitionStatus(Partition, CommittedOffset, EndOffset, WriteTimestampEstimateMs, false)); -} - - -void TPartitionActor::Handle(TEvPQProxy::TEvLockPartition::TPtr& ev, const TActorContext& ctx) { - ClientReadOffset = ev->Get()->ReadOffset; - ClientCommitOffset = ev->Get()->CommitOffset; - ClientVerifyReadOffset = ev->Get()->VerifyReadOffset; - - if (StartReading) { - Y_VERIFY(ev->Get()->StartReading); //otherwise it is signal from actor, this could not be done - ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("double partition locking", PersQueue::ErrorCode::BAD_REQUEST)); - return; - } - - StartReading = ev->Get()->StartReading; - InitLockPartition(ctx); -} - -void TPartitionActor::InitStartReading(const TActorContext& ctx) { - - Y_VERIFY(AllPrepareInited); - Y_VERIFY(!WaitForData); - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " Start reading " << Partition - << " EndOffset " << EndOffset << " readOffset " << ReadOffset << " committedOffset " << CommittedOffset << " clientCommitOffset " << ClientCommitOffset << " clientReadOffset " << ClientReadOffset); - - Counters.PartitionsToBeLocked.Dec(); - LockCounted = false; - - ReadOffset = Max<ui64>(CommittedOffset, ClientReadOffset); - - if (ClientVerifyReadOffset) { - if (ClientReadOffset < ClientCommitOffset) { - ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() - << "trying to read from position that is less than position provided to commit: read " << ClientReadOffset << " committed " << ClientCommitOffset, - PersQueue::ErrorCode::BAD_REQUEST)); - return; - } - - if (ClientCommitOffset < CommittedOffset) { - ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() - << "trying to commit to position that is less than committed: read " << ClientCommitOffset << " committed " << CommittedOffset, - PersQueue::ErrorCode::BAD_REQUEST)); - return; - } - if (ClientReadOffset < CommittedOffset) { - ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() - << "trying to read from position that is less than committed: read " << ClientReadOffset << " committed " << CommittedOffset, - PersQueue::ErrorCode::BAD_REQUEST)); - return; - } - } - - if (ClientCommitOffset > CommittedOffset) { - if (ClientCommitOffset > ReadOffset) { - ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() - << "trying to read from position that is less than provided to commit: read " << ReadOffset << " commit " << ClientCommitOffset, - PersQueue::ErrorCode::BAD_REQUEST)); - return; - } - if (ClientCommitOffset > EndOffset) { - ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() - << "trying to commit to future: commit " << ClientCommitOffset << " endOffset " << EndOffset, - PersQueue::ErrorCode::BAD_REQUEST)); - return; - } - Y_VERIFY(CommitsInfly.empty()); - CommitsInfly.push_back(std::pair<ui64, TCommitInfo>(Max<ui64>(), {Max<ui64>(), ClientCommitOffset, ctx.Now()})); - Counters.SLITotal.Inc(); - if (PipeClient) //pipe will be recreated soon - SendCommit(CommitsInfly.back().first, CommitsInfly.back().second.Offset, ctx); - } else { - ClientCommitOffset = CommittedOffset; - } - - if (EndOffset > ReadOffset) { - ctx.Send(ParentId, new TEvPQProxy::TEvPartitionReady(Partition, WTime, SizeLag, ReadOffset, EndOffset)); - } else { - WaitForData = true; - if (PipeClient) //pipe will be recreated soon - WaitDataInPartition(ctx); - } -} - -//TODO: add here reaction on client release request - -void TPartitionActor::InitLockPartition(const TActorContext& ctx) { - if (PipeClient && AllPrepareInited) { - ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("double partition locking", PersQueue::ErrorCode::BAD_REQUEST)); - return; - } - if (!LockCounted) { - Counters.PartitionsToBeLocked.Inc(); - LockCounted = true; - } - if (StartReading) - AllPrepareInited = true; - - if (FirstInit) { - Y_VERIFY(!PipeClient); - FirstInit = false; - NTabletPipe::TClientConfig clientConfig; + CheckRelease(ctx); + + PipeGeneration = 0; //reset tries counter - all ok +} + + +void TPartitionActor::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) { + TEvTabletPipe::TEvClientConnected *msg = ev->Get(); + + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + << " pipe restart attempt " << PipeGeneration << " pipe creation result: " << msg->Status); + + if (msg->Status != NKikimrProto::OK) { + RestartPipe(ctx, TStringBuilder() << "pipe to tablet is dead " << msg->TabletId, NPersQueue::NErrorCode::ERROR); + return; + } +} + +void TPartitionActor::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) { + RestartPipe(ctx, TStringBuilder() << "pipe to tablet is dead " << ev->Get()->TabletId, NPersQueue::NErrorCode::ERROR); +} + + +void TPartitionActor::Handle(TEvPQProxy::TEvReleasePartition::TPtr&, const TActorContext& ctx) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " (partition)releasing " << Partition << " ReadOffset " << ReadOffset << " ClientCommitOffset " << ClientCommitOffset + << " CommittedOffst " << CommittedOffset); + NeedRelease = true; + CheckRelease(ctx); +} + + +void TPartitionActor::Handle(TEvPQProxy::TEvGetStatus::TPtr&, const TActorContext& ctx) { + ctx.Send(ParentId, new TEvPQProxy::TEvPartitionStatus(Partition, CommittedOffset, EndOffset, WriteTimestampEstimateMs, false)); +} + + +void TPartitionActor::Handle(TEvPQProxy::TEvLockPartition::TPtr& ev, const TActorContext& ctx) { + ClientReadOffset = ev->Get()->ReadOffset; + ClientCommitOffset = ev->Get()->CommitOffset; + ClientVerifyReadOffset = ev->Get()->VerifyReadOffset; + + if (StartReading) { + Y_VERIFY(ev->Get()->StartReading); //otherwise it is signal from actor, this could not be done + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("double partition locking", PersQueue::ErrorCode::BAD_REQUEST)); + return; + } + + StartReading = ev->Get()->StartReading; + InitLockPartition(ctx); +} + +void TPartitionActor::InitStartReading(const TActorContext& ctx) { + + Y_VERIFY(AllPrepareInited); + Y_VERIFY(!WaitForData); + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " Start reading " << Partition + << " EndOffset " << EndOffset << " readOffset " << ReadOffset << " committedOffset " << CommittedOffset << " clientCommitOffset " << ClientCommitOffset << " clientReadOffset " << ClientReadOffset); + + Counters.PartitionsToBeLocked.Dec(); + LockCounted = false; + + ReadOffset = Max<ui64>(CommittedOffset, ClientReadOffset); + + if (ClientVerifyReadOffset) { + if (ClientReadOffset < ClientCommitOffset) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() + << "trying to read from position that is less than position provided to commit: read " << ClientReadOffset << " committed " << ClientCommitOffset, + PersQueue::ErrorCode::BAD_REQUEST)); + return; + } + + if (ClientCommitOffset < CommittedOffset) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() + << "trying to commit to position that is less than committed: read " << ClientCommitOffset << " committed " << CommittedOffset, + PersQueue::ErrorCode::BAD_REQUEST)); + return; + } + if (ClientReadOffset < CommittedOffset) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() + << "trying to read from position that is less than committed: read " << ClientReadOffset << " committed " << CommittedOffset, + PersQueue::ErrorCode::BAD_REQUEST)); + return; + } + } + + if (ClientCommitOffset > CommittedOffset) { + if (ClientCommitOffset > ReadOffset) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() + << "trying to read from position that is less than provided to commit: read " << ReadOffset << " commit " << ClientCommitOffset, + PersQueue::ErrorCode::BAD_REQUEST)); + return; + } + if (ClientCommitOffset > EndOffset) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() + << "trying to commit to future: commit " << ClientCommitOffset << " endOffset " << EndOffset, + PersQueue::ErrorCode::BAD_REQUEST)); + return; + } + Y_VERIFY(CommitsInfly.empty()); + CommitsInfly.push_back(std::pair<ui64, TCommitInfo>(Max<ui64>(), {Max<ui64>(), ClientCommitOffset, ctx.Now()})); + Counters.SLITotal.Inc(); + if (PipeClient) //pipe will be recreated soon + SendCommit(CommitsInfly.back().first, CommitsInfly.back().second.Offset, ctx); + } else { + ClientCommitOffset = CommittedOffset; + } + + if (EndOffset > ReadOffset) { + ctx.Send(ParentId, new TEvPQProxy::TEvPartitionReady(Partition, WTime, SizeLag, ReadOffset, EndOffset)); + } else { + WaitForData = true; + if (PipeClient) //pipe will be recreated soon + WaitDataInPartition(ctx); + } +} + +//TODO: add here reaction on client release request + +void TPartitionActor::InitLockPartition(const TActorContext& ctx) { + if (PipeClient && AllPrepareInited) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("double partition locking", PersQueue::ErrorCode::BAD_REQUEST)); + return; + } + if (!LockCounted) { + Counters.PartitionsToBeLocked.Inc(); + LockCounted = true; + } + if (StartReading) + AllPrepareInited = true; + + if (FirstInit) { + Y_VERIFY(!PipeClient); + FirstInit = false; + NTabletPipe::TClientConfig clientConfig; clientConfig.RetryPolicy = { .RetryLimitCount = 6, .MinRetryTime = TDuration::MilliSeconds(10), @@ -2315,283 +2315,283 @@ void TPartitionActor::InitLockPartition(const TActorContext& ctx) { .DoFirstRetryInstantly = true }; PipeClient = ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, TabletID, clientConfig)); - - NKikimrClient::TPersQueueRequest request; - + + NKikimrClient::TPersQueueRequest request; + request.MutablePartitionRequest()->SetTopic(Partition.TopicConverter->GetPrimaryPath()); - request.MutablePartitionRequest()->SetPartition(Partition.Partition); - request.MutablePartitionRequest()->SetCookie(INIT_COOKIE); - + request.MutablePartitionRequest()->SetPartition(Partition.Partition); + request.MutablePartitionRequest()->SetCookie(INIT_COOKIE); + ActorIdToProto(PipeClient, request.MutablePartitionRequest()->MutablePipeClient()); - - auto cmd = request.MutablePartitionRequest()->MutableCmdCreateSession(); - cmd->SetClientId(ClientId); - cmd->SetSessionId(Session); - cmd->SetGeneration(Generation); - cmd->SetStep(Step); - - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " INITING " << Partition); - - TAutoPtr<TEvPersQueue::TEvRequest> req(new TEvPersQueue::TEvRequest); - Y_VERIFY(!RequestInfly); - CurrentRequest = request; - RequestInfly = true; - req->Record.Swap(&request); - - NTabletPipe::SendData(ctx, PipeClient, req.Release()); - } else { - Y_VERIFY(StartReading); //otherwise it is double locking from actor, not client - client makes lock always with StartReading == true - Y_VERIFY(InitDone); - InitStartReading(ctx); - } -} - - -void TPartitionActor::WaitDataInPartition(const TActorContext& ctx) { - - if (WaitDataInfly.size() > 1) //already got 2 requests inflight - return; - Y_VERIFY(InitDone); - - Y_VERIFY(PipeClient); - - if (!WaitForData) - return; - - Y_VERIFY(ReadOffset >= EndOffset); - - TAutoPtr<TEvPersQueue::TEvHasDataInfo> event(new TEvPersQueue::TEvHasDataInfo()); - event->Record.SetPartition(Partition.Partition); - event->Record.SetOffset(ReadOffset); - event->Record.SetCookie(++WaitDataCookie); - ui64 deadline = (ctx.Now() + WAIT_DATA - WAIT_DELTA).MilliSeconds(); - event->Record.SetDeadline(deadline); - event->Record.SetClientId(ClientId); - - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition << " wait data in partition inited, cookie " << WaitDataCookie); - - NTabletPipe::SendData(ctx, PipeClient, event.Release()); - - ctx.Schedule(PREWAIT_DATA, new TEvents::TEvWakeup()); - - ctx.Schedule(WAIT_DATA, new TEvPQProxy::TEvDeadlineExceeded(WaitDataCookie)); - - WaitDataInfly.insert(WaitDataCookie); -} - -void TPartitionActor::Handle(TEvPersQueue::TEvHasDataInfoResponse::TPtr& ev, const TActorContext& ctx) { - const auto& record = ev->Get()->Record; - - WriteTimestampEstimateMs = record.GetWriteTimestampEstimateMS(); - - auto it = WaitDataInfly.find(ev->Get()->Record.GetCookie()); - if (it == WaitDataInfly.end()) { - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition - << " unwaited response for WaitData " << ev->Get()->Record); - return; - } - WaitDataInfly.erase(it); - if (!WaitForData) - return; - - Counters.WaitsForData.Inc(); - - Y_VERIFY(record.HasEndOffset()); - Y_VERIFY(EndOffset <= record.GetEndOffset()); //end offset could not be changed if no data arrived, but signal will be sended anyway after timeout - Y_VERIFY(ReadOffset >= EndOffset); //otherwise no WaitData were needed - - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition - << " wait for data done: " << " readOffset " << ReadOffset << " EndOffset " << EndOffset << " newEndOffset " - << record.GetEndOffset() << " commitOffset " << CommittedOffset << " clientCommitOffset " << ClientCommitOffset << " cookie " << ev->Get()->Record.GetCookie()); - - EndOffset = record.GetEndOffset(); - SizeLag = record.GetSizeLag(); - - if (ReadOffset < EndOffset) { - WaitForData = false; - WaitDataInfly.clear(); - ctx.Send(ParentId, new TEvPQProxy::TEvPartitionReady(Partition, WTime, SizeLag, ReadOffset, EndOffset)); - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition - << " ready for read with readOffset " << ReadOffset << " endOffset " << EndOffset); - } else { - if (PipeClient) - WaitDataInPartition(ctx); - } - CheckRelease(ctx); //just for logging purpose -} - - -void TPartitionActor::Handle(TEvPQProxy::TEvRead::TPtr& ev, const TActorContext& ctx) { - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " READ FROM " << Partition - << "maxCount " << ev->Get()->MaxCount << " maxSize " << ev->Get()->MaxSize << " maxTimeLagMs " << ev->Get()->MaxTimeLagMs << " readTimestampMs " << ev->Get()->ReadTimestampMs - << " readOffset " << ReadOffset << " EndOffset " << EndOffset << " ClientCommitOffset " << ClientCommitOffset << " committedOffset " << CommittedOffset << " Guid " << ev->Get()->Guid); - - Y_VERIFY(!NeedRelease); - Y_VERIFY(!Released); - + + auto cmd = request.MutablePartitionRequest()->MutableCmdCreateSession(); + cmd->SetClientId(ClientId); + cmd->SetSessionId(Session); + cmd->SetGeneration(Generation); + cmd->SetStep(Step); + + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " INITING " << Partition); + + TAutoPtr<TEvPersQueue::TEvRequest> req(new TEvPersQueue::TEvRequest); + Y_VERIFY(!RequestInfly); + CurrentRequest = request; + RequestInfly = true; + req->Record.Swap(&request); + + NTabletPipe::SendData(ctx, PipeClient, req.Release()); + } else { + Y_VERIFY(StartReading); //otherwise it is double locking from actor, not client - client makes lock always with StartReading == true + Y_VERIFY(InitDone); + InitStartReading(ctx); + } +} + + +void TPartitionActor::WaitDataInPartition(const TActorContext& ctx) { + + if (WaitDataInfly.size() > 1) //already got 2 requests inflight + return; + Y_VERIFY(InitDone); + + Y_VERIFY(PipeClient); + + if (!WaitForData) + return; + + Y_VERIFY(ReadOffset >= EndOffset); + + TAutoPtr<TEvPersQueue::TEvHasDataInfo> event(new TEvPersQueue::TEvHasDataInfo()); + event->Record.SetPartition(Partition.Partition); + event->Record.SetOffset(ReadOffset); + event->Record.SetCookie(++WaitDataCookie); + ui64 deadline = (ctx.Now() + WAIT_DATA - WAIT_DELTA).MilliSeconds(); + event->Record.SetDeadline(deadline); + event->Record.SetClientId(ClientId); + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition << " wait data in partition inited, cookie " << WaitDataCookie); + + NTabletPipe::SendData(ctx, PipeClient, event.Release()); + + ctx.Schedule(PREWAIT_DATA, new TEvents::TEvWakeup()); + + ctx.Schedule(WAIT_DATA, new TEvPQProxy::TEvDeadlineExceeded(WaitDataCookie)); + + WaitDataInfly.insert(WaitDataCookie); +} + +void TPartitionActor::Handle(TEvPersQueue::TEvHasDataInfoResponse::TPtr& ev, const TActorContext& ctx) { + const auto& record = ev->Get()->Record; + + WriteTimestampEstimateMs = record.GetWriteTimestampEstimateMS(); + + auto it = WaitDataInfly.find(ev->Get()->Record.GetCookie()); + if (it == WaitDataInfly.end()) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + << " unwaited response for WaitData " << ev->Get()->Record); + return; + } + WaitDataInfly.erase(it); + if (!WaitForData) + return; + + Counters.WaitsForData.Inc(); + + Y_VERIFY(record.HasEndOffset()); + Y_VERIFY(EndOffset <= record.GetEndOffset()); //end offset could not be changed if no data arrived, but signal will be sended anyway after timeout + Y_VERIFY(ReadOffset >= EndOffset); //otherwise no WaitData were needed + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + << " wait for data done: " << " readOffset " << ReadOffset << " EndOffset " << EndOffset << " newEndOffset " + << record.GetEndOffset() << " commitOffset " << CommittedOffset << " clientCommitOffset " << ClientCommitOffset << " cookie " << ev->Get()->Record.GetCookie()); + + EndOffset = record.GetEndOffset(); + SizeLag = record.GetSizeLag(); + + if (ReadOffset < EndOffset) { + WaitForData = false; + WaitDataInfly.clear(); + ctx.Send(ParentId, new TEvPQProxy::TEvPartitionReady(Partition, WTime, SizeLag, ReadOffset, EndOffset)); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + << " ready for read with readOffset " << ReadOffset << " endOffset " << EndOffset); + } else { + if (PipeClient) + WaitDataInPartition(ctx); + } + CheckRelease(ctx); //just for logging purpose +} + + +void TPartitionActor::Handle(TEvPQProxy::TEvRead::TPtr& ev, const TActorContext& ctx) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " READ FROM " << Partition + << "maxCount " << ev->Get()->MaxCount << " maxSize " << ev->Get()->MaxSize << " maxTimeLagMs " << ev->Get()->MaxTimeLagMs << " readTimestampMs " << ev->Get()->ReadTimestampMs + << " readOffset " << ReadOffset << " EndOffset " << EndOffset << " ClientCommitOffset " << ClientCommitOffset << " committedOffset " << CommittedOffset << " Guid " << ev->Get()->Guid); + + Y_VERIFY(!NeedRelease); + Y_VERIFY(!Released); + Y_VERIFY(ReadGuid.empty()); - Y_VERIFY(!RequestInfly); - + Y_VERIFY(!RequestInfly); + ReadGuid = ev->Get()->Guid; - - const auto req = ev->Get(); - - NKikimrClient::TPersQueueRequest request; - + + const auto req = ev->Get(); + + NKikimrClient::TPersQueueRequest request; + request.MutablePartitionRequest()->SetTopic(Partition.TopicConverter->GetPrimaryPath()); - - request.MutablePartitionRequest()->SetPartition(Partition.Partition); - request.MutablePartitionRequest()->SetCookie((ui64)ReadOffset); - + + request.MutablePartitionRequest()->SetPartition(Partition.Partition); + request.MutablePartitionRequest()->SetCookie((ui64)ReadOffset); + ActorIdToProto(PipeClient, request.MutablePartitionRequest()->MutablePipeClient()); - auto read = request.MutablePartitionRequest()->MutableCmdRead(); - read->SetClientId(ClientId); - read->SetClientDC(ClientDC); - if (req->MaxCount) { - read->SetCount(req->MaxCount); - } - if (req->MaxSize) { - read->SetBytes(req->MaxSize); - } - if (req->MaxTimeLagMs) { - read->SetMaxTimeLagMs(req->MaxTimeLagMs); - } - if (req->ReadTimestampMs) { - read->SetReadTimestampMs(req->ReadTimestampMs); - } - - read->SetOffset(ReadOffset); - read->SetTimeoutMs(READ_TIMEOUT_DURATION.MilliSeconds()); - RequestInfly = true; - CurrentRequest = request; - - if (!PipeClient) //Pipe will be recreated soon - return; - - TAutoPtr<TEvPersQueue::TEvRequest> event(new TEvPersQueue::TEvRequest); - event->Record.Swap(&request); - - NTabletPipe::SendData(ctx, PipeClient, event.Release()); -} - - -void TPartitionActor::Handle(TEvPQProxy::TEvCommitCookie::TPtr& ev, const TActorContext& ctx) { - //TODO: add here processing of cookie == 0 if ReadOffset > ClientCommittedOffset if any - Y_VERIFY(ev->Get()->AssignId == Partition.AssignId); - for (auto& readId : ev->Get()->CommitInfo.Cookies) { - if (readId == 0) { - if (ReadIdCommitted > 0) { - ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() << "commit of 0 allowed only as first commit in " << Partition, PersQueue::ErrorCode::BAD_REQUEST)); - return; - } - NextCommits.insert(0); - continue; - } - if (readId <= ReadIdCommitted) { - ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() << "commit of " << readId << " that is already committed in " << Partition, PersQueue::ErrorCode::BAD_REQUEST)); - return; - } - if (readId >= ReadIdToResponse) { - ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() << "commit of unknown cookie " << readId << " in " << Partition, PersQueue::ErrorCode::BAD_REQUEST)); - return; - } - bool res = NextCommits.insert(readId).second; - if (!res) { - ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() << "double commit of cookie " << readId << " in " << Partition, PersQueue::ErrorCode::BAD_REQUEST)); - return; - } - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " commit request from client for " << readId << " in " << Partition); - } - - MakeCommit(ctx); - - if (NextCommits.size() >= AppData(ctx)->PQConfig.GetMaxReadCookies()) { - ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() << "got more than " << AppData(ctx)->PQConfig.GetMaxReadCookies() - << " unordered cookies to commit in " << Partition << ", last cookie is " << ReadIdCommitted, - PersQueue::ErrorCode::BAD_REQUEST)); - return; - } -} - -void TPartitionActor::Handle(TEvPQProxy::TEvCommitRange::TPtr& ev, const TActorContext& ctx) { - Y_VERIFY(ev->Get()->AssignId == Partition.AssignId); - - for (auto& c : ev->Get()->CommitInfo.Ranges) { - NextRanges.InsertInterval(c.first, c.second); - } - - MakeCommit(ctx); - - if (NextRanges.GetNumIntervals() >= AppData(ctx)->PQConfig.GetMaxReadCookies()) { - ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() << "got more than " << AppData(ctx)->PQConfig.GetMaxReadCookies() - << " unordered offset ranges to commit in " << Partition - << ", last to be committed offset is " << ClientCommitOffset - << ", committed offset is " << CommittedOffset, - PersQueue::ErrorCode::BAD_REQUEST)); - return; - } -} - - - -void TPartitionActor::Die(const TActorContext& ctx) { - if (PipeClient) - NTabletPipe::CloseClient(ctx, PipeClient); - TActorBootstrapped<TPartitionActor>::Die(ctx); -} - -void TPartitionActor::HandlePoison(TEvents::TEvPoisonPill::TPtr&, const TActorContext& ctx) { - if (LockCounted) - Counters.PartitionsToBeLocked.Dec(); - Die(ctx); -} - -void TPartitionActor::Handle(TEvPQProxy::TEvDeadlineExceeded::TPtr& ev, const TActorContext& ctx) { - - WaitDataInfly.erase(ev->Get()->Cookie); - if (ReadOffset >= EndOffset && WaitDataInfly.size() <= 1 && PipeClient) { - Y_VERIFY(WaitForData); - WaitDataInPartition(ctx); - } - -} - - -void TPartitionActor::HandleWakeup(const TActorContext& ctx) { - if (ReadOffset >= EndOffset && WaitDataInfly.size() <= 1 && PipeClient) { //send one more - Y_VERIFY(WaitForData); - WaitDataInPartition(ctx); - } -} - -///////////////// AuthAndInitActor + auto read = request.MutablePartitionRequest()->MutableCmdRead(); + read->SetClientId(ClientId); + read->SetClientDC(ClientDC); + if (req->MaxCount) { + read->SetCount(req->MaxCount); + } + if (req->MaxSize) { + read->SetBytes(req->MaxSize); + } + if (req->MaxTimeLagMs) { + read->SetMaxTimeLagMs(req->MaxTimeLagMs); + } + if (req->ReadTimestampMs) { + read->SetReadTimestampMs(req->ReadTimestampMs); + } + + read->SetOffset(ReadOffset); + read->SetTimeoutMs(READ_TIMEOUT_DURATION.MilliSeconds()); + RequestInfly = true; + CurrentRequest = request; + + if (!PipeClient) //Pipe will be recreated soon + return; + + TAutoPtr<TEvPersQueue::TEvRequest> event(new TEvPersQueue::TEvRequest); + event->Record.Swap(&request); + + NTabletPipe::SendData(ctx, PipeClient, event.Release()); +} + + +void TPartitionActor::Handle(TEvPQProxy::TEvCommitCookie::TPtr& ev, const TActorContext& ctx) { + //TODO: add here processing of cookie == 0 if ReadOffset > ClientCommittedOffset if any + Y_VERIFY(ev->Get()->AssignId == Partition.AssignId); + for (auto& readId : ev->Get()->CommitInfo.Cookies) { + if (readId == 0) { + if (ReadIdCommitted > 0) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() << "commit of 0 allowed only as first commit in " << Partition, PersQueue::ErrorCode::BAD_REQUEST)); + return; + } + NextCommits.insert(0); + continue; + } + if (readId <= ReadIdCommitted) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() << "commit of " << readId << " that is already committed in " << Partition, PersQueue::ErrorCode::BAD_REQUEST)); + return; + } + if (readId >= ReadIdToResponse) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() << "commit of unknown cookie " << readId << " in " << Partition, PersQueue::ErrorCode::BAD_REQUEST)); + return; + } + bool res = NextCommits.insert(readId).second; + if (!res) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() << "double commit of cookie " << readId << " in " << Partition, PersQueue::ErrorCode::BAD_REQUEST)); + return; + } + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " commit request from client for " << readId << " in " << Partition); + } + + MakeCommit(ctx); + + if (NextCommits.size() >= AppData(ctx)->PQConfig.GetMaxReadCookies()) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() << "got more than " << AppData(ctx)->PQConfig.GetMaxReadCookies() + << " unordered cookies to commit in " << Partition << ", last cookie is " << ReadIdCommitted, + PersQueue::ErrorCode::BAD_REQUEST)); + return; + } +} + +void TPartitionActor::Handle(TEvPQProxy::TEvCommitRange::TPtr& ev, const TActorContext& ctx) { + Y_VERIFY(ev->Get()->AssignId == Partition.AssignId); + + for (auto& c : ev->Get()->CommitInfo.Ranges) { + NextRanges.InsertInterval(c.first, c.second); + } + + MakeCommit(ctx); + + if (NextRanges.GetNumIntervals() >= AppData(ctx)->PQConfig.GetMaxReadCookies()) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() << "got more than " << AppData(ctx)->PQConfig.GetMaxReadCookies() + << " unordered offset ranges to commit in " << Partition + << ", last to be committed offset is " << ClientCommitOffset + << ", committed offset is " << CommittedOffset, + PersQueue::ErrorCode::BAD_REQUEST)); + return; + } +} + + + +void TPartitionActor::Die(const TActorContext& ctx) { + if (PipeClient) + NTabletPipe::CloseClient(ctx, PipeClient); + TActorBootstrapped<TPartitionActor>::Die(ctx); +} + +void TPartitionActor::HandlePoison(TEvents::TEvPoisonPill::TPtr&, const TActorContext& ctx) { + if (LockCounted) + Counters.PartitionsToBeLocked.Dec(); + Die(ctx); +} + +void TPartitionActor::Handle(TEvPQProxy::TEvDeadlineExceeded::TPtr& ev, const TActorContext& ctx) { + + WaitDataInfly.erase(ev->Get()->Cookie); + if (ReadOffset >= EndOffset && WaitDataInfly.size() <= 1 && PipeClient) { + Y_VERIFY(WaitForData); + WaitDataInPartition(ctx); + } + +} + + +void TPartitionActor::HandleWakeup(const TActorContext& ctx) { + if (ReadOffset >= EndOffset && WaitDataInfly.size() <= 1 && PipeClient) { //send one more + Y_VERIFY(WaitForData); + WaitDataInPartition(ctx); + } +} + +///////////////// AuthAndInitActor TReadInitAndAuthActor::TReadInitAndAuthActor( const TActorContext& ctx, const TActorId& parentId, const TString& clientId, const ui64 cookie, const TString& session, const NActors::TActorId& metaCache, const NActors::TActorId& newSchemeCache, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, TIntrusivePtr<NACLib::TUserToken> token, const NPersQueue::TTopicsToConverter& topics ) - : ParentId(parentId) - , Cookie(cookie) - , Session(session) + : ParentId(parentId) + , Cookie(cookie) + , Session(session) , MetaCacheId(metaCache) - , NewSchemeCache(newSchemeCache) - , ClientId(clientId) + , NewSchemeCache(newSchemeCache) + , ClientId(clientId) , ClientPath(NPersQueue::ConvertOldConsumerName(ClientId, ctx)) - , Token(token) - , Counters(counters) -{ - for (const auto& t : topics) { + , Token(token) + , Counters(counters) +{ + for (const auto& t : topics) { Topics[t.first].TopicNameConverter = t.second; - } -} - - -TReadInitAndAuthActor::~TReadInitAndAuthActor() = default; - - -void TReadInitAndAuthActor::Bootstrap(const TActorContext &ctx) { - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " auth for : " << ClientId); + } +} + + +TReadInitAndAuthActor::~TReadInitAndAuthActor() = default; + + +void TReadInitAndAuthActor::Bootstrap(const TActorContext &ctx) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " auth for : " << ClientId); Become(&TThis::StateFunc); TVector<TString> topicNames; for (const auto& topic : Topics) { @@ -2599,36 +2599,36 @@ void TReadInitAndAuthActor::Bootstrap(const TActorContext &ctx) { } DoCheckACL = AppData(ctx)->PQConfig.GetCheckACL() && Token; ctx.Send(MetaCacheId, new TEvDescribeTopicsRequest(topicNames)); -} - -void TReadInitAndAuthActor::Die(const TActorContext& ctx) { - for (auto& t : Topics) { - if (t.second.PipeClient) - NTabletPipe::CloseClient(ctx, t.second.PipeClient); - } - - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " auth is DEAD"); - - TActorBootstrapped<TReadInitAndAuthActor>::Die(ctx); -} - -void TReadInitAndAuthActor::CloseSession(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode code, const TActorContext& ctx) -{ - ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(errorReason, code)); - Die(ctx); -} - +} + +void TReadInitAndAuthActor::Die(const TActorContext& ctx) { + for (auto& t : Topics) { + if (t.second.PipeClient) + NTabletPipe::CloseClient(ctx, t.second.PipeClient); + } + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " auth is DEAD"); + + TActorBootstrapped<TReadInitAndAuthActor>::Die(ctx); +} + +void TReadInitAndAuthActor::CloseSession(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode code, const TActorContext& ctx) +{ + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(errorReason, code)); + Die(ctx); +} + void TReadInitAndAuthActor::SendCacheNavigateRequest(const TActorContext& ctx, const TString& path) { auto schemeCacheRequest = MakeHolder<NSchemeCache::TSchemeCacheNavigate>(); NSchemeCache::TSchemeCacheNavigate::TEntry entry; entry.Path = NKikimr::SplitPath(path); - entry.SyncVersion = true; + entry.SyncVersion = true; entry.Operation = NSchemeCache::TSchemeCacheNavigate::OpPath; schemeCacheRequest->ResultSet.emplace_back(entry); LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " Send client acl request"); ctx.Send(NewSchemeCache, new TEvTxProxySchemeCache::TEvNavigateKeySet(schemeCacheRequest.Release())); } - + bool TReadInitAndAuthActor::ProcessTopicSchemeCacheResponse( const NSchemeCache::TSchemeCacheNavigate::TEntry& entry, THashMap<TString, TTopicHolder>::iterator topicsIter, @@ -2639,7 +2639,7 @@ bool TReadInitAndAuthActor::ProcessTopicSchemeCacheResponse( topicsIter->second.TabletID = pqDescr.GetBalancerTabletID(); return CheckTopicACL(entry, topicsIter->first, ctx); } - + void TReadInitAndAuthActor::HandleTopicsDescribeResponse(TEvDescribeTopicsResponse::TPtr& ev, const TActorContext& ctx) { LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " Handle describe topics response"); @@ -2648,7 +2648,7 @@ void TReadInitAndAuthActor::HandleTopicsDescribeResponse(TEvDescribeTopicsRespon auto path = JoinPath(entry.Path); auto it = Topics.find(path); Y_VERIFY(it != Topics.end()); - + if (processResult.IsFatal) { Topics.erase(it); if (Topics.empty()) { @@ -2658,11 +2658,11 @@ void TReadInitAndAuthActor::HandleTopicsDescribeResponse(TEvDescribeTopicsRespon } else { continue; } - } + } if (!ProcessTopicSchemeCacheResponse(entry, it, ctx)) return; - - } + + } if (Topics.empty()) { CloseSession("no topics found", PersQueue::ErrorCode::BAD_REQUEST, ctx); return; @@ -2674,9 +2674,9 @@ void TReadInitAndAuthActor::HandleTopicsDescribeResponse(TEvDescribeTopicsRespon } else { FinishInitialization(ctx); } -} - - +} + + bool TReadInitAndAuthActor::CheckTopicACL( const NSchemeCache::TSchemeCacheNavigate::TEntry& entry, const TString& topic, const TActorContext& ctx ) { @@ -2686,7 +2686,7 @@ bool TReadInitAndAuthActor::CheckTopicACL( bool reqAuthRead = DoCheckACL && ( pqDescr.GetPQTabletConfig().GetRequireAuthRead() || alwaysCheckPermissions ); - + if (reqAuthRead && !CheckACLPermissionsForNavigate( entry.SecurityObject, topic, NACLib::EAccessRights::SelectRow, "No ReadTopic permissions", ctx @@ -2701,32 +2701,32 @@ bool TReadInitAndAuthActor::CheckTopicACL( if (cons == ClientId) { found = true; break; - } - } + } + } if (!found) { CloseSession( TStringBuilder() << "no read rule provided for consumer '" << ClientPath << "' in topic '" << topic << "'", PersQueue::ErrorCode::BAD_REQUEST, ctx ); - return false; - } - } - return true; -} - - + return false; + } + } + return true; +} + + void TReadInitAndAuthActor::CheckClientACL(const TActorContext& ctx) { // ToDo[migration] - Through converter/metacache - ? SendCacheNavigateRequest(ctx, AppData(ctx)->PQConfig.GetRoot() + "/" + ClientPath); } - - + + void TReadInitAndAuthActor::HandleClientSchemeCacheResponse( TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx ) { TEvTxProxySchemeCache::TEvNavigateKeySetResult* msg = ev->Get(); const NSchemeCache::TSchemeCacheNavigate* navigate = msg->Request.Get(); - + Y_VERIFY(navigate->ResultSet.size() == 1); auto& entry = navigate->ResultSet.front(); auto path = "/" + JoinPath(entry.Path); // ToDo [migration] - through converter ? @@ -2734,17 +2734,17 @@ void TReadInitAndAuthActor::HandleClientSchemeCacheResponse( const NSchemeCache::TSchemeCacheNavigate::EStatus status = navigate->ResultSet.front().Status; CloseSession(TStringBuilder() << "Failed to read ACL for '" << path << "' Scheme cache error : " << status, PersQueue::ErrorCode::ERROR, ctx); return; - } - + } + NACLib::EAccessRights rights = (NACLib::EAccessRights)(NACLib::EAccessRights::ReadAttributes + NACLib::EAccessRights::WriteAttributes); if ( !CheckACLPermissionsForNavigate(entry.SecurityObject, path, rights, "No ReadAsConsumer permissions", ctx) ) { - return; + return; } FinishInitialization(ctx); } - + bool TReadInitAndAuthActor::CheckACLPermissionsForNavigate( const TIntrusivePtr<TSecurityObject>& secObject, const TString& path, @@ -2760,10 +2760,10 @@ bool TReadInitAndAuthActor::CheckACLPermissionsForNavigate( PersQueue::ErrorCode::ACCESS_DENIED, ctx ); return false; - } + } return true; -} - +} + void TReadInitAndAuthActor::FinishInitialization(const TActorContext& ctx) { TTopicTabletsPairs res; @@ -2774,251 +2774,251 @@ void TReadInitAndAuthActor::FinishInitialization(const TActorContext& ctx) { } ctx.Send(ParentId, new TEvPQProxy::TEvAuthResultOk(std::move(res))); Die(ctx); -} - -// READINFOACTOR +} + +// READINFOACTOR TReadInfoActor::TReadInfoActor( TEvPQReadInfoRequest* request, const NPersQueue::TTopicsListController& topicsHandler, const TActorId& schemeCache, const TActorId& newSchemeCache, TIntrusivePtr<NMonitoring::TDynamicCounters> counters ) - : TBase(request) - , SchemeCache(schemeCache) - , NewSchemeCache(newSchemeCache) - , AuthInitActor() - , Counters(counters) + : TBase(request) + , SchemeCache(schemeCache) + , NewSchemeCache(newSchemeCache) + , AuthInitActor() + , Counters(counters) , TopicsHandler(topicsHandler) -{ - Y_ASSERT(request); -} - - - -TReadInfoActor::~TReadInfoActor() = default; - - -void TReadInfoActor::Bootstrap(const TActorContext& ctx) { - TBase::Bootstrap(ctx); - Become(&TThis::StateFunc); - +{ + Y_ASSERT(request); +} + + + +TReadInfoActor::~TReadInfoActor() = default; + + +void TReadInfoActor::Bootstrap(const TActorContext& ctx) { + TBase::Bootstrap(ctx); + Become(&TThis::StateFunc); + auto request = dynamic_cast<const ReadInfoRequest*>(GetProtoRequest()); - Y_VERIFY(request); + Y_VERIFY(request); ClientId = NPersQueue::ConvertNewConsumerName(request->consumer().path(), ctx); - - bool readOnlyLocal = request->get_only_original(); - - TIntrusivePtr<NACLib::TUserToken> token; + + bool readOnlyLocal = request->get_only_original(); + + TIntrusivePtr<NACLib::TUserToken> token; if (Request_->GetInternalToken().empty()) { - if (AppData(ctx)->PQConfig.GetRequireCredentialsInNewProtocol()) { - AnswerError("Unauthenticated access is forbidden, please provide credentials", PersQueue::ErrorCode::ACCESS_DENIED, ctx); - return; - } - } else { + if (AppData(ctx)->PQConfig.GetRequireCredentialsInNewProtocol()) { + AnswerError("Unauthenticated access is forbidden, please provide credentials", PersQueue::ErrorCode::ACCESS_DENIED, ctx); + return; + } + } else { token = new NACLib::TUserToken(Request_->GetInternalToken()); - } - - THashSet<TString> topicsToResolve; - - for (auto& t : request->topics()) { + } + + THashSet<TString> topicsToResolve; + + for (auto& t : request->topics()) { if (t.path().empty()) { - AnswerError("empty topic in init request", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; - } + AnswerError("empty topic in init request", PersQueue::ErrorCode::BAD_REQUEST, ctx); + return; + } topicsToResolve.insert(t.path()); - } - + } + AuthInitActor = ctx.Register(new TReadInitAndAuthActor( ctx, ctx.SelfID, ClientId, 0, TString("read_info:") + Request().GetPeerName(), SchemeCache, NewSchemeCache, Counters, token, TopicsHandler.GetReadTopicsList(topicsToResolve, readOnlyLocal, Request().GetDatabaseName().GetOrElse(TString())) )); -} - - -void TReadInfoActor::Die(const TActorContext& ctx) { - - ctx.Send(AuthInitActor, new TEvents::TEvPoisonPill()); - - TActorBootstrapped<TReadInfoActor>::Die(ctx); -} - - -void TReadInfoActor::Handle(TEvPQProxy::TEvAuthResultOk::TPtr& ev, const TActorContext& ctx) { - - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "GetReadInfo auth ok fo read info, got " << ev->Get()->TopicAndTablets.size() << " topics"); +} + + +void TReadInfoActor::Die(const TActorContext& ctx) { + + ctx.Send(AuthInitActor, new TEvents::TEvPoisonPill()); + + TActorBootstrapped<TReadInfoActor>::Die(ctx); +} + + +void TReadInfoActor::Handle(TEvPQProxy::TEvAuthResultOk::TPtr& ev, const TActorContext& ctx) { + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "GetReadInfo auth ok fo read info, got " << ev->Get()->TopicAndTablets.size() << " topics"); TopicAndTablets = std::move(ev->Get()->TopicAndTablets); - if (TopicAndTablets.empty()) { - AnswerError("empty list of topics", PersQueue::ErrorCode::UNKNOWN_TOPIC, ctx); - return; - } - - NKikimrClient::TPersQueueRequest proto; - proto.MutableMetaRequest()->MutableCmdGetReadSessionsInfo()->SetClientId(ClientId); - for (auto& t : TopicAndTablets) { + if (TopicAndTablets.empty()) { + AnswerError("empty list of topics", PersQueue::ErrorCode::UNKNOWN_TOPIC, ctx); + return; + } + + NKikimrClient::TPersQueueRequest proto; + proto.MutableMetaRequest()->MutableCmdGetReadSessionsInfo()->SetClientId(ClientId); + for (auto& t : TopicAndTablets) { proto.MutableMetaRequest()->MutableCmdGetReadSessionsInfo()->AddTopic(t.TopicNameConverter->GetClientsideName()); - } - + } + ctx.Register(NMsgBusProxy::CreateActorServerPersQueue( ctx.SelfID, proto, SchemeCache, std::make_shared<NMsgBusProxy::TPersQueueGetReadSessionsInfoWorkerFactory>() )); - -} - - -void TReadInfoActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorContext& ctx) { - if (ev->Get()->Record.GetStatus() != MSTATUS_OK) { - return AnswerError(ev->Get()->Record.GetErrorReason(), PersQueue::ErrorCode::ERROR, ctx); - } - - // Convert to correct response. - - ReadInfoResult result; - - const auto& resp = ev->Get()->Record; - Y_VERIFY(resp.HasMetaResponse()); - - Y_VERIFY(resp.GetMetaResponse().GetCmdGetReadSessionsInfoResult().TopicResultSize() == TopicAndTablets.size()); - TMap<std::pair<TString, ui64>, ReadInfoResult::TopicInfo::PartitionInfo*> partResultMap; - for (auto& tt : resp.GetMetaResponse().GetCmdGetReadSessionsInfoResult().GetTopicResult()) { - auto topicRes = result.add_topics(); - topicRes->mutable_topic()->set_path(NPersQueue::GetTopicPath(tt.GetTopic())); - topicRes->set_cluster(NPersQueue::GetDC(tt.GetTopic())); - topicRes->set_status(ConvertPersQueueInternalCodeToStatus(ConvertOldCode(tt.GetErrorCode()))); - if (tt.GetErrorCode() != NPersQueue::NErrorCode::OK) - FillIssue(topicRes->add_issues(), ConvertOldCode(tt.GetErrorCode()), tt.GetErrorReason()); - - for (auto& pp : tt.GetPartitionResult()) { - auto partRes = topicRes->add_partitions(); - - partRes->set_partition(pp.GetPartition()); - partRes->set_status(ConvertPersQueueInternalCodeToStatus(ConvertOldCode(pp.GetErrorCode()))); - if (pp.GetErrorCode() != NPersQueue::NErrorCode::OK) - FillIssue(partRes->add_issues(), ConvertOldCode(pp.GetErrorCode()), pp.GetErrorReason()); - - partRes->set_start_offset(pp.GetStartOffset()); - partRes->set_end_offset(pp.GetEndOffset()); - - partRes->set_commit_offset(pp.GetClientOffset()); - partRes->set_commit_time_lag_ms(pp.GetTimeLag()); - - partRes->set_read_offset(pp.GetClientReadOffset()); - partRes->set_read_time_lag_ms(pp.GetReadTimeLag()); - - partRes->set_session_id(pp.GetSession()); //TODO: fill error when no session returned result - - partRes->set_client_node(pp.GetClientNode()); - partRes->set_proxy_node(pp.GetProxyNode()); - partRes->set_tablet_node(pp.GetTabletNode()); - partResultMap[std::make_pair<TString, ui64>(TString(tt.GetTopic()), pp.GetPartition())] = partRes; - } - } - for (auto& ss : resp.GetMetaResponse().GetCmdGetReadSessionsInfoResult().GetSessionResult()) { - for (auto& pr : ss.GetPartitionResult()) { - auto it = partResultMap.find(std::make_pair<TString, ui64>(TString(pr.GetTopic()), pr.GetPartition())); - if (it == partResultMap.end()) - continue; - auto sesRes = it->second; - sesRes->set_session_id(ss.GetSession()); - sesRes->set_status(ConvertPersQueueInternalCodeToStatus(ConvertOldCode(ss.GetErrorCode()))); - if (ss.GetErrorCode() != NPersQueue::NErrorCode::OK) //TODO: what if this is result for already dead session? - FillIssue(sesRes->add_issues(), ConvertOldCode(ss.GetErrorCode()), ss.GetErrorReason()); - - for (auto& nc : pr.GetNextCommits()) { - sesRes->add_out_of_order_read_cookies_to_commit(nc); - } - sesRes->set_last_read_cookie(pr.GetLastReadId()); - sesRes->set_committed_read_cookie(pr.GetReadIdCommitted()); - sesRes->set_assign_timestamp_ms(pr.GetTimestamp()); - - sesRes->set_client_node(ss.GetClientNode()); - sesRes->set_proxy_node(ss.GetProxyNode()); - } - } - Request().SendResult(result, Ydb::StatusIds::SUCCESS); - Die(ctx); -} - -void FillIssue(Ydb::Issue::IssueMessage* issue, const PersQueue::ErrorCode::ErrorCode errorCode, const TString& errorReason) { - issue->set_message(errorReason); - issue->set_severity(NYql::TSeverityIds::S_ERROR); - issue->set_issue_code(errorCode); -} - -PersQueue::ErrorCode::ErrorCode ConvertOldCode(const NPersQueue::NErrorCode::EErrorCode code) -{ - if (code == NPersQueue::NErrorCode::OK) - return PersQueue::ErrorCode::OK; - return PersQueue::ErrorCode::ErrorCode(code + 500000); -} - - -void TReadInfoActor::AnswerError(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode errorCode, const NActors::TActorContext& ctx) { - - ReadInfoResponse response; - response.mutable_operation()->set_ready(true); - auto issue = response.mutable_operation()->add_issues(); - FillIssue(issue, errorCode, errorReason); - response.mutable_operation()->set_status(ConvertPersQueueInternalCodeToStatus(errorCode)); - Reply(ConvertPersQueueInternalCodeToStatus(errorCode), response.operation().issues(), ctx); -} - - -void TReadInfoActor::Handle(TEvPQProxy::TEvCloseSession::TPtr& ev, const TActorContext& ctx) { - AnswerError(ev->Get()->Reason, ev->Get()->ErrorCode, ctx); -} - - - -Ydb::StatusIds::StatusCode ConvertPersQueueInternalCodeToStatus(const PersQueue::ErrorCode::ErrorCode code) { - - using namespace PersQueue::ErrorCode; - - switch(code) { - case OK : - return Ydb::StatusIds::SUCCESS; - case INITIALIZING: - case CLUSTER_DISABLED: - return Ydb::StatusIds::UNAVAILABLE; + +} + + +void TReadInfoActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorContext& ctx) { + if (ev->Get()->Record.GetStatus() != MSTATUS_OK) { + return AnswerError(ev->Get()->Record.GetErrorReason(), PersQueue::ErrorCode::ERROR, ctx); + } + + // Convert to correct response. + + ReadInfoResult result; + + const auto& resp = ev->Get()->Record; + Y_VERIFY(resp.HasMetaResponse()); + + Y_VERIFY(resp.GetMetaResponse().GetCmdGetReadSessionsInfoResult().TopicResultSize() == TopicAndTablets.size()); + TMap<std::pair<TString, ui64>, ReadInfoResult::TopicInfo::PartitionInfo*> partResultMap; + for (auto& tt : resp.GetMetaResponse().GetCmdGetReadSessionsInfoResult().GetTopicResult()) { + auto topicRes = result.add_topics(); + topicRes->mutable_topic()->set_path(NPersQueue::GetTopicPath(tt.GetTopic())); + topicRes->set_cluster(NPersQueue::GetDC(tt.GetTopic())); + topicRes->set_status(ConvertPersQueueInternalCodeToStatus(ConvertOldCode(tt.GetErrorCode()))); + if (tt.GetErrorCode() != NPersQueue::NErrorCode::OK) + FillIssue(topicRes->add_issues(), ConvertOldCode(tt.GetErrorCode()), tt.GetErrorReason()); + + for (auto& pp : tt.GetPartitionResult()) { + auto partRes = topicRes->add_partitions(); + + partRes->set_partition(pp.GetPartition()); + partRes->set_status(ConvertPersQueueInternalCodeToStatus(ConvertOldCode(pp.GetErrorCode()))); + if (pp.GetErrorCode() != NPersQueue::NErrorCode::OK) + FillIssue(partRes->add_issues(), ConvertOldCode(pp.GetErrorCode()), pp.GetErrorReason()); + + partRes->set_start_offset(pp.GetStartOffset()); + partRes->set_end_offset(pp.GetEndOffset()); + + partRes->set_commit_offset(pp.GetClientOffset()); + partRes->set_commit_time_lag_ms(pp.GetTimeLag()); + + partRes->set_read_offset(pp.GetClientReadOffset()); + partRes->set_read_time_lag_ms(pp.GetReadTimeLag()); + + partRes->set_session_id(pp.GetSession()); //TODO: fill error when no session returned result + + partRes->set_client_node(pp.GetClientNode()); + partRes->set_proxy_node(pp.GetProxyNode()); + partRes->set_tablet_node(pp.GetTabletNode()); + partResultMap[std::make_pair<TString, ui64>(TString(tt.GetTopic()), pp.GetPartition())] = partRes; + } + } + for (auto& ss : resp.GetMetaResponse().GetCmdGetReadSessionsInfoResult().GetSessionResult()) { + for (auto& pr : ss.GetPartitionResult()) { + auto it = partResultMap.find(std::make_pair<TString, ui64>(TString(pr.GetTopic()), pr.GetPartition())); + if (it == partResultMap.end()) + continue; + auto sesRes = it->second; + sesRes->set_session_id(ss.GetSession()); + sesRes->set_status(ConvertPersQueueInternalCodeToStatus(ConvertOldCode(ss.GetErrorCode()))); + if (ss.GetErrorCode() != NPersQueue::NErrorCode::OK) //TODO: what if this is result for already dead session? + FillIssue(sesRes->add_issues(), ConvertOldCode(ss.GetErrorCode()), ss.GetErrorReason()); + + for (auto& nc : pr.GetNextCommits()) { + sesRes->add_out_of_order_read_cookies_to_commit(nc); + } + sesRes->set_last_read_cookie(pr.GetLastReadId()); + sesRes->set_committed_read_cookie(pr.GetReadIdCommitted()); + sesRes->set_assign_timestamp_ms(pr.GetTimestamp()); + + sesRes->set_client_node(ss.GetClientNode()); + sesRes->set_proxy_node(ss.GetProxyNode()); + } + } + Request().SendResult(result, Ydb::StatusIds::SUCCESS); + Die(ctx); +} + +void FillIssue(Ydb::Issue::IssueMessage* issue, const PersQueue::ErrorCode::ErrorCode errorCode, const TString& errorReason) { + issue->set_message(errorReason); + issue->set_severity(NYql::TSeverityIds::S_ERROR); + issue->set_issue_code(errorCode); +} + +PersQueue::ErrorCode::ErrorCode ConvertOldCode(const NPersQueue::NErrorCode::EErrorCode code) +{ + if (code == NPersQueue::NErrorCode::OK) + return PersQueue::ErrorCode::OK; + return PersQueue::ErrorCode::ErrorCode(code + 500000); +} + + +void TReadInfoActor::AnswerError(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode errorCode, const NActors::TActorContext& ctx) { + + ReadInfoResponse response; + response.mutable_operation()->set_ready(true); + auto issue = response.mutable_operation()->add_issues(); + FillIssue(issue, errorCode, errorReason); + response.mutable_operation()->set_status(ConvertPersQueueInternalCodeToStatus(errorCode)); + Reply(ConvertPersQueueInternalCodeToStatus(errorCode), response.operation().issues(), ctx); +} + + +void TReadInfoActor::Handle(TEvPQProxy::TEvCloseSession::TPtr& ev, const TActorContext& ctx) { + AnswerError(ev->Get()->Reason, ev->Get()->ErrorCode, ctx); +} + + + +Ydb::StatusIds::StatusCode ConvertPersQueueInternalCodeToStatus(const PersQueue::ErrorCode::ErrorCode code) { + + using namespace PersQueue::ErrorCode; + + switch(code) { + case OK : + return Ydb::StatusIds::SUCCESS; + case INITIALIZING: + case CLUSTER_DISABLED: + return Ydb::StatusIds::UNAVAILABLE; case PREFERRED_CLUSTER_MISMATCHED: return Ydb::StatusIds::ABORTED; - case OVERLOAD: - return Ydb::StatusIds::OVERLOADED; - case BAD_REQUEST: - return Ydb::StatusIds::BAD_REQUEST; - case WRONG_COOKIE: - case CREATE_SESSION_ALREADY_LOCKED: - case DELETE_SESSION_NO_SESSION: - case READ_ERROR_NO_SESSION: - return Ydb::StatusIds::SESSION_EXPIRED; - case WRITE_ERROR_PARTITION_IS_FULL: - case WRITE_ERROR_DISK_IS_FULL: - case WRITE_ERROR_BAD_OFFSET: + case OVERLOAD: + return Ydb::StatusIds::OVERLOADED; + case BAD_REQUEST: + return Ydb::StatusIds::BAD_REQUEST; + case WRONG_COOKIE: + case CREATE_SESSION_ALREADY_LOCKED: + case DELETE_SESSION_NO_SESSION: + case READ_ERROR_NO_SESSION: + return Ydb::StatusIds::SESSION_EXPIRED; + case WRITE_ERROR_PARTITION_IS_FULL: + case WRITE_ERROR_DISK_IS_FULL: + case WRITE_ERROR_BAD_OFFSET: case SOURCEID_DELETED: - case READ_ERROR_IN_PROGRESS: - case READ_ERROR_TOO_SMALL_OFFSET: - case READ_ERROR_TOO_BIG_OFFSET: - case SET_OFFSET_ERROR_COMMIT_TO_FUTURE: - case READ_NOT_DONE: - return Ydb::StatusIds::GENERIC_ERROR; - case TABLET_IS_DROPPED: - case UNKNOWN_TOPIC: - case WRONG_PARTITION_NUMBER: - return Ydb::StatusIds::SCHEME_ERROR; - case ACCESS_DENIED: - return Ydb::StatusIds::UNAUTHORIZED; - case ERROR: - return Ydb::StatusIds::GENERIC_ERROR; - - default: - return Ydb::StatusIds::STATUS_CODE_UNSPECIFIED; - } -} - - - -} -} -} + case READ_ERROR_IN_PROGRESS: + case READ_ERROR_TOO_SMALL_OFFSET: + case READ_ERROR_TOO_BIG_OFFSET: + case SET_OFFSET_ERROR_COMMIT_TO_FUTURE: + case READ_NOT_DONE: + return Ydb::StatusIds::GENERIC_ERROR; + case TABLET_IS_DROPPED: + case UNKNOWN_TOPIC: + case WRONG_PARTITION_NUMBER: + return Ydb::StatusIds::SCHEME_ERROR; + case ACCESS_DENIED: + return Ydb::StatusIds::UNAUTHORIZED; + case ERROR: + return Ydb::StatusIds::GENERIC_ERROR; + + default: + return Ydb::StatusIds::STATUS_CODE_UNSPECIFIED; + } +} + + + +} +} +} diff --git a/ydb/services/persqueue_v1/grpc_pq_schema.cpp b/ydb/services/persqueue_v1/grpc_pq_schema.cpp index 20191d2e68..7756adc797 100644 --- a/ydb/services/persqueue_v1/grpc_pq_schema.cpp +++ b/ydb/services/persqueue_v1/grpc_pq_schema.cpp @@ -1,126 +1,126 @@ -#include "grpc_pq_schema.h" -#include "grpc_pq_actor.h" - +#include "grpc_pq_schema.h" +#include "grpc_pq_actor.h" + #include <ydb/core/grpc_services/grpc_helper.h> #include <ydb/core/tx/scheme_board/cache.h> #include <ydb/core/ydb_convert/ydb_convert.h> - + #include <ydb/library/persqueue/obfuscate/obfuscate.h> #include <ydb/library/persqueue/topic_parser/topic_parser.h> - -#include <algorithm> - -using namespace NActors; -using namespace NKikimrClient; - -using grpc::Status; - + +#include <algorithm> + +using namespace NActors; +using namespace NKikimrClient; + +using grpc::Status; + namespace NKikimr::NGRpcProxy::V1 { - + constexpr TStringBuf GRPCS_ENDPOINT_PREFIX = "grpcs://"; -/////////////////////////////////////////////////////////////////////////////// - -using namespace PersQueue::V1; - - +/////////////////////////////////////////////////////////////////////////////// + +using namespace PersQueue::V1; + + IActor* CreatePQSchemaService(const TActorId& schemeCache, TIntrusivePtr<NMonitoring::TDynamicCounters> counters) { - return new TPQSchemaService(schemeCache, counters); -} - - - + return new TPQSchemaService(schemeCache, counters); +} + + + TPQSchemaService::TPQSchemaService(const TActorId& schemeCache, - TIntrusivePtr<NMonitoring::TDynamicCounters> counters) - : SchemeCache(schemeCache) - , Counters(counters) - , LocalCluster("") -{ -} - - -void TPQSchemaService::Bootstrap(const TActorContext& ctx) { + TIntrusivePtr<NMonitoring::TDynamicCounters> counters) + : SchemeCache(schemeCache) + , Counters(counters) + , LocalCluster("") +{ +} + + +void TPQSchemaService::Bootstrap(const TActorContext& ctx) { if (!AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { // ToDo[migration]: switch to haveClusters ctx.Send(NPQ::NClusterTracker::MakeClusterTrackerID(), new NPQ::NClusterTracker::TEvClusterTracker::TEvSubscribe); - } - - Become(&TThis::StateFunc); -} - - -void TPQSchemaService::Handle(NPQ::NClusterTracker::TEvClusterTracker::TEvClustersUpdate::TPtr& ev) { - Y_VERIFY(ev->Get()->ClustersList); - Y_VERIFY(ev->Get()->ClustersList->Clusters.size()); - - const auto& clusters = ev->Get()->ClustersList->Clusters; - - LocalCluster = {}; - - auto it = std::find_if(begin(clusters), end(clusters), [](const auto& cluster) { return cluster.IsLocal; }); - if (it != end(clusters)) { - LocalCluster = it->Name; - } - - Clusters.resize(clusters.size()); - for (size_t i = 0; i < clusters.size(); ++i) { - Clusters[i] = clusters[i].Name; - } -} - - -google::protobuf::RepeatedPtrField<Ydb::Issue::IssueMessage> FillResponse(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode code) { - google::protobuf::RepeatedPtrField<Ydb::Issue::IssueMessage> res; - FillIssue(res.Add(), code, errorReason); - return res; -} - - -void TPQSchemaService::Handle(NKikimr::NGRpcService::TEvPQDropTopicRequest::TPtr& ev, const TActorContext& ctx) { - - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "new drop topic request"); - - ctx.Register(new TDropTopicActor(ev->Release().Release())); -} - - - - -TDropTopicActor::TDropTopicActor(NKikimr::NGRpcService::TEvPQDropTopicRequest* request) + } + + Become(&TThis::StateFunc); +} + + +void TPQSchemaService::Handle(NPQ::NClusterTracker::TEvClusterTracker::TEvClustersUpdate::TPtr& ev) { + Y_VERIFY(ev->Get()->ClustersList); + Y_VERIFY(ev->Get()->ClustersList->Clusters.size()); + + const auto& clusters = ev->Get()->ClustersList->Clusters; + + LocalCluster = {}; + + auto it = std::find_if(begin(clusters), end(clusters), [](const auto& cluster) { return cluster.IsLocal; }); + if (it != end(clusters)) { + LocalCluster = it->Name; + } + + Clusters.resize(clusters.size()); + for (size_t i = 0; i < clusters.size(); ++i) { + Clusters[i] = clusters[i].Name; + } +} + + +google::protobuf::RepeatedPtrField<Ydb::Issue::IssueMessage> FillResponse(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode code) { + google::protobuf::RepeatedPtrField<Ydb::Issue::IssueMessage> res; + FillIssue(res.Add(), code, errorReason); + return res; +} + + +void TPQSchemaService::Handle(NKikimr::NGRpcService::TEvPQDropTopicRequest::TPtr& ev, const TActorContext& ctx) { + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "new drop topic request"); + + ctx.Register(new TDropTopicActor(ev->Release().Release())); +} + + + + +TDropTopicActor::TDropTopicActor(NKikimr::NGRpcService::TEvPQDropTopicRequest* request) : TBase(request, request->GetProtoRequest()->path()) -{ -} - -void TDropTopicActor::Bootstrap(const NActors::TActorContext& ctx) -{ - TBase::Bootstrap(ctx); - SendProposeRequest(ctx); - Become(&TDropTopicActor::StateWork); -} - - +{ +} + +void TDropTopicActor::Bootstrap(const NActors::TActorContext& ctx) +{ + TBase::Bootstrap(ctx); + SendProposeRequest(ctx); + Become(&TDropTopicActor::StateWork); +} + + void TDropTopicActor::FillProposeRequest(TEvTxUserProxy::TEvProposeTransaction& proposal, const TActorContext& ctx, const TString& workingDir, const TString& name) { Y_UNUSED(ctx); NKikimrSchemeOp::TModifyScheme& modifyScheme(*proposal.Record.MutableTransaction()->MutableModifyScheme()); - modifyScheme.SetWorkingDir(workingDir); + modifyScheme.SetWorkingDir(workingDir); modifyScheme.SetOperationType(NKikimrSchemeOp::ESchemeOpDropPersQueueGroup); - modifyScheme.MutableDrop()->SetName(name); -} - -TDescribeTopicActor::TDescribeTopicActor(NKikimr::NGRpcService::TEvPQDescribeTopicRequest* request) + modifyScheme.MutableDrop()->SetName(name); +} + +TDescribeTopicActor::TDescribeTopicActor(NKikimr::NGRpcService::TEvPQDescribeTopicRequest* request) : TBase(request, request->GetProtoRequest()->path()) -{ -} - -void TDescribeTopicActor::StateWork(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx) { - switch (ev->GetTypeRewrite()) { - default: TBase::StateWork(ev, ctx); - } -} - - +{ +} + +void TDescribeTopicActor::StateWork(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx) { + switch (ev->GetTypeRewrite()) { + default: TBase::StateWork(ev, ctx); + } +} + + void TDescribeTopicActor::HandleCacheNavigateResponse(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx) { Y_VERIFY(ev->Get()->Request.Get()->ResultSet.size() == 1); // describe for only one topic if (ReplyIfNotTopic(ev, ctx)) { @@ -130,9 +130,9 @@ void TDescribeTopicActor::HandleCacheNavigateResponse(TEvTxProxySchemeCache::TEv const auto& response = ev->Get()->Request.Get()->ResultSet.front(); const TString path = JoinSeq("/", response.Path); - + Ydb::PersQueue::V1::DescribeTopicResult result; - + auto settings = result.mutable_settings(); Ydb::Scheme::Entry *selfEntry = result.mutable_self(); const auto& selfInfo = response.Self->Info; @@ -142,16 +142,16 @@ void TDescribeTopicActor::HandleCacheNavigateResponse(TEvTxProxySchemeCache::TEv if (response.PQGroupInfo) { const auto &pqDescr = response.PQGroupInfo->Description; settings->set_partitions_count(pqDescr.GetTotalGroupCount()); - + const auto &config = pqDescr.GetPQTabletConfig(); if (!config.GetRequireAuthWrite()) { (*settings->mutable_attributes())["_allow_unauthenticated_write"] = "true"; } - + if (!config.GetRequireAuthRead()) { (*settings->mutable_attributes())["_allow_unauthenticated_read"] = "true"; } - + if (pqDescr.GetPartitionPerTablet() != 2) { (*settings->mutable_attributes())["_partitions_per_tablet"] = TStringBuilder() << pqDescr.GetPartitionPerTablet(); @@ -171,15 +171,15 @@ void TDescribeTopicActor::HandleCacheNavigateResponse(TEvTxProxySchemeCache::TEv settings->set_retention_period_ms(partConfig.GetLifetimeSeconds() * 1000); settings->set_message_group_seqno_retention_period_ms(partConfig.GetSourceIdLifetimeSeconds() * 1000); settings->set_max_partition_message_groups_seqno_stored(partConfig.GetSourceIdMaxCounts()); - + if (local) { settings->set_max_partition_write_speed(partConfig.GetWriteSpeedInBytesPerSecond()); settings->set_max_partition_write_burst(partConfig.GetBurstSize()); } - + settings->set_supported_format( (Ydb::PersQueue::V1::TopicSettings::Format) (config.GetFormatVersion() + 1)); - + for (const auto &codec : config.GetCodecs().GetIds()) { settings->add_supported_codecs((Ydb::PersQueue::V1::Codec) (codec + 1)); } @@ -201,7 +201,7 @@ void TDescribeTopicActor::HandleCacheNavigateResponse(TEvTxProxySchemeCache::TEv if (c == config.GetReadRules(i)) { important = true; break; - } + } } rr->set_important(important); @@ -251,34 +251,34 @@ void TDescribeTopicActor::HandleCacheNavigateResponse(TEvTxProxySchemeCache::TEv partConfig.GetMirrorFrom().GetCredentials().GetIam().GetServiceAccountKey()) ); } - } + } rmr->set_database(partConfig.GetMirrorFrom().GetDatabase()); - } - } + } + } return ReplyWithResult(Ydb::StatusIds::SUCCESS, result, ctx); -} - - -void TDescribeTopicActor::Bootstrap(const NActors::TActorContext& ctx) -{ - TBase::Bootstrap(ctx); +} + + +void TDescribeTopicActor::Bootstrap(const NActors::TActorContext& ctx) +{ + TBase::Bootstrap(ctx); SendDescribeProposeRequest(ctx); - Become(&TDescribeTopicActor::StateWork); -} - - + Become(&TDescribeTopicActor::StateWork); +} + + TAddReadRuleActor::TAddReadRuleActor(NKikimr::NGRpcService::TEvPQAddReadRuleRequest* request) : TBase(request, request->GetProtoRequest()->path()) { } - + void TAddReadRuleActor::Bootstrap(const NActors::TActorContext& ctx) { TBase::Bootstrap(ctx); SendDescribeProposeRequest(ctx); Become(&TBase::StateWork); -} - +} + void TAddReadRuleActor::ModifyPersqueueConfig( const TActorContext& ctx, NKikimrSchemeOp::TPersQueueGroupDescription& groupConfig, @@ -289,20 +289,20 @@ void TAddReadRuleActor::ModifyPersqueueConfig( auto* pqConfig = groupConfig.MutablePQTabletConfig(); auto rule = GetProtoRequest()->read_rule(); - - if (rule.version() == 0) { + + if (rule.version() == 0) { rule.set_version(selfInfo.GetVersion().GetPQVersion()); - } - auto serviceTypes = GetSupportedClientServiceTypes(ctx); - TString error = AddReadRuleToConfig(pqConfig, rule, serviceTypes, ctx); - bool hasDuplicates = false; + } + auto serviceTypes = GetSupportedClientServiceTypes(ctx); + TString error = AddReadRuleToConfig(pqConfig, rule, serviceTypes, ctx); + bool hasDuplicates = false; if (error.Empty()) { - hasDuplicates = CheckReadRulesConfig(*pqConfig, serviceTypes, error); + hasDuplicates = CheckReadRulesConfig(*pqConfig, serviceTypes, error); } if (!error.Empty()) { return ReplyWithError(hasDuplicates ? Ydb::StatusIds::ALREADY_EXISTS : Ydb::StatusIds::BAD_REQUEST, - hasDuplicates ? Ydb::PersQueue::ErrorCode::OK : Ydb::PersQueue::ErrorCode::BAD_REQUEST, error, ctx); + hasDuplicates ? Ydb::PersQueue::ErrorCode::OK : Ydb::PersQueue::ErrorCode::BAD_REQUEST, error, ctx); } } @@ -337,41 +337,41 @@ void TRemoveReadRuleActor::ModifyPersqueueConfig( } } -TCreateTopicActor::TCreateTopicActor(NKikimr::NGRpcService::TEvPQCreateTopicRequest* request, const TString& localCluster, const TVector<TString>& clusters) +TCreateTopicActor::TCreateTopicActor(NKikimr::NGRpcService::TEvPQCreateTopicRequest* request, const TString& localCluster, const TVector<TString>& clusters) : TBase(request, request->GetProtoRequest()->path()) - , LocalCluster(localCluster) - , Clusters(clusters) -{ - Y_ASSERT(request); -} - -void TCreateTopicActor::Bootstrap(const NActors::TActorContext& ctx) -{ - TBase::Bootstrap(ctx); - SendProposeRequest(ctx); - Become(&TCreateTopicActor::StateWork); -} - - -TAlterTopicActor::TAlterTopicActor(NKikimr::NGRpcService::TEvPQAlterTopicRequest* request) + , LocalCluster(localCluster) + , Clusters(clusters) +{ + Y_ASSERT(request); +} + +void TCreateTopicActor::Bootstrap(const NActors::TActorContext& ctx) +{ + TBase::Bootstrap(ctx); + SendProposeRequest(ctx); + Become(&TCreateTopicActor::StateWork); +} + + +TAlterTopicActor::TAlterTopicActor(NKikimr::NGRpcService::TEvPQAlterTopicRequest* request) : TBase(request, request->GetProtoRequest()->path()) -{ - Y_ASSERT(request); -} - -void TAlterTopicActor::Bootstrap(const NActors::TActorContext& ctx) -{ - TBase::Bootstrap(ctx); - SendProposeRequest(ctx); - Become(&TAlterTopicActor::StateWork); -} - +{ + Y_ASSERT(request); +} + +void TAlterTopicActor::Bootstrap(const NActors::TActorContext& ctx) +{ + TBase::Bootstrap(ctx); + SendProposeRequest(ctx); + Become(&TAlterTopicActor::StateWork); +} + void TCreateTopicActor::FillProposeRequest(TEvTxUserProxy::TEvProposeTransaction& proposal, const TActorContext& ctx, const TString& workingDir, const TString& name) { NKikimrSchemeOp::TModifyScheme& modifyScheme(*proposal.Record.MutableTransaction()->MutableModifyScheme()); - modifyScheme.SetWorkingDir(workingDir); - + modifyScheme.SetWorkingDir(workingDir); + { TString error; auto status = FillProposeRequestImpl(name, GetProtoRequest()->settings(), modifyScheme, ctx, false, error); @@ -380,48 +380,48 @@ void TCreateTopicActor::FillProposeRequest(TEvTxUserProxy::TEvProposeTransaction return ReplyWithResult(status, ctx); } } - - const auto& pqDescr = modifyScheme.GetCreatePersQueueGroup(); - const auto& config = pqDescr.GetPQTabletConfig(); - - if (!LocalCluster.empty() && config.GetLocalDC() && config.GetDC() != LocalCluster) { - Request_->RaiseIssue(FillIssue(TStringBuilder() << "Local cluster is not correct - provided '" << config.GetDC() - << "' instead of " << LocalCluster, PersQueue::ErrorCode::BAD_REQUEST)); - return ReplyWithResult(Ydb::StatusIds::BAD_REQUEST, ctx); - } + + const auto& pqDescr = modifyScheme.GetCreatePersQueueGroup(); + const auto& config = pqDescr.GetPQTabletConfig(); + + if (!LocalCluster.empty() && config.GetLocalDC() && config.GetDC() != LocalCluster) { + Request_->RaiseIssue(FillIssue(TStringBuilder() << "Local cluster is not correct - provided '" << config.GetDC() + << "' instead of " << LocalCluster, PersQueue::ErrorCode::BAD_REQUEST)); + return ReplyWithResult(Ydb::StatusIds::BAD_REQUEST, ctx); + } if (Count(Clusters, config.GetDC()) == 0 && !Clusters.empty()) { Request_->RaiseIssue(FillIssue(TStringBuilder() << "Unknown cluster '" << config.GetDC() << "'", PersQueue::ErrorCode::BAD_REQUEST)); - return ReplyWithResult(Ydb::StatusIds::BAD_REQUEST, ctx); - } -} - - + return ReplyWithResult(Ydb::StatusIds::BAD_REQUEST, ctx); + } +} + + void TAlterTopicActor::FillProposeRequest(TEvTxUserProxy::TEvProposeTransaction& proposal, const TActorContext& ctx, const TString& workingDir, const TString& name) { NKikimrSchemeOp::TModifyScheme &modifyScheme(*proposal.Record.MutableTransaction()->MutableModifyScheme()); - modifyScheme.SetWorkingDir(workingDir); + modifyScheme.SetWorkingDir(workingDir); TString error; auto status = FillProposeRequestImpl(name, GetProtoRequest()->settings(), modifyScheme, ctx, true, error); if (!error.empty()) { Request_->RaiseIssue(FillIssue(error, PersQueue::ErrorCode::BAD_REQUEST)); return ReplyWithResult(status, ctx); - } -} - - - - -void TPQSchemaService::Handle(NKikimr::NGRpcService::TEvPQAlterTopicRequest::TPtr& ev, const TActorContext& ctx) { - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "new Alter topic request"); - ctx.Register(new TAlterTopicActor(ev->Release().Release())); -} - + } +} + + + + +void TPQSchemaService::Handle(NKikimr::NGRpcService::TEvPQAlterTopicRequest::TPtr& ev, const TActorContext& ctx) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "new Alter topic request"); + ctx.Register(new TAlterTopicActor(ev->Release().Release())); +} + void TPQSchemaService::Handle(NKikimr::NGRpcService::TEvPQAddReadRuleRequest::TPtr& ev, const TActorContext& ctx) { LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "new Add read rules request"); ctx.Register(new TAddReadRuleActor(ev->Release().Release())); } - + void TPQSchemaService::Handle(NKikimr::NGRpcService::TEvPQRemoveReadRuleRequest::TPtr& ev, const TActorContext& ctx) { LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "new Remove read rules request"); ctx.Register(new TRemoveReadRuleActor(ev->Release().Release())); @@ -432,30 +432,30 @@ void TPQSchemaService::Handle(NKikimr::NGRpcService::TEvPQCreateTopicRequest::TP ctx.Register(new TCreateTopicActor(ev->Release().Release(), LocalCluster, Clusters)); } -void TPQSchemaService::Handle(NKikimr::NGRpcService::TEvPQDescribeTopicRequest::TPtr& ev, const TActorContext& ctx) { - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "new Describe topic request"); - ctx.Register(new TDescribeTopicActor(ev->Release().Release())); -} - - -} - - -void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvPQDropTopicRequest::TPtr& ev, const TActorContext& ctx) { - ctx.Send(NKikimr::NGRpcProxy::V1::GetPQSchemaServiceActorID(), ev->Release().Release()); -} - -void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvPQCreateTopicRequest::TPtr& ev, const TActorContext& ctx) { - ctx.Send(NKikimr::NGRpcProxy::V1::GetPQSchemaServiceActorID(), ev->Release().Release()); -} - -void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvPQAlterTopicRequest::TPtr& ev, const TActorContext& ctx) { - ctx.Send(NKikimr::NGRpcProxy::V1::GetPQSchemaServiceActorID(), ev->Release().Release()); -} - -void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvPQDescribeTopicRequest::TPtr& ev, const TActorContext& ctx) { - ctx.Send(NKikimr::NGRpcProxy::V1::GetPQSchemaServiceActorID(), ev->Release().Release()); -} +void TPQSchemaService::Handle(NKikimr::NGRpcService::TEvPQDescribeTopicRequest::TPtr& ev, const TActorContext& ctx) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "new Describe topic request"); + ctx.Register(new TDescribeTopicActor(ev->Release().Release())); +} + + +} + + +void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvPQDropTopicRequest::TPtr& ev, const TActorContext& ctx) { + ctx.Send(NKikimr::NGRpcProxy::V1::GetPQSchemaServiceActorID(), ev->Release().Release()); +} + +void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvPQCreateTopicRequest::TPtr& ev, const TActorContext& ctx) { + ctx.Send(NKikimr::NGRpcProxy::V1::GetPQSchemaServiceActorID(), ev->Release().Release()); +} + +void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvPQAlterTopicRequest::TPtr& ev, const TActorContext& ctx) { + ctx.Send(NKikimr::NGRpcProxy::V1::GetPQSchemaServiceActorID(), ev->Release().Release()); +} + +void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvPQDescribeTopicRequest::TPtr& ev, const TActorContext& ctx) { + ctx.Send(NKikimr::NGRpcProxy::V1::GetPQSchemaServiceActorID(), ev->Release().Release()); +} void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvPQAddReadRuleRequest::TPtr& ev, const TActorContext& ctx) { ctx.Send(NKikimr::NGRpcProxy::V1::GetPQSchemaServiceActorID(), ev->Release().Release()); diff --git a/ydb/services/persqueue_v1/grpc_pq_schema.h b/ydb/services/persqueue_v1/grpc_pq_schema.h index 7fd0f81408..2ef8c503e4 100644 --- a/ydb/services/persqueue_v1/grpc_pq_schema.h +++ b/ydb/services/persqueue_v1/grpc_pq_schema.h @@ -1,65 +1,65 @@ -#pragma once - -#include "grpc_pq_actor.h" -#include "persqueue.h" - +#pragma once + +#include "grpc_pq_actor.h" +#include "persqueue.h" + #include <ydb/core/client/server/grpc_base.h> #include <ydb/core/persqueue/cluster_tracker.h> - -#include <library/cpp/actors/core/actorsystem.h> - -#include <util/generic/hash.h> -#include <util/system/mutex.h> - - + +#include <library/cpp/actors/core/actorsystem.h> + +#include <util/generic/hash.h> +#include <util/system/mutex.h> + + namespace NKikimr::NGRpcProxy::V1 { - + static const i64 DEFAULT_MAX_DATABASE_MESSAGEGROUP_SEQNO_RETENTION_PERIOD = 16*24*60*60*1000; - + inline TActorId GetPQSchemaServiceActorID() { return TActorId(0, "PQSchmSvc"); -} - +} + IActor* CreatePQSchemaService(const NActors::TActorId& schemeCache, TIntrusivePtr<NMonitoring::TDynamicCounters> counters); - -class TPQSchemaService : public NActors::TActorBootstrapped<TPQSchemaService> { -public: + +class TPQSchemaService : public NActors::TActorBootstrapped<TPQSchemaService> { +public: TPQSchemaService(const NActors::TActorId& schemeCache, TIntrusivePtr<NMonitoring::TDynamicCounters> counters); - - void Bootstrap(const TActorContext& ctx); - -private: - TString AvailableLocalCluster(); - - STFUNC(StateFunc) { - switch (ev->GetTypeRewrite()) { - HFunc(NKikimr::NGRpcService::TEvPQDropTopicRequest, Handle); - HFunc(NKikimr::NGRpcService::TEvPQCreateTopicRequest, Handle); - HFunc(NKikimr::NGRpcService::TEvPQAlterTopicRequest, Handle); + + void Bootstrap(const TActorContext& ctx); + +private: + TString AvailableLocalCluster(); + + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + HFunc(NKikimr::NGRpcService::TEvPQDropTopicRequest, Handle); + HFunc(NKikimr::NGRpcService::TEvPQCreateTopicRequest, Handle); + HFunc(NKikimr::NGRpcService::TEvPQAlterTopicRequest, Handle); HFunc(NKikimr::NGRpcService::TEvPQAddReadRuleRequest, Handle); HFunc(NKikimr::NGRpcService::TEvPQRemoveReadRuleRequest, Handle); - HFunc(NKikimr::NGRpcService::TEvPQDescribeTopicRequest, Handle); - - hFunc(NPQ::NClusterTracker::TEvClusterTracker::TEvClustersUpdate, Handle); - } - } - -private: - void Handle(NKikimr::NGRpcService::TEvPQDropTopicRequest::TPtr& ev, const TActorContext& ctx); - void Handle(NKikimr::NGRpcService::TEvPQCreateTopicRequest::TPtr& ev, const TActorContext& ctx); - void Handle(NKikimr::NGRpcService::TEvPQAlterTopicRequest::TPtr& ev, const TActorContext& ctx); + HFunc(NKikimr::NGRpcService::TEvPQDescribeTopicRequest, Handle); + + hFunc(NPQ::NClusterTracker::TEvClusterTracker::TEvClustersUpdate, Handle); + } + } + +private: + void Handle(NKikimr::NGRpcService::TEvPQDropTopicRequest::TPtr& ev, const TActorContext& ctx); + void Handle(NKikimr::NGRpcService::TEvPQCreateTopicRequest::TPtr& ev, const TActorContext& ctx); + void Handle(NKikimr::NGRpcService::TEvPQAlterTopicRequest::TPtr& ev, const TActorContext& ctx); void Handle(NKikimr::NGRpcService::TEvPQAddReadRuleRequest::TPtr& ev, const TActorContext& ctx); void Handle(NKikimr::NGRpcService::TEvPQRemoveReadRuleRequest::TPtr& ev, const TActorContext& ctx); - void Handle(NKikimr::NGRpcService::TEvPQDescribeTopicRequest::TPtr& ev, const TActorContext& ctx); - - void Handle(NPQ::NClusterTracker::TEvClusterTracker::TEvClustersUpdate::TPtr& ev); - + void Handle(NKikimr::NGRpcService::TEvPQDescribeTopicRequest::TPtr& ev, const TActorContext& ctx); + + void Handle(NPQ::NClusterTracker::TEvClusterTracker::TEvClustersUpdate::TPtr& ev); + NActors::TActorId SchemeCache; - - TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; - - TVector<TString> Clusters; - TString LocalCluster; -}; - -} + + TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; + + TVector<TString> Clusters; + TString LocalCluster; +}; + +} diff --git a/ydb/services/persqueue_v1/grpc_pq_write.cpp b/ydb/services/persqueue_v1/grpc_pq_write.cpp index a55e81c715..5f17a2f3ab 100644 --- a/ydb/services/persqueue_v1/grpc_pq_write.cpp +++ b/ydb/services/persqueue_v1/grpc_pq_write.cpp @@ -12,63 +12,63 @@ using grpc::Status; namespace NKikimr { namespace NGRpcProxy { -namespace V1 { +namespace V1 { using namespace PersQueue::V1; - + /////////////////////////////////////////////////////////////////////////////// IActor* CreatePQWriteService(const TActorId& schemeCache, const TActorId& newSchemeCache, - TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const ui32 maxSessions) { + TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const ui32 maxSessions) { return new TPQWriteService(schemeCache, newSchemeCache, counters, maxSessions); -} +} + - TPQWriteService::TPQWriteService(const TActorId& schemeCache, const TActorId& newSchemeCache, - TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const ui32 maxSessions) - : SchemeCache(schemeCache) + TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const ui32 maxSessions) + : SchemeCache(schemeCache) , NewSchemeCache(newSchemeCache) - , Counters(counters) - , MaxSessions(maxSessions) - , Enabled(false) -{ -} + , Counters(counters) + , MaxSessions(maxSessions) + , Enabled(false) +{ +} -void TPQWriteService::Bootstrap(const TActorContext& ctx) { +void TPQWriteService::Bootstrap(const TActorContext& ctx) { HaveClusters = !AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen(); // ToDo[migration]: switch to proper option if (HaveClusters) { ctx.Send(NPQ::NClusterTracker::MakeClusterTrackerID(), new NPQ::NClusterTracker::TEvClusterTracker::TEvSubscribe); } - ctx.Send(NNetClassifier::MakeNetClassifierID(), new NNetClassifier::TEvNetClassifier::TEvSubscribe); + ctx.Send(NNetClassifier::MakeNetClassifierID(), new NNetClassifier::TEvNetClassifier::TEvSubscribe); ConverterFactory = std::make_shared<NPersQueue::TTopicNamesConverterFactory>( AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen(), AppData(ctx)->PQConfig.GetRoot() ); - Become(&TThis::StateFunc); -} + Become(&TThis::StateFunc); +} -ui64 TPQWriteService::NextCookie() { - return ++LastCookie; +ui64 TPQWriteService::NextCookie() { + return ++LastCookie; } -void TPQWriteService::Handle(NNetClassifier::TEvNetClassifier::TEvClassifierUpdate::TPtr& ev, const TActorContext& ctx) { - - if (!DatacenterClassifier) { - for (auto it = Sessions.begin(); it != Sessions.end(); ++it) { - ctx.Send(it->second, new TEvPQProxy::TEvDieCommand("datacenter classifier initialized, restart session please", PersQueue::ErrorCode::INITIALIZING)); - } - } - DatacenterClassifier = ev->Get()->Classifier; -} - - +void TPQWriteService::Handle(NNetClassifier::TEvNetClassifier::TEvClassifierUpdate::TPtr& ev, const TActorContext& ctx) { + + if (!DatacenterClassifier) { + for (auto it = Sessions.begin(); it != Sessions.end(); ++it) { + ctx.Send(it->second, new TEvPQProxy::TEvDieCommand("datacenter classifier initialized, restart session please", PersQueue::ErrorCode::INITIALIZING)); + } + } + DatacenterClassifier = ev->Get()->Classifier; +} + + void TPQWriteService::Handle(NPQ::NClusterTracker::TEvClusterTracker::TEvClustersUpdate::TPtr& ev, const TActorContext& ctx) { Y_VERIFY(ev->Get()->ClustersList); Y_VERIFY(ev->Get()->ClustersList->Clusters.size()); - + const auto& clusters = ev->Get()->ClustersList->Clusters; LocalCluster = ""; @@ -127,9 +127,9 @@ void TPQWriteService::Handle(NPQ::NClusterTracker::TEvClusterTracker::TEvCluster Send(workerID, new TEvPQProxy::TEvDieCommand(closeReason, PersQueue::ErrorCode::PREFERRED_CLUSTER_MISMATCHED)); } } - } - } -} + } + } +} void TPQWriteService::Handle(TEvPQProxy::TEvSessionSetPreferredCluster::TPtr& ev, const TActorContext& ctx) { const auto& cookie = ev->Get()->Cookie; @@ -143,8 +143,8 @@ void TPQWriteService::Handle(TEvPQProxy::TEvSessionSetPreferredCluster::TPtr& ev RemotePreferredClusterBySessionCookie[cookie] = std::move(preferredCluster); } } - -void TPQWriteService::Handle(TEvPQProxy::TEvSessionDead::TPtr& ev, const TActorContext&) { + +void TPQWriteService::Handle(TEvPQProxy::TEvSessionDead::TPtr& ev, const TActorContext&) { const auto& cookie = ev->Get()->Cookie; Sessions.erase(cookie); if (RemotePreferredClusterBySessionCookie.contains(cookie)) { @@ -160,26 +160,26 @@ void TPQWriteService::Handle(TEvPQProxy::TEvSessionDead::TPtr& ev, const TActorC StreamingWriteServerMessage FillWriteResponse(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode code) { StreamingWriteServerMessage res; - FillIssue(res.add_issues(), code, errorReason); - res.set_status(ConvertPersQueueInternalCodeToStatus(code)); - return res; + FillIssue(res.add_issues(), code, errorReason); + res.set_status(ConvertPersQueueInternalCodeToStatus(code)); + return res; } -void TPQWriteService::Handle(NKikimr::NGRpcService::TEvStreamPQWriteRequest::TPtr& ev, const TActorContext& ctx) { - - LOG_DEBUG_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "new grpc connection"); +void TPQWriteService::Handle(NKikimr::NGRpcService::TEvStreamPQWriteRequest::TPtr& ev, const TActorContext& ctx) { + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "new grpc connection"); + + if (TooMuchSessions()) { + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "new grpc connection failed - too much sessions"); + ev->Get()->GetStreamCtx()->Attach(ctx.SelfID); + ev->Get()->GetStreamCtx()->WriteAndFinish(FillWriteResponse("proxy overloaded", PersQueue::ErrorCode::OVERLOAD), grpc::Status::OK); //CANCELLED + return; + } - if (TooMuchSessions()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "new grpc connection failed - too much sessions"); - ev->Get()->GetStreamCtx()->Attach(ctx.SelfID); - ev->Get()->GetStreamCtx()->WriteAndFinish(FillWriteResponse("proxy overloaded", PersQueue::ErrorCode::OVERLOAD), grpc::Status::OK); //CANCELLED - return; - } - TString localCluster = AvailableLocalCluster(ctx); if (HaveClusters && localCluster.empty()) { - ev->Get()->GetStreamCtx()->Attach(ctx.SelfID); + ev->Get()->GetStreamCtx()->Attach(ctx.SelfID); if (LocalCluster) { LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "new grpc connection failed - cluster disabled"); ev->Get()->GetStreamCtx()->WriteAndFinish(FillWriteResponse("cluster disabled", PersQueue::ErrorCode::CLUSTER_DISABLED), grpc::Status::OK); //CANCELLED @@ -187,47 +187,47 @@ void TPQWriteService::Handle(NKikimr::NGRpcService::TEvStreamPQWriteRequest::TPt LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "new grpc connection failed - initializing"); ev->Get()->GetStreamCtx()->WriteAndFinish(FillWriteResponse("initializing", PersQueue::ErrorCode::INITIALIZING), grpc::Status::OK); //CANCELLED } - return; - } else { + return; + } else { TopicsHandler = std::make_unique<NPersQueue::TTopicsListController>( ConverterFactory, HaveClusters, TVector<TString>{}, localCluster ); - const ui64 cookie = NextCookie(); - - LOG_DEBUG_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "new session created cookie " << cookie); + const ui64 cookie = NextCookie(); + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "new session created cookie " << cookie); - auto ip = ev->Get()->GetStreamCtx()->GetPeerName(); + auto ip = ev->Get()->GetStreamCtx()->GetPeerName(); TActorId worker = ctx.Register(new TWriteSessionActor( ev->Release().Release(), cookie, SchemeCache, NewSchemeCache, Counters, DatacenterClassifier ? DatacenterClassifier->ClassifyAddress(NAddressClassifier::ExtractAddress(ip)) : "unknown", *TopicsHandler )); - - Sessions[cookie] = worker; + + Sessions[cookie] = worker; } } -bool TPQWriteService::TooMuchSessions() { - return Sessions.size() >= MaxSessions; -} - - +bool TPQWriteService::TooMuchSessions() { + return Sessions.size() >= MaxSessions; +} + + TString TPQWriteService::AvailableLocalCluster(const TActorContext&) const { return HaveClusters && Enabled ? *LocalCluster : ""; } - -/////////////////////////////////////////////////////////////////////////////// - + +/////////////////////////////////////////////////////////////////////////////// + +} } -} -} +} + +void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvStreamPQWriteRequest::TPtr& ev, const TActorContext& ctx) { -void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvStreamPQWriteRequest::TPtr& ev, const TActorContext& ctx) { - - ctx.Send(NKikimr::NGRpcProxy::V1::GetPQWriteServiceActorID(), ev->Release().Release()); + ctx.Send(NKikimr::NGRpcProxy::V1::GetPQWriteServiceActorID(), ev->Release().Release()); } diff --git a/ydb/services/persqueue_v1/grpc_pq_write.h b/ydb/services/persqueue_v1/grpc_pq_write.h index 1efb910cd7..73148388b2 100644 --- a/ydb/services/persqueue_v1/grpc_pq_write.h +++ b/ydb/services/persqueue_v1/grpc_pq_write.h @@ -1,12 +1,12 @@ #pragma once -#include "grpc_pq_actor.h" -#include "persqueue.h" +#include "grpc_pq_actor.h" +#include "persqueue.h" #include <ydb/core/client/server/grpc_base.h> #include <ydb/core/persqueue/cluster_tracker.h> #include <ydb/core/mind/address_classification/net_classifier.h> - + #include <library/cpp/actors/core/actorsystem.h> #include <util/generic/hash.h> @@ -15,54 +15,54 @@ namespace NKikimr { namespace NGRpcProxy { -namespace V1 { +namespace V1 { inline TActorId GetPQWriteServiceActorID() { return TActorId(0, "PQWriteSvc"); -} - +} + IActor* CreatePQWriteService(const NActors::TActorId& schemeCache, const NActors::TActorId& newSchemeCache, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const ui32 maxSessions); - -class TPQWriteService : public NActors::TActorBootstrapped<TPQWriteService> { + +class TPQWriteService : public NActors::TActorBootstrapped<TPQWriteService> { public: TPQWriteService(const NActors::TActorId& schemeCache, const NActors::TActorId& newSchemeCache, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const ui32 maxSessions); - ~TPQWriteService() - {} - - void Bootstrap(const TActorContext& ctx); - + ~TPQWriteService() + {} + + void Bootstrap(const TActorContext& ctx); + private: ui64 NextCookie(); - bool TooMuchSessions(); + bool TooMuchSessions(); TString AvailableLocalCluster(const TActorContext& ctx) const; - STFUNC(StateFunc) { - switch (ev->GetTypeRewrite()) { - HFunc(NKikimr::NGRpcService::TEvStreamPQWriteRequest, Handle); + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + HFunc(NKikimr::NGRpcService::TEvStreamPQWriteRequest, Handle); HFunc(NPQ::NClusterTracker::TEvClusterTracker::TEvClustersUpdate, Handle); - HFunc(TEvPQProxy::TEvSessionDead, Handle); + HFunc(TEvPQProxy::TEvSessionDead, Handle); HFunc(TEvPQProxy::TEvSessionSetPreferredCluster, Handle); - HFunc(NNetClassifier::TEvNetClassifier::TEvClassifierUpdate, Handle); - - } - } - + HFunc(NNetClassifier::TEvNetClassifier::TEvClassifierUpdate, Handle); + + } + } + private: - void Handle(NKikimr::NGRpcService::TEvStreamPQWriteRequest::TPtr& ev, const TActorContext& ctx); + void Handle(NKikimr::NGRpcService::TEvStreamPQWriteRequest::TPtr& ev, const TActorContext& ctx); void Handle(NPQ::NClusterTracker::TEvClusterTracker::TEvClustersUpdate::TPtr& ev, const TActorContext& ctx); - void Handle(NNetClassifier::TEvNetClassifier::TEvClassifierUpdate::TPtr& ev, const TActorContext& ctx); - + void Handle(NNetClassifier::TEvNetClassifier::TEvClassifierUpdate::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvSessionSetPreferredCluster::TPtr& ev, const TActorContext& ctx); - void Handle(TEvPQProxy::TEvSessionDead::TPtr& ev, const TActorContext& ctx); - + void Handle(TEvPQProxy::TEvSessionDead::TPtr& ev, const TActorContext& ctx); + NActors::TActorId SchemeCache; NActors::TActorId NewSchemeCache; - TAtomic LastCookie = 0; + TAtomic LastCookie = 0; THashMap<ui64, TActorId> Sessions; // Created at by session cookie map by remote preferred cluster name @@ -70,23 +70,23 @@ private: THashMap<ui64, TString> RemotePreferredClusterBySessionCookie; // Cluster enabled at time if cluster is currently enabled THashMap<TString, TInstant> ClustersEnabledAt; - - TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; - - ui32 MaxSessions; + + TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; + + ui32 MaxSessions; TMaybe<TString> LocalCluster; bool Enabled; TString SelectSourceIdQuery; TString UpdateSourceIdQuery; TString DeleteSourceIdQuery; - - NAddressClassifier::TLabeledAddressClassifier::TConstPtr DatacenterClassifier; // Detects client's datacenter by IP. May be null + + NAddressClassifier::TLabeledAddressClassifier::TConstPtr DatacenterClassifier; // Detects client's datacenter by IP. May be null bool HaveClusters; NPersQueue::TConverterFactoryPtr ConverterFactory; std::unique_ptr<NPersQueue::TTopicsListController> TopicsHandler; }; - + +} } } -} diff --git a/ydb/services/persqueue_v1/grpc_pq_write_actor.cpp b/ydb/services/persqueue_v1/grpc_pq_write_actor.cpp index 67586c166e..3038cc82a6 100644 --- a/ydb/services/persqueue_v1/grpc_pq_write_actor.cpp +++ b/ydb/services/persqueue_v1/grpc_pq_write_actor.cpp @@ -1,5 +1,5 @@ #include "grpc_pq_actor.h" -#include "grpc_pq_write.h" +#include "grpc_pq_write.h" #include "grpc_pq_codecs.h" #include <ydb/core/persqueue/pq_database.h> @@ -10,10 +10,10 @@ #include <ydb/library/persqueue/topic_parser/topic_parser.h> #include <ydb/services/lib/sharding/sharding.h> #include <library/cpp/actors/core/log.h> -#include <library/cpp/digest/md5/md5.h> +#include <library/cpp/digest/md5/md5.h> #include <util/string/hex.h> -#include <util/string/vector.h> -#include <util/string/escape.h> +#include <util/string/vector.h> +#include <util/string/escape.h> #include <util/string/printf.h> using namespace NActors; @@ -74,16 +74,16 @@ void FillChunkDataFromReq( } namespace NGRpcProxy { -namespace V1 { +namespace V1 { using namespace Ydb::PersQueue::V1; -static const ui32 MAX_RESERVE_REQUESTS_INFLIGHT = 5; - -static const ui32 MAX_BYTES_INFLIGHT = 1 << 20; //1mb +static const ui32 MAX_RESERVE_REQUESTS_INFLIGHT = 5; + +static const ui32 MAX_BYTES_INFLIGHT = 1 << 20; //1mb static const ui32 MURMUR_ARRAY_SEED = 0x9747b28c; static const TDuration SOURCEID_UPDATE_PERIOD = TDuration::Hours(1); - + static const TString SELECT_SOURCEID_QUERY1 = "--!syntax_v1\n" "DECLARE $Hash AS Uint32; " @@ -105,75 +105,75 @@ static const TString UPDATE_SOURCEID_QUERY1 = static const TString UPDATE_SOURCEID_QUERY2 = "` (Hash, Topic, SourceId, CreateTime, AccessTime, Partition) VALUES " "($Hash, $Topic, $SourceId, $CreateTime, $AccessTime, $Partition); "; -//TODO: add here tracking of bytes in/out - - +//TODO: add here tracking of bytes in/out + + TWriteSessionActor::TWriteSessionActor( NKikimr::NGRpcService::TEvStreamPQWriteRequest* request, const ui64 cookie, const NActors::TActorId& schemeCache, const NActors::TActorId& newSchemeCache, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const TMaybe<TString> clientDC, const NPersQueue::TTopicsListController& topicsController ) - : Request(request) - , State(ES_CREATED) - , SchemeCache(schemeCache) + : Request(request) + , State(ES_CREATED) + , SchemeCache(schemeCache) , NewSchemeCache(newSchemeCache) - , PeerName("") - , Cookie(cookie) + , PeerName("") + , Cookie(cookie) , TopicsController(topicsController) - , Partition(0) - , PreferedPartition(Max<ui32>()) - , NumReserveBytesRequests(0) - , WritesDone(false) - , Counters(counters) - , BytesInflight_(0) - , BytesInflightTotal_(0) - , NextRequestInited(false) - , NextRequestCookie(0) - , Token(nullptr) + , Partition(0) + , PreferedPartition(Max<ui32>()) + , NumReserveBytesRequests(0) + , WritesDone(false) + , Counters(counters) + , BytesInflight_(0) + , BytesInflightTotal_(0) + , NextRequestInited(false) + , NextRequestCookie(0) + , Token(nullptr) , UpdateTokenInProgress(false) , UpdateTokenAuthenticated(false) - , ACLCheckInProgress(false) - , FirstACLCheck(true) + , ACLCheckInProgress(false) + , FirstACLCheck(true) , RequestNotChecked(false) - , LastACLCheckTimestamp(TInstant::Zero()) - , LogSessionDeadline(TInstant::Zero()) + , LastACLCheckTimestamp(TInstant::Zero()) + , LogSessionDeadline(TInstant::Zero()) , BalancerTabletId(0) - , ClientDC(clientDC ? *clientDC : "other") + , ClientDC(clientDC ? *clientDC : "other") , LastSourceIdUpdate(TInstant::Zero()) , SourceIdCreateTime(0) , SourceIdUpdateInfly(false) { - Y_ASSERT(Request); - ++(*GetServiceCounters(Counters, "pqproxy|writeSession")->GetCounter("SessionsCreatedTotal", true)); + Y_ASSERT(Request); + ++(*GetServiceCounters(Counters, "pqproxy|writeSession")->GetCounter("SessionsCreatedTotal", true)); } - + TWriteSessionActor::~TWriteSessionActor() = default; -void TWriteSessionActor::Bootstrap(const TActorContext& ctx) { - - Y_VERIFY(Request); +void TWriteSessionActor::Bootstrap(const TActorContext& ctx) { + + Y_VERIFY(Request); SelectSourceIdQuery = SELECT_SOURCEID_QUERY1 + AppData(ctx)->PQConfig.GetSourceIdTablePath() + SELECT_SOURCEID_QUERY2; UpdateSourceIdQuery = UPDATE_SOURCEID_QUERY1 + AppData(ctx)->PQConfig.GetSourceIdTablePath() + UPDATE_SOURCEID_QUERY2; - Request->GetStreamCtx()->Attach(ctx.SelfID); - if (!Request->GetStreamCtx()->Read()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "grpc read failed at start"); - Die(ctx); - return; - } - Become(&TThis::StateFunc); - StartTime = ctx.Now(); + Request->GetStreamCtx()->Attach(ctx.SelfID); + if (!Request->GetStreamCtx()->Read()) { + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "grpc read failed at start"); + Die(ctx); + return; + } + Become(&TThis::StateFunc); + StartTime = ctx.Now(); +} + + +void TWriteSessionActor::HandleDone(const TActorContext& ctx) { + + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " grpc closed"); + Die(ctx); } - -void TWriteSessionActor::HandleDone(const TActorContext& ctx) { - - LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " grpc closed"); - Die(ctx); -} - TString WriteRequestToLog(const Ydb::PersQueue::V1::StreamingWriteClientMessage& proto) { switch (proto.client_message_case()) { case StreamingWriteClientMessage::kInitRequest: @@ -189,21 +189,21 @@ TString WriteRequestToLog(const Ydb::PersQueue::V1::StreamingWriteClientMessage& } } -void TWriteSessionActor::Handle(IContext::TEvReadFinished::TPtr& ev, const TActorContext& ctx) { +void TWriteSessionActor::Handle(IContext::TEvReadFinished::TPtr& ev, const TActorContext& ctx) { LOG_DEBUG_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " grpc read done: success: " << ev->Get()->Success << " data: " << WriteRequestToLog(ev->Get()->Record)); if (!ev->Get()->Success) { LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " grpc read failed"); ctx.Send(ctx.SelfID, new TEvPQProxy::TEvDone()); return; } - + switch(ev->Get()->Record.client_message_case()) { case StreamingWriteClientMessage::kInitRequest: - ctx.Send(ctx.SelfID, new TEvPQProxy::TEvWriteInit(std::move(ev->Get()->Record), Request->GetStreamCtx()->GetPeerName())); - break; + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvWriteInit(std::move(ev->Get()->Record), Request->GetStreamCtx()->GetPeerName())); + break; case StreamingWriteClientMessage::kWriteRequest: - ctx.Send(ctx.SelfID, new TEvPQProxy::TEvWrite(std::move(ev->Get()->Record))); - break; + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvWrite(std::move(ev->Get()->Record))); + break; case StreamingWriteClientMessage::kUpdateTokenRequest: { ctx.Send(ctx.SelfID, new TEvPQProxy::TEvUpdateToken(std::move(ev->Get()->Record))); break; @@ -212,65 +212,65 @@ void TWriteSessionActor::Handle(IContext::TEvReadFinished::TPtr& ev, const TActo CloseSession("'client_message' is not set", PersQueue::ErrorCode::BAD_REQUEST, ctx); return; } - } -} - - -void TWriteSessionActor::Handle(IContext::TEvWriteFinished::TPtr& ev, const TActorContext& ctx) { - if (!ev->Get()->Success) { - LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " grpc write failed"); - Die(ctx); - } -} - - + } +} + + +void TWriteSessionActor::Handle(IContext::TEvWriteFinished::TPtr& ev, const TActorContext& ctx) { + if (!ev->Get()->Success) { + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " grpc write failed"); + Die(ctx); + } +} + + void TWriteSessionActor::Die(const TActorContext& ctx) { if (Writer) ctx.Send(Writer, new TEvents::TEvPoisonPill()); - - if (SessionsActive) { - SessionsActive.Dec(); - BytesInflight.Dec(BytesInflight_); - BytesInflightTotal.Dec(BytesInflightTotal_); - } - - LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " is DEAD"); - - ctx.Send(GetPQWriteServiceActorID(), new TEvPQProxy::TEvSessionDead(Cookie)); - + + if (SessionsActive) { + SessionsActive.Dec(); + BytesInflight.Dec(BytesInflight_); + BytesInflightTotal.Dec(BytesInflightTotal_); + } + + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " is DEAD"); + + ctx.Send(GetPQWriteServiceActorID(), new TEvPQProxy::TEvSessionDead(Cookie)); + TActorBootstrapped<TWriteSessionActor>::Die(ctx); } -void TWriteSessionActor::CheckFinish(const TActorContext& ctx) { - if (!WritesDone) - return; - if (State != ES_INITED) { - CloseSession("out of order Writes done before initialization", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; - } +void TWriteSessionActor::CheckFinish(const TActorContext& ctx) { + if (!WritesDone) + return; + if (State != ES_INITED) { + CloseSession("out of order Writes done before initialization", PersQueue::ErrorCode::BAD_REQUEST, ctx); + return; + } if (Writes.empty() && FormedWrites.empty() && SentMessages.empty()) { - CloseSession("", PersQueue::ErrorCode::OK, ctx); - return; - } -} - -void TWriteSessionActor::Handle(TEvPQProxy::TEvDone::TPtr&, const TActorContext& ctx) { - WritesDone = true; - CheckFinish(ctx); -} - -void TWriteSessionActor::CheckACL(const TActorContext& ctx) { + CloseSession("", PersQueue::ErrorCode::OK, ctx); + return; + } +} + +void TWriteSessionActor::Handle(TEvPQProxy::TEvDone::TPtr&, const TActorContext& ctx) { + WritesDone = true; + CheckFinish(ctx); +} + +void TWriteSessionActor::CheckACL(const TActorContext& ctx) { //Y_VERIFY(ACLCheckInProgress); - NACLib::EAccessRights rights = NACLib::EAccessRights::UpdateRow; + NACLib::EAccessRights rights = NACLib::EAccessRights::UpdateRow; Y_VERIFY(ACL); if (ACL->CheckAccess(rights, *Token)) { - ACLCheckInProgress = false; - if (FirstACLCheck) { - FirstACLCheck = false; - DiscoverPartition(ctx); - } + ACLCheckInProgress = false; + if (FirstACLCheck) { + FirstACLCheck = false; + DiscoverPartition(ctx); + } if (UpdateTokenInProgress && UpdateTokenAuthenticated) { UpdateTokenInProgress = false; StreamingWriteServerMessage serverMessage; @@ -281,29 +281,29 @@ void TWriteSessionActor::CheckACL(const TActorContext& ctx) { Die(ctx); } } - } else { - TString errorReason = Sprintf("access to topic '%s' denied for '%s' due to 'no WriteTopic rights', Marker# PQ1125", + } else { + TString errorReason = Sprintf("access to topic '%s' denied for '%s' due to 'no WriteTopic rights', Marker# PQ1125", TopicConverter->GetClientsideName().c_str(), - Token->GetUserSID().c_str()); - CloseSession(errorReason, PersQueue::ErrorCode::ACCESS_DENIED, ctx); - } + Token->GetUserSID().c_str()); + CloseSession(errorReason, PersQueue::ErrorCode::ACCESS_DENIED, ctx); + } } -void TWriteSessionActor::Handle(TEvPQProxy::TEvWriteInit::TPtr& ev, const TActorContext& ctx) { - THolder<TEvPQProxy::TEvWriteInit> event(ev->Release()); +void TWriteSessionActor::Handle(TEvPQProxy::TEvWriteInit::TPtr& ev, const TActorContext& ctx) { + THolder<TEvPQProxy::TEvWriteInit> event(ev->Release()); - if (State != ES_CREATED) { - //answer error - CloseSession("got second init request", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; - } + if (State != ES_CREATED) { + //answer error + CloseSession("got second init request", PersQueue::ErrorCode::BAD_REQUEST, ctx); + return; + } const auto& init = event->Request.init_request(); if (init.topic().empty() || init.message_group_id().empty()) { CloseSession("no topic or message_group_id in init request", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; - } - + return; + } + TopicConverter = TopicsController.GetWriteTopicConverter(init.topic(), Request->GetDatabaseName().GetOrElse("/Root")); if (!TopicConverter->IsValid()) { CloseSession( @@ -313,8 +313,8 @@ void TWriteSessionActor::Handle(TEvPQProxy::TEvWriteInit::TPtr& ev, const TActor return; } - PeerName = event->PeerName; - + PeerName = event->PeerName; + SourceId = init.message_group_id(); TString encodedSourceId; try { @@ -322,17 +322,17 @@ void TWriteSessionActor::Handle(TEvPQProxy::TEvWriteInit::TPtr& ev, const TActor } catch (yexception& e) { CloseSession(TStringBuilder() << "incorrect sourceId \"" << SourceId << "\": " << e.what(), PersQueue::ErrorCode::BAD_REQUEST, ctx); return; - } + } EscapedSourceId = HexEncode(encodedSourceId); TString s = TopicConverter->GetClientsideName() + encodedSourceId; Hash = MurmurHash<ui32>(s.c_str(), s.size(), MURMUR_ARRAY_SEED); - - LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session request cookie: " << Cookie << " " << init << " from " << PeerName); - //TODO: get user agent from headers - UserAgent = "pqv1 server"; - LogSession(ctx); - + + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session request cookie: " << Cookie << " " << init << " from " << PeerName); + //TODO: get user agent from headers + UserAgent = "pqv1 server"; + LogSession(ctx); + if (Request->GetInternalToken().empty()) { // session without auth if (AppData(ctx)->PQConfig.GetRequireCredentialsInNewProtocol()) { Request->ReplyUnauthenticated("Unauthenticated access is forbidden, please provide credentials"); @@ -340,42 +340,42 @@ void TWriteSessionActor::Handle(TEvPQProxy::TEvWriteInit::TPtr& ev, const TActor return; } } - + InitCheckSchema(ctx, true); PreferedPartition = init.partition_group_id() > 0 ? init.partition_group_id() - 1 : Max<ui32>(); - + InitMeta = GetInitialDataChunk(init, TopicConverter->GetFullLegacyName(), PeerName); // ToDo[migration] - check? - - auto subGroup = GetServiceCounters(Counters, "pqproxy|SLI"); + + auto subGroup = GetServiceCounters(Counters, "pqproxy|SLI"); Aggr = {{{{"Account", TopicConverter->GetAccount()}}, {"total"}}}; - + SLITotal = NKikimr::NPQ::TMultiCounter(subGroup, Aggr, {}, {"RequestsTotal"}, true, "sensor", false); SLIErrors = NKikimr::NPQ::TMultiCounter(subGroup, Aggr, {}, {"RequestsError"}, true, "sensor", false); - SLITotal.Inc(); + SLITotal.Inc(); const auto& preferredCluster = init.preferred_cluster(); if (!preferredCluster.empty()) { Send(GetPQWriteServiceActorID(), new TEvPQProxy::TEvSessionSetPreferredCluster(Cookie, preferredCluster)); } -} - +} + void TWriteSessionActor::SetupCounters() -{ - //now topic is checked, can create group for real topic, not garbage - auto subGroup = GetServiceCounters(Counters, "pqproxy|writeSession"); +{ + //now topic is checked, can create group for real topic, not garbage + auto subGroup = GetServiceCounters(Counters, "pqproxy|writeSession"); TVector<NPQ::TLabelsInfo> aggr = NKikimr::NPQ::GetLabels(LocalDC, TopicConverter->GetClientsideName()); - + BytesInflight = NKikimr::NPQ::TMultiCounter(subGroup, aggr, {}, {"BytesInflight"}, false); BytesInflightTotal = NKikimr::NPQ::TMultiCounter(subGroup, aggr, {}, {"BytesInflightTotal"}, false); SessionsCreated = NKikimr::NPQ::TMultiCounter(subGroup, aggr, {}, {"SessionsCreated"}, true); SessionsActive = NKikimr::NPQ::TMultiCounter(subGroup, aggr, {}, {"SessionsActive"}, false); Errors = NKikimr::NPQ::TMultiCounter(subGroup, aggr, {}, {"Errors"}, true); - - SessionsCreated.Inc(); - SessionsActive.Inc(); -} - + + SessionsCreated.Inc(); + SessionsActive.Inc(); +} + void TWriteSessionActor::SetupCounters(const TString& cloudId, const TString& dbId, const TString& folderId) { @@ -395,7 +395,7 @@ void TWriteSessionActor::SetupCounters(const TString& cloudId, const TString& db void TWriteSessionActor::InitCheckSchema(const TActorContext& ctx, bool needWaitSchema) { LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "init check schema"); - + if (!needWaitSchema) { ACLCheckInProgress = true; } @@ -408,26 +408,26 @@ void TWriteSessionActor::InitCheckSchema(const TActorContext& ctx, bool needWait void TWriteSessionActor::Handle(TEvDescribeTopicsResponse::TPtr& ev, const TActorContext& ctx) { auto* res = ev->Get()->Result.Get(); Y_VERIFY(res->ResultSet.size() == 1); - + auto& entry = res->ResultSet[0]; - TString errorReason; + TString errorReason; auto processResult = ProcessMetaCacheTopicResponse(entry); if (processResult.IsFatal) { CloseSession(processResult.Reason, processResult.ErrorCode, ctx); - return; + return; } auto& description = entry.PQGroupInfo->Description; Y_VERIFY(description.PartitionsSize() > 0); Y_VERIFY(description.HasPQTabletConfig()); InitialPQTabletConfig = description.GetPQTabletConfig(); - + BalancerTabletId = description.GetBalancerTabletID(); - + for (ui32 i = 0; i < description.PartitionsSize(); ++i) { const auto& pi = description.GetPartitions(i); - PartitionToTablet[pi.GetPartitionId()] = pi.GetTabletId(); - } - + PartitionToTablet[pi.GetPartitionId()] = pi.GetTabletId(); + } + if (AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { const auto& tabletConfig = description.GetPQTabletConfig(); SetupCounters(tabletConfig.GetYcCloudId(), tabletConfig.GetYdbDatabaseId(), @@ -435,29 +435,29 @@ void TWriteSessionActor::Handle(TEvDescribeTopicsResponse::TPtr& ev, const TActo } else { SetupCounters(); } - + Y_VERIFY (entry.SecurityObject); ACL.Reset(new TAclWrapper(entry.SecurityObject)); LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " describe result for acl check"); - if (Request->GetInternalToken().empty()) { // session without auth - if (AppData(ctx)->PQConfig.GetRequireCredentialsInNewProtocol()) { + if (Request->GetInternalToken().empty()) { // session without auth + if (AppData(ctx)->PQConfig.GetRequireCredentialsInNewProtocol()) { Request->ReplyUnauthenticated("Unauthenticated access is forbidden, please provide credentials"); Die(ctx); - return; - } - Y_VERIFY(FirstACLCheck); - FirstACLCheck = false; - DiscoverPartition(ctx); - } else { - Y_VERIFY(Request->GetYdbToken()); - Auth = *Request->GetYdbToken(); - - Token = new NACLib::TUserToken(Request->GetInternalToken()); + return; + } + Y_VERIFY(FirstACLCheck); + FirstACLCheck = false; + DiscoverPartition(ctx); + } else { + Y_VERIFY(Request->GetYdbToken()); + Auth = *Request->GetYdbToken(); + + Token = new NACLib::TUserToken(Request->GetInternalToken()); CheckACL(ctx); - } -} - + } +} + void TWriteSessionActor::Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx) { TEvTxProxySchemeCache::TEvNavigateKeySetResult* msg = ev->Get(); const NSchemeCache::TSchemeCacheNavigate* navigate = msg->Request.Get(); @@ -513,17 +513,17 @@ void TWriteSessionActor::Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult:: } } -void TWriteSessionActor::DiscoverPartition(const NActors::TActorContext& ctx) { - +void TWriteSessionActor::DiscoverPartition(const NActors::TActorContext& ctx) { + if (AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { // ToDo[migration] - separate flag for having config tables - auto partitionId = PreferedPartition < Max<ui32>() ? PreferedPartition - : NKikimr::NDataStreams::V1::ShardFromDecimal(NKikimr::NDataStreams::V1::HexBytesToDecimal(MD5::Calc(SourceId)), PartitionToTablet.size()); - ProceedPartition(partitionId, ctx); - return; - } - - //read from DS - auto ev = MakeHolder<NKqp::TEvKqp::TEvQueryRequest>(); + auto partitionId = PreferedPartition < Max<ui32>() ? PreferedPartition + : NKikimr::NDataStreams::V1::ShardFromDecimal(NKikimr::NDataStreams::V1::HexBytesToDecimal(MD5::Calc(SourceId)), PartitionToTablet.size()); + ProceedPartition(partitionId, ctx); + return; + } + + //read from DS + auto ev = MakeHolder<NKqp::TEvKqp::TEvQueryRequest>(); ev->Record.MutableRequest()->SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); ev->Record.MutableRequest()->SetType(NKikimrKqp::QUERY_TYPE_SQL_DML); ev->Record.MutableRequest()->SetKeepSession(false); @@ -540,24 +540,24 @@ void TWriteSessionActor::DiscoverPartition(const NActors::TActorContext& ctx) { parameters["$SourceId"] = EscapedSourceId; ev->Record.MutableRequest()->MutableParameters()->Swap(¶meters); ctx.Send(NKqp::MakeKqpProxyID(ctx.SelfID.NodeId()), ev.Release()); - State = ES_WAIT_TABLE_REQUEST_1; -} - - -void TWriteSessionActor::UpdatePartition(const TActorContext& ctx) { - Y_VERIFY(State == ES_WAIT_TABLE_REQUEST_1 || State == ES_WAIT_NEXT_PARTITION); + State = ES_WAIT_TABLE_REQUEST_1; +} + + +void TWriteSessionActor::UpdatePartition(const TActorContext& ctx) { + Y_VERIFY(State == ES_WAIT_TABLE_REQUEST_1 || State == ES_WAIT_NEXT_PARTITION); auto ev = MakeUpdateSourceIdMetadataRequest(ctx); - ctx.Send(NKqp::MakeKqpProxyID(ctx.SelfID.NodeId()), ev.Release()); - State = ES_WAIT_TABLE_REQUEST_2; -} - -void TWriteSessionActor::RequestNextPartition(const TActorContext& ctx) { - Y_VERIFY(State == ES_WAIT_TABLE_REQUEST_1); - State = ES_WAIT_NEXT_PARTITION; - THolder<TEvPersQueue::TEvGetPartitionIdForWrite> x(new TEvPersQueue::TEvGetPartitionIdForWrite); - Y_VERIFY(!PipeToBalancer); - Y_VERIFY(BalancerTabletId); - NTabletPipe::TClientConfig clientConfig; + ctx.Send(NKqp::MakeKqpProxyID(ctx.SelfID.NodeId()), ev.Release()); + State = ES_WAIT_TABLE_REQUEST_2; +} + +void TWriteSessionActor::RequestNextPartition(const TActorContext& ctx) { + Y_VERIFY(State == ES_WAIT_TABLE_REQUEST_1); + State = ES_WAIT_NEXT_PARTITION; + THolder<TEvPersQueue::TEvGetPartitionIdForWrite> x(new TEvPersQueue::TEvGetPartitionIdForWrite); + Y_VERIFY(!PipeToBalancer); + Y_VERIFY(BalancerTabletId); + NTabletPipe::TClientConfig clientConfig; clientConfig.RetryPolicy = { .RetryLimitCount = 6, .MinRetryTime = TDuration::MilliSeconds(10), @@ -566,84 +566,84 @@ void TWriteSessionActor::RequestNextPartition(const TActorContext& ctx) { .DoFirstRetryInstantly = true }; PipeToBalancer = ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, BalancerTabletId, clientConfig)); - - NTabletPipe::SendData(ctx, PipeToBalancer, x.Release()); -} - -void TWriteSessionActor::Handle(TEvPersQueue::TEvGetPartitionIdForWriteResponse::TPtr& ev, const TActorContext& ctx) { - Y_VERIFY(State == ES_WAIT_NEXT_PARTITION); - Partition = ev->Get()->Record.GetPartitionId(); - UpdatePartition(ctx); -} - -void TWriteSessionActor::Handle(NKqp::TEvKqp::TEvQueryResponse::TPtr &ev, const TActorContext &ctx) { + + NTabletPipe::SendData(ctx, PipeToBalancer, x.Release()); +} + +void TWriteSessionActor::Handle(TEvPersQueue::TEvGetPartitionIdForWriteResponse::TPtr& ev, const TActorContext& ctx) { + Y_VERIFY(State == ES_WAIT_NEXT_PARTITION); + Partition = ev->Get()->Record.GetPartitionId(); + UpdatePartition(ctx); +} + +void TWriteSessionActor::Handle(NKqp::TEvKqp::TEvQueryResponse::TPtr &ev, const TActorContext &ctx) { auto& record = ev->Get()->Record.GetRef(); - + if (record.GetYdbStatus() == Ydb::StatusIds::ABORTED) { LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " messageGroupId " - << SourceId << " escaped " << EscapedSourceId << " discover partition race, retrying"); - DiscoverPartition(ctx); - return; - } - + << SourceId << " escaped " << EscapedSourceId << " discover partition race, retrying"); + DiscoverPartition(ctx); + return; + } + if (record.GetYdbStatus() != Ydb::StatusIds::SUCCESS) { - TStringBuilder errorReason; + TStringBuilder errorReason; errorReason << "internal error in kqp Marker# PQ50 : " << record; if (State == EState::ES_INITED) { LOG_WARN_S(ctx, NKikimrServices::PQ_WRITE_PROXY, errorReason); SourceIdUpdateInfly = false; } else { - CloseSession(errorReason, PersQueue::ErrorCode::ERROR, ctx); + CloseSession(errorReason, PersQueue::ErrorCode::ERROR, ctx); } - return; - } - + return; + } + if (State == EState::ES_WAIT_TABLE_REQUEST_1) { SourceIdCreateTime = TInstant::Now().MilliSeconds(); - - bool partitionFound = false; - auto& t = record.GetResponse().GetResults(0).GetValue().GetStruct(0); - - if (t.ListSize() != 0) { - auto& tt = t.GetList(0).GetStruct(0); - if (tt.HasOptional() && tt.GetOptional().HasUint32()) { //already got partition - Partition = tt.GetOptional().GetUint32(); - if (PreferedPartition < Max<ui32>() && Partition != PreferedPartition) { + + bool partitionFound = false; + auto& t = record.GetResponse().GetResults(0).GetValue().GetStruct(0); + + if (t.ListSize() != 0) { + auto& tt = t.GetList(0).GetStruct(0); + if (tt.HasOptional() && tt.GetOptional().HasUint32()) { //already got partition + Partition = tt.GetOptional().GetUint32(); + if (PreferedPartition < Max<ui32>() && Partition != PreferedPartition) { CloseSession(TStringBuilder() << "MessageGroupId " << SourceId << " is already bound to PartitionGroupId " << (Partition + 1) << ", but client provided " << (PreferedPartition + 1) << ". MessageGroupId->PartitionGroupId binding cannot be changed, either use another MessageGroupId, specify PartitionGroupId " << (Partition + 1) << ", or do not specify PartitionGroupId at all.", - PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; - } - partitionFound = true; + PersQueue::ErrorCode::BAD_REQUEST, ctx); + return; + } + partitionFound = true; SourceIdCreateTime = t.GetList(0).GetStruct(1).GetOptional().GetUint64(); - } - } - + } + } + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " messageGroupId " - << SourceId << " escaped " << EscapedSourceId << " hash " << Hash << " partition " << Partition << " partitions " + << SourceId << " escaped " << EscapedSourceId << " hash " << Hash << " partition " << Partition << " partitions " << PartitionToTablet.size() << "(" << Hash % PartitionToTablet.size() << ") create " << SourceIdCreateTime << " result " << t); - - if (!partitionFound && (PreferedPartition < Max<ui32>() || !AppData(ctx)->PQConfig.GetRoundRobinPartitionMapping())) { - Partition = PreferedPartition < Max<ui32>() ? PreferedPartition : Hash % PartitionToTablet.size(); //choose partition default value - partitionFound = true; - } - - if (partitionFound) { - UpdatePartition(ctx); - } else { - RequestNextPartition(ctx); - } - return; - } else if (State == EState::ES_WAIT_TABLE_REQUEST_2) { + + if (!partitionFound && (PreferedPartition < Max<ui32>() || !AppData(ctx)->PQConfig.GetRoundRobinPartitionMapping())) { + Partition = PreferedPartition < Max<ui32>() ? PreferedPartition : Hash % PartitionToTablet.size(); //choose partition default value + partitionFound = true; + } + + if (partitionFound) { + UpdatePartition(ctx); + } else { + RequestNextPartition(ctx); + } + return; + } else if (State == EState::ES_WAIT_TABLE_REQUEST_2) { LastSourceIdUpdate = ctx.Now(); - ProceedPartition(Partition, ctx); + ProceedPartition(Partition, ctx); } else if (State == EState::ES_INITED) { SourceIdUpdateInfly = false; LastSourceIdUpdate = ctx.Now(); - } else { - Y_FAIL("Wrong state"); - } -} - + } else { + Y_FAIL("Wrong state"); + } +} + THolder<NKqp::TEvKqp::TEvQueryRequest> TWriteSessionActor::MakeUpdateSourceIdMetadataRequest( const NActors::TActorContext& ctx ) { @@ -669,80 +669,80 @@ THolder<NKqp::TEvKqp::TEvQueryRequest> TWriteSessionActor::MakeUpdateSourceIdMet parameters["$AccessTime"] = TInstant::Now().MilliSeconds(); parameters["$Partition"] = Partition; ev->Record.MutableRequest()->MutableParameters()->Swap(¶meters); - + return ev; } -void TWriteSessionActor::Handle(NKqp::TEvKqp::TEvProcessResponse::TPtr &ev, const TActorContext &ctx) { - auto& record = ev->Get()->Record; - - LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session cookie: " << Cookie << " sessionId: " << OwnerCookie << " sourceID " - << SourceId << " escaped " << EscapedSourceId << " discover partition error - " << record); - - CloseSession("Internal error on discovering partition", PersQueue::ErrorCode::ERROR, ctx); -} - - -void TWriteSessionActor::ProceedPartition(const ui32 partition, const TActorContext& ctx) { - Partition = partition; - auto it = PartitionToTablet.find(Partition); - - ui64 tabletId = it != PartitionToTablet.end() ? it->second : 0; - - if (!tabletId) { +void TWriteSessionActor::Handle(NKqp::TEvKqp::TEvProcessResponse::TPtr &ev, const TActorContext &ctx) { + auto& record = ev->Get()->Record; + + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session cookie: " << Cookie << " sessionId: " << OwnerCookie << " sourceID " + << SourceId << " escaped " << EscapedSourceId << " discover partition error - " << record); + + CloseSession("Internal error on discovering partition", PersQueue::ErrorCode::ERROR, ctx); +} + + +void TWriteSessionActor::ProceedPartition(const ui32 partition, const TActorContext& ctx) { + Partition = partition; + auto it = PartitionToTablet.find(Partition); + + ui64 tabletId = it != PartitionToTablet.end() ? it->second : 0; + + if (!tabletId) { CloseSession( Sprintf("no partition %u in topic '%s', Marker# PQ4", Partition, TopicConverter->GetClientsideName().c_str()), PersQueue::ErrorCode::UNKNOWN_TOPIC, ctx ); - return; - } - + return; + } + Writer = ctx.RegisterWithSameMailbox(NPQ::CreatePartitionWriter(ctx.SelfID, tabletId, Partition, SourceId)); State = ES_WAIT_WRITER_INIT; - - ui32 border = AppData(ctx)->PQConfig.GetWriteInitLatencyBigMs(); - auto subGroup = GetServiceCounters(Counters, "pqproxy|SLI"); - - InitLatency = NKikimr::NPQ::CreateSLIDurationCounter(subGroup, Aggr, "WriteInit", border, {100, 200, 500, 1000, 1500, 2000, 5000, 10000, 30000, 99999999}); + + ui32 border = AppData(ctx)->PQConfig.GetWriteInitLatencyBigMs(); + auto subGroup = GetServiceCounters(Counters, "pqproxy|SLI"); + + InitLatency = NKikimr::NPQ::CreateSLIDurationCounter(subGroup, Aggr, "WriteInit", border, {100, 200, 500, 1000, 1500, 2000, 5000, 10000, 30000, 99999999}); SLIBigLatency = NKikimr::NPQ::TMultiCounter(subGroup, Aggr, {}, {"RequestsBigLatency"}, true, "sesnor", false); - - ui32 initDurationMs = (ctx.Now() - StartTime).MilliSeconds(); - InitLatency.IncFor(initDurationMs, 1); - if (initDurationMs >= border) { - SLIBigLatency.Inc(); - } -} - -void TWriteSessionActor::CloseSession(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode errorCode, const NActors::TActorContext& ctx) { - - if (errorCode != PersQueue::ErrorCode::OK) { - - if (InternalErrorCode(errorCode)) { - SLIErrors.Inc(); - } - - if (Errors) { - Errors.Inc(); - } else { - ++(*GetServiceCounters(Counters, "pqproxy|writeSession")->GetCounter("Errors", true)); - } - + + ui32 initDurationMs = (ctx.Now() - StartTime).MilliSeconds(); + InitLatency.IncFor(initDurationMs, 1); + if (initDurationMs >= border) { + SLIBigLatency.Inc(); + } +} + +void TWriteSessionActor::CloseSession(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode errorCode, const NActors::TActorContext& ctx) { + + if (errorCode != PersQueue::ErrorCode::OK) { + + if (InternalErrorCode(errorCode)) { + SLIErrors.Inc(); + } + + if (Errors) { + Errors.Inc(); + } else { + ++(*GetServiceCounters(Counters, "pqproxy|writeSession")->GetCounter("Errors", true)); + } + StreamingWriteServerMessage result; - result.set_status(ConvertPersQueueInternalCodeToStatus(errorCode)); - FillIssue(result.add_issues(), errorCode, errorReason); - - LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 error cookie: " << Cookie << " reason: " << errorReason << " sessionId: " << OwnerCookie); - - if (!Request->GetStreamCtx()->WriteAndFinish(std::move(result), grpc::Status::OK)) { - LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " grpc last write failed"); - } - } else { - if (!Request->GetStreamCtx()->Finish(grpc::Status::OK)) { - LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " double finish call"); - } - LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 closed cookie: " << Cookie << " sessionId: " << OwnerCookie); - } + result.set_status(ConvertPersQueueInternalCodeToStatus(errorCode)); + FillIssue(result.add_issues(), errorCode, errorReason); + + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 error cookie: " << Cookie << " reason: " << errorReason << " sessionId: " << OwnerCookie); + + if (!Request->GetStreamCtx()->WriteAndFinish(std::move(result), grpc::Status::OK)) { + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " grpc last write failed"); + } + } else { + if (!Request->GetStreamCtx()->Finish(grpc::Status::OK)) { + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " double finish call"); + } + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 closed cookie: " << Cookie << " sessionId: " << OwnerCookie); + } Die(ctx); } @@ -755,11 +755,11 @@ void TWriteSessionActor::Handle(NPQ::TEvPartitionWriter::TEvInitResult::TPtr& ev if (!result.IsSuccess()) { const auto& error = result.GetError(); if (error.Response.HasErrorCode()) { - return CloseSession("status is not ok: " + error.Response.GetErrorReason(), ConvertOldCode(error.Response.GetErrorCode()), ctx); + return CloseSession("status is not ok: " + error.Response.GetErrorReason(), ConvertOldCode(error.Response.GetErrorCode()), ctx); } else { return CloseSession("error at writer init: " + error.Reason, PersQueue::ErrorCode::ERROR, ctx); } - } + } OwnerCookie = result.GetResult().OwnerCookie; const auto& maxSeqNo = result.GetResult().SourceIdInfo.GetSeqNo(); @@ -778,20 +778,20 @@ void TWriteSessionActor::Handle(NPQ::TEvPartitionWriter::TEvInitResult::TPtr& ev init->add_supported_codecs(CodecByName(codecName)); } } - + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session inited cookie: " << Cookie << " partition: " << Partition << " MaxSeqNo: " << maxSeqNo << " sessionId: " << OwnerCookie); - + if (!Request->GetStreamCtx()->Write(std::move(response))) { LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " grpc write failed"); Die(ctx); return; } - + State = ES_INITED; - + ctx.Schedule(CHECK_ACL_DELAY, new TEvents::TEvWakeup()); - + //init completed; wait for first data chunk NextRequestInited = true; if (!Request->GetStreamCtx()->Read()) { @@ -805,66 +805,66 @@ void TWriteSessionActor::Handle(NPQ::TEvPartitionWriter::TEvWriteAccepted::TPtr& if (State != ES_INITED) { return CloseSession("got write permission but not wait for it", PersQueue::ErrorCode::ERROR, ctx); } - + Y_VERIFY(!FormedWrites.empty()); TWriteRequestBatchInfo::TPtr writeRequest = std::move(FormedWrites.front()); - - if (ev->Get()->Cookie != writeRequest->Cookie) { - return CloseSession("out of order reserve bytes response from server, may be previous is lost", PersQueue::ErrorCode::ERROR, ctx); + + if (ev->Get()->Cookie != writeRequest->Cookie) { + return CloseSession("out of order reserve bytes response from server, may be previous is lost", PersQueue::ErrorCode::ERROR, ctx); } - + FormedWrites.pop_front(); - + ui64 diff = writeRequest->ByteSize; - + SentMessages.emplace_back(std::move(writeRequest)); - + BytesInflight_ -= diff; BytesInflight.Dec(diff); - + if (!NextRequestInited && BytesInflight_ < MAX_BYTES_INFLIGHT) { //allow only one big request to be readed but not sended - NextRequestInited = true; - if (!Request->GetStreamCtx()->Read()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " grpc read failed"); - Die(ctx); - return; - } - } - + NextRequestInited = true; + if (!Request->GetStreamCtx()->Read()) { + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " grpc read failed"); + Die(ctx); + return; + } + } + --NumReserveBytesRequests; if (!Writes.empty()) GenerateNextWriteRequest(ctx); } - + void TWriteSessionActor::Handle(NPQ::TEvPartitionWriter::TEvWriteResponse::TPtr& ev, const TActorContext& ctx) { if (State != ES_INITED) { return CloseSession("got write response but not wait for it", PersQueue::ErrorCode::ERROR, ctx); } - + const auto& result = *ev->Get(); if (!result.IsSuccess()) { const auto& record = result.Record; if (record.HasErrorCode()) { - return CloseSession("status is not ok: " + record.GetErrorReason(), ConvertOldCode(record.GetErrorCode()), ctx); + return CloseSession("status is not ok: " + record.GetErrorReason(), ConvertOldCode(record.GetErrorCode()), ctx); } else { return CloseSession("error at write: " + result.GetError().Reason, PersQueue::ErrorCode::ERROR, ctx); } } - + const auto& resp = result.Record.GetPartitionResponse(); - + if (SentMessages.empty()) { CloseSession("got too many replies from server, internal error", PersQueue::ErrorCode::ERROR, ctx); return; } - + TWriteRequestBatchInfo::TPtr writeRequest = std::move(SentMessages.front()); SentMessages.pop_front(); - - if (resp.GetCookie() != writeRequest->Cookie) { - return CloseSession("out of order write response from server, may be previous is lost", PersQueue::ErrorCode::ERROR, ctx); + + if (resp.GetCookie() != writeRequest->Cookie) { + return CloseSession("out of order write response from server, may be previous is lost", PersQueue::ErrorCode::ERROR, ctx); } - + auto addAck = [](const TPersQueuePartitionResponse::TCmdWriteResult& res, StreamingWriteServerMessage::BatchWriteResponse* batchWriteResponse, StreamingWriteServerMessage::WriteStatistics* stat) { batchWriteResponse->add_sequence_numbers(res.GetSeqNo()); @@ -892,18 +892,18 @@ void TWriteSessionActor::Handle(NPQ::TEvPartitionWriter::TEvWriteResponse::TPtr& if (partitionCmdWriteResultIndex == resp.CmdWriteResultSize()) { CloseSession("too less responses from server", PersQueue::ErrorCode::ERROR, ctx); return; - } + } const auto& partitionCmdWriteResult = resp.GetCmdWriteResult(partitionCmdWriteResultIndex); const auto writtenSequenceNumber = userWriteRequest->Request.write_request().sequence_numbers(messageIndex); if (partitionCmdWriteResult.GetSeqNo() != writtenSequenceNumber) { CloseSession(TStringBuilder() << "Expected partition " << Partition << " write result for message with sequence number " << writtenSequenceNumber << " but got for " << partitionCmdWriteResult.GetSeqNo(), PersQueue::ErrorCode::ERROR, ctx); - return; - } - + return; + } + addAck(partitionCmdWriteResult, batchWriteResponse, batchWriteResponse->mutable_write_statistics()); ++partitionCmdWriteResultIndex; } - + if (!Request->GetStreamCtx()->Write(std::move(result))) { // TODO: Log gRPC write error code LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " grpc write failed"); @@ -911,77 +911,77 @@ void TWriteSessionActor::Handle(NPQ::TEvPartitionWriter::TEvWriteResponse::TPtr& return; } } - + ui64 diff = writeRequest->ByteSize; - + BytesInflightTotal_ -= diff; BytesInflightTotal.Dec(diff); - + CheckFinish(ctx); } - + void TWriteSessionActor::Handle(NPQ::TEvPartitionWriter::TEvDisconnected::TPtr&, const TActorContext& ctx) { CloseSession("pipe to partition's tablet is dead", PersQueue::ErrorCode::ERROR, ctx); } -void TWriteSessionActor::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) { - TEvTabletPipe::TEvClientConnected *msg = ev->Get(); - //TODO: add here retries for connecting to PQRB - if (msg->Status != NKikimrProto::OK) { - CloseSession(TStringBuilder() << "pipe to tablet is dead " << msg->TabletId, PersQueue::ErrorCode::ERROR, ctx); - return; - } +void TWriteSessionActor::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) { + TEvTabletPipe::TEvClientConnected *msg = ev->Get(); + //TODO: add here retries for connecting to PQRB + if (msg->Status != NKikimrProto::OK) { + CloseSession(TStringBuilder() << "pipe to tablet is dead " << msg->TabletId, PersQueue::ErrorCode::ERROR, ctx); + return; + } } - -void TWriteSessionActor::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) { - //TODO: add here retries for connecting to PQRB - CloseSession(TStringBuilder() << "pipe to tablet is dead " << ev->Get()->TabletId, PersQueue::ErrorCode::ERROR, ctx); + +void TWriteSessionActor::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) { + //TODO: add here retries for connecting to PQRB + CloseSession(TStringBuilder() << "pipe to tablet is dead " << ev->Get()->TabletId, PersQueue::ErrorCode::ERROR, ctx); } - -void TWriteSessionActor::GenerateNextWriteRequest(const TActorContext& ctx) { + +void TWriteSessionActor::GenerateNextWriteRequest(const TActorContext& ctx) { TWriteRequestBatchInfo::TPtr writeRequest = new TWriteRequestBatchInfo(); - + auto ev = MakeHolder<NPQ::TEvPartitionWriter::TEvWriteRequest>(++NextRequestCookie); NKikimrClient::TPersQueueRequest& request = ev->Record; writeRequest->UserWriteRequests = std::move(Writes); Writes.clear(); - i64 diff = 0; + i64 diff = 0; auto addData = [&](const StreamingWriteClientMessage::WriteRequest& writeRequest, const i32 messageIndex) { - auto w = request.MutablePartitionRequest()->AddCmdWrite(); + auto w = request.MutablePartitionRequest()->AddCmdWrite(); w->SetData(GetSerializedData(InitMeta, writeRequest, messageIndex)); w->SetSeqNo(writeRequest.sequence_numbers(messageIndex)); w->SetSourceId(NPQ::NSourceIdEncoding::EncodeSimple(SourceId)); w->SetCreateTimeMS(writeRequest.created_at_ms(messageIndex)); - w->SetUncompressedSize(writeRequest.blocks_uncompressed_sizes(messageIndex)); - w->SetClientDC(ClientDC); + w->SetUncompressedSize(writeRequest.blocks_uncompressed_sizes(messageIndex)); + w->SetClientDC(ClientDC); }; - + for (const auto& write : writeRequest->UserWriteRequests) { diff -= write->Request.ByteSize(); const auto& writeRequest = write->Request.write_request(); for (i32 messageIndex = 0; messageIndex != writeRequest.sequence_numbers_size(); ++messageIndex) { addData(writeRequest, messageIndex); } - } - + } + writeRequest->Cookie = request.GetPartitionRequest().GetCookie(); - - Y_VERIFY(-diff <= (i64)BytesInflight_); - diff += request.ByteSize(); - BytesInflight_ += diff; - BytesInflightTotal_ += diff; - BytesInflight.Inc(diff); - BytesInflightTotal.Inc(diff); - + + Y_VERIFY(-diff <= (i64)BytesInflight_); + diff += request.ByteSize(); + BytesInflight_ += diff; + BytesInflightTotal_ += diff; + BytesInflight.Inc(diff); + BytesInflightTotal.Inc(diff); + writeRequest->ByteSize = request.ByteSize(); FormedWrites.push_back(writeRequest); - + ctx.Send(Writer, std::move(ev)); - ++NumReserveBytesRequests; -} - + ++NumReserveBytesRequests; +} + void TWriteSessionActor::Handle(TEvPQProxy::TEvUpdateToken::TPtr& ev, const TActorContext& ctx) { if (State != ES_INITED) { CloseSession("got 'update_token_request' but write session is not initialized", PersQueue::ErrorCode::BAD_REQUEST, ctx); @@ -1024,33 +1024,33 @@ void TWriteSessionActor::Handle(TEvPQProxy::TEvUpdateToken::TPtr& ev, const TAct } } -void TWriteSessionActor::Handle(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse::TPtr &ev , const TActorContext& ctx) { - Y_UNUSED(ctx); +void TWriteSessionActor::Handle(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse::TPtr &ev , const TActorContext& ctx) { + Y_UNUSED(ctx); LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "updating token"); - if (ev->Get()->Authenticated && !ev->Get()->InternalToken.empty()) { - Token = new NACLib::TUserToken(ev->Get()->InternalToken); + if (ev->Get()->Authenticated && !ev->Get()->InternalToken.empty()) { + Token = new NACLib::TUserToken(ev->Get()->InternalToken); Request->SetInternalToken(ev->Get()->InternalToken); UpdateTokenAuthenticated = true; if (!ACLCheckInProgress) { InitCheckSchema(ctx); } - } else { - Request->ReplyUnauthenticated("refreshed token is invalid"); - Die(ctx); - } -} - -void TWriteSessionActor::Handle(TEvPQProxy::TEvWrite::TPtr& ev, const TActorContext& ctx) { - - RequestNotChecked = true; - - if (State != ES_INITED) { - //answer error - CloseSession("write in not inited session", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; - } - + } else { + Request->ReplyUnauthenticated("refreshed token is invalid"); + Die(ctx); + } +} + +void TWriteSessionActor::Handle(TEvPQProxy::TEvWrite::TPtr& ev, const TActorContext& ctx) { + + RequestNotChecked = true; + + if (State != ES_INITED) { + //answer error + CloseSession("write in not inited session", PersQueue::ErrorCode::BAD_REQUEST, ctx); + return; + } + const auto& writeRequest = ev->Get()->Request.write_request(); if (!AllEqual(writeRequest.sequence_numbers_size(), writeRequest.created_at_ms_size(), writeRequest.sent_at_ms_size(), writeRequest.message_sizes_size())) { CloseSession(TStringBuilder() << "messages meta repeated fields do not have same size, 'sequence_numbers' size is " << writeRequest.sequence_numbers_size() @@ -1082,7 +1082,7 @@ void TWriteSessionActor::Handle(TEvPQProxy::TEvWrite::TPtr& ev, const TActorCont CloseSession(TStringBuilder() << "bad write request - 'sequence_numbers' items must be greater than 0. Value at position " << messageIndex << " is " << data.sequence_numbers(messageIndex), PersQueue::ErrorCode::BAD_REQUEST, ctx); return false; } - + if (messageIndex > 0 && data.sequence_numbers(messageIndex) <= data.sequence_numbers(messageIndex - 1)) { CloseSession(TStringBuilder() << "bad write request - 'sequence_numbers' are unsorted. Value " << data.sequence_numbers(messageIndex) << " at position " << messageIndex << " is less than or equal to value " << data.sequence_numbers(messageIndex - 1) << " at position " << (messageIndex - 1), PersQueue::ErrorCode::BAD_REQUEST, ctx); @@ -1099,7 +1099,7 @@ void TWriteSessionActor::Handle(TEvPQProxy::TEvWrite::TPtr& ev, const TActorCont if (!ValidateWriteWithCodec(InitialPQTabletConfig, codecID, error)) { CloseSession(TStringBuilder() << "bad write request - 'blocks_headers' at position " << messageIndex << " is invalid: " << error, PersQueue::ErrorCode::BAD_REQUEST, ctx); return false; - } + } if (data.blocks_message_counts(messageIndex) != 1) { CloseSession(TStringBuilder() << "bad write request - 'blocks_message_counts' at position " << messageIndex << " is " << data.blocks_message_counts(messageIndex) @@ -1114,61 +1114,61 @@ void TWriteSessionActor::Handle(TEvPQProxy::TEvWrite::TPtr& ev, const TActorCont } } - THolder<TEvPQProxy::TEvWrite> event(ev->Release()); - Writes.push_back(std::move(event)); - - ui64 diff = Writes.back()->Request.ByteSize(); - BytesInflight_ += diff; - BytesInflightTotal_ += diff; - BytesInflight.Inc(diff); - BytesInflightTotal.Inc(diff); - - if (BytesInflight_ < MAX_BYTES_INFLIGHT) { //allow only one big request to be readed but not sended - Y_VERIFY(NextRequestInited); - if (!Request->GetStreamCtx()->Read()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " grpc read failed"); - Die(ctx); - return; - - } - } else { - NextRequestInited = false; - } - - if (NumReserveBytesRequests < MAX_RESERVE_REQUESTS_INFLIGHT) { - GenerateNextWriteRequest(ctx); - } -} - -void TWriteSessionActor::HandlePoison(TEvPQProxy::TEvDieCommand::TPtr& ev, const TActorContext& ctx) { + THolder<TEvPQProxy::TEvWrite> event(ev->Release()); + Writes.push_back(std::move(event)); + + ui64 diff = Writes.back()->Request.ByteSize(); + BytesInflight_ += diff; + BytesInflightTotal_ += diff; + BytesInflight.Inc(diff); + BytesInflightTotal.Inc(diff); + + if (BytesInflight_ < MAX_BYTES_INFLIGHT) { //allow only one big request to be readed but not sended + Y_VERIFY(NextRequestInited); + if (!Request->GetStreamCtx()->Read()) { + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " grpc read failed"); + Die(ctx); + return; + + } + } else { + NextRequestInited = false; + } + + if (NumReserveBytesRequests < MAX_RESERVE_REQUESTS_INFLIGHT) { + GenerateNextWriteRequest(ctx); + } +} + +void TWriteSessionActor::HandlePoison(TEvPQProxy::TEvDieCommand::TPtr& ev, const TActorContext& ctx) { CloseSession(ev->Get()->Reason, ev->Get()->ErrorCode, ctx); -} - -void TWriteSessionActor::LogSession(const TActorContext& ctx) { - LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "write session: cookie=" << Cookie << " sessionId=" << OwnerCookie << " userAgent=\"" << UserAgent << "\" ip=" << PeerName << " proto=v1 " +} + +void TWriteSessionActor::LogSession(const TActorContext& ctx) { + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "write session: cookie=" << Cookie << " sessionId=" << OwnerCookie << " userAgent=\"" << UserAgent << "\" ip=" << PeerName << " proto=v1 " << " topic=" << TopicConverter->GetModernName() << " durationSec=" << (ctx.Now() - StartTime).Seconds()); - - LogSessionDeadline = ctx.Now() + TDuration::Hours(1) + TDuration::Seconds(rand() % 60); -} - -void TWriteSessionActor::HandleWakeup(const TActorContext& ctx) { - Y_VERIFY(State == ES_INITED); - ctx.Schedule(CHECK_ACL_DELAY, new TEvents::TEvWakeup()); + + LogSessionDeadline = ctx.Now() + TDuration::Hours(1) + TDuration::Seconds(rand() % 60); +} + +void TWriteSessionActor::HandleWakeup(const TActorContext& ctx) { + Y_VERIFY(State == ES_INITED); + ctx.Schedule(CHECK_ACL_DELAY, new TEvents::TEvWakeup()); if (Token && !ACLCheckInProgress && RequestNotChecked && (ctx.Now() - LastACLCheckTimestamp > TDuration::Seconds(AppData(ctx)->PQConfig.GetACLRetryTimeoutSec()))) { - RequestNotChecked = false; + RequestNotChecked = false; InitCheckSchema(ctx); - } + } // ToDo[migration] - separate flag for having config tables - if (!AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen() && !SourceIdUpdateInfly && ctx.Now() - LastSourceIdUpdate > SOURCEID_UPDATE_PERIOD) { + if (!AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen() && !SourceIdUpdateInfly && ctx.Now() - LastSourceIdUpdate > SOURCEID_UPDATE_PERIOD) { auto ev = MakeUpdateSourceIdMetadataRequest(ctx); SourceIdUpdateInfly = true; ctx.Send(NKqp::MakeKqpProxyID(ctx.SelfID.NodeId()), ev.Release()); } - if (ctx.Now() >= LogSessionDeadline) { - LogSession(ctx); - } -} - -} -} -} + if (ctx.Now() >= LogSessionDeadline) { + LogSession(ctx); + } +} + +} +} +} diff --git a/ydb/services/persqueue_v1/persqueue.cpp b/ydb/services/persqueue_v1/persqueue.cpp index 7466229210..ccacc4d43a 100644 --- a/ydb/services/persqueue_v1/persqueue.cpp +++ b/ydb/services/persqueue_v1/persqueue.cpp @@ -1,63 +1,63 @@ -#include "persqueue.h" - +#include "persqueue.h" + #include <ydb/core/base/appdata.h> #include <ydb/core/base/counters.h> #include <ydb/core/grpc_services/rpc_calls.h> #include <ydb/core/grpc_services/grpc_helper.h> #include <ydb/core/tx/scheme_board/cache.h> - -#include "grpc_pq_read.h" -#include "grpc_pq_write.h" -#include "grpc_pq_schema.h" - -namespace NKikimr { -namespace NGRpcService { -namespace V1 { - -static const ui32 PersQueueWriteSessionsMaxCount = 1000000; -static const ui32 PersQueueReadSessionsMaxCount = 100000; - + +#include "grpc_pq_read.h" +#include "grpc_pq_write.h" +#include "grpc_pq_schema.h" + +namespace NKikimr { +namespace NGRpcService { +namespace V1 { + +static const ui32 PersQueueWriteSessionsMaxCount = 1000000; +static const ui32 PersQueueReadSessionsMaxCount = 100000; + TGRpcPersQueueService::TGRpcPersQueueService(NActors::TActorSystem *system, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const NActors::TActorId& schemeCache,const NActors::TActorId& grpcRequestProxy) - : ActorSystem(system) - , Counters(counters) - , SchemeCache(schemeCache) - , GRpcRequestProxy(grpcRequestProxy) -{ } - + : ActorSystem(system) + , Counters(counters) + , SchemeCache(schemeCache) + , GRpcRequestProxy(grpcRequestProxy) +{ } + void TGRpcPersQueueService::InitService(grpc::ServerCompletionQueue *cq, NGrpc::TLoggerPtr logger) { - CQ = cq; + CQ = cq; InitNewSchemeCacheActor(); - if (ActorSystem->AppData<TAppData>()->PQConfig.GetEnabled()) { - + if (ActorSystem->AppData<TAppData>()->PQConfig.GetEnabled()) { + IActor* writeSvc = NGRpcProxy::V1::CreatePQWriteService(SchemeCache, NewSchemeCache,Counters, PersQueueWriteSessionsMaxCount); TActorId actorId = ActorSystem->Register(writeSvc, TMailboxType::HTSwap, ActorSystem->AppData<TAppData>()->UserPoolId); - ActorSystem->RegisterLocalService(NGRpcProxy::V1::GetPQWriteServiceActorID(), actorId); - + ActorSystem->RegisterLocalService(NGRpcProxy::V1::GetPQWriteServiceActorID(), actorId); + IActor* readSvc = NGRpcProxy::V1::CreatePQReadService(SchemeCache, NewSchemeCache, Counters, PersQueueReadSessionsMaxCount); - actorId = ActorSystem->Register(readSvc, TMailboxType::HTSwap, ActorSystem->AppData<TAppData>()->UserPoolId); - ActorSystem->RegisterLocalService(NGRpcProxy::V1::GetPQReadServiceActorID(), actorId); - - IActor* schemaSvc = NGRpcProxy::V1::CreatePQSchemaService(SchemeCache, Counters); - actorId = ActorSystem->Register(schemaSvc, TMailboxType::HTSwap, ActorSystem->AppData<TAppData>()->UserPoolId); - ActorSystem->RegisterLocalService(NGRpcProxy::V1::GetPQSchemaServiceActorID(), actorId); - + actorId = ActorSystem->Register(readSvc, TMailboxType::HTSwap, ActorSystem->AppData<TAppData>()->UserPoolId); + ActorSystem->RegisterLocalService(NGRpcProxy::V1::GetPQReadServiceActorID(), actorId); + + IActor* schemaSvc = NGRpcProxy::V1::CreatePQSchemaService(SchemeCache, Counters); + actorId = ActorSystem->Register(schemaSvc, TMailboxType::HTSwap, ActorSystem->AppData<TAppData>()->UserPoolId); + ActorSystem->RegisterLocalService(NGRpcProxy::V1::GetPQSchemaServiceActorID(), actorId); + SetupIncomingRequests(std::move(logger)); - } -} - + } +} + void TGRpcPersQueueService::SetGlobalLimiterHandle(NGrpc::TGlobalLimiter* limiter) { - Limiter = limiter; -} - -bool TGRpcPersQueueService::IncRequest() { - return Limiter->Inc(); -} - -void TGRpcPersQueueService::DecRequest() { - Limiter->Dec(); -} - + Limiter = limiter; +} + +bool TGRpcPersQueueService::IncRequest() { + return Limiter->Inc(); +} + +void TGRpcPersQueueService::DecRequest() { + Limiter->Dec(); +} + void TGRpcPersQueueService::InitNewSchemeCacheActor() { auto appData = ActorSystem->AppData<TAppData>(); auto cacheCounters = GetServiceCounters(Counters, "pqproxy|schemecache"); @@ -67,93 +67,93 @@ void TGRpcPersQueueService::InitNewSchemeCacheActor() { } void TGRpcPersQueueService::SetupIncomingRequests(NGrpc::TLoggerPtr logger) { - - auto getCounterBlock = NKikimr::NGRpcService::CreateCounterCb(Counters, ActorSystem); - - { + + auto getCounterBlock = NKikimr::NGRpcService::CreateCounterCb(Counters, ActorSystem); + + { using TBiRequest = Ydb::PersQueue::V1::StreamingWriteClientMessage; - + using TBiResponse = Ydb::PersQueue::V1::StreamingWriteServerMessage; - - using TStreamGRpcRequest = NGRpcServer::TGRpcStreamingRequest< - TBiRequest, - TBiResponse, - TGRpcPersQueueService, - NKikimrServices::GRPC_SERVER>; - - + + using TStreamGRpcRequest = NGRpcServer::TGRpcStreamingRequest< + TBiRequest, + TBiResponse, + TGRpcPersQueueService, + NKikimrServices::GRPC_SERVER>; + + TStreamGRpcRequest::Start(this, this->GetService(), CQ, &Ydb::PersQueue::V1::PersQueueService::AsyncService::RequestStreamingWrite, - [this](TIntrusivePtr<TStreamGRpcRequest::IContext> context) { - ActorSystem->Send(GRpcRequestProxy, new NKikimr::NGRpcService::TEvStreamPQWriteRequest(context)); - }, + [this](TIntrusivePtr<TStreamGRpcRequest::IContext> context) { + ActorSystem->Send(GRpcRequestProxy, new NKikimr::NGRpcService::TEvStreamPQWriteRequest(context)); + }, *ActorSystem, "PersQueueService/CreateWriteSession", getCounterBlock("persistent_queue", "WriteSession", true, true), nullptr - ); - } - - { + ); + } + + { using TBiRequest = Ydb::PersQueue::V1::MigrationStreamingReadClientMessage; - + using TBiResponse = Ydb::PersQueue::V1::MigrationStreamingReadServerMessage; - - using TStreamGRpcRequest = NGRpcServer::TGRpcStreamingRequest< - TBiRequest, - TBiResponse, - TGRpcPersQueueService, - NKikimrServices::GRPC_SERVER>; - - + + using TStreamGRpcRequest = NGRpcServer::TGRpcStreamingRequest< + TBiRequest, + TBiResponse, + TGRpcPersQueueService, + NKikimrServices::GRPC_SERVER>; + + TStreamGRpcRequest::Start(this, this->GetService(), CQ, &Ydb::PersQueue::V1::PersQueueService::AsyncService::RequestMigrationStreamingRead, - [this](TIntrusivePtr<TStreamGRpcRequest::IContext> context) { - ActorSystem->Send(GRpcRequestProxy, new NKikimr::NGRpcService::TEvStreamPQReadRequest(context)); - }, + [this](TIntrusivePtr<TStreamGRpcRequest::IContext> context) { + ActorSystem->Send(GRpcRequestProxy, new NKikimr::NGRpcService::TEvStreamPQReadRequest(context)); + }, *ActorSystem, "PersQueueService/CreateReadSession", getCounterBlock("persistent_queue", "ReadSession", true, true), nullptr - ); - } - -#ifdef ADD_REQUEST -#error ADD_REQUEST macro already defined -#endif -#define ADD_REQUEST(NAME, SVC, IN, OUT, ACTION) \ + ); + } + +#ifdef ADD_REQUEST +#error ADD_REQUEST macro already defined +#endif +#define ADD_REQUEST(NAME, SVC, IN, OUT, ACTION) \ MakeIntrusive<TGRpcRequest<Ydb::PersQueue::V1::IN, Ydb::PersQueue::V1::OUT, NGRpcService::V1::TGRpcPersQueueService>>(this, this->GetService(), CQ, \ [this](NGrpc::IRequestContextBase *ctx) { \ - NGRpcService::ReportGrpcReqToMon(*ActorSystem, ctx->GetPeer()); \ - ACTION; \ - }, &Ydb::PersQueue::V1::SVC::AsyncService::Request ## NAME, \ + NGRpcService::ReportGrpcReqToMon(*ActorSystem, ctx->GetPeer()); \ + ACTION; \ + }, &Ydb::PersQueue::V1::SVC::AsyncService::Request ## NAME, \ "PersQueueService/"#NAME, logger, getCounterBlock("persistent_queue", #NAME))->Run(); - - ADD_REQUEST(GetReadSessionsInfo, PersQueueService, ReadInfoRequest, ReadInfoResponse, { - ActorSystem->Send(GRpcRequestProxy, new NGRpcService::TEvPQReadInfoRequest(ctx)); - }) - - ADD_REQUEST(DropTopic, PersQueueService, DropTopicRequest, DropTopicResponse, { - ActorSystem->Send(GRpcRequestProxy, new NGRpcService::TEvPQDropTopicRequest(ctx)); - }) - - ADD_REQUEST(CreateTopic, PersQueueService, CreateTopicRequest, CreateTopicResponse, { - ActorSystem->Send(GRpcRequestProxy, new NGRpcService::TEvPQCreateTopicRequest(ctx)); - }) - ADD_REQUEST(AlterTopic, PersQueueService, AlterTopicRequest, AlterTopicResponse, { - ActorSystem->Send(GRpcRequestProxy, new NGRpcService::TEvPQAlterTopicRequest(ctx)); - }) - ADD_REQUEST(DescribeTopic, PersQueueService, DescribeTopicRequest, DescribeTopicResponse, { - ActorSystem->Send(GRpcRequestProxy, new NGRpcService::TEvPQDescribeTopicRequest(ctx)); - }) + + ADD_REQUEST(GetReadSessionsInfo, PersQueueService, ReadInfoRequest, ReadInfoResponse, { + ActorSystem->Send(GRpcRequestProxy, new NGRpcService::TEvPQReadInfoRequest(ctx)); + }) + + ADD_REQUEST(DropTopic, PersQueueService, DropTopicRequest, DropTopicResponse, { + ActorSystem->Send(GRpcRequestProxy, new NGRpcService::TEvPQDropTopicRequest(ctx)); + }) + + ADD_REQUEST(CreateTopic, PersQueueService, CreateTopicRequest, CreateTopicResponse, { + ActorSystem->Send(GRpcRequestProxy, new NGRpcService::TEvPQCreateTopicRequest(ctx)); + }) + ADD_REQUEST(AlterTopic, PersQueueService, AlterTopicRequest, AlterTopicResponse, { + ActorSystem->Send(GRpcRequestProxy, new NGRpcService::TEvPQAlterTopicRequest(ctx)); + }) + ADD_REQUEST(DescribeTopic, PersQueueService, DescribeTopicRequest, DescribeTopicResponse, { + ActorSystem->Send(GRpcRequestProxy, new NGRpcService::TEvPQDescribeTopicRequest(ctx)); + }) ADD_REQUEST(AddReadRule, PersQueueService, AddReadRuleRequest, AddReadRuleResponse, { ActorSystem->Send(GRpcRequestProxy, new NGRpcService::TEvPQAddReadRuleRequest(ctx)); }) ADD_REQUEST(RemoveReadRule, PersQueueService, RemoveReadRuleRequest, RemoveReadRuleResponse, { ActorSystem->Send(GRpcRequestProxy, new NGRpcService::TEvPQRemoveReadRuleRequest(ctx)); }) - -#undef ADD_REQUEST - - -} - + +#undef ADD_REQUEST + + +} + void TGRpcPersQueueService::StopService() noexcept { TGrpcServiceBase::StopService(); } -} // V1 -} // namespace NGRpcService -} // namespace NKikimr +} // V1 +} // namespace NGRpcService +} // namespace NKikimr diff --git a/ydb/services/persqueue_v1/persqueue.h b/ydb/services/persqueue_v1/persqueue.h index e44576e1d8..eefe1ee4c2 100644 --- a/ydb/services/persqueue_v1/persqueue.h +++ b/ydb/services/persqueue_v1/persqueue.h @@ -1,47 +1,47 @@ -#pragma once - +#pragma once + #include <library/cpp/actors/core/actorsystem.h> - + #include <ydb/public/api/grpc/draft/ydb_persqueue_v1.grpc.pb.h> - + #include <library/cpp/grpc/server/grpc_server.h> - - -namespace NKikimr { - -namespace NGRpcService { -namespace V1 { - -class TGRpcPersQueueService + + +namespace NKikimr { + +namespace NGRpcService { +namespace V1 { + +class TGRpcPersQueueService : public NGrpc::TGrpcServiceBase<Ydb::PersQueue::V1::PersQueueService> -{ -public: +{ +public: TGRpcPersQueueService(NActors::TActorSystem* system, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const NActors::TActorId& schemeCache, const NActors::TActorId& grpcRequestProxy); - + void InitService(grpc::ServerCompletionQueue* cq, NGrpc::TLoggerPtr logger) override; void SetGlobalLimiterHandle(NGrpc::TGlobalLimiter* limiter) override; void StopService() noexcept override; - + using NGrpc::TGrpcServiceBase<Ydb::PersQueue::V1::PersQueueService>::GetService; - - bool IncRequest(); - void DecRequest(); - -private: + + bool IncRequest(); + void DecRequest(); + +private: void SetupIncomingRequests(NGrpc::TLoggerPtr logger); - + void InitNewSchemeCacheActor(); - NActors::TActorSystem* ActorSystem; + NActors::TActorSystem* ActorSystem; grpc::ServerCompletionQueue* CQ = nullptr; - - TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; + + TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; NGrpc::TGlobalLimiter* Limiter = nullptr; NActors::TActorId SchemeCache; NActors::TActorId NewSchemeCache; NActors::TActorId GRpcRequestProxy; -}; - -} // namespace V1 -} // namespace NGRpcService -} // namespace NKikimr +}; + +} // namespace V1 +} // namespace NGRpcService +} // namespace NKikimr diff --git a/ydb/services/persqueue_v1/persqueue_common_ut.cpp b/ydb/services/persqueue_v1/persqueue_common_ut.cpp index 6251bd5dd3..9d706d0656 100644 --- a/ydb/services/persqueue_v1/persqueue_common_ut.cpp +++ b/ydb/services/persqueue_v1/persqueue_common_ut.cpp @@ -282,14 +282,14 @@ Y_UNIT_TEST_SUITE(TPersQueueCommonTest) { for (const TString &topicPath : differentTopicPathsTypes) { server.CreateTopicWithQuota(topicPath); - auto driver = server.Server->AnnoyingClient->GetDriver(); - - { - auto writer = CreateSimpleWriter(*driver, server.TenantModeEnabled() ? "/Root/PQ/" + topicPath : topicPath, "123"); - writer->Write(data); - bool res = writer->Close(TDuration::Seconds(10)); - UNIT_ASSERT(res); - } + auto driver = server.Server->AnnoyingClient->GetDriver(); + + { + auto writer = CreateSimpleWriter(*driver, server.TenantModeEnabled() ? "/Root/PQ/" + topicPath : topicPath, "123"); + writer->Write(data); + bool res = writer->Close(TDuration::Seconds(10)); + UNIT_ASSERT(res); + } } } @@ -311,39 +311,39 @@ Y_UNIT_TEST_SUITE(TPersQueueCommonTest) { server.CreateTopicWithQuota(topicPath, true, 100.0); const TString data = TString("123") * 100; // 300 bytes // 3 seconds - auto driver = server.Server->AnnoyingClient->GetDriver(); - - // Warm up write - { - auto writer = CreateSimpleWriter(*driver, server.TenantModeEnabled() ? "/Root/PQ/" + topicPath : topicPath, "123"); - - writer->Write(data); - bool res = writer->Close(TDuration::Seconds(10)); - UNIT_ASSERT(res); - } - + auto driver = server.Server->AnnoyingClient->GetDriver(); + + // Warm up write + { + auto writer = CreateSimpleWriter(*driver, server.TenantModeEnabled() ? "/Root/PQ/" + topicPath : topicPath, "123"); + + writer->Write(data); + bool res = writer->Close(TDuration::Seconds(10)); + UNIT_ASSERT(res); + } + // will be removed const TInstant startWrite = TInstant::Now(); - - { - auto writer = CreateSimpleWriter(*driver, server.TenantModeEnabled() ? "/Root/PQ/" + topicPath : topicPath, "123"); - - writer->Write(data); - bool res = writer->Close(TDuration::Seconds(10)); - UNIT_ASSERT(res); - } - - { - auto writer = CreateSimpleWriter(*driver, server.TenantModeEnabled() ? "/Root/PQ/" + topicPath : topicPath, "123"); - - writer->Write(data); - bool res = writer->Close(TDuration::Seconds(10)); - UNIT_ASSERT(res); - } - - + + { + auto writer = CreateSimpleWriter(*driver, server.TenantModeEnabled() ? "/Root/PQ/" + topicPath : topicPath, "123"); + + writer->Write(data); + bool res = writer->Close(TDuration::Seconds(10)); + UNIT_ASSERT(res); + } + + { + auto writer = CreateSimpleWriter(*driver, server.TenantModeEnabled() ? "/Root/PQ/" + topicPath : topicPath, "123"); + + writer->Write(data); + bool res = writer->Close(TDuration::Seconds(10)); + UNIT_ASSERT(res); + } + + const TInstant endWrite = TInstant::Now(); // Check write time with quota const TDuration writeTime = endWrite - startWrite; @@ -355,6 +355,6 @@ Y_UNIT_TEST_SUITE(TPersQueueCommonTest) { } } -} +} } diff --git a/ydb/services/persqueue_v1/persqueue_new_schemecache_ut.cpp b/ydb/services/persqueue_v1/persqueue_new_schemecache_ut.cpp index 66bd9e37b5..8e209dbd9d 100644 --- a/ydb/services/persqueue_v1/persqueue_new_schemecache_ut.cpp +++ b/ydb/services/persqueue_v1/persqueue_new_schemecache_ut.cpp @@ -39,7 +39,7 @@ namespace NKikimr::NPersQueueTests { using namespace Ydb::PersQueue::V1; using namespace NThreading; using namespace NNetClassifier; - using namespace NYdb::NPersQueue; + using namespace NYdb::NPersQueue; using namespace NPersQueue; NJson::TJsonValue GetCountersNewSchemeCache(ui16 port, const TString& counters, const TString& subsystem, const TString& topicPath) { @@ -76,7 +76,7 @@ namespace NKikimr::NPersQueueTests { annoyingClient.MkDir("/Root/PQ", "account1"); annoyingClient.CreateTopicNoLegacy(DEFAULT_TOPIC_PATH, 5, false); annoyingClient.CreateTopicNoLegacy("/Root/PQ/account1/topic1", 5, false); - annoyingClient.CreateTopicNoLegacy("/Root/account2/topic2", 5); + annoyingClient.CreateTopicNoLegacy("/Root/account2/topic2", 5); } Y_UNIT_TEST(CheckGrpcWriteNoDC) { @@ -92,16 +92,16 @@ namespace NKikimr::NPersQueueTests { TPQDataWriter writer("source1", server, DEFAULT_TOPIC_PATH); - writer.Write("/Root/account2/topic2", {"valuevaluevalue1"}, true, "topic1@" BUILTIN_ACL_DOMAIN); + writer.Write("/Root/account2/topic2", {"valuevaluevalue1"}, true, "topic1@" BUILTIN_ACL_DOMAIN); writer.Write("/Root/PQ/account1/topic1", {"valuevaluevalue1"}, true, "topic1@" BUILTIN_ACL_DOMAIN); NACLib::TDiffACL acl; acl.AddAccess(NACLib::EAccessType::Allow, NACLib::UpdateRow, "topic1@" BUILTIN_ACL_DOMAIN); - server.AnnoyingClient->ModifyACL("/Root/account2", "topic2", acl.SerializeAsString()); + server.AnnoyingClient->ModifyACL("/Root/account2", "topic2", acl.SerializeAsString()); server.AnnoyingClient->ModifyACL("/Root/PQ/account1", "topic1", acl.SerializeAsString()); WaitACLModification(); - writer.Write("/Root/account2/topic2", {"valuevaluevalue1"}, false, "topic1@" BUILTIN_ACL_DOMAIN); + writer.Write("/Root/account2/topic2", {"valuevaluevalue1"}, false, "topic1@" BUILTIN_ACL_DOMAIN); writer.Write("/Root/PQ/account1/topic1", {"valuevaluevalue1"}, false, "topic1@" BUILTIN_ACL_DOMAIN); writer.Write("/Root/PQ/account1/topic1", {"valuevaluevalue2"}, false, "topic1@" BUILTIN_ACL_DOMAIN); @@ -116,130 +116,130 @@ namespace NKikimr::NPersQueueTests { PrepareForGrpcNoDC(*server.AnnoyingClient); NYdb::TDriverConfig driverCfg; - driverCfg.SetEndpoint(TStringBuilder() << "localhost:" << server.GrpcPort).SetLog(CreateLogBackend("cerr", ELogPriority::TLOG_DEBUG)).SetDatabase("/Root"); - + driverCfg.SetEndpoint(TStringBuilder() << "localhost:" << server.GrpcPort).SetLog(CreateLogBackend("cerr", ELogPriority::TLOG_DEBUG)).SetDatabase("/Root"); + auto ydbDriver = MakeHolder<NYdb::TDriver>(driverCfg); auto persQueueClient = MakeHolder<NYdb::NPersQueue::TPersQueueClient>(*ydbDriver); - { - auto res = persQueueClient->AddReadRule("/Root/account2/topic2", TAddReadRuleSettings().ReadRule(TReadRuleSettings().ConsumerName("user1"))); - res.Wait(); - UNIT_ASSERT(res.GetValue().IsSuccess()); - } + { + auto res = persQueueClient->AddReadRule("/Root/account2/topic2", TAddReadRuleSettings().ReadRule(TReadRuleSettings().ConsumerName("user1"))); + res.Wait(); + UNIT_ASSERT(res.GetValue().IsSuccess()); + } { NACLib::TDiffACL acl; acl.AddAccess(NACLib::EAccessType::Allow, NACLib::SelectRow, "user1@" BUILTIN_ACL_DOMAIN); - server.AnnoyingClient->ModifyACL("/Root/account2", "topic2", acl.SerializeAsString()); + server.AnnoyingClient->ModifyACL("/Root/account2", "topic2", acl.SerializeAsString()); } WaitACLModification(); { - auto writer = CreateSimpleWriter(*ydbDriver, "/Root/account2/topic2", "123", 1); + auto writer = CreateSimpleWriter(*ydbDriver, "/Root/account2/topic2", "123", 1); for (int i = 0; i < 4; ++i) { - bool res = writer->Write(TString(10, 'a')); - UNIT_ASSERT(res); + bool res = writer->Write(TString(10, 'a')); + UNIT_ASSERT(res); } - bool res = writer->Close(TDuration::Seconds(10)); - UNIT_ASSERT(res); + bool res = writer->Close(TDuration::Seconds(10)); + UNIT_ASSERT(res); } auto testReadFromTopic = [&](const TString& topicPath) { - NYdb::NPersQueue::TReadSessionSettings settings; - settings.ConsumerName("user1").AppendTopics(topicPath); - auto reader = CreateReader(*ydbDriver, settings); - - for (int i = 0; i < 4; ++i) { - auto msg = GetNextMessageSkipAssignment(reader); - UNIT_ASSERT(msg); - Cerr << "GOT MESSAGE: " << DebugString(*msg) << "\n"; + NYdb::NPersQueue::TReadSessionSettings settings; + settings.ConsumerName("user1").AppendTopics(topicPath); + auto reader = CreateReader(*ydbDriver, settings); + + for (int i = 0; i < 4; ++i) { + auto msg = GetNextMessageSkipAssignment(reader); + UNIT_ASSERT(msg); + Cerr << "GOT MESSAGE: " << DebugString(*msg) << "\n"; } }; - testReadFromTopic("/Root/account2/topic2"); - testReadFromTopic("account2/topic2"); + testReadFromTopic("/Root/account2/topic2"); + testReadFromTopic("account2/topic2"); } } - - Y_UNIT_TEST_SUITE(TPersqueueDataPlaneTestSuite) { + + Y_UNIT_TEST_SUITE(TPersqueueDataPlaneTestSuite) { Y_UNIT_TEST(WriteSession) { TPersQueueV1TestServer server(true); - - TString topic = "/Root/account1/write_topic"; - TString consumer = "consumer_aba"; - { + + TString topic = "/Root/account1/write_topic"; + TString consumer = "consumer_aba"; + { auto res = server.PersQueueClient->CreateTopic(topic); - res.Wait(); - UNIT_ASSERT(res.GetValue().IsSuccess()); - } - - { + res.Wait(); + UNIT_ASSERT(res.GetValue().IsSuccess()); + } + + { auto res = server.PersQueueClient->AddReadRule(topic, TAddReadRuleSettings().ReadRule(TReadRuleSettings().ConsumerName(consumer))); - res.Wait(); - UNIT_ASSERT(res.GetValue().IsSuccess()); - } - - { + res.Wait(); + UNIT_ASSERT(res.GetValue().IsSuccess()); + } + + { auto writer = server.PersQueueClient->CreateSimpleBlockingWriteSession(TWriteSessionSettings() - .Path(topic).MessageGroupId("my_group_1") - .ClusterDiscoveryMode(EClusterDiscoveryMode::Off) - .RetryPolicy(IRetryPolicy::GetNoRetryPolicy())); - Cerr << "InitSeqNO " << writer->GetInitSeqNo() << "\n"; - writer->Write("somedata", 1); - writer->Close(); - } - { + .Path(topic).MessageGroupId("my_group_1") + .ClusterDiscoveryMode(EClusterDiscoveryMode::Off) + .RetryPolicy(IRetryPolicy::GetNoRetryPolicy())); + Cerr << "InitSeqNO " << writer->GetInitSeqNo() << "\n"; + writer->Write("somedata", 1); + writer->Close(); + } + { auto reader = server.PersQueueClient->CreateReadSession(TReadSessionSettings().ConsumerName("non_existing") - .AppendTopics(topic).DisableClusterDiscovery(true) - .RetryPolicy(IRetryPolicy::GetNoRetryPolicy())); - - - auto future = reader->WaitEvent(); - future.Wait(TDuration::Seconds(10)); - UNIT_ASSERT(future.HasValue()); - - TMaybe<NYdb::NPersQueue::TReadSessionEvent::TEvent> event = reader->GetEvent(false); - UNIT_ASSERT(event.Defined()); - - Cerr << "Got new read session event: " << DebugString(*event) << Endl; - - UNIT_ASSERT(std::get_if<TSessionClosedEvent>(&*event)); - } - { + .AppendTopics(topic).DisableClusterDiscovery(true) + .RetryPolicy(IRetryPolicy::GetNoRetryPolicy())); + + + auto future = reader->WaitEvent(); + future.Wait(TDuration::Seconds(10)); + UNIT_ASSERT(future.HasValue()); + + TMaybe<NYdb::NPersQueue::TReadSessionEvent::TEvent> event = reader->GetEvent(false); + UNIT_ASSERT(event.Defined()); + + Cerr << "Got new read session event: " << DebugString(*event) << Endl; + + UNIT_ASSERT(std::get_if<TSessionClosedEvent>(&*event)); + } + { auto reader = server.PersQueueClient->CreateReadSession(TReadSessionSettings().ConsumerName(consumer) - .AppendTopics(topic).DisableClusterDiscovery(true) - .RetryPolicy(IRetryPolicy::GetNoRetryPolicy())); - - - auto future = reader->WaitEvent(); - future.Wait(TDuration::Seconds(10)); - UNIT_ASSERT(future.HasValue()); - - TMaybe<NYdb::NPersQueue::TReadSessionEvent::TEvent> event = reader->GetEvent(false); - UNIT_ASSERT(event.Defined()); - - Cerr << "Got new read session event: " << DebugString(*event) << Endl; - - UNIT_ASSERT(std::get_if<TReadSessionEvent::TCreatePartitionStreamEvent>(&*event)); - } - } - } - - Y_UNIT_TEST_SUITE(TPersqueueControlPlaneTestSuite) { + .AppendTopics(topic).DisableClusterDiscovery(true) + .RetryPolicy(IRetryPolicy::GetNoRetryPolicy())); + + + auto future = reader->WaitEvent(); + future.Wait(TDuration::Seconds(10)); + UNIT_ASSERT(future.HasValue()); + + TMaybe<NYdb::NPersQueue::TReadSessionEvent::TEvent> event = reader->GetEvent(false); + UNIT_ASSERT(event.Defined()); + + Cerr << "Got new read session event: " << DebugString(*event) << Endl; + + UNIT_ASSERT(std::get_if<TReadSessionEvent::TCreatePartitionStreamEvent>(&*event)); + } + } + } + + Y_UNIT_TEST_SUITE(TPersqueueControlPlaneTestSuite) { Y_UNIT_TEST(SetupReadLockSessionWithDatabase) { TPersQueueV1TestServer server; - - { + + { auto res = server.PersQueueClient->AddReadRule("/Root/acc/topic1", TAddReadRuleSettings().ReadRule(TReadRuleSettings().ConsumerName("user"))); - res.Wait(); - Cerr << "ADD RESULT " << res.GetValue().GetIssues().ToString() << "\n"; - UNIT_ASSERT(res.GetValue().IsSuccess()); - } - - + res.Wait(); + Cerr << "ADD RESULT " << res.GetValue().GetIssues().ToString() << "\n"; + UNIT_ASSERT(res.GetValue().IsSuccess()); + } + + auto stub = Ydb::PersQueue::V1::PersQueueService::NewStub(server.InsecureChannel); grpc::ClientContext grpcContext; grpcContext.AddMetadata("x-ydb-database", "/Root/acc"); @@ -309,12 +309,12 @@ namespace NKikimr::NPersQueueTests { addRuleRequest.set_path("topic1"); auto* rr = addRuleRequest.mutable_read_rule(); rr->set_consumer_name("goodUser"); - rr->set_version(0); + rr->set_version(0); rr->set_important(true); rr->set_supported_format(TopicSettings::FORMAT_BASE); rr->add_supported_codecs(CODEC_ZSTD); auto status = stub->AddReadRule(&grpcContext, addRuleRequest, &addRuleResponse); - Cerr << "ADD RR RESPONSE " << addRuleResponse << "\n"; + Cerr << "ADD RR RESPONSE " << addRuleResponse << "\n"; UNIT_ASSERT(status.ok() && addRuleResponse.operation().status() == Ydb::StatusIds::SUCCESS); addRuleRequest.set_path(path); } @@ -337,11 +337,11 @@ namespace NKikimr::NPersQueueTests { auto status = stub->RemoveReadRule(&grpcContext, removeRuleRequest, &removeRuleResponse); UNIT_ASSERT(status.ok() && removeRuleResponse.operation().status() == Ydb::StatusIds::NOT_FOUND); } - auto findReadRule = [&](const TString& consumerName, const TMaybe<i64> version, const TopicSettings& settings) { + auto findReadRule = [&](const TString& consumerName, const TMaybe<i64> version, const TopicSettings& settings) { for (const auto& rr : settings.read_rules()) { if (rr.consumer_name() == consumerName) { - Cerr << rr << "\n"; - return !version || rr.version() == *version; + Cerr << rr << "\n"; + return !version || rr.version() == *version; } } return false; @@ -358,7 +358,7 @@ namespace NKikimr::NPersQueueTests { Ydb::PersQueue::V1::DescribeTopicResult res; UNIT_ASSERT(status.ok() && describeTopicResponse.operation().status() == Ydb::StatusIds::SUCCESS); describeTopicResponse.operation().result().UnpackTo(&res); - UNIT_ASSERT(findReadRule("goodUser", 1, res.settings())); + UNIT_ASSERT(findReadRule("goodUser", 1, res.settings())); addRuleRequest.set_path(path); } @@ -376,31 +376,31 @@ namespace NKikimr::NPersQueueTests { stub->DescribeTopic(&grpcContext, describeTopicRequest, &describeTopicResponse); Ydb::PersQueue::V1::DescribeTopicResult res; describeTopicResponse.operation().result().UnpackTo(&res); - UNIT_ASSERT(!findReadRule("goodUser", {}, res.settings())); + UNIT_ASSERT(!findReadRule("goodUser", {}, res.settings())); + } + + { + grpc::ClientContext grpcContext; + auto* rr = addRuleRequest.mutable_read_rule(); + rr->set_consumer_name("goodUser"); + rr->set_version(0); + rr->set_important(true); + rr->set_supported_format(TopicSettings::FORMAT_BASE); + rr->add_supported_codecs(CODEC_ZSTD); + auto status = stub->AddReadRule(&grpcContext, addRuleRequest, &addRuleResponse); + Cerr << addRuleResponse << "\n"; + UNIT_ASSERT(status.ok() && addRuleResponse.operation().status() == Ydb::StatusIds::SUCCESS); + } + + { + grpc::ClientContext grpcContext; + auto status = stub->DescribeTopic(&grpcContext, describeTopicRequest, &describeTopicResponse); + Ydb::PersQueue::V1::DescribeTopicResult res; + UNIT_ASSERT(status.ok() && describeTopicResponse.operation().status() == Ydb::StatusIds::SUCCESS); + describeTopicResponse.operation().result().UnpackTo(&res); + UNIT_ASSERT(findReadRule("goodUser", 3, res.settings())); // version is 3 : add, remove and add } - - { - grpc::ClientContext grpcContext; - auto* rr = addRuleRequest.mutable_read_rule(); - rr->set_consumer_name("goodUser"); - rr->set_version(0); - rr->set_important(true); - rr->set_supported_format(TopicSettings::FORMAT_BASE); - rr->add_supported_codecs(CODEC_ZSTD); - auto status = stub->AddReadRule(&grpcContext, addRuleRequest, &addRuleResponse); - Cerr << addRuleResponse << "\n"; - UNIT_ASSERT(status.ok() && addRuleResponse.operation().status() == Ydb::StatusIds::SUCCESS); - } - - { - grpc::ClientContext grpcContext; - auto status = stub->DescribeTopic(&grpcContext, describeTopicRequest, &describeTopicResponse); - Ydb::PersQueue::V1::DescribeTopicResult res; - UNIT_ASSERT(status.ok() && describeTopicResponse.operation().status() == Ydb::StatusIds::SUCCESS); - describeTopicResponse.operation().result().UnpackTo(&res); - UNIT_ASSERT(findReadRule("goodUser", 3, res.settings())); // version is 3 : add, remove and add - } - + } } } diff --git a/ydb/services/persqueue_v1/persqueue_ut.cpp b/ydb/services/persqueue_v1/persqueue_ut.cpp index 74af7bf4be..3bc9b21728 100644 --- a/ydb/services/persqueue_v1/persqueue_ut.cpp +++ b/ydb/services/persqueue_v1/persqueue_ut.cpp @@ -9,75 +9,75 @@ #include <ydb/core/testlib/test_pq_client.h> #include <ydb/core/protos/grpc_pq_old.pb.h> #include <ydb/core/persqueue/cluster_tracker.h> - + #include <ydb/core/tablet/tablet_counters_aggregator.h> - + #include <ydb/library/aclib/aclib.h> #include <ydb/library/persqueue/obfuscate/obfuscate.h> #include <ydb/library/persqueue/tests/counters.h> #include <ydb/library/persqueue/topic_parser/topic_parser.h> - + #include <library/cpp/testing/unittest/tests_data.h> #include <library/cpp/testing/unittest/registar.h> #include <library/cpp/json/json_reader.h> #include <library/cpp/monlib/dynamic_counters/encode.h> #include <google/protobuf/text_format.h> #include <google/protobuf/util/message_differencer.h> - -#include <util/string/join.h> + +#include <util/string/join.h> #include <util/system/sanitizers.h> -#include <util/generic/guid.h> - -#include <grpc++/client_context.h> -#include <grpc++/create_channel.h> - +#include <util/generic/guid.h> + +#include <grpc++/client_context.h> +#include <grpc++/create_channel.h> + #include <ydb/public/api/grpc/draft/ydb_persqueue_v1.grpc.pb.h> #include <ydb/public/api/protos/persqueue_error_codes_v1.pb.h> - + #include <ydb/public/sdk/cpp/client/ydb_persqueue_public/persqueue.h> #include <ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils/data_plane_helpers.h> - - + + namespace NKikimr::NPersQueueTests { - -using namespace Tests; -using namespace NKikimrClient; -using namespace Ydb::PersQueue; + +using namespace Tests; +using namespace NKikimrClient; +using namespace Ydb::PersQueue; using namespace Ydb::PersQueue::V1; using namespace NThreading; -using namespace NNetClassifier; - -TAutoPtr<IEventHandle> GetClassifierUpdate(TServer& server, const TActorId sender) { - auto& actorSystem = *server.GetRuntime(); - actorSystem.Send( - new IEventHandle(MakeNetClassifierID(), sender, - new TEvNetClassifier::TEvSubscribe() - )); - - TAutoPtr<IEventHandle> handle; - actorSystem.GrabEdgeEvent<NNetClassifier::TEvNetClassifier::TEvClassifierUpdate>(handle); - - UNIT_ASSERT(handle); - UNIT_ASSERT_VALUES_EQUAL(handle->Recipient, sender); - - return handle; -} - -THolder<TTempFileHandle> CreateNetDataFile(const TString& content) { - auto netDataFile = MakeHolder<TTempFileHandle>("data.tsv"); - - netDataFile->Write(content.Data(), content.Size()); - netDataFile->FlushData(); - - return netDataFile; -} - - -static TString FormNetData() { - return "10.99.99.224/32\tSAS\n" - "::1/128\tVLA\n"; -} - +using namespace NNetClassifier; + +TAutoPtr<IEventHandle> GetClassifierUpdate(TServer& server, const TActorId sender) { + auto& actorSystem = *server.GetRuntime(); + actorSystem.Send( + new IEventHandle(MakeNetClassifierID(), sender, + new TEvNetClassifier::TEvSubscribe() + )); + + TAutoPtr<IEventHandle> handle; + actorSystem.GrabEdgeEvent<NNetClassifier::TEvNetClassifier::TEvClassifierUpdate>(handle); + + UNIT_ASSERT(handle); + UNIT_ASSERT_VALUES_EQUAL(handle->Recipient, sender); + + return handle; +} + +THolder<TTempFileHandle> CreateNetDataFile(const TString& content) { + auto netDataFile = MakeHolder<TTempFileHandle>("data.tsv"); + + netDataFile->Write(content.Data(), content.Size()); + netDataFile->FlushData(); + + return netDataFile; +} + + +static TString FormNetData() { + return "10.99.99.224/32\tSAS\n" + "::1/128\tVLA\n"; +} + namespace { const static TString DEFAULT_TOPIC_NAME = "rt3.dc1--topic1"; const static TString SHORT_TOPIC_NAME = "topic1"; @@ -101,7 +101,7 @@ namespace { Y_UNIT_TEST_SUITE(TPersQueueTest) { Y_UNIT_TEST(AllEqual) { using NGRpcProxy::V1::AllEqual; - + UNIT_ASSERT(AllEqual(0)); UNIT_ASSERT(AllEqual(0, 0)); UNIT_ASSERT(AllEqual(0, 0, 0)); @@ -119,468 +119,468 @@ namespace { TPersQueueV1TestServer server; SET_LOCALS; const TString topicPath = server.GetTopicPathMultipleDC(); - - auto driver = server.Server->AnnoyingClient->GetDriver(); - - NYdb::NPersQueue::TReadSessionSettings settings; - settings.ConsumerName("shared/user").AppendTopics(topicPath).ReadMirrored("dc1"); - auto reader = CreateReader(*driver, settings); - - for (ui32 i = 0; i < 2; ++i) { - auto msg = reader->GetEvent(true, 1); - UNIT_ASSERT(msg); - + + auto driver = server.Server->AnnoyingClient->GetDriver(); + + NYdb::NPersQueue::TReadSessionSettings settings; + settings.ConsumerName("shared/user").AppendTopics(topicPath).ReadMirrored("dc1"); + auto reader = CreateReader(*driver, settings); + + for (ui32 i = 0; i < 2; ++i) { + auto msg = reader->GetEvent(true, 1); + UNIT_ASSERT(msg); + Cerr << "Got message: " << NYdb::NPersQueue::DebugString(*msg) << "\n"; - - auto ev = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TCreatePartitionStreamEvent>(&*msg); - - UNIT_ASSERT(ev); - - UNIT_ASSERT(ev->GetPartitionStream()->GetTopicPath() == topicPath); - UNIT_ASSERT(ev->GetPartitionStream()->GetCluster() == "dc1" || ev->GetPartitionStream()->GetCluster() == "dc2"); - UNIT_ASSERT(ev->GetPartitionStream()->GetPartitionId() == 0); - - } - auto wait = reader->WaitEvent(); - UNIT_ASSERT(!wait.Wait(TDuration::Seconds(1))); - + + auto ev = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TCreatePartitionStreamEvent>(&*msg); + + UNIT_ASSERT(ev); + + UNIT_ASSERT(ev->GetPartitionStream()->GetTopicPath() == topicPath); + UNIT_ASSERT(ev->GetPartitionStream()->GetCluster() == "dc1" || ev->GetPartitionStream()->GetCluster() == "dc2"); + UNIT_ASSERT(ev->GetPartitionStream()->GetPartitionId() == 0); + + } + auto wait = reader->WaitEvent(); + UNIT_ASSERT(!wait.Wait(TDuration::Seconds(1))); + pqClient->AlterTopicNoLegacy("/Root/PQ/rt3.dc2--acc--topic2dc", 2); Cerr << "======Alter topic done\n"; - UNIT_ASSERT(wait.Wait(TDuration::Seconds(5))); - - auto msg = reader->GetEvent(true, 1); - UNIT_ASSERT(msg); - - Cerr << NYdb::NPersQueue::DebugString(*msg) << "\n"; - - auto ev = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TCreatePartitionStreamEvent>(&*msg); - - UNIT_ASSERT(ev); - - UNIT_ASSERT(ev->GetPartitionStream()->GetTopicPath() == topicPath); - UNIT_ASSERT(ev->GetPartitionStream()->GetCluster() == "dc2"); - UNIT_ASSERT(ev->GetPartitionStream()->GetPartitionId() == 1); - } - - + UNIT_ASSERT(wait.Wait(TDuration::Seconds(5))); + + auto msg = reader->GetEvent(true, 1); + UNIT_ASSERT(msg); + + Cerr << NYdb::NPersQueue::DebugString(*msg) << "\n"; + + auto ev = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TCreatePartitionStreamEvent>(&*msg); + + UNIT_ASSERT(ev); + + UNIT_ASSERT(ev->GetPartitionStream()->GetTopicPath() == topicPath); + UNIT_ASSERT(ev->GetPartitionStream()->GetCluster() == "dc2"); + UNIT_ASSERT(ev->GetPartitionStream()->GetPartitionId() == 1); + } + + Y_UNIT_TEST(SetupLockSession) { TPersQueueV1TestServer server; SET_LOCALS; MAKE_INSECURE_STUB; auto readStream = StubP_->MigrationStreamingRead(&rcontext); - UNIT_ASSERT(readStream); - - // init read session - { + UNIT_ASSERT(readStream); + + // init read session + { MigrationStreamingReadClientMessage req; MigrationStreamingReadServerMessage resp; - + req.mutable_init_request()->add_topics_read_settings()->set_topic("acc/topic1"); - + req.mutable_init_request()->set_consumer("user"); req.mutable_init_request()->set_read_only_original(true); - req.mutable_init_request()->mutable_read_params()->set_max_read_messages_count(1); - - if (!readStream->Write(req)) { - ythrow yexception() << "write fail"; - } - UNIT_ASSERT(readStream->Read(&resp)); + req.mutable_init_request()->mutable_read_params()->set_max_read_messages_count(1); + + if (!readStream->Write(req)) { + ythrow yexception() << "write fail"; + } + UNIT_ASSERT(readStream->Read(&resp)); Cerr << "===Got response: " << resp.ShortDebugString() << Endl; UNIT_ASSERT(resp.response_case() == MigrationStreamingReadServerMessage::kInitResponse); - //send some reads + //send some reads req.Clear(); - req.mutable_read(); - for (ui32 i = 0; i < 10; ++i) { - if (!readStream->Write(req)) { - ythrow yexception() << "write fail"; - } - } - } - - { + req.mutable_read(); + for (ui32 i = 0; i < 10; ++i) { + if (!readStream->Write(req)) { + ythrow yexception() << "write fail"; + } + } + } + + { Sleep(TDuration::Seconds(10)); - ReadInfoRequest request; - ReadInfoResponse response; - request.mutable_consumer()->set_path("user"); + ReadInfoRequest request; + ReadInfoResponse response; + request.mutable_consumer()->set_path("user"); request.set_get_only_original(true); - request.add_topics()->set_path("acc/topic1"); - grpc::ClientContext rcontext; - auto status = StubP_->GetReadSessionsInfo(&rcontext, request, &response); - UNIT_ASSERT(status.ok()); - ReadInfoResult res; - response.operation().result().UnpackTo(&res); + request.add_topics()->set_path("acc/topic1"); + grpc::ClientContext rcontext; + auto status = StubP_->GetReadSessionsInfo(&rcontext, request, &response); + UNIT_ASSERT(status.ok()); + ReadInfoResult res; + response.operation().result().UnpackTo(&res); Cerr << "Read info response: " << response << Endl << res << Endl; UNIT_ASSERT_VALUES_EQUAL(res.topics_size(), 1); - UNIT_ASSERT(res.topics(0).status() == Ydb::StatusIds::SUCCESS); - } - ui64 assignId = 0; - { + UNIT_ASSERT(res.topics(0).status() == Ydb::StatusIds::SUCCESS); + } + ui64 assignId = 0; + { MigrationStreamingReadClientMessage req; MigrationStreamingReadServerMessage resp; - - //lock partition - UNIT_ASSERT(readStream->Read(&resp)); + + //lock partition + UNIT_ASSERT(readStream->Read(&resp)); UNIT_ASSERT(resp.response_case() == MigrationStreamingReadServerMessage::kAssigned); - UNIT_ASSERT(resp.assigned().topic().path() == "acc/topic1"); - UNIT_ASSERT(resp.assigned().cluster() == "dc1"); - UNIT_ASSERT(resp.assigned().partition() == 0); - - assignId = resp.assigned().assign_id(); - req.Clear(); - req.mutable_start_read()->mutable_topic()->set_path("acc/topic1"); - req.mutable_start_read()->set_cluster("dc1"); - req.mutable_start_read()->set_partition(0); - req.mutable_start_read()->set_assign_id(assignId); - - req.mutable_start_read()->set_read_offset(10); - if (!readStream->Write(req)) { - ythrow yexception() << "write fail"; - } - - } - - auto driver = server.Server->AnnoyingClient->GetDriver(); - + UNIT_ASSERT(resp.assigned().topic().path() == "acc/topic1"); + UNIT_ASSERT(resp.assigned().cluster() == "dc1"); + UNIT_ASSERT(resp.assigned().partition() == 0); + + assignId = resp.assigned().assign_id(); + req.Clear(); + req.mutable_start_read()->mutable_topic()->set_path("acc/topic1"); + req.mutable_start_read()->set_cluster("dc1"); + req.mutable_start_read()->set_partition(0); + req.mutable_start_read()->set_assign_id(assignId); + + req.mutable_start_read()->set_read_offset(10); + if (!readStream->Write(req)) { + ythrow yexception() << "write fail"; + } + + } + + auto driver = server.Server->AnnoyingClient->GetDriver(); + { - auto writer = CreateSimpleWriter(*driver, "acc/topic1", "source"); - for (int i = 1; i < 17; ++i) { - bool res = writer->Write("valuevaluevalue" + ToString(i), i); - UNIT_ASSERT(res); + auto writer = CreateSimpleWriter(*driver, "acc/topic1", "source"); + for (int i = 1; i < 17; ++i) { + bool res = writer->Write("valuevaluevalue" + ToString(i), i); + UNIT_ASSERT(res); } - bool res = writer->Close(TDuration::Seconds(10)); - UNIT_ASSERT(res); + bool res = writer->Close(TDuration::Seconds(10)); + UNIT_ASSERT(res); } - //check read results + //check read results MigrationStreamingReadServerMessage resp; - for (ui32 i = 10; i < 16; ++i) { - UNIT_ASSERT(readStream->Read(&resp)); - Cerr << "Got read response " << resp << "\n"; + for (ui32 i = 10; i < 16; ++i) { + UNIT_ASSERT(readStream->Read(&resp)); + Cerr << "Got read response " << resp << "\n"; UNIT_ASSERT_C(resp.response_case() == MigrationStreamingReadServerMessage::kDataBatch, resp); - UNIT_ASSERT(resp.data_batch().partition_data_size() == 1); - UNIT_ASSERT(resp.data_batch().partition_data(0).batches_size() == 1); - UNIT_ASSERT(resp.data_batch().partition_data(0).batches(0).message_data_size() == 1); - UNIT_ASSERT(resp.data_batch().partition_data(0).batches(0).message_data(0).offset() == i); - } - //TODO: restart here readSession and read from position 10 - { + UNIT_ASSERT(resp.data_batch().partition_data_size() == 1); + UNIT_ASSERT(resp.data_batch().partition_data(0).batches_size() == 1); + UNIT_ASSERT(resp.data_batch().partition_data(0).batches(0).message_data_size() == 1); + UNIT_ASSERT(resp.data_batch().partition_data(0).batches(0).message_data(0).offset() == i); + } + //TODO: restart here readSession and read from position 10 + { MigrationStreamingReadClientMessage req; MigrationStreamingReadServerMessage resp; - - auto cookie = req.mutable_commit()->add_cookies(); - cookie->set_assign_id(assignId); - cookie->set_partition_cookie(1); - - if (!readStream->Write(req)) { - ythrow yexception() << "write fail"; - } - UNIT_ASSERT(readStream->Read(&resp)); + + auto cookie = req.mutable_commit()->add_cookies(); + cookie->set_assign_id(assignId); + cookie->set_partition_cookie(1); + + if (!readStream->Write(req)) { + ythrow yexception() << "write fail"; + } + UNIT_ASSERT(readStream->Read(&resp)); UNIT_ASSERT_C(resp.response_case() == MigrationStreamingReadServerMessage::kCommitted, resp); - } - - + } + + pqClient->AlterTopic("rt3.dc1--acc--topic1", 10); - { - ReadInfoRequest request; - ReadInfoResponse response; - request.mutable_consumer()->set_path("user"); - request.set_get_only_original(false); - request.add_topics()->set_path("acc/topic1"); - grpc::ClientContext rcontext; - auto status = StubP_->GetReadSessionsInfo(&rcontext, request, &response); - UNIT_ASSERT(status.ok()); - ReadInfoResult res; - response.operation().result().UnpackTo(&res); + { + ReadInfoRequest request; + ReadInfoResponse response; + request.mutable_consumer()->set_path("user"); + request.set_get_only_original(false); + request.add_topics()->set_path("acc/topic1"); + grpc::ClientContext rcontext; + auto status = StubP_->GetReadSessionsInfo(&rcontext, request, &response); + UNIT_ASSERT(status.ok()); + ReadInfoResult res; + response.operation().result().UnpackTo(&res); Cerr << "Get read session info response: " << response << "\n" << res << "\n"; -// UNIT_ASSERT(res.sessions_size() == 1); +// UNIT_ASSERT(res.sessions_size() == 1); UNIT_ASSERT_VALUES_EQUAL(res.topics_size(), 1); UNIT_ASSERT_VALUES_EQUAL(res.topics(0).partitions_size(), 10); - } - - { - ReadInfoRequest request; - ReadInfoResponse response; - request.mutable_consumer()->set_path("user"); - request.set_get_only_original(false); - request.add_topics()->set_path("acc/topic1"); - grpc::ClientContext rcontext; - + } + + { + ReadInfoRequest request; + ReadInfoResponse response; + request.mutable_consumer()->set_path("user"); + request.set_get_only_original(false); + request.add_topics()->set_path("acc/topic1"); + grpc::ClientContext rcontext; + pqClient->MarkNodeInHive(runtime, 0, false); pqClient->MarkNodeInHive(runtime, 1, false); - + pqClient->RestartBalancerTablet(runtime, "rt3.dc1--acc--topic1"); - auto status = StubP_->GetReadSessionsInfo(&rcontext, request, &response); - UNIT_ASSERT(status.ok()); - ReadInfoResult res; - response.operation().result().UnpackTo(&res); + auto status = StubP_->GetReadSessionsInfo(&rcontext, request, &response); + UNIT_ASSERT(status.ok()); + ReadInfoResult res; + response.operation().result().UnpackTo(&res); Cerr << "Read sessions info response: " << response << "\nResult: " << res << "\n"; - UNIT_ASSERT(res.topics().size() == 1); + UNIT_ASSERT(res.topics().size() == 1); UNIT_ASSERT(res.topics(0).partitions(0).status() == Ydb::StatusIds::UNAVAILABLE); - } - } - - - void SetupWriteSessionImpl(bool rr) { + } + } + + + void SetupWriteSessionImpl(bool rr) { NPersQueue::TTestServer server{PQSettings(0, 2, rr), false}; server.ServerSettings.SetEnableSystemViews(false); server.StartServer(); - + server.EnableLogs({ NKikimrServices::PERSQUEUE }); server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 10); - + TPQDataWriter writer("source", server); - + ui32 p = writer.Write(SHORT_TOPIC_NAME, {"valuevaluevalue1"}); - + server.AnnoyingClient->AlterTopic(DEFAULT_TOPIC_NAME, 15); - + ui32 pp = writer.Write(SHORT_TOPIC_NAME, {"valuevaluevalue2"}); - UNIT_ASSERT_VALUES_EQUAL(p, pp); - + UNIT_ASSERT_VALUES_EQUAL(p, pp); + writer.Write(SHORT_TOPIC_NAME, {"1", "2", "3", "4", "5"}); - writer.Write("topic2", {"valuevaluevalue1"}, true); - - p = writer.InitSession("sid1", 2, true); - pp = writer.InitSession("sid1", 0, true); - - UNIT_ASSERT(p = pp); - UNIT_ASSERT(p == 1); - - { - p = writer.InitSession("sidx", 0, true); - pp = writer.InitSession("sidx", 0, true); - - UNIT_ASSERT(p == pp); - } - - writer.InitSession("sid1", 3, false); - - //check round robin; - TMap<ui32, ui32> ss; - for (ui32 i = 0; i < 15*5; ++i) { - ss[writer.InitSession("sid_rand_" + ToString<ui32>(i), 0, true)]++; - } - for (auto &s : ss) { - Cerr << s.first << " " << s.second << "\n"; - if (rr) { - UNIT_ASSERT(s.second >= 4 && s.second <= 6); - } - } - } - - Y_UNIT_TEST(SetupWriteSession) { - SetupWriteSessionImpl(false); - SetupWriteSessionImpl(true); - } - - Y_UNIT_TEST(StoreNoMoreThanXSourceIDs) { + writer.Write("topic2", {"valuevaluevalue1"}, true); + + p = writer.InitSession("sid1", 2, true); + pp = writer.InitSession("sid1", 0, true); + + UNIT_ASSERT(p = pp); + UNIT_ASSERT(p == 1); + + { + p = writer.InitSession("sidx", 0, true); + pp = writer.InitSession("sidx", 0, true); + + UNIT_ASSERT(p == pp); + } + + writer.InitSession("sid1", 3, false); + + //check round robin; + TMap<ui32, ui32> ss; + for (ui32 i = 0; i < 15*5; ++i) { + ss[writer.InitSession("sid_rand_" + ToString<ui32>(i), 0, true)]++; + } + for (auto &s : ss) { + Cerr << s.first << " " << s.second << "\n"; + if (rr) { + UNIT_ASSERT(s.second >= 4 && s.second <= 6); + } + } + } + + Y_UNIT_TEST(SetupWriteSession) { + SetupWriteSessionImpl(false); + SetupWriteSessionImpl(true); + } + + Y_UNIT_TEST(StoreNoMoreThanXSourceIDs) { ui16 X = 4; ui64 SOURCEID_COUNT_DELETE_BATCH_SIZE = 100; NPersQueue::TTestServer server; server.EnableLogs({ NKikimrServices::PERSQUEUE, NKikimrServices::PQ_WRITE_PROXY }); server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 1, 8*1024*1024, 86400, 20000000, "", 200000000, {}, {}, {}, X, 86400); - auto driver = server.AnnoyingClient->GetDriver(); - - auto writer1 = CreateSimpleWriter(*driver, SHORT_TOPIC_NAME, TStringBuilder() << "test source ID " << 0, {}, {}, true); - writer1->GetInitSeqNo(); - - bool res = writer1->Write("x", 1); - UNIT_ASSERT(res); - - Sleep(TDuration::Seconds(5)); - - auto writer2 = CreateSimpleWriter(*driver, SHORT_TOPIC_NAME, TStringBuilder() << "test source ID Del " << 0); - writer2->GetInitSeqNo(); - - res = writer2->Write("x", 1); - UNIT_ASSERT(res); - - Sleep(TDuration::Seconds(5)); - - res = writer1->Write("x", 2); - UNIT_ASSERT(res); - - Sleep(TDuration::Seconds(5)); - + auto driver = server.AnnoyingClient->GetDriver(); + + auto writer1 = CreateSimpleWriter(*driver, SHORT_TOPIC_NAME, TStringBuilder() << "test source ID " << 0, {}, {}, true); + writer1->GetInitSeqNo(); + + bool res = writer1->Write("x", 1); + UNIT_ASSERT(res); + + Sleep(TDuration::Seconds(5)); + + auto writer2 = CreateSimpleWriter(*driver, SHORT_TOPIC_NAME, TStringBuilder() << "test source ID Del " << 0); + writer2->GetInitSeqNo(); + + res = writer2->Write("x", 1); + UNIT_ASSERT(res); + + Sleep(TDuration::Seconds(5)); + + res = writer1->Write("x", 2); + UNIT_ASSERT(res); + + Sleep(TDuration::Seconds(5)); + for (ui32 nProducer=1; nProducer < X + SOURCEID_COUNT_DELETE_BATCH_SIZE + 1; ++nProducer) { - auto writer = CreateSimpleWriter(*driver, SHORT_TOPIC_NAME, TStringBuilder() << "test source ID " << nProducer); - - res = writer->Write("x", 1); - UNIT_ASSERT(res); - - UNIT_ASSERT(writer->IsAlive()); - - res = writer->Close(TDuration::Seconds(10)); - UNIT_ASSERT(res); - + auto writer = CreateSimpleWriter(*driver, SHORT_TOPIC_NAME, TStringBuilder() << "test source ID " << nProducer); + + res = writer->Write("x", 1); + UNIT_ASSERT(res); + + UNIT_ASSERT(writer->IsAlive()); + + res = writer->Close(TDuration::Seconds(10)); + UNIT_ASSERT(res); + } - res = writer1->Write("x", 3); - UNIT_ASSERT(res); - res = writer1->Close(TDuration::Seconds(5)); - UNIT_ASSERT(res); + res = writer1->Write("x", 3); + UNIT_ASSERT(res); + res = writer1->Close(TDuration::Seconds(5)); + UNIT_ASSERT(res); - res = writer2->Write("x", 4); - UNIT_ASSERT(res); + res = writer2->Write("x", 4); + UNIT_ASSERT(res); - UNIT_ASSERT(!writer2->Close()); + UNIT_ASSERT(!writer2->Close()); } Y_UNIT_TEST(EachMessageGetsExactlyOneAcknowledgementInCorrectOrder) { NPersQueue::TTestServer server; server.AnnoyingClient->CreateTopic("rt3.dc1--topic", 1); - auto driver = server.AnnoyingClient->GetDriver(); + auto driver = server.AnnoyingClient->GetDriver(); + + auto writer = CreateSimpleWriter(*driver, "topic", "test source ID"); - auto writer = CreateSimpleWriter(*driver, "topic", "test source ID"); + bool res = true; - bool res = true; - ui32 messageCount = 1000; for (ui32 sequenceNumber = 1; sequenceNumber <= messageCount; ++sequenceNumber) { - res = writer->Write("x", sequenceNumber); - UNIT_ASSERT(res); + res = writer->Write("x", sequenceNumber); + UNIT_ASSERT(res); } - UNIT_ASSERT(writer->IsAlive()); - res = writer->Close(TDuration::Seconds(10)); - UNIT_ASSERT(res); + UNIT_ASSERT(writer->IsAlive()); + res = writer->Close(TDuration::Seconds(10)); + UNIT_ASSERT(res); } Y_UNIT_TEST(SetupWriteSessionOnDisabledCluster) { TPersQueueV1TestServer server; SET_LOCALS; - + TPQDataWriter writer("source", *server.Server); - + pqClient->DisableDC(); - - Sleep(TDuration::Seconds(5)); + + Sleep(TDuration::Seconds(5)); writer.Write(SHORT_TOPIC_NAME, {"valuevaluevalue1"}, true); - } - + } + Y_UNIT_TEST(CloseActiveWriteSessionOnClusterDisable) { NPersQueue::TTestServer server; - + server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 10); - + server.EnableLogs({ NKikimrServices::PQ_WRITE_PROXY }); - - - TPQDataWriter writer2("source", server); - - auto driver = server.AnnoyingClient->GetDriver(); - - auto writer = CreateWriter(*driver, SHORT_TOPIC_NAME, "123", 0, "raw"); - - auto msg = writer->GetEvent(true); - UNIT_ASSERT(msg); // ReadyToAcceptEvent - - Cerr << DebugString(*msg) << "\n"; - + + + TPQDataWriter writer2("source", server); + + auto driver = server.AnnoyingClient->GetDriver(); + + auto writer = CreateWriter(*driver, SHORT_TOPIC_NAME, "123", 0, "raw"); + + auto msg = writer->GetEvent(true); + UNIT_ASSERT(msg); // ReadyToAcceptEvent + + Cerr << DebugString(*msg) << "\n"; + server.AnnoyingClient->DisableDC(); - - UNIT_ASSERT(writer->WaitEvent().Wait(TDuration::Seconds(30))); - msg = writer->GetEvent(true); - UNIT_ASSERT(msg); - - - Cerr << DebugString(*msg) << "\n"; - - auto ev = std::get_if<NYdb::NPersQueue::TSessionClosedEvent>(&*msg); - - UNIT_ASSERT(ev); - - Cerr << "is dead res: " << ev->DebugString() << "\n"; - UNIT_ASSERT_EQUAL(ev->GetIssues().back().GetCode(), Ydb::PersQueue::ErrorCode::CLUSTER_DISABLED); - } - - Y_UNIT_TEST(BadSids) { + + UNIT_ASSERT(writer->WaitEvent().Wait(TDuration::Seconds(30))); + msg = writer->GetEvent(true); + UNIT_ASSERT(msg); + + + Cerr << DebugString(*msg) << "\n"; + + auto ev = std::get_if<NYdb::NPersQueue::TSessionClosedEvent>(&*msg); + + UNIT_ASSERT(ev); + + Cerr << "is dead res: " << ev->DebugString() << "\n"; + UNIT_ASSERT_EQUAL(ev->GetIssues().back().GetCode(), Ydb::PersQueue::ErrorCode::CLUSTER_DISABLED); + } + + Y_UNIT_TEST(BadSids) { NPersQueue::TTestServer server; server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 10); server.EnableLogs({ NKikimrServices::PQ_WRITE_PROXY }); - TPQDataWriter writer2("source", server); + TPQDataWriter writer2("source", server); TString topic = SHORT_TOPIC_NAME; - - auto driver = server.AnnoyingClient->GetDriver(); - - auto writer = CreateSimpleWriter(*driver, topic, "base64:a***"); - UNIT_ASSERT(!writer->Write("x")); - writer = CreateSimpleWriter(*driver, topic, "base64:aa=="); - UNIT_ASSERT(!writer->Write("x")); - writer = CreateSimpleWriter(*driver, topic, "base64:a"); - UNIT_ASSERT(!writer->Write("x")); - writer = CreateSimpleWriter(*driver, topic, "base64:aa"); - UNIT_ASSERT(writer->Write("x")); - UNIT_ASSERT(writer->Close()); - } - + + auto driver = server.AnnoyingClient->GetDriver(); + + auto writer = CreateSimpleWriter(*driver, topic, "base64:a***"); + UNIT_ASSERT(!writer->Write("x")); + writer = CreateSimpleWriter(*driver, topic, "base64:aa=="); + UNIT_ASSERT(!writer->Write("x")); + writer = CreateSimpleWriter(*driver, topic, "base64:a"); + UNIT_ASSERT(!writer->Write("x")); + writer = CreateSimpleWriter(*driver, topic, "base64:aa"); + UNIT_ASSERT(writer->Write("x")); + UNIT_ASSERT(writer->Close()); + } + Y_UNIT_TEST(ReadFromSeveralPartitions) { NPersQueue::TTestServer server; server.EnableLogs({ NKikimrServices::PQ_READ_PROXY, NKikimrServices::PQ_METACACHE }); - + server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 10); - + TPQDataWriter writer("source1", server); Cerr << "===Writer started\n"; - std::shared_ptr<grpc::Channel> Channel_; - std::unique_ptr<Ydb::PersQueue::V1::PersQueueService::Stub> StubP_; - + std::shared_ptr<grpc::Channel> Channel_; + std::unique_ptr<Ydb::PersQueue::V1::PersQueueService::Stub> StubP_; + Channel_ = grpc::CreateChannel("localhost:" + ToString(server.GrpcPort), grpc::InsecureChannelCredentials()); - StubP_ = Ydb::PersQueue::V1::PersQueueService::NewStub(Channel_); - - - //Write some data + StubP_ = Ydb::PersQueue::V1::PersQueueService::NewStub(Channel_); + + + //Write some data writer.Write(SHORT_TOPIC_NAME, {"valuevaluevalue1"}); - + TPQDataWriter writer2("source2", server); writer2.Write(SHORT_TOPIC_NAME, {"valuevaluevalue2"}); Cerr << "===Writer - writes done\n"; - - grpc::ClientContext rcontext; + + grpc::ClientContext rcontext; auto readStream = StubP_->MigrationStreamingRead(&rcontext); - UNIT_ASSERT(readStream); - - // init read session - { + UNIT_ASSERT(readStream); + + // init read session + { MigrationStreamingReadClientMessage req; MigrationStreamingReadServerMessage resp; - + req.mutable_init_request()->add_topics_read_settings()->set_topic(SHORT_TOPIC_NAME); - + req.mutable_init_request()->set_consumer("user"); req.mutable_init_request()->set_read_only_original(true); req.mutable_init_request()->mutable_read_params()->set_max_read_messages_count(1000); - - if (!readStream->Write(req)) { - ythrow yexception() << "write fail"; - } + + if (!readStream->Write(req)) { + ythrow yexception() << "write fail"; + } Cerr << "===Try to get read response\n"; - UNIT_ASSERT(readStream->Read(&resp)); + UNIT_ASSERT(readStream->Read(&resp)); Cerr << "Read server response: " << resp.ShortDebugString() << Endl; UNIT_ASSERT(resp.response_case() == MigrationStreamingReadServerMessage::kInitResponse); - - //send some reads - Sleep(TDuration::Seconds(5)); - for (ui32 i = 0; i < 10; ++i) { - req.Clear(); - req.mutable_read(); - - if (!readStream->Write(req)) { - ythrow yexception() << "write fail"; - } - } - } - - //check read results + + //send some reads + Sleep(TDuration::Seconds(5)); + for (ui32 i = 0; i < 10; ++i) { + req.Clear(); + req.mutable_read(); + + if (!readStream->Write(req)) { + ythrow yexception() << "write fail"; + } + } + } + + //check read results MigrationStreamingReadServerMessage resp; for (ui32 i = 0; i < 2;) { MigrationStreamingReadServerMessage resp; - UNIT_ASSERT(readStream->Read(&resp)); + UNIT_ASSERT(readStream->Read(&resp)); if (resp.response_case() == MigrationStreamingReadServerMessage::kAssigned) { auto assignId = resp.assigned().assign_id(); MigrationStreamingReadClientMessage req; @@ -593,19 +593,19 @@ namespace { UNIT_ASSERT_C(resp.response_case() == MigrationStreamingReadServerMessage::kDataBatch, resp); i += resp.data_batch().partition_data_size(); - } - } - - - void SetupReadSessionTest() { + } + } + + + void SetupReadSessionTest() { NPersQueue::TTestServer server; server.EnableLogs({ NKikimrServices::PQ_READ_PROXY }); - + server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 2); - server.AnnoyingClient->CreateTopicNoLegacy("rt3.dc2--topic1", 2, true, false); - + server.AnnoyingClient->CreateTopicNoLegacy("rt3.dc2--topic1", 2, true, false); + TPQDataWriter writer("source1", server); - + writer.Write(SHORT_TOPIC_NAME, {"valuevaluevalue0"}); writer.Write(SHORT_TOPIC_NAME, {"valuevaluevalue1"}); writer.Write(SHORT_TOPIC_NAME, {"valuevaluevalue2"}); @@ -616,333 +616,333 @@ namespace { writer.Write(SHORT_TOPIC_NAME, {"valuevaluevalue7"}); writer.Write(SHORT_TOPIC_NAME, {"valuevaluevalue8"}); writer.Write(SHORT_TOPIC_NAME, {"valuevaluevalue9"}); - + writer.Read(SHORT_TOPIC_NAME, "user1", "", false, false); - } - + } + Y_UNIT_TEST(SetupReadSession) { - SetupReadSessionTest(); + SetupReadSessionTest(); } - - + + Y_UNIT_TEST(WriteExisting) { NPersQueue::TTestServer server; server.EnableLogs({ NKikimrServices::FLAT_TX_SCHEMESHARD, NKikimrServices::PERSQUEUE }); - + server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 2); - - { - THolder<NMsgBusProxy::TBusPersQueue> request = TRequestDescribePQ().GetRequest({}); - - NKikimrClient::TResponse response; - + + { + THolder<NMsgBusProxy::TBusPersQueue> request = TRequestDescribePQ().GetRequest({}); + + NKikimrClient::TResponse response; + auto channel = grpc::CreateChannel("localhost:"+ToString(server.GrpcPort), grpc::InsecureChannelCredentials()); - auto stub(NKikimrClient::TGRpcServer::NewStub(channel)); - grpc::ClientContext context; - auto status = stub->PersQueueRequest(&context, request->Record, &response); - - UNIT_ASSERT(status.ok()); - } - + auto stub(NKikimrClient::TGRpcServer::NewStub(channel)); + grpc::ClientContext context; + auto status = stub->PersQueueRequest(&context, request->Record, &response); + + UNIT_ASSERT(status.ok()); + } + server.AnnoyingClient->WriteToPQ(DEFAULT_TOPIC_NAME, 1, "abacaba", 1, "valuevaluevalue1", "", ETransport::GRpc); server.AnnoyingClient->WriteToPQ(DEFAULT_TOPIC_NAME, 1, "abacaba", 2, "valuevaluevalue1", "", ETransport::GRpc); - } - + } + Y_UNIT_TEST(WriteExistingBigValue) { NPersQueue::TTestServer server(PQSettings(0).SetDomainName("Root").SetNodeCount(2)); server.EnableLogs({ NKikimrServices::FLAT_TX_SCHEMESHARD, NKikimrServices::PERSQUEUE }); server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 2, 8*1024*1024, 86400, 100000); - - - TInstant now(Now()); - + + + TInstant now(Now()); + server.AnnoyingClient->WriteToPQ(DEFAULT_TOPIC_NAME, 1, "abacaba", 1, TString(1000000, 'a')); server.AnnoyingClient->WriteToPQ(DEFAULT_TOPIC_NAME, 1, "abacaba", 2, TString(1, 'a')); - UNIT_ASSERT(TInstant::Now() - now > TDuration::MilliSeconds(5990)); //speed limit is 200kb/s and burst is 200kb, so to write 1mb it will take at least 4 seconds - } - + UNIT_ASSERT(TInstant::Now() - now > TDuration::MilliSeconds(5990)); //speed limit is 200kb/s and burst is 200kb, so to write 1mb it will take at least 4 seconds + } + Y_UNIT_TEST(WriteEmptyData) { NPersQueue::TTestServer server{PQSettings(0).SetDomainName("Root").SetNodeCount(2)}; - + server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 2); - + server.EnableLogs({ NKikimrServices::PERSQUEUE }); - + // empty data and sourceId server.AnnoyingClient->WriteToPQ(DEFAULT_TOPIC_NAME, 1, "", 1, "", "", ETransport::MsgBus, NMsgBusProxy::MSTATUS_ERROR); server.AnnoyingClient->WriteToPQ(DEFAULT_TOPIC_NAME, 1, "a", 1, "", "", ETransport::MsgBus, NMsgBusProxy::MSTATUS_ERROR); server.AnnoyingClient->WriteToPQ(DEFAULT_TOPIC_NAME, 1, "", 1, "a", "", ETransport::MsgBus, NMsgBusProxy::MSTATUS_ERROR); server.AnnoyingClient->WriteToPQ(DEFAULT_TOPIC_NAME, 1, "a", 1, "a", "", ETransport::MsgBus, NMsgBusProxy::MSTATUS_OK); - } - - + } + + Y_UNIT_TEST(WriteNonExistingPartition) { NPersQueue::TTestServer server{PQSettings(0).SetDomainName("Root").SetNodeCount(2)}; server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 2); - + server.EnableLogs({ NKikimrServices::FLAT_TX_SCHEMESHARD, NKikimrServices::PERSQUEUE }); - + server.AnnoyingClient->WriteToPQ( DEFAULT_TOPIC_NAME, 100500, "abacaba", 1, "valuevaluevalue1", "", ETransport::MsgBus, NMsgBusProxy::MSTATUS_ERROR, NMsgBusProxy::MSTATUS_ERROR ); - } - + } + Y_UNIT_TEST(WriteNonExistingTopic) { NPersQueue::TTestServer server(PQSettings(0).SetDomainName("Root").SetNodeCount(2)); server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 2); server.EnableLogs({ NKikimrServices::FLAT_TX_SCHEMESHARD, NKikimrServices::PERSQUEUE }); - + server.AnnoyingClient->WriteToPQ( DEFAULT_TOPIC_NAME + "000", 1, "abacaba", 1, "valuevaluevalue1", "", ETransport::MsgBus, NMsgBusProxy::MSTATUS_ERROR, NMsgBusProxy::MSTATUS_ERROR ); - } - + } + Y_UNIT_TEST(SchemeshardRestart) { NPersQueue::TTestServer server(PQSettings(0).SetDomainName("Root").SetNodeCount(1)); server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 2); TString secondTopic = "rt3.dc1--topic2"; server.AnnoyingClient->CreateTopic(secondTopic, 2); - - // force topic1 into cache and establish pipe from cache to schemeshard + + // force topic1 into cache and establish pipe from cache to schemeshard server.AnnoyingClient->WriteToPQ(DEFAULT_TOPIC_NAME, 1, "abacaba", 1, "valuevaluevalue1"); - + server.EnableLogs({ NKikimrServices::FLAT_TX_SCHEMESHARD, - NKikimrServices::PERSQUEUE, - NKikimrServices::PQ_METACACHE }); - + NKikimrServices::PERSQUEUE, + NKikimrServices::PQ_METACACHE }); + server.AnnoyingClient->RestartSchemeshard(server.CleverServer->GetRuntime()); - + server.AnnoyingClient->WriteToPQ(secondTopic, 1, "abacaba", 1, "valuevaluevalue1"); - } - + } + Y_UNIT_TEST(WriteAfterAlter) { NPersQueue::TTestServer server(PQSettings(0).SetDomainName("Root").SetNodeCount(2)); server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 2); - - + + server.AnnoyingClient->WriteToPQ( DEFAULT_TOPIC_NAME, 5, "abacaba", 1, "valuevaluevalue1", "", ETransport::MsgBus, NMsgBusProxy::MSTATUS_ERROR, NMsgBusProxy::MSTATUS_ERROR ); - + server.EnableLogs({ NKikimrServices::FLAT_TX_SCHEMESHARD, - NKikimrServices::PERSQUEUE, - NKikimrServices::PQ_METACACHE }); - + NKikimrServices::PERSQUEUE, + NKikimrServices::PQ_METACACHE }); + server.AnnoyingClient->AlterTopic(DEFAULT_TOPIC_NAME, 10); - Sleep(TDuration::Seconds(1)); + Sleep(TDuration::Seconds(1)); server.AnnoyingClient->WriteToPQ(DEFAULT_TOPIC_NAME, 5, "abacaba", 1, "valuevaluevalue1"); server.AnnoyingClient->WriteToPQ( DEFAULT_TOPIC_NAME, 15, "abacaba", 1, "valuevaluevalue1", "", ETransport::MsgBus, NMsgBusProxy::MSTATUS_ERROR, NMsgBusProxy::MSTATUS_ERROR ); - + server.AnnoyingClient->AlterTopic(DEFAULT_TOPIC_NAME, 20); - Sleep(TDuration::Seconds(1)); + Sleep(TDuration::Seconds(1)); server.AnnoyingClient->WriteToPQ(DEFAULT_TOPIC_NAME, 5, "abacaba", 1, "valuevaluevalue1"); server.AnnoyingClient->WriteToPQ(DEFAULT_TOPIC_NAME, 15, "abacaba", 1, "valuevaluevalue1"); - } - + } + Y_UNIT_TEST(Delete) { NPersQueue::TTestServer server(PQSettings(0).SetDomainName("Root").SetNodeCount(2)); - + server.EnableLogs({ NKikimrServices::FLAT_TX_SCHEMESHARD, NKikimrServices::PERSQUEUE}); - - // Delete non-existing + + // Delete non-existing server.AnnoyingClient->DeleteTopic2(DEFAULT_TOPIC_NAME, NPersQueue::NErrorCode::UNKNOWN_TOPIC); - + server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 2); - - // Delete existing + + // Delete existing server.AnnoyingClient->DeleteTopic2(DEFAULT_TOPIC_NAME); - - // Double delete - "What Is Dead May Never Die" + + // Double delete - "What Is Dead May Never Die" server.AnnoyingClient->DeleteTopic2(DEFAULT_TOPIC_NAME, NPersQueue::NErrorCode::UNKNOWN_TOPIC); - - // Resurrect deleted topic + + // Resurrect deleted topic server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 2); server.AnnoyingClient->DeleteTopic2(DEFAULT_TOPIC_NAME); - } - - + } + + Y_UNIT_TEST(BigRead) { NPersQueue::TTestServer server(PQSettings(0).SetDomainName("Root")); - server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 1, 8*1024*1024, 86400, 20000000, "user1", 2000000); - + server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 1, 8*1024*1024, 86400, 20000000, "user1", 2000000); + server.EnableLogs({ NKikimrServices::FLAT_TX_SCHEMESHARD, NKikimrServices::PERSQUEUE }); - - TString value(1024*1024, 'x'); - for (ui32 i = 0; i < 32; ++i) + + TString value(1024*1024, 'x'); + for (ui32 i = 0; i < 32; ++i) server.AnnoyingClient->WriteToPQ({DEFAULT_TOPIC_NAME, 0, "source1", i}, value); - - // trying to read small PQ messages in a big messagebus event + + // trying to read small PQ messages in a big messagebus event auto info = server.AnnoyingClient->ReadFromPQ({DEFAULT_TOPIC_NAME, 0, 0, 32, "user1"}, 23, "", NMsgBusProxy::MSTATUS_OK); //will read 21mb - UNIT_ASSERT_VALUES_EQUAL(info.BlobsFromDisk, 0); - UNIT_ASSERT_VALUES_EQUAL(info.BlobsFromCache, 4); - - TInstant now(TInstant::Now()); + UNIT_ASSERT_VALUES_EQUAL(info.BlobsFromDisk, 0); + UNIT_ASSERT_VALUES_EQUAL(info.BlobsFromCache, 4); + + TInstant now(TInstant::Now()); info = server.AnnoyingClient->ReadFromPQ({DEFAULT_TOPIC_NAME, 0, 0, 32, "user1"}, 23, "", NMsgBusProxy::MSTATUS_OK); //will read 21mb - TDuration dur = TInstant::Now() - now; - UNIT_ASSERT_C(dur > TDuration::Seconds(7) && dur < TDuration::Seconds(20), "dur = " << dur); //speed limit is 2000kb/s and burst is 2000kb, so to read 24mb it will take at least 11 seconds - + TDuration dur = TInstant::Now() - now; + UNIT_ASSERT_C(dur > TDuration::Seconds(7) && dur < TDuration::Seconds(20), "dur = " << dur); //speed limit is 2000kb/s and burst is 2000kb, so to read 24mb it will take at least 11 seconds + server.AnnoyingClient->GetPartStatus({}, 1, true); - - } - - - // expects that L2 size is 32Mb + + } + + + // expects that L2 size is 32Mb Y_UNIT_TEST(Cache) { NPersQueue::TTestServer server(PQSettings(0).SetDomainName("Root")); server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 1, 8*1024*1024, 86400); - + server.EnableLogs({ NKikimrServices::FLAT_TX_SCHEMESHARD, NKikimrServices::PERSQUEUE }); - - TString value(1024*1024, 'x'); - for (ui32 i = 0; i < 32; ++i) + + TString value(1024*1024, 'x'); + for (ui32 i = 0; i < 32; ++i) server.AnnoyingClient->WriteToPQ({DEFAULT_TOPIC_NAME, 0, "source1", i}, value); - + auto info0 = server.AnnoyingClient->ReadFromPQ({DEFAULT_TOPIC_NAME, 0, 0, 16, "user1"}, 16); auto info16 = server.AnnoyingClient->ReadFromPQ({DEFAULT_TOPIC_NAME, 0, 16, 16, "user1"}, 16); - - UNIT_ASSERT_VALUES_EQUAL(info0.BlobsFromCache, 3); - UNIT_ASSERT_VALUES_EQUAL(info16.BlobsFromCache, 2); - UNIT_ASSERT_VALUES_EQUAL(info0.BlobsFromDisk + info16.BlobsFromDisk, 0); - - for (ui32 i = 0; i < 8; ++i) + + UNIT_ASSERT_VALUES_EQUAL(info0.BlobsFromCache, 3); + UNIT_ASSERT_VALUES_EQUAL(info16.BlobsFromCache, 2); + UNIT_ASSERT_VALUES_EQUAL(info0.BlobsFromDisk + info16.BlobsFromDisk, 0); + + for (ui32 i = 0; i < 8; ++i) server.AnnoyingClient->WriteToPQ({DEFAULT_TOPIC_NAME, 0, "source1", 32+i}, value); - + info0 = server.AnnoyingClient->ReadFromPQ({DEFAULT_TOPIC_NAME, 0, 0, 16, "user1"}, 16); info16 = server.AnnoyingClient->ReadFromPQ({DEFAULT_TOPIC_NAME, 0, 16, 16, "user1"}, 16); - - ui32 fromDisk = info0.BlobsFromDisk + info16.BlobsFromDisk; - ui32 fromCache = info0.BlobsFromCache + info16.BlobsFromCache; + + ui32 fromDisk = info0.BlobsFromDisk + info16.BlobsFromDisk; + ui32 fromCache = info0.BlobsFromCache + info16.BlobsFromCache; UNIT_ASSERT(fromDisk > 0); - UNIT_ASSERT(fromDisk < 5); + UNIT_ASSERT(fromDisk < 5); UNIT_ASSERT(fromCache > 0); - UNIT_ASSERT(fromCache < 5); - } - + UNIT_ASSERT(fromCache < 5); + } + Y_UNIT_TEST(CacheHead) { NPersQueue::TTestServer server(PQSettings(0).SetDomainName("Root")); server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 1, 6*1024*1024, 86400); - + server.EnableLogs({ NKikimrServices::FLAT_TX_SCHEMESHARD, NKikimrServices::PERSQUEUE }); - - ui64 seqNo = 0; - for (ui32 blobSizeKB = 256; blobSizeKB < 4096; blobSizeKB *= 2) { - static const ui32 maxEventKB = 24*1024; - ui32 blobSize = blobSizeKB * 1024; - ui32 count = maxEventKB / blobSizeKB; - count -= count%2; - ui32 half = count/2; - - ui64 offset = seqNo; - TString value(blobSize, 'a'); - for (ui32 i = 0; i < count; ++i) + + ui64 seqNo = 0; + for (ui32 blobSizeKB = 256; blobSizeKB < 4096; blobSizeKB *= 2) { + static const ui32 maxEventKB = 24*1024; + ui32 blobSize = blobSizeKB * 1024; + ui32 count = maxEventKB / blobSizeKB; + count -= count%2; + ui32 half = count/2; + + ui64 offset = seqNo; + TString value(blobSize, 'a'); + for (ui32 i = 0; i < count; ++i) server.AnnoyingClient->WriteToPQ({DEFAULT_TOPIC_NAME, 0, "source1", seqNo++}, value); - + auto info_half1 = server.AnnoyingClient->ReadFromPQ({DEFAULT_TOPIC_NAME, 0, offset, half, "user1"}, half); auto info_half2 = server.AnnoyingClient->ReadFromPQ({DEFAULT_TOPIC_NAME, 0, offset, half, "user1"}, half); - - UNIT_ASSERT(info_half1.BlobsFromCache > 0); - UNIT_ASSERT(info_half2.BlobsFromCache > 0); - UNIT_ASSERT_VALUES_EQUAL(info_half1.BlobsFromDisk, 0); - UNIT_ASSERT_VALUES_EQUAL(info_half2.BlobsFromDisk, 0); - } - } - + + UNIT_ASSERT(info_half1.BlobsFromCache > 0); + UNIT_ASSERT(info_half2.BlobsFromCache > 0); + UNIT_ASSERT_VALUES_EQUAL(info_half1.BlobsFromDisk, 0); + UNIT_ASSERT_VALUES_EQUAL(info_half2.BlobsFromDisk, 0); + } + } + Y_UNIT_TEST(SameOffset) { NPersQueue::TTestServer server(PQSettings(0).SetDomainName("Root")); server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 1, 6*1024*1024, 86400); TString secondTopic = DEFAULT_TOPIC_NAME + "2"; server.AnnoyingClient->CreateTopic(secondTopic, 1, 6*1024*1024, 86400); - + server.EnableLogs({ NKikimrServices::FLAT_TX_SCHEMESHARD, NKikimrServices::PERSQUEUE }); - - ui32 valueSize = 128; - TString value1(valueSize, 'a'); - TString value2(valueSize, 'b'); + + ui32 valueSize = 128; + TString value1(valueSize, 'a'); + TString value2(valueSize, 'b'); server.AnnoyingClient->WriteToPQ({DEFAULT_TOPIC_NAME, 0, "source1", 0}, value1); server.AnnoyingClient->WriteToPQ({secondTopic, 0, "source1", 0}, value2); - - // avoid reading from head - TString mb(1024*1024, 'x'); - for (ui32 i = 1; i < 16; ++i) { + + // avoid reading from head + TString mb(1024*1024, 'x'); + for (ui32 i = 1; i < 16; ++i) { server.AnnoyingClient->WriteToPQ({DEFAULT_TOPIC_NAME, 0, "source1", i}, mb); server.AnnoyingClient->WriteToPQ({secondTopic, 0, "source1", i}, mb); - } - + } + auto info1 = server.AnnoyingClient->ReadFromPQ({DEFAULT_TOPIC_NAME, 0, 0, 1, "user1"}, 1); auto info2 = server.AnnoyingClient->ReadFromPQ({secondTopic, 0, 0, 1, "user1"}, 1); - - UNIT_ASSERT_VALUES_EQUAL(info1.BlobsFromCache, 1); - UNIT_ASSERT_VALUES_EQUAL(info2.BlobsFromCache, 1); - UNIT_ASSERT_VALUES_EQUAL(info1.Values.size(), 1); - UNIT_ASSERT_VALUES_EQUAL(info2.Values.size(), 1); - UNIT_ASSERT_VALUES_EQUAL(info1.Values[0].size(), valueSize); - UNIT_ASSERT_VALUES_EQUAL(info2.Values[0].size(), valueSize); - UNIT_ASSERT(info1.Values[0] == value1); - UNIT_ASSERT(info2.Values[0] == value2); - } - - + + UNIT_ASSERT_VALUES_EQUAL(info1.BlobsFromCache, 1); + UNIT_ASSERT_VALUES_EQUAL(info2.BlobsFromCache, 1); + UNIT_ASSERT_VALUES_EQUAL(info1.Values.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(info2.Values.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(info1.Values[0].size(), valueSize); + UNIT_ASSERT_VALUES_EQUAL(info2.Values[0].size(), valueSize); + UNIT_ASSERT(info1.Values[0] == value1); + UNIT_ASSERT(info2.Values[0] == value2); + } + + Y_UNIT_TEST(FetchRequest) { NPersQueue::TTestServer server(PQSettings(0).SetDomainName("Root")); TString secondTopic = DEFAULT_TOPIC_NAME + "2"; - + server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 10); server.AnnoyingClient->CreateTopic(secondTopic, 10); - - ui32 valueSize = 128; - TString value1(valueSize, 'a'); - TString value2(valueSize, 'b'); + + ui32 valueSize = 128; + TString value1(valueSize, 'a'); + TString value2(valueSize, 'b'); server.AnnoyingClient->WriteToPQ({secondTopic, 5, "source1", 0}, value2); server.AnnoyingClient->WriteToPQ({DEFAULT_TOPIC_NAME, 1, "source1", 0}, value1); server.AnnoyingClient->WriteToPQ({DEFAULT_TOPIC_NAME, 1, "source1", 1}, value2); - + server.EnableLogs({ NKikimrServices::FLAT_TX_SCHEMESHARD, NKikimrServices::PERSQUEUE }); - TInstant tm(TInstant::Now()); + TInstant tm(TInstant::Now()); server.AnnoyingClient->FetchRequestPQ({{secondTopic, 5, 0, 400},{DEFAULT_TOPIC_NAME, 1, 0, 400},{DEFAULT_TOPIC_NAME, 3, 0, 400}}, 400, 1000000); - UNIT_ASSERT((TInstant::Now() - tm).Seconds() < 1); - tm = TInstant::Now(); + UNIT_ASSERT((TInstant::Now() - tm).Seconds() < 1); + tm = TInstant::Now(); server.AnnoyingClient->FetchRequestPQ({{secondTopic, 5, 1, 400}}, 400, 5000); - UNIT_ASSERT((TInstant::Now() - tm).Seconds() > 2); + UNIT_ASSERT((TInstant::Now() - tm).Seconds() > 2); server.AnnoyingClient->FetchRequestPQ({{secondTopic, 5, 0, 400},{DEFAULT_TOPIC_NAME, 1, 0, 400},{DEFAULT_TOPIC_NAME, 3, 0, 400}}, 1, 1000000); server.AnnoyingClient->FetchRequestPQ({{secondTopic, 5, 500, 400},{secondTopic, 4, 0, 400},{DEFAULT_TOPIC_NAME, 1, 0, 400}}, 400, 1000000); - } - + } + Y_UNIT_TEST(Init) { NPersQueue::TTestServer server(PQSettings(0).SetDomainName("Root").SetNodeCount(2)); - if (!true) { + if (!true) { server.EnableLogs( { - NKikimrServices::FLAT_TX_SCHEMESHARD, - NKikimrServices::TX_DATASHARD, - NKikimrServices::HIVE, - NKikimrServices::PERSQUEUE, - NKikimrServices::TABLET_MAIN, - NKikimrServices::BS_PROXY_DISCOVER, - NKikimrServices::PIPE_CLIENT, - NKikimrServices::PQ_METACACHE }); - } - + NKikimrServices::FLAT_TX_SCHEMESHARD, + NKikimrServices::TX_DATASHARD, + NKikimrServices::HIVE, + NKikimrServices::PERSQUEUE, + NKikimrServices::TABLET_MAIN, + NKikimrServices::BS_PROXY_DISCOVER, + NKikimrServices::PIPE_CLIENT, + NKikimrServices::PQ_METACACHE }); + } + server.AnnoyingClient->DescribeTopic({}); server.AnnoyingClient->TestCase({}, 0, 0, true); - + server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 10); server.AnnoyingClient->AlterTopic(DEFAULT_TOPIC_NAME, 20); TString secondTopic = DEFAULT_TOPIC_NAME + "2"; TString thirdTopic = DEFAULT_TOPIC_NAME + "3"; server.AnnoyingClient->CreateTopic(secondTopic, 25); - + server.AnnoyingClient->WriteToPQ(DEFAULT_TOPIC_NAME, 5, "abacaba", 1, "valuevaluevalue1"); server.AnnoyingClient->WriteToPQ(DEFAULT_TOPIC_NAME, 5, "abacaba", 2, "valuevaluevalue2"); server.AnnoyingClient->WriteToPQ(DEFAULT_TOPIC_NAME, 5, "abacabae", 1, "valuevaluevalue3"); server.AnnoyingClient->ReadFromPQ(DEFAULT_TOPIC_NAME, 5, 0, 10, 3); - + server.AnnoyingClient->SetClientOffsetPQ(DEFAULT_TOPIC_NAME, 5, 2); - + server.AnnoyingClient->TestCase({{DEFAULT_TOPIC_NAME, {5}}}, 1, 1, true); server.AnnoyingClient->TestCase({{DEFAULT_TOPIC_NAME, {0}}}, 1, 0, true); server.AnnoyingClient->TestCase({{DEFAULT_TOPIC_NAME, {}}}, 20, 1, true); @@ -953,188 +953,188 @@ namespace { server.AnnoyingClient->TestCase({{DEFAULT_TOPIC_NAME, {}}, {thirdTopic, {}}}, 0, 0, false); server.AnnoyingClient->TestCase({{DEFAULT_TOPIC_NAME, {}}, {secondTopic, {}}}, 45, 1, true); server.AnnoyingClient->TestCase({{DEFAULT_TOPIC_NAME, {0, 3, 5}}, {secondTopic, {1, 4, 6, 8}}}, 7, 1, true); - + server.AnnoyingClient->DescribeTopic({DEFAULT_TOPIC_NAME}); server.AnnoyingClient->DescribeTopic({secondTopic}); server.AnnoyingClient->DescribeTopic({secondTopic, DEFAULT_TOPIC_NAME}); server.AnnoyingClient->DescribeTopic({}); server.AnnoyingClient->DescribeTopic({thirdTopic}, true); - } - + } + void WaitResolveSuccess(TFlatMsgBusPQClient& annoyingClient, TString topic, ui32 numParts) { - const TInstant start = TInstant::Now(); - while (true) { - TAutoPtr<NMsgBusProxy::TBusPersQueue> request(new NMsgBusProxy::TBusPersQueue); - auto req = request->Record.MutableMetaRequest(); - auto partOff = req->MutableCmdGetPartitionLocations(); - auto treq = partOff->AddTopicRequest(); - treq->SetTopic(topic); - for (ui32 i = 0; i < numParts; ++i) - treq->AddPartition(i); - - TAutoPtr<NBus::TBusMessage> reply; - NBus::EMessageStatus status = annoyingClient.SyncCall(request, reply); - UNIT_ASSERT_VALUES_EQUAL(status, NBus::MESSAGE_OK); - const NMsgBusProxy::TBusResponse* response = dynamic_cast<NMsgBusProxy::TBusResponse*>(reply.Get()); - UNIT_ASSERT(response); - if (response->Record.GetStatus() == NMsgBusProxy::MSTATUS_OK) - break; - UNIT_ASSERT(TInstant::Now() - start < ::DEFAULT_DISPATCH_TIMEOUT); - Sleep(TDuration::MilliSeconds(10)); - } - } - + const TInstant start = TInstant::Now(); + while (true) { + TAutoPtr<NMsgBusProxy::TBusPersQueue> request(new NMsgBusProxy::TBusPersQueue); + auto req = request->Record.MutableMetaRequest(); + auto partOff = req->MutableCmdGetPartitionLocations(); + auto treq = partOff->AddTopicRequest(); + treq->SetTopic(topic); + for (ui32 i = 0; i < numParts; ++i) + treq->AddPartition(i); + + TAutoPtr<NBus::TBusMessage> reply; + NBus::EMessageStatus status = annoyingClient.SyncCall(request, reply); + UNIT_ASSERT_VALUES_EQUAL(status, NBus::MESSAGE_OK); + const NMsgBusProxy::TBusResponse* response = dynamic_cast<NMsgBusProxy::TBusResponse*>(reply.Get()); + UNIT_ASSERT(response); + if (response->Record.GetStatus() == NMsgBusProxy::MSTATUS_OK) + break; + UNIT_ASSERT(TInstant::Now() - start < ::DEFAULT_DISPATCH_TIMEOUT); + Sleep(TDuration::MilliSeconds(10)); + } + } + Y_UNIT_TEST(WhenDisableNodeAndCreateTopic_ThenAllPartitionsAreOnOtherNode) { NPersQueue::TTestServer server(PQSettings(0).SetDomainName("Root").SetNodeCount(2)); server.EnableLogs({ NKikimrServices::PERSQUEUE, NKikimrServices::HIVE }); TString unusedTopic = "rt3.dc1--unusedtopic"; server.AnnoyingClient->CreateTopic(unusedTopic, 1); WaitResolveSuccess(*server.AnnoyingClient, unusedTopic, 1); - - // Act - // Disable node #0 + + // Act + // Disable node #0 server.AnnoyingClient->MarkNodeInHive(server.CleverServer->GetRuntime(), 0, false); server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 3); WaitResolveSuccess(*server.AnnoyingClient, DEFAULT_TOPIC_NAME, 3); - - // Assert that all partitions are on node #1 + + // Assert that all partitions are on node #1 const ui32 node1Id = server.CleverServer->GetRuntime()->GetNodeId(1); - UNIT_ASSERT_VALUES_EQUAL( + UNIT_ASSERT_VALUES_EQUAL( server.AnnoyingClient->GetPartLocation({{DEFAULT_TOPIC_NAME, {0, 1}}}, 2, true), - TVector<ui32>({node1Id, node1Id}) - ); - } - + TVector<ui32>({node1Id, node1Id}) + ); + } + void PrepareForGrpc(NPersQueue::TTestServer& server) { server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 2); - } - - class TTestCredentialsProvider : public NYdb::ICredentialsProvider { - public: - - TTestCredentialsProvider(const NYdb::TStringType& token) - : Token(token) - {} - - virtual ~TTestCredentialsProvider() - {} - - NYdb::TStringType GetAuthInfo() const override { - return Token; - } - - void SetToken(const NYdb::TStringType& token) { - Token = token; - } - bool IsValid() const override { - return true; - } - - NYdb::TStringType Token; - }; - - class TTestCredentialsProviderFactory : public NYdb::ICredentialsProviderFactory { - public: - TTestCredentialsProviderFactory(const NYdb::TStringType& token) - : CredentialsProvider(new TTestCredentialsProvider(token)) - {} - - TTestCredentialsProviderFactory(const TTestCredentialsProviderFactory&) = delete; - TTestCredentialsProviderFactory& operator = (const TTestCredentialsProviderFactory&) = delete; - - virtual ~TTestCredentialsProviderFactory() - {} - - std::shared_ptr<NYdb::ICredentialsProvider> CreateProvider() const override { - return CredentialsProvider; - } - - NYdb::TStringType GetClientIdentity() const override { - return CreateGuidAsString(); - } - - void SetToken(const NYdb::TStringType& token) { - CredentialsProvider->SetToken(token); - } - private: - std::shared_ptr<TTestCredentialsProvider> CredentialsProvider; - - - }; - - + } + + class TTestCredentialsProvider : public NYdb::ICredentialsProvider { + public: + + TTestCredentialsProvider(const NYdb::TStringType& token) + : Token(token) + {} + + virtual ~TTestCredentialsProvider() + {} + + NYdb::TStringType GetAuthInfo() const override { + return Token; + } + + void SetToken(const NYdb::TStringType& token) { + Token = token; + } + bool IsValid() const override { + return true; + } + + NYdb::TStringType Token; + }; + + class TTestCredentialsProviderFactory : public NYdb::ICredentialsProviderFactory { + public: + TTestCredentialsProviderFactory(const NYdb::TStringType& token) + : CredentialsProvider(new TTestCredentialsProvider(token)) + {} + + TTestCredentialsProviderFactory(const TTestCredentialsProviderFactory&) = delete; + TTestCredentialsProviderFactory& operator = (const TTestCredentialsProviderFactory&) = delete; + + virtual ~TTestCredentialsProviderFactory() + {} + + std::shared_ptr<NYdb::ICredentialsProvider> CreateProvider() const override { + return CredentialsProvider; + } + + NYdb::TStringType GetClientIdentity() const override { + return CreateGuidAsString(); + } + + void SetToken(const NYdb::TStringType& token) { + CredentialsProvider->SetToken(token); + } + private: + std::shared_ptr<TTestCredentialsProvider> CredentialsProvider; + + + }; + + Y_UNIT_TEST(CheckACLForGrpcWrite) { NPersQueue::TTestServer server(PQSettings(0, 1)); server.EnableLogs({ NKikimrServices::PQ_WRITE_PROXY }); PrepareForGrpc(server); - + TPQDataWriter writer("source1", server); TPQDataWriter writer2("source1", server); - + server.CleverServer->GetRuntime()->GetAppData().PQConfig.SetRequireCredentialsInNewProtocol(true); - + writer.Write(SHORT_TOPIC_NAME, {"valuevaluevalue1"}, true, TString()); // Fail if user set empty token writer.Write(SHORT_TOPIC_NAME, {"valuevaluevalue1"}, true, "topic1@" BUILTIN_ACL_DOMAIN); - - NACLib::TDiffACL acl; - acl.AddAccess(NACLib::EAccessType::Allow, NACLib::UpdateRow, "topic1@" BUILTIN_ACL_DOMAIN); + + NACLib::TDiffACL acl; + acl.AddAccess(NACLib::EAccessType::Allow, NACLib::UpdateRow, "topic1@" BUILTIN_ACL_DOMAIN); server.AnnoyingClient->ModifyACL("/Root/PQ", DEFAULT_TOPIC_NAME, acl.SerializeAsString()); WaitACLModification(); - + writer2.Write(SHORT_TOPIC_NAME, {"valuevaluevalue1"}, false, "topic1@" BUILTIN_ACL_DOMAIN); writer2.Write(SHORT_TOPIC_NAME, {"valuevaluevalue1"}, true, "invalid_ticket"); - - auto driver = server.AnnoyingClient->GetDriver(); - - - for (ui32 i = 0; i < 2; ++i) { - std::shared_ptr<NYdb::ICredentialsProviderFactory> creds = std::make_shared<TTestCredentialsProviderFactory>(NYdb::TStringType("topic1@" BUILTIN_ACL_DOMAIN)); - dynamic_cast<TTestCredentialsProviderFactory*>(creds.get())->SetToken(NYdb::TStringType("topic1@" BUILTIN_ACL_DOMAIN)); - - auto writer = CreateWriter(*driver, SHORT_TOPIC_NAME, "123", {}, {}, {}, creds); - - auto msg = writer->GetEvent(true); - UNIT_ASSERT(msg); // ReadyToAcceptEvent - - Cerr << DebugString(*msg) << "\n"; - - auto ev = std::get_if<NYdb::NPersQueue::TWriteSessionEvent::TReadyToAcceptEvent>(&*msg); - UNIT_ASSERT(ev); - - writer->Write(std::move(ev->ContinuationToken), "a"); - - msg = writer->GetEvent(true); - UNIT_ASSERT(msg); - Cerr << DebugString(*msg) << "\n"; - ev = std::get_if<NYdb::NPersQueue::TWriteSessionEvent::TReadyToAcceptEvent>(&*msg); - UNIT_ASSERT(ev); - - msg = writer->GetEvent(true); - UNIT_ASSERT(msg); - Cerr << DebugString(*msg) << "\n"; - auto ack = std::get_if<NYdb::NPersQueue::TWriteSessionEvent::TAcksEvent>(&*msg); - UNIT_ASSERT(ack); - - NYdb::TStringType token = i == 0 ? "user_without_rights@" BUILTIN_ACL_DOMAIN : "invalid_ticket"; - Cerr << "Set token " << token << "\n"; - - dynamic_cast<TTestCredentialsProviderFactory*>(creds.get())->SetToken(token); - - writer->Write(std::move(ev->ContinuationToken), "a"); - ui32 events = 0; - while(true) { - UNIT_ASSERT(writer->WaitEvent().Wait(TDuration::Seconds(10))); - msg = writer->GetEvent(true); - UNIT_ASSERT(msg); - Cerr << DebugString(*msg) << "\n"; - if (std::holds_alternative<NYdb::NPersQueue::TSessionClosedEvent>(*msg)) - break; - UNIT_ASSERT(++events <= 2); // Before close only one ack and one ready-to-accept can be received - } - } - } - - + + auto driver = server.AnnoyingClient->GetDriver(); + + + for (ui32 i = 0; i < 2; ++i) { + std::shared_ptr<NYdb::ICredentialsProviderFactory> creds = std::make_shared<TTestCredentialsProviderFactory>(NYdb::TStringType("topic1@" BUILTIN_ACL_DOMAIN)); + dynamic_cast<TTestCredentialsProviderFactory*>(creds.get())->SetToken(NYdb::TStringType("topic1@" BUILTIN_ACL_DOMAIN)); + + auto writer = CreateWriter(*driver, SHORT_TOPIC_NAME, "123", {}, {}, {}, creds); + + auto msg = writer->GetEvent(true); + UNIT_ASSERT(msg); // ReadyToAcceptEvent + + Cerr << DebugString(*msg) << "\n"; + + auto ev = std::get_if<NYdb::NPersQueue::TWriteSessionEvent::TReadyToAcceptEvent>(&*msg); + UNIT_ASSERT(ev); + + writer->Write(std::move(ev->ContinuationToken), "a"); + + msg = writer->GetEvent(true); + UNIT_ASSERT(msg); + Cerr << DebugString(*msg) << "\n"; + ev = std::get_if<NYdb::NPersQueue::TWriteSessionEvent::TReadyToAcceptEvent>(&*msg); + UNIT_ASSERT(ev); + + msg = writer->GetEvent(true); + UNIT_ASSERT(msg); + Cerr << DebugString(*msg) << "\n"; + auto ack = std::get_if<NYdb::NPersQueue::TWriteSessionEvent::TAcksEvent>(&*msg); + UNIT_ASSERT(ack); + + NYdb::TStringType token = i == 0 ? "user_without_rights@" BUILTIN_ACL_DOMAIN : "invalid_ticket"; + Cerr << "Set token " << token << "\n"; + + dynamic_cast<TTestCredentialsProviderFactory*>(creds.get())->SetToken(token); + + writer->Write(std::move(ev->ContinuationToken), "a"); + ui32 events = 0; + while(true) { + UNIT_ASSERT(writer->WaitEvent().Wait(TDuration::Seconds(10))); + msg = writer->GetEvent(true); + UNIT_ASSERT(msg); + Cerr << DebugString(*msg) << "\n"; + if (std::holds_alternative<NYdb::NPersQueue::TSessionClosedEvent>(*msg)) + break; + UNIT_ASSERT(++events <= 2); // Before close only one ack and one ready-to-accept can be received + } + } + } + + Y_UNIT_TEST(CheckACLForGrpcRead) { NPersQueue::TTestServer server(PQSettings(0, 1)); server.EnableLogs({ NKikimrServices::PQ_READ_PROXY }); @@ -1142,7 +1142,7 @@ namespace { TString topic2 = DEFAULT_TOPIC_NAME + "2"; TString shortTopic2Name = "topic12"; PrepareForGrpc(server); - + server.AnnoyingClient->CreateTopic(topic2, 1, 8*1024*1024, 86400, 20000000, "", 200000000, {"user1", "user2"}); server.WaitInit(shortTopic2Name); server.AnnoyingClient->CreateConsumer("user1"); @@ -1150,286 +1150,286 @@ namespace { server.AnnoyingClient->CreateConsumer("user5"); server.AnnoyingClient->GrantConsumerAccess("user1", "user2@" BUILTIN_ACL_DOMAIN); server.AnnoyingClient->GrantConsumerAccess("user1", "user3@" BUILTIN_ACL_DOMAIN); - + server.AnnoyingClient->GrantConsumerAccess("user1", "1@" BUILTIN_ACL_DOMAIN); server.AnnoyingClient->GrantConsumerAccess("user2", "2@" BUILTIN_ACL_DOMAIN); server.AnnoyingClient->GrantConsumerAccess("user5", "1@" BUILTIN_ACL_DOMAIN); server.AnnoyingClient->GrantConsumerAccess("user5", "2@" BUILTIN_ACL_DOMAIN); Cerr << "=== Create writer\n"; TPQDataWriter writer("source1", server); - + server.CleverServer->GetRuntime()->GetAppData().PQConfig.SetRequireCredentialsInNewProtocol(true); - - NACLib::TDiffACL acl; + + NACLib::TDiffACL acl; acl.AddAccess(NACLib::EAccessType::Allow, NACLib::SelectRow, "1@" BUILTIN_ACL_DOMAIN); acl.AddAccess(NACLib::EAccessType::Allow, NACLib::SelectRow, "2@" BUILTIN_ACL_DOMAIN); - acl.AddAccess(NACLib::EAccessType::Allow, NACLib::SelectRow, "user1@" BUILTIN_ACL_DOMAIN); - acl.AddAccess(NACLib::EAccessType::Allow, NACLib::SelectRow, "user2@" BUILTIN_ACL_DOMAIN); + acl.AddAccess(NACLib::EAccessType::Allow, NACLib::SelectRow, "user1@" BUILTIN_ACL_DOMAIN); + acl.AddAccess(NACLib::EAccessType::Allow, NACLib::SelectRow, "user2@" BUILTIN_ACL_DOMAIN); server.AnnoyingClient->ModifyACL("/Root/PQ", topic2, acl.SerializeAsString()); WaitACLModification(); - + auto ticket1 = "1@" BUILTIN_ACL_DOMAIN; auto ticket2 = "2@" BUILTIN_ACL_DOMAIN; - + Cerr << "=== Writer - do reads\n"; writer.Read(shortTopic2Name, "user1", ticket1, false, false, true); - + writer.Read(shortTopic2Name, "user1", "user2@" BUILTIN_ACL_DOMAIN, false, false, true); writer.Read(shortTopic2Name, "user1", "user3@" BUILTIN_ACL_DOMAIN, true, false, true); //for topic writer.Read(shortTopic2Name, "user1", "user1@" BUILTIN_ACL_DOMAIN, true, false, true); //for consumer writer.Read(shortTopic2Name, "user2", ticket1, true, false, true); writer.Read(shortTopic2Name, "user2", ticket2, false, false, true); - + writer.Read(shortTopic2Name, "user5", ticket1, true, false, true); writer.Read(shortTopic2Name, "user5", ticket2, true, false, true); - - acl.Clear(); - acl.AddAccess(NACLib::EAccessType::Allow, NACLib::SelectRow, "user3@" BUILTIN_ACL_DOMAIN); + + acl.Clear(); + acl.AddAccess(NACLib::EAccessType::Allow, NACLib::SelectRow, "user3@" BUILTIN_ACL_DOMAIN); server.AnnoyingClient->ModifyACL("/Root/PQ", topic2, acl.SerializeAsString()); WaitACLModification(); - + Cerr << "==== Writer - read\n"; writer.Read(shortTopic2Name, "user1", "user3@" BUILTIN_ACL_DOMAIN, false, true, true); - + auto Channel_ = grpc::CreateChannel("localhost:" + ToString(server.GrpcPort), grpc::InsecureChannelCredentials()); - auto StubP_ = Ydb::PersQueue::V1::PersQueueService::NewStub(Channel_); - -/* auto driver = server.AnnoyingClient->GetDriver(); - + auto StubP_ = Ydb::PersQueue::V1::PersQueueService::NewStub(Channel_); + +/* auto driver = server.AnnoyingClient->GetDriver(); + Cerr << "==== Start consuming loop\n"; - for (ui32 i = 0; i < 2; ++i) { - - std::shared_ptr<NYdb::ICredentialsProviderFactory> creds = std::make_shared<TTestCredentialsProviderFactory>(NYdb::TStringType("user3@" BUILTIN_ACL_DOMAIN)); - - NYdb::NPersQueue::TReadSessionSettings settings; - settings.ConsumerName("user1").AppendTopics(shortTopic2Name).ReadOriginal({"dc1"}); - auto reader = CreateReader(*driver, settings, creds); - - auto msg = reader->GetEvent(true, 1); - UNIT_ASSERT(msg); - - Cerr << NYdb::NPersQueue::DebugString(*msg) << "\n"; - - auto ev = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TCreatePartitionStreamEvent>(&*msg); - - UNIT_ASSERT(ev); - dynamic_cast<TTestCredentialsProviderFactory*>(creds.get())->SetToken(i == 0 ? "user_without_rights@" BUILTIN_ACL_DOMAIN : "invalid_ticket"); - - ev->Confirm(); - + for (ui32 i = 0; i < 2; ++i) { + + std::shared_ptr<NYdb::ICredentialsProviderFactory> creds = std::make_shared<TTestCredentialsProviderFactory>(NYdb::TStringType("user3@" BUILTIN_ACL_DOMAIN)); + + NYdb::NPersQueue::TReadSessionSettings settings; + settings.ConsumerName("user1").AppendTopics(shortTopic2Name).ReadOriginal({"dc1"}); + auto reader = CreateReader(*driver, settings, creds); + + auto msg = reader->GetEvent(true, 1); + UNIT_ASSERT(msg); + + Cerr << NYdb::NPersQueue::DebugString(*msg) << "\n"; + + auto ev = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TCreatePartitionStreamEvent>(&*msg); + + UNIT_ASSERT(ev); + dynamic_cast<TTestCredentialsProviderFactory*>(creds.get())->SetToken(i == 0 ? "user_without_rights@" BUILTIN_ACL_DOMAIN : "invalid_ticket"); + + ev->Confirm(); + Cerr << "=== Wait for consumer death (" << i << ")" << Endl; - - msg = reader->GetEvent(true, 1); - UNIT_ASSERT(msg); - - Cerr << NYdb::NPersQueue::DebugString(*msg) << "\n"; - - auto closeEv = std::get_if<NYdb::NPersQueue::TSessionClosedEvent>(&*msg); - - UNIT_ASSERT(closeEv); - } -*/ + + msg = reader->GetEvent(true, 1); + UNIT_ASSERT(msg); + + Cerr << NYdb::NPersQueue::DebugString(*msg) << "\n"; + + auto closeEv = std::get_if<NYdb::NPersQueue::TSessionClosedEvent>(&*msg); + + UNIT_ASSERT(closeEv); + } +*/ Cerr << "==== Start second loop\n"; - for (ui32 i = 0; i < 3; ++i){ + for (ui32 i = 0; i < 3; ++i){ server.AnnoyingClient->GetClientInfo({topic2}, "user1", true); - - ReadInfoRequest request; - ReadInfoResponse response; - request.mutable_consumer()->set_path("user1"); - request.set_get_only_original(true); + + ReadInfoRequest request; + ReadInfoResponse response; + request.mutable_consumer()->set_path("user1"); + request.set_get_only_original(true); request.add_topics()->set_path(shortTopic2Name); - grpc::ClientContext rcontext; - if (i == 0) { - rcontext.AddMetadata("x-ydb-auth-ticket", "user_without_rights@" BUILTIN_ACL_DOMAIN); - } - if (i == 1) { - rcontext.AddMetadata("x-ydb-auth-ticket", "invalid_ticket"); - } - if (i == 2) { - rcontext.AddMetadata("x-ydb-auth-ticket", "user3@" BUILTIN_ACL_DOMAIN); - } - auto status = StubP_->GetReadSessionsInfo(&rcontext, request, &response); - UNIT_ASSERT(status.ok()); - ReadInfoResult res; - response.operation().result().UnpackTo(&res); - Cerr << response << "\n" << res << "\n"; - UNIT_ASSERT(response.operation().ready() == true); - UNIT_ASSERT(response.operation().status() == (i < 2) ? Ydb::StatusIds::UNAUTHORIZED : Ydb::StatusIds::SUCCESS); - } - } - + grpc::ClientContext rcontext; + if (i == 0) { + rcontext.AddMetadata("x-ydb-auth-ticket", "user_without_rights@" BUILTIN_ACL_DOMAIN); + } + if (i == 1) { + rcontext.AddMetadata("x-ydb-auth-ticket", "invalid_ticket"); + } + if (i == 2) { + rcontext.AddMetadata("x-ydb-auth-ticket", "user3@" BUILTIN_ACL_DOMAIN); + } + auto status = StubP_->GetReadSessionsInfo(&rcontext, request, &response); + UNIT_ASSERT(status.ok()); + ReadInfoResult res; + response.operation().result().UnpackTo(&res); + Cerr << response << "\n" << res << "\n"; + UNIT_ASSERT(response.operation().ready() == true); + UNIT_ASSERT(response.operation().status() == (i < 2) ? Ydb::StatusIds::UNAUTHORIZED : Ydb::StatusIds::SUCCESS); + } + } + Y_UNIT_TEST(CheckKillBalancer) { NPersQueue::TTestServer server; server.EnableLogs({ NKikimrServices::PQ_WRITE_PROXY, NKikimrServices::PQ_READ_PROXY}); PrepareForGrpc(server); - + TPQDataWriter writer("source1", server); - - - auto driver = server.AnnoyingClient->GetDriver(); - - NYdb::NPersQueue::TReadSessionSettings settings; - settings.ConsumerName("shared/user").AppendTopics(SHORT_TOPIC_NAME).ReadOriginal({"dc1"}); - auto reader = CreateReader(*driver, settings); - for (ui32 i = 0; i < 2; ++i) { - auto msg = reader->GetEvent(true, 1); - UNIT_ASSERT(msg); - - Cerr << NYdb::NPersQueue::DebugString(*msg) << "\n"; - - auto ev = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TCreatePartitionStreamEvent>(&*msg); - - UNIT_ASSERT(ev); - - ev->Confirm(); - } - + + + auto driver = server.AnnoyingClient->GetDriver(); + + NYdb::NPersQueue::TReadSessionSettings settings; + settings.ConsumerName("shared/user").AppendTopics(SHORT_TOPIC_NAME).ReadOriginal({"dc1"}); + auto reader = CreateReader(*driver, settings); + for (ui32 i = 0; i < 2; ++i) { + auto msg = reader->GetEvent(true, 1); + UNIT_ASSERT(msg); + + Cerr << NYdb::NPersQueue::DebugString(*msg) << "\n"; + + auto ev = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TCreatePartitionStreamEvent>(&*msg); + + UNIT_ASSERT(ev); + + ev->Confirm(); + } + server.AnnoyingClient->RestartBalancerTablet(server.CleverServer->GetRuntime(), "rt3.dc1--topic1"); - Cerr << "Balancer killed\n"; - - - ui32 createEv = 0, destroyEv = 0; - for (ui32 i = 0; i < 4; ++i) { - auto msg = reader->GetEvent(true, 1); - UNIT_ASSERT(msg); - - Cerr << "Got message: " << NYdb::NPersQueue::DebugString(*msg) << "\n"; - - auto ev1 = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TPartitionStreamClosedEvent>(&*msg); - auto ev2 = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TCreatePartitionStreamEvent>(&*msg); - - UNIT_ASSERT(ev1 || ev2); - - if (ev1) { - ++destroyEv; - } - if (ev2) { - ev2->Confirm(); - ++createEv; - } - } - UNIT_ASSERT(createEv == 2); - UNIT_ASSERT(destroyEv == 2); - - UNIT_ASSERT(!reader->WaitEvent().Wait(TDuration::Seconds(1))); - } - - + Cerr << "Balancer killed\n"; + + + ui32 createEv = 0, destroyEv = 0; + for (ui32 i = 0; i < 4; ++i) { + auto msg = reader->GetEvent(true, 1); + UNIT_ASSERT(msg); + + Cerr << "Got message: " << NYdb::NPersQueue::DebugString(*msg) << "\n"; + + auto ev1 = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TPartitionStreamClosedEvent>(&*msg); + auto ev2 = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TCreatePartitionStreamEvent>(&*msg); + + UNIT_ASSERT(ev1 || ev2); + + if (ev1) { + ++destroyEv; + } + if (ev2) { + ev2->Confirm(); + ++createEv; + } + } + UNIT_ASSERT(createEv == 2); + UNIT_ASSERT(destroyEv == 2); + + UNIT_ASSERT(!reader->WaitEvent().Wait(TDuration::Seconds(1))); + } + + Y_UNIT_TEST(TestWriteStat) { NPersQueue::TTestServer server(PQSettings(0, 1, true, "10"), false); auto netDataUpdated = server.PrepareNetDataFile(FormNetData()); UNIT_ASSERT(netDataUpdated); server.StartServer(); - + server.EnableLogs({ NKikimrServices::PQ_WRITE_PROXY, NKikimrServices::NET_CLASSIFIER }); server.EnableLogs({ NKikimrServices::PERSQUEUE }, NActors::NLog::PRI_ERROR); - + auto sender = server.CleverServer->GetRuntime()->AllocateEdgeActor(); - + GetClassifierUpdate(*server.CleverServer, sender); //wait for initializing - + server.AnnoyingClient->CreateTopic("rt3.dc1--account--topic1", 10, 10000, 10000, 2000); server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 10, 10000, 10000, 2000); - - auto driver = server.AnnoyingClient->GetDriver(); - - auto writer = CreateWriter(*driver, "account/topic1", "base64:AAAAaaaa____----12", 0, "raw"); - - auto msg = writer->GetEvent(true); - UNIT_ASSERT(msg); // ReadyToAcceptEvent - - auto ev = std::get_if<NYdb::NPersQueue::TWriteSessionEvent::TReadyToAcceptEvent>(&*msg); - UNIT_ASSERT(ev); - - TInstant st(TInstant::Now()); - for (ui32 i = 1; i <= 5; ++i) { - writer->Write(std::move(ev->ContinuationToken), TString(2000, 'a')); - msg = writer->GetEvent(true); - UNIT_ASSERT(msg); // ReadyToAcceptEvent - - ev = std::get_if<NYdb::NPersQueue::TWriteSessionEvent::TReadyToAcceptEvent>(&*msg); - UNIT_ASSERT(ev); - - msg = writer->GetEvent(true); - - Cerr << DebugString(*msg) << "\n"; - - auto ack = std::get_if<NYdb::NPersQueue::TWriteSessionEvent::TAcksEvent>(&*msg); - UNIT_ASSERT(ack); - - if (i == 5) { - UNIT_ASSERT(TInstant::Now() - st > TDuration::Seconds(3)); + + auto driver = server.AnnoyingClient->GetDriver(); + + auto writer = CreateWriter(*driver, "account/topic1", "base64:AAAAaaaa____----12", 0, "raw"); + + auto msg = writer->GetEvent(true); + UNIT_ASSERT(msg); // ReadyToAcceptEvent + + auto ev = std::get_if<NYdb::NPersQueue::TWriteSessionEvent::TReadyToAcceptEvent>(&*msg); + UNIT_ASSERT(ev); + + TInstant st(TInstant::Now()); + for (ui32 i = 1; i <= 5; ++i) { + writer->Write(std::move(ev->ContinuationToken), TString(2000, 'a')); + msg = writer->GetEvent(true); + UNIT_ASSERT(msg); // ReadyToAcceptEvent + + ev = std::get_if<NYdb::NPersQueue::TWriteSessionEvent::TReadyToAcceptEvent>(&*msg); + UNIT_ASSERT(ev); + + msg = writer->GetEvent(true); + + Cerr << DebugString(*msg) << "\n"; + + auto ack = std::get_if<NYdb::NPersQueue::TWriteSessionEvent::TAcksEvent>(&*msg); + UNIT_ASSERT(ack); + + if (i == 5) { + UNIT_ASSERT(TInstant::Now() - st > TDuration::Seconds(3)); // TODO: Describe this assert in comment - UNIT_ASSERT(!ack->Acks.empty()); - UNIT_ASSERT(ack->Acks.back().Stat); + UNIT_ASSERT(!ack->Acks.empty()); + UNIT_ASSERT(ack->Acks.back().Stat); UNIT_ASSERT(ack->Acks.back().Stat->TotalTimeInPartitionQueue >= ack->Acks.back().Stat->PartitionQuotedTime); UNIT_ASSERT(ack->Acks.back().Stat->TotalTimeInPartitionQueue <= ack->Acks.back().Stat->PartitionQuotedTime + TDuration::Seconds(10)); - } - } + } + } GetCounters(server.CleverServer->GetRuntime()->GetMonPort(), "pqproxy", "writeSession", "account/topic1", "Vla"); - { - NYdb::NPersQueue::TReadSessionSettings settings; - settings.ConsumerName("shared/user").AppendTopics(TString("account/topic1")).ReadOriginal({"dc1"}); - - auto reader = CreateReader(*driver, settings); - - auto msg = GetNextMessageSkipAssignment(reader); - UNIT_ASSERT(msg); - - Cerr << NYdb::NPersQueue::DebugString(*msg) << "\n"; - } + { + NYdb::NPersQueue::TReadSessionSettings settings; + settings.ConsumerName("shared/user").AppendTopics(TString("account/topic1")).ReadOriginal({"dc1"}); + + auto reader = CreateReader(*driver, settings); + + auto msg = GetNextMessageSkipAssignment(reader); + UNIT_ASSERT(msg); + + Cerr << NYdb::NPersQueue::DebugString(*msg) << "\n"; + } GetCounters(server.CleverServer->GetRuntime()->GetMonPort(), "pqproxy", "readSession", "account/topic1", "Vla"); - } - - Y_UNIT_TEST(TestWriteSessionsConflicts) { + } + + Y_UNIT_TEST(TestWriteSessionsConflicts) { NPersQueue::TTestServer server; server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 1); - + TPQDataWriter writer("source", server); - - server.EnableLogs({ NKikimrServices::PQ_WRITE_PROXY }); - - TString topic = SHORT_TOPIC_NAME; - TString sourceId = "123"; - - - auto driver = server.AnnoyingClient->GetDriver(); - - auto writer1 = CreateWriter(*driver, topic, sourceId); - - auto msg = writer1->GetEvent(true); - UNIT_ASSERT(msg); // ReadyToAcceptEvent - - Cerr << DebugString(*msg) << "\n"; - - auto ev = std::get_if<NYdb::NPersQueue::TWriteSessionEvent::TReadyToAcceptEvent>(&*msg); - UNIT_ASSERT(ev); - - auto writer2 = CreateWriter(*driver, topic, sourceId); - - msg = writer2->GetEvent(true); - UNIT_ASSERT(msg); // ReadyToAcceptEvent - - Cerr << DebugString(*msg) << "\n"; - - ev = std::get_if<NYdb::NPersQueue::TWriteSessionEvent::TReadyToAcceptEvent>(&*msg); - UNIT_ASSERT(ev); - - // First session dies. - UNIT_ASSERT(writer1->WaitEvent().Wait(TDuration::Seconds(10))); - - msg = writer1->GetEvent(true); - UNIT_ASSERT(msg); - - Cerr << DebugString(*msg) << "\n"; - - auto closeEv = std::get_if<NYdb::NPersQueue::TSessionClosedEvent>(&*msg); - UNIT_ASSERT(closeEv); - - UNIT_ASSERT(!writer2->WaitEvent().Wait(TDuration::Seconds(1))); - } - -/* + + server.EnableLogs({ NKikimrServices::PQ_WRITE_PROXY }); + + TString topic = SHORT_TOPIC_NAME; + TString sourceId = "123"; + + + auto driver = server.AnnoyingClient->GetDriver(); + + auto writer1 = CreateWriter(*driver, topic, sourceId); + + auto msg = writer1->GetEvent(true); + UNIT_ASSERT(msg); // ReadyToAcceptEvent + + Cerr << DebugString(*msg) << "\n"; + + auto ev = std::get_if<NYdb::NPersQueue::TWriteSessionEvent::TReadyToAcceptEvent>(&*msg); + UNIT_ASSERT(ev); + + auto writer2 = CreateWriter(*driver, topic, sourceId); + + msg = writer2->GetEvent(true); + UNIT_ASSERT(msg); // ReadyToAcceptEvent + + Cerr << DebugString(*msg) << "\n"; + + ev = std::get_if<NYdb::NPersQueue::TWriteSessionEvent::TReadyToAcceptEvent>(&*msg); + UNIT_ASSERT(ev); + + // First session dies. + UNIT_ASSERT(writer1->WaitEvent().Wait(TDuration::Seconds(10))); + + msg = writer1->GetEvent(true); + UNIT_ASSERT(msg); + + Cerr << DebugString(*msg) << "\n"; + + auto closeEv = std::get_if<NYdb::NPersQueue::TSessionClosedEvent>(&*msg); + UNIT_ASSERT(closeEv); + + UNIT_ASSERT(!writer2->WaitEvent().Wait(TDuration::Seconds(1))); + } + +/* Y_UNIT_TEST(TestLockErrors) { return; // Test is ignored. FIX: KIKIMR-7881 @@ -1437,168 +1437,168 @@ namespace { server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 1); server.EnableLogs({ NKikimrServices::PQ_READ_PROXY }); auto pqLib = TPQLib::WithCerrLogger(); - - { + + { auto [producer, pcResult] = CreateProducer(pqLib, server.GrpcPort, SHORT_TOPIC_NAME, "123"); - for (ui32 i = 1; i <= 11; ++i) { - auto f = producer->Write(i, TString(10, 'a')); - f.Wait(); - } - } - - TConsumerSettings ss; - ss.Consumer = "user"; + for (ui32 i = 1; i <= 11; ++i) { + auto f = producer->Write(i, TString(10, 'a')); + f.Wait(); + } + } + + TConsumerSettings ss; + ss.Consumer = "user"; ss.Server = TServerSetting{"localhost", server.GrpcPort}; ss.Topics.push_back({SHORT_TOPIC_NAME, {}}); - ss.MaxCount = 1; - ss.Unpack = false; - + ss.MaxCount = 1; + ss.Unpack = false; + auto [consumer, ccResult] = CreateConsumer(pqLib, ss); - auto msg = consumer->GetNextMessage(); - msg.Wait(); - Cerr << msg.GetValue().Response << "\n"; + auto msg = consumer->GetNextMessage(); + msg.Wait(); + Cerr << msg.GetValue().Response << "\n"; UNIT_ASSERT(msg.GetValue().Response.response_case() == MigrationStreamingReadServerMessage::kAssigned); UNIT_ASSERT(msg.GetValue().Response.assigned().topic().path() == SHORT_TOPIC_NAME); - UNIT_ASSERT(msg.GetValue().Response.assigned().cluster() == "dc1"); - UNIT_ASSERT(msg.GetValue().Response.assigned().partition() == 0); - UNIT_ASSERT(msg.GetValue().Response.assigned().read_offset() == 0); - UNIT_ASSERT(msg.GetValue().Response.assigned().end_offset() == 11); - - auto pp = msg.GetValue().StartRead; - pp.SetValue(TAssignInfo{0, 5, false}); - auto future = consumer->IsDead(); - future.Wait(); - Cerr << future.GetValue() << "\n"; - + UNIT_ASSERT(msg.GetValue().Response.assigned().cluster() == "dc1"); + UNIT_ASSERT(msg.GetValue().Response.assigned().partition() == 0); + UNIT_ASSERT(msg.GetValue().Response.assigned().read_offset() == 0); + UNIT_ASSERT(msg.GetValue().Response.assigned().end_offset() == 11); + + auto pp = msg.GetValue().StartRead; + pp.SetValue(TAssignInfo{0, 5, false}); + auto future = consumer->IsDead(); + future.Wait(); + Cerr << future.GetValue() << "\n"; + std::tie(consumer, ccResult) = CreateConsumer(pqLib, ss); - msg = consumer->GetNextMessage(); - msg.Wait(); - Cerr << msg.GetValue().Response << "\n"; + msg = consumer->GetNextMessage(); + msg.Wait(); + Cerr << msg.GetValue().Response << "\n"; UNIT_ASSERT(msg.GetValue().Response.response_case() == MigrationStreamingReadServerMessage::kAssigned); UNIT_ASSERT(msg.GetValue().Response.assigned().topic().path() == SHORT_TOPIC_NAME); - UNIT_ASSERT(msg.GetValue().Response.assigned().cluster() == "dc1"); - UNIT_ASSERT(msg.GetValue().Response.assigned().partition() == 0); - UNIT_ASSERT(msg.GetValue().Response.assigned().read_offset() == 0); - UNIT_ASSERT(msg.GetValue().Response.assigned().end_offset() == 11); - - pp = msg.GetValue().StartRead; - pp.SetValue(TAssignInfo{12, 12, false}); - future = consumer->IsDead(); - future.Wait(); - Cerr << future.GetValue() << "\n"; - + UNIT_ASSERT(msg.GetValue().Response.assigned().cluster() == "dc1"); + UNIT_ASSERT(msg.GetValue().Response.assigned().partition() == 0); + UNIT_ASSERT(msg.GetValue().Response.assigned().read_offset() == 0); + UNIT_ASSERT(msg.GetValue().Response.assigned().end_offset() == 11); + + pp = msg.GetValue().StartRead; + pp.SetValue(TAssignInfo{12, 12, false}); + future = consumer->IsDead(); + future.Wait(); + Cerr << future.GetValue() << "\n"; + std::tie(consumer, ccResult) = CreateConsumer(pqLib, ss); - msg = consumer->GetNextMessage(); - msg.Wait(); - Cerr << msg.GetValue().Response << "\n"; + msg = consumer->GetNextMessage(); + msg.Wait(); + Cerr << msg.GetValue().Response << "\n"; UNIT_ASSERT(msg.GetValue().Response.response_case() == MigrationStreamingReadServerMessage::kAssigned); UNIT_ASSERT(msg.GetValue().Response.assigned().topic().path() == SHORT_TOPIC_NAME); - UNIT_ASSERT(msg.GetValue().Response.assigned().cluster() == "dc1"); - UNIT_ASSERT(msg.GetValue().Response.assigned().partition() == 0); - UNIT_ASSERT(msg.GetValue().Response.assigned().read_offset() == 0); - UNIT_ASSERT(msg.GetValue().Response.assigned().end_offset() == 11); - - pp = msg.GetValue().StartRead; - pp.SetValue(TAssignInfo{6, 7, false}); - future = consumer->IsDead(); - future.Wait(); - Cerr << future.GetValue() << "\n"; - + UNIT_ASSERT(msg.GetValue().Response.assigned().cluster() == "dc1"); + UNIT_ASSERT(msg.GetValue().Response.assigned().partition() == 0); + UNIT_ASSERT(msg.GetValue().Response.assigned().read_offset() == 0); + UNIT_ASSERT(msg.GetValue().Response.assigned().end_offset() == 11); + + pp = msg.GetValue().StartRead; + pp.SetValue(TAssignInfo{6, 7, false}); + future = consumer->IsDead(); + future.Wait(); + Cerr << future.GetValue() << "\n"; + std::tie(consumer, ccResult) = CreateConsumer(pqLib, ss); - msg = consumer->GetNextMessage(); - msg.Wait(); - Cerr << msg.GetValue().Response << "\n"; + msg = consumer->GetNextMessage(); + msg.Wait(); + Cerr << msg.GetValue().Response << "\n"; UNIT_ASSERT(msg.GetValue().Response.response_case() == MigrationStreamingReadServerMessage::kAssigned); UNIT_ASSERT(msg.GetValue().Response.assigned().topic().path() == SHORT_TOPIC_NAME); - UNIT_ASSERT(msg.GetValue().Response.assigned().cluster() == "dc1"); - UNIT_ASSERT(msg.GetValue().Response.assigned().partition() == 0); - UNIT_ASSERT(msg.GetValue().Response.assigned().read_offset() == 0); - UNIT_ASSERT(msg.GetValue().Response.assigned().end_offset() == 11); - auto assignId = msg.GetValue().Response.assigned().assign_id(); - pp = msg.GetValue().StartRead; - pp.SetValue(TAssignInfo{5, 0, false}); - consumer->Commit({{assignId, 0}}); - while (true) { - msg = consumer->GetNextMessage(); - msg.Wait(); - Cerr << msg.GetValue().Response << "\n"; + UNIT_ASSERT(msg.GetValue().Response.assigned().cluster() == "dc1"); + UNIT_ASSERT(msg.GetValue().Response.assigned().partition() == 0); + UNIT_ASSERT(msg.GetValue().Response.assigned().read_offset() == 0); + UNIT_ASSERT(msg.GetValue().Response.assigned().end_offset() == 11); + auto assignId = msg.GetValue().Response.assigned().assign_id(); + pp = msg.GetValue().StartRead; + pp.SetValue(TAssignInfo{5, 0, false}); + consumer->Commit({{assignId, 0}}); + while (true) { + msg = consumer->GetNextMessage(); + msg.Wait(); + Cerr << msg.GetValue().Response << "\n"; if (msg.GetValue().Response.response_case() == MigrationStreamingReadServerMessage::kCommitted) { - UNIT_ASSERT(msg.GetValue().Response.committed().cookies_size() == 1); - UNIT_ASSERT(msg.GetValue().Response.committed().cookies(0).assign_id() == assignId); - UNIT_ASSERT(msg.GetValue().Response.committed().cookies(0).partition_cookie() == 0); - break; - } - } - + UNIT_ASSERT(msg.GetValue().Response.committed().cookies_size() == 1); + UNIT_ASSERT(msg.GetValue().Response.committed().cookies(0).assign_id() == assignId); + UNIT_ASSERT(msg.GetValue().Response.committed().cookies(0).partition_cookie() == 0); + break; + } + } + std::tie(consumer, ccResult) = CreateConsumer(pqLib, ss); - msg = consumer->GetNextMessage(); - msg.Wait(); - Cerr << msg.GetValue().Response << "\n"; + msg = consumer->GetNextMessage(); + msg.Wait(); + Cerr << msg.GetValue().Response << "\n"; UNIT_ASSERT(msg.GetValue().Response.response_case() == MigrationStreamingReadServerMessage::kAssigned); UNIT_ASSERT(msg.GetValue().Response.assigned().topic().path() == SHORT_TOPIC_NAME); - UNIT_ASSERT(msg.GetValue().Response.assigned().cluster() == "dc1"); - UNIT_ASSERT(msg.GetValue().Response.assigned().partition() == 0); - UNIT_ASSERT(msg.GetValue().Response.assigned().read_offset() == 5); - UNIT_ASSERT(msg.GetValue().Response.assigned().end_offset() == 11); - - pp = msg.GetValue().StartRead; - pp.SetValue(TAssignInfo{11, 11, false}); - Sleep(TDuration::Seconds(5)); - + UNIT_ASSERT(msg.GetValue().Response.assigned().cluster() == "dc1"); + UNIT_ASSERT(msg.GetValue().Response.assigned().partition() == 0); + UNIT_ASSERT(msg.GetValue().Response.assigned().read_offset() == 5); + UNIT_ASSERT(msg.GetValue().Response.assigned().end_offset() == 11); + + pp = msg.GetValue().StartRead; + pp.SetValue(TAssignInfo{11, 11, false}); + Sleep(TDuration::Seconds(5)); + std::tie(consumer, ccResult) = CreateConsumer(pqLib, ss); - msg = consumer->GetNextMessage(); - msg.Wait(); - Cerr << msg.GetValue().Response << "\n"; + msg = consumer->GetNextMessage(); + msg.Wait(); + Cerr << msg.GetValue().Response << "\n"; UNIT_ASSERT(msg.GetValue().Response.response_case() == MigrationStreamingReadServerMessage::kAssigned); UNIT_ASSERT(msg.GetValue().Response.assigned().topic().path() == SHORT_TOPIC_NAME); - UNIT_ASSERT(msg.GetValue().Response.assigned().cluster() == "dc1"); - UNIT_ASSERT(msg.GetValue().Response.assigned().partition() == 0); - UNIT_ASSERT(msg.GetValue().Response.assigned().read_offset() == 11); - UNIT_ASSERT(msg.GetValue().Response.assigned().end_offset() == 11); - - pp = msg.GetValue().StartRead; - pp.SetValue(TAssignInfo{1, 0, true}); - future = consumer->IsDead(); - future.Wait(); - Cerr << future.GetValue() << "\n"; - + UNIT_ASSERT(msg.GetValue().Response.assigned().cluster() == "dc1"); + UNIT_ASSERT(msg.GetValue().Response.assigned().partition() == 0); + UNIT_ASSERT(msg.GetValue().Response.assigned().read_offset() == 11); + UNIT_ASSERT(msg.GetValue().Response.assigned().end_offset() == 11); + + pp = msg.GetValue().StartRead; + pp.SetValue(TAssignInfo{1, 0, true}); + future = consumer->IsDead(); + future.Wait(); + Cerr << future.GetValue() << "\n"; + std::tie(consumer, ccResult) = CreateConsumer(pqLib, ss); - msg = consumer->GetNextMessage(); - msg.Wait(); - Cerr << msg.GetValue().Response << "\n"; + msg = consumer->GetNextMessage(); + msg.Wait(); + Cerr << msg.GetValue().Response << "\n"; UNIT_ASSERT(msg.GetValue().Response.response_case() == MigrationStreamingReadServerMessage::kAssigned); UNIT_ASSERT(msg.GetValue().Response.assigned().topic().path() == SHORT_TOPIC_NAME); - UNIT_ASSERT(msg.GetValue().Response.assigned().cluster() == "dc1"); - UNIT_ASSERT(msg.GetValue().Response.assigned().partition() == 0); - UNIT_ASSERT(msg.GetValue().Response.assigned().read_offset() == 11); - UNIT_ASSERT(msg.GetValue().Response.assigned().end_offset() == 11); - - pp = msg.GetValue().StartRead; - pp.SetValue(TAssignInfo{0, 0, false}); - future = consumer->IsDead(); - UNIT_ASSERT(!future.Wait(TDuration::Seconds(5))); - } -*/ - - Y_UNIT_TEST(TestBigMessage) { + UNIT_ASSERT(msg.GetValue().Response.assigned().cluster() == "dc1"); + UNIT_ASSERT(msg.GetValue().Response.assigned().partition() == 0); + UNIT_ASSERT(msg.GetValue().Response.assigned().read_offset() == 11); + UNIT_ASSERT(msg.GetValue().Response.assigned().end_offset() == 11); + + pp = msg.GetValue().StartRead; + pp.SetValue(TAssignInfo{0, 0, false}); + future = consumer->IsDead(); + UNIT_ASSERT(!future.Wait(TDuration::Seconds(5))); + } +*/ + + Y_UNIT_TEST(TestBigMessage) { NPersQueue::TTestServer server; server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 1); - - server.EnableLogs({ NKikimrServices::PQ_WRITE_PROXY }); - - TPQDataWriter writer2("source", server); - - auto driver = server.AnnoyingClient->GetDriver(); - - auto writer = CreateWriter(*driver, SHORT_TOPIC_NAME, "123", 0, "raw"); - - auto msg = writer->GetEvent(true); - UNIT_ASSERT(msg); // ReadyToAcceptEvent - - auto ev = std::get_if<NYdb::NPersQueue::TWriteSessionEvent::TReadyToAcceptEvent>(&*msg); - UNIT_ASSERT(ev); - - writer->Write(std::move(ev->ContinuationToken), TString((1 << 20) * 60, 'a')); //TODO: Increase GRPC_ARG_MAX_SEND_MESSAGE_LENGTH + + server.EnableLogs({ NKikimrServices::PQ_WRITE_PROXY }); + + TPQDataWriter writer2("source", server); + + auto driver = server.AnnoyingClient->GetDriver(); + + auto writer = CreateWriter(*driver, SHORT_TOPIC_NAME, "123", 0, "raw"); + + auto msg = writer->GetEvent(true); + UNIT_ASSERT(msg); // ReadyToAcceptEvent + + auto ev = std::get_if<NYdb::NPersQueue::TWriteSessionEvent::TReadyToAcceptEvent>(&*msg); + UNIT_ASSERT(ev); + + writer->Write(std::move(ev->ContinuationToken), TString((1 << 20) * 60, 'a')); //TODO: Increase GRPC_ARG_MAX_SEND_MESSAGE_LENGTH { msg = writer->GetEvent(true); UNIT_ASSERT(msg); // ReadyToAcceptEvent @@ -1611,17 +1611,17 @@ namespace { auto ev2 = std::get_if<NYdb::NPersQueue::TWriteSessionEvent::TAcksEvent>(&*msg); UNIT_ASSERT(ev2); } - } - -/* + } + +/* void TestRereadsWhenDataIsEmptyImpl(bool withWait) { NPersQueue::TTestServer server; server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 1); - + server.EnableLogs({ NKikimrServices::PQ_READ_PROXY }); TPQDataWriter writer("source", server); auto pqLib = TPQLib::WithCerrLogger(); - + // Write nonempty data NKikimr::NPersQueueTests::TRequestWritePQ writeReq(DEFAULT_TOPIC_NAME, 0, "src", 4); @@ -1656,13 +1656,13 @@ namespace { auto msg1 = GetNextMessageSkipAssignment(consumer).GetValueSync().Response; auto assertHasData = [](const MigrationStreamingReadServerMessage& msg, const TString& data) { - const auto& d = msg.data_batch(); - UNIT_ASSERT_VALUES_EQUAL_C(d.partition_data_size(), 1, msg); - UNIT_ASSERT_VALUES_EQUAL_C(d.partition_data(0).batches_size(), 1, msg); - UNIT_ASSERT_VALUES_EQUAL_C(d.partition_data(0).batches(0).message_data_size(), 1, msg); - UNIT_ASSERT_VALUES_EQUAL_C(d.partition_data(0).batches(0).message_data(0).data(), data, msg); + const auto& d = msg.data_batch(); + UNIT_ASSERT_VALUES_EQUAL_C(d.partition_data_size(), 1, msg); + UNIT_ASSERT_VALUES_EQUAL_C(d.partition_data(0).batches_size(), 1, msg); + UNIT_ASSERT_VALUES_EQUAL_C(d.partition_data(0).batches(0).message_data_size(), 1, msg); + UNIT_ASSERT_VALUES_EQUAL_C(d.partition_data(0).batches(0).message_data(0).data(), data, msg); }; - UNIT_ASSERT_VALUES_EQUAL_C(msg1.data_batch().partition_data(0).cookie().partition_cookie(), 1, msg1); + UNIT_ASSERT_VALUES_EQUAL_C(msg1.data_batch().partition_data(0).cookie().partition_cookie(), 1, msg1); assertHasData(msg1, "data1"); auto resp2Future = consumer->GetNextMessage(); @@ -1680,8 +1680,8 @@ namespace { write("data3"); } const auto& msg2 = resp2Future.GetValueSync().Response; - UNIT_ASSERT_VALUES_EQUAL_C(msg2.data_batch().partition_data(0).cookie().partition_cookie(), 2, msg2); - + UNIT_ASSERT_VALUES_EQUAL_C(msg2.data_batch().partition_data(0).cookie().partition_cookie(), 2, msg2); + assertHasData(msg2, "data3"); } @@ -1692,9 +1692,9 @@ namespace { Y_UNIT_TEST(TestRereadsWhenDataIsEmptyWithWait) { TestRereadsWhenDataIsEmptyImpl(true); } - - - Y_UNIT_TEST(TestLockAfterDrop) { + + + Y_UNIT_TEST(TestLockAfterDrop) { NPersQueue::TTestServer server{false}; server.GrpcServerOptions.SetMaxMessageSize(130*1024*1024); server.StartServer(); @@ -1702,322 +1702,322 @@ namespace { server.WaitInit(SHORT_TOPIC_NAME); server.EnableLogs({ NKikimrServices::PQ_READ_PROXY }); auto pqLib = TPQLib::WithCerrLogger(); - + auto [producer, pcResult] = CreateProducer(pqLib, server.GrpcPort, SHORT_TOPIC_NAME, "123"); - auto f = producer->Write(1, TString(1024, 'a')); - f.Wait(); - + auto f = producer->Write(1, TString(1024, 'a')); + f.Wait(); + ui32 maxCount = 1; bool unpack = false; auto [consumer, ccResult] = CreateConsumer(pqLib, server.GrpcPort, "user", {SHORT_TOPIC_NAME, {}}, maxCount, unpack); Cerr << ccResult.Response << "\n"; - - auto msg = consumer->GetNextMessage(); - msg.Wait(); + + auto msg = consumer->GetNextMessage(); + msg.Wait(); UNIT_ASSERT_C(msg.GetValue().Response.response_case() == MigrationStreamingReadServerMessage::kAssigned, msg.GetValue().Response); UNIT_ASSERT(msg.GetValue().Response.assigned().topic().path() == SHORT_TOPIC_NAME); - UNIT_ASSERT(msg.GetValue().Response.assigned().cluster() == "dc1"); - UNIT_ASSERT(msg.GetValue().Response.assigned().partition() == 0); - + UNIT_ASSERT(msg.GetValue().Response.assigned().cluster() == "dc1"); + UNIT_ASSERT(msg.GetValue().Response.assigned().partition() == 0); + server.CleverServer->GetRuntime()->ResetScheduledCount(); server.AnnoyingClient->RestartPartitionTablets(server.CleverServer->GetRuntime(), DEFAULT_TOPIC_NAME); - - msg.GetValue().StartRead.SetValue({0,0,false}); - - msg = consumer->GetNextMessage(); - UNIT_ASSERT(msg.Wait(TDuration::Seconds(10))); - - Cerr << msg.GetValue().Response << "\n"; + + msg.GetValue().StartRead.SetValue({0,0,false}); + + msg = consumer->GetNextMessage(); + UNIT_ASSERT(msg.Wait(TDuration::Seconds(10))); + + Cerr << msg.GetValue().Response << "\n"; UNIT_ASSERT(msg.GetValue().Response.response_case() == MigrationStreamingReadServerMessage::kDataBatch); - } - - - Y_UNIT_TEST(TestMaxNewTopicModel) { + } + + + Y_UNIT_TEST(TestMaxNewTopicModel) { NPersQueue::TTestServer server; server.AnnoyingClient->AlterUserAttributes("/", "Root", {{"__extra_path_symbols_allowed", "@"}}); server.AnnoyingClient->CreateTopic("rt3.dc1--aaa@bbb@ccc--topic", 1); server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 1); - + server.EnableLogs({ NKikimrServices::PQ_READ_PROXY }); auto pqLib = TPQLib::WithCerrLogger(); - - { + + { auto [producer, pcResult] = CreateProducer(pqLib, server.GrpcPort, "aaa/bbb/ccc/topic", "123"); UNIT_ASSERT_C(pcResult.Response.server_message_case() == StreamingWriteServerMessage::kInitResponse, pcResult.Response); - for (ui32 i = 1; i <= 11; ++i) { - auto f = producer->Write(i, TString(10, 'a')); - f.Wait(); + for (ui32 i = 1; i <= 11; ++i) { + auto f = producer->Write(i, TString(10, 'a')); + f.Wait(); UNIT_ASSERT_C(f.GetValue().Response.server_message_case() == StreamingWriteServerMessage::kBatchWriteResponse, f.GetValue().Response); - } - } - + } + } + ui32 maxCount = 1; bool unpack = false; auto [consumer, ccResult] = CreateConsumer(pqLib, server.GrpcPort, "user", {"aaa/bbb/ccc/topic", {}}, maxCount, unpack); UNIT_ASSERT_C(ccResult.Response.response_case() == MigrationStreamingReadServerMessage::kInitResponse, ccResult.Response); - - auto msg = consumer->GetNextMessage(); - msg.Wait(); - Cerr << msg.GetValue().Response << "\n"; + + auto msg = consumer->GetNextMessage(); + msg.Wait(); + Cerr << msg.GetValue().Response << "\n"; UNIT_ASSERT(msg.GetValue().Response.response_case() == MigrationStreamingReadServerMessage::kAssigned); - } - - - Y_UNIT_TEST(TestReleaseWithAssigns) { + } + + + Y_UNIT_TEST(TestReleaseWithAssigns) { NPersQueue::TTestServer server; server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 3); - + server.EnableLogs({ NKikimrServices::PQ_READ_PROXY }); auto pqLib = TPQLib::WithCerrLogger(); - + TPQDataWriter writer("source", server); - - for (ui32 i = 1; i <= 3; ++i) { + + for (ui32 i = 1; i <= 3; ++i) { TString sourceId = "123" + ToString<int>(i); ui32 partitionGroup = i; auto [producer, pcResult] = CreateProducer(pqLib, server.GrpcPort, SHORT_TOPIC_NAME, sourceId, partitionGroup); - + UNIT_ASSERT(pcResult.Response.server_message_case() == StreamingWriteServerMessage::kInitResponse); - auto f = producer->Write(i, TString(10, 'a')); - f.Wait(); - } - - TConsumerSettings ss; - ss.Consumer = "user"; + auto f = producer->Write(i, TString(10, 'a')); + f.Wait(); + } + + TConsumerSettings ss; + ss.Consumer = "user"; ss.Server = TServerSetting{"localhost", server.GrpcPort}; ss.Topics.push_back({SHORT_TOPIC_NAME, {}}); - ss.ReadMirroredPartitions = false; - ss.MaxCount = 3; - ss.Unpack = false; - + ss.ReadMirroredPartitions = false; + ss.MaxCount = 3; + ss.Unpack = false; + auto [consumer, ccResult] = CreateConsumer(pqLib, ss); Cerr << ccResult.Response << "\n"; - - for (ui32 i = 1; i <= 3; ++i) { - auto msg = consumer->GetNextMessage(); - msg.Wait(); - Cerr << msg.GetValue().Response << "\n"; + + for (ui32 i = 1; i <= 3; ++i) { + auto msg = consumer->GetNextMessage(); + msg.Wait(); + Cerr << msg.GetValue().Response << "\n"; UNIT_ASSERT(msg.GetValue().Response.response_case() == MigrationStreamingReadServerMessage::kAssigned); UNIT_ASSERT(msg.GetValue().Response.assigned().topic().path() == SHORT_TOPIC_NAME); - UNIT_ASSERT(msg.GetValue().Response.assigned().cluster() == "dc1"); - } - + UNIT_ASSERT(msg.GetValue().Response.assigned().cluster() == "dc1"); + } + auto [consumer2, ccResult2] = CreateConsumer(pqLib, ss); Cerr << ccResult2.Response << "\n"; - - auto msg = consumer->GetNextMessage(); - auto msg2 = consumer2->GetNextMessage(); - - msg.Wait(); - Cerr << msg.GetValue().Response << "\n"; + + auto msg = consumer->GetNextMessage(); + auto msg2 = consumer2->GetNextMessage(); + + msg.Wait(); + Cerr << msg.GetValue().Response << "\n"; UNIT_ASSERT(msg.GetValue().Response.response_case() == MigrationStreamingReadServerMessage::kRelease); UNIT_ASSERT(msg.GetValue().Response.release().topic().path() == SHORT_TOPIC_NAME); - UNIT_ASSERT(msg.GetValue().Response.release().cluster() == "dc1"); - - UNIT_ASSERT(!msg2.Wait(TDuration::Seconds(1))); - - msg.GetValue().Release.SetValue(); - - msg2.Wait(); - Cerr << msg2.GetValue().Response << "\n"; - + UNIT_ASSERT(msg.GetValue().Response.release().cluster() == "dc1"); + + UNIT_ASSERT(!msg2.Wait(TDuration::Seconds(1))); + + msg.GetValue().Release.SetValue(); + + msg2.Wait(); + Cerr << msg2.GetValue().Response << "\n"; + UNIT_ASSERT(msg2.GetValue().Response.response_case() == MigrationStreamingReadServerMessage::kAssigned); UNIT_ASSERT(msg2.GetValue().Response.assigned().topic().path() == SHORT_TOPIC_NAME); - UNIT_ASSERT(msg2.GetValue().Response.assigned().cluster() == "dc1"); - } - + UNIT_ASSERT(msg2.GetValue().Response.assigned().cluster() == "dc1"); + } + Y_UNIT_TEST(TestSilentRelease) { NPersQueue::TTestServer server; server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 3); - + server.EnableLogs({ NKikimrServices::PQ_READ_PROXY }); auto pqLib = TPQLib::WithCerrLogger(); - + TPQDataWriter writer("source", server); - - TVector<std::pair<ui64, ui64>> cookies; - - for (ui32 i = 1; i <= 3; ++i) { + + TVector<std::pair<ui64, ui64>> cookies; + + for (ui32 i = 1; i <= 3; ++i) { TString sourceId = "123" + ToString<int>(i); ui32 partitionGroup = i; - + auto [producer, pcResult] = CreateProducer(pqLib, server.GrpcPort, SHORT_TOPIC_NAME, sourceId, partitionGroup); Cerr << "===Response: " << pcResult.Response << Endl; UNIT_ASSERT(pcResult.Response.server_message_case() == StreamingWriteServerMessage::kInitResponse); - auto f = producer->Write(i, TString(10, 'a')); - f.Wait(); - } - - TConsumerSettings ss; - ss.Consumer = "user"; + auto f = producer->Write(i, TString(10, 'a')); + f.Wait(); + } + + TConsumerSettings ss; + ss.Consumer = "user"; ss.Server = TServerSetting{"localhost", server.GrpcPort}; ss.Topics.push_back({SHORT_TOPIC_NAME, {}}); - ss.ReadMirroredPartitions = false; - ss.MaxCount = 1; - ss.Unpack = false; - + ss.ReadMirroredPartitions = false; + ss.MaxCount = 1; + ss.Unpack = false; + auto [consumer, ccResult] = CreateConsumer(pqLib, ss); Cerr << ccResult.Response << "\n"; - - for (ui32 i = 1; i <= 3; ++i) { + + for (ui32 i = 1; i <= 3; ++i) { auto msg = GetNextMessageSkipAssignment(consumer); Cerr << msg.GetValueSync().Response << "\n"; UNIT_ASSERT(msg.GetValueSync().Response.response_case() == MigrationStreamingReadServerMessage::kDataBatch); - for (auto& p : msg.GetValue().Response.data_batch().partition_data()) { - cookies.emplace_back(p.cookie().assign_id(), p.cookie().partition_cookie()); - } - } - + for (auto& p : msg.GetValue().Response.data_batch().partition_data()) { + cookies.emplace_back(p.cookie().assign_id(), p.cookie().partition_cookie()); + } + } + auto [consumer2, ccResult2] = CreateConsumer(pqLib, ss); Cerr << ccResult2.Response << "\n"; - - auto msg = consumer->GetNextMessage(); - auto msg2 = consumer2->GetNextMessage(); + + auto msg = consumer->GetNextMessage(); + auto msg2 = consumer2->GetNextMessage(); UNIT_ASSERT(!msg2.Wait(TDuration::Seconds(1))); consumer->Commit(cookies); - + if (msg.GetValueSync().Release.Initialized()) { msg.GetValueSync().Release.SetValue(); - } - - msg2.Wait(); - Cerr << msg2.GetValue().Response << "\n"; - + } + + msg2.Wait(); + Cerr << msg2.GetValue().Response << "\n"; + UNIT_ASSERT(msg2.GetValue().Response.response_case() == MigrationStreamingReadServerMessage::kAssigned); UNIT_ASSERT(msg2.GetValue().Response.assigned().topic().path() == SHORT_TOPIC_NAME); - UNIT_ASSERT(msg2.GetValue().Response.assigned().cluster() == "dc1"); + UNIT_ASSERT(msg2.GetValue().Response.assigned().cluster() == "dc1"); UNIT_ASSERT(msg2.GetValue().Response.assigned().read_offset() == 1); - } - -*/ - -/* - Y_UNIT_TEST(TestDoubleRelease) { + } + +*/ + +/* + Y_UNIT_TEST(TestDoubleRelease) { NPersQueue::TTestServer server; server.AnnoyingClient->CreateTopic( - DEFAULT_TOPIC_NAME, 1, 8000000, 86400, 50000000, "", 50000000, {"user1"}, {"user1", "user2"} + DEFAULT_TOPIC_NAME, 1, 8000000, 86400, 50000000, "", 50000000, {"user1"}, {"user1", "user2"} ); server.EnableLogs({ NKikimrServices::PQ_READ_PROXY, NKikimrServices::TABLET_AGGREGATOR }); auto pqLib = TPQLib::WithCerrLogger(); TPQDataWriter writer("source", server); - - TConsumerSettings ss; - ss.Consumer = "user"; + + TConsumerSettings ss; + ss.Consumer = "user"; ss.Server = TServerSetting{"localhost", server.GrpcPort}; ss.Topics.push_back({SHORT_TOPIC_NAME, {}}); - ss.ReadMirroredPartitions = false; - ss.MaxCount = 3; - ss.Unpack = false; - + ss.ReadMirroredPartitions = false; + ss.MaxCount = 3; + ss.Unpack = false; + auto [consumer, ccResult] = CreateConsumer(pqLib, ss); Cerr << ccResult.Response << "\n"; - - auto msg = consumer->GetNextMessage(); - msg.Wait(); - Cerr << msg.GetValue().Response << "\n"; + + auto msg = consumer->GetNextMessage(); + msg.Wait(); + Cerr << msg.GetValue().Response << "\n"; UNIT_ASSERT(msg.GetValue().Response.response_case() == MigrationStreamingReadServerMessage::kAssigned); UNIT_ASSERT(msg.GetValue().Response.assigned().topic().path() == SHORT_TOPIC_NAME); - UNIT_ASSERT(msg.GetValue().Response.assigned().cluster() == "dc1"); - - msg = consumer->GetNextMessage(); - UNIT_ASSERT(!msg.Wait(TDuration::Seconds(1))); - - THolder<IConsumer> consumer2; - do { + UNIT_ASSERT(msg.GetValue().Response.assigned().cluster() == "dc1"); + + msg = consumer->GetNextMessage(); + UNIT_ASSERT(!msg.Wait(TDuration::Seconds(1))); + + THolder<IConsumer> consumer2; + do { std::tie(consumer2, ccResult) = CreateConsumer(pqLib, ss); Cerr << ccResult.Response << "\n"; - } while(!msg.Wait(TDuration::Seconds(1))); - - Cerr << msg.GetValue().Response << "\n"; + } while(!msg.Wait(TDuration::Seconds(1))); + + Cerr << msg.GetValue().Response << "\n"; UNIT_ASSERT(msg.GetValue().Response.response_case() == MigrationStreamingReadServerMessage::kRelease); UNIT_ASSERT(msg.GetValue().Response.release().topic().path() == SHORT_TOPIC_NAME); - UNIT_ASSERT(msg.GetValue().Response.release().cluster() == "dc1"); - UNIT_ASSERT(msg.GetValue().Response.release().forceful_release() == false); - - msg = consumer->GetNextMessage(); - UNIT_ASSERT(!msg.Wait(TDuration::Seconds(1))); - + UNIT_ASSERT(msg.GetValue().Response.release().cluster() == "dc1"); + UNIT_ASSERT(msg.GetValue().Response.release().forceful_release() == false); + + msg = consumer->GetNextMessage(); + UNIT_ASSERT(!msg.Wait(TDuration::Seconds(1))); + server.AnnoyingClient->RestartBalancerTablet(server.CleverServer->GetRuntime(), DEFAULT_TOPIC_NAME); - UNIT_ASSERT(msg.Wait(TDuration::Seconds(1))); - - Cerr << msg.GetValue().Response << "\n"; + UNIT_ASSERT(msg.Wait(TDuration::Seconds(1))); + + Cerr << msg.GetValue().Response << "\n"; UNIT_ASSERT(msg.GetValue().Response.response_case() == MigrationStreamingReadServerMessage::kRelease); UNIT_ASSERT(msg.GetValue().Response.release().topic().path() == SHORT_TOPIC_NAME); - UNIT_ASSERT(msg.GetValue().Response.release().cluster() == "dc1"); - UNIT_ASSERT(msg.GetValue().Response.release().forceful_release() == true); - - THolder<TEvTabletCounters::TEvTabletLabeledCountersResponse> response; + UNIT_ASSERT(msg.GetValue().Response.release().cluster() == "dc1"); + UNIT_ASSERT(msg.GetValue().Response.release().forceful_release() == true); + + THolder<TEvTabletCounters::TEvTabletLabeledCountersResponse> response; TActorId edge = server.CleverServer->GetRuntime()->AllocateEdgeActor(); - - do { - - auto actorId = NKikimr::CreateClusterLabeledCountersAggregatorActor(edge, TTabletTypes::PersQueue, 3, "rt3.*--*,user*!!/!!*!!/rt3.*--*", 0); // remove !! + + do { + + auto actorId = NKikimr::CreateClusterLabeledCountersAggregatorActor(edge, TTabletTypes::PersQueue, 3, "rt3.*--*,user*!!/!!*!!/rt3.*--*", 0); // remove !! server.CleverServer->GetRuntime()->Register(actorId); response = server.CleverServer->GetRuntime()-> - GrabEdgeEvent<TEvTabletCounters::TEvTabletLabeledCountersResponse>(); - - Cerr << "FINAL RESPONSE :\n" << response->Record.DebugString() << Endl; - } while (response->Record.LabeledCountersByGroupSize() == 0); - - Cerr << "MULITREQUEST\n"; - - auto actorId = NKikimr::CreateClusterLabeledCountersAggregatorActor(edge, TTabletTypes::PersQueue, 3, "rt3.*--*,user*!!/!!*!!/rt3.*--*", 3); // remove !! + GrabEdgeEvent<TEvTabletCounters::TEvTabletLabeledCountersResponse>(); + + Cerr << "FINAL RESPONSE :\n" << response->Record.DebugString() << Endl; + } while (response->Record.LabeledCountersByGroupSize() == 0); + + Cerr << "MULITREQUEST\n"; + + auto actorId = NKikimr::CreateClusterLabeledCountersAggregatorActor(edge, TTabletTypes::PersQueue, 3, "rt3.*--*,user*!!/!!*!!/rt3.*--*", 3); // remove !! server.CleverServer->GetRuntime()->Register(actorId); response = server.CleverServer->GetRuntime()-> - GrabEdgeEvent<TEvTabletCounters::TEvTabletLabeledCountersResponse>(); - - Cerr << "FINAL RESPONSE2 :\n" << response->Record.DebugString() << Endl; - UNIT_ASSERT(response->Record.LabeledCountersByGroupSize()); - - } - - Y_UNIT_TEST(TestUncompressedSize) { - TRateLimiterTestSetup setup(NKikimrPQ::TPQConfig::TQuotingConfig::USER_PAYLOAD_SIZE); - - setup.CreateTopic("account/topic"); - - THolder<IProducer> producer = setup.StartProducer("account/topic", true); - - TString data = TString("12345") * 100; - for (ui32 i = 0; i < 100; ++i) { - producer->Write(data); - } - auto writeResult = producer->Write(data); - const auto& writeResponse = writeResult.GetValueSync().Response; - UNIT_ASSERT_EQUAL_C(Ydb::StatusIds::SUCCESS, writeResponse.status(), "Response: " << writeResponse); - + GrabEdgeEvent<TEvTabletCounters::TEvTabletLabeledCountersResponse>(); + + Cerr << "FINAL RESPONSE2 :\n" << response->Record.DebugString() << Endl; + UNIT_ASSERT(response->Record.LabeledCountersByGroupSize()); + + } + + Y_UNIT_TEST(TestUncompressedSize) { + TRateLimiterTestSetup setup(NKikimrPQ::TPQConfig::TQuotingConfig::USER_PAYLOAD_SIZE); + + setup.CreateTopic("account/topic"); + + THolder<IProducer> producer = setup.StartProducer("account/topic", true); + + TString data = TString("12345") * 100; + for (ui32 i = 0; i < 100; ++i) { + producer->Write(data); + } + auto writeResult = producer->Write(data); + const auto& writeResponse = writeResult.GetValueSync().Response; + UNIT_ASSERT_EQUAL_C(Ydb::StatusIds::SUCCESS, writeResponse.status(), "Response: " << writeResponse); + auto pqLib = TPQLib::WithCerrLogger(); - using namespace NPersQueue; + using namespace NPersQueue; auto [consumer, ccResult] = CreateConsumer(pqLib, setup.GetGrpcPort(), "shared/user", {"/account/topic" , {}}, 1000, false); - Cerr << ccResult.Response << "\n"; - - auto msg = consumer->GetNextMessage(); - msg.Wait(); - Cerr << "ASSIGN RESPONSE: " << msg.GetValue().Response << "\n"; - UNIT_ASSERT(msg.GetValue().Type == EMT_ASSIGNED); - auto pp = msg.GetValue().StartRead; - pp.SetValue(TAssignInfo()); - Cerr << "START READ\n"; - ui32 count = 0; - do { - msg = consumer->GetNextMessage(); - msg.Wait(); - Cerr << "GOT LAST MESSAGE: " << msg.GetValue().Response << "\n"; - for (auto& pd : msg.GetValue().Response.data_batch().partition_data()) { - for (auto & b : pd.batches()) { - for (auto& md : b.message_data()) { - UNIT_ASSERT(md.uncompressed_size() == 500); - ++count; - } - } - } - Cerr << count << "\n"; - } while (count < 100); - } - + Cerr << ccResult.Response << "\n"; + + auto msg = consumer->GetNextMessage(); + msg.Wait(); + Cerr << "ASSIGN RESPONSE: " << msg.GetValue().Response << "\n"; + UNIT_ASSERT(msg.GetValue().Type == EMT_ASSIGNED); + auto pp = msg.GetValue().StartRead; + pp.SetValue(TAssignInfo()); + Cerr << "START READ\n"; + ui32 count = 0; + do { + msg = consumer->GetNextMessage(); + msg.Wait(); + Cerr << "GOT LAST MESSAGE: " << msg.GetValue().Response << "\n"; + for (auto& pd : msg.GetValue().Response.data_batch().partition_data()) { + for (auto & b : pd.batches()) { + for (auto& md : b.message_data()) { + UNIT_ASSERT(md.uncompressed_size() == 500); + ++count; + } + } + } + Cerr << count << "\n"; + } while (count < 100); + } + Y_UNIT_TEST(TestReadQuotasSimple) { TRateLimiterTestSetup setup(NKikimrPQ::TPQConfig::TQuotingConfig::USER_PAYLOAD_SIZE, 1000, 1000, true); - + const TString topicPath = "acc/topic1"; const TString consumerPath = "acc2/reader1"; setup.CreateTopic(topicPath); @@ -2105,7 +2105,7 @@ namespace { } } - Y_UNIT_TEST(TestDeletionOfTopic) { + Y_UNIT_TEST(TestDeletionOfTopic) { if (NSan::ASanIsOn()) { return; } @@ -2114,21 +2114,21 @@ namespace { server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 1); server.WaitInit(SHORT_TOPIC_NAME); server.EnableLogs({ NKikimrServices::PQ_READ_PROXY }); - + server.AnnoyingClient->DeleteTopic2(DEFAULT_TOPIC_NAME, NPersQueue::NErrorCode::OK, false); auto pqLib = TPQLib::WithCerrLogger(); - + ui32 maxCount = 1; bool unpack = false; auto [consumer, ccResult] = CreateConsumer(pqLib, server.GrpcPort, "user", {SHORT_TOPIC_NAME, {}}, maxCount, unpack); Cerr << "Consumer create response: " << ccResult.Response << "\n"; - - auto isDead = consumer->IsDead(); - isDead.Wait(); + + auto isDead = consumer->IsDead(); + isDead.Wait(); Cerr << "Is dead future value: " << isDead.GetValue() << "\n"; UNIT_ASSERT_EQUAL(ccResult.Response.Getissues(0).issue_code(), Ydb::PersQueue::ErrorCode::UNKNOWN_TOPIC); - } -*/ + } +*/ Y_UNIT_TEST(Codecs_InitWriteSession_DefaultTopicSupportedCodecsInInitResponse) { APITestSetup setup{TEST_CASE_NAME}; grpc::ClientContext context; @@ -2224,7 +2224,7 @@ namespace { setup.GetPQConfig().SetClustersUpdateTimeoutSec(0); setup.GetPQConfig().SetRemoteClusterEnabledDelaySec(0); - setup.GetPQConfig().SetCloseClientSessionWithEnabledRemotePreferredClusterDelaySec(0); + setup.GetPQConfig().SetCloseClientSessionWithEnabledRemotePreferredClusterDelaySec(0); auto sessionWithNoPreferredCluster = setup.InitWriteSession(GenerateSessionSetupWithPreferredCluster(TString())); auto sessionWithLocalPreffedCluster = setup.InitWriteSession(GenerateSessionSetupWithPreferredCluster(setup.GetLocalCluster())); auto sessionWithRemotePrefferedCluster = setup.InitWriteSession(GenerateSessionSetupWithPreferredCluster(setup.GetRemoteCluster())); @@ -2269,15 +2269,15 @@ namespace { auto log = setup.GetLog(); setup.GetPQConfig().SetClustersUpdateTimeoutSec(0); setup.GetPQConfig().SetRemoteClusterEnabledDelaySec(0); - setup.GetPQConfig().SetCloseClientSessionWithEnabledRemotePreferredClusterDelaySec(0); - + setup.GetPQConfig().SetCloseClientSessionWithEnabledRemotePreferredClusterDelaySec(0); + const auto edgeActorID = setup.GetServer().GetRuntime()->AllocateEdgeActor(); - setup.GetServer().GetRuntime()->Send(new IEventHandle(NPQ::NClusterTracker::MakeClusterTrackerID(), edgeActorID, new NPQ::NClusterTracker::TEvClusterTracker::TEvSubscribe)); - log << TLOG_INFO << "Wait for cluster tracker event"; - auto clustersUpdate = setup.GetServer().GetRuntime()->GrabEdgeEvent<NPQ::NClusterTracker::TEvClusterTracker::TEvClustersUpdate>(); + setup.GetServer().GetRuntime()->Send(new IEventHandle(NPQ::NClusterTracker::MakeClusterTrackerID(), edgeActorID, new NPQ::NClusterTracker::TEvClusterTracker::TEvSubscribe)); + log << TLOG_INFO << "Wait for cluster tracker event"; + auto clustersUpdate = setup.GetServer().GetRuntime()->GrabEdgeEvent<NPQ::NClusterTracker::TEvClusterTracker::TEvClustersUpdate>(); + - auto session = setup.InitWriteSession(GenerateSessionSetupWithPreferredCluster(setup.GetRemoteCluster())); AssertStreamingSessionAlive(session.first); @@ -2290,26 +2290,26 @@ namespace { auto log = setup.GetLog(); setup.GetPQConfig().SetClustersUpdateTimeoutSec(0); setup.GetPQConfig().SetRemoteClusterEnabledDelaySec(0); - setup.GetPQConfig().SetCloseClientSessionWithEnabledRemotePreferredClusterDelaySec(2); + setup.GetPQConfig().SetCloseClientSessionWithEnabledRemotePreferredClusterDelaySec(2); - TInstant now(TInstant::Now()); + TInstant now(TInstant::Now()); auto session = setup.InitWriteSession(GenerateSessionSetupWithPreferredCluster("non-existent-cluster")); AssertStreamingSessionAlive(session.first); AssertStreamingSessionDead(session.first, Ydb::StatusIds::ABORTED, Ydb::PersQueue::ErrorCode::PREFERRED_CLUSTER_MISMATCHED); - UNIT_ASSERT(TInstant::Now() - now > TDuration::MilliSeconds(1999)); + UNIT_ASSERT(TInstant::Now() - now > TDuration::MilliSeconds(1999)); } Y_UNIT_TEST(PreferredCluster_EnabledRemotePreferredClusterAndRemoteClusterEnabledDelaySec_SessionDiesOnlyAfterDelay) { APITestSetup setup{TEST_CASE_NAME}; auto log = setup.GetLog(); setup.GetPQConfig().SetClustersUpdateTimeoutSec(0); - setup.GetPQConfig().SetCloseClientSessionWithEnabledRemotePreferredClusterDelaySec(2); + setup.GetPQConfig().SetCloseClientSessionWithEnabledRemotePreferredClusterDelaySec(2); const auto edgeActorID = setup.GetServer().GetRuntime()->AllocateEdgeActor(); - setup.GetPQConfig().SetRemoteClusterEnabledDelaySec(0); + setup.GetPQConfig().SetRemoteClusterEnabledDelaySec(0); - TInstant now(TInstant::Now()); + TInstant now(TInstant::Now()); auto session = setup.InitWriteSession(GenerateSessionSetupWithPreferredCluster(setup.GetRemoteCluster())); setup.GetServer().GetRuntime()->Send(new IEventHandle(NPQ::NClusterTracker::MakeClusterTrackerID(), edgeActorID, new NPQ::NClusterTracker::TEvClusterTracker::TEvSubscribe)); @@ -2317,9 +2317,9 @@ namespace { auto clustersUpdate = setup.GetServer().GetRuntime()->GrabEdgeEvent<NPQ::NClusterTracker::TEvClusterTracker::TEvClustersUpdate>(); AssertStreamingSessionAlive(session.first); - AssertStreamingSessionDead(session.first, Ydb::StatusIds::ABORTED, Ydb::PersQueue::ErrorCode::PREFERRED_CLUSTER_MISMATCHED); + AssertStreamingSessionDead(session.first, Ydb::StatusIds::ABORTED, Ydb::PersQueue::ErrorCode::PREFERRED_CLUSTER_MISMATCHED); - UNIT_ASSERT(TInstant::Now() - now > TDuration::MilliSeconds(1999)); + UNIT_ASSERT(TInstant::Now() - now > TDuration::MilliSeconds(1999)); } Y_UNIT_TEST(PreferredCluster_RemotePreferredClusterEnabledWhileSessionInitializing_SessionDiesOnlyAfterInitializationAndDelay) { @@ -2327,7 +2327,7 @@ namespace { auto log = setup.GetLog(); setup.GetPQConfig().SetClustersUpdateTimeoutSec(0); setup.GetPQConfig().SetRemoteClusterEnabledDelaySec(0); - setup.GetPQConfig().SetCloseClientSessionWithEnabledRemotePreferredClusterDelaySec(2); + setup.GetPQConfig().SetCloseClientSessionWithEnabledRemotePreferredClusterDelaySec(2); const auto edgeActorID = setup.GetServer().GetRuntime()->AllocateEdgeActor(); setup.GetFlatMsgBusPQClient().UpdateDC(setup.GetRemoteCluster(), false, false); @@ -2339,19 +2339,19 @@ namespace { setup.GetServer().GetRuntime()->Send(new IEventHandle(NPQ::NClusterTracker::MakeClusterTrackerID(), edgeActorID, new NPQ::NClusterTracker::TEvClusterTracker::TEvSubscribe)); log << TLOG_INFO << "Wait for cluster tracker event"; auto clustersUpdate = setup.GetServer().GetRuntime()->GrabEdgeEvent<NPQ::NClusterTracker::TEvClusterTracker::TEvClustersUpdate>(); - TInstant now(TInstant::Now()); + TInstant now(TInstant::Now()); setup.InitSession(session, GenerateSessionSetupWithPreferredCluster(setup.GetRemoteCluster())); - + AssertStreamingSessionAlive(session); log << TLOG_INFO << "Set small delay and wait for initialized session with remote preferred cluster to die"; AssertStreamingSessionDead(session, Ydb::StatusIds::ABORTED, Ydb::PersQueue::ErrorCode::PREFERRED_CLUSTER_MISMATCHED); - - UNIT_ASSERT(TInstant::Now() - now > TDuration::MilliSeconds(1999)); - + + UNIT_ASSERT(TInstant::Now() - now > TDuration::MilliSeconds(1999)); + } - - Y_UNIT_TEST(SchemeOperationsTest) { + + Y_UNIT_TEST(SchemeOperationsTest) { NPersQueue::TTestServer server; server.EnableLogs({NKikimrServices::PQ_READ_PROXY, NKikimrServices::BLACKBOX_VALIDATOR }); TString topic1 = "rt3.dc1--acc--topic1"; @@ -2359,331 +2359,331 @@ namespace { server.AnnoyingClient->CreateTopic(topic1, 1); server.AnnoyingClient->CreateTopic(DEFAULT_TOPIC_NAME, 1); server.AnnoyingClient->CreateConsumer("user"); - - std::shared_ptr<grpc::Channel> Channel_; - std::unique_ptr<Ydb::PersQueue::V1::PersQueueService::Stub> StubP_; - - { + + std::shared_ptr<grpc::Channel> Channel_; + std::unique_ptr<Ydb::PersQueue::V1::PersQueueService::Stub> StubP_; + + { Channel_ = grpc::CreateChannel("localhost:" + ToString(server.GrpcPort), grpc::InsecureChannelCredentials()); - StubP_ = Ydb::PersQueue::V1::PersQueueService::NewStub(Channel_); - } - - do { - CreateTopicRequest request; - CreateTopicResponse response; + StubP_ = Ydb::PersQueue::V1::PersQueueService::NewStub(Channel_); + } + + do { + CreateTopicRequest request; + CreateTopicResponse response; request.set_path(TStringBuilder() << "/Root/PQ/" << topic3); - auto props = request.mutable_settings(); - props->set_partitions_count(1); - props->set_supported_format(Ydb::PersQueue::V1::TopicSettings::FORMAT_BASE); + auto props = request.mutable_settings(); + props->set_partitions_count(1); + props->set_supported_format(Ydb::PersQueue::V1::TopicSettings::FORMAT_BASE); props->set_retention_period_ms(TDuration::Days(1).MilliSeconds()); props->set_max_partition_storage_size(1000); props->set_max_partition_write_speed(1000); props->set_max_partition_write_burst(1000); - grpc::ClientContext rcontext; - rcontext.AddMetadata("x-ydb-auth-ticket", "user@" BUILTIN_ACL_DOMAIN); - - auto status = StubP_->CreateTopic(&rcontext, request, &response); - - UNIT_ASSERT(status.ok()); - CreateTopicResult res; - response.operation().result().UnpackTo(&res); - Cerr << response << "\n" << res << "\n"; - if (response.operation().status() == Ydb::StatusIds::UNAVAILABLE) { - Sleep(TDuration::Seconds(1)); - continue; - } - Cerr << response.operation() << "\n"; - UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::UNAUTHORIZED); - break; - } while (true); - - { - // local cluster - CreateTopicRequest request; - CreateTopicResponse response; + grpc::ClientContext rcontext; + rcontext.AddMetadata("x-ydb-auth-ticket", "user@" BUILTIN_ACL_DOMAIN); + + auto status = StubP_->CreateTopic(&rcontext, request, &response); + + UNIT_ASSERT(status.ok()); + CreateTopicResult res; + response.operation().result().UnpackTo(&res); + Cerr << response << "\n" << res << "\n"; + if (response.operation().status() == Ydb::StatusIds::UNAVAILABLE) { + Sleep(TDuration::Seconds(1)); + continue; + } + Cerr << response.operation() << "\n"; + UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::UNAUTHORIZED); + break; + } while (true); + + { + // local cluster + CreateTopicRequest request; + CreateTopicResponse response; request.set_path(TStringBuilder() << "/Root/PQ/" << topic3); - auto props = request.mutable_settings(); - props->set_partitions_count(2); - props->set_supported_format(Ydb::PersQueue::V1::TopicSettings::FORMAT_BASE); + auto props = request.mutable_settings(); + props->set_partitions_count(2); + props->set_supported_format(Ydb::PersQueue::V1::TopicSettings::FORMAT_BASE); props->set_retention_period_ms(TDuration::Days(1).MilliSeconds()); props->set_max_partition_storage_size(1000); props->set_max_partition_write_speed(1000); props->set_max_partition_write_burst(1000); - - grpc::ClientContext rcontext; - - auto status = StubP_->CreateTopic(&rcontext, request, &response); - - UNIT_ASSERT(status.ok()); - CreateTopicResult res; - response.operation().result().UnpackTo(&res); - Cerr << response << "\n" << res << "\n"; - UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SUCCESS); + + grpc::ClientContext rcontext; + + auto status = StubP_->CreateTopic(&rcontext, request, &response); + + UNIT_ASSERT(status.ok()); + CreateTopicResult res; + response.operation().result().UnpackTo(&res); + Cerr << response << "\n" << res << "\n"; + UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SUCCESS); server.AnnoyingClient->AddTopic(topic3); - } - - auto alter =[&StubP_](const AlterTopicRequest& request, Ydb::StatusIds::StatusCode statusCode, bool auth) - { - AlterTopicResponse response; - - grpc::ClientContext rcontext; - if (auth) - rcontext.AddMetadata("x-ydb-auth-ticket", "user@" BUILTIN_ACL_DOMAIN); - - auto status = StubP_->AlterTopic(&rcontext, request, &response); - - UNIT_ASSERT(status.ok()); - AlterTopicResult res; - response.operation().result().UnpackTo(&res); - Cerr << response << "\n" << res << "\n"; - UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), statusCode); - }; - - AlterTopicRequest request; + } + + auto alter =[&StubP_](const AlterTopicRequest& request, Ydb::StatusIds::StatusCode statusCode, bool auth) + { + AlterTopicResponse response; + + grpc::ClientContext rcontext; + if (auth) + rcontext.AddMetadata("x-ydb-auth-ticket", "user@" BUILTIN_ACL_DOMAIN); + + auto status = StubP_->AlterTopic(&rcontext, request, &response); + + UNIT_ASSERT(status.ok()); + AlterTopicResult res; + response.operation().result().UnpackTo(&res); + Cerr << response << "\n" << res << "\n"; + UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), statusCode); + }; + + AlterTopicRequest request; request.set_path(TStringBuilder() << "/Root/PQ/" << topic3); - auto props = request.mutable_settings(); - props->set_partitions_count(1); - props->set_supported_format(Ydb::PersQueue::V1::TopicSettings::FORMAT_BASE); + auto props = request.mutable_settings(); + props->set_partitions_count(1); + props->set_supported_format(Ydb::PersQueue::V1::TopicSettings::FORMAT_BASE); props->set_retention_period_ms(TDuration::Days(1).MilliSeconds()); props->set_max_partition_storage_size(1000); props->set_max_partition_write_speed(1000); props->set_max_partition_write_burst(1000); - alter(request, Ydb::StatusIds::UNAUTHORIZED, true); - alter(request, Ydb::StatusIds::GENERIC_ERROR, false); - props->set_partitions_count(3); - alter(request, Ydb::StatusIds::SUCCESS, false); - props->set_supported_format(Ydb::PersQueue::V1::TopicSettings::Format(6)); - alter(request, Ydb::StatusIds::BAD_REQUEST, false); - props->set_supported_format(Ydb::PersQueue::V1::TopicSettings::Format(1)); - auto rr = props->add_read_rules(); - alter(request, Ydb::StatusIds::BAD_REQUEST, false); + alter(request, Ydb::StatusIds::UNAUTHORIZED, true); + alter(request, Ydb::StatusIds::GENERIC_ERROR, false); + props->set_partitions_count(3); + alter(request, Ydb::StatusIds::SUCCESS, false); + props->set_supported_format(Ydb::PersQueue::V1::TopicSettings::Format(6)); + alter(request, Ydb::StatusIds::BAD_REQUEST, false); + props->set_supported_format(Ydb::PersQueue::V1::TopicSettings::Format(1)); + auto rr = props->add_read_rules(); + alter(request, Ydb::StatusIds::BAD_REQUEST, false); server.AnnoyingClient->AlterTopic(); props->add_supported_codecs(Ydb::PersQueue::V1::CODEC_RAW); props->add_supported_codecs(Ydb::PersQueue::V1::CODEC_ZSTD); - - props->set_max_partition_write_speed(123); - props->set_max_partition_write_burst(1234); - props->set_max_partition_storage_size(234); - props->set_partitions_count(3); - - rr->set_consumer_name("consumer"); - rr->set_supported_format(Ydb::PersQueue::V1::TopicSettings::Format(1)); - + + props->set_max_partition_write_speed(123); + props->set_max_partition_write_burst(1234); + props->set_max_partition_storage_size(234); + props->set_partitions_count(3); + + rr->set_consumer_name("consumer"); + rr->set_supported_format(Ydb::PersQueue::V1::TopicSettings::Format(1)); + rr->add_supported_codecs(Ydb::PersQueue::V1::CODEC_LZOP); rr->add_supported_codecs(Ydb::PersQueue::V1::CODEC_GZIP); - - rr->set_important(true); - rr->set_starting_message_timestamp_ms(111); - rr->set_version(567); - - (*props->mutable_attributes())["_allow_unauthenticated_read"] = "true"; - - (*props->mutable_attributes())["_partitions_per_tablet"] = "5"; - alter(request, Ydb::StatusIds::SUCCESS, false); - + + rr->set_important(true); + rr->set_starting_message_timestamp_ms(111); + rr->set_version(567); + + (*props->mutable_attributes())["_allow_unauthenticated_read"] = "true"; + + (*props->mutable_attributes())["_partitions_per_tablet"] = "5"; + alter(request, Ydb::StatusIds::SUCCESS, false); + TString topic4 = "rt3.dc1--acc--topic4"; server.AnnoyingClient->CreateTopic(topic4, 1); //ensure creation auto res = server.AnnoyingClient->DescribeTopic({topic3}); - Cerr << res.DebugString(); - TString resultDescribe = R"___(TopicInfo { - Topic: "rt3.dc1--acc--topic3" - NumPartitions: 3 - Config { - PartitionConfig { - MaxCountInPartition: 2147483647 - MaxSizeInPartition: 234 + Cerr << res.DebugString(); + TString resultDescribe = R"___(TopicInfo { + Topic: "rt3.dc1--acc--topic3" + NumPartitions: 3 + Config { + PartitionConfig { + MaxCountInPartition: 2147483647 + MaxSizeInPartition: 234 LifetimeSeconds: 86400 - ImportantClientId: "consumer" + ImportantClientId: "consumer" SourceIdLifetimeSeconds: 1382400 - WriteSpeedInBytesPerSecond: 123 - BurstSize: 1234 - NumChannels: 10 - ExplicitChannelProfiles { - PoolKind: "test" - } - ExplicitChannelProfiles { - PoolKind: "test" - } - ExplicitChannelProfiles { - PoolKind: "test" - } - ExplicitChannelProfiles { - PoolKind: "test" - } - ExplicitChannelProfiles { - PoolKind: "test" - } - ExplicitChannelProfiles { - PoolKind: "test" - } - ExplicitChannelProfiles { - PoolKind: "test" - } - ExplicitChannelProfiles { - PoolKind: "test" - } - ExplicitChannelProfiles { - PoolKind: "test" - } - ExplicitChannelProfiles { - PoolKind: "test" - } - ExplicitChannelProfiles { - PoolKind: "test" - } - ExplicitChannelProfiles { - PoolKind: "test" - } + WriteSpeedInBytesPerSecond: 123 + BurstSize: 1234 + NumChannels: 10 + ExplicitChannelProfiles { + PoolKind: "test" + } + ExplicitChannelProfiles { + PoolKind: "test" + } + ExplicitChannelProfiles { + PoolKind: "test" + } + ExplicitChannelProfiles { + PoolKind: "test" + } + ExplicitChannelProfiles { + PoolKind: "test" + } + ExplicitChannelProfiles { + PoolKind: "test" + } + ExplicitChannelProfiles { + PoolKind: "test" + } + ExplicitChannelProfiles { + PoolKind: "test" + } + ExplicitChannelProfiles { + PoolKind: "test" + } + ExplicitChannelProfiles { + PoolKind: "test" + } + ExplicitChannelProfiles { + PoolKind: "test" + } + ExplicitChannelProfiles { + PoolKind: "test" + } SourceIdMaxCounts: 6000000 - } - Version: 3 - LocalDC: true - RequireAuthWrite: true - RequireAuthRead: false - Producer: "acc" - Ident: "acc" - Topic: "topic3" - DC: "dc1" - ReadRules: "consumer" - ReadFromTimestampsMs: 111 - ConsumerFormatVersions: 0 - ConsumerCodecs { - Ids: 2 - Ids: 1 - Codecs: "lzop" - Codecs: "gzip" - } + } + Version: 3 + LocalDC: true + RequireAuthWrite: true + RequireAuthRead: false + Producer: "acc" + Ident: "acc" + Topic: "topic3" + DC: "dc1" + ReadRules: "consumer" + ReadFromTimestampsMs: 111 + ConsumerFormatVersions: 0 + ConsumerCodecs { + Ids: 2 + Ids: 1 + Codecs: "lzop" + Codecs: "gzip" + } ReadRuleServiceTypes: "data-transfer" - FormatVersion: 0 - Codecs { - Ids: 0 - Ids: 3 - Codecs: "raw" - Codecs: "zstd" - } - ReadRuleVersions: 567 - } - ErrorCode: OK -} -)___"; - UNIT_ASSERT_VALUES_EQUAL(res.DebugString(), resultDescribe); - - Cerr << "DESCRIBES:\n"; - { - DescribeTopicRequest request; - DescribeTopicResponse response; + FormatVersion: 0 + Codecs { + Ids: 0 + Ids: 3 + Codecs: "raw" + Codecs: "zstd" + } + ReadRuleVersions: 567 + } + ErrorCode: OK +} +)___"; + UNIT_ASSERT_VALUES_EQUAL(res.DebugString(), resultDescribe); + + Cerr << "DESCRIBES:\n"; + { + DescribeTopicRequest request; + DescribeTopicResponse response; request.set_path(TStringBuilder() << "/Root/PQ/" << topic3); - grpc::ClientContext rcontext; - rcontext.AddMetadata("x-ydb-auth-ticket", "user@" BUILTIN_ACL_DOMAIN); - - auto status = StubP_->DescribeTopic(&rcontext, request, &response); - - UNIT_ASSERT(status.ok()); - DescribeTopicResult res; - response.operation().result().UnpackTo(&res); - Cerr << response << "\n" << res << "\n"; + grpc::ClientContext rcontext; + rcontext.AddMetadata("x-ydb-auth-ticket", "user@" BUILTIN_ACL_DOMAIN); + + auto status = StubP_->DescribeTopic(&rcontext, request, &response); + + UNIT_ASSERT(status.ok()); + DescribeTopicResult res; + response.operation().result().UnpackTo(&res); + Cerr << response << "\n" << res << "\n"; UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SCHEME_ERROR); // muts be Ydb::StatusIds::UNAUTHORIZED); - } - - { - DescribeTopicRequest request; - DescribeTopicResponse response; - request.set_path("/Root/PQ/rt3.dc1--acc--topic123"); - grpc::ClientContext rcontext; - - auto status = StubP_->DescribeTopic(&rcontext, request, &response); - - UNIT_ASSERT(status.ok()); - DescribeTopicResult res; - response.operation().result().UnpackTo(&res); - Cerr << response << "\n" << res << "\n"; + } + + { + DescribeTopicRequest request; + DescribeTopicResponse response; + request.set_path("/Root/PQ/rt3.dc1--acc--topic123"); + grpc::ClientContext rcontext; + + auto status = StubP_->DescribeTopic(&rcontext, request, &response); + + UNIT_ASSERT(status.ok()); + DescribeTopicResult res; + response.operation().result().UnpackTo(&res); + Cerr << response << "\n" << res << "\n"; UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SCHEME_ERROR); - } - - { - DescribeTopicRequest request; - DescribeTopicResponse response; + } + + { + DescribeTopicRequest request; + DescribeTopicResponse response; request.set_path(TStringBuilder() << "/Root/PQ/" << topic3); - grpc::ClientContext rcontext; - - auto status = StubP_->DescribeTopic(&rcontext, request, &response); - - UNIT_ASSERT(status.ok()); - DescribeTopicResult res; - response.operation().result().UnpackTo(&res); - Cerr << response << "\n" << res << "\n"; - props->CopyFrom(res.settings()); - - UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SUCCESS); - } - - - alter(request, Ydb::StatusIds::SUCCESS, false); - - { - DescribeTopicRequest request; - DescribeTopicResponse response; + grpc::ClientContext rcontext; + + auto status = StubP_->DescribeTopic(&rcontext, request, &response); + + UNIT_ASSERT(status.ok()); + DescribeTopicResult res; + response.operation().result().UnpackTo(&res); + Cerr << response << "\n" << res << "\n"; + props->CopyFrom(res.settings()); + + UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SUCCESS); + } + + + alter(request, Ydb::StatusIds::SUCCESS, false); + + { + DescribeTopicRequest request; + DescribeTopicResponse response; request.set_path(TStringBuilder() << "/Root/PQ/" << topic3); - grpc::ClientContext rcontext; - - auto status = StubP_->DescribeTopic(&rcontext, request, &response); - - UNIT_ASSERT(status.ok()); - DescribeTopicResult res; - response.operation().result().UnpackTo(&res); - Cerr << response << "\n" << res << "\n"; - UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SUCCESS); - UNIT_ASSERT_VALUES_EQUAL(props->DebugString(), res.settings().DebugString()); - } - - { - DropTopicRequest request; - DropTopicResponse response; + grpc::ClientContext rcontext; + + auto status = StubP_->DescribeTopic(&rcontext, request, &response); + + UNIT_ASSERT(status.ok()); + DescribeTopicResult res; + response.operation().result().UnpackTo(&res); + Cerr << response << "\n" << res << "\n"; + UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SUCCESS); + UNIT_ASSERT_VALUES_EQUAL(props->DebugString(), res.settings().DebugString()); + } + + { + DropTopicRequest request; + DropTopicResponse response; request.set_path(TStringBuilder() << "/Root/PQ/" << topic3); - grpc::ClientContext rcontext; - auto status = StubP_->DropTopic(&rcontext, request, &response); - - UNIT_ASSERT(status.ok()); - DropTopicResult res; - response.operation().result().UnpackTo(&res); - Cerr << response << "\n" << res << "\n"; - UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SUCCESS); + grpc::ClientContext rcontext; + auto status = StubP_->DropTopic(&rcontext, request, &response); + + UNIT_ASSERT(status.ok()); + DropTopicResult res; + response.operation().result().UnpackTo(&res); + Cerr << response << "\n" << res << "\n"; + UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SUCCESS); server.AnnoyingClient->RemoveTopic(topic3); - } - - - { - DropTopicRequest request; - DropTopicResponse response; + } + + + { + DropTopicRequest request; + DropTopicResponse response; request.set_path(TStringBuilder() << "/Root/PQ/" << topic3); - grpc::ClientContext rcontext; - auto status = StubP_->DropTopic(&rcontext, request, &response); - - UNIT_ASSERT(status.ok()); - DropTopicResult res; - response.operation().result().UnpackTo(&res); - Cerr << response << "\n" << res << "\n"; - UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SCHEME_ERROR); - } - + grpc::ClientContext rcontext; + auto status = StubP_->DropTopic(&rcontext, request, &response); + + UNIT_ASSERT(status.ok()); + DropTopicResult res; + response.operation().result().UnpackTo(&res); + Cerr << response << "\n" << res << "\n"; + UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SCHEME_ERROR); + } + server.AnnoyingClient->CreateTopic("rt3.dc1--acc--topic5", 1); //ensure creation server.AnnoyingClient->DescribeTopic({topic3}, true); - - - { - NYdb::TDriverConfig driverCfg; + + + { + NYdb::TDriverConfig driverCfg; driverCfg.SetEndpoint(TStringBuilder() << "localhost:" << server.GrpcPort); std::shared_ptr<NYdb::TDriver> ydbDriver(new NYdb::TDriver(driverCfg)); auto pqClient = NYdb::NPersQueue::TPersQueueClient(*ydbDriver); - - auto res = pqClient.CreateTopic("/Root/PQ/rt3.dc1--acc2--topic2"); - res.Wait(); - Cerr << res.GetValue().IsSuccess() << " " << res.GetValue().GetIssues().ToString() << "\n"; - } - - } - + + auto res = pqClient.CreateTopic("/Root/PQ/rt3.dc1--acc2--topic2"); + res.Wait(); + Cerr << res.GetValue().IsSuccess() << " " << res.GetValue().GetIssues().ToString() << "\n"; + } + + } + Y_UNIT_TEST(SchemeOperationsCheckPropValues) { NPersQueue::TTestServer server; @@ -2904,125 +2904,125 @@ namespace { }); } - - Y_UNIT_TEST(ReadRuleServiceTypeLimit) { - TServerSettings settings = PQSettings(0); - { - auto type = settings.PQConfig.AddClientServiceType(); - type->SetName("MyGreatType"); - type->SetMaxReadRulesCountPerTopic(3); - } - NPersQueue::TTestServer server(settings); - server.EnableLogs({ NKikimrServices::PQ_READ_PROXY, NKikimrServices::BLACKBOX_VALIDATOR }); - - std::unique_ptr<Ydb::PersQueue::V1::PersQueueService::Stub> pqStub; - - { - std::shared_ptr<grpc::Channel> channel = grpc::CreateChannel("localhost:" + ToString(server.GrpcPort), grpc::InsecureChannelCredentials()); - pqStub = Ydb::PersQueue::V1::PersQueueService::NewStub(channel); - } - { - CreateTopicRequest request; - CreateTopicResponse response; - request.set_path("/Root/PQ/rt3.dc1--acc--some-topic"); - auto props = request.mutable_settings(); - props->set_partitions_count(1); - props->set_supported_format(Ydb::PersQueue::V1::TopicSettings::FORMAT_BASE); - props->set_retention_period_ms(TDuration::Days(1).MilliSeconds()); - - grpc::ClientContext rcontext; - auto status = pqStub->CreateTopic(&rcontext, request, &response); - - UNIT_ASSERT(status.ok()); - CreateTopicResult res; - response.operation().result().UnpackTo(&res); - Cerr << response << "\n" << res << "\n"; - UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SUCCESS); - } - auto checkDescribe = [&](const TVector<std::pair<TString, TString>>& readRules) { - DescribeTopicRequest request; - DescribeTopicResponse response; - request.set_path("/Root/PQ/rt3.dc1--acc--some-topic"); - grpc::ClientContext rcontext; - - auto status = pqStub->DescribeTopic(&rcontext, request, &response); - UNIT_ASSERT(status.ok()); - DescribeTopicResult res; - response.operation().result().UnpackTo(&res); - Cerr << response << "\n" << res << "\n"; - UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SUCCESS); - - UNIT_ASSERT_VALUES_EQUAL(res.settings().read_rules().size(), readRules.size()); - for (ui64 i = 0; i < readRules.size(); ++i) { - const auto& rr = res.settings().read_rules(i); - UNIT_ASSERT_EQUAL(rr.consumer_name(), readRules[i].first); - UNIT_ASSERT_EQUAL(rr.service_type(), readRules[i].second); - } - }; - - TVector<std::pair<TString, TString>> readRules; - for (ui32 i = 0; i < 4; ++i) { - AddReadRuleRequest request; - AddReadRuleResponse response; - request.set_path("/Root/PQ/rt3.dc1--acc--some-topic"); - auto rr = request.mutable_read_rule(); - rr->set_supported_format(Ydb::PersQueue::V1::TopicSettings::Format(1)); - rr->set_consumer_name(TStringBuilder() << "acc/new_user" << i); - rr->set_service_type("MyGreatType"); - readRules.push_back({TStringBuilder() << "acc/new_user" << i, "MyGreatType"}); - - grpc::ClientContext rcontext; - auto status = pqStub->AddReadRule(&rcontext, request, &response); - Cerr << response << "\n"; - if (i < 3) { - UNIT_ASSERT(status.ok()); - checkDescribe(readRules); - } - if (i == 3) { - UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::BAD_REQUEST); - } - } - { - AddReadRuleRequest request; - AddReadRuleResponse response; - request.set_path("/Root/PQ/rt3.dc1--acc--some-topic"); - auto rr = request.mutable_read_rule(); - rr->set_supported_format(Ydb::PersQueue::V1::TopicSettings::Format(1)); - rr->set_consumer_name(TStringBuilder() << "acc/new_user0"); - rr->set_service_type("MyGreatType"); - - grpc::ClientContext rcontext; - auto status = pqStub->AddReadRule(&rcontext, request, &response); - Cerr << response << "\n"; - UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::ALREADY_EXISTS); - } - { - AlterTopicRequest request; - AlterTopicResponse response; - request.set_path("/Root/PQ/rt3.dc1--acc--some-topic"); - auto props = request.mutable_settings(); - props->set_partitions_count(1); - props->set_supported_format(Ydb::PersQueue::V1::TopicSettings::FORMAT_BASE); - props->set_retention_period_ms(TDuration::Days(1).MilliSeconds()); - for(ui32 i = 0; i < 4; ++i) { - auto rr = props->add_read_rules(); - rr->set_supported_format(Ydb::PersQueue::V1::TopicSettings::Format(1)); - rr->set_consumer_name(TStringBuilder() << "acc/new_user" << i); - rr->set_service_type("MyGreatType"); - } - - grpc::ClientContext rcontext; - auto status = pqStub->AlterTopic(&rcontext, request, &response); - - UNIT_ASSERT(status.ok()); - CreateTopicResult res; - response.operation().result().UnpackTo(&res); - Cerr << response << "\n" << res << "\n"; - UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::BAD_REQUEST); - } - } - - + + Y_UNIT_TEST(ReadRuleServiceTypeLimit) { + TServerSettings settings = PQSettings(0); + { + auto type = settings.PQConfig.AddClientServiceType(); + type->SetName("MyGreatType"); + type->SetMaxReadRulesCountPerTopic(3); + } + NPersQueue::TTestServer server(settings); + server.EnableLogs({ NKikimrServices::PQ_READ_PROXY, NKikimrServices::BLACKBOX_VALIDATOR }); + + std::unique_ptr<Ydb::PersQueue::V1::PersQueueService::Stub> pqStub; + + { + std::shared_ptr<grpc::Channel> channel = grpc::CreateChannel("localhost:" + ToString(server.GrpcPort), grpc::InsecureChannelCredentials()); + pqStub = Ydb::PersQueue::V1::PersQueueService::NewStub(channel); + } + { + CreateTopicRequest request; + CreateTopicResponse response; + request.set_path("/Root/PQ/rt3.dc1--acc--some-topic"); + auto props = request.mutable_settings(); + props->set_partitions_count(1); + props->set_supported_format(Ydb::PersQueue::V1::TopicSettings::FORMAT_BASE); + props->set_retention_period_ms(TDuration::Days(1).MilliSeconds()); + + grpc::ClientContext rcontext; + auto status = pqStub->CreateTopic(&rcontext, request, &response); + + UNIT_ASSERT(status.ok()); + CreateTopicResult res; + response.operation().result().UnpackTo(&res); + Cerr << response << "\n" << res << "\n"; + UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SUCCESS); + } + auto checkDescribe = [&](const TVector<std::pair<TString, TString>>& readRules) { + DescribeTopicRequest request; + DescribeTopicResponse response; + request.set_path("/Root/PQ/rt3.dc1--acc--some-topic"); + grpc::ClientContext rcontext; + + auto status = pqStub->DescribeTopic(&rcontext, request, &response); + UNIT_ASSERT(status.ok()); + DescribeTopicResult res; + response.operation().result().UnpackTo(&res); + Cerr << response << "\n" << res << "\n"; + UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SUCCESS); + + UNIT_ASSERT_VALUES_EQUAL(res.settings().read_rules().size(), readRules.size()); + for (ui64 i = 0; i < readRules.size(); ++i) { + const auto& rr = res.settings().read_rules(i); + UNIT_ASSERT_EQUAL(rr.consumer_name(), readRules[i].first); + UNIT_ASSERT_EQUAL(rr.service_type(), readRules[i].second); + } + }; + + TVector<std::pair<TString, TString>> readRules; + for (ui32 i = 0; i < 4; ++i) { + AddReadRuleRequest request; + AddReadRuleResponse response; + request.set_path("/Root/PQ/rt3.dc1--acc--some-topic"); + auto rr = request.mutable_read_rule(); + rr->set_supported_format(Ydb::PersQueue::V1::TopicSettings::Format(1)); + rr->set_consumer_name(TStringBuilder() << "acc/new_user" << i); + rr->set_service_type("MyGreatType"); + readRules.push_back({TStringBuilder() << "acc/new_user" << i, "MyGreatType"}); + + grpc::ClientContext rcontext; + auto status = pqStub->AddReadRule(&rcontext, request, &response); + Cerr << response << "\n"; + if (i < 3) { + UNIT_ASSERT(status.ok()); + checkDescribe(readRules); + } + if (i == 3) { + UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::BAD_REQUEST); + } + } + { + AddReadRuleRequest request; + AddReadRuleResponse response; + request.set_path("/Root/PQ/rt3.dc1--acc--some-topic"); + auto rr = request.mutable_read_rule(); + rr->set_supported_format(Ydb::PersQueue::V1::TopicSettings::Format(1)); + rr->set_consumer_name(TStringBuilder() << "acc/new_user0"); + rr->set_service_type("MyGreatType"); + + grpc::ClientContext rcontext; + auto status = pqStub->AddReadRule(&rcontext, request, &response); + Cerr << response << "\n"; + UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::ALREADY_EXISTS); + } + { + AlterTopicRequest request; + AlterTopicResponse response; + request.set_path("/Root/PQ/rt3.dc1--acc--some-topic"); + auto props = request.mutable_settings(); + props->set_partitions_count(1); + props->set_supported_format(Ydb::PersQueue::V1::TopicSettings::FORMAT_BASE); + props->set_retention_period_ms(TDuration::Days(1).MilliSeconds()); + for(ui32 i = 0; i < 4; ++i) { + auto rr = props->add_read_rules(); + rr->set_supported_format(Ydb::PersQueue::V1::TopicSettings::Format(1)); + rr->set_consumer_name(TStringBuilder() << "acc/new_user" << i); + rr->set_service_type("MyGreatType"); + } + + grpc::ClientContext rcontext; + auto status = pqStub->AlterTopic(&rcontext, request, &response); + + UNIT_ASSERT(status.ok()); + CreateTopicResult res; + response.operation().result().UnpackTo(&res); + Cerr << response << "\n" << res << "\n"; + UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::BAD_REQUEST); + } + } + + Y_UNIT_TEST(ReadRuleDisallowDefaultServiceType) { TServerSettings settings = PQSettings(0); { @@ -3132,7 +3132,7 @@ namespace { UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::BAD_REQUEST); } checkDescribe({{"acc/consumer1", "MyGreatType"}}); - + { AlterTopicRequest request; AlterTopicResponse response; @@ -3329,7 +3329,7 @@ namespace { ); } } - + Y_UNIT_TEST(ReadRuleServiceTypeMigrationWithDisallowDefault) { TServerSettings settings = PQSettings(0); { @@ -3357,7 +3357,7 @@ namespace { std::shared_ptr<grpc::Channel> channel = grpc::CreateChannel("localhost:" + ToString(server.GrpcPort), grpc::InsecureChannelCredentials()); pqStub = Ydb::PersQueue::V1::PersQueueService::NewStub(channel); } - + auto doAlter = [&]( const TString& topic, const TVector<std::pair<TString, TString>>& readRules, @@ -3532,5 +3532,5 @@ namespace { ); } } -} -} +} +} diff --git a/ydb/services/persqueue_v1/ut/persqueue_test_fixture.h b/ydb/services/persqueue_v1/ut/persqueue_test_fixture.h index 80bc89e33b..2933d1f3a1 100644 --- a/ydb/services/persqueue_v1/ut/persqueue_test_fixture.h +++ b/ydb/services/persqueue_v1/ut/persqueue_test_fixture.h @@ -63,9 +63,9 @@ namespace NKikimr::NPersQueueTests { Server->AnnoyingClient->CreateTopicNoLegacy("/Root/PQ/acc/topic1", 1); } else { Cerr << "=== Will create legacy-style topics\n"; - Server->AnnoyingClient->CreateTopicNoLegacy("rt3.dc1--acc--topic2dc", 1); - Server->AnnoyingClient->CreateTopicNoLegacy("rt3.dc2--acc--topic2dc", 1, true, false); - Server->AnnoyingClient->CreateTopicNoLegacy("rt3.dc1--topic1", 1); + Server->AnnoyingClient->CreateTopicNoLegacy("rt3.dc1--acc--topic2dc", 1); + Server->AnnoyingClient->CreateTopicNoLegacy("rt3.dc2--acc--topic2dc", 1, true, false); + Server->AnnoyingClient->CreateTopicNoLegacy("rt3.dc1--topic1", 1); Server->AnnoyingClient->CreateTopicNoLegacy("rt3.dc1--acc--topic1", 1); Server->WaitInit("topic1"); Sleep(TDuration::Seconds(10)); @@ -74,9 +74,9 @@ namespace NKikimr::NPersQueueTests { InsecureChannel = grpc::CreateChannel("localhost:" + ToString(Server->GrpcPort), grpc::InsecureChannelCredentials()); ServiceStub = Ydb::PersQueue::V1::PersQueueService::NewStub(InsecureChannel); InitializeWritePQService(TenantModeEnabled() ? "Root/acc/topic1" : "topic1"); - - NYdb::TDriverConfig driverCfg; - driverCfg.SetEndpoint(TStringBuilder() << "localhost:" << Server->GrpcPort).SetLog(CreateLogBackend("cerr", ELogPriority::TLOG_DEBUG)).SetDatabase("/Root"); + + NYdb::TDriverConfig driverCfg; + driverCfg.SetEndpoint(TStringBuilder() << "localhost:" << Server->GrpcPort).SetLog(CreateLogBackend("cerr", ELogPriority::TLOG_DEBUG)).SetDatabase("/Root"); YdbDriver.reset(new NYdb::TDriver(driverCfg)); PersQueueClient = MakeHolder<NYdb::NPersQueue::TPersQueueClient>(*YdbDriver); } @@ -161,9 +161,9 @@ namespace NKikimr::NPersQueueTests { TSimpleSharedPtr<TPortManager> PortManager; std::shared_ptr<grpc::Channel> InsecureChannel; std::unique_ptr<Ydb::PersQueue::V1::PersQueueService::Stub> ServiceStub; - + std::shared_ptr<NYdb::TDriver> YdbDriver; - THolder<NYdb::NPersQueue::TPersQueueClient> PersQueueClient; + THolder<NYdb::NPersQueue::TPersQueueClient> PersQueueClient; }; class TPersQueueV1TestServer : public TPersQueueV1TestServerBase { @@ -250,7 +250,7 @@ namespace NKikimr::NPersQueueTests { } } } -/* +/* THolder<IProducer> StartProducer(const TString& topicPath, bool compress = false) { TString fullPath = TenantModeEnabled() ? "/Root/PQ/" + topicPath : topicPath; TProducerSettings producerSettings; @@ -263,7 +263,7 @@ namespace NKikimr::NPersQueueTests { UNIT_ASSERT_EQUAL_C(Ydb::StatusIds::SUCCESS, startResult.GetValueSync().Response.status(), "Response: " << startResult.GetValueSync().Response); return producer; } -*/ +*/ }; } diff --git a/ydb/services/persqueue_v1/ut/pq_data_writer.h b/ydb/services/persqueue_v1/ut/pq_data_writer.h index 09d201abbd..caef894060 100644 --- a/ydb/services/persqueue_v1/ut/pq_data_writer.h +++ b/ydb/services/persqueue_v1/ut/pq_data_writer.h @@ -192,37 +192,37 @@ public: } } - ui32 InitSession(const TString& sourceId, ui32 pg, bool success, ui32 step = 0) { - Ydb::PersQueue::V1::StreamingWriteClientMessage req; - Ydb::PersQueue::V1::StreamingWriteServerMessage resp; - - grpc::ClientContext context; - - auto stream = StubP_->StreamingWrite(&context); - - UNIT_ASSERT(stream); - req.mutable_init_request()->set_topic("topic1"); - req.mutable_init_request()->set_message_group_id(sourceId); - req.mutable_init_request()->set_partition_group_id(pg); - - UNIT_ASSERT(stream->Write(req)); - UNIT_ASSERT(stream->Read(&resp)); - Cerr << "Init result: " << resp << "\n"; - if (!success) { - UNIT_ASSERT(resp.status() != Ydb::StatusIds::SUCCESS); - return 0; - } else { - if (resp.status() != Ydb::StatusIds::SUCCESS && step < 5) { - Sleep(TDuration::MilliSeconds(100)); - return InitSession(sourceId, pg, success, step + 1); - } - UNIT_ASSERT(resp.status() == Ydb::StatusIds::SUCCESS); - - return resp.init_response().partition_id(); - } - return 0; - } - + ui32 InitSession(const TString& sourceId, ui32 pg, bool success, ui32 step = 0) { + Ydb::PersQueue::V1::StreamingWriteClientMessage req; + Ydb::PersQueue::V1::StreamingWriteServerMessage resp; + + grpc::ClientContext context; + + auto stream = StubP_->StreamingWrite(&context); + + UNIT_ASSERT(stream); + req.mutable_init_request()->set_topic("topic1"); + req.mutable_init_request()->set_message_group_id(sourceId); + req.mutable_init_request()->set_partition_group_id(pg); + + UNIT_ASSERT(stream->Write(req)); + UNIT_ASSERT(stream->Read(&resp)); + Cerr << "Init result: " << resp << "\n"; + if (!success) { + UNIT_ASSERT(resp.status() != Ydb::StatusIds::SUCCESS); + return 0; + } else { + if (resp.status() != Ydb::StatusIds::SUCCESS && step < 5) { + Sleep(TDuration::MilliSeconds(100)); + return InitSession(sourceId, pg, success, step + 1); + } + UNIT_ASSERT(resp.status() == Ydb::StatusIds::SUCCESS); + + return resp.init_response().partition_id(); + } + return 0; + } + ui32 Write(const TString& topic, const TVector<TString>& data, bool error = false, const TMaybe<TString>& ticket = {}) { return WriteImpl(topic, {data}, error, ticket); } diff --git a/ydb/services/persqueue_v1/ut/rate_limiter_test_setup.cpp b/ydb/services/persqueue_v1/ut/rate_limiter_test_setup.cpp index 89e63de32a..779d60e778 100644 --- a/ydb/services/persqueue_v1/ut/rate_limiter_test_setup.cpp +++ b/ydb/services/persqueue_v1/ut/rate_limiter_test_setup.cpp @@ -93,19 +93,19 @@ void TRateLimiterTestSetup::CreateQuotaResources(const TString& path, const TStr } } -/* +/* THolder<Ydb::PersQueue::IProducer> TRateLimiterTestSetup::StartProducer(const TString& topicPath, bool compress) { Ydb::PersQueue::TProducerSettings producerSettings; producerSettings.Server = Ydb::PersQueue::TServerSetting("localhost", Server->GrpcPort); producerSettings.Topic = topicPath; producerSettings.SourceId = "TRateLimiterTestSetupSourceId"; - producerSettings.Codec = compress ? "gzip" : "raw"; + producerSettings.Codec = compress ? "gzip" : "raw"; THolder<Ydb::PersQueue::IProducer> producer = PQLib->CreateProducer(producerSettings); auto startResult = producer->Start(); UNIT_ASSERT_EQUAL_C(Ydb::StatusIds::SUCCESS, startResult.GetValueSync().Response.status(), "Response: " << startResult.GetValueSync().Response); return producer; } -*/ +*/ void TRateLimiterTestSetup::Start(bool enableReadQuoting) { InitServer(enableReadQuoting); diff --git a/ydb/services/persqueue_v1/ut/rate_limiter_test_setup.h b/ydb/services/persqueue_v1/ut/rate_limiter_test_setup.h index 11e4d1645f..01529818e1 100644 --- a/ydb/services/persqueue_v1/ut/rate_limiter_test_setup.h +++ b/ydb/services/persqueue_v1/ut/rate_limiter_test_setup.h @@ -19,7 +19,7 @@ public: void CreateConsumer(const TString& path); // namespace NPersQueue = Ydb::PersQueue; -// THolder<Ydb::PersQueue::IProducer> StartProducer(const TString& topicPath, bool compress = false); +// THolder<Ydb::PersQueue::IProducer> StartProducer(const TString& topicPath, bool compress = false); // Getters ui16 GetGrpcPort() const { diff --git a/ydb/services/persqueue_v1/ut/test_utils.h b/ydb/services/persqueue_v1/ut/test_utils.h index 86156afb06..c02308b0d7 100644 --- a/ydb/services/persqueue_v1/ut/test_utils.h +++ b/ydb/services/persqueue_v1/ut/test_utils.h @@ -13,8 +13,8 @@ static constexpr int DEBUG_LOG_LEVEL = 7; -using namespace NKikimr::NPersQueueTests; - +using namespace NKikimr::NPersQueueTests; + inline void WaitACLModification() { // TODO: Tests are flacky without sleep after ModifyACL. Can we cleanly await modified ACLs without random waits? Or at least poll for ACL changes. Sleep(TDuration::Seconds(5)); diff --git a/ydb/services/persqueue_v1/ut/ya.make b/ydb/services/persqueue_v1/ut/ya.make index 726edd1e5d..70c088383d 100644 --- a/ydb/services/persqueue_v1/ut/ya.make +++ b/ydb/services/persqueue_v1/ut/ya.make @@ -1,5 +1,5 @@ UNITTEST_FOR(ydb/services/persqueue_v1) - + OWNER( alexnick g:kikimr @@ -9,30 +9,30 @@ OWNER( CFLAGS( -DACTORLIB_HUGE_PB_SIZE ) - -FORK_SUBTESTS() + +FORK_SUBTESTS() IF (SANITIZER_TYPE OR WITH_VALGRIND) - TIMEOUT(1800) - SIZE(LARGE) + TIMEOUT(1800) + SIZE(LARGE) TAG(ya:fat) REQUIREMENTS(ram:32) -ELSE() - TIMEOUT(600) - SIZE(MEDIUM) -ENDIF() - -SRCS( - persqueue_ut.cpp +ELSE() + TIMEOUT(600) + SIZE(MEDIUM) +ENDIF() + +SRCS( + persqueue_ut.cpp persqueue_common_ut.cpp test_utils.h pq_data_writer.h api_test_setup.h rate_limiter_test_setup.h rate_limiter_test_setup.cpp -) - -PEERDIR( +) + +PEERDIR( library/cpp/getopt library/cpp/svnversion ydb/core/testlib @@ -43,8 +43,8 @@ PEERDIR( ydb/public/sdk/cpp/client/ydb_persqueue_public ydb/public/sdk/cpp/client/ydb_table ydb/services/persqueue_v1 -) - +) + YQL_LAST_ABI_VERSION() -END() +END() diff --git a/ydb/services/persqueue_v1/ya.make b/ydb/services/persqueue_v1/ya.make index 2b803181ab..ade0406fcc 100644 --- a/ydb/services/persqueue_v1/ya.make +++ b/ydb/services/persqueue_v1/ya.make @@ -1,31 +1,31 @@ -LIBRARY() - -OWNER( - alexnick +LIBRARY() + +OWNER( + alexnick g:kikimr g:logbroker -) - -SRCS( - grpc_pq_actor.h +) + +SRCS( + grpc_pq_actor.h grpc_pq_codecs.cpp - grpc_pq_read_actor.cpp - grpc_pq_read.cpp - grpc_pq_read.h - grpc_pq_schema.cpp - grpc_pq_schema.h - grpc_pq_write_actor.cpp - grpc_pq_write.cpp - grpc_pq_write.h - persqueue.cpp - persqueue.h + grpc_pq_read_actor.cpp + grpc_pq_read.cpp + grpc_pq_read.h + grpc_pq_schema.cpp + grpc_pq_schema.h + grpc_pq_write_actor.cpp + grpc_pq_write.cpp + grpc_pq_write.h + persqueue.cpp + persqueue.h persqueue_utils.cpp persqueue_utils.h -) - -PEERDIR( +) + +PEERDIR( library/cpp/actors/core - library/cpp/containers/disjoint_interval_tree + library/cpp/containers/disjoint_interval_tree library/cpp/grpc/server ydb/core/base ydb/core/grpc_services @@ -43,9 +43,9 @@ PEERDIR( ydb/public/api/protos ydb/services/lib/actors ydb/services/lib/sharding -) - -END() +) + +END() RECURSE_FOR_TESTS( ut diff --git a/ydb/tests/library/common/protobuf_ss.py b/ydb/tests/library/common/protobuf_ss.py index e700551d11..90b0638eb4 100644 --- a/ydb/tests/library/common/protobuf_ss.py +++ b/ydb/tests/library/common/protobuf_ss.py @@ -418,7 +418,7 @@ class CreateTopicRequest(AbstractTSchemeOperationRequest): assert isinstance(options, self.Options) if options.partitions_count is not None: - self._pers_queue.TotalGroupCount = options.partitions_count + self._pers_queue.TotalGroupCount = options.partitions_count if options.partition_per_table is not None: self._pers_queue.PartitionPerTablet = options.partition_per_table diff --git a/ydb/tests/library/matchers/scheme_ops.py b/ydb/tests/library/matchers/scheme_ops.py index dd6bffc3f0..2c84649c56 100644 --- a/ydb/tests/library/matchers/scheme_ops.py +++ b/ydb/tests/library/matchers/scheme_ops.py @@ -108,7 +108,7 @@ class Assertions(object): ), PersQueueGroup=has_properties( Name=name or anything(), - TotalGroupCount=total_partition_count or greater_than(0), + TotalGroupCount=total_partition_count or greater_than(0), PartitionPerTablet=partition_per_tablet or greater_than(0), AlterVersion=alter_version or greater_than(0), BalancerTabletID=balancer or greater_than(0), |