author     komels <komels@ydb.tech>   2023-12-08 09:49:15 +0300
committer  komels <komels@ydb.tech>   2023-12-08 10:33:45 +0300
commit     c47f199e3a1629a3b0a82811bb2f95cba4b633d1
tree       777f81cf8b49dc3ade389acb169d76e250ad5b2d
parent     58f95f0c7d92b1ec514c855893a4caf45fb430fe
download   ydb-c47f199e3a1629a3b0a82811bb2f95cba4b633d1.tar.gz
Partition direct read
73 files changed, 4565 insertions(+), 405 deletions(-)
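This commit wires a "direct read" path through the topic service: partition tablets hand prepared read results to a per-node cache actor, which serves them to client sessions over a dedicated `StreamDirectReadMessage` gRPC stream. A hedged sketch of the server-side event sequence, with constructor shapes taken from the unit tests further below (the literal session ids and read ids are illustrative):

```cpp
// Sketch only: the lifecycle of one direct read, driven by the internal
// events this commit adds. {"session-1", 1} is a (session id, partition
// session id) key; the trailing argument is the tablet generation used
// for fencing; {"session-1", 1, 1} additionally carries a read id.
void ExampleDirectReadFlow(const NActors::TActorContext& ctx) {
    const auto cache = NKikimr::NPQ::MakePQDReadCacheServiceActorId();
    // 1. A partition announces a server session (generation 1).
    ctx.Send(cache, new NKikimr::TEvPQ::TEvRegisterDirectReadSession({"session-1", 1}, 1));
    // 2. It stages a prepared read result (readId 1)...
    ctx.Send(cache, new NKikimr::TEvPQ::TEvStageDirectReadData(
        {"session-1", 1, 1}, 1, std::make_shared<NKikimrClient::TResponse>()));
    // 3. ...and publishes it when it becomes deliverable; the cache then
    //    forwards published reads to the connected client proxy in read-id order.
    ctx.Send(cache, new NKikimr::TEvPQ::TEvPublishDirectRead({"session-1", 1, 1}, 1));
    // 4. Once the client is done with it, the read is forgotten (evicted).
    ctx.Send(cache, new NKikimr::TEvPQ::TEvForgetDirectRead({"session-1", 1, 1}, 1));
}
```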
diff --git a/.mapping.json b/.mapping.json
index 32f3b89e9e..f6384adc5f 100644
--- a/.mapping.json
+++ b/.mapping.json
@@ -5824,6 +5824,13 @@
     "ydb/core/persqueue/config/CMakeLists.linux-x86_64.txt":"",
     "ydb/core/persqueue/config/CMakeLists.txt":"",
     "ydb/core/persqueue/config/CMakeLists.windows-x86_64.txt":"",
+    "ydb/core/persqueue/dread_cache_service/CMakeLists.txt":"",
+    "ydb/core/persqueue/dread_cache_service/ut/CMakeLists.darwin-arm64.txt":"",
+    "ydb/core/persqueue/dread_cache_service/ut/CMakeLists.darwin-x86_64.txt":"",
+    "ydb/core/persqueue/dread_cache_service/ut/CMakeLists.linux-aarch64.txt":"",
+    "ydb/core/persqueue/dread_cache_service/ut/CMakeLists.linux-x86_64.txt":"",
+    "ydb/core/persqueue/dread_cache_service/ut/CMakeLists.txt":"",
+    "ydb/core/persqueue/dread_cache_service/ut/CMakeLists.windows-x86_64.txt":"",
     "ydb/core/persqueue/events/CMakeLists.darwin-arm64.txt":"",
     "ydb/core/persqueue/events/CMakeLists.darwin-x86_64.txt":"",
     "ydb/core/persqueue/events/CMakeLists.linux-aarch64.txt":"",
diff --git a/ydb/core/driver_lib/run/config.h b/ydb/core/driver_lib/run/config.h
index 85fca864c7..f71205fecc 100644
--- a/ydb/core/driver_lib/run/config.h
+++ b/ydb/core/driver_lib/run/config.h
@@ -57,6 +57,7 @@ union TBasicKikimrServicesMask {
     bool EnablePersQueueClusterDiscovery:1;
     bool EnableNetClassifier:1;
     bool EnablePersQueueClusterTracker:1;
+    bool EnablePersQueueDirectReadCache:1;
    bool EnableSysViewService:1;
     bool EnableMeteringWriter:1;
     bool EnableAuditWriter:1;
diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp
index ab88bc8bf0..615589b3bc 100644
--- a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp
+++ b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp
@@ -102,6 +102,7 @@
 #include <ydb/core/node_whiteboard/node_whiteboard.h>
 
 #include <ydb/core/persqueue/cluster_tracker.h>
+#include <ydb/core/persqueue/dread_cache_service/caching_service.h>
 #include <ydb/core/persqueue/pq.h>
 #include <ydb/core/persqueue/pq_l2_service.h>
@@ -1954,6 +1955,19 @@ void TPersQueueClusterTrackerInitializer::InitializeServices(NActors::TActorSyst
         TActorSetupCmd(actor, TMailboxType::HTSwap, appData->UserPoolId)));
 }
 
+// TPersQueueDirectReadCache
+
+TPersQueueDirectReadCacheInitializer::TPersQueueDirectReadCacheInitializer(const TKikimrRunConfig& runConfig)
+    : IKikimrServicesInitializer(runConfig)
+{}
+
+void TPersQueueDirectReadCacheInitializer::InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) {
+    IActor* actor = NPQ::CreatePQDReadCacheService(appData->Counters);
+    setup->LocalServices.push_back(std::pair<TActorId, TActorSetupCmd>(
+        NPQ::MakePQDReadCacheServiceActorId(),
+        TActorSetupCmd(actor, TMailboxType::HTSwap, appData->UserPoolId)));
+}
+
 // TMemProfMonitorInitializer
 
 TMemProfMonitorInitializer::TMemProfMonitorInitializer(const TKikimrRunConfig& runConfig, TIntrusivePtr<TMemObserver> memObserver)
diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.h b/ydb/core/driver_lib/run/kikimr_services_initializers.h
index 4f3b622d8b..a15bd6c040 100644
--- a/ydb/core/driver_lib/run/kikimr_services_initializers.h
+++ b/ydb/core/driver_lib/run/kikimr_services_initializers.h
@@ -355,6 +355,13 @@ public:
     void InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) override;
 };
 
+class TPersQueueDirectReadCacheInitializer : public IKikimrServicesInitializer {
+public:
+    TPersQueueDirectReadCacheInitializer(const TKikimrRunConfig& runConfig);
+
+    void InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) override;
+};
+
 class TMemProfMonitorInitializer : public IKikimrServicesInitializer {
     TIntrusivePtr<TMemObserver> MemObserver;
diff --git a/ydb/core/driver_lib/run/run.cpp b/ydb/core/driver_lib/run/run.cpp
index 7476c90fcc..66bd3c186d 100644
--- a/ydb/core/driver_lib/run/run.cpp
+++ b/ydb/core/driver_lib/run/run.cpp
@@ -1449,6 +1449,10 @@ TIntrusivePtr<TServiceInitializersList> TKikimrRunner::CreateServiceInitializers
         sil->AddServiceInitializer(new TPersQueueClusterTrackerInitializer(runConfig));
     }
 
+    if (serviceMask.EnablePersQueueDirectReadCache) {
+        sil->AddServiceInitializer(new TPersQueueDirectReadCacheInitializer(runConfig));
+    }
+
     if (serviceMask.EnableIcNodeCacheService) {
         sil->AddServiceInitializer(new TIcNodeCacheServiceInitializer(runConfig));
     }
diff --git a/ydb/core/grpc_services/base/base.h b/ydb/core/grpc_services/base/base.h
index 010244c5ea..a0e5f479b7 100644
--- a/ydb/core/grpc_services/base/base.h
+++ b/ydb/core/grpc_services/base/base.h
@@ -103,6 +103,7 @@ struct TRpcServices {
         EvStreamPQMigrationRead,
         EvStreamTopicWrite,
         EvStreamTopicRead,
+        EvStreamTopicDirectRead,
         EvPQReadInfo,
         EvTopicCommitOffset,
         EvListOperations,
diff --git a/ydb/core/grpc_services/grpc_request_proxy.cpp b/ydb/core/grpc_services/grpc_request_proxy.cpp
index 4a1358c8db..27e6b0e321 100644
--- a/ydb/core/grpc_services/grpc_request_proxy.cpp
+++ b/ydb/core/grpc_services/grpc_request_proxy.cpp
@@ -548,6 +548,7 @@ void TGRpcRequestProxyImpl::StateFunc(TAutoPtr<IEventHandle>& ev) {
         HFunc(TEvStreamPQMigrationReadRequest, PreHandle);
         HFunc(TEvStreamTopicWriteRequest, PreHandle);
         HFunc(TEvStreamTopicReadRequest, PreHandle);
+        HFunc(TEvStreamTopicDirectReadRequest, PreHandle);
         HFunc(TEvCommitOffsetRequest, PreHandle);
         HFunc(TEvPQReadInfoRequest, PreHandle);
         HFunc(TEvPQDropTopicRequest, PreHandle);
diff --git a/ydb/core/grpc_services/grpc_request_proxy_handle_methods.h b/ydb/core/grpc_services/grpc_request_proxy_handle_methods.h
index d8fe632d1c..155f344d7a 100644
--- a/ydb/core/grpc_services/grpc_request_proxy_handle_methods.h
+++ b/ydb/core/grpc_services/grpc_request_proxy_handle_methods.h
@@ -12,6 +12,7 @@ protected:
     static void Handle(TEvStreamPQMigrationReadRequest::TPtr& ev, const TActorContext& ctx);
     static void Handle(TEvStreamTopicWriteRequest::TPtr& ev, const TActorContext& ctx);
     static void Handle(TEvStreamTopicReadRequest::TPtr& ev, const TActorContext& ctx);
+    static void Handle(TEvStreamTopicDirectReadRequest::TPtr& ev, const TActorContext& ctx);
     static void Handle(TEvCommitOffsetRequest::TPtr& ev, const TActorContext& ctx);
     static void Handle(TEvPQReadInfoRequest::TPtr& ev, const TActorContext& ctx);
     static void Handle(TEvPQDropTopicRequest::TPtr& ev, const TActorContext& ctx);
diff --git a/ydb/core/grpc_services/rpc_calls.cpp b/ydb/core/grpc_services/rpc_calls.cpp
index 0bb41f28b6..766e4e05cc 100644
--- a/ydb/core/grpc_services/rpc_calls.cpp
+++ b/ydb/core/grpc_services/rpc_calls.cpp
@@ -37,6 +37,12 @@ void FillYdbStatus(Ydb::Topic::StreamReadMessage::FromServer& resp, const NYql::
 }
 
 template <>
+void FillYdbStatus(Ydb::Topic::StreamDirectReadMessage::FromServer& resp, const NYql::TIssues& issues, Ydb::StatusIds::StatusCode status) {
+    resp.set_status(status);
+    NYql::IssuesToMessage(issues, resp.mutable_issues());
+}
+
+template <>
 void FillYdbStatus(Draft::Dummy::PingResponse& resp, const NYql::TIssues& issues, Ydb::StatusIds::StatusCode status) {
     Y_UNUSED(resp);
     Y_UNUSED(issues);
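For every bidirectional-stream protocol the generic request proxy handles, there has to be a `FillYdbStatus` specialization that knows how to stamp a terminal status and issue list into that protocol's `FromServer` envelope; the hunk above adds one for `StreamDirectReadMessage`, mirroring the existing `StreamReadMessage` overload. A sketch of how such a specialization is typically consumed (hypothetical helper, not code from this commit):

```cpp
// Hypothetical illustration: generic code can produce a typed error
// response for any stream protocol that provides a FillYdbStatus overload.
template <class TServerMessage>
TServerMessage MakeErrorResponse(Ydb::StatusIds::StatusCode status, const NYql::TIssues& issues) {
    TServerMessage resp;
    FillYdbStatus(resp, issues, status);  // resolves to the per-protocol specialization
    return resp;
}

// e.g. MakeErrorResponse<Ydb::Topic::StreamDirectReadMessage::FromServer>(
//          Ydb::StatusIds::BAD_REQUEST, issues);
```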
diff --git a/ydb/core/grpc_services/rpc_calls.h b/ydb/core/grpc_services/rpc_calls.h
index cbeeaec420..d6e1e2ef6d 100644
--- a/ydb/core/grpc_services/rpc_calls.h
+++ b/ydb/core/grpc_services/rpc_calls.h
@@ -66,6 +66,7 @@ using TEvStreamPQWriteRequest = TGRpcRequestBiStreamWrapper<TRpcServices::EvStre
 using TEvStreamPQMigrationReadRequest = TGRpcRequestBiStreamWrapper<TRpcServices::EvStreamPQMigrationRead, Ydb::PersQueue::V1::MigrationStreamingReadClientMessage, Ydb::PersQueue::V1::MigrationStreamingReadServerMessage>;
 using TEvStreamTopicWriteRequest = TGRpcRequestBiStreamWrapper<TRpcServices::EvStreamTopicWrite, Ydb::Topic::StreamWriteMessage::FromClient, Ydb::Topic::StreamWriteMessage::FromServer, TRateLimiterMode::RuManual>;
 using TEvStreamTopicReadRequest = TGRpcRequestBiStreamWrapper<TRpcServices::EvStreamTopicRead, Ydb::Topic::StreamReadMessage::FromClient, Ydb::Topic::StreamReadMessage::FromServer, TRateLimiterMode::RuManual>;
+using TEvStreamTopicDirectReadRequest = TGRpcRequestBiStreamWrapper<TRpcServices::EvStreamTopicDirectRead, Ydb::Topic::StreamDirectReadMessage::FromClient, Ydb::Topic::StreamDirectReadMessage::FromServer, TRateLimiterMode::RuManual>;
 using TEvCommitOffsetRequest = TGRpcRequestWrapper<TRpcServices::EvTopicCommitOffset, Ydb::Topic::CommitOffsetRequest, Ydb::Topic::CommitOffsetResponse, true>;
 using TEvPQReadInfoRequest = TGRpcRequestWrapper<TRpcServices::EvPQReadInfo, Ydb::PersQueue::V1::ReadInfoRequest, Ydb::PersQueue::V1::ReadInfoResponse, true>;
 using TEvPQDropTopicRequest = TGRpcRequestValidationWrapper<TRpcServices::EvPQDropTopic, Ydb::PersQueue::V1::DropTopicRequest, Ydb::PersQueue::V1::DropTopicResponse, true>;
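The one-line addition above is denser than it looks; annotated, the alias binds the new proxy event id to the two halves of the direct-read stream and opts into manual request-unit accounting, exactly like the existing topic read and write sessions (the comments below are editorial, the code is the diff's):

```cpp
using TEvStreamTopicDirectReadRequest = TGRpcRequestBiStreamWrapper<
    TRpcServices::EvStreamTopicDirectRead,              // event id added to TRpcServices in base.h
    Ydb::Topic::StreamDirectReadMessage::FromClient,    // client -> server stream half
    Ydb::Topic::StreamDirectReadMessage::FromServer,    // server -> client stream half
    TRateLimiterMode::RuManual>;                        // request units charged manually
```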
diff --git a/ydb/core/persqueue/CMakeLists.darwin-arm64.txt b/ydb/core/persqueue/CMakeLists.darwin-arm64.txt
index fcb42492da..6c3e709acf 100644
--- a/ydb/core/persqueue/CMakeLists.darwin-arm64.txt
+++ b/ydb/core/persqueue/CMakeLists.darwin-arm64.txt
@@ -8,6 +8,7 @@
 add_subdirectory(codecs)
 add_subdirectory(config)
+add_subdirectory(dread_cache_service)
 add_subdirectory(events)
 add_subdirectory(partition_key_range)
 add_subdirectory(ut)
@@ -78,6 +79,7 @@ target_sources(ydb-core-persqueue PRIVATE
   ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/utils.cpp
   ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/write_meta.cpp
   ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/microseconds_sliding_window.cpp
+  ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/dread_cache_service/caching_service.cpp
 )
 generate_enum_serilization(ydb-core-persqueue
   ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/sourceid.h
diff --git a/ydb/core/persqueue/CMakeLists.darwin-x86_64.txt b/ydb/core/persqueue/CMakeLists.darwin-x86_64.txt
index fcb42492da..6c3e709acf 100644
--- a/ydb/core/persqueue/CMakeLists.darwin-x86_64.txt
+++ b/ydb/core/persqueue/CMakeLists.darwin-x86_64.txt
@@ -8,6 +8,7 @@
 add_subdirectory(codecs)
 add_subdirectory(config)
+add_subdirectory(dread_cache_service)
 add_subdirectory(events)
 add_subdirectory(partition_key_range)
 add_subdirectory(ut)
@@ -78,6 +79,7 @@ target_sources(ydb-core-persqueue PRIVATE
   ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/utils.cpp
   ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/write_meta.cpp
   ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/microseconds_sliding_window.cpp
+  ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/dread_cache_service/caching_service.cpp
 )
 generate_enum_serilization(ydb-core-persqueue
   ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/sourceid.h
diff --git a/ydb/core/persqueue/CMakeLists.linux-aarch64.txt b/ydb/core/persqueue/CMakeLists.linux-aarch64.txt
index efa9885d58..38253dbbae 100644
--- a/ydb/core/persqueue/CMakeLists.linux-aarch64.txt
+++ b/ydb/core/persqueue/CMakeLists.linux-aarch64.txt
@@ -8,6 +8,7 @@
 add_subdirectory(codecs)
 add_subdirectory(config)
+add_subdirectory(dread_cache_service)
 add_subdirectory(events)
 add_subdirectory(partition_key_range)
 add_subdirectory(ut)
@@ -79,6 +80,7 @@ target_sources(ydb-core-persqueue PRIVATE
   ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/utils.cpp
   ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/write_meta.cpp
   ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/microseconds_sliding_window.cpp
+  ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/dread_cache_service/caching_service.cpp
 )
 generate_enum_serilization(ydb-core-persqueue
   ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/sourceid.h
diff --git a/ydb/core/persqueue/CMakeLists.linux-x86_64.txt b/ydb/core/persqueue/CMakeLists.linux-x86_64.txt
index efa9885d58..38253dbbae 100644
--- a/ydb/core/persqueue/CMakeLists.linux-x86_64.txt
+++ b/ydb/core/persqueue/CMakeLists.linux-x86_64.txt
@@ -8,6 +8,7 @@
 add_subdirectory(codecs)
 add_subdirectory(config)
+add_subdirectory(dread_cache_service)
 add_subdirectory(events)
 add_subdirectory(partition_key_range)
 add_subdirectory(ut)
@@ -79,6 +80,7 @@ target_sources(ydb-core-persqueue PRIVATE
   ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/utils.cpp
   ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/write_meta.cpp
   ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/microseconds_sliding_window.cpp
+  ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/dread_cache_service/caching_service.cpp
 )
 generate_enum_serilization(ydb-core-persqueue
   ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/sourceid.h
diff --git a/ydb/core/persqueue/CMakeLists.windows-x86_64.txt b/ydb/core/persqueue/CMakeLists.windows-x86_64.txt
index fcb42492da..6c3e709acf 100644
--- a/ydb/core/persqueue/CMakeLists.windows-x86_64.txt
+++ b/ydb/core/persqueue/CMakeLists.windows-x86_64.txt
@@ -8,6 +8,7 @@
 add_subdirectory(codecs)
 add_subdirectory(config)
+add_subdirectory(dread_cache_service)
 add_subdirectory(events)
 add_subdirectory(partition_key_range)
 add_subdirectory(ut)
@@ -78,6 +79,7 @@ target_sources(ydb-core-persqueue PRIVATE
   ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/utils.cpp
   ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/write_meta.cpp
   ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/microseconds_sliding_window.cpp
+  ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/dread_cache_service/caching_service.cpp
 )
 generate_enum_serilization(ydb-core-persqueue
   ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/sourceid.h
diff --git a/ydb/core/persqueue/dread_cache_service/CMakeLists.txt b/ydb/core/persqueue/dread_cache_service/CMakeLists.txt
new file mode 100644
index 0000000000..1703b0a27b
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(ut)
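The first helper in the new service below, `GetDataChunkCodec`, converts the codec stored in an internal data chunk to the public Topic API numbering: the internal value is 0-based, while the public codec enum reserves 0 for "unspecified", hence the shift by one. A standalone mock of that mapping (assumption: the 0-reserved-for-unspecified convention comes from the public ydb_topic.proto, not from this diff):

```cpp
#include <cstdint>
#include <iostream>
#include <optional>

// Mock of GetDataChunkCodec: std::optional stands in for proto.HasCodec().
int32_t GetDataChunkCodec(const std::optional<int32_t>& chunkCodec) {
    if (chunkCodec.has_value()) {
        return *chunkCodec + 1;  // internal 0-based codec -> public 1-based enum
    }
    return 0;                    // no codec recorded -> 0 ("unspecified")
}

int main() {
    std::cout << GetDataChunkCodec(std::nullopt) << "\n";  // 0: unspecified
    std::cout << GetDataChunkCodec(0) << "\n";             // 1
    std::cout << GetDataChunkCodec(1) << "\n";             // 2
    return 0;
}
```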
diff --git a/ydb/core/persqueue/dread_cache_service/caching_service.cpp b/ydb/core/persqueue/dread_cache_service/caching_service.cpp
new file mode 100644
index 0000000000..d9d20a6dbf
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/caching_service.cpp
@@ -0,0 +1,546 @@
+#include "caching_service.h"
+
+#include <ydb/public/api/protos/persqueue_error_codes_v1.pb.h>
+#include <ydb/public/api/protos/ydb_topic.pb.h>
+#include <ydb/public/lib/base/msgbus_status.h>
+#include <ydb/core/persqueue/key.h>
+#include <ydb/core/persqueue/writer/source_id_encoding.h>
+#include <ydb/core/persqueue/write_meta.h>
+#include <ydb/core/protos/grpc_pq_old.pb.h>
+#include <ydb/services/persqueue_v1/actors/events.h>
+#include <ydb/services/persqueue_v1/actors/persqueue_utils.h>
+#include <ydb/library/actors/core/actor_bootstrapped.h>
+#include <contrib/libs/protobuf/src/google/protobuf/util/time_util.h>
+
+namespace NKikimr::NPQ {
+
+using namespace NActors;
+using namespace Ydb::Topic;
+using namespace NGRpcProxy::V1;
+
+
+i32 GetDataChunkCodec(const NKikimrPQClient::TDataChunk& proto) {
+    if (proto.HasCodec()) {
+        return proto.GetCodec() + 1;
+    }
+    return 0;
+}
+
+
+class TPQDirectReadCacheService : public TActorBootstrapped<TPQDirectReadCacheService> {
+public:
+    TPQDirectReadCacheService(const ::NMonitoring::TDynamicCounterPtr& counters)
+        : Counters(counters)
+    {
+
+    }
+
+    void Bootstrap(const TActorContext& ctx) {
+        LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "Direct read cache created");
+
+        Become(&TThis::StateWork);
+        Y_UNUSED(ctx);
+        //Y_ABORT_UNLESS(Counters);
+    }
+
+    STRICT_STFUNC(StateWork,
+        hFunc(TEvPQ::TEvPublishDirectRead, HandlePublish)
+        hFunc(TEvPQ::TEvStageDirectReadData, HandleFetchData)
+        hFunc(TEvPQ::TEvForgetDirectRead, HandleForget)
+        hFunc(TEvPQ::TEvRegisterDirectReadSession, HandleRegister)
+        hFunc(TEvPQ::TEvDeregisterDirectReadSession, HandleDeregister)
+        hFunc(TEvPQ::TEvGetFullDirectReadData, HandleGetData)
+        hFunc(TEvPQProxy::TEvDirectReadDataSessionConnected, HandleCreateClientSession)
+        hFunc(TEvPQProxy::TEvDirectReadDataSessionDead, HandleDestroyClientSession)
+    )
+
+private:
+    using TSessionsMap = THashMap<TReadSessionKey, TCacheServiceData>;
+
+    void HandleCreateClientSession(TEvPQProxy::TEvDirectReadDataSessionConnected::TPtr& ev) {
+        const auto& ctx = ActorContext();
+        auto key = MakeSessionKey(ev->Get());
+        LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, TStringBuilder() << "Direct read cache: client session connected with id '" << key.SessionId << "'");
+        auto sessionIter = ServerSessions.find(key);
+        if (sessionIter.IsEnd()) {
+            LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, TStringBuilder() << "Direct read cache: unknown session id '" << key.SessionId << "', close session");
+            CloseSession(ev->Sender, Ydb::PersQueue::ErrorCode::ErrorCode::BAD_REQUEST, "Unknown session");
+            return;
+        }
+        if (sessionIter->second.Generation != ev->Get()->Generation) {
+            ctx.Send(
+                sessionIter->second.Client->ProxyId,
+                new TEvPQProxy::TEvDirectReadDestroyPartitionSession(key, Ydb::PersQueue::ErrorCode::ErrorCode::ERROR, "Generation mismatch")
+            );
+            return;
+        }
+
+        sessionIter->second.Client = TCacheClientContext{ev->Sender, ev->Get()->StartingReadId};
+        AssignByProxy[ev->Sender].insert(key.PartitionSessionId);
+        while(SendNextReadToClient(sessionIter)) {
+            // Empty
+        }
+    }
+
+    void HandleDestroyClientSession(TEvPQProxy::TEvDirectReadDataSessionDead::TPtr& ev) {
+        auto assignIter = AssignByProxy.find(ev->Sender);
+        if (assignIter.IsEnd())
+            return;
+        for (auto id : assignIter->second) {
+            return DestroyClientSession(ServerSessions.find(
+                TReadSessionKey{ev->Get()->Session, id}), false,
+                Ydb::PersQueue::ErrorCode::ErrorCode::OK, "", ev->Sender
+            );
+        }
+    }
+
+    void HandleRegister(TEvPQ::TEvRegisterDirectReadSession::TPtr& ev) {
+        const auto& key = ev->Get()->Session;
+        RegisterServerSession(key, ev->Get()->Generation);
+    }
+
+    void HandleDeregister(TEvPQ::TEvDeregisterDirectReadSession::TPtr& ev) {
+        const auto& key = ev->Get()->Session;
+        const auto& ctx = ActorContext();
+
+        auto destroyDone = DestroyServerSession(ServerSessions.find(key), ev->Get()->Generation);
+        if (destroyDone) {
+            LOG_DEBUG_S(
+                ctx, NKikimrServices::PQ_READ_PROXY,
+                TStringBuilder() << "Direct read cache: server session deregistered: " << key.SessionId
+            );
+        } else {
+            LOG_WARN_S(
+                ctx, NKikimrServices::PQ_READ_PROXY,
+                TStringBuilder() << "Direct read cache: attempted to deregister unknown server session: " << key.SessionId
+                    << ":" << key.PartitionSessionId << " with generation " << ev->Get()->Generation << ", ignored"
+            );
+            return;
+        }
+    }
+
+    void HandleFetchData(TEvPQ::TEvStageDirectReadData::TPtr& ev) {
+        const auto& ctx = ActorContext();
+        auto sessionKey = MakeSessionKey(ev->Get());
+        auto sessionIter = ServerSessions.find(sessionKey);
+        if (sessionIter.IsEnd()) {
+            LOG_ERROR_S(
+                ctx, NKikimrServices::PQ_READ_PROXY,
+                TStringBuilder() << "Direct read cache: tried to stage direct read for unregistered session: "
+                    << sessionKey.SessionId << ":" << sessionKey.PartitionSessionId
+            );
+            return;
+        }
+        if (sessionIter->second.Generation != ev->Get()->TabletGeneration) {
+            LOG_ALERT_S(
+                ctx, NKikimrServices::PQ_READ_PROXY,
+                TStringBuilder() << "Direct read cache: tried to stage direct read for session " << sessionKey.SessionId
+                    << " with generation " << ev->Get()->TabletGeneration << ", previously had this session with generation "
+                    << sessionIter->second.Generation << ". Data ignored"
+            );
+            return;
+        }
+        auto ins = sessionIter->second.StagedReads.insert(std::make_pair(ev->Get()->ReadKey.ReadId, ev->Get()->Response));
+        if (!ins.second) {
+            LOG_WARN_S(
+                ctx, NKikimrServices::PQ_READ_PROXY,
+                TStringBuilder() << "Direct read cache: tried to stage duplicate direct read for session " << sessionKey.SessionId << " with id "
+                    << ev->Get()->ReadKey.ReadId << ", new data ignored"
+            );
+            return;
+        }
+        ChangeCounterValue("StagedReadDataSize", ins.first->second->ByteSize(), false);
+        ChangeCounterValue("StagedReadsCount", 1, false);
+        LOG_DEBUG_S(
+            ctx, NKikimrServices::PQ_READ_PROXY,
+            TStringBuilder() << "Direct read cache: staged direct read id " << ev->Get()->ReadKey.ReadId << " for session: "
+                << sessionKey.SessionId
+        );
+    }
+
+    void HandlePublish(TEvPQ::TEvPublishDirectRead::TPtr& ev) {
+        const auto& ctx = ActorContext();
+        auto key = MakeSessionKey(ev->Get());
+        const auto readId = ev->Get()->ReadKey.ReadId;
+        LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, TStringBuilder() << "Direct read cache: publish read: " << readId << " for session " << key.SessionId);
+        auto iter = ServerSessions.find(key);
+        if (iter.IsEnd()) {
+            LOG_ERROR_S(
+                ctx, NKikimrServices::PQ_READ_PROXY,
+                TStringBuilder() << "Direct read cache: attempt to publish read for unknow session " << key.SessionId << " ignored"
+            );
+            return;
+        }
+
+        const auto& generation = ev->Get()->TabletGeneration;
+        if (iter->second.Generation != generation)
+            return;
+
+        auto stagedIter = iter->second.StagedReads.find(readId);
+        if (stagedIter == iter->second.StagedReads.end()) {
+            LOG_ERROR_S(
+                ctx, NKikimrServices::PQ_READ_PROXY,
+                TStringBuilder() << "Direct read cache: attempt to publish unknown read id " << readId << " from session: "
+                    << key.SessionId << " ignored");
+            return;
+        }
+        auto inserted = iter->second.Reads.insert(std::make_pair(ev->Get()->ReadKey.ReadId, stagedIter->second)).second;
+        if (inserted) {
+            ChangeCounterValue("PublishedReadDataSize", stagedIter->second->ByteSize(), false);
+            ChangeCounterValue("PublishedReadsCount", 1, false);
+        }
+        ChangeCounterValue("StagedReadDataSize", -stagedIter->second->ByteSize(), false);
+        ChangeCounterValue("StagedReadsCount", -1, false);
+
+        iter->second.StagedReads.erase(stagedIter);
+
+        SendNextReadToClient(iter);
+    }
+
+    void HandleForget(TEvPQ::TEvForgetDirectRead::TPtr& ev) {
+        const auto& ctx = ActorContext();
+        auto key = MakeSessionKey(ev->Get());
+        auto iter = ServerSessions.find(key);
+        if (iter.IsEnd()) {
+            LOG_DEBUG_S(
+                ctx, NKikimrServices::PQ_READ_PROXY,
+                TStringBuilder() << "Direct read cache: attempt to forget read for unknown session: "
+                    << ev->Get()->ReadKey.SessionId << " ignored"
+            );
+            return;
+        }
+        LOG_DEBUG_S(
+            ctx, NKikimrServices::PQ_READ_PROXY,
+            TStringBuilder() << "Direct read cache: forget read: " << ev->Get()->ReadKey.ReadId << " for session "
+                << key.SessionId
+        );
+
+        const auto& generation = ev->Get()->TabletGeneration;
+        if (iter->second.Generation != generation) { // Stale generation in event, ignore it
+            return;
+        }
+        auto readIter = iter->second.Reads.find(ev->Get()->ReadKey.ReadId);
+        if (readIter != iter->second.Reads.end()) {
+            ChangeCounterValue("PublishedReadDataSize", -readIter->second->ByteSize(), false);
+            ChangeCounterValue("PublishedReadsCount", -1, false);
+
+            iter->second.Reads.erase(readIter);
+        }
+        auto stagedIter = iter->second.StagedReads.find(ev->Get()->ReadKey.ReadId);
+        if (stagedIter != iter->second.StagedReads.end()) {
+            ChangeCounterValue("StagedReadDataSize", -stagedIter->second->ByteSize(), false);
+            ChangeCounterValue("StagedReadsCount", -1, false);
+            iter->second.StagedReads.erase(stagedIter);
+        }
+        iter->second.StagedReads.erase(ev->Get()->ReadKey.ReadId);
+    }
+
+    void DestroyClientSession(
+        TSessionsMap::iterator sessionIter, bool doRespondToProxy, Ydb::PersQueue::ErrorCode::ErrorCode code,
+        const TString& reason, const TMaybe<TActorId>& proxyId = Nothing()
+    ) {
+        if (sessionIter.IsEnd() || !sessionIter->second.Client.Defined())
+            return;
+        auto& client = sessionIter->second.Client.GetRef();
+        if (proxyId.Defined() && *proxyId != client.ProxyId)
+            return;
+
+        if (doRespondToProxy) {
+            DestroyPartitionSession(sessionIter, code, reason);
+        }
+        auto assignIter = AssignByProxy.find(sessionIter->second.Client->ProxyId);
+        if (!assignIter.IsEnd()) {
+            assignIter->second.erase(sessionIter->first.PartitionSessionId);
+        }
+        sessionIter->second.Client = Nothing();
+    }
+
+    [[nodiscard]] bool DestroyServerSession(TSessionsMap::iterator sessionIter, ui64 generation) {
+        if (sessionIter.IsEnd() || sessionIter->second.Generation > generation)
+            return false;
+        DestroyPartitionSession(sessionIter, Ydb::PersQueue::ErrorCode::READ_ERROR_NO_SESSION, "Closed by server");
+        ServerSessions.erase(sessionIter);
+        ChangeCounterValue("ActiveServerSessions", ServerSessions.size(), true);
+        return true;
+    }
+
+    void RegisterServerSession(const TReadSessionKey& key, ui32 generation) {
+        const auto& ctx = ActorContext();
+        auto sessionsIter = ServerSessions.find(key);
+        if (sessionsIter.IsEnd()) {
+            LOG_DEBUG_S(
+                ctx, NKikimrServices::PQ_READ_PROXY,
+                TStringBuilder() << "Direct read cache: registered server session: " << key.SessionId
+                    << ":" << key.PartitionSessionId << " with generation " << generation
+            );
+            ServerSessions.insert(std::make_pair(key, TCacheServiceData{generation}));
+        } else if (sessionsIter->second.Generation == generation) {
+            LOG_WARN_S(
+                ctx, NKikimrServices::PQ_READ_PROXY,
+                TStringBuilder() << "Direct read cache: attempted to register duplicate server session: " << key.SessionId
+                    << ":" << key.PartitionSessionId << " with same generation " << generation << ", ignored"
+            );
+        } else if (DestroyServerSession(sessionsIter, generation)) {
+            LOG_DEBUG_S(
+                ctx, NKikimrServices::PQ_READ_PROXY,
+                TStringBuilder() << "Direct read cache: registered server session: " << key.SessionId
+                    << ":" << key.PartitionSessionId << " with generation " << generation
+                    << ", killed existing session with older generation "
+            );
+            ServerSessions.insert(std::make_pair(key, TCacheServiceData{generation}));
+        } else {
+            LOG_INFO_S(
+                ctx, NKikimrServices::PQ_READ_PROXY,
+                TStringBuilder() << "Direct read cache: attempted to register server session: " << key.SessionId
+                    << ":" << key.PartitionSessionId << " with stale generation " << generation << ", ignored"
+            );
+        }
+        ChangeCounterValue("ActiveServerSessions", ServerSessions.size(), true);
+    }
+
+    template<class TEv>
+    const TReadSessionKey MakeSessionKey(TEv* ev) {
+        return TReadSessionKey{ev->ReadKey.SessionId, ev->ReadKey.PartitionSessionId};
+    }
+
+    void HandleGetData(TEvPQ::TEvGetFullDirectReadData::TPtr& ev) {
+        auto* response = new TEvPQ::TEvGetFullDirectReadData();
+        auto& data = response->Data;
+        auto key = MakeSessionKey(ev->Get());
+
+        if (key.SessionId.Empty()) {
+            for (const auto& [k,v] : ServerSessions) {
+                data.emplace_back(k, v);
+            }
+        } else {
+            auto iter = ServerSessions.find(key);
+            if (iter.IsEnd()) {
+                response->Error = true;
+            } else if (ev->Get()->Generation == iter->second.Generation) {
+                data.emplace_back(key, iter->second);
+            }
+        }
+        ActorContext().Send(ev->Sender, response);
+    }
+
+private:
+    using TServerMessage = StreamDirectReadMessage::FromServer;
+    using TClientMessage = StreamDirectReadMessage::FromClient;
+    using IContext = NGRpcServer::IGRpcStreamingContext<TClientMessage, TServerMessage>;
+
+    bool SendNextReadToClient(TSessionsMap::iterator& sessionIter) {
+        if (sessionIter.IsEnd() || !sessionIter->second.Client.Defined()) {
+            return false;
+        }
+        auto& client = sessionIter->second.Client.GetRef();
+        auto nextData = sessionIter->second.Reads.lower_bound(client.NextReadId);
+        if (nextData == sessionIter->second.Reads.end()) {
+            return false;
+        }
+        auto result = SendData(sessionIter->first.PartitionSessionId, client, nextData->first, nextData->second);
+        if (!result) {
+            //ToDo: for discuss. Error in parsing partition response - shall we kill the entire session or just the partition session?
+            DestroyClientSession(sessionIter, false, Ydb::PersQueue::ErrorCode::OK, "");
+            return false;
+        }
+        client.NextReadId = nextData->first + 1;
+        return true;
+    }
+
+    [[nodiscard]] bool SendData(
+        ui64 partSessionId, TCacheClientContext& proxyClient, ui64 readId, const std::shared_ptr<NKikimrClient::TResponse>& response
+    ) {
+        const auto& ctx = ActorContext();
+        auto message = std::make_shared<StreamDirectReadMessage::FromServer>();
+        auto* directReadMessage = message->mutable_direct_read_response();
+        directReadMessage->set_direct_read_id(readId);
+        directReadMessage->set_partition_session_id(partSessionId);
+
+        auto ok = VaildatePartitionResponse(proxyClient, *response);
+        if (!ok) {
+            return false;
+        }
+
+        FillBatchedData(directReadMessage->mutable_partition_data(), response->GetPartitionResponse().GetCmdReadResult(),
+                        partSessionId);
+        message->set_status(Ydb::StatusIds::SUCCESS);
+
+        LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, TStringBuilder() << "Direct read cache: send data to client. AssignId: "
+            << partSessionId << ", readId: " << readId);
+
+        ctx.Send(proxyClient.ProxyId, new TEvPQProxy::TEvDirectReadSendClientData(std::move(message)));
+        return true;
+    }
+
+    void CloseSession(
+        const TActorId& proxyId,
+        Ydb::PersQueue::ErrorCode::ErrorCode code,
+        const TString& reason
+    ) {
+        const auto& ctx = ActorContext();
+        ctx.Send(proxyId, new TEvPQProxy::TEvDirectReadCloseSession(code, reason));
+        LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, TStringBuilder() << " Direct read cache: close session for proxy " << proxyId.ToString());
+
+    }
+
+    bool DestroyPartitionSession(
+        TSessionsMap::iterator sessionIter, Ydb::PersQueue::ErrorCode::ErrorCode code, const TString& reason
+    ) {
+        if (sessionIter.IsEnd() || !sessionIter->second.Client.Defined()) {
+            return false;
+        }
+
+        const auto& ctx = ActorContext();
+        ctx.Send(
+            sessionIter->second.Client->ProxyId, new TEvPQProxy::TEvDirectReadDestroyPartitionSession(sessionIter->first, code, reason)
+        );
+        LOG_DEBUG_S(
+            ctx, NKikimrServices::PQ_READ_PROXY,
+            TStringBuilder() << " Direct read cache: close session for proxy "
+                << sessionIter->second.Client->ProxyId.ToString()
+        );
+        return true;
+    }
+
+    void ChangeCounterValue(const TString& name, i64 value, bool isAbs) {
+        if (!Counters)
+            return;
+        auto counter = Counters->GetCounter(name, false);
+        if (isAbs)
+            counter->Set(value);
+        else if (value >= 0)
+            counter->Add(value);
+        else
+            counter->Sub(-value);
+    }
+
+    bool VaildatePartitionResponse(
+        TCacheClientContext& proxyClient, NKikimrClient::TResponse& response
+    ) {
+        if (response.HasErrorCode() && response.GetErrorCode() != NPersQueue::NErrorCode::OK) {
+            CloseSession(
+                proxyClient.ProxyId,
+                NGRpcProxy::V1::ConvertOldCode(response.GetErrorCode()),
+                "Status is not ok: " + response.GetErrorReason()
+            );
+            return false;
+        }
+
+        if (response.GetStatus() != NKikimr::NMsgBusProxy::MSTATUS_OK) { //this is incorrect answer, die
+            CloseSession(
+                proxyClient.ProxyId,
+                Ydb::PersQueue::ErrorCode::ERROR,
+                "Status is not ok: " + response.GetErrorReason()
+            );
+            return false;
+        }
+        if (!response.HasPartitionResponse()) { //this is incorrect answer, die
+            CloseSession(
+                proxyClient.ProxyId,
+                Ydb::PersQueue::ErrorCode::ERROR,
+                "Direct read cache got empty partition response"
+            );
+            return false;
+        }
+
+        const auto& partResponse = response.GetPartitionResponse();
+        if (!partResponse.HasCmdReadResult()) { //this is incorrect answer, die
+            CloseSession(
+                proxyClient.ProxyId,
+                Ydb::PersQueue::ErrorCode::ERROR,
+                "Malformed response from partition"
+            );
+            return false;
+        }
+        return true;
+    }
+
+    void FillBatchedData(auto* partitionData, const NKikimrClient::TCmdReadResult& res, ui64 assignId) {
+        partitionData->set_partition_session_id(assignId);
+
+        i32 batchCodec = 0; // UNSPECIFIED
+
+        StreamReadMessage::ReadResponse::Batch* currentBatch = nullptr;
+        for (ui32 i = 0; i < res.ResultSize(); ++i) {
+            const auto& r = res.GetResult(i);
+
+            auto proto(GetDeserializedData(r.GetData()));
+            if (proto.GetChunkType() != NKikimrPQClient::TDataChunk::REGULAR) {
+                continue; //TODO - no such chunks must be on prod
+            }
+
+            TString sourceId;
+            if (!r.GetSourceId().empty()) {
+                sourceId = NPQ::NSourceIdEncoding::Decode(r.GetSourceId());
+            }
+
+            i64 currBatchWrittenAt = currentBatch ? ::google::protobuf::util::TimeUtil::TimestampToMilliseconds(currentBatch->written_at()) : 0;
+            if (currentBatch == nullptr || currBatchWrittenAt != static_cast<i64>(r.GetWriteTimestampMS()) ||
+                currentBatch->producer_id() != sourceId ||
+                GetDataChunkCodec(proto) != batchCodec
+            ) {
+                // If write time and source id are the same, the rest fields will be the same too.
+                currentBatch = partitionData->add_batches();
+                i64 write_ts = static_cast<i64>(r.GetWriteTimestampMS());
+                Y_ABORT_UNLESS(write_ts >= 0);
+                *currentBatch->mutable_written_at() = ::google::protobuf::util::TimeUtil::MillisecondsToTimestamp(write_ts);
+                currentBatch->set_producer_id(std::move(sourceId));
+                batchCodec = GetDataChunkCodec(proto);
+                currentBatch->set_codec(batchCodec);
+
+                if (proto.HasMeta()) {
+                    const auto& header = proto.GetMeta();
+                    if (header.HasServer()) {
+                        (*currentBatch->mutable_write_session_meta())["server"] = header.GetServer();
+                    }
+                    if (header.HasFile()) {
+                        (*currentBatch->mutable_write_session_meta())["file"] = header.GetFile();
+                    }
+                    if (header.HasIdent()) {
+                        (*currentBatch->mutable_write_session_meta())["ident"] = header.GetIdent();
+                    }
+                    if (header.HasLogType()) {
+                        (*currentBatch->mutable_write_session_meta())["logtype"] = header.GetLogType();
+                    }
+                }
+                if (proto.HasExtraFields()) {
+                    const auto& map = proto.GetExtraFields();
+                    for (const auto& kv : map.GetItems()) {
+                        (*currentBatch->mutable_write_session_meta())[kv.GetKey()] = kv.GetValue();
+                    }
+                }
+
+                if (proto.HasIp() && IsUtf(proto.GetIp())) {
+                    (*currentBatch->mutable_write_session_meta())["_ip"] = proto.GetIp();
+                }
+            }
+
+            auto* message = currentBatch->add_message_data();
+
+            message->set_seq_no(r.GetSeqNo());
+            message->set_offset(r.GetOffset());
+            message->set_data(proto.GetData());
+            message->set_uncompressed_size(r.GetUncompressedSize());
+
+            *message->mutable_created_at() =
+                ::google::protobuf::util::TimeUtil::MillisecondsToTimestamp(r.GetCreateTimestampMS());
+
+            message->set_message_group_id(currentBatch->producer_id());
+            auto* msgMeta = message->mutable_metadata_items();
+            *msgMeta = (proto.GetMessageMeta());
+        }
+    }
+
+private:
+    TSessionsMap ServerSessions;
+    THashMap<TActorId, TSet<ui64>> AssignByProxy;
+
+    ::NMonitoring::TDynamicCounterPtr Counters;
+};
+
+
+IActor* CreatePQDReadCacheService(const ::NMonitoring::TDynamicCounterPtr& counters) {
+    Y_VERIFY_DEBUG(counters);
+    return new TPQDirectReadCacheService(
+        GetServiceCounters(counters, "persqueue")->GetSubgroup("subsystem", "caching_service"));
+}
+
+} // namespace NKikimr::NPQ
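`FillBatchedData` above groups raw partition results into client-visible batches, and the split rule is worth spelling out: a message starts a new batch whenever its write timestamp, decoded producer (source) id, or codec differs from the current batch, relying on the commented invariant that equal write time and source id imply the remaining batch attributes match. A standalone restatement of that predicate (plain C++ with mock types, not YDB code):

```cpp
#include <cstdint>
#include <iostream>
#include <string>

// The attributes that determine batch membership in FillBatchedData above.
struct TBatchKey {
    int64_t WrittenAtMs = 0;
    std::string ProducerId;
    int32_t Codec = 0;
};

// nullptr mirrors currentBatch == nullptr: no batch has been opened yet.
bool NeedNewBatch(const TBatchKey* current, const TBatchKey& next) {
    return current == nullptr
        || current->WrittenAtMs != next.WrittenAtMs
        || current->ProducerId != next.ProducerId
        || current->Codec != next.Codec;
}

int main() {
    TBatchKey a{1000, "producer-a", 1};
    TBatchKey b{1000, "producer-a", 1};
    TBatchKey c{1000, "producer-b", 1};
    std::cout << NeedNewBatch(nullptr, a) << "\n";  // 1: first message opens a batch
    std::cout << NeedNewBatch(&a, b) << "\n";       // 0: same key, same batch
    std::cout << NeedNewBatch(&a, c) << "\n";       // 1: producer changed, new batch
    return 0;
}
```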
diff --git a/ydb/core/persqueue/dread_cache_service/caching_service.h b/ydb/core/persqueue/dread_cache_service/caching_service.h
new file mode 100644
index 0000000000..8ae61876c5
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/caching_service.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include <ydb/library/actors/core/actor.h>
+#include <ydb/core/persqueue/events/internal.h>
+
+namespace NKikimr::NPQ {
+
+inline NActors::TActorId MakePQDReadCacheServiceActorId() {
+    return NActors::TActorId(0, "PQCacheProxy");
+}
+
+IActor* CreatePQDReadCacheService(const ::NMonitoring::TDynamicCounterPtr& counters);
+
+} // namespace
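Between `caching_service.cpp` and the bookkeeping structures added to `events/internal.h` further below, the cache follows a small set of invariants: a read is staged, then published, then delivered at most once per client cursor, and every mutation is fenced by the tablet generation (a registration with a newer generation evicts the older session; stale events are dropped). A self-contained model of those rules (plain C++, not YDB code):

```cpp
#include <cstdint>
#include <iostream>
#include <map>
#include <string>

// Minimal model of TCacheServiceData: ordered maps keyed by read id, plus
// the client's resume cursor (TCacheClientContext::NextReadId).
struct TSessionModel {
    uint32_t Generation = 0;
    std::map<uint64_t, std::string> Staged;     // staged, not yet visible
    std::map<uint64_t, std::string> Published;  // visible, awaiting delivery
    uint64_t NextReadId = 1;                    // first read id not yet sent
};

bool Stage(TSessionModel& s, uint32_t gen, uint64_t readId, std::string data) {
    if (gen != s.Generation) return false;                    // generation fencing
    return s.Staged.emplace(readId, std::move(data)).second;  // duplicates ignored
}

bool Publish(TSessionModel& s, uint32_t gen, uint64_t readId) {
    if (gen != s.Generation) return false;
    auto it = s.Staged.find(readId);
    if (it == s.Staged.end()) return false;  // publishing an unknown read fails
    s.Published.insert(*it);
    s.Staged.erase(it);
    return true;
}

// Mirrors SendNextReadToClient: the ordered map plus lower_bound give
// in-order, resumable delivery starting from the client's cursor.
const std::string* SendNext(TSessionModel& s) {
    auto it = s.Published.lower_bound(s.NextReadId);
    if (it == s.Published.end()) return nullptr;
    s.NextReadId = it->first + 1;
    return &it->second;
}

int main() {
    TSessionModel s;
    s.Generation = 2;
    Stage(s, 2, 1, "read#1");
    Stage(s, 1, 2, "stale write");  // old generation: dropped
    Publish(s, 2, 1);
    if (const auto* d = SendNext(s)) std::cout << *d << "\n";  // prints "read#1"
    std::cout << (SendNext(s) == nullptr) << "\n";             // 1: cursor advanced
    return 0;
}
```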
diff --git a/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.darwin-arm64.txt b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.darwin-arm64.txt
new file mode 100644
index 0000000000..a8a2d23b42
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.darwin-arm64.txt
@@ -0,0 +1,79 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(ydb-core-persqueue-dread_cache_service-ut)
+target_compile_options(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+  -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_include_directories(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+  ${CMAKE_SOURCE_DIR}/ydb/core/persqueue
+)
+target_link_libraries(ydb-core-persqueue-dread_cache_service-ut PUBLIC
+  contrib-libs-cxxsupp
+  yutil
+  cpp-testing-unittest_main
+  ydb-core-persqueue
+  persqueue-ut-common
+  core-testlib-default
+  ydb_persqueue_core-ut-ut_utils
+)
+target_link_options(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+  -Wl,-platform_version,macos,11.0,11.0
+  -fPIC
+  -fPIC
+  -framework
+  CoreFoundation
+)
+target_sources(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+  ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/dread_cache_service/ut/caching_proxy_ut.cpp
+)
+set_property(
+  TARGET
+  ydb-core-persqueue-dread_cache_service-ut
+  PROPERTY
+  SPLIT_FACTOR
+  10
+)
+add_yunittest(
+  NAME
+  ydb-core-persqueue-dread_cache_service-ut
+  TEST_TARGET
+  ydb-core-persqueue-dread_cache_service-ut
+  TEST_ARG
+  --print-before-suite
+  --print-before-test
+  --fork-tests
+  --print-times
+  --show-fails
+)
+set_yunittest_property(
+  TEST
+  ydb-core-persqueue-dread_cache_service-ut
+  PROPERTY
+  LABELS
+  MEDIUM
+)
+set_yunittest_property(
+  TEST
+  ydb-core-persqueue-dread_cache_service-ut
+  PROPERTY
+  PROCESSORS
+  1
+)
+set_yunittest_property(
+  TEST
+  ydb-core-persqueue-dread_cache_service-ut
+  PROPERTY
+  TIMEOUT
+  60
+)
+target_allocator(ydb-core-persqueue-dread_cache_service-ut
+  system_allocator
+)
+vcs_info(ydb-core-persqueue-dread_cache_service-ut)
diff --git a/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.darwin-x86_64.txt b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..4809c9a471
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,80 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(ydb-core-persqueue-dread_cache_service-ut)
+target_compile_options(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+  -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_include_directories(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+  ${CMAKE_SOURCE_DIR}/ydb/core/persqueue
+)
+target_link_libraries(ydb-core-persqueue-dread_cache_service-ut PUBLIC
+  contrib-libs-cxxsupp
+  yutil
+  library-cpp-cpuid_check
+  cpp-testing-unittest_main
+  ydb-core-persqueue
+  persqueue-ut-common
+  core-testlib-default
+  ydb_persqueue_core-ut-ut_utils
+)
+target_link_options(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+  -Wl,-platform_version,macos,11.0,11.0
+  -fPIC
+  -fPIC
+  -framework
+  CoreFoundation
+)
+target_sources(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+  ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/dread_cache_service/ut/caching_proxy_ut.cpp
+)
+set_property(
+  TARGET
+  ydb-core-persqueue-dread_cache_service-ut
+  PROPERTY
+  SPLIT_FACTOR
+  10
+)
+add_yunittest(
+  NAME
+  ydb-core-persqueue-dread_cache_service-ut
+  TEST_TARGET
+  ydb-core-persqueue-dread_cache_service-ut
+  TEST_ARG
+  --print-before-suite
+  --print-before-test
+  --fork-tests
+  --print-times
+  --show-fails
+)
+set_yunittest_property(
+  TEST
+  ydb-core-persqueue-dread_cache_service-ut
+  PROPERTY
+  LABELS
+  MEDIUM
+)
+set_yunittest_property(
+  TEST
+  ydb-core-persqueue-dread_cache_service-ut
+  PROPERTY
+  PROCESSORS
+  1
+)
+set_yunittest_property(
+  TEST
+  ydb-core-persqueue-dread_cache_service-ut
+  PROPERTY
+  TIMEOUT
+  60
+)
+target_allocator(ydb-core-persqueue-dread_cache_service-ut
+  system_allocator
+)
+vcs_info(ydb-core-persqueue-dread_cache_service-ut)
diff --git a/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.linux-aarch64.txt b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..6fba9d2680
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,83 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(ydb-core-persqueue-dread_cache_service-ut)
+target_compile_options(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+  -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_include_directories(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+  ${CMAKE_SOURCE_DIR}/ydb/core/persqueue
+)
+target_link_libraries(ydb-core-persqueue-dread_cache_service-ut PUBLIC
+  contrib-libs-linux-headers
+  contrib-libs-cxxsupp
+  yutil
+  cpp-testing-unittest_main
+  ydb-core-persqueue
+  persqueue-ut-common
+  core-testlib-default
+  ydb_persqueue_core-ut-ut_utils
+)
+target_link_options(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+  -ldl
+  -lrt
+  -Wl,--no-as-needed
+  -fPIC
+  -fPIC
+  -lpthread
+  -lrt
+  -ldl
+)
+target_sources(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+  ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/dread_cache_service/ut/caching_proxy_ut.cpp
+)
+set_property(
+  TARGET
+  ydb-core-persqueue-dread_cache_service-ut
+  PROPERTY
+  SPLIT_FACTOR
+  10
+)
+add_yunittest(
+  NAME
+  ydb-core-persqueue-dread_cache_service-ut
+  TEST_TARGET
+  ydb-core-persqueue-dread_cache_service-ut
+  TEST_ARG
+  --print-before-suite
+  --print-before-test
+  --fork-tests
+  --print-times
+  --show-fails
+)
+set_yunittest_property(
+  TEST
+  ydb-core-persqueue-dread_cache_service-ut
+  PROPERTY
+  LABELS
+  MEDIUM
+)
+set_yunittest_property(
+  TEST
+  ydb-core-persqueue-dread_cache_service-ut
+  PROPERTY
+  PROCESSORS
+  1
+)
+set_yunittest_property(
+  TEST
+  ydb-core-persqueue-dread_cache_service-ut
+  PROPERTY
+  TIMEOUT
+  60
+)
+target_allocator(ydb-core-persqueue-dread_cache_service-ut
+  cpp-malloc-jemalloc
+)
+vcs_info(ydb-core-persqueue-dread_cache_service-ut)
diff --git a/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.linux-x86_64.txt b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..a8c5e30700
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,85 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(ydb-core-persqueue-dread_cache_service-ut)
+target_compile_options(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+  -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_include_directories(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+  ${CMAKE_SOURCE_DIR}/ydb/core/persqueue
+)
+target_link_libraries(ydb-core-persqueue-dread_cache_service-ut PUBLIC
+  contrib-libs-linux-headers
+  contrib-libs-cxxsupp
+  yutil
+  library-cpp-cpuid_check
+  cpp-testing-unittest_main
+  ydb-core-persqueue
+  persqueue-ut-common
+  core-testlib-default
+  ydb_persqueue_core-ut-ut_utils
+)
+target_link_options(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+  -ldl
+  -lrt
+  -Wl,--no-as-needed
+  -fPIC
+  -fPIC
+  -lpthread
+  -lrt
+  -ldl
+)
+target_sources(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+  ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/dread_cache_service/ut/caching_proxy_ut.cpp
+)
+set_property(
+  TARGET
+  ydb-core-persqueue-dread_cache_service-ut
+  PROPERTY
+  SPLIT_FACTOR
+  10
+)
+add_yunittest(
+  NAME
+  ydb-core-persqueue-dread_cache_service-ut
+  TEST_TARGET
+  ydb-core-persqueue-dread_cache_service-ut
+  TEST_ARG
+  --print-before-suite
+  --print-before-test
+  --fork-tests
+  --print-times
+  --show-fails
+)
+set_yunittest_property(
+  TEST
+  ydb-core-persqueue-dread_cache_service-ut
+  PROPERTY
+  LABELS
+  MEDIUM
+)
+set_yunittest_property(
+  TEST
+  ydb-core-persqueue-dread_cache_service-ut
+  PROPERTY
+  PROCESSORS
+  1
+)
+set_yunittest_property(
+  TEST
+  ydb-core-persqueue-dread_cache_service-ut
+  PROPERTY
+  TIMEOUT
+  60
+)
+target_allocator(ydb-core-persqueue-dread_cache_service-ut
+  cpp-malloc-tcmalloc
+  libs-tcmalloc-no_percpu_cache
+)
+vcs_info(ydb-core-persqueue-dread_cache_service-ut)
diff --git a/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.txt b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.txt
new file mode 100644
index 0000000000..d863ebd180
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+  include(CMakeLists.linux-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+  include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+  include(CMakeLists.darwin-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
+  include(CMakeLists.darwin-arm64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+  include(CMakeLists.windows-x86_64.txt)
+endif()
diff --git a/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.windows-x86_64.txt b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..1c348b2635
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,73 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(ydb-core-persqueue-dread_cache_service-ut)
+target_compile_options(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+  -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_include_directories(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+  ${CMAKE_SOURCE_DIR}/ydb/core/persqueue
+)
+target_link_libraries(ydb-core-persqueue-dread_cache_service-ut PUBLIC
+  contrib-libs-cxxsupp
+  yutil
+  library-cpp-cpuid_check
+  cpp-testing-unittest_main
+  ydb-core-persqueue
+  persqueue-ut-common
+  core-testlib-default
+  ydb_persqueue_core-ut-ut_utils
+)
+target_sources(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+  ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/dread_cache_service/ut/caching_proxy_ut.cpp
+)
+set_property(
+  TARGET
+  ydb-core-persqueue-dread_cache_service-ut
+  PROPERTY
+  SPLIT_FACTOR
+  10
+)
+add_yunittest(
+  NAME
+  ydb-core-persqueue-dread_cache_service-ut
+  TEST_TARGET
+  ydb-core-persqueue-dread_cache_service-ut
+  TEST_ARG
+  --print-before-suite
+  --print-before-test
+  --fork-tests
+  --print-times
+  --show-fails
+)
+set_yunittest_property(
+  TEST
+  ydb-core-persqueue-dread_cache_service-ut
+  PROPERTY
+  LABELS
+  MEDIUM
+)
+set_yunittest_property(
+  TEST
+  ydb-core-persqueue-dread_cache_service-ut
+  PROPERTY
+  PROCESSORS
+  1
+)
+set_yunittest_property(
+  TEST
+  ydb-core-persqueue-dread_cache_service-ut
+  PROPERTY
+  TIMEOUT
+  60
+)
+target_allocator(ydb-core-persqueue-dread_cache_service-ut
+  system_allocator
+)
+vcs_info(ydb-core-persqueue-dread_cache_service-ut)
diff --git a/ydb/core/persqueue/dread_cache_service/ut/caching_proxy_ut.cpp b/ydb/core/persqueue/dread_cache_service/ut/caching_proxy_ut.cpp
new file mode 100644
index 0000000000..0280e72592
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/ut/caching_proxy_ut.cpp
@@ -0,0 +1,237 @@
+#include <ydb/core/persqueue/dread_cache_service/caching_service.h>
+#include <ydb/core/persqueue/ut/common/pq_ut_common.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+namespace NKikimr::NPQ {
+
+Y_UNIT_TEST_SUITE(TPQCachingProxyTest) {
+
+struct TTestSetup {
+    TTestContext Context;
+    TActorId ProxyId;
+
+    TTestSetup() {
+        Context.Prepare();
+        Context.Runtime->SetLogPriority(NKikimrServices::PQ_READ_PROXY, NLog::PRI_DEBUG);
+        ProxyId = Context.Runtime->Register(CreatePQDReadCacheService(new NMonitoring::TDynamicCounters()));
+        Context.Runtime->AllocateEdgeActor();
+        TDispatchOptions opts;
+        opts.FinalEvents.emplace_back(TEvents::TEvBootstrap::EventType, 1);
+        Context.Runtime->DispatchEvents(opts);
+    }
+
+    auto* GetRuntime() {
+        return Context.Runtime.Get();
+    }
+
+    THolder<TEvPQ::TEvGetFullDirectReadData> SendRequest(TEvPQ::TEvGetFullDirectReadData* request, bool status = true) {
+        GetRuntime()->Send(ProxyId, Context.Edge, request);
+        auto resp = GetRuntime()->GrabEdgeEvent<TEvPQ::TEvGetFullDirectReadData>();
+        UNIT_ASSERT(resp);
+        UNIT_ASSERT(resp->Error != status);
+        return resp;
+    }
+};
+
+Y_UNIT_TEST(TestPublishAndForget) {
+    TTestSetup setup;
+    auto runtime = setup.GetRuntime();
+    auto resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData());
+    UNIT_ASSERT(resp->Data.empty());
+
+    {
+        auto* reg = new TEvPQ::TEvRegisterDirectReadSession({"session1", 1}, 1);
+        runtime->Send(setup.ProxyId, TActorId{}, reg);
+    }
+    resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData({"session1", 1}, 1));
+    UNIT_ASSERT_VALUES_EQUAL(resp->Data.size(), 1);
+    UNIT_ASSERT(resp->Data[0].second.Reads.empty());
+    {
+        auto* reg = new TEvPQ::TEvStageDirectReadData(
+            {"session1", 1, 1}, 1, std::make_shared<NKikimrClient::TResponse>()
+        );
+        runtime->Send(setup.ProxyId, TActorId{}, reg);
+    }
+    {
+        auto* reg = new TEvPQ::TEvPublishDirectRead(
+            {"session1", 1, 1},
+            1
+        );
+        runtime->Send(setup.ProxyId, TActorId{}, reg);
+    }
+    resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData({"session1", 1}, 1));
+    UNIT_ASSERT(!resp->Error);
+    UNIT_ASSERT_VALUES_EQUAL(resp->Data[0].second.Reads.size(), 1);
+    {
+        auto* reg = new TEvPQ::TEvForgetDirectRead(
+            {"session1", 1, 1}, 1
+        );
+        runtime->Send(setup.ProxyId, TActorId{}, reg);
+    }
+    resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData({"session1", 1}, 1));
+    UNIT_ASSERT_VALUES_EQUAL(resp->Data[0].second.Reads.size(), 0);
+}
+
+Y_UNIT_TEST(TestDeregister) {
+    TTestSetup setup;
+    auto runtime = setup.GetRuntime();
+    {
+        auto* reg = new TEvPQ::TEvRegisterDirectReadSession({"session1", 1}, 1);
+        runtime->Send(setup.ProxyId, TActorId{}, reg);
+    }
+    {
+        auto* reg = new TEvPQ::TEvRegisterDirectReadSession({"session2", 1}, 1);
+        runtime->Send(setup.ProxyId, TActorId{}, reg);
+    }
+    auto resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData(
+        {"session1", 1}, 1)
+    );
+    UNIT_ASSERT_VALUES_EQUAL(resp->Data.size(), 1);
+    UNIT_ASSERT(resp->Data[0].second.Reads.empty());
+    resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData());
+    UNIT_ASSERT_VALUES_EQUAL(resp->Data.size(), 2);
+    {
+        auto* reg = new TEvPQ::TEvDeregisterDirectReadSession({"session1", 1}, 1);
+        runtime->Send(setup.ProxyId, TActorId{}, reg);
+    }
+    resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData());
+    UNIT_ASSERT_VALUES_EQUAL(resp->Data.size(), 1);
+}
+
+Y_UNIT_TEST(TestWrongSessionOrGeneration) {
+    TTestSetup setup;
+    auto runtime = setup.GetRuntime();
+    runtime->Send(
+        setup.ProxyId, TActorId{},
+        new TEvPQ::TEvRegisterDirectReadSession({"session1", 1}, 2)
+    );
+    {
+        auto* reg = new TEvPQ::TEvStageDirectReadData(
+            {"session1", 1, 1}, 2, std::make_shared<NKikimrClient::TResponse>()
+        );
+        runtime->Send(setup.ProxyId, TActorId{}, reg);
+    }
+    runtime->Send(
+        setup.ProxyId, TActorId{},
+        new TEvPQ::TEvPublishDirectRead({"session1", 1, 1}, 2)
+    );
+
+    // Session with old id, shold not have any effect
+    runtime->Send(
+        setup.ProxyId, TActorId{},
+        new TEvPQ::TEvRegisterDirectReadSession({"session1", 1}, 1)
+    );
+    {
+        auto* reg = new TEvPQ::TEvStageDirectReadData(
+            {"session1", 1, 1}, 1, std::make_shared<NKikimrClient::TResponse>()
+        );
+        runtime->Send(setup.ProxyId, TActorId{}, reg);
+    }
+    runtime->Send(
+        setup.ProxyId, TActorId{},
+        new TEvPQ::TEvPublishDirectRead({"session1", 1, 1}, 1)
+    );
+
+    auto resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData({"session1", 1}, 1));
+    UNIT_ASSERT_VALUES_EQUAL(resp->Data.size(), 0);
+
+    // Forget with old generation, should have no effect
+    runtime->Send(
+        setup.ProxyId, TActorId{},
+        new TEvPQ::TEvForgetDirectRead({"session1", 1, 1}, 1)
+    );
+
+    resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData({"session1", 1}, 2));
+    UNIT_ASSERT_VALUES_EQUAL(resp->Data.size(), 1);
+    UNIT_ASSERT_VALUES_EQUAL(resp->Data[0].second.Reads.size(), 1);
+
+    resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData({"session-2", 1}, 2), false);
+    resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData({"session1", 99}, 2), false);
+}
+
+Y_UNIT_TEST(OutdatedSession) {
+    TTestSetup setup;
+    auto runtime = setup.GetRuntime();
+    runtime->Send(
+        setup.ProxyId, TActorId{},
+        new TEvPQ::TEvRegisterDirectReadSession({"session1", 1}, 1)
+    );
+    {
+        auto* reg = new TEvPQ::TEvStageDirectReadData(
+            {"session1", 1, 1}, 1, std::make_shared<NKikimrClient::TResponse>()
+        );
+        runtime->Send(setup.ProxyId, TActorId{}, reg);
+    }
+    runtime->Send(
+        setup.ProxyId, TActorId{},
+        new TEvPQ::TEvPublishDirectRead({"session1", 1, 1}, 1)
+    );
+
+    runtime->Send(
+        setup.ProxyId, TActorId{},
+        new TEvPQ::TEvRegisterDirectReadSession({"session1", 1}, 2)
+    );
+
+    auto resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData({"session1", 1}, 1));
+    UNIT_ASSERT(resp->Data.empty());
+}
+
+
+Y_UNIT_TEST(MultipleSessions) {
+    TTestSetup setup;
+    auto runtime = setup.GetRuntime();
+    runtime->Send(
+        setup.ProxyId, TActorId{},
+        new TEvPQ::TEvRegisterDirectReadSession({"session1", 1}, 1)
+    );
+    {
+        auto* reg = new TEvPQ::TEvStageDirectReadData(
+            {"session1", 1, 1}, 1, std::make_shared<NKikimrClient::TResponse>()
+        );
+        runtime->Send(setup.ProxyId, TActorId{}, reg);
+    }
+    {
+        auto* reg = new TEvPQ::TEvStageDirectReadData(
+            {"session1", 1, 2}, 1, std::make_shared<NKikimrClient::TResponse>()
+        );
+        runtime->Send(setup.ProxyId, TActorId{}, reg);
+    }
+    runtime->Send(
+        setup.ProxyId, TActorId{},
+        new TEvPQ::TEvPublishDirectRead({"session1", 1, 1}, 1)
+    );
+    runtime->Send(
+        setup.ProxyId, TActorId{},
+        new TEvPQ::TEvPublishDirectRead({"session1", 1, 2}, 1)
+    );
+
+    runtime->Send(
+        setup.ProxyId, TActorId{},
+        new TEvPQ::TEvRegisterDirectReadSession({"session2", 1}, 2)
+    );
+    {
+        auto* reg = new TEvPQ::TEvStageDirectReadData(
+            {"session2", 1, 3}, 2, std::make_shared<NKikimrClient::TResponse>()
+        );
+        runtime->Send(setup.ProxyId, TActorId{}, reg);
+    }
+    runtime->Send(
+        setup.ProxyId, TActorId{},
+        new TEvPQ::TEvPublishDirectRead({"session2", 1, 3}, 2)
+    );
+
+    auto resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData());
+    UNIT_ASSERT_VALUES_EQUAL(resp->Data.size(), 2);
+    for (const auto& [key, data] : resp->Data) {
+        if (key.SessionId == "session1") {
+            UNIT_ASSERT_VALUES_EQUAL(data.Generation, 1);
+            UNIT_ASSERT_VALUES_EQUAL(data.Reads.size(), 2);
+            auto iter = data.Reads.begin();
+            UNIT_ASSERT_VALUES_EQUAL(iter->first, 1);
+            UNIT_ASSERT_VALUES_EQUAL((++iter)->first, 2);
+        }else if (key.SessionId == "session2") {
+            UNIT_ASSERT_VALUES_EQUAL(data.Generation, 2);
+            UNIT_ASSERT_VALUES_EQUAL(data.Reads.size(), 1);
+            UNIT_ASSERT_VALUES_EQUAL(data.Reads.begin()->first, 3);
+        }
+    }
+}
+} // Test suite
+
+} //namespace NKikimr::NPQ
diff --git a/ydb/core/persqueue/dread_cache_service/ut/ya.make b/ydb/core/persqueue/dread_cache_service/ut/ya.make
new file mode 100644
index 0000000000..58f1a6f991
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/ut/ya.make
@@ -0,0 +1,29 @@
+UNITTEST_FOR(ydb/core/persqueue)
+
+FORK_SUBTESTS()
+
+IF (SANITIZER_TYPE == "thread" OR WITH_VALGRIND)
+    SIZE(LARGE)
+    TAG(ya:fat)
+    TIMEOUT(300)
+ELSE()
+    SIZE(MEDIUM)
+    TIMEOUT(60)
+ENDIF()
+
+PEERDIR(
+    ydb/core/persqueue/ut/common
+    ydb/core/testlib/default
+    ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils
+)
+
+YQL_LAST_ABI_VERSION()
+
+SRCS(
+    caching_proxy_ut.cpp
+)
+
+# RESOURCE(
+# )
+
+END()
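The suite above drives the cache purely through messages, and `TEvPQ::TEvGetFullDirectReadData` doubles as its introspection hook: an empty session key returns a snapshot of every server session, a concrete key returns that session only when the generation matches, and an unknown key sets `Error`. A small helper in the same style as `TTestSetup::SendRequest` (assumption: a prepared `TTestActorRuntime` and edge actor, exactly as the suite sets up):

```cpp
// Sketch, modeled on TTestSetup::SendRequest above.
THolder<TEvPQ::TEvGetFullDirectReadData> QueryCache(
        TTestActorRuntime& runtime, const TActorId& cacheId, const TActorId& edge,
        TEvPQ::TEvGetFullDirectReadData* query) {
    runtime.Send(cacheId, edge, query);  // empty key in `query` => dump all sessions
    auto resp = runtime.GrabEdgeEvent<TEvPQ::TEvGetFullDirectReadData>();
    UNIT_ASSERT(resp);                   // the service always replies to the sender
    return resp;                         // resp->Error is set for unknown sessions
}
```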
<ydb/library/actors/core/event_local.h> #include <ydb/library/actors/core/actorid.h> - +#include <ydb/core/grpc_services/rpc_calls.h> +#include <ydb/public/api/protos/persqueue_error_codes_v1.pb.h> #include <util/generic/maybe.h> namespace NYdb { @@ -22,6 +23,24 @@ namespace NKikimr { namespace NPQ { + struct TCacheClientContext { + TActorId ProxyId; + ui64 NextReadId = 1; + }; + + struct TCacheServiceData { + //ui32 TabletId; + ui32 Generation = 0; + TMap<ui64, std::shared_ptr<NKikimrClient::TResponse>> StagedReads; + TMap<ui64, std::shared_ptr<NKikimrClient::TResponse>> Reads; + TMaybe<TCacheClientContext> Client; + TCacheServiceData() = delete; + + TCacheServiceData(ui32 generation) + : Generation(generation) + {} + }; + struct TRequestedBlob { ui64 Offset; ui16 PartNo; @@ -68,7 +87,7 @@ namespace NPQ { inline bool HasError(const T& event) { return event.Error.HasError(); } -} +} // namespace NPQ; struct TEvPQ { enum EEv { @@ -140,6 +159,16 @@ struct TEvPQ { EvFetchResponse, EvSourceIdRequest, EvSourceIdResponse, + EvPublishRead, + EvForgetRead, + EvRegisterDirectReadSession, + EvRegisterDirectReadSessionResponse, + EvDeregisterDirectReadSession, + EvStageDirectReadData, + EvCacheProxyPublishRead, + EvCacheProxyForgetRead, + EvGetFullDirectReadData, + EvProvideDirectReadInfo, EvEnd }; @@ -196,22 +225,24 @@ struct TEvPQ { }; struct TEvRead : public TEventLocal<TEvRead, EvRead> { - TEvRead(const ui64 cookie, const ui64 offset, const ui16 partNo, const ui32 count, + TEvRead(const ui64 cookie, const ui64 offset, ui64 lastOffset, const ui16 partNo, const ui32 count, const TString& sessionId, const TString& clientId, const ui32 timeout, const ui32 size, const ui32 maxTimeLagMs, const ui64 readTimestampMs, const TString& clientDC, - bool externalOperation) - : Cookie(cookie) - , Offset(offset) - , PartNo(partNo) - , Count(count) - , SessionId(sessionId) - , ClientId(clientId) - , Timeout(timeout) - , Size(size) - , MaxTimeLagMs(maxTimeLagMs) - , ReadTimestampMs(readTimestampMs) - , ClientDC(clientDC) - , ExternalOperation(externalOperation) + bool externalOperation, const TActorId& pipeClient) + : Cookie(cookie) + , Offset(offset) + , PartNo(partNo) + , Count(count) + , SessionId(sessionId) + , ClientId(clientId) + , Timeout(timeout) + , Size(size) + , MaxTimeLagMs(maxTimeLagMs) + , ReadTimestampMs(readTimestampMs) + , ClientDC(clientDC) + , ExternalOperation(externalOperation) + , PipeClient(pipeClient) + , LastOffset(lastOffset) {} ui64 Cookie; @@ -226,6 +257,19 @@ struct TEvPQ { ui64 ReadTimestampMs; TString ClientDC; bool ExternalOperation; + TActorId PipeClient; + ui64 LastOffset; + }; + + struct TEvDirectReadBase { + TEvDirectReadBase(ui64 cookie, const NPQ::TDirectReadKey& readKey, const TActorId& pipeClient) + : Cookie(cookie) + , ReadKey(readKey) + , PipeClient(pipeClient) + {} + ui64 Cookie; + NPQ::TDirectReadKey ReadKey; + TActorId PipeClient; }; struct TEvMonRequest : public TEventLocal<TEvMonRequest, EvMonRequest> { @@ -270,18 +314,20 @@ struct TEvPQ { ESCI_DROP_READ_RULE }; - TEvSetClientInfo(const ui64 cookie, const TString& clientId, const ui64 offset, const TString& sessionId, - const ui32 generation, const ui32 step, ESetClientInfoType type = ESCI_OFFSET, - ui64 readRuleGeneration = 0, bool strict = false) + TEvSetClientInfo(const ui64 cookie, const TString& clientId, const ui64 offset, const TString& sessionId, const ui64 partitionSessionId, + const ui32 generation, const ui32 step, const TActorId& pipeClient, + ESetClientInfoType type = ESCI_OFFSET, ui64 readRuleGeneration 
= 0, bool strict = false) : Cookie(cookie) , ClientId(clientId) , Offset(offset) , SessionId(sessionId) + , PartitionSessionId(partitionSessionId) , Generation(generation) , Step(step) , Type(type) , ReadRuleGeneration(readRuleGeneration) , Strict(strict) + , PipeClient(pipeClient) { } @@ -289,13 +335,16 @@ struct TEvPQ { TString ClientId; ui64 Offset; TString SessionId; + ui64 PartitionSessionId; ui32 Generation; ui32 Step; ESetClientInfoType Type; ui64 ReadRuleGeneration; bool Strict; + TActorId PipeClient; }; + struct TEvGetClientOffset : public TEventLocal<TEvGetClientOffset, EvGetClientOffset> { TEvGetClientOffset(const ui64 cookie, const TString& clientId) : Cookie(cookie) @@ -365,10 +414,11 @@ struct TEvPQ { struct TEvProxyResponse : public TEventLocal<TEvProxyResponse, EvProxyResponse> { TEvProxyResponse(ui64 cookie) - : Cookie(cookie) + : Cookie(cookie) + , Response(std::make_shared<NKikimrClient::TResponse>()) {} ui64 Cookie; - NKikimrClient::TResponse Response; + std::shared_ptr<NKikimrClient::TResponse> Response; }; struct TEvInitComplete : public TEventLocal<TEvInitComplete, EvInitComplete> { @@ -458,8 +508,8 @@ struct TEvPQ { struct TEvPipeDisconnected : public TEventLocal<TEvPipeDisconnected, EvPipeDisconnected> { explicit TEvPipeDisconnected(const TString& owner, const TActorId& pipeClient) - : Owner(owner) - , PipeClient(pipeClient) + : Owner(owner) + , PipeClient(pipeClient) {} TString Owner; @@ -874,6 +924,71 @@ struct TEvPQ { struct TEvSourceIdResponse : public TEventPB<TEvSourceIdResponse, NKikimrPQ::TEvSourceIdResponse, EvSourceIdResponse> { }; + + struct TEvRegisterDirectReadSession : public TEventLocal<TEvRegisterDirectReadSession, EvRegisterDirectReadSession> { + TEvRegisterDirectReadSession(const NPQ::TReadSessionKey& sessionKey, ui32 tabletGeneration) + : Session(sessionKey) + , Generation(tabletGeneration) + {} + NPQ::TReadSessionKey Session; + ui32 Generation; + }; + + struct TEvDeregisterDirectReadSession : public TEventLocal<TEvDeregisterDirectReadSession, EvDeregisterDirectReadSession> { + TEvDeregisterDirectReadSession(const NPQ::TReadSessionKey& sessionKey, ui32 tabletGeneration) + : Session(sessionKey) + , Generation(tabletGeneration) + {} + NPQ::TReadSessionKey Session; + ui32 Generation; + }; + + struct TEvStageDirectReadData : public TEventLocal<TEvStageDirectReadData, EvStageDirectReadData> { + TEvStageDirectReadData(const NPQ::TDirectReadKey& readKey, ui32 tabletGeneration, + const std::shared_ptr<NKikimrClient::TResponse>& response) + : TabletGeneration(tabletGeneration) + , ReadKey(readKey) + , Response(response) + {} + ui32 TabletGeneration; + NPQ::TDirectReadKey ReadKey; + std::shared_ptr<NKikimrClient::TResponse> Response; + }; + + struct TEvPublishDirectRead : public TEventLocal<TEvPublishDirectRead, EvCacheProxyPublishRead> { + TEvPublishDirectRead(const NPQ::TDirectReadKey& readKey, ui32 tabletGeneration) + : ReadKey(readKey) + , TabletGeneration(tabletGeneration) + {} + NPQ::TDirectReadKey ReadKey; + ui32 TabletGeneration; + }; + + struct TEvForgetDirectRead : public TEventLocal<TEvForgetDirectRead, EvCacheProxyForgetRead> { + TEvForgetDirectRead(const NPQ::TDirectReadKey& readKey, ui32 tabletGeneration) + : TabletGeneration(tabletGeneration) + , ReadKey(readKey) + {} + ui32 TabletGeneration; + NPQ::TDirectReadKey ReadKey; + }; + + struct TEvGetFullDirectReadData : public TEventLocal<TEvGetFullDirectReadData, EvGetFullDirectReadData> { + TEvGetFullDirectReadData() = default; + TEvGetFullDirectReadData(const NPQ::TReadSessionKey& key, ui32 
generation) + : ReadKey(key) + , Generation(generation) + {} + + NPQ::TReadSessionKey ReadKey; + ui32 Generation; + bool Error = false; + TVector<std::pair<NPQ::TReadSessionKey, NPQ::TCacheServiceData>> Data; + }; + + struct TEvProvideDirectReadInfo : public TEventLocal<TEvProvideDirectReadInfo, EvProvideDirectReadInfo> { + }; + }; } //NKikimr diff --git a/ydb/core/persqueue/key.h b/ydb/core/persqueue/key.h index f1aa0fd506..4be6be821a 100644 --- a/ydb/core/persqueue/key.h +++ b/ydb/core/persqueue/key.h @@ -3,6 +3,7 @@ #include <util/generic/buffer.h> #include <util/string/cast.h> #include <util/string/printf.h> +#include <util/str_stl.h> namespace NKikimr { namespace NPQ { @@ -252,5 +253,34 @@ TString GetTxKey(ui64 txId) return Sprintf("tx_%" PRIu64, txId); } + +struct TReadSessionKey { + TString SessionId; + ui64 PartitionSessionId = 0; + bool operator ==(const TReadSessionKey& rhs) const { + return SessionId == rhs.SessionId && PartitionSessionId == rhs.PartitionSessionId; + } +}; + +struct TDirectReadKey { + TString SessionId; + ui64 PartitionSessionId = 0; + ui64 ReadId = 0; + bool operator ==(const TDirectReadKey& rhs) const { + return SessionId == rhs.SessionId && PartitionSessionId == rhs.PartitionSessionId && ReadId == rhs.ReadId; + } +}; + }// NPQ }// NKikimr + +template <> +struct THash<NKikimr::NPQ::TReadSessionKey> { +public: + inline size_t operator()(const NKikimr::NPQ::TReadSessionKey& key) const { + size_t res = 0; + res += THash<TString>()(key.SessionId); + res += THash<ui64>()(key.PartitionSessionId); + return res; + } +}; diff --git a/ydb/core/persqueue/partition.cpp b/ydb/core/persqueue/partition.cpp index 1fc027d1b0..7d814dd3f2 100644 --- a/ydb/core/persqueue/partition.cpp +++ b/ydb/core/persqueue/partition.cpp @@ -137,13 +137,14 @@ void AddCheckDiskRequest(TEvKeyValue::TEvRequest *request, ui32 numChannels) { } } -TPartition::TPartition(ui64 tabletId, ui32 partition, const TActorId& tablet, const TActorId& blobCache, +TPartition::TPartition(ui64 tabletId, ui32 partition, const TActorId& tablet, ui32 tabletGeneration, const TActorId& blobCache, const NPersQueue::TTopicConverterPtr& topicConverter, TString dcId, bool isServerless, const NKikimrPQ::TPQTabletConfig& tabletConfig, const TTabletCountersBase& counters, bool subDomainOutOfSpace, ui32 numChannels, bool newPartition, TVector<TTransaction> distrTxs) : Initializer(this) , TabletID(tabletId) + , TabletGeneration(tabletGeneration) , Partition(partition) , TabletConfig(tabletConfig) , Counters(counters) @@ -468,7 +469,6 @@ void TPartition::Handle(TEvents::TEvPoisonPill::TPtr&, const TActorContext& ctx) Die(ctx); } - bool CheckDiskStatus(const TStorageStatusFlags status) { return !status.Check(NKikimrBlobStorage::StatusDiskSpaceYellowStop); } @@ -581,6 +581,7 @@ void TPartition::Handle(TEvPQ::TEvPipeDisconnected::TPtr& ev, const TActorContex DropOwner(it, ctx); ProcessChangeOwnerRequests(ctx); } + } void TPartition::Handle(TEvPQ::TEvPartitionStatus::TPtr& ev, const TActorContext& ctx) { @@ -929,7 +930,7 @@ void TPartition::ProcessMaxSeqNoRequest(const TActorContext& ctx) { auto& ev = MaxSeqNoRequests.front(); auto response = MakeHolder<TEvPQ::TEvProxyResponse>(ev->Get()->Cookie); - NKikimrClient::TResponse& resp = response->Response; + NKikimrClient::TResponse& resp = *response->Response; resp.SetStatus(NMsgBusProxy::MSTATUS_OK); resp.SetErrorCode(NPersQueue::NErrorCode::OK); @@ -969,15 +970,18 @@ void TPartition::Handle(TEvPQ::TEvBlobResponse::TPtr& ev, const TActorContext& c auto it = ReadInfo.find(cookie); 
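 // The cookie identifies a read this partition issued earlier; the entry is
 // detached from ReadInfo below and answered exactly once. Note that
 // TEvProxyResponse now holds a std::shared_ptr<NKikimrClient::TResponse>, so
 // the same prepared response object can later be handed to the direct read
 // cache service without being copied.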
Y_ABORT_UNLESS(it != ReadInfo.end()); + TReadInfo info = std::move(it->second); ReadInfo.erase(it); //make readinfo class + auto& userInfo = UsersInfoStorage->GetOrCreate(info.User, ctx); TReadAnswer answer(info.FormAnswer( - ctx, *ev->Get(), EndOffset, Partition, &UsersInfoStorage->GetOrCreate(info.User, ctx), + ctx, *ev->Get(), EndOffset, Partition, &userInfo, info.Destination, GetSizeLag(info.Offset), Tablet, Config.GetMeteringMode() )); - + const auto& resp = dynamic_cast<TEvPQ::TEvProxyResponse*>(answer.Event.Get())->Response; + if (HasError(*ev->Get())) { if (info.IsSubscription) { TabletCounters.Cumulative()[COUNTER_PQ_READ_SUBSCRIPTION_ERROR].Increment(1); @@ -988,10 +992,9 @@ void TPartition::Handle(TEvPQ::TEvBlobResponse::TPtr& ev, const TActorContext& c if (info.IsSubscription) { TabletCounters.Cumulative()[COUNTER_PQ_READ_SUBSCRIPTION_OK].Increment(1); } - const auto& resp = dynamic_cast<TEvPQ::TEvProxyResponse*>(answer.Event.Get())->Response; TabletCounters.Cumulative()[COUNTER_PQ_READ_OK].Increment(1); TabletCounters.Percentile()[COUNTER_LATENCY_PQ_READ_OK].IncrementFor((ctx.Now() - info.Timestamp).MilliSeconds()); - TabletCounters.Cumulative()[COUNTER_PQ_READ_BYTES].Increment(resp.ByteSize()); + TabletCounters.Cumulative()[COUNTER_PQ_READ_BYTES].Increment(resp->ByteSize()); } ctx.Send(info.Destination != 0 ? Tablet : ctx.SelfID, answer.Event.Release()); OnReadRequestFinished(cookie, answer.Size, info.User, ctx); @@ -1693,7 +1696,7 @@ void TPartition::BeginChangePartitionConfig(const NKikimrPQ::TPQTabletConfig& co ui64 rrGen = i < config.ReadRuleGenerationsSize() ? config.GetReadRuleGenerations(i) : 0; if (userInfo.ReadRuleGeneration != rrGen) { - TEvPQ::TEvSetClientInfo act(0, consumer, 0, "", 0, 0, + TEvPQ::TEvSetClientInfo act(0, consumer, 0, "", 0, 0, 0, TActorId{}, TEvPQ::TEvSetClientInfo::ESCI_INIT_READ_RULE, rrGen); ProcessUserAct(act, ctx); @@ -1703,8 +1706,8 @@ void TPartition::BeginChangePartitionConfig(const NKikimrPQ::TPQTabletConfig& co for (auto& consumer : hasReadRule) { GetOrCreatePendingUser(consumer); - TEvPQ::TEvSetClientInfo act(0, consumer, - 0, "", 0, 0, TEvPQ::TEvSetClientInfo::ESCI_DROP_READ_RULE, 0); + TEvPQ::TEvSetClientInfo act(0, consumer, 0, "", 0, 0, 0, TActorId{}, + TEvPQ::TEvSetClientInfo::ESCI_DROP_READ_RULE, 0); ProcessUserAct(act, ctx); } @@ -1991,10 +1994,16 @@ void TPartition::ProcessUserAct(TEvPQ::TEvSetClientInfo& act, return; } - if (act.Type == TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION && act.SessionId == userInfo.Session) { //this is retry of current request, answer ok - auto *ui = UsersInfoStorage->GetIfExists(userInfo.User); + if ( //this is retry of current request, answer ok + act.Type == TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION + && act.SessionId == userInfo.Session + && act.Generation == userInfo.Generation + && act.Step == userInfo.Step + ) { + auto* ui = UsersInfoStorage->GetIfExists(userInfo.User); auto ts = ui ? 
GetTime(*ui, userInfo.Offset) : std::make_pair<TInstant, TInstant>(TInstant::Zero(), TInstant::Zero()); + userInfo.PipeClient = act.PipeClient; ScheduleReplyGetClientOffsetOk(act.Cookie, userInfo.Offset, ts.first, ts.second); @@ -2113,6 +2122,7 @@ void TPartition::EmulatePostProcessUserAct(const TEvPQ::TEvSetClientInfo& act, userInfo.ReadRuleGeneration = readRuleGeneration; userInfo.Session = ""; + userInfo.PartitionSessionId = 0; userInfo.Generation = userInfo.Step = 0; userInfo.Offset = 0; @@ -2136,10 +2146,14 @@ void TPartition::EmulatePostProcessUserAct(const TEvPQ::TEvSetClientInfo& act, userInfo.Session = session; userInfo.Generation = generation; userInfo.Step = step; - } else if (dropSession || strictCommitOffset) { + userInfo.PipeClient = act.PipeClient; + userInfo.PartitionSessionId = act.PartitionSessionId; + } else if ((dropSession && act.PipeClient == userInfo.PipeClient) || strictCommitOffset) { userInfo.Session = ""; + userInfo.PartitionSessionId = 0; userInfo.Generation = 0; userInfo.Step = 0; + userInfo.PipeClient = {}; } Y_ABORT_UNLESS(offset <= (ui64)Max<i64>(), "Unexpected Offset: %" PRIu64, offset); @@ -2293,7 +2307,8 @@ void TPartition::AddCmdWriteUserInfos(NKikimrClient::TKeyValueRequest& request) auto *ui = UsersInfoStorage->GetIfExists(user); AddCmdWrite(request, ikey, ikeyDeprecated, - userInfo->Offset, userInfo->Generation, userInfo->Step, userInfo->Session, + userInfo->Offset, userInfo->Generation, userInfo->Step, + userInfo->Session, ui ? ui->ReadOffsetRewindSum : 0, userInfo->ReadRuleGeneration); } else { @@ -2327,11 +2342,13 @@ TUserInfoBase& TPartition::GetOrCreatePendingUser(const TString& user, auto i = PendingUsersInfo.find(user); if (i == PendingUsersInfo.end()) { auto ui = UsersInfoStorage->GetIfExists(user); - auto [p, _] = PendingUsersInfo.emplace(user, UsersInfoStorage->CreateUserInfo(user, - readRuleGeneration)); + auto [p, _] = PendingUsersInfo.emplace(user, UsersInfoStorage->CreateUserInfo(user, readRuleGeneration)); if (ui) { p->second.Session = ui->Session; + p->second.PartitionSessionId = ui->PartitionSessionId; + p->second.PipeClient = ui->PipeClient; + p->second.Generation = ui->Generation; p->second.Step = ui->Step; p->second.Offset = ui->Offset; @@ -2362,7 +2379,7 @@ TUserInfoBase* TPartition::GetPendingUserIfExists(const TString& user) THolder<TEvPQ::TEvProxyResponse> TPartition::MakeReplyOk(const ui64 dst) { auto response = MakeHolder<TEvPQ::TEvProxyResponse>(dst); - NKikimrClient::TResponse& resp = response->Response; + NKikimrClient::TResponse& resp = *response->Response; resp.SetStatus(NMsgBusProxy::MSTATUS_OK); resp.SetErrorCode(NPersQueue::NErrorCode::OK); @@ -2375,7 +2392,7 @@ THolder<TEvPQ::TEvProxyResponse> TPartition::MakeReplyGetClientOffsetOk(const ui const TInstant writeTimestamp, const TInstant createTimestamp) { auto response = MakeHolder<TEvPQ::TEvProxyResponse>(dst); - NKikimrClient::TResponse& resp = response->Response; + NKikimrClient::TResponse& resp = *response->Response; resp.SetStatus(NMsgBusProxy::MSTATUS_OK); resp.SetErrorCode(NPersQueue::NErrorCode::OK); diff --git a/ydb/core/persqueue/partition.h b/ydb/core/persqueue/partition.h index ff7dda0150..914e330e7c 100644 --- a/ydb/core/persqueue/partition.h +++ b/ydb/core/persqueue/partition.h @@ -225,8 +225,11 @@ private: THashMap<TString, TOwnerInfo>::iterator DropOwner(THashMap<TString, TOwnerInfo>::iterator& it, const TActorContext& ctx); // will return rcount and rsize also - TVector<TRequestedBlob> GetReadRequestFromBody(const ui64 startOffset, const ui16 partNo, 
const ui32 maxCount, const ui32 maxSize, ui32* rcount, ui32* rsize); - TVector<TClientBlob> GetReadRequestFromHead(const ui64 startOffset, const ui16 partNo, const ui32 maxCount, const ui32 maxSize, const ui64 readTimestampMs, ui32* rcount, ui32* rsize, ui64* insideHeadOffset); + TVector<TRequestedBlob> GetReadRequestFromBody(const ui64 startOffset, const ui16 partNo, const ui32 maxCount, + const ui32 maxSize, ui32* rcount, ui32* rsize, ui64 lastOffset); + TVector<TClientBlob> GetReadRequestFromHead(const ui64 startOffset, const ui16 partNo, const ui32 maxCount, + const ui32 maxSize, const ui64 readTimestampMs, ui32* rcount, + ui32* rsize, ui64* insideHeadOffset, ui64 lastOffset); ui64 GetUsedStorage(const TActorContext& ctx); @@ -335,16 +338,21 @@ private: void ChangePlanStepAndTxId(ui64 step, ui64 txId); void ResendPendingEvents(const TActorContext& ctx); + void SendReadPreparedProxyResponse(const TReadAnswer& answer, const TReadInfo& readInfo, TUserInfo& user); + + void CheckIfSessionExists(TUserInfoBase& userInfo, const TActorId& newPipe); + // void DestroyReadSession(const TReadSessionKey& key); void Handle(TEvPQ::TEvSourceIdRequest::TPtr& ev, const TActorContext& ctx); TString LogPrefix() const; + public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::PERSQUEUE_PARTITION_ACTOR; } - TPartition(ui64 tabletId, ui32 partition, const TActorId& tablet, const TActorId& blobCache, + TPartition(ui64 tabletId, ui32 partition, const TActorId& tablet, ui32 tabletGeneration, const TActorId& blobCache, const NPersQueue::TTopicConverterPtr& topicConverter, TString dcId, bool isServerless, const NKikimrPQ::TPQTabletConfig& config, const TTabletCountersBase& counters, bool SubDomainOutOfSpace, ui32 numChannels, bool newPartition = false, @@ -566,6 +574,7 @@ private: private: ui64 TabletID; + ui32 TabletGeneration; ui32 Partition; NKikimrPQ::TPQTabletConfig Config; NKikimrPQ::TPQTabletConfig TabletConfig; @@ -748,5 +757,5 @@ private: TDeque<std::unique_ptr<IEventBase>> PendingEvents; }; - } // namespace NKikimr::NPQ + diff --git a/ydb/core/persqueue/partition_read.cpp b/ydb/core/persqueue/partition_read.cpp index 136fd9b3ad..12d7877153 100644 --- a/ydb/core/persqueue/partition_read.cpp +++ b/ydb/core/persqueue/partition_read.cpp @@ -3,6 +3,7 @@ #include "partition_util.h" #include "partition.h" #include "read.h" +#include "dread_cache_service/caching_service.h" #include <ydb/core/base/appdata.h> #include <ydb/core/base/blobstorage.h> @@ -42,8 +43,9 @@ void TPartition::FillReadFromTimestamps(const NKikimrPQ::TPQTabletConfig& config userInfo.HasReadRule = true; ui64 rrGen = i < config.ReadRuleGenerationsSize() ? 
config.GetReadRuleGenerations(i) : 0; if (userInfo.ReadRuleGeneration != rrGen) { - THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(0, consumer, 0, "", 0, 0, - TEvPQ::TEvSetClientInfo::ESCI_INIT_READ_RULE, rrGen); + THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>( + 0, consumer, 0, "", 0, 0, 0, TActorId{}, TEvPQ::TEvSetClientInfo::ESCI_INIT_READ_RULE, rrGen + ); // // TODO(abcdef): заменить на вызов ProcessUserAct // @@ -66,8 +68,9 @@ void TPartition::FillReadFromTimestamps(const NKikimrPQ::TPQTabletConfig& config if (userInfo.NoConsumer) { continue; } - THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(0, consumer, - 0, "", 0, 0, TEvPQ::TEvSetClientInfo::ESCI_DROP_READ_RULE, 0); + THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>( + 0, consumer, 0, "", 0, 0, 0, TActorId{}, TEvPQ::TEvSetClientInfo::ESCI_DROP_READ_RULE, 0 + ); if (!userInfo.Important && userInfo.LabeledCounters) { ctx.Send(Tablet, new TEvPQ::TEvPartitionLabeledCountersDrop(Partition, userInfo.LabeledCounters->GetGroup())); } @@ -184,7 +187,9 @@ void TPartition::InitUserInfoForImportantClients(const TActorContext& ctx) { continue; } if (!userInfo) { - userInfo = &UsersInfoStorage->Create(ctx, importantUser, 0, true, "", 0, 0, 0, 0, TInstant::Zero()); + userInfo = &UsersInfoStorage->Create( + ctx, importantUser, 0, true, "", 0, 0, 0, 0, 0, TInstant::Zero(), {} + ); } if (userInfo->Offset < (i64)StartOffset) userInfo->Offset = StartOffset; @@ -311,21 +316,21 @@ TReadAnswer TReadInfo::FormAnswer( const ui64 endOffset, const ui32 partition, TUserInfo* userInfo, - const ui64 cookie, + const ui64 destination, const ui64 sizeLag, const TActorId& tablet, const NKikimrPQ::TPQTabletConfig::EMeteringMode meteringMode ) { Y_UNUSED(meteringMode); Y_UNUSED(partition); - THolder<TEvPQ::TEvProxyResponse> answer = MakeHolder<TEvPQ::TEvProxyResponse>(cookie); - NKikimrClient::TResponse& res = answer->Response; + THolder<TEvPQ::TEvProxyResponse> answer = MakeHolder<TEvPQ::TEvProxyResponse>(destination); + NKikimrClient::TResponse& res = *answer->Response; const TEvPQ::TEvBlobResponse* response = &blobResponse; - if (HasError(blobResponse)) { + Error = true; return TReadAnswer{ blobResponse.Error.ErrorStr.size(), - MakeHolder<TEvPQ::TEvError>(blobResponse.Error.ErrorCode, blobResponse.Error.ErrorStr, cookie) + MakeHolder<TEvPQ::TEvError>(blobResponse.Error.ErrorCode, blobResponse.Error.ErrorStr, destination) }; } @@ -335,6 +340,7 @@ TReadAnswer TReadInfo::FormAnswer( readResult->SetWaitQuotaTimeMs(WaitQuotaTime.MilliSeconds()); readResult->SetMaxOffset(endOffset); readResult->SetRealReadOffset(Offset); + ui64 realReadOffset = Offset; readResult->SetReadFromTimestampMs(ReadTimestampMs); Y_ABORT_UNLESS(endOffset <= (ui64)Max<i64>(), "Max offset is too big: %" PRIu64, endOffset); @@ -380,12 +386,18 @@ TReadAnswer TReadInfo::FormAnswer( if (blobValue.empty()) { // this is ok. 
Means that someone requested too much data or retention race LOG_DEBUG(ctx, NKikimrServices::PERSQUEUE, "Not full answer here!"); - ui64 answerSize = answer->Response.ByteSize(); + ui64 answerSize = answer->Response->ByteSize(); if (userInfo && Destination != 0) { userInfo->ReadDone(ctx, ctx.Now(), answerSize, cnt, ClientDC, tablet, IsExternalRead); } readResult->SetSizeLag(sizeLag - size); + RealReadOffset = realReadOffset; + LastOffset = Offset - 1; + SizeEstimate = answerSize; + readResult->SetSizeEstimate(SizeEstimate); + readResult->SetLastOffset(LastOffset); + readResult->SetEndOffset(endOffset); return {answerSize, std::move(answer)}; } Y_ABORT_UNLESS(blobValue.size() == blobs[pos].Size, "value for offset %" PRIu64 " count %u size must be %u, but got %u", @@ -434,7 +446,6 @@ TReadAnswer TReadInfo::FormAnswer( } AddResultBlob(readResult, res, Offset); - if (res.IsLastPart()) { PartNo = 0; ++Offset; @@ -470,7 +481,6 @@ TReadAnswer TReadInfo::FormAnswer( ); } AddResultBlob(readResult, writeBlob, Offset); - if (writeBlob.IsLastPart()) { ++Offset; } @@ -480,13 +490,20 @@ TReadAnswer TReadInfo::FormAnswer( } } Y_ABORT_UNLESS(Offset <= (ui64)Max<i64>(), "Offset is too big: %" PRIu64, Offset); - ui64 answerSize = answer->Response.ByteSize(); + ui64 answerSize = answer->Response->ByteSize(); if (userInfo && Destination != 0) { userInfo->ReadDone(ctx, ctx.Now(), answerSize, cnt, ClientDC, tablet, IsExternalRead); } readResult->SetSizeLag(sizeLag - size); + RealReadOffset = realReadOffset; + LastOffset = Offset - 1; + SizeEstimate = answerSize; + readResult->SetSizeEstimate(SizeEstimate); + readResult->SetLastOffset(LastOffset); + readResult->SetEndOffset(endOffset); + return {answerSize, std::move(answer)}; } @@ -494,7 +511,9 @@ void TPartition::Handle(TEvPQ::TEvReadTimeout::TPtr& ev, const TActorContext& ct auto res = Subscriber.OnTimeout(ev); if (!res) return; - TReadAnswer answer(res->FormAnswer(ctx, res->Offset, Partition, nullptr, res->Destination, 0, Tablet, Config.GetMeteringMode())); + TReadAnswer answer(res->FormAnswer( + ctx, res->Offset, Partition, nullptr, res->Destination, 0, Tablet, Config.GetMeteringMode() + )); ctx.Send(Tablet, answer.Event.Release()); LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, " waiting read cookie " << ev->Get()->Cookie << " partition " << Partition << " read timeout for " << res->User << " offset " << res->Offset); @@ -505,7 +524,9 @@ void TPartition::Handle(TEvPQ::TEvReadTimeout::TPtr& ev, const TActorContext& ct } -TVector<TRequestedBlob> TPartition::GetReadRequestFromBody(const ui64 startOffset, const ui16 partNo, const ui32 maxCount, const ui32 maxSize, ui32* rcount, ui32* rsize) { +TVector<TRequestedBlob> TPartition::GetReadRequestFromBody( + const ui64 startOffset, const ui16 partNo, const ui32 maxCount, const ui32 maxSize, ui32* rcount, ui32* rsize, ui64 lastOffset +) { Y_ABORT_UNLESS(rcount && rsize); ui32& count = *rcount; ui32& size = *rsize; @@ -533,7 +554,10 @@ TVector<TRequestedBlob> TPartition::GetReadRequestFromBody(const ui64 startOffse cnt = it->Key.GetCount() - (startOffset - it->Key.GetOffset()); //don't count all elements from first blob sz = (cnt == it->Key.GetCount() ? 
it->Size : 0); //not readed client blobs can be of ~8Mb, so don't count this size at all } - while (it != DataKeysBody.end() && (size < maxSize && count < maxCount || count == 0)) { //count== 0 grants that blob with offset from ReadFromTimestamp will be readed + while (it != DataKeysBody.end() + && (size < maxSize && count < maxCount || count == 0) //count== 0 grants that blob with offset from ReadFromTimestamp will be readed + && (lastOffset == 0 || it->Key.GetOffset() < lastOffset) + ) { size += sz; count += cnt; TRequestedBlob reqBlob(it->Key.GetOffset(), it->Key.GetPartNo(), it->Key.GetCount(), @@ -550,7 +574,10 @@ TVector<TRequestedBlob> TPartition::GetReadRequestFromBody(const ui64 startOffse return blobs; } -TVector<TClientBlob> TPartition::GetReadRequestFromHead(const ui64 startOffset, const ui16 partNo, const ui32 maxCount, const ui32 maxSize, const ui64 readTimestampMs, ui32* rcount, ui32* rsize, ui64* insideHeadOffset) { +TVector<TClientBlob> TPartition::GetReadRequestFromHead( + const ui64 startOffset, const ui16 partNo, const ui32 maxCount, const ui32 maxSize, const ui64 readTimestampMs, ui32* rcount, + ui32* rsize, ui64* insideHeadOffset, ui64 lastOffset +) { ui32& count = *rcount; ui32& size = *rsize; TVector<TClientBlob> res; @@ -581,7 +608,11 @@ TVector<TClientBlob> TPartition::GetReadRequestFromHead(const ui64 startOffset, } else { ++pno; } + if (lastOffset > 0 && offset >= lastOffset) + break; + if (skip) continue; + if (blobs[i].IsLastPart()) { bool messageSkippingBehaviour = AppData()->PQConfig.GetTopicsAreFirstClassCitizen() && readTimestampMs > blobs[i].WriteTimestamp.MilliSeconds(); @@ -650,13 +681,12 @@ void TPartition::Handle(TEvPQ::TEvRead::TPtr& ev, const TActorContext& ctx) { read->Offset << ", " << read->PartNo << " EndOffset " << EndOffset); return; } - const TString& user = read->ClientId; Y_ABORT_UNLESS(read->Offset <= EndOffset); auto& userInfo = UsersInfoStorage->GetOrCreate(user, ctx); - + if (!read->SessionId.empty() && !userInfo.NoConsumer) { if (userInfo.Session != read->SessionId) { TabletCounters.Cumulative()[COUNTER_PQ_READ_ERROR_NO_SESSION].Increment(1); @@ -694,7 +724,10 @@ void TPartition::DoRead(TEvPQ::TEvRead::TPtr ev, TDuration waitQuotaTime, const userInfo->ReadOffsetRewindSum += offset - read->Offset; } - TReadInfo info(user, read->ClientDC, offset, read->PartNo, read->Count, read->Size, read->Cookie, read->ReadTimestampMs, waitQuotaTime, read->ExternalOperation); + TReadInfo info( + user, read->ClientDC, offset, read->LastOffset, read->PartNo, read->Count, read->Size, read->Cookie, read->ReadTimestampMs, + waitQuotaTime, read->ExternalOperation, userInfo->PipeClient + ); ui64 cookie = Cookie++; @@ -703,8 +736,7 @@ void TPartition::DoRead(TEvPQ::TEvRead::TPtr ev, TDuration waitQuotaTime, const "read cookie " << cookie << " Topic '" << TopicConverter->GetClientsideName() << "' partition " << Partition << " user " << user << " offset " << read->Offset << " count " << read->Count << " size " << read->Size << " endOffset " << EndOffset - << " max time lag " << read->MaxTimeLagMs << "ms effective offset " << offset - ); + << " max time lag " << read->MaxTimeLagMs << "ms effective offset " << offset); if (offset == EndOffset) { @@ -791,10 +823,9 @@ void TPartition::ReadTimestampForOffset(const TString& user, TUserInfo& userInfo << " ReadingTimestamp " << ReadingTimestamp << " rrg " << ReadingForUserReadRuleGeneration ); - - THolder<TEvPQ::TEvRead> event = MakeHolder<TEvPQ::TEvRead>(0, userInfo.Offset, 0, 1, "", + THolder<TEvPQ::TEvRead> event = 
MakeHolder<TEvPQ::TEvRead>(0, userInfo.Offset, 0, 0, 1, "", user, 0, MAX_BLOB_PART_SIZE * 2, 0, 0, "", - false); + false, TActorId{}); ctx.Send(ctx.SelfID, event.Release()); TabletCounters.Cumulative()[COUNTER_PQ_WRITE_TIMESTAMP_CACHE_MISS].Increment(1); @@ -841,15 +872,16 @@ void TPartition::Handle(TEvPQ::TEvProxyResponse::TPtr& ev, const TActorContext& LOG_INFO_S( ctx, NKikimrServices::PERSQUEUE, - "Reading Timestamp failed for offset " << ReadingForOffset << " ( "<< userInfo->Offset << " ) " << ev->Get()->Response.DebugString() + "Reading Timestamp failed for offset " << ReadingForOffset << " ( "<< userInfo->Offset << " ) " + << ev->Get()->Response->DebugString() ); - if (ev->Get()->Response.GetStatus() == NMsgBusProxy::MSTATUS_OK && - ev->Get()->Response.GetErrorCode() == NPersQueue::NErrorCode::OK && - ev->Get()->Response.GetPartitionResponse().HasCmdReadResult() && - ev->Get()->Response.GetPartitionResponse().GetCmdReadResult().ResultSize() > 0 && - (i64)ev->Get()->Response.GetPartitionResponse().GetCmdReadResult().GetResult(0).GetOffset() >= userInfo->Offset) { + if (ev->Get()->Response->GetStatus() == NMsgBusProxy::MSTATUS_OK && + ev->Get()->Response->GetErrorCode() == NPersQueue::NErrorCode::OK && + ev->Get()->Response->GetPartitionResponse().HasCmdReadResult() && + ev->Get()->Response->GetPartitionResponse().GetCmdReadResult().ResultSize() > 0 && + (i64)ev->Get()->Response->GetPartitionResponse().GetCmdReadResult().GetResult(0).GetOffset() >= userInfo->Offset) { //offsets is inside gap - return timestamp of first record after gap - const auto& res = ev->Get()->Response.GetPartitionResponse().GetCmdReadResult().GetResult(0); + const auto& res = ev->Get()->Response->GetPartitionResponse().GetCmdReadResult().GetResult(0); userInfo->WriteTimestamp = TInstant::MilliSeconds(res.GetWriteTimestampMS()); userInfo->CreateTimestamp = TInstant::MilliSeconds(res.GetCreateTimestampMS()); userInfo->ActualTimestamps = true; @@ -898,7 +930,9 @@ void TPartition::ProcessRead(const TActorContext& ctx, TReadInfo&& info, const u userInfo.ForgetSubscription(ctx.Now()); } - TVector<TRequestedBlob> blobs = GetReadRequestFromBody(info.Offset, info.PartNo, info.Count, info.Size, &count, &size); + TVector<TRequestedBlob> blobs = GetReadRequestFromBody( + info.Offset, info.PartNo, info.Count, info.Size, &count, &size, info.LastOffset + ); info.Blobs = blobs; ui64 lastOffset = info.Offset + Min(count, info.Count); LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "read cookie " << cookie << " added " << info.Blobs.size() @@ -906,7 +940,10 @@ void TPartition::ProcessRead(const TActorContext& ctx, TReadInfo&& info, const u if (blobs.empty() || blobs.back().Key == DataKeysBody.back().Key) { // read from head only when all blobs from body processed ui64 insideHeadOffset{0}; - info.Cached = GetReadRequestFromHead(info.Offset, info.PartNo, info.Count, info.Size, info.ReadTimestampMs, &count, &size, &insideHeadOffset); + info.Cached = GetReadRequestFromHead( + info.Offset, info.PartNo, info.Count, info.Size, info.ReadTimestampMs, &count, + &size, &insideHeadOffset, info.LastOffset + ); info.CachedOffset = insideHeadOffset; } if (info.Destination != 0) { @@ -927,19 +964,23 @@ void TPartition::ProcessRead(const TActorContext& ctx, TReadInfo&& info, const u } TabletCounters.Cumulative()[COUNTER_PQ_READ_HEAD_ONLY_OK].Increment(1); TabletCounters.Percentile()[COUNTER_LATENCY_PQ_READ_HEAD_ONLY].IncrementFor((ctx.Now() - info.Timestamp).MilliSeconds()); - 
TabletCounters.Cumulative()[COUNTER_PQ_READ_BYTES].Increment(resp.ByteSize()); + + TabletCounters.Cumulative()[COUNTER_PQ_READ_BYTES].Increment(resp->ByteSize()); + ctx.Send(info.Destination != 0 ? Tablet : ctx.SelfID, answer.Event.Release()); - OnReadRequestFinished(info.Destination, answer.Size, info.User, ctx); + OnReadRequestFinished(cookie, answer.Size, info.User, ctx); return; } const TString user = info.User; bool res = ReadInfo.insert({cookie, std::move(info)}).second; + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Reading cookie " << cookie << ". Send blob request."); Y_ABORT_UNLESS(res); THolder<TEvPQ::TEvBlobRequest> request(new TEvPQ::TEvBlobRequest(user, cookie, Partition, lastOffset, std::move(blobs))); + ctx.Send(BlobCache, request.Release()); } diff --git a/ydb/core/persqueue/partition_types.h b/ydb/core/persqueue/partition_types.h index dd9d74b95d..a84cc54e86 100644 --- a/ydb/core/persqueue/partition_types.h +++ b/ydb/core/persqueue/partition_types.h @@ -127,4 +127,6 @@ struct TDataKey { ui64 CumulativeSize; }; -} // namespace NKikimr + +} // namespace NKikimr::NPQ + diff --git a/ydb/core/persqueue/partition_write.cpp b/ydb/core/persqueue/partition_write.cpp index 19dfd9476b..204d96b38c 100644 --- a/ydb/core/persqueue/partition_write.cpp +++ b/ydb/core/persqueue/partition_write.cpp @@ -35,7 +35,7 @@ void TPartition::ReplyOwnerOk(const TActorContext& ctx, const ui64 dst, const TS LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "TPartition::ReplyOwnerOk. Partition: " << Partition); THolder<TEvPQ::TEvProxyResponse> response = MakeHolder<TEvPQ::TEvProxyResponse>(dst); - NKikimrClient::TResponse& resp = response->Response; + NKikimrClient::TResponse& resp = *response->Response; resp.SetStatus(NMsgBusProxy::MSTATUS_OK); resp.SetErrorCode(NPersQueue::NErrorCode::OK); resp.MutablePartitionResponse()->MutableCmdGetOwnershipResult()->SetOwnerCookie(cookie); @@ -53,7 +53,7 @@ void TPartition::ReplyWrite( Y_ABORT_UNLESS(seqNo <= (ui64)Max<i64>(), "SeqNo is too big: %" PRIu64, seqNo); THolder<TEvPQ::TEvProxyResponse> response = MakeHolder<TEvPQ::TEvProxyResponse>(dst); - NKikimrClient::TResponse& resp = response->Response; + NKikimrClient::TResponse& resp = *response->Response; resp.SetStatus(NMsgBusProxy::MSTATUS_OK); resp.SetErrorCode(NPersQueue::NErrorCode::OK); auto write = resp.MutablePartitionResponse()->AddCmdWriteResult(); @@ -1334,7 +1334,6 @@ void TPartition::AddNewWriteBlob(std::pair<TKey, ui32>& res, TEvKeyValue::TEvReq void TPartition::SetDeadlinesForWrites(const TActorContext& ctx) { PQ_LOG_T("TPartition::SetDeadlinesForWrites."); - if (AppData(ctx)->PQConfig.GetQuotingConfig().GetQuotaWaitDurationMs() > 0 && QuotaDeadline == TInstant::Zero()) { QuotaDeadline = ctx.Now() + TDuration::MilliSeconds(AppData(ctx)->PQConfig.GetQuotingConfig().GetQuotaWaitDurationMs()); diff --git a/ydb/core/persqueue/percentile_counter.cpp b/ydb/core/persqueue/percentile_counter.cpp index f97130bb47..f9d95fad81 100644 --- a/ydb/core/persqueue/percentile_counter.cpp +++ b/ydb/core/persqueue/percentile_counter.cpp @@ -7,6 +7,7 @@ namespace NKikimr { namespace NPQ { + TMultiCounter::TMultiCounter(::NMonitoring::TDynamicCounterPtr counters, const TVector<NPersQueue::TPQLabelsInfo>& labels, const TVector<std::pair<TString, TString>>& subgroups, diff --git a/ydb/core/persqueue/pq_impl.cpp b/ydb/core/persqueue/pq_impl.cpp index acba0a168e..86e066d3b9 100644 --- a/ydb/core/persqueue/pq_impl.cpp +++ b/ydb/core/persqueue/pq_impl.cpp @@ -89,6 +89,10 @@ static TMaybe<TPartitionKeyRange> 
GetPartitionKeyRange(const NKikimrPQ::TPQTable return TPartitionKeyRange::Parse(proto.GetKeyRange()); } +static bool IsDirectReadCmd(const auto& cmd) { + return cmd.GetDirectReadId() != 0; +} + /******************************************************* ReadProxy *********************************************************/ //megaqc - remove it when LB will be ready class TReadProxy : public TActorBootstrapped<TReadProxy> { @@ -97,28 +101,35 @@ public: return NKikimrServices::TActivity::PERSQUEUE_ANS_ACTOR; } - TReadProxy(const TActorId& sender, const TActorId& tablet, const NKikimrClient::TPersQueueRequest& request) - : Sender(sender) - , Tablet(tablet) - , Request(request) - , Response(new TEvPersQueue::TEvResponse) + TReadProxy(const TActorId& sender, const TActorId& tablet, ui64 tabletGeneration, + const TDirectReadKey& directReadKey, const NKikimrClient::TPersQueueRequest& request) + : Sender(sender) + , Tablet(tablet) + , TabletGeneration(tabletGeneration) + , Request(request) + , Response(new TEvPersQueue::TEvResponse) + , DirectReadKey(directReadKey) { Y_ABORT_UNLESS(Request.HasPartitionRequest() && Request.GetPartitionRequest().HasCmdRead()); Y_ABORT_UNLESS(Request.GetPartitionRequest().GetCmdRead().GetPartNo() == 0); //partial request are not allowed, otherwise remove ReadProxy Y_ABORT_UNLESS(!Response->Record.HasPartitionResponse()); + if (!directReadKey.SessionId.Empty()) { + DirectReadKey.ReadId = Request.GetPartitionRequest().GetCmdRead().GetDirectReadId(); + } } - void Bootstrap(const TActorContext&) + void Bootstrap(const TActorContext& ctx) { + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Read proxy: bootstrap for direct read id: " << DirectReadKey.ReadId); Become(&TThis::StateFunc); } private: - void Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorContext& ctx) { Y_ABORT_UNLESS(Response); const auto& record = ev->Get()->Record; + if (!record.HasPartitionResponse() || !record.GetPartitionResponse().HasCmdReadResult() || record.GetStatus() != NMsgBusProxy::MSTATUS_OK || record.GetErrorCode() != NPersQueue::NErrorCode::OK || record.GetPartitionResponse().GetCmdReadResult().ResultSize() == 0) { @@ -127,43 +138,47 @@ private: Die(ctx); return; } - - Y_ABORT_UNLESS(record.HasPartitionResponse() && record.GetPartitionResponse().HasCmdReadResult()); - - const auto& res = record.GetPartitionResponse().GetCmdReadResult(); - - Response->Record.SetStatus(NMsgBusProxy::MSTATUS_OK); - Response->Record.SetErrorCode(NPersQueue::NErrorCode::OK); - - Y_ABORT_UNLESS(res.ResultSize() > 0); + const auto& readResult = record.GetPartitionResponse().GetCmdReadResult(); + auto isDirectRead = IsDirectReadCmd(Request.GetPartitionRequest().GetCmdRead()); + if (isDirectRead) { + if (!PreparedResponse) { + PreparedResponse = std::make_shared<NKikimrClient::TResponse>(); + } + } + + auto& responseRecord = isDirectRead ? 
*PreparedResponse : Response->Record; + responseRecord.SetStatus(NMsgBusProxy::MSTATUS_OK); + responseRecord.SetErrorCode(NPersQueue::NErrorCode::OK); + + Y_ABORT_UNLESS(readResult.ResultSize() > 0); bool isStart = false; - if (!Response->Record.HasPartitionResponse()) { - Y_ABORT_UNLESS(!res.GetResult(0).HasPartNo() || res.GetResult(0).GetPartNo() == 0); //starts from begin of record - auto partResp = Response->Record.MutablePartitionResponse(); + if (!responseRecord.HasPartitionResponse()) { + Y_ABORT_UNLESS(!readResult.GetResult(0).HasPartNo() || readResult.GetResult(0).GetPartNo() == 0); //starts from begin of record + auto partResp = responseRecord.MutablePartitionResponse(); auto readRes = partResp->MutableCmdReadResult(); - readRes->SetBlobsFromDisk(readRes->GetBlobsFromDisk() + res.GetBlobsFromDisk()); - readRes->SetBlobsFromCache(readRes->GetBlobsFromCache() + res.GetBlobsFromCache()); + readRes->SetBlobsFromDisk(readRes->GetBlobsFromDisk() + readResult.GetBlobsFromDisk()); + readRes->SetBlobsFromCache(readRes->GetBlobsFromCache() + readResult.GetBlobsFromCache()); isStart = true; } ui64 readFromTimestampMs = AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen() - ? (isStart ? res.GetReadFromTimestampMs() - : Response->Record.GetPartitionResponse().GetCmdReadResult().GetReadFromTimestampMs()) + ? (isStart ? readResult.GetReadFromTimestampMs() + : responseRecord.GetPartitionResponse().GetCmdReadResult().GetReadFromTimestampMs()) : 0; if (record.GetPartitionResponse().HasCookie()) - Response->Record.MutablePartitionResponse()->SetCookie(record.GetPartitionResponse().GetCookie()); + responseRecord.MutablePartitionResponse()->SetCookie(record.GetPartitionResponse().GetCookie()); - auto partResp = Response->Record.MutablePartitionResponse()->MutableCmdReadResult(); + auto partResp = responseRecord.MutablePartitionResponse()->MutableCmdReadResult(); - partResp->SetMaxOffset(res.GetMaxOffset()); - partResp->SetSizeLag(res.GetSizeLag()); - partResp->SetWaitQuotaTimeMs(partResp->GetWaitQuotaTimeMs() + res.GetWaitQuotaTimeMs()); + partResp->SetMaxOffset(readResult.GetMaxOffset()); + partResp->SetSizeLag(readResult.GetSizeLag()); + partResp->SetWaitQuotaTimeMs(partResp->GetWaitQuotaTimeMs() + readResult.GetWaitQuotaTimeMs()); - partResp->SetRealReadOffset(Max(partResp->GetRealReadOffset(), res.GetRealReadOffset())); + partResp->SetRealReadOffset(Max(partResp->GetRealReadOffset(), readResult.GetRealReadOffset())); - for (ui32 i = 0; i < res.ResultSize(); ++i) { - bool isNewMsg = !res.GetResult(i).HasPartNo() || res.GetResult(i).GetPartNo() == 0; + for (ui32 i = 0; i < readResult.ResultSize(); ++i) { + bool isNewMsg = !readResult.GetResult(i).HasPartNo() || readResult.GetResult(i).GetPartNo() == 0; if (!isStart) { Y_ABORT_UNLESS(partResp->ResultSize() > 0); auto& back = partResp->GetResult(partResp->ResultSize() - 1); @@ -176,25 +191,26 @@ private: } if (isNewMsg) { - if (!isStart && res.GetResult(i).HasTotalParts() && res.GetResult(i).GetTotalParts() + i > res.ResultSize()) //last blob is not full + if (!isStart && readResult.GetResult(i).HasTotalParts() + && readResult.GetResult(i).GetTotalParts() + i > readResult.ResultSize()) //last blob is not full break; - partResp->AddResult()->CopyFrom(res.GetResult(i)); + partResp->AddResult()->CopyFrom(readResult.GetResult(i)); isStart = false; } else { //glue to last res auto rr = partResp->MutableResult(partResp->ResultSize() - 1); - if (rr->GetSeqNo() != res.GetResult(i).GetSeqNo() || rr->GetPartNo() + 1 != res.GetResult(i).GetPartNo()) { + if 
(rr->GetSeqNo() != readResult.GetResult(i).GetSeqNo() || rr->GetPartNo() + 1 != readResult.GetResult(i).GetPartNo()) { LOG_CRIT_S(ctx, NKikimrServices::PERSQUEUE, "Handle TEvRead tablet: " << Tablet << " last read pos (seqno/parno): " << rr->GetSeqNo() << "," << rr->GetPartNo() << " readed now " - << res.GetResult(i).GetSeqNo() << ", " << res.GetResult(i).GetPartNo() + << readResult.GetResult(i).GetSeqNo() << ", " << readResult.GetResult(i).GetPartNo() << " full request(now): " << Request); } - Y_ABORT_UNLESS(rr->GetSeqNo() == res.GetResult(i).GetSeqNo()); - (*rr->MutableData()) += res.GetResult(i).GetData(); - rr->SetPartitionKey(res.GetResult(i).GetPartitionKey()); - rr->SetExplicitHash(res.GetResult(i).GetExplicitHash()); - rr->SetPartNo(res.GetResult(i).GetPartNo()); - rr->SetUncompressedSize(rr->GetUncompressedSize() + res.GetResult(i).GetUncompressedSize()); - if (res.GetResult(i).GetPartNo() + 1 == res.GetResult(i).GetTotalParts()) { + Y_ABORT_UNLESS(rr->GetSeqNo() == readResult.GetResult(i).GetSeqNo()); + (*rr->MutableData()) += readResult.GetResult(i).GetData(); + rr->SetPartitionKey(readResult.GetResult(i).GetPartitionKey()); + rr->SetExplicitHash(readResult.GetResult(i).GetExplicitHash()); + rr->SetPartNo(readResult.GetResult(i).GetPartNo()); + rr->SetUncompressedSize(rr->GetUncompressedSize() + readResult.GetResult(i).GetUncompressedSize()); + if (readResult.GetResult(i).GetPartNo() + 1 == readResult.GetResult(i).GetTotalParts()) { Y_ABORT_UNLESS((ui32)rr->GetTotalSize() == (ui32)rr->GetData().size()); } } @@ -216,10 +232,10 @@ private: THolder<TEvPersQueue::TEvRequest> req(new TEvPersQueue::TEvRequest); req->Record = Request; ctx.Send(Tablet, req.Release()); + return; } } - //filter old messages ::google::protobuf::RepeatedPtrField<NKikimrClient::TCmdReadResult::TResult> records; records.Swap(partResp->MutableResult()); @@ -231,8 +247,29 @@ private: result->CopyFrom(rec); } } + if (isDirectRead) { + auto* prepareResponse = Response->Record.MutablePartitionResponse()->MutableCmdPrepareReadResult(); + prepareResponse->SetBytesSizeEstimate(readResult.GetSizeEstimate()); + prepareResponse->SetDirectReadId(DirectReadKey.ReadId); + prepareResponse->SetReadOffset(readResult.GetRealReadOffset()); + prepareResponse->SetLastOffset(readResult.GetLastOffset()); + prepareResponse->SetEndOffset(readResult.GetEndOffset()); - ctx.Send(Sender, Response.Release()); + prepareResponse->SetSizeLag(readResult.GetSizeLag()); + Response->Record.MutablePartitionResponse()->SetCookie(record.GetPartitionResponse().GetCookie()); + if (readResult.ResultSize()) { + prepareResponse->SetWriteTimestampMS(readResult.GetResult(readResult.ResultSize() - 1).GetWriteTimestampMS()); + } + Response->Record.SetStatus(NMsgBusProxy::MSTATUS_OK); + Response->Record.SetErrorCode(NPersQueue::NErrorCode::OK); + ctx.Send(Sender, Response.Release()); + ctx.Send( + MakePQDReadCacheServiceActorId(), + new TEvPQ::TEvStageDirectReadData(DirectReadKey, TabletGeneration, PreparedResponse) + ); + } else { + ctx.Send(Sender, Response.Release()); + } Die(ctx); } @@ -246,15 +283,20 @@ private: const TActorId Sender; const TActorId Tablet; + ui32 TabletGeneration; NKikimrClient::TPersQueueRequest Request; THolder<TEvPersQueue::TEvResponse> Response; + std::shared_ptr<NKikimrClient::TResponse> PreparedResponse; + TDirectReadKey DirectReadKey; + }; -TActorId CreateReadProxy(const TActorId& sender, const TActorId& tablet, const NKikimrClient::TPersQueueRequest& request, +TActorId CreateReadProxy(const TActorId& sender, const TActorId& 
tablet, ui32 tabletGeneration, + const TDirectReadKey& directReadKey, const NKikimrClient::TPersQueueRequest& request, const TActorContext& ctx) { - return ctx.Register(new TReadProxy(sender, tablet, request)); + return ctx.Register(new TReadProxy(sender, tablet, tabletGeneration, directReadKey, request)); } /******************************************************* AnswerBuilderProxy *********************************************************/ @@ -262,7 +304,8 @@ class TResponseBuilder { public: TResponseBuilder(const TActorId& sender, const TActorId& tablet, const TString& topicName, const ui32 partition, const ui64 messageNo, - const TString& reqId, const TMaybe<ui64> cookie, NMetrics::TResourceMetrics* resourceMetrics, const TActorContext& ctx) + const TString& reqId, const TMaybe<ui64> cookie, NMetrics::TResourceMetrics* resourceMetrics, + const TActorContext& ctx) : Sender(sender) , Tablet(tablet) , TopicName(topicName) @@ -305,13 +348,13 @@ public: Y_ABORT_UNLESS(Response); --Waiting; bool skip = false; - if (WasSplit && ev->Get()->Response.GetPartitionResponse().CmdWriteResultSize() == 1) { //megaqc - remove this - const auto& x = ev->Get()->Response.GetPartitionResponse().GetCmdWriteResult(0); + if (WasSplit && ev->Get()->Response->GetPartitionResponse().CmdWriteResultSize() == 1) { //megaqc - remove this + const auto& x = ev->Get()->Response->GetPartitionResponse().GetCmdWriteResult(0); if (x.HasPartNo() && x.GetPartNo() > 0) skip = true; } if (!skip) //megaqc - remove this - Response->Record.MergeFrom(ev->Get()->Response); + Response->Record.MergeFrom(*ev->Get()->Response); if (!Waiting) { LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Answer ok topic: '" << TopicName << "' partition: " << Partition @@ -1611,29 +1654,41 @@ void TPersQueue::HandleGetMaxSeqNoRequest(const ui64 responseCookie, const TActo ctx.Send(partActor, event.Release()); } -void TPersQueue::HandleDeleteSessionRequest(const ui64 responseCookie, const TActorId& partActor, - const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx) +void TPersQueue::HandleDeleteSessionRequest( + const ui64 responseCookie, const TActorId& partActor, + const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx, + const TActorId& pipeClient, const TActorId& +) { Y_ABORT_UNLESS(req.HasCmdDeleteSession()); InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_DELETE_SESSION); const auto& cmd = req.GetCmdDeleteSession(); + //To do : priority + LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "Got cmd delete session: " << cmd.DebugString()); if (!cmd.HasClientId()){ - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, + return ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "no clientId in DeleteSession request: " << ToString(req).data()); } else if (!cmd.HasSessionId()) { - ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, + return ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "not sessionId in DeleteSession request: " << ToString(req).data()); } else { - THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(responseCookie, cmd.GetClientId(), - 0, cmd.GetSessionId(), 0, 0, TEvPQ::TEvSetClientInfo::ESCI_DROP_SESSION); + THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>( + responseCookie, cmd.GetClientId(), 0, cmd.GetSessionId(), 0, 0, 0, pipeClient, + TEvPQ::TEvSetClientInfo::ESCI_DROP_SESSION + ); ctx.Send(partActor, 
event.Release()); } + auto pipe = PipesInfo.find(pipeClient); + if (!pipe.IsEnd()) { + DestroySession(pipe->second); + } } void TPersQueue::HandleCreateSessionRequest(const ui64 responseCookie, const TActorId& partActor, - const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx) -{ + const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx, + const TActorId& pipeClient, const TActorId& +) { Y_ABORT_UNLESS(req.HasCmdCreateSession()); const auto& cmd = req.GetCmdCreateSession(); @@ -1650,9 +1705,31 @@ void TPersQueue::HandleCreateSessionRequest(const ui64 responseCookie, const TAc ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "not step in CreateSession request: " << ToString(req).data()); } else { + bool isDirectRead = cmd.GetPartitionSessionId() > 0; InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_CREATE_SESSION); - THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(responseCookie, cmd.GetClientId(), - 0, cmd.GetSessionId(), cmd.GetGeneration(), cmd.GetStep(), TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION); + THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>( + responseCookie, cmd.GetClientId(), 0, cmd.GetSessionId(), cmd.GetPartitionSessionId(), cmd.GetGeneration(), cmd.GetStep(), + pipeClient, TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION, 0, false + ); + if (isDirectRead) { + auto pipeIter = PipesInfo.find(pipeClient); + if (pipeIter.IsEnd()) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::ERROR, + TStringBuilder() << "Internal error - server pipe " << pipeClient.ToString() << " not found"); + return; + } + pipeIter->second.ClientId = cmd.GetClientId(); + pipeIter->second.SessionId = cmd.GetSessionId(); + pipeIter->second.PartitionSessionId = cmd.GetPartitionSessionId(); + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Created session " << cmd.GetSessionId() << " on pipe: " << pipeIter->first.ToString()); + ctx.Send(MakePQDReadCacheServiceActorId(), + new TEvPQ::TEvRegisterDirectReadSession( + TReadSessionKey{cmd.GetSessionId(), cmd.GetPartitionSessionId()}, + GetGeneration() + ) + ); + + } ctx.Send(partActor, event.Release()); } } @@ -1674,10 +1751,10 @@ void TPersQueue::HandleSetClientOffsetRequest(const ui64 responseCookie, const T TStringBuilder() << "negative offset in SetClientOffset request: " << ToString(req).data()); } else { InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_SET_OFFSET); - THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(responseCookie, cmd.GetClientId(), - cmd.GetOffset(), - cmd.HasSessionId() ? cmd.GetSessionId() : "", 0, 0, - TEvPQ::TEvSetClientInfo::ESCI_OFFSET, 0, cmd.GetStrict()); + THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>( + responseCookie, cmd.GetClientId(), cmd.GetOffset(), cmd.HasSessionId() ? 
cmd.GetSessionId() : "", 0, 0, 0, + TActorId{}, TEvPQ::TEvSetClientInfo::ESCI_OFFSET, 0, cmd.GetStrict() + ); ctx.Send(partActor, event.Release()); } } @@ -1968,7 +2045,7 @@ void TPersQueue::HandleGetOwnershipRequest(const ui64 responseCookie, const TAct return; } - it->second = {partActor, owner, it->second.ServerActors}; + it->second = TPipeInfo::ForOwner(partActor, owner, it->second.ServerActors); InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_GET_OWNERSHIP); THolder<TEvPQ::TEvChangeOwner> event = MakeHolder<TEvPQ::TEvChangeOwner>(responseCookie, owner, pipeClient, sender, req.GetCmdGetOwnership().GetForce()); @@ -1976,9 +2053,11 @@ void TPersQueue::HandleGetOwnershipRequest(const ui64 responseCookie, const TAct } -void TPersQueue::HandleReadRequest(const ui64 responseCookie, const TActorId& partActor, - const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx) -{ +void TPersQueue::HandleReadRequest( + const ui64 responseCookie, const TActorId& partActor, + const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx, + const TActorId& pipeClient, const TActorId& +) { Y_ABORT_UNLESS(req.HasCmdRead()); auto cmd = req.GetCmdRead(); @@ -2015,8 +2094,25 @@ void TPersQueue::HandleReadRequest(const ui64 responseCookie, const TActorId& pa ui32 bytes = Min<ui32>(MAX_BYTES, cmd.HasBytes() ? cmd.GetBytes() : MAX_BYTES); auto clientDC = cmd.HasClientDC() ? to_lower(cmd.GetClientDC()) : "unknown"; clientDC.to_title(); + if (IsDirectReadCmd(cmd)) { + auto pipeIter = PipesInfo.find(pipeClient); + if (pipeIter.IsEnd()) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::READ_ERROR_NO_SESSION, + TStringBuilder() << "Read prepare request from unknown(old?) pipe"); + return; + } else if (cmd.GetSessionId().empty()) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::READ_ERROR_NO_SESSION, + TStringBuilder() << "Read prepare request with empty session id"); + return; + } else if (pipeIter->second.SessionId != cmd.GetSessionId()) { + ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::READ_ERROR_NO_SESSION, + TStringBuilder() << "Read prepare request with unknown(old?) session id " << cmd.GetSessionId()); + return; + } + } + THolder<TEvPQ::TEvRead> event = - MakeHolder<TEvPQ::TEvRead>(responseCookie, cmd.GetOffset(), + MakeHolder<TEvPQ::TEvRead>(responseCookie, cmd.GetOffset(), cmd.GetLastOffset(), cmd.HasPartNo() ? cmd.GetPartNo() : 0, count, cmd.HasSessionId() ? cmd.GetSessionId() : "", @@ -2024,11 +2120,117 @@ void TPersQueue::HandleReadRequest(const ui64 responseCookie, const TActorId& pa cmd.HasTimeoutMs() ? cmd.GetTimeoutMs() : 0, bytes, cmd.HasMaxTimeLagMs() ? cmd.GetMaxTimeLagMs() : 0, cmd.HasReadTimestampMs() ? 
cmd.GetReadTimestampMs() : 0, clientDC, - cmd.GetExternalOperation()); + cmd.GetExternalOperation(), + pipeClient); + ctx.Send(partActor, event.Release()); } } +template<class TRequest> +bool ValidateDirectReadRequestBase( + const TRequest& cmd, const THashMap<TActorId, TPersQueue::TPipeInfo>::iterator& pipeIter, + TStringBuilder& error, TDirectReadKey& key +) { + key = TDirectReadKey{cmd.GetSessionKey().GetSessionId(), cmd.GetSessionKey().GetPartitionSessionId(), cmd.GetDirectReadId()}; + if (key.SessionId.Empty()) { + error << "no session id in publish read request: "; + return false; + } else if (key.PartitionSessionId == 0) { + error << "No or zero partition session id in publish read request: "; + return false; + } else if (key.ReadId == 0) { + error << "No or zero ReadId in publish read request: "; + return false; + } + if (pipeIter.IsEnd()) { + error << "Read prepare request from unknown(old?) pipe"; + return false; + } else if (pipeIter->second.SessionId != key.SessionId) { + error << "Read prepare request with unknown(old?) session id " << key.SessionId; + return false; + } + return true; +} + +void TPersQueue::HandlePublishReadRequest( + const ui64 responseCookie, const TActorId&, + const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx, + const TActorId& pipeClient, const TActorId& +) { + auto cmd = req.GetCmdPublishRead(); + TDirectReadKey key; + TStringBuilder error; + + if (!ValidateDirectReadRequestBase(cmd, PipesInfo.find(pipeClient), error, key)) { + error << req.DebugString(); + return ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, error); + } + InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_PUBLISH_READ); + THolder<TEvPQ::TEvProxyResponse> publishDoneEvent = MakeHolder<TEvPQ::TEvProxyResponse>(responseCookie); + publishDoneEvent->Response->SetStatus(NMsgBusProxy::MSTATUS_OK); + publishDoneEvent->Response->SetErrorCode(NPersQueue::NErrorCode::OK); + + publishDoneEvent->Response->MutablePartitionResponse()->MutableCmdPublishReadResult(); + ctx.Send(SelfId(), publishDoneEvent.Release()); + + LOG_DEBUG_S( + ctx, NKikimrServices::PERSQUEUE, "Publish direct read id " << key.ReadId << " for session " << key.SessionId + ); + ctx.Send( + MakePQDReadCacheServiceActorId(), + new TEvPQ::TEvPublishDirectRead(key, GetGeneration()) + ); + +} + +void TPersQueue::HandleForgetReadRequest( + const ui64 responseCookie, const TActorId& , + const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx, + const TActorId& pipeClient, const TActorId& +) { + auto cmd = req.GetCmdForgetRead(); + TDirectReadKey key; + TStringBuilder error; + if (!ValidateDirectReadRequestBase(cmd, PipesInfo.find(pipeClient), error, key)) { + error << req.DebugString(); + return ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, error); + } + InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_FORGET_READ); + THolder<TEvPQ::TEvProxyResponse> forgetDoneEvent = MakeHolder<TEvPQ::TEvProxyResponse>(responseCookie); + forgetDoneEvent->Response->SetStatus(NMsgBusProxy::MSTATUS_OK); + forgetDoneEvent->Response->SetErrorCode(NPersQueue::NErrorCode::OK); + + forgetDoneEvent->Response->MutablePartitionResponse()->MutableCmdForgetReadResult(); + ctx.Send(SelfId(), forgetDoneEvent.Release()); + + LOG_DEBUG_S( + ctx, NKikimrServices::PERSQUEUE, "Forget direct read id " << key.ReadId << " for session " << key.SessionId + ); + ctx.Send( + MakePQDReadCacheServiceActorId(), + new TEvPQ::TEvForgetDirectRead(key, GetGeneration()) + ); + 
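+    // The CmdForgetReadResult reply above is sent immediately; the removal itself
+    // happens in the cache service, which is expected to drop the published
+    // response for this {SessionId, PartitionSessionId, ReadId} key and to ignore
+    // the event if the tablet generation is stale (cf. TestWrongSessionOrGeneration).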
+} + +void TPersQueue::DestroySession(TPipeInfo& pipeInfo) { + const auto& ctx = ActorContext(); + LOG_DEBUG_S( + ctx, NKikimrServices::PERSQUEUE, "PQ: Destroy direct read session " << pipeInfo.SessionId + ); + if (pipeInfo.SessionId.Empty()) + return; + ActorContext().Send( + MakePQDReadCacheServiceActorId(), + new TEvPQ::TEvDeregisterDirectReadSession( + TReadSessionKey{pipeInfo.SessionId, pipeInfo.PartitionSessionId}, + GetGeneration() + ) + ); + pipeInfo.SessionId = TString{}; +} + TMaybe<TEvPQ::TEvRegisterMessageGroup::TBody> TPersQueue::MakeRegisterMessageGroup( const NKikimrClient::TPersQueuePartitionRequest::TCmdRegisterMessageGroup& cmd, NPersQueue::NErrorCode::EErrorCode& code, TString& error) const @@ -2153,15 +2355,26 @@ void TPersQueue::Handle(TEvPersQueue::TEvRequest::TPtr& ev, const TActorContext& if (request.HasPartitionRequest() && request.GetPartitionRequest().HasCookie()) c = request.GetPartitionRequest().GetCookie(); TAutoPtr<TResponseBuilder> ans; - if (request.HasPartitionRequest() && request.GetPartitionRequest().HasCmdRead() && s != TMP_REQUEST_MARKER) { - TActorId rr = CreateReadProxy(ev->Sender, ctx.SelfID, request, ctx); + ui64 responseCookie = ++NextResponseCookie; + + auto& req = request.GetPartitionRequest(); + TActorId pipeClient = ActorIdFromProto(req.GetPipeClient()); + + if (request.GetPartitionRequest().HasCmdRead() && s != TMP_REQUEST_MARKER) { + auto pipeIter = PipesInfo.find(pipeClient); + TDirectReadKey directKey{}; + if (!pipeIter.IsEnd()) { + directKey.SessionId = pipeIter->second.SessionId; + directKey.PartitionSessionId = pipeIter->second.PartitionSessionId; + } + TActorId rr = CreateReadProxy(ev->Sender, ctx.SelfID, GetGeneration(), directKey, request, ctx); ans = CreateResponseProxy(rr, ctx.SelfID, TopicName, p, m, s, c, ResourceMetrics, ctx); } else { ans = CreateResponseProxy(ev->Sender, ctx.SelfID, TopicName, p, m, s, c, ResourceMetrics, ctx); } - ui64 responseCookie = ++NextResponseCookie; ResponseProxy[responseCookie] = ans; Counters->Simple()[COUNTER_PQ_TABLET_INFLIGHT].Set(ResponseProxy.size()); + if (!ConfigInited) { ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::INITIALIZING, "tablet is not ready"); return; @@ -2177,13 +2390,12 @@ void TPersQueue::Handle(TEvPersQueue::TEvRequest::TPtr& ev, const TActorContext& return; } - auto& req = request.GetPartitionRequest(); if (!req.HasPartition()) { ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, "no partition number"); return; } - + ui32 partition = req.GetPartition(); auto it = Partitions.find(partition); @@ -2214,7 +2426,9 @@ void TPersQueue::Handle(TEvPersQueue::TEvRequest::TPtr& ev, const TActorContext& + req.HasCmdUpdateWriteTimestamp() + req.HasCmdRegisterMessageGroup() + req.HasCmdDeregisterMessageGroup() - + req.HasCmdSplitMessageGroup(); + + req.HasCmdSplitMessageGroup() + + req.HasCmdPublishRead() + + req.HasCmdForgetRead(); if (count != 1) { ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, @@ -2224,14 +2438,12 @@ void TPersQueue::Handle(TEvPersQueue::TEvRequest::TPtr& ev, const TActorContext& const TActorId& partActor = it->second.Actor; - TActorId pipeClient = ActorIdFromProto(req.GetPipeClient()); - if (req.HasCmdGetMaxSeqNo()) { HandleGetMaxSeqNoRequest(responseCookie, partActor, req, ctx); } else if (req.HasCmdDeleteSession()) { - HandleDeleteSessionRequest(responseCookie, partActor, req, ctx); + HandleDeleteSessionRequest(responseCookie, partActor, req, ctx, pipeClient, ev->Sender); } else if (req.HasCmdCreateSession()) { - 
HandleCreateSessionRequest(responseCookie, partActor, req, ctx); + HandleCreateSessionRequest(responseCookie, partActor, req, ctx, pipeClient, ev->Sender); } else if (req.HasCmdSetClientOffset()) { HandleSetClientOffsetRequest(responseCookie, partActor, req, ctx); } else if (req.HasCmdGetClientOffset()) { @@ -2241,7 +2453,11 @@ void TPersQueue::Handle(TEvPersQueue::TEvRequest::TPtr& ev, const TActorContext& } else if (req.HasCmdUpdateWriteTimestamp()) { HandleUpdateWriteTimestampRequest(responseCookie, partActor, req, ctx); } else if (req.HasCmdRead()) { - HandleReadRequest(responseCookie, partActor, req, ctx); + HandleReadRequest(responseCookie, partActor, req, ctx, pipeClient, ev->Sender); + } else if (req.HasCmdPublishRead()) { + HandlePublishReadRequest(responseCookie, partActor, req, ctx, pipeClient, ev->Sender); + } else if (req.HasCmdForgetRead()) { + HandleForgetReadRequest(responseCookie, partActor, req, ctx, pipeClient, ev->Sender); } else if (req.HasCmdGetOwnership()) { HandleGetOwnershipRequest(responseCookie, partActor, req, ctx, pipeClient, ev->Sender); } else if (req.HasCmdReserveBytes()) { @@ -2258,13 +2474,8 @@ void TPersQueue::Handle(TEvPersQueue::TEvRequest::TPtr& ev, const TActorContext& void TPersQueue::Handle(TEvTabletPipe::TEvServerConnected::TPtr& ev, const TActorContext&) { - auto it = PipesInfo.find(ev->Get()->ClientId); - - if (it == PipesInfo.end()) { - PipesInfo.insert({ev->Get()->ClientId, {TActorId(), "", 1}}); - } else { - it->second.ServerActors++; - } + auto it = PipesInfo.insert({ev->Get()->ClientId, {}}).first; + it->second.ServerActors++; Counters->Simple()[COUNTER_PQ_TABLET_OPENED_PIPES] = PipesInfo.size(); } @@ -2279,7 +2490,12 @@ void TPersQueue::Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev, const TA return; } if (it->second.PartActor != TActorId()) { - ctx.Send(it->second.PartActor, new TEvPQ::TEvPipeDisconnected(it->second.Owner, it->first)); + ctx.Send(it->second.PartActor, new TEvPQ::TEvPipeDisconnected( + it->second.Owner, it->first + )); + } + if (!it->second.SessionId.Empty()) { + DestroySession(it->second); } PipesInfo.erase(it); Counters->Simple()[COUNTER_PQ_TABLET_OPENED_PIPES] = PipesInfo.size(); @@ -2352,7 +2568,11 @@ void TPersQueue::HandleDie(const TActorContext& ctx) } ctx.Send(CacheActor, new TEvents::TEvPoisonPill()); - + for (auto& pipe : PipesInfo) { + if (!pipe.second.SessionId.empty()) { + DestroySession(pipe.second); + } + } for (const auto& p : ResponseProxy) { THolder<TEvPQ::TEvError> ev = MakeHolder<TEvPQ::TEvError>(NPersQueue::NErrorCode::INITIALIZING, "tablet will be restarted right now", p.first); bool res = p.second->HandleError(ev.Get(), ctx); @@ -3498,6 +3718,7 @@ TPartition* TPersQueue::CreatePartitionActor(ui32 partitionId, return new TPartition(TabletID(), partitionId, ctx.SelfID, + GetGeneration(), CacheActor, topicConverter, DCId, @@ -3694,6 +3915,13 @@ TString TPersQueue::LogPrefix() const { return TStringBuilder() << SelfId() << " "; } +ui64 TPersQueue::GetGeneration() { + if (!TabletGeneration.Defined()) { + TabletGeneration = Executor()->Generation(); + } + return *TabletGeneration; +} + bool TPersQueue::HandleHook(STFUNC_SIG) { SetActivityType(NKikimrServices::TActivity::PERSQUEUE_ACTOR); diff --git a/ydb/core/persqueue/pq_impl.h b/ydb/core/persqueue/pq_impl.h index dca7795581..dd78e89add 100644 --- a/ydb/core/persqueue/pq_impl.h +++ b/ydb/core/persqueue/pq_impl.h @@ -109,6 +109,9 @@ class TPersQueue : public NKeyValue::TKeyValueFlat { void ReadState(const 
NKikimrClient::TKeyValueResponse::TReadResult& read, const TActorContext& ctx); void InitializeMeteringSink(const TActorContext& ctx); + void ProcessReadRequestImpl(const ui64 responseCookie, const TActorId& partActor, + const NKikimrClient::TPersQueuePartitionRequest& req, bool doPrepare, ui32 readId, + const TActorContext& ctx); TMaybe<TEvPQ::TEvRegisterMessageGroup::TBody> MakeRegisterMessageGroup( const NKikimrClient::TPersQueuePartitionRequest::TCmdRegisterMessageGroup& cmd, @@ -129,23 +132,28 @@ class TPersQueue : public NKeyValue::TKeyValueFlat { #define DESCRIBE_HANDLE(A) void A(const ui64 responseCookie, const TActorId& partActor, \ const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx); DESCRIBE_HANDLE(HandleGetMaxSeqNoRequest) - DESCRIBE_HANDLE(HandleDeleteSessionRequest) - DESCRIBE_HANDLE(HandleCreateSessionRequest) DESCRIBE_HANDLE(HandleSetClientOffsetRequest) DESCRIBE_HANDLE(HandleGetClientOffsetRequest) DESCRIBE_HANDLE(HandleWriteRequest) DESCRIBE_HANDLE(HandleUpdateWriteTimestampRequest) - DESCRIBE_HANDLE(HandleReadRequest) DESCRIBE_HANDLE(HandleRegisterMessageGroupRequest) DESCRIBE_HANDLE(HandleDeregisterMessageGroupRequest) DESCRIBE_HANDLE(HandleSplitMessageGroupRequest) #undef DESCRIBE_HANDLE + #define DESCRIBE_HANDLE_WITH_SENDER(A) void A(const ui64 responseCookie, const TActorId& partActor, \ const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx,\ const TActorId& pipeClient, const TActorId& sender); + + DESCRIBE_HANDLE_WITH_SENDER(HandleCreateSessionRequest) + DESCRIBE_HANDLE_WITH_SENDER(HandleDeleteSessionRequest) + DESCRIBE_HANDLE_WITH_SENDER(HandleReadRequest) + DESCRIBE_HANDLE_WITH_SENDER(HandlePublishReadRequest) + DESCRIBE_HANDLE_WITH_SENDER(HandleForgetReadRequest) DESCRIBE_HANDLE_WITH_SENDER(HandleGetOwnershipRequest) DESCRIBE_HANDLE_WITH_SENDER(HandleReserveBytesRequest) #undef DESCRIBE_HANDLE_WITH_SENDER + bool ChangingState() const { return !TabletStateRequests.empty(); } void TryReturnTabletStateAll(const TActorContext& ctx, NKikimrProto::EReplyStatus status = NKikimrProto::OK); void ReturnTabletState(const TActorContext& ctx, const TChangeNotification& req, NKikimrProto::EReplyStatus status); @@ -212,12 +220,25 @@ private: TVector<TAutoPtr<TEvPersQueue::TEvHasDataInfo>> HasDataRequests; TVector<std::pair<TAutoPtr<TEvPersQueue::TEvUpdateConfig>, TActorId> > UpdateConfigRequests; +public: struct TPipeInfo { TActorId PartActor; TString Owner; - ui32 ServerActors; + ui32 ServerActors = 0; + TString ClientId; + TString SessionId; + ui64 PartitionSessionId = 0; + TPipeInfo() = default; + static TPipeInfo ForOwner(const TActorId& partActor, const TString& owner, ui32 serverActors) { + TPipeInfo res; + res.Owner = owner; + res.PartActor = partActor; + res.ServerActors = serverActors; + return res; + } }; +private: THashMap<TActorId, TPipeInfo> PipesInfo; ui64 NextResponseCookie; @@ -379,9 +400,12 @@ private: bool CanProcessWriteTxs() const; bool CanProcessDeleteTxs() const; + ui64 GetGeneration(); + void DestroySession(TPipeInfo& pipeInfo); bool UseMediatorTimeCast = true; THashMap<ui32, TVector<TEvPQ::TEvSourceIdRequest::TPtr>> SourceIdRequests; + TMaybe<ui64> TabletGeneration; }; diff --git a/ydb/core/persqueue/subscriber.h b/ydb/core/persqueue/subscriber.h index c1b6247ec4..7d8890ebb9 100644 --- a/ydb/core/persqueue/subscriber.h +++ b/ydb/core/persqueue/subscriber.h @@ -13,7 +13,7 @@ namespace NPQ { struct TUserInfo; struct TReadAnswer { - ui64 Size; + ui64 Size = 0; THolder<IEventBase> Event; }; @@ -35,19 
+35,27 @@ struct TReadInfo { TVector<TRequestedBlob> Blobs; //offset, count, value ui64 CachedOffset; //offset of head can be bigger than last databody offset TVector<TClientBlob> Cached; //records from head + TActorId PipeClient; + + ui64 SizeEstimate = 0; + ui64 RealReadOffset = 0; + ui64 LastOffset = 0; + bool Error = false; TReadInfo() = delete; TReadInfo( const TString& user, const TString& clientDC, const ui64 offset, + const ui64 lastOffset, const ui16 partNo, const ui64 count, const ui32 size, const ui64 dst, ui64 readTimestampMs, TDuration waitQuotaTime, - const bool isExternalRead + const bool isExternalRead, + const TActorId& pipeClient ) : User(user) , ClientDC(clientDC) @@ -62,6 +70,8 @@ struct TReadInfo { , IsExternalRead(isExternalRead) , IsSubscription(false) , CachedOffset(0) + , PipeClient(pipeClient) + , LastOffset(lastOffset) {} TReadAnswer FormAnswer( @@ -70,7 +80,7 @@ struct TReadInfo { const ui64 endOffset, const ui32 partition, TUserInfo* ui, - const ui64 dst, + const ui64 dst, const ui64 sizeLag, const TActorId& tablet, const NKikimrPQ::TPQTabletConfig::EMeteringMode meteringMode diff --git a/ydb/core/persqueue/user_info.cpp b/ydb/core/persqueue/user_info.cpp index 131f9057fb..5a13af0ad6 100644 --- a/ydb/core/persqueue/user_info.cpp +++ b/ydb/core/persqueue/user_info.cpp @@ -95,7 +95,7 @@ void TUsersInfoStorage::ParseDeprecated(const TString& key, const TString& data, Y_ABORT_UNLESS(offset <= (ui64)Max<i64>(), "Offset is too big: %" PRIu64, offset); if (!userInfo) { - Create(ctx, user, 0, false, session, gen, step, static_cast<i64>(offset), 0, TInstant::Zero()); + Create(ctx, user, 0, false, session, 0, gen, step, static_cast<i64>(offset), 0, TInstant::Zero(), {}); } else { userInfo->Session = session; userInfo->Generation = gen; @@ -121,8 +121,9 @@ void TUsersInfoStorage::Parse(const TString& key, const TString& data, const TAc TUserInfo* userInfo = GetIfExists(user); if (!userInfo) { Create( - ctx, user, userData.GetReadRuleGeneration(), false, userData.GetSession(), - userData.GetGeneration(), userData.GetStep(), offset, userData.GetOffsetRewindSum(), TInstant::Zero() + ctx, user, userData.GetReadRuleGeneration(), false, userData.GetSession(), userData.GetPartitionSessionId(), + userData.GetGeneration(), userData.GetStep(), offset, + userData.GetOffsetRewindSum(), TInstant::Zero(), {} ); } else { userInfo->Session = userData.GetSession(); @@ -147,7 +148,10 @@ TUserInfo& TUsersInfoStorage::GetOrCreate(const TString& user, const TActorConte Y_ABORT_UNLESS(!user.empty()); auto it = UsersInfo.find(user); if (it == UsersInfo.end()) { - return Create(ctx, user, readRuleGeneration ? *readRuleGeneration : ++CurReadRuleGeneration, false, "", 0, 0, 0, 0, TInstant::Zero()); + return Create( + ctx, user, readRuleGeneration ? 
*readRuleGeneration : ++CurReadRuleGeneration, false, "", 0, + 0, 0, 0, 0, TInstant::Zero(), {} + ); } return it->second; } @@ -171,8 +175,9 @@ TUserInfo TUsersInfoStorage::CreateUserInfo(const TActorContext& ctx, const ui64 readRuleGeneration, bool important, const TString& session, + ui64 partitionSessionId, ui32 gen, ui32 step, i64 offset, ui64 readOffsetRewindSum, - TInstant readFromTimestamp) const + TInstant readFromTimestamp, const TActorId& pipeClient) const { TString defaultServiceType = AppData(ctx)->PQConfig.GetDefaultClientServiceType().GetName(); TString userServiceType = ""; @@ -189,8 +194,8 @@ TUserInfo TUsersInfoStorage::CreateUserInfo(const TActorContext& ctx, return { ctx, StreamCountersSubgroup, user, readRuleGeneration, important, TopicConverter, Partition, - session, gen, step, offset, readOffsetRewindSum, DCId, readFromTimestamp, DbPath, - meterRead + session, partitionSessionId, gen, step, offset, readOffsetRewindSum, DCId, readFromTimestamp, DbPath, + meterRead, pipeClient }; } @@ -198,14 +203,16 @@ TUserInfoBase TUsersInfoStorage::CreateUserInfo(const TString& user, TMaybe<ui64> readRuleGeneration) const { return TUserInfoBase{user, readRuleGeneration ? *readRuleGeneration : ++CurReadRuleGeneration, - "", 0, 0, 0, false, {}}; + "", 0, 0, 0, false, {}, 0, {}}; } TUserInfo& TUsersInfoStorage::Create( - const TActorContext& ctx, const TString& user, const ui64 readRuleGeneration, bool important, const TString& session, - ui32 gen, ui32 step, i64 offset, ui64 readOffsetRewindSum, TInstant readFromTimestamp + const TActorContext& ctx, const TString& user, const ui64 readRuleGeneration, bool important, const TString& session, + ui64 partitionSessionId, ui32 gen, ui32 step, i64 offset, ui64 readOffsetRewindSum, + TInstant readFromTimestamp, const TActorId& pipeClient ) { - auto userInfo = CreateUserInfo(ctx, user, readRuleGeneration, important, session, gen, step, offset, readOffsetRewindSum, readFromTimestamp); + auto userInfo = CreateUserInfo(ctx, user, readRuleGeneration, important, session, partitionSessionId, + gen, step, offset, readOffsetRewindSum, readFromTimestamp, pipeClient); auto result = UsersInfo.emplace(user, std::move(userInfo)); Y_ABORT_UNLESS(result.second); return result.first->second; diff --git a/ydb/core/persqueue/user_info.h b/ydb/core/persqueue/user_info.h index 3d6319accd..328fdef716 100644 --- a/ydb/core/persqueue/user_info.h +++ b/ydb/core/persqueue/user_info.h @@ -6,6 +6,7 @@ #include "quota_tracker.h" #include "account_read_quoter.h" #include "metering_sink.h" +#include "dread_cache_service/caching_service.h" #include <ydb/core/base/counters.h> #include <ydb/core/protos/counters_pq.pb.h> @@ -47,6 +48,9 @@ struct TUserInfoBase { bool Important = false; TInstant ReadFromTimestamp; + + ui64 PartitionSessionId = 0; + TActorId PipeClient; }; struct TUserInfo: public TUserInfoBase { @@ -88,6 +92,8 @@ struct TUserInfo: public TUserInfoBase { std::shared_ptr<TPercentileCounter> ReadTimeLag; bool NoConsumer = false; + + bool DoInternalRead = false; bool MeterRead = true; bool Parsed = false; @@ -163,11 +169,12 @@ struct TUserInfo: public TUserInfoBase { NMonitoring::TDynamicCounterPtr streamCountersSubgroup, const TString& user, const ui64 readRuleGeneration, const bool important, const NPersQueue::TTopicConverterPtr& topicConverter, - const ui32 partition, const TString &session, ui32 gen, ui32 step, i64 offset, + const ui32 partition, const TString& session, ui64 partitionSession, ui32 gen, ui32 step, i64 offset, const ui64 readOffsetRewindSum, const 
TString& dcId, TInstant readFromTimestamp, - const TString& dbPath, bool meterRead + const TString& dbPath, bool meterRead, const TActorId& pipeClient ) - : TUserInfoBase{user, readRuleGeneration, session, gen, step, offset, important, readFromTimestamp} + : TUserInfoBase{user, readRuleGeneration, session, gen, step, offset, important, + readFromTimestamp, partitionSession, pipeClient} , WriteTimestamp(TAppData::TimeProvider->Now()) , CreateTimestamp(TAppData::TimeProvider->Now()) , ReadTimestamp(TAppData::TimeProvider->Now()) @@ -385,8 +392,9 @@ public: TUserInfoBase CreateUserInfo(const TString& user, TMaybe<ui64> readRuleGeneration = {}) const; TUserInfo& Create( - const TActorContext& ctx, const TString& user, const ui64 readRuleGeneration, bool important, const TString &session, - ui32 gen, ui32 step, i64 offset, ui64 readOffsetRewindSum, TInstant readFromTimestamp + const TActorContext& ctx, const TString& user, const ui64 readRuleGeneration, bool important, const TString& session, + ui64 partitionSessionId, ui32 gen, ui32 step, i64 offset, ui64 readOffsetRewindSum, + TInstant readFromTimestamp, const TActorId& pipeClient ); void Clear(const TActorContext& ctx); @@ -400,8 +408,9 @@ private: const ui64 readRuleGeneration, bool important, const TString& session, + ui64 partitionSessionId, ui32 gen, ui32 step, i64 offset, ui64 readOffsetRewindSum, - TInstant readFromTimestamp) const; + TInstant readFromTimestamp, const TActorId& pipeClient) const; private: THashMap<TString, TUserInfo> UsersInfo; diff --git a/ydb/core/persqueue/ut/common/pq_ut_common.cpp b/ydb/core/persqueue/ut/common/pq_ut_common.cpp index 7e4b911c68..ade9422864 100644 --- a/ydb/core/persqueue/ut/common/pq_ut_common.cpp +++ b/ydb/core/persqueue/ut/common/pq_ut_common.cpp @@ -714,7 +714,11 @@ void CmdSetOffset(const ui32 partition, const TString& user, ui64 offset, bool e } -void CmdCreateSession(const ui32 partition, const TString& user, const TString& session, TTestContext& tc, const i64 offset, const ui32 gen, const ui32 step, bool error) { +TActorId CmdCreateSession(const TPQCmdSettings& settings, TTestContext& tc) { + + TActorId pipeClient = tc.Runtime->ConnectToPipe(tc.BalancerTabletId, tc.Edge, 0, GetPipeConfigWithRetries()); + TActorId tabletPipe = tc.Runtime->ConnectToPipe(tc.TabletId, tc.Edge, 0, GetPipeConfigWithRetries()); + TAutoPtr<IEventHandle> handle; TEvPersQueue::TEvResponse *result; THolder<TEvPersQueue::TEvRequest> request; @@ -723,12 +727,18 @@ void CmdCreateSession(const ui32 partition, const TString& user, const TString& tc.Runtime->ResetScheduledCount(); request.Reset(new TEvPersQueue::TEvRequest); auto req = request->Record.MutablePartitionRequest(); - req->SetPartition(partition); + + ActorIdToProto(tabletPipe, req->MutablePipeClient()); + Cerr << "Set pipe for create session: " << tabletPipe.ToString(); + + req->SetPartition(settings.Partition); auto off = req->MutableCmdCreateSession(); - off->SetClientId(user); - off->SetSessionId(session); - off->SetGeneration(gen); - off->SetStep(step); + off->SetClientId(settings.User); + off->SetSessionId(settings.Session); + off->SetGeneration(settings.Generation); + off->SetStep(settings.Step); + off->SetPartitionSessionId(settings.PartitionSessionId); + tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); @@ -740,24 +750,25 @@ void CmdCreateSession(const ui32 partition, const TString& user, const TString& continue; } - if (error) { + if 
(settings.ToFail) { UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::WRONG_COOKIE); - return; + return pipeClient; } - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); + UNIT_ASSERT_EQUAL_C(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK, result->Record.DebugString()); UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdGetClientOffsetResult()); auto resp = result->Record.GetPartitionResponse().GetCmdGetClientOffsetResult(); - UNIT_ASSERT(resp.HasOffset() && (i64)resp.GetOffset() == offset); + UNIT_ASSERT(resp.HasOffset() && (i64)resp.GetOffset() == settings.Offset); retriesLeft = 0; } catch (NActors::TSchedulingLimitReachedException) { UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2); } } + return tabletPipe; } -void CmdKillSession(const ui32 partition, const TString& user, const TString& session, TTestContext& tc) { +void CmdKillSession(const ui32 partition, const TString& user, const TString& session, TTestContext& tc, const TActorId& pipe) { TAutoPtr<IEventHandle> handle; TEvPersQueue::TEvResponse *result; THolder<TEvPersQueue::TEvRequest> request; @@ -770,6 +781,9 @@ void CmdKillSession(const ui32 partition, const TString& user, const TString& se auto off = req->MutableCmdDeleteSession(); off->SetClientId(user); off->SetSessionId(session); + if (pipe) { + ActorIdToProto(pipe, req->MutablePipeClient()); + } tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); @@ -865,83 +879,218 @@ TVector<TString> CmdSourceIdRead(TTestContext& tc) { return sourceIds; } +bool CheckCmdReadResult(const TPQCmdReadSettings& settings, TEvPersQueue::TEvResponse* result) { + Y_UNUSED(settings); + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); -void CmdRead(const ui32 partition, const ui64 offset, const ui32 count, const ui32 size, const ui32 resCount, bool timeouted, TTestContext& tc, TVector<i32> offsets, const ui32 maxTimeLagMs, const ui64 readTimestampMs, const TString user) { + UNIT_ASSERT(result->Record.HasPartitionResponse()); + UNIT_ASSERT_EQUAL(result->Record.GetPartitionResponse().GetCookie(), 123); + if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { + return false; + } + if (settings.Timeout) { + UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); + UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdReadResult()); + auto res = result->Record.GetPartitionResponse().GetCmdReadResult(); + UNIT_ASSERT_EQUAL(res.ResultSize(), 0); + return true; + } + if (settings.ToFail) { + UNIT_ASSERT_C(result->Record.GetErrorCode() != NPersQueue::NErrorCode::OK, result->Record.DebugString()); + return true; + } + UNIT_ASSERT_EQUAL_C(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK, result->Record.DebugString()); + if (!settings.DirectReadId) { + UNIT_ASSERT_C(result->Record.GetPartitionResponse().HasCmdReadResult(), result->Record.GetPartitionResponse().DebugString()); + auto res = result->Record.GetPartitionResponse().GetCmdReadResult(); + + UNIT_ASSERT_EQUAL(res.ResultSize(), settings.ResCount); + ui64 off = settings.Offset; + + for (ui32 i = 0; i < settings.ResCount; ++i) { + auto r = res.GetResult(i); + if (settings.Offsets.empty()) { + if (settings.ReadTimestampMs == 0) { + UNIT_ASSERT_EQUAL((ui64)r.GetOffset(), off); + } + UNIT_ASSERT(r.GetSourceId().size() == 9 && r.GetSourceId().StartsWith("sourceid")); + UNIT_ASSERT_EQUAL(ui32(r.GetData()[0]), off); + 
UNIT_ASSERT_EQUAL(ui32((unsigned char)r.GetData().back()), r.GetSeqNo() % 256); + ++off; + } else { + UNIT_ASSERT(settings.Offsets[i] == (i64)r.GetOffset()); + } + } + } else { + UNIT_ASSERT_C(result->Record.GetPartitionResponse().HasCmdPrepareReadResult(), result->Record.GetPartitionResponse().DebugString()); + auto res = result->Record.GetPartitionResponse().GetCmdPrepareReadResult(); + UNIT_ASSERT(res.GetBytesSizeEstimate() > 0); + UNIT_ASSERT(res.GetEndOffset() > 0); + UNIT_ASSERT_VALUES_EQUAL(res.GetDirectReadId(), settings.DirectReadId); + } + return true; +} + +void CmdRead( + const ui32 partition, const ui64 offset, const ui32 count, const ui32 size, const ui32 resCount, bool timeouted, + TTestContext& tc, TVector<i32> offsets, const ui32 maxTimeLagMs, const ui64 readTimestampMs, const TString user +) { + return CmdRead( + TPQCmdReadSettings("", partition, offset, count, size, resCount, timeouted, + offsets, maxTimeLagMs, readTimestampMs, user), + tc + ); +} + +void CmdRead(const TPQCmdReadSettings& settings, TTestContext& tc) { TAutoPtr<IEventHandle> handle; TEvPersQueue::TEvResponse *result; THolder<TEvPersQueue::TEvRequest> request; - for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { + for (ui32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { try { tc.Runtime->ResetScheduledCount(); request.Reset(new TEvPersQueue::TEvRequest); auto req = request->Record.MutablePartitionRequest(); - req->SetPartition(partition); + req->SetPartition(settings.Partition); auto read = req->MutableCmdRead(); - read->SetOffset(offset); - read->SetClientId(user); - read->SetCount(count); - read->SetBytes(size); - if (maxTimeLagMs > 0) { - read->SetMaxTimeLagMs(maxTimeLagMs); + read->SetOffset(settings.Offset); + read->SetSessionId(settings.Session); + read->SetClientId(settings.User); + read->SetCount(settings.Count); + read->SetBytes(settings.Size); + if (settings.MaxTimeLagMs > 0) { + read->SetMaxTimeLagMs(settings.MaxTimeLagMs); } - if (readTimestampMs > 0) { - read->SetReadTimestampMs(readTimestampMs); + if (settings.ReadTimestampMs > 0) { + read->SetReadTimestampMs(settings.ReadTimestampMs); } + if (settings.DirectReadId > 0) { + read->SetDirectReadId(settings.DirectReadId); + } + if (settings.PartitionSessionId > 0) { + read->SetPartitionSessionId(settings.PartitionSessionId); + } + if (settings.Pipe) { + ActorIdToProto(settings.Pipe, req->MutablePipeClient()); + } + req->SetCookie(123); + + Cerr << "Send read request: " << request->Record.DebugString() << " via pipe: " << tc.Edge.ToString() << Endl; tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); - - UNIT_ASSERT(result); - UNIT_ASSERT(result->Record.HasStatus()); - - UNIT_ASSERT(result->Record.HasPartitionResponse()); - UNIT_ASSERT_EQUAL(result->Record.GetPartitionResponse().GetCookie(), 123); - if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) { + auto checkRes = CheckCmdReadResult(settings, result); + if (!checkRes) { tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress retriesLeft = 3; continue; - } - if (timeouted) { - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK); - UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdReadResult()); - auto res = result->Record.GetPartitionResponse().GetCmdReadResult(); - UNIT_ASSERT_EQUAL(res.ResultSize(), 0); + } else { break; } - UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), 
NPersQueue::NErrorCode::OK); + } catch (NActors::TSchedulingLimitReachedException) { + UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2); + } + } +} + +template <class TProto> +void FillDirectReadKey(TProto* proto, const TCmdDirectReadSettings& settings) { + proto->SetDirectReadId(settings.DirectReadId); + auto* key = proto->MutableSessionKey(); + key->SetSessionId(settings.Session); + key->SetPartitionSessionId(settings.PartitionSessionId); +} - UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdReadResult()); - auto res = result->Record.GetPartitionResponse().GetCmdReadResult(); +template <class TEvent> +void CheckDirectReadEvent(TEvent* event, const TCmdDirectReadSettings& settings) { + UNIT_ASSERT(event->ReadKey.ReadId == settings.DirectReadId); + UNIT_ASSERT(event->ReadKey.SessionId == settings.Session); + UNIT_ASSERT(event->ReadKey.PartitionSessionId > 0); +} - UNIT_ASSERT_EQUAL_C(res.ResultSize(), resCount, - "Result size missmatch: expected " << resCount << " but received " << res.ResultSize()); - ui64 off = offset; +void CmdPublishOrForgetRead(const TCmdDirectReadSettings& settings, bool isPublish, TTestContext& tc) { + TAutoPtr<IEventHandle> handle; + TEvPersQueue::TEvResponse *result; + THolder<TEvPersQueue::TEvRequest> request; + tc.Runtime->ResetScheduledCount(); + request.Reset(new TEvPersQueue::TEvRequest); + auto req = request->Record.MutablePartitionRequest(); - for (ui32 i = 0; i < resCount; ++i) { + ActorIdToProto(settings.Pipe, req->MutablePipeClient()); - auto r = res.GetResult(i); - if (offsets.empty()) { - if (readTimestampMs == 0) { - UNIT_ASSERT_EQUAL((ui64)r.GetOffset(), off); - } - UNIT_ASSERT(r.GetSourceId().size() == 9 && r.GetSourceId().StartsWith("sourceid")); - UNIT_ASSERT_EQUAL(ui32(r.GetData()[0]), off); - UNIT_ASSERT_EQUAL(ui32((unsigned char)r.GetData().back()), r.GetSeqNo() % 256); - ++off; - } else { - UNIT_ASSERT(offsets[i] == (i64)r.GetOffset()); + req->SetPartition(settings.Partition); + req->SetCookie(123); + if (isPublish) { + FillDirectReadKey(req->MutableCmdPublishRead(), settings); + } else { + FillDirectReadKey(req->MutableCmdForgetRead(), settings); + } + + TAtomic hasEvent = 0; + tc.Runtime->SetObserverFunc( + [&](TAutoPtr<IEventHandle>& ev) { + if (auto* msg = ev->CastAsLocal<TEvPQ::TEvStageDirectReadData>()) { + Cerr << "Got publish event\n"; + UNIT_ASSERT(isPublish); + UNIT_ASSERT(msg->TabletGeneration); + //AtomicSet(hasEvent, 1); + UNIT_ASSERT(msg->Response != nullptr); + } else if (auto* msg = ev->CastAsLocal<TEvPQ::TEvPublishDirectRead>()) { + Cerr << "Got publish event\n"; + UNIT_ASSERT(isPublish); + CheckDirectReadEvent(msg, settings); + AtomicSet(hasEvent, 1); + } else if (auto* msg = ev->CastAsLocal<TEvPQ::TEvForgetDirectRead>()) { + UNIT_ASSERT(!isPublish); + CheckDirectReadEvent(msg, settings); + AtomicSet(hasEvent, 1); } + return TTestActorRuntimeBase::EEventAction::PROCESS; } - retriesLeft = 0; - } catch (NActors::TSchedulingLimitReachedException) { - UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2); - } + ); + Cerr << "Send " << (isPublish? 
"publish " : "forget ") << "read request: " << req->DebugString() << Endl; + + tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); + result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle); + + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + Cerr << "Got direct read response: " << result->Record.DebugString() << Endl; + if (settings.Fail) { + UNIT_ASSERT(result->Record.GetErrorCode() != NPersQueue::NErrorCode::OK); + return; } + UNIT_ASSERT_C(result->Record.GetErrorCode() == NPersQueue::NErrorCode::OK, result->Record.DebugString()); + + UNIT_ASSERT(result->Record.HasPartitionResponse()); + UNIT_ASSERT_EQUAL(result->Record.GetPartitionResponse().GetCookie(), 123); + if (isPublish) { + UNIT_ASSERT_C(result->Record.GetPartitionResponse().HasCmdPublishReadResult(), result->Record.DebugString()); + } else { + UNIT_ASSERT_C(result->Record.GetPartitionResponse().HasCmdForgetReadResult(), result->Record.DebugString()); + } + //tc.Runtime->DispatchEvents(); + Cerr << "Expect failure: " << settings.Fail << ", event received: " << AtomicGet(hasEvent) << Endl; + if (settings.Fail) { + UNIT_ASSERT(!AtomicGet(hasEvent)); + } else { + // UNIT_ASSERT(AtomicGet(hasEvent)); // ToDo: !! Fix this - event is send but not cathed for some reason; + + } +} + +void CmdPublishRead(const TCmdDirectReadSettings& settings, TTestContext& tc) { + return CmdPublishOrForgetRead(settings, true, tc); } +void CmdForgetRead(const TCmdDirectReadSettings& settings, TTestContext& tc) { + return CmdPublishOrForgetRead(settings, false, tc); +} void FillUserInfo(NKikimrClient::TKeyValueRequest_TCmdWrite* write, const TString& client, ui32 partition, ui64 offset) { NPQ::TKeyPrefix ikey(NPQ::TKeyPrefix::TypeInfo, partition, NPQ::TKeyPrefix::MarkUser); diff --git a/ydb/core/persqueue/ut/common/pq_ut_common.h b/ydb/core/persqueue/ut/common/pq_ut_common.h index 8593c7fd8d..301e836a1f 100644 --- a/ydb/core/persqueue/ut/common/pq_ut_common.h +++ b/ydb/core/persqueue/ut/common/pq_ut_common.h @@ -411,6 +411,62 @@ void WritePartDataWithBigMsg( // TVector<TString> CmdSourceIdRead(TTestContext& tc); +struct TPQCmdSettingsBase { + ui32 Partition = 0; + TString User; + TString Session; + ui64 PartitionSessionId = 0; + i64 Offset = 0; + bool ToFail = false; +}; + +struct TPQCmdSettings : public TPQCmdSettingsBase { + ui32 Generation = 0; + ui32 Step = 0; + TPQCmdSettings() = default; + TPQCmdSettings(ui32 partition, const TString& user, const TString& session, i64 offset = 0, ui32 generation = 0, + ui32 step = 0, bool error = false) + : TPQCmdSettingsBase{partition, user, session, 0, offset, error} + , Generation(generation) + , Step(step) + {} +}; + +struct TPQCmdReadSettings : public TPQCmdSettingsBase { + ui32 Count = 0; + ui32 Size = 0; + ui32 ResCount = 0; + bool Timeout = false; + TVector<i32> Offsets; + ui32 MaxTimeLagMs = 0; + ui32 ReadTimestampMs = 0; + ui64 DirectReadId = 0; + TActorId Pipe; + TPQCmdReadSettings() = default; + TPQCmdReadSettings(const TString& session, ui32 partition, i64 offset, ui32 count, ui32 size, ui32 resCount, bool timeout = false, + TVector<i32> offsets = {}, const ui32 maxTimeLagMs = 0, const ui64 readTimestampMs = 0, + const TString user = "user") + + : TPQCmdSettingsBase{partition, user, session, 0, offset, false} + , Count(count) + , Size(size) + , ResCount(resCount) + , Timeout(timeout) + , Offsets (offsets) + , MaxTimeLagMs(maxTimeLagMs) + , ReadTimestampMs(readTimestampMs) + {} +}; + +struct TCmdDirectReadSettings { + ui32 
Partition; + TString Session; + ui64 PartitionSessionId; + ui64 DirectReadId; + TActorId Pipe; + bool Fail = false; +}; + std::pair<TString, TActorId> CmdSetOwner( const ui32 partition, TTestContext& tc, @@ -425,15 +481,7 @@ std::pair<TString, TActorId> CmdSetOwner( const TString& owner = "default", bool force = true); -void CmdCreateSession( - const ui32 partition, - const TString& user, - const TString& session, - TTestContext& tc, - const i64 offset = 0, - const ui32 gen = 0, - const ui32 step = 0, - bool error = false); +TActorId CmdCreateSession(const TPQCmdSettings& settings, TTestContext& tc); void CmdGetOffset( const ui32 partition, @@ -447,7 +495,8 @@ void CmdKillSession( const ui32 partition, const TString& user, const TString& session, - TTestContext& tc); + TTestContext& tc, + const TActorId& pipe = {}); void CmdRead( const ui32 partition, @@ -462,6 +511,13 @@ void CmdRead( const ui64 readTimestampMs = 0, const TString user = "user"); +void CmdRead( + const TPQCmdReadSettings& settings, + TTestContext& tc); + +void CmdPublishRead(const TCmdDirectReadSettings& settings, TTestContext& tc); +void CmdForgetRead(const TCmdDirectReadSettings& settings, TTestContext& tc); + void CmdReserveBytes( const ui32 partition, TTestContext& tc, diff --git a/ydb/core/persqueue/ut/partition_ut.cpp b/ydb/core/persqueue/ut/partition_ut.cpp index 2f0a9b584f..2ea5da2d6a 100644 --- a/ydb/core/persqueue/ut/partition_ut.cpp +++ b/ydb/core/persqueue/ut/partition_ut.cpp @@ -265,6 +265,7 @@ void TPartitionFixture::CreatePartitionActor(ui32 id, auto actor = new NPQ::TPartition(Ctx->TabletId, id, Ctx->Edge, + 0, Ctx->Edge, TopicConverter, "dcId", @@ -339,8 +340,10 @@ void TPartitionFixture::SendCreateSession(ui64 cookie, clientId, 0, sessionId, + 0, generation, step, + TActorId{}, TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION); Ctx->Runtime->SingleSys()->Send(new IEventHandle(ActorId, Ctx->Edge, event.Release())); } @@ -355,7 +358,9 @@ void TPartitionFixture::SendSetOffset(ui64 cookie, offset, sessionId, 0, - 0); + 0, + 0, + TActorId{}); Ctx->Runtime->SingleSys()->Send(new IEventHandle(ActorId, Ctx->Edge, event.Release())); } @@ -523,19 +528,19 @@ void TPartitionFixture::WaitProxyResponse(const TProxyResponseMatcher& matcher) } if (matcher.Status) { - UNIT_ASSERT(event->Response.HasStatus()); - UNIT_ASSERT(*matcher.Status == event->Response.GetStatus()); + UNIT_ASSERT(event->Response->HasStatus()); + UNIT_ASSERT(*matcher.Status == event->Response->GetStatus()); } if (matcher.ErrorCode) { - UNIT_ASSERT(event->Response.HasErrorCode()); - UNIT_ASSERT(*matcher.ErrorCode == event->Response.GetErrorCode()); + UNIT_ASSERT(event->Response->HasErrorCode()); + UNIT_ASSERT(*matcher.ErrorCode == event->Response->GetErrorCode()); } if (matcher.Offset) { - UNIT_ASSERT(event->Response.HasPartitionResponse()); - UNIT_ASSERT(event->Response.GetPartitionResponse().HasCmdGetClientOffsetResult()); - UNIT_ASSERT_VALUES_EQUAL(*matcher.Offset, event->Response.GetPartitionResponse().GetCmdGetClientOffsetResult().GetOffset()); + UNIT_ASSERT(event->Response->HasPartitionResponse()); + UNIT_ASSERT(event->Response->GetPartitionResponse().HasCmdGetClientOffsetResult()); + UNIT_ASSERT_VALUES_EQUAL(*matcher.Offset, event->Response->GetPartitionResponse().GetCmdGetClientOffsetResult().GetOffset()); } } @@ -1342,7 +1347,7 @@ Y_UNIT_TEST_F(ReserveSubDomainOutOfSpace, TPartitionFixture) SendChangeOwner(cookie, "owner1", Ctx->Edge); auto ownerEvent = Ctx->Runtime->GrabEdgeEvent<TEvPQ::TEvProxyResponse>(TDuration::Seconds(1)); 
UNIT_ASSERT(ownerEvent != nullptr); - auto ownerCookie = ownerEvent->Response.GetPartitionResponse().GetCmdGetOwnershipResult().GetOwnerCookie(); + auto ownerCookie = ownerEvent->Response->GetPartitionResponse().GetCmdGetOwnershipResult().GetOwnerCookie(); TAutoPtr<IEventHandle> handle; std::function<bool(const TEvPQ::TEvProxyResponse&)> truth = [&](const TEvPQ::TEvProxyResponse& e) { return cookie == e.Cookie; }; @@ -1387,7 +1392,7 @@ Y_UNIT_TEST_F(WriteSubDomainOutOfSpace, TPartitionFixture) SendChangeOwner(cookie, "owner1", Ctx->Edge, true); auto ownerEvent = Ctx->Runtime->GrabEdgeEvent<TEvPQ::TEvProxyResponse>(TDuration::Seconds(1)); UNIT_ASSERT(ownerEvent != nullptr); - auto ownerCookie = ownerEvent->Response.GetPartitionResponse().GetCmdGetOwnershipResult().GetOwnerCookie(); + auto ownerCookie = ownerEvent->Response->GetPartitionResponse().GetCmdGetOwnershipResult().GetOwnerCookie(); TAutoPtr<IEventHandle> handle; std::function<bool(const TEvPQ::TEvError&)> truth = [&](const TEvPQ::TEvError& e) { return cookie == e.Cookie; }; @@ -1437,7 +1442,7 @@ Y_UNIT_TEST_F(WriteSubDomainOutOfSpace_DisableExpiration, TPartitionFixture) SendChangeOwner(cookie, "owner1", Ctx->Edge, true); auto ownerEvent = Ctx->Runtime->GrabEdgeEvent<TEvPQ::TEvProxyResponse>(TDuration::Seconds(1)); UNIT_ASSERT(ownerEvent != nullptr); - auto ownerCookie = ownerEvent->Response.GetPartitionResponse().GetCmdGetOwnershipResult().GetOwnerCookie(); + auto ownerCookie = ownerEvent->Response->GetPartitionResponse().GetCmdGetOwnershipResult().GetOwnerCookie(); TAutoPtr<IEventHandle> handle; std::function<bool(const TEvPQ::TEvProxyResponse&)> truth = [&](const TEvPQ::TEvProxyResponse& e) { return cookie == e.Cookie; }; @@ -1464,7 +1469,7 @@ Y_UNIT_TEST_F(WriteSubDomainOutOfSpace_DisableExpiration, TPartitionFixture) event = Ctx->Runtime->GrabEdgeEventIf<TEvPQ::TEvProxyResponse>(handle, truth, TDuration::Seconds(1)); UNIT_ASSERT(event != nullptr); - UNIT_ASSERT_EQUAL(NMsgBusProxy::MSTATUS_OK, event->Response.GetStatus()); + UNIT_ASSERT_EQUAL(NMsgBusProxy::MSTATUS_OK, event->Response->GetStatus()); } Y_UNIT_TEST_F(WriteSubDomainOutOfSpace_IgnoreQuotaDeadline, TPartitionFixture) @@ -1493,7 +1498,7 @@ Y_UNIT_TEST_F(WriteSubDomainOutOfSpace_IgnoreQuotaDeadline, TPartitionFixture) SendChangeOwner(cookie, "owner1", Ctx->Edge, true); auto ownerEvent = Ctx->Runtime->GrabEdgeEvent<TEvPQ::TEvProxyResponse>(TDuration::Seconds(1)); UNIT_ASSERT(ownerEvent != nullptr); - auto ownerCookie = ownerEvent->Response.GetPartitionResponse().GetCmdGetOwnershipResult().GetOwnerCookie(); + auto ownerCookie = ownerEvent->Response->GetPartitionResponse().GetCmdGetOwnershipResult().GetOwnerCookie(); TAutoPtr<IEventHandle> handle; std::function<bool(const TEvPQ::TEvProxyResponse&)> truth = [&](const TEvPQ::TEvProxyResponse& e) { return cookie == e.Cookie; }; @@ -1520,7 +1525,7 @@ Y_UNIT_TEST_F(WriteSubDomainOutOfSpace_IgnoreQuotaDeadline, TPartitionFixture) event = Ctx->Runtime->GrabEdgeEventIf<TEvPQ::TEvProxyResponse>(handle, truth, TDuration::Seconds(1)); UNIT_ASSERT(event != nullptr); - UNIT_ASSERT_EQUAL(NMsgBusProxy::MSTATUS_OK, event->Response.GetStatus()); + UNIT_ASSERT_EQUAL(NMsgBusProxy::MSTATUS_OK, event->Response->GetStatus()); } } diff --git a/ydb/core/persqueue/ut/pq_ut.cpp b/ydb/core/persqueue/ut/pq_ut.cpp index 8c1d44844b..2c89b9f95c 100644 --- a/ydb/core/persqueue/ut/pq_ut.cpp +++ b/ydb/core/persqueue/ut/pq_ut.cpp @@ -20,6 +20,135 @@ const static TString TOPIC_NAME = "rt3.dc1--topic"; Y_UNIT_TEST_SUITE(TPQTest) { 
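+// (Editorial sketch.) The tests below drive the new direct read command flow end
+// to end with the pq_ut_common helpers; the happy path, in order, is roughly:
+//
+//     auto pipe = CmdCreateSession(sessionSettings, tc);  // CmdCreateSession with the pipe in PipeClient
+//     readSettings.DirectReadId = 1;
+//     readSettings.Pipe = pipe;
+//     CmdRead(readSettings, tc);                          // replies with CmdPrepareReadResult
+//     CmdPublishRead(publishSettings, tc);                // TEvPublishDirectRead goes to the cache service
+//     CmdForgetRead(publishSettings, tc);                 // TEvForgetDirectRead once the client has acked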
+Y_UNIT_TEST(TestDirectReadHappyWay) { + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); + }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { + activeZone = false; + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + tc.Runtime->SetScheduledLimit(1000); + tc.Runtime->RegisterService(MakePQDReadCacheServiceActorId(), tc.Runtime->Register( + CreatePQDReadCacheService(new NMonitoring::TDynamicCounters())) + ); + + PQTabletPrepare({.partitions = 1, .writeSpeed = 100_KB}, {{"user1", true}}, tc); + TVector<std::pair<ui64, TString>> data; + TString s{2_MB, 'c'}; + data.push_back({1, s}); + CmdWrite(0, "sourceid0", data, tc, false, {}, false, "", -1, 0, false, false, true); + TString sessionId = "session1"; + TString user = "user1"; + TPQCmdSettings sessionSettings{0, user, sessionId}; + sessionSettings.PartitionSessionId = 1; + + TPQCmdReadSettings readSettings{sessionId, 0, 0, 1, 99999, 1}; + readSettings.PartitionSessionId = 1; + readSettings.DirectReadId = 1; + readSettings.User = user; + + activeZone = false; + auto pipe = CmdCreateSession(sessionSettings, tc); + TCmdDirectReadSettings publishSettings{0, sessionId, 1, 1, pipe, false}; + readSettings.Pipe = pipe; + CmdRead(readSettings, tc); + Cerr << "Run cmd publish\n"; + CmdPublishRead(publishSettings, tc); + Cerr << "Run cmd forget\n"; + CmdForgetRead(publishSettings, tc); + }); +} + +Y_UNIT_TEST(DirectReadBadSessionOrPipe) { + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); + }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { + TFinalizer finalizer(tc); + tc.Prepare(dispatchName, setup, activeZone); + activeZone = false; + tc.Runtime->SetScheduledLimit(1000); + + PQTabletPrepare({.partitions = 1, .writeSpeed = 100_KB}, {{"user1", true}}, tc); + TVector<std::pair<ui64, TString>> data; + TString s{2_MB, 'c'}; + data.push_back({1, s}); + CmdWrite(0, "sourceid2", data, tc, false, {}, false, "", -1, 0, false, false, true); + TString sessionId = "session2"; + TString user = "user2"; + TPQCmdSettings sessionSettings{0, user, sessionId}; + sessionSettings.PartitionSessionId = 1; + + TPQCmdReadSettings readSettings(sessionId, 0, 0, 1, 99999, 1); + readSettings.PartitionSessionId = 1; + readSettings.DirectReadId = 1; + readSettings.User = user; + activeZone = false; + + readSettings.ToFail = true; + // No pipe + CmdRead(readSettings, tc); + auto pipe = CmdCreateSession(sessionSettings, tc); + readSettings.Pipe = pipe; + readSettings.Session = ""; + // No session + CmdRead(readSettings, tc); + readSettings.Session = "bad-session"; + // Bad session + CmdRead(readSettings, tc); + activeZone = false; + readSettings.Session = sessionId; + CmdKillSession(0, user, sessionId, tc, pipe); + activeZone = false; + // Dead session + CmdRead(readSettings, tc); + + activeZone = false; + TCmdDirectReadSettings publishSettings{0, sessionId, 1, 1, pipe, true}; + readSettings.Pipe = pipe; + activeZone = false; + // Dead session + Cerr << "Publish read\n"; + CmdPublishRead(publishSettings, tc); + Cerr << "Forget read\n"; + CmdForgetRead(publishSettings, tc); + }); +} +Y_UNIT_TEST(DirectReadOldPipe) { + TTestContext tc; + RunTestWithReboots(tc.TabletIds, [&]() { + return tc.InitialEventsFilter.Prepare(); + }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { + TFinalizer finalizer(tc);
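+ // This test simulates the server pipe disconnecting before a direct read
+ // arrives: after TEvServerDisconnected the tablet drops the pipe's session
+ // state, so the read below is expected to fail.
+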
tc.Prepare(dispatchName, setup, activeZone); + activeZone = false; + tc.Runtime->SetScheduledLimit(1000); + + PQTabletPrepare({.partitions = 1, .writeSpeed = 100_KB}, {{"user1", true}}, tc); + TString sessionId = "session2"; + TString user = "user2"; + TPQCmdSettings sessionSettings{0, user, sessionId}; + sessionSettings.PartitionSessionId = 1; + + TPQCmdReadSettings readSettings(sessionId, 0, 0, 1, 99999, 1); + readSettings.PartitionSessionId = 1; + readSettings.DirectReadId = 1; + readSettings.ToFail = true; + activeZone = false; + + auto pipe = CmdCreateSession(sessionSettings, tc); + + auto event = MakeHolder<TEvTabletPipe::TEvServerDisconnected>(0, pipe, TActorId{}); + tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, event.Release(), 0, GetPipeConfigWithRetries()); + readSettings.Pipe = pipe; + + CmdRead(readSettings, tc); + }); +} + + + Y_UNIT_TEST(TestPartitionTotalQuota) { TTestContext tc; RunTestWithReboots(tc.TabletIds, [&]() { @@ -1698,7 +1827,7 @@ Y_UNIT_TEST(TestReadSessions) { activeZone = true; TVector<std::pair<ui64, TString>> data; - CmdCreateSession(0, "user1", "session1", tc); + CmdCreateSession(TPQCmdSettings{0, "user1", "session1"}, tc); CmdSetOffset(0, "user1", 0, false, tc, "session1"); //all ok - session is set CmdSetOffset(0, "user1", 0, true, tc, "other_session"); //fails - session1 is active @@ -1706,10 +1835,10 @@ Y_UNIT_TEST(TestReadSessions) { CmdSetOffset(0, "user1", 0, false, tc, "session1"); - CmdCreateSession(0, "user1", "session2", tc, 0, 1, 1); - CmdCreateSession(0, "user1", "session3", tc, 0, 1, 1, true); //error on creation - CmdCreateSession(0, "user1", "session3", tc, 0, 0, 2, true); //error on creation - CmdCreateSession(0, "user1", "session3", tc, 0, 0, 0, true); //error on creation + CmdCreateSession(TPQCmdSettings{0, "user1", "session2", 0, 1, 1}, tc); + CmdCreateSession(TPQCmdSettings{0, "user1", "session3", 0, 1, 1, true}, tc); //error on creation + CmdCreateSession(TPQCmdSettings{0, "user1", "session3", 0, 0, 2, true}, tc); //error on creation + CmdCreateSession(TPQCmdSettings{0, "user1", "session3", 0, 0, 0, true}, tc); //error on creation CmdSetOffset(0, "user1", 0, true, tc, "session1"); CmdSetOffset(0, "user1", 0, true, tc, "session3"); CmdSetOffset(0, "user1", 0, false, tc, "session2"); @@ -1827,7 +1956,7 @@ Y_UNIT_TEST(TestReadSubscription) { TVector<std::pair<ui64, TString>> data; - ui32 pp = 8 + 4 + 2 + 9; + ui32 pp = 8 + 4 + 2 + 9; TString tmp0{32 - pp - 2, '-'}; char k = 0; for (ui32 i = 0; i < 5; ++i) { diff --git a/ydb/core/persqueue/ya.make b/ydb/core/persqueue/ya.make index 28086333e4..8000bbe68a 100644 --- a/ydb/core/persqueue/ya.make +++ b/ydb/core/persqueue/ya.make @@ -36,6 +36,7 @@ SRCS( utils.cpp write_meta.cpp microseconds_sliding_window.cpp + dread_cache_service/caching_service.cpp ) GENERATE_ENUM_SERIALIZATION(sourceid.h) @@ -66,5 +67,6 @@ END() RECURSE_FOR_TESTS( ut + dread_cache_service/ut ut/slow ) diff --git a/ydb/core/protos/counters_pq.proto b/ydb/core/protos/counters_pq.proto index 01cbbeaba2..c381e0f1eb 100644 --- a/ydb/core/protos/counters_pq.proto +++ b/ydb/core/protos/counters_pq.proto @@ -137,6 +137,8 @@ enum EPercentileCounters { COUNTER_LATENCY_PQ_REGISTER_MESSAGE_GROUP = 17 [(CounterOpts) = {Name: "LatencyRegisterMessageGroup"}]; COUNTER_LATENCY_PQ_DEREGISTER_MESSAGE_GROUP = 18 [(CounterOpts) = {Name: "LatencyDeregisterMessageGroup"}]; COUNTER_LATENCY_PQ_SPLIT_MESSAGE_GROUP = 19 [(CounterOpts) = {Name: "LatencySplitMessageGroup"}]; + COUNTER_LATENCY_PQ_PUBLISH_READ = 20 [(CounterOpts) = {Name: 
"LatencyPublishRead"}]; + COUNTER_LATENCY_PQ_FORGET_READ = 21 [(CounterOpts) = {Name: "LatencyForgetRead"}]; } diff --git a/ydb/core/protos/msgbus_pq.proto b/ydb/core/protos/msgbus_pq.proto index 6d93226f3d..8ab13eff77 100644 --- a/ydb/core/protos/msgbus_pq.proto +++ b/ydb/core/protos/msgbus_pq.proto @@ -6,6 +6,12 @@ package NKikimrClient; option java_package = "ru.yandex.kikimr.proto"; +message TReadSessionKey { + optional string SessionId = 1; + optional uint64 PartitionSessionId = 2; +} + + message TPersQueuePartitionRequest { message TCmdRead { optional string ClientId = 1; // mandatory @@ -26,6 +32,20 @@ message TPersQueuePartitionRequest { optional string ExplicitHash = 13; optional bool ExternalOperation = 14 [default = false]; + optional uint64 DirectReadId = 15; + optional uint64 PartitionSessionId = 16; + optional int64 LastOffset = 17; + + } + + message TCmdPublishDirectRead { + optional TReadSessionKey SessionKey = 1; + optional uint64 DirectReadId = 2; + } + + message TCmdForgetDirectRead { + optional TReadSessionKey SessionKey = 1; + optional uint64 DirectReadId = 2; } message TCmdCreateSession { @@ -33,6 +53,7 @@ message TPersQueuePartitionRequest { optional string SessionId = 2; optional uint64 Generation = 3; optional uint64 Step = 4; + optional uint64 PartitionSessionId = 5; } message TCmdDeleteSession { @@ -129,6 +150,8 @@ message TPersQueuePartitionRequest { optional TCmdDeleteSession CmdDeleteSession = 6; optional TCmdCreateSession CmdCreateSession = 7; optional TCmdRead CmdRead = 8; + optional TCmdPublishDirectRead CmdPublishRead = 24; + optional TCmdForgetDirectRead CmdForgetRead = 25; optional TCmdSetClientOffset CmdSetClientOffset = 9; optional TCmdGetClientOffset CmdGetClientOffset = 10; optional TCmdGetOwnership CmdGetOwnership = 11; @@ -387,6 +410,9 @@ message TCmdReadResult { optional uint64 RealReadOffset = 10; optional uint64 WaitQuotaTimeMs = 11; optional uint64 ReadFromTimestampMs = 12; + optional uint64 SizeEstimate = 13; + optional int64 LastOffset = 14; + optional uint64 EndOffset = 15; } @@ -444,11 +470,30 @@ message TPersQueuePartitionResponse { optional string OwnerCookie = 1; } + message TCmdPrepareDirectReadResult { + optional uint32 BytesSizeEstimate = 1; + optional uint64 ReadOffset = 2; + optional uint64 LastOffset = 3; + + optional uint64 EndOffset = 4; + optional uint64 SizeLag = 6; + optional uint64 WriteTimestampMS = 7; + + optional uint64 DirectReadId = 5; + } + + message TCmdPublishDirectReadResult { + } + + repeated TCmdWriteResult CmdWriteResult = 1; optional TCmdGetMaxSeqNoResult CmdGetMaxSeqNoResult = 2; optional TCmdReadResult CmdReadResult = 3; optional TCmdGetClientOffsetResult CmdGetClientOffsetResult = 4; optional TCmdGetOwnershipResult CmdGetOwnershipResult = 5; + optional TCmdPrepareDirectReadResult CmdPrepareReadResult = 7; + optional TCmdPublishDirectReadResult CmdPublishReadResult = 8; + optional TCmdPublishDirectReadResult CmdForgetReadResult = 9; optional uint64 Cookie = 6; } diff --git a/ydb/core/protos/pqconfig.proto b/ydb/core/protos/pqconfig.proto index bc0af912d5..6abc3855e4 100644 --- a/ydb/core/protos/pqconfig.proto +++ b/ydb/core/protos/pqconfig.proto @@ -809,6 +809,8 @@ message TUserInfo { optional string Session = 4; optional uint64 OffsetRewindSum = 5; optional uint64 ReadRuleGeneration = 6; + optional uint64 PartitionSessionId = 7; + } message TPartitionClientInfo { diff --git a/ydb/core/testlib/test_client.cpp b/ydb/core/testlib/test_client.cpp index 517a65ecb6..259d5cf6d9 100644 --- 
a/ydb/core/testlib/test_client.cpp +++ b/ydb/core/testlib/test_client.cpp @@ -99,6 +99,7 @@ #include <ydb/core/keyvalue/keyvalue.h> #include <ydb/core/persqueue/pq.h> #include <ydb/core/persqueue/cluster_tracker.h> +#include <ydb/core/persqueue/dread_cache_service/caching_service.h> #include <ydb/library/security/ydb_credentials_provider_factory.h> #include <ydb/core/fq/libs/init/init.h> #include <ydb/core/fq/libs/mock/yql_mock.h> @@ -961,6 +962,11 @@ namespace Tests { TActorId pqClusterTrackerId = Runtime->Register(pqClusterTracker, nodeIdx); Runtime->RegisterService(NPQ::NClusterTracker::MakeClusterTrackerID(), pqClusterTrackerId, nodeIdx); } + { + IActor* pqReadCacheService = NPQ::CreatePQDReadCacheService(Runtime->GetDynamicCounters()); + TActorId readCacheId = Runtime->Register(pqReadCacheService, nodeIdx); + Runtime->RegisterService(NPQ::MakePQDReadCacheServiceActorId(), readCacheId, nodeIdx); + } { if (Settings->PQConfig.GetEnabled() == true) { diff --git a/ydb/core/testlib/test_pq_client.h b/ydb/core/testlib/test_pq_client.h index 7448bfd6ca..717d653260 100644 --- a/ydb/core/testlib/test_pq_client.h +++ b/ydb/core/testlib/test_pq_client.h @@ -988,17 +988,31 @@ public: THolder<NMsgBusProxy::TBusPersQueue> alterRequest = requestDescr.GetRequest(); ui32 prevVersion = GetTopicVersionFromMetadata(name); + while (prevVersion == 0) { + Sleep(TDuration::MilliSeconds(500)); + prevVersion = GetTopicVersionFromMetadata(name); + } CallPersQueueGRPC(alterRequest->Record); + Cerr << "Alter got " << prevVersion << "\n"; const TInstant start = TInstant::Now(); AlterTopic(); - while (GetTopicVersionFromMetadata(name, cacheSize) != prevVersion + 1) { + auto ver = GetTopicVersionFromMetadata(name, cacheSize); + while (ver != prevVersion + 1) { + Cerr << "Alter1 got " << ver << "\n"; + Sleep(TDuration::MilliSeconds(500)); + ver = GetTopicVersionFromMetadata(name, cacheSize); UNIT_ASSERT(TInstant::Now() - start < ::DEFAULT_DISPATCH_TIMEOUT); } - while (GetTopicVersionFromPath(name) != prevVersion + 1) { + auto ver2 = GetTopicVersionFromPath(name); + while (ver2 != prevVersion + 1) { + Cerr << "Alter2 got " << ver2 << "\n"; + Sleep(TDuration::MilliSeconds(500)); + ver2 = GetTopicVersionFromPath(name); + UNIT_ASSERT(TInstant::Now() - start < ::DEFAULT_DISPATCH_TIMEOUT); } @@ -1412,6 +1426,8 @@ public: auto settings = NYdb::NPersQueue::TCreateTopicSettings().PartitionsCount(params.PartsCount).ClientWriteDisabled(!params.CanWrite); settings.FederationAccount(params.Account); settings.SupportedCodecs(params.Codecs); + //settings.MaxPartitionWriteSpeed(50_MB); + //settings.MaxPartitionWriteBurst(50_MB); TVector<NYdb::NPersQueue::TReadRuleSettings> rrSettings; for (auto &user : params.ReadRules) { rrSettings.push_back({NYdb::NPersQueue::TReadRuleSettings{}.ConsumerName(user)}); diff --git a/ydb/library/persqueue/topic_parser/counters.h b/ydb/library/persqueue/topic_parser/counters.h index 8367e10f82..d69b247d68 100644 --- a/ydb/library/persqueue/topic_parser/counters.h +++ b/ydb/library/persqueue/topic_parser/counters.h @@ -12,7 +12,6 @@ TVector<NPersQueue::TPQLabelsInfo> GetLabelsForCustomCluster(const TTopicConvert TVector<std::pair<TString, TString>> GetSubgroupsForTopic(const TTopicConverterPtr& topic, const TString& cloudId, const TString& dbId, const TString& dbPath, const TString& folderId); - ::NMonitoring::TDynamicCounterPtr GetCounters(::NMonitoring::TDynamicCounterPtr counters, const TString& subsystem, const TTopicConverterPtr& topic); diff --git a/ydb/public/api/grpc/ydb_topic_v1.proto
b/ydb/public/api/grpc/ydb_topic_v1.proto index 4a59c7fb88..1650f60b40 100644 --- a/ydb/public/api/grpc/ydb_topic_v1.proto +++ b/ydb/public/api/grpc/ydb_topic_v1.proto @@ -53,9 +53,9 @@ service TopicService { // StopPartitionSessionResponse(PartitionSessionID1, ...) // only after this response server will give this partition to other session. // ----------------> - // StartPartitionSessionResponse(PartitionSession2, ...) + // StartPartitionSessionResponse(PartitionSessionID2, ...) // ----------------> - // ReadResponse(data, ...) + // ReadResponse(MessageData, ...) // <---------------- // CommitRequest(PartitionCommit1, ...) // ----------------> @@ -63,8 +63,51 @@ // <---------------- // [something went wrong] (status != SUCCESS, issues not empty) // <---------------- + // + // Pipeline for direct reading is similar, but the client receives data directly from the partition node. + // The following is an excerpt from the pipeline for direct reading: + // client server + // InitRequest(Topics, ClientID, ..., direct_read = true) + // ----------------> + // InitResponse(SessionID1) + // <---------------- + // ReadRequest + // ----------------> + // StartPartitionSessionRequest(Topic1, Partition1, PartitionSessionID1, PartitionLocation1, Secret1,...) + // <---------------- + // StartPartitionSessionResponse(PartitionSessionID1, ...) + // ----------------> + // + // Start data session to the partition node using StreamDirectRead.StartDirectReadSession + // + // Get data from the partition node using StreamDirectRead.DirectReadResponse + // + // DirectReadAck(PartitionSessionID1, DirectReadID1, ...) + // ----------------> + // ReadRequest + // ----------------> + // + // Get data from the partition node using StreamDirectRead.DirectReadResponse + // + // DirectReadAck(PartitionSessionID1, DirectReadID2, ...) + // ----------------> rpc StreamRead(stream StreamReadMessage.FromClient) returns (stream StreamReadMessage.FromServer); + // Create DirectRead Session + // Pipeline: + // client server + // StartDirectReadPartitionSession(SessionID1, PartitionSessionID1, TabletGeneration1, Secret1,...) + // ----------------> + // DirectReadResponse(PartitionSessionID1, DirectReadID1, Secret1, ...) + // <---------------- + // DirectReadResponse(PartitionSessionID1, DirectReadID2, Secret1, ...) + // <---------------- + // UpdateDirectReadPartitionSession(SessionID1, PartitionSessionID1, TabletGeneration1, Secret1,...) + // ----------------> + // StopDirectReadPartitionSession(SessionID1, PartitionSessionID1, TabletGeneration1, Secret1,...) + // <---------------- + rpc StreamDirectRead(stream StreamDirectReadMessage.FromClient) returns (stream StreamDirectReadMessage.FromServer); + // Single commit offset request. rpc CommitOffset(CommitOffsetRequest) returns (CommitOffsetResponse); diff --git a/ydb/public/api/protos/ydb_topic.proto b/ydb/public/api/protos/ydb_topic.proto index c4c0606055..8b5c0aac9c 100644 --- a/ydb/public/api/protos/ydb_topic.proto +++ b/ydb/public/api/protos/ydb_topic.proto @@ -125,7 +125,7 @@ message StreamWriteMessage { } - // Response for handshake. + // Response to the handshake. message InitResponse { // Last persisted message's sequence number for this producer. // Zero for new producer.
@@ -234,13 +234,12 @@ message StreamWriteMessage { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // StreamRead - // Messages for bidirectional streaming rpc StreamRead message StreamReadMessage { // Within a StreamRead session delivered messages are separated by partition. // Reads from a single partition are represented by a partition session. message PartitionSession { - // Identitifier of partition session. Unique inside one RPC call. + // Identifier of partition session. Unique inside one RPC call. int64 partition_session_id = 1; // Topic path of partition. string path = 2; @@ -254,6 +253,7 @@ message StreamReadMessage { // CommitOffsetRequest - request for commit of some read data. // PartitionSessionStatusRequest - request for session status // UpdateTokenRequest - request to update auth token + // DirectReadAck - client signals it has finished direct reading from the partition node. // // StartPartitionSessionResponse - Response to StreamReadServerMessage.StartPartitionSessionRequest. // Client signals it is ready to get data from partition. @@ -267,10 +267,12 @@ message StreamReadMessage { CommitOffsetRequest commit_offset_request = 3; PartitionSessionStatusRequest partition_session_status_request = 4; UpdateTokenRequest update_token_request = 5; + DirectReadAck direct_read_ack = 8; // Responses to respective server commands. StartPartitionSessionResponse start_partition_session_response = 6; StopPartitionSessionResponse stop_partition_session_response = 7; + } } @@ -283,6 +285,7 @@ message StreamReadMessage { // // StartPartitionSessionRequest - command from server to create a partition session. // StopPartitionSessionRequest - command from server to destroy a partition session. + // UpdatePartitionSession - command from server to update a partition session. message FromServer { // Server status of response. Ydb.StatusIds.StatusCode status = 1; @@ -301,6 +304,8 @@ message StreamReadMessage { // Server commands. StartPartitionSessionRequest start_partition_session_request = 8; StopPartitionSessionRequest stop_partition_session_request = 9; + + UpdatePartitionSession update_partition_session = 10; } } @@ -313,6 +318,8 @@ message StreamReadMessage { string consumer = 2; // Optional name. Will be shown in debug stat. string reader_name = 3; + // Direct reading from a partition node. + bool direct_read = 4; message TopicReadSettings { // Topic path. @@ -331,7 +338,7 @@ message StreamReadMessage { // Handshake response. message InitResponse { - // Read session identifier for debug purposes. + // Read session identifier. string session_id = 1; } @@ -355,7 +362,7 @@ message StreamReadMessage { // 4) Server is free to send up to 50 + 100 = 150 bytes. But the next read message is too big, // and it sends 160 bytes ReadResponse. // 5) Let's assume client somehow processes it, and its 200 bytes buffer is free again. - // It shoud account for excess 10 bytes and send ReadRequest with bytes_size = 210. + // It should account for excess 10 bytes and send ReadRequest with bytes_size = 210. int64 bytes_size = 1; } @@ -364,7 +371,7 @@ message StreamReadMessage { // One client message representation. message MessageData { // Partition offset in partition that assigned for message. - int64 offset = 1; //unique value for clientside deduplication - Topic:Partition:Offset + int64 offset = 1; //unique value for client side deduplication - Topic:Partition:Offset // Sequence number that provided with message on write from client. 
int64 seq_no = 2; // Timestamp of creation of message provided on write from client. @@ -379,7 +386,6 @@ message StreamReadMessage { // Filled if message_group_id was set on message write. string message_group_id = 7 [(Ydb.length).le = 2048]; repeated MetadataItem metadata_items = 8; - } // Representation of sequence of client messages from one write session. @@ -448,7 +454,7 @@ message StreamReadMessage { int64 partition_session_id = 1; } - // Response for status request. + // Response to status request. message PartitionSessionStatusResponse { // Identifier of partition session whose status was requested. int64 partition_session_id = 1; @@ -474,6 +480,9 @@ message StreamReadMessage { // Partition contains messages with offsets in range [start, end). OffsetsRange partition_offsets = 3; + + // Partition location, filled only when InitRequest.direct_read is true. + PartitionLocation partition_location = 4; } // Signal for server that client is ready to receive data for partition. @@ -490,6 +499,7 @@ message StreamReadMessage { // Server will return data starting from offset that is maximum of actual committed offset, read_offset (if set) // and offsets calculated from InitRequest.max_lag and InitRequest.read_from. optional int64 read_offset = 2; + // All messages with offset less than commit_offset are processed by client. // Server will commit this position if this is not done yet. optional int64 commit_offset = 3; @@ -510,6 +520,9 @@ message StreamReadMessage { // Upper bound for committed offsets. int64 committed_offset = 3; + + // Upper bound for read request identifiers, filled only when InitRequest.direct_read is true and graceful is true. + int64 last_direct_read_id = 4; } // Signal for server that client finished working with this partition. @@ -518,7 +531,124 @@ message StreamReadMessage { message StopPartitionSessionResponse { // Partition session identifier of partition session that is released by client. int64 partition_session_id = 1; + + // Flag of graceful stop, used only when InitRequest.direct_read is true. + // Client must pass this value unchanged from the StopPartitionSessionRequest. + // Server can send two StopPartitionSessionRequests, the first with graceful=true, the second with graceful=false. The client must answer both of them. + bool graceful = 2; + } + + // Command from server to notify about a partition session update. + // Client should not send a response to the command. + message UpdatePartitionSession { + // Partition session identifier. + int64 partition_session_id = 1; + + // Partition location, filled only when InitRequest.direct_read is true. + PartitionLocation partition_location = 2; + } + + // Signal for server that client has finished direct reading. + // Server should not send a response to the command. + message DirectReadAck { + // Partition session identifier. + int64 partition_session_id = 1; + + // Identifier of the successfully completed read request. + int64 direct_read_id = 2; + } + +} + +// Messages for bidirectional streaming rpc StreamDirectRead +message StreamDirectReadMessage { + + // Client-server message for direct read session. + // InitDirectRead - command from client to create and start a direct read session. + // StartDirectReadPartitionSession - command from client to create and start a direct read partition session.
+ // UpdateTokenRequest - request to update auth token + message FromClient { + oneof client_message { + InitDirectRead init_direct_read = 1; + StartDirectReadPartitionSession start_direct_read_partition_session = 2; + UpdateTokenRequest update_token_request = 3; + } + } + + // Server-client message for direct read session. + // DirectReadResponse - portion of message data. + // StopDirectReadPartitionSession - command from server to stop a direct read partition session. + // UpdateTokenResponse - acknowledgment of token update. + message FromServer { + // Server status of response. + Ydb.StatusIds.StatusCode status = 1; + + // Issues if any. + repeated Ydb.Issue.IssueMessage issues = 2; + + + oneof server_message { + StopDirectReadPartitionSession stop_direct_read_partition_session = 3; + DirectReadResponse direct_read_response = 4; + UpdateTokenResponse update_token_response = 5; + } + } + + // Command from client to create and start a direct read session. + // Server should not send a response to the command. + message InitDirectRead { + // Read session identifier. + string session_id = 1; + // Topics that will be read by this session. + repeated TopicReadSettings topics_read_settings = 2; + // Path of consumer that is used for reading by this session. + string consumer = 3; + + message TopicReadSettings { + // Topic path. + string path = 1; + } + } + + // Command from client to create and start a direct read partition session. + // Server should not send a response to the command. + message StartDirectReadPartitionSession { + // Partition session identifier. + int64 partition_session_id = 1; + + // Upper bound for read request identifiers. + int64 last_direct_read_id = 2; + + // Partition generation. + int64 generation = 3; + } + + // Command from server to stop a direct read partition session. + // Client should not send a response to the command. + message StopDirectReadPartitionSession { + // The reason for the stop. + Ydb.StatusIds.StatusCode status = 1; + + // Issues if any. + repeated Ydb.Issue.IssueMessage issues = 2; + + // Partition session identifier. + int64 partition_session_id = 3; } + + + // Messages that have been read directly from the partition node. + // It is a response to StreamReadMessage.ReadRequest. + message DirectReadResponse { + // Partition session identifier. + int64 partition_session_id = 1; + + // Read request identifier. + int64 direct_read_id = 2; + + // Message data. + StreamReadMessage.ReadResponse.PartitionData partition_data = 3; + } } message TransactionIdentity { @@ -602,7 +732,7 @@ message CommitOffsetResult { // Control messages -// message representing statistics by seleveral windows +// message representing statistics by several windows message MultipleWindowsStat { int64 per_minute = 1; int64 per_hour = 2; @@ -639,7 +769,7 @@ message Consumer { google.protobuf.Duration max_read_time_lag = 2; // Maximum of differences between write timestamp and create timestamp for all messages, read during last minute. google.protobuf.Duration max_write_time_lag = 3; - // Bytes read stastics. + // Bytes read statistics. MultipleWindowsStat bytes_read = 4; } } @@ -964,7 +1094,7 @@ message DescribeConsumerResult { // Maximum of differences between write timestamp and create timestamp for all messages, read during last minute. google.protobuf.Duration max_write_time_lag = 7; - // How much bytes were read during several windows statistics from this partiton. + // How many bytes were read, across several statistics windows, from this partition.
MultipleWindowsStat bytes_read = 8; // Read session name, provided by client. diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/read_session.ipp b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/read_session.ipp index c9dde5232f..8fc23e95c5 100644 --- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/read_session.ipp +++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/read_session.ipp @@ -894,6 +894,10 @@ void TSingleClusterReadSessionImpl<UseMigrationProtocol>::OnReadDone(NYdbGrpc::T case TServerMessage<false>::kStartPartitionSessionRequest: OnReadDoneImpl(std::move(*ServerMessage->mutable_start_partition_session_request()), deferred); break; + case TServerMessage<false>::kUpdatePartitionSession: + OnReadDoneImpl(std::move(*ServerMessage->mutable_update_partition_session()), deferred); + break; + case TServerMessage<false>::kStopPartitionSessionRequest: OnReadDoneImpl(std::move(*ServerMessage->mutable_stop_partition_session_request()), deferred); break; @@ -907,6 +911,9 @@ void TSingleClusterReadSessionImpl<UseMigrationProtocol>::OnReadDone(NYdbGrpc::T OnReadDoneImpl(std::move(*ServerMessage->mutable_update_token_response()), deferred); break; case TServerMessage<false>::SERVER_MESSAGE_NOT_SET: + errorStatus = TPlainStatus::Internal("Server message is not set"); + break; + default: errorStatus = TPlainStatus::Internal("Unexpected response from server"); break; } @@ -1320,6 +1327,21 @@ inline void TSingleClusterReadSessionImpl<false>::OnReadDoneImpl( template <> template <> inline void TSingleClusterReadSessionImpl<false>::OnReadDoneImpl( + Ydb::Topic::StreamReadMessage::UpdatePartitionSession&& msg, + TDeferredActions<false>& deferred) { + Y_ABORT_UNLESS(Lock.IsLocked()); + Y_UNUSED(deferred); + + auto partitionStreamIt = PartitionStreams.find(msg.partition_session_id()); + if (partitionStreamIt == PartitionStreams.end()) { + return; + } + //TODO: update generation/nodeid info +} + +template <> +template <> +inline void TSingleClusterReadSessionImpl<false>::OnReadDoneImpl( Ydb::Topic::StreamReadMessage::StopPartitionSessionRequest&& msg, TDeferredActions<false>& deferred) { Y_ABORT_UNLESS(Lock.IsLocked()); diff --git a/ydb/services/persqueue_v1/actors/CMakeLists.darwin-arm64.txt b/ydb/services/persqueue_v1/actors/CMakeLists.darwin-arm64.txt index 3d815020b5..d2f25bf3c9 100644 --- a/ydb/services/persqueue_v1/actors/CMakeLists.darwin-arm64.txt +++ b/ydb/services/persqueue_v1/actors/CMakeLists.darwin-arm64.txt @@ -14,6 +14,7 @@ target_link_libraries(services-persqueue_v1-actors PUBLIC library-actors-core cpp-containers-disjoint_interval_tree cpp-string_utils-base64 + ydb-core-util ydb-core-base ydb-core-grpc_services ydb-core-persqueue @@ -37,6 +38,7 @@ target_sources(services-persqueue_v1-actors PRIVATE ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/partition_actor.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/read_init_auth_actor.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/read_info_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/direct_read_actor.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/schema_actors.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/update_offsets_in_transaction_actor.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/partition_writer.cpp diff --git a/ydb/services/persqueue_v1/actors/CMakeLists.darwin-x86_64.txt b/ydb/services/persqueue_v1/actors/CMakeLists.darwin-x86_64.txt index 3d815020b5..d2f25bf3c9 100644 --- a/ydb/services/persqueue_v1/actors/CMakeLists.darwin-x86_64.txt 
+++ b/ydb/services/persqueue_v1/actors/CMakeLists.darwin-x86_64.txt @@ -14,6 +14,7 @@ target_link_libraries(services-persqueue_v1-actors PUBLIC library-actors-core cpp-containers-disjoint_interval_tree cpp-string_utils-base64 + ydb-core-util ydb-core-base ydb-core-grpc_services ydb-core-persqueue @@ -37,6 +38,7 @@ target_sources(services-persqueue_v1-actors PRIVATE ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/partition_actor.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/read_init_auth_actor.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/read_info_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/direct_read_actor.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/schema_actors.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/update_offsets_in_transaction_actor.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/partition_writer.cpp diff --git a/ydb/services/persqueue_v1/actors/CMakeLists.linux-aarch64.txt b/ydb/services/persqueue_v1/actors/CMakeLists.linux-aarch64.txt index babb76e529..326faf12eb 100644 --- a/ydb/services/persqueue_v1/actors/CMakeLists.linux-aarch64.txt +++ b/ydb/services/persqueue_v1/actors/CMakeLists.linux-aarch64.txt @@ -15,6 +15,7 @@ target_link_libraries(services-persqueue_v1-actors PUBLIC library-actors-core cpp-containers-disjoint_interval_tree cpp-string_utils-base64 + ydb-core-util ydb-core-base ydb-core-grpc_services ydb-core-persqueue @@ -38,6 +39,7 @@ target_sources(services-persqueue_v1-actors PRIVATE ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/partition_actor.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/read_init_auth_actor.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/read_info_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/direct_read_actor.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/schema_actors.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/update_offsets_in_transaction_actor.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/partition_writer.cpp diff --git a/ydb/services/persqueue_v1/actors/CMakeLists.linux-x86_64.txt b/ydb/services/persqueue_v1/actors/CMakeLists.linux-x86_64.txt index babb76e529..326faf12eb 100644 --- a/ydb/services/persqueue_v1/actors/CMakeLists.linux-x86_64.txt +++ b/ydb/services/persqueue_v1/actors/CMakeLists.linux-x86_64.txt @@ -15,6 +15,7 @@ target_link_libraries(services-persqueue_v1-actors PUBLIC library-actors-core cpp-containers-disjoint_interval_tree cpp-string_utils-base64 + ydb-core-util ydb-core-base ydb-core-grpc_services ydb-core-persqueue @@ -38,6 +39,7 @@ target_sources(services-persqueue_v1-actors PRIVATE ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/partition_actor.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/read_init_auth_actor.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/read_info_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/direct_read_actor.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/schema_actors.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/update_offsets_in_transaction_actor.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/partition_writer.cpp diff --git a/ydb/services/persqueue_v1/actors/CMakeLists.windows-x86_64.txt b/ydb/services/persqueue_v1/actors/CMakeLists.windows-x86_64.txt index 3d815020b5..d2f25bf3c9 100644 --- a/ydb/services/persqueue_v1/actors/CMakeLists.windows-x86_64.txt +++ b/ydb/services/persqueue_v1/actors/CMakeLists.windows-x86_64.txt @@ -14,6 +14,7 @@ 
target_link_libraries(services-persqueue_v1-actors PUBLIC library-actors-core cpp-containers-disjoint_interval_tree cpp-string_utils-base64 + ydb-core-util ydb-core-base ydb-core-grpc_services ydb-core-persqueue @@ -37,6 +38,7 @@ target_sources(services-persqueue_v1-actors PRIVATE ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/partition_actor.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/read_init_auth_actor.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/read_info_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/direct_read_actor.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/schema_actors.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/update_offsets_in_transaction_actor.cpp ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/partition_writer.cpp diff --git a/ydb/services/persqueue_v1/actors/direct_read_actor.cpp b/ydb/services/persqueue_v1/actors/direct_read_actor.cpp new file mode 100644 index 0000000000..0f53ebc819 --- /dev/null +++ b/ydb/services/persqueue_v1/actors/direct_read_actor.cpp @@ -0,0 +1,471 @@ +#include "direct_read_actor.h" + +#include "helpers.h" +#include "read_init_auth_actor.h" +#include "read_session_actor.h" + +#include <ydb/library/persqueue/topic_parser/counters.h> +#include <ydb/core/persqueue/dread_cache_service/caching_service.h> + +#include <library/cpp/protobuf/util/repeated_field_utils.h> + +#include <google/protobuf/util/time_util.h> + +#include <util/string/join.h> +#include <util/string/strip.h> + +#include <utility> + +#define LOG_PREFIX "Direct read proxy " << ctx.SelfID.ToString() << ": " PQ_LOG_PREFIX + +namespace NKikimr::NGRpcProxy::V1 { + +using namespace NKikimrClient; +using namespace NMsgBusProxy; +using namespace PersQueue::V1; + +TDirectReadSessionActor::TDirectReadSessionActor( + TEvStreamReadRequest* request, const ui64 cookie, + const TActorId& schemeCache, const TActorId& newSchemeCache, + TIntrusivePtr<NMonitoring::TDynamicCounters> counters, + const TMaybe<TString> clientDC, + const NPersQueue::TTopicsListController& topicsHandler) + : TRlHelpers({}, request, READ_BLOCK_SIZE, false, TDuration::Minutes(1)) + , Request(request) + , Cookie(cookie) + , ClientDC(clientDC.GetOrElse("other")) + , StartTimestamp(TInstant::Now()) + , SchemeCache(schemeCache) + , NewSchemeCache(newSchemeCache) + , InitDone(false) + , ForceACLCheck(false) + , LastACLCheckTimestamp(TInstant::Zero()) + , Counters(counters) + , TopicsHandler(topicsHandler) +{ + Y_ASSERT(Request); +} + +void TDirectReadSessionActor::Bootstrap(const TActorContext& ctx) { + if (!AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { + ++(*GetServiceCounters(Counters, "pqproxy|readSession") + ->GetNamedCounter("sensor", "DirectSessionsCreatedTotal", true)); + } + + Request->GetStreamCtx()->Attach(ctx.SelfID); + if (!ReadFromStreamOrDie(ctx)) { + return; + } + + StartTime = ctx.Now(); + this->Become(&TDirectReadSessionActor::TThis::StateFunc); +} + +void TDirectReadSessionActor::Handle(typename IContext::TEvNotifiedWhenDone::TPtr&, const TActorContext& ctx) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << " grpc closed"); + Die(ctx); +} + + +bool TDirectReadSessionActor::ReadFromStreamOrDie(const TActorContext& ctx) { + if (!Request->GetStreamCtx()->Read()) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << " grpc read failed at start"); + Die(ctx); + return false; + } + return true; +} + +void TDirectReadSessionActor::Handle(typename IContext::TEvReadFinished::TPtr& ev, const TActorContext& 
ctx) { + auto& request = ev->Get()->Record; + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << " grpc read done" + << ": success# " << ev->Get()->Success + << ", data# " << request); + + if (!ev->Get()->Success) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << " grpc read failed"); + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvDone()); + return; + } + + switch (request.client_message_case()) { + case TClientMessage::kInitDirectRead: { + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvInitDirectRead(request, Request->GetStreamCtx()->GetPeerName())); + return; + } + + case TClientMessage::kStartDirectReadPartitionSession: { + const auto& req = request.start_direct_read_partition_session(); + + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvStartDirectRead(req.partition_session_id(), req.generation(), req.last_direct_read_id())); + return (void)ReadFromStreamOrDie(ctx); + } + + case TClientMessage::kUpdateTokenRequest: { + if (const auto token = request.update_token_request().token()) { // TODO: refresh token here + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvAuth(token)); + } + return (void)ReadFromStreamOrDie(ctx); + } + + default: { + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "unsupported request"); + } + } +} + + +bool TDirectReadSessionActor::WriteToStreamOrDie(const TActorContext& ctx, TServerMessage&& response, bool finish) { + bool res = false; + + if (!finish) { + res = Request->GetStreamCtx()->Write(std::move(response)); + } else { + res = Request->GetStreamCtx()->WriteAndFinish(std::move(response), grpc::Status::OK); + } + + if (!res) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << " grpc write failed"); + Die(ctx); + } + + return res; +} + + +void TDirectReadSessionActor::Handle(typename IContext::TEvWriteFinished::TPtr& ev, const TActorContext& ctx) { + if (!ev->Get()->Success) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << " grpc write failed"); + return Die(ctx); + } +} + + +void TDirectReadSessionActor::Die(const TActorContext& ctx) { + if (AuthInitActor) { + ctx.Send(AuthInitActor, new TEvents::TEvPoisonPill()); + } + + if (DirectSessionsActive) { + --(*DirectSessionsActive); + } + + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << " proxy is DEAD"); + ctx.Send(GetPQReadServiceActorID(), new TEvPQProxy::TEvSessionDead(Cookie)); + ctx.Send(NPQ::MakePQDReadCacheServiceActorId(), new TEvPQProxy::TEvDirectReadDataSessionDead(Session)); + + TActorBootstrapped<TDirectReadSessionActor>::Die(ctx); +} + + +void TDirectReadSessionActor::Handle(TEvPQProxy::TEvDone::TPtr&, const TActorContext&) { + CloseSession(PersQueue::ErrorCode::OK, "reads done signal, closing everything"); +} + + +void TDirectReadSessionActor::Handle(TEvPQProxy::TEvCloseSession::TPtr& ev, const TActorContext&) { + CloseSession(ev->Get()->ErrorCode, ev->Get()->Reason); +} + +void TDirectReadSessionActor::Handle(TEvPQProxy::TEvAuth::TPtr& ev, const TActorContext& ctx) { + const auto& auth = ev->Get()->Auth; + if (!auth.empty() && auth != Auth) { + Auth = auth; + Request->RefreshToken(auth, ctx, ctx.SelfID); + } +} + + +void TDirectReadSessionActor::Handle(TEvPQProxy::TEvStartDirectRead::TPtr& ev, const TActorContext& ctx) { + + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << " got StartDirectRead from client" + << ": sessionId# " << Session + << ", assignId# " << ev->Get()->AssignId + << ", lastDirectReadId# " << ev->Get()->LastDirectReadId + << ", generation# " << ev->Get()->Generation); + +
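+ // Hand the session key over to the node-local direct read cache service below; + // it resumes serving prepared responses starting from LastDirectReadId + 1.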
ctx.Send(NPQ::MakePQDReadCacheServiceActorId(), new TEvPQProxy::TEvDirectReadDataSessionConnected( + {Session, ev->Get()->AssignId}, ev->Get()->Generation, ev->Get()->LastDirectReadId + 1) + ); +} + + +void TDirectReadSessionActor::Handle(TEvPQProxy::TEvInitDirectRead::TPtr& ev, const TActorContext& ctx) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << "got init request:" << ev->Get()->Request.DebugString()); + + if (Initing) { + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "got second init request"); + } + Initing = true; + + const auto& init = ev->Get()->Request.init_direct_read(); + + if (!init.topics_read_settings_size()) { + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "no topics in init request"); + } + + if (init.consumer().empty()) { + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "no consumer in init request"); + } + + ClientId = NPersQueue::ConvertNewConsumerName(init.consumer(), ctx); + if (AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { + ClientPath = init.consumer(); + } else { + ClientPath = NPersQueue::StripLeadSlash(NPersQueue::MakeConsumerPath(init.consumer())); + } + + Session = init.session_id(); + if (Session.empty()) { + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "no session id in init request"); + } + PeerName = ev->Get()->PeerName; + + auto database = Request->GetDatabaseName().GetOrElse(TString()); + + for (const auto& topic : init.topics_read_settings()) { + const TString path = topic.path(); + if (path.empty()) { + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "empty topic in init request"); + } + + TopicsToResolve.insert(path); + } + + if (Request->GetSerializedToken().empty()) { + if (AppData(ctx)->PQConfig.GetRequireCredentialsInNewProtocol()) { + return CloseSession(PersQueue::ErrorCode::ACCESS_DENIED, + "unauthenticated access is forbidden, please provide credentials"); + } + } else { + Y_ABORT_UNLESS(Request->GetYdbToken()); + Auth = *(Request->GetYdbToken()); + Token = new NACLib::TUserToken(Request->GetSerializedToken()); + } + + TopicsList = TopicsHandler.GetReadTopicsList(TopicsToResolve, true, database); + + if (!TopicsList.IsValid) { + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TopicsList.Reason); + } + + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << " read init" + << ": from# " << PeerName + << ", request# " << ev->Get()->Request); + + if (!AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { + SetupCounters(); + } + + RunAuthActor(ctx); +} + + +void TDirectReadSessionActor::SetupCounters() { + if (DirectSessionsCreated) { + return; + } + + auto subGroup = GetServiceCounters(Counters, "pqproxy|readSession"); + subGroup = subGroup->GetSubgroup("Client", ClientId)->GetSubgroup("ConsumerPath", ClientPath); + const TString name = "sensor"; + + Errors = subGroup->GetExpiringNamedCounter(name, "Errors", true); + DirectSessionsActive = subGroup->GetExpiringNamedCounter(name, "DirectSessionsActive", false); + DirectSessionsCreated = subGroup->GetExpiringNamedCounter(name, "DirectSessionsCreated", true); + + ++(*DirectSessionsCreated); + ++(*DirectSessionsActive); +} + + + +void TDirectReadSessionActor::Handle(TEvPQProxy::TEvAuthResultOk::TPtr& ev, const TActorContext& ctx) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " auth ok" + << ": topics# " << ev->Get()->TopicAndTablets.size() + << ", initDone# " << InitDone); + + LastACLCheckTimestamp = ctx.Now(); + AuthInitActor = TActorId(); + + + if (!InitDone) { + for (const auto& 
[name, t] : ev->Get()->TopicAndTablets) { // TODO: return something from Init and Auth Actor (Full Path - ?) + + if (!GetMeteringMode()) { + SetMeteringMode(t.MeteringMode); + } else if (*GetMeteringMode() != t.MeteringMode) { + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, + "cannot read from topics with different metering modes"); + } + } + + if (IsQuotaRequired()) { + Y_ABORT_UNLESS(MaybeRequestQuota(1, EWakeupTag::RlInit, ctx)); + } else { + InitSession(ctx); + } + + } else { + for (const auto& [name, t] : ev->Get()->TopicAndTablets) { + if (t.MeteringMode != *GetMeteringMode()) { + return CloseSession(PersQueue::ErrorCode::OVERLOAD, TStringBuilder() + << "metering mode of topic: " << name << " has been changed"); + } + } + } +} + +void TDirectReadSessionActor::InitSession(const TActorContext& ctx) { + InitDone = true; + ReadFromStreamOrDie(ctx); + ctx.Schedule(TDuration::Seconds(AppData(ctx)->PQConfig.GetACLRetryTimeoutSec()), new TEvents::TEvWakeup(EWakeupTag::RecheckAcl)); +} + + +void TDirectReadSessionActor::CloseSession(PersQueue::ErrorCode::ErrorCode code, const TString& reason) { + auto ctx = ActorContext(); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << " Close session with reason: " << reason); + if (code != PersQueue::ErrorCode::OK) { + if (Errors) { + ++(*Errors); + } else if (!AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { + ++(*GetServiceCounters(Counters, "pqproxy|readSession")->GetCounter("Errors", true)); + } + + TServerMessage result; + result.set_status(ConvertPersQueueInternalCodeToStatus(code)); + FillIssue(result.add_issues(), code, reason); + + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " closed with error" + << ": reason# " << reason); + if (!WriteToStreamOrDie(ctx, std::move(result), true)) { + return; + } + } else { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " closed"); + if (!Request->GetStreamCtx()->Finish(grpc::Status::OK)) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc double finish failed"); + } + } + Die(ctx); +} + + +void TDirectReadSessionActor::Handle(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse::TPtr& ev , const TActorContext& ctx) { + if (ev->Get()->Authenticated && ev->Get()->InternalToken && !ev->Get()->InternalToken->GetSerializedToken().empty()) { + Token = ev->Get()->InternalToken; + ForceACLCheck = true; + + TServerMessage result; + result.set_status(Ydb::StatusIds::SUCCESS); + result.mutable_update_token_response(); + WriteToStreamOrDie(ctx, std::move(result)); + } else { + if (ev->Get()->Retryable) { + Request->ReplyUnavaliable(); + } else { + Request->ReplyUnauthenticated("refreshed token is invalid"); + } + Die(ctx); + } +} + + +void TDirectReadSessionActor::ProcessAnswer(TFormedDirectReadResponse::TPtr response, const TActorContext& ctx) { + if (!WriteToStreamOrDie(ctx, std::move(*response->Response))) { + return; + } +} + +void TDirectReadSessionActor::Handle(TEvents::TEvWakeup::TPtr& ev, const TActorContext& ctx) { + const auto tag = static_cast<EWakeupTag>(ev->Get()->Tag); + OnWakeup(tag); + + switch (tag) { + case EWakeupTag::RlInit: + return InitSession(ctx); + + case EWakeupTag::RecheckAcl: + return RecheckACL(ctx); + + case EWakeupTag::RlAllowed: + if (auto counters = Request->GetCounters()) { + counters->AddConsumedRequestUnits(PendingQuota->RequiredQuota); + } + + ProcessAnswer(PendingQuota, ctx); + + if (!WaitingQuota.empty()) { + PendingQuota = WaitingQuota.front(); + WaitingQuota.pop_front(); + } else { 
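+ // Nothing else is queued for quota; clear the pending slot.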
+ PendingQuota = nullptr; + } + if (PendingQuota) { + auto res = MaybeRequestQuota(PendingQuota->RequiredQuota, EWakeupTag::RlAllowed, ctx); + Y_ABORT_UNLESS(res); + } + + break; + + case EWakeupTag::RlNoResource: + case EWakeupTag::RlInitNoResource: + if (PendingQuota) { + auto res = MaybeRequestQuota(PendingQuota->RequiredQuota, EWakeupTag::RlAllowed, ctx); + Y_ABORT_UNLESS(res); + } else { + return CloseSession(PersQueue::ErrorCode::OVERLOAD, "throughput limit exceeded"); + } + break; + } +} + + +void TDirectReadSessionActor::RecheckACL(const TActorContext& ctx) { + const auto timeout = TDuration::Seconds(AppData(ctx)->PQConfig.GetACLRetryTimeoutSec()); + + ctx.Schedule(timeout, new TEvents::TEvWakeup(EWakeupTag::RecheckAcl)); + + const bool authTimedOut = (ctx.Now() - LastACLCheckTimestamp) > timeout; + + if (Token && !AuthInitActor && (ForceACLCheck || authTimedOut)) { + ForceACLCheck = false; + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " checking auth because of timeout"); + RunAuthActor(ctx); + } +} + + +void TDirectReadSessionActor::RunAuthActor(const TActorContext& ctx) { + Y_ABORT_UNLESS(!AuthInitActor); + AuthInitActor = ctx.Register(new TReadInitAndAuthActor( + ctx, ctx.SelfID, ClientId, Cookie, Session, SchemeCache, NewSchemeCache, Counters, Token, TopicsList, + TopicsHandler.GetLocalCluster())); +} + +void TDirectReadSessionActor::HandleDestroyPartitionSession(TEvPQProxy::TEvDirectReadDestroyPartitionSession::TPtr& ev) { + TServerMessage result; + auto* stop = result.mutable_stop_direct_read_partition_session(); + stop->set_partition_session_id(ev->Get()->ReadKey.PartitionSessionId); + result.set_status(ConvertPersQueueInternalCodeToStatus(ev->Get()->Code)); + FillIssue(stop->add_issues(), ev->Get()->Code, ev->Get()->Reason); + WriteToStreamOrDie(ActorContext(), std::move(result)); +} + +void TDirectReadSessionActor::HandleSessionKilled(TEvPQProxy::TEvDirectReadCloseSession::TPtr& ev) { + // TODO: CloseSession uses a different error code.
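+ // The caching service reported the whole session as dead; close the client stream with its reason.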
+ CloseSession(ev->Get()->Code, ev->Get()->Reason); +} + +void TDirectReadSessionActor::HandleGotData(TEvPQProxy::TEvDirectReadSendClientData::TPtr& ev) { + auto formedResponse = MakeIntrusive<TFormedDirectReadResponse>(); + formedResponse->Response = std::move(ev->Get()->Message); + ProcessAnswer(formedResponse, ActorContext()); +} + +} diff --git a/ydb/services/persqueue_v1/actors/direct_read_actor.h b/ydb/services/persqueue_v1/actors/direct_read_actor.h new file mode 100644 index 0000000000..1804fcce70 --- /dev/null +++ b/ydb/services/persqueue_v1/actors/direct_read_actor.h @@ -0,0 +1,180 @@ +#pragma once + +#include "events.h" +#include "persqueue_utils.h" + +#include <ydb/core/base/tablet_pipe.h> +#include <ydb/core/grpc_services/grpc_request_proxy.h> +#include <ydb/core/persqueue/events/global.h> + +#include <ydb/core/persqueue/pq_rl_helpers.h> + +#include <ydb/library/actors/core/actor_bootstrapped.h> + +namespace NKikimr::NGRpcProxy::V1 { + +struct TFormedDirectReadResponse: public TSimpleRefCount<TFormedDirectReadResponse> { + using TPtr = TIntrusivePtr<TFormedDirectReadResponse>; + + TFormedDirectReadResponse() = default; + + TFormedDirectReadResponse(TInstant start) + : Start(start) + { + } + + std::shared_ptr<Topic::StreamDirectReadMessage::FromServer> Response; + + TInstant Start; + TDuration WaitQuotaTime; + + ui64 RequiredQuota = 0; +}; + + + +class TDirectReadSessionActor + : public TActorBootstrapped<TDirectReadSessionActor> + , private NPQ::TRlHelpers +{ + using TClientMessage = Topic::StreamDirectReadMessage::FromClient; + + using TServerMessage = Topic::StreamDirectReadMessage::FromServer; + + using TEvStreamReadRequest = NGRpcService::TEvStreamTopicDirectReadRequest; + + using IContext = NGRpcServer::IGRpcStreamingContext<TClientMessage, TServerMessage>; + +public: + TDirectReadSessionActor(TEvStreamReadRequest* request, const ui64 cookie, + const TActorId& schemeCache, const TActorId& newSchemeCache, + TIntrusivePtr<::NMonitoring::TDynamicCounters> counters, + const TMaybe<TString> clientDC, + const NPersQueue::TTopicsListController& topicsHandler); + + void Bootstrap(const TActorContext& ctx); + + void Die(const TActorContext& ctx) override; + + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { + return NKikimrServices::TActivity::FRONT_PQ_READ; + } + +private: + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + // grpc events + HFunc(IContext::TEvReadFinished, Handle); + HFunc(IContext::TEvWriteFinished, Handle); + HFunc(IContext::TEvNotifiedWhenDone, Handle); + HFunc(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse, Handle); + + // proxy events + HFunc(TEvPQProxy::TEvAuthResultOk, Handle); // from auth actor + HFunc(TEvPQProxy::TEvInitDirectRead, Handle); // from gRPC + HFunc(TEvPQProxy::TEvDone, Handle); // from gRPC + HFunc(TEvPQProxy::TEvCloseSession, Handle); // from auth actor + HFunc(TEvPQProxy::TEvStartDirectRead, Handle); // from gRPC + HFunc(TEvPQProxy::TEvAuth, Handle); // from gRPC + + hFunc(TEvPQProxy::TEvDirectReadCloseSession, HandleSessionKilled); // from CachingService + hFunc(TEvPQProxy::TEvDirectReadDestroyPartitionSession, HandleDestroyPartitionSession); // from CachingService + hFunc(TEvPQProxy::TEvDirectReadSendClientData, HandleGotData); // from CachingService + // system events + HFunc(TEvents::TEvWakeup, Handle); + + default: + break; + } + } + + bool ReadFromStreamOrDie(const TActorContext& ctx); + bool WriteToStreamOrDie(const TActorContext& ctx, TServerMessage&& response, bool finish = false); + + void
InitSession(const TActorContext& ctx); + + // grpc events + void Handle(typename IContext::TEvReadFinished::TPtr& ev, const TActorContext &ctx); + void Handle(typename IContext::TEvWriteFinished::TPtr& ev, const TActorContext &ctx); + void Handle(typename IContext::TEvNotifiedWhenDone::TPtr& ev, const TActorContext &ctx); + void Handle(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse::TPtr& ev, const TActorContext &ctx); + + // proxy events + void Handle(TEvPQProxy::TEvAuthResultOk::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvInitDirectRead::TPtr& ev, const TActorContext& ctx); + //void Handle(typename TEvReadResponse::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvDone::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvCloseSession::TPtr& ev, const TActorContext& ctx); + //void Handle(TEvPQProxy::TEvDieCommand::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvStartDirectRead::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvAuth::TPtr& ev, const TActorContext& ctx); + + // Caching service events + void HandleSessionKilled(TEvPQProxy::TEvDirectReadCloseSession::TPtr& ev); + void HandleDestroyPartitionSession(TEvPQProxy::TEvDirectReadDestroyPartitionSession::TPtr& ev); + + void HandleGotData(TEvPQProxy::TEvDirectReadSendClientData::TPtr& ev); + + // system events + void Handle(TEvents::TEvWakeup::TPtr& ev, const TActorContext& ctx); + + void RunAuthActor(const TActorContext& ctx); + void RecheckACL(const TActorContext& ctx); + + void CloseSession(PersQueue::ErrorCode::ErrorCode code, const TString& reason); + + void SetupCounters(); + void SetupCounters(const TString& cloudId, const TString& dbId, const TString& dbPath, const bool isServerless, const TString& folderId); + + void ProcessAnswer(typename TFormedDirectReadResponse::TPtr response, const TActorContext& ctx); + +private: + std::unique_ptr<TEvStreamReadRequest> Request; + ui64 Cookie; + const TString ClientDC; + const TInstant StartTimestamp; + + TActorId SchemeCache; + TActorId NewSchemeCache; + + TActorId AuthInitActor; + TIntrusiveConstPtr<NACLib::TUserToken> Token; + + TString ClientId; + TString ClientPath; + TString Session; + TString PeerName; + + bool InitDone; + + TString Auth; + + bool ForceACLCheck; + TInstant LastACLCheckTimestamp; + + //THashMap<TString, TTopicHolder> Topics; // topic -> info + THashMap<TString, NPersQueue::TTopicConverterPtr> FullPathToConverter; // PrimaryFullPath -> Converter, for balancer replies matching + THashSet<TString> TopicsToResolve; + + // Response that is currently pending quota + TFormedDirectReadResponse::TPtr PendingQuota; + + // Responses that will request quota next + std::deque<TFormedDirectReadResponse::TPtr> WaitingQuota; + + TIntrusivePtr<::NMonitoring::TDynamicCounters> Counters; + + ::NMonitoring::TDynamicCounters::TCounterPtr DirectSessionsCreated; + ::NMonitoring::TDynamicCounters::TCounterPtr DirectSessionsActive; + + ::NMonitoring::TDynamicCounters::TCounterPtr Errors; + + TInstant StartTime; + + NPersQueue::TTopicsListController TopicsHandler; + NPersQueue::TTopicsToConverter TopicsList; + + bool Initing = false; +}; + +} diff --git a/ydb/services/persqueue_v1/actors/events.h b/ydb/services/persqueue_v1/actors/events.h index b2953e7a7f..f147836b54 100644 --- a/ydb/services/persqueue_v1/actors/events.h +++ b/ydb/services/persqueue_v1/actors/events.h @@ -5,6 +5,7 @@ #include <ydb/core/base/events.h> #include <ydb/core/grpc_services/rpc_calls.h> #include <ydb/core/protos/pqconfig.pb.h>
+#include <ydb/core/persqueue/key.h> #include <ydb/core/persqueue/percentile_counter.h> #include <ydb/public/api/protos/persqueue_error_codes_v1.pb.h> @@ -65,6 +66,16 @@ struct TEvPQProxy { EvCommitRange, EvRequestTablet, EvPartitionLocationResponse, + EvUpdateSession, + EvDirectReadResponse, + EvDirectReadAck, + EvInitDirectRead, + EvStartDirectRead, + EvDirectReadDataSessionConnected, + EvDirectReadDataSessionDead, + EvDirectReadDestroyPartitionSession, + EvDirectReadCloseSession, + EvDirectReadSendClientData, EvEnd }; @@ -257,6 +268,31 @@ struct TEvPQProxy { ui64 EndOffset; }; + + struct TEvDirectReadResponse : public NActors::TEventLocal<TEvDirectReadResponse, EvDirectReadResponse> { + explicit TEvDirectReadResponse(ui64 assignId, ui64 nextReadOffset, ui64 directReadId, ui64 byteSize) + : AssignId(assignId) + , NextReadOffset(nextReadOffset) + , DirectReadId(directReadId) + , ByteSize(byteSize) + { } + + ui64 AssignId; + ui64 NextReadOffset; + ui64 DirectReadId; + ui64 ByteSize; + }; + + struct TEvDirectReadAck : public NActors::TEventLocal<TEvDirectReadAck, EvDirectReadAck> { + explicit TEvDirectReadAck(ui64 assignId, ui64 directReadId) + : AssignId(assignId) + , DirectReadId(directReadId) + { } + + ui64 AssignId; + ui64 DirectReadId; + }; + struct TEvReadResponse : public NActors::TEventLocal<TEvReadResponse, EvReadResponse> { explicit TEvReadResponse(Topic::StreamReadMessage::FromServer&& resp, ui64 nextReadOffset, bool fromDisk, TDuration waitQuotaTime) : Response(std::move(resp)) @@ -330,22 +366,22 @@ struct TEvPQProxy { , ReadOffset(readOffset) , CommitOffset(commitOffset) , VerifyReadOffset(verifyReadOffset) - , Generation(0) { } const ui64 AssignId; ui64 ReadOffset; TMaybe<ui64> CommitOffset; bool VerifyReadOffset; - ui64 Generation; }; struct TEvReleased : public NActors::TEventLocal<TEvReleased, EvReleased> { - TEvReleased(ui64 id) + TEvReleased(ui64 id, bool graceful = true) : AssignId(id) + , Graceful(graceful) { } const ui64 AssignId; + const bool Graceful; }; struct TEvGetStatus : public NActors::TEventLocal<TEvGetStatus, EvGetStatus> { @@ -376,6 +412,13 @@ struct TEvPQProxy { { } }; + struct TEvPartitionReleased : public NActors::TEventLocal<TEvPartitionReleased, EvPartitionReleased> { + TEvPartitionReleased(const TPartitionId& partition) + : Partition(partition) + { } + TPartitionId Partition; + }; + struct TEvLockPartition : public NActors::TEventLocal<TEvLockPartition, EvLockPartition> { explicit TEvLockPartition(const ui64 readOffset, const TMaybe<ui64>& commitOffset, bool verifyReadOffset, bool startReading) @@ -392,13 +435,6 @@ struct TEvPQProxy { }; - struct TEvPartitionReleased : public NActors::TEventLocal<TEvPartitionReleased, EvPartitionReleased> { - TEvPartitionReleased(const TPartitionId& partition) - : Partition(partition) - { } - TPartitionId Partition; - }; - struct TEvRestartPipe : public NActors::TEventLocal<TEvRestartPipe, EvRestartPipe> { TEvRestartPipe() @@ -425,11 +461,14 @@ struct TEvPQProxy { }; struct TEvPartitionStatus : public NActors::TEventLocal<TEvPartitionStatus, EvPartitionStatus> { - TEvPartitionStatus(const TPartitionId& partition, const ui64 offset, const ui64 endOffset, const ui64 writeTimestampEstimateMs, bool init = true) + TEvPartitionStatus(const TPartitionId& partition, const ui64 offset, const ui64 endOffset, const ui64 writeTimestampEstimateMs, ui64 nodeId, ui64 generation, + bool init = true) : Partition(partition) , Offset(offset) , EndOffset(endOffset) , WriteTimestampEstimateMs(writeTimestampEstimateMs) + , NodeId(nodeId) + 
, Generation(generation) , Init(init) { } @@ -437,8 +476,11 @@ struct TEvPQProxy { ui64 Offset; ui64 EndOffset; ui64 WriteTimestampEstimateMs; + ui64 NodeId; + ui64 Generation; bool Init; }; + struct TEvRequestTablet : public NActors::TEventLocal<TEvRequestTablet, EvRequestTablet> { TEvRequestTablet(const ui64 tabletId) : TabletId(tabletId) @@ -461,7 +503,6 @@ struct TEvPQProxy { struct TEvPartitionLocationResponse : public NActors::TEventLocal<TEvPartitionLocationResponse, EvPartitionLocationResponse> , public TLocalResponseBase - { TEvPartitionLocationResponse() {} TVector<TPartitionLocationInfo> Partitions; @@ -469,6 +510,95 @@ struct TEvPQProxy { ui64 PathId; }; + struct TEvUpdateSession : public NActors::TEventLocal<TEvUpdateSession, EvUpdateSession> { + TEvUpdateSession(const TPartitionId& partition, ui64 nodeId, ui64 generation) + : Partition(partition) + , NodeId(nodeId) + , Generation(generation) + { } + + TPartitionId Partition; + ui64 NodeId; + ui64 Generation; + }; + + struct TEvInitDirectRead : public NActors::TEventLocal<TEvInitDirectRead, EvInitDirectRead> { + TEvInitDirectRead(const Topic::StreamDirectReadMessage::FromClient& req, const TString& peerName) + : Request(req) + , PeerName(peerName) + { } + + Topic::StreamDirectReadMessage::FromClient Request; + TString PeerName; + }; + + struct TEvStartDirectRead : public NActors::TEventLocal<TEvStartDirectRead, EvStartDirectRead> { + TEvStartDirectRead(ui64 assignId, ui64 generation, ui64 lastDirectReadId) + : AssignId(assignId) + , Generation(generation) + , LastDirectReadId(lastDirectReadId) + { } + + const ui64 AssignId; + ui64 Generation; + const ui64 LastDirectReadId; + }; + + + struct TEvDirectReadDataSessionConnected : public TEventLocal<TEvDirectReadDataSessionConnected, EvDirectReadDataSessionConnected> { + TEvDirectReadDataSessionConnected(const NKikimr::NPQ::TReadSessionKey& sessionKey, ui32 tabletGeneration, + ui64 startingReadId) + : ReadKey(sessionKey) + , Generation(tabletGeneration) + , StartingReadId(startingReadId) + {} + + NPQ::TReadSessionKey ReadKey; + ui32 Generation; + ui64 StartingReadId; + }; + + struct TEvDirectReadDataSessionDead : public TEventLocal<TEvDirectReadDataSessionDead, EvDirectReadDataSessionDead> { + TEvDirectReadDataSessionDead(const TString& session) + : Session(session) + {} + + TString Session; + }; + + struct TEvDirectReadDestroyPartitionSession : public TEventLocal<TEvDirectReadDestroyPartitionSession, EvDirectReadDestroyPartitionSession> { + TEvDirectReadDestroyPartitionSession(const NKikimr::NPQ::TReadSessionKey& sessionKey, + Ydb::PersQueue::ErrorCode::ErrorCode code, const TString& reason) + : ReadKey(sessionKey) + , Code(code) + , Reason(reason) + {} + NPQ::TReadSessionKey ReadKey; + Ydb::PersQueue::ErrorCode::ErrorCode Code; + TString Reason; + }; + + struct TEvDirectReadCloseSession : public TEventLocal<TEvDirectReadCloseSession, EvDirectReadCloseSession> { + TEvDirectReadCloseSession(Ydb::PersQueue::ErrorCode::ErrorCode code, const TString& reason) + : Code(code) + , Reason(reason) + {} + Ydb::PersQueue::ErrorCode::ErrorCode Code; + TString Reason; + }; + + struct TEvDirectReadSendClientData : public TEventLocal<TEvDirectReadSendClientData, EvDirectReadSendClientData> { + TEvDirectReadSendClientData(std::shared_ptr<Ydb::Topic::StreamDirectReadMessage::FromServer>&& message) + : Message(std::move(message)) + {} + + TEvDirectReadSendClientData(const std::shared_ptr<Ydb::Topic::StreamDirectReadMessage::FromServer>& message) + : Message(message) + {} + 
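+ // Prepared FromServer message with direct read data; the session actor writes it to the client stream as-is.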
std::shared_ptr<Ydb::Topic::StreamDirectReadMessage::FromServer> Message; + }; + + }; struct TLocalRequestBase { @@ -479,7 +609,7 @@ struct TLocalRequestBase { , Database(database) , Token(token) {} - + TString Topic; TString Database; TString Token; diff --git a/ydb/services/persqueue_v1/actors/helpers.h b/ydb/services/persqueue_v1/actors/helpers.h index 4b3dd4ff0e..a7b4ddb92e 100644 --- a/ydb/services/persqueue_v1/actors/helpers.h +++ b/ydb/services/persqueue_v1/actors/helpers.h @@ -5,8 +5,12 @@ #include <ydb/core/persqueue/writer/source_id_encoding.h> #include <ydb/services/lib/sharding/sharding.h> +#include <util/generic/size_literals.h> + namespace NKikimr::NGRpcProxy::V1 { +static constexpr ui64 READ_BLOCK_SIZE = 8_KB; // metering + using namespace Ydb; bool RemoveEmptyMessages(PersQueue::V1::MigrationStreamingReadServerMessage::DataBatch& data); diff --git a/ydb/services/persqueue_v1/actors/partition_actor.cpp b/ydb/services/persqueue_v1/actors/partition_actor.cpp index 7429bc626a..242b2943fa 100644 --- a/ydb/services/persqueue_v1/actors/partition_actor.cpp +++ b/ydb/services/persqueue_v1/actors/partition_actor.cpp @@ -25,7 +25,7 @@ TPartitionActor::TPartitionActor( const TString& session, const TPartitionId& partition, const ui32 generation, const ui32 step, const ui64 tabletID, const TTopicCounters& counters, bool commitsDisabled, const TString& clientDC, bool rangesMode, const NPersQueue::TTopicConverterPtr& topic, - bool useMigrationProtocol + bool directRead, bool useMigrationProtocol ) : ParentId(parentId) , ClientId(clientId) @@ -53,11 +53,11 @@ TPartitionActor::TPartitionActor( , FirstInit(true) , PipeClient() , PipeGeneration(0) + , TabletGeneration(0) + , NodeId(0) , RequestInfly(false) , EndOffset(0) , SizeLag(0) - , NeedRelease(false) - , Released(false) , WaitDataCookie(0) , WaitForData(false) , LockCounted(false) @@ -65,6 +65,7 @@ TPartitionActor::TPartitionActor( , CommitsDisabled(commitsDisabled) , CommitCookie(1) , Topic(topic) + , DirectRead(directRead) , UseMigrationProtocol(useMigrationProtocol) { } @@ -147,26 +148,8 @@ TPartitionActor::~TPartitionActor() = default; void TPartitionActor::Bootstrap(const TActorContext&) { - Become(&TThis::StateFunc); -} - - -void TPartitionActor::CheckRelease(const TActorContext& ctx) { - const bool hasUncommittedData = ReadOffset > ClientCommitOffset && ReadOffset > ClientReadOffset; //TODO: remove ReadOffset > ClientReadOffset - otherwise wait for commit with cookie(0) - if (NeedRelease) { - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition - << " checking release readOffset " << ReadOffset << " committedOffset " << CommittedOffset << " ReadGuid " << ReadGuid - << " CommitsInfly.size " << CommitsInfly.size() << " Released " << Released); - } - - if (NeedRelease && (ReadGuid.empty() && CommitsInfly.empty() && !hasUncommittedData && !Released)) { - Released = true; - ctx.Send(ParentId, new TEvPQProxy::TEvPartitionReleased(Partition)); - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition - << " check release done - releasing; readOffset " << ReadOffset << " committedOffset " << CommittedOffset << " ReadGuid " << ReadGuid - << " CommitsInfly.size " << CommitsInfly.size() << " Released " << Released); - } + Become(&TThis::StateFunc); } @@ -195,6 +178,59 @@ void TPartitionActor::SendCommit(const ui64 readId, const ui64 offset, const TAc NTabletPipe::SendData(ctx, PipeClient, req.Release()); } +void TPartitionActor::SendPublishDirectRead(const ui64 directReadId, const
TActorContext& ctx) { + NKikimrClient::TPersQueueRequest request; + request.MutablePartitionRequest()->SetTopic(Topic->GetPrimaryPath()); + request.MutablePartitionRequest()->SetPartition(Partition.Partition); + request.MutablePartitionRequest()->SetCookie(ReadOffset); + + Y_ABORT_UNLESS(PipeClient); + + ActorIdToProto(PipeClient, request.MutablePartitionRequest()->MutablePipeClient()); + auto publish = request.MutablePartitionRequest()->MutableCmdPublishRead(); + publish->SetDirectReadId(directReadId); + Y_ABORT_UNLESS(!Session.empty()); + + publish->MutableSessionKey()->SetSessionId(Session); + publish->MutableSessionKey()->SetPartitionSessionId(Partition.AssignId); + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + << " publishing direct read with id " << directReadId); + + TAutoPtr<TEvPersQueue::TEvRequest> req(new TEvPersQueue::TEvRequest); + req->Record.Swap(&request); + + NTabletPipe::SendData(ctx, PipeClient, req.Release()); +} + +void TPartitionActor::SendForgetDirectRead(const ui64 directReadId, const TActorContext& ctx) { + NKikimrClient::TPersQueueRequest request; + request.MutablePartitionRequest()->SetTopic(Topic->GetPrimaryPath()); + request.MutablePartitionRequest()->SetPartition(Partition.Partition); + request.MutablePartitionRequest()->SetCookie(ReadOffset); + + Y_ABORT_UNLESS(PipeClient); + + ActorIdToProto(PipeClient, request.MutablePartitionRequest()->MutablePipeClient()); + auto forget = request.MutablePartitionRequest()->MutableCmdForgetRead(); + forget->SetDirectReadId(directReadId); + Y_ABORT_UNLESS(!Session.empty()); + + forget->MutableSessionKey()->SetSessionId(Session); + forget->MutableSessionKey()->SetPartitionSessionId(Partition.AssignId); + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition + << " forgetting direct read with id " << directReadId); + + TAutoPtr<TEvPersQueue::TEvRequest> req(new TEvPersQueue::TEvRequest); + req->Record.Swap(&request); + + NTabletPipe::SendData(ctx, PipeClient, req.Release()); +} + + + + void TPartitionActor::RestartPipe(const TActorContext& ctx, const TString& reason, const NPersQueue::NErrorCode::EErrorCode errorCode) { if (!PipeClient) @@ -220,6 +256,25 @@ void TPartitionActor::RestartPipe(const TActorContext& ctx, const TString& reaso } +void TPartitionActor::Handle(TEvPQProxy::TEvDirectReadAck::TPtr& ev, const TActorContext& ctx) { + auto it = DirectReads.find(ev->Get()->DirectReadId); + + if (it == DirectReads.end() || ev->Get()->DirectReadId == DirectReadId) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() << "got direct read ack for unknown direct read id " << ev->Get()->DirectReadId, + PersQueue::ErrorCode::BAD_REQUEST)); + return; + } + DirectReads.erase(it); + + if (!PipeClient) return; //all direct reads will be cleared on pipe restart + + SendForgetDirectRead(ev->Get()->DirectReadId, ctx); + +} + + + void TPartitionActor::Handle(const TEvPQProxy::TEvRestartPipe::TPtr&, const TActorContext& ctx) { + Y_ABORT_UNLESS(!PipeClient); @@ -239,6 +294,12 @@ void TPartitionActor::Handle(const TEvPQProxy::TEvRestartPipe::TPtr&, const TAct << " pipe restart attempt " << PipeGeneration << " RequestInfly " << RequestInfly << " ReadOffset " << ReadOffset << " EndOffset " << EndOffset << " InitDone " << InitDone << " WaitForData " << WaitForData); + //TODO: Register in partition this session_id, partition_session_id, pipe actor id + //TODO: RestoreDirectReads if any + if (InitDone) { + //Resend CmdCreateSession with restore reads + } + if (RequestInfly) {
//got read infly LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition << " resend " << CurrentRequest); @@ -250,6 +311,7 @@ void TPartitionActor::Handle(const TEvPQProxy::TEvRestartPipe::TPtr&, const TAct NTabletPipe::SendData(ctx, PipeClient, event.Release()); } + if (InitDone) { for (auto& c : CommitsInfly) { //resend all commits if (c.second.Offset != Max<ui64>()) @@ -468,6 +530,7 @@ void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorCo Y_ABORT_UNLESS(!ev->Get()->Record.HasErrorCode()); Counters.Errors.Inc(); // map NMsgBusProxy::EResponseStatus to PersQueue::ErrorCode??? + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("status is not ok: " + ev->Get()->Record.GetErrorReason(), PersQueue::ErrorCode::ERROR)); return; } @@ -530,14 +593,14 @@ void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorCo if (!StartReading) { - ctx.Send(ParentId, new TEvPQProxy::TEvPartitionStatus(Partition, CommittedOffset, EndOffset, WriteTimestampEstimateMs)); + ctx.Send(ParentId, new TEvPQProxy::TEvPartitionStatus(Partition, CommittedOffset, EndOffset, WriteTimestampEstimateMs, NodeId, TabletGeneration)); } else { InitStartReading(ctx); } return; } - if (!result.HasCmdReadResult()) { //this is commit response + if (!(result.HasCmdReadResult() || result.HasCmdPrepareReadResult() || result.HasCmdPublishReadResult() || result.HasCmdForgetReadResult())) { //this is commit response if (CommitsInfly.empty()) { LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition << " unwaited commit-response with cookie " << result.GetCookie() << "; waiting for nothing"); @@ -571,15 +634,15 @@ void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorCo LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition << " commit done to position " << CommittedOffset << " endOffset " << EndOffset << " with cookie " << readId); - CheckRelease(ctx); PipeGeneration = 0; //reset tries counter - all ok MakeCommit(ctx); return; } - //This is read - Y_ABORT_UNLESS(result.HasCmdReadResult()); - const auto& res = result.GetCmdReadResult(); + if (result.HasCmdForgetReadResult()) { + // ignore it + return; + } if (result.GetCookie() != (ui64)ReadOffset) { LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition @@ -587,6 +650,80 @@ void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorCo return; } + //This is read + Y_ABORT_UNLESS(result.HasCmdReadResult() || result.HasCmdPrepareReadResult() || result.HasCmdPublishReadResult()); + if (result.HasCmdPrepareReadResult()) { + const auto& res = result.GetCmdPrepareReadResult(); + + Y_ABORT_UNLESS(DirectRead); + Y_ABORT_UNLESS(res.GetDirectReadId() == DirectReadId); + + EndOffset = res.GetEndOffset(); + SizeLag = res.GetSizeLag(); + WTime = res.GetWriteTimestampMS(); + + if (res.GetReadOffset() > 0) + ReadOffset = res.GetReadOffset(); + + DirectReads[DirectReadId] = res; + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " after direct read state " << Partition + << " EndOffset " << EndOffset << " ReadOffset " << ReadOffset << " ReadGuid " << ReadGuid << " with direct read id " << DirectReadId); + + SendPublishDirectRead(DirectReadId, ctx); + + Y_ABORT_UNLESS(RequestInfly); + + CurrentRequest.Clear(); + RequestInfly = false; + + + return; + } + if (result.HasCmdPublishReadResult()) { + ++ReadIdToResponse; + ReadGuid = TString(); + + Y_ABORT_UNLESS(DirectReads.find(DirectReadId) !=
DirectReads.end()); + + Y_ABORT_UNLESS(!RequestInfly); + + + const auto& dr = DirectReads[DirectReadId]; + + auto readResponse = MakeHolder<TEvPQProxy::TEvDirectReadResponse>( + Partition.AssignId, + dr.GetReadOffset(), + DirectReadId, + dr.GetBytesSizeEstimate() + ); + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " after publish direct read state " << Partition + << " EndOffset " << EndOffset << " ReadOffset " << ReadOffset << " ReadGuid " << ReadGuid << " with direct read id " << DirectReadId); + + + ++DirectReadId; + + ctx.Send(ParentId, readResponse.Release()); + + Y_ABORT_UNLESS(!WaitForData); + + ReadOffset = dr.GetLastOffset() + 1; + + Y_ABORT_UNLESS(!RequestInfly); + + if (EndOffset > ReadOffset) { + ctx.Send(ParentId, new TEvPQProxy::TEvPartitionReady(Partition, WTime, SizeLag, ReadOffset, EndOffset)); + } else { + WaitForData = true; + if (PipeClient) //pipe will be recreated soon + WaitDataInPartition(ctx); + } + + return; + } + const auto& res = result.GetCmdReadResult(); + Y_ABORT_UNLESS(res.HasMaxOffset()); EndOffset = res.GetMaxOffset(); SizeLag = res.GetSizeLag(); @@ -647,6 +784,7 @@ void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorCo ); ctx.Send(ParentId, readResponse.Release()); } else { + Y_ABORT_UNLESS(!DirectRead); auto readResponse = MakeHolder<TEvPQProxy::TEvReadResponse>( std::move(response), ReadOffset, @@ -655,7 +793,6 @@ void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorCo ); ctx.Send(ParentId, readResponse.Release()); } - CheckRelease(ctx); PipeGeneration = 0; //reset tries counter - all ok } @@ -671,6 +808,15 @@ void TPartitionActor::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const RestartPipe(ctx, TStringBuilder() << "pipe to tablet is dead " << msg->TabletId, NPersQueue::NErrorCode::TABLET_PIPE_DISCONNECTED); return; } + + auto prevGeneration = TabletGeneration; + Y_UNUSED(prevGeneration); + TabletGeneration = msg->Generation; + NodeId = msg->ServerId.NodeId(); + + if (InitDone) { + ctx.Send(ParentId, new TEvPQProxy::TEvUpdateSession(Partition, NodeId, TabletGeneration)); + } } void TPartitionActor::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) { @@ -678,16 +824,9 @@ void TPartitionActor::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const } -void TPartitionActor::Handle(TEvPQProxy::TEvReleasePartition::TPtr&, const TActorContext& ctx) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " (partition)releasing " << Partition << " ReadOffset " << ReadOffset << " ClientCommitOffset " << ClientCommitOffset - << " CommittedOffst " << CommittedOffset); - NeedRelease = true; - CheckRelease(ctx); -} - void TPartitionActor::Handle(TEvPQProxy::TEvGetStatus::TPtr&, const TActorContext& ctx) { - ctx.Send(ParentId, new TEvPQProxy::TEvPartitionStatus(Partition, CommittedOffset, EndOffset, WriteTimestampEstimateMs, false)); + ctx.Send(ParentId, new TEvPQProxy::TEvPartitionStatus(Partition, CommittedOffset, EndOffset, WriteTimestampEstimateMs, TabletGeneration, NodeId, false)); } @@ -808,7 +947,6 @@ void TPartitionActor::InitLockPartition(const TActorContext& ctx) { .DoFirstRetryInstantly = true }; PipeClient = ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, TabletID, clientConfig)); - NKikimrClient::TPersQueueRequest request; request.MutablePartitionRequest()->SetTopic(Topic->GetPrimaryPath()); @@ -822,6 +960,7 @@ void TPartitionActor::InitLockPartition(const TActorContext& ctx) { cmd->SetSessionId(Session); 
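+    // The session id above and the partition session id set a few lines below
+    // presumably form the same SessionKey that SendPublishDirectRead and
+    // SendForgetDirectRead use to address prepared direct reads on the tablet.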
cmd->SetGeneration(Generation); cmd->SetStep(Step); + cmd->SetPartitionSessionId(Partition.AssignId); LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " INITING " << Partition); @@ -913,7 +1052,6 @@ void TPartitionActor::Handle(TEvPersQueue::TEvHasDataInfoResponse::TPtr& ev, con if (PipeClient) WaitDataInPartition(ctx); } - CheckRelease(ctx); //just for logging purpose } @@ -924,9 +1062,6 @@ void TPartitionActor::Handle(TEvPQProxy::TEvRead::TPtr& ev, const TActorContext& << " readOffset " << ReadOffset << " EndOffset " << EndOffset << " ClientCommitOffset " << ClientCommitOffset << " committedOffset " << CommittedOffset << " Guid " << ev->Get()->Guid); - Y_ABORT_UNLESS(!NeedRelease); - Y_ABORT_UNLESS(!Released); - Y_ABORT_UNLESS(ReadGuid.empty()); Y_ABORT_UNLESS(!RequestInfly); @@ -946,6 +1081,10 @@ void TPartitionActor::Handle(TEvPQProxy::TEvRead::TPtr& ev, const TActorContext& read->SetClientId(ClientId); read->SetClientDC(ClientDC); read->SetSessionId(Session); + if (DirectRead) { + read->SetDirectReadId(DirectReadId); + } + if (req->MaxCount) { read->SetCount(req->MaxCount); } diff --git a/ydb/services/persqueue_v1/actors/partition_actor.h b/ydb/services/persqueue_v1/actors/partition_actor.h index 9bff7aff09..58bb333ff8 100644 --- a/ydb/services/persqueue_v1/actors/partition_actor.h +++ b/ydb/services/persqueue_v1/actors/partition_actor.h @@ -9,6 +9,7 @@ #include <ydb/core/base/tablet_pipe.h> #include <ydb/core/persqueue/events/global.h> +#include <ydb/core/util/ulid.h> #include <ydb/library/services/services.pb.h> @@ -71,8 +72,8 @@ public: TPartitionActor(const TActorId& parentId, const TString& clientId, const TString& clientPath, const ui64 cookie, const TString& session, const TPartitionId& partition, ui32 generation, ui32 step, const ui64 tabletID, const TTopicCounters& counters, const bool commitsDisabled, - const TString& clientDC, bool rangesMode, const NPersQueue::TTopicConverterPtr& topic, - bool useMigrationProtocol = true); + const TString& clientDC, bool rangesMode, const NPersQueue::TTopicConverterPtr& topic, bool directRead, + bool useMigrationProtocol); ~TPartitionActor(); void Bootstrap(const NActors::TActorContext& ctx); @@ -90,10 +91,10 @@ private: HFunc(TEvPQProxy::TEvRead, Handle) HFunc(TEvPQProxy::TEvCommitCookie, Handle) HFunc(TEvPQProxy::TEvCommitRange, Handle) - HFunc(TEvPQProxy::TEvReleasePartition, Handle) HFunc(TEvPQProxy::TEvLockPartition, Handle) HFunc(TEvPQProxy::TEvGetStatus, Handle) HFunc(TEvPQProxy::TEvRestartPipe, Handle) + HFunc(TEvPQProxy::TEvDirectReadAck, Handle) HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); HFunc(TEvTabletPipe::TEvClientConnected, Handle); @@ -105,10 +106,11 @@ private: } - void Handle(TEvPQProxy::TEvReleasePartition::TPtr& ev, const NActors::TActorContext& ctx); void Handle(TEvPQProxy::TEvLockPartition::TPtr& ev, const NActors::TActorContext& ctx); void Handle(TEvPQProxy::TEvGetStatus::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvDirectReadAck::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvDeadlineExceeded::TPtr& ev, const NActors::TActorContext& ctx); void Handle(TEvPQProxy::TEvRead::TPtr& ev, const NActors::TActorContext& ctx); @@ -124,7 +126,6 @@ private: void HandlePoison(NActors::TEvents::TEvPoisonPill::TPtr& ev, const NActors::TActorContext& ctx); void HandleWakeup(const NActors::TActorContext& ctx); - void CheckRelease(const NActors::TActorContext& ctx); void InitLockPartition(const NActors::TActorContext& ctx); void InitStartReading(const 
NActors::TActorContext& ctx); @@ -132,6 +133,8 @@ private: void WaitDataInPartition(const NActors::TActorContext& ctx); void SendCommit(const ui64 readId, const ui64 offset, const TActorContext& ctx); void MakeCommit(const TActorContext& ctx); + void SendPublishDirectRead(const ui64 directReadId, const TActorContext& ctx); + void SendForgetDirectRead(const ui64 directReadId, const TActorContext& ctx); private: @@ -170,6 +173,9 @@ private: bool FirstInit; TActorId PipeClient; ui32 PipeGeneration; + ui64 TabletGeneration; + ui64 NodeId; + bool RequestInfly; NKikimrClient::TPersQueueRequest CurrentRequest; @@ -178,9 +184,6 @@ private: TString ReadGuid; // empty if not reading - bool NeedRelease; - bool Released; - std::set<ui64> WaitDataInfly; ui64 WaitDataCookie; bool WaitForData; @@ -200,6 +203,11 @@ private: ui64 CommitCookie; NPersQueue::TTopicConverterPtr Topic; + bool DirectRead = false; + + ui64 DirectReadId = 1; + std::map<ui64, NKikimrClient::TPersQueuePartitionResponse::TCmdPrepareDirectReadResult> DirectReads; + bool UseMigrationProtocol; }; diff --git a/ydb/services/persqueue_v1/actors/read_session_actor.h b/ydb/services/persqueue_v1/actors/read_session_actor.h index 03065dcece..61a1cd52cb 100644 --- a/ydb/services/persqueue_v1/actors/read_session_actor.h +++ b/ydb/services/persqueue_v1/actors/read_session_actor.h @@ -30,6 +30,7 @@ struct TPartitionActorInfo { std::deque<ui64> Commits; bool Reading; bool Releasing; + bool Stopping; bool Released; bool LockSent; bool ReleaseSent; @@ -42,6 +43,20 @@ struct TPartitionActorInfo { TInstant AssignTimestamp; + ui64 Generation; + ui64 NodeId; + + + struct TDirectReadInfo { + ui64 DirectReadId = 0; + ui64 ByteSize = 0; + }; + + ui64 MaxProcessedDirectReadId = 0; + ui64 LastDirectReadId = 0; + + std::map<i64, TDirectReadInfo> DirectReads; + explicit TPartitionActorInfo( const TActorId& actor, const TPartitionId& partition, @@ -52,6 +67,7 @@ struct TPartitionActorInfo { , Topic(topic) , Reading(false) , Releasing(false) + , Stopping(false) , Released(false) , LockSent(false) , ReleaseSent(false) @@ -59,7 +75,10 @@ struct TPartitionActorInfo { , ReadIdCommitted(0) , Offset(0) , AssignTimestamp(timestamp) + , Generation(0) + , NodeId(0) { + Y_ABORT_UNLESS(partition.DiscoveryConverter != nullptr); } }; @@ -102,9 +121,16 @@ struct TFormedReadResponse: public TSimpleRefCount<TFormedReadResponse<TServerMe i64 ByteSizeBeforeFiltering = 0; ui64 RequiredQuota = 0; + bool IsDirectRead = false; + ui64 AssignId = 0; + ui64 DirectReadId = 0; + ui64 DirectReadByteSize = 0; + // returns byteSize diff i64 ApplyResponse(TServerMessage&& resp); + i64 ApplyDirectReadResponse(TEvPQProxy::TEvDirectReadResponse::TPtr& ev); + THashSet<TActorId> PartitionsTookPartInRead; TSet<TPartitionId> PartitionsTookPartInControlMessages; @@ -195,11 +221,13 @@ private: HFunc(TEvPQProxy::TEvReadSessionStatus, Handle); // from read sessions info builder proxy HFunc(TEvPQProxy::TEvRead, Handle); // from gRPC HFunc(/* type alias */ TEvReadResponse, Handle); // from partitionActor + HFunc(TEvPQProxy::TEvDirectReadResponse, Handle); // from partitionActor + HFunc(TEvPQProxy::TEvDirectReadAck, Handle); // from gRPC HFunc(TEvPQProxy::TEvDone, Handle); // from gRPC HFunc(TEvPQProxy::TEvCloseSession, Handle); // from partitionActor HFunc(TEvPQProxy::TEvDieCommand, Handle); HFunc(TEvPQProxy::TEvPartitionReady, Handle); // from partitionActor - HFunc(TEvPQProxy::TEvPartitionReleased, Handle); // from partitionActor + HFunc(TEvPQProxy::TEvPartitionReleased, Handle); // from partitionActor 
HFunc(TEvPQProxy::TEvCommitCookie, Handle); // from gRPC HFunc(TEvPQProxy::TEvCommitRange, Handle); // from gRPC HFunc(TEvPQProxy::TEvStartRead, Handle); // from gRPC @@ -208,6 +236,8 @@ private: HFunc(TEvPQProxy::TEvAuth, Handle); // from gRPC HFunc(TEvPQProxy::TEvCommitDone, Handle); // from PartitionActor HFunc(TEvPQProxy::TEvPartitionStatus, Handle); // from partitionActor + HFunc(TEvPQProxy::TEvUpdateSession, Handle); // from partitionActor + // Balancer events HFunc(TEvPersQueue::TEvLockPartition, Handle); // can be sent to itself when reading without a consumer @@ -242,6 +272,8 @@ private: void Handle(TEvPQProxy::TEvReadSessionStatus::TPtr& ev, const TActorContext& ctx); void Handle(TEvPQProxy::TEvRead::TPtr& ev, const TActorContext& ctx); void Handle(typename TEvReadResponse::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvDirectReadResponse::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvDirectReadAck::TPtr& ev, const TActorContext& ctx); void Handle(TEvPQProxy::TEvDone::TPtr& ev, const TActorContext& ctx); void Handle(TEvPQProxy::TEvCloseSession::TPtr& ev, const TActorContext& ctx); void Handle(TEvPQProxy::TEvDieCommand::TPtr& ev, const TActorContext& ctx); @@ -255,6 +287,7 @@ private: void Handle(TEvPQProxy::TEvAuth::TPtr& ev, const TActorContext& ctx); void Handle(TEvPQProxy::TEvCommitDone::TPtr& ev, const TActorContext& ctx); void Handle(TEvPQProxy::TEvPartitionStatus::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvUpdateSession::TPtr& ev, const TActorContext& ctx); // Balancer events void Handle(TEvPersQueue::TEvLockPartition::TPtr& ev, const TActorContext& ctx); // can be sent to itself when reading without a consumer @@ -406,6 +439,8 @@ private: NPersQueue::TTopicsListController TopicsHandler; NPersQueue::TTopicsToConverter TopicsList; + + bool DirectRead; }; } diff --git a/ydb/services/persqueue_v1/actors/read_session_actor.ipp b/ydb/services/persqueue_v1/actors/read_session_actor.ipp index 5fb6a4a3b3..d4532610d3 100644 --- a/ydb/services/persqueue_v1/actors/read_session_actor.ipp +++ b/ydb/services/persqueue_v1/actors/read_session_actor.ipp @@ -59,6 +59,7 @@ TReadSessionActor<UseMigrationProtocol>::TReadSessionActor( , RequestedBytes(0) , ReadsInfly(0) , TopicsHandler(topicsHandler) + , DirectRead(false) { Y_ASSERT(Request); } @@ -212,10 +213,11 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(typename IContext::TEvReadF } case TClientMessage::kStopPartitionSessionResponse: { - ctx.Send(ctx.SelfID, new TEvPQProxy::TEvReleased(getAssignId(request.stop_partition_session_response()))); if (ReadWithoutConsumer) { return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "it is forbidden to send StopPartitionSessionResponse when reading without a consumer", ctx); } + + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvReleased(getAssignId(request.stop_partition_session_response()), request.stop_partition_session_response().graceful())); return (void)ReadFromStreamOrDie(ctx); } @@ -267,6 +269,13 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(typename IContext::TEvReadF return (void)ReadFromStreamOrDie(ctx); } + case TClientMessage::kDirectReadAck: { + const auto& ddrr = request.direct_read_ack(); + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvDirectReadAck(ddrr.partition_session_id(), ddrr.direct_read_id())); + return (void)ReadFromStreamOrDie(ctx); + } + + default: { return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "unsupported request", ctx); } @@ -442,6 +451,54 @@ void 
TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvAuth::TPtr&
     }
 }

+
+template <bool UseMigrationProtocol>
+void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvDirectReadAck::TPtr& ev, const TActorContext& ctx) {
+
+    auto it = Partitions.find(ev->Get()->AssignId);
+    if (it == Partitions.end()) {
+        // do nothing - partition is already released
+        return;
+    }
+
+    LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got DirectReadAck from client"
+        << ": partition# " << it->second.Partition
+        << ", directReadId# " << ev->Get()->DirectReadId
+        << ", bytesInflight# " << BytesInflight_);
+
+    auto drIt = it->second.DirectReads.find(ev->Get()->DirectReadId);
+
+    if (drIt == it->second.DirectReads.end()) {
+        return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TStringBuilder()
+            << "unknown direct read id in ack: " << ev->Get()->DirectReadId, ctx);
+    }
+
+    if (it->second.MaxProcessedDirectReadId + 1 != (ui64)ev->Get()->DirectReadId) {
+        return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TStringBuilder()
+            << "direct reads must be confirmed in strict order - expecting " << (it->second.MaxProcessedDirectReadId + 1)
+            << " but got " << ev->Get()->DirectReadId, ctx);
+    }
+
+    if (it->second.LastDirectReadId < (ui64)ev->Get()->DirectReadId) {
+        return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TStringBuilder() << "got direct read id that does not exist yet " <<
+            ev->Get()->DirectReadId, ctx);
+    }
+
+    it->second.MaxProcessedDirectReadId = ev->Get()->DirectReadId;
+
+    BytesInflight_ -= drIt->second.ByteSize;
+    if (BytesInflight) {
+        (*BytesInflight) -= drIt->second.ByteSize;
+    }
+    it->second.DirectReads.erase(drIt);
+
+    ProcessReads(ctx);
+    ctx.Send(it->second.Actor, new TEvPQProxy::TEvDirectReadAck(ev->Get()->AssignId, ev->Get()->DirectReadId));
+}
+
+
 template <bool UseMigrationProtocol>
 void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvStartRead::TPtr& ev, const TActorContext& ctx) {
     RequestNotChecked = true;
@@ -476,16 +533,35 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvReleased::TP
         return;
     }

-    if (!it->second.Releasing) {
-        return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TStringBuilder()
-            << "release of partition that is not requested for release is forbiden for " << it->second.Partition, ctx);
-    }
-
     LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got Released from client"
         << ": partition# " << it->second.Partition);

     Y_ABORT_UNLESS(it->second.LockSent);
-    ReleasePartition(it, true, ctx);
+
+    if (ev->Get()->Graceful || !DirectRead) {
+        if (!it->second.Releasing) {
+            auto p = it->second.Partition;
+            return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TStringBuilder()
+                << "graceful release of partition that is not requested for release is forbidden for " << p, ctx);
+        }
+        if (it->second.Stopping) { // Ignore a graceful release if we already got a stop
+            return;
+        }
+        if (!DirectRead) {
+            ReleasePartition(it, true, ctx);
+        } else {
+            SendReleaseSignal(it, true, ctx);
+        }
+    } else {
+        Y_ABORT_UNLESS(DirectRead);
+        if (!it->second.Stopping) {
+            return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TStringBuilder()
+                << "release of partition that is not requested is forbidden for " << it->second.Partition, ctx);
+        }
+        //TODO: filter all direct reads
+        ReleasePartition(it, true, ctx);
+    }
 }

 template <bool UseMigrationProtocol>
@@ -516,12 +592,25 @@ void TReadSessionActor<UseMigrationProtocol>::DropPartition(typename TPartitions
PartsPerSession.DecFor(Partitions.size(), 1);
     }

+    for (auto& [readId, dr] : it->second.DirectReads) {
+        BytesInflight_ -= dr.ByteSize;
+        if (BytesInflight) {
+            (*BytesInflight) -= dr.ByteSize;
+        }
+
+        Y_ABORT_UNLESS((ui64)readId > it->second.MaxProcessedDirectReadId);
+        ReadSizeBudget += dr.ByteSize; // return the bytes of reads that were never delivered to the budget
+    }
+
     BalancerGeneration.erase(it->first);
     Partitions.erase(it);
     if (SessionsActive) {
         PartsPerSession.IncFor(Partitions.size(), 1);
     }
+
+    // Some inflight bytes may have been freed, so try to continue reading
+    ProcessReads(ctx);
 }

 template <bool UseMigrationProtocol>
@@ -639,6 +728,7 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(typename TEvReadInit::TPtr&
         << "_" << "v1";
     CommitsDisabled = false;
+    PeerName = ev->Get()->PeerName;

     if constexpr (UseMigrationProtocol) {
@@ -655,6 +745,7 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(typename TEvReadInit::TPtr&
         MaxTimeLagMs = 0; // max_lag per topic only
         ReadTimestampMs = 0; // read_from per topic only
         ReadOnlyLocal = true;
+        DirectRead = init.direct_read();
         if (init.reader_name()) {
             PeerName = init.reader_name();
         }
@@ -1083,13 +1174,15 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPersQueue::TEvLockPartit
     Y_ABORT_UNLESS(record.GetGeneration() > 0);
     const ui64 assignId = NextAssignId++;
+    Y_ABORT_UNLESS(converterIter->second != nullptr);
+
     BalancerGeneration[assignId] = {record.GetGeneration(), record.GetStep()};
     const TPartitionId partitionId{converterIter->second, record.GetPartition(), assignId};

     const TActorId actorId = ctx.Register(new TPartitionActor(
         ctx.SelfID, ClientId, ClientPath, Cookie, Session, partitionId, record.GetGeneration(),
         record.GetStep(), record.GetTabletId(), it->second, CommitsDisabled, ClientDC, RangesMode,
-        converterIter->second, UseMigrationProtocol));
+        converterIter->second, DirectRead, UseMigrationProtocol));

     if (SessionsActive) {
         PartsPerSession.DecFor(Partitions.size(), 1);
@@ -1157,6 +1250,12 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvPartitionSta
             result.mutable_start_partition_session_request()->mutable_partition_offsets()->set_start(ev->Get()->Offset);
             result.mutable_start_partition_session_request()->mutable_partition_offsets()->set_end(ev->Get()->EndOffset);
+
+            if (DirectRead) {
+                result.mutable_start_partition_session_request()->mutable_partition_location()->set_node_id(ev->Get()->NodeId);
+                result.mutable_start_partition_session_request()->mutable_partition_location()->set_generation(ev->Get()->Generation);
+            }
+
         }
     } else {
         Y_ABORT_UNLESS(it->second.LockSent);
@@ -1186,6 +1285,36 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvPartitionSta
 }

 template <bool UseMigrationProtocol>
+void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvUpdateSession::TPtr& ev, const TActorContext& ctx) {
+    if (!ActualPartitionActors.contains(ev->Sender)) {
+        return;
+    }
+
+    if (!DirectRead) {
+        return;
+    }
+    auto it = Partitions.find(ev->Get()->Partition.AssignId);
+    Y_ABORT_UNLESS(it != Partitions.end());
+
+    TServerMessage result;
+    result.set_status(Ydb::StatusIds::SUCCESS);
+
+    Y_ABORT_UNLESS(it->second.LockSent);
+
+    if constexpr (!UseMigrationProtocol) {
+        result.mutable_update_partition_session()->set_partition_session_id(it->first);
+        result.mutable_update_partition_session()->mutable_partition_location()->set_node_id(ev->Get()->NodeId);
+        result.mutable_update_partition_session()->mutable_partition_location()->set_generation(ev->Get()->Generation);
+    }
+
+    LOG_INFO_S(ctx,
NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " sending to client update partition stream event"); + SendControlMessage(it->second.Partition, std::move(result), ctx); +} + + + +template <bool UseMigrationProtocol> bool TReadSessionActor<UseMigrationProtocol>::SendControlMessage(TPartitionId id, TServerMessage&& message, const TActorContext& ctx) { id.AssignId = 0; @@ -1210,6 +1339,8 @@ void TReadSessionActor<UseMigrationProtocol>::SendReleaseSignal(typename TPartit TServerMessage result; result.set_status(Ydb::StatusIds::SUCCESS); + if (kill) it->second.Stopping = true; + if constexpr (UseMigrationProtocol) { result.mutable_release()->mutable_topic()->set_path(it->second.Topic->GetFederationPath()); result.mutable_release()->set_cluster(it->second.Topic->GetCluster()); @@ -1221,6 +1352,9 @@ void TReadSessionActor<UseMigrationProtocol>::SendReleaseSignal(typename TPartit result.mutable_stop_partition_session_request()->set_partition_session_id(it->second.Partition.AssignId); result.mutable_stop_partition_session_request()->set_graceful(!kill); result.mutable_stop_partition_session_request()->set_committed_offset(it->second.Offset); + if (DirectRead) { + result.mutable_stop_partition_session_request()->set_last_direct_read_id(it->second.LastDirectReadId); + } } if (!SendControlMessage(it->second.Partition, std::move(result), ctx)) { @@ -1351,7 +1485,6 @@ void TReadSessionActor<UseMigrationProtocol>::CloseSession(PersQueue::ErrorCode: LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc double finish failed"); } } - Die(ctx); } @@ -1397,6 +1530,10 @@ void TReadSessionActor<UseMigrationProtocol>::ReleasePartition(typename TPartiti Y_ABORT_UNLESS(couldBeReads || !it->second.Reading); typename TFormedReadResponse<TServerMessage>::TPtr response; + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got all from client, actual releasing" + << ": partition# " << it->second.Partition); + + // process reads if (it->second.Reading) { auto readIt = PartitionToReadResponse.find(it->second.Actor); @@ -1451,8 +1588,9 @@ void TReadSessionActor<UseMigrationProtocol>::ProcessBalancerDead(ui64 tabletId, if (jt->second.LockSent) { SendReleaseSignal(jt, true, ctx); } - - ReleasePartition(jt, true, ctx); + if (!DirectRead || !jt->second.LockSent) { // in direct read mode wait for final release from client + ReleasePartition(jt, true, ctx); + } } else { ++it; } @@ -1536,6 +1674,23 @@ i64 TFormedReadResponse<TServerMessage>::ApplyResponse(TServerMessage&& resp) { return ByteSize - prev; } +template <typename TServerMessage> +i64 TFormedReadResponse<TServerMessage>::ApplyDirectReadResponse(TEvPQProxy::TEvDirectReadResponse::TPtr& ev) { + + constexpr bool UseMigrationProtocol = std::is_same_v<TServerMessage, PersQueue::V1::MigrationStreamingReadServerMessage>; + Y_ABORT_UNLESS(!UseMigrationProtocol); + + IsDirectRead = true; + AssignId = ev->Get()->AssignId; + DirectReadId = ev->Get()->DirectReadId; + DirectReadByteSize = ev->Get()->ByteSize; + + i64 diff = DirectReadByteSize - ByteSize; + ByteSize = DirectReadByteSize; + return diff; +} + + template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Handle(typename TEvReadResponse::TPtr& ev, const TActorContext& ctx) { if (!ActualPartitionActors.contains(ev->Sender)) { @@ -1606,7 +1761,71 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(typename TEvReadResponse::T } template <bool UseMigrationProtocol> +void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvDirectReadResponse::TPtr& ev, 
const TActorContext& ctx) {
+    if (!ActualPartitionActors.contains(ev->Sender)) {
+        return;
+    }
+
+    Y_DEBUG_ABORT_UNLESS(!UseMigrationProtocol);
+
+    const ui64 assignId = ev->Get()->AssignId;
+
+    typename TFormedReadResponse<TServerMessage>::TPtr formedResponse;
+    {
+        auto it = PartitionToReadResponse.find(ev->Sender);
+        Y_ABORT_UNLESS(it != PartitionToReadResponse.end());
+        formedResponse = it->second;
+    }
+
+    auto it = Partitions.find(assignId);
+    Y_ABORT_UNLESS(it != Partitions.end());
+    Y_ABORT_UNLESS(it->second.Reading);
+    it->second.Reading = false;
+
+    LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " direct read preparation done"
+        << ": guid# " << formedResponse->Guid
+        << ", partition# " << it->second.Partition
+        << ", size# " << ev->Get()->ByteSize
+        << ", direct_read_id# " << ev->Get()->DirectReadId);
+
+    const i64 diff = formedResponse->ApplyDirectReadResponse(ev);
+
+    --formedResponse->RequestsInfly;
+    Y_ABORT_UNLESS(formedResponse->RequestsInfly == 0);
+
+    BytesInflight_ += diff;
+    if (BytesInflight) {
+        (*BytesInflight) += diff;
+    }
+
+    if (const auto ru = CalcRuConsumption(PrepareResponse(formedResponse))) {
+        formedResponse->RequiredQuota = ru;
+        if (MaybeRequestQuota(ru, EWakeupTag::RlAllowed, ctx)) {
+            Y_ABORT_UNLESS(!PendingQuota);
+            PendingQuota = formedResponse;
+        } else {
+            WaitingQuota.push_back(formedResponse);
+        }
+    } else {
+        ProcessAnswer(formedResponse, ctx);
+    }
+}
+
+
+template <bool UseMigrationProtocol>
 ui64 TReadSessionActor<UseMigrationProtocol>::PrepareResponse(typename TFormedReadResponse<TServerMessage>::TPtr formedResponse) {
+
+    if (formedResponse->IsDirectRead) {
+        return formedResponse->DirectReadByteSize;
+    }
+
     formedResponse->ByteSizeBeforeFiltering = formedResponse->Response.ByteSize();

     if constexpr (UseMigrationProtocol) {
@@ -1618,34 +1837,49 @@ ui64 TReadSessionActor<UseMigrationProtocol>::PrepareRe
     return formedResponse->HasMessages ? formedResponse->Response.ByteSize() : 0;
 }

+
 template <bool UseMigrationProtocol>
 void TReadSessionActor<UseMigrationProtocol>::ProcessAnswer(typename TFormedReadResponse<TServerMessage>::TPtr formedResponse, const TActorContext& ctx) {
     ui32 readDurationMs = (ctx.Now() - formedResponse->Start - formedResponse->WaitQuotaTime).MilliSeconds();
-    if (formedResponse->FromDisk) {
-        if (ReadLatencyFromDisk)
-            ReadLatencyFromDisk.IncFor(readDurationMs, 1);
+    const ui64 diff = formedResponse->ByteSizeBeforeFiltering;
+    ui64 sizeEstimation = 0;
+
+    if (formedResponse->IsDirectRead) {
+        sizeEstimation = formedResponse->DirectReadByteSize;
     } else {
-        if (ReadLatency)
-            ReadLatency.IncFor(readDurationMs, 1);
-    }
+        sizeEstimation = formedResponse->HasMessages ? formedResponse->Response.ByteSize() : 0;
+
+        if (formedResponse->FromDisk) {
+            if (ReadLatencyFromDisk)
+                ReadLatencyFromDisk.IncFor(readDurationMs, 1);
+        } else {
+            if (ReadLatency)
+                ReadLatency.IncFor(readDurationMs, 1);
+        }

-    const auto latencyThreshold = formedResponse->FromDisk
-        ? 
AppData(ctx)->PQConfig.GetReadLatencyFromDiskBigMs() + : AppData(ctx)->PQConfig.GetReadLatencyBigMs(); + if (readDurationMs >= latencyThreshold && SLIBigReadLatency) { + SLIBigReadLatency.Inc(); + } } Y_ABORT_UNLESS(formedResponse->RequestsInfly == 0); - const ui64 diff = formedResponse->ByteSizeBeforeFiltering; - const ui64 sizeEstimation = formedResponse->HasMessages ? formedResponse->Response.ByteSize() : 0; if constexpr (!UseMigrationProtocol) { formedResponse->Response.mutable_read_response()->set_bytes_size(sizeEstimation); } - if (formedResponse->HasMessages) { + if (formedResponse->IsDirectRead) { + auto it = Partitions.find(formedResponse->AssignId); + Y_ABORT_UNLESS(it != Partitions.end()); + it->second.DirectReads[formedResponse->DirectReadId] = {formedResponse->DirectReadId, sizeEstimation}; + it->second.LastDirectReadId = formedResponse->DirectReadId; + + Y_ABORT_UNLESS(diff == 0); // diff is zero; sizeEstimation already counted in inflight; + } else if (formedResponse->HasMessages) { LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " response to read" << ": guid# " << formedResponse->Guid); if (!WriteToStreamOrDie(ctx, std::move(formedResponse->Response))) { @@ -1655,7 +1889,6 @@ void TReadSessionActor<UseMigrationProtocol>::ProcessAnswer(typename TFormedRead LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " empty read result, start new reading" << ": guid# " << formedResponse->Guid); } - BytesInflight_ -= diff; if (BytesInflight) { (*BytesInflight) -= diff; @@ -1691,6 +1924,8 @@ void TReadSessionActor<UseMigrationProtocol>::ProcessAnswer(typename TFormedRead // Bring back available partitions. // If some partition was removed from partitions container, it is not bad because it will be checked during read processing. AvailablePartitions.insert(formedResponse->PartitionsBecameAvailable.begin(), formedResponse->PartitionsBecameAvailable.end()); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " Process answer. Aval parts: " << AvailablePartitions.size()); + if constexpr (UseMigrationProtocol) { if (!formedResponse->HasMessages) { @@ -1700,6 +1935,7 @@ void TReadSessionActor<UseMigrationProtocol>::ProcessAnswer(typename TFormedRead } } + ProcessReads(ctx); } @@ -1734,7 +1970,6 @@ void TReadSessionActor<UseMigrationProtocol>::ProcessReads(const TActorContext& return ReadSizeBudget > 0; } }; - while (shouldContinueReads() && BytesInflight_ + RequestedBytes < MAX_INFLY_BYTES) { ui32 count = MaxReadMessagesCount; ui64 size = MaxReadSize; @@ -1815,14 +2050,14 @@ void TReadSessionActor<UseMigrationProtocol>::ProcessReads(const TActorContext& RequestedBytes += csize; formedResponse->RequestedBytes += csize; + ReadSizeBudget -= csize; ctx.Send(it->second.Actor, ev.Release()); res = PartitionToReadResponse.emplace(it->second.Actor, formedResponse).second; Y_ABORT_UNLESS(res); - // TODO (ildar-khisam@): Gather data from all partitions. - // For now send messages only from single partition. + // Do not aggregate messages from different partitions together. if constexpr (!UseMigrationProtocol) { break; } @@ -1877,6 +2112,7 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvPartitionRea ev->Get()->SizeLag, ev->Get()->EndOffset - ev->Get()->ReadOffset).second; Y_ABORT_UNLESS(res); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << "TEvPartitionReady. 
Aval parts: " << AvailablePartitions.size()); ProcessReads(ctx); } diff --git a/ydb/services/persqueue_v1/actors/schema_actors.cpp b/ydb/services/persqueue_v1/actors/schema_actors.cpp index 3254b4566a..3ba1b844dd 100644 --- a/ydb/services/persqueue_v1/actors/schema_actors.cpp +++ b/ydb/services/persqueue_v1/actors/schema_actors.cpp @@ -678,6 +678,7 @@ void TDescribeTopicActorImpl::RequestPartitionsLocation(const TActorContext& ctx } void TDescribeTopicActorImpl::RequestReadSessionsInfo(const TActorContext& ctx) { + Y_ABORT_UNLESS(Settings.Mode == TDescribeTopicActorSettings::EMode::DescribeConsumer); NTabletPipe::SendData( ctx, *BalancerPipe, new TEvPersQueue::TEvGetReadSessionsInfo(NPersQueue::ConvertNewConsumerName(Settings.Consumer, ctx)) diff --git a/ydb/services/persqueue_v1/actors/ya.make b/ydb/services/persqueue_v1/actors/ya.make index fb6ec9149e..1329c08574 100644 --- a/ydb/services/persqueue_v1/actors/ya.make +++ b/ydb/services/persqueue_v1/actors/ya.make @@ -4,6 +4,7 @@ PEERDIR( ydb/library/actors/core library/cpp/containers/disjoint_interval_tree library/cpp/string_utils/base64 + ydb/core/util ydb/core/base ydb/core/grpc_services ydb/core/persqueue @@ -38,6 +39,8 @@ SRCS( read_info_actor.h read_info_actor.cpp read_session_actor.h + direct_read_actor.h + direct_read_actor.cpp write_session_actor.h schema_actors.h schema_actors.cpp diff --git a/ydb/services/persqueue_v1/grpc_pq_read.cpp b/ydb/services/persqueue_v1/grpc_pq_read.cpp index 4d18fd7fa4..ff92306325 100644 --- a/ydb/services/persqueue_v1/grpc_pq_read.cpp +++ b/ydb/services/persqueue_v1/grpc_pq_read.cpp @@ -21,6 +21,13 @@ namespace V1 { using namespace PersQueue::V1; +Topic::StreamDirectReadMessage::FromServer FillDirectReadResponse(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode code) { + Topic::StreamDirectReadMessage::FromServer res; + FillIssue(res.add_issues(), code, errorReason); + res.set_status(ConvertPersQueueInternalCodeToStatus(code)); + return res; +} + IActor* CreatePQReadService(const TActorId& schemeCache, const TActorId& newSchemeCache, @@ -120,6 +127,43 @@ void TPQReadService::Handle(NGRpcService::TEvStreamTopicReadRequest::TPtr& ev, c HandleStreamPQReadRequest<NGRpcService::TEvStreamTopicReadRequest>(ev, ctx); } +void TPQReadService::Handle(NGRpcService::TEvStreamTopicDirectReadRequest::TPtr& ev, const TActorContext& ctx) { + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "new grpc connection"); + + if (TooMuchSessions()) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, "new grpc connection failed - too much sessions"); + ev->Get()->GetStreamCtx()->Attach(ctx.SelfID); + ev->Get()->GetStreamCtx()->WriteAndFinish( + FillDirectReadResponse("proxy overloaded", PersQueue::ErrorCode::OVERLOAD), grpc::Status::OK); //CANCELLED + return; + } + if (HaveClusters && (Clusters.empty() || LocalCluster.empty())) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, "new grpc connection failed - cluster is not known yet"); + + ev->Get()->GetStreamCtx()->Attach(ctx.SelfID); + ev->Get()->GetStreamCtx()->WriteAndFinish( + FillDirectReadResponse("cluster initializing", PersQueue::ErrorCode::INITIALIZING), grpc::Status::OK); //CANCELLED + // TODO: Inc SLI Errors + return; + } else { + Y_ABORT_UNLESS(TopicsHandler != nullptr); + auto ip = ev->Get()->GetStreamCtx()->GetPeerName(); + + const ui64 cookie = NextCookie(); + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "new direct session created cookie " << cookie); + + TActorId worker = ctx.Register(new TDirectReadSessionActor( + 
ev->Release().Release(), cookie, SchemeCache, NewSchemeCache, Counters, + DatacenterClassifier ? DatacenterClassifier->ClassifyAddress(NAddressClassifier::ExtractAddress(ip)) : "unknown", + *TopicsHandler + )); + Sessions[cookie] = worker; + } +} + + void TPQReadService::Handle(NGRpcService::TEvStreamPQMigrationReadRequest::TPtr& ev, const TActorContext& ctx) { HandleStreamPQReadRequest<NGRpcService::TEvStreamPQMigrationReadRequest>(ev, ctx); } @@ -169,6 +213,11 @@ void NKikimr::NGRpcService::TGRpcRequestProxyHandleMethods::Handle(NKikimr::NGRp ctx.Send(NKikimr::NGRpcProxy::V1::GetPQReadServiceActorID(), ev->Release().Release()); } +void NKikimr::NGRpcService::TGRpcRequestProxyHandleMethods::Handle(NKikimr::NGRpcService::TEvStreamTopicDirectReadRequest::TPtr& ev, const TActorContext& ctx) { + ctx.Send(NKikimr::NGRpcProxy::V1::GetPQReadServiceActorID(), ev->Release().Release()); +} + + void NKikimr::NGRpcService::TGRpcRequestProxyHandleMethods::Handle(NKikimr::NGRpcService::TEvStreamPQMigrationReadRequest::TPtr& ev, const TActorContext& ctx) { ctx.Send(NKikimr::NGRpcProxy::V1::GetPQReadServiceActorID(), ev->Release().Release()); } diff --git a/ydb/services/persqueue_v1/grpc_pq_read.h b/ydb/services/persqueue_v1/grpc_pq_read.h index d691ec7726..0eaf98c22a 100644 --- a/ydb/services/persqueue_v1/grpc_pq_read.h +++ b/ydb/services/persqueue_v1/grpc_pq_read.h @@ -1,6 +1,7 @@ #pragma once #include "actors/read_session_actor.h" +#include "actors/direct_read_actor.h" #include <ydb/core/client/server/grpc_base.h> #include <ydb/core/persqueue/cluster_tracker.h> @@ -42,6 +43,7 @@ private: STFUNC(StateFunc) { switch (ev->GetTypeRewrite()) { HFunc(NGRpcService::TEvStreamTopicReadRequest, Handle); + HFunc(NGRpcService::TEvStreamTopicDirectReadRequest, Handle); HFunc(NGRpcService::TEvStreamPQMigrationReadRequest, Handle); HFunc(NGRpcService::TEvCommitOffsetRequest, Handle); HFunc(NGRpcService::TEvPQReadInfoRequest, Handle); @@ -56,6 +58,7 @@ private: private: void Handle(NGRpcService::TEvStreamTopicReadRequest::TPtr& ev, const TActorContext& ctx); + void Handle(NGRpcService::TEvStreamTopicDirectReadRequest::TPtr& ev, const TActorContext& ctx); void Handle(NGRpcService::TEvStreamPQMigrationReadRequest::TPtr& ev, const TActorContext& ctx); void Handle(NGRpcService::TEvCommitOffsetRequest::TPtr& ev, const TActorContext& ctx); void Handle(NGRpcService::TEvPQReadInfoRequest::TPtr& ev, const TActorContext& ctx); @@ -98,6 +101,9 @@ auto FillReadResponse(const TString& errorReason, const PersQueue::ErrorCode::Er return res; } +Topic::StreamDirectReadMessage::FromServer FillDirectReadResponse(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode code); + + template <typename ReadRequest> void TPQReadService::HandleStreamPQReadRequest(typename ReadRequest::TPtr& ev, const TActorContext& ctx) { constexpr bool UseMigrationProtocol = std::is_same_v<ReadRequest, NGRpcService::TEvStreamPQMigrationReadRequest>; diff --git a/ydb/services/persqueue_v1/persqueue_ut.cpp b/ydb/services/persqueue_v1/persqueue_ut.cpp index 7d9b8f60ea..51a4bffae0 100644 --- a/ydb/services/persqueue_v1/persqueue_ut.cpp +++ b/ydb/services/persqueue_v1/persqueue_ut.cpp @@ -110,16 +110,20 @@ namespace { const static TString SHORT_TOPIC_NAME = "topic1"; } -#define MAKE_INSECURE_STUB(Service) \ +#define MAKE_INSECURE_STUB(Service) \ std::shared_ptr<grpc::Channel> Channel_; \ - std::unique_ptr<Service::Stub> StubP_; \ + std::unique_ptr<Service::Stub> StubP_; \ \ { \ - Channel_ = grpc::CreateChannel( \ - "localhost:" + 
ToString(server.Server->GrpcPort), \ - grpc::InsecureChannelCredentials() \ + grpc::ChannelArguments args; \ + args.SetMaxReceiveMessageSize(64_MB); \ + args.SetMaxSendMessageSize(64_MB); \ + Channel_ = grpc::CreateCustomChannel( \ + "localhost:" + ToString(server.Server->GrpcPort), \ + grpc::InsecureChannelCredentials(), \ + args \ ); \ - StubP_ = Service::NewStub(Channel_); \ + StubP_ = Service::NewStub(Channel_); \ } \ grpc::ClientContext rcontext; @@ -708,6 +712,465 @@ Y_UNIT_TEST_SUITE(TPersQueueTest) { } + Y_UNIT_TEST(UpdatePartitionLocation) { + TPersQueueV1TestServer server; + SET_LOCALS; + MAKE_INSECURE_STUB(Ydb::Topic::V1::TopicService); + server.EnablePQLogs({ NKikimrServices::PQ_METACACHE, NKikimrServices::PQ_READ_PROXY}); + server.EnablePQLogs({ NKikimrServices::KQP_PROXY }, NLog::EPriority::PRI_EMERG); + server.EnablePQLogs({ NKikimrServices::FLAT_TX_SCHEMESHARD }, NLog::EPriority::PRI_ERROR); + + auto readStream = StubP_->StreamRead(&rcontext); + UNIT_ASSERT(readStream); + + // init read session + { + Ydb::Topic::StreamReadMessage::FromClient req; + Ydb::Topic::StreamReadMessage::FromServer resp; + + req.mutable_init_request()->add_topics_read_settings()->set_path("acc/topic1"); + + req.mutable_init_request()->set_consumer("user"); + req.mutable_init_request()->set_direct_read(true); + + if (!readStream->Write(req)) { + ythrow yexception() << "write fail"; + } + UNIT_ASSERT(readStream->Read(&resp)); + Cerr << "===Got response: " << resp.ShortDebugString() << Endl; + UNIT_ASSERT(resp.server_message_case() == Ydb::Topic::StreamReadMessage::FromServer::kInitResponse); + } + + // await and confirm CreatePartitionStreamRequest from server + i64 assignId = 0; + i64 generation = 0; + { + Ydb::Topic::StreamReadMessage::FromServer resp; + + //lock partition + UNIT_ASSERT(readStream->Read(&resp)); + + Cerr << "GOT SERVER MESSAGE1: " << resp.DebugString() << "\n"; + + UNIT_ASSERT(resp.server_message_case() == Ydb::Topic::StreamReadMessage::FromServer::kStartPartitionSessionRequest); + UNIT_ASSERT_VALUES_EQUAL(resp.start_partition_session_request().partition_session().path(), "acc/topic1"); + UNIT_ASSERT(resp.start_partition_session_request().partition_session().partition_id() == 0); + UNIT_ASSERT(resp.start_partition_session_request().partition_location().generation() > 0); + generation = resp.start_partition_session_request().partition_location().generation(); + assignId = resp.start_partition_session_request().partition_session().partition_session_id(); + } + + server.Server->AnnoyingClient->RestartPartitionTablets(server.Server->CleverServer->GetRuntime(), "rt3.dc1--acc--topic1"); + + { + Ydb::Topic::StreamReadMessage::FromServer resp; + + //update partition location + UNIT_ASSERT(readStream->Read(&resp)); + + Cerr << "GOT SERVER MESSAGE2: " << resp.DebugString() << "\n"; + + UNIT_ASSERT(resp.server_message_case() == Ydb::Topic::StreamReadMessage::FromServer::kUpdatePartitionSession); + UNIT_ASSERT(resp.update_partition_session().partition_session_id() == assignId); + UNIT_ASSERT(resp.update_partition_session().partition_location().generation() > generation); + } + } + + using namespace Ydb; + class TDirectReadTestSetup { + using Service = Ydb::Topic::V1::TopicService; + private: + std::shared_ptr<grpc::Channel> Channel; + std::unique_ptr<Service::Stub> Stub; + THolder<grpc::ClientContext> ControlContext; + THolder<grpc::ClientContext> ReadContext; + + public: + std::unique_ptr<grpc::ClientReaderWriter<Topic::StreamReadMessage_FromClient, Topic::StreamReadMessage_FromServer>> 
ControlStream; + std::unique_ptr<grpc::ClientReaderWriter<Topic::StreamDirectReadMessage_FromClient, Topic::StreamDirectReadMessage_FromServer>> ReadStream; + TString SessionId; + + TDirectReadTestSetup(TPersQueueV1TestServer& server) + : ReadContext(MakeHolder<grpc::ClientContext>()) + { + server.EnablePQLogs({ NKikimrServices::PQ_READ_PROXY, NKikimrServices::PERSQUEUE }); + server.EnablePQLogs({ NKikimrServices::KQP_PROXY }, NLog::EPriority::PRI_EMERG); + server.EnablePQLogs({ NKikimrServices::FLAT_TX_SCHEMESHARD }, NLog::EPriority::PRI_ERROR); + + Connect(server); + } + + void Connect(TPersQueueV1TestServer& server) { + grpc::ChannelArguments args; + args.SetMaxReceiveMessageSize(64_MB); + args.SetMaxSendMessageSize(64_MB); + Channel = grpc::CreateCustomChannel( + "localhost:" + ToString(server.Server->GrpcPort), + grpc::InsecureChannelCredentials(), + args + ); + Stub = Service::NewStub(Channel); + } + void InitControlSession(const TString& topic) { + ControlContext = MakeHolder<grpc::ClientContext>(); + ControlStream = Stub->StreamRead(ControlContext.Get()); + UNIT_ASSERT(ControlStream); + Topic::StreamReadMessage::FromClient req; + Topic::StreamReadMessage::FromServer resp; + + req.mutable_init_request()->add_topics_read_settings()->set_path(topic); + + req.mutable_init_request()->set_consumer("user"); + req.mutable_init_request()->set_direct_read(true); + + if (!ControlStream->Write(req)) { + ythrow yexception() << "write fail"; + } + UNIT_ASSERT(ControlStream->Read(&resp)); + Cerr << "Got init response: " << resp.ShortDebugString() << Endl; + UNIT_ASSERT(resp.server_message_case() == Ydb::Topic::StreamReadMessage::FromServer::kInitResponse); + SessionId = resp.init_response().session_id(); + + req.Clear(); + req.mutable_read_request()->set_bytes_size(40_MB); + if (!ControlStream->Write(req)) { + ythrow yexception() << "write fail"; + } + } + std::pair<ui32, i64> GetNextAssign(const TString& topic) { + Cerr << "Get next assign id\n"; + Topic::StreamReadMessage::FromClient req; + Topic::StreamReadMessage::FromServer resp; + + //lock partition + UNIT_ASSERT(ControlStream->Read(&resp)); + + Cerr << "GOT SERVER MESSAGE - start session: " << resp.DebugString() << "\n"; + + UNIT_ASSERT(resp.server_message_case() == Topic::StreamReadMessage::FromServer::kStartPartitionSessionRequest); + UNIT_ASSERT_VALUES_EQUAL(resp.start_partition_session_request().partition_session().path(), topic); + auto pId = resp.start_partition_session_request().partition_session().partition_id(); + UNIT_ASSERT(resp.start_partition_session_request().partition_location().generation() > 0); + auto assignId = resp.start_partition_session_request().partition_session().partition_session_id(); + + req.Clear(); + req.mutable_start_partition_session_response()->set_partition_session_id(assignId); + if (!ControlStream->Write(req)) { + ythrow yexception() << "write fail"; + } + return std::make_pair(pId, assignId); + } + + void DoWrite(NYdb::TDriver* driver, const TString& topic, ui64 size, ui32 count, + const TString& srcId = "srcID", const std::optional<ui64>& partGroup = {}) + { + auto writer = CreateSimpleWriter(*driver, topic, srcId, partGroup, {"raw"}); + + for (ui32 i = 0; i < count; ++i) { + auto writeSome = [&]() { + TString data(size, 'x'); + UNIT_ASSERT(writer->Write(data)); + }; + writeSome(); + } + writer->Close(); + } + + void DoRead(ui64 assignId, ui64& nextReadId, ui32& currTotalMessages, ui32 messageLimit) { + while (currTotalMessages < messageLimit) { + Cerr << "Wait for direct read id: " << nextReadId << ", 
currently have " << currTotalMessages << " messages" << Endl; + Ydb::Topic::StreamDirectReadMessage::FromServer resp; + UNIT_ASSERT(ReadStream->Read(&resp)); + Cerr << "Got direct read response: " << resp.direct_read_response().direct_read_id() << Endl; + UNIT_ASSERT_C(resp.status() == Ydb::StatusIds::SUCCESS, resp.DebugString()); + UNIT_ASSERT(resp.server_message_case() == Ydb::Topic::StreamDirectReadMessage::FromServer::kDirectReadResponse); + UNIT_ASSERT_VALUES_EQUAL(resp.direct_read_response().direct_read_id(), nextReadId); + + Ydb::Topic::StreamReadMessage::FromClient req; + req.mutable_direct_read_ack()->set_partition_session_id(assignId); + req.mutable_direct_read_ack()->set_direct_read_id(nextReadId++); + if (!ControlStream->Write(req)) { + ythrow yexception() << "write fail"; + } + for (const auto& batch : resp.direct_read_response().partition_data().batches()) { + currTotalMessages += batch.message_data_size(); + } + } + UNIT_ASSERT_VALUES_EQUAL(currTotalMessages, messageLimit); + } + + void InitReadSession(const TString& topic, const TMaybe<Ydb::StatusIds::StatusCode>& status = {}, const TString& consumer = "user", + TMaybe<ui64> assingId = Nothing()) { + if(ReadStream) { + ReadStream->Finish(); + ReadStream = nullptr; + ReadContext = MakeHolder<grpc::ClientContext>(); + } + ReadStream = Stub->StreamDirectRead(ReadContext.Release()); + UNIT_ASSERT(ReadStream); + + Topic::StreamDirectReadMessage::FromClient req; + Topic::StreamDirectReadMessage::FromServer resp; + + req.mutable_init_direct_read()->add_topics_read_settings()->set_path(topic); + + req.mutable_init_direct_read()->set_consumer(consumer); + req.mutable_init_direct_read()->set_session_id(SessionId); + + if (!ReadStream->Write(req)) { + ythrow yexception() << "write fail"; + } + if (status.Defined()){ + if (status.GetRef() != Ydb::StatusIds_StatusCode_SCHEME_ERROR) { + SendReadSessionAssign(assingId.Defined() ? 
*assingId : GetNextAssign(topic).second); + } + UNIT_ASSERT(ReadStream->Read(&resp)); + Cerr << "Got direct read init response: " << resp.ShortDebugString() << Endl; + UNIT_ASSERT(resp.status() == status.GetRef()); + } + } + + void SendReadSessionAssign(ui64 assignId) { + Cerr << "Send next assign to data session" << assignId << Endl; + Topic::StreamDirectReadMessage::FromClient req; + + auto x = req.mutable_start_direct_read_partition_session(); + x->set_partition_session_id(assignId); + x->set_last_direct_read_id(0); + x->set_generation(1); + if (!ReadStream->Write(req)) { + ythrow yexception() << "write fail"; + } + } + }; + + THolder<TEvPQ::TEvGetFullDirectReadData> RequestCacheData(TTestActorRuntime* runtime, TEvPQ::TEvGetFullDirectReadData* request) { + const auto& edgeId = runtime->AllocateEdgeActor(); + runtime->Send(NPQ::MakePQDReadCacheServiceActorId(), edgeId, request); + auto resp = runtime->GrabEdgeEvent<TEvPQ::TEvGetFullDirectReadData>(); + UNIT_ASSERT(resp); + return resp; + } + + Y_UNIT_TEST(DirectReadPreCached) { + TPersQueueV1TestServer server{true}; + SET_LOCALS; + TDirectReadTestSetup setup{server}; + setup.DoWrite(pqClient->GetDriver(), "acc/topic1", 1_MB, 30); + + setup.InitControlSession("acc/topic1"); + auto pair = setup.GetNextAssign("acc/topic1"); + UNIT_ASSERT_VALUES_EQUAL(pair.first, 0); + auto assignId = pair.second; + setup.InitReadSession("acc/topic1"); + + auto cachedData = RequestCacheData(runtime, new TEvPQ::TEvGetFullDirectReadData()); + UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.size(), 1); + setup.SendReadSessionAssign(assignId); + + ui32 totalMsg = 0; + ui64 nextReadId = 1; + setup.DoRead(assignId, nextReadId, totalMsg, 30); + + Sleep(TDuration::Seconds(1)); + cachedData = RequestCacheData(runtime, new TEvPQ::TEvGetFullDirectReadData()); + UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.begin()->second.StagedReads.size(), 0); + UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.begin()->second.Reads.size(), 0); + + } + + Y_UNIT_TEST(DirectReadNotCached) { + TPersQueueV1TestServer server{true}; + SET_LOCALS; + TDirectReadTestSetup setup{server}; + + setup.InitControlSession("acc/topic1"); + auto pair = setup.GetNextAssign("acc/topic1"); + UNIT_ASSERT_VALUES_EQUAL(pair.first, 0); + auto assignId = pair.second; + setup.InitReadSession("acc/topic1"); + setup.SendReadSessionAssign(assignId); + + ui32 totalMsg = 0; + ui64 nextReadId = 1; + Sleep(TDuration::Seconds(3)); + setup.DoWrite(pqClient->GetDriver(), "acc/topic1", 1_MB, 50); + setup.DoRead(assignId, nextReadId, totalMsg, 40); + + Topic::StreamReadMessage::FromClient req; + req.mutable_read_request()->set_bytes_size(40_MB); + if (!setup.ControlStream->Write(req)) { + ythrow yexception() << "write fail"; + } + setup.DoRead(assignId, nextReadId, totalMsg, 50); + + Sleep(TDuration::Seconds(1)); + auto cachedData = RequestCacheData(runtime, new TEvPQ::TEvGetFullDirectReadData()); + UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.begin()->second.StagedReads.size(), 0); + UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.begin()->second.Reads.size(), 0); + } + + Y_UNIT_TEST(DirectReadBadCases) { + TPersQueueV1TestServer server{true}; + SET_LOCALS; + TDirectReadTestSetup setup{server}; + setup.InitControlSession("acc/topic1"); + auto sessionId = setup.SessionId; + auto assign = setup.GetNextAssign("acc/topic1").second; + setup.SessionId = "bad-session"; + Cerr << "First init bad session\n"; + setup.InitReadSession("acc/topic1", 
Ydb::StatusIds::BAD_REQUEST, "user", 1); // no control session
+        setup.SessionId = sessionId;
+        Cerr << "Init bad topic session\n";
+        setup.InitReadSession("acc/topic-bad", Ydb::StatusIds::SCHEME_ERROR);
+        //setup.InitReadSession("acc/topic1", Ydb::StatusIds::SCHEME_ERROR, "bad-user"); //ToDo - enable ACL (read rules) check
+
+        setup.ControlStream->WritesDone();
+        Cerr << "Close control session\n";
+        setup.ControlStream->Finish();
+        Cerr << "Close control session - done\n";
+        setup.ControlStream = nullptr;
+
+        setup.DoWrite(pqClient->GetDriver(), "acc/topic1", 100_KB, 10);
+        Cerr << "Init read session\n";
+        setup.InitReadSession("acc/topic1", Ydb::StatusIds::BAD_REQUEST, "user", assign); // no control session
+
+        auto cachedData = RequestCacheData(runtime, new TEvPQ::TEvGetFullDirectReadData());
+        UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.size(), 0);
+    }
+
+    Y_UNIT_TEST(DirectReadStop) {
+        TPersQueueV1TestServer server{true};
+        SET_LOCALS;
+
+        server.Server->AnnoyingClient->AlterTopicNoLegacy("Root/PQ/rt3.dc1--acc--topic1", 2);
+
+        TDirectReadTestSetup setup{server};
+        setup.DoWrite(pqClient->GetDriver(), "acc/topic1", 100_KB, 1, "src1", 0);
+        setup.DoWrite(pqClient->GetDriver(), "acc/topic1", 100_KB, 1, "src2", 1);
+
+        setup.InitControlSession("acc/topic1");
+        auto pair1 = setup.GetNextAssign("acc/topic1");
+        auto pair2 = setup.GetNextAssign("acc/topic1");
+        UNIT_ASSERT(pair1.first + pair2.first == 1); // partitions 0 and 1;
+        auto assign1 = pair1.second;
+        auto assign2 = pair2.second;
+        UNIT_ASSERT(assign1 != assign2);
+
+        setup.InitReadSession("acc/topic1");
+        setup.SendReadSessionAssign(assign1);
+        setup.SendReadSessionAssign(assign2);
+
+        // Read from both partitions so that LastDirectReadId moves forward
+        for (auto i = 0u; i != 2; ++i) {
+            Cerr << "Wait for direct read" << Endl;
+            Ydb::Topic::StreamDirectReadMessage::FromServer resp;
+            UNIT_ASSERT(setup.ReadStream->Read(&resp));
+            Cerr << "Got direct read response: " << resp.direct_read_response().direct_read_id() << Endl;
+            UNIT_ASSERT_C(resp.status() == Ydb::StatusIds::SUCCESS, resp.DebugString());
+            UNIT_ASSERT(resp.server_message_case() == Ydb::Topic::StreamDirectReadMessage::FromServer::kDirectReadResponse);
+            UNIT_ASSERT_VALUES_EQUAL(resp.direct_read_response().direct_read_id(), 1);
+            i64 assignId = resp.direct_read_response().partition_session_id();
+            UNIT_ASSERT(assignId == assign1 || assignId == assign2);
+
+            Ydb::Topic::StreamReadMessage::FromClient req;
+            req.mutable_direct_read_ack()->set_partition_session_id(assignId);
+            req.mutable_direct_read_ack()->set_direct_read_id(1);
+            if (!setup.ControlStream->Write(req)) {
+                ythrow yexception() << "write fail";
+            }
+        }
+
+        NYdb::NTopic::TTopicClient topicClient(*pqClient->GetDriver());
+        NYdb::NTopic::TReadSessionSettings rSettings;
+        rSettings.ConsumerName("user").AppendTopics({"acc/topic1"});
+        auto readSession = topicClient.CreateReadSession(rSettings);
+
+        auto assignId = 0;
+        {
+            Topic::StreamReadMessage::FromServer resp;
+
+            //await the graceful stop request
+            UNIT_ASSERT(setup.ControlStream->Read(&resp));
+
+            Cerr << "GOT SERVER MESSAGE (stop session): " << resp.DebugString() << "\n";
+
+            UNIT_ASSERT(resp.server_message_case() == Ydb::Topic::StreamReadMessage::FromServer::kStopPartitionSessionRequest);
+            UNIT_ASSERT_VALUES_EQUAL(resp.stop_partition_session_request().graceful(), true);
+            UNIT_ASSERT_VALUES_EQUAL(resp.stop_partition_session_request().last_direct_read_id(), 1);
+
+            assignId = resp.stop_partition_session_request().partition_session_id();
+            UNIT_ASSERT(assignId == assign1 || assignId == 
assign2);
+
+            Topic::StreamReadMessage::FromClient req;
+            req.mutable_stop_partition_session_response()->set_partition_session_id(assignId);
+            req.mutable_stop_partition_session_response()->set_graceful(true);
+            if (!setup.ControlStream->Write(req)) {
+                ythrow yexception() << "write fail";
+            }
+        }
+
+        {
+            Ydb::Topic::StreamReadMessage::FromServer resp;
+
+            //await the forced stop request
+            UNIT_ASSERT(setup.ControlStream->Read(&resp));
+
+            Cerr << "GOT SERVER MESSAGE (stop session 2): " << resp.DebugString() << "\n";
+
+            UNIT_ASSERT(resp.server_message_case() == Ydb::Topic::StreamReadMessage::FromServer::kStopPartitionSessionRequest);
+            UNIT_ASSERT_VALUES_EQUAL(resp.stop_partition_session_request().graceful(), false);
+            UNIT_ASSERT_VALUES_EQUAL(resp.stop_partition_session_request().partition_session_id(), assignId);
+            Ydb::Topic::StreamReadMessage::FromClient req;
+            req.mutable_stop_partition_session_response()->set_partition_session_id(assignId);
+            req.mutable_stop_partition_session_response()->set_graceful(false);
+            if (!setup.ControlStream->Write(req)) {
+                ythrow yexception() << "write fail";
+            }
+        }
+    }
+
+    Y_UNIT_TEST(DirectReadCleanCache) {
+        TPersQueueV1TestServer server;
+        SET_LOCALS;
+        TString topicPath{"/Root/PQ/rt3.dc1--acc--topic2"};
+        server.Server->AnnoyingClient->CreateTopicNoLegacy(topicPath, 1);
+        auto pathDescr = server.Server->AnnoyingClient->Ls(topicPath)->Record.GetPathDescription().GetPersQueueGroup();
+        auto tabletId = pathDescr.GetPartitions(0).GetTabletId();
+        Cerr << "PQ descr: " << pathDescr.DebugString() << Endl;
+
+        TDirectReadTestSetup setup{server};
+
+        setup.InitControlSession("acc/topic2");
+        setup.InitReadSession("acc/topic2");
+        auto pair = setup.GetNextAssign("acc/topic2");
+        UNIT_ASSERT_VALUES_EQUAL(pair.first, 0);
+        auto assignId = pair.second;
+        setup.SendReadSessionAssign(assignId);
+        // auto cachedData = RequestCacheData(runtime, new TEvPQ::TEvGetFullDirectReadData());
+        // UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.size(), 1);
+        setup.DoWrite(pqClient->GetDriver(), "acc/topic2", 10_MB, 1);
+        Ydb::Topic::StreamDirectReadMessage::FromServer resp;
+        Cerr << "Request initial read data\n";
+        UNIT_ASSERT(setup.ReadStream->Read(&resp));
+
+        Cerr << "Request cache data\n";
+        auto cachedData = RequestCacheData(runtime, new TEvPQ::TEvGetFullDirectReadData());
+        UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.size(), 1);
+        Cerr << "Kill the tablet\n";
+        server.Server->AnnoyingClient->KillTablet(*(server.Server->CleverServer), tabletId);
+        Cerr << "Get session closure\n";
+        resp.Clear();
+        UNIT_ASSERT(setup.ReadStream->Read(&resp));
+        UNIT_ASSERT_C(resp.status() == Ydb::StatusIds::SESSION_EXPIRED, resp.status());
+        Cerr << "Check caching service data empty\n";
+        cachedData = RequestCacheData(runtime, new TEvPQ::TEvGetFullDirectReadData());
+        UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.size(), 0);
+    }
+
     Y_UNIT_TEST(StreamReadManyUpdateTokenAndRead) {
         TPersQueueV1TestServer server;
         SET_LOCALS;
@@ -748,7 +1211,7 @@ Y_UNIT_TEST_SUITE(TPersQueueTest) {
             ythrow yexception() << "write fail";
         }
         UNIT_ASSERT(readStream->Read(&resp));
-        Cerr << "===Got response: " << resp.ShortDebugString() << Endl;
+        
Cerr << "Got response: " << resp.ShortDebugString() << Endl; UNIT_ASSERT(resp.server_message_case() == Ydb::Topic::StreamReadMessage::FromServer::kInitResponse); req.Clear(); diff --git a/ydb/services/persqueue_v1/topic.cpp b/ydb/services/persqueue_v1/topic.cpp index e1d0f8764c..40a7f2c65a 100644 --- a/ydb/services/persqueue_v1/topic.cpp +++ b/ydb/services/persqueue_v1/topic.cpp @@ -85,6 +85,27 @@ void TGRpcTopicService::SetupIncomingRequests(NYdbGrpc::TLoggerPtr logger) { ); } + { + using TBiRequest = Ydb::Topic::StreamDirectReadMessage::FromClient; + + using TBiResponse = Ydb::Topic::StreamDirectReadMessage::FromServer; + + using TStreamGRpcRequest = NGRpcServer::TGRpcStreamingRequest< + TBiRequest, + TBiResponse, + TGRpcTopicService, + NKikimrServices::GRPC_SERVER>; + + + TStreamGRpcRequest::Start(this, this->GetService(), CQ_, &Ydb::Topic::V1::TopicService::AsyncService::RequestStreamDirectRead, + [this](TIntrusivePtr<TStreamGRpcRequest::IContext> context) { + ActorSystem_->Send(GRpcRequestProxyId_, new NKikimr::NGRpcService::TEvStreamTopicDirectReadRequest(context, IsRlAllowed())); + }, + *ActorSystem_, "TopicService/StreamDirectRead", getCounterBlock("topic", "StreamDirectRead", true), nullptr + ); + } + + #ifdef ADD_REQUEST #error ADD_REQUEST macro already defined #endif diff --git a/ydb/services/persqueue_v1/ut/persqueue_test_fixture.h b/ydb/services/persqueue_v1/ut/persqueue_test_fixture.h index 89369402f3..f225ddf9aa 100644 --- a/ydb/services/persqueue_v1/ut/persqueue_test_fixture.h +++ b/ydb/services/persqueue_v1/ut/persqueue_test_fixture.h @@ -101,7 +101,6 @@ static void ModifyTopicACL(NYdb::TDriver* driver, const TString& topic, const TV Server->AnnoyingClient->CreateTopicNoLegacy("rt3.dc1--topic1", 1); Server->AnnoyingClient->CreateTopicNoLegacy("rt3.dc1--acc--topic1", 1); Server->WaitInit("topic1"); - Sleep(TDuration::Seconds(10)); } Cerr << "=== EnablePQLogs" << Endl; |