author    komels <komels@ydb.tech> 2023-12-08 09:49:15 +0300
committer komels <komels@ydb.tech> 2023-12-08 10:33:45 +0300
commit    c47f199e3a1629a3b0a82811bb2f95cba4b633d1 (patch)
tree      777f81cf8b49dc3ade389acb169d76e250ad5b2d
parent    58f95f0c7d92b1ec514c855893a4caf45fb430fe (diff)
Partition direct read
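
Adds the partition direct-read path: a new node-local caching service
(ydb/core/persqueue/dread_cache_service) in which read results are staged,
published once deliverable, and forgotten after delivery, plus a new
StreamDirectRead gRPC call (direct_read_actor) that streams the published data
to the client. A sketch of the event flow, as inferred from the handlers in
caching_service.cpp below:

    PQ tablet / partition            cache service                 read proxy
    TEvRegisterDirectReadSession --> (session registered)
    TEvStageDirectReadData       --> (staged, not yet visible)
    TEvPublishDirectRead         --> (published)              --> TEvDirectReadSendClientData
    TEvForgetDirectRead          --> (dropped from the cache)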
-rw-r--r--  .mapping.json | 7
-rw-r--r--  ydb/core/driver_lib/run/config.h | 1
-rw-r--r--  ydb/core/driver_lib/run/kikimr_services_initializers.cpp | 14
-rw-r--r--  ydb/core/driver_lib/run/kikimr_services_initializers.h | 7
-rw-r--r--  ydb/core/driver_lib/run/run.cpp | 4
-rw-r--r--  ydb/core/grpc_services/base/base.h | 1
-rw-r--r--  ydb/core/grpc_services/grpc_request_proxy.cpp | 1
-rw-r--r--  ydb/core/grpc_services/grpc_request_proxy_handle_methods.h | 1
-rw-r--r--  ydb/core/grpc_services/rpc_calls.cpp | 6
-rw-r--r--  ydb/core/grpc_services/rpc_calls.h | 1
-rw-r--r--  ydb/core/persqueue/CMakeLists.darwin-arm64.txt | 2
-rw-r--r--  ydb/core/persqueue/CMakeLists.darwin-x86_64.txt | 2
-rw-r--r--  ydb/core/persqueue/CMakeLists.linux-aarch64.txt | 2
-rw-r--r--  ydb/core/persqueue/CMakeLists.linux-x86_64.txt | 2
-rw-r--r--  ydb/core/persqueue/CMakeLists.windows-x86_64.txt | 2
-rw-r--r--  ydb/core/persqueue/dread_cache_service/CMakeLists.txt | 9
-rw-r--r--  ydb/core/persqueue/dread_cache_service/caching_service.cpp | 546
-rw-r--r--  ydb/core/persqueue/dread_cache_service/caching_service.h | 14
-rw-r--r--  ydb/core/persqueue/dread_cache_service/ut/CMakeLists.darwin-arm64.txt | 79
-rw-r--r--  ydb/core/persqueue/dread_cache_service/ut/CMakeLists.darwin-x86_64.txt | 80
-rw-r--r--  ydb/core/persqueue/dread_cache_service/ut/CMakeLists.linux-aarch64.txt | 83
-rw-r--r--  ydb/core/persqueue/dread_cache_service/ut/CMakeLists.linux-x86_64.txt | 85
-rw-r--r--  ydb/core/persqueue/dread_cache_service/ut/CMakeLists.txt | 19
-rw-r--r--  ydb/core/persqueue/dread_cache_service/ut/CMakeLists.windows-x86_64.txt | 73
-rw-r--r--  ydb/core/persqueue/dread_cache_service/ut/caching_proxy_ut.cpp | 237
-rw-r--r--  ydb/core/persqueue/dread_cache_service/ut/ya.make | 29
-rw-r--r--  ydb/core/persqueue/events/internal.h | 161
-rw-r--r--  ydb/core/persqueue/key.h | 30
-rw-r--r--  ydb/core/persqueue/partition.cpp | 53
-rw-r--r--  ydb/core/persqueue/partition.h | 17
-rw-r--r--  ydb/core/persqueue/partition_read.cpp | 115
-rw-r--r--  ydb/core/persqueue/partition_types.h | 4
-rw-r--r--  ydb/core/persqueue/partition_write.cpp | 5
-rw-r--r--  ydb/core/persqueue/percentile_counter.cpp | 1
-rw-r--r--  ydb/core/persqueue/pq_impl.cpp | 408
-rw-r--r--  ydb/core/persqueue/pq_impl.h | 32
-rw-r--r--  ydb/core/persqueue/subscriber.h | 16
-rw-r--r--  ydb/core/persqueue/user_info.cpp | 29
-rw-r--r--  ydb/core/persqueue/user_info.h | 21
-rw-r--r--  ydb/core/persqueue/ut/common/pq_ut_common.cpp | 263
-rw-r--r--  ydb/core/persqueue/ut/common/pq_ut_common.h | 76
-rw-r--r--  ydb/core/persqueue/ut/partition_ut.cpp | 33
-rw-r--r--  ydb/core/persqueue/ut/pq_ut.cpp | 141
-rw-r--r--  ydb/core/persqueue/ya.make | 2
-rw-r--r--  ydb/core/protos/counters_pq.proto | 2
-rw-r--r--  ydb/core/protos/msgbus_pq.proto | 45
-rw-r--r--  ydb/core/protos/pqconfig.proto | 2
-rw-r--r--  ydb/core/testlib/test_client.cpp | 6
-rw-r--r--  ydb/core/testlib/test_pq_client.h | 20
-rw-r--r--  ydb/library/persqueue/topic_parser/counters.h | 1
-rw-r--r--  ydb/public/api/grpc/ydb_topic_v1.proto | 47
-rw-r--r--  ydb/public/api/protos/ydb_topic.proto | 152
-rw-r--r--  ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/read_session.ipp | 22
-rw-r--r--  ydb/services/persqueue_v1/actors/CMakeLists.darwin-arm64.txt | 2
-rw-r--r--  ydb/services/persqueue_v1/actors/CMakeLists.darwin-x86_64.txt | 2
-rw-r--r--  ydb/services/persqueue_v1/actors/CMakeLists.linux-aarch64.txt | 2
-rw-r--r--  ydb/services/persqueue_v1/actors/CMakeLists.linux-x86_64.txt | 2
-rw-r--r--  ydb/services/persqueue_v1/actors/CMakeLists.windows-x86_64.txt | 2
-rw-r--r--  ydb/services/persqueue_v1/actors/direct_read_actor.cpp | 471
-rw-r--r--  ydb/services/persqueue_v1/actors/direct_read_actor.h | 180
-rw-r--r--  ydb/services/persqueue_v1/actors/events.h | 156
-rw-r--r--  ydb/services/persqueue_v1/actors/helpers.h | 4
-rw-r--r--  ydb/services/persqueue_v1/actors/partition_actor.cpp | 223
-rw-r--r--  ydb/services/persqueue_v1/actors/partition_actor.h | 24
-rw-r--r--  ydb/services/persqueue_v1/actors/read_session_actor.h | 37
-rw-r--r--  ydb/services/persqueue_v1/actors/read_session_actor.ipp | 294
-rw-r--r--  ydb/services/persqueue_v1/actors/schema_actors.cpp | 1
-rw-r--r--  ydb/services/persqueue_v1/actors/ya.make | 3
-rw-r--r--  ydb/services/persqueue_v1/grpc_pq_read.cpp | 49
-rw-r--r--  ydb/services/persqueue_v1/grpc_pq_read.h | 6
-rw-r--r--  ydb/services/persqueue_v1/persqueue_ut.cpp | 479
-rw-r--r--  ydb/services/persqueue_v1/topic.cpp | 21
-rw-r--r--  ydb/services/persqueue_v1/ut/persqueue_test_fixture.h | 1
73 files changed, 4565 insertions(+), 405 deletions(-)
diff --git a/.mapping.json b/.mapping.json
index 32f3b89e9e..f6384adc5f 100644
--- a/.mapping.json
+++ b/.mapping.json
@@ -5824,6 +5824,13 @@
"ydb/core/persqueue/config/CMakeLists.linux-x86_64.txt":"",
"ydb/core/persqueue/config/CMakeLists.txt":"",
"ydb/core/persqueue/config/CMakeLists.windows-x86_64.txt":"",
+ "ydb/core/persqueue/dread_cache_service/CMakeLists.txt":"",
+ "ydb/core/persqueue/dread_cache_service/ut/CMakeLists.darwin-arm64.txt":"",
+ "ydb/core/persqueue/dread_cache_service/ut/CMakeLists.darwin-x86_64.txt":"",
+ "ydb/core/persqueue/dread_cache_service/ut/CMakeLists.linux-aarch64.txt":"",
+ "ydb/core/persqueue/dread_cache_service/ut/CMakeLists.linux-x86_64.txt":"",
+ "ydb/core/persqueue/dread_cache_service/ut/CMakeLists.txt":"",
+ "ydb/core/persqueue/dread_cache_service/ut/CMakeLists.windows-x86_64.txt":"",
"ydb/core/persqueue/events/CMakeLists.darwin-arm64.txt":"",
"ydb/core/persqueue/events/CMakeLists.darwin-x86_64.txt":"",
"ydb/core/persqueue/events/CMakeLists.linux-aarch64.txt":"",
diff --git a/ydb/core/driver_lib/run/config.h b/ydb/core/driver_lib/run/config.h
index 85fca864c7..f71205fecc 100644
--- a/ydb/core/driver_lib/run/config.h
+++ b/ydb/core/driver_lib/run/config.h
@@ -57,6 +57,7 @@ union TBasicKikimrServicesMask {
bool EnablePersQueueClusterDiscovery:1;
bool EnableNetClassifier:1;
bool EnablePersQueueClusterTracker:1;
+ bool EnablePersQueueDirectReadCache:1;
bool EnableSysViewService:1;
bool EnableMeteringWriter:1;
bool EnableAuditWriter:1;
diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp
index ab88bc8bf0..615589b3bc 100644
--- a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp
+++ b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp
@@ -102,6 +102,7 @@
#include <ydb/core/node_whiteboard/node_whiteboard.h>
#include <ydb/core/persqueue/cluster_tracker.h>
+#include <ydb/core/persqueue/dread_cache_service/caching_service.h>
#include <ydb/core/persqueue/pq.h>
#include <ydb/core/persqueue/pq_l2_service.h>
@@ -1954,6 +1955,19 @@ void TPersQueueClusterTrackerInitializer::InitializeServices(NActors::TActorSyst
TActorSetupCmd(actor, TMailboxType::HTSwap, appData->UserPoolId)));
}
+// TPersQueueDirectReadCache
+
+TPersQueueDirectReadCacheInitializer::TPersQueueDirectReadCacheInitializer(const TKikimrRunConfig& runConfig)
+ : IKikimrServicesInitializer(runConfig)
+{}
+
+void TPersQueueDirectReadCacheInitializer::InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) {
+ IActor* actor = NPQ::CreatePQDReadCacheService(appData->Counters);
+ setup->LocalServices.push_back(std::pair<TActorId, TActorSetupCmd>(
+ NPQ::MakePQDReadCacheServiceActorId(),
+ TActorSetupCmd(actor, TMailboxType::HTSwap, appData->UserPoolId)));
+}
+
// TMemProfMonitorInitializer
TMemProfMonitorInitializer::TMemProfMonitorInitializer(const TKikimrRunConfig& runConfig, TIntrusivePtr<TMemObserver> memObserver)
diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.h b/ydb/core/driver_lib/run/kikimr_services_initializers.h
index 4f3b622d8b..a15bd6c040 100644
--- a/ydb/core/driver_lib/run/kikimr_services_initializers.h
+++ b/ydb/core/driver_lib/run/kikimr_services_initializers.h
@@ -355,6 +355,13 @@ public:
void InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) override;
};
+class TPersQueueDirectReadCacheInitializer : public IKikimrServicesInitializer {
+public:
+ TPersQueueDirectReadCacheInitializer(const TKikimrRunConfig& runConfig);
+
+ void InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) override;
+};
+
class TMemProfMonitorInitializer : public IKikimrServicesInitializer {
TIntrusivePtr<TMemObserver> MemObserver;
diff --git a/ydb/core/driver_lib/run/run.cpp b/ydb/core/driver_lib/run/run.cpp
index 7476c90fcc..66bd3c186d 100644
--- a/ydb/core/driver_lib/run/run.cpp
+++ b/ydb/core/driver_lib/run/run.cpp
@@ -1449,6 +1449,10 @@ TIntrusivePtr<TServiceInitializersList> TKikimrRunner::CreateServiceInitializers
sil->AddServiceInitializer(new TPersQueueClusterTrackerInitializer(runConfig));
}
+ if (serviceMask.EnablePersQueueDirectReadCache) {
+ sil->AddServiceInitializer(new TPersQueueDirectReadCacheInitializer(runConfig));
+ }
+
if (serviceMask.EnableIcNodeCacheService) {
sil->AddServiceInitializer(new TIcNodeCacheServiceInitializer(runConfig));
}
diff --git a/ydb/core/grpc_services/base/base.h b/ydb/core/grpc_services/base/base.h
index 010244c5ea..a0e5f479b7 100644
--- a/ydb/core/grpc_services/base/base.h
+++ b/ydb/core/grpc_services/base/base.h
@@ -103,6 +103,7 @@ struct TRpcServices {
EvStreamPQMigrationRead,
EvStreamTopicWrite,
EvStreamTopicRead,
+ EvStreamTopicDirectRead,
EvPQReadInfo,
EvTopicCommitOffset,
EvListOperations,
diff --git a/ydb/core/grpc_services/grpc_request_proxy.cpp b/ydb/core/grpc_services/grpc_request_proxy.cpp
index 4a1358c8db..27e6b0e321 100644
--- a/ydb/core/grpc_services/grpc_request_proxy.cpp
+++ b/ydb/core/grpc_services/grpc_request_proxy.cpp
@@ -548,6 +548,7 @@ void TGRpcRequestProxyImpl::StateFunc(TAutoPtr<IEventHandle>& ev) {
HFunc(TEvStreamPQMigrationReadRequest, PreHandle);
HFunc(TEvStreamTopicWriteRequest, PreHandle);
HFunc(TEvStreamTopicReadRequest, PreHandle);
+ HFunc(TEvStreamTopicDirectReadRequest, PreHandle);
HFunc(TEvCommitOffsetRequest, PreHandle);
HFunc(TEvPQReadInfoRequest, PreHandle);
HFunc(TEvPQDropTopicRequest, PreHandle);
diff --git a/ydb/core/grpc_services/grpc_request_proxy_handle_methods.h b/ydb/core/grpc_services/grpc_request_proxy_handle_methods.h
index d8fe632d1c..155f344d7a 100644
--- a/ydb/core/grpc_services/grpc_request_proxy_handle_methods.h
+++ b/ydb/core/grpc_services/grpc_request_proxy_handle_methods.h
@@ -12,6 +12,7 @@ protected:
static void Handle(TEvStreamPQMigrationReadRequest::TPtr& ev, const TActorContext& ctx);
static void Handle(TEvStreamTopicWriteRequest::TPtr& ev, const TActorContext& ctx);
static void Handle(TEvStreamTopicReadRequest::TPtr& ev, const TActorContext& ctx);
+ static void Handle(TEvStreamTopicDirectReadRequest::TPtr& ev, const TActorContext& ctx);
static void Handle(TEvCommitOffsetRequest::TPtr& ev, const TActorContext& ctx);
static void Handle(TEvPQReadInfoRequest::TPtr& ev, const TActorContext& ctx);
static void Handle(TEvPQDropTopicRequest::TPtr& ev, const TActorContext& ctx);
diff --git a/ydb/core/grpc_services/rpc_calls.cpp b/ydb/core/grpc_services/rpc_calls.cpp
index 0bb41f28b6..766e4e05cc 100644
--- a/ydb/core/grpc_services/rpc_calls.cpp
+++ b/ydb/core/grpc_services/rpc_calls.cpp
@@ -37,6 +37,12 @@ void FillYdbStatus(Ydb::Topic::StreamReadMessage::FromServer& resp, const NYql::
}
template <>
+void FillYdbStatus(Ydb::Topic::StreamDirectReadMessage::FromServer& resp, const NYql::TIssues& issues, Ydb::StatusIds::StatusCode status) {
+ resp.set_status(status);
+ NYql::IssuesToMessage(issues, resp.mutable_issues());
+}
+
+template <>
void FillYdbStatus(Draft::Dummy::PingResponse& resp, const NYql::TIssues& issues, Ydb::StatusIds::StatusCode status) {
Y_UNUSED(resp);
Y_UNUSED(issues);
diff --git a/ydb/core/grpc_services/rpc_calls.h b/ydb/core/grpc_services/rpc_calls.h
index cbeeaec420..d6e1e2ef6d 100644
--- a/ydb/core/grpc_services/rpc_calls.h
+++ b/ydb/core/grpc_services/rpc_calls.h
@@ -66,6 +66,7 @@ using TEvStreamPQWriteRequest = TGRpcRequestBiStreamWrapper<TRpcServices::EvStre
using TEvStreamPQMigrationReadRequest = TGRpcRequestBiStreamWrapper<TRpcServices::EvStreamPQMigrationRead, Ydb::PersQueue::V1::MigrationStreamingReadClientMessage, Ydb::PersQueue::V1::MigrationStreamingReadServerMessage>;
using TEvStreamTopicWriteRequest = TGRpcRequestBiStreamWrapper<TRpcServices::EvStreamTopicWrite, Ydb::Topic::StreamWriteMessage::FromClient, Ydb::Topic::StreamWriteMessage::FromServer, TRateLimiterMode::RuManual>;
using TEvStreamTopicReadRequest = TGRpcRequestBiStreamWrapper<TRpcServices::EvStreamTopicRead, Ydb::Topic::StreamReadMessage::FromClient, Ydb::Topic::StreamReadMessage::FromServer, TRateLimiterMode::RuManual>;
+using TEvStreamTopicDirectReadRequest = TGRpcRequestBiStreamWrapper<TRpcServices::EvStreamTopicDirectRead, Ydb::Topic::StreamDirectReadMessage::FromClient, Ydb::Topic::StreamDirectReadMessage::FromServer, TRateLimiterMode::RuManual>;
using TEvCommitOffsetRequest = TGRpcRequestWrapper<TRpcServices::EvTopicCommitOffset, Ydb::Topic::CommitOffsetRequest, Ydb::Topic::CommitOffsetResponse, true>;
using TEvPQReadInfoRequest = TGRpcRequestWrapper<TRpcServices::EvPQReadInfo, Ydb::PersQueue::V1::ReadInfoRequest, Ydb::PersQueue::V1::ReadInfoResponse, true>;
using TEvPQDropTopicRequest = TGRpcRequestValidationWrapper<TRpcServices::EvPQDropTopic, Ydb::PersQueue::V1::DropTopicRequest, Ydb::PersQueue::V1::DropTopicResponse, true>;
diff --git a/ydb/core/persqueue/CMakeLists.darwin-arm64.txt b/ydb/core/persqueue/CMakeLists.darwin-arm64.txt
index fcb42492da..6c3e709acf 100644
--- a/ydb/core/persqueue/CMakeLists.darwin-arm64.txt
+++ b/ydb/core/persqueue/CMakeLists.darwin-arm64.txt
@@ -8,6 +8,7 @@
add_subdirectory(codecs)
add_subdirectory(config)
+add_subdirectory(dread_cache_service)
add_subdirectory(events)
add_subdirectory(partition_key_range)
add_subdirectory(ut)
@@ -78,6 +79,7 @@ target_sources(ydb-core-persqueue PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/persqueue/utils.cpp
${CMAKE_SOURCE_DIR}/ydb/core/persqueue/write_meta.cpp
${CMAKE_SOURCE_DIR}/ydb/core/persqueue/microseconds_sliding_window.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/dread_cache_service/caching_service.cpp
)
generate_enum_serilization(ydb-core-persqueue
${CMAKE_SOURCE_DIR}/ydb/core/persqueue/sourceid.h
diff --git a/ydb/core/persqueue/CMakeLists.darwin-x86_64.txt b/ydb/core/persqueue/CMakeLists.darwin-x86_64.txt
index fcb42492da..6c3e709acf 100644
--- a/ydb/core/persqueue/CMakeLists.darwin-x86_64.txt
+++ b/ydb/core/persqueue/CMakeLists.darwin-x86_64.txt
@@ -8,6 +8,7 @@
add_subdirectory(codecs)
add_subdirectory(config)
+add_subdirectory(dread_cache_service)
add_subdirectory(events)
add_subdirectory(partition_key_range)
add_subdirectory(ut)
@@ -78,6 +79,7 @@ target_sources(ydb-core-persqueue PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/persqueue/utils.cpp
${CMAKE_SOURCE_DIR}/ydb/core/persqueue/write_meta.cpp
${CMAKE_SOURCE_DIR}/ydb/core/persqueue/microseconds_sliding_window.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/dread_cache_service/caching_service.cpp
)
generate_enum_serilization(ydb-core-persqueue
${CMAKE_SOURCE_DIR}/ydb/core/persqueue/sourceid.h
diff --git a/ydb/core/persqueue/CMakeLists.linux-aarch64.txt b/ydb/core/persqueue/CMakeLists.linux-aarch64.txt
index efa9885d58..38253dbbae 100644
--- a/ydb/core/persqueue/CMakeLists.linux-aarch64.txt
+++ b/ydb/core/persqueue/CMakeLists.linux-aarch64.txt
@@ -8,6 +8,7 @@
add_subdirectory(codecs)
add_subdirectory(config)
+add_subdirectory(dread_cache_service)
add_subdirectory(events)
add_subdirectory(partition_key_range)
add_subdirectory(ut)
@@ -79,6 +80,7 @@ target_sources(ydb-core-persqueue PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/persqueue/utils.cpp
${CMAKE_SOURCE_DIR}/ydb/core/persqueue/write_meta.cpp
${CMAKE_SOURCE_DIR}/ydb/core/persqueue/microseconds_sliding_window.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/dread_cache_service/caching_service.cpp
)
generate_enum_serilization(ydb-core-persqueue
${CMAKE_SOURCE_DIR}/ydb/core/persqueue/sourceid.h
diff --git a/ydb/core/persqueue/CMakeLists.linux-x86_64.txt b/ydb/core/persqueue/CMakeLists.linux-x86_64.txt
index efa9885d58..38253dbbae 100644
--- a/ydb/core/persqueue/CMakeLists.linux-x86_64.txt
+++ b/ydb/core/persqueue/CMakeLists.linux-x86_64.txt
@@ -8,6 +8,7 @@
add_subdirectory(codecs)
add_subdirectory(config)
+add_subdirectory(dread_cache_service)
add_subdirectory(events)
add_subdirectory(partition_key_range)
add_subdirectory(ut)
@@ -79,6 +80,7 @@ target_sources(ydb-core-persqueue PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/persqueue/utils.cpp
${CMAKE_SOURCE_DIR}/ydb/core/persqueue/write_meta.cpp
${CMAKE_SOURCE_DIR}/ydb/core/persqueue/microseconds_sliding_window.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/dread_cache_service/caching_service.cpp
)
generate_enum_serilization(ydb-core-persqueue
${CMAKE_SOURCE_DIR}/ydb/core/persqueue/sourceid.h
diff --git a/ydb/core/persqueue/CMakeLists.windows-x86_64.txt b/ydb/core/persqueue/CMakeLists.windows-x86_64.txt
index fcb42492da..6c3e709acf 100644
--- a/ydb/core/persqueue/CMakeLists.windows-x86_64.txt
+++ b/ydb/core/persqueue/CMakeLists.windows-x86_64.txt
@@ -8,6 +8,7 @@
add_subdirectory(codecs)
add_subdirectory(config)
+add_subdirectory(dread_cache_service)
add_subdirectory(events)
add_subdirectory(partition_key_range)
add_subdirectory(ut)
@@ -78,6 +79,7 @@ target_sources(ydb-core-persqueue PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/persqueue/utils.cpp
${CMAKE_SOURCE_DIR}/ydb/core/persqueue/write_meta.cpp
${CMAKE_SOURCE_DIR}/ydb/core/persqueue/microseconds_sliding_window.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/dread_cache_service/caching_service.cpp
)
generate_enum_serilization(ydb-core-persqueue
${CMAKE_SOURCE_DIR}/ydb/core/persqueue/sourceid.h
diff --git a/ydb/core/persqueue/dread_cache_service/CMakeLists.txt b/ydb/core/persqueue/dread_cache_service/CMakeLists.txt
new file mode 100644
index 0000000000..1703b0a27b
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(ut)
diff --git a/ydb/core/persqueue/dread_cache_service/caching_service.cpp b/ydb/core/persqueue/dread_cache_service/caching_service.cpp
new file mode 100644
index 0000000000..d9d20a6dbf
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/caching_service.cpp
@@ -0,0 +1,546 @@
+#include "caching_service.h"
+
+#include <ydb/public/api/protos/persqueue_error_codes_v1.pb.h>
+#include <ydb/public/api/protos/ydb_topic.pb.h>
+#include <ydb/public/lib/base/msgbus_status.h>
+#include <ydb/core/persqueue/key.h>
+#include <ydb/core/persqueue/writer/source_id_encoding.h>
+#include <ydb/core/persqueue/write_meta.h>
+#include <ydb/core/protos/grpc_pq_old.pb.h>
+#include <ydb/services/persqueue_v1/actors/events.h>
+#include <ydb/services/persqueue_v1/actors/persqueue_utils.h>
+#include <ydb/library/actors/core/actor_bootstrapped.h>
+#include <contrib/libs/protobuf/src/google/protobuf/util/time_util.h>
+
+namespace NKikimr::NPQ {
+using namespace NActors;
+using namespace Ydb::Topic;
+using namespace NGRpcProxy::V1;
+
+
+i32 GetDataChunkCodec(const NKikimrPQClient::TDataChunk& proto) {
+ if (proto.HasCodec()) {
+ return proto.GetCodec() + 1;
+ }
+ return 0;
+}
+
+
+class TPQDirectReadCacheService : public TActorBootstrapped<TPQDirectReadCacheService> {
+public:
+ TPQDirectReadCacheService(const ::NMonitoring::TDynamicCounterPtr& counters)
+ : Counters(counters)
+    {}
+
+ void Bootstrap(const TActorContext& ctx) {
+ LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "Direct read cache created");
+
+ Become(&TThis::StateWork);
+ }
+
+ STRICT_STFUNC(StateWork,
+ hFunc(TEvPQ::TEvPublishDirectRead, HandlePublish)
+ hFunc(TEvPQ::TEvStageDirectReadData, HandleFetchData)
+ hFunc(TEvPQ::TEvForgetDirectRead, HandleForget)
+ hFunc(TEvPQ::TEvRegisterDirectReadSession, HandleRegister)
+ hFunc(TEvPQ::TEvDeregisterDirectReadSession, HandleDeregister)
+ hFunc(TEvPQ::TEvGetFullDirectReadData, HandleGetData)
+ hFunc(TEvPQProxy::TEvDirectReadDataSessionConnected, HandleCreateClientSession)
+ hFunc(TEvPQProxy::TEvDirectReadDataSessionDead, HandleDestroyClientSession)
+ )
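+
+    // Read lifecycle, as implemented by the handlers below: a server session is
+    // keyed by (read session id, partition session id) and carries the tablet
+    // generation that registered it. A read is first staged (invisible to the
+    // client), becomes deliverable on publish, and is dropped on forget or when
+    // the session is deregistered or superseded by a newer generation.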
+
+private:
+ using TSessionsMap = THashMap<TReadSessionKey, TCacheServiceData>;
+
+ void HandleCreateClientSession(TEvPQProxy::TEvDirectReadDataSessionConnected::TPtr& ev) {
+ const auto& ctx = ActorContext();
+ auto key = MakeSessionKey(ev->Get());
+ LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, TStringBuilder() << "Direct read cache: client session connected with id '" << key.SessionId << "'");
+ auto sessionIter = ServerSessions.find(key);
+ if (sessionIter.IsEnd()) {
+ LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, TStringBuilder() << "Direct read cache: unknown session id '" << key.SessionId << "', close session");
+ CloseSession(ev->Sender, Ydb::PersQueue::ErrorCode::ErrorCode::BAD_REQUEST, "Unknown session");
+ return;
+ }
+ if (sessionIter->second.Generation != ev->Get()->Generation) {
+ ctx.Send(
+ sessionIter->second.Client->ProxyId,
+ new TEvPQProxy::TEvDirectReadDestroyPartitionSession(key, Ydb::PersQueue::ErrorCode::ErrorCode::ERROR, "Generation mismatch")
+ );
+ return;
+ }
+
+ sessionIter->second.Client = TCacheClientContext{ev->Sender, ev->Get()->StartingReadId};
+ AssignByProxy[ev->Sender].insert(key.PartitionSessionId);
+        while (SendNextReadToClient(sessionIter)) {
+            // Drain every published read the client has not received yet.
+        }
+ }
+
+ void HandleDestroyClientSession(TEvPQProxy::TEvDirectReadDataSessionDead::TPtr& ev) {
+ auto assignIter = AssignByProxy.find(ev->Sender);
+ if (assignIter.IsEnd())
+ return;
+        // Detach every partition session assigned to this proxy. Iterate over a
+        // copy: DestroyClientSession() erases ids from AssignByProxy as it goes.
+        const auto assigned = assignIter->second;
+        for (auto id : assigned) {
+            DestroyClientSession(ServerSessions.find(
+                TReadSessionKey{ev->Get()->Session, id}), false,
+                Ydb::PersQueue::ErrorCode::ErrorCode::OK, "", ev->Sender
+            );
+        }
+ }
+
+ void HandleRegister(TEvPQ::TEvRegisterDirectReadSession::TPtr& ev) {
+ const auto& key = ev->Get()->Session;
+ RegisterServerSession(key, ev->Get()->Generation);
+ }
+
+ void HandleDeregister(TEvPQ::TEvDeregisterDirectReadSession::TPtr& ev) {
+ const auto& key = ev->Get()->Session;
+ const auto& ctx = ActorContext();
+
+ auto destroyDone = DestroyServerSession(ServerSessions.find(key), ev->Get()->Generation);
+ if (destroyDone) {
+ LOG_DEBUG_S(
+ ctx, NKikimrServices::PQ_READ_PROXY,
+ TStringBuilder() << "Direct read cache: server session deregistered: " << key.SessionId
+ );
+ } else {
+ LOG_WARN_S(
+ ctx, NKikimrServices::PQ_READ_PROXY,
+ TStringBuilder() << "Direct read cache: attempted to deregister unknown server session: " << key.SessionId
+ << ":" << key.PartitionSessionId << " with generation " << ev->Get()->Generation << ", ignored"
+ );
+ return;
+ }
+ }
+
+ void HandleFetchData(TEvPQ::TEvStageDirectReadData::TPtr& ev) {
+ const auto& ctx = ActorContext();
+ auto sessionKey = MakeSessionKey(ev->Get());
+ auto sessionIter = ServerSessions.find(sessionKey);
+ if (sessionIter.IsEnd()) {
+ LOG_ERROR_S(
+ ctx, NKikimrServices::PQ_READ_PROXY,
+ TStringBuilder() << "Direct read cache: tried to stage direct read for unregistered session: "
+ << sessionKey.SessionId << ":" << sessionKey.PartitionSessionId
+ );
+ return;
+ }
+ if (sessionIter->second.Generation != ev->Get()->TabletGeneration) {
+ LOG_ALERT_S(
+ ctx, NKikimrServices::PQ_READ_PROXY,
+ TStringBuilder() << "Direct read cache: tried to stage direct read for session " << sessionKey.SessionId
+ << " with generation " << ev->Get()->TabletGeneration << ", previously had this session with generation "
+ << sessionIter->second.Generation << ". Data ignored"
+ );
+ return;
+ }
+ auto ins = sessionIter->second.StagedReads.insert(std::make_pair(ev->Get()->ReadKey.ReadId, ev->Get()->Response));
+ if (!ins.second) {
+ LOG_WARN_S(
+ ctx, NKikimrServices::PQ_READ_PROXY,
+ TStringBuilder() << "Direct read cache: tried to stage duplicate direct read for session " << sessionKey.SessionId << " with id "
+ << ev->Get()->ReadKey.ReadId << ", new data ignored"
+ );
+ return;
+ }
+ ChangeCounterValue("StagedReadDataSize", ins.first->second->ByteSize(), false);
+ ChangeCounterValue("StagedReadsCount", 1, false);
+ LOG_DEBUG_S(
+ ctx, NKikimrServices::PQ_READ_PROXY,
+ TStringBuilder() << "Direct read cache: staged direct read id " << ev->Get()->ReadKey.ReadId << " for session: "
+ << sessionKey.SessionId
+ );
+ }
+
+ void HandlePublish(TEvPQ::TEvPublishDirectRead::TPtr& ev) {
+ const auto& ctx = ActorContext();
+ auto key = MakeSessionKey(ev->Get());
+ const auto readId = ev->Get()->ReadKey.ReadId;
+ LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, TStringBuilder() << "Direct read cache: publish read: " << readId << " for session " << key.SessionId);
+ auto iter = ServerSessions.find(key);
+ if (iter.IsEnd()) {
+ LOG_ERROR_S(
+ ctx, NKikimrServices::PQ_READ_PROXY,
+ TStringBuilder() << "Direct read cache: attempt to publish read for unknow session " << key.SessionId << " ignored"
+ );
+ return;
+ }
+
+ const auto& generation = ev->Get()->TabletGeneration;
+ if (iter->second.Generation != generation)
+ return;
+
+ auto stagedIter = iter->second.StagedReads.find(readId);
+ if (stagedIter == iter->second.StagedReads.end()) {
+ LOG_ERROR_S(
+ ctx, NKikimrServices::PQ_READ_PROXY,
+ TStringBuilder() << "Direct read cache: attempt to publish unknown read id " << readId << " from session: "
+ << key.SessionId << " ignored");
+ return;
+ }
+ auto inserted = iter->second.Reads.insert(std::make_pair(ev->Get()->ReadKey.ReadId, stagedIter->second)).second;
+ if (inserted) {
+ ChangeCounterValue("PublishedReadDataSize", stagedIter->second->ByteSize(), false);
+ ChangeCounterValue("PublishedReadsCount", 1, false);
+ }
+ ChangeCounterValue("StagedReadDataSize", -stagedIter->second->ByteSize(), false);
+ ChangeCounterValue("StagedReadsCount", -1, false);
+
+ iter->second.StagedReads.erase(stagedIter);
+
+ SendNextReadToClient(iter);
+ }
+
+ void HandleForget(TEvPQ::TEvForgetDirectRead::TPtr& ev) {
+ const auto& ctx = ActorContext();
+ auto key = MakeSessionKey(ev->Get());
+ auto iter = ServerSessions.find(key);
+ if (iter.IsEnd()) {
+ LOG_DEBUG_S(
+ ctx, NKikimrServices::PQ_READ_PROXY,
+ TStringBuilder() << "Direct read cache: attempt to forget read for unknown session: "
+ << ev->Get()->ReadKey.SessionId << " ignored"
+ );
+ return;
+ }
+ LOG_DEBUG_S(
+ ctx, NKikimrServices::PQ_READ_PROXY,
+ TStringBuilder() << "Direct read cache: forget read: " << ev->Get()->ReadKey.ReadId << " for session "
+ << key.SessionId
+ );
+
+ const auto& generation = ev->Get()->TabletGeneration;
+ if (iter->second.Generation != generation) { // Stale generation in event, ignore it
+ return;
+ }
+ auto readIter = iter->second.Reads.find(ev->Get()->ReadKey.ReadId);
+ if (readIter != iter->second.Reads.end()) {
+ ChangeCounterValue("PublishedReadDataSize", -readIter->second->ByteSize(), false);
+ ChangeCounterValue("PublishedReadsCount", -1, false);
+
+ iter->second.Reads.erase(readIter);
+ }
+ auto stagedIter = iter->second.StagedReads.find(ev->Get()->ReadKey.ReadId);
+ if (stagedIter != iter->second.StagedReads.end()) {
+ ChangeCounterValue("StagedReadDataSize", -stagedIter->second->ByteSize(), false);
+ ChangeCounterValue("StagedReadsCount", -1, false);
+ iter->second.StagedReads.erase(stagedIter);
+ }
+ }
+
+ void DestroyClientSession(
+ TSessionsMap::iterator sessionIter, bool doRespondToProxy, Ydb::PersQueue::ErrorCode::ErrorCode code,
+ const TString& reason, const TMaybe<TActorId>& proxyId = Nothing()
+ ) {
+ if (sessionIter.IsEnd() || !sessionIter->second.Client.Defined())
+ return;
+ auto& client = sessionIter->second.Client.GetRef();
+ if (proxyId.Defined() && *proxyId != client.ProxyId)
+ return;
+
+ if (doRespondToProxy) {
+ DestroyPartitionSession(sessionIter, code, reason);
+ }
+ auto assignIter = AssignByProxy.find(sessionIter->second.Client->ProxyId);
+ if (!assignIter.IsEnd()) {
+ assignIter->second.erase(sessionIter->first.PartitionSessionId);
+ }
+ sessionIter->second.Client = Nothing();
+ }
+
+ [[nodiscard]] bool DestroyServerSession(TSessionsMap::iterator sessionIter, ui64 generation) {
+ if (sessionIter.IsEnd() || sessionIter->second.Generation > generation)
+ return false;
+ DestroyPartitionSession(sessionIter, Ydb::PersQueue::ErrorCode::READ_ERROR_NO_SESSION, "Closed by server");
+ ServerSessions.erase(sessionIter);
+ ChangeCounterValue("ActiveServerSessions", ServerSessions.size(), true);
+ return true;
+ }
+
+ void RegisterServerSession(const TReadSessionKey& key, ui32 generation) {
+ const auto& ctx = ActorContext();
+ auto sessionsIter = ServerSessions.find(key);
+ if (sessionsIter.IsEnd()) {
+ LOG_DEBUG_S(
+ ctx, NKikimrServices::PQ_READ_PROXY,
+ TStringBuilder() << "Direct read cache: registered server session: " << key.SessionId
+ << ":" << key.PartitionSessionId << " with generation " << generation
+ );
+ ServerSessions.insert(std::make_pair(key, TCacheServiceData{generation}));
+ } else if (sessionsIter->second.Generation == generation) {
+ LOG_WARN_S(
+ ctx, NKikimrServices::PQ_READ_PROXY,
+ TStringBuilder() << "Direct read cache: attempted to register duplicate server session: " << key.SessionId
+ << ":" << key.PartitionSessionId << " with same generation " << generation << ", ignored"
+ );
+ } else if (DestroyServerSession(sessionsIter, generation)) {
+ LOG_DEBUG_S(
+ ctx, NKikimrServices::PQ_READ_PROXY,
+ TStringBuilder() << "Direct read cache: registered server session: " << key.SessionId
+ << ":" << key.PartitionSessionId << " with generation " << generation
+ << ", killed existing session with older generation "
+ );
+ ServerSessions.insert(std::make_pair(key, TCacheServiceData{generation}));
+ } else {
+ LOG_INFO_S(
+ ctx, NKikimrServices::PQ_READ_PROXY,
+ TStringBuilder() << "Direct read cache: attempted to register server session: " << key.SessionId
+ << ":" << key.PartitionSessionId << " with stale generation " << generation << ", ignored"
+ );
+ }
+ ChangeCounterValue("ActiveServerSessions", ServerSessions.size(), true);
+ }
+
+ template<class TEv>
+ const TReadSessionKey MakeSessionKey(TEv* ev) {
+ return TReadSessionKey{ev->ReadKey.SessionId, ev->ReadKey.PartitionSessionId};
+ }
+
+ void HandleGetData(TEvPQ::TEvGetFullDirectReadData::TPtr& ev) {
+ auto* response = new TEvPQ::TEvGetFullDirectReadData();
+ auto& data = response->Data;
+ auto key = MakeSessionKey(ev->Get());
+
+ if (key.SessionId.Empty()) {
+ for (const auto& [k,v] : ServerSessions) {
+ data.emplace_back(k, v);
+ }
+ } else {
+ auto iter = ServerSessions.find(key);
+ if (iter.IsEnd()) {
+ response->Error = true;
+ } else if (ev->Get()->Generation == iter->second.Generation) {
+ data.emplace_back(key, iter->second);
+ }
+ }
+ ActorContext().Send(ev->Sender, response);
+ }
+
+private:
+ using TServerMessage = StreamDirectReadMessage::FromServer;
+ using TClientMessage = StreamDirectReadMessage::FromClient;
+ using IContext = NGRpcServer::IGRpcStreamingContext<TClientMessage, TServerMessage>;
+
+ bool SendNextReadToClient(TSessionsMap::iterator& sessionIter) {
+ if (sessionIter.IsEnd() || !sessionIter->second.Client.Defined()) {
+ return false;
+ }
+ auto& client = sessionIter->second.Client.GetRef();
+ auto nextData = sessionIter->second.Reads.lower_bound(client.NextReadId);
+ if (nextData == sessionIter->second.Reads.end()) {
+ return false;
+ }
+ auto result = SendData(sessionIter->first.PartitionSessionId, client, nextData->first, nextData->second);
+ if (!result) {
+            // TODO (open question): on a partition response that fails validation, should we
+            // kill the entire session or just this partition session?
+ DestroyClientSession(sessionIter, false, Ydb::PersQueue::ErrorCode::OK, "");
+ return false;
+ }
+ client.NextReadId = nextData->first + 1;
+ return true;
+ }
+
+ [[nodiscard]] bool SendData(
+ ui64 partSessionId, TCacheClientContext& proxyClient, ui64 readId, const std::shared_ptr<NKikimrClient::TResponse>& response
+ ) {
+ const auto& ctx = ActorContext();
+ auto message = std::make_shared<StreamDirectReadMessage::FromServer>();
+ auto* directReadMessage = message->mutable_direct_read_response();
+ directReadMessage->set_direct_read_id(readId);
+ directReadMessage->set_partition_session_id(partSessionId);
+
+        auto ok = ValidatePartitionResponse(proxyClient, *response);
+ if (!ok) {
+ return false;
+ }
+
+ FillBatchedData(directReadMessage->mutable_partition_data(), response->GetPartitionResponse().GetCmdReadResult(),
+ partSessionId);
+ message->set_status(Ydb::StatusIds::SUCCESS);
+
+ LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, TStringBuilder() << "Direct read cache: send data to client. AssignId: "
+ << partSessionId << ", readId: " << readId);
+
+ ctx.Send(proxyClient.ProxyId, new TEvPQProxy::TEvDirectReadSendClientData(std::move(message)));
+ return true;
+ }
+
+ void CloseSession(
+ const TActorId& proxyId,
+ Ydb::PersQueue::ErrorCode::ErrorCode code,
+ const TString& reason
+ ) {
+ const auto& ctx = ActorContext();
+ ctx.Send(proxyId, new TEvPQProxy::TEvDirectReadCloseSession(code, reason));
+        LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, TStringBuilder() << "Direct read cache: close session for proxy " << proxyId.ToString());
+    }
+
+ bool DestroyPartitionSession(
+ TSessionsMap::iterator sessionIter, Ydb::PersQueue::ErrorCode::ErrorCode code, const TString& reason
+ ) {
+ if (sessionIter.IsEnd() || !sessionIter->second.Client.Defined()) {
+ return false;
+ }
+
+ const auto& ctx = ActorContext();
+ ctx.Send(
+ sessionIter->second.Client->ProxyId, new TEvPQProxy::TEvDirectReadDestroyPartitionSession(sessionIter->first, code, reason)
+ );
+ LOG_DEBUG_S(
+ ctx, NKikimrServices::PQ_READ_PROXY,
+ TStringBuilder() << " Direct read cache: close session for proxy "
+ << sessionIter->second.Client->ProxyId.ToString()
+ );
+ return true;
+ }
+
+ void ChangeCounterValue(const TString& name, i64 value, bool isAbs) {
+ if (!Counters)
+ return;
+ auto counter = Counters->GetCounter(name, false);
+ if (isAbs)
+ counter->Set(value);
+ else if (value >= 0)
+ counter->Add(value);
+ else
+ counter->Sub(-value);
+ }
+
+    bool ValidatePartitionResponse(
+ TCacheClientContext& proxyClient, NKikimrClient::TResponse& response
+ ) {
+ if (response.HasErrorCode() && response.GetErrorCode() != NPersQueue::NErrorCode::OK) {
+ CloseSession(
+ proxyClient.ProxyId,
+ NGRpcProxy::V1::ConvertOldCode(response.GetErrorCode()),
+ "Status is not ok: " + response.GetErrorReason()
+ );
+ return false;
+ }
+
+        if (response.GetStatus() != NKikimr::NMsgBusProxy::MSTATUS_OK) { // malformed response, close the session
+ CloseSession(
+ proxyClient.ProxyId,
+ Ydb::PersQueue::ErrorCode::ERROR,
+ "Status is not ok: " + response.GetErrorReason()
+ );
+ return false;
+ }
+        if (!response.HasPartitionResponse()) { // malformed response, close the session
+ CloseSession(
+ proxyClient.ProxyId,
+ Ydb::PersQueue::ErrorCode::ERROR,
+ "Direct read cache got empty partition response"
+ );
+ return false;
+ }
+
+ const auto& partResponse = response.GetPartitionResponse();
+        if (!partResponse.HasCmdReadResult()) { // malformed response, close the session
+ CloseSession(
+ proxyClient.ProxyId,
+ Ydb::PersQueue::ErrorCode::ERROR,
+ "Malformed response from partition"
+ );
+ return false;
+ }
+ return true;
+ }
+
+ void FillBatchedData(auto* partitionData, const NKikimrClient::TCmdReadResult& res, ui64 assignId) {
+ partitionData->set_partition_session_id(assignId);
+
+ i32 batchCodec = 0; // UNSPECIFIED
+
+ StreamReadMessage::ReadResponse::Batch* currentBatch = nullptr;
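+        // Batching rule: start a new batch whenever the write timestamp, producer id
+        // or codec differs from the current batch (see the condition below); otherwise
+        // append the message to the current batch.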
+ for (ui32 i = 0; i < res.ResultSize(); ++i) {
+ const auto& r = res.GetResult(i);
+
+ auto proto(GetDeserializedData(r.GetData()));
+ if (proto.GetChunkType() != NKikimrPQClient::TDataChunk::REGULAR) {
+                continue; // TODO: such chunks must not appear in production
+ }
+
+ TString sourceId;
+ if (!r.GetSourceId().empty()) {
+ sourceId = NPQ::NSourceIdEncoding::Decode(r.GetSourceId());
+ }
+
+ i64 currBatchWrittenAt = currentBatch ? ::google::protobuf::util::TimeUtil::TimestampToMilliseconds(currentBatch->written_at()) : 0;
+ if (currentBatch == nullptr || currBatchWrittenAt != static_cast<i64>(r.GetWriteTimestampMS()) ||
+ currentBatch->producer_id() != sourceId ||
+ GetDataChunkCodec(proto) != batchCodec
+ ) {
+                // If the write time and source id are the same, the remaining fields will match too.
+ currentBatch = partitionData->add_batches();
+                i64 writeTs = static_cast<i64>(r.GetWriteTimestampMS());
+                Y_ABORT_UNLESS(writeTs >= 0);
+                *currentBatch->mutable_written_at() = ::google::protobuf::util::TimeUtil::MillisecondsToTimestamp(writeTs);
+ currentBatch->set_producer_id(std::move(sourceId));
+ batchCodec = GetDataChunkCodec(proto);
+ currentBatch->set_codec(batchCodec);
+
+ if (proto.HasMeta()) {
+ const auto& header = proto.GetMeta();
+ if (header.HasServer()) {
+ (*currentBatch->mutable_write_session_meta())["server"] = header.GetServer();
+ }
+ if (header.HasFile()) {
+ (*currentBatch->mutable_write_session_meta())["file"] = header.GetFile();
+ }
+ if (header.HasIdent()) {
+ (*currentBatch->mutable_write_session_meta())["ident"] = header.GetIdent();
+ }
+ if (header.HasLogType()) {
+ (*currentBatch->mutable_write_session_meta())["logtype"] = header.GetLogType();
+ }
+ }
+ if (proto.HasExtraFields()) {
+ const auto& map = proto.GetExtraFields();
+ for (const auto& kv : map.GetItems()) {
+ (*currentBatch->mutable_write_session_meta())[kv.GetKey()] = kv.GetValue();
+ }
+ }
+
+ if (proto.HasIp() && IsUtf(proto.GetIp())) {
+ (*currentBatch->mutable_write_session_meta())["_ip"] = proto.GetIp();
+ }
+ }
+
+ auto* message = currentBatch->add_message_data();
+
+ message->set_seq_no(r.GetSeqNo());
+ message->set_offset(r.GetOffset());
+ message->set_data(proto.GetData());
+ message->set_uncompressed_size(r.GetUncompressedSize());
+
+ *message->mutable_created_at() =
+ ::google::protobuf::util::TimeUtil::MillisecondsToTimestamp(r.GetCreateTimestampMS());
+
+ message->set_message_group_id(currentBatch->producer_id());
+            auto* msgMeta = message->mutable_metadata_items();
+            *msgMeta = proto.GetMessageMeta();
+ }
+ }
+private:
+ TSessionsMap ServerSessions;
+ THashMap<TActorId, TSet<ui64>> AssignByProxy;
+
+ ::NMonitoring::TDynamicCounterPtr Counters;
+};
+
+
+IActor* CreatePQDReadCacheService(const ::NMonitoring::TDynamicCounterPtr& counters) {
+ Y_VERIFY_DEBUG(counters);
+ return new TPQDirectReadCacheService(
+ GetServiceCounters(counters, "persqueue")->GetSubgroup("subsystem", "caching_service"));
+}
+
+} // namespace NKikimr::NPQ
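For orientation, a minimal sketch of how another actor would drive this cache;
the event shapes follow the unit tests in caching_proxy_ut.cpp below, and ctx
is assumed to be an actor context available at the call site:

    // Sketch only: register a server session ({session id, partition session id},
    // tablet generation 1), then stage, publish and finally forget read id 1.
    const auto cacheId = NKikimr::NPQ::MakePQDReadCacheServiceActorId();
    ctx.Send(cacheId, new TEvPQ::TEvRegisterDirectReadSession({"session1", 1}, 1));
    ctx.Send(cacheId, new TEvPQ::TEvStageDirectReadData(
        {"session1", 1, 1}, 1, std::make_shared<NKikimrClient::TResponse>()));
    ctx.Send(cacheId, new TEvPQ::TEvPublishDirectRead({"session1", 1, 1}, 1));
    ctx.Send(cacheId, new TEvPQ::TEvForgetDirectRead({"session1", 1, 1}, 1));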
diff --git a/ydb/core/persqueue/dread_cache_service/caching_service.h b/ydb/core/persqueue/dread_cache_service/caching_service.h
new file mode 100644
index 0000000000..8ae61876c5
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/caching_service.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include <ydb/library/actors/core/actor.h>
+#include <ydb/core/persqueue/events/internal.h>
+
+namespace NKikimr::NPQ {
+
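+// Fixed service actor id (node id 0, service name "PQCacheProxy"): node id 0
+// makes it resolve to the instance registered as a local service on the
+// current node (see TPersQueueDirectReadCacheInitializer).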
+inline NActors::TActorId MakePQDReadCacheServiceActorId() {
+ return NActors::TActorId(0, "PQCacheProxy");
+}
+
+IActor* CreatePQDReadCacheService(const ::NMonitoring::TDynamicCounterPtr& counters);
+
+} // namespace NKikimr::NPQ
diff --git a/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.darwin-arm64.txt b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.darwin-arm64.txt
new file mode 100644
index 0000000000..a8a2d23b42
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.darwin-arm64.txt
@@ -0,0 +1,79 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(ydb-core-persqueue-dread_cache_service-ut)
+target_compile_options(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_include_directories(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/persqueue
+)
+target_link_libraries(ydb-core-persqueue-dread_cache_service-ut PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ cpp-testing-unittest_main
+ ydb-core-persqueue
+ persqueue-ut-common
+ core-testlib-default
+ ydb_persqueue_core-ut-ut_utils
+)
+target_link_options(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+ -Wl,-platform_version,macos,11.0,11.0
+ -fPIC
+ -fPIC
+ -framework
+ CoreFoundation
+)
+target_sources(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/dread_cache_service/ut/caching_proxy_ut.cpp
+)
+set_property(
+ TARGET
+ ydb-core-persqueue-dread_cache_service-ut
+ PROPERTY
+ SPLIT_FACTOR
+ 10
+)
+add_yunittest(
+ NAME
+ ydb-core-persqueue-dread_cache_service-ut
+ TEST_TARGET
+ ydb-core-persqueue-dread_cache_service-ut
+ TEST_ARG
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+set_yunittest_property(
+ TEST
+ ydb-core-persqueue-dread_cache_service-ut
+ PROPERTY
+ LABELS
+ MEDIUM
+)
+set_yunittest_property(
+ TEST
+ ydb-core-persqueue-dread_cache_service-ut
+ PROPERTY
+ PROCESSORS
+ 1
+)
+set_yunittest_property(
+ TEST
+ ydb-core-persqueue-dread_cache_service-ut
+ PROPERTY
+ TIMEOUT
+ 60
+)
+target_allocator(ydb-core-persqueue-dread_cache_service-ut
+ system_allocator
+)
+vcs_info(ydb-core-persqueue-dread_cache_service-ut)
diff --git a/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.darwin-x86_64.txt b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..4809c9a471
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,80 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(ydb-core-persqueue-dread_cache_service-ut)
+target_compile_options(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_include_directories(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/persqueue
+)
+target_link_libraries(ydb-core-persqueue-dread_cache_service-ut PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ cpp-testing-unittest_main
+ ydb-core-persqueue
+ persqueue-ut-common
+ core-testlib-default
+ ydb_persqueue_core-ut-ut_utils
+)
+target_link_options(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+ -Wl,-platform_version,macos,11.0,11.0
+ -fPIC
+ -fPIC
+ -framework
+ CoreFoundation
+)
+target_sources(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/dread_cache_service/ut/caching_proxy_ut.cpp
+)
+set_property(
+ TARGET
+ ydb-core-persqueue-dread_cache_service-ut
+ PROPERTY
+ SPLIT_FACTOR
+ 10
+)
+add_yunittest(
+ NAME
+ ydb-core-persqueue-dread_cache_service-ut
+ TEST_TARGET
+ ydb-core-persqueue-dread_cache_service-ut
+ TEST_ARG
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+set_yunittest_property(
+ TEST
+ ydb-core-persqueue-dread_cache_service-ut
+ PROPERTY
+ LABELS
+ MEDIUM
+)
+set_yunittest_property(
+ TEST
+ ydb-core-persqueue-dread_cache_service-ut
+ PROPERTY
+ PROCESSORS
+ 1
+)
+set_yunittest_property(
+ TEST
+ ydb-core-persqueue-dread_cache_service-ut
+ PROPERTY
+ TIMEOUT
+ 60
+)
+target_allocator(ydb-core-persqueue-dread_cache_service-ut
+ system_allocator
+)
+vcs_info(ydb-core-persqueue-dread_cache_service-ut)
diff --git a/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.linux-aarch64.txt b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..6fba9d2680
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,83 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(ydb-core-persqueue-dread_cache_service-ut)
+target_compile_options(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_include_directories(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/persqueue
+)
+target_link_libraries(ydb-core-persqueue-dread_cache_service-ut PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ cpp-testing-unittest_main
+ ydb-core-persqueue
+ persqueue-ut-common
+ core-testlib-default
+ ydb_persqueue_core-ut-ut_utils
+)
+target_link_options(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_sources(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/dread_cache_service/ut/caching_proxy_ut.cpp
+)
+set_property(
+ TARGET
+ ydb-core-persqueue-dread_cache_service-ut
+ PROPERTY
+ SPLIT_FACTOR
+ 10
+)
+add_yunittest(
+ NAME
+ ydb-core-persqueue-dread_cache_service-ut
+ TEST_TARGET
+ ydb-core-persqueue-dread_cache_service-ut
+ TEST_ARG
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+set_yunittest_property(
+ TEST
+ ydb-core-persqueue-dread_cache_service-ut
+ PROPERTY
+ LABELS
+ MEDIUM
+)
+set_yunittest_property(
+ TEST
+ ydb-core-persqueue-dread_cache_service-ut
+ PROPERTY
+ PROCESSORS
+ 1
+)
+set_yunittest_property(
+ TEST
+ ydb-core-persqueue-dread_cache_service-ut
+ PROPERTY
+ TIMEOUT
+ 60
+)
+target_allocator(ydb-core-persqueue-dread_cache_service-ut
+ cpp-malloc-jemalloc
+)
+vcs_info(ydb-core-persqueue-dread_cache_service-ut)
diff --git a/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.linux-x86_64.txt b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..a8c5e30700
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,85 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(ydb-core-persqueue-dread_cache_service-ut)
+target_compile_options(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_include_directories(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/persqueue
+)
+target_link_libraries(ydb-core-persqueue-dread_cache_service-ut PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ cpp-testing-unittest_main
+ ydb-core-persqueue
+ persqueue-ut-common
+ core-testlib-default
+ ydb_persqueue_core-ut-ut_utils
+)
+target_link_options(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_sources(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/dread_cache_service/ut/caching_proxy_ut.cpp
+)
+set_property(
+ TARGET
+ ydb-core-persqueue-dread_cache_service-ut
+ PROPERTY
+ SPLIT_FACTOR
+ 10
+)
+add_yunittest(
+ NAME
+ ydb-core-persqueue-dread_cache_service-ut
+ TEST_TARGET
+ ydb-core-persqueue-dread_cache_service-ut
+ TEST_ARG
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+set_yunittest_property(
+ TEST
+ ydb-core-persqueue-dread_cache_service-ut
+ PROPERTY
+ LABELS
+ MEDIUM
+)
+set_yunittest_property(
+ TEST
+ ydb-core-persqueue-dread_cache_service-ut
+ PROPERTY
+ PROCESSORS
+ 1
+)
+set_yunittest_property(
+ TEST
+ ydb-core-persqueue-dread_cache_service-ut
+ PROPERTY
+ TIMEOUT
+ 60
+)
+target_allocator(ydb-core-persqueue-dread_cache_service-ut
+ cpp-malloc-tcmalloc
+ libs-tcmalloc-no_percpu_cache
+)
+vcs_info(ydb-core-persqueue-dread_cache_service-ut)
diff --git a/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.txt b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.txt
new file mode 100644
index 0000000000..d863ebd180
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.txt
@@ -0,0 +1,19 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
+ include(CMakeLists.darwin-arm64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+endif()
diff --git a/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.windows-x86_64.txt b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..1c348b2635
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/ut/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,73 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(ydb-core-persqueue-dread_cache_service-ut)
+target_compile_options(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_include_directories(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/persqueue
+)
+target_link_libraries(ydb-core-persqueue-dread_cache_service-ut PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ cpp-testing-unittest_main
+ ydb-core-persqueue
+ persqueue-ut-common
+ core-testlib-default
+ ydb_persqueue_core-ut-ut_utils
+)
+target_sources(ydb-core-persqueue-dread_cache_service-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/persqueue/dread_cache_service/ut/caching_proxy_ut.cpp
+)
+set_property(
+ TARGET
+ ydb-core-persqueue-dread_cache_service-ut
+ PROPERTY
+ SPLIT_FACTOR
+ 10
+)
+add_yunittest(
+ NAME
+ ydb-core-persqueue-dread_cache_service-ut
+ TEST_TARGET
+ ydb-core-persqueue-dread_cache_service-ut
+ TEST_ARG
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+set_yunittest_property(
+ TEST
+ ydb-core-persqueue-dread_cache_service-ut
+ PROPERTY
+ LABELS
+ MEDIUM
+)
+set_yunittest_property(
+ TEST
+ ydb-core-persqueue-dread_cache_service-ut
+ PROPERTY
+ PROCESSORS
+ 1
+)
+set_yunittest_property(
+ TEST
+ ydb-core-persqueue-dread_cache_service-ut
+ PROPERTY
+ TIMEOUT
+ 60
+)
+target_allocator(ydb-core-persqueue-dread_cache_service-ut
+ system_allocator
+)
+vcs_info(ydb-core-persqueue-dread_cache_service-ut)
diff --git a/ydb/core/persqueue/dread_cache_service/ut/caching_proxy_ut.cpp b/ydb/core/persqueue/dread_cache_service/ut/caching_proxy_ut.cpp
new file mode 100644
index 0000000000..0280e72592
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/ut/caching_proxy_ut.cpp
@@ -0,0 +1,237 @@
+#include <ydb/core/persqueue/dread_cache_service/caching_service.h>
+#include <ydb/core/persqueue/ut/common/pq_ut_common.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+namespace NKikimr::NPQ {
+
+Y_UNIT_TEST_SUITE(TPQCachingProxyTest) {
+struct TTestSetup {
+ TTestContext Context;
+ TActorId ProxyId;
+ TTestSetup() {
+ Context.Prepare();
+ Context.Runtime->SetLogPriority(NKikimrServices::PQ_READ_PROXY, NLog::PRI_DEBUG);
+ ProxyId = Context.Runtime->Register(CreatePQDReadCacheService(new NMonitoring::TDynamicCounters()));
+ Context.Runtime->AllocateEdgeActor();
+ TDispatchOptions opts;
+ opts.FinalEvents.emplace_back(TEvents::TEvBootstrap::EventType, 1);
+ Context.Runtime->DispatchEvents(opts);
+ }
+ auto* GetRuntime() {
+ return Context.Runtime.Get();
+ }
+    THolder<TEvPQ::TEvGetFullDirectReadData> SendRequest(TEvPQ::TEvGetFullDirectReadData* request, bool expectSuccess = true) {
+        GetRuntime()->Send(ProxyId, Context.Edge, request);
+        auto resp = GetRuntime()->GrabEdgeEvent<TEvPQ::TEvGetFullDirectReadData>();
+        UNIT_ASSERT(resp);
+        UNIT_ASSERT(resp->Error != expectSuccess); // Error must be set iff a failure is expected
+ return resp;
+ }
+};
+
+Y_UNIT_TEST(TestPublishAndForget) {
+ TTestSetup setup;
+ auto runtime = setup.GetRuntime();
+ auto resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData());
+ UNIT_ASSERT(resp->Data.empty());
+
+ {
+ auto* reg = new TEvPQ::TEvRegisterDirectReadSession({"session1", 1}, 1);
+ runtime->Send(setup.ProxyId, TActorId{}, reg);
+ }
+ resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData({"session1", 1}, 1));
+ UNIT_ASSERT_VALUES_EQUAL(resp->Data.size(), 1);
+ UNIT_ASSERT(resp->Data[0].second.Reads.empty());
+ {
+ auto* reg = new TEvPQ::TEvStageDirectReadData(
+ {"session1", 1, 1}, 1, std::make_shared<NKikimrClient::TResponse>()
+ );
+ runtime->Send(setup.ProxyId, TActorId{}, reg);
+ }
+ {
+ auto* reg = new TEvPQ::TEvPublishDirectRead(
+ {"session1", 1, 1},
+ 1
+ );
+ runtime->Send(setup.ProxyId, TActorId{}, reg);
+ }
+ resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData({"session1", 1}, 1));
+ UNIT_ASSERT(!resp->Error);
+ UNIT_ASSERT_VALUES_EQUAL(resp->Data[0].second.Reads.size(), 1);
+ {
+ auto* reg = new TEvPQ::TEvForgetDirectRead(
+ {"session1", 1, 1}, 1
+ );
+ runtime->Send(setup.ProxyId, TActorId{}, reg);
+ }
+ resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData({"session1", 1}, 1));
+ UNIT_ASSERT_VALUES_EQUAL(resp->Data[0].second.Reads.size(), 0);
+}
+
+Y_UNIT_TEST(TestDeregister) {
+ TTestSetup setup;
+ auto runtime = setup.GetRuntime();
+ {
+ auto* reg = new TEvPQ::TEvRegisterDirectReadSession({"session1", 1}, 1);
+ runtime->Send(setup.ProxyId, TActorId{}, reg);
+ }
+ {
+ auto* reg = new TEvPQ::TEvRegisterDirectReadSession({"session2", 1}, 1);
+ runtime->Send(setup.ProxyId, TActorId{}, reg);
+ }
+ auto resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData(
+ {"session1", 1}, 1)
+ );
+ UNIT_ASSERT_VALUES_EQUAL(resp->Data.size(), 1);
+ UNIT_ASSERT(resp->Data[0].second.Reads.empty());
+ resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData());
+ UNIT_ASSERT_VALUES_EQUAL(resp->Data.size(), 2);
+ {
+ auto* reg = new TEvPQ::TEvDeregisterDirectReadSession({"session1", 1}, 1);
+ runtime->Send(setup.ProxyId, TActorId{}, reg);
+ }
+ resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData());
+ UNIT_ASSERT_VALUES_EQUAL(resp->Data.size(), 1);
+}
+
+Y_UNIT_TEST(TestWrongSessionOrGeneration) {
+ TTestSetup setup;
+ auto runtime = setup.GetRuntime();
+ runtime->Send(
+ setup.ProxyId, TActorId{},
+ new TEvPQ::TEvRegisterDirectReadSession({"session1", 1}, 2)
+ );
+ {
+ auto* reg = new TEvPQ::TEvStageDirectReadData(
+ {"session1", 1, 1}, 2, std::make_shared<NKikimrClient::TResponse>()
+ );
+ runtime->Send(setup.ProxyId, TActorId{}, reg);
+ }
+ runtime->Send(
+ setup.ProxyId, TActorId{},
+ new TEvPQ::TEvPublishDirectRead({"session1", 1, 1}, 2)
+ );
+
+    // Re-register with a stale generation, should not have any effect
+ runtime->Send(
+ setup.ProxyId, TActorId{},
+ new TEvPQ::TEvRegisterDirectReadSession({"session1", 1}, 1)
+ );
+ {
+ auto* reg = new TEvPQ::TEvStageDirectReadData(
+ {"session1", 1, 1}, 1, std::make_shared<NKikimrClient::TResponse>()
+ );
+ runtime->Send(setup.ProxyId, TActorId{}, reg);
+ }
+ runtime->Send(
+ setup.ProxyId, TActorId{},
+ new TEvPQ::TEvPublishDirectRead({"session1", 1, 1}, 1)
+ );
+
+ auto resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData({"session1", 1}, 1));
+ UNIT_ASSERT_VALUES_EQUAL(resp->Data.size(), 0);
+
+ // Forget with an old generation should have no effect
+ runtime->Send(
+ setup.ProxyId, TActorId{},
+ new TEvPQ::TEvForgetDirectRead({"session1", 1, 1}, 1)
+ );
+
+ resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData({"session1", 1}, 2));
+ UNIT_ASSERT_VALUES_EQUAL(resp->Data.size(), 1);
+ UNIT_ASSERT_VALUES_EQUAL(resp->Data[0].second.Reads.size(), 1);
+
+ // Requests with an unknown session id or partition session id must fail
+ resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData({"session-2", 1}, 2), false);
+ resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData({"session1", 99}, 2), false);
+}
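The fencing behavior this test pins down can be summarized in one predicate. A minimal sketch, assuming the proxy tracks one known generation per session (hypothetical helper, not part of the patch):

    // Stale-generation events are silently dropped; an unknown session key is an
    // outright error; a newer generation supersedes the recorded session state.
    bool IsStaleGeneration(ui32 knownGeneration, ui32 eventGeneration) {
        return eventGeneration < knownGeneration;
    }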
+
+Y_UNIT_TEST(OutdatedSession) {
+ TTestSetup setup;
+ auto runtime = setup.GetRuntime();
+ runtime->Send(
+ setup.ProxyId, TActorId{},
+ new TEvPQ::TEvRegisterDirectReadSession({"session1", 1}, 1)
+ );
+ {
+ auto* reg = new TEvPQ::TEvStageDirectReadData(
+ {"session1", 1, 1}, 1, std::make_shared<NKikimrClient::TResponse>()
+ );
+ runtime->Send(setup.ProxyId, TActorId{}, reg);
+ }
+ runtime->Send(
+ setup.ProxyId, TActorId{},
+ new TEvPQ::TEvPublishDirectRead({"session1", 1, 1}, 1)
+ );
+
+ runtime->Send(
+ setup.ProxyId, TActorId{},
+ new TEvPQ::TEvRegisterDirectReadSession({"session1", 1}, 2)
+ );
+
+ auto resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData({"session1", 1}, 1));
+ UNIT_ASSERT(resp->Data.empty());
+}
+
+
+Y_UNIT_TEST(MultipleSessions) {
+ TTestSetup setup;
+ auto runtime = setup.GetRuntime();
+ runtime->Send(
+ setup.ProxyId, TActorId{},
+ new TEvPQ::TEvRegisterDirectReadSession({"session1", 1}, 1)
+ );
+ {
+ auto* reg = new TEvPQ::TEvStageDirectReadData(
+ {"session1", 1, 1}, 1, std::make_shared<NKikimrClient::TResponse>()
+ );
+ runtime->Send(setup.ProxyId, TActorId{}, reg);
+ }
+ {
+ auto* reg = new TEvPQ::TEvStageDirectReadData(
+ {"session1", 1, 2}, 1, std::make_shared<NKikimrClient::TResponse>()
+ );
+ runtime->Send(setup.ProxyId, TActorId{}, reg);
+ }
+ runtime->Send(
+ setup.ProxyId, TActorId{},
+ new TEvPQ::TEvPublishDirectRead({"session1", 1, 1}, 1)
+ );
+ runtime->Send(
+ setup.ProxyId, TActorId{},
+ new TEvPQ::TEvPublishDirectRead({"session1", 1, 2}, 1)
+ );
+
+ runtime->Send(
+ setup.ProxyId, TActorId{},
+ new TEvPQ::TEvRegisterDirectReadSession({"session2", 1}, 2)
+ );
+ {
+ auto* reg = new TEvPQ::TEvStageDirectReadData(
+ {"session2", 1, 3}, 2, std::make_shared<NKikimrClient::TResponse>()
+ );
+ runtime->Send(setup.ProxyId, TActorId{}, reg);
+ }
+ runtime->Send(
+ setup.ProxyId, TActorId{},
+ new TEvPQ::TEvPublishDirectRead({"session2", 1, 3}, 2)
+ );
+
+ auto resp = setup.SendRequest(new TEvPQ::TEvGetFullDirectReadData());
+ UNIT_ASSERT_VALUES_EQUAL(resp->Data.size(), 2);
+ for (const auto& [key, data] : resp->Data) {
+ if (key.SessionId == "session1") {
+ UNIT_ASSERT_VALUES_EQUAL(data.Generation, 1);
+ UNIT_ASSERT_VALUES_EQUAL(data.Reads.size(), 2);
+ auto iter = data.Reads.begin();
+ UNIT_ASSERT_VALUES_EQUAL(iter->first, 1);
+ UNIT_ASSERT_VALUES_EQUAL((++iter)->first, 2);
+ } else if (key.SessionId == "session2") {
+ UNIT_ASSERT_VALUES_EQUAL(data.Generation, 2);
+ UNIT_ASSERT_VALUES_EQUAL(data.Reads.size(), 1);
+ UNIT_ASSERT_VALUES_EQUAL(data.Reads.begin()->first, 3);
+ }
+ }
+}
+} // Test suite
+
+} //namespace NKikimr::NPQ
diff --git a/ydb/core/persqueue/dread_cache_service/ut/ya.make b/ydb/core/persqueue/dread_cache_service/ut/ya.make
new file mode 100644
index 0000000000..58f1a6f991
--- /dev/null
+++ b/ydb/core/persqueue/dread_cache_service/ut/ya.make
@@ -0,0 +1,29 @@
+UNITTEST_FOR(ydb/core/persqueue)
+
+FORK_SUBTESTS()
+
+IF (SANITIZER_TYPE == "thread" OR WITH_VALGRIND)
+ SIZE(LARGE)
+ TAG(ya:fat)
+ TIMEOUT(300)
+ELSE()
+ SIZE(MEDIUM)
+ TIMEOUT(60)
+ENDIF()
+
+PEERDIR(
+ ydb/core/persqueue/ut/common
+ ydb/core/testlib/default
+ ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/ut_utils
+)
+
+YQL_LAST_ABI_VERSION()
+
+SRCS(
+ caching_proxy_ut.cpp
+)
+
+END()
diff --git a/ydb/core/persqueue/events/internal.h b/ydb/core/persqueue/events/internal.h
index bfcce7a817..428f3e9b36 100644
--- a/ydb/core/persqueue/events/internal.h
+++ b/ydb/core/persqueue/events/internal.h
@@ -11,7 +11,8 @@
#include <ydb/library/actors/core/event_local.h>
#include <ydb/library/actors/core/actorid.h>
-
+#include <ydb/core/grpc_services/rpc_calls.h>
+#include <ydb/public/api/protos/persqueue_error_codes_v1.pb.h>
#include <util/generic/maybe.h>
namespace NYdb {
@@ -22,6 +23,24 @@ namespace NKikimr {
namespace NPQ {
+ struct TCacheClientContext {
+ TActorId ProxyId;
+ ui64 NextReadId = 1;
+ };
+
+ struct TCacheServiceData {
+ //ui32 TabletId;
+ ui32 Generation = 0;
+ TMap<ui64, std::shared_ptr<NKikimrClient::TResponse>> StagedReads;
+ TMap<ui64, std::shared_ptr<NKikimrClient::TResponse>> Reads;
+ TMaybe<TCacheClientContext> Client;
+ TCacheServiceData() = delete;
+
+ TCacheServiceData(ui32 generation)
+ : Generation(generation)
+ {}
+ };
+
struct TRequestedBlob {
ui64 Offset;
ui16 PartNo;
@@ -68,7 +87,7 @@ namespace NPQ {
inline bool HasError(const T& event) {
return event.Error.HasError();
}
-}
+} // namespace NPQ
struct TEvPQ {
enum EEv {
@@ -140,6 +159,16 @@ struct TEvPQ {
EvFetchResponse,
EvSourceIdRequest,
EvSourceIdResponse,
+ EvPublishRead,
+ EvForgetRead,
+ EvRegisterDirectReadSession,
+ EvRegisterDirectReadSessionResponse,
+ EvDeregisterDirectReadSession,
+ EvStageDirectReadData,
+ EvCacheProxyPublishRead,
+ EvCacheProxyForgetRead,
+ EvGetFullDirectReadData,
+ EvProvideDirectReadInfo,
EvEnd
};
@@ -196,22 +225,24 @@ struct TEvPQ {
};
struct TEvRead : public TEventLocal<TEvRead, EvRead> {
- TEvRead(const ui64 cookie, const ui64 offset, const ui16 partNo, const ui32 count,
+ TEvRead(const ui64 cookie, const ui64 offset, ui64 lastOffset, const ui16 partNo, const ui32 count,
const TString& sessionId, const TString& clientId, const ui32 timeout, const ui32 size,
const ui32 maxTimeLagMs, const ui64 readTimestampMs, const TString& clientDC,
- bool externalOperation)
- : Cookie(cookie)
- , Offset(offset)
- , PartNo(partNo)
- , Count(count)
- , SessionId(sessionId)
- , ClientId(clientId)
- , Timeout(timeout)
- , Size(size)
- , MaxTimeLagMs(maxTimeLagMs)
- , ReadTimestampMs(readTimestampMs)
- , ClientDC(clientDC)
- , ExternalOperation(externalOperation)
+ bool externalOperation, const TActorId& pipeClient)
+ : Cookie(cookie)
+ , Offset(offset)
+ , PartNo(partNo)
+ , Count(count)
+ , SessionId(sessionId)
+ , ClientId(clientId)
+ , Timeout(timeout)
+ , Size(size)
+ , MaxTimeLagMs(maxTimeLagMs)
+ , ReadTimestampMs(readTimestampMs)
+ , ClientDC(clientDC)
+ , ExternalOperation(externalOperation)
+ , PipeClient(pipeClient)
+ , LastOffset(lastOffset)
{}
ui64 Cookie;
@@ -226,6 +257,19 @@ struct TEvPQ {
ui64 ReadTimestampMs;
TString ClientDC;
bool ExternalOperation;
+ TActorId PipeClient;
+ ui64 LastOffset;
+ };
+
+ struct TEvDirectReadBase {
+ TEvDirectReadBase(ui64 cookie, const NPQ::TDirectReadKey& readKey, const TActorId& pipeClient)
+ : Cookie(cookie)
+ , ReadKey(readKey)
+ , PipeClient(pipeClient)
+ {}
+ ui64 Cookie;
+ NPQ::TDirectReadKey ReadKey;
+ TActorId PipeClient;
};
struct TEvMonRequest : public TEventLocal<TEvMonRequest, EvMonRequest> {
@@ -270,18 +314,20 @@ struct TEvPQ {
ESCI_DROP_READ_RULE
};
- TEvSetClientInfo(const ui64 cookie, const TString& clientId, const ui64 offset, const TString& sessionId,
- const ui32 generation, const ui32 step, ESetClientInfoType type = ESCI_OFFSET,
- ui64 readRuleGeneration = 0, bool strict = false)
+ TEvSetClientInfo(const ui64 cookie, const TString& clientId, const ui64 offset, const TString& sessionId, const ui64 partitionSessionId,
+ const ui32 generation, const ui32 step, const TActorId& pipeClient,
+ ESetClientInfoType type = ESCI_OFFSET, ui64 readRuleGeneration = 0, bool strict = false)
: Cookie(cookie)
, ClientId(clientId)
, Offset(offset)
, SessionId(sessionId)
+ , PartitionSessionId(partitionSessionId)
, Generation(generation)
, Step(step)
, Type(type)
, ReadRuleGeneration(readRuleGeneration)
, Strict(strict)
+ , PipeClient(pipeClient)
{
}
@@ -289,13 +335,16 @@ struct TEvPQ {
TString ClientId;
ui64 Offset;
TString SessionId;
+ ui64 PartitionSessionId;
ui32 Generation;
ui32 Step;
ESetClientInfoType Type;
ui64 ReadRuleGeneration;
bool Strict;
+ TActorId PipeClient;
};
+
struct TEvGetClientOffset : public TEventLocal<TEvGetClientOffset, EvGetClientOffset> {
TEvGetClientOffset(const ui64 cookie, const TString& clientId)
: Cookie(cookie)
@@ -365,10 +414,11 @@ struct TEvPQ {
struct TEvProxyResponse : public TEventLocal<TEvProxyResponse, EvProxyResponse> {
TEvProxyResponse(ui64 cookie)
- : Cookie(cookie)
+ : Cookie(cookie)
+ , Response(std::make_shared<NKikimrClient::TResponse>())
{}
ui64 Cookie;
- NKikimrClient::TResponse Response;
+ std::shared_ptr<NKikimrClient::TResponse> Response;
};
struct TEvInitComplete : public TEventLocal<TEvInitComplete, EvInitComplete> {
@@ -458,8 +508,8 @@ struct TEvPQ {
struct TEvPipeDisconnected : public TEventLocal<TEvPipeDisconnected, EvPipeDisconnected> {
explicit TEvPipeDisconnected(const TString& owner, const TActorId& pipeClient)
- : Owner(owner)
- , PipeClient(pipeClient)
+ : Owner(owner)
+ , PipeClient(pipeClient)
{}
TString Owner;
@@ -874,6 +924,71 @@ struct TEvPQ {
struct TEvSourceIdResponse : public TEventPB<TEvSourceIdResponse, NKikimrPQ::TEvSourceIdResponse, EvSourceIdResponse> {
};
+
+ struct TEvRegisterDirectReadSession : public TEventLocal<TEvRegisterDirectReadSession, EvRegisterDirectReadSession> {
+ TEvRegisterDirectReadSession(const NPQ::TReadSessionKey& sessionKey, ui32 tabletGeneration)
+ : Session(sessionKey)
+ , Generation(tabletGeneration)
+ {}
+ NPQ::TReadSessionKey Session;
+ ui32 Generation;
+ };
+
+ struct TEvDeregisterDirectReadSession : public TEventLocal<TEvDeregisterDirectReadSession, EvDeregisterDirectReadSession> {
+ TEvDeregisterDirectReadSession(const NPQ::TReadSessionKey& sessionKey, ui32 tabletGeneration)
+ : Session(sessionKey)
+ , Generation(tabletGeneration)
+ {}
+ NPQ::TReadSessionKey Session;
+ ui32 Generation;
+ };
+
+ struct TEvStageDirectReadData : public TEventLocal<TEvStageDirectReadData, EvStageDirectReadData> {
+ TEvStageDirectReadData(const NPQ::TDirectReadKey& readKey, ui32 tabletGeneration,
+ const std::shared_ptr<NKikimrClient::TResponse>& response)
+ : TabletGeneration(tabletGeneration)
+ , ReadKey(readKey)
+ , Response(response)
+ {}
+ ui32 TabletGeneration;
+ NPQ::TDirectReadKey ReadKey;
+ std::shared_ptr<NKikimrClient::TResponse> Response;
+ };
+
+ struct TEvPublishDirectRead : public TEventLocal<TEvPublishDirectRead, EvCacheProxyPublishRead> {
+ TEvPublishDirectRead(const NPQ::TDirectReadKey& readKey, ui32 tabletGeneration)
+ : ReadKey(readKey)
+ , TabletGeneration(tabletGeneration)
+ {}
+ NPQ::TDirectReadKey ReadKey;
+ ui32 TabletGeneration;
+ };
+
+ struct TEvForgetDirectRead : public TEventLocal<TEvForgetDirectRead, EvCacheProxyForgetRead> {
+ TEvForgetDirectRead(const NPQ::TDirectReadKey& readKey, ui32 tabletGeneration)
+ : TabletGeneration(tabletGeneration)
+ , ReadKey(readKey)
+ {}
+ ui32 TabletGeneration;
+ NPQ::TDirectReadKey ReadKey;
+ };
+
+ struct TEvGetFullDirectReadData : public TEventLocal<TEvGetFullDirectReadData, EvGetFullDirectReadData> {
+ TEvGetFullDirectReadData() = default;
+ TEvGetFullDirectReadData(const NPQ::TReadSessionKey& key, ui32 generation)
+ : ReadKey(key)
+ , Generation(generation)
+ {}
+
+ NPQ::TReadSessionKey ReadKey;
+ ui32 Generation = 0; // initialized so the default-constructed "get everything" request is well-defined
+ bool Error = false;
+ TVector<std::pair<NPQ::TReadSessionKey, NPQ::TCacheServiceData>> Data;
+ };
+
+ struct TEvProvideDirectReadInfo : public TEventLocal<TEvProvideDirectReadInfo, EvProvideDirectReadInfo> {
+ };
+
};
} //NKikimr
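Taken together, the new events form a four-step protocol between a read session and the caching service, fenced by the tablet generation at every step. A minimal happy-path sketch, assuming `ctx` is an actor context and `proxy`, `generation`, `readId`, and `response` are illustrative values:

    // 1. Announce the session to the cache proxy.
    ctx.Send(proxy, new TEvPQ::TEvRegisterDirectReadSession({"session", 1}, generation));
    // 2. Stage the prepared payload; it is not yet visible to readers.
    ctx.Send(proxy, new TEvPQ::TEvStageDirectReadData({"session", 1, readId}, generation, response));
    // 3. Publish: move the read from StagedReads to Reads, making it servable.
    ctx.Send(proxy, new TEvPQ::TEvPublishDirectRead({"session", 1, readId}, generation));
    // 4. Forget: drop the payload once the client has consumed it.
    ctx.Send(proxy, new TEvPQ::TEvForgetDirectRead({"session", 1, readId}, generation));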
diff --git a/ydb/core/persqueue/key.h b/ydb/core/persqueue/key.h
index f1aa0fd506..4be6be821a 100644
--- a/ydb/core/persqueue/key.h
+++ b/ydb/core/persqueue/key.h
@@ -3,6 +3,7 @@
#include <util/generic/buffer.h>
#include <util/string/cast.h>
#include <util/string/printf.h>
+#include <util/str_stl.h>
namespace NKikimr {
namespace NPQ {
@@ -252,5 +253,34 @@ TString GetTxKey(ui64 txId)
return Sprintf("tx_%" PRIu64, txId);
}
+
+struct TReadSessionKey {
+ TString SessionId;
+ ui64 PartitionSessionId = 0;
+ bool operator ==(const TReadSessionKey& rhs) const {
+ return SessionId == rhs.SessionId && PartitionSessionId == rhs.PartitionSessionId;
+ }
+};
+
+struct TDirectReadKey {
+ TString SessionId;
+ ui64 PartitionSessionId = 0;
+ ui64 ReadId = 0;
+ bool operator ==(const TDirectReadKey& rhs) const {
+ return SessionId == rhs.SessionId && PartitionSessionId == rhs.PartitionSessionId && ReadId == rhs.ReadId;
+ }
+};
+
}// NPQ
}// NKikimr
+
+template <>
+struct THash<NKikimr::NPQ::TReadSessionKey> {
+public:
+ inline size_t operator()(const NKikimr::NPQ::TReadSessionKey& key) const {
+ size_t res = 0;
+ res += THash<TString>()(key.SessionId);
+ res += THash<ui64>()(key.PartitionSessionId);
+ return res;
+ }
+};
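The specialization above combines the two field hashes by plain addition, which is cheap but order-insensitive and collision-prone. If stronger mixing is wanted, a boost-style combine is a common alternative; a sketch built from the same THash pieces (hypothetical, not part of the patch):

    inline size_t MixReadSessionKeyHash(const NKikimr::NPQ::TReadSessionKey& key) {
        size_t seed = THash<TString>()(key.SessionId);
        // Golden-ratio mix makes the combine order-sensitive and spreads bits.
        seed ^= THash<ui64>()(key.PartitionSessionId) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
        return seed;
    }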
diff --git a/ydb/core/persqueue/partition.cpp b/ydb/core/persqueue/partition.cpp
index 1fc027d1b0..7d814dd3f2 100644
--- a/ydb/core/persqueue/partition.cpp
+++ b/ydb/core/persqueue/partition.cpp
@@ -137,13 +137,14 @@ void AddCheckDiskRequest(TEvKeyValue::TEvRequest *request, ui32 numChannels) {
}
}
-TPartition::TPartition(ui64 tabletId, ui32 partition, const TActorId& tablet, const TActorId& blobCache,
+TPartition::TPartition(ui64 tabletId, ui32 partition, const TActorId& tablet, ui32 tabletGeneration, const TActorId& blobCache,
const NPersQueue::TTopicConverterPtr& topicConverter, TString dcId, bool isServerless,
const NKikimrPQ::TPQTabletConfig& tabletConfig, const TTabletCountersBase& counters, bool subDomainOutOfSpace, ui32 numChannels,
bool newPartition,
TVector<TTransaction> distrTxs)
: Initializer(this)
, TabletID(tabletId)
+ , TabletGeneration(tabletGeneration)
, Partition(partition)
, TabletConfig(tabletConfig)
, Counters(counters)
@@ -468,7 +469,6 @@ void TPartition::Handle(TEvents::TEvPoisonPill::TPtr&, const TActorContext& ctx)
Die(ctx);
}
-
bool CheckDiskStatus(const TStorageStatusFlags status) {
return !status.Check(NKikimrBlobStorage::StatusDiskSpaceYellowStop);
}
@@ -581,6 +581,7 @@ void TPartition::Handle(TEvPQ::TEvPipeDisconnected::TPtr& ev, const TActorContex
DropOwner(it, ctx);
ProcessChangeOwnerRequests(ctx);
}
+
}
void TPartition::Handle(TEvPQ::TEvPartitionStatus::TPtr& ev, const TActorContext& ctx) {
@@ -929,7 +930,7 @@ void TPartition::ProcessMaxSeqNoRequest(const TActorContext& ctx) {
auto& ev = MaxSeqNoRequests.front();
auto response = MakeHolder<TEvPQ::TEvProxyResponse>(ev->Get()->Cookie);
- NKikimrClient::TResponse& resp = response->Response;
+ NKikimrClient::TResponse& resp = *response->Response;
resp.SetStatus(NMsgBusProxy::MSTATUS_OK);
resp.SetErrorCode(NPersQueue::NErrorCode::OK);
@@ -969,15 +970,18 @@ void TPartition::Handle(TEvPQ::TEvBlobResponse::TPtr& ev, const TActorContext& c
auto it = ReadInfo.find(cookie);
Y_ABORT_UNLESS(it != ReadInfo.end());
+
TReadInfo info = std::move(it->second);
ReadInfo.erase(it);
//make readinfo class
+ auto& userInfo = UsersInfoStorage->GetOrCreate(info.User, ctx);
TReadAnswer answer(info.FormAnswer(
- ctx, *ev->Get(), EndOffset, Partition, &UsersInfoStorage->GetOrCreate(info.User, ctx),
+ ctx, *ev->Get(), EndOffset, Partition, &userInfo,
info.Destination, GetSizeLag(info.Offset), Tablet, Config.GetMeteringMode()
));
-
+ // May be null when FormAnswer produced a TEvError; dereferenced only on the success path below.
+ const auto* proxyResponse = dynamic_cast<TEvPQ::TEvProxyResponse*>(answer.Event.Get());
+
if (HasError(*ev->Get())) {
if (info.IsSubscription) {
TabletCounters.Cumulative()[COUNTER_PQ_READ_SUBSCRIPTION_ERROR].Increment(1);
@@ -988,10 +992,9 @@ void TPartition::Handle(TEvPQ::TEvBlobResponse::TPtr& ev, const TActorContext& c
if (info.IsSubscription) {
TabletCounters.Cumulative()[COUNTER_PQ_READ_SUBSCRIPTION_OK].Increment(1);
}
- const auto& resp = dynamic_cast<TEvPQ::TEvProxyResponse*>(answer.Event.Get())->Response;
TabletCounters.Cumulative()[COUNTER_PQ_READ_OK].Increment(1);
TabletCounters.Percentile()[COUNTER_LATENCY_PQ_READ_OK].IncrementFor((ctx.Now() - info.Timestamp).MilliSeconds());
- TabletCounters.Cumulative()[COUNTER_PQ_READ_BYTES].Increment(resp.ByteSize());
+ TabletCounters.Cumulative()[COUNTER_PQ_READ_BYTES].Increment(proxyResponse->Response->ByteSize());
}
ctx.Send(info.Destination != 0 ? Tablet : ctx.SelfID, answer.Event.Release());
OnReadRequestFinished(cookie, answer.Size, info.User, ctx);
@@ -1693,7 +1696,7 @@ void TPartition::BeginChangePartitionConfig(const NKikimrPQ::TPQTabletConfig& co
ui64 rrGen = i < config.ReadRuleGenerationsSize() ? config.GetReadRuleGenerations(i) : 0;
if (userInfo.ReadRuleGeneration != rrGen) {
- TEvPQ::TEvSetClientInfo act(0, consumer, 0, "", 0, 0,
+ TEvPQ::TEvSetClientInfo act(0, consumer, 0, "", 0, 0, 0, TActorId{},
TEvPQ::TEvSetClientInfo::ESCI_INIT_READ_RULE, rrGen);
ProcessUserAct(act, ctx);
@@ -1703,8 +1706,8 @@ void TPartition::BeginChangePartitionConfig(const NKikimrPQ::TPQTabletConfig& co
for (auto& consumer : hasReadRule) {
GetOrCreatePendingUser(consumer);
- TEvPQ::TEvSetClientInfo act(0, consumer,
- 0, "", 0, 0, TEvPQ::TEvSetClientInfo::ESCI_DROP_READ_RULE, 0);
+ TEvPQ::TEvSetClientInfo act(0, consumer, 0, "", 0, 0, 0, TActorId{},
+ TEvPQ::TEvSetClientInfo::ESCI_DROP_READ_RULE, 0);
ProcessUserAct(act, ctx);
}
@@ -1991,10 +1994,16 @@ void TPartition::ProcessUserAct(TEvPQ::TEvSetClientInfo& act,
return;
}
- if (act.Type == TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION && act.SessionId == userInfo.Session) { //this is retry of current request, answer ok
- auto *ui = UsersInfoStorage->GetIfExists(userInfo.User);
+ if ( //this is retry of current request, answer ok
+ act.Type == TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION
+ && act.SessionId == userInfo.Session
+ && act.Generation == userInfo.Generation
+ && act.Step == userInfo.Step
+ ) {
+ auto* ui = UsersInfoStorage->GetIfExists(userInfo.User);
auto ts = ui ? GetTime(*ui, userInfo.Offset) : std::make_pair<TInstant, TInstant>(TInstant::Zero(), TInstant::Zero());
+ userInfo.PipeClient = act.PipeClient;
ScheduleReplyGetClientOffsetOk(act.Cookie,
userInfo.Offset,
ts.first, ts.second);
@@ -2113,6 +2122,7 @@ void TPartition::EmulatePostProcessUserAct(const TEvPQ::TEvSetClientInfo& act,
userInfo.ReadRuleGeneration = readRuleGeneration;
userInfo.Session = "";
+ userInfo.PartitionSessionId = 0;
userInfo.Generation = userInfo.Step = 0;
userInfo.Offset = 0;
@@ -2136,10 +2146,14 @@ void TPartition::EmulatePostProcessUserAct(const TEvPQ::TEvSetClientInfo& act,
userInfo.Session = session;
userInfo.Generation = generation;
userInfo.Step = step;
- } else if (dropSession || strictCommitOffset) {
+ userInfo.PipeClient = act.PipeClient;
+ userInfo.PartitionSessionId = act.PartitionSessionId;
+ } else if ((dropSession && act.PipeClient == userInfo.PipeClient) || strictCommitOffset) {
userInfo.Session = "";
+ userInfo.PartitionSessionId = 0;
userInfo.Generation = 0;
userInfo.Step = 0;
+ userInfo.PipeClient = {};
}
Y_ABORT_UNLESS(offset <= (ui64)Max<i64>(), "Unexpected Offset: %" PRIu64, offset);
@@ -2293,7 +2307,8 @@ void TPartition::AddCmdWriteUserInfos(NKikimrClient::TKeyValueRequest& request)
auto *ui = UsersInfoStorage->GetIfExists(user);
AddCmdWrite(request,
ikey, ikeyDeprecated,
- userInfo->Offset, userInfo->Generation, userInfo->Step, userInfo->Session,
+ userInfo->Offset, userInfo->Generation, userInfo->Step,
+ userInfo->Session,
ui ? ui->ReadOffsetRewindSum : 0,
userInfo->ReadRuleGeneration);
} else {
@@ -2327,11 +2342,13 @@ TUserInfoBase& TPartition::GetOrCreatePendingUser(const TString& user,
auto i = PendingUsersInfo.find(user);
if (i == PendingUsersInfo.end()) {
auto ui = UsersInfoStorage->GetIfExists(user);
- auto [p, _] = PendingUsersInfo.emplace(user, UsersInfoStorage->CreateUserInfo(user,
- readRuleGeneration));
+ auto [p, _] = PendingUsersInfo.emplace(user, UsersInfoStorage->CreateUserInfo(user, readRuleGeneration));
if (ui) {
p->second.Session = ui->Session;
+ p->second.PartitionSessionId = ui->PartitionSessionId;
+ p->second.PipeClient = ui->PipeClient;
+
p->second.Generation = ui->Generation;
p->second.Step = ui->Step;
p->second.Offset = ui->Offset;
@@ -2362,7 +2379,7 @@ TUserInfoBase* TPartition::GetPendingUserIfExists(const TString& user)
THolder<TEvPQ::TEvProxyResponse> TPartition::MakeReplyOk(const ui64 dst)
{
auto response = MakeHolder<TEvPQ::TEvProxyResponse>(dst);
- NKikimrClient::TResponse& resp = response->Response;
+ NKikimrClient::TResponse& resp = *response->Response;
resp.SetStatus(NMsgBusProxy::MSTATUS_OK);
resp.SetErrorCode(NPersQueue::NErrorCode::OK);
@@ -2375,7 +2392,7 @@ THolder<TEvPQ::TEvProxyResponse> TPartition::MakeReplyGetClientOffsetOk(const ui
const TInstant writeTimestamp, const TInstant createTimestamp)
{
auto response = MakeHolder<TEvPQ::TEvProxyResponse>(dst);
- NKikimrClient::TResponse& resp = response->Response;
+ NKikimrClient::TResponse& resp = *response->Response;
resp.SetStatus(NMsgBusProxy::MSTATUS_OK);
resp.SetErrorCode(NPersQueue::NErrorCode::OK);
diff --git a/ydb/core/persqueue/partition.h b/ydb/core/persqueue/partition.h
index ff7dda0150..914e330e7c 100644
--- a/ydb/core/persqueue/partition.h
+++ b/ydb/core/persqueue/partition.h
@@ -225,8 +225,11 @@ private:
THashMap<TString, TOwnerInfo>::iterator DropOwner(THashMap<TString, TOwnerInfo>::iterator& it,
const TActorContext& ctx);
// will return rcount and rsize also
- TVector<TRequestedBlob> GetReadRequestFromBody(const ui64 startOffset, const ui16 partNo, const ui32 maxCount, const ui32 maxSize, ui32* rcount, ui32* rsize);
- TVector<TClientBlob> GetReadRequestFromHead(const ui64 startOffset, const ui16 partNo, const ui32 maxCount, const ui32 maxSize, const ui64 readTimestampMs, ui32* rcount, ui32* rsize, ui64* insideHeadOffset);
+ TVector<TRequestedBlob> GetReadRequestFromBody(const ui64 startOffset, const ui16 partNo, const ui32 maxCount,
+ const ui32 maxSize, ui32* rcount, ui32* rsize, ui64 lastOffset);
+ TVector<TClientBlob> GetReadRequestFromHead(const ui64 startOffset, const ui16 partNo, const ui32 maxCount,
+ const ui32 maxSize, const ui64 readTimestampMs, ui32* rcount,
+ ui32* rsize, ui64* insideHeadOffset, ui64 lastOffset);
ui64 GetUsedStorage(const TActorContext& ctx);
@@ -335,16 +338,21 @@ private:
void ChangePlanStepAndTxId(ui64 step, ui64 txId);
void ResendPendingEvents(const TActorContext& ctx);
+ void SendReadPreparedProxyResponse(const TReadAnswer& answer, const TReadInfo& readInfo, TUserInfo& user);
+
+ void CheckIfSessionExists(TUserInfoBase& userInfo, const TActorId& newPipe);
+ // void DestroyReadSession(const TReadSessionKey& key);
void Handle(TEvPQ::TEvSourceIdRequest::TPtr& ev, const TActorContext& ctx);
TString LogPrefix() const;
+
public:
static constexpr NKikimrServices::TActivity::EType ActorActivityType() {
return NKikimrServices::TActivity::PERSQUEUE_PARTITION_ACTOR;
}
- TPartition(ui64 tabletId, ui32 partition, const TActorId& tablet, const TActorId& blobCache,
+ TPartition(ui64 tabletId, ui32 partition, const TActorId& tablet, ui32 tabletGeneration, const TActorId& blobCache,
const NPersQueue::TTopicConverterPtr& topicConverter, TString dcId, bool isServerless,
const NKikimrPQ::TPQTabletConfig& config, const TTabletCountersBase& counters, bool SubDomainOutOfSpace, ui32 numChannels,
bool newPartition = false,
@@ -566,6 +574,7 @@ private:
private:
ui64 TabletID;
+ ui32 TabletGeneration;
ui32 Partition;
NKikimrPQ::TPQTabletConfig Config;
NKikimrPQ::TPQTabletConfig TabletConfig;
@@ -748,5 +757,5 @@ private:
TDeque<std::unique_ptr<IEventBase>> PendingEvents;
};
-
} // namespace NKikimr::NPQ
+
diff --git a/ydb/core/persqueue/partition_read.cpp b/ydb/core/persqueue/partition_read.cpp
index 136fd9b3ad..12d7877153 100644
--- a/ydb/core/persqueue/partition_read.cpp
+++ b/ydb/core/persqueue/partition_read.cpp
@@ -3,6 +3,7 @@
#include "partition_util.h"
#include "partition.h"
#include "read.h"
+#include "dread_cache_service/caching_service.h"
#include <ydb/core/base/appdata.h>
#include <ydb/core/base/blobstorage.h>
@@ -42,8 +43,9 @@ void TPartition::FillReadFromTimestamps(const NKikimrPQ::TPQTabletConfig& config
userInfo.HasReadRule = true;
ui64 rrGen = i < config.ReadRuleGenerationsSize() ? config.GetReadRuleGenerations(i) : 0;
if (userInfo.ReadRuleGeneration != rrGen) {
- THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(0, consumer, 0, "", 0, 0,
- TEvPQ::TEvSetClientInfo::ESCI_INIT_READ_RULE, rrGen);
+ THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(
+ 0, consumer, 0, "", 0, 0, 0, TActorId{}, TEvPQ::TEvSetClientInfo::ESCI_INIT_READ_RULE, rrGen
+ );
//
// TODO(abcdef): replace this with a call to ProcessUserAct
//
@@ -66,8 +68,9 @@ void TPartition::FillReadFromTimestamps(const NKikimrPQ::TPQTabletConfig& config
if (userInfo.NoConsumer) {
continue;
}
- THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(0, consumer,
- 0, "", 0, 0, TEvPQ::TEvSetClientInfo::ESCI_DROP_READ_RULE, 0);
+ THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(
+ 0, consumer, 0, "", 0, 0, 0, TActorId{}, TEvPQ::TEvSetClientInfo::ESCI_DROP_READ_RULE, 0
+ );
if (!userInfo.Important && userInfo.LabeledCounters) {
ctx.Send(Tablet, new TEvPQ::TEvPartitionLabeledCountersDrop(Partition, userInfo.LabeledCounters->GetGroup()));
}
@@ -184,7 +187,9 @@ void TPartition::InitUserInfoForImportantClients(const TActorContext& ctx) {
continue;
}
if (!userInfo) {
- userInfo = &UsersInfoStorage->Create(ctx, importantUser, 0, true, "", 0, 0, 0, 0, TInstant::Zero());
+ userInfo = &UsersInfoStorage->Create(
+ ctx, importantUser, 0, true, "", 0, 0, 0, 0, 0, TInstant::Zero(), {}
+ );
}
if (userInfo->Offset < (i64)StartOffset)
userInfo->Offset = StartOffset;
@@ -311,21 +316,21 @@ TReadAnswer TReadInfo::FormAnswer(
const ui64 endOffset,
const ui32 partition,
TUserInfo* userInfo,
- const ui64 cookie,
+ const ui64 destination,
const ui64 sizeLag,
const TActorId& tablet,
const NKikimrPQ::TPQTabletConfig::EMeteringMode meteringMode
) {
Y_UNUSED(meteringMode);
Y_UNUSED(partition);
- THolder<TEvPQ::TEvProxyResponse> answer = MakeHolder<TEvPQ::TEvProxyResponse>(cookie);
- NKikimrClient::TResponse& res = answer->Response;
+ THolder<TEvPQ::TEvProxyResponse> answer = MakeHolder<TEvPQ::TEvProxyResponse>(destination);
+ NKikimrClient::TResponse& res = *answer->Response;
const TEvPQ::TEvBlobResponse* response = &blobResponse;
-
if (HasError(blobResponse)) {
+ Error = true;
return TReadAnswer{
blobResponse.Error.ErrorStr.size(),
- MakeHolder<TEvPQ::TEvError>(blobResponse.Error.ErrorCode, blobResponse.Error.ErrorStr, cookie)
+ MakeHolder<TEvPQ::TEvError>(blobResponse.Error.ErrorCode, blobResponse.Error.ErrorStr, destination)
};
}
@@ -335,6 +340,7 @@ TReadAnswer TReadInfo::FormAnswer(
readResult->SetWaitQuotaTimeMs(WaitQuotaTime.MilliSeconds());
readResult->SetMaxOffset(endOffset);
readResult->SetRealReadOffset(Offset);
+ ui64 realReadOffset = Offset;
readResult->SetReadFromTimestampMs(ReadTimestampMs);
Y_ABORT_UNLESS(endOffset <= (ui64)Max<i64>(), "Max offset is too big: %" PRIu64, endOffset);
@@ -380,12 +386,18 @@ TReadAnswer TReadInfo::FormAnswer(
if (blobValue.empty()) { // this is ok. Means that someone requested too much data or retention race
LOG_DEBUG(ctx, NKikimrServices::PERSQUEUE, "Not full answer here!");
- ui64 answerSize = answer->Response.ByteSize();
+ ui64 answerSize = answer->Response->ByteSize();
if (userInfo && Destination != 0) {
userInfo->ReadDone(ctx, ctx.Now(), answerSize, cnt, ClientDC,
tablet, IsExternalRead);
}
readResult->SetSizeLag(sizeLag - size);
+ RealReadOffset = realReadOffset;
+ LastOffset = Offset - 1;
+ SizeEstimate = answerSize;
+ readResult->SetSizeEstimate(SizeEstimate);
+ readResult->SetLastOffset(LastOffset);
+ readResult->SetEndOffset(endOffset);
return {answerSize, std::move(answer)};
}
Y_ABORT_UNLESS(blobValue.size() == blobs[pos].Size, "value for offset %" PRIu64 " count %u size must be %u, but got %u",
@@ -434,7 +446,6 @@ TReadAnswer TReadInfo::FormAnswer(
}
AddResultBlob(readResult, res, Offset);
-
if (res.IsLastPart()) {
PartNo = 0;
++Offset;
@@ -470,7 +481,6 @@ TReadAnswer TReadInfo::FormAnswer(
);
}
AddResultBlob(readResult, writeBlob, Offset);
-
if (writeBlob.IsLastPart()) {
++Offset;
}
@@ -480,13 +490,20 @@ TReadAnswer TReadInfo::FormAnswer(
}
}
Y_ABORT_UNLESS(Offset <= (ui64)Max<i64>(), "Offset is too big: %" PRIu64, Offset);
- ui64 answerSize = answer->Response.ByteSize();
+ ui64 answerSize = answer->Response->ByteSize();
if (userInfo && Destination != 0) {
userInfo->ReadDone(ctx, ctx.Now(), answerSize, cnt, ClientDC,
tablet, IsExternalRead);
}
readResult->SetSizeLag(sizeLag - size);
+ RealReadOffset = realReadOffset;
+ LastOffset = Offset - 1;
+ SizeEstimate = answerSize;
+ readResult->SetSizeEstimate(SizeEstimate);
+ readResult->SetLastOffset(LastOffset);
+ readResult->SetEndOffset(endOffset);
+
return {answerSize, std::move(answer)};
}
@@ -494,7 +511,9 @@ void TPartition::Handle(TEvPQ::TEvReadTimeout::TPtr& ev, const TActorContext& ct
auto res = Subscriber.OnTimeout(ev);
if (!res)
return;
- TReadAnswer answer(res->FormAnswer(ctx, res->Offset, Partition, nullptr, res->Destination, 0, Tablet, Config.GetMeteringMode()));
+ TReadAnswer answer(res->FormAnswer(
+ ctx, res->Offset, Partition, nullptr, res->Destination, 0, Tablet, Config.GetMeteringMode()
+ ));
ctx.Send(Tablet, answer.Event.Release());
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, " waiting read cookie " << ev->Get()->Cookie
<< " partition " << Partition << " read timeout for " << res->User << " offset " << res->Offset);
@@ -505,7 +524,9 @@ void TPartition::Handle(TEvPQ::TEvReadTimeout::TPtr& ev, const TActorContext& ct
}
-TVector<TRequestedBlob> TPartition::GetReadRequestFromBody(const ui64 startOffset, const ui16 partNo, const ui32 maxCount, const ui32 maxSize, ui32* rcount, ui32* rsize) {
+TVector<TRequestedBlob> TPartition::GetReadRequestFromBody(
+ const ui64 startOffset, const ui16 partNo, const ui32 maxCount, const ui32 maxSize, ui32* rcount, ui32* rsize, ui64 lastOffset
+) {
Y_ABORT_UNLESS(rcount && rsize);
ui32& count = *rcount;
ui32& size = *rsize;
@@ -533,7 +554,10 @@ TVector<TRequestedBlob> TPartition::GetReadRequestFromBody(const ui64 startOffse
cnt = it->Key.GetCount() - (startOffset - it->Key.GetOffset()); //don't count all elements from first blob
sz = (cnt == it->Key.GetCount() ? it->Size : 0); // unread client blobs can be ~8MB each, so don't count their size at all
}
- while (it != DataKeysBody.end() && (size < maxSize && count < maxCount || count == 0)) { //count== 0 grants that blob with offset from ReadFromTimestamp will be readed
+ while (it != DataKeysBody.end()
+ && (size < maxSize && count < maxCount || count == 0) // count == 0 guarantees that the blob at the ReadFromTimestamp offset is still read
+ && (lastOffset == 0 || it->Key.GetOffset() < lastOffset)
+ ) {
size += sz;
count += cnt;
TRequestedBlob reqBlob(it->Key.GetOffset(), it->Key.GetPartNo(), it->Key.GetCount(),
@@ -550,7 +574,10 @@ TVector<TRequestedBlob> TPartition::GetReadRequestFromBody(const ui64 startOffse
return blobs;
}
-TVector<TClientBlob> TPartition::GetReadRequestFromHead(const ui64 startOffset, const ui16 partNo, const ui32 maxCount, const ui32 maxSize, const ui64 readTimestampMs, ui32* rcount, ui32* rsize, ui64* insideHeadOffset) {
+TVector<TClientBlob> TPartition::GetReadRequestFromHead(
+ const ui64 startOffset, const ui16 partNo, const ui32 maxCount, const ui32 maxSize, const ui64 readTimestampMs, ui32* rcount,
+ ui32* rsize, ui64* insideHeadOffset, ui64 lastOffset
+) {
ui32& count = *rcount;
ui32& size = *rsize;
TVector<TClientBlob> res;
@@ -581,7 +608,11 @@ TVector<TClientBlob> TPartition::GetReadRequestFromHead(const ui64 startOffset,
} else {
++pno;
}
+ if (lastOffset > 0 && offset >= lastOffset)
+ break;
+
if (skip) continue;
+
if (blobs[i].IsLastPart()) {
bool messageSkippingBehaviour = AppData()->PQConfig.GetTopicsAreFirstClassCitizen() &&
readTimestampMs > blobs[i].WriteTimestamp.MilliSeconds();
@@ -650,13 +681,12 @@ void TPartition::Handle(TEvPQ::TEvRead::TPtr& ev, const TActorContext& ctx) {
read->Offset << ", " << read->PartNo << " EndOffset " << EndOffset);
return;
}
-
const TString& user = read->ClientId;
Y_ABORT_UNLESS(read->Offset <= EndOffset);
auto& userInfo = UsersInfoStorage->GetOrCreate(user, ctx);
-
+
if (!read->SessionId.empty() && !userInfo.NoConsumer) {
if (userInfo.Session != read->SessionId) {
TabletCounters.Cumulative()[COUNTER_PQ_READ_ERROR_NO_SESSION].Increment(1);
@@ -694,7 +724,10 @@ void TPartition::DoRead(TEvPQ::TEvRead::TPtr ev, TDuration waitQuotaTime, const
userInfo->ReadOffsetRewindSum += offset - read->Offset;
}
- TReadInfo info(user, read->ClientDC, offset, read->PartNo, read->Count, read->Size, read->Cookie, read->ReadTimestampMs, waitQuotaTime, read->ExternalOperation);
+ TReadInfo info(
+ user, read->ClientDC, offset, read->LastOffset, read->PartNo, read->Count, read->Size, read->Cookie, read->ReadTimestampMs,
+ waitQuotaTime, read->ExternalOperation, userInfo->PipeClient
+ );
ui64 cookie = Cookie++;
@@ -703,8 +736,7 @@ void TPartition::DoRead(TEvPQ::TEvRead::TPtr ev, TDuration waitQuotaTime, const
"read cookie " << cookie << " Topic '" << TopicConverter->GetClientsideName() << "' partition " << Partition
<< " user " << user
<< " offset " << read->Offset << " count " << read->Count << " size " << read->Size << " endOffset " << EndOffset
- << " max time lag " << read->MaxTimeLagMs << "ms effective offset " << offset
- );
+ << " max time lag " << read->MaxTimeLagMs << "ms effective offset " << offset);
if (offset == EndOffset) {
@@ -791,10 +823,9 @@ void TPartition::ReadTimestampForOffset(const TString& user, TUserInfo& userInfo
<< " ReadingTimestamp " << ReadingTimestamp << " rrg " << ReadingForUserReadRuleGeneration
);
-
- THolder<TEvPQ::TEvRead> event = MakeHolder<TEvPQ::TEvRead>(0, userInfo.Offset, 0, 1, "",
+ THolder<TEvPQ::TEvRead> event = MakeHolder<TEvPQ::TEvRead>(0, userInfo.Offset, 0, 0, 1, "",
user, 0, MAX_BLOB_PART_SIZE * 2, 0, 0, "",
- false);
+ false, TActorId{});
ctx.Send(ctx.SelfID, event.Release());
TabletCounters.Cumulative()[COUNTER_PQ_WRITE_TIMESTAMP_CACHE_MISS].Increment(1);
@@ -841,15 +872,16 @@ void TPartition::Handle(TEvPQ::TEvProxyResponse::TPtr& ev, const TActorContext&
LOG_INFO_S(
ctx,
NKikimrServices::PERSQUEUE,
- "Reading Timestamp failed for offset " << ReadingForOffset << " ( "<< userInfo->Offset << " ) " << ev->Get()->Response.DebugString()
+ "Reading Timestamp failed for offset " << ReadingForOffset << " ( "<< userInfo->Offset << " ) "
+ << ev->Get()->Response->DebugString()
);
- if (ev->Get()->Response.GetStatus() == NMsgBusProxy::MSTATUS_OK &&
- ev->Get()->Response.GetErrorCode() == NPersQueue::NErrorCode::OK &&
- ev->Get()->Response.GetPartitionResponse().HasCmdReadResult() &&
- ev->Get()->Response.GetPartitionResponse().GetCmdReadResult().ResultSize() > 0 &&
- (i64)ev->Get()->Response.GetPartitionResponse().GetCmdReadResult().GetResult(0).GetOffset() >= userInfo->Offset) {
+ if (ev->Get()->Response->GetStatus() == NMsgBusProxy::MSTATUS_OK &&
+ ev->Get()->Response->GetErrorCode() == NPersQueue::NErrorCode::OK &&
+ ev->Get()->Response->GetPartitionResponse().HasCmdReadResult() &&
+ ev->Get()->Response->GetPartitionResponse().GetCmdReadResult().ResultSize() > 0 &&
+ (i64)ev->Get()->Response->GetPartitionResponse().GetCmdReadResult().GetResult(0).GetOffset() >= userInfo->Offset) {
//offsets is inside gap - return timestamp of first record after gap
- const auto& res = ev->Get()->Response.GetPartitionResponse().GetCmdReadResult().GetResult(0);
+ const auto& res = ev->Get()->Response->GetPartitionResponse().GetCmdReadResult().GetResult(0);
userInfo->WriteTimestamp = TInstant::MilliSeconds(res.GetWriteTimestampMS());
userInfo->CreateTimestamp = TInstant::MilliSeconds(res.GetCreateTimestampMS());
userInfo->ActualTimestamps = true;
@@ -898,7 +930,9 @@ void TPartition::ProcessRead(const TActorContext& ctx, TReadInfo&& info, const u
userInfo.ForgetSubscription(ctx.Now());
}
- TVector<TRequestedBlob> blobs = GetReadRequestFromBody(info.Offset, info.PartNo, info.Count, info.Size, &count, &size);
+ TVector<TRequestedBlob> blobs = GetReadRequestFromBody(
+ info.Offset, info.PartNo, info.Count, info.Size, &count, &size, info.LastOffset
+ );
info.Blobs = blobs;
ui64 lastOffset = info.Offset + Min(count, info.Count);
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "read cookie " << cookie << " added " << info.Blobs.size()
@@ -906,7 +940,10 @@ void TPartition::ProcessRead(const TActorContext& ctx, TReadInfo&& info, const u
if (blobs.empty() || blobs.back().Key == DataKeysBody.back().Key) { // read from head only when all blobs from body processed
ui64 insideHeadOffset{0};
- info.Cached = GetReadRequestFromHead(info.Offset, info.PartNo, info.Count, info.Size, info.ReadTimestampMs, &count, &size, &insideHeadOffset);
+ info.Cached = GetReadRequestFromHead(
+ info.Offset, info.PartNo, info.Count, info.Size, info.ReadTimestampMs, &count,
+ &size, &insideHeadOffset, info.LastOffset
+ );
info.CachedOffset = insideHeadOffset;
}
if (info.Destination != 0) {
@@ -927,19 +964,23 @@ void TPartition::ProcessRead(const TActorContext& ctx, TReadInfo&& info, const u
}
TabletCounters.Cumulative()[COUNTER_PQ_READ_HEAD_ONLY_OK].Increment(1);
TabletCounters.Percentile()[COUNTER_LATENCY_PQ_READ_HEAD_ONLY].IncrementFor((ctx.Now() - info.Timestamp).MilliSeconds());
- TabletCounters.Cumulative()[COUNTER_PQ_READ_BYTES].Increment(resp.ByteSize());
+
+ TabletCounters.Cumulative()[COUNTER_PQ_READ_BYTES].Increment(resp->ByteSize());
+
ctx.Send(info.Destination != 0 ? Tablet : ctx.SelfID, answer.Event.Release());
- OnReadRequestFinished(info.Destination, answer.Size, info.User, ctx);
+ OnReadRequestFinished(cookie, answer.Size, info.User, ctx);
return;
}
const TString user = info.User;
bool res = ReadInfo.insert({cookie, std::move(info)}).second;
+ LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Reading cookie " << cookie << ". Send blob request.");
Y_ABORT_UNLESS(res);
THolder<TEvPQ::TEvBlobRequest> request(new TEvPQ::TEvBlobRequest(user, cookie, Partition,
lastOffset, std::move(blobs)));
+
ctx.Send(BlobCache, request.Release());
}
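Both collection paths now honor an optional upper bound: a nonzero lastOffset stops blob collection at the first offset that reaches it, while lastOffset == 0 preserves the old unbounded behavior. The bound, extracted as a standalone predicate for clarity (the patch inlines it in the loop conditions):

    // True while `offset` is still below the requested exclusive upper bound.
    inline bool WithinLastOffset(ui64 offset, ui64 lastOffset) {
        return lastOffset == 0 || offset < lastOffset;
    }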
diff --git a/ydb/core/persqueue/partition_types.h b/ydb/core/persqueue/partition_types.h
index dd9d74b95d..a84cc54e86 100644
--- a/ydb/core/persqueue/partition_types.h
+++ b/ydb/core/persqueue/partition_types.h
@@ -127,4 +127,6 @@ struct TDataKey {
ui64 CumulativeSize;
};
-} // namespace NKikimr
+
+} // namespace NKikimr::NPQ
+
diff --git a/ydb/core/persqueue/partition_write.cpp b/ydb/core/persqueue/partition_write.cpp
index 19dfd9476b..204d96b38c 100644
--- a/ydb/core/persqueue/partition_write.cpp
+++ b/ydb/core/persqueue/partition_write.cpp
@@ -35,7 +35,7 @@ void TPartition::ReplyOwnerOk(const TActorContext& ctx, const ui64 dst, const TS
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "TPartition::ReplyOwnerOk. Partition: " << Partition);
THolder<TEvPQ::TEvProxyResponse> response = MakeHolder<TEvPQ::TEvProxyResponse>(dst);
- NKikimrClient::TResponse& resp = response->Response;
+ NKikimrClient::TResponse& resp = *response->Response;
resp.SetStatus(NMsgBusProxy::MSTATUS_OK);
resp.SetErrorCode(NPersQueue::NErrorCode::OK);
resp.MutablePartitionResponse()->MutableCmdGetOwnershipResult()->SetOwnerCookie(cookie);
@@ -53,7 +53,7 @@ void TPartition::ReplyWrite(
Y_ABORT_UNLESS(seqNo <= (ui64)Max<i64>(), "SeqNo is too big: %" PRIu64, seqNo);
THolder<TEvPQ::TEvProxyResponse> response = MakeHolder<TEvPQ::TEvProxyResponse>(dst);
- NKikimrClient::TResponse& resp = response->Response;
+ NKikimrClient::TResponse& resp = *response->Response;
resp.SetStatus(NMsgBusProxy::MSTATUS_OK);
resp.SetErrorCode(NPersQueue::NErrorCode::OK);
auto write = resp.MutablePartitionResponse()->AddCmdWriteResult();
@@ -1334,7 +1334,6 @@ void TPartition::AddNewWriteBlob(std::pair<TKey, ui32>& res, TEvKeyValue::TEvReq
void TPartition::SetDeadlinesForWrites(const TActorContext& ctx) {
PQ_LOG_T("TPartition::SetDeadlinesForWrites.");
-
if (AppData(ctx)->PQConfig.GetQuotingConfig().GetQuotaWaitDurationMs() > 0 && QuotaDeadline == TInstant::Zero()) {
QuotaDeadline = ctx.Now() + TDuration::MilliSeconds(AppData(ctx)->PQConfig.GetQuotingConfig().GetQuotaWaitDurationMs());
diff --git a/ydb/core/persqueue/percentile_counter.cpp b/ydb/core/persqueue/percentile_counter.cpp
index f97130bb47..f9d95fad81 100644
--- a/ydb/core/persqueue/percentile_counter.cpp
+++ b/ydb/core/persqueue/percentile_counter.cpp
@@ -7,6 +7,7 @@ namespace NKikimr {
namespace NPQ {
+
TMultiCounter::TMultiCounter(::NMonitoring::TDynamicCounterPtr counters,
const TVector<NPersQueue::TPQLabelsInfo>& labels,
const TVector<std::pair<TString, TString>>& subgroups,
diff --git a/ydb/core/persqueue/pq_impl.cpp b/ydb/core/persqueue/pq_impl.cpp
index acba0a168e..86e066d3b9 100644
--- a/ydb/core/persqueue/pq_impl.cpp
+++ b/ydb/core/persqueue/pq_impl.cpp
@@ -89,6 +89,10 @@ static TMaybe<TPartitionKeyRange> GetPartitionKeyRange(const NKikimrPQ::TPQTable
return TPartitionKeyRange::Parse(proto.GetKeyRange());
}
+static bool IsDirectReadCmd(const auto& cmd) {
+ return cmd.GetDirectReadId() != 0;
+}
+
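A nonzero DirectReadId is the sole marker that a read is a direct-read "prepare" request. A usage sketch (field accessors assumed from the accompanying msgbus_pq.proto changes):

    NKikimrClient::TPersQueuePartitionRequest request;
    request.MutableCmdRead()->SetDirectReadId(42); // nonzero => direct read
    Y_ABORT_UNLESS(IsDirectReadCmd(request.GetCmdRead()));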
/******************************************************* ReadProxy *********************************************************/
//megaqc - remove it when LB will be ready
class TReadProxy : public TActorBootstrapped<TReadProxy> {
@@ -97,28 +101,35 @@ public:
return NKikimrServices::TActivity::PERSQUEUE_ANS_ACTOR;
}
- TReadProxy(const TActorId& sender, const TActorId& tablet, const NKikimrClient::TPersQueueRequest& request)
- : Sender(sender)
- , Tablet(tablet)
- , Request(request)
- , Response(new TEvPersQueue::TEvResponse)
+ TReadProxy(const TActorId& sender, const TActorId& tablet, ui32 tabletGeneration,
+ const TDirectReadKey& directReadKey, const NKikimrClient::TPersQueueRequest& request)
+ : Sender(sender)
+ , Tablet(tablet)
+ , TabletGeneration(tabletGeneration)
+ , Request(request)
+ , Response(new TEvPersQueue::TEvResponse)
+ , DirectReadKey(directReadKey)
{
Y_ABORT_UNLESS(Request.HasPartitionRequest() && Request.GetPartitionRequest().HasCmdRead());
Y_ABORT_UNLESS(Request.GetPartitionRequest().GetCmdRead().GetPartNo() == 0); //partial request are not allowed, otherwise remove ReadProxy
Y_ABORT_UNLESS(!Response->Record.HasPartitionResponse());
+ if (!directReadKey.SessionId.Empty()) {
+ DirectReadKey.ReadId = Request.GetPartitionRequest().GetCmdRead().GetDirectReadId();
+ }
}
- void Bootstrap(const TActorContext&)
+ void Bootstrap(const TActorContext& ctx)
{
+ LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Read proxy: bootstrap for direct read id: " << DirectReadKey.ReadId);
Become(&TThis::StateFunc);
}
private:
-
void Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorContext& ctx)
{
Y_ABORT_UNLESS(Response);
const auto& record = ev->Get()->Record;
+
if (!record.HasPartitionResponse() || !record.GetPartitionResponse().HasCmdReadResult() ||
record.GetStatus() != NMsgBusProxy::MSTATUS_OK || record.GetErrorCode() != NPersQueue::NErrorCode::OK ||
record.GetPartitionResponse().GetCmdReadResult().ResultSize() == 0) {
@@ -127,43 +138,47 @@ private:
Die(ctx);
return;
}
-
-
Y_ABORT_UNLESS(record.HasPartitionResponse() && record.GetPartitionResponse().HasCmdReadResult());
-
- const auto& res = record.GetPartitionResponse().GetCmdReadResult();
-
- Response->Record.SetStatus(NMsgBusProxy::MSTATUS_OK);
- Response->Record.SetErrorCode(NPersQueue::NErrorCode::OK);
-
- Y_ABORT_UNLESS(res.ResultSize() > 0);
+ const auto& readResult = record.GetPartitionResponse().GetCmdReadResult();
+ auto isDirectRead = IsDirectReadCmd(Request.GetPartitionRequest().GetCmdRead());
+ if (isDirectRead) {
+ if (!PreparedResponse) {
+ PreparedResponse = std::make_shared<NKikimrClient::TResponse>();
+ }
+ }
+
+ auto& responseRecord = isDirectRead ? *PreparedResponse : Response->Record;
+ responseRecord.SetStatus(NMsgBusProxy::MSTATUS_OK);
+ responseRecord.SetErrorCode(NPersQueue::NErrorCode::OK);
+
+ Y_ABORT_UNLESS(readResult.ResultSize() > 0);
bool isStart = false;
- if (!Response->Record.HasPartitionResponse()) {
- Y_ABORT_UNLESS(!res.GetResult(0).HasPartNo() || res.GetResult(0).GetPartNo() == 0); //starts from begin of record
- auto partResp = Response->Record.MutablePartitionResponse();
+ if (!responseRecord.HasPartitionResponse()) {
+ Y_ABORT_UNLESS(!readResult.GetResult(0).HasPartNo() || readResult.GetResult(0).GetPartNo() == 0); //starts from begin of record
+ auto partResp = responseRecord.MutablePartitionResponse();
auto readRes = partResp->MutableCmdReadResult();
- readRes->SetBlobsFromDisk(readRes->GetBlobsFromDisk() + res.GetBlobsFromDisk());
- readRes->SetBlobsFromCache(readRes->GetBlobsFromCache() + res.GetBlobsFromCache());
+ readRes->SetBlobsFromDisk(readRes->GetBlobsFromDisk() + readResult.GetBlobsFromDisk());
+ readRes->SetBlobsFromCache(readRes->GetBlobsFromCache() + readResult.GetBlobsFromCache());
isStart = true;
}
ui64 readFromTimestampMs = AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()
- ? (isStart ? res.GetReadFromTimestampMs()
- : Response->Record.GetPartitionResponse().GetCmdReadResult().GetReadFromTimestampMs())
+ ? (isStart ? readResult.GetReadFromTimestampMs()
+ : responseRecord.GetPartitionResponse().GetCmdReadResult().GetReadFromTimestampMs())
: 0;
if (record.GetPartitionResponse().HasCookie())
- Response->Record.MutablePartitionResponse()->SetCookie(record.GetPartitionResponse().GetCookie());
+ responseRecord.MutablePartitionResponse()->SetCookie(record.GetPartitionResponse().GetCookie());
- auto partResp = Response->Record.MutablePartitionResponse()->MutableCmdReadResult();
+ auto partResp = responseRecord.MutablePartitionResponse()->MutableCmdReadResult();
- partResp->SetMaxOffset(res.GetMaxOffset());
- partResp->SetSizeLag(res.GetSizeLag());
- partResp->SetWaitQuotaTimeMs(partResp->GetWaitQuotaTimeMs() + res.GetWaitQuotaTimeMs());
+ partResp->SetMaxOffset(readResult.GetMaxOffset());
+ partResp->SetSizeLag(readResult.GetSizeLag());
+ partResp->SetWaitQuotaTimeMs(partResp->GetWaitQuotaTimeMs() + readResult.GetWaitQuotaTimeMs());
- partResp->SetRealReadOffset(Max(partResp->GetRealReadOffset(), res.GetRealReadOffset()));
+ partResp->SetRealReadOffset(Max(partResp->GetRealReadOffset(), readResult.GetRealReadOffset()));
- for (ui32 i = 0; i < res.ResultSize(); ++i) {
- bool isNewMsg = !res.GetResult(i).HasPartNo() || res.GetResult(i).GetPartNo() == 0;
+ for (ui32 i = 0; i < readResult.ResultSize(); ++i) {
+ bool isNewMsg = !readResult.GetResult(i).HasPartNo() || readResult.GetResult(i).GetPartNo() == 0;
if (!isStart) {
Y_ABORT_UNLESS(partResp->ResultSize() > 0);
auto& back = partResp->GetResult(partResp->ResultSize() - 1);
@@ -176,25 +191,26 @@ private:
}
if (isNewMsg) {
- if (!isStart && res.GetResult(i).HasTotalParts() && res.GetResult(i).GetTotalParts() + i > res.ResultSize()) //last blob is not full
+ if (!isStart && readResult.GetResult(i).HasTotalParts()
+ && readResult.GetResult(i).GetTotalParts() + i > readResult.ResultSize()) //last blob is not full
break;
- partResp->AddResult()->CopyFrom(res.GetResult(i));
+ partResp->AddResult()->CopyFrom(readResult.GetResult(i));
isStart = false;
} else { //glue to last res
auto rr = partResp->MutableResult(partResp->ResultSize() - 1);
- if (rr->GetSeqNo() != res.GetResult(i).GetSeqNo() || rr->GetPartNo() + 1 != res.GetResult(i).GetPartNo()) {
+ if (rr->GetSeqNo() != readResult.GetResult(i).GetSeqNo() || rr->GetPartNo() + 1 != readResult.GetResult(i).GetPartNo()) {
LOG_CRIT_S(ctx, NKikimrServices::PERSQUEUE, "Handle TEvRead tablet: " << Tablet
<< " last read pos (seqno/parno): " << rr->GetSeqNo() << "," << rr->GetPartNo() << " readed now "
- << res.GetResult(i).GetSeqNo() << ", " << res.GetResult(i).GetPartNo()
+ << readResult.GetResult(i).GetSeqNo() << ", " << readResult.GetResult(i).GetPartNo()
<< " full request(now): " << Request);
}
- Y_ABORT_UNLESS(rr->GetSeqNo() == res.GetResult(i).GetSeqNo());
- (*rr->MutableData()) += res.GetResult(i).GetData();
- rr->SetPartitionKey(res.GetResult(i).GetPartitionKey());
- rr->SetExplicitHash(res.GetResult(i).GetExplicitHash());
- rr->SetPartNo(res.GetResult(i).GetPartNo());
- rr->SetUncompressedSize(rr->GetUncompressedSize() + res.GetResult(i).GetUncompressedSize());
- if (res.GetResult(i).GetPartNo() + 1 == res.GetResult(i).GetTotalParts()) {
+ Y_ABORT_UNLESS(rr->GetSeqNo() == readResult.GetResult(i).GetSeqNo());
+ (*rr->MutableData()) += readResult.GetResult(i).GetData();
+ rr->SetPartitionKey(readResult.GetResult(i).GetPartitionKey());
+ rr->SetExplicitHash(readResult.GetResult(i).GetExplicitHash());
+ rr->SetPartNo(readResult.GetResult(i).GetPartNo());
+ rr->SetUncompressedSize(rr->GetUncompressedSize() + readResult.GetResult(i).GetUncompressedSize());
+ if (readResult.GetResult(i).GetPartNo() + 1 == readResult.GetResult(i).GetTotalParts()) {
Y_ABORT_UNLESS((ui32)rr->GetTotalSize() == (ui32)rr->GetData().size());
}
}
@@ -216,10 +232,10 @@ private:
THolder<TEvPersQueue::TEvRequest> req(new TEvPersQueue::TEvRequest);
req->Record = Request;
ctx.Send(Tablet, req.Release());
+
return;
}
}
-
//filter old messages
::google::protobuf::RepeatedPtrField<NKikimrClient::TCmdReadResult::TResult> records;
records.Swap(partResp->MutableResult());
@@ -231,8 +247,29 @@ private:
result->CopyFrom(rec);
}
}
+ if (isDirectRead) {
+ auto* prepareResponse = Response->Record.MutablePartitionResponse()->MutableCmdPrepareReadResult();
+ prepareResponse->SetBytesSizeEstimate(readResult.GetSizeEstimate());
+ prepareResponse->SetDirectReadId(DirectReadKey.ReadId);
+ prepareResponse->SetReadOffset(readResult.GetRealReadOffset());
+ prepareResponse->SetLastOffset(readResult.GetLastOffset());
+ prepareResponse->SetEndOffset(readResult.GetEndOffset());
- ctx.Send(Sender, Response.Release());
+ prepareResponse->SetSizeLag(readResult.GetSizeLag());
+ Response->Record.MutablePartitionResponse()->SetCookie(record.GetPartitionResponse().GetCookie());
+ if (readResult.ResultSize()) {
+ prepareResponse->SetWriteTimestampMS(readResult.GetResult(readResult.ResultSize() - 1).GetWriteTimestampMS());
+ }
+ Response->Record.SetStatus(NMsgBusProxy::MSTATUS_OK);
+ Response->Record.SetErrorCode(NPersQueue::NErrorCode::OK);
+ ctx.Send(Sender, Response.Release());
+ ctx.Send(
+ MakePQDReadCacheServiceActorId(),
+ new TEvPQ::TEvStageDirectReadData(DirectReadKey, TabletGeneration, PreparedResponse)
+ );
+ } else {
+ ctx.Send(Sender, Response.Release());
+ }
Die(ctx);
}
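For a direct read the proxy thus splits the result in two: the client gets only a lightweight CmdPrepareReadResult (offsets, size estimate, the direct read id), while the full payload is staged in the caching service under the same key, to be served after publication. A condensed sketch of the branch above:

    if (isDirectRead) {
        // Acknowledge with metadata only; the heavy data goes to the cache.
        ctx.Send(Sender, Response.Release());
        ctx.Send(MakePQDReadCacheServiceActorId(),
                 new TEvPQ::TEvStageDirectReadData(DirectReadKey, TabletGeneration, PreparedResponse));
    } else {
        // Ordinary read: the data travels inline in the response.
        ctx.Send(Sender, Response.Release());
    }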
@@ -246,15 +283,20 @@ private:
const TActorId Sender;
const TActorId Tablet;
+ ui32 TabletGeneration;
NKikimrClient::TPersQueueRequest Request;
THolder<TEvPersQueue::TEvResponse> Response;
+ std::shared_ptr<NKikimrClient::TResponse> PreparedResponse;
+ TDirectReadKey DirectReadKey;
+
};
-TActorId CreateReadProxy(const TActorId& sender, const TActorId& tablet, const NKikimrClient::TPersQueueRequest& request,
+TActorId CreateReadProxy(const TActorId& sender, const TActorId& tablet, ui32 tabletGeneration,
+ const TDirectReadKey& directReadKey, const NKikimrClient::TPersQueueRequest& request,
const TActorContext& ctx)
{
- return ctx.Register(new TReadProxy(sender, tablet, request));
+ return ctx.Register(new TReadProxy(sender, tablet, tabletGeneration, directReadKey, request));
}
/******************************************************* AnswerBuilderProxy *********************************************************/
@@ -262,7 +304,8 @@ class TResponseBuilder {
public:
TResponseBuilder(const TActorId& sender, const TActorId& tablet, const TString& topicName, const ui32 partition, const ui64 messageNo,
- const TString& reqId, const TMaybe<ui64> cookie, NMetrics::TResourceMetrics* resourceMetrics, const TActorContext& ctx)
+ const TString& reqId, const TMaybe<ui64> cookie, NMetrics::TResourceMetrics* resourceMetrics,
+ const TActorContext& ctx)
: Sender(sender)
, Tablet(tablet)
, TopicName(topicName)
@@ -305,13 +348,13 @@ public:
Y_ABORT_UNLESS(Response);
--Waiting;
bool skip = false;
- if (WasSplit && ev->Get()->Response.GetPartitionResponse().CmdWriteResultSize() == 1) { //megaqc - remove this
- const auto& x = ev->Get()->Response.GetPartitionResponse().GetCmdWriteResult(0);
+ if (WasSplit && ev->Get()->Response->GetPartitionResponse().CmdWriteResultSize() == 1) { //megaqc - remove this
+ const auto& x = ev->Get()->Response->GetPartitionResponse().GetCmdWriteResult(0);
if (x.HasPartNo() && x.GetPartNo() > 0)
skip = true;
}
if (!skip) //megaqc - remove this
- Response->Record.MergeFrom(ev->Get()->Response);
+ Response->Record.MergeFrom(*ev->Get()->Response);
if (!Waiting) {
LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Answer ok topic: '" << TopicName << "' partition: " << Partition
@@ -1611,29 +1654,41 @@ void TPersQueue::HandleGetMaxSeqNoRequest(const ui64 responseCookie, const TActo
ctx.Send(partActor, event.Release());
}
-void TPersQueue::HandleDeleteSessionRequest(const ui64 responseCookie, const TActorId& partActor,
- const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx)
+void TPersQueue::HandleDeleteSessionRequest(
+ const ui64 responseCookie, const TActorId& partActor,
+ const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx,
+ const TActorId& pipeClient, const TActorId&
+)
{
Y_ABORT_UNLESS(req.HasCmdDeleteSession());
InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_DELETE_SESSION);
const auto& cmd = req.GetCmdDeleteSession();
+ // TODO: priority
+ LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "Got cmd delete session: " << cmd.DebugString());
if (!cmd.HasClientId()){
- ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST,
+ return ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST,
TStringBuilder() << "no clientId in DeleteSession request: " << ToString(req).data());
} else if (!cmd.HasSessionId()) {
- ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST,
+ return ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST,
TStringBuilder() << "not sessionId in DeleteSession request: " << ToString(req).data());
} else {
- THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(responseCookie, cmd.GetClientId(),
- 0, cmd.GetSessionId(), 0, 0, TEvPQ::TEvSetClientInfo::ESCI_DROP_SESSION);
+ THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(
+ responseCookie, cmd.GetClientId(), 0, cmd.GetSessionId(), 0, 0, 0, pipeClient,
+ TEvPQ::TEvSetClientInfo::ESCI_DROP_SESSION
+ );
ctx.Send(partActor, event.Release());
}
+ auto pipe = PipesInfo.find(pipeClient);
+ if (!pipe.IsEnd()) {
+ DestroySession(pipe->second);
+ }
}
void TPersQueue::HandleCreateSessionRequest(const ui64 responseCookie, const TActorId& partActor,
- const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx)
-{
+ const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx,
+ const TActorId& pipeClient, const TActorId&
+) {
Y_ABORT_UNLESS(req.HasCmdCreateSession());
const auto& cmd = req.GetCmdCreateSession();
@@ -1650,9 +1705,31 @@ void TPersQueue::HandleCreateSessionRequest(const ui64 responseCookie, const TAc
ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST,
TStringBuilder() << "not step in CreateSession request: " << ToString(req).data());
} else {
+ bool isDirectRead = cmd.GetPartitionSessionId() > 0;
InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_CREATE_SESSION);
- THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(responseCookie, cmd.GetClientId(),
- 0, cmd.GetSessionId(), cmd.GetGeneration(), cmd.GetStep(), TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION);
+ THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(
+ responseCookie, cmd.GetClientId(), 0, cmd.GetSessionId(), cmd.GetPartitionSessionId(), cmd.GetGeneration(), cmd.GetStep(),
+ pipeClient, TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION, 0, false
+ );
+ if (isDirectRead) {
+ auto pipeIter = PipesInfo.find(pipeClient);
+ if (pipeIter.IsEnd()) {
+ ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::ERROR,
+ TStringBuilder() << "Internal error - server pipe " << pipeClient.ToString() << " not found");
+ return;
+ }
+ pipeIter->second.ClientId = cmd.GetClientId();
+ pipeIter->second.SessionId = cmd.GetSessionId();
+ pipeIter->second.PartitionSessionId = cmd.GetPartitionSessionId();
+ LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Created session " << cmd.GetSessionId() << " on pipe: " << pipeIter->first.ToString());
+ ctx.Send(MakePQDReadCacheServiceActorId(),
+ new TEvPQ::TEvRegisterDirectReadSession(
+ TReadSessionKey{cmd.GetSessionId(), cmd.GetPartitionSessionId()},
+ GetGeneration()
+ )
+ );
+
+ }
ctx.Send(partActor, event.Release());
}
}
@@ -1674,10 +1751,10 @@ void TPersQueue::HandleSetClientOffsetRequest(const ui64 responseCookie, const T
TStringBuilder() << "negative offset in SetClientOffset request: " << ToString(req).data());
} else {
InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_SET_OFFSET);
- THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(responseCookie, cmd.GetClientId(),
- cmd.GetOffset(),
- cmd.HasSessionId() ? cmd.GetSessionId() : "", 0, 0,
- TEvPQ::TEvSetClientInfo::ESCI_OFFSET, 0, cmd.GetStrict());
+ THolder<TEvPQ::TEvSetClientInfo> event = MakeHolder<TEvPQ::TEvSetClientInfo>(
+ responseCookie, cmd.GetClientId(), cmd.GetOffset(), cmd.HasSessionId() ? cmd.GetSessionId() : "", 0, 0, 0,
+ TActorId{}, TEvPQ::TEvSetClientInfo::ESCI_OFFSET, 0, cmd.GetStrict()
+ );
ctx.Send(partActor, event.Release());
}
}
@@ -1968,7 +2045,7 @@ void TPersQueue::HandleGetOwnershipRequest(const ui64 responseCookie, const TAct
return;
}
- it->second = {partActor, owner, it->second.ServerActors};
+ it->second = TPipeInfo::ForOwner(partActor, owner, it->second.ServerActors);
InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_GET_OWNERSHIP);
THolder<TEvPQ::TEvChangeOwner> event = MakeHolder<TEvPQ::TEvChangeOwner>(responseCookie, owner, pipeClient, sender, req.GetCmdGetOwnership().GetForce());
@@ -1976,9 +2053,11 @@ void TPersQueue::HandleGetOwnershipRequest(const ui64 responseCookie, const TAct
}
-void TPersQueue::HandleReadRequest(const ui64 responseCookie, const TActorId& partActor,
- const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx)
-{
+void TPersQueue::HandleReadRequest(
+ const ui64 responseCookie, const TActorId& partActor,
+ const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx,
+ const TActorId& pipeClient, const TActorId&
+) {
Y_ABORT_UNLESS(req.HasCmdRead());
auto cmd = req.GetCmdRead();
@@ -2015,8 +2094,25 @@ void TPersQueue::HandleReadRequest(const ui64 responseCookie, const TActorId& pa
ui32 bytes = Min<ui32>(MAX_BYTES, cmd.HasBytes() ? cmd.GetBytes() : MAX_BYTES);
auto clientDC = cmd.HasClientDC() ? to_lower(cmd.GetClientDC()) : "unknown";
clientDC.to_title();
+ if (IsDirectReadCmd(cmd)) {
+ auto pipeIter = PipesInfo.find(pipeClient);
+ if (pipeIter.IsEnd()) {
+ ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::READ_ERROR_NO_SESSION,
+ TStringBuilder() << "Read prepare request from unknown(old?) pipe");
+ return;
+ } else if (cmd.GetSessionId().empty()) {
+ ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::READ_ERROR_NO_SESSION,
+ TStringBuilder() << "Read prepare request with empty session id");
+ return;
+ } else if (pipeIter->second.SessionId != cmd.GetSessionId()) {
+ ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::READ_ERROR_NO_SESSION,
+ TStringBuilder() << "Read prepare request with unknown(old?) session id " << cmd.GetSessionId());
+ return;
+ }
+ }
+
THolder<TEvPQ::TEvRead> event =
- MakeHolder<TEvPQ::TEvRead>(responseCookie, cmd.GetOffset(),
+ MakeHolder<TEvPQ::TEvRead>(responseCookie, cmd.GetOffset(), cmd.GetLastOffset(),
cmd.HasPartNo() ? cmd.GetPartNo() : 0,
count,
cmd.HasSessionId() ? cmd.GetSessionId() : "",
@@ -2024,11 +2120,117 @@ void TPersQueue::HandleReadRequest(const ui64 responseCookie, const TActorId& pa
cmd.HasTimeoutMs() ? cmd.GetTimeoutMs() : 0, bytes,
cmd.HasMaxTimeLagMs() ? cmd.GetMaxTimeLagMs() : 0,
cmd.HasReadTimestampMs() ? cmd.GetReadTimestampMs() : 0, clientDC,
- cmd.GetExternalOperation());
+ cmd.GetExternalOperation(),
+ pipeClient);
+
ctx.Send(partActor, event.Release());
}
}
+template<class TRequest>
+bool ValidateDirectReadRequestBase(
+ const TRequest& cmd, const THashMap<TActorId, TPersQueue::TPipeInfo>::iterator& pipeIter,
+ TStringBuilder& error, TDirectReadKey& key
+) {
+ key = TDirectReadKey{cmd.GetSessionKey().GetSessionId(), cmd.GetSessionKey().GetPartitionSessionId(), cmd.GetDirectReadId()};
+ if (key.SessionId.Empty()) {
+ error << "no session id in publish read request: ";
+ return false;
+ } else if (key.PartitionSessionId == 0) {
+ error << "No or zero partition session id in publish read request: ";
+ return false;
+ } else if (key.ReadId == 0) {
+ error << "No or zero ReadId in publish read request: ";
+ return false;
+ }
+ if (pipeIter.IsEnd()) {
+ error << "Read prepare request from unknown(old?) pipe";
+ return false;
+ } else if (pipeIter->second.SessionId != key.SessionId) {
+ error << "Read prepare request with unknown(old?) session id " << key.SessionId;
+ return false;
+ }
+ return true;
+}
+
+void TPersQueue::HandlePublishReadRequest(
+ const ui64 responseCookie, const TActorId&,
+ const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx,
+ const TActorId& pipeClient, const TActorId&
+) {
+ auto cmd = req.GetCmdPublishRead();
+ TDirectReadKey key;
+ TStringBuilder error;
+
+ if (!ValidateDirectReadRequestBase(cmd, PipesInfo.find(pipeClient), error, key)) {
+ error << req.DebugString();
+ return ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, error);
+ }
+ InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_PUBLISH_READ);
+ THolder<TEvPQ::TEvProxyResponse> publishDoneEvent = MakeHolder<TEvPQ::TEvProxyResponse>(responseCookie);
+ publishDoneEvent->Response->SetStatus(NMsgBusProxy::MSTATUS_OK);
+ publishDoneEvent->Response->SetErrorCode(NPersQueue::NErrorCode::OK);
+
+ publishDoneEvent->Response->MutablePartitionResponse()->MutableCmdPublishReadResult();
+ ctx.Send(SelfId(), publishDoneEvent.Release());
+
+ LOG_DEBUG_S(
+ ctx, NKikimrServices::PERSQUEUE, "Publish direct read id " << key.ReadId << " for session " << key.SessionId
+ );
+ ctx.Send(
+ MakePQDReadCacheServiceActorId(),
+ new TEvPQ::TEvPublishDirectRead(key, GetGeneration())
+ );
+}
+
+void TPersQueue::HandleForgetReadRequest(
+ const ui64 responseCookie, const TActorId&,
+ const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx,
+ const TActorId& pipeClient, const TActorId&
+) {
+ auto cmd = req.GetCmdForgetRead();
+ TDirectReadKey key;
+ TStringBuilder error;
+ if (!ValidateDirectReadRequestBase(cmd, PipesInfo.find(pipeClient), error, key)) {
+ error << req.DebugString();
+ return ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, error);
+ }
+ InitResponseBuilder(responseCookie, 1, COUNTER_LATENCY_PQ_FORGET_READ);
+ THolder<TEvPQ::TEvProxyResponse> forgetDoneEvent = MakeHolder<TEvPQ::TEvProxyResponse>(responseCookie);
+ forgetDoneEvent->Response->SetStatus(NMsgBusProxy::MSTATUS_OK);
+ forgetDoneEvent->Response->SetErrorCode(NPersQueue::NErrorCode::OK);
+
+ forgetDoneEvent->Response->MutablePartitionResponse()->MutableCmdForgetReadResult();
+ ctx.Send(SelfId(), forgetDoneEvent.Release());
+
+ LOG_DEBUG_S(
+ ctx, NKikimrServices::PERSQUEUE, "Forget direct read id " << key.ReadId << " for session " << key.SessionId
+ );
+ ctx.Send(
+ MakePQDReadCacheServiceActorId(),
+ new TEvPQ::TEvForgetDirectRead(key, GetGeneration())
+ );
+}
+
+void TPersQueue::DestroySession(TPipeInfo& pipeInfo) {
+ if (pipeInfo.SessionId.Empty())
+ return;
+ const auto& ctx = ActorContext();
+ LOG_DEBUG_S(
+ ctx, NKikimrServices::PERSQUEUE, "PQ: Destroy direct read session " << pipeInfo.SessionId
+ );
+ ctx.Send(
+ MakePQDReadCacheServiceActorId(),
+ new TEvPQ::TEvDeregisterDirectReadSession(
+ TReadSessionKey{pipeInfo.SessionId, pipeInfo.PartitionSessionId},
+ GetGeneration()
+ )
+ );
+ pipeInfo.SessionId = TString{};
+}
+
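Taken together, the handlers above give a direct-read session a well-defined lifecycle against the cache service. A compact summary of the event flow as wired up in this patch (names as introduced above):

    // CmdCreateSession (PartitionSessionId > 0)  -> TEvRegisterDirectReadSession
    // CmdRead (DirectReadId > 0)                 -> data prepared for direct delivery
    // CmdPublishRead                             -> TEvPublishDirectRead
    // CmdForgetRead                              -> TEvForgetDirectRead
    // pipe disconnect / CmdDeleteSession         -> TEvDeregisterDirectReadSession (via DestroySession)
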
TMaybe<TEvPQ::TEvRegisterMessageGroup::TBody> TPersQueue::MakeRegisterMessageGroup(
const NKikimrClient::TPersQueuePartitionRequest::TCmdRegisterMessageGroup& cmd,
NPersQueue::NErrorCode::EErrorCode& code, TString& error) const
@@ -2153,15 +2355,26 @@ void TPersQueue::Handle(TEvPersQueue::TEvRequest::TPtr& ev, const TActorContext&
if (request.HasPartitionRequest() && request.GetPartitionRequest().HasCookie())
c = request.GetPartitionRequest().GetCookie();
TAutoPtr<TResponseBuilder> ans;
- if (request.HasPartitionRequest() && request.GetPartitionRequest().HasCmdRead() && s != TMP_REQUEST_MARKER) {
- TActorId rr = CreateReadProxy(ev->Sender, ctx.SelfID, request, ctx);
+ ui64 responseCookie = ++NextResponseCookie;
+
+ auto& req = request.GetPartitionRequest();
+ TActorId pipeClient = ActorIdFromProto(req.GetPipeClient());
+
+ if (request.GetPartitionRequest().HasCmdRead() && s != TMP_REQUEST_MARKER) {
+ auto pipeIter = PipesInfo.find(pipeClient);
+ TDirectReadKey directKey{};
+ if (!pipeIter.IsEnd()) {
+ directKey.SessionId = pipeIter->second.SessionId;
+ directKey.PartitionSessionId = pipeIter->second.PartitionSessionId;
+ }
+ TActorId rr = CreateReadProxy(ev->Sender, ctx.SelfID, GetGeneration(), directKey, request, ctx);
ans = CreateResponseProxy(rr, ctx.SelfID, TopicName, p, m, s, c, ResourceMetrics, ctx);
} else {
ans = CreateResponseProxy(ev->Sender, ctx.SelfID, TopicName, p, m, s, c, ResourceMetrics, ctx);
}
- ui64 responseCookie = ++NextResponseCookie;
ResponseProxy[responseCookie] = ans;
Counters->Simple()[COUNTER_PQ_TABLET_INFLIGHT].Set(ResponseProxy.size());
+
if (!ConfigInited) {
ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::INITIALIZING, "tablet is not ready");
return;
@@ -2177,13 +2390,12 @@ void TPersQueue::Handle(TEvPersQueue::TEvRequest::TPtr& ev, const TActorContext&
return;
}
- auto& req = request.GetPartitionRequest();
if (!req.HasPartition()) {
ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST, "no partition number");
return;
}
-
+
ui32 partition = req.GetPartition();
auto it = Partitions.find(partition);
@@ -2214,7 +2426,9 @@ void TPersQueue::Handle(TEvPersQueue::TEvRequest::TPtr& ev, const TActorContext&
+ req.HasCmdUpdateWriteTimestamp()
+ req.HasCmdRegisterMessageGroup()
+ req.HasCmdDeregisterMessageGroup()
- + req.HasCmdSplitMessageGroup();
+ + req.HasCmdSplitMessageGroup()
+ + req.HasCmdPublishRead()
+ + req.HasCmdForgetRead();
if (count != 1) {
ReplyError(ctx, responseCookie, NPersQueue::NErrorCode::BAD_REQUEST,
@@ -2224,14 +2438,12 @@ void TPersQueue::Handle(TEvPersQueue::TEvRequest::TPtr& ev, const TActorContext&
const TActorId& partActor = it->second.Actor;
- TActorId pipeClient = ActorIdFromProto(req.GetPipeClient());
-
if (req.HasCmdGetMaxSeqNo()) {
HandleGetMaxSeqNoRequest(responseCookie, partActor, req, ctx);
} else if (req.HasCmdDeleteSession()) {
- HandleDeleteSessionRequest(responseCookie, partActor, req, ctx);
+ HandleDeleteSessionRequest(responseCookie, partActor, req, ctx, pipeClient, ev->Sender);
} else if (req.HasCmdCreateSession()) {
- HandleCreateSessionRequest(responseCookie, partActor, req, ctx);
+ HandleCreateSessionRequest(responseCookie, partActor, req, ctx, pipeClient, ev->Sender);
} else if (req.HasCmdSetClientOffset()) {
HandleSetClientOffsetRequest(responseCookie, partActor, req, ctx);
} else if (req.HasCmdGetClientOffset()) {
@@ -2241,7 +2453,11 @@ void TPersQueue::Handle(TEvPersQueue::TEvRequest::TPtr& ev, const TActorContext&
} else if (req.HasCmdUpdateWriteTimestamp()) {
HandleUpdateWriteTimestampRequest(responseCookie, partActor, req, ctx);
} else if (req.HasCmdRead()) {
- HandleReadRequest(responseCookie, partActor, req, ctx);
+ HandleReadRequest(responseCookie, partActor, req, ctx, pipeClient, ev->Sender);
+ } else if (req.HasCmdPublishRead()) {
+ HandlePublishReadRequest(responseCookie, partActor, req, ctx, pipeClient, ev->Sender);
+ } else if (req.HasCmdForgetRead()) {
+ HandleForgetReadRequest(responseCookie, partActor, req, ctx, pipeClient, ev->Sender);
} else if (req.HasCmdGetOwnership()) {
HandleGetOwnershipRequest(responseCookie, partActor, req, ctx, pipeClient, ev->Sender);
} else if (req.HasCmdReserveBytes()) {
@@ -2258,13 +2474,8 @@ void TPersQueue::Handle(TEvPersQueue::TEvRequest::TPtr& ev, const TActorContext&
void TPersQueue::Handle(TEvTabletPipe::TEvServerConnected::TPtr& ev, const TActorContext&)
{
- auto it = PipesInfo.find(ev->Get()->ClientId);
-
- if (it == PipesInfo.end()) {
- PipesInfo.insert({ev->Get()->ClientId, {TActorId(), "", 1}});
- } else {
- it->second.ServerActors++;
- }
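+ // insert() keeps any existing entry, so new and already-known pipes share one path: bump ServerActors.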
+ auto it = PipesInfo.insert({ev->Get()->ClientId, {}}).first;
+ it->second.ServerActors++;
Counters->Simple()[COUNTER_PQ_TABLET_OPENED_PIPES] = PipesInfo.size();
}
@@ -2279,7 +2490,12 @@ void TPersQueue::Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev, const TA
return;
}
if (it->second.PartActor != TActorId()) {
- ctx.Send(it->second.PartActor, new TEvPQ::TEvPipeDisconnected(it->second.Owner, it->first));
+ ctx.Send(it->second.PartActor, new TEvPQ::TEvPipeDisconnected(
+ it->second.Owner, it->first
+ ));
+ }
+ if (!it->second.SessionId.Empty()) {
+ DestroySession(it->second);
}
PipesInfo.erase(it);
Counters->Simple()[COUNTER_PQ_TABLET_OPENED_PIPES] = PipesInfo.size();
@@ -2352,7 +2568,11 @@ void TPersQueue::HandleDie(const TActorContext& ctx)
}
ctx.Send(CacheActor, new TEvents::TEvPoisonPill());
-
+ for (auto& pipe : PipesInfo) {
+ if (!pipe.second.SessionId.Empty()) {
+ DestroySession(pipe.second);
+ }
+ }
for (const auto& p : ResponseProxy) {
THolder<TEvPQ::TEvError> ev = MakeHolder<TEvPQ::TEvError>(NPersQueue::NErrorCode::INITIALIZING, "tablet will be restarted right now", p.first);
bool res = p.second->HandleError(ev.Get(), ctx);
@@ -3498,6 +3718,7 @@ TPartition* TPersQueue::CreatePartitionActor(ui32 partitionId,
return new TPartition(TabletID(),
partitionId,
ctx.SelfID,
+ GetGeneration(),
CacheActor,
topicConverter,
DCId,
@@ -3694,6 +3915,13 @@ TString TPersQueue::LogPrefix() const {
return TStringBuilder() << SelfId() << " ";
}
+ui64 TPersQueue::GetGeneration() {
+ if (!TabletGeneration.Defined()) {
+ TabletGeneration = Executor()->Generation();
+ }
+ return *TabletGeneration;
+}
+
bool TPersQueue::HandleHook(STFUNC_SIG)
{
SetActivityType(NKikimrServices::TActivity::PERSQUEUE_ACTOR);
diff --git a/ydb/core/persqueue/pq_impl.h b/ydb/core/persqueue/pq_impl.h
index dca7795581..dd78e89add 100644
--- a/ydb/core/persqueue/pq_impl.h
+++ b/ydb/core/persqueue/pq_impl.h
@@ -109,6 +109,9 @@ class TPersQueue : public NKeyValue::TKeyValueFlat {
void ReadState(const NKikimrClient::TKeyValueResponse::TReadResult& read, const TActorContext& ctx);
void InitializeMeteringSink(const TActorContext& ctx);
+ void ProcessReadRequestImpl(const ui64 responseCookie, const TActorId& partActor,
+ const NKikimrClient::TPersQueuePartitionRequest& req, bool doPrepare, ui32 readId,
+ const TActorContext& ctx);
TMaybe<TEvPQ::TEvRegisterMessageGroup::TBody> MakeRegisterMessageGroup(
const NKikimrClient::TPersQueuePartitionRequest::TCmdRegisterMessageGroup& cmd,
@@ -129,23 +132,28 @@ class TPersQueue : public NKeyValue::TKeyValueFlat {
#define DESCRIBE_HANDLE(A) void A(const ui64 responseCookie, const TActorId& partActor, \
const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx);
DESCRIBE_HANDLE(HandleGetMaxSeqNoRequest)
- DESCRIBE_HANDLE(HandleDeleteSessionRequest)
- DESCRIBE_HANDLE(HandleCreateSessionRequest)
DESCRIBE_HANDLE(HandleSetClientOffsetRequest)
DESCRIBE_HANDLE(HandleGetClientOffsetRequest)
DESCRIBE_HANDLE(HandleWriteRequest)
DESCRIBE_HANDLE(HandleUpdateWriteTimestampRequest)
- DESCRIBE_HANDLE(HandleReadRequest)
DESCRIBE_HANDLE(HandleRegisterMessageGroupRequest)
DESCRIBE_HANDLE(HandleDeregisterMessageGroupRequest)
DESCRIBE_HANDLE(HandleSplitMessageGroupRequest)
#undef DESCRIBE_HANDLE
+
#define DESCRIBE_HANDLE_WITH_SENDER(A) void A(const ui64 responseCookie, const TActorId& partActor, \
const NKikimrClient::TPersQueuePartitionRequest& req, const TActorContext& ctx,\
const TActorId& pipeClient, const TActorId& sender);
+
+ DESCRIBE_HANDLE_WITH_SENDER(HandleCreateSessionRequest)
+ DESCRIBE_HANDLE_WITH_SENDER(HandleDeleteSessionRequest)
+ DESCRIBE_HANDLE_WITH_SENDER(HandleReadRequest)
+ DESCRIBE_HANDLE_WITH_SENDER(HandlePublishReadRequest)
+ DESCRIBE_HANDLE_WITH_SENDER(HandleForgetReadRequest)
DESCRIBE_HANDLE_WITH_SENDER(HandleGetOwnershipRequest)
DESCRIBE_HANDLE_WITH_SENDER(HandleReserveBytesRequest)
#undef DESCRIBE_HANDLE_WITH_SENDER
+
bool ChangingState() const { return !TabletStateRequests.empty(); }
void TryReturnTabletStateAll(const TActorContext& ctx, NKikimrProto::EReplyStatus status = NKikimrProto::OK);
void ReturnTabletState(const TActorContext& ctx, const TChangeNotification& req, NKikimrProto::EReplyStatus status);
@@ -212,12 +220,25 @@ private:
TVector<TAutoPtr<TEvPersQueue::TEvHasDataInfo>> HasDataRequests;
TVector<std::pair<TAutoPtr<TEvPersQueue::TEvUpdateConfig>, TActorId> > UpdateConfigRequests;
+public:
struct TPipeInfo {
TActorId PartActor;
TString Owner;
- ui32 ServerActors;
+ ui32 ServerActors = 0;
+ TString ClientId;
+ TString SessionId;
+ ui64 PartitionSessionId = 0;
+ TPipeInfo() = default;
+ static TPipeInfo ForOwner(const TActorId& partActor, const TString& owner, ui32 serverActors) {
+ TPipeInfo res;
+ res.Owner = owner;
+ res.PartActor = partActor;
+ res.ServerActors = serverActors;
+ return res;
+ }
};
+private:
THashMap<TActorId, TPipeInfo> PipesInfo;
ui64 NextResponseCookie;
@@ -379,9 +400,12 @@ private:
bool CanProcessWriteTxs() const;
bool CanProcessDeleteTxs() const;
+ ui64 GetGeneration();
+ void DestroySession(TPipeInfo& pipeInfo);
bool UseMediatorTimeCast = true;
THashMap<ui32, TVector<TEvPQ::TEvSourceIdRequest::TPtr>> SourceIdRequests;
+ TMaybe<ui64> TabletGeneration;
};
diff --git a/ydb/core/persqueue/subscriber.h b/ydb/core/persqueue/subscriber.h
index c1b6247ec4..7d8890ebb9 100644
--- a/ydb/core/persqueue/subscriber.h
+++ b/ydb/core/persqueue/subscriber.h
@@ -13,7 +13,7 @@ namespace NPQ {
struct TUserInfo;
struct TReadAnswer {
- ui64 Size;
+ ui64 Size = 0;
THolder<IEventBase> Event;
};
@@ -35,19 +35,27 @@ struct TReadInfo {
TVector<TRequestedBlob> Blobs; //offset, count, value
ui64 CachedOffset; //offset of head can be bigger than last databody offset
TVector<TClientBlob> Cached; //records from head
+ TActorId PipeClient;
+
+ ui64 SizeEstimate = 0;
+ ui64 RealReadOffset = 0;
+ ui64 LastOffset = 0;
+ bool Error = false;
TReadInfo() = delete;
TReadInfo(
const TString& user,
const TString& clientDC,
const ui64 offset,
+ const ui64 lastOffset,
const ui16 partNo,
const ui64 count,
const ui32 size,
const ui64 dst,
ui64 readTimestampMs,
TDuration waitQuotaTime,
- const bool isExternalRead
+ const bool isExternalRead,
+ const TActorId& pipeClient
)
: User(user)
, ClientDC(clientDC)
@@ -62,6 +70,8 @@ struct TReadInfo {
, IsExternalRead(isExternalRead)
, IsSubscription(false)
, CachedOffset(0)
+ , PipeClient(pipeClient)
+ , LastOffset(lastOffset)
{}
TReadAnswer FormAnswer(
@@ -70,7 +80,7 @@ struct TReadInfo {
const ui64 endOffset,
const ui32 partition,
TUserInfo* ui,
- const ui64 dst,
+ const ui64 dst,
const ui64 sizeLag,
const TActorId& tablet,
const NKikimrPQ::TPQTabletConfig::EMeteringMode meteringMode
diff --git a/ydb/core/persqueue/user_info.cpp b/ydb/core/persqueue/user_info.cpp
index 131f9057fb..5a13af0ad6 100644
--- a/ydb/core/persqueue/user_info.cpp
+++ b/ydb/core/persqueue/user_info.cpp
@@ -95,7 +95,7 @@ void TUsersInfoStorage::ParseDeprecated(const TString& key, const TString& data,
Y_ABORT_UNLESS(offset <= (ui64)Max<i64>(), "Offset is too big: %" PRIu64, offset);
if (!userInfo) {
- Create(ctx, user, 0, false, session, gen, step, static_cast<i64>(offset), 0, TInstant::Zero());
+ Create(ctx, user, 0, false, session, 0, gen, step, static_cast<i64>(offset), 0, TInstant::Zero(), {});
} else {
userInfo->Session = session;
userInfo->Generation = gen;
@@ -121,8 +121,9 @@ void TUsersInfoStorage::Parse(const TString& key, const TString& data, const TAc
TUserInfo* userInfo = GetIfExists(user);
if (!userInfo) {
Create(
- ctx, user, userData.GetReadRuleGeneration(), false, userData.GetSession(),
- userData.GetGeneration(), userData.GetStep(), offset, userData.GetOffsetRewindSum(), TInstant::Zero()
+ ctx, user, userData.GetReadRuleGeneration(), false, userData.GetSession(), userData.GetPartitionSessionId(),
+ userData.GetGeneration(), userData.GetStep(), offset,
+ userData.GetOffsetRewindSum(), TInstant::Zero(), {}
);
} else {
userInfo->Session = userData.GetSession();
@@ -147,7 +148,10 @@ TUserInfo& TUsersInfoStorage::GetOrCreate(const TString& user, const TActorConte
Y_ABORT_UNLESS(!user.empty());
auto it = UsersInfo.find(user);
if (it == UsersInfo.end()) {
- return Create(ctx, user, readRuleGeneration ? *readRuleGeneration : ++CurReadRuleGeneration, false, "", 0, 0, 0, 0, TInstant::Zero());
+ return Create(
+ ctx, user, readRuleGeneration ? *readRuleGeneration : ++CurReadRuleGeneration, false, "", 0,
+ 0, 0, 0, 0, TInstant::Zero(), {}
+ );
}
return it->second;
}
@@ -171,8 +175,9 @@ TUserInfo TUsersInfoStorage::CreateUserInfo(const TActorContext& ctx,
const ui64 readRuleGeneration,
bool important,
const TString& session,
+ ui64 partitionSessionId,
ui32 gen, ui32 step, i64 offset, ui64 readOffsetRewindSum,
- TInstant readFromTimestamp) const
+ TInstant readFromTimestamp, const TActorId& pipeClient) const
{
TString defaultServiceType = AppData(ctx)->PQConfig.GetDefaultClientServiceType().GetName();
TString userServiceType = "";
@@ -189,8 +194,8 @@ TUserInfo TUsersInfoStorage::CreateUserInfo(const TActorContext& ctx,
return {
ctx, StreamCountersSubgroup,
user, readRuleGeneration, important, TopicConverter, Partition,
- session, gen, step, offset, readOffsetRewindSum, DCId, readFromTimestamp, DbPath,
- meterRead
+ session, partitionSessionId, gen, step, offset, readOffsetRewindSum, DCId, readFromTimestamp, DbPath,
+ meterRead, pipeClient
};
}
@@ -198,14 +203,16 @@ TUserInfoBase TUsersInfoStorage::CreateUserInfo(const TString& user,
TMaybe<ui64> readRuleGeneration) const
{
return TUserInfoBase{user, readRuleGeneration ? *readRuleGeneration : ++CurReadRuleGeneration,
- "", 0, 0, 0, false, {}};
+ "", 0, 0, 0, false, {}, 0, {}};
}
TUserInfo& TUsersInfoStorage::Create(
- const TActorContext& ctx, const TString& user, const ui64 readRuleGeneration, bool important, const TString& session,
- ui32 gen, ui32 step, i64 offset, ui64 readOffsetRewindSum, TInstant readFromTimestamp
+ const TActorContext& ctx, const TString& user, const ui64 readRuleGeneration, bool important, const TString& session,
+ ui64 partitionSessionId, ui32 gen, ui32 step, i64 offset, ui64 readOffsetRewindSum,
+ TInstant readFromTimestamp, const TActorId& pipeClient
) {
- auto userInfo = CreateUserInfo(ctx, user, readRuleGeneration, important, session, gen, step, offset, readOffsetRewindSum, readFromTimestamp);
+ auto userInfo = CreateUserInfo(ctx, user, readRuleGeneration, important, session, partitionSessionId,
+ gen, step, offset, readOffsetRewindSum, readFromTimestamp, pipeClient);
auto result = UsersInfo.emplace(user, std::move(userInfo));
Y_ABORT_UNLESS(result.second);
return result.first->second;
diff --git a/ydb/core/persqueue/user_info.h b/ydb/core/persqueue/user_info.h
index 3d6319accd..328fdef716 100644
--- a/ydb/core/persqueue/user_info.h
+++ b/ydb/core/persqueue/user_info.h
@@ -6,6 +6,7 @@
#include "quota_tracker.h"
#include "account_read_quoter.h"
#include "metering_sink.h"
+#include "dread_cache_service/caching_service.h"
#include <ydb/core/base/counters.h>
#include <ydb/core/protos/counters_pq.pb.h>
@@ -47,6 +48,9 @@ struct TUserInfoBase {
bool Important = false;
TInstant ReadFromTimestamp;
+
+ ui64 PartitionSessionId = 0;
+ TActorId PipeClient;
};
struct TUserInfo: public TUserInfoBase {
@@ -88,6 +92,8 @@ struct TUserInfo: public TUserInfoBase {
std::shared_ptr<TPercentileCounter> ReadTimeLag;
bool NoConsumer = false;
+
+ bool DoInternalRead = false;
bool MeterRead = true;
bool Parsed = false;
@@ -163,11 +169,12 @@ struct TUserInfo: public TUserInfoBase {
NMonitoring::TDynamicCounterPtr streamCountersSubgroup,
const TString& user,
const ui64 readRuleGeneration, const bool important, const NPersQueue::TTopicConverterPtr& topicConverter,
- const ui32 partition, const TString &session, ui32 gen, ui32 step, i64 offset,
+ const ui32 partition, const TString& session, ui64 partitionSession, ui32 gen, ui32 step, i64 offset,
const ui64 readOffsetRewindSum, const TString& dcId, TInstant readFromTimestamp,
- const TString& dbPath, bool meterRead
+ const TString& dbPath, bool meterRead, const TActorId& pipeClient
)
- : TUserInfoBase{user, readRuleGeneration, session, gen, step, offset, important, readFromTimestamp}
+ : TUserInfoBase{user, readRuleGeneration, session, gen, step, offset, important,
+ readFromTimestamp, partitionSession, pipeClient}
, WriteTimestamp(TAppData::TimeProvider->Now())
, CreateTimestamp(TAppData::TimeProvider->Now())
, ReadTimestamp(TAppData::TimeProvider->Now())
@@ -385,8 +392,9 @@ public:
TUserInfoBase CreateUserInfo(const TString& user,
TMaybe<ui64> readRuleGeneration = {}) const;
TUserInfo& Create(
- const TActorContext& ctx, const TString& user, const ui64 readRuleGeneration, bool important, const TString &session,
- ui32 gen, ui32 step, i64 offset, ui64 readOffsetRewindSum, TInstant readFromTimestamp
+ const TActorContext& ctx, const TString& user, const ui64 readRuleGeneration, bool important, const TString& session,
+ ui64 partitionSessionId, ui32 gen, ui32 step, i64 offset, ui64 readOffsetRewindSum,
+ TInstant readFromTimestamp, const TActorId& pipeClient
);
void Clear(const TActorContext& ctx);
@@ -400,8 +408,9 @@ private:
const ui64 readRuleGeneration,
bool important,
const TString& session,
+ ui64 partitionSessionId,
ui32 gen, ui32 step, i64 offset, ui64 readOffsetRewindSum,
- TInstant readFromTimestamp) const;
+ TInstant readFromTimestamp, const TActorId& pipeClient) const;
private:
THashMap<TString, TUserInfo> UsersInfo;
diff --git a/ydb/core/persqueue/ut/common/pq_ut_common.cpp b/ydb/core/persqueue/ut/common/pq_ut_common.cpp
index 7e4b911c68..ade9422864 100644
--- a/ydb/core/persqueue/ut/common/pq_ut_common.cpp
+++ b/ydb/core/persqueue/ut/common/pq_ut_common.cpp
@@ -714,7 +714,11 @@ void CmdSetOffset(const ui32 partition, const TString& user, ui64 offset, bool e
}
-void CmdCreateSession(const ui32 partition, const TString& user, const TString& session, TTestContext& tc, const i64 offset, const ui32 gen, const ui32 step, bool error) {
+TActorId CmdCreateSession(const TPQCmdSettings& settings, TTestContext& tc) {
+
+ TActorId pipeClient = tc.Runtime->ConnectToPipe(tc.BalancerTabletId, tc.Edge, 0, GetPipeConfigWithRetries());
+ TActorId tabletPipe = tc.Runtime->ConnectToPipe(tc.TabletId, tc.Edge, 0, GetPipeConfigWithRetries());
+
TAutoPtr<IEventHandle> handle;
TEvPersQueue::TEvResponse *result;
THolder<TEvPersQueue::TEvRequest> request;
@@ -723,12 +727,18 @@ void CmdCreateSession(const ui32 partition, const TString& user, const TString&
tc.Runtime->ResetScheduledCount();
request.Reset(new TEvPersQueue::TEvRequest);
auto req = request->Record.MutablePartitionRequest();
- req->SetPartition(partition);
+
+ ActorIdToProto(tabletPipe, req->MutablePipeClient());
+ Cerr << "Set pipe for create session: " << tabletPipe.ToString();
+
+ req->SetPartition(settings.Partition);
auto off = req->MutableCmdCreateSession();
- off->SetClientId(user);
- off->SetSessionId(session);
- off->SetGeneration(gen);
- off->SetStep(step);
+ off->SetClientId(settings.User);
+ off->SetSessionId(settings.Session);
+ off->SetGeneration(settings.Generation);
+ off->SetStep(settings.Step);
+ off->SetPartitionSessionId(settings.PartitionSessionId);
+
tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries());
result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle);
@@ -740,24 +750,25 @@ void CmdCreateSession(const ui32 partition, const TString& user, const TString&
continue;
}
- if (error) {
+ if (settings.ToFail) {
UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::WRONG_COOKIE);
- return;
+ return pipeClient;
}
- UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK);
+ UNIT_ASSERT_EQUAL_C(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK, result->Record.DebugString());
UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdGetClientOffsetResult());
auto resp = result->Record.GetPartitionResponse().GetCmdGetClientOffsetResult();
- UNIT_ASSERT(resp.HasOffset() && (i64)resp.GetOffset() == offset);
+ UNIT_ASSERT(resp.HasOffset() && (i64)resp.GetOffset() == settings.Offset);
retriesLeft = 0;
} catch (NActors::TSchedulingLimitReachedException) {
UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2);
}
}
+ return tabletPipe;
}
-void CmdKillSession(const ui32 partition, const TString& user, const TString& session, TTestContext& tc) {
+void CmdKillSession(const ui32 partition, const TString& user, const TString& session, TTestContext& tc, const TActorId& pipe) {
TAutoPtr<IEventHandle> handle;
TEvPersQueue::TEvResponse *result;
THolder<TEvPersQueue::TEvRequest> request;
@@ -770,6 +781,9 @@ void CmdKillSession(const ui32 partition, const TString& user, const TString& se
auto off = req->MutableCmdDeleteSession();
off->SetClientId(user);
off->SetSessionId(session);
+ if (pipe) {
+ ActorIdToProto(pipe, req->MutablePipeClient());
+ }
tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries());
result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle);
@@ -865,83 +879,218 @@ TVector<TString> CmdSourceIdRead(TTestContext& tc) {
return sourceIds;
}
+bool CheckCmdReadResult(const TPQCmdReadSettings& settings, TEvPersQueue::TEvResponse* result) {
+
+ UNIT_ASSERT(result);
+ UNIT_ASSERT(result->Record.HasStatus());
-void CmdRead(const ui32 partition, const ui64 offset, const ui32 count, const ui32 size, const ui32 resCount, bool timeouted, TTestContext& tc, TVector<i32> offsets, const ui32 maxTimeLagMs, const ui64 readTimestampMs, const TString user) {
+ UNIT_ASSERT(result->Record.HasPartitionResponse());
+ UNIT_ASSERT_EQUAL(result->Record.GetPartitionResponse().GetCookie(), 123);
+ if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) {
+ return false;
+ }
+ if (settings.Timeout) {
+ UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK);
+ UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdReadResult());
+ auto res = result->Record.GetPartitionResponse().GetCmdReadResult();
+ UNIT_ASSERT_EQUAL(res.ResultSize(), 0);
+ return true;
+ }
+ if (settings.ToFail) {
+ UNIT_ASSERT_C(result->Record.GetErrorCode() != NPersQueue::NErrorCode::OK, result->Record.DebugString());
+ return true;
+ }
+ UNIT_ASSERT_EQUAL_C(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK, result->Record.DebugString());
+ if (!settings.DirectReadId) {
+ UNIT_ASSERT_C(result->Record.GetPartitionResponse().HasCmdReadResult(), result->Record.GetPartitionResponse().DebugString());
+ auto res = result->Record.GetPartitionResponse().GetCmdReadResult();
+
+ UNIT_ASSERT_EQUAL(res.ResultSize(), settings.ResCount);
+ ui64 off = settings.Offset;
+
+ for (ui32 i = 0; i < settings.ResCount; ++i) {
+ auto r = res.GetResult(i);
+ if (settings.Offsets.empty()) {
+ if (settings.ReadTimestampMs == 0) {
+ UNIT_ASSERT_EQUAL((ui64)r.GetOffset(), off);
+ }
+ UNIT_ASSERT(r.GetSourceId().size() == 9 && r.GetSourceId().StartsWith("sourceid"));
+ UNIT_ASSERT_EQUAL(ui32(r.GetData()[0]), off);
+ UNIT_ASSERT_EQUAL(ui32((unsigned char)r.GetData().back()), r.GetSeqNo() % 256);
+ ++off;
+ } else {
+ UNIT_ASSERT(settings.Offsets[i] == (i64)r.GetOffset());
+ }
+ }
+ } else {
+ UNIT_ASSERT_C(result->Record.GetPartitionResponse().HasCmdPrepareReadResult(), result->Record.GetPartitionResponse().DebugString());
+ auto res = result->Record.GetPartitionResponse().GetCmdPrepareReadResult();
+ UNIT_ASSERT(res.GetBytesSizeEstimate() > 0);
+ UNIT_ASSERT(res.GetEndOffset() > 0);
+ UNIT_ASSERT_VALUES_EQUAL(res.GetDirectReadId(), settings.DirectReadId);
+ }
+ return true;
+}
+
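A hypothetical use of the checker above from a test, showing how DirectReadId switches the expected response from inline CmdReadResult data to a CmdPrepareReadResult descriptor (values are illustrative):

    TPQCmdReadSettings settings("session1", /*partition=*/0, /*offset=*/0,
                                /*count=*/1, /*size=*/99999, /*resCount=*/1);
    settings.DirectReadId = 1; // expect CmdPrepareReadResult instead of message payloads
    settings.Pipe = pipe;      // pipe returned by CmdCreateSession
    UNIT_ASSERT(CheckCmdReadResult(settings, result));
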
+void CmdRead(
+ const ui32 partition, const ui64 offset, const ui32 count, const ui32 size, const ui32 resCount, bool timeouted,
+ TTestContext& tc, TVector<i32> offsets, const ui32 maxTimeLagMs, const ui64 readTimestampMs, const TString user
+) {
+ return CmdRead(
+ TPQCmdReadSettings("", partition, offset, count, size, resCount, timeouted,
+ offsets, maxTimeLagMs, readTimestampMs, user),
+ tc
+ );
+}
+
+void CmdRead(const TPQCmdReadSettings& settings, TTestContext& tc) {
TAutoPtr<IEventHandle> handle;
TEvPersQueue::TEvResponse *result;
THolder<TEvPersQueue::TEvRequest> request;
- for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) {
+ for (ui32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) {
try {
tc.Runtime->ResetScheduledCount();
request.Reset(new TEvPersQueue::TEvRequest);
auto req = request->Record.MutablePartitionRequest();
- req->SetPartition(partition);
+ req->SetPartition(settings.Partition);
auto read = req->MutableCmdRead();
- read->SetOffset(offset);
- read->SetClientId(user);
- read->SetCount(count);
- read->SetBytes(size);
- if (maxTimeLagMs > 0) {
- read->SetMaxTimeLagMs(maxTimeLagMs);
+ read->SetOffset(settings.Offset);
+ read->SetSessionId(settings.Session);
+ read->SetClientId(settings.User);
+ read->SetCount(settings.Count);
+ read->SetBytes(settings.Size);
+ if (settings.MaxTimeLagMs > 0) {
+ read->SetMaxTimeLagMs(settings.MaxTimeLagMs);
}
- if (readTimestampMs > 0) {
- read->SetReadTimestampMs(readTimestampMs);
+ if (settings.ReadTimestampMs > 0) {
+ read->SetReadTimestampMs(settings.ReadTimestampMs);
}
+ if (settings.DirectReadId > 0) {
+ read->SetDirectReadId(settings.DirectReadId);
+ }
+ if (settings.PartitionSessionId > 0) {
+ read->SetPartitionSessionId(settings.PartitionSessionId);
+ }
+ if (settings.Pipe) {
+ ActorIdToProto(settings.Pipe, req->MutablePipeClient());
+ }
+
req->SetCookie(123);
+
+ Cerr << "Send read request: " << request->Record.DebugString() << " via pipe: " << tc.Edge.ToString() << Endl;
tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries());
result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle);
-
- UNIT_ASSERT(result);
- UNIT_ASSERT(result->Record.HasStatus());
-
- UNIT_ASSERT(result->Record.HasPartitionResponse());
- UNIT_ASSERT_EQUAL(result->Record.GetPartitionResponse().GetCookie(), 123);
- if (result->Record.GetErrorCode() == NPersQueue::NErrorCode::INITIALIZING) {
+ auto checkRes = CheckCmdReadResult(settings, result);
+ if (!checkRes) {
tc.Runtime->DispatchEvents(); // Dispatch events so that initialization can make progress
retriesLeft = 3;
continue;
- }
- if (timeouted) {
- UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK);
- UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdReadResult());
- auto res = result->Record.GetPartitionResponse().GetCmdReadResult();
- UNIT_ASSERT_EQUAL(res.ResultSize(), 0);
+ } else {
break;
}
- UNIT_ASSERT_EQUAL(result->Record.GetErrorCode(), NPersQueue::NErrorCode::OK);
+ } catch (NActors::TSchedulingLimitReachedException) {
+ UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2);
+ }
+ }
+}
+
+template <class TProto>
+void FillDirectReadKey(TProto* proto, const TCmdDirectReadSettings& settings) {
+ proto->SetDirectReadId(settings.DirectReadId);
+ auto* key = proto->MutableSessionKey();
+ key->SetSessionId(settings.Session);
+ key->SetPartitionSessionId(settings.PartitionSessionId);
+}
- UNIT_ASSERT(result->Record.GetPartitionResponse().HasCmdReadResult());
- auto res = result->Record.GetPartitionResponse().GetCmdReadResult();
+template <class TEvent>
+void CheckDirectReadEvent(TEvent* event, const TCmdDirectReadSettings& settings) {
+ UNIT_ASSERT(event->ReadKey.ReadId == settings.DirectReadId);
+ UNIT_ASSERT(event->ReadKey.SessionId == settings.Session);
+ UNIT_ASSERT(event->ReadKey.PartitionSessionId > 0);
+}
- UNIT_ASSERT_EQUAL_C(res.ResultSize(), resCount,
- "Result size missmatch: expected " << resCount << " but received " << res.ResultSize());
- ui64 off = offset;
+void CmdPublishOrForgetRead(const TCmdDirectReadSettings& settings, bool isPublish, TTestContext& tc) {
+ TAutoPtr<IEventHandle> handle;
+ TEvPersQueue::TEvResponse *result;
+ THolder<TEvPersQueue::TEvRequest> request;
+ tc.Runtime->ResetScheduledCount();
+ request.Reset(new TEvPersQueue::TEvRequest);
+ auto req = request->Record.MutablePartitionRequest();
- for (ui32 i = 0; i < resCount; ++i) {
+ ActorIdToProto(settings.Pipe, req->MutablePipeClient());
- auto r = res.GetResult(i);
- if (offsets.empty()) {
- if (readTimestampMs == 0) {
- UNIT_ASSERT_EQUAL((ui64)r.GetOffset(), off);
- }
- UNIT_ASSERT(r.GetSourceId().size() == 9 && r.GetSourceId().StartsWith("sourceid"));
- UNIT_ASSERT_EQUAL(ui32(r.GetData()[0]), off);
- UNIT_ASSERT_EQUAL(ui32((unsigned char)r.GetData().back()), r.GetSeqNo() % 256);
- ++off;
- } else {
- UNIT_ASSERT(offsets[i] == (i64)r.GetOffset());
+ req->SetPartition(settings.Partition);
+ req->SetCookie(123);
+ if (isPublish) {
+ FillDirectReadKey(req->MutableCmdPublishRead(), settings);
+ } else {
+ FillDirectReadKey(req->MutableCmdForgetRead(), settings);
+ }
+
+ TAtomic hasEvent = 0;
+ tc.Runtime->SetObserverFunc(
+ [&](TAutoPtr<IEventHandle>& ev) {
+ if (auto* msg = ev->CastAsLocal<TEvPQ::TEvStageDirectReadData>()) {
+ Cerr << "Got publish event\n";
+ UNIT_ASSERT(isPublish);
+ UNIT_ASSERT(msg->TabletGeneration);
+ //AtomicSet(hasEvent, 1);
+ UNIT_ASSERT(msg->Response != nullptr);
+ } else if (auto* msg = ev->CastAsLocal<TEvPQ::TEvPublishDirectRead>()) {
+ Cerr << "Got publish event\n";
+ UNIT_ASSERT(isPublish);
+ CheckDirectReadEvent(msg, settings);
+ AtomicSet(hasEvent, 1);
+ } else if (auto* msg = ev->CastAsLocal<TEvPQ::TEvForgetDirectRead>()) {
+ UNIT_ASSERT(!isPublish);
+ CheckDirectReadEvent(msg, settings);
+ AtomicSet(hasEvent, 1);
}
+ return TTestActorRuntimeBase::EEventAction::PROCESS;
}
- retriesLeft = 0;
- } catch (NActors::TSchedulingLimitReachedException) {
- UNIT_ASSERT_VALUES_EQUAL(retriesLeft, 2);
- }
+ );
+ Cerr << "Send " << (isPublish? "publish " : "forget ") << "read request: " << req->DebugString() << Endl;
+
+ tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries());
+ result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvResponse>(handle);
+
+ UNIT_ASSERT(result);
+ UNIT_ASSERT(result->Record.HasStatus());
+ Cerr << "Got direct read response: " << result->Record.DebugString() << Endl;
+ if (settings.Fail) {
+ UNIT_ASSERT(result->Record.GetErrorCode() != NPersQueue::NErrorCode::OK);
+ return;
}
+ UNIT_ASSERT_C(result->Record.GetErrorCode() == NPersQueue::NErrorCode::OK, result->Record.DebugString());
+
+ UNIT_ASSERT(result->Record.HasPartitionResponse());
+ UNIT_ASSERT_EQUAL(result->Record.GetPartitionResponse().GetCookie(), 123);
+ if (isPublish) {
+ UNIT_ASSERT_C(result->Record.GetPartitionResponse().HasCmdPublishReadResult(), result->Record.DebugString());
+ } else {
+ UNIT_ASSERT_C(result->Record.GetPartitionResponse().HasCmdForgetReadResult(), result->Record.DebugString());
+ }
+ //tc.Runtime->DispatchEvents();
+ Cerr << "Expect failure: " << settings.Fail << ", event received: " << AtomicGet(hasEvent) << Endl;
+ if (settings.Fail) {
+ UNIT_ASSERT(!AtomicGet(hasEvent));
+ } else {
+ // UNIT_ASSERT(AtomicGet(hasEvent)); // TODO: fix this - the event is sent but not caught for some reason
+ }
+}
+
+void CmdPublishRead(const TCmdDirectReadSettings& settings, TTestContext& tc) {
+ return CmdPublishOrForgetRead(settings, true, tc);
}
+void CmdForgetRead(const TCmdDirectReadSettings& settings, TTestContext& tc) {
+ return CmdPublishOrForgetRead(settings, false, tc);
+}
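
For reference, a minimal publish/forget round-trip using the helpers above, mirroring the happy-path test in pq_ut.cpp (all values illustrative):

    TCmdDirectReadSettings publishSettings{/*Partition=*/0, "session1",
                                           /*PartitionSessionId=*/1,
                                           /*DirectReadId=*/1, pipe, /*Fail=*/false};
    CmdPublishRead(publishSettings, tc); // stages the publish via the cache service
    CmdForgetRead(publishSettings, tc);  // then releases the same read id
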
void FillUserInfo(NKikimrClient::TKeyValueRequest_TCmdWrite* write, const TString& client, ui32 partition, ui64 offset) {
NPQ::TKeyPrefix ikey(NPQ::TKeyPrefix::TypeInfo, partition, NPQ::TKeyPrefix::MarkUser);
diff --git a/ydb/core/persqueue/ut/common/pq_ut_common.h b/ydb/core/persqueue/ut/common/pq_ut_common.h
index 8593c7fd8d..301e836a1f 100644
--- a/ydb/core/persqueue/ut/common/pq_ut_common.h
+++ b/ydb/core/persqueue/ut/common/pq_ut_common.h
@@ -411,6 +411,62 @@ void WritePartDataWithBigMsg(
//
TVector<TString> CmdSourceIdRead(TTestContext& tc);
+struct TPQCmdSettingsBase {
+ ui32 Partition = 0;
+ TString User;
+ TString Session;
+ ui64 PartitionSessionId = 0;
+ i64 Offset = 0;
+ bool ToFail = false;
+};
+
+struct TPQCmdSettings : public TPQCmdSettingsBase {
+ ui32 Generation = 0;
+ ui32 Step = 0;
+ TPQCmdSettings() = default;
+ TPQCmdSettings(ui32 partition, const TString& user, const TString& session, i64 offset = 0, ui32 generation = 0,
+ ui32 step = 0, bool error = false)
+ : TPQCmdSettingsBase{partition, user, session, 0, offset, error}
+ , Generation(generation)
+ , Step(step)
+ {}
+};
+
+struct TPQCmdReadSettings : public TPQCmdSettingsBase {
+ ui32 Count = 0;
+ ui32 Size = 0;
+ ui32 ResCount = 0;
+ bool Timeout = false;
+ TVector<i32> Offsets;
+ ui32 MaxTimeLagMs = 0;
+ ui32 ReadTimestampMs = 0;
+ ui64 DirectReadId = 0;
+ TActorId Pipe;
+ TPQCmdReadSettings() = default;
+ TPQCmdReadSettings(const TString& session, ui32 partition, i64 offset, ui32 count, ui32 size, ui32 resCount, bool timeout = false,
+ TVector<i32> offsets = {}, const ui32 maxTimeLagMs = 0, const ui64 readTimestampMs = 0,
+ const TString user = "user")
+
+ : TPQCmdSettingsBase{partition, user, session, 0, offset, false}
+ , Count(count)
+ , Size(size)
+ , ResCount(resCount)
+ , Timeout(timeout)
+ , Offsets (offsets)
+ , MaxTimeLagMs(maxTimeLagMs)
+ , ReadTimestampMs(readTimestampMs)
+ {}
+};
+
+struct TCmdDirectReadSettings {
+ ui32 Partition;
+ TString Session;
+ ui64 PartitionSessionId;
+ ui64 DirectReadId;
+ TActorId Pipe;
+ bool Fail = false;
+};
+
std::pair<TString, TActorId> CmdSetOwner(
const ui32 partition,
TTestContext& tc,
@@ -425,15 +481,7 @@ std::pair<TString, TActorId> CmdSetOwner(
const TString& owner = "default",
bool force = true);
-void CmdCreateSession(
- const ui32 partition,
- const TString& user,
- const TString& session,
- TTestContext& tc,
- const i64 offset = 0,
- const ui32 gen = 0,
- const ui32 step = 0,
- bool error = false);
+TActorId CmdCreateSession(const TPQCmdSettings& settings, TTestContext& tc);
void CmdGetOffset(
const ui32 partition,
@@ -447,7 +495,8 @@ void CmdKillSession(
const ui32 partition,
const TString& user,
const TString& session,
- TTestContext& tc);
+ TTestContext& tc,
+ const TActorId& pipe = {});
void CmdRead(
const ui32 partition,
@@ -462,6 +511,13 @@ void CmdRead(
const ui64 readTimestampMs = 0,
const TString user = "user");
+void CmdRead(
+ const TPQCmdReadSettings& settings,
+ TTestContext& tc);
+
+void CmdPublishRead(const TCmdDirectReadSettings& settings, TTestContext& tc);
+void CmdForgetRead(const TCmdDirectReadSettings& settings, TTestContext& tc);
+
void CmdReserveBytes(
const ui32 partition,
TTestContext& tc,
diff --git a/ydb/core/persqueue/ut/partition_ut.cpp b/ydb/core/persqueue/ut/partition_ut.cpp
index 2f0a9b584f..2ea5da2d6a 100644
--- a/ydb/core/persqueue/ut/partition_ut.cpp
+++ b/ydb/core/persqueue/ut/partition_ut.cpp
@@ -265,6 +265,7 @@ void TPartitionFixture::CreatePartitionActor(ui32 id,
auto actor = new NPQ::TPartition(Ctx->TabletId,
id,
Ctx->Edge,
+ 0,
Ctx->Edge,
TopicConverter,
"dcId",
@@ -339,8 +340,10 @@ void TPartitionFixture::SendCreateSession(ui64 cookie,
clientId,
0,
sessionId,
+ 0,
generation,
step,
+ TActorId{},
TEvPQ::TEvSetClientInfo::ESCI_CREATE_SESSION);
Ctx->Runtime->SingleSys()->Send(new IEventHandle(ActorId, Ctx->Edge, event.Release()));
}
@@ -355,7 +358,9 @@ void TPartitionFixture::SendSetOffset(ui64 cookie,
offset,
sessionId,
0,
- 0);
+ 0,
+ 0,
+ TActorId{});
Ctx->Runtime->SingleSys()->Send(new IEventHandle(ActorId, Ctx->Edge, event.Release()));
}
@@ -523,19 +528,19 @@ void TPartitionFixture::WaitProxyResponse(const TProxyResponseMatcher& matcher)
}
if (matcher.Status) {
- UNIT_ASSERT(event->Response.HasStatus());
- UNIT_ASSERT(*matcher.Status == event->Response.GetStatus());
+ UNIT_ASSERT(event->Response->HasStatus());
+ UNIT_ASSERT(*matcher.Status == event->Response->GetStatus());
}
if (matcher.ErrorCode) {
- UNIT_ASSERT(event->Response.HasErrorCode());
- UNIT_ASSERT(*matcher.ErrorCode == event->Response.GetErrorCode());
+ UNIT_ASSERT(event->Response->HasErrorCode());
+ UNIT_ASSERT(*matcher.ErrorCode == event->Response->GetErrorCode());
}
if (matcher.Offset) {
- UNIT_ASSERT(event->Response.HasPartitionResponse());
- UNIT_ASSERT(event->Response.GetPartitionResponse().HasCmdGetClientOffsetResult());
- UNIT_ASSERT_VALUES_EQUAL(*matcher.Offset, event->Response.GetPartitionResponse().GetCmdGetClientOffsetResult().GetOffset());
+ UNIT_ASSERT(event->Response->HasPartitionResponse());
+ UNIT_ASSERT(event->Response->GetPartitionResponse().HasCmdGetClientOffsetResult());
+ UNIT_ASSERT_VALUES_EQUAL(*matcher.Offset, event->Response->GetPartitionResponse().GetCmdGetClientOffsetResult().GetOffset());
}
}
@@ -1342,7 +1347,7 @@ Y_UNIT_TEST_F(ReserveSubDomainOutOfSpace, TPartitionFixture)
SendChangeOwner(cookie, "owner1", Ctx->Edge);
auto ownerEvent = Ctx->Runtime->GrabEdgeEvent<TEvPQ::TEvProxyResponse>(TDuration::Seconds(1));
UNIT_ASSERT(ownerEvent != nullptr);
- auto ownerCookie = ownerEvent->Response.GetPartitionResponse().GetCmdGetOwnershipResult().GetOwnerCookie();
+ auto ownerCookie = ownerEvent->Response->GetPartitionResponse().GetCmdGetOwnershipResult().GetOwnerCookie();
TAutoPtr<IEventHandle> handle;
std::function<bool(const TEvPQ::TEvProxyResponse&)> truth = [&](const TEvPQ::TEvProxyResponse& e) { return cookie == e.Cookie; };
@@ -1387,7 +1392,7 @@ Y_UNIT_TEST_F(WriteSubDomainOutOfSpace, TPartitionFixture)
SendChangeOwner(cookie, "owner1", Ctx->Edge, true);
auto ownerEvent = Ctx->Runtime->GrabEdgeEvent<TEvPQ::TEvProxyResponse>(TDuration::Seconds(1));
UNIT_ASSERT(ownerEvent != nullptr);
- auto ownerCookie = ownerEvent->Response.GetPartitionResponse().GetCmdGetOwnershipResult().GetOwnerCookie();
+ auto ownerCookie = ownerEvent->Response->GetPartitionResponse().GetCmdGetOwnershipResult().GetOwnerCookie();
TAutoPtr<IEventHandle> handle;
std::function<bool(const TEvPQ::TEvError&)> truth = [&](const TEvPQ::TEvError& e) { return cookie == e.Cookie; };
@@ -1437,7 +1442,7 @@ Y_UNIT_TEST_F(WriteSubDomainOutOfSpace_DisableExpiration, TPartitionFixture)
SendChangeOwner(cookie, "owner1", Ctx->Edge, true);
auto ownerEvent = Ctx->Runtime->GrabEdgeEvent<TEvPQ::TEvProxyResponse>(TDuration::Seconds(1));
UNIT_ASSERT(ownerEvent != nullptr);
- auto ownerCookie = ownerEvent->Response.GetPartitionResponse().GetCmdGetOwnershipResult().GetOwnerCookie();
+ auto ownerCookie = ownerEvent->Response->GetPartitionResponse().GetCmdGetOwnershipResult().GetOwnerCookie();
TAutoPtr<IEventHandle> handle;
std::function<bool(const TEvPQ::TEvProxyResponse&)> truth = [&](const TEvPQ::TEvProxyResponse& e) { return cookie == e.Cookie; };
@@ -1464,7 +1469,7 @@ Y_UNIT_TEST_F(WriteSubDomainOutOfSpace_DisableExpiration, TPartitionFixture)
event = Ctx->Runtime->GrabEdgeEventIf<TEvPQ::TEvProxyResponse>(handle, truth, TDuration::Seconds(1));
UNIT_ASSERT(event != nullptr);
- UNIT_ASSERT_EQUAL(NMsgBusProxy::MSTATUS_OK, event->Response.GetStatus());
+ UNIT_ASSERT_EQUAL(NMsgBusProxy::MSTATUS_OK, event->Response->GetStatus());
}
Y_UNIT_TEST_F(WriteSubDomainOutOfSpace_IgnoreQuotaDeadline, TPartitionFixture)
@@ -1493,7 +1498,7 @@ Y_UNIT_TEST_F(WriteSubDomainOutOfSpace_IgnoreQuotaDeadline, TPartitionFixture)
SendChangeOwner(cookie, "owner1", Ctx->Edge, true);
auto ownerEvent = Ctx->Runtime->GrabEdgeEvent<TEvPQ::TEvProxyResponse>(TDuration::Seconds(1));
UNIT_ASSERT(ownerEvent != nullptr);
- auto ownerCookie = ownerEvent->Response.GetPartitionResponse().GetCmdGetOwnershipResult().GetOwnerCookie();
+ auto ownerCookie = ownerEvent->Response->GetPartitionResponse().GetCmdGetOwnershipResult().GetOwnerCookie();
TAutoPtr<IEventHandle> handle;
std::function<bool(const TEvPQ::TEvProxyResponse&)> truth = [&](const TEvPQ::TEvProxyResponse& e) { return cookie == e.Cookie; };
@@ -1520,7 +1525,7 @@ Y_UNIT_TEST_F(WriteSubDomainOutOfSpace_IgnoreQuotaDeadline, TPartitionFixture)
event = Ctx->Runtime->GrabEdgeEventIf<TEvPQ::TEvProxyResponse>(handle, truth, TDuration::Seconds(1));
UNIT_ASSERT(event != nullptr);
- UNIT_ASSERT_EQUAL(NMsgBusProxy::MSTATUS_OK, event->Response.GetStatus());
+ UNIT_ASSERT_EQUAL(NMsgBusProxy::MSTATUS_OK, event->Response->GetStatus());
}
}
diff --git a/ydb/core/persqueue/ut/pq_ut.cpp b/ydb/core/persqueue/ut/pq_ut.cpp
index 8c1d44844b..2c89b9f95c 100644
--- a/ydb/core/persqueue/ut/pq_ut.cpp
+++ b/ydb/core/persqueue/ut/pq_ut.cpp
@@ -20,6 +20,135 @@ const static TString TOPIC_NAME = "rt3.dc1--topic";
Y_UNIT_TEST_SUITE(TPQTest) {
+Y_UNIT_TEST(TestDirectReadHappyWay) {
+ TTestContext tc;
+ RunTestWithReboots(tc.TabletIds, [&]() {
+ return tc.InitialEventsFilter.Prepare();
+ }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) {
+ activeZone = false;
+ TFinalizer finalizer(tc);
+ tc.Prepare(dispatchName, setup, activeZone);
+ tc.Runtime->SetScheduledLimit(1000);
+ tc.Runtime->RegisterService(MakePQDReadCacheServiceActorId(), tc.Runtime->Register(
+ CreatePQDReadCacheService(new NMonitoring::TDynamicCounters()))
+ );
+
+ PQTabletPrepare({.partitions = 1, .writeSpeed = 100_KB}, {{"user1", true}}, tc);
+ TVector<std::pair<ui64, TString>> data;
+ TString s{2_MB, 'c'};
+ data.push_back({1, s});
+ CmdWrite(0, "sourceid0", data, tc, false, {}, false, "", -1, 0, false, false, true);
+ TString sessionId = "session1";
+ TString user = "user1";
+ TPQCmdSettings sessionSettings{0, user, sessionId};
+ sessionSettings.PartitionSessionId = 1;
+
+ TPQCmdReadSettings readSettings{sessionId, 0, 0, 1, 99999, 1};
+ readSettings.PartitionSessionId = 1;
+ readSettings.DirectReadId = 1;
+ readSettings.User = user;
+
+ activeZone = false;
+ auto pipe = CmdCreateSession(sessionSettings, tc);
+ TCmdDirectReadSettings publishSettings{0, sessionId, 1, 1, pipe, false};
+ readSettings.Pipe = pipe;
+ CmdRead(readSettings, tc);
+ Cerr << "Run cmd publish\n";
+ CmdPublishRead(publishSettings, tc);
+ Cerr << "Run cmd forget\n";
+ CmdForgetRead(publishSettings, tc);
+ });
+}
+
+Y_UNIT_TEST(DirectReadBadSessionOrPipe) {
+ TTestContext tc;
+ RunTestWithReboots(tc.TabletIds, [&]() {
+ return tc.InitialEventsFilter.Prepare();
+ }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) {
+ TFinalizer finalizer(tc);
+ tc.Prepare(dispatchName, setup, activeZone);
+ activeZone = false;
+ tc.Runtime->SetScheduledLimit(1000);
+
+ PQTabletPrepare({.partitions = 1, .writeSpeed = 100_KB}, {{"user1", true}}, tc);
+ TVector<std::pair<ui64, TString>> data;
+ TString s{2_MB, 'c'};
+ data.push_back({1, s});
+ CmdWrite(0, "sourceid2", data, tc, false, {}, false, "", -1, 0, false, false, true);
+ TString sessionId = "session2";
+ TString user = "user2";
+ TPQCmdSettings sessionSettings{0, user, sessionId};
+ sessionSettings.PartitionSessionId = 1;
+
+ TPQCmdReadSettings readSettings(sessionId, 0, 0, 1, 99999, 1);
+ readSettings.PartitionSessionId = 1;
+ readSettings.DirectReadId = 1;
+ readSettings.User = user;
+ activeZone = false;
+
+ readSettings.ToFail = true;
+ //No pipe
+ CmdRead(readSettings, tc);
+ auto pipe = CmdCreateSession(sessionSettings, tc);
+ readSettings.Pipe = pipe;
+ readSettings.Session = "";
+ // No session
+ CmdRead(readSettings, tc);
+ readSettings.Session = "bad-session";
+ // Bad session
+ CmdRead(readSettings, tc);
+ activeZone = false;
+ readSettings.Session = sessionId;
+ CmdKillSession(0, user, sessionId, tc, pipe);
+ activeZone = false;
+ // Dead session
+ CmdRead(readSettings, tc);
+
+ activeZone = false;
+ TCmdDirectReadSettings publishSettings{0, sessionId, 1, 1, pipe, true};
+ readSettings.Pipe = pipe;
+ activeZone = false;
+ // Dead session
+ Cerr << "Publish read\n";
+ CmdPublishRead(publishSettings, tc);
+ Cerr << "Forget read\n";
+ CmdForgetRead(publishSettings, tc);
+ });
+}
+
+Y_UNIT_TEST(DirectReadOldPipe) {
+ TTestContext tc;
+ RunTestWithReboots(tc.TabletIds, [&]() {
+ return tc.InitialEventsFilter.Prepare();
+ }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) {
+ TFinalizer finalizer(tc);
+ tc.Prepare(dispatchName, setup, activeZone);
+ activeZone = false;
+ tc.Runtime->SetScheduledLimit(1000);
+
+ PQTabletPrepare({.partitions = 1, .writeSpeed = 100_KB}, {{"user1", true}}, tc);
+ TString sessionId = "session2";
+ TString user = "user2";
+ TPQCmdSettings sessionSettings{0, user, sessionId};
+ sessionSettings.PartitionSessionId = 1;
+
+ TPQCmdReadSettings readSettings(sessionId, 0, 0, 1, 99999, 1);
+ readSettings.PartitionSessionId = 1;
+ readSettings.DirectReadId = 1;
+ readSettings.ToFail = true;
+ activeZone = false;
+
+ auto pipe = CmdCreateSession(sessionSettings, tc);
+
+ auto event = MakeHolder<TEvTabletPipe::TEvServerDisconnected>(0, pipe, TActorId{});
+ tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, event.Release(), 0, GetPipeConfigWithRetries());
+ readSettings.Pipe = pipe;
+
+ CmdRead(readSettings, tc);
+ });
+}
+
Y_UNIT_TEST(TestPartitionTotalQuota) {
TTestContext tc;
RunTestWithReboots(tc.TabletIds, [&]() {
@@ -1698,7 +1827,7 @@ Y_UNIT_TEST(TestReadSessions) {
activeZone = true;
TVector<std::pair<ui64, TString>> data;
- CmdCreateSession(0, "user1", "session1", tc);
+ CmdCreateSession(TPQCmdSettings{0, "user1", "session1"}, tc);
CmdSetOffset(0, "user1", 0, false, tc, "session1"); //all ok - session is set
CmdSetOffset(0, "user1", 0, true, tc, "other_session"); //fails - session1 is active
@@ -1706,10 +1835,10 @@ Y_UNIT_TEST(TestReadSessions) {
CmdSetOffset(0, "user1", 0, false, tc, "session1");
- CmdCreateSession(0, "user1", "session2", tc, 0, 1, 1);
- CmdCreateSession(0, "user1", "session3", tc, 0, 1, 1, true); //error on creation
- CmdCreateSession(0, "user1", "session3", tc, 0, 0, 2, true); //error on creation
- CmdCreateSession(0, "user1", "session3", tc, 0, 0, 0, true); //error on creation
+ CmdCreateSession(TPQCmdSettings{0, "user1", "session2", 0, 1, 1}, tc);
+ CmdCreateSession(TPQCmdSettings{0, "user1", "session3", 0, 1, 1, true}, tc); //error on creation
+ CmdCreateSession(TPQCmdSettings{0, "user1", "session3", 0, 0, 2, true}, tc); //error on creation
+ CmdCreateSession(TPQCmdSettings{0, "user1", "session3", 0, 0, 0, true}, tc); //error on creation
CmdSetOffset(0, "user1", 0, true, tc, "session1");
CmdSetOffset(0, "user1", 0, true, tc, "session3");
CmdSetOffset(0, "user1", 0, false, tc, "session2");
@@ -1827,7 +1956,7 @@ Y_UNIT_TEST(TestReadSubscription) {
TVector<std::pair<ui64, TString>> data;
- ui32 pp = 8 + 4 + 2 + 9;
+ ui32 pp = 8 + 4 + 2 + 9;
TString tmp0{32 - pp - 2, '-'};
char k = 0;
for (ui32 i = 0; i < 5; ++i) {
diff --git a/ydb/core/persqueue/ya.make b/ydb/core/persqueue/ya.make
index 28086333e4..8000bbe68a 100644
--- a/ydb/core/persqueue/ya.make
+++ b/ydb/core/persqueue/ya.make
@@ -36,6 +36,7 @@ SRCS(
utils.cpp
write_meta.cpp
microseconds_sliding_window.cpp
+ dread_cache_service/caching_service.cpp
)
GENERATE_ENUM_SERIALIZATION(sourceid.h)
@@ -66,5 +67,6 @@ END()
RECURSE_FOR_TESTS(
ut
+ dread_cache_service/ut
ut/slow
)
diff --git a/ydb/core/protos/counters_pq.proto b/ydb/core/protos/counters_pq.proto
index 01cbbeaba2..c381e0f1eb 100644
--- a/ydb/core/protos/counters_pq.proto
+++ b/ydb/core/protos/counters_pq.proto
@@ -137,6 +137,8 @@ enum EPercentileCounters {
COUNTER_LATENCY_PQ_REGISTER_MESSAGE_GROUP = 17 [(CounterOpts) = {Name: "LatencyRegisterMessageGroup"}];
COUNTER_LATENCY_PQ_DEREGISTER_MESSAGE_GROUP = 18 [(CounterOpts) = {Name: "LatencyDeregisterMessageGroup"}];
COUNTER_LATENCY_PQ_SPLIT_MESSAGE_GROUP = 19 [(CounterOpts) = {Name: "LatencySplitMessageGroup"}];
+ COUNTER_LATENCY_PQ_PUBLISH_READ = 20 [(CounterOpts) = {Name: "LatencyPublishRead"}];
+ COUNTER_LATENCY_PQ_FORGET_READ = 21 [(CounterOpts) = {Name: "LatencyForgetRead"}];
}
diff --git a/ydb/core/protos/msgbus_pq.proto b/ydb/core/protos/msgbus_pq.proto
index 6d93226f3d..8ab13eff77 100644
--- a/ydb/core/protos/msgbus_pq.proto
+++ b/ydb/core/protos/msgbus_pq.proto
@@ -6,6 +6,12 @@ package NKikimrClient;
option java_package = "ru.yandex.kikimr.proto";
+message TReadSessionKey {
+ optional string SessionId = 1;
+ optional uint64 PartitionSessionId = 2;
+}
+
message TPersQueuePartitionRequest {
message TCmdRead {
optional string ClientId = 1; // mandatory
@@ -26,6 +32,20 @@ message TPersQueuePartitionRequest {
optional string ExplicitHash = 13;
optional bool ExternalOperation = 14 [default = false];
+ optional uint64 DirectReadId = 15;
+ optional uint64 PartitionSessionId = 16;
+ optional int64 LastOffset = 17;
+
+ }
+
+ message TCmdPublishDirectRead {
+ optional TReadSessionKey SessionKey = 1;
+ optional uint64 DirectReadId = 2;
+ }
+
+ message TCmdForgetDirectRead {
+ optional TReadSessionKey SessionKey = 1;
+ optional uint64 DirectReadId = 2;
}
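
The generated C++ API for these messages is what the test helpers shown earlier build on; a sketch of filling the new publish command (req is assumed to be a mutable TPersQueuePartitionRequest):

    auto* publish = req->MutableCmdPublishRead();
    publish->SetDirectReadId(1);
    auto* key = publish->MutableSessionKey();
    key->SetSessionId("session1");
    key->SetPartitionSessionId(1);
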
message TCmdCreateSession {
@@ -33,6 +53,7 @@ message TPersQueuePartitionRequest {
optional string SessionId = 2;
optional uint64 Generation = 3;
optional uint64 Step = 4;
+ optional uint64 PartitionSessionId = 5;
}
message TCmdDeleteSession {
@@ -129,6 +150,8 @@ message TPersQueuePartitionRequest {
optional TCmdDeleteSession CmdDeleteSession = 6;
optional TCmdCreateSession CmdCreateSession = 7;
optional TCmdRead CmdRead = 8;
+ optional TCmdPublishDirectRead CmdPublishRead = 24;
+ optional TCmdForgetDirectRead CmdForgetRead = 25;
optional TCmdSetClientOffset CmdSetClientOffset = 9;
optional TCmdGetClientOffset CmdGetClientOffset = 10;
optional TCmdGetOwnership CmdGetOwnership = 11;
@@ -387,6 +410,9 @@ message TCmdReadResult {
optional uint64 RealReadOffset = 10;
optional uint64 WaitQuotaTimeMs = 11;
optional uint64 ReadFromTimestampMs = 12;
+ optional uint64 SizeEstimate = 13;
+ optional int64 LastOffset = 14;
+ optional uint64 EndOffset = 15;
}
@@ -444,11 +470,30 @@ message TPersQueuePartitionResponse {
optional string OwnerCookie = 1;
}
+ message TCmdPrepareDirectReadResult {
+ optional uint32 BytesSizeEstimate = 1;
+ optional uint64 ReadOffset = 2;
+ optional uint64 LastOffset = 3;
+
+ optional uint64 EndOffset = 4;
+ optional uint64 SizeLag = 6;
+ optional uint64 WriteTimestampMS = 7;
+
+ optional uint64 DirectReadId = 5;
+ }
+
+ message TCmdPublishDirectReadResult {
+ }
+
+
repeated TCmdWriteResult CmdWriteResult = 1;
optional TCmdGetMaxSeqNoResult CmdGetMaxSeqNoResult = 2;
optional TCmdReadResult CmdReadResult = 3;
optional TCmdGetClientOffsetResult CmdGetClientOffsetResult = 4;
optional TCmdGetOwnershipResult CmdGetOwnershipResult = 5;
+ optional TCmdPrepareDirectReadResult CmdPrepareReadResult = 7;
+ optional TCmdPublishDirectReadResult CmdPublishReadResult = 8;
+ optional TCmdPublishDirectReadResult CmdForgetReadResult = 9;
optional uint64 Cookie = 6;
}
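Illustrative only, not part of the patch: a minimal sketch of filling the new direct-read commands through the generated protobuf accessors for the messages above (the same calls appear later in SendPublishDirectRead). All values are placeholders.

```cpp
#include <ydb/core/protos/msgbus_pq.pb.h>

// Build a publish command for a direct read the client has confirmed.
NKikimrClient::TPersQueueRequest MakePublishRequest() {
    NKikimrClient::TPersQueueRequest request;
    auto* publish = request.MutablePartitionRequest()->MutableCmdPublishRead();
    publish->MutableSessionKey()->SetSessionId("session-1");  // TReadSessionKey.SessionId
    publish->MutableSessionKey()->SetPartitionSessionId(42);  // TReadSessionKey.PartitionSessionId
    publish->SetDirectReadId(7);                              // id of the prepared read
    return request;
}
```

TCmdForgetDirectRead is filled the same way via MutableCmdForgetRead() when the read should be dropped instead of published.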
diff --git a/ydb/core/protos/pqconfig.proto b/ydb/core/protos/pqconfig.proto
index bc0af912d5..6abc3855e4 100644
--- a/ydb/core/protos/pqconfig.proto
+++ b/ydb/core/protos/pqconfig.proto
@@ -809,6 +809,8 @@ message TUserInfo {
optional string Session = 4;
optional uint64 OffsetRewindSum = 5;
optional uint64 ReadRuleGeneration = 6;
+ optional uint64 PartitionSessionId = 7;
+
}
message TPartitionClientInfo {
diff --git a/ydb/core/testlib/test_client.cpp b/ydb/core/testlib/test_client.cpp
index 517a65ecb6..259d5cf6d9 100644
--- a/ydb/core/testlib/test_client.cpp
+++ b/ydb/core/testlib/test_client.cpp
@@ -99,6 +99,7 @@
#include <ydb/core/keyvalue/keyvalue.h>
#include <ydb/core/persqueue/pq.h>
#include <ydb/core/persqueue/cluster_tracker.h>
+#include <ydb/core/persqueue/dread_cache_service/caching_service.h>
#include <ydb/library/security/ydb_credentials_provider_factory.h>
#include <ydb/core/fq/libs/init/init.h>
#include <ydb/core/fq/libs/mock/yql_mock.h>
@@ -961,6 +962,11 @@ namespace Tests {
TActorId pqClusterTrackerId = Runtime->Register(pqClusterTracker, nodeIdx);
Runtime->RegisterService(NPQ::NClusterTracker::MakeClusterTrackerID(), pqClusterTrackerId, nodeIdx);
}
+ {
+ IActor* pqReadCacheService = NPQ::CreatePQDReadCacheService(Runtime->GetDynamicCounters());
+ TActorId readCacheId = Runtime->Register(pqReadCacheService, nodeIdx);
+ Runtime->RegisterService(NPQ::MakePQDReadCacheServiceActorId(), readCacheId, nodeIdx);
+ }
{
if (Settings->PQConfig.GetEnabled() == true) {
diff --git a/ydb/core/testlib/test_pq_client.h b/ydb/core/testlib/test_pq_client.h
index 7448bfd6ca..717d653260 100644
--- a/ydb/core/testlib/test_pq_client.h
+++ b/ydb/core/testlib/test_pq_client.h
@@ -988,17 +988,31 @@ public:
THolder<NMsgBusProxy::TBusPersQueue> alterRequest = requestDescr.GetRequest();
ui32 prevVersion = GetTopicVersionFromMetadata(name);
+ while (prevVersion == 0) {
+ Sleep(TDuration::MilliSeconds(500));
+ prevVersion = GetTopicVersionFromMetadata(name);
+ }
CallPersQueueGRPC(alterRequest->Record);
+ Cerr << "Alter got " << prevVersion << "\n";
const TInstant start = TInstant::Now();
AlterTopic();
- while (GetTopicVersionFromMetadata(name, cacheSize) != prevVersion + 1) {
+ auto ver = GetTopicVersionFromMetadata(name, cacheSize);
+ while (ver != prevVersion + 1) {
+ Cerr << "Alter1 got " << ver << "\n";
+
Sleep(TDuration::MilliSeconds(500));
+ ver = GetTopicVersionFromMetadata(name, cacheSize);
UNIT_ASSERT(TInstant::Now() - start < ::DEFAULT_DISPATCH_TIMEOUT);
}
- while (GetTopicVersionFromPath(name) != prevVersion + 1) {
+ auto ver2 = GetTopicVersionFromPath(name);
+ while (ver2 != prevVersion + 1) {
+ Cerr << "Alter2 got " << ver2 << "\n";
+
Sleep(TDuration::MilliSeconds(500));
+ ver2 = GetTopicVersionFromPath(name);
+
UNIT_ASSERT(TInstant::Now() - start < ::DEFAULT_DISPATCH_TIMEOUT);
}
@@ -1412,6 +1426,8 @@ public:
auto settings = NYdb::NPersQueue::TCreateTopicSettings().PartitionsCount(params.PartsCount).ClientWriteDisabled(!params.CanWrite);
settings.FederationAccount(params.Account);
settings.SupportedCodecs(params.Codecs);
+ //settings.MaxPartitionWriteSpeed(50_MB);
+ //settings.MaxPartitionWriteBurst(50_MB);
TVector<NYdb::NPersQueue::TReadRuleSettings> rrSettings;
for (auto &user : params.ReadRules) {
rrSettings.push_back({NYdb::NPersQueue::TReadRuleSettings{}.ConsumerName(user)});
diff --git a/ydb/library/persqueue/topic_parser/counters.h b/ydb/library/persqueue/topic_parser/counters.h
index 8367e10f82..d69b247d68 100644
--- a/ydb/library/persqueue/topic_parser/counters.h
+++ b/ydb/library/persqueue/topic_parser/counters.h
@@ -12,7 +12,6 @@ TVector<NPersQueue::TPQLabelsInfo> GetLabelsForCustomCluster(const TTopicConvert
TVector<std::pair<TString, TString>> GetSubgroupsForTopic(const TTopicConverterPtr& topic, const TString& cloudId,
const TString& dbId, const TString& dbPath,
const TString& folderId);
-
::NMonitoring::TDynamicCounterPtr GetCounters(::NMonitoring::TDynamicCounterPtr counters,
const TString& subsystem,
const TTopicConverterPtr& topic);
diff --git a/ydb/public/api/grpc/ydb_topic_v1.proto b/ydb/public/api/grpc/ydb_topic_v1.proto
index 4a59c7fb88..1650f60b40 100644
--- a/ydb/public/api/grpc/ydb_topic_v1.proto
+++ b/ydb/public/api/grpc/ydb_topic_v1.proto
@@ -53,9 +53,9 @@ service TopicService {
// StopPartitionSessionResponse(PartitionSessionID1, ...)
// only after this response server will give this partition to other session.
// ---------------->
- // StartPartitionSessionResponse(PartitionSession2, ...)
+ // StartPartitionSessionResponse(PartitionSessionID2, ...)
// ---------------->
- // ReadResponse(data, ...)
+ // ReadResponse(MessageData, ...)
// <----------------
// CommitRequest(PartitionCommit1, ...)
// ---------------->
@@ -63,8 +63,51 @@ service TopicService {
// <----------------
// [something went wrong] (status != SUCCESS, issues not empty)
// <----------------
+ //
+ // Pipeline for direct reading is similar, but the client receives data directly from the partition node.
+ // The following is an excerpt from the pipeline for direct reading:
+ // client server
+ // InitRequest(Topics, ClientID, ..., direct_read = true)
+ // ---------------->
+ // InitResponse(SessionID1)
+ // <----------------
+ // ReadRequest
+ // ---------------->
+ // StartPartitionSessionRequest(Topic1, Partition1, PartitionSessionID1, PartitionLocation1, Secret1,...)
+ // <----------------
+ // StartPartitionSessionResponse(PartitionSessionID1, ...)
+ // ---------------->
+ //
+ // Start data session to the partition node using StreamDirectRead.StartDirectReadSession
+ //
+ // Get data from the partition node using StreamDirectRead.DirectReadResponse
+ //
+ // DirectReadAck(PartitionSessionID1, DirectReadID1, ...)
+ // ---------------->
+ // ReadRequest
+ // ---------------->
+ //
+ // Get data from the partition node using StreamDirectRead.DirectReadResponse
+ //
+ // DirectReadAck(PartitionSessionID1, DirectReadID2, ...)
+ // ---------------->
rpc StreamRead(stream StreamReadMessage.FromClient) returns (stream StreamReadMessage.FromServer);
+ // Create a direct read session.
+ // Pipeline:
+ // client server
+ // StartDirectReadPartitionSession(SessionID1, PartitionSessionID1, TabletGeneration1, Secret1,...)
+ // ---------------->
+ // DirectReadResponse(PartitionSessionID1, DirectReadID1, Secret1, ...)
+ // <----------------
+ // DirectReadResponse(PartitionSessionID1, DirectReadID2, Secret1, ...)
+ // <----------------
+ // UpdateDirectReadPartitionSession(SessionID1, PartitionSessionID1, TabletGeneration1, Secret1,...)
+ // ---------------->
+ // StopDirectReadPartitionSession(SessionID1, PartitionSessionID1, TabletGeneration1, Secret1,...)
+ // <----------------
+ rpc StreamDirectRead(stream StreamDirectReadMessage.FromClient) returns (stream StreamDirectReadMessage.FromServer);
+
// Single commit offset request.
rpc CommitOffset(CommitOffsetRequest) returns (CommitOffsetResponse);
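Not part of the patch: a hedged sketch of how a client could drive the new data stream with the generated gRPC C++ stub. Here stub, sessionId, consumer and topicPath are placeholders; per-partition StartDirectReadPartitionSession messages would follow the init on the same stream.

```cpp
#include <grpcpp/grpcpp.h>
#include <ydb/public/api/grpc/ydb_topic_v1.grpc.pb.h>

void RunDirectReadSession(Ydb::Topic::V1::TopicService::Stub* stub,
                          const std::string& sessionId,
                          const std::string& consumer,
                          const std::string& topicPath) {
    grpc::ClientContext rpcCtx;
    auto stream = stub->StreamDirectRead(&rpcCtx);

    Ydb::Topic::StreamDirectReadMessage::FromClient init;
    auto* req = init.mutable_init_direct_read();
    req->set_session_id(sessionId);  // SessionID1 from the control session's InitResponse
    req->set_consumer(consumer);
    req->add_topics_read_settings()->set_path(topicPath);
    stream->Write(init);

    Ydb::Topic::StreamDirectReadMessage::FromServer msg;
    while (stream->Read(&msg)) {
        if (msg.has_direct_read_response()) {
            // Consume partition_data(), then acknowledge the direct_read_id
            // on the control StreamRead session (see DirectReadAck below).
        }
    }
}
```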
diff --git a/ydb/public/api/protos/ydb_topic.proto b/ydb/public/api/protos/ydb_topic.proto
index c4c0606055..8b5c0aac9c 100644
--- a/ydb/public/api/protos/ydb_topic.proto
+++ b/ydb/public/api/protos/ydb_topic.proto
@@ -125,7 +125,7 @@ message StreamWriteMessage {
}
- // Response for handshake.
+ // Response to the handshake.
message InitResponse {
// Last persisted message's sequence number for this producer.
// Zero for new producer.
@@ -234,13 +234,12 @@ message StreamWriteMessage {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// StreamRead
-
// Messages for bidirectional streaming rpc StreamRead
message StreamReadMessage {
// Within a StreamRead session delivered messages are separated by partition.
// Reads from a single partition are represented by a partition session.
message PartitionSession {
- // Identitifier of partition session. Unique inside one RPC call.
+ // Identifier of partition session. Unique inside one RPC call.
int64 partition_session_id = 1;
// Topic path of partition.
string path = 2;
@@ -254,6 +253,7 @@ message StreamReadMessage {
// CommitOffsetRequest - request for commit of some read data.
// PartitionSessionStatusRequest - request for session status
// UpdateTokenRequest - request to update auth token
+ // DirectReadAck - client signals it has finished direct reading from the partition node.
//
// StartPartitionSessionResponse - Response to StreamReadServerMessage.StartPartitionSessionRequest.
// Client signals it is ready to get data from partition.
@@ -267,10 +267,12 @@ message StreamReadMessage {
CommitOffsetRequest commit_offset_request = 3;
PartitionSessionStatusRequest partition_session_status_request = 4;
UpdateTokenRequest update_token_request = 5;
+ DirectReadAck direct_read_ack = 8;
// Responses to respective server commands.
StartPartitionSessionResponse start_partition_session_response = 6;
StopPartitionSessionResponse stop_partition_session_response = 7;
+
}
}
@@ -283,6 +285,7 @@ message StreamReadMessage {
//
// StartPartitionSessionRequest - command from server to create a partition session.
// StopPartitionSessionRequest - command from server to destroy a partition session.
+ // UpdatePartitionSession - command from server to update a partition session.
message FromServer {
// Server status of response.
Ydb.StatusIds.StatusCode status = 1;
@@ -301,6 +304,8 @@ message StreamReadMessage {
// Server commands.
StartPartitionSessionRequest start_partition_session_request = 8;
StopPartitionSessionRequest stop_partition_session_request = 9;
+
+ UpdatePartitionSession update_partition_session = 10;
}
}
@@ -313,6 +318,8 @@ message StreamReadMessage {
string consumer = 2;
// Optional name. Will be shown in debug stat.
string reader_name = 3;
+ // Direct reading from a partition node.
+ bool direct_read = 4;
message TopicReadSettings {
// Topic path.
@@ -331,7 +338,7 @@ message StreamReadMessage {
// Handshake response.
message InitResponse {
- // Read session identifier for debug purposes.
+ // Read session identifier.
string session_id = 1;
}
@@ -355,7 +362,7 @@ message StreamReadMessage {
// 4) Server is free to send up to 50 + 100 = 150 bytes. But the next read message is too big,
// and it sends 160 bytes ReadResponse.
// 5) Let's assume client somehow processes it, and its 200 bytes buffer is free again.
- // It shoud account for excess 10 bytes and send ReadRequest with bytes_size = 210.
+ // It should account for excess 10 bytes and send ReadRequest with bytes_size = 210.
int64 bytes_size = 1;
}
@@ -364,7 +371,7 @@ message StreamReadMessage {
// One client message representation.
message MessageData {
// Partition offset in partition that assigned for message.
- int64 offset = 1; //unique value for clientside deduplication - Topic:Partition:Offset
+ int64 offset = 1; //unique value for client side deduplication - Topic:Partition:Offset
// Sequence number that provided with message on write from client.
int64 seq_no = 2;
// Timestamp of creation of message provided on write from client.
@@ -379,7 +386,6 @@ message StreamReadMessage {
// Filled if message_group_id was set on message write.
string message_group_id = 7 [(Ydb.length).le = 2048];
repeated MetadataItem metadata_items = 8;
-
}
// Representation of sequence of client messages from one write session.
@@ -448,7 +454,7 @@ message StreamReadMessage {
int64 partition_session_id = 1;
}
- // Response for status request.
+ // Response to status request.
message PartitionSessionStatusResponse {
// Identifier of partition session whose status was requested.
int64 partition_session_id = 1;
@@ -474,6 +480,9 @@ message StreamReadMessage {
// Partition contains messages with offsets in range [start, end).
OffsetsRange partition_offsets = 3;
+
+ // Partition location, filled only when InitRequest.direct_read is true.
+ PartitionLocation partition_location = 4;
}
// Signal for server that client is ready to receive data for partition.
@@ -490,6 +499,7 @@ message StreamReadMessage {
// Server will return data starting from offset that is maximum of actual committed offset, read_offset (if set)
// and offsets calculated from InitRequest.max_lag and InitRequest.read_from.
optional int64 read_offset = 2;
+
// All messages with offset less than commit_offset are processed by client.
// Server will commit this position if this is not done yet.
optional int64 commit_offset = 3;
@@ -510,6 +520,9 @@ message StreamReadMessage {
// Upper bound for committed offsets.
int64 committed_offset = 3;
+
+ // Upper bound for read request identifiers, filled only when InitRequest.direct_read is true and graceful is true.
+ int64 last_direct_read_id = 4;
}
// Signal for server that client finished working with this partition.
@@ -518,7 +531,124 @@ message StreamReadMessage {
message StopPartitionSessionResponse {
// Partition session identifier of partition session that is released by client.
int64 partition_session_id = 1;
+
+ // Flag of graceful stop, used only when InitRequest.direct_read is true.
+ // Client must pass this value unchanged from the StopPartitionSessionRequest.
+ // Server can send two StopPartitionSessionRequests, the first with graceful=true, the second with graceful=false. The client must answer both of them.
+ bool graceful = 2;
+ }
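A hedged sketch of the rule above, assuming the surrounding StreamReadMessage definitions; SendToServer stands in for whatever writes FromClient messages on the control stream.

```cpp
// Echo graceful unchanged and answer every stop request: the server may send
// a graceful=true request first and a graceful=false one later, and each of
// them expects its own StopPartitionSessionResponse.
void OnStopPartitionSession(const Ydb::Topic::StreamReadMessage::StopPartitionSessionRequest& req) {
    Ydb::Topic::StreamReadMessage::FromClient out;
    auto* resp = out.mutable_stop_partition_session_response();
    resp->set_partition_session_id(req.partition_session_id());
    resp->set_graceful(req.graceful());  // pass through unchanged
    SendToServer(std::move(out));
}
```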
+
+ // Command from server to notify about a partition session update.
+ // Client should not send a response to the command.
+ message UpdatePartitionSession {
+ // Partition session identifier.
+ int64 partition_session_id = 1;
+
+ // Partition location, filled only when InitRequest.direct_read is true.
+ PartitionLocation partition_location = 2;
+ }
+
+ // Signal for server that client has finished direct reading.
+ // Server should not send a response to the command.
+ message DirectReadAck {
+ // Partition session identifier.
+ int64 partition_session_id = 1;
+
+ // Identifier of the successfully completed read request.
+ int64 direct_read_id = 2;
+ }
+
+}
+
+// Messages for bidirectional streaming rpc StreamDirectRead
+message StreamDirectReadMessage {
+
+ // Client-server message for direct read session.
+ // InitDirectRead - command from client to create and start a direct read session.
+ // StartDirectReadPartitionSession - command from client to create and start a direct read partition session.
+ // UpdateTokenRequest - request to update auth token
+ message FromClient {
+ oneof client_message {
+ InitDirectRead init_direct_read = 1;
+ StartDirectReadPartitionSession start_direct_read_partition_session = 2;
+ UpdateTokenRequest update_token_request = 3;
+ }
+ }
+
+ // Server-client message for direct read session.
+ // DirectReadResponse - portion of message data.
+ // StopDirectReadPartitionSession - command from server to stop a direct read partition session.
+ // UpdateTokenResponse - acknowledgment of token update.
+ message FromServer {
+ // Server status of response.
+ Ydb.StatusIds.StatusCode status = 1;
+
+ // Issues if any.
+ repeated Ydb.Issue.IssueMessage issues = 2;
+
+
+ oneof server_message {
+ StopDirectReadPartitionSession stop_direct_read_partition_session = 3;
+ DirectReadResponse direct_read_response = 4;
+ UpdateTokenResponse update_token_response = 5;
+ }
+ }
+
+ // Command from client to create and start a direct read session.
+ // Server should not send a response to the command.
+ message InitDirectRead {
+ // Read session identifier.
+ string session_id = 1;
+ // Topics that will be read by this session.
+ repeated TopicReadSettings topics_read_settings = 2;
+ // Path of consumer that is used for reading by this session.
+ string consumer = 3;
+
+ message TopicReadSettings {
+ // Topic path.
+ string path = 1;
+ }
+ }
+
+ // Command from client to create and start a direct read partition session.
+ // Server should not send a response to the command.
+ message StartDirectReadPartitionSession {
+ // Partition session identifier.
+ int64 partition_session_id = 1;
+
+ // Upper bound for read request identifiers.
+ int64 last_direct_read_id = 2;
+
+ // Partition generation.
+ int64 generation = 3;
+ }
+
+ // Command from server to stop a direct read partition session.
+ // Client should not send a response to the command.
+ message StopDirectReadPartitionSession {
+ // The reason for the stop.
+ Ydb.StatusIds.StatusCode status = 1;
+
+ // Issues if any.
+ repeated Ydb.Issue.IssueMessage issues = 2;
+
+ // Partition session identifier.
+ int64 partition_session_id = 3;
}
+
+
+ // Messages that have been read directly from the partition node.
+ // It's a response to StreamRead.ReadRequest.
+ message DirectReadResponse {
+ // Partition session identifier.
+ int64 partition_session_id = 1;
+
+ // Read request identifier.
+ int64 direct_read_id = 2;
+
+ // Message data.
+ StreamReadMessage.ReadResponse.PartitionData partition_data = 3;
+ }
}
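Illustrative only: how the two streams fit together. Data arrives on the StreamDirectRead session, while the acknowledgement travels back on the control StreamRead session; controlStream and Consume are hypothetical stand-ins for the client's own plumbing.

```cpp
void OnDirectReadResponse(const Ydb::Topic::StreamDirectReadMessage::DirectReadResponse& resp) {
    Consume(resp.partition_data());  // hypothetical handler for the message batch

    Ydb::Topic::StreamReadMessage::FromClient ack;
    auto* a = ack.mutable_direct_read_ack();
    a->set_partition_session_id(resp.partition_session_id());
    a->set_direct_read_id(resp.direct_read_id());  // lets the server forget this read
    controlStream->Write(ack);
}
```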
message TransactionIdentity {
@@ -602,7 +732,7 @@ message CommitOffsetResult {
// Control messages
-// message representing statistics by seleveral windows
+// message representing statistics by several windows
message MultipleWindowsStat {
int64 per_minute = 1;
int64 per_hour = 2;
@@ -639,7 +769,7 @@ message Consumer {
google.protobuf.Duration max_read_time_lag = 2;
// Maximum of differences between write timestamp and create timestamp for all messages, read during last minute.
google.protobuf.Duration max_write_time_lag = 3;
- // Bytes read stastics.
+ // Bytes read statistics.
MultipleWindowsStat bytes_read = 4;
}
}
@@ -964,7 +1094,7 @@ message DescribeConsumerResult {
// Maximum of differences between write timestamp and create timestamp for all messages, read during last minute.
google.protobuf.Duration max_write_time_lag = 7;
- // How much bytes were read during several windows statistics from this partiton.
+ // How much bytes were read during several windows statistics from this partition.
MultipleWindowsStat bytes_read = 8;
// Read session name, provided by client.
diff --git a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/read_session.ipp b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/read_session.ipp
index c9dde5232f..8fc23e95c5 100644
--- a/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/read_session.ipp
+++ b/ydb/public/sdk/cpp/client/ydb_persqueue_core/impl/read_session.ipp
@@ -894,6 +894,10 @@ void TSingleClusterReadSessionImpl<UseMigrationProtocol>::OnReadDone(NYdbGrpc::T
case TServerMessage<false>::kStartPartitionSessionRequest:
OnReadDoneImpl(std::move(*ServerMessage->mutable_start_partition_session_request()), deferred);
break;
+ case TServerMessage<false>::kUpdatePartitionSession:
+ OnReadDoneImpl(std::move(*ServerMessage->mutable_update_partition_session()), deferred);
+ break;
+
case TServerMessage<false>::kStopPartitionSessionRequest:
OnReadDoneImpl(std::move(*ServerMessage->mutable_stop_partition_session_request()), deferred);
break;
@@ -907,6 +911,9 @@ void TSingleClusterReadSessionImpl<UseMigrationProtocol>::OnReadDone(NYdbGrpc::T
OnReadDoneImpl(std::move(*ServerMessage->mutable_update_token_response()), deferred);
break;
case TServerMessage<false>::SERVER_MESSAGE_NOT_SET:
+ errorStatus = TPlainStatus::Internal("Server message is not set");
+ break;
+ default:
errorStatus = TPlainStatus::Internal("Unexpected response from server");
break;
}
@@ -1320,6 +1327,21 @@ inline void TSingleClusterReadSessionImpl<false>::OnReadDoneImpl(
template <>
template <>
inline void TSingleClusterReadSessionImpl<false>::OnReadDoneImpl(
+ Ydb::Topic::StreamReadMessage::UpdatePartitionSession&& msg,
+ TDeferredActions<false>& deferred) {
+ Y_ABORT_UNLESS(Lock.IsLocked());
+ Y_UNUSED(deferred);
+
+ auto partitionStreamIt = PartitionStreams.find(msg.partition_session_id());
+ if (partitionStreamIt == PartitionStreams.end()) {
+ return;
+ }
+ //TODO: update generation/nodeid info
+}
+
+template <>
+template <>
+inline void TSingleClusterReadSessionImpl<false>::OnReadDoneImpl(
Ydb::Topic::StreamReadMessage::StopPartitionSessionRequest&& msg,
TDeferredActions<false>& deferred) {
Y_ABORT_UNLESS(Lock.IsLocked());
diff --git a/ydb/services/persqueue_v1/actors/CMakeLists.darwin-arm64.txt b/ydb/services/persqueue_v1/actors/CMakeLists.darwin-arm64.txt
index 3d815020b5..d2f25bf3c9 100644
--- a/ydb/services/persqueue_v1/actors/CMakeLists.darwin-arm64.txt
+++ b/ydb/services/persqueue_v1/actors/CMakeLists.darwin-arm64.txt
@@ -14,6 +14,7 @@ target_link_libraries(services-persqueue_v1-actors PUBLIC
library-actors-core
cpp-containers-disjoint_interval_tree
cpp-string_utils-base64
+ ydb-core-util
ydb-core-base
ydb-core-grpc_services
ydb-core-persqueue
@@ -37,6 +38,7 @@ target_sources(services-persqueue_v1-actors PRIVATE
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/partition_actor.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/read_init_auth_actor.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/read_info_actor.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/direct_read_actor.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/schema_actors.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/update_offsets_in_transaction_actor.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/partition_writer.cpp
diff --git a/ydb/services/persqueue_v1/actors/CMakeLists.darwin-x86_64.txt b/ydb/services/persqueue_v1/actors/CMakeLists.darwin-x86_64.txt
index 3d815020b5..d2f25bf3c9 100644
--- a/ydb/services/persqueue_v1/actors/CMakeLists.darwin-x86_64.txt
+++ b/ydb/services/persqueue_v1/actors/CMakeLists.darwin-x86_64.txt
@@ -14,6 +14,7 @@ target_link_libraries(services-persqueue_v1-actors PUBLIC
library-actors-core
cpp-containers-disjoint_interval_tree
cpp-string_utils-base64
+ ydb-core-util
ydb-core-base
ydb-core-grpc_services
ydb-core-persqueue
@@ -37,6 +38,7 @@ target_sources(services-persqueue_v1-actors PRIVATE
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/partition_actor.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/read_init_auth_actor.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/read_info_actor.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/direct_read_actor.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/schema_actors.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/update_offsets_in_transaction_actor.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/partition_writer.cpp
diff --git a/ydb/services/persqueue_v1/actors/CMakeLists.linux-aarch64.txt b/ydb/services/persqueue_v1/actors/CMakeLists.linux-aarch64.txt
index babb76e529..326faf12eb 100644
--- a/ydb/services/persqueue_v1/actors/CMakeLists.linux-aarch64.txt
+++ b/ydb/services/persqueue_v1/actors/CMakeLists.linux-aarch64.txt
@@ -15,6 +15,7 @@ target_link_libraries(services-persqueue_v1-actors PUBLIC
library-actors-core
cpp-containers-disjoint_interval_tree
cpp-string_utils-base64
+ ydb-core-util
ydb-core-base
ydb-core-grpc_services
ydb-core-persqueue
@@ -38,6 +39,7 @@ target_sources(services-persqueue_v1-actors PRIVATE
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/partition_actor.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/read_init_auth_actor.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/read_info_actor.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/direct_read_actor.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/schema_actors.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/update_offsets_in_transaction_actor.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/partition_writer.cpp
diff --git a/ydb/services/persqueue_v1/actors/CMakeLists.linux-x86_64.txt b/ydb/services/persqueue_v1/actors/CMakeLists.linux-x86_64.txt
index babb76e529..326faf12eb 100644
--- a/ydb/services/persqueue_v1/actors/CMakeLists.linux-x86_64.txt
+++ b/ydb/services/persqueue_v1/actors/CMakeLists.linux-x86_64.txt
@@ -15,6 +15,7 @@ target_link_libraries(services-persqueue_v1-actors PUBLIC
library-actors-core
cpp-containers-disjoint_interval_tree
cpp-string_utils-base64
+ ydb-core-util
ydb-core-base
ydb-core-grpc_services
ydb-core-persqueue
@@ -38,6 +39,7 @@ target_sources(services-persqueue_v1-actors PRIVATE
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/partition_actor.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/read_init_auth_actor.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/read_info_actor.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/direct_read_actor.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/schema_actors.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/update_offsets_in_transaction_actor.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/partition_writer.cpp
diff --git a/ydb/services/persqueue_v1/actors/CMakeLists.windows-x86_64.txt b/ydb/services/persqueue_v1/actors/CMakeLists.windows-x86_64.txt
index 3d815020b5..d2f25bf3c9 100644
--- a/ydb/services/persqueue_v1/actors/CMakeLists.windows-x86_64.txt
+++ b/ydb/services/persqueue_v1/actors/CMakeLists.windows-x86_64.txt
@@ -14,6 +14,7 @@ target_link_libraries(services-persqueue_v1-actors PUBLIC
library-actors-core
cpp-containers-disjoint_interval_tree
cpp-string_utils-base64
+ ydb-core-util
ydb-core-base
ydb-core-grpc_services
ydb-core-persqueue
@@ -37,6 +38,7 @@ target_sources(services-persqueue_v1-actors PRIVATE
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/partition_actor.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/read_init_auth_actor.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/read_info_actor.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/direct_read_actor.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/schema_actors.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/update_offsets_in_transaction_actor.cpp
${CMAKE_SOURCE_DIR}/ydb/services/persqueue_v1/actors/partition_writer.cpp
diff --git a/ydb/services/persqueue_v1/actors/direct_read_actor.cpp b/ydb/services/persqueue_v1/actors/direct_read_actor.cpp
new file mode 100644
index 0000000000..0f53ebc819
--- /dev/null
+++ b/ydb/services/persqueue_v1/actors/direct_read_actor.cpp
@@ -0,0 +1,471 @@
+#include "direct_read_actor.h"
+
+#include "helpers.h"
+#include "read_init_auth_actor.h"
+#include "read_session_actor.h"
+
+#include <ydb/library/persqueue/topic_parser/counters.h>
+#include <ydb/core/persqueue/dread_cache_service/caching_service.h>
+
+#include <library/cpp/protobuf/util/repeated_field_utils.h>
+
+#include <google/protobuf/util/time_util.h>
+
+#include <util/string/join.h>
+#include <util/string/strip.h>
+
+#include <utility>
+
+#define LOG_PREFIX "Direct read proxy " << ctx.SelfID.ToString() << ": " PQ_LOG_PREFIX
+
+namespace NKikimr::NGRpcProxy::V1 {
+
+using namespace NKikimrClient;
+using namespace NMsgBusProxy;
+using namespace PersQueue::V1;
+
+TDirectReadSessionActor::TDirectReadSessionActor(
+ TEvStreamReadRequest* request, const ui64 cookie,
+ const TActorId& schemeCache, const TActorId& newSchemeCache,
+ TIntrusivePtr<NMonitoring::TDynamicCounters> counters,
+ const TMaybe<TString> clientDC,
+ const NPersQueue::TTopicsListController& topicsHandler)
+ : TRlHelpers({}, request, READ_BLOCK_SIZE, false, TDuration::Minutes(1))
+ , Request(request)
+ , Cookie(cookie)
+ , ClientDC(clientDC.GetOrElse("other"))
+ , StartTimestamp(TInstant::Now())
+ , SchemeCache(schemeCache)
+ , NewSchemeCache(newSchemeCache)
+ , InitDone(false)
+ , ForceACLCheck(false)
+ , LastACLCheckTimestamp(TInstant::Zero())
+ , Counters(counters)
+ , TopicsHandler(topicsHandler)
+{
+ Y_ASSERT(Request);
+}
+
+void TDirectReadSessionActor::Bootstrap(const TActorContext& ctx) {
+ if (!AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) {
+ ++(*GetServiceCounters(Counters, "pqproxy|readSession")
+ ->GetNamedCounter("sensor", "DirectSessionsCreatedTotal", true));
+ }
+
+ Request->GetStreamCtx()->Attach(ctx.SelfID);
+ if (!ReadFromStreamOrDie(ctx)) {
+ return;
+ }
+
+ StartTime = ctx.Now();
+ this->Become(&TDirectReadSessionActor::TThis::StateFunc);
+}
+
+void TDirectReadSessionActor::Handle(typename IContext::TEvNotifiedWhenDone::TPtr&, const TActorContext& ctx) {
+ LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << " grpc closed");
+ Die(ctx);
+}
+
+
+bool TDirectReadSessionActor::ReadFromStreamOrDie(const TActorContext& ctx) {
+ if (!Request->GetStreamCtx()->Read()) {
+ LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << " grpc read failed at start");
+ Die(ctx);
+ return false;
+ }
+ return true;
+}
+
+void TDirectReadSessionActor::Handle(typename IContext::TEvReadFinished::TPtr& ev, const TActorContext& ctx) {
+ auto& request = ev->Get()->Record;
+
+ LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << " grpc read done"
+ << ": success# " << ev->Get()->Success
+ << ", data# " << request);
+
+ if (!ev->Get()->Success) {
+ LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << "grpc read failed");
+ ctx.Send(ctx.SelfID, new TEvPQProxy::TEvDone());
+ return;
+ }
+
+ switch (request.client_message_case()) {
+ case TClientMessage::kInitDirectRead: {
+ ctx.Send(ctx.SelfID, new TEvPQProxy::TEvInitDirectRead(request, Request->GetStreamCtx()->GetPeerName()));
+ return;
+ }
+
+ case TClientMessage::kStartDirectReadPartitionSession: {
+ const auto& req = request.start_direct_read_partition_session();
+
+ ctx.Send(ctx.SelfID, new TEvPQProxy::TEvStartDirectRead(req.partition_session_id(), req.generation(), req.last_direct_read_id()));
+ return (void)ReadFromStreamOrDie(ctx);
+ }
+
+ case TClientMessage::kUpdateTokenRequest: {
+ if (const auto token = request.update_token_request().token()) { // TODO: refresh token here
+ ctx.Send(ctx.SelfID, new TEvPQProxy::TEvAuth(token));
+ }
+ return (void)ReadFromStreamOrDie(ctx);
+ }
+
+ default: {
+ return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "unsupported request");
+ }
+ }
+}
+
+
+bool TDirectReadSessionActor::WriteToStreamOrDie(const TActorContext& ctx, TServerMessage&& response, bool finish) {
+ bool res = false;
+
+ if (!finish) {
+ res = Request->GetStreamCtx()->Write(std::move(response));
+ } else {
+ res = Request->GetStreamCtx()->WriteAndFinish(std::move(response), grpc::Status::OK);
+ }
+
+ if (!res) {
+ LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << " grpc write failed");
+ Die(ctx);
+ }
+
+ return res;
+}
+
+
+void TDirectReadSessionActor::Handle(typename IContext::TEvWriteFinished::TPtr& ev, const TActorContext& ctx) {
+ if (!ev->Get()->Success) {
+ LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << " grpc write failed");
+ return Die(ctx);
+ }
+}
+
+
+void TDirectReadSessionActor::Die(const TActorContext& ctx) {
+ if (AuthInitActor) {
+ ctx.Send(AuthInitActor, new TEvents::TEvPoisonPill());
+ }
+
+ if (DirectSessionsActive) {
+ --(*DirectSessionsActive);
+ }
+
+ LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << " proxy is DEAD");
+ ctx.Send(GetPQReadServiceActorID(), new TEvPQProxy::TEvSessionDead(Cookie));
+ ctx.Send(NPQ::MakePQDReadCacheServiceActorId(), new TEvPQProxy::TEvDirectReadDataSessionDead(Session));
+
+ TActorBootstrapped<TDirectReadSessionActor>::Die(ctx);
+}
+
+
+void TDirectReadSessionActor::Handle(TEvPQProxy::TEvDone::TPtr&, const TActorContext&) {
+ CloseSession(PersQueue::ErrorCode::OK, "reads done signal, closing everything");
+}
+
+
+void TDirectReadSessionActor::Handle(TEvPQProxy::TEvCloseSession::TPtr& ev, const TActorContext&) {
+ CloseSession(ev->Get()->ErrorCode, ev->Get()->Reason);
+}
+
+void TDirectReadSessionActor::Handle(TEvPQProxy::TEvAuth::TPtr& ev, const TActorContext& ctx) {
+ const auto& auth = ev->Get()->Auth;
+ if (!auth.empty() && auth != Auth) {
+ Auth = auth;
+ Request->RefreshToken(auth, ctx, ctx.SelfID);
+ }
+}
+
+
+void TDirectReadSessionActor::Handle(TEvPQProxy::TEvStartDirectRead::TPtr& ev, const TActorContext& ctx) {
+
+ LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << " got StartDirectRead from client"
+ << ": sessionId# " << Session
+ << ", assignId# " << ev->Get()->AssignId
+ << ", lastDirectReadId# " << ev->Get()->LastDirectReadId
+ << ", generation# " << ev->Get()->Generation);
+
+ ctx.Send(NPQ::MakePQDReadCacheServiceActorId(), new TEvPQProxy::TEvDirectReadDataSessionConnected(
+ {Session, ev->Get()->AssignId}, ev->Get()->Generation, ev->Get()->LastDirectReadId + 1)
+ );
+}
+
+
+void TDirectReadSessionActor::Handle(TEvPQProxy::TEvInitDirectRead::TPtr& ev, const TActorContext& ctx) {
+ LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << " got init request: " << ev->Get()->Request.DebugString());
+
+ if (Initing) {
+ return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "got second init request");
+ }
+ Initing = true;
+
+ const auto& init = ev->Get()->Request.init_direct_read();
+
+ if (!init.topics_read_settings_size()) {
+ return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "no topics in init request");
+ }
+
+ if (init.consumer().empty()) {
+ return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "no consumer in init request");
+ }
+
+ ClientId = NPersQueue::ConvertNewConsumerName(init.consumer(), ctx);
+ if (AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) {
+ ClientPath = init.consumer();
+ } else {
+ ClientPath = NPersQueue::StripLeadSlash(NPersQueue::MakeConsumerPath(init.consumer()));
+ }
+
+ Session = init.session_id();
+ if (Session.empty()) {
+ return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "no session id in init request");
+ }
+ PeerName = ev->Get()->PeerName;
+
+ auto database = Request->GetDatabaseName().GetOrElse(TString());
+
+ for (const auto& topic : init.topics_read_settings()) {
+ const TString path = topic.path();
+ if (path.empty()) {
+ return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "empty topic in init request");
+ }
+
+ TopicsToResolve.insert(path);
+ }
+
+ if (Request->GetSerializedToken().empty()) {
+ if (AppData(ctx)->PQConfig.GetRequireCredentialsInNewProtocol()) {
+ return CloseSession(PersQueue::ErrorCode::ACCESS_DENIED,
+ "unauthenticated access is forbidden, please provide credentials");
+ }
+ } else {
+ Y_ABORT_UNLESS(Request->GetYdbToken());
+ Auth = *(Request->GetYdbToken());
+ Token = new NACLib::TUserToken(Request->GetSerializedToken());
+ }
+
+ TopicsList = TopicsHandler.GetReadTopicsList(TopicsToResolve, true, database);
+
+ if (!TopicsList.IsValid) {
+ return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TopicsList.Reason);
+ }
+
+ LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << " read init"
+ << ": from# " << PeerName
+ << ", request# " << ev->Get()->Request);
+
+ if (!AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) {
+ SetupCounters();
+ }
+
+ RunAuthActor(ctx);
+}
+
+
+void TDirectReadSessionActor::SetupCounters() {
+ if (DirectSessionsCreated) {
+ return;
+ }
+
+ auto subGroup = GetServiceCounters(Counters, "pqproxy|readSession");
+ subGroup = subGroup->GetSubgroup("Client", ClientId)->GetSubgroup("ConsumerPath", ClientPath);
+ const TString name = "sensor";
+
+ Errors = subGroup->GetExpiringNamedCounter(name, "Errors", true);
+ DirectSessionsActive = subGroup->GetExpiringNamedCounter(name, "DirectSessionsActive", false);
+ DirectSessionsCreated = subGroup->GetExpiringNamedCounter(name, "DirectSessionsCreated", true);
+
+ ++(*DirectSessionsCreated);
+ ++(*DirectSessionsActive);
+}
+
+
+
+void TDirectReadSessionActor::Handle(TEvPQProxy::TEvAuthResultOk::TPtr& ev, const TActorContext& ctx) {
+ LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " auth ok"
+ << ": topics# " << ev->Get()->TopicAndTablets.size()
+ << ", initDone# " << InitDone);
+
+ LastACLCheckTimestamp = ctx.Now();
+ AuthInitActor = TActorId();
+
+
+ if (!InitDone) {
+ for (const auto& [name, t] : ev->Get()->TopicAndTablets) { // TODO: return something from Init and Auth Actor (Full Path - ?)
+
+ if (!GetMeteringMode()) {
+ SetMeteringMode(t.MeteringMode);
+ } else if (*GetMeteringMode() != t.MeteringMode) {
+ return CloseSession(PersQueue::ErrorCode::BAD_REQUEST,
+ "cannot read from topics with different metering modes");
+ }
+ }
+
+ if (IsQuotaRequired()) {
+ Y_ABORT_UNLESS(MaybeRequestQuota(1, EWakeupTag::RlInit, ctx));
+ } else {
+ InitSession(ctx);
+ }
+
+ } else {
+ for (const auto& [name, t] : ev->Get()->TopicAndTablets) {
+ if (t.MeteringMode != *GetMeteringMode()) {
+ return CloseSession(PersQueue::ErrorCode::OVERLOAD, TStringBuilder()
+ << "metering mode of topic: " << name << " has been changed");
+ }
+ }
+ }
+}
+
+void TDirectReadSessionActor::InitSession(const TActorContext& ctx) {
+ InitDone = true;
+ ReadFromStreamOrDie(ctx);
+ ctx.Schedule(TDuration::Seconds(AppData(ctx)->PQConfig.GetACLRetryTimeoutSec()), new TEvents::TEvWakeup(EWakeupTag::RecheckAcl));
+}
+
+
+void TDirectReadSessionActor::CloseSession(PersQueue::ErrorCode::ErrorCode code, const TString& reason) {
+ auto ctx = ActorContext();
+ LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, LOG_PREFIX << " Close session with reason: " << reason);
+ if (code != PersQueue::ErrorCode::OK) {
+ if (Errors) {
+ ++(*Errors);
+ } else if (!AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) {
+ ++(*GetServiceCounters(Counters, "pqproxy|readSession")->GetCounter("Errors", true));
+ }
+
+ TServerMessage result;
+ result.set_status(ConvertPersQueueInternalCodeToStatus(code));
+ FillIssue(result.add_issues(), code, reason);
+
+ LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " closed with error"
+ << ": reason# " << reason);
+ if (!WriteToStreamOrDie(ctx, std::move(result), true)) {
+ return;
+ }
+ } else {
+ LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " closed");
+ if (!Request->GetStreamCtx()->Finish(grpc::Status::OK)) {
+ LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc double finish failed");
+ }
+ }
+ Die(ctx);
+}
+
+
+void TDirectReadSessionActor::Handle(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse::TPtr& ev , const TActorContext& ctx) {
+ if (ev->Get()->Authenticated && ev->Get()->InternalToken && !ev->Get()->InternalToken->GetSerializedToken().empty()) {
+ Token = ev->Get()->InternalToken;
+ ForceACLCheck = true;
+
+ TServerMessage result;
+ result.set_status(Ydb::StatusIds::SUCCESS);
+ result.mutable_update_token_response();
+ WriteToStreamOrDie(ctx, std::move(result));
+ } else {
+ if (ev->Get()->Retryable) {
+ Request->ReplyUnavaliable();
+ } else {
+ Request->ReplyUnauthenticated("refreshed token is invalid");
+ }
+ Die(ctx);
+ }
+}
+
+
+void TDirectReadSessionActor::ProcessAnswer(TFormedDirectReadResponse::TPtr response, const TActorContext& ctx) {
+ if (!WriteToStreamOrDie(ctx, std::move(*response->Response))) {
+ return;
+ }
+}
+
+void TDirectReadSessionActor::Handle(TEvents::TEvWakeup::TPtr& ev, const TActorContext& ctx) {
+ const auto tag = static_cast<EWakeupTag>(ev->Get()->Tag);
+ OnWakeup(tag);
+
+ switch (tag) {
+ case EWakeupTag::RlInit:
+ return InitSession(ctx);
+
+ case EWakeupTag::RecheckAcl:
+ return RecheckACL(ctx);
+
+ case EWakeupTag::RlAllowed:
+ if (auto counters = Request->GetCounters()) {
+ counters->AddConsumedRequestUnits(PendingQuota->RequiredQuota);
+ }
+
+ ProcessAnswer(PendingQuota, ctx);
+
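+ // Quota arrived for the pending response; promote the next waiting
+ // response, if any, and immediately request quota for it.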
+ if (!WaitingQuota.empty()) {
+ PendingQuota = WaitingQuota.front();
+ WaitingQuota.pop_front();
+ } else {
+ PendingQuota = nullptr;
+ }
+ if (PendingQuota) {
+ auto res = MaybeRequestQuota(PendingQuota->RequiredQuota, EWakeupTag::RlAllowed, ctx);
+ Y_ABORT_UNLESS(res);
+ }
+
+ break;
+
+ case EWakeupTag::RlNoResource:
+ case EWakeupTag::RlInitNoResource:
+ if (PendingQuota) {
+ auto res = MaybeRequestQuota(PendingQuota->RequiredQuota, EWakeupTag::RlAllowed, ctx);
+ Y_ABORT_UNLESS(res);
+ } else {
+ return CloseSession(PersQueue::ErrorCode::OVERLOAD, "throughput limit exceeded");
+ }
+ break;
+ }
+}
+
+
+void TDirectReadSessionActor::RecheckACL(const TActorContext& ctx) {
+ const auto timeout = TDuration::Seconds(AppData(ctx)->PQConfig.GetACLRetryTimeoutSec());
+
+ ctx.Schedule(timeout, new TEvents::TEvWakeup(EWakeupTag::RecheckAcl));
+
+ const bool authTimedOut = (ctx.Now() - LastACLCheckTimestamp) > timeout;
+
+ if (Token && !AuthInitActor && (ForceACLCheck || authTimedOut)) {
+ ForceACLCheck = false;
+
+ LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " checking auth because of timeout");
+ RunAuthActor(ctx);
+ }
+}
+
+
+void TDirectReadSessionActor::RunAuthActor(const TActorContext& ctx) {
+ Y_ABORT_UNLESS(!AuthInitActor);
+ AuthInitActor = ctx.Register(new TReadInitAndAuthActor(
+ ctx, ctx.SelfID, ClientId, Cookie, Session, SchemeCache, NewSchemeCache, Counters, Token, TopicsList,
+ TopicsHandler.GetLocalCluster()));
+}
+
+void TDirectReadSessionActor::HandleDestroyPartitionSession(TEvPQProxy::TEvDirectReadDestroyPartitionSession::TPtr& ev) {
+ TServerMessage result;
+ result.set_status(Ydb::StatusIds::SUCCESS);
+ auto* stop = result.mutable_stop_direct_read_partition_session();
+ stop->set_partition_session_id(ev->Get()->ReadKey.PartitionSessionId);
+ result.set_status(ConvertPersQueueInternalCodeToStatus(ev->Get()->Code));
+ FillIssue(stop->add_issues(), ev->Get()->Code, ev->Get()->Reason);
+ WriteToStreamOrDie(ActorContext(), std::move(result));
+
+}
+
+void TDirectReadSessionActor::HandleSessionKilled(TEvPQProxy::TEvDirectReadCloseSession::TPtr& ev) {
+ // TODO: CloseSession uses a different error code.
+ CloseSession(ev->Get()->Code, ev->Get()->Reason);
+}
+
+void TDirectReadSessionActor::HandleGotData(TEvPQProxy::TEvDirectReadSendClientData::TPtr& ev) {
+ auto formedResponse = MakeIntrusive<TFormedDirectReadResponse>();
+ formedResponse->Response = std::move(ev->Get()->Message);
+ ProcessAnswer(formedResponse, ActorContext());
+}
+
+}
diff --git a/ydb/services/persqueue_v1/actors/direct_read_actor.h b/ydb/services/persqueue_v1/actors/direct_read_actor.h
new file mode 100644
index 0000000000..1804fcce70
--- /dev/null
+++ b/ydb/services/persqueue_v1/actors/direct_read_actor.h
@@ -0,0 +1,180 @@
+#pragma once
+
+#include "events.h"
+#include "persqueue_utils.h"
+
+#include <ydb/core/base/tablet_pipe.h>
+#include <ydb/core/grpc_services/grpc_request_proxy.h>
+#include <ydb/core/persqueue/events/global.h>
+
+#include <ydb/core/persqueue/pq_rl_helpers.h>
+
+#include <ydb/library/actors/core/actor_bootstrapped.h>
+
+namespace NKikimr::NGRpcProxy::V1 {
+
+struct TFormedDirectReadResponse: public TSimpleRefCount<TFormedDirectReadResponse> {
+ using TPtr = TIntrusivePtr<TFormedDirectReadResponse>;
+
+ TFormedDirectReadResponse() = default;
+
+ TFormedDirectReadResponse(TInstant start)
+ : Start(start)
+ {
+ }
+
+ std::shared_ptr<Topic::StreamDirectReadMessage::FromServer> Response;
+
+ TInstant Start;
+ TDuration WaitQuotaTime;
+
+ ui64 RequiredQuota = 0;
+};
+
+
+
+class TDirectReadSessionActor
+ : public TActorBootstrapped<TDirectReadSessionActor>
+ , private NPQ::TRlHelpers
+{
+ using TClientMessage = Topic::StreamDirectReadMessage::FromClient;
+
+ using TServerMessage = Topic::StreamDirectReadMessage::FromServer;
+
+ using TEvStreamReadRequest = NGRpcService::TEvStreamTopicDirectReadRequest;
+
+ using IContext = NGRpcServer::IGRpcStreamingContext<TClientMessage, TServerMessage>;
+
+public:
+ TDirectReadSessionActor(TEvStreamReadRequest* request, const ui64 cookie,
+ const TActorId& schemeCache, const TActorId& newSchemeCache,
+ TIntrusivePtr<::NMonitoring::TDynamicCounters> counters,
+ const TMaybe<TString> clientDC,
+ const NPersQueue::TTopicsListController& topicsHandler);
+
+ void Bootstrap(const TActorContext& ctx);
+
+ void Die(const TActorContext& ctx) override;
+
+ static constexpr NKikimrServices::TActivity::EType ActorActivityType() {
+ return NKikimrServices::TActivity::FRONT_PQ_READ;
+ }
+
+private:
+ STFUNC(StateFunc) {
+ switch (ev->GetTypeRewrite()) {
+ // grpc events
+ HFunc(IContext::TEvReadFinished, Handle);
+ HFunc(IContext::TEvWriteFinished, Handle);
+ HFunc(IContext::TEvNotifiedWhenDone, Handle)
+ HFunc(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse, Handle);
+
+ // proxy events
+ HFunc(TEvPQProxy::TEvAuthResultOk, Handle); // from auth actor
+ HFunc(TEvPQProxy::TEvInitDirectRead, Handle); // from gRPC
+ HFunc(TEvPQProxy::TEvDone, Handle); // from gRPC
+ HFunc(TEvPQProxy::TEvCloseSession, Handle); // from auth actor
+ HFunc(TEvPQProxy::TEvStartDirectRead, Handle); // from gRPC
+ HFunc(TEvPQProxy::TEvAuth, Handle); // from gRPC
+
+ hFunc(TEvPQProxy::TEvDirectReadCloseSession, HandleSessionKilled) // from CachingService
+ hFunc(TEvPQProxy::TEvDirectReadDestroyPartitionSession, HandleDestroyPartitionSession) // from CachingService
+ hFunc(TEvPQProxy::TEvDirectReadSendClientData, HandleGotData) // from CachingService
+ // system events
+ HFunc(TEvents::TEvWakeup, Handle);
+
+ default:
+ break;
+ }
+ }
+
+ bool ReadFromStreamOrDie(const TActorContext& ctx);
+ bool WriteToStreamOrDie(const TActorContext& ctx, TServerMessage&& response, bool finish = false);
+
+ void InitSession(const TActorContext& ctx);
+
+ // grpc events
+ void Handle(typename IContext::TEvReadFinished::TPtr& ev, const TActorContext &ctx);
+ void Handle(typename IContext::TEvWriteFinished::TPtr& ev, const TActorContext &ctx);
+ void Handle(typename IContext::TEvNotifiedWhenDone::TPtr& ev, const TActorContext &ctx);
+ void Handle(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse::TPtr& ev, const TActorContext &ctx);
+
+ // proxy events
+ void Handle(TEvPQProxy::TEvAuthResultOk::TPtr& ev, const TActorContext& ctx);
+ void Handle(TEvPQProxy::TEvInitDirectRead::TPtr& ev, const TActorContext& ctx);
+ //void Handle(typename TEvReadResponse::TPtr& ev, const TActorContext& ctx);
+ void Handle(TEvPQProxy::TEvDone::TPtr& ev, const TActorContext& ctx);
+ void Handle(TEvPQProxy::TEvCloseSession::TPtr& ev, const TActorContext& ctx);
+ //void Handle(TEvPQProxy::TEvDieCommand::TPtr& ev, const TActorContext& ctx);
+ void Handle(TEvPQProxy::TEvStartDirectRead::TPtr& ev, const TActorContext& ctx);
+ void Handle(TEvPQProxy::TEvAuth::TPtr& ev, const TActorContext& ctx);
+
+ // Caching service events
+ void HandleSessionKilled(TEvPQProxy::TEvDirectReadCloseSession::TPtr& ev);
+ void HandleDestroyPartitionSession(TEvPQProxy::TEvDirectReadDestroyPartitionSession::TPtr& ev);
+
+ void HandleGotData(TEvPQProxy::TEvDirectReadSendClientData::TPtr& ev);
+
+ // system events
+ void Handle(TEvents::TEvWakeup::TPtr& ev, const TActorContext& ctx);
+
+ void RunAuthActor(const TActorContext& ctx);
+ void RecheckACL(const TActorContext& ctx);
+
+ void CloseSession(PersQueue::ErrorCode::ErrorCode code, const TString& reason);
+
+ void SetupCounters();
+ void SetupCounters(const TString& cloudId, const TString& dbId, const TString& dbPath, const bool isServerless, const TString& folderId);
+
+ void ProcessAnswer(typename TFormedDirectReadResponse::TPtr response, const TActorContext& ctx);
+
+private:
+ std::unique_ptr<TEvStreamReadRequest> Request;
+ ui64 Cookie;
+ const TString ClientDC;
+ const TInstant StartTimestamp;
+
+ TActorId SchemeCache;
+ TActorId NewSchemeCache;
+
+ TActorId AuthInitActor;
+ TIntrusiveConstPtr<NACLib::TUserToken> Token;
+
+ TString ClientId;
+ TString ClientPath;
+ TString Session;
+ TString PeerName;
+
+ bool InitDone;
+
+ TString Auth;
+
+ bool ForceACLCheck;
+ TInstant LastACLCheckTimestamp;
+
+ //THashMap<TString, TTopicHolder> Topics; // topic -> info
+ THashMap<TString, NPersQueue::TTopicConverterPtr> FullPathToConverter; // PrimaryFullPath -> Converter, for balancer replies matching
+ THashSet<TString> TopicsToResolve;
+
+ // Response that is currently pending on quota
+ TFormedDirectReadResponse::TPtr PendingQuota;
+
+ // Responses that will be granted quota next
+ std::deque<TFormedDirectReadResponse::TPtr> WaitingQuota;
+
+ TIntrusivePtr<::NMonitoring::TDynamicCounters> Counters;
+
+ ::NMonitoring::TDynamicCounters::TCounterPtr DirectSessionsCreated;
+ ::NMonitoring::TDynamicCounters::TCounterPtr DirectSessionsActive;
+
+ ::NMonitoring::TDynamicCounters::TCounterPtr Errors;
+
+ TInstant StartTime;
+
+ NPersQueue::TTopicsListController TopicsHandler;
+ NPersQueue::TTopicsToConverter TopicsList;
+
+ bool Initing = false;
+};
+
+}
diff --git a/ydb/services/persqueue_v1/actors/events.h b/ydb/services/persqueue_v1/actors/events.h
index b2953e7a7f..f147836b54 100644
--- a/ydb/services/persqueue_v1/actors/events.h
+++ b/ydb/services/persqueue_v1/actors/events.h
@@ -5,6 +5,7 @@
#include <ydb/core/base/events.h>
#include <ydb/core/grpc_services/rpc_calls.h>
#include <ydb/core/protos/pqconfig.pb.h>
+#include <ydb/core/persqueue/key.h>
#include <ydb/core/persqueue/percentile_counter.h>
#include <ydb/public/api/protos/persqueue_error_codes_v1.pb.h>
@@ -65,6 +66,16 @@ struct TEvPQProxy {
EvCommitRange,
EvRequestTablet,
EvPartitionLocationResponse,
+ EvUpdateSession,
+ EvDirectReadResponse,
+ EvDirectReadAck,
+ EvInitDirectRead,
+ EvStartDirectRead,
+ EvDirectReadDataSessionConnected,
+ EvDirectReadDataSessionDead,
+ EvDirectReadDestroyPartitionSession,
+ EvDirectReadCloseSession,
+ EvDirectReadSendClientData,
EvEnd
};
@@ -257,6 +268,31 @@ struct TEvPQProxy {
ui64 EndOffset;
};
+
+ struct TEvDirectReadResponse : public NActors::TEventLocal<TEvDirectReadResponse, EvDirectReadResponse> {
+ explicit TEvDirectReadResponse(ui64 assignId, ui64 nextReadOffset, ui64 directReadId, ui64 byteSize)
+ : AssignId(assignId)
+ , NextReadOffset(nextReadOffset)
+ , DirectReadId(directReadId)
+ , ByteSize(byteSize)
+ { }
+
+ ui64 AssignId;
+ ui64 NextReadOffset;
+ ui64 DirectReadId;
+ ui64 ByteSize;
+ };
+
+ struct TEvDirectReadAck : public NActors::TEventLocal<TEvDirectReadAck, EvDirectReadAck> {
+ explicit TEvDirectReadAck(ui64 assignId, ui64 directReadId)
+ : AssignId(assignId)
+ , DirectReadId(directReadId)
+ { }
+
+ ui64 AssignId;
+ ui64 DirectReadId;
+ };
+
struct TEvReadResponse : public NActors::TEventLocal<TEvReadResponse, EvReadResponse> {
explicit TEvReadResponse(Topic::StreamReadMessage::FromServer&& resp, ui64 nextReadOffset, bool fromDisk, TDuration waitQuotaTime)
: Response(std::move(resp))
@@ -330,22 +366,22 @@ struct TEvPQProxy {
, ReadOffset(readOffset)
, CommitOffset(commitOffset)
, VerifyReadOffset(verifyReadOffset)
- , Generation(0)
{ }
const ui64 AssignId;
ui64 ReadOffset;
TMaybe<ui64> CommitOffset;
bool VerifyReadOffset;
- ui64 Generation;
};
struct TEvReleased : public NActors::TEventLocal<TEvReleased, EvReleased> {
- TEvReleased(ui64 id)
+ TEvReleased(ui64 id, bool graceful = true)
: AssignId(id)
+ , Graceful(graceful)
{ }
const ui64 AssignId;
+ const bool Graceful;
};
struct TEvGetStatus : public NActors::TEventLocal<TEvGetStatus, EvGetStatus> {
@@ -376,6 +412,13 @@ struct TEvPQProxy {
{ }
};
+ struct TEvPartitionReleased : public NActors::TEventLocal<TEvPartitionReleased, EvPartitionReleased> {
+ TEvPartitionReleased(const TPartitionId& partition)
+ : Partition(partition)
+ { }
+ TPartitionId Partition;
+ };
+
struct TEvLockPartition : public NActors::TEventLocal<TEvLockPartition, EvLockPartition> {
explicit TEvLockPartition(const ui64 readOffset, const TMaybe<ui64>& commitOffset, bool verifyReadOffset,
bool startReading)
@@ -392,13 +435,6 @@ struct TEvPQProxy {
};
- struct TEvPartitionReleased : public NActors::TEventLocal<TEvPartitionReleased, EvPartitionReleased> {
- TEvPartitionReleased(const TPartitionId& partition)
- : Partition(partition)
- { }
- TPartitionId Partition;
- };
-
struct TEvRestartPipe : public NActors::TEventLocal<TEvRestartPipe, EvRestartPipe> {
TEvRestartPipe()
@@ -425,11 +461,14 @@ struct TEvPQProxy {
};
struct TEvPartitionStatus : public NActors::TEventLocal<TEvPartitionStatus, EvPartitionStatus> {
- TEvPartitionStatus(const TPartitionId& partition, const ui64 offset, const ui64 endOffset, const ui64 writeTimestampEstimateMs, bool init = true)
+ TEvPartitionStatus(const TPartitionId& partition, const ui64 offset, const ui64 endOffset, const ui64 writeTimestampEstimateMs, ui64 nodeId, ui64 generation,
+ bool init = true)
: Partition(partition)
, Offset(offset)
, EndOffset(endOffset)
, WriteTimestampEstimateMs(writeTimestampEstimateMs)
+ , NodeId(nodeId)
+ , Generation(generation)
, Init(init)
{ }
@@ -437,8 +476,11 @@ struct TEvPQProxy {
ui64 Offset;
ui64 EndOffset;
ui64 WriteTimestampEstimateMs;
+ ui64 NodeId;
+ ui64 Generation;
bool Init;
};
+
struct TEvRequestTablet : public NActors::TEventLocal<TEvRequestTablet, EvRequestTablet> {
TEvRequestTablet(const ui64 tabletId)
: TabletId(tabletId)
@@ -461,7 +503,6 @@ struct TEvPQProxy {
struct TEvPartitionLocationResponse : public NActors::TEventLocal<TEvPartitionLocationResponse, EvPartitionLocationResponse>
, public TLocalResponseBase
-
{
TEvPartitionLocationResponse() {}
TVector<TPartitionLocationInfo> Partitions;
@@ -469,6 +510,95 @@ struct TEvPQProxy {
ui64 PathId;
};
+ struct TEvUpdateSession : public NActors::TEventLocal<TEvUpdateSession, EvUpdateSession> {
+ TEvUpdateSession(const TPartitionId& partition, ui64 nodeId, ui64 generation)
+ : Partition(partition)
+ , NodeId(nodeId)
+ , Generation(generation)
+ { }
+
+ TPartitionId Partition;
+ ui64 NodeId;
+ ui64 Generation;
+ };
+
+ struct TEvInitDirectRead : public NActors::TEventLocal<TEvInitDirectRead, EvInitDirectRead> {
+ TEvInitDirectRead(const Topic::StreamDirectReadMessage::FromClient& req, const TString& peerName)
+ : Request(req)
+ , PeerName(peerName)
+ { }
+
+ Topic::StreamDirectReadMessage::FromClient Request;
+ TString PeerName;
+ };
+
+ struct TEvStartDirectRead : public NActors::TEventLocal<TEvStartDirectRead, EvStartDirectRead> {
+ TEvStartDirectRead(ui64 assignId, ui64 generation, ui64 lastDirectReadId)
+ : AssignId(assignId)
+ , Generation(generation)
+ , LastDirectReadId(lastDirectReadId)
+ { }
+
+ const ui64 AssignId;
+ ui64 Generation;
+ const ui64 LastDirectReadId;
+ };
+
+
+ struct TEvDirectReadDataSessionConnected : public TEventLocal<TEvDirectReadDataSessionConnected, EvDirectReadDataSessionConnected> {
+ TEvDirectReadDataSessionConnected(const NKikimr::NPQ::TReadSessionKey& sessionKey, ui32 tabletGeneration,
+ ui64 startingReadId)
+ : ReadKey(sessionKey)
+ , Generation(tabletGeneration)
+ , StartingReadId(startingReadId)
+ {}
+
+ NPQ::TReadSessionKey ReadKey;
+ ui32 Generation;
+ ui64 StartingReadId;
+ };
+
+ struct TEvDirectReadDataSessionDead : public TEventLocal<TEvDirectReadDataSessionDead, EvDirectReadDataSessionDead> {
+ TEvDirectReadDataSessionDead(const TString& session)
+ : Session(session)
+ {}
+
+ TString Session;
+ };
+
+ struct TEvDirectReadDestroyPartitionSession : public TEventLocal<TEvDirectReadDestroyPartitionSession, EvDirectReadDestroyPartitionSession> {
+ TEvDirectReadDestroyPartitionSession(const NKikimr::NPQ::TReadSessionKey& sessionKey,
+ Ydb::PersQueue::ErrorCode::ErrorCode code, const TString& reason)
+ : ReadKey(sessionKey)
+ , Code(code)
+ , Reason(reason)
+ {}
+ NPQ::TReadSessionKey ReadKey;
+ Ydb::PersQueue::ErrorCode::ErrorCode Code;
+ TString Reason;
+ };
+
+ struct TEvDirectReadCloseSession : public TEventLocal<TEvDirectReadCloseSession, EvDirectReadCloseSession> {
+ TEvDirectReadCloseSession(Ydb::PersQueue::ErrorCode::ErrorCode code, const TString& reason)
+ : Code(code)
+ , Reason(reason)
+ {}
+ Ydb::PersQueue::ErrorCode::ErrorCode Code;
+ TString Reason;
+ };
+
+ struct TEvDirectReadSendClientData : public TEventLocal<TEvDirectReadSendClientData, EvDirectReadSendClientData> {
+ TEvDirectReadSendClientData(std::shared_ptr<Ydb::Topic::StreamDirectReadMessage::FromServer>&& message)
+ : Message(std::move(message))
+ {}
+
+ TEvDirectReadSendClientData(const std::shared_ptr<Ydb::Topic::StreamDirectReadMessage::FromServer>& message)
+ : Message(message)
+ {}
+ std::shared_ptr<Ydb::Topic::StreamDirectReadMessage::FromServer> Message;
+ };
+
+
};
struct TLocalRequestBase {
@@ -479,7 +609,7 @@ struct TLocalRequestBase {
, Database(database)
, Token(token)
{}
-
+
TString Topic;
TString Database;
TString Token;
diff --git a/ydb/services/persqueue_v1/actors/helpers.h b/ydb/services/persqueue_v1/actors/helpers.h
index 4b3dd4ff0e..a7b4ddb92e 100644
--- a/ydb/services/persqueue_v1/actors/helpers.h
+++ b/ydb/services/persqueue_v1/actors/helpers.h
@@ -5,8 +5,12 @@
#include <ydb/core/persqueue/writer/source_id_encoding.h>
#include <ydb/services/lib/sharding/sharding.h>
+#include <util/generic/size_literals.h>
+
namespace NKikimr::NGRpcProxy::V1 {
+static constexpr ui64 READ_BLOCK_SIZE = 8_KB; // metering
+
using namespace Ydb;
bool RemoveEmptyMessages(PersQueue::V1::MigrationStreamingReadServerMessage::DataBatch& data);
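
READ_BLOCK_SIZE is annotated "// metering", i.e. read traffic is accounted in 8 KB blocks. A hypothetical rounding helper showing the usual block-billing arithmetic (an illustration under that assumption, not code from this patch):

#include <cstdint>

constexpr uint64_t kReadBlockSize = 8 * 1024;  // mirrors READ_BLOCK_SIZE (8_KB)

constexpr uint64_t MeteredBlocks(uint64_t payloadBytes) {
    // Round up: even a 1-byte read is metered as one full block.
    return (payloadBytes + kReadBlockSize - 1) / kReadBlockSize;
}

static_assert(MeteredBlocks(1) == 1);
static_assert(MeteredBlocks(8 * 1024) == 1);
static_assert(MeteredBlocks(8 * 1024 + 1) == 2);
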
diff --git a/ydb/services/persqueue_v1/actors/partition_actor.cpp b/ydb/services/persqueue_v1/actors/partition_actor.cpp
index 7429bc626a..242b2943fa 100644
--- a/ydb/services/persqueue_v1/actors/partition_actor.cpp
+++ b/ydb/services/persqueue_v1/actors/partition_actor.cpp
@@ -25,7 +25,7 @@ TPartitionActor::TPartitionActor(
const TString& session, const TPartitionId& partition, const ui32 generation, const ui32 step,
const ui64 tabletID, const TTopicCounters& counters, bool commitsDisabled,
const TString& clientDC, bool rangesMode, const NPersQueue::TTopicConverterPtr& topic,
- bool useMigrationProtocol
+ bool directRead, bool useMigrationProtocol
)
: ParentId(parentId)
, ClientId(clientId)
@@ -53,11 +53,11 @@ TPartitionActor::TPartitionActor(
, FirstInit(true)
, PipeClient()
, PipeGeneration(0)
+ , TabletGeneration(0)
+ , NodeId(0)
, RequestInfly(false)
, EndOffset(0)
, SizeLag(0)
- , NeedRelease(false)
- , Released(false)
, WaitDataCookie(0)
, WaitForData(false)
, LockCounted(false)
@@ -65,6 +65,7 @@ TPartitionActor::TPartitionActor(
, CommitsDisabled(commitsDisabled)
, CommitCookie(1)
, Topic(topic)
+ , DirectRead(directRead)
, UseMigrationProtocol(useMigrationProtocol)
{
}
@@ -147,26 +148,8 @@ TPartitionActor::~TPartitionActor() = default;
void TPartitionActor::Bootstrap(const TActorContext&) {
- Become(&TThis::StateFunc);
-}
-
-
-void TPartitionActor::CheckRelease(const TActorContext& ctx) {
- const bool hasUncommittedData = ReadOffset > ClientCommitOffset && ReadOffset > ClientReadOffset; //TODO: remove ReadOffset > ClientReadOffset - otherwise wait for commit with cookie(0)
- if (NeedRelease) {
- LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition
- << " checking release readOffset " << ReadOffset << " committedOffset " << CommittedOffset << " ReadGuid " << ReadGuid
- << " CommitsInfly.size " << CommitsInfly.size() << " Released " << Released);
- }
-
- if (NeedRelease && (ReadGuid.empty() && CommitsInfly.empty() && !hasUncommittedData && !Released)) {
- Released = true;
- ctx.Send(ParentId, new TEvPQProxy::TEvPartitionReleased(Partition));
- LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition
- << " check release done - releasing; readOffset " << ReadOffset << " committedOffset " << CommittedOffset << " ReadGuid " << ReadGuid
- << " CommitsInfly.size " << CommitsInfly.size() << " Released " << Released);
- }
+ Become(&TThis::StateFunc);
}
@@ -195,6 +178,59 @@ void TPartitionActor::SendCommit(const ui64 readId, const ui64 offset, const TAc
NTabletPipe::SendData(ctx, PipeClient, req.Release());
}
+void TPartitionActor::SendPublishDirectRead(const ui64 directReadId, const TActorContext& ctx) {
+ NKikimrClient::TPersQueueRequest request;
+ request.MutablePartitionRequest()->SetTopic(Topic->GetPrimaryPath());
+ request.MutablePartitionRequest()->SetPartition(Partition.Partition);
+ request.MutablePartitionRequest()->SetCookie(ReadOffset);
+
+ Y_ABORT_UNLESS(PipeClient);
+
+ ActorIdToProto(PipeClient, request.MutablePartitionRequest()->MutablePipeClient());
+ auto publish = request.MutablePartitionRequest()->MutableCmdPublishRead();
+ publish->SetDirectReadId(directReadId);
+ Y_ABORT_UNLESS(!Session.empty());
+
+ publish->MutableSessionKey()->SetSessionId(Session);
+ publish->MutableSessionKey()->SetPartitionSessionId(Partition.AssignId);
+
+ LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition
+ << " publishing direct read with id " << directReadId);
+
+ TAutoPtr<TEvPersQueue::TEvRequest> req(new TEvPersQueue::TEvRequest);
+ req->Record.Swap(&request);
+
+ NTabletPipe::SendData(ctx, PipeClient, req.Release());
+}
+
+void TPartitionActor::SendForgetDirectRead(const ui64 directReadId, const TActorContext& ctx) {
+ NKikimrClient::TPersQueueRequest request;
+ request.MutablePartitionRequest()->SetTopic(Topic->GetPrimaryPath());
+ request.MutablePartitionRequest()->SetPartition(Partition.Partition);
+ request.MutablePartitionRequest()->SetCookie(ReadOffset);
+
+ Y_ABORT_UNLESS(PipeClient);
+
+ ActorIdToProto(PipeClient, request.MutablePartitionRequest()->MutablePipeClient());
+ auto forget = request.MutablePartitionRequest()->MutableCmdForgetRead();
+ forget->SetDirectReadId(directReadId);
+ Y_ABORT_UNLESS(!Session.empty());
+
+ forget->MutableSessionKey()->SetSessionId(Session);
+ forget->MutableSessionKey()->SetPartitionSessionId(Partition.AssignId);
+
+ LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition
+ << " forgetting " << directReadId);
+
+ TAutoPtr<TEvPersQueue::TEvRequest> req(new TEvPersQueue::TEvRequest);
+ req->Record.Swap(&request);
+
+ NTabletPipe::SendData(ctx, PipeClient, req.Release());
+}
+
void TPartitionActor::RestartPipe(const TActorContext& ctx, const TString& reason, const NPersQueue::NErrorCode::EErrorCode errorCode) {
if (!PipeClient)
@@ -220,6 +256,25 @@ void TPartitionActor::RestartPipe(const TActorContext& ctx, const TString& reaso
}
+void TPartitionActor::Handle(TEvPQProxy::TEvDirectReadAck::TPtr& ev, const TActorContext& ctx) {
+ auto it = DirectReads.find(ev->Get()->DirectReadId);
+
+ if (it == DirectReads.end() || ev->Get()->DirectReadId == DirectReadId) {
+ ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() << "got direct read ack for unknown direct read id " << ev->Get()->DirectReadId,
+ PersQueue::ErrorCode::BAD_REQUEST));
+ return;
+ }
+ DirectReads.erase(it);
+
+ if (!PipeClient) return; // all direct reads will be cleared on pipe restart
+
+ SendForgetDirectRead(ev->Get()->DirectReadId, ctx);
+}
+
void TPartitionActor::Handle(const TEvPQProxy::TEvRestartPipe::TPtr&, const TActorContext& ctx) {
Y_ABORT_UNLESS(!PipeClient);
@@ -239,6 +294,12 @@ void TPartitionActor::Handle(const TEvPQProxy::TEvRestartPipe::TPtr&, const TAct
<< " pipe restart attempt " << PipeGeneration << " RequestInfly " << RequestInfly << " ReadOffset " << ReadOffset << " EndOffset " << EndOffset
<< " InitDone " << InitDone << " WaitForData " << WaitForData);
+ //TODO: Register in partition this session_id, partition_session_id, pipe actor id
+ //TODO: RestoreDirectReads if any
+ if (InitDone) {
+ //Resend CmdCreateSession with restore reads
+ }
+
if (RequestInfly) { //got read infly
LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition
<< " resend " << CurrentRequest);
@@ -250,6 +311,7 @@ void TPartitionActor::Handle(const TEvPQProxy::TEvRestartPipe::TPtr&, const TAct
NTabletPipe::SendData(ctx, PipeClient, event.Release());
}
+
if (InitDone) {
for (auto& c : CommitsInfly) { //resend all commits
if (c.second.Offset != Max<ui64>())
@@ -468,6 +530,7 @@ void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorCo
Y_ABORT_UNLESS(!ev->Get()->Record.HasErrorCode());
Counters.Errors.Inc();
// map NMsgBusProxy::EResponseStatus to PersQueue::ErrorCode???
+
ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("status is not ok: " + ev->Get()->Record.GetErrorReason(), PersQueue::ErrorCode::ERROR));
return;
}
@@ -530,14 +593,14 @@ void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorCo
if (!StartReading) {
- ctx.Send(ParentId, new TEvPQProxy::TEvPartitionStatus(Partition, CommittedOffset, EndOffset, WriteTimestampEstimateMs));
+ ctx.Send(ParentId, new TEvPQProxy::TEvPartitionStatus(Partition, CommittedOffset, EndOffset, WriteTimestampEstimateMs, TabletGeneration, NodeId));
} else {
InitStartReading(ctx);
}
return;
}
- if (!result.HasCmdReadResult()) { //this is commit response
+ if (!(result.HasCmdReadResult() || result.HasCmdPrepareReadResult() || result.HasCmdPublishReadResult() || result.HasCmdForgetReadResult())) { //this is commit response
if (CommitsInfly.empty()) {
LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition
<< " unwaited commit-response with cookie " << result.GetCookie() << "; waiting for nothing");
@@ -571,15 +634,15 @@ void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorCo
LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition
<< " commit done to position " << CommittedOffset << " endOffset " << EndOffset << " with cookie " << readId);
- CheckRelease(ctx);
PipeGeneration = 0; //reset tries counter - all ok
MakeCommit(ctx);
return;
}
- //This is read
- Y_ABORT_UNLESS(result.HasCmdReadResult());
- const auto& res = result.GetCmdReadResult();
+ if (result.HasCmdForgetReadResult()) {
+ // ignore it
+ return;
+ }
if (result.GetCookie() != (ui64)ReadOffset) {
LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Partition
@@ -587,6 +650,80 @@ void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorCo
return;
}
+ //This is read
+ Y_ABORT_UNLESS(result.HasCmdReadResult() || result.HasCmdPrepareReadResult() || result.HasCmdPublishReadResult());
+ if (result.HasCmdPrepareReadResult()) {
+ const auto& res = result.GetCmdPrepareReadResult();
+
+ Y_ABORT_UNLESS(DirectRead);
+ Y_ABORT_UNLESS(res.GetDirectReadId() == DirectReadId);
+
+ EndOffset = res.GetEndOffset();
+ SizeLag = res.GetSizeLag();
+ WTime = res.GetWriteTimestampMS();
+
+ if (res.GetReadOffset() > 0)
+ ReadOffset = res.GetReadOffset();
+
+ DirectReads[DirectReadId] = res;
+
+ LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " after direct read state " << Partition
+ << " EndOffset " << EndOffset << " ReadOffset " << ReadOffset << " ReadGuid " << ReadGuid << " with direct read id " << DirectReadId);
+
+ SendPublishDirectRead(DirectReadId, ctx);
+
+ Y_ABORT_UNLESS(RequestInfly);
+
+ CurrentRequest.Clear();
+ RequestInfly = false;
+
+ return;
+ }
+ if (result.HasCmdPublishReadResult()) {
+ ++ReadIdToResponse;
+ ReadGuid = TString();
+
+ Y_ABORT_UNLESS(DirectReads.find(DirectReadId) != DirectReads.end());
+
+ Y_ABORT_UNLESS(!RequestInfly);
+
+ const auto& dr = DirectReads[DirectReadId];
+
+ auto readResponse = MakeHolder<TEvPQProxy::TEvDirectReadResponse>(
+ Partition.AssignId,
+ dr.GetReadOffset(),
+ DirectReadId,
+ dr.GetBytesSizeEstimate()
+ );
+
+ LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " after publish direct read state " << Partition
+ << " EndOffset " << EndOffset << " ReadOffset " << ReadOffset << " ReadGuid " << ReadGuid << " with direct read id " << DirectReadId);
+
+ ++DirectReadId;
+
+ ctx.Send(ParentId, readResponse.Release());
+
+ Y_ABORT_UNLESS(!WaitForData);
+
+ ReadOffset = dr.GetLastOffset() + 1;
+
+ Y_ABORT_UNLESS(!RequestInfly);
+
+ if (EndOffset > ReadOffset) {
+ ctx.Send(ParentId, new TEvPQProxy::TEvPartitionReady(Partition, WTime, SizeLag, ReadOffset, EndOffset));
+ } else {
+ WaitForData = true;
+ if (PipeClient) //pipe will be recreated soon
+ WaitDataInPartition(ctx);
+ }
+
+ return;
+ }
+ const auto& res = result.GetCmdReadResult();
+
Y_ABORT_UNLESS(res.HasMaxOffset());
EndOffset = res.GetMaxOffset();
SizeLag = res.GetSizeLag();
@@ -647,6 +784,7 @@ void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorCo
);
ctx.Send(ParentId, readResponse.Release());
} else {
+ Y_ABORT_UNLESS(!DirectRead);
auto readResponse = MakeHolder<TEvPQProxy::TEvReadResponse>(
std::move(response),
ReadOffset,
@@ -655,7 +793,6 @@ void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorCo
);
ctx.Send(ParentId, readResponse.Release());
}
- CheckRelease(ctx);
PipeGeneration = 0; //reset tries counter - all ok
}
@@ -671,6 +808,15 @@ void TPartitionActor::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const
RestartPipe(ctx, TStringBuilder() << "pipe to tablet is dead " << msg->TabletId, NPersQueue::NErrorCode::TABLET_PIPE_DISCONNECTED);
return;
}
+
+ auto prevGeneration = TabletGeneration;
+ Y_UNUSED(prevGeneration);
+ TabletGeneration = msg->Generation;
+ NodeId = msg->ServerId.NodeId();
+
+ if (InitDone) {
+ ctx.Send(ParentId, new TEvPQProxy::TEvUpdateSession(Partition, NodeId, TabletGeneration));
+ }
}
void TPartitionActor::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) {
@@ -678,16 +824,9 @@ void TPartitionActor::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const
}
-void TPartitionActor::Handle(TEvPQProxy::TEvReleasePartition::TPtr&, const TActorContext& ctx) {
- LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " (partition)releasing " << Partition << " ReadOffset " << ReadOffset << " ClientCommitOffset " << ClientCommitOffset
- << " CommittedOffst " << CommittedOffset);
- NeedRelease = true;
- CheckRelease(ctx);
-}
-
void TPartitionActor::Handle(TEvPQProxy::TEvGetStatus::TPtr&, const TActorContext& ctx) {
- ctx.Send(ParentId, new TEvPQProxy::TEvPartitionStatus(Partition, CommittedOffset, EndOffset, WriteTimestampEstimateMs, false));
+ ctx.Send(ParentId, new TEvPQProxy::TEvPartitionStatus(Partition, CommittedOffset, EndOffset, WriteTimestampEstimateMs, TabletGeneration, NodeId, false));
}
@@ -808,7 +947,6 @@ void TPartitionActor::InitLockPartition(const TActorContext& ctx) {
.DoFirstRetryInstantly = true
};
PipeClient = ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, TabletID, clientConfig));
-
NKikimrClient::TPersQueueRequest request;
request.MutablePartitionRequest()->SetTopic(Topic->GetPrimaryPath());
@@ -822,6 +960,7 @@ void TPartitionActor::InitLockPartition(const TActorContext& ctx) {
cmd->SetSessionId(Session);
cmd->SetGeneration(Generation);
cmd->SetStep(Step);
+ cmd->SetPartitionSessionId(Partition.AssignId);
LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " INITING " << Partition);
@@ -913,7 +1052,6 @@ void TPartitionActor::Handle(TEvPersQueue::TEvHasDataInfoResponse::TPtr& ev, con
if (PipeClient)
WaitDataInPartition(ctx);
}
- CheckRelease(ctx); //just for logging purpose
}
@@ -924,9 +1062,6 @@ void TPartitionActor::Handle(TEvPQProxy::TEvRead::TPtr& ev, const TActorContext&
<< " readOffset " << ReadOffset << " EndOffset " << EndOffset << " ClientCommitOffset "
<< ClientCommitOffset << " committedOffset " << CommittedOffset << " Guid " << ev->Get()->Guid);
- Y_ABORT_UNLESS(!NeedRelease);
- Y_ABORT_UNLESS(!Released);
-
Y_ABORT_UNLESS(ReadGuid.empty());
Y_ABORT_UNLESS(!RequestInfly);
@@ -946,6 +1081,10 @@ void TPartitionActor::Handle(TEvPQProxy::TEvRead::TPtr& ev, const TActorContext&
read->SetClientId(ClientId);
read->SetClientDC(ClientDC);
read->SetSessionId(Session);
+ if (DirectRead) {
+ read->SetDirectReadId(DirectReadId);
+ }
+
if (req->MaxCount) {
read->SetCount(req->MaxCount);
}
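
SendPublishDirectRead and SendForgetDirectRead above, together with the TEvDirectReadAck handler, give every prepared read a two-phase lifecycle: the tablet prepares it (CmdPrepareReadResult), the proxy publishes it to the client, the client acknowledges it, and the proxy finally asks the tablet to forget it so the cached data can be released. A standalone sketch of that state machine (illustrative model, not the actor code):

#include <cassert>
#include <cstdint>
#include <map>

enum class DirectReadState { Prepared, Published };

class DirectReadLifecycle {
    std::map<uint64_t, DirectReadState> Reads;
    uint64_t NextDirectReadId = 1;  // DirectReadId in the actor also starts at 1

public:
    uint64_t Prepare() {  // CmdPrepareReadResult arrived
        Reads.emplace(NextDirectReadId, DirectReadState::Prepared);
        return NextDirectReadId++;
    }
    void Publish(uint64_t id) {  // SendPublishDirectRead confirmed
        auto it = Reads.find(id);
        assert(it != Reads.end() && it->second == DirectReadState::Prepared);
        it->second = DirectReadState::Published;
    }
    void Ack(uint64_t id) {  // client ack; the actor answers with a forget
        auto it = Reads.find(id);
        assert(it != Reads.end() && it->second == DirectReadState::Published);
        Reads.erase(it);  // corresponds to DirectReads.erase + SendForgetDirectRead
    }
};
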
diff --git a/ydb/services/persqueue_v1/actors/partition_actor.h b/ydb/services/persqueue_v1/actors/partition_actor.h
index 9bff7aff09..58bb333ff8 100644
--- a/ydb/services/persqueue_v1/actors/partition_actor.h
+++ b/ydb/services/persqueue_v1/actors/partition_actor.h
@@ -9,6 +9,7 @@
#include <ydb/core/base/tablet_pipe.h>
#include <ydb/core/persqueue/events/global.h>
+#include <ydb/core/util/ulid.h>
#include <ydb/library/services/services.pb.h>
@@ -71,8 +72,8 @@ public:
TPartitionActor(const TActorId& parentId, const TString& clientId, const TString& clientPath, const ui64 cookie,
const TString& session, const TPartitionId& partition, ui32 generation, ui32 step,
const ui64 tabletID, const TTopicCounters& counters, const bool commitsDisabled,
- const TString& clientDC, bool rangesMode, const NPersQueue::TTopicConverterPtr& topic,
- bool useMigrationProtocol = true);
+ const TString& clientDC, bool rangesMode, const NPersQueue::TTopicConverterPtr& topic, bool directRead,
+ bool useMigrationProtocol);
~TPartitionActor();
void Bootstrap(const NActors::TActorContext& ctx);
@@ -90,10 +91,10 @@ private:
HFunc(TEvPQProxy::TEvRead, Handle)
HFunc(TEvPQProxy::TEvCommitCookie, Handle)
HFunc(TEvPQProxy::TEvCommitRange, Handle)
- HFunc(TEvPQProxy::TEvReleasePartition, Handle)
HFunc(TEvPQProxy::TEvLockPartition, Handle)
HFunc(TEvPQProxy::TEvGetStatus, Handle)
HFunc(TEvPQProxy::TEvRestartPipe, Handle)
+ HFunc(TEvPQProxy::TEvDirectReadAck, Handle)
HFunc(TEvTabletPipe::TEvClientDestroyed, Handle);
HFunc(TEvTabletPipe::TEvClientConnected, Handle);
@@ -105,10 +106,11 @@ private:
}
- void Handle(TEvPQProxy::TEvReleasePartition::TPtr& ev, const NActors::TActorContext& ctx);
void Handle(TEvPQProxy::TEvLockPartition::TPtr& ev, const NActors::TActorContext& ctx);
void Handle(TEvPQProxy::TEvGetStatus::TPtr& ev, const NActors::TActorContext& ctx);
+ void Handle(TEvPQProxy::TEvDirectReadAck::TPtr& ev, const NActors::TActorContext& ctx);
+
void Handle(TEvPQProxy::TEvDeadlineExceeded::TPtr& ev, const NActors::TActorContext& ctx);
void Handle(TEvPQProxy::TEvRead::TPtr& ev, const NActors::TActorContext& ctx);
@@ -124,7 +126,6 @@ private:
void HandlePoison(NActors::TEvents::TEvPoisonPill::TPtr& ev, const NActors::TActorContext& ctx);
void HandleWakeup(const NActors::TActorContext& ctx);
- void CheckRelease(const NActors::TActorContext& ctx);
void InitLockPartition(const NActors::TActorContext& ctx);
void InitStartReading(const NActors::TActorContext& ctx);
@@ -132,6 +133,8 @@ private:
void WaitDataInPartition(const NActors::TActorContext& ctx);
void SendCommit(const ui64 readId, const ui64 offset, const TActorContext& ctx);
void MakeCommit(const TActorContext& ctx);
+ void SendPublishDirectRead(const ui64 directReadId, const TActorContext& ctx);
+ void SendForgetDirectRead(const ui64 directReadId, const TActorContext& ctx);
private:
@@ -170,6 +173,9 @@ private:
bool FirstInit;
TActorId PipeClient;
ui32 PipeGeneration;
+ ui64 TabletGeneration;
+ ui64 NodeId;
+
bool RequestInfly;
NKikimrClient::TPersQueueRequest CurrentRequest;
@@ -178,9 +184,6 @@ private:
TString ReadGuid; // empty if not reading
- bool NeedRelease;
- bool Released;
-
std::set<ui64> WaitDataInfly;
ui64 WaitDataCookie;
bool WaitForData;
@@ -200,6 +203,11 @@ private:
ui64 CommitCookie;
NPersQueue::TTopicConverterPtr Topic;
+ bool DirectRead = false;
+
+ ui64 DirectReadId = 1;
+ std::map<ui64, NKikimrClient::TPersQueuePartitionResponse::TCmdPrepareDirectReadResult> DirectReads;
+
bool UseMigrationProtocol;
};
diff --git a/ydb/services/persqueue_v1/actors/read_session_actor.h b/ydb/services/persqueue_v1/actors/read_session_actor.h
index 03065dcece..61a1cd52cb 100644
--- a/ydb/services/persqueue_v1/actors/read_session_actor.h
+++ b/ydb/services/persqueue_v1/actors/read_session_actor.h
@@ -30,6 +30,7 @@ struct TPartitionActorInfo {
std::deque<ui64> Commits;
bool Reading;
bool Releasing;
+ bool Stopping;
bool Released;
bool LockSent;
bool ReleaseSent;
@@ -42,6 +43,20 @@ struct TPartitionActorInfo {
TInstant AssignTimestamp;
+ ui64 Generation;
+ ui64 NodeId;
+
+ struct TDirectReadInfo {
+ ui64 DirectReadId = 0;
+ ui64 ByteSize = 0;
+ };
+
+ ui64 MaxProcessedDirectReadId = 0;
+ ui64 LastDirectReadId = 0;
+
+ std::map<i64, TDirectReadInfo> DirectReads;
+
explicit TPartitionActorInfo(
const TActorId& actor,
const TPartitionId& partition,
@@ -52,6 +67,7 @@ struct TPartitionActorInfo {
, Topic(topic)
, Reading(false)
, Releasing(false)
+ , Stopping(false)
, Released(false)
, LockSent(false)
, ReleaseSent(false)
@@ -59,7 +75,10 @@ struct TPartitionActorInfo {
, ReadIdCommitted(0)
, Offset(0)
, AssignTimestamp(timestamp)
+ , Generation(0)
+ , NodeId(0)
{
+ Y_ABORT_UNLESS(partition.DiscoveryConverter != nullptr);
}
};
@@ -102,9 +121,16 @@ struct TFormedReadResponse: public TSimpleRefCount<TFormedReadResponse<TServerMe
i64 ByteSizeBeforeFiltering = 0;
ui64 RequiredQuota = 0;
+ bool IsDirectRead = false;
+ ui64 AssignId = 0;
+ ui64 DirectReadId = 0;
+ ui64 DirectReadByteSize = 0;
+
// returns byteSize diff
i64 ApplyResponse(TServerMessage&& resp);
+ i64 ApplyDirectReadResponse(TEvPQProxy::TEvDirectReadResponse::TPtr& ev);
+
THashSet<TActorId> PartitionsTookPartInRead;
TSet<TPartitionId> PartitionsTookPartInControlMessages;
@@ -195,11 +221,13 @@ private:
HFunc(TEvPQProxy::TEvReadSessionStatus, Handle); // from read sessions info builder proxy
HFunc(TEvPQProxy::TEvRead, Handle); // from gRPC
HFunc(/* type alias */ TEvReadResponse, Handle); // from partitionActor
+ HFunc(TEvPQProxy::TEvDirectReadResponse, Handle); // from partitionActor
+ HFunc(TEvPQProxy::TEvDirectReadAck, Handle); // from gRPC
HFunc(TEvPQProxy::TEvDone, Handle); // from gRPC
HFunc(TEvPQProxy::TEvCloseSession, Handle); // from partitionActor
HFunc(TEvPQProxy::TEvDieCommand, Handle);
HFunc(TEvPQProxy::TEvPartitionReady, Handle); // from partitionActor
- HFunc(TEvPQProxy::TEvPartitionReleased, Handle); // from partitionActor
+ HFunc(TEvPQProxy::TEvPartitionReleased, Handle); // from partitionActor
HFunc(TEvPQProxy::TEvCommitCookie, Handle); // from gRPC
HFunc(TEvPQProxy::TEvCommitRange, Handle); // from gRPC
HFunc(TEvPQProxy::TEvStartRead, Handle); // from gRPC
@@ -208,6 +236,8 @@ private:
HFunc(TEvPQProxy::TEvAuth, Handle); // from gRPC
HFunc(TEvPQProxy::TEvCommitDone, Handle); // from PartitionActor
HFunc(TEvPQProxy::TEvPartitionStatus, Handle); // from partitionActor
+ HFunc(TEvPQProxy::TEvUpdateSession, Handle); // from partitionActor
+
// Balancer events
HFunc(TEvPersQueue::TEvLockPartition, Handle); // can be sent to itself when reading without a consumer
@@ -242,6 +272,8 @@ private:
void Handle(TEvPQProxy::TEvReadSessionStatus::TPtr& ev, const TActorContext& ctx);
void Handle(TEvPQProxy::TEvRead::TPtr& ev, const TActorContext& ctx);
void Handle(typename TEvReadResponse::TPtr& ev, const TActorContext& ctx);
+ void Handle(TEvPQProxy::TEvDirectReadResponse::TPtr& ev, const TActorContext& ctx);
+ void Handle(TEvPQProxy::TEvDirectReadAck::TPtr& ev, const TActorContext& ctx);
void Handle(TEvPQProxy::TEvDone::TPtr& ev, const TActorContext& ctx);
void Handle(TEvPQProxy::TEvCloseSession::TPtr& ev, const TActorContext& ctx);
void Handle(TEvPQProxy::TEvDieCommand::TPtr& ev, const TActorContext& ctx);
@@ -255,6 +287,7 @@ private:
void Handle(TEvPQProxy::TEvAuth::TPtr& ev, const TActorContext& ctx);
void Handle(TEvPQProxy::TEvCommitDone::TPtr& ev, const TActorContext& ctx);
void Handle(TEvPQProxy::TEvPartitionStatus::TPtr& ev, const TActorContext& ctx);
+ void Handle(TEvPQProxy::TEvUpdateSession::TPtr& ev, const TActorContext& ctx);
// Balancer events
void Handle(TEvPersQueue::TEvLockPartition::TPtr& ev, const TActorContext& ctx); // can be sent to itself when reading without a consumer
@@ -406,6 +439,8 @@ private:
NPersQueue::TTopicsListController TopicsHandler;
NPersQueue::TTopicsToConverter TopicsList;
+
+ bool DirectRead;
};
}
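
The new TPartitionActorInfo fields add per-session direct-read bookkeeping: every published read pins its estimated byte size until the client acknowledges it, and dropping a partition returns the sizes of undelivered reads to the read budget, as DropPartition does in the .ipp file below. A simplified standalone model of that accounting (names assumed):

#include <cstdint>
#include <map>

struct DirectReadInfo {
    uint64_t DirectReadId = 0;
    uint64_t ByteSize = 0;
};

struct PartitionSessionBudget {
    std::map<uint64_t, DirectReadInfo> DirectReads;
    int64_t BytesInflight = 0;   // bytes the client has not acknowledged yet
    int64_t ReadSizeBudget = 0;  // bytes that may still be requested from tablets

    void OnPublished(uint64_t id, uint64_t byteSize) {
        DirectReads[id] = {id, byteSize};
        BytesInflight += static_cast<int64_t>(byteSize);
    }
    void OnAck(uint64_t id) {  // the client consumed the read
        auto it = DirectReads.find(id);
        if (it == DirectReads.end()) return;
        BytesInflight -= static_cast<int64_t>(it->second.ByteSize);
        DirectReads.erase(it);
    }
    void OnDropPartition() {  // undelivered reads go back into the budget
        for (const auto& [id, dr] : DirectReads) {
            BytesInflight -= static_cast<int64_t>(dr.ByteSize);
            ReadSizeBudget += static_cast<int64_t>(dr.ByteSize);
        }
        DirectReads.clear();
    }
};
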
diff --git a/ydb/services/persqueue_v1/actors/read_session_actor.ipp b/ydb/services/persqueue_v1/actors/read_session_actor.ipp
index 5fb6a4a3b3..d4532610d3 100644
--- a/ydb/services/persqueue_v1/actors/read_session_actor.ipp
+++ b/ydb/services/persqueue_v1/actors/read_session_actor.ipp
@@ -59,6 +59,7 @@ TReadSessionActor<UseMigrationProtocol>::TReadSessionActor(
, RequestedBytes(0)
, ReadsInfly(0)
, TopicsHandler(topicsHandler)
+ , DirectRead(false)
{
Y_ASSERT(Request);
}
@@ -212,10 +213,11 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(typename IContext::TEvReadF
}
case TClientMessage::kStopPartitionSessionResponse: {
- ctx.Send(ctx.SelfID, new TEvPQProxy::TEvReleased(getAssignId(request.stop_partition_session_response())));
if (ReadWithoutConsumer) {
return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "it is forbidden to send StopPartitionSessionResponse when reading without a consumer", ctx);
}
+
+ ctx.Send(ctx.SelfID, new TEvPQProxy::TEvReleased(getAssignId(request.stop_partition_session_response()), request.stop_partition_session_response().graceful()));
return (void)ReadFromStreamOrDie(ctx);
}
@@ -267,6 +269,13 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(typename IContext::TEvReadF
return (void)ReadFromStreamOrDie(ctx);
}
+ case TClientMessage::kDirectReadAck: {
+ const auto& ddrr = request.direct_read_ack();
+ ctx.Send(ctx.SelfID, new TEvPQProxy::TEvDirectReadAck(ddrr.partition_session_id(), ddrr.direct_read_id()));
+ return (void)ReadFromStreamOrDie(ctx);
+ }
+
default: {
return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "unsupported request", ctx);
}
@@ -442,6 +451,54 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvAuth::TPtr&
}
}
+
+template <bool UseMigrationProtocol>
+void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvDirectReadAck::TPtr& ev, const TActorContext& ctx) {
+ auto it = Partitions.find(ev->Get()->AssignId);
+ if (it == Partitions.end()) {
+ // do nothing - the partition has already been released
+ return;
+ }
+
+ LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got DirectReadAck from client"
+ << ": partition# " << it->second.Partition
+ << ", directReadId# " << ev->Get()->DirectReadId
+ << ", bytesInflight# " << BytesInflight_);
+
+ auto drIt = it->second.DirectReads.find(ev->Get()->DirectReadId);
+ if (drIt == it->second.DirectReads.end()) {
+ return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TStringBuilder()
+ << "unknown direct read in Ack: " << ev->Get()->DirectReadId, ctx);
+ }
+
+ if (it->second.MaxProcessedDirectReadId + 1 != (ui64)ev->Get()->DirectReadId) {
+ return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TStringBuilder()
+ << "direct reads must be confirmed in strict order - expecting " << (it->second.MaxProcessedDirectReadId + 1)
+ << " but got " << ev->Get()->DirectReadId, ctx);
+ }
+
+ if (it->second.LastDirectReadId < (ui64)ev->Get()->DirectReadId) {
+ return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TStringBuilder() << "got direct read id that does not exist yet "
+ << ev->Get()->DirectReadId, ctx);
+ }
+
+ it->second.MaxProcessedDirectReadId = ev->Get()->DirectReadId;
+
+ BytesInflight_ -= drIt->second.ByteSize;
+ if (BytesInflight) {
+ (*BytesInflight) -= drIt->second.ByteSize;
+ }
+ it->second.DirectReads.erase(drIt);
+
+ ProcessReads(ctx);
+ ctx.Send(it->second.Actor, new TEvPQProxy::TEvDirectReadAck(ev->Get()->AssignId, ev->Get()->DirectReadId));
+}
+
template <bool UseMigrationProtocol>
void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvStartRead::TPtr& ev, const TActorContext& ctx) {
RequestNotChecked = true;
@@ -476,16 +533,35 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvReleased::TP
return;
}
- if (!it->second.Releasing) {
- return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TStringBuilder()
- << "release of partition that is not requested for release is forbiden for " << it->second.Partition, ctx);
- }
-
LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got Released from client"
<< ": partition# " << it->second.Partition);
Y_ABORT_UNLESS(it->second.LockSent);
- ReleasePartition(it, true, ctx);
+
+ if (ev->Get()->Graceful || !DirectRead) {
+ if (!it->second.Releasing) {
+ auto p = it->second.Partition;
+ return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TStringBuilder()
+ << "graceful release of partition that is not requested for release is forbiden for " << p, ctx);
+ }
+ if (it->second.Stopping) { // Ignore a graceful release if the partition is already stopping
+ return;
+ }
+ if (!DirectRead) {
+ ReleasePartition(it, true, ctx);
+ } else {
+ SendReleaseSignal(it, true, ctx);
+ }
+ } else {
+ Y_ABORT_UNLESS(DirectRead);
+ if (!it->second.Stopping) {
+ return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TStringBuilder()
+ << "release of partition that is not requested is forbiden for " << it->second.Partition, ctx);
+ }
+ //TODO: filter all direct reads
+ ReleasePartition(it, true, ctx);
+ }
}
template <bool UseMigrationProtocol>
@@ -516,12 +592,25 @@ void TReadSessionActor<UseMigrationProtocol>::DropPartition(typename TPartitions
PartsPerSession.DecFor(Partitions.size(), 1);
}
+ for (auto& [readId, dr] : it->second.DirectReads) {
+ BytesInflight_ -= dr.ByteSize;
+ if (BytesInflight) {
+ (*BytesInflight) -= dr.ByteSize;
+ }
+
+ Y_ABORT_UNLESS((ui64)readId > it->second.MaxProcessedDirectReadId);
+ ReadSizeBudget += dr.ByteSize; // return all undelivered reads to the budget
+ }
+
BalancerGeneration.erase(it->first);
Partitions.erase(it);
if (SessionsActive) {
PartsPerSession.IncFor(Partitions.size(), 1);
}
+
+ // Inflight bytes were released, so new reads may now fit in the budget
+ ProcessReads(ctx);
}
template <bool UseMigrationProtocol>
@@ -639,6 +728,7 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(typename TEvReadInit::TPtr&
<< "_" << "v1";
CommitsDisabled = false;
+
PeerName = ev->Get()->PeerName;
if constexpr (UseMigrationProtocol) {
@@ -655,6 +745,7 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(typename TEvReadInit::TPtr&
MaxTimeLagMs = 0; // max_lag per topic only
ReadTimestampMs = 0; // read_from per topic only
ReadOnlyLocal = true;
+ DirectRead = init.direct_read();
if (init.reader_name()) {
PeerName = init.reader_name();
}
@@ -1083,13 +1174,15 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPersQueue::TEvLockPartit
Y_ABORT_UNLESS(record.GetGeneration() > 0);
const ui64 assignId = NextAssignId++;
+ Y_ABORT_UNLESS(converterIter->second != nullptr);
+
BalancerGeneration[assignId] = {record.GetGeneration(), record.GetStep()};
const TPartitionId partitionId{converterIter->second, record.GetPartition(), assignId};
const TActorId actorId = ctx.Register(new TPartitionActor(
ctx.SelfID, ClientId, ClientPath, Cookie, Session, partitionId, record.GetGeneration(),
record.GetStep(), record.GetTabletId(), it->second, CommitsDisabled, ClientDC, RangesMode,
- converterIter->second, UseMigrationProtocol));
+ converterIter->second, DirectRead, UseMigrationProtocol));
if (SessionsActive) {
PartsPerSession.DecFor(Partitions.size(), 1);
@@ -1157,6 +1250,12 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvPartitionSta
result.mutable_start_partition_session_request()->mutable_partition_offsets()->set_start(ev->Get()->Offset);
result.mutable_start_partition_session_request()->mutable_partition_offsets()->set_end(ev->Get()->EndOffset);
+
+ if (DirectRead) {
+ result.mutable_start_partition_session_request()->mutable_partition_location()->set_node_id(ev->Get()->NodeId);
+ result.mutable_start_partition_session_request()->mutable_partition_location()->set_generation(ev->Get()->Generation);
+ }
+
}
} else {
Y_ABORT_UNLESS(it->second.LockSent);
@@ -1186,6 +1285,36 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvPartitionSta
}
template <bool UseMigrationProtocol>
+void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvUpdateSession::TPtr& ev, const TActorContext& ctx) {
+ if (!ActualPartitionActors.contains(ev->Sender)) {
+ return;
+ }
+
+ if (!DirectRead) {
+ return;
+ }
+ auto it = Partitions.find(ev->Get()->Partition.AssignId);
+ Y_ABORT_UNLESS(it != Partitions.end());
+
+ TServerMessage result;
+ result.set_status(Ydb::StatusIds::SUCCESS);
+
+ Y_ABORT_UNLESS(it->second.LockSent);
+
+ if constexpr (!UseMigrationProtocol) {
+ result.mutable_update_partition_session()->set_partition_session_id(it->first);
+ result.mutable_update_partition_session()->mutable_partition_location()->set_node_id(ev->Get()->NodeId);
+ result.mutable_update_partition_session()->mutable_partition_location()->set_generation(ev->Get()->Generation);
+
+ }
+
+ LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " sending to client update partition stream event");
+ SendControlMessage(it->second.Partition, std::move(result), ctx);
+}
+
+template <bool UseMigrationProtocol>
bool TReadSessionActor<UseMigrationProtocol>::SendControlMessage(TPartitionId id, TServerMessage&& message, const TActorContext& ctx) {
id.AssignId = 0;
@@ -1210,6 +1339,8 @@ void TReadSessionActor<UseMigrationProtocol>::SendReleaseSignal(typename TPartit
TServerMessage result;
result.set_status(Ydb::StatusIds::SUCCESS);
+ if (kill) it->second.Stopping = true;
+
if constexpr (UseMigrationProtocol) {
result.mutable_release()->mutable_topic()->set_path(it->second.Topic->GetFederationPath());
result.mutable_release()->set_cluster(it->second.Topic->GetCluster());
@@ -1221,6 +1352,9 @@ void TReadSessionActor<UseMigrationProtocol>::SendReleaseSignal(typename TPartit
result.mutable_stop_partition_session_request()->set_partition_session_id(it->second.Partition.AssignId);
result.mutable_stop_partition_session_request()->set_graceful(!kill);
result.mutable_stop_partition_session_request()->set_committed_offset(it->second.Offset);
+ if (DirectRead) {
+ result.mutable_stop_partition_session_request()->set_last_direct_read_id(it->second.LastDirectReadId);
+ }
}
if (!SendControlMessage(it->second.Partition, std::move(result), ctx)) {
@@ -1351,7 +1485,6 @@ void TReadSessionActor<UseMigrationProtocol>::CloseSession(PersQueue::ErrorCode:
LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc double finish failed");
}
}
-
Die(ctx);
}
@@ -1397,6 +1530,10 @@ void TReadSessionActor<UseMigrationProtocol>::ReleasePartition(typename TPartiti
Y_ABORT_UNLESS(couldBeReads || !it->second.Reading);
typename TFormedReadResponse<TServerMessage>::TPtr response;
+ LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got all from client, actual releasing"
+ << ": partition# " << it->second.Partition);
+
// process reads
if (it->second.Reading) {
auto readIt = PartitionToReadResponse.find(it->second.Actor);
@@ -1451,8 +1588,9 @@ void TReadSessionActor<UseMigrationProtocol>::ProcessBalancerDead(ui64 tabletId,
if (jt->second.LockSent) {
SendReleaseSignal(jt, true, ctx);
}
-
- ReleasePartition(jt, true, ctx);
+ if (!DirectRead || !jt->second.LockSent) { // in direct read mode wait for final release from client
+ ReleasePartition(jt, true, ctx);
+ }
} else {
++it;
}
@@ -1536,6 +1674,23 @@ i64 TFormedReadResponse<TServerMessage>::ApplyResponse(TServerMessage&& resp) {
return ByteSize - prev;
}
+template <typename TServerMessage>
+i64 TFormedReadResponse<TServerMessage>::ApplyDirectReadResponse(TEvPQProxy::TEvDirectReadResponse::TPtr& ev) {
+
+ constexpr bool UseMigrationProtocol = std::is_same_v<TServerMessage, PersQueue::V1::MigrationStreamingReadServerMessage>;
+ Y_ABORT_UNLESS(!UseMigrationProtocol);
+
+ IsDirectRead = true;
+ AssignId = ev->Get()->AssignId;
+ DirectReadId = ev->Get()->DirectReadId;
+ DirectReadByteSize = ev->Get()->ByteSize;
+
+ i64 diff = DirectReadByteSize - ByteSize;
+ ByteSize = DirectReadByteSize;
+ return diff;
+}
+
template <bool UseMigrationProtocol>
void TReadSessionActor<UseMigrationProtocol>::Handle(typename TEvReadResponse::TPtr& ev, const TActorContext& ctx) {
if (!ActualPartitionActors.contains(ev->Sender)) {
@@ -1606,7 +1761,71 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(typename TEvReadResponse::T
}
template <bool UseMigrationProtocol>
+void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvDirectReadResponse::TPtr& ev, const TActorContext& ctx) {
+ if (!ActualPartitionActors.contains(ev->Sender)) {
+ return;
+ }
+
+ Y_DEBUG_ABORT_UNLESS(!UseMigrationProtocol);
+
+ ui64 assignId;
+
+ assignId = ev->Get()->AssignId;
+
+ typename TFormedReadResponse<TServerMessage>::TPtr formedResponse;
+ {
+ auto it = PartitionToReadResponse.find(ev->Sender);
+ Y_ABORT_UNLESS(it != PartitionToReadResponse.end());
+ formedResponse = it->second;
+ }
+
+ auto it = Partitions.find(assignId);
+ Y_ABORT_UNLESS(it != Partitions.end());
+ Y_ABORT_UNLESS(it->second.Reading);
+ it->second.Reading = false;
+
+ LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " direct read preparation done"
+ << ": guid# " << formedResponse->Guid
+ << ", partition# " << it->second.Partition
+ << ", size# " << ev->Get()->ByteSize
+ << ", direct_read_id# " << ev->Get()->DirectReadId);
+
+ const i64 diff = formedResponse->ApplyDirectReadResponse(ev);
+
+ --formedResponse->RequestsInfly;
+ Y_ABORT_UNLESS(formedResponse->RequestsInfly == 0);
+
+ BytesInflight_ += diff;
+ if (BytesInflight) {
+ (*BytesInflight) += diff;
+ }
+
+ Y_ABORT_UNLESS(formedResponse->RequestsInfly == 0);
+
+ if (const auto ru = CalcRuConsumption(PrepareResponse(formedResponse))) {
+ formedResponse->RequiredQuota = ru;
+ if (MaybeRequestQuota(ru, EWakeupTag::RlAllowed, ctx)) {
+ Y_ABORT_UNLESS(!PendingQuota);
+ PendingQuota = formedResponse;
+ } else {
+ WaitingQuota.push_back(formedResponse);
+ }
+ } else {
+ ProcessAnswer(formedResponse, ctx);
+ }
+}
+
+template <bool UseMigrationProtocol>
ui64 TReadSessionActor<UseMigrationProtocol>::PrepareResponse(typename TFormedReadResponse<TServerMessage>::TPtr formedResponse) {
+
+ if (formedResponse->IsDirectRead) {
+ return formedResponse->DirectReadByteSize;
+ }
+
formedResponse->ByteSizeBeforeFiltering = formedResponse->Response.ByteSize();
if constexpr (UseMigrationProtocol) {
@@ -1618,34 +1837,49 @@ ui64 TReadSessionActor<UseMigrationProtocol>::PrepareResponse(typename TFormedRe
return formedResponse->HasMessages ? formedResponse->Response.ByteSize() : 0;
}
+
template <bool UseMigrationProtocol>
void TReadSessionActor<UseMigrationProtocol>::ProcessAnswer(typename TFormedReadResponse<TServerMessage>::TPtr formedResponse, const TActorContext& ctx) {
ui32 readDurationMs = (ctx.Now() - formedResponse->Start - formedResponse->WaitQuotaTime).MilliSeconds();
- if (formedResponse->FromDisk) {
- if (ReadLatencyFromDisk)
- ReadLatencyFromDisk.IncFor(readDurationMs, 1);
+ const ui64 diff = formedResponse->ByteSizeBeforeFiltering;
+ ui64 sizeEstimation = 0;
+
+ if (formedResponse->IsDirectRead) {
+ sizeEstimation = formedResponse->DirectReadByteSize;
} else {
- if (ReadLatency)
- ReadLatency.IncFor(readDurationMs, 1);
- }
+ sizeEstimation = formedResponse->HasMessages ? formedResponse->Response.ByteSize() : 0;
+
+ if (formedResponse->FromDisk) {
+ if (ReadLatencyFromDisk)
+ ReadLatencyFromDisk.IncFor(readDurationMs, 1);
+ } else {
+ if (ReadLatency)
+ ReadLatency.IncFor(readDurationMs, 1);
+ }
- const auto latencyThreshold = formedResponse->FromDisk
- ? AppData(ctx)->PQConfig.GetReadLatencyFromDiskBigMs()
- : AppData(ctx)->PQConfig.GetReadLatencyBigMs();
- if (readDurationMs >= latencyThreshold && SLIBigReadLatency) {
- SLIBigReadLatency.Inc();
+ const auto latencyThreshold = formedResponse->FromDisk
+ ? AppData(ctx)->PQConfig.GetReadLatencyFromDiskBigMs()
+ : AppData(ctx)->PQConfig.GetReadLatencyBigMs();
+ if (readDurationMs >= latencyThreshold && SLIBigReadLatency) {
+ SLIBigReadLatency.Inc();
+ }
}
Y_ABORT_UNLESS(formedResponse->RequestsInfly == 0);
- const ui64 diff = formedResponse->ByteSizeBeforeFiltering;
- const ui64 sizeEstimation = formedResponse->HasMessages ? formedResponse->Response.ByteSize() : 0;
if constexpr (!UseMigrationProtocol) {
formedResponse->Response.mutable_read_response()->set_bytes_size(sizeEstimation);
}
- if (formedResponse->HasMessages) {
+ if (formedResponse->IsDirectRead) {
+ auto it = Partitions.find(formedResponse->AssignId);
+ Y_ABORT_UNLESS(it != Partitions.end());
+ it->second.DirectReads[formedResponse->DirectReadId] = {formedResponse->DirectReadId, sizeEstimation};
+ it->second.LastDirectReadId = formedResponse->DirectReadId;
+
+ Y_ABORT_UNLESS(diff == 0); // sizeEstimation is already accounted for in inflight
+ } else if (formedResponse->HasMessages) {
LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " response to read"
<< ": guid# " << formedResponse->Guid);
if (!WriteToStreamOrDie(ctx, std::move(formedResponse->Response))) {
@@ -1655,7 +1889,6 @@ void TReadSessionActor<UseMigrationProtocol>::ProcessAnswer(typename TFormedRead
LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " empty read result, start new reading"
<< ": guid# " << formedResponse->Guid);
}
-
BytesInflight_ -= diff;
if (BytesInflight) {
(*BytesInflight) -= diff;
@@ -1691,6 +1924,8 @@ void TReadSessionActor<UseMigrationProtocol>::ProcessAnswer(typename TFormedRead
// Bring back available partitions.
// If some partition was removed from partitions container, it is not bad because it will be checked during read processing.
AvailablePartitions.insert(formedResponse->PartitionsBecameAvailable.begin(), formedResponse->PartitionsBecameAvailable.end());
+ LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " Process answer. Available partitions: " << AvailablePartitions.size());
+
if constexpr (UseMigrationProtocol) {
if (!formedResponse->HasMessages) {
@@ -1700,6 +1935,7 @@ void TReadSessionActor<UseMigrationProtocol>::ProcessAnswer(typename TFormedRead
}
}
+
ProcessReads(ctx);
}
@@ -1734,7 +1970,6 @@ void TReadSessionActor<UseMigrationProtocol>::ProcessReads(const TActorContext&
return ReadSizeBudget > 0;
}
};
-
while (shouldContinueReads() && BytesInflight_ + RequestedBytes < MAX_INFLY_BYTES) {
ui32 count = MaxReadMessagesCount;
ui64 size = MaxReadSize;
@@ -1815,14 +2050,14 @@ void TReadSessionActor<UseMigrationProtocol>::ProcessReads(const TActorContext&
RequestedBytes += csize;
formedResponse->RequestedBytes += csize;
+
ReadSizeBudget -= csize;
ctx.Send(it->second.Actor, ev.Release());
res = PartitionToReadResponse.emplace(it->second.Actor, formedResponse).second;
Y_ABORT_UNLESS(res);
- // TODO (ildar-khisam@): Gather data from all partitions.
- // For now send messages only from single partition.
+ // Do not aggregate messages from different partitions together.
if constexpr (!UseMigrationProtocol) {
break;
}
@@ -1877,6 +2112,7 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvPartitionRea
ev->Get()->SizeLag,
ev->Get()->EndOffset - ev->Get()->ReadOffset).second;
Y_ABORT_UNLESS(res);
+ LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << "TEvPartitionReady. Available partitions: " << AvailablePartitions.size());
ProcessReads(ctx);
}
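
The TEvDirectReadAck handler above enforces a strict ordering contract on the client: each ack must be exactly MaxProcessedDirectReadId + 1 and may not reference an id that has not been produced yet. The same rule as a standalone function (simplified sketch; it throws where the actor closes the session):

#include <cstdint>
#include <stdexcept>
#include <string>

void ValidateDirectReadAck(uint64_t ackedId, uint64_t maxProcessedId, uint64_t lastProducedId) {
    if (ackedId != maxProcessedId + 1) {
        throw std::runtime_error(
            "direct reads must be confirmed in strict order - expecting "
            + std::to_string(maxProcessedId + 1) + " but got " + std::to_string(ackedId));
    }
    if (ackedId > lastProducedId) {
        throw std::runtime_error(
            "direct read id " + std::to_string(ackedId) + " does not exist yet");
    }
}
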
diff --git a/ydb/services/persqueue_v1/actors/schema_actors.cpp b/ydb/services/persqueue_v1/actors/schema_actors.cpp
index 3254b4566a..3ba1b844dd 100644
--- a/ydb/services/persqueue_v1/actors/schema_actors.cpp
+++ b/ydb/services/persqueue_v1/actors/schema_actors.cpp
@@ -678,6 +678,7 @@ void TDescribeTopicActorImpl::RequestPartitionsLocation(const TActorContext& ctx
}
void TDescribeTopicActorImpl::RequestReadSessionsInfo(const TActorContext& ctx) {
+ Y_ABORT_UNLESS(Settings.Mode == TDescribeTopicActorSettings::EMode::DescribeConsumer);
NTabletPipe::SendData(
ctx, *BalancerPipe,
new TEvPersQueue::TEvGetReadSessionsInfo(NPersQueue::ConvertNewConsumerName(Settings.Consumer, ctx))
diff --git a/ydb/services/persqueue_v1/actors/ya.make b/ydb/services/persqueue_v1/actors/ya.make
index fb6ec9149e..1329c08574 100644
--- a/ydb/services/persqueue_v1/actors/ya.make
+++ b/ydb/services/persqueue_v1/actors/ya.make
@@ -4,6 +4,7 @@ PEERDIR(
ydb/library/actors/core
library/cpp/containers/disjoint_interval_tree
library/cpp/string_utils/base64
+ ydb/core/util
ydb/core/base
ydb/core/grpc_services
ydb/core/persqueue
@@ -38,6 +39,8 @@ SRCS(
read_info_actor.h
read_info_actor.cpp
read_session_actor.h
+ direct_read_actor.h
+ direct_read_actor.cpp
write_session_actor.h
schema_actors.h
schema_actors.cpp
diff --git a/ydb/services/persqueue_v1/grpc_pq_read.cpp b/ydb/services/persqueue_v1/grpc_pq_read.cpp
index 4d18fd7fa4..ff92306325 100644
--- a/ydb/services/persqueue_v1/grpc_pq_read.cpp
+++ b/ydb/services/persqueue_v1/grpc_pq_read.cpp
@@ -21,6 +21,13 @@ namespace V1 {
using namespace PersQueue::V1;
+Topic::StreamDirectReadMessage::FromServer FillDirectReadResponse(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode code) {
+ Topic::StreamDirectReadMessage::FromServer res;
+ FillIssue(res.add_issues(), code, errorReason);
+ res.set_status(ConvertPersQueueInternalCodeToStatus(code));
+ return res;
+}
+
IActor* CreatePQReadService(const TActorId& schemeCache, const TActorId& newSchemeCache,
@@ -120,6 +127,43 @@ void TPQReadService::Handle(NGRpcService::TEvStreamTopicReadRequest::TPtr& ev, c
HandleStreamPQReadRequest<NGRpcService::TEvStreamTopicReadRequest>(ev, ctx);
}
+void TPQReadService::Handle(NGRpcService::TEvStreamTopicDirectReadRequest::TPtr& ev, const TActorContext& ctx) {
+
+ LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "new grpc connection");
+
+ if (TooMuchSessions()) {
+ LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, "new grpc connection failed - too many sessions");
+ ev->Get()->GetStreamCtx()->Attach(ctx.SelfID);
+ ev->Get()->GetStreamCtx()->WriteAndFinish(
+ FillDirectReadResponse("proxy overloaded", PersQueue::ErrorCode::OVERLOAD), grpc::Status::OK); //CANCELLED
+ return;
+ }
+ if (HaveClusters && (Clusters.empty() || LocalCluster.empty())) {
+ LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, "new grpc connection failed - cluster is not known yet");
+
+ ev->Get()->GetStreamCtx()->Attach(ctx.SelfID);
+ ev->Get()->GetStreamCtx()->WriteAndFinish(
+ FillDirectReadResponse("cluster initializing", PersQueue::ErrorCode::INITIALIZING), grpc::Status::OK); //CANCELLED
+ // TODO: Inc SLI Errors
+ return;
+ } else {
+ Y_ABORT_UNLESS(TopicsHandler != nullptr);
+ auto ip = ev->Get()->GetStreamCtx()->GetPeerName();
+
+ const ui64 cookie = NextCookie();
+
+ LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "new direct session created cookie " << cookie);
+
+ TActorId worker = ctx.Register(new TDirectReadSessionActor(
+ ev->Release().Release(), cookie, SchemeCache, NewSchemeCache, Counters,
+ DatacenterClassifier ? DatacenterClassifier->ClassifyAddress(NAddressClassifier::ExtractAddress(ip)) : "unknown",
+ *TopicsHandler
+ ));
+ Sessions[cookie] = worker;
+ }
+}
+
void TPQReadService::Handle(NGRpcService::TEvStreamPQMigrationReadRequest::TPtr& ev, const TActorContext& ctx) {
HandleStreamPQReadRequest<NGRpcService::TEvStreamPQMigrationReadRequest>(ev, ctx);
}
@@ -169,6 +213,11 @@ void NKikimr::NGRpcService::TGRpcRequestProxyHandleMethods::Handle(NKikimr::NGRp
ctx.Send(NKikimr::NGRpcProxy::V1::GetPQReadServiceActorID(), ev->Release().Release());
}
+void NKikimr::NGRpcService::TGRpcRequestProxyHandleMethods::Handle(NKikimr::NGRpcService::TEvStreamTopicDirectReadRequest::TPtr& ev, const TActorContext& ctx) {
+ ctx.Send(NKikimr::NGRpcProxy::V1::GetPQReadServiceActorID(), ev->Release().Release());
+}
+
void NKikimr::NGRpcService::TGRpcRequestProxyHandleMethods::Handle(NKikimr::NGRpcService::TEvStreamPQMigrationReadRequest::TPtr& ev, const TActorContext& ctx) {
ctx.Send(NKikimr::NGRpcProxy::V1::GetPQReadServiceActorID(), ev->Release().Release());
}
diff --git a/ydb/services/persqueue_v1/grpc_pq_read.h b/ydb/services/persqueue_v1/grpc_pq_read.h
index d691ec7726..0eaf98c22a 100644
--- a/ydb/services/persqueue_v1/grpc_pq_read.h
+++ b/ydb/services/persqueue_v1/grpc_pq_read.h
@@ -1,6 +1,7 @@
#pragma once
#include "actors/read_session_actor.h"
+#include "actors/direct_read_actor.h"
#include <ydb/core/client/server/grpc_base.h>
#include <ydb/core/persqueue/cluster_tracker.h>
@@ -42,6 +43,7 @@ private:
STFUNC(StateFunc) {
switch (ev->GetTypeRewrite()) {
HFunc(NGRpcService::TEvStreamTopicReadRequest, Handle);
+ HFunc(NGRpcService::TEvStreamTopicDirectReadRequest, Handle);
HFunc(NGRpcService::TEvStreamPQMigrationReadRequest, Handle);
HFunc(NGRpcService::TEvCommitOffsetRequest, Handle);
HFunc(NGRpcService::TEvPQReadInfoRequest, Handle);
@@ -56,6 +58,7 @@ private:
private:
void Handle(NGRpcService::TEvStreamTopicReadRequest::TPtr& ev, const TActorContext& ctx);
+ void Handle(NGRpcService::TEvStreamTopicDirectReadRequest::TPtr& ev, const TActorContext& ctx);
void Handle(NGRpcService::TEvStreamPQMigrationReadRequest::TPtr& ev, const TActorContext& ctx);
void Handle(NGRpcService::TEvCommitOffsetRequest::TPtr& ev, const TActorContext& ctx);
void Handle(NGRpcService::TEvPQReadInfoRequest::TPtr& ev, const TActorContext& ctx);
@@ -98,6 +101,9 @@ auto FillReadResponse(const TString& errorReason, const PersQueue::ErrorCode::Er
return res;
}
+Topic::StreamDirectReadMessage::FromServer FillDirectReadResponse(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode code);
+
template <typename ReadRequest>
void TPQReadService::HandleStreamPQReadRequest(typename ReadRequest::TPtr& ev, const TActorContext& ctx) {
constexpr bool UseMigrationProtocol = std::is_same_v<ReadRequest, NGRpcService::TEvStreamPQMigrationReadRequest>;
diff --git a/ydb/services/persqueue_v1/persqueue_ut.cpp b/ydb/services/persqueue_v1/persqueue_ut.cpp
index 7d9b8f60ea..51a4bffae0 100644
--- a/ydb/services/persqueue_v1/persqueue_ut.cpp
+++ b/ydb/services/persqueue_v1/persqueue_ut.cpp
@@ -110,16 +110,20 @@ namespace {
const static TString SHORT_TOPIC_NAME = "topic1";
}
-#define MAKE_INSECURE_STUB(Service) \
+#define MAKE_INSECURE_STUB(Service) \
std::shared_ptr<grpc::Channel> Channel_; \
- std::unique_ptr<Service::Stub> StubP_; \
+ std::unique_ptr<Service::Stub> StubP_; \
\
{ \
- Channel_ = grpc::CreateChannel( \
- "localhost:" + ToString(server.Server->GrpcPort), \
- grpc::InsecureChannelCredentials() \
+ grpc::ChannelArguments args; \
+ args.SetMaxReceiveMessageSize(64_MB); \
+ args.SetMaxSendMessageSize(64_MB); \
+ Channel_ = grpc::CreateCustomChannel( \
+ "localhost:" + ToString(server.Server->GrpcPort), \
+ grpc::InsecureChannelCredentials(), \
+ args \
); \
- StubP_ = Service::NewStub(Channel_); \
+ StubP_ = Service::NewStub(Channel_); \
} \
grpc::ClientContext rcontext;
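
MAKE_INSECURE_STUB now builds its channel through grpc::CreateCustomChannel so the message-size limits can be raised to 64 MB, which the multi-megabyte direct-read payloads in the tests below need. The same setup outside the macro, with the endpoint as a placeholder:

#include <grpcpp/grpcpp.h>

#include <memory>
#include <string>

std::shared_ptr<grpc::Channel> MakeLargeMessageChannel(const std::string& endpoint) {
    grpc::ChannelArguments args;
    args.SetMaxReceiveMessageSize(64 * 1024 * 1024);  // 64_MB in the macro
    args.SetMaxSendMessageSize(64 * 1024 * 1024);
    return grpc::CreateCustomChannel(endpoint, grpc::InsecureChannelCredentials(), args);
}
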
@@ -708,6 +712,465 @@ Y_UNIT_TEST_SUITE(TPersQueueTest) {
}
+ Y_UNIT_TEST(UpdatePartitionLocation) {
+ TPersQueueV1TestServer server;
+ SET_LOCALS;
+ MAKE_INSECURE_STUB(Ydb::Topic::V1::TopicService);
+ server.EnablePQLogs({ NKikimrServices::PQ_METACACHE, NKikimrServices::PQ_READ_PROXY});
+ server.EnablePQLogs({ NKikimrServices::KQP_PROXY }, NLog::EPriority::PRI_EMERG);
+ server.EnablePQLogs({ NKikimrServices::FLAT_TX_SCHEMESHARD }, NLog::EPriority::PRI_ERROR);
+
+ auto readStream = StubP_->StreamRead(&rcontext);
+ UNIT_ASSERT(readStream);
+
+ // init read session
+ {
+ Ydb::Topic::StreamReadMessage::FromClient req;
+ Ydb::Topic::StreamReadMessage::FromServer resp;
+
+ req.mutable_init_request()->add_topics_read_settings()->set_path("acc/topic1");
+
+ req.mutable_init_request()->set_consumer("user");
+ req.mutable_init_request()->set_direct_read(true);
+
+ if (!readStream->Write(req)) {
+ ythrow yexception() << "write fail";
+ }
+ UNIT_ASSERT(readStream->Read(&resp));
+ Cerr << "===Got response: " << resp.ShortDebugString() << Endl;
+ UNIT_ASSERT(resp.server_message_case() == Ydb::Topic::StreamReadMessage::FromServer::kInitResponse);
+ }
+
+ // await and confirm CreatePartitionStreamRequest from server
+ i64 assignId = 0;
+ i64 generation = 0;
+ {
+ Ydb::Topic::StreamReadMessage::FromServer resp;
+
+ //lock partition
+ UNIT_ASSERT(readStream->Read(&resp));
+
+ Cerr << "GOT SERVER MESSAGE1: " << resp.DebugString() << "\n";
+
+ UNIT_ASSERT(resp.server_message_case() == Ydb::Topic::StreamReadMessage::FromServer::kStartPartitionSessionRequest);
+ UNIT_ASSERT_VALUES_EQUAL(resp.start_partition_session_request().partition_session().path(), "acc/topic1");
+ UNIT_ASSERT(resp.start_partition_session_request().partition_session().partition_id() == 0);
+ UNIT_ASSERT(resp.start_partition_session_request().partition_location().generation() > 0);
+ generation = resp.start_partition_session_request().partition_location().generation();
+ assignId = resp.start_partition_session_request().partition_session().partition_session_id();
+ }
+
+ server.Server->AnnoyingClient->RestartPartitionTablets(server.Server->CleverServer->GetRuntime(), "rt3.dc1--acc--topic1");
+
+ {
+ Ydb::Topic::StreamReadMessage::FromServer resp;
+
+ //update partition location
+ UNIT_ASSERT(readStream->Read(&resp));
+
+ Cerr << "GOT SERVER MESSAGE2: " << resp.DebugString() << "\n";
+
+ UNIT_ASSERT(resp.server_message_case() == Ydb::Topic::StreamReadMessage::FromServer::kUpdatePartitionSession);
+ UNIT_ASSERT(resp.update_partition_session().partition_session_id() == assignId);
+ UNIT_ASSERT(resp.update_partition_session().partition_location().generation() > generation);
+ }
+ }
+
+ using namespace Ydb;
+ class TDirectReadTestSetup {
+ using Service = Ydb::Topic::V1::TopicService;
+ private:
+ std::shared_ptr<grpc::Channel> Channel;
+ std::unique_ptr<Service::Stub> Stub;
+ THolder<grpc::ClientContext> ControlContext;
+ THolder<grpc::ClientContext> ReadContext;
+
+ public:
+ std::unique_ptr<grpc::ClientReaderWriter<Topic::StreamReadMessage_FromClient, Topic::StreamReadMessage_FromServer>> ControlStream;
+ std::unique_ptr<grpc::ClientReaderWriter<Topic::StreamDirectReadMessage_FromClient, Topic::StreamDirectReadMessage_FromServer>> ReadStream;
+ TString SessionId;
+
+ TDirectReadTestSetup(TPersQueueV1TestServer& server)
+ : ReadContext(MakeHolder<grpc::ClientContext>())
+ {
+ server.EnablePQLogs({ NKikimrServices::PQ_READ_PROXY, NKikimrServices::PERSQUEUE });
+ server.EnablePQLogs({ NKikimrServices::KQP_PROXY }, NLog::EPriority::PRI_EMERG);
+ server.EnablePQLogs({ NKikimrServices::FLAT_TX_SCHEMESHARD }, NLog::EPriority::PRI_ERROR);
+
+ Connect(server);
+ }
+
+ void Connect(TPersQueueV1TestServer& server) {
+ grpc::ChannelArguments args;
+ args.SetMaxReceiveMessageSize(64_MB);
+ args.SetMaxSendMessageSize(64_MB);
+ Channel = grpc::CreateCustomChannel(
+ "localhost:" + ToString(server.Server->GrpcPort),
+ grpc::InsecureChannelCredentials(),
+ args
+ );
+ Stub = Service::NewStub(Channel);
+ }
+ void InitControlSession(const TString& topic) {
+ ControlContext = MakeHolder<grpc::ClientContext>();
+ ControlStream = Stub->StreamRead(ControlContext.Get());
+ UNIT_ASSERT(ControlStream);
+ Topic::StreamReadMessage::FromClient req;
+ Topic::StreamReadMessage::FromServer resp;
+
+ req.mutable_init_request()->add_topics_read_settings()->set_path(topic);
+
+ req.mutable_init_request()->set_consumer("user");
+ req.mutable_init_request()->set_direct_read(true);
+
+ if (!ControlStream->Write(req)) {
+ ythrow yexception() << "write fail";
+ }
+ UNIT_ASSERT(ControlStream->Read(&resp));
+ Cerr << "Got init response: " << resp.ShortDebugString() << Endl;
+ UNIT_ASSERT(resp.server_message_case() == Ydb::Topic::StreamReadMessage::FromServer::kInitResponse);
+ SessionId = resp.init_response().session_id();
+
+ req.Clear();
+ req.mutable_read_request()->set_bytes_size(40_MB);
+ if (!ControlStream->Write(req)) {
+ ythrow yexception() << "write fail";
+ }
+ }
+ std::pair<ui32, i64> GetNextAssign(const TString& topic) {
+ Cerr << "Get next assign id\n";
+ Topic::StreamReadMessage::FromClient req;
+ Topic::StreamReadMessage::FromServer resp;
+
+ //lock partition
+ UNIT_ASSERT(ControlStream->Read(&resp));
+
+ Cerr << "GOT SERVER MESSAGE - start session: " << resp.DebugString() << "\n";
+
+ UNIT_ASSERT(resp.server_message_case() == Topic::StreamReadMessage::FromServer::kStartPartitionSessionRequest);
+ UNIT_ASSERT_VALUES_EQUAL(resp.start_partition_session_request().partition_session().path(), topic);
+ auto pId = resp.start_partition_session_request().partition_session().partition_id();
+ UNIT_ASSERT(resp.start_partition_session_request().partition_location().generation() > 0);
+ auto assignId = resp.start_partition_session_request().partition_session().partition_session_id();
+
+ req.Clear();
+ req.mutable_start_partition_session_response()->set_partition_session_id(assignId);
+ if (!ControlStream->Write(req)) {
+ ythrow yexception() << "write fail";
+ }
+ return std::make_pair(pId, assignId);
+ }
+
+ void DoWrite(NYdb::TDriver* driver, const TString& topic, ui64 size, ui32 count,
+ const TString& srcId = "srcID", const std::optional<ui64>& partGroup = {})
+ {
+ auto writer = CreateSimpleWriter(*driver, topic, srcId, partGroup, {"raw"});
+
+ for (ui32 i = 0; i < count; ++i) {
+ auto writeSome = [&]() {
+ TString data(size, 'x');
+ UNIT_ASSERT(writer->Write(data));
+ };
+ writeSome();
+ }
+ writer->Close();
+ }
+
+ void DoRead(ui64 assignId, ui64& nextReadId, ui32& currTotalMessages, ui32 messageLimit) {
+ while (currTotalMessages < messageLimit) {
+ Cerr << "Wait for direct read id: " << nextReadId << ", currently have " << currTotalMessages << " messages" << Endl;
+ Ydb::Topic::StreamDirectReadMessage::FromServer resp;
+ UNIT_ASSERT(ReadStream->Read(&resp));
+ Cerr << "Got direct read response: " << resp.direct_read_response().direct_read_id() << Endl;
+ UNIT_ASSERT_C(resp.status() == Ydb::StatusIds::SUCCESS, resp.DebugString());
+ UNIT_ASSERT(resp.server_message_case() == Ydb::Topic::StreamDirectReadMessage::FromServer::kDirectReadResponse);
+ UNIT_ASSERT_VALUES_EQUAL(resp.direct_read_response().direct_read_id(), nextReadId);
+
+ Ydb::Topic::StreamReadMessage::FromClient req;
+ req.mutable_direct_read_ack()->set_partition_session_id(assignId);
+ req.mutable_direct_read_ack()->set_direct_read_id(nextReadId++);
+ if (!ControlStream->Write(req)) {
+ ythrow yexception() << "write fail";
+ }
+ for (const auto& batch : resp.direct_read_response().partition_data().batches()) {
+ currTotalMessages += batch.message_data_size();
+ }
+ }
+ UNIT_ASSERT_VALUES_EQUAL(currTotalMessages, messageLimit);
+ }
+
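+ // (Re)creates the direct read data stream and sends InitDirectRead; if an expected status is provided, also validates the server's init response.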
+ void InitReadSession(const TString& topic, const TMaybe<Ydb::StatusIds::StatusCode>& status = {}, const TString& consumer = "user",
+ TMaybe<ui64> assignId = Nothing()) {
+ if (ReadStream) {
+ ReadStream->Finish();
+ ReadStream = nullptr;
+ ReadContext = MakeHolder<grpc::ClientContext>();
+ }
+ ReadStream = Stub->StreamDirectRead(ReadContext.Release());
+ UNIT_ASSERT(ReadStream);
+
+ Topic::StreamDirectReadMessage::FromClient req;
+ Topic::StreamDirectReadMessage::FromServer resp;
+
+ req.mutable_init_direct_read()->add_topics_read_settings()->set_path(topic);
+
+ req.mutable_init_direct_read()->set_consumer(consumer);
+ req.mutable_init_direct_read()->set_session_id(SessionId);
+
+ if (!ReadStream->Write(req)) {
+ ythrow yexception() << "write fail";
+ }
+ if (status.Defined()) {
+ if (status.GetRef() != Ydb::StatusIds_StatusCode_SCHEME_ERROR) {
+ SendReadSessionAssign(assignId.Defined() ? *assignId : GetNextAssign(topic).second);
+ }
+ UNIT_ASSERT(ReadStream->Read(&resp));
+ Cerr << "Got direct read init response: " << resp.ShortDebugString() << Endl;
+ UNIT_ASSERT(resp.status() == status.GetRef());
+ }
+ }
+
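+ // Announces a partition session on the data stream with StartDirectReadPartitionSession.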
+ void SendReadSessionAssign(ui64 assignId) {
+ Cerr << "Send next assign to data session" << assignId << Endl;
+ Topic::StreamDirectReadMessage::FromClient req;
+
+ auto* startRequest = req.mutable_start_direct_read_partition_session();
+ startRequest->set_partition_session_id(assignId);
+ startRequest->set_last_direct_read_id(0);
+ startRequest->set_generation(1);
+ if (!ReadStream->Write(req)) {
+ ythrow yexception() << "write fail";
+ }
+ }
+ };
+
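+ // Asks the direct read caching service for its full state via an edge actor and returns the response.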
+ THolder<TEvPQ::TEvGetFullDirectReadData> RequestCacheData(TTestActorRuntime* runtime, TEvPQ::TEvGetFullDirectReadData* request) {
+ const auto& edgeId = runtime->AllocateEdgeActor();
+ runtime->Send(NPQ::MakePQDReadCacheServiceActorId(), edgeId, request);
+ auto resp = runtime->GrabEdgeEvent<TEvPQ::TEvGetFullDirectReadData>();
+ UNIT_ASSERT(resp);
+ return resp;
+ }
+
+ Y_UNIT_TEST(DirectReadPreCached) {
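+ // Data is written before the sessions exist; once they are set up, it should be served via direct read and the cache should drain after all acks.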
+ TPersQueueV1TestServer server{true};
+ SET_LOCALS;
+ TDirectReadTestSetup setup{server};
+ setup.DoWrite(pqClient->GetDriver(), "acc/topic1", 1_MB, 30);
+
+ setup.InitControlSession("acc/topic1");
+ auto pair = setup.GetNextAssign("acc/topic1");
+ UNIT_ASSERT_VALUES_EQUAL(pair.first, 0);
+ auto assignId = pair.second;
+ setup.InitReadSession("acc/topic1");
+
+ auto cachedData = RequestCacheData(runtime, new TEvPQ::TEvGetFullDirectReadData());
+ UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.size(), 1);
+ setup.SendReadSessionAssign(assignId);
+
+ ui32 totalMsg = 0;
+ ui64 nextReadId = 1;
+ setup.DoRead(assignId, nextReadId, totalMsg, 30);
+
+ Sleep(TDuration::Seconds(1));
+ cachedData = RequestCacheData(runtime, new TEvPQ::TEvGetFullDirectReadData());
+ UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.size(), 1);
+ UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.begin()->second.StagedReads.size(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.begin()->second.Reads.size(), 0);
+ }
+
+ Y_UNIT_TEST(DirectReadNotCached) {
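+ // Sessions are established before any data is written; messages must still flow as they arrive, with a second read_request raising the quota mid-stream.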
+ TPersQueueV1TestServer server{true};
+ SET_LOCALS;
+ TDirectReadTestSetup setup{server};
+
+ setup.InitControlSession("acc/topic1");
+ auto pair = setup.GetNextAssign("acc/topic1");
+ UNIT_ASSERT_VALUES_EQUAL(pair.first, 0);
+ auto assignId = pair.second;
+ setup.InitReadSession("acc/topic1");
+ setup.SendReadSessionAssign(assignId);
+
+ ui32 totalMsg = 0;
+ ui64 nextReadId = 1;
+ Sleep(TDuration::Seconds(3));
+ setup.DoWrite(pqClient->GetDriver(), "acc/topic1", 1_MB, 50);
+ setup.DoRead(assignId, nextReadId, totalMsg, 40);
+
+ Topic::StreamReadMessage::FromClient req;
+ req.mutable_read_request()->set_bytes_size(40_MB);
+ if (!setup.ControlStream->Write(req)) {
+ ythrow yexception() << "write fail";
+ }
+ setup.DoRead(assignId, nextReadId, totalMsg, 50);
+
+ Sleep(TDuration::Seconds(1));
+ auto cachedData = RequestCacheData(runtime, new TEvPQ::TEvGetFullDirectReadData());
+ UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.size(), 1);
+ UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.begin()->second.StagedReads.size(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.begin()->second.Reads.size(), 0);
+ }
+
+ Y_UNIT_TEST(DirectReadBadCases) {
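+ // Error paths: an unknown control session id and an unknown topic must be rejected, and a data session must not survive its control session.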
+ TPersQueueV1TestServer server{true};
+ SET_LOCALS;
+ TDirectReadTestSetup setup{server};
+ setup.InitControlSession("acc/topic1");
+ auto sessionId = setup.SessionId;
+ auto assign = setup.GetNextAssign("acc/topic1").second;
+ setup.SessionId = "bad-session";
+ Cerr << "First init bad session\n";
+ setup.InitReadSession("acc/topic1", Ydb::StatusIds::BAD_REQUEST, "user", 1); // no control session
+ setup.SessionId = sessionId;
+ Cerr << "Init badtopic session\n";
+ setup.InitReadSession("acc/topic-bad", Ydb::StatusIds::SCHEME_ERROR);
+ //setup.InitReadSession("acc/topic1", Ydb::StatusIds::SCHEME_ERROR, "bad-user"); //ToDo - enable ACL (read rules) check
+
+ setup.ControlStream->WritesDone();
+ Cerr << "Close control session\n";
+ setup.ControlStream->Finish();
+ Cerr << "Close control session - done\n";
+ setup.ControlStream = nullptr;
+
+ setup.DoWrite(pqClient->GetDriver(), "acc/topic1", 100_KB, 10);
+ Cerr << "Init read session\n";
+ setup.InitReadSession("acc/topic1", Ydb::StatusIds::BAD_REQUEST, "user", assign); // no control session
+
+ auto cachedData = RequestCacheData(runtime, new TEvPQ::TEvGetFullDirectReadData());
+ UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.size(), 0);
+ }
+
+ Y_UNIT_TEST(DirectReadStop) {
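+ // Two partitions are read directly; a competing read session then takes one partition over, which must produce a graceful stop followed by a forced one.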
+ TPersQueueV1TestServer server{true};
+ SET_LOCALS;
+
+ server.Server->AnnoyingClient->AlterTopicNoLegacy("Root/PQ/rt3.dc1--acc--topic1", 2);
+
+ TDirectReadTestSetup setup{server};
+ setup.DoWrite(pqClient->GetDriver(), "acc/topic1", 100_KB, 1, "src1", 0);
+ setup.DoWrite(pqClient->GetDriver(), "acc/topic1", 100_KB, 1, "src2", 1);
+
+ setup.InitControlSession("acc/topic1");
+ auto pair1 = setup.GetNextAssign("acc/topic1");
+ auto pair2 = setup.GetNextAssign("acc/topic1");
+ UNIT_ASSERT(pair1.first + pair2.first == 1); // partitions 0 and 1
+ auto assign1 = pair1.second;
+ auto assign2 = pair2.second;
+ UNIT_ASSERT(assign1 != assign2);
+
+ setup.InitReadSession("acc/topic1");
+ setup.SendReadSessionAssign(assign1);
+ setup.SendReadSessionAssign(assign2);
+
+ // Read from both partitions so that the last direct read id moves forward.
+ for (auto i = 0u; i != 2; ++i) {
+ Cerr << "Wait for direct read" << Endl;
+ Ydb::Topic::StreamDirectReadMessage::FromServer resp;
+ UNIT_ASSERT(setup.ReadStream->Read(&resp));
+ Cerr << "Got direct read response: " << resp.direct_read_response().direct_read_id() << Endl;
+ UNIT_ASSERT_C(resp.status() == Ydb::StatusIds::SUCCESS, resp.DebugString());
+ UNIT_ASSERT(resp.server_message_case() == Ydb::Topic::StreamDirectReadMessage::FromServer::kDirectReadResponse);
+ UNIT_ASSERT_VALUES_EQUAL(resp.direct_read_response().direct_read_id(), 1);
+ i64 assignId = resp.direct_read_response().partition_session_id();
+ UNIT_ASSERT(assignId == assign1 || assignId == assign2);
+
+ Ydb::Topic::StreamReadMessage::FromClient req;
+ req.mutable_direct_read_ack()->set_partition_session_id(assignId);
+ req.mutable_direct_read_ack()->set_direct_read_id(1);
+ if (!setup.ControlStream->Write(req)) {
+ ythrow yexception() << "write fail";
+ }
+ }
+
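+ // A second read session for the same consumer triggers rebalancing of one partition.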
+ NYdb::NTopic::TTopicClient topicClient(*pqClient->GetDriver());
+ NYdb::NTopic::TReadSessionSettings rSettings;
+ rSettings.ConsumerName("user").AppendTopics({"acc/topic1"});
+ auto readSession = topicClient.CreateReadSession(rSettings);
+
+ i64 assignId = 0;
+ {
+ Topic::StreamReadMessage::FromServer resp;
+
+ // wait for the graceful stop partition session request
+ UNIT_ASSERT(setup.ControlStream->Read(&resp));
+
+ Cerr << "GOT SERVER MESSAGE (stop session): " << resp.DebugString() << "\n";
+
+ UNIT_ASSERT(resp.server_message_case() == Ydb::Topic::StreamReadMessage::FromServer::kStopPartitionSessionRequest);
+ UNIT_ASSERT_VALUES_EQUAL(resp.stop_partition_session_request().graceful(), true);
+ UNIT_ASSERT_VALUES_EQUAL(resp.stop_partition_session_request().last_direct_read_id(), 1);
+
+ assignId = resp.stop_partition_session_request().partition_session_id();
+ UNIT_ASSERT(assignId == assign1 || assignId == assign2);
+
+ Topic::StreamReadMessage::FromClient req;
+ req.mutable_stop_partition_session_response()->set_partition_session_id(assignId);
+ req.mutable_stop_partition_session_response()->set_graceful(true);
+ if (!setup.ControlStream->Write(req)) {
+ ythrow yexception() << "write fail";
+ }
+ }
+
+ {
+ Ydb::Topic::StreamReadMessage::FromServer resp;
+
+ // wait for the forced stop partition session request
+ UNIT_ASSERT(setup.ControlStream->Read(&resp));
+
+ Cerr << "GOT SERVER MESSAGE (stop session 2): " << resp.DebugString() << "\n";
+
+ UNIT_ASSERT(resp.server_message_case() == Ydb::Topic::StreamReadMessage::FromServer::kStopPartitionSessionRequest);
+ UNIT_ASSERT_VALUES_EQUAL(resp.stop_partition_session_request().graceful(), false);
+ UNIT_ASSERT_VALUES_EQUAL(resp.stop_partition_session_request().partition_session_id(), assignId);
+ Ydb::Topic::StreamReadMessage::FromClient req;
+ req.mutable_stop_partition_session_response()->set_partition_session_id(assignId);
+ req.mutable_stop_partition_session_response()->set_graceful(false);
+ if (!setup.ControlStream->Write(req)) {
+ ythrow yexception() << "write fail";
+ }
+ }
+ }
+
+ Y_UNIT_TEST(DirectReadCleanCache) {
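+ // Killing the partition's tablet must expire the direct read session and purge its data from the caching service.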
+ TPersQueueV1TestServer server;
+ SET_LOCALS;
+ TString topicPath{"/Root/PQ/rt3.dc1--acc--topic2"};
+ server.Server->AnnoyingClient->CreateTopicNoLegacy(topicPath, 1);
+ auto pathDescr = server.Server->AnnoyingClient->Ls(topicPath)->Record.GetPathDescription().GetPersQueueGroup();
+ auto tabletId = pathDescr.GetPartitions(0).GetTabletId();
+ Cerr << "PQ descr: " << pathDescr.DebugString() << Endl;
+
+ TDirectReadTestSetup setup{server};
+
+ setup.InitControlSession("acc/topic2");
+ setup.InitReadSession("acc/topic2");
+ auto pair = setup.GetNextAssign("acc/topic2");
+ UNIT_ASSERT_VALUES_EQUAL(pair.first, 0);
+ auto assignId = pair.second;
+ setup.SendReadSessionAssign(assignId);
+ // auto cachedData = RequestCacheData(runtime, new TEvPQ::TEvGetFullDirectReadData());
+ // UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.size(), 1);
+ setup.DoWrite(pqClient->GetDriver(), "acc/topic2", 10_MB, 1);
+ Ydb::Topic::StreamDirectReadMessage::FromServer resp;
+ Cerr << "Request initial read data\n";
+ UNIT_ASSERT(setup.ReadStream->Read(&resp));
+
+ Cerr << "Request cache data\n";
+ auto cachedData = RequestCacheData(runtime, new TEvPQ::TEvGetFullDirectReadData());
+ UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.size(), 1);
+ Cerr << "Kill the tablet\n";
+ server.Server->AnnoyingClient->KillTablet(*(server.Server->CleverServer), tabletId);
+ Cerr << "Get session closure\n";
+ resp.Clear();
+ UNIT_ASSERT(setup.ReadStream->Read(&resp));
+ UNIT_ASSERT_C(resp.status() == Ydb::StatusIds::SESSION_EXPIRED, resp.status());
+ Cerr << "Check caching service data empty\n";
+ cachedData = RequestCacheData(runtime, new TEvPQ::TEvGetFullDirectReadData());
+ UNIT_ASSERT_VALUES_EQUAL(cachedData->Data.size(), 0);
+ }
+
Y_UNIT_TEST(StreamReadManyUpdateTokenAndRead) {
TPersQueueV1TestServer server;
SET_LOCALS;
@@ -748,7 +1211,7 @@ Y_UNIT_TEST_SUITE(TPersQueueTest) {
ythrow yexception() << "write fail";
}
UNIT_ASSERT(readStream->Read(&resp));
- Cerr << "===Got response: " << resp.ShortDebugString() << Endl;
+ Cerr << "Got response: " << resp.ShortDebugString() << Endl;
UNIT_ASSERT(resp.server_message_case() == Ydb::Topic::StreamReadMessage::FromServer::kInitResponse);
// send some reads
req.Clear();
@@ -894,7 +1357,7 @@ Y_UNIT_TEST_SUITE(TPersQueueTest) {
ythrow yexception() << "write fail";
}
UNIT_ASSERT(readStream->Read(&resp));
- Cerr << "===Got response: " << resp.ShortDebugString() << Endl;
+ Cerr << "Got response: " << resp.ShortDebugString() << Endl;
UNIT_ASSERT(resp.server_message_case() == Ydb::Topic::StreamReadMessage::FromServer::kInitResponse);
req.Clear();
diff --git a/ydb/services/persqueue_v1/topic.cpp b/ydb/services/persqueue_v1/topic.cpp
index e1d0f8764c..40a7f2c65a 100644
--- a/ydb/services/persqueue_v1/topic.cpp
+++ b/ydb/services/persqueue_v1/topic.cpp
@@ -85,6 +85,27 @@ void TGRpcTopicService::SetupIncomingRequests(NYdbGrpc::TLoggerPtr logger) {
);
}
+ {
+ using TBiRequest = Ydb::Topic::StreamDirectReadMessage::FromClient;
+
+ using TBiResponse = Ydb::Topic::StreamDirectReadMessage::FromServer;
+
+ using TStreamGRpcRequest = NGRpcServer::TGRpcStreamingRequest<
+ TBiRequest,
+ TBiResponse,
+ TGRpcTopicService,
+ NKikimrServices::GRPC_SERVER>;
+
+ TStreamGRpcRequest::Start(this, this->GetService(), CQ_, &Ydb::Topic::V1::TopicService::AsyncService::RequestStreamDirectRead,
+ [this](TIntrusivePtr<TStreamGRpcRequest::IContext> context) {
+ ActorSystem_->Send(GRpcRequestProxyId_, new NKikimr::NGRpcService::TEvStreamTopicDirectReadRequest(context, IsRlAllowed()));
+ },
+ *ActorSystem_, "TopicService/StreamDirectRead", getCounterBlock("topic", "StreamDirectRead", true), nullptr
+ );
+ }
+
#ifdef ADD_REQUEST
#error ADD_REQUEST macro already defined
#endif
diff --git a/ydb/services/persqueue_v1/ut/persqueue_test_fixture.h b/ydb/services/persqueue_v1/ut/persqueue_test_fixture.h
index 89369402f3..f225ddf9aa 100644
--- a/ydb/services/persqueue_v1/ut/persqueue_test_fixture.h
+++ b/ydb/services/persqueue_v1/ut/persqueue_test_fixture.h
@@ -101,7 +101,6 @@ static void ModifyTopicACL(NYdb::TDriver* driver, const TString& topic, const TV
Server->AnnoyingClient->CreateTopicNoLegacy("rt3.dc1--topic1", 1);
Server->AnnoyingClient->CreateTopicNoLegacy("rt3.dc1--acc--topic1", 1);
Server->WaitInit("topic1");
- Sleep(TDuration::Seconds(10));
}
Cerr << "=== EnablePQLogs" << Endl;