author    | komels <komels@ydb.tech> | 2023-10-25 17:00:15 +0300
committer | komels <komels@ydb.tech> | 2023-10-25 17:44:43 +0300
commit    | f5da48657a33dddfe861af3dc645ff79875237ef (patch)
tree      | c15db5e3613a2d0d04ca89d7d41a5410b7af5b7a
parent    | c79726a5aedde9c9a148a5e5442bfe0b3821a84d (diff)
Move pq-v0 to ydb-OSS
59 files changed, 8045 insertions, 1 deletion
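Most of the diff below is generated CMake and ya.make plumbing for the relocated `ydb/services/deprecated/persqueue_v0` and `ydb/library/persqueue/deprecated` targets. The functional change is that the server now registers the deprecated v0 gRPC service next to the v1 one. A condensed sketch of the new wiring, extracted from the run.cpp hunk below (the surrounding TKikimrRunner::InitializeGRpc context is omitted, so this fragment is illustrative rather than compilable on its own):

```cpp
// Excerpt-style sketch of the registration added in ydb/core/driver_lib/run/run.cpp.
// `server`, `hasPQ`, `hasPQv1`, `ActorSystem`, `Counters` and `grpcRequestProxies`
// all come from the surrounding InitializeGRpc code and are not defined here.
#include <ydb/services/deprecated/persqueue_v0/persqueue.h>  // new: v0 service
#include <ydb/services/persqueue_v1/persqueue.h>             // existing v1 service

if (hasPQ) {  // new block: legacy "pq" protocol
    server.AddService(new NKikimr::NGRpcService::TGRpcPersQueueService(
        ActorSystem.Get(), Counters, NMsgBusProxy::CreatePersQueueMetaCacheV2Id()));
}
if (hasPQv1) {  // unchanged: both services reuse the same metacache actor id
    server.AddService(new NGRpcService::V1::TGRpcPersQueueService(
        ActorSystem.Get(), Counters, NMsgBusProxy::CreatePersQueueMetaCacheV2Id(),
        grpcRequestProxies[0], hasPQv1.IsRlAllowed()));
}
```

Correspondingly, test_client.cpp adds "pqv0" to the list of gRPC service names published by the test server.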
diff --git a/.mapping.json b/.mapping.json index b61d37e62f..a4c1a28bad 100644 --- a/.mapping.json +++ b/.mapping.json @@ -6488,6 +6488,12 @@ "ydb/library/persqueue/counter_time_keeper/CMakeLists.linux-x86_64.txt":"", "ydb/library/persqueue/counter_time_keeper/CMakeLists.txt":"", "ydb/library/persqueue/counter_time_keeper/CMakeLists.windows-x86_64.txt":"", + "ydb/library/persqueue/deprecated/CMakeLists.txt":"", + "ydb/library/persqueue/deprecated/read_batch_converter/CMakeLists.darwin-x86_64.txt":"", + "ydb/library/persqueue/deprecated/read_batch_converter/CMakeLists.linux-aarch64.txt":"", + "ydb/library/persqueue/deprecated/read_batch_converter/CMakeLists.linux-x86_64.txt":"", + "ydb/library/persqueue/deprecated/read_batch_converter/CMakeLists.txt":"", + "ydb/library/persqueue/deprecated/read_batch_converter/CMakeLists.windows-x86_64.txt":"", "ydb/library/persqueue/obfuscate/CMakeLists.darwin-x86_64.txt":"", "ydb/library/persqueue/obfuscate/CMakeLists.linux-aarch64.txt":"", "ydb/library/persqueue/obfuscate/CMakeLists.linux-x86_64.txt":"", @@ -9467,6 +9473,23 @@ "ydb/services/datastreams/ut/CMakeLists.linux-x86_64.txt":"", "ydb/services/datastreams/ut/CMakeLists.txt":"", "ydb/services/datastreams/ut/CMakeLists.windows-x86_64.txt":"", + "ydb/services/deprecated/CMakeLists.txt":"", + "ydb/services/deprecated/persqueue_v0/CMakeLists.darwin-x86_64.txt":"", + "ydb/services/deprecated/persqueue_v0/CMakeLists.linux-aarch64.txt":"", + "ydb/services/deprecated/persqueue_v0/CMakeLists.linux-x86_64.txt":"", + "ydb/services/deprecated/persqueue_v0/CMakeLists.txt":"", + "ydb/services/deprecated/persqueue_v0/CMakeLists.windows-x86_64.txt":"", + "ydb/services/deprecated/persqueue_v0/api/CMakeLists.txt":"", + "ydb/services/deprecated/persqueue_v0/api/grpc/CMakeLists.darwin-x86_64.txt":"", + "ydb/services/deprecated/persqueue_v0/api/grpc/CMakeLists.linux-aarch64.txt":"", + "ydb/services/deprecated/persqueue_v0/api/grpc/CMakeLists.linux-x86_64.txt":"", + "ydb/services/deprecated/persqueue_v0/api/grpc/CMakeLists.txt":"", + "ydb/services/deprecated/persqueue_v0/api/grpc/CMakeLists.windows-x86_64.txt":"", + "ydb/services/deprecated/persqueue_v0/api/protos/CMakeLists.darwin-x86_64.txt":"", + "ydb/services/deprecated/persqueue_v0/api/protos/CMakeLists.linux-aarch64.txt":"", + "ydb/services/deprecated/persqueue_v0/api/protos/CMakeLists.linux-x86_64.txt":"", + "ydb/services/deprecated/persqueue_v0/api/protos/CMakeLists.txt":"", + "ydb/services/deprecated/persqueue_v0/api/protos/CMakeLists.windows-x86_64.txt":"", "ydb/services/discovery/CMakeLists.darwin-x86_64.txt":"", "ydb/services/discovery/CMakeLists.linux-aarch64.txt":"", "ydb/services/discovery/CMakeLists.linux-x86_64.txt":"", diff --git a/ydb/core/driver_lib/run/CMakeLists.darwin-x86_64.txt b/ydb/core/driver_lib/run/CMakeLists.darwin-x86_64.txt index 44e811c14f..105a543cf8 100644 --- a/ydb/core/driver_lib/run/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/driver_lib/run/CMakeLists.darwin-x86_64.txt @@ -143,6 +143,7 @@ target_link_libraries(run PUBLIC services-ext_index-metadata ydb-services-monitoring ydb-services-persqueue_cluster_discovery + services-deprecated-persqueue_v0 ydb-services-persqueue_v1 ydb-services-rate_limiter ydb-services-ydb diff --git a/ydb/core/driver_lib/run/CMakeLists.linux-aarch64.txt b/ydb/core/driver_lib/run/CMakeLists.linux-aarch64.txt index 86b089d2ee..43ad518bdc 100644 --- a/ydb/core/driver_lib/run/CMakeLists.linux-aarch64.txt +++ b/ydb/core/driver_lib/run/CMakeLists.linux-aarch64.txt @@ -144,6 +144,7 @@ target_link_libraries(run 
PUBLIC services-ext_index-metadata ydb-services-monitoring ydb-services-persqueue_cluster_discovery + services-deprecated-persqueue_v0 ydb-services-persqueue_v1 ydb-services-rate_limiter ydb-services-ydb diff --git a/ydb/core/driver_lib/run/CMakeLists.linux-x86_64.txt b/ydb/core/driver_lib/run/CMakeLists.linux-x86_64.txt index 86b089d2ee..43ad518bdc 100644 --- a/ydb/core/driver_lib/run/CMakeLists.linux-x86_64.txt +++ b/ydb/core/driver_lib/run/CMakeLists.linux-x86_64.txt @@ -144,6 +144,7 @@ target_link_libraries(run PUBLIC services-ext_index-metadata ydb-services-monitoring ydb-services-persqueue_cluster_discovery + services-deprecated-persqueue_v0 ydb-services-persqueue_v1 ydb-services-rate_limiter ydb-services-ydb diff --git a/ydb/core/driver_lib/run/CMakeLists.windows-x86_64.txt b/ydb/core/driver_lib/run/CMakeLists.windows-x86_64.txt index 44e811c14f..105a543cf8 100644 --- a/ydb/core/driver_lib/run/CMakeLists.windows-x86_64.txt +++ b/ydb/core/driver_lib/run/CMakeLists.windows-x86_64.txt @@ -143,6 +143,7 @@ target_link_libraries(run PUBLIC services-ext_index-metadata ydb-services-monitoring ydb-services-persqueue_cluster_discovery + services-deprecated-persqueue_v0 ydb-services-persqueue_v1 ydb-services-rate_limiter ydb-services-ydb diff --git a/ydb/core/driver_lib/run/run.cpp b/ydb/core/driver_lib/run/run.cpp index 10906a8157..833b70f79f 100644 --- a/ydb/core/driver_lib/run/run.cpp +++ b/ydb/core/driver_lib/run/run.cpp @@ -94,6 +94,7 @@ #include <ydb/services/maintenance/grpc_service.h> #include <ydb/services/monitoring/grpc_service.h> #include <ydb/services/persqueue_cluster_discovery/grpc_service.h> +#include <ydb/services/deprecated/persqueue_v0/persqueue.h> #include <ydb/services/persqueue_v1/persqueue.h> #include <ydb/services/persqueue_v1/topic.h> #include <ydb/services/rate_limiter/grpc_service.h> @@ -749,6 +750,10 @@ void TKikimrRunner::InitializeGRpc(const TKikimrRunConfig& runConfig) { grpcRequestProxies[0], hasKesus.IsRlAllowed())); } + if (hasPQ) { + server.AddService(new NKikimr::NGRpcService::TGRpcPersQueueService(ActorSystem.Get(), Counters, NMsgBusProxy::CreatePersQueueMetaCacheV2Id())); + } + if (hasPQv1) { server.AddService(new NGRpcService::V1::TGRpcPersQueueService(ActorSystem.Get(), Counters, NMsgBusProxy::CreatePersQueueMetaCacheV2Id(), grpcRequestProxies[0], hasPQv1.IsRlAllowed())); diff --git a/ydb/core/driver_lib/run/ya.make b/ydb/core/driver_lib/run/ya.make index 206a0d6750..9ae7e14eac 100644 --- a/ydb/core/driver_lib/run/ya.make +++ b/ydb/core/driver_lib/run/ya.make @@ -152,6 +152,7 @@ PEERDIR( ydb/services/ext_index/metadata ydb/services/monitoring ydb/services/persqueue_cluster_discovery + ydb/services/deprecated/persqueue_v0 ydb/services/persqueue_v1 ydb/services/rate_limiter ydb/services/ydb diff --git a/ydb/core/testlib/test_client.cpp b/ydb/core/testlib/test_client.cpp index 41da3e0664..f47a327560 100644 --- a/ydb/core/testlib/test_client.cpp +++ b/ydb/core/testlib/test_client.cpp @@ -29,6 +29,7 @@ #include <ydb/services/discovery/grpc_service.h> #include <ydb/services/rate_limiter/grpc_service.h> #include <ydb/services/persqueue_cluster_discovery/grpc_service.h> +#include <ydb/services/deprecated/persqueue_v0/persqueue.h> #include <ydb/services/persqueue_v1/persqueue.h> #include <ydb/services/persqueue_v1/topic.h> #include <ydb/services/persqueue_v1/grpc_pq_write.h> @@ -341,7 +342,7 @@ namespace Tests { } desc->ServedDatabases.insert(desc->ServedDatabases.end(), rootDomains.begin(), rootDomains.end()); - TVector<TString> grpcServices = {"yql", 
"clickhouse_internal", "datastreams", "table_service", "scripting", "experimental", "discovery", "pqcd", "fds", "pq", "pqv1" }; + TVector<TString> grpcServices = {"yql", "clickhouse_internal", "datastreams", "table_service", "scripting", "experimental", "discovery", "pqcd", "fds", "pq", "pqv0", "pqv1" }; desc->ServedServices.insert(desc->ServedServices.end(), grpcServices.begin(), grpcServices.end()); system->Register(NGRpcService::CreateGrpcEndpointPublishActor(desc.Get()), TMailboxType::ReadAsFilled, appData.UserPoolId); diff --git a/ydb/library/persqueue/CMakeLists.txt b/ydb/library/persqueue/CMakeLists.txt index 92b51deaad..8c5c3f41fc 100644 --- a/ydb/library/persqueue/CMakeLists.txt +++ b/ydb/library/persqueue/CMakeLists.txt @@ -7,6 +7,7 @@ add_subdirectory(counter_time_keeper) +add_subdirectory(deprecated) add_subdirectory(obfuscate) add_subdirectory(tests) add_subdirectory(topic_parser) diff --git a/ydb/library/persqueue/deprecated/CMakeLists.txt b/ydb/library/persqueue/deprecated/CMakeLists.txt new file mode 100644 index 0000000000..c753df0d4d --- /dev/null +++ b/ydb/library/persqueue/deprecated/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(read_batch_converter) diff --git a/ydb/library/persqueue/deprecated/read_batch_converter/CMakeLists.darwin-x86_64.txt b/ydb/library/persqueue/deprecated/read_batch_converter/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..6a24a82cac --- /dev/null +++ b/ydb/library/persqueue/deprecated/read_batch_converter/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,18 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(persqueue-deprecated-read_batch_converter) +target_link_libraries(persqueue-deprecated-read_batch_converter PUBLIC + contrib-libs-cxxsupp + yutil + api-protos-persqueue-deprecated +) +target_sources(persqueue-deprecated-read_batch_converter PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/persqueue/deprecated/read_batch_converter/read_batch_converter.cpp +) diff --git a/ydb/library/persqueue/deprecated/read_batch_converter/CMakeLists.linux-aarch64.txt b/ydb/library/persqueue/deprecated/read_batch_converter/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..c15f715b29 --- /dev/null +++ b/ydb/library/persqueue/deprecated/read_batch_converter/CMakeLists.linux-aarch64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(persqueue-deprecated-read_batch_converter) +target_link_libraries(persqueue-deprecated-read_batch_converter PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + api-protos-persqueue-deprecated +) +target_sources(persqueue-deprecated-read_batch_converter PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/persqueue/deprecated/read_batch_converter/read_batch_converter.cpp +) diff --git a/ydb/library/persqueue/deprecated/read_batch_converter/CMakeLists.linux-x86_64.txt b/ydb/library/persqueue/deprecated/read_batch_converter/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..c15f715b29 --- /dev/null +++ b/ydb/library/persqueue/deprecated/read_batch_converter/CMakeLists.linux-x86_64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(persqueue-deprecated-read_batch_converter) +target_link_libraries(persqueue-deprecated-read_batch_converter PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + api-protos-persqueue-deprecated +) +target_sources(persqueue-deprecated-read_batch_converter PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/persqueue/deprecated/read_batch_converter/read_batch_converter.cpp +) diff --git a/ydb/library/persqueue/deprecated/read_batch_converter/CMakeLists.txt b/ydb/library/persqueue/deprecated/read_batch_converter/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/ydb/library/persqueue/deprecated/read_batch_converter/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/library/persqueue/deprecated/read_batch_converter/CMakeLists.windows-x86_64.txt b/ydb/library/persqueue/deprecated/read_batch_converter/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..6a24a82cac --- /dev/null +++ b/ydb/library/persqueue/deprecated/read_batch_converter/CMakeLists.windows-x86_64.txt @@ -0,0 +1,18 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. 
Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(persqueue-deprecated-read_batch_converter) +target_link_libraries(persqueue-deprecated-read_batch_converter PUBLIC + contrib-libs-cxxsupp + yutil + api-protos-persqueue-deprecated +) +target_sources(persqueue-deprecated-read_batch_converter PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/persqueue/deprecated/read_batch_converter/read_batch_converter.cpp +) diff --git a/ydb/library/persqueue/deprecated/read_batch_converter/read_batch_converter.cpp b/ydb/library/persqueue/deprecated/read_batch_converter/read_batch_converter.cpp new file mode 100644 index 0000000000..bca03dc72f --- /dev/null +++ b/ydb/library/persqueue/deprecated/read_batch_converter/read_batch_converter.cpp @@ -0,0 +1,43 @@ +#include "read_batch_converter.h" + +namespace NPersQueue { + +static void Convert(const ReadResponse::BatchedData::PartitionData& partition, ReadResponse::Data::MessageBatch* dstBatch) { + dstBatch->set_topic(partition.topic()); + dstBatch->set_partition(partition.partition()); + for (const ReadResponse::BatchedData::Batch& batch : partition.batch()) { + for (const ReadResponse::BatchedData::MessageData& message : batch.message_data()) { + ReadResponse::Data::Message* const dstMessage = dstBatch->add_message(); + dstMessage->set_data(message.data()); + dstMessage->set_offset(message.offset()); + + MessageMeta* const meta = dstMessage->mutable_meta(); + meta->set_source_id(batch.source_id()); + meta->set_seq_no(message.seq_no()); + meta->set_create_time_ms(message.create_time_ms()); + meta->set_write_time_ms(batch.write_time_ms()); + meta->set_codec(message.codec()); + meta->set_ip(batch.ip()); + meta->set_uncompressed_size(message.uncompressed_size()); + if (batch.has_extra_fields()) { + *meta->mutable_extra_fields() = batch.extra_fields(); + } + } + } +} + +void ConvertToOldBatch(ReadResponse& response) { + if (!response.has_batched_data()) { + return; + } + ReadResponse::BatchedData data; + data.Swap(response.mutable_batched_data()); + + ReadResponse::Data& dstData = *response.mutable_data(); // this call will clear BatchedData field + dstData.set_cookie(data.cookie()); + for (const ReadResponse::BatchedData::PartitionData& partition : data.partition_data()) { + Convert(partition, dstData.add_message_batch()); + } +} + +} // namespace NPersQueue diff --git a/ydb/library/persqueue/deprecated/read_batch_converter/read_batch_converter.h b/ydb/library/persqueue/deprecated/read_batch_converter/read_batch_converter.h new file mode 100644 index 0000000000..304bd8baf9 --- /dev/null +++ b/ydb/library/persqueue/deprecated/read_batch_converter/read_batch_converter.h @@ -0,0 +1,10 @@ +#pragma once +#include <ydb/services/deprecated/persqueue_v0/api/protos/persqueue.pb.h> + +namespace NPersQueue { + +// Converts responses with BatchedData field to responses with Data field. +// Other responses will be left unchanged.
+void ConvertToOldBatch(ReadResponse& response); + +} // namespace NPersQueue diff --git a/ydb/library/persqueue/deprecated/read_batch_converter/ya.make b/ydb/library/persqueue/deprecated/read_batch_converter/ya.make new file mode 100644 index 0000000000..d82be01cac --- /dev/null +++ b/ydb/library/persqueue/deprecated/read_batch_converter/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +PEERDIR( + ydb/services/deprecated/persqueue_v0/api/protos +) + +SRCS( + read_batch_converter.cpp +) + +END() diff --git a/ydb/library/persqueue/deprecated/ya.make b/ydb/library/persqueue/deprecated/ya.make new file mode 100644 index 0000000000..2b38d75ff3 --- /dev/null +++ b/ydb/library/persqueue/deprecated/ya.make @@ -0,0 +1,4 @@ +RECURSE( + read_batch_converter +) + diff --git a/ydb/library/persqueue/ya.make b/ydb/library/persqueue/ya.make index 52f8dc401e..fc7884db77 100644 --- a/ydb/library/persqueue/ya.make +++ b/ydb/library/persqueue/ya.make @@ -1,7 +1,9 @@ RECURSE( counter_time_keeper + deprecated obfuscate tests topic_parser topic_parser_public + ) diff --git a/ydb/services/CMakeLists.txt b/ydb/services/CMakeLists.txt index 2073d2df19..a805f6f8f4 100644 --- a/ydb/services/CMakeLists.txt +++ b/ydb/services/CMakeLists.txt @@ -10,6 +10,7 @@ add_subdirectory(auth) add_subdirectory(bg_tasks) add_subdirectory(cms) add_subdirectory(datastreams) +add_subdirectory(deprecated) add_subdirectory(discovery) add_subdirectory(dynamic_config) add_subdirectory(ext_index) diff --git a/ydb/services/deprecated/CMakeLists.txt b/ydb/services/deprecated/CMakeLists.txt new file mode 100644 index 0000000000..6238c6279e --- /dev/null +++ b/ydb/services/deprecated/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(persqueue_v0) diff --git a/ydb/services/deprecated/persqueue_v0/CMakeLists.darwin-x86_64.txt b/ydb/services/deprecated/persqueue_v0/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..8770c5018b --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,42 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(api) + +add_library(services-deprecated-persqueue_v0) +target_link_libraries(services-deprecated-persqueue_v0 PUBLIC + contrib-libs-cxxsupp + yutil + api-grpc-persqueue-deprecated + api-protos-persqueue-deprecated + persqueue-deprecated-read_batch_converter + ydb-core-base + core-tx-tx_proxy + core-client-server + ydb-core-grpc_services + core-mind-address_classification + ydb-core-persqueue + core-persqueue-events + core-persqueue-writer + ydb-core-protos + ydb-library-aclib + library-persqueue-topic_parser + services-lib-actors + services-lib-sharding + ydb-services-persqueue_v1 + ydb-services-metadata +) +target_sources(services-deprecated-persqueue_v0 PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/grpc_pq_clusters_updater_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/grpc_pq_read.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/grpc_pq_read_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/grpc_pq_write.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/grpc_pq_write_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/move_topic_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/persqueue.cpp +) diff --git a/ydb/services/deprecated/persqueue_v0/CMakeLists.linux-aarch64.txt b/ydb/services/deprecated/persqueue_v0/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..d3fc8c8c68 --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/CMakeLists.linux-aarch64.txt @@ -0,0 +1,43 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(api) + +add_library(services-deprecated-persqueue_v0) +target_link_libraries(services-deprecated-persqueue_v0 PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + api-grpc-persqueue-deprecated + api-protos-persqueue-deprecated + persqueue-deprecated-read_batch_converter + ydb-core-base + core-tx-tx_proxy + core-client-server + ydb-core-grpc_services + core-mind-address_classification + ydb-core-persqueue + core-persqueue-events + core-persqueue-writer + ydb-core-protos + ydb-library-aclib + library-persqueue-topic_parser + services-lib-actors + services-lib-sharding + ydb-services-persqueue_v1 + ydb-services-metadata +) +target_sources(services-deprecated-persqueue_v0 PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/grpc_pq_clusters_updater_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/grpc_pq_read.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/grpc_pq_read_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/grpc_pq_write.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/grpc_pq_write_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/move_topic_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/persqueue.cpp +) diff --git a/ydb/services/deprecated/persqueue_v0/CMakeLists.linux-x86_64.txt b/ydb/services/deprecated/persqueue_v0/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..d3fc8c8c68 --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/CMakeLists.linux-x86_64.txt @@ -0,0 +1,43 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(api) + +add_library(services-deprecated-persqueue_v0) +target_link_libraries(services-deprecated-persqueue_v0 PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + api-grpc-persqueue-deprecated + api-protos-persqueue-deprecated + persqueue-deprecated-read_batch_converter + ydb-core-base + core-tx-tx_proxy + core-client-server + ydb-core-grpc_services + core-mind-address_classification + ydb-core-persqueue + core-persqueue-events + core-persqueue-writer + ydb-core-protos + ydb-library-aclib + library-persqueue-topic_parser + services-lib-actors + services-lib-sharding + ydb-services-persqueue_v1 + ydb-services-metadata +) +target_sources(services-deprecated-persqueue_v0 PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/grpc_pq_clusters_updater_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/grpc_pq_read.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/grpc_pq_read_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/grpc_pq_write.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/grpc_pq_write_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/move_topic_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/persqueue.cpp +) diff --git a/ydb/services/deprecated/persqueue_v0/CMakeLists.txt b/ydb/services/deprecated/persqueue_v0/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/services/deprecated/persqueue_v0/CMakeLists.windows-x86_64.txt b/ydb/services/deprecated/persqueue_v0/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..8770c5018b --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/CMakeLists.windows-x86_64.txt @@ -0,0 +1,42 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(api) + +add_library(services-deprecated-persqueue_v0) +target_link_libraries(services-deprecated-persqueue_v0 PUBLIC + contrib-libs-cxxsupp + yutil + api-grpc-persqueue-deprecated + api-protos-persqueue-deprecated + persqueue-deprecated-read_batch_converter + ydb-core-base + core-tx-tx_proxy + core-client-server + ydb-core-grpc_services + core-mind-address_classification + ydb-core-persqueue + core-persqueue-events + core-persqueue-writer + ydb-core-protos + ydb-library-aclib + library-persqueue-topic_parser + services-lib-actors + services-lib-sharding + ydb-services-persqueue_v1 + ydb-services-metadata +) +target_sources(services-deprecated-persqueue_v0 PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/grpc_pq_clusters_updater_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/grpc_pq_read.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/grpc_pq_read_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/grpc_pq_write.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/grpc_pq_write_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/move_topic_actor.cpp + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/persqueue.cpp +) diff --git a/ydb/services/deprecated/persqueue_v0/api/CMakeLists.txt b/ydb/services/deprecated/persqueue_v0/api/CMakeLists.txt new file mode 100644 index 0000000000..fefd854625 --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/api/CMakeLists.txt @@ -0,0 +1,10 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(grpc) +add_subdirectory(protos) diff --git a/ydb/services/deprecated/persqueue_v0/api/grpc/CMakeLists.darwin-x86_64.txt b/ydb/services/deprecated/persqueue_v0/api/grpc/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..77bda3ed06 --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/api/grpc/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,59 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_grpc_cpp_bin + TOOL_grpc_cpp_dependency + contrib/tools/protoc/plugins/grpc_cpp + grpc_cpp +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(api-grpc-persqueue-deprecated) +set_property(TARGET api-grpc-persqueue-deprecated PROPERTY + PROTOC_EXTRA_OUTS .grpc.pb.cc .grpc.pb.h +) +target_link_libraries(api-grpc-persqueue-deprecated PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-grpc + api-protos-persqueue-deprecated + api-protos + contrib-libs-protobuf +) +target_proto_messages(api-grpc-persqueue-deprecated PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/api/grpc/persqueue.proto +) +target_proto_addincls(api-grpc-persqueue-deprecated + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(api-grpc-persqueue-deprecated + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) +target_proto_plugin(api-grpc-persqueue-deprecated + grpc_cpp + grpc_cpp +) diff --git a/ydb/services/deprecated/persqueue_v0/api/grpc/CMakeLists.linux-aarch64.txt b/ydb/services/deprecated/persqueue_v0/api/grpc/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..42026a7c4d --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/api/grpc/CMakeLists.linux-aarch64.txt @@ -0,0 +1,60 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_grpc_cpp_bin + TOOL_grpc_cpp_dependency + contrib/tools/protoc/plugins/grpc_cpp + grpc_cpp +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(api-grpc-persqueue-deprecated) +set_property(TARGET api-grpc-persqueue-deprecated PROPERTY + PROTOC_EXTRA_OUTS .grpc.pb.cc .grpc.pb.h +) +target_link_libraries(api-grpc-persqueue-deprecated PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-grpc + api-protos-persqueue-deprecated + api-protos + contrib-libs-protobuf +) +target_proto_messages(api-grpc-persqueue-deprecated PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/api/grpc/persqueue.proto +) +target_proto_addincls(api-grpc-persqueue-deprecated + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(api-grpc-persqueue-deprecated + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) +target_proto_plugin(api-grpc-persqueue-deprecated + grpc_cpp + grpc_cpp +) diff --git a/ydb/services/deprecated/persqueue_v0/api/grpc/CMakeLists.linux-x86_64.txt b/ydb/services/deprecated/persqueue_v0/api/grpc/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..42026a7c4d --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/api/grpc/CMakeLists.linux-x86_64.txt @@ -0,0 +1,60 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_grpc_cpp_bin + TOOL_grpc_cpp_dependency + contrib/tools/protoc/plugins/grpc_cpp + grpc_cpp +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(api-grpc-persqueue-deprecated) +set_property(TARGET api-grpc-persqueue-deprecated PROPERTY + PROTOC_EXTRA_OUTS .grpc.pb.cc .grpc.pb.h +) +target_link_libraries(api-grpc-persqueue-deprecated PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-grpc + api-protos-persqueue-deprecated + api-protos + contrib-libs-protobuf +) +target_proto_messages(api-grpc-persqueue-deprecated PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/api/grpc/persqueue.proto +) +target_proto_addincls(api-grpc-persqueue-deprecated + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(api-grpc-persqueue-deprecated + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) +target_proto_plugin(api-grpc-persqueue-deprecated + grpc_cpp + grpc_cpp +) diff --git a/ydb/services/deprecated/persqueue_v0/api/grpc/CMakeLists.txt b/ydb/services/deprecated/persqueue_v0/api/grpc/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/api/grpc/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/services/deprecated/persqueue_v0/api/grpc/CMakeLists.windows-x86_64.txt b/ydb/services/deprecated/persqueue_v0/api/grpc/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..77bda3ed06 --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/api/grpc/CMakeLists.windows-x86_64.txt @@ -0,0 +1,59 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_grpc_cpp_bin + TOOL_grpc_cpp_dependency + contrib/tools/protoc/plugins/grpc_cpp + grpc_cpp +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(api-grpc-persqueue-deprecated) +set_property(TARGET api-grpc-persqueue-deprecated PROPERTY + PROTOC_EXTRA_OUTS .grpc.pb.cc .grpc.pb.h +) +target_link_libraries(api-grpc-persqueue-deprecated PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-grpc + api-protos-persqueue-deprecated + api-protos + contrib-libs-protobuf +) +target_proto_messages(api-grpc-persqueue-deprecated PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/api/grpc/persqueue.proto +) +target_proto_addincls(api-grpc-persqueue-deprecated + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(api-grpc-persqueue-deprecated + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) +target_proto_plugin(api-grpc-persqueue-deprecated + grpc_cpp + grpc_cpp +) diff --git a/ydb/services/deprecated/persqueue_v0/api/grpc/persqueue.proto b/ydb/services/deprecated/persqueue_v0/api/grpc/persqueue.proto new file mode 100644 index 0000000000..4e109304db --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/api/grpc/persqueue.proto @@ -0,0 +1,69 @@ +syntax = "proto3"; + +package NPersQueue; + +option java_package = "com.yandex.persqueue"; +option java_outer_classname = "PersqueueGrpc"; + +import "ydb/services/deprecated/persqueue_v0/api/protos/persqueue.proto"; + +service PersQueueService { + + /** + * Creates Write Session + * Pipeline: + * client server + * Init(Topic, SourceId, ...) + * ----------------> + * Init(Partition, MaxSeqNo, ...) + * <---------------- + * write(data1, seqNo1) + * ----------------> + * write(data2, seqNo2) + * ----------------> + * ack(seqNo1, offset1, ...) + * <---------------- + * write(data3, seqNo3) + * ----------------> + * ack(seqNo2, offset2, ...) + * <---------------- + * error(description, errorCode) + * <---------------- + */ + + rpc WriteSession(stream WriteRequest) returns (stream WriteResponse); + + /** + * Creates Read Session + * Pipeline: + * client server + * Init(Topics, ClientId, ...) + * ----------------> + * Init(SessionId) + * <---------------- + * read1 + * ----------------> + * read2 + * ----------------> + * lock(Topic1,Partition1, ...) - locks and releases are optional + * <---------------- + * lock(Topic2, Partition2, ...) + * <---------------- + * release(Topic1, Partition1, ...) + * <---------------- + * locked(Topic2, Partition2, ...) - client must respond to lock request with this message. Only after this client will start receiving messages from this partition + * ----------------> + * read result(data, ...)
+ * <---------------- + * commit(cookie1) + * ----------------> + * commit result(cookie1) + * <---------------- + * error(description, errorCode) + * <---------------- + */ + + rpc ReadSession(stream ReadRequest) returns (stream ReadResponse); + + rpc MoveTopic(MoveTopicRequest) returns (MoveTopicResponse); +} diff --git a/ydb/services/deprecated/persqueue_v0/api/grpc/ya.make b/ydb/services/deprecated/persqueue_v0/api/grpc/ya.make new file mode 100644 index 0000000000..3ab4d662ed --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/api/grpc/ya.make @@ -0,0 +1,18 @@ +PROTO_LIBRARY(api-grpc-persqueue-deprecated) + +MAVEN_GROUP_ID(com.yandex.ydb) + +GRPC() + +SRCS( + persqueue.proto +) + +PEERDIR( + ydb/services/deprecated/persqueue_v0/api/protos + ydb/public/api/protos +) + +EXCLUDE_TAGS(GO_PROTO) + +END() diff --git a/ydb/services/deprecated/persqueue_v0/api/protos/CMakeLists.darwin-x86_64.txt b/ydb/services/deprecated/persqueue_v0/api/protos/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..14b1c5308a --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/api/protos/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,56 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(api-protos-persqueue-deprecated) +target_link_libraries(api-protos-persqueue-deprecated PUBLIC + contrib-libs-cxxsupp + yutil + api-protos + tools-enum_parser-enum_serialization_runtime + contrib-libs-protobuf +) +target_proto_messages(api-protos-persqueue-deprecated PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/api/protos/persqueue.proto +) +generate_enum_serilization(api-protos-persqueue-deprecated + ${CMAKE_BINARY_DIR}/ydb/services/deprecated/persqueue_v0/api/protos/persqueue.pb.h + INCLUDE_HEADERS + ydb/services/deprecated/persqueue_v0/api/protos/persqueue.pb.h +) +target_proto_addincls(api-protos-persqueue-deprecated + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(api-protos-persqueue-deprecated + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/ydb/services/deprecated/persqueue_v0/api/protos/CMakeLists.linux-aarch64.txt b/ydb/services/deprecated/persqueue_v0/api/protos/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..2f0a5f943c --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/api/protos/CMakeLists.linux-aarch64.txt @@ -0,0 +1,57 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). 
These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(api-protos-persqueue-deprecated) +target_link_libraries(api-protos-persqueue-deprecated PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + api-protos + tools-enum_parser-enum_serialization_runtime + contrib-libs-protobuf +) +target_proto_messages(api-protos-persqueue-deprecated PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/api/protos/persqueue.proto +) +generate_enum_serilization(api-protos-persqueue-deprecated + ${CMAKE_BINARY_DIR}/ydb/services/deprecated/persqueue_v0/api/protos/persqueue.pb.h + INCLUDE_HEADERS + ydb/services/deprecated/persqueue_v0/api/protos/persqueue.pb.h +) +target_proto_addincls(api-protos-persqueue-deprecated + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(api-protos-persqueue-deprecated + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/ydb/services/deprecated/persqueue_v0/api/protos/CMakeLists.linux-x86_64.txt b/ydb/services/deprecated/persqueue_v0/api/protos/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..2f0a5f943c --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/api/protos/CMakeLists.linux-x86_64.txt @@ -0,0 +1,57 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(api-protos-persqueue-deprecated) +target_link_libraries(api-protos-persqueue-deprecated PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + api-protos + tools-enum_parser-enum_serialization_runtime + contrib-libs-protobuf +) +target_proto_messages(api-protos-persqueue-deprecated PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/api/protos/persqueue.proto +) +generate_enum_serilization(api-protos-persqueue-deprecated + ${CMAKE_BINARY_DIR}/ydb/services/deprecated/persqueue_v0/api/protos/persqueue.pb.h + INCLUDE_HEADERS + ydb/services/deprecated/persqueue_v0/api/protos/persqueue.pb.h +) +target_proto_addincls(api-protos-persqueue-deprecated + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(api-protos-persqueue-deprecated + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/ydb/services/deprecated/persqueue_v0/api/protos/CMakeLists.txt b/ydb/services/deprecated/persqueue_v0/api/protos/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/api/protos/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/services/deprecated/persqueue_v0/api/protos/CMakeLists.windows-x86_64.txt b/ydb/services/deprecated/persqueue_v0/api/protos/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..14b1c5308a --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/api/protos/CMakeLists.windows-x86_64.txt @@ -0,0 +1,56 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(api-protos-persqueue-deprecated) +target_link_libraries(api-protos-persqueue-deprecated PUBLIC + contrib-libs-cxxsupp + yutil + api-protos + tools-enum_parser-enum_serialization_runtime + contrib-libs-protobuf +) +target_proto_messages(api-protos-persqueue-deprecated PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/services/deprecated/persqueue_v0/api/protos/persqueue.proto +) +generate_enum_serilization(api-protos-persqueue-deprecated + ${CMAKE_BINARY_DIR}/ydb/services/deprecated/persqueue_v0/api/protos/persqueue.pb.h + INCLUDE_HEADERS + ydb/services/deprecated/persqueue_v0/api/protos/persqueue.pb.h +) +target_proto_addincls(api-protos-persqueue-deprecated + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(api-protos-persqueue-deprecated + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/ydb/services/deprecated/persqueue_v0/api/protos/persqueue.proto b/ydb/services/deprecated/persqueue_v0/api/protos/persqueue.proto new file mode 100644 index 0000000000..57954cd543 --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/api/protos/persqueue.proto @@ -0,0 +1,350 @@ +syntax = "proto3"; +import "google/protobuf/descriptor.proto"; +import "ydb/public/api/protos/draft/persqueue_common.proto"; +import "ydb/public/api/protos/ydb_status_codes.proto"; +import "ydb/public/api/protos/ydb_issue_message.proto"; + +package NPersQueue; + +option java_package = "com.yandex.ydb.persqueue"; +option cc_enable_arenas = true; + +extend google.protobuf.FileOptions { + bool GenerateYaStyle = 66677; +} + +message Path { + // Path of object (topic/consumer). + string path = 1; +} + +// WRITE REQUEST + +message KeyValue { + string key = 1; + string value = 2; +} + +message MapType { + repeated KeyValue items = 1; +} + +/** + * Request for write session. Contains one of : + * Init - consists of initialization info - Topic, SourceId and so on + * Data - data to be writen + * DataBatch - batch of data to be written + */ +message WriteRequest { + message Init { + string topic = 1; + bytes source_id = 2; + + MapType extra_fields = 7; //server and file inside here + + uint64 proxy_cookie = 8; //cookie provided by ChooseProxy request //change to bytes + + uint32 partition_group = 12; //Group to write to - 0 means any; + + string version = 999; //must be filled by client lib + } + + message Data { + uint64 seq_no = 1; + bytes data = 2; + uint64 create_time_ms = 3; //timestamp in ms + NPersQueueCommon.ECodec codec = 4; + uint32 uncompressed_size = 5; + } + + message DataBatch { + repeated Data data = 1; + } + + oneof request { + //init must be sent as first message + Init init = 1; + Data data = 2; + DataBatch data_batch = 3; + } + + NPersQueueCommon.Credentials credentials = 20; +} + +/** + * Response for write session. 
Contains one of : + * Error - in any error state - grpc errors, session dies, incorrect Init request and so on + * Init - contains SessionId of created session, MaxSeqNo and Partition + * Ack - acknowlegment of storing corresponding message + * AckBatch - acknowlegment of storing corresponding message batch + */ +message WriteResponse { + message Init { + uint64 max_seq_no = 1; + string session_id = 2; + uint32 partition = 3; + string topic = 4; + } + + message Stat { + uint32 write_time_ms = 1; + uint32 total_time_in_partition_queue_ms = 2; + uint32 partition_quoted_time_ms = 3; + uint32 topic_quoted_time_ms = 4; + } + + message Ack { + uint64 seq_no = 1; + uint64 offset = 2; + bool already_written = 3; + + Stat stat = 4; //not filled in batch case + } + + message AckBatch { + Stat stat = 2; //common statistics for batch storing + + repeated Ack ack = 1; + } + + oneof response { + Init init = 1; + Ack ack = 2; + AckBatch ack_batch = 4; + NPersQueueCommon.Error error = 3; + } +} + +// READ REQUEST + +/** + * Request for read session. Contains one of : + * Init - contains of Topics to be readed, ClientId and other metadata + * Read - request for read batch. Contains of restrictments for result - MaxSize, MaxCount and so on + * Commit - request for commit some read batches. Contains corresponding cookies + * Locked - comfirming to server that client is ready to get data from partition from concreet offset + */ +message ReadRequest { + enum EProtocolVersion { + Base = 0; // Base protocol version + Batching = 1; // Client supports more effective batching structs (TBatchedData instead of TData) + ReadParamsInInit = 2; // Client sets read params in Init request + } + + message Init { + repeated string topics = 1; + bool read_only_local = 2; // ReadOnlyLocal=false - read mirrored topics from other clusters too; will be renamed to read_only_original + + string client_id = 4; + bool clientside_locks_allowed = 5; //if true then partitions Lock signal will be sent from server, + //and reads from partitions will began only after Locked signal recieved by server from client + + uint64 proxy_cookie = 6; //cookie provided by ChooseProxy request + + bool balance_partition_right_now = 8; //if set then do not wait for commits from client on data from partition in case of balancing + + repeated uint32 partition_groups = 9; //Groups to be read - if empty then read from all of them + + uint32 idle_timeout_sec = 10; //TODO: do we need it? + + uint32 commit_interval_ms = 12; // How often server must commit data. 
If client sends commits faster, + // then server will hold them in order to archive corresponding rate; zero means server default = 1sec + + // Read request params + uint32 max_read_messages_count = 14; // Max messages to give to client in one read request + uint32 max_read_size = 15; // Max size in bytes to give to client in one read request + uint32 max_read_partitions_count = 16; // 0 means not matters // Maximum partitions count to give to client in one read request + uint32 max_time_lag_ms = 17; // Read data only with time lag less than or equal to specified + uint64 read_timestamp_ms = 18; // Read data only after this timestamp + + bool commits_disabled = 19; // Client will never commit + + string version = 999; //must be filled by client lib + + // Protocol version to let server know about new features that client supports + uint32 protocol_version = 13; // version must be integer (not enum) because client may be newer than server + } + + message Read { + // It is not allowed to change these parameters. + // They will be removed in future from TRead structure. + uint32 max_count = 1; + uint32 max_size = 2; + uint32 partitions_at_once = 3; //0 means not matters + uint32 max_time_lag_ms = 5; + uint64 read_timestamp_ms = 6; //read data only after this timestamp + } + + message StartRead { + string topic = 1; + uint32 partition = 2; + + uint64 read_offset = 3; //skip upto this position; if committed position is bigger, then do nothing + bool verify_read_offset = 4; //if true then check that committed position is <= ReadOffset; otherwise it means error in client logic + uint64 generation = 5; + uint64 commit_offset = 6; //all messages BEFORE this position are processed by client + } + + message Commit { + repeated uint64 cookie = 1; + } + + message Status { + uint64 generation = 1; + string topic = 2; + uint32 partition = 3; + } + + oneof request { + //init must be sent as first message + Init init = 1; + Read read = 2; + StartRead start_read = 3; + Commit commit = 4; + Status status = 5; + } + + NPersQueueCommon.Credentials credentials = 20; +} + + +message MessageMeta { + bytes source_id = 1; + uint64 seq_no = 2; + uint64 create_time_ms = 3; + uint64 write_time_ms = 4; + + MapType extra_fields = 7; + NPersQueueCommon.ECodec codec = 8; + string ip = 9; + uint32 uncompressed_size = 10; +} + +/** + * Response for read session. Contains one of : + * Error - in any error state - grpc errors, session dies, incorrect Init request and so on + * Init - contains SessionId of created session + * Data - result of read, contains of messages batch and cookie + * Commit - acknowlegment for commit + * Lock - informs client that server is ready to read data from corresponding partition + * Release - informs client that server will not get data from this partition in future read results, unless other Lock-Locked conversation will be done + */ + +message ReadResponse { + message Init { + string session_id = 2; //for debug only + } + + message Data { + message Message { + MessageMeta meta = 1; //SeqNo ... 
+ bytes data = 2; + //unique value for clientside deduplication - Topic:Partition:Offset + uint64 offset = 3; + bytes broken_packed_data = 4; // TODO: move to pqlib + } + + message MessageBatch { + string topic = 1; + uint32 partition = 2; + repeated Message message = 3; + } + + repeated MessageBatch message_batch = 1; + uint64 cookie = 2; //Cookie to be committed by server + } + + message BatchedData { + message MessageData { + NPersQueueCommon.ECodec codec = 2; + + uint64 offset = 3; //unique value for clientside deduplication - Topic:Partition:Offset + uint64 seq_no = 4; + + uint64 create_time_ms = 5; + uint64 uncompressed_size = 6; + + bytes data = 1; + } + + message Batch { + bytes source_id = 2; + MapType extra_fields = 3; + uint64 write_time_ms = 4; + string ip = 5; + + repeated MessageData message_data = 1; + } + + message PartitionData { + string topic = 2; + uint32 partition = 3; + + repeated Batch batch = 1; + } + + uint64 cookie = 2; //Cookie to be committed by server + + repeated PartitionData partition_data = 1; //at most one PartitionData for each partition + } + + message Lock { + string topic = 1; + uint32 partition = 2; + + uint64 read_offset = 3; //offset to read from + uint64 end_offset = 4; //end offset as known at this time + uint64 generation = 5; + } + + message Release { + string topic = 1; + uint32 partition = 2; + bool can_commit = 3; //if CanCommit=false then you cannot store progress of processing data for that partition at server; + //all commits will have no effect for this partition + //if you rely on committing offsets then just drop all data for this partition without processing - another session will get them later + //if CanCommit=true and you are relying on committing offsets - you can process all data for this partition you got, + //commit cookies and be sure that no other session will ever get this data + uint64 generation = 4; + } + + message Commit { + repeated uint64 cookie = 1; //for debug purposes only + } + + // Response for status request. 
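+ // Carries the partition's committed offset, end offset and a write watermark; the generation field + // presumably matches the generation passed in the corresponding Status request (inferred from the fields below).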
+ message PartitionStatus { + uint64 generation = 1; + string topic = 2; + uint32 partition = 3; + + uint64 committed_offset = 4; + uint64 end_offset = 5; + uint64 write_watermark_ms = 6; + } + + oneof response { + Init init = 1; + Data data = 2; + BatchedData batched_data = 7; + NPersQueueCommon.Error error = 3; + Lock lock = 4; + Release release = 5; + Commit commit = 6; + PartitionStatus partition_status = 8; + } +} + +message MoveTopicRequest { + string source_path = 1; + string destination_path = 2; + string token = 3; + bool do_not_deallocate = 4; + bool skip_destination_check = 5; +} + +message MoveTopicResponse { + Ydb.StatusIds.StatusCode status = 1; + repeated Ydb.Issue.IssueMessage issues = 2; +} + diff --git a/ydb/services/deprecated/persqueue_v0/api/protos/ya.make b/ydb/services/deprecated/persqueue_v0/api/protos/ya.make new file mode 100644 index 0000000000..307dfd75f3 --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/api/protos/ya.make @@ -0,0 +1,17 @@ +PROTO_LIBRARY(api-protos-persqueue-deprecated) + +MAVEN_GROUP_ID(com.yandex.ydb) + +PEERDIR( + ydb/public/api/protos +) + +SRCS( + persqueue.proto +) + +EXCLUDE_TAGS(GO_PROTO) + +GENERATE_ENUM_SERIALIZATION(persqueue.pb.h) + +END() diff --git a/ydb/services/deprecated/persqueue_v0/api/ya.make b/ydb/services/deprecated/persqueue_v0/api/ya.make new file mode 100644 index 0000000000..9e2772ca77 --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/api/ya.make @@ -0,0 +1,4 @@ +RECURSE( + grpc + protos +) diff --git a/ydb/services/deprecated/persqueue_v0/grpc_pq_actor.h b/ydb/services/deprecated/persqueue_v0/grpc_pq_actor.h new file mode 100644 index 0000000000..1801779b26 --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/grpc_pq_actor.h @@ -0,0 +1,943 @@ +#pragma once + +#include "grpc_pq_session.h" +#include "ydb/core/client/server/msgbus_server_pq_metacache.h" +#include "ydb/core/client/server/msgbus_server_persqueue.h" + +#include <ydb/core/base/events.h> +#include <ydb/core/tx/scheme_cache/scheme_cache.h> +#include <ydb/services/deprecated/persqueue_v0/api/grpc/persqueue.grpc.pb.h> + +#include <ydb/core/protos/grpc_pq_old.pb.h> +#include <ydb/core/protos/pqconfig.pb.h> +#include <ydb/core/persqueue/writer/source_id_encoding.h> + +#include <library/cpp/actors/core/actor_bootstrapped.h> + +#include <library/cpp/actors/core/hfunc.h> + +#include <ydb/library/persqueue/topic_parser/topic_parser.h> +#include <ydb/core/persqueue/events/global.h> +#include <ydb/core/persqueue/writer/writer.h> +#include <ydb/core/persqueue/percentile_counter.h> +#include <ydb/core/base/appdata.h> +#include <ydb/core/base/tablet_pipe.h> +#include <ydb/core/tx/tx_proxy/proxy.h> +#include <ydb/public/lib/base/msgbus_status.h> +#include <ydb/core/kqp/common/kqp.h> + +#include <ydb/core/base/ticket_parser.h> +#include <ydb/services/lib/actors/type_definitions.h> +#include <ydb/services/persqueue_v1/actors/read_init_auth_actor.h> +#include <ydb/services/persqueue_v1/actors/read_session_actor.h> +#include <ydb/services/persqueue_v1/actors/persqueue_utils.h> +#include <ydb/services/metadata/service.h> + +#include <util/generic/guid.h> +#include <util/system/compiler.h> + +namespace NKikimr { +namespace NGRpcProxy { + + + + + +static inline bool InternalErrorCode(NPersQueue::NErrorCode::EErrorCode errorCode) { + switch(errorCode) { + case NPersQueue::NErrorCode::UNKNOWN_TOPIC: + case NPersQueue::NErrorCode::ERROR: + case NPersQueue::NErrorCode::INITIALIZING: + case NPersQueue::NErrorCode::OVERLOAD: + case 
NPersQueue::NErrorCode::WRITE_ERROR_DISK_IS_FULL: + return true; + default: + return false; + } + return false; +} + + + +Ydb::StatusIds::StatusCode ConvertPersQueueInternalCodeToStatus(const NPersQueue::NErrorCode::EErrorCode code); +void FillIssue(Ydb::Issue::IssueMessage* issue, const NPersQueue::NErrorCode::EErrorCode errorCode, const TString& errorReason); + +using IWriteSessionHandlerRef = TIntrusivePtr<ISessionHandler<NPersQueue::TWriteResponse>>; +using IReadSessionHandlerRef = TIntrusivePtr<ISessionHandler<NPersQueue::TReadResponse>>; + +const TString& LocalDCPrefix(); +const TString& MirroredDCPrefix(); + +constexpr ui64 MAGIC_COOKIE_VALUE = 123456789; + +static const TDuration CHECK_ACL_DELAY = TDuration::Minutes(5); + +struct TEvPQProxy { + enum EEv { + EvWriteInit = EventSpaceBegin(TKikimrEvents::ES_PQ_PROXY), + EvWrite, + EvDone, + EvReadInit, + EvRead, + EvCloseSession, + EvPartitionReady, + EvReadResponse, + EvCommit, + EvCommitDone, + EvLocked, + EvReleasePartition, + EvPartitionReleased, + EvLockPartition, + EvRestartPipe, + EvDieCommand, + EvPartitionStatus, + EvAuth, + EvReadSessionStatus, + EvReadSessionStatusResponse, + EvDeadlineExceeded, + EvGetStatus, + EvWriteDone, + EvMoveTopic, + EvEnd, + }; + + struct TEvReadSessionStatus : public TEventPB<TEvReadSessionStatus, NKikimrPQ::TReadSessionStatus, EvReadSessionStatus> { + }; + + struct TEvReadSessionStatusResponse : public TEventPB<TEvReadSessionStatusResponse, NKikimrPQ::TReadSessionStatusResponse, EvReadSessionStatusResponse> { + }; + + + + struct TEvWriteInit : public NActors::TEventLocal<TEvWriteInit, EvWriteInit> { + TEvWriteInit(const NPersQueue::TWriteRequest& req, const TString& peerName, const TString& database) + : Request(req) + , PeerName(peerName) + , Database(database) + { } + + NPersQueue::TWriteRequest Request; + TString PeerName; + TString Database; + }; + + struct TEvWrite : public NActors::TEventLocal<TEvWrite, EvWrite> { + explicit TEvWrite(const NPersQueue::TWriteRequest& req) + : Request(req) + { } + + NPersQueue::TWriteRequest Request; + }; + + struct TEvDone : public NActors::TEventLocal<TEvDone, EvDone> { + TEvDone() + { } + }; + + struct TEvWriteDone : public NActors::TEventLocal<TEvWriteDone, EvWriteDone> { + TEvWriteDone(ui64 size) + : Size(size) + { } + + ui64 Size; + }; + + struct TEvReadInit : public NActors::TEventLocal<TEvReadInit, EvReadInit> { + TEvReadInit(const NPersQueue::TReadRequest& req, const TString& peerName, const TString& database) + : Request(req) + , PeerName(peerName) + , Database(database) + { } + + NPersQueue::TReadRequest Request; + TString PeerName; + TString Database; + }; + + struct TEvRead : public NActors::TEventLocal<TEvRead, EvRead> { + explicit TEvRead(const NPersQueue::TReadRequest& req, const TString& guid = CreateGuidAsString()) + : Request(req) + , Guid(guid) + { } + + NPersQueue::TReadRequest Request; + const TString Guid; + }; + struct TEvCloseSession : public NActors::TEventLocal<TEvCloseSession, EvCloseSession> { + TEvCloseSession(const TString& reason, const NPersQueue::NErrorCode::EErrorCode errorCode) + : Reason(reason) + , ErrorCode(errorCode) + { } + + const TString Reason; + NPersQueue::NErrorCode::EErrorCode ErrorCode; + }; + + struct TEvPartitionReady : public NActors::TEventLocal<TEvPartitionReady, EvPartitionReady> { + TEvPartitionReady(const NPersQueue::TTopicConverterPtr& topic, const ui32 partition, const ui64 wTime, const ui64 sizeLag, + const ui64 readOffset, const ui64 endOffset) + : Topic(topic) + , Partition(partition) + , 
WTime(wTime) + , SizeLag(sizeLag) + , ReadOffset(readOffset) + , EndOffset(endOffset) + { } + + NPersQueue::TTopicConverterPtr Topic; + ui32 Partition; + ui64 WTime; + ui64 SizeLag; + ui64 ReadOffset; + ui64 EndOffset; + }; + + struct TEvReadResponse : public NActors::TEventLocal<TEvReadResponse, EvReadResponse> { + explicit TEvReadResponse( + NPersQueue::TReadResponse&& resp, + ui64 nextReadOffset, + bool fromDisk, + TDuration waitQuotaTime + ) + : Response(std::move(resp)) + , NextReadOffset(nextReadOffset) + , FromDisk(fromDisk) + , WaitQuotaTime(waitQuotaTime) + { } + + NPersQueue::TReadResponse Response; + ui64 NextReadOffset; + bool FromDisk; + TDuration WaitQuotaTime; + }; + + struct TEvCommit : public NActors::TEventLocal<TEvCommit, EvCommit> { + explicit TEvCommit(ui64 readId, ui64 offset = Max<ui64>()) + : ReadId(readId) + , Offset(offset) + { } + + ui64 ReadId; + ui64 Offset; // Actual value for requests to concrete partitions + }; + + struct TEvAuth : public NActors::TEventLocal<TEvAuth, EvAuth> { + TEvAuth(const NPersQueueCommon::TCredentials& auth) + : Auth(auth) + { } + + NPersQueueCommon::TCredentials Auth; + }; + + struct TEvLocked : public NActors::TEventLocal<TEvLocked, EvLocked> { + TEvLocked(const TString& topic, ui32 partition, ui64 readOffset, ui64 commitOffset, bool verifyReadOffset, ui64 generation) + : Topic(topic) + , Partition(partition) + , ReadOffset(readOffset) + , CommitOffset(commitOffset) + , VerifyReadOffset(verifyReadOffset) + , Generation(generation) + { } + + TString Topic; + ui32 Partition; + ui64 ReadOffset; + ui64 CommitOffset; + bool VerifyReadOffset; + ui64 Generation; + }; + + struct TEvGetStatus : public NActors::TEventLocal<TEvGetStatus, EvGetStatus> { + TEvGetStatus(const TString& topic, ui32 partition, ui64 generation) + : Topic(topic) + , Partition(partition) + , Generation(generation) + { } + + TString Topic; + ui32 Partition; + ui64 Generation; + }; + + + + struct TEvCommitDone : public NActors::TEventLocal<TEvCommitDone, EvCommitDone> { + TEvCommitDone(ui64 readId, const NPersQueue::TTopicConverterPtr& topic, const ui32 partition) + : ReadId(readId) + , Topic(topic) + , Partition(partition) + { } + + ui64 ReadId; + NPersQueue::TTopicConverterPtr Topic; + ui32 Partition; + }; + + struct TEvReleasePartition : public NActors::TEventLocal<TEvReleasePartition, EvReleasePartition> { + TEvReleasePartition() + { } + }; + + struct TEvLockPartition : public NActors::TEventLocal<TEvLockPartition, EvLockPartition> { + explicit TEvLockPartition(const ui64 readOffset, const ui64 commitOffset, bool verifyReadOffset, bool startReading) + : ReadOffset(readOffset) + , CommitOffset(commitOffset) + , VerifyReadOffset(verifyReadOffset) + , StartReading(startReading) + { } + + ui64 ReadOffset; + ui64 CommitOffset; + bool VerifyReadOffset; + bool StartReading; + }; + + + struct TEvPartitionReleased : public NActors::TEventLocal<TEvPartitionReleased, EvPartitionReleased> { + TEvPartitionReleased(const NPersQueue::TTopicConverterPtr& topic, const ui32 partition) + : Topic(topic) + , Partition(partition) + { } + + NPersQueue::TTopicConverterPtr Topic; + ui32 Partition; + }; + + + struct TEvRestartPipe : public NActors::TEventLocal<TEvRestartPipe, EvRestartPipe> { + TEvRestartPipe() + { } + }; + + struct TEvDeadlineExceeded : public NActors::TEventLocal<TEvDeadlineExceeded, EvDeadlineExceeded> { + TEvDeadlineExceeded(ui64 cookie) + : Cookie(cookie) + { } + + ui64 Cookie; + }; + + + struct TEvDieCommand : public NActors::TEventLocal<TEvDieCommand, EvDieCommand> { + 
TEvDieCommand(const TString& reason, const NPersQueue::NErrorCode::EErrorCode errorCode) + : Reason(reason) + , ErrorCode(errorCode) + { } + + TString Reason; + NPersQueue::NErrorCode::EErrorCode ErrorCode; + }; + + struct TEvPartitionStatus : public NActors::TEventLocal<TEvPartitionStatus, EvPartitionStatus> { + TEvPartitionStatus(const NPersQueue::TTopicConverterPtr& topic, const ui32 partition, const ui64 offset, + const ui64 endOffset, ui64 writeTimestampEstimateMs, bool init = true) + : Topic(topic) + , Partition(partition) + , Offset(offset) + , EndOffset(endOffset) + , WriteTimestampEstimateMs(writeTimestampEstimateMs) + , Init(init) + { } + + NPersQueue::TTopicConverterPtr Topic; + ui32 Partition; + ui64 Offset; + ui64 EndOffset; + ui64 WriteTimestampEstimateMs; + bool Init; + }; + + + struct TEvMoveTopic : public NActors::TEventLocal<TEvMoveTopic, EvMoveTopic> { + TEvMoveTopic(const TString& sourcePath, const TString& destinationPath) + : SourcePath(sourcePath) + , DestinationPath(destinationPath) + { } + + TString SourcePath; + TString DestinationPath; + }; +}; + + + +/// WRITE ACTOR +class TWriteSessionActor : public NActors::TActorBootstrapped<TWriteSessionActor> { + using TEvDescribeTopicsRequest = NMsgBusProxy::NPqMetaCacheV2::TEvPqNewMetaCache::TEvDescribeTopicsRequest; + using TEvDescribeTopicsResponse = NMsgBusProxy::NPqMetaCacheV2::TEvPqNewMetaCache::TEvDescribeTopicsResponse; + using TPQGroupInfoPtr = TIntrusiveConstPtr<NSchemeCache::TSchemeCacheNavigate::TPQGroupInfo>; +public: + TWriteSessionActor(IWriteSessionHandlerRef handler, const ui64 cookie, const NActors::TActorId& schemeCache, + TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const TString& localDC, + const TMaybe<TString> clientDC); + ~TWriteSessionActor(); + + void Bootstrap(const NActors::TActorContext& ctx); + + void Die(const NActors::TActorContext& ctx) override; + + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::FRONT_PQ_WRITE; } +private: + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + CFunc(NActors::TEvents::TSystem::Wakeup, HandleWakeup) + + HFunc(TEvTicketParser::TEvAuthorizeTicketResult, Handle); + + HFunc(TEvPQProxy::TEvDieCommand, HandlePoison) + HFunc(TEvPQProxy::TEvWriteInit, Handle) + HFunc(TEvPQProxy::TEvWrite, Handle) + HFunc(TEvPQProxy::TEvDone, Handle) + HFunc(TEvPersQueue::TEvGetPartitionIdForWriteResponse, Handle) + + HFunc(TEvDescribeTopicsResponse, Handle); + + HFunc(NPQ::TEvPartitionWriter::TEvInitResult, Handle); + HFunc(NPQ::TEvPartitionWriter::TEvWriteAccepted, Handle); + HFunc(NPQ::TEvPartitionWriter::TEvWriteResponse, Handle); + HFunc(NPQ::TEvPartitionWriter::TEvDisconnected, Handle); + HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); + HFunc(TEvTabletPipe::TEvClientConnected, Handle); + + HFunc(NKqp::TEvKqp::TEvQueryResponse, Handle); + HFunc(NKqp::TEvKqp::TEvProcessResponse, Handle); + HFunc(NKqp::TEvKqp::TEvCreateSessionResponse, Handle); + HFunc(NMetadata::NProvider::TEvManagerPrepared, Handle); + + default: + break; + }; + } + + void Handle(NKqp::TEvKqp::TEvQueryResponse::TPtr &ev, const TActorContext &ctx); + void Handle(NKqp::TEvKqp::TEvProcessResponse::TPtr &ev, const TActorContext &ctx); + void Handle(NKqp::TEvKqp::TEvCreateSessionResponse::TPtr &ev, const NActors::TActorContext& ctx); + void TryCloseSession(const TActorContext& ctx); + + TString CheckSupportedCodec(const ui32 codecId); + void CheckACL(const TActorContext& ctx); + void InitCheckACL(const TActorContext& ctx); + void 
Handle(TEvTicketParser::TEvAuthorizeTicketResult::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvWriteInit::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvWrite::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvDone::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPersQueue::TEvGetPartitionIdForWriteResponse::TPtr& ev, const NActors::TActorContext& ctx); + + void LogSession(const TActorContext& ctx); + + void InitAfterDiscovery(const TActorContext& ctx); + void DiscoverPartition(const NActors::TActorContext& ctx); + TString GetDatabaseName(const NActors::TActorContext& ctx); + void StartSession(const NActors::TActorContext& ctx); + void SendCreateManagerRequest(const TActorContext& ctx); + + void SendSelectPartitionRequest(const TString& topic, const NActors::TActorContext& ctx); + void UpdatePartition(const NActors::TActorContext& ctx); + void RequestNextPartition(const NActors::TActorContext& ctx); + void ProceedPartition(const ui32 partition, const NActors::TActorContext& ctx); + + THolder<NKqp::TEvKqp::TEvQueryRequest> MakeUpdateSourceIdMetadataRequest(const TString& topic, + const TActorContext& ctx); + + + void Handle(TEvDescribeTopicsResponse::TPtr& ev, const NActors::TActorContext& ctx); + + void Handle(NPQ::TEvPartitionWriter::TEvInitResult::TPtr& ev, const TActorContext& ctx); + void Handle(NPQ::TEvPartitionWriter::TEvWriteAccepted::TPtr& ev, const TActorContext& ctx); + void Handle(NPQ::TEvPartitionWriter::TEvWriteResponse::TPtr& ev, const TActorContext& ctx); + void Handle(NPQ::TEvPartitionWriter::TEvDisconnected::TPtr& ev, const TActorContext& ctx); + void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(NMetadata::NProvider::TEvManagerPrepared::TPtr &ev, const NActors::TActorContext& ctx); + + void HandlePoison(TEvPQProxy::TEvDieCommand::TPtr& ev, const NActors::TActorContext& ctx); + void HandleWakeup(const NActors::TActorContext& ctx); + + void CloseSession(const TString& errorReason, const NPersQueue::NErrorCode::EErrorCode errorCode, const NActors::TActorContext& ctx); + + void CheckFinish(const NActors::TActorContext& ctx); + + void GenerateNextWriteRequest(const NActors::TActorContext& ctx); + + void SetupCounters(); + void SetupCounters(const TString& cloudId, const TString& dbId, const TString& dbPath, + bool isServerless, const TString& folderId); + + +private: + IWriteSessionHandlerRef Handler; + + enum EState { + ES_CREATED = 1, + ES_WAIT_SCHEME = 2, + ES_WAIT_SESSION = 3, + ES_WAIT_TABLE_REQUEST_1 = 4, + ES_WAIT_NEXT_PARTITION = 5, + ES_WAIT_TABLE_REQUEST_2 = 6, + ES_WAIT_WRITER_INIT = 7, + ES_INITED = 8, + ES_DYING = 9, + }; + + EState State; + TActorId SchemeCache; + TActorId Writer; + + TString PeerName; + TString Database; + ui64 Cookie; + + ui32 Partition; + bool PartitionFound = false; + ui32 PreferedPartition; + TString SourceId; + ui32 SelectReqsInflight = 0; + ui64 MaxSrcIdAccessTime = 0; + NPQ::NSourceIdEncoding::TEncodedSourceId EncodedSourceId; + TString OwnerCookie; + TString UserAgent; + + ui32 NumReserveBytesRequests; + + struct TWriteRequestBatchInfo: public TSimpleRefCount<TWriteRequestBatchInfo> { + using TPtr = TIntrusivePtr<TWriteRequestBatchInfo>; + + // Source requests from user (grpc session object) + std::deque<THolder<TEvPQProxy::TEvWrite>> UserWriteRequests; + + // Formed write request's size + ui64 
ByteSize = 0; + + // Formed write request's cookie + ui64 Cookie = 0; + }; + + // Unprocessed source client requests + std::deque<THolder<TEvPQProxy::TEvWrite>> Writes; + + // Formed, but not sent, batch requests to partition actor + std::deque<TWriteRequestBatchInfo::TPtr> FormedWrites; + + // Requests that are already sent to partition actor + std::deque<TWriteRequestBatchInfo::TPtr> SentMessages; + + bool WritesDone; + + THashMap<ui32, ui64> PartitionToTablet; + + TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; + + NKikimr::NPQ::TMultiCounter BytesInflight; + NKikimr::NPQ::TMultiCounter BytesInflightTotal; + + ui64 BytesInflight_; + ui64 BytesInflightTotal_; + + bool NextRequestInited; + + NKikimr::NPQ::TMultiCounter SessionsCreated; + NKikimr::NPQ::TMultiCounter SessionsActive; + NKikimr::NPQ::TMultiCounter SessionsWithoutAuth; + + NKikimr::NPQ::TMultiCounter Errors; + std::vector<NKikimr::NPQ::TMultiCounter> CodecCounters; + ui64 NextRequestCookie; + + TIntrusiveConstPtr<NACLib::TUserToken> Token; + NPersQueueCommon::TCredentials Auth; + TString AuthStr; + bool ACLCheckInProgress; + bool FirstACLCheck; + bool ForceACLCheck; + bool RequestNotChecked; + TInstant LastACLCheckTimestamp; + TInstant LogSessionDeadline; + + ui64 BalancerTabletId; + TString DatabaseId; + TString FolderId; + TActorId PipeToBalancer; + TIntrusivePtr<TSecurityObject> SecurityObject; + TPQGroupInfoPtr PQInfo; + + NKikimrPQClient::TDataChunk InitMeta; + TString LocalDC; + TString ClientDC; + TString SelectSourceIdQuery; + TString UpdateSourceIdQuery; + TInstant LastSourceIdUpdate; + TString TxId; + TString KqpSessionId; + + ui64 SourceIdCreateTime = 0; + ui32 SourceIdUpdatesInflight = 0; + + + TVector<NPersQueue::TPQLabelsInfo> Aggr; + NKikimr::NPQ::TMultiCounter SLITotal; + NKikimr::NPQ::TMultiCounter SLIErrors; + TInstant StartTime; + NKikimr::NPQ::TPercentileCounter InitLatency; + NKikimr::NPQ::TMultiCounter SLIBigLatency; + + THolder<NPersQueue::TTopicNamesConverterFactory> ConverterFactory; + NPersQueue::TDiscoveryConverterPtr DiscoveryConverter; + NPersQueue::TTopicConverterPtr FullConverter; + + NPersQueue::TWriteRequest::TInit InitRequest; + NPQ::ESourceIdTableGeneration SrcIdTableGeneration; +}; + +class TReadSessionActor : public TActorBootstrapped<TReadSessionActor> { + using TEvDescribeTopicsRequest = NMsgBusProxy::NPqMetaCacheV2::TEvPqNewMetaCache::TEvDescribeTopicsRequest; + using TEvDescribeTopicsResponse = NMsgBusProxy::NPqMetaCacheV2::TEvPqNewMetaCache::TEvDescribeTopicsResponse; +public: + TReadSessionActor(IReadSessionHandlerRef handler, const NPersQueue::TTopicsListController& topicsHandler, const ui64 cookie, + const NActors::TActorId& schemeCache, const NActors::TActorId& newSchemeCache, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, + const TMaybe<TString> clientDC); + ~TReadSessionActor(); + + void Bootstrap(const NActors::TActorContext& ctx); + + void Die(const NActors::TActorContext& ctx) override; + + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::FRONT_PQ_READ; } + + + struct TTopicCounters { + NKikimr::NPQ::TMultiCounter PartitionsLocked; + NKikimr::NPQ::TMultiCounter PartitionsReleased; + NKikimr::NPQ::TMultiCounter PartitionsToBeReleased; + NKikimr::NPQ::TMultiCounter PartitionsToBeLocked; + NKikimr::NPQ::TMultiCounter PartitionsInfly; + NKikimr::NPQ::TMultiCounter Errors; + NKikimr::NPQ::TMultiCounter Commits; + NKikimr::NPQ::TMultiCounter WaitsForData; + }; + +private: + STFUNC(StateFunc) { + switch 
(ev->GetTypeRewrite()) { + CFunc(NActors::TEvents::TSystem::Wakeup, HandleWakeup) + + HFunc(NKikimr::NGRpcProxy::V1::TEvPQProxy::TEvAuthResultOk, Handle); // from auth actor + + HFunc(TEvPQProxy::TEvDieCommand, HandlePoison) + HFunc(TEvPQProxy::TEvReadInit, Handle) //from gRPC + HFunc(TEvPQProxy::TEvReadSessionStatus, Handle) // from read sessions info builder proxy + HFunc(TEvPQProxy::TEvRead, Handle) //from gRPC + HFunc(TEvPQProxy::TEvDone, Handle) //from gRPC + HFunc(TEvPQProxy::TEvWriteDone, Handle) //from gRPC + HFunc(NKikimr::NGRpcProxy::V1::TEvPQProxy::TEvCloseSession, Handle) //from partitionActor + HFunc(TEvPQProxy::TEvCloseSession, Handle) //from partitionActor + + HFunc(TEvPQProxy::TEvPartitionReady, Handle) //from partitionActor + HFunc(TEvPQProxy::TEvPartitionReleased, Handle) //from partitionActor + + HFunc(TEvPQProxy::TEvReadResponse, Handle) //from partitionActor + HFunc(TEvPQProxy::TEvCommit, Handle) //from gRPC + HFunc(TEvPQProxy::TEvLocked, Handle) //from gRPC + HFunc(TEvPQProxy::TEvGetStatus, Handle) //from gRPC + HFunc(TEvPQProxy::TEvAuth, Handle) //from gRPC + + HFunc(TEvPQProxy::TEvCommitDone, Handle) //from PartitionActor + HFunc(TEvPQProxy::TEvPartitionStatus, Handle) //from partitionActor + + HFunc(TEvPersQueue::TEvLockPartition, Handle) //from Balancer + HFunc(TEvPersQueue::TEvReleasePartition, Handle) //from Balancer + HFunc(TEvPersQueue::TEvError, Handle) //from Balancer + + HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); + HFunc(TEvTabletPipe::TEvClientConnected, Handle); + + HFunc(TEvDescribeTopicsResponse, HandleDescribeTopicsResponse); + HFunc(TEvTicketParser::TEvAuthorizeTicketResult, Handle); + + default: + break; + }; + } + + void Handle(TEvPQProxy::TEvReadInit::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvReadSessionStatus::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvRead::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvReadResponse::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvDone::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvWriteDone::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(NKikimr::NGRpcProxy::V1::TEvPQProxy::TEvCloseSession::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvCloseSession::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvPartitionReady::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvPartitionReleased::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvCommit::TPtr& ev, const NActors::TActorContext& ctx); + void MakeCommit(const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvLocked::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvGetStatus::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvAuth::TPtr& ev, const NActors::TActorContext& ctx); + void ProcessAuth(const NPersQueueCommon::TCredentials& auth); + void Handle(TEvPQProxy::TEvCommitDone::TPtr& ev, const NActors::TActorContext& ctx); + void AnswerForCommitsIfCan(const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvPartitionStatus::TPtr& ev, const NActors::TActorContext& ctx); + + void Handle(TEvPersQueue::TEvLockPartition::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPersQueue::TEvReleasePartition::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPersQueue::TEvError::TPtr& ev, const 
NActors::TActorContext& ctx); + + void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const NActors::TActorContext& ctx); + [[nodiscard]] bool ProcessBalancerDead(const ui64 tabletId, const NActors::TActorContext& ctx); // returns false if actor died + + void HandlePoison(TEvPQProxy::TEvDieCommand::TPtr& ev, const NActors::TActorContext& ctx); + void HandleWakeup(const NActors::TActorContext& ctx); + void Handle(NKikimr::NGRpcProxy::V1::TEvPQProxy::TEvAuthResultOk::TPtr& ev, const NActors::TActorContext& ctx); + + void CloseSession(const TString& errorReason, const NPersQueue::NErrorCode::EErrorCode errorCode, + const NActors::TActorContext& ctx); + + void Handle(TEvTicketParser::TEvAuthorizeTicketResult::TPtr& ev, const TActorContext& ctx); + void HandleDescribeTopicsResponse(TEvDescribeTopicsResponse::TPtr& ev, const TActorContext& ctx); + + void SendAuthRequest(const TActorContext& ctx); + void CreateInitAndAuthActor(const TActorContext& ctx); + + void SetupCounters(); + void SetupTopicCounters(const NPersQueue::TTopicConverterPtr& topic); + void SetupTopicCounters(const NPersQueue::TTopicConverterPtr& topic, const TString& cloudId, const TString& dbId, + const TString& dbPath, bool isServerless, const TString& folderId); + + [[nodiscard]] bool ProcessReads(const NActors::TActorContext& ctx); // returns false if actor died + struct TFormedReadResponse; + [[nodiscard]] bool ProcessAnswer(const NActors::TActorContext& ctx, TIntrusivePtr<TFormedReadResponse> formedResponse); // returns false if actor died + + void RegisterSessions(const NActors::TActorContext& ctx); + void RegisterSession(const TActorId& pipe, const TString& topic, const TActorContext& ctx); + + struct TPartitionActorInfo; + void DropPartitionIfNeeded(THashMap<std::pair<TString, ui32>, TPartitionActorInfo>::iterator it, const TActorContext& ctx); + + bool ActualPartitionActor(const TActorId& part); + [[nodiscard]] bool ProcessReleasePartition(const THashMap<std::pair<TString, ui32>, TPartitionActorInfo>::iterator& it, + bool kill, bool couldBeReads, const TActorContext& ctx); // returns false if actor died + void InformBalancerAboutRelease(const THashMap<std::pair<TString, ui32>, TPartitionActorInfo>::iterator& it, const TActorContext& ctx); + + // returns false if check failed. + bool CheckAndUpdateReadSettings(const NPersQueue::TReadRequest::TRead& readRequest); + + static ui32 NormalizeMaxReadMessagesCount(ui32 sourceValue); + static ui32 NormalizeMaxReadSize(ui32 sourceValue); + static ui32 NormalizeMaxReadPartitionsCount(ui32 sourceValue); + + static bool RemoveEmptyMessages(NPersQueue::TReadResponse::TBatchedData& data); // returns true if there are nonempty messages + +private: + IReadSessionHandlerRef Handler; + + const TInstant StartTimestamp; + + TActorId PqMetaCache; + TActorId NewSchemeCache; + + TActorId AuthInitActor; + bool AuthInflight; + + TString InternalClientId; + TString ExternalClientId; + const TString ClientDC; + TString ClientPath; + TString Session; + TString PeerName; + TString Database; + + bool ClientsideLocksAllowed; + bool BalanceRightNow; + bool CommitsDisabled; + bool BalancersInitStarted; + + bool InitDone; + + ui32 ProtocolVersion; // from NPersQueue::TReadRequest::EProtocolVersion + // Read settings. + // Can be initialized during Init request (new preferable way) + // or during read request (old way that will be removed in future). 
+ // These settings can't be changed (in that case server closes session). + ui32 MaxReadMessagesCount; + ui32 MaxReadSize; + ui32 MaxReadPartitionsCount; + ui32 MaxTimeLagMs; + ui64 ReadTimestampMs; + bool ReadSettingsInited; + + NPersQueueCommon::TCredentials Auth; + TString AuthStr; + TIntrusiveConstPtr<NACLib::TUserToken> Token; + bool ForceACLCheck; + bool RequestNotChecked; + TInstant LastACLCheckTimestamp; + + struct TPartitionActorInfo { + TActorId Actor; + std::deque<ui64> Commits; + bool Reading; + bool Releasing; + bool Released; + ui64 LockGeneration; + bool LockSent; + NPersQueue::TTopicConverterPtr Converter; + + TPartitionActorInfo(const TActorId& actor, ui64 generation, const NPersQueue::TTopicConverterPtr& topic) + : Actor(actor) + , Reading(false) + , Releasing(false) + , Released(false) + , LockGeneration(generation) + , LockSent(false) + , Converter(topic) + {} + }; + + + THashSet<TActorId> ActualPartitionActors; + THashMap<std::pair<TString, ui32>, TPartitionActorInfo> Partitions; //topic[ClientSideName!]:partition -> info + + THashMap<TString, NPersQueue::TTopicConverterPtr> FullPathToConverter; // PrimaryFullPath -> Converter, for matching balancer replies + THashMap<TString, TTopicHolder> Topics; // PrimaryName -> topic info + + TVector<ui32> Groups; + bool ReadOnlyLocal; + + struct TPartitionInfo { + NPersQueue::TTopicConverterPtr Topic; + ui32 Partition; + ui64 WTime; + ui64 SizeLag; + ui64 MsgLag; + TActorId Actor; + bool operator < (const TPartitionInfo& rhs) const { + return std::tie(WTime, Topic, Partition, Actor) < std::tie(rhs.WTime, rhs.Topic, rhs.Partition, rhs.Actor); + } + }; + + TSet<TPartitionInfo> AvailablePartitions; + + struct TOffsetsInfo { + struct TPartitionOffsetInfo { + TPartitionOffsetInfo(const TActorId& sender, const TString& topic, ui32 partition, ui64 offset) + : Sender(sender) + , Topic(topic) + , Partition(partition) + , Offset(offset) + { + } + + TActorId Sender; + TString Topic; + ui32 Partition; + ui64 Offset; + }; + + // find by read id + bool operator<(ui64 readId) const { + return ReadId < readId; + } + + friend bool operator<(ui64 readId, const TOffsetsInfo& info) { + return readId < info.ReadId; + } + + ui64 ReadId = 0; + std::vector<TPartitionOffsetInfo> PartitionOffsets; + }; + + std::deque<TOffsetsInfo> Offsets; // Sequential read id -> offsets + + struct TFormedReadResponse: public TSimpleRefCount<TFormedReadResponse> { + using TPtr = TIntrusivePtr<TFormedReadResponse>; + + TFormedReadResponse(const TString& guid, const TInstant start) + : Guid(guid) + , Start(start) + , FromDisk(false) + { + } + + NPersQueue::TReadResponse Response; + ui32 RequestsInfly = 0; + i64 ByteSize = 0; + + ui64 RequestedBytes = 0; + + //returns byteSize diff + i64 ApplyResponse(NPersQueue::TReadResponse&& resp); + + TVector<NPersQueue::TReadResponse> ControlMessages; + + THashSet<TActorId> PartitionsTookPartInRead; + TSet<TPartitionInfo> PartitionsBecameAvailable; // Partitions that became available during this read request execution. + // These partitions are brought back to AvailablePartitions after reply to this read request. + TOffsetsInfo Offsets; // Offsets without assigned read id. + + const TString Guid; + TInstant Start; + bool FromDisk; + TDuration WaitQuotaTime; + }; + + THashMap<TActorId, TFormedReadResponse::TPtr> PartitionToReadResponse; // Partition actor -> TFormedReadResponse answer that has this partition. + // PartitionsTookPartInRead in formed read response contains this actor id. 
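+ + // Note on the commit bookkeeping below: read ids are issued sequentially (ReadIdToResponse), while client + // commits may arrive out of order and are parked in NextCommits; judging by the checks in grpc_pq_read_actor.cpp, + // only a contiguous prefix of read ids past ReadIdCommitted is actually committed.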
+ + ui64 ReadIdToResponse; + ui64 ReadIdCommitted; + TSet<ui64> NextCommits; + TInstant LastCommitTimestamp; + TDuration CommitInterval; + ui32 CommitsInfly; + + std::deque<THolder<TEvPQProxy::TEvRead>> Reads; + + ui64 Cookie; + + struct TCommitInfo { + ui64 StartReadId; + ui32 Partitions; + TInstant StartTime; + }; + + TMap<ui64, TCommitInfo> Commits; //readid->TCommitInfo + + TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; + + NMonitoring::TDynamicCounters::TCounterPtr SessionsCreated; + NMonitoring::TDynamicCounters::TCounterPtr SessionsActive; + NMonitoring::TDynamicCounters::TCounterPtr SessionsWithoutAuth; + NMonitoring::TDynamicCounters::TCounterPtr SessionsWithOldBatchingVersion; // LOGBROKER-3173 + + NMonitoring::TDynamicCounters::TCounterPtr Errors; + NMonitoring::TDynamicCounters::TCounterPtr PipeReconnects; + NMonitoring::TDynamicCounters::TCounterPtr BytesInflight; + ui64 BytesInflight_; + ui64 RequestedBytes; + ui32 ReadsInfly; + + NKikimr::NPQ::TPercentileCounter PartsPerSession; + + THashMap<TString, TTopicCounters> TopicCounters; + THashMap<TString, ui32> NumPartitionsFromTopic; + + TVector<NPersQueue::TPQLabelsInfo> Aggr; + NKikimr::NPQ::TMultiCounter SLITotal; + NKikimr::NPQ::TMultiCounter SLIErrors; + TInstant StartTime; + NKikimr::NPQ::TPercentileCounter InitLatency; + NKikimr::NPQ::TPercentileCounter CommitLatency; + NKikimr::NPQ::TMultiCounter SLIBigLatency; + + NKikimr::NPQ::TPercentileCounter ReadLatency; + NKikimr::NPQ::TPercentileCounter ReadLatencyFromDisk; + NKikimr::NPQ::TMultiCounter SLIBigReadLatency; + NKikimr::NPQ::TMultiCounter ReadsTotal; + + NPersQueue::TTopicsListController TopicsHandler; + NPersQueue::TTopicsToConverter TopicsList; +}; + +} +} diff --git a/ydb/services/deprecated/persqueue_v0/grpc_pq_clusters_updater_actor.cpp b/ydb/services/deprecated/persqueue_v0/grpc_pq_clusters_updater_actor.cpp new file mode 100644 index 0000000000..a88437fdbe --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/grpc_pq_clusters_updater_actor.cpp @@ -0,0 +1,86 @@ +#include "grpc_pq_clusters_updater_actor.h" + +#include <ydb/core/base/appdata.h> +#include <ydb/core/persqueue/pq_database.h> + +namespace NKikimr { +namespace NGRpcProxy { + +static const int CLUSTERS_UPDATER_TIMEOUT_ON_ERROR = 1; + + +TClustersUpdater::TClustersUpdater(IPQClustersUpdaterCallback* callback) + : Callback(callback) + {}; + +void TClustersUpdater::Bootstrap(const NActors::TActorContext& ctx) { + ctx.Send(ctx.SelfID, new TEvPQClustersUpdater::TEvUpdateClusters()); + ctx.Send(NNetClassifier::MakeNetClassifierID(), new NNetClassifier::TEvNetClassifier::TEvSubscribe); + + Become(&TThis::StateFunc); +} + +void TClustersUpdater::Handle(TEvPQClustersUpdater::TEvUpdateClusters::TPtr&, const TActorContext &ctx) { + auto req = MakeHolder<NKqp::TEvKqp::TEvQueryRequest>(); + req->Record.MutableRequest()->SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); + req->Record.MutableRequest()->SetType(NKikimrKqp::QUERY_TYPE_SQL_DML); + req->Record.MutableRequest()->SetKeepSession(false); + req->Record.MutableRequest()->SetQuery("--!syntax_v1\nSELECT `name`, `local`, `enabled` FROM `" + AppData(ctx)->PQConfig.GetRoot() + "/Config/V2/Cluster`;"); + req->Record.MutableRequest()->SetDatabase(NKikimr::NPQ::GetDatabaseFromConfig(AppData(ctx)->PQConfig)); + req->Record.MutableRequest()->MutableTxControl()->set_commit_tx(true); + req->Record.MutableRequest()->MutableTxControl()->mutable_begin_tx()->mutable_serializable_read_write(); + ctx.Send(NKqp::MakeKqpProxyID(ctx.SelfID.NodeId()), req.Release()); +} + 
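+ +// Note: the SELECT above yields (name, local, enabled) rows from <PQRoot>/Config/V2/Cluster; they are parsed in +// Handle(TEvKqp::TEvQueryResponse) below, which reschedules the next poll on success and retries after +// CLUSTERS_UPDATER_TIMEOUT_ON_ERROR (one second) on failure.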
+void TClustersUpdater::Handle(NNetClassifier::TEvNetClassifier::TEvClassifierUpdate::TPtr& ev, const TActorContext&) { + + Callback->NetClassifierUpdated(ev->Get()->Classifier); +} + + + + +void TClustersUpdater::Handle(NKqp::TEvKqp::TEvQueryResponse::TPtr &ev, const TActorContext &ctx) { + auto& record = ev->Get()->Record.GetRef(); + + if (record.GetYdbStatus() == Ydb::StatusIds::SUCCESS) { + auto& t = record.GetResponse().GetResults(0).GetValue().GetStruct(0); + bool local = false; + TVector<TString> clusters; + for (size_t i = 0; i < t.ListSize(); ++i) { + TString dc = t.GetList(i).GetStruct(0).GetOptional().GetText(); + local = t.GetList(i).GetStruct(1).GetOptional().GetBool(); + clusters.push_back(dc); + if (local) { + bool enabled = t.GetList(i).GetStruct(2).GetOptional().GetBool(); + Y_ABORT_UNLESS(LocalCluster.empty() || LocalCluster == dc); + bool changed = LocalCluster != dc || Enabled != enabled; + if (changed) { + LocalCluster = dc; + Enabled = enabled; + Callback->CheckClusterChange(LocalCluster, Enabled); + } + } + } + if (Clusters != clusters) { + Clusters = clusters; + Callback->CheckClustersListChange(Clusters); + } + ctx.Schedule(TDuration::Seconds(AppData(ctx)->PQConfig.GetClustersUpdateTimeoutSec()), new TEvPQClustersUpdater::TEvUpdateClusters()); + } else { + LOG_ERROR_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "can't update clusters " << record); + ctx.Schedule(TDuration::Seconds(CLUSTERS_UPDATER_TIMEOUT_ON_ERROR), new TEvPQClustersUpdater::TEvUpdateClusters()); + } +} + + +void TClustersUpdater::Handle(NKqp::TEvKqp::TEvProcessResponse::TPtr &ev, const TActorContext &ctx) { + auto& record = ev->Get()->Record; + + LOG_ERROR_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "can't update clusters " << record); + ctx.Schedule(TDuration::Seconds(CLUSTERS_UPDATER_TIMEOUT_ON_ERROR), new TEvPQClustersUpdater::TEvUpdateClusters()); +} + + +} +} diff --git a/ydb/services/deprecated/persqueue_v0/grpc_pq_clusters_updater_actor.h b/ydb/services/deprecated/persqueue_v0/grpc_pq_clusters_updater_actor.h new file mode 100644 index 0000000000..efa1354963 --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/grpc_pq_clusters_updater_actor.h @@ -0,0 +1,77 @@ +#pragma once + +#include <library/cpp/actors/core/actor_bootstrapped.h> +#include <library/cpp/actors/core/actor.h> +#include <library/cpp/actors/core/event_local.h> +#include <library/cpp/actors/core/hfunc.h> + +#include <ydb/core/base/events.h> +#include <ydb/core/kqp/common/kqp.h> +#include <ydb/core/mind/address_classification/net_classifier.h> + +namespace NKikimr { +namespace NGRpcProxy { + +struct TEvPQClustersUpdater { + enum EEv { + EvUpdateClusters = EventSpaceBegin(TKikimrEvents::ES_PQ_CLUSTERS_UPDATER), + EvEnd, + }; + + struct TEvUpdateClusters : public NActors::TEventLocal<TEvUpdateClusters, EvUpdateClusters> { + TEvUpdateClusters() + {} + }; +}; + +class IPQClustersUpdaterCallback { +public: + virtual ~IPQClustersUpdaterCallback() = default; + virtual void CheckClusterChange(const TString& localCluster, const bool enabled) + { + Y_UNUSED(localCluster); + Y_UNUSED(enabled); + } + + virtual void CheckClustersListChange(const TVector<TString>& clusters) + { + Y_UNUSED(clusters); + } + + virtual void NetClassifierUpdated(NAddressClassifier::TLabeledAddressClassifier::TConstPtr classifier) { + Y_UNUSED(classifier); + } +}; + +class TClustersUpdater : public NActors::TActorBootstrapped<TClustersUpdater> { +public: + TClustersUpdater(IPQClustersUpdaterCallback* callback); + + void Bootstrap(const NActors::TActorContext& ctx); + + 
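+ // Bootstrap (see the .cpp) self-sends TEvUpdateClusters to start the polling loop and subscribes + // to net classifier updates.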
static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::FRONT_PQ_WRITE; } // FIXME + +private: + IPQClustersUpdaterCallback* Callback; + TString LocalCluster; + TVector<TString> Clusters; + bool Enabled = false; + + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + HFunc(TEvPQClustersUpdater::TEvUpdateClusters, Handle); + HFunc(NKqp::TEvKqp::TEvQueryResponse, Handle); + HFunc(NKqp::TEvKqp::TEvProcessResponse, Handle); + HFunc(NNetClassifier::TEvNetClassifier::TEvClassifierUpdate, Handle); + } + } + + void Handle(TEvPQClustersUpdater::TEvUpdateClusters::TPtr &ev, const TActorContext &ctx); + void Handle(NKqp::TEvKqp::TEvQueryResponse::TPtr &ev, const TActorContext &ctx); + void Handle(NKqp::TEvKqp::TEvProcessResponse::TPtr &ev, const TActorContext &ctx); + void Handle(NNetClassifier::TEvNetClassifier::TEvClassifierUpdate::TPtr& ev, const TActorContext& ctx); + +}; + +} +} diff --git a/ydb/services/deprecated/persqueue_v0/grpc_pq_read.cpp b/ydb/services/deprecated/persqueue_v0/grpc_pq_read.cpp new file mode 100644 index 0000000000..08506c667a --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/grpc_pq_read.cpp @@ -0,0 +1,268 @@ +#include "grpc_pq_read.h" +#include "grpc_pq_actor.h" +#include "grpc_pq_session.h" +#include "ydb/core/client/server/grpc_proxy_status.h" + +#include <ydb/core/grpc_services/grpc_helper.h> +#include <ydb/core/tx/scheme_board/cache.h> + +using namespace NActors; +using namespace NKikimrClient; + +using grpc::Status; + +namespace NKikimr { +namespace NGRpcProxy { + +/////////////////////////////////////////////////////////////////////////////// + +using namespace NPersQueue; + +void TPQReadService::TSession::OnCreated() { + // Start waiting for new session. + Proxy->WaitReadSession(); + if (Proxy->TooMuchSessions()) { + ReplyWithError("proxy overloaded", NPersQueue::NErrorCode::OVERLOAD); + return; + } + // Create actor for current session. 
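+ // (This is only possible once cluster discovery has completed: an empty clusters list or local cluster + // below means initialization is still in progress, so the session is rejected.)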
+ auto clusters = Proxy->GetClusters(); + auto localCluster = Proxy->GetLocalCluster(); + if (NeedDiscoverClusters && (clusters.empty() || localCluster.empty())) { + //TODO: inc sli errors counter + ReplyWithError("clusters list or local cluster is empty", NPersQueue::NErrorCode::INITIALIZING); + return; + + } + if (!TopicConverterFactory->GetLocalCluster().empty()) { + TopicConverterFactory->SetLocalCluster(localCluster); + } + auto topicsHandler = std::make_unique<NPersQueue::TTopicsListController>( + TopicConverterFactory, clusters + ); + + CreateActor(std::move(topicsHandler)); + ReadyForNextRead(); +} + +void TPQReadService::TSession::OnRead(const NPersQueue::TReadRequest& request) { + switch (request.GetRequestCase()) { + case TReadRequest::kInit: { + SendEvent(new TEvPQProxy::TEvReadInit(request, GetPeerName(), GetDatabase())); + break; + } + case TReadRequest::kRead: { + SendEvent(new TEvPQProxy::TEvRead(request)); + break; + } + case TReadRequest::kStatus: { + Y_ABORT_UNLESS(ActorId); + const auto& req = request.GetStatus(); + const TString& topic = req.GetTopic(); + const ui32 partition = req.GetPartition(); + const ui64 generation = req.GetGeneration(); + SendEvent(new TEvPQProxy::TEvGetStatus(topic, partition, generation)); + ReadyForNextRead(); + break; + } + case TReadRequest::kStartRead: { + Y_ABORT_UNLESS(ActorId); + const auto& req = request.GetStartRead(); + const TString& topic = req.GetTopic(); + const ui32 partition = req.GetPartition(); + const ui64 readOffset = req.GetReadOffset(); + const ui64 commitOffset = req.GetCommitOffset(); + const bool verifyReadOffset = req.GetVerifyReadOffset(); + const ui64 generation = req.GetGeneration(); + + if (request.GetCredentials().GetCredentialsCase() != NPersQueueCommon::TCredentials::CREDENTIALS_NOT_SET) { + SendEvent(new TEvPQProxy::TEvAuth(request.GetCredentials())); + } + SendEvent(new TEvPQProxy::TEvLocked(topic, partition, readOffset, commitOffset, verifyReadOffset, generation)); + ReadyForNextRead(); + break; + } + case TReadRequest::kCommit: { + Y_ABORT_UNLESS(ActorId); + const auto& req = request.GetCommit(); + + if (request.GetCredentials().GetCredentialsCase() != NPersQueueCommon::TCredentials::CREDENTIALS_NOT_SET) { + SendEvent(new TEvPQProxy::TEvAuth(request.GetCredentials())); + } + + // Empty cookies list will lead to no effect. + for (ui32 i = 0; i < req.CookieSize(); ++i) { + SendEvent(new TEvPQProxy::TEvCommit(req.GetCookie(i))); + } + + ReadyForNextRead(); + break; + } + + default: { + SendEvent(new TEvPQProxy::TEvCloseSession("unsupported request", NPersQueue::NErrorCode::BAD_REQUEST)); + break; + } + } +} + +void TPQReadService::TSession::OnDone() { + SendEvent(new TEvPQProxy::TEvDone()); +} + +void TPQReadService::TSession::OnWriteDone(ui64 size) { + SendEvent(new TEvPQProxy::TEvWriteDone(size)); +} + +void TPQReadService::TSession::DestroyStream(const TString& reason, const NPersQueue::NErrorCode::EErrorCode errorCode) { + // Send poison pill to the actor(if it is alive) + SendEvent(new TEvPQProxy::TEvDieCommand("read-session " + ToString<ui64>(Cookie) + ": " + reason, errorCode)); + // Remove reference to session from "cookie -> session" map. 
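+ // (ReleaseSession also sends TEvUpdateStatus(0,0,-1,0) to decrement the proxy's active-session gauge.)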
+ Proxy->ReleaseSession(Cookie); +} + +bool TPQReadService::TSession::IsShuttingDown() const { + return Proxy->IsShuttingDown(); +} + +TPQReadService::TSession::TSession(std::shared_ptr<TPQReadService> proxy, + grpc::ServerCompletionQueue* cq, ui64 cookie, const TActorId& schemeCache, const TActorId& newSchemeCache, + TIntrusivePtr<NMonitoring::TDynamicCounters> counters, bool needDiscoverClusters, + const NPersQueue::TConverterFactoryPtr& converterFactory) + : ISession(cq) + , Proxy(proxy) + , Cookie(cookie) + , ActorId() + , SchemeCache(schemeCache) + , NewSchemeCache(newSchemeCache) + , Counters(counters) + , NeedDiscoverClusters(needDiscoverClusters) + , TopicConverterFactory(converterFactory) +{ +} + +void TPQReadService::TSession::Start() { + if (!Proxy->IsShuttingDown()) { + Proxy->RequestSession(&Context, &Stream, CQ, CQ, new TRequestCreated(this)); + } +} + +void TPQReadService::TSession::SendEvent(IEventBase* ev) { + Proxy->ActorSystem->Send(ActorId, ev); +} + +void TPQReadService::TSession::CreateActor(std::unique_ptr<NPersQueue::TTopicsListController>&& topicsHandler) { + auto classifier = Proxy->GetClassifier(); + + ActorId = Proxy->ActorSystem->Register( + new TReadSessionActor(this, *topicsHandler, Cookie, SchemeCache, NewSchemeCache, Counters, + classifier ? classifier->ClassifyAddress(GetPeerName()) + : "unknown")); +} + + + +ui64 TPQReadService::TSession::GetCookie() const { + return Cookie; +} + +/////////////////////////////////////////////////////////////////////////////// + + +TPQReadService::TPQReadService(NKikimr::NGRpcService::TGRpcPersQueueService* service, grpc::ServerCompletionQueue* cq, + NActors::TActorSystem* as, const TActorId& schemeCache, + TIntrusivePtr<NMonitoring::TDynamicCounters> counters, + const ui32 maxSessions) + : Service(service) + , CQ(cq) + , ActorSystem(as) + , SchemeCache(schemeCache) + , Counters(counters) + , MaxSessions(maxSessions) +{ + auto appData = ActorSystem->AppData<TAppData>(); + auto cacheCounters = GetServiceCounters(counters, "pqproxy|schemecache"); + auto cacheConfig = MakeIntrusive<NSchemeCache::TSchemeCacheConfig>(appData, cacheCounters); + NewSchemeCache = ActorSystem->Register(CreateSchemeBoardSchemeCache(cacheConfig.Get())); + // ToDo[migration]: Other conditions; + NeedDiscoverClusters = !ActorSystem->AppData<TAppData>()->PQConfig.GetTopicsAreFirstClassCitizen(); + TopicConverterFactory = std::make_shared<NPersQueue::TTopicNamesConverterFactory>( + ActorSystem->AppData<TAppData>()->PQConfig, "" + ); + + if (NeedDiscoverClusters) { + ActorSystem->Register(new TClustersUpdater(this)); + } +} + + +ui64 TPQReadService::NextCookie() { + return AtomicIncrement(LastCookie); +} + + +void TPQReadService::ReleaseSession(ui64 cookie) { + auto g(Guard(Lock)); + bool erased = Sessions.erase(cookie); + if (erased) + ActorSystem->Send(MakeGRpcProxyStatusID(ActorSystem->NodeId), new TEvGRpcProxyStatus::TEvUpdateStatus(0,0,-1,0)); + +} + +void TPQReadService::CheckClusterChange(const TString& localCluster, const bool) { + auto g(Guard(Lock)); + LocalCluster = localCluster; + TopicConverterFactory->SetLocalCluster(localCluster); +} + +void TPQReadService::NetClassifierUpdated(NAddressClassifier::TLabeledAddressClassifier::TConstPtr classifier) { + auto g(Guard(Lock)); + if (!DatacenterClassifier) { + for (auto it = Sessions.begin(); it != Sessions.end();) { + auto jt = it++; + jt->second->DestroyStream("datacenter classifier initialized, restart session please", NPersQueue::NErrorCode::INITIALIZING); + } + } + + DatacenterClassifier = 
classifier; +} + + +void TPQReadService::CheckClustersListChange(const TVector<TString> &clusters) { + auto g(Guard(Lock)); + Clusters = clusters; +} + +void TPQReadService::SetupIncomingRequests() { + WaitReadSession(); +} + + +void TPQReadService::WaitReadSession() { + + const ui64 cookie = NextCookie(); + + ActorSystem->Send(MakeGRpcProxyStatusID(ActorSystem->NodeId), new TEvGRpcProxyStatus::TEvUpdateStatus(0,0,1,0)); + + TSessionRef session(new TSession(shared_from_this(), CQ, cookie, SchemeCache, NewSchemeCache, Counters, + NeedDiscoverClusters, TopicConverterFactory)); + + { + auto g(Guard(Lock)); + Sessions.insert(std::make_pair(cookie, session)); + } + + session->Start(); +} + + + +bool TPQReadService::TooMuchSessions() { + auto g(Guard(Lock)); + return Sessions.size() >= MaxSessions; +} + +/////////////////////////////////////////////////////////////////////////////// + +} +} diff --git a/ydb/services/deprecated/persqueue_v0/grpc_pq_read.h b/ydb/services/deprecated/persqueue_v0/grpc_pq_read.h new file mode 100644 index 0000000000..9fbc177e6f --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/grpc_pq_read.h @@ -0,0 +1,146 @@ +#pragma once + +#include "persqueue.h" +#include "grpc_pq_clusters_updater_actor.h" +#include "grpc_pq_session.h" + +#include <ydb/core/client/server/grpc_base.h> +#include <ydb/library/persqueue/topic_parser/topic_parser.h> + +#include <library/cpp/grpc/server/grpc_request.h> +#include <library/cpp/actors/core/actorsystem.h> + +#include <util/generic/hash.h> +#include <util/system/mutex.h> + +namespace NKikimr { +namespace NGRpcProxy { + +class TPQReadService : public IPQClustersUpdaterCallback, public std::enable_shared_from_this<TPQReadService> { + class TSession + : public ISession<NPersQueue::TReadRequest, NPersQueue::TReadResponse> + { + + public: + void OnCreated() override; + void OnRead(const NPersQueue::TReadRequest& request) override; + void OnDone() override; + void OnWriteDone(ui64 size) override; + void DestroyStream(const TString& reason, const NPersQueue::NErrorCode::EErrorCode errorCode) override; + bool IsShuttingDown() const override; + TSession(std::shared_ptr<TPQReadService> proxy, + grpc::ServerCompletionQueue* cq, ui64 cookie, const NActors::TActorId& schemeCache, const NActors::TActorId& newSchemeCache, + TIntrusivePtr<NMonitoring::TDynamicCounters> counters, bool needDiscoverClusters, + const NPersQueue::TConverterFactoryPtr& converterFactory); + void Start() override; + void SendEvent(NActors::IEventBase* ev); + + private: + void CreateActor(std::unique_ptr<NPersQueue::TTopicsListController>&& topicsHandler); + ui64 GetCookie() const; + + private: + std::shared_ptr<TPQReadService> Proxy; + const ui64 Cookie; + + NActors::TActorId ActorId; + + const NActors::TActorId SchemeCache; + const NActors::TActorId NewSchemeCache; + + TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; + bool NeedDiscoverClusters; + + NPersQueue::TConverterFactoryPtr TopicConverterFactory; + + }; + + using TSessionRef = TIntrusivePtr<TSession>; + +public: + + TPQReadService(NGRpcService::TGRpcPersQueueService* service, + grpc::ServerCompletionQueue* cq, + NActors::TActorSystem* as, const NActors::TActorId& schemeCache, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, + const ui32 maxSessions); + + virtual ~TPQReadService() + {} + + void RequestSession(::grpc::ServerContext* context, ::grpc::ServerAsyncReaderWriter< ::NPersQueue::TReadResponse, ::NPersQueue::TReadRequest>* stream, + ::grpc::CompletionQueue* new_call_cq, 
::grpc::ServerCompletionQueue* notification_cq, void *tag) + { + Service->GetService()->RequestReadSession(context, stream, new_call_cq, notification_cq, tag); + } + + void SetupIncomingRequests(); + + void StopService() { + AtomicSet(ShuttingDown_, 1); + } + + bool IsShuttingDown() const { + return AtomicGet(ShuttingDown_); + } + + TVector<TString> GetClusters() const { + auto g(Guard(Lock)); + return Clusters; + } + TString GetLocalCluster() const { + auto g(Guard(Lock)); + return LocalCluster; + } + + NAddressClassifier::TLabeledAddressClassifier::TConstPtr GetClassifier() const { + auto g(Guard(Lock)); + return DatacenterClassifier; + } + +private: + ui64 NextCookie(); + + void CheckClustersListChange(const TVector<TString>& clusters) override; + void CheckClusterChange(const TString& localCluster, const bool enabled) override; + void NetClassifierUpdated(NAddressClassifier::TLabeledAddressClassifier::TConstPtr classifier) override; + void UpdateTopicsHandler(); + //! Unregister session object. + void ReleaseSession(ui64 cookie); + + //! Start listening for incoming connections. + void WaitReadSession(); + + bool TooMuchSessions(); + +private: + NKikimr::NGRpcService::TGRpcPersQueueService* Service; + + grpc::ServerContext Context; + grpc::ServerCompletionQueue* CQ; + NActors::TActorSystem* ActorSystem; + NActors::TActorId SchemeCache; + NActors::TActorId NewSchemeCache; + + TAtomic LastCookie = 0; + TMutex Lock; + THashMap<ui64, TSessionRef> Sessions; + + TVector<TString> Clusters; + TString LocalCluster; + + TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; + + ui32 MaxSessions; + + TAtomic ShuttingDown_ = 0; + + NAddressClassifier::TLabeledAddressClassifier::TConstPtr DatacenterClassifier; // Detects client's datacenter by IP. May be null + + bool NeedDiscoverClusters; + NPersQueue::TConverterFactoryPtr TopicConverterFactory; + std::unique_ptr<NPersQueue::TTopicsListController> TopicsHandler; +}; + + +} +} diff --git a/ydb/services/deprecated/persqueue_v0/grpc_pq_read_actor.cpp b/ydb/services/deprecated/persqueue_v0/grpc_pq_read_actor.cpp new file mode 100644 index 0000000000..13ab0d5bd1 --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/grpc_pq_read_actor.cpp @@ -0,0 +1,2594 @@ +#include "grpc_pq_actor.h" + +#include <ydb/core/base/path.h> +#include <ydb/core/client/server/msgbus_server_persqueue.h> +#include <ydb/library/services/services.pb.h> +#include <ydb/core/persqueue/percentile_counter.h> +#include <ydb/core/persqueue/pq_database.h> +#include <ydb/core/persqueue/write_meta.h> +#include <ydb/core/persqueue/writer/source_id_encoding.h> +#include <ydb/library/persqueue/topic_parser/type_definitions.h> +#include <ydb/library/persqueue/topic_parser/topic_parser.h> +#include <ydb/library/persqueue/topic_parser/counters.h> +#include <ydb/library/persqueue/deprecated/read_batch_converter/read_batch_converter.h> + +#include <library/cpp/actors/core/log.h> +#include <library/cpp/actors/interconnect/interconnect.h> +#include <library/cpp/protobuf/util/repeated_field_utils.h> + +#include <util/string/strip.h> +#include <util/charset/utf8.h> + +#include <algorithm> + +using namespace NActors; +using namespace NKikimrClient; + +namespace NKikimr { + +using namespace NMsgBusProxy; + +namespace NGRpcProxy { + +using namespace NPersQueue; +using namespace NSchemeCache; + +#ifdef PQ_LOG_PREFIX +#undef PQ_LOG_PREFIX +#endif +#define PQ_LOG_PREFIX "session cookie " << Cookie << " client " << InternalClientId << " session " << Session + + +//11 tries = 10.23 seconds, then each try 
takes 5 seconds, so 21 retries will take nearly 1 min +static const NTabletPipe::TClientRetryPolicy RetryPolicyForPipes = { + .RetryLimitCount = 21, + .MinRetryTime = TDuration::MilliSeconds(10), + .MaxRetryTime = TDuration::Seconds(5), + .BackoffMultiplier = 2, + .DoFirstRetryInstantly = true +}; + +static const ui64 MAX_INFLY_BYTES = 25_MB; +static const ui32 MAX_INFLY_READS = 10; + +static const TDuration READ_TIMEOUT_DURATION = TDuration::Seconds(1); + +static const TDuration WAIT_DATA = TDuration::Seconds(10); +static const TDuration PREWAIT_DATA = TDuration::Seconds(9); +static const TDuration WAIT_DELTA = TDuration::MilliSeconds(500); + +static const ui64 INIT_COOKIE = Max<ui64>(); //some identifier + +static const ui32 MAX_PIPE_RESTARTS = 100; //after 100 restarts without progress kill session +static const ui32 RESTART_PIPE_DELAY_MS = 100; + +static const ui64 MAX_READ_SIZE = 100 << 20; //100mb; + +static const TDuration DEFAULT_COMMIT_RATE = TDuration::Seconds(1); //1 second; +static const ui32 MAX_COMMITS_INFLY = 3; + +static const double LAG_GROW_MULTIPLIER = 1.2; //assume that 20% more data arrived to partitions + + +//TODO: add here tracking of bytes in/out + +#define LOG_PROTO(FieldName) \ + if (proto.Has##FieldName()) { \ + res << " " << Y_STRINGIZE(FieldName) << " { " << proto.Get##FieldName().ShortDebugString() << " }"; \ + } + +#define LOG_FIELD(proto, FieldName) \ + if (proto.Has##FieldName()) { \ + res << " " << Y_STRINGIZE(FieldName) << ": " << proto.Get##FieldName(); \ + } + +TString PartitionResponseToLog(const NKikimrClient::TPersQueuePartitionResponse& proto) { + if (!proto.HasCmdReadResult()) { + return proto.ShortDebugString(); + } + TStringBuilder res; + res << "{"; + + + if (proto.CmdWriteResultSize() > 0) { + res << " CmdWriteResult {"; + for (const auto& writeRes : proto.GetCmdWriteResult()) { + res << " { " << writeRes.ShortDebugString() << " }"; + } + res << " }"; + } + + LOG_PROTO(CmdGetMaxSeqNoResult); + LOG_PROTO(CmdGetClientOffsetResult); + LOG_PROTO(CmdGetOwnershipResult); + + + if (proto.HasCmdReadResult()) { + const auto& readRes = proto.GetCmdReadResult(); + res << " CmdReadResult {"; + LOG_FIELD(readRes, MaxOffset); + LOG_FIELD(readRes, BlobsFromDisk); + LOG_FIELD(readRes, BlobsFromCache); + //LOG_FIELD(readRes, ErrorCode); + LOG_FIELD(readRes, ErrorReason); + LOG_FIELD(readRes, BlobsCachedSize); + LOG_FIELD(readRes, SizeLag); + LOG_FIELD(readRes, RealReadOffset); + if (readRes.ResultSize() > 0) { + res << " Result {"; + for (const auto &tRes: readRes.GetResult()) { + res << " {"; + LOG_FIELD(tRes, Offset); + LOG_FIELD(tRes, SeqNo); + LOG_FIELD(tRes, PartNo); + LOG_FIELD(tRes, TotalParts); + LOG_FIELD(tRes, TotalSize); + LOG_FIELD(tRes, WriteTimestampMS); + LOG_FIELD(tRes, CreateTimestampMS); + LOG_FIELD(tRes, UncompressedSize); + LOG_FIELD(tRes, PartitionKey); + res << " }"; + } + res << " }"; + } + res << " }"; + } + res << " }"; + return res; +} +#undef LOG_PROTO +#undef LOG_FIELD + +class TPartitionActor : public NActors::TActorBootstrapped<TPartitionActor> { +public: + TPartitionActor(const TActorId& parentId, const TString& clientId, const ui64 cookie, const TString& session, const ui32 generation, + const ui32 step, const NPersQueue::TTopicConverterPtr& topic, const ui32 partition, const ui64 tabletID, + const TReadSessionActor::TTopicCounters& counters, const TString& clientDC); + ~TPartitionActor(); + + void Bootstrap(const NActors::TActorContext& ctx); + void Die(const NActors::TActorContext& ctx) override; + + + static constexpr 
NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::FRONT_PQ_PARTITION; } +private: + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + CFunc(NActors::TEvents::TSystem::Wakeup, HandleWakeup) + HFunc(TEvPQProxy::TEvDeadlineExceeded, Handle) + + HFunc(NActors::TEvents::TEvPoisonPill, HandlePoison) + HFunc(TEvPQProxy::TEvRead, Handle) + HFunc(TEvPQProxy::TEvCommit, Handle) + HFunc(TEvPQProxy::TEvReleasePartition, Handle) + HFunc(TEvPQProxy::TEvLockPartition, Handle) + HFunc(TEvPQProxy::TEvGetStatus, Handle) + HFunc(TEvPQProxy::TEvRestartPipe, Handle) + + HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); + HFunc(TEvTabletPipe::TEvClientConnected, Handle); + HFunc(TEvPersQueue::TEvResponse, Handle); + HFunc(TEvPersQueue::TEvHasDataInfoResponse, Handle); + default: + break; + }; + } + + + void Handle(TEvPQProxy::TEvReleasePartition::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvLockPartition::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvGetStatus::TPtr& ev, const NActors::TActorContext& ctx); + + void Handle(TEvPQProxy::TEvDeadlineExceeded::TPtr& ev, const NActors::TActorContext& ctx); + + void Handle(TEvPQProxy::TEvRead::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPQProxy::TEvCommit::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(const TEvPQProxy::TEvRestartPipe::TPtr&, const NActors::TActorContext& ctx); + + void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPersQueue::TEvResponse::TPtr& ev, const NActors::TActorContext& ctx); + void Handle(TEvPersQueue::TEvHasDataInfoResponse::TPtr& ev, const NActors::TActorContext& ctx); + + void HandlePoison(NActors::TEvents::TEvPoisonPill::TPtr& ev, const NActors::TActorContext& ctx); + void HandleWakeup(const NActors::TActorContext& ctx); + + void CheckRelease(const NActors::TActorContext& ctx); + void InitLockPartition(const NActors::TActorContext& ctx); + void InitStartReading(const NActors::TActorContext& ctx); + + void RestartPipe(const NActors::TActorContext& ctx, const TString& reason, const NPersQueue::NErrorCode::EErrorCode errorCode); + void WaitDataInPartition(const NActors::TActorContext& ctx); + void SendCommit(const ui64 readId, const ui64 offset, const TActorContext& ctx); + +private: + const TActorId ParentId; + const TString InternalClientId; + const TString ClientDC; + const ui64 Cookie; + const TString Session; + const ui32 Generation; + const ui32 Step; + + NPersQueue::TTopicConverterPtr Topic; + const ui32 Partition; + + const ui64 TabletID; + + ui64 ReadOffset; + ui64 ClientReadOffset; + ui64 ClientCommitOffset; + bool ClientVerifyReadOffset; + ui64 CommittedOffset; + ui64 WriteTimestampEstimateMs; + + ui64 WTime; + bool InitDone; + bool StartReading; + bool AllPrepareInited; + bool FirstInit; + TActorId PipeClient; + ui32 PipeGeneration; + bool RequestInfly; + NKikimrClient::TPersQueueRequest CurrentRequest; + + ui64 EndOffset; + ui64 SizeLag; + + TString ReadGuid; // empty if not reading + + bool NeedRelease; + bool Released; + + std::set<ui64> WaitDataInfly; + ui64 WaitDataCookie; + bool WaitForData; + + bool LockCounted; + + std::deque<std::pair<ui64, ui64>> CommitsInfly; //ReadId, Offset + + TReadSessionActor::TTopicCounters Counters; +}; + + +TReadSessionActor::TReadSessionActor( + IReadSessionHandlerRef handler, const 
NPersQueue::TTopicsListController& topicsHandler, const ui64 cookie, + const TActorId& pqMetaCache, const TActorId& newSchemeCache, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, + const TMaybe<TString> clientDC +) + : Handler(handler) + , StartTimestamp(TInstant::Now()) + , PqMetaCache(pqMetaCache) + , NewSchemeCache(newSchemeCache) + , AuthInitActor() + , AuthInflight(false) + , ClientDC(clientDC ? *clientDC : "other") + , ClientPath() + , Session() + , ClientsideLocksAllowed(false) + , BalanceRightNow(false) + , CommitsDisabled(false) + , BalancersInitStarted(false) + , InitDone(false) + , ProtocolVersion(NPersQueue::TReadRequest::Base) + , MaxReadMessagesCount(0) + , MaxReadSize(0) + , MaxReadPartitionsCount(0) + , MaxTimeLagMs(0) + , ReadTimestampMs(0) + , ReadSettingsInited(false) + , ForceACLCheck(false) + , RequestNotChecked(true) + , LastACLCheckTimestamp(TInstant::Zero()) + , ReadOnlyLocal(false) + , ReadIdToResponse(1) + , ReadIdCommitted(0) + , LastCommitTimestamp(TInstant::Zero()) + , CommitInterval(DEFAULT_COMMIT_RATE) + , CommitsInfly(0) + , Cookie(cookie) + , Counters(counters) + , BytesInflight_(0) + , RequestedBytes(0) + , ReadsInfly(0) + , TopicsHandler(topicsHandler) +{ + Y_ASSERT(Handler); +} + + + +TReadSessionActor::~TReadSessionActor() = default; + + +void TReadSessionActor::Bootstrap(const TActorContext& ctx) { + if (!AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { + ++(*GetServiceCounters(Counters, "pqproxy|readSession")->GetCounter("SessionsCreatedTotal", true)); + } + StartTime = ctx.Now(); + Become(&TThis::StateFunc); +} + + +void TReadSessionActor::Die(const TActorContext& ctx) { + + ctx.Send(AuthInitActor, new TEvents::TEvPoisonPill()); + + for (auto& p : Partitions) { + ctx.Send(p.second.Actor, new TEvents::TEvPoisonPill()); + + if (!p.second.Released) { + auto it = TopicCounters.find(p.second.Converter->GetInternalName()); + Y_ABORT_UNLESS(it != TopicCounters.end()); + it->second.PartitionsInfly.Dec(); + it->second.PartitionsReleased.Inc(); + if (p.second.Releasing) + it->second.PartitionsToBeReleased.Dec(); + } + } + + for (auto& t : Topics) { + if (t.second.PipeClient) + NTabletPipe::CloseClient(ctx, t.second.PipeClient); + } + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " is DEAD"); + + if (SessionsActive) { + --(*SessionsActive); + } + if (BytesInflight) { + (*BytesInflight) -= BytesInflight_; + } + if (SessionsActive) { //PartsPerSession is inited too + PartsPerSession.DecFor(Partitions.size(), 1); + } + if (!Handler->IsShuttingDown()) + Handler->Finish(); + TActorBootstrapped<TReadSessionActor>::Die(ctx); +} + +void TReadSessionActor::Handle(TEvPQProxy::TEvDone::TPtr&, const TActorContext& ctx) { + CloseSession(TStringBuilder() << "Reads done signal - closing everything", NPersQueue::NErrorCode::OK, ctx); +} + +void TReadSessionActor::Handle(TEvPQProxy::TEvWriteDone::TPtr& ev, const TActorContext& ctx) { + Y_ABORT_UNLESS(BytesInflight_ >= ev->Get()->Size); + BytesInflight_ -= ev->Get()->Size; + if (BytesInflight) (*BytesInflight) -= ev->Get()->Size; + + const bool isAlive = ProcessReads(ctx); + Y_UNUSED(isAlive); +} + +void TReadSessionActor::Handle(TEvPQProxy::TEvCommit::TPtr& ev, const TActorContext& ctx) { + RequestNotChecked = true; + + if (CommitsDisabled) { + CloseSession(TStringBuilder() << "commits in session are disabled by client option", NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + + const ui64 readId = ev->Get()->ReadId; + if (readId <= ReadIdCommitted) { + CloseSession(TStringBuilder() 
<< "commit of " << ev->Get()->ReadId << " that is already committed", NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + if (readId >= ReadIdToResponse) { + CloseSession(TStringBuilder() << "commit of unknown cookie " << readId, NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + if (NextCommits.size() >= AppData(ctx)->PQConfig.GetMaxReadCookies()) { + CloseSession(TStringBuilder() << "got more than " << AppData(ctx)->PQConfig.GetMaxReadCookies() << " unordered cookies to commit " << readId, NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + + bool res = NextCommits.insert(readId).second; + if (!res) { + CloseSession(TStringBuilder() << "double commit of cookie " << readId, NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " commit request from client for " << readId); + MakeCommit(ctx); +} + +void TReadSessionActor::MakeCommit(const TActorContext& ctx) { + if (CommitsDisabled) + return; + if (ctx.Now() - LastCommitTimestamp < CommitInterval) + return; + if (CommitsInfly > MAX_COMMITS_INFLY) + return; + ui64 readId = ReadIdCommitted; + auto it = NextCommits.begin(); + for (;it != NextCommits.end() && (*it) == readId + 1; ++it) { + ++readId; + } + if (readId == ReadIdCommitted) + return; + NextCommits.erase(NextCommits.begin(), it); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " commit request from " << ReadIdCommitted + 1 << " to " << readId); + + auto& commit = Commits[readId]; + commit.StartReadId = ReadIdCommitted + 1; + commit.Partitions = 0; + commit.StartTime = ctx.Now(); + ReadIdCommitted = readId; + LastCommitTimestamp = ctx.Now(); + ++CommitsInfly; + SLITotal.Inc(); + Y_ABORT_UNLESS(Commits.size() == CommitsInfly); + + // Find last offset info belonging to our read id and its ancestors. + const auto firstGreater = std::upper_bound(Offsets.begin(), Offsets.end(), readId); + THashSet<std::pair<TString, ui64>> processedPartitions; + + // Iterate from last to first offsets to find partitions' offsets. + // Offsets in queue have nondecreasing values (for each partition), + // so it it sufficient to take only the last offset for each partition. 
+ // Note: reverse_iterator(firstGreater) points to _before_ firstGreater + + for (auto i = std::make_reverse_iterator(firstGreater), end = std::make_reverse_iterator(Offsets.begin()); i != end; ++i) { + const TOffsetsInfo& info = *i; + for (const TOffsetsInfo::TPartitionOffsetInfo& pi : info.PartitionOffsets) { + if (!ActualPartitionActor(pi.Sender)) { + continue; + } + const auto partitionKey = std::make_pair(pi.Topic, pi.Partition); + if (!processedPartitions.insert(partitionKey).second) { + continue; // already processed + } + const auto partitionIt = Partitions.find(partitionKey); + if (partitionIt != Partitions.end() && !partitionIt->second.Released) { + ctx.Send(partitionIt->second.Actor, new TEvPQProxy::TEvCommit(readId, pi.Offset)); + partitionIt->second.Commits.push_back(readId); + ++commit.Partitions; + } + } + } + Offsets.erase(Offsets.begin(), firstGreater); + + AnswerForCommitsIfCan(ctx); //commits may already be answerable here if all partitions were lost because the balancer died +} + +void TReadSessionActor::Handle(TEvPQProxy::TEvAuth::TPtr& ev, const TActorContext&) { + ProcessAuth(ev->Get()->Auth); +} + +void TReadSessionActor::Handle(TEvPQProxy::TEvGetStatus::TPtr& ev, const TActorContext& ctx) { + + if (!ClientsideLocksAllowed) { + CloseSession("Partition status available only when ClientsideLocksAllowed is true", NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + + auto it = Partitions.find(std::make_pair(ev->Get()->Topic, ev->Get()->Partition)); + + if (it == Partitions.end() || it->second.Releasing || it->second.LockGeneration != ev->Get()->Generation) { + //do nothing - the partition was already released + LOG_WARN_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got NOTACTUAL get status request from client for " << ev->Get()->Topic + << ":" << ev->Get()->Partition << " generation " << ev->Get()->Generation); + return; + } + + //proxy the request to the partition - allow it to init + //TODO: add here VerifyReadOffset too and check it against Committed position + ctx.Send(it->second.Actor, new TEvPQProxy::TEvGetStatus(ev->Get()->Topic, ev->Get()->Partition, ev->Get()->Generation)); +} + + +void TReadSessionActor::Handle(TEvPQProxy::TEvLocked::TPtr& ev, const TActorContext& ctx) { + + RequestNotChecked = true; + if (!ClientsideLocksAllowed) { + CloseSession("Locked requests are allowed only when ClientsideLocksAllowed is true", NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + auto& topic = ev->Get()->Topic; + if (topic.empty()) { + CloseSession("empty topic in start_read request", NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + + } + auto it = Partitions.find(std::make_pair(topic, ev->Get()->Partition)); + + if (it == Partitions.end() || it->second.Releasing || it->second.LockGeneration != ev->Get()->Generation) { + //do nothing - the partition was already released + LOG_WARN_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got NOTACTUAL lock from client for " << topic + << ":" << ev->Get()->Partition << " at offset " << ev->Get()->ReadOffset << " generation " << ev->Get()->Generation); + + return; + } + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got lock from client for " << ev->Get()->Topic + << ":" << ev->Get()->Partition << " at readOffset " << ev->Get()->ReadOffset << " commitOffset " << ev->Get()->CommitOffset << " generation " << ev->Get()->Generation); + + //proxy the request to the partition - allow it to init + //TODO: add here VerifyReadOffset too and check it against Committed position + ctx.Send(it->second.Actor, new 
TEvPQProxy::TEvLockPartition(ev->Get()->ReadOffset, ev->Get()->CommitOffset, ev->Get()->VerifyReadOffset, true)); +} + +void TReadSessionActor::DropPartitionIfNeeded(THashMap<std::pair<TString, ui32>, TPartitionActorInfo>::iterator it, const TActorContext& ctx) { + if (it->second.Commits.empty() && it->second.Released) { + ctx.Send(it->second.Actor, new TEvents::TEvPoisonPill()); + bool res = ActualPartitionActors.erase(it->second.Actor); + Y_ABORT_UNLESS(res); + + if (--NumPartitionsFromTopic[it->second.Converter->GetInternalName()] == 0) { + bool res = TopicCounters.erase(it->second.Converter->GetInternalName()); + Y_ABORT_UNLESS(res); + } + + if (SessionsActive) { + PartsPerSession.DecFor(Partitions.size(), 1); + } + Partitions.erase(it); + if (SessionsActive) { + PartsPerSession.IncFor(Partitions.size(), 1); + } + } +} + + +void TReadSessionActor::Handle(TEvPQProxy::TEvCommitDone::TPtr& ev, const TActorContext& ctx) { + + Y_ABORT_UNLESS(!CommitsDisabled); + + if (!ActualPartitionActor(ev->Sender)) + return; + + ui64 readId = ev->Get()->ReadId; + + auto it = Commits.find(readId); + Y_ABORT_UNLESS(it != Commits.end()); + --it->second.Partitions; + + auto jt = Partitions.find(std::make_pair(ev->Get()->Topic->GetClientsideName(), ev->Get()->Partition)); + Y_ABORT_UNLESS(jt != Partitions.end()); + Y_ABORT_UNLESS(!jt->second.Commits.empty() && jt->second.Commits.front() == readId); + jt->second.Commits.pop_front(); + + DropPartitionIfNeeded(jt, ctx); + + AnswerForCommitsIfCan(ctx); + + MakeCommit(ctx); +} + +void TReadSessionActor::AnswerForCommitsIfCan(const TActorContext& ctx) { + while (!Commits.empty() && Commits.begin()->second.Partitions == 0) { + auto it = Commits.begin(); + ui64 readId = it->first; + TReadResponse result; + for (ui64 i = it->second.StartReadId; i <= readId; ++i){ + result.MutableCommit()->AddCookie(i); + } + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " replying for commits from " << it->second.StartReadId + << " to " << readId); + ui64 diff = result.ByteSize(); + BytesInflight_ += diff; + if (BytesInflight) (*BytesInflight) += diff; + Handler->Reply(result); + + ui32 commitDurationMs = (ctx.Now() - it->second.StartTime).MilliSeconds(); + CommitLatency.IncFor(commitDurationMs, 1); + if (commitDurationMs >= AppData(ctx)->PQConfig.GetCommitLatencyBigMs()) { + SLIBigLatency.Inc(); + } + Commits.erase(it); + --CommitsInfly; + Y_ABORT_UNLESS(Commits.size() == CommitsInfly); + } +} + + +void TReadSessionActor::Handle(TEvPQProxy::TEvReadSessionStatus::TPtr& ev, const TActorContext& ctx) { + + THolder<TEvPQProxy::TEvReadSessionStatusResponse> result(new TEvPQProxy::TEvReadSessionStatusResponse()); + result->Record.SetSession(Session); + result->Record.SetTimestamp(StartTimestamp.MilliSeconds()); + + result->Record.SetClientNode(PeerName); + result->Record.SetProxyNodeId(ctx.SelfID.NodeId()); + + for (auto& p : Partitions) { + auto part = result->Record.AddPartition(); + part->SetTopic(p.first.first); + part->SetPartition(p.first.second); + part->SetAssignId(0); + for (auto& c : NextCommits) { + part->AddNextCommits(c); + } + part->SetReadIdCommitted(ReadIdCommitted); + part->SetLastReadId(ReadIdToResponse - 1); + part->SetTimestampMs(0); + } + + ctx.Send(ev->Sender, result.Release()); +} + +void TReadSessionActor::Handle(TEvPQProxy::TEvReadInit::TPtr& ev, const TActorContext& ctx) { + + THolder<TEvPQProxy::TEvReadInit> event(ev->Release()); + + if (!Topics.empty()) { + //answer error + CloseSession("got second init request", 
NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + + const auto& init = event->Request.GetInit(); + + if (!init.TopicsSize()) { + CloseSession("no topics in init request", NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + + if (init.GetClientId().empty()) { + CloseSession("no clientId in init request", NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + + if (init.GetProxyCookie() != ctx.SelfID.NodeId() && init.GetProxyCookie() != MAGIC_COOKIE_VALUE) { + CloseSession("you must perform a ChooseProxy request first and connect to the returned ProxyName server with its ProxyCookie", NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + + // ToDo[migration] - consider separate consumer conversion logic - ? + if (AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { + ClientPath = init.GetClientId(); + ExternalClientId = ClientPath; + InternalClientId = ConvertNewConsumerName(init.GetClientId()); + } else { + ClientPath = StripLeadSlash(MakeConsumerPath(init.GetClientId())); + ExternalClientId = ClientPath; + InternalClientId = ConvertNewConsumerName(init.GetClientId()); + } + + Auth = event->Request.GetCredentials(); + event->Request.ClearCredentials(); + Y_PROTOBUF_SUPPRESS_NODISCARD Auth.SerializeToString(&AuthStr); + TStringBuilder session; + session << ExternalClientId << "_" << ctx.SelfID.NodeId() << "_" << Cookie << "_" << TAppData::RandomProvider->GenRand64(); + Session = session; + ProtocolVersion = init.GetProtocolVersion(); + CommitsDisabled = init.GetCommitsDisabled(); + + if (ProtocolVersion >= NPersQueue::TReadRequest::ReadParamsInInit) { + ReadSettingsInited = true; + MaxReadMessagesCount = NormalizeMaxReadMessagesCount(init.GetMaxReadMessagesCount()); + MaxReadSize = NormalizeMaxReadSize(init.GetMaxReadSize()); + MaxReadPartitionsCount = NormalizeMaxReadPartitionsCount(init.GetMaxReadPartitionsCount()); + MaxTimeLagMs = init.GetMaxTimeLagMs(); + ReadTimestampMs = init.GetReadTimestampMs(); + } + + PeerName = event->PeerName; + Database = event->Database; + + ReadOnlyLocal = init.GetReadOnlyLocal(); + + if (init.GetCommitIntervalMs()) { + CommitInterval = Min(CommitInterval, TDuration::MilliSeconds(init.GetCommitIntervalMs())); + } + + for (ui32 i = 0; i < init.PartitionGroupsSize(); ++i) { + Groups.push_back(init.GetPartitionGroups(i)); + } + THashSet<TString> topicsToResolve; + for (ui32 i = 0; i < init.TopicsSize(); ++i) { + const auto& t = init.GetTopics(i); + + if (t.empty()) { + CloseSession("empty topic in init request", NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + + topicsToResolve.insert(t); + } + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " init: " << event->Request << " from " << PeerName); + + ClientsideLocksAllowed = init.GetClientsideLocksAllowed(); + BalanceRightNow = init.GetBalancePartitionRightNow() || CommitsDisabled; + + if (!AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { + SetupCounters(); + } + + if (Auth.GetCredentialsCase() == NPersQueueCommon::TCredentials::CREDENTIALS_NOT_SET) { + LOG_WARN_S(ctx, NKikimrServices::PQ_READ_PROXY, "session without AuthInfo: " << ExternalClientId << " from " << PeerName); + if (SessionsWithoutAuth) { + ++(*SessionsWithoutAuth); + } + if (AppData(ctx)->PQConfig.GetRequireCredentialsInNewProtocol()) { + CloseSession("Unauthenticated access is forbidden, please provide credentials", NPersQueue::NErrorCode::ACCESS_DENIED, ctx); + return; + } + } + TopicsList = TopicsHandler.GetReadTopicsList( + topicsToResolve, ReadOnlyLocal, Database + ); + if (!TopicsList.IsValid) { + 
return CloseSession( + TopicsList.Reason, + NPersQueue::NErrorCode::BAD_REQUEST, ctx + ); + } + SendAuthRequest(ctx); + + auto subGroup = GetServiceCounters(Counters, "pqproxy|SLI"); + Aggr = {{{{"Account", ClientPath.substr(0, ClientPath.find("/"))}}, {"total"}}}; + SLITotal = NKikimr::NPQ::TMultiCounter(subGroup, Aggr, {}, {"RequestsTotal"}, true, "sensor", false); + SLIErrors = NKikimr::NPQ::TMultiCounter(subGroup, Aggr, {}, {"RequestsError"}, true, "sensor", false); + + SLITotal.Inc(); +} + + +void TReadSessionActor::SendAuthRequest(const TActorContext& ctx) { + AuthInitActor = {}; + AuthInflight = true; + + if (Auth.GetCredentialsCase() == NPersQueueCommon::TCredentials::CREDENTIALS_NOT_SET) { + Token = nullptr; + CreateInitAndAuthActor(ctx); + return; + } + auto database = Database.empty() ? NKikimr::NPQ::GetDatabaseFromConfig(AppData(ctx)->PQConfig) : Database; + Y_ABORT_UNLESS(TopicsList.IsValid); + TVector<TDiscoveryConverterPtr> topics; + for(const auto& t : TopicsList.Topics) { + if (topics.size() >= 10) { + break; + } + topics.push_back(t.second); + } + ctx.Send(PqMetaCache, new TEvDescribeTopicsRequest(topics, false)); +} + + + +void TReadSessionActor::HandleDescribeTopicsResponse(TEvDescribeTopicsResponse::TPtr& ev, const TActorContext& ctx) { + TString dbId, folderId; + for (const auto& entry : ev->Get()->Result->ResultSet) { + if (!entry.PQGroupInfo) + continue; + auto& pqDescr = entry.PQGroupInfo->Description; + dbId = pqDescr.GetPQTabletConfig().GetYdbDatabaseId(); + folderId = pqDescr.GetPQTabletConfig().GetYcFolderId(); + break; + } + + auto entries = NKikimr::NGRpcProxy::V1::GetTicketParserEntries(dbId, folderId); + + TString ticket; + switch (Auth.GetCredentialsCase()) { + case NPersQueueCommon::TCredentials::kTvmServiceTicket: + ticket = Auth.GetTvmServiceTicket(); + break; + case NPersQueueCommon::TCredentials::kOauthToken: + ticket = Auth.GetOauthToken(); + break; + default: + CloseSession("Unknown Credentials case", NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + + ctx.Send(MakeTicketParserID(), new TEvTicketParser::TEvAuthorizeTicket({ + .Database = Database, + .Ticket = ticket, + .PeerName = PeerName, + .Entries = entries + })); +} + +void TReadSessionActor::CreateInitAndAuthActor(const TActorContext& ctx) { + auto database = Database.empty() ? NKikimr::NPQ::GetDatabaseFromConfig(AppData(ctx)->PQConfig) : Database; + AuthInitActor = ctx.Register(new V1::TReadInitAndAuthActor( + ctx, ctx.SelfID, InternalClientId, Cookie, Session, PqMetaCache, NewSchemeCache, Counters, Token, + TopicsList, TopicsHandler.GetLocalCluster() + )); +} + +void TReadSessionActor::Handle(TEvTicketParser::TEvAuthorizeTicketResult::TPtr& ev, const TActorContext& ctx) { + TString ticket = ev->Get()->Ticket; + TString maskedTicket = ticket.size() > 5 ? (ticket.substr(0, 5) + "***" + ticket.substr(ticket.size() - 5)) : "***"; + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "CheckACL ticket " << maskedTicket << " got result from TICKET_PARSER response: error: " << ev->Get()->Error << " user: " + << (ev->Get()->Error.empty() ? 
ev->Get()->Token->GetUserSID() : "")); + + if (!ev->Get()->Error.empty()) { + CloseSession(TStringBuilder() << "Ticket parsing error: " << ev->Get()->Error, NPersQueue::NErrorCode::ACCESS_DENIED, ctx); + return; + } + Token = ev->Get()->Token; + CreateInitAndAuthActor(ctx); +} + + +void TReadSessionActor::RegisterSession(const TActorId& pipe, const TString& topic, const TActorContext& ctx) { + + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " register session to " << topic); + THolder<TEvPersQueue::TEvRegisterReadSession> request; + request.Reset(new TEvPersQueue::TEvRegisterReadSession); + auto& req = request->Record; + req.SetSession(Session); + req.SetClientNode(PeerName); + ActorIdToProto(pipe, req.MutablePipeClient()); + req.SetClientId(InternalClientId); + + for (ui32 i = 0; i < Groups.size(); ++i) { + req.AddGroups(Groups[i]); + } + + NTabletPipe::SendData(ctx, pipe, request.Release()); +} + +void TReadSessionActor::RegisterSessions(const TActorContext& ctx) { + InitDone = true; + + for (auto& t : Topics) { + auto& topic = t.first; + RegisterSession(t.second.PipeClient, topic, ctx); + NumPartitionsFromTopic[topic] = 0; + } +} + + +void TReadSessionActor::SetupCounters() +{ + if (SessionsCreated) { + return; + } + + auto subGroup = GetServiceCounters(Counters, "pqproxy|readSession")->GetSubgroup("Client", InternalClientId)->GetSubgroup("ConsumerPath", ClientPath); + SessionsCreated = subGroup->GetExpiringCounter("SessionsCreated", true); + SessionsActive = subGroup->GetExpiringCounter("SessionsActive", false); + SessionsWithoutAuth = subGroup->GetExpiringCounter("WithoutAuth", true); + SessionsWithOldBatchingVersion = subGroup->GetExpiringCounter("SessionsWithOldBatchingVersion", true); // monitoring to ensure that old version is not used anymore + Errors = subGroup->GetExpiringCounter("Errors", true); + PipeReconnects = subGroup->GetExpiringCounter("PipeReconnects", true); + + BytesInflight = subGroup->GetExpiringCounter("BytesInflight", false); + + PartsPerSession = NKikimr::NPQ::TPercentileCounter(subGroup->GetSubgroup("sensor", "PartsPerSession"), {}, {}, "Count", + TVector<std::pair<ui64, TString>>{{1, "1"}, {2, "2"}, {5, "5"}, + {10, "10"}, {20, "20"}, {50, "50"}, {70, "70"}, + {100, "100"}, {150, "150"}, {300,"300"}, {99999999, "99999999"}}, false); + + ++(*SessionsCreated); + ++(*SessionsActive); + PartsPerSession.IncFor(Partitions.size(), 1); //for 0 + + if (ProtocolVersion < NPersQueue::TReadRequest::Batching) { + ++(*SessionsWithOldBatchingVersion); + } +} + + +void TReadSessionActor::SetupTopicCounters(const TTopicConverterPtr& topic) +{ + auto& topicCounters = TopicCounters[topic->GetInternalName()]; + auto subGroup = GetServiceCounters(Counters, "pqproxy|readSession"); +//client/consumerPath Account/Producer OriginDC Topic/TopicPath + + auto aggr = GetLabels(topic); + TVector<std::pair<TString, TString>> cons = {{"Client", InternalClientId}, {"ConsumerPath", ClientPath}}; + + topicCounters.PartitionsLocked = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"PartitionsLocked"}, true); + topicCounters.PartitionsReleased = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"PartitionsReleased"}, true); + topicCounters.PartitionsToBeReleased = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"PartitionsToBeReleased"}, false); + topicCounters.PartitionsToBeLocked = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"PartitionsToBeLocked"}, false); + topicCounters.PartitionsInfly = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"PartitionsInfly"}, 
false); + topicCounters.Errors = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"PartitionsErrors"}, true); + topicCounters.Commits = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"Commits"}, true); + topicCounters.WaitsForData = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"WaitsForData"}, true); +} + +void TReadSessionActor::SetupTopicCounters(const TTopicConverterPtr& topic, const TString& cloudId, + const TString& dbId, const TString& dbPath, const bool isServerless, const TString& folderId) +{ + auto& topicCounters = TopicCounters[topic->GetInternalName()]; + auto subGroup = NPersQueue::GetCountersForTopic(Counters, isServerless); +//client/consumerPath Account/Producer OriginDC Topic/TopicPath + auto subgroups = GetSubgroupsForTopic(topic, cloudId, dbId, dbPath, folderId); + subgroups.push_back({"consumer", ClientPath}); + + topicCounters.PartitionsLocked = NKikimr::NPQ::TMultiCounter(subGroup, {}, subgroups, {"api.grpc.topic.stream_read.partition_session.started"}, true, "name"); + topicCounters.PartitionsReleased = NKikimr::NPQ::TMultiCounter(subGroup, {}, subgroups, {"api.grpc.topic.stream_read.partition_session.stopped"}, true, "name"); + topicCounters.PartitionsToBeReleased = NKikimr::NPQ::TMultiCounter(subGroup, {}, subgroups, {"api.grpc.topic.stream_read.partition_session.stopping_count"}, false, "name"); + topicCounters.PartitionsToBeLocked = NKikimr::NPQ::TMultiCounter(subGroup, {}, subgroups, {"api.grpc.topic.stream_read.partition_session.starting_count"}, false, "name"); + topicCounters.PartitionsInfly = NKikimr::NPQ::TMultiCounter(subGroup, {}, subgroups, {"api.grpc.topic.stream_read.partition_session.count"}, false, "name"); + topicCounters.Errors = NKikimr::NPQ::TMultiCounter(subGroup, {}, subgroups, {"api.grpc.topic.stream_read.partition_session.errors"}, true, "name"); + topicCounters.Commits = NKikimr::NPQ::TMultiCounter(subGroup, {}, subgroups, {"api.grpc.topic.stream_read.commits"}, true, "name"); +} + +void TReadSessionActor::Handle(V1::TEvPQProxy::TEvAuthResultOk::TPtr& ev, const TActorContext& ctx) { + + LastACLCheckTimestamp = ctx.Now(); + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " auth ok, got " << ev->Get()->TopicAndTablets.size() << " topics, init done " << InitDone); + + AuthInitActor = TActorId(); + AuthInflight = false; + + if (!InitDone) { + + ui32 initBorder = AppData(ctx)->PQConfig.GetReadInitLatencyBigMs(); + ui32 readBorder = AppData(ctx)->PQConfig.GetReadLatencyBigMs(); + ui32 readBorderFromDisk = AppData(ctx)->PQConfig.GetReadLatencyFromDiskBigMs(); + + auto subGroup = GetServiceCounters(Counters, "pqproxy|SLI"); + InitLatency = NKikimr::NPQ::CreateSLIDurationCounter(subGroup, Aggr, "ReadInit", initBorder, {100, 200, 500, 1000, 1500, 2000, 5000, 10000, 30000, 99999999}); + CommitLatency = NKikimr::NPQ::CreateSLIDurationCounter(subGroup, Aggr, "Commit", AppData(ctx)->PQConfig.GetCommitLatencyBigMs(), {100, 200, 500, 1000, 1500, 2000, 5000, 10000, 30000, 99999999}); + SLIBigLatency = NKikimr::NPQ::TMultiCounter(subGroup, Aggr, {}, {"RequestsBigLatency"}, true, "sensor", false); + ReadLatency = NKikimr::NPQ::CreateSLIDurationCounter(subGroup, Aggr, "Read", readBorder, {100, 200, 500, 1000, 1500, 2000, 5000, 10000, 30000, 99999999}); + ReadLatencyFromDisk = NKikimr::NPQ::CreateSLIDurationCounter(subGroup, Aggr, "ReadFromDisk", readBorderFromDisk, {100, 200, 500, 1000, 1500, 2000, 5000, 10000, 30000, 99999999}); + SLIBigReadLatency = NKikimr::NPQ::TMultiCounter(subGroup, Aggr, {}, {"ReadBigLatency"}, true, 
"sensor", false); + ReadsTotal = NKikimr::NPQ::TMultiCounter(subGroup, Aggr, {}, {"ReadsTotal"}, true, "sensor", false); + + ui32 initDurationMs = (ctx.Now() - StartTime).MilliSeconds(); + InitLatency.IncFor(initDurationMs, 1); + if (initDurationMs >= initBorder) { + SLIBigLatency.Inc(); + } + + + TReadResponse result; + result.MutableInit()->SetSessionId(Session); + ui64 diff = result.ByteSize(); + BytesInflight_ += diff; + if (BytesInflight) (*BytesInflight) += diff; + + Handler->Reply(result); + + Handler->ReadyForNextRead(); + + Y_ABORT_UNLESS(!BalancersInitStarted); + BalancersInitStarted = true; + + for (auto& [name, t] : ev->Get()->TopicAndTablets) { + auto& topicHolder = Topics[t.TopicNameConverter->GetInternalName()]; + topicHolder.TabletID = t.TabletID; + topicHolder.CloudId = t.CloudId; + topicHolder.DbId = t.DbId; + topicHolder.DbPath = t.DbPath; + topicHolder.IsServerless = t.IsServerless; + topicHolder.FolderId = t.FolderId; + topicHolder.FullConverter = t.TopicNameConverter; + FullPathToConverter[t.TopicNameConverter->GetPrimaryPath()] = t.TopicNameConverter; + const auto& second = t.TopicNameConverter->GetSecondaryPath(); + if (!second.empty()) { + FullPathToConverter[second] = t.TopicNameConverter; + } + } + + for (auto& t : Topics) { + NTabletPipe::TClientConfig clientConfig; + + clientConfig.CheckAliveness = false; + + clientConfig.RetryPolicy = RetryPolicyForPipes; + t.second.PipeClient = ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, t.second.TabletID, clientConfig)); + } + + RegisterSessions(ctx); + + ctx.Schedule(Min(CommitInterval, CHECK_ACL_DELAY), new TEvents::TEvWakeup()); + } else { + for (auto& [name, t] : ev->Get()->TopicAndTablets) { + if (Topics.find(t.TopicNameConverter->GetInternalName()) == Topics.end()) { + CloseSession(TStringBuilder() << "list of topics changed - new topic '" << + t.TopicNameConverter->GetInternalName() << "' found", + NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + } + } +} + + +void TReadSessionActor::Handle(TEvPersQueue::TEvLockPartition::TPtr& ev, const TActorContext& ctx) { + + auto& record = ev->Get()->Record; + Y_ABORT_UNLESS(record.GetSession() == Session); + Y_ABORT_UNLESS(record.GetClientId() == InternalClientId); + + TActorId pipe = ActorIdFromProto(record.GetPipeClient()); + auto path = record.GetPath(); + if (path.empty()) { + path = record.GetTopic(); + } + + auto converterIter = FullPathToConverter.find(NPersQueue::NormalizeFullPath(path)); + if (converterIter.IsEnd()) { + LOG_ALERT_S( + ctx, NKikimrServices::PQ_READ_PROXY, + PQ_LOG_PREFIX << " ignored ev lock for event = " << record.ShortDebugString() << " path not recognized" + ); + CloseSession( + TStringBuilder() << "Internal server error, cannot parse lock event: " << record.ShortDebugString() << ", reason: topic not found", + NPersQueue::NErrorCode::ERROR, ctx + ); + return; + } + //auto topic = converterIter->second->GetClientsideName(); + auto intName = converterIter->second->GetInternalName(); + Y_ABORT_UNLESS(!intName.empty()); + auto jt = Topics.find(intName); + + if (jt == Topics.end() || pipe != jt->second.PipeClient) { //this is message from old version of pipe + LOG_DEBUG_S( + ctx, NKikimrServices::PQ_READ_PROXY, + PQ_LOG_PREFIX << " ignored ev lock for topic = " << converterIter->second->GetPrintableString() + << " path recognized, but topic is unknown, this is unexpected" + ); + return; + } + // ToDo[counters] + if (NumPartitionsFromTopic[intName]++ == 0) { + if (AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { + 
SetupTopicCounters(converterIter->second, jt->second.CloudId, jt->second.DbId, jt->second.DbPath, jt->second.IsServerless, jt->second.FolderId); + } else { + SetupTopicCounters(converterIter->second); + } + } + + auto it = TopicCounters.find(intName); + Y_ABORT_UNLESS(it != TopicCounters.end()); + + IActor* partitionActor = new TPartitionActor( + ctx.SelfID, InternalClientId, Cookie, Session, record.GetGeneration(), + record.GetStep(), jt->second.FullConverter, record.GetPartition(), record.GetTabletId(), it->second, + ClientDC + ); + + TActorId actorId = ctx.Register(partitionActor); + if (SessionsActive) { + PartsPerSession.DecFor(Partitions.size(), 1); + } + Y_ABORT_UNLESS(record.GetGeneration() > 0); + //Partitions use clientside name ! + auto pp = Partitions.insert({ + std::make_pair(jt->second.FullConverter->GetClientsideName(), record.GetPartition()), + TPartitionActorInfo{actorId, (((ui64)record.GetGeneration()) << 32) + record.GetStep(), jt->second.FullConverter} + }); + Y_ABORT_UNLESS(pp.second); + if (SessionsActive) { + PartsPerSession.IncFor(Partitions.size(), 1); + } + + bool res = ActualPartitionActors.insert(actorId).second; + Y_ABORT_UNLESS(res); + + it->second.PartitionsLocked.Inc(); + it->second.PartitionsInfly.Inc(); + + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " lock: " << record); + + ctx.Send(actorId, new TEvPQProxy::TEvLockPartition(0, 0, false, !ClientsideLocksAllowed)); +} + +void TReadSessionActor::Handle(TEvPQProxy::TEvPartitionStatus::TPtr& ev, const TActorContext&) { + if (!ActualPartitionActor(ev->Sender)) + return; + + auto& evTopic = ev->Get()->Topic; + auto it = Partitions.find(std::make_pair(evTopic->GetClientsideName(), ev->Get()->Partition)); + Y_ABORT_UNLESS(it != Partitions.end()); + Y_ABORT_UNLESS(it->second.LockGeneration); + + if (it->second.Releasing) //lock request for already released partition - ignore + return; + + if (ev->Get()->Init) { + Y_ABORT_UNLESS(!it->second.LockSent); + + it->second.LockSent = true; + auto topicIter = Topics.find(evTopic->GetInternalName()); + Y_ABORT_UNLESS(topicIter != Topics.end()); + Y_ABORT_UNLESS(ClientsideLocksAllowed); + TReadResponse result; + auto lock = result.MutableLock(); + lock->SetTopic(topicIter->second.FullConverter->GetClientsideName()); + lock->SetPartition(ev->Get()->Partition); + lock->SetReadOffset(ev->Get()->Offset); + lock->SetEndOffset(ev->Get()->EndOffset); + lock->SetGeneration(it->second.LockGeneration); + auto jt = PartitionToReadResponse.find(it->second.Actor); + if (jt == PartitionToReadResponse.end()) { + ui64 diff = result.ByteSize(); + BytesInflight_ += diff; + if (BytesInflight) (*BytesInflight) += diff; + Handler->Reply(result); + } else { + jt->second->ControlMessages.push_back(result); + } + } else { + Y_ABORT_UNLESS(it->second.LockSent); + TReadResponse result; + auto status = result.MutablePartitionStatus(); + status->SetTopic(ev->Get()->Topic->GetClientsideName()); + status->SetPartition(ev->Get()->Partition); + status->SetEndOffset(ev->Get()->EndOffset); + status->SetGeneration(it->second.LockGeneration); + status->SetCommittedOffset(ev->Get()->Offset); + status->SetWriteWatermarkMs(ev->Get()->WriteTimestampEstimateMs); + auto jt = PartitionToReadResponse.find(it->second.Actor); + if (jt == PartitionToReadResponse.end()) { + ui64 diff = result.ByteSize(); + BytesInflight_ += diff; + if (BytesInflight) (*BytesInflight) += diff; + Handler->Reply(result); + } else { + jt->second->ControlMessages.push_back(result); + } + + } +} + +void 
TReadSessionActor::Handle(TEvPersQueue::TEvError::TPtr& ev, const TActorContext& ctx) { + CloseSession(ev->Get()->Record.GetDescription(), ev->Get()->Record.GetCode(), ctx); +} + + +void TReadSessionActor::Handle(TEvPersQueue::TEvReleasePartition::TPtr& ev, const TActorContext& ctx) { + auto& record = ev->Get()->Record; + Y_ABORT_UNLESS(record.GetSession() == Session); + Y_ABORT_UNLESS(record.GetClientId() == InternalClientId); + auto topic = record.GetPath(); + if (topic.empty()) { + topic = record.GetTopic(); + } + ui32 group = record.HasGroup() ? record.GetGroup() : 0; + + auto converterIter = FullPathToConverter.find(NPersQueue::NormalizeFullPath(topic)); + if (converterIter.IsEnd()) { + LOG_ALERT_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " Failed to parse balancer response: " << record.ShortDebugString()); + CloseSession( + TStringBuilder() << "Internal server error, cannot parse release event: " << record.ShortDebugString() << ", path not recognized", + NPersQueue::NErrorCode::ERROR, ctx + ); + return; + } + auto name = converterIter->second->GetInternalName(); + auto clientName = converterIter->second->GetClientsideName(); + + auto it = Topics.find(name); + Y_ABORT_UNLESS(it != Topics.end()); + + TActorId pipe = ActorIdFromProto(record.GetPipeClient()); + + if (pipe != it->second.PipeClient) { //this is message from old version of pipe + return; + } + + for (ui32 c = 0; c < record.GetCount(); ++c) { + Y_ABORT_UNLESS(!Partitions.empty()); + + TActorId actorId = TActorId{}; + auto jt = Partitions.begin(); + ui32 i = 0; + for (auto it = Partitions.begin(); it != Partitions.end(); ++it) { + if (it->first.first == clientName && !it->second.Releasing && (group == 0 || it->first.second + 1 == group)) { + ++i; + if (rand() % i == 0) { //will lead to 1/n probability for each of n partitions + actorId = it->second.Actor; + jt = it; + } + } + } + Y_ABORT_UNLESS(actorId); + + { + auto it = TopicCounters.find(name); + Y_ABORT_UNLESS(it != TopicCounters.end()); + it->second.PartitionsToBeReleased.Inc(); + } + + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " releasing " << jt->first.first << ":" << jt->first.second); + jt->second.Releasing = true; + + ctx.Send(actorId, new TEvPQProxy::TEvReleasePartition()); + if (ClientsideLocksAllowed && jt->second.LockSent && !jt->second.Reading) { //locked and no active reads + if (!ProcessReleasePartition(jt, BalanceRightNow, false, ctx)) { // returns false if actor died + return; + } + } + } + AnswerForCommitsIfCan(ctx); // in case of killing partition +} + + +void TReadSessionActor::Handle(TEvPQProxy::TEvPartitionReleased::TPtr& ev, const TActorContext& ctx) { + if (!ActualPartitionActor(ev->Sender)) + return; + + const auto& topic = ev->Get()->Topic; + const ui32 partition = ev->Get()->Partition; + + auto jt = Partitions.find(std::make_pair(topic->GetClientsideName(), partition)); + Y_ABORT_UNLESS(jt != Partitions.end(), "session %s topic %s part %u", Session.c_str(), topic->GetInternalName().c_str(), partition); + Y_ABORT_UNLESS(jt->second.Releasing); + jt->second.Released = true; + + { + auto it = TopicCounters.find(topic->GetInternalName()); + Y_ABORT_UNLESS(it != TopicCounters.end()); + it->second.PartitionsReleased.Inc(); + it->second.PartitionsInfly.Dec(); + it->second.PartitionsToBeReleased.Dec(); + + } + + InformBalancerAboutRelease(jt, ctx); + + DropPartitionIfNeeded(jt, ctx); +} + +void TReadSessionActor::InformBalancerAboutRelease(const THashMap<std::pair<TString, ui32>, TPartitionActorInfo>::iterator& it, const 
TActorContext& ctx) { + + THolder<TEvPersQueue::TEvPartitionReleased> request; + request.Reset(new TEvPersQueue::TEvPartitionReleased); + auto& req = request->Record; + + auto jt = Topics.find(it->second.Converter->GetInternalName()); + Y_ABORT_UNLESS(jt != Topics.end()); + + req.SetSession(Session); + ActorIdToProto(jt->second.PipeClient, req.MutablePipeClient()); + req.SetClientId(InternalClientId); + req.SetTopic(it->first.first); + req.SetPartition(it->first.second); + + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " released: " << it->first.first << ":" << it->first.second); + + NTabletPipe::SendData(ctx, jt->second.PipeClient, request.Release()); +} + + +void TReadSessionActor::CloseSession(const TString& errorReason, const NPersQueue::NErrorCode::EErrorCode errorCode, const NActors::TActorContext& ctx) { + + if (errorCode != NPersQueue::NErrorCode::OK) { + + if (InternalErrorCode(errorCode)) { + SLIErrors.Inc(); + } + + if (Errors) { + ++(*Errors); + } else { + if (!AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { + ++(*GetServiceCounters(Counters, "pqproxy|readSession")->GetCounter("Errors", true)); + } + } + + TReadResponse result; + + auto error = result.MutableError(); + error->SetDescription(errorReason); + error->SetCode(errorCode); + + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " closed with error reason: " << errorReason); + if (!Handler->IsShuttingDown()) { + ui64 diff = result.ByteSize(); + BytesInflight_ += diff; + if (BytesInflight) (*BytesInflight) += diff; + Handler->Reply(result); + } else { + LOG_WARN_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " GRpc server is shutting down, skip reply"); + } + } else { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " closed"); + } + + Die(ctx); +} + + +void TReadSessionActor::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) { + TEvTabletPipe::TEvClientConnected *msg = ev->Get(); + if (msg->Status != NKikimrProto::OK) { + if (msg->Dead) { + CloseSession(TStringBuilder() << "one of the topics is deleted, tablet " << msg->TabletId, NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + //TODO: remove the CloseSession below - until it is removed, the ProcessBalancerDead call that follows is unreachable + CloseSession(TStringBuilder() << "unable to connect to one of the topics, tablet " << msg->TabletId, NPersQueue::NErrorCode::ERROR, ctx); + return; + + const bool isAlive = ProcessBalancerDead(msg->TabletId, ctx); // returns false if actor died + Y_UNUSED(isAlive); + return; + } +} + +bool TReadSessionActor::ActualPartitionActor(const TActorId& part) { + return ActualPartitionActors.contains(part); +} + + +bool TReadSessionActor::ProcessReleasePartition(const THashMap<std::pair<TString, ui32>, TPartitionActorInfo>::iterator& it, + bool kill, bool couldBeReads, const TActorContext& ctx) +{ + //inform the client + if (ClientsideLocksAllowed && it->second.LockSent) { + TReadResponse result; + result.MutableRelease()->SetTopic(it->first.first); + result.MutableRelease()->SetPartition(it->first.second); + result.MutableRelease()->SetCanCommit(!kill); + result.MutableRelease()->SetGeneration(it->second.LockGeneration); + auto jt = PartitionToReadResponse.find(it->second.Actor); + if (jt == PartitionToReadResponse.end()) { + ui64 diff = result.ByteSize(); + BytesInflight_ += diff; + if (BytesInflight) (*BytesInflight) += diff; + Handler->Reply(result); + } else { + jt->second->ControlMessages.push_back(result); + } + it->second.LockGeneration = 0; + it->second.LockSent = false; + } + + if (!kill) { + return true; + } + + { + auto jt = 
TopicCounters.find(it->second.Converter->GetInternalName()); + Y_ABORT_UNLESS(jt != TopicCounters.end()); + jt->second.PartitionsReleased.Inc(); + jt->second.PartitionsInfly.Dec(); + if (!it->second.Released && it->second.Releasing) { + jt->second.PartitionsToBeReleased.Dec(); + } + } + + //process commits + for (auto& c : it->second.Commits) { + auto kt = Commits.find(c); + Y_ABORT_UNLESS(kt != Commits.end()); + --kt->second.Partitions; + } + it->second.Commits.clear(); + + Y_ABORT_UNLESS(couldBeReads || !it->second.Reading); + //process reads + TFormedReadResponse::TPtr formedResponseToAnswer; + if (it->second.Reading) { + const auto readIt = PartitionToReadResponse.find(it->second.Actor); + Y_ABORT_UNLESS(readIt != PartitionToReadResponse.end()); + if (--readIt->second->RequestsInfly == 0) { + formedResponseToAnswer = readIt->second; + } + } + + InformBalancerAboutRelease(it, ctx); + + it->second.Released = true; //to force drop + DropPartitionIfNeeded(it, ctx); //partition will be dropped + + if (formedResponseToAnswer) { + return ProcessAnswer(ctx, formedResponseToAnswer); // returns false if actor died + } + return true; +} + + +bool TReadSessionActor::ProcessBalancerDead(const ui64 tablet, const TActorContext& ctx) { + for (auto& t : Topics) { + if (t.second.TabletID == tablet) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " balancer for topic " << t.first << " is dead, restarting all from this topic"); + + //Drop all partitions from this topic + for (auto it = Partitions.begin(); it != Partitions.end();) { + if (it->second.Converter->GetInternalName() == t.first) { //partition from this topic + // kill actor + auto jt = it; + ++it; + if (!ProcessReleasePartition(jt, true, true, ctx)) { // returns false if actor died + return false; + } + } else { + ++it; + } + } + + AnswerForCommitsIfCan(ctx); + + //reconnect pipe + NTabletPipe::TClientConfig clientConfig; + clientConfig.CheckAliveness = false; + clientConfig.RetryPolicy = RetryPolicyForPipes; + t.second.PipeClient = ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, t.second.TabletID, clientConfig)); + if (InitDone) { + if (PipeReconnects) { + ++(*PipeReconnects); + ++(*Errors); + } + + RegisterSession(t.second.PipeClient, t.first, ctx); + } + } + } + return true; +} + + +void TReadSessionActor::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) { + const bool isAlive = ProcessBalancerDead(ev->Get()->TabletId, ctx); // returns false if actor died + Y_UNUSED(isAlive); +} + +void TReadSessionActor::ProcessAuth(const NPersQueueCommon::TCredentials& auth) { + TString tmp; + Y_PROTOBUF_SUPPRESS_NODISCARD auth.SerializeToString(&tmp); + if (auth.GetCredentialsCase() != NPersQueueCommon::TCredentials::CREDENTIALS_NOT_SET && tmp != AuthStr) { + Auth = auth; + AuthStr = tmp; + ForceACLCheck = true; + } +} + +void TReadSessionActor::Handle(TEvPQProxy::TEvRead::TPtr& ev, const TActorContext& ctx) { + RequestNotChecked = true; + + THolder<TEvPQProxy::TEvRead> event(ev->Release()); + + Handler->ReadyForNextRead(); + + + ProcessAuth(event->Request.GetCredentials()); + event->Request.ClearCredentials(); + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got read request: " << event->Request.GetRead() << " with guid: " << event->Guid); + + Reads.emplace_back(event.Release()); + + const bool isAlive = ProcessReads(ctx); // returns false if actor died + Y_UNUSED(isAlive); +} + + +i64 TReadSessionActor::TFormedReadResponse::ApplyResponse(NPersQueue::TReadResponse&& 
resp) { + Y_ABORT_UNLESS(resp.GetBatchedData().PartitionDataSize() == 1); + Response.MutableBatchedData()->AddPartitionData()->Swap(resp.MutableBatchedData()->MutablePartitionData(0)); + i64 prev = Response.ByteSize(); + std::swap<i64>(prev, ByteSize); + return ByteSize - prev; +} + + +void TReadSessionActor::Handle(TEvPQProxy::TEvReadResponse::TPtr& ev, const TActorContext& ctx) { + TActorId sender = ev->Sender; + if (!ActualPartitionActor(sender)) + return; + + THolder<TEvPQProxy::TEvReadResponse> event(ev->Release()); + + Y_ABORT_UNLESS(event->Response.GetBatchedData().GetCookie() == 0); // cookie is not assigned + Y_ABORT_UNLESS(event->Response.GetBatchedData().PartitionDataSize() == 1); + + const TString topic = event->Response.GetBatchedData().GetPartitionData(0).GetTopic(); + const ui32 partition = event->Response.GetBatchedData().GetPartitionData(0).GetPartition(); + std::pair<TString, ui32> key(topic, partition); + // Topic is expected to have clientSide name + const auto partitionIt = Partitions.find(key); + Y_ABORT_UNLESS(partitionIt != Partitions.end()); + Y_ABORT_UNLESS(partitionIt->second.Reading); + partitionIt->second.Reading = false; + + auto it = PartitionToReadResponse.find(sender); + Y_ABORT_UNLESS(it != PartitionToReadResponse.end()); + + TFormedReadResponse::TPtr formedResponse = it->second; + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " read done guid " << formedResponse->Guid + << " " << key.first << ":" << key.second + << " size " << event->Response.ByteSize()); + + const i64 diff = formedResponse->ApplyResponse(std::move(event->Response)); + if (event->FromDisk) { + formedResponse->FromDisk = true; + } + formedResponse->WaitQuotaTime = Max(formedResponse->WaitQuotaTime, event->WaitQuotaTime); + --formedResponse->RequestsInfly; + formedResponse->Offsets.PartitionOffsets.emplace_back(sender, topic, partition, event->NextReadOffset); + + BytesInflight_ += diff; + if (BytesInflight) (*BytesInflight) += diff; + + if (ClientsideLocksAllowed && partitionIt->second.LockSent && partitionIt->second.Releasing) { //locked and need to be released + if (!ProcessReleasePartition(partitionIt, BalanceRightNow, false, ctx)) { // returns false if actor died + return; + } + } + AnswerForCommitsIfCan(ctx); // in case of killing partition + + if (formedResponse->RequestsInfly == 0) { + const bool isAlive = ProcessAnswer(ctx, formedResponse); // returns false if actor died + Y_UNUSED(isAlive); + } +} + + +bool TReadSessionActor::ProcessAnswer(const TActorContext& ctx, TFormedReadResponse::TPtr formedResponse) { + ui32 readDurationMs = (ctx.Now() - formedResponse->Start - formedResponse->WaitQuotaTime).MilliSeconds(); + if (formedResponse->FromDisk) { + ReadLatencyFromDisk.IncFor(readDurationMs, 1); + } else { + ReadLatency.IncFor(readDurationMs, 1); + } + if (readDurationMs >= (formedResponse->FromDisk ? 
AppData(ctx)->PQConfig.GetReadLatencyFromDiskBigMs() : AppData(ctx)->PQConfig.GetReadLatencyBigMs())) { + SLIBigReadLatency.Inc(); + } + + Y_ABORT_UNLESS(formedResponse->RequestsInfly == 0); + i64 diff = formedResponse->Response.ByteSize(); + const bool hasMessages = RemoveEmptyMessages(*formedResponse->Response.MutableBatchedData()); + if (hasMessages) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " assign read id " << ReadIdToResponse << " to read request " << formedResponse->Guid); + formedResponse->Response.MutableBatchedData()->SetCookie(ReadIdToResponse); + // reply to client + if (ProtocolVersion < NPersQueue::TReadRequest::Batching) { + ConvertToOldBatch(formedResponse->Response); + } + diff -= formedResponse->Response.ByteSize(); // Bytes will be tracked inside handler + Handler->Reply(formedResponse->Response); + } else { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " empty read result " << formedResponse->Guid << ", start new reading"); + } + + BytesInflight_ -= diff; + if (BytesInflight) (*BytesInflight) -= diff; + + for (auto& r : formedResponse->ControlMessages) { + ui64 diff = r.ByteSize(); + BytesInflight_ += diff; + if (BytesInflight) (*BytesInflight) += diff; + Handler->Reply(r); + } + + for (const TActorId& p : formedResponse->PartitionsTookPartInRead) { + PartitionToReadResponse.erase(p); + } + + // Bring back available partitions. + // If some partition was removed from partitions container, it is not bad because it will be checked during read processing. + AvailablePartitions.insert(formedResponse->PartitionsBecameAvailable.begin(), formedResponse->PartitionsBecameAvailable.end()); + + formedResponse->Offsets.ReadId = ReadIdToResponse; + + RequestedBytes -= formedResponse->RequestedBytes; + + ReadsInfly--; + + if (hasMessages) { + if (!CommitsDisabled) + Offsets.emplace_back(std::move(formedResponse->Offsets)); // even empty responses are needed for correct offsets commit. + ReadIdToResponse++; + } else { + // process new read + NPersQueue::TReadRequest req; + req.MutableRead(); + Reads.emplace_back(new TEvPQProxy::TEvRead(req, formedResponse->Guid)); // Start new reading request with the same guid + } + + return ProcessReads(ctx); // returns false if actor died +} + + +void TReadSessionActor::Handle(TEvPQProxy::TEvCloseSession::TPtr& ev, const TActorContext& ctx) { + CloseSession(ev->Get()->Reason, ev->Get()->ErrorCode, ctx); +} + +void TReadSessionActor::Handle(V1::TEvPQProxy::TEvCloseSession::TPtr& ev, const TActorContext& ctx) { + CloseSession(ev->Get()->Reason, NErrorCode::EErrorCode(ev->Get()->ErrorCode - 500000), ctx); +} + +ui32 TReadSessionActor::NormalizeMaxReadMessagesCount(ui32 sourceValue) { + ui32 count = Min<ui32>(sourceValue, Max<i32>()); + if (count == 0) { + count = Max<i32>(); + } + return count; +} + +ui32 TReadSessionActor::NormalizeMaxReadSize(ui32 sourceValue) { + ui32 size = Min<ui32>(sourceValue, MAX_READ_SIZE); + if (size == 0) { + size = MAX_READ_SIZE; + } + return size; +} + +ui32 TReadSessionActor::NormalizeMaxReadPartitionsCount(ui32 sourceValue) { + ui32 maxPartitions = sourceValue; + if (maxPartitions == 0) { + maxPartitions = Max<ui32>(); + } + return maxPartitions; +} + +bool TReadSessionActor::CheckAndUpdateReadSettings(const NPersQueue::TReadRequest::TRead& readRequest) { + if (ReadSettingsInited) { // already updated. Check that settings are not changed. 
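 + // Illustrative summary: a follow-up read may either omit the settings entirely + // (all fields zero, i.e. "keep the current ones") or repeat values that normalize + // to the current settings; anything else makes this return false, and ProcessReads + // then closes the session with BAD_REQUEST ("read settings were changed in read request").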
+ const bool hasSettings = readRequest.GetMaxCount() + || readRequest.GetMaxSize() + || readRequest.GetPartitionsAtOnce() + || readRequest.GetMaxTimeLagMs() + || readRequest.GetReadTimestampMs(); + if (!hasSettings) { + return true; + } + + const bool settingsChanged = NormalizeMaxReadMessagesCount(readRequest.GetMaxCount()) != MaxReadMessagesCount + || NormalizeMaxReadSize(readRequest.GetMaxSize()) != MaxReadSize + || NormalizeMaxReadPartitionsCount(readRequest.GetPartitionsAtOnce()) != MaxReadPartitionsCount + || readRequest.GetMaxTimeLagMs() != MaxTimeLagMs + || readRequest.GetReadTimestampMs() != ReadTimestampMs; + return !settingsChanged; + } else { + // Update settings for the first time + ReadSettingsInited = true; + MaxReadMessagesCount = NormalizeMaxReadMessagesCount(readRequest.GetMaxCount()); + MaxReadSize = NormalizeMaxReadSize(readRequest.GetMaxSize()); + MaxReadPartitionsCount = NormalizeMaxReadPartitionsCount(readRequest.GetPartitionsAtOnce()); + MaxTimeLagMs = readRequest.GetMaxTimeLagMs(); + ReadTimestampMs = readRequest.GetReadTimestampMs(); + return true; + } +} + +bool TReadSessionActor::ProcessReads(const TActorContext& ctx) { + while (!Reads.empty() && BytesInflight_ + RequestedBytes < MAX_INFLY_BYTES && ReadsInfly < MAX_INFLY_READS) { + const auto& readRequest = Reads.front()->Request.GetRead(); + if (!CheckAndUpdateReadSettings(readRequest)) { + CloseSession("read settings were changed in read request", NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return false; + } + + if (Offsets.size() >= AppData(ctx)->PQConfig.GetMaxReadCookies() + 10) { + CloseSession(TStringBuilder() << "got more than " << AppData(ctx)->PQConfig.GetMaxReadCookies() << " uncommitted reads", NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return false; + } + + ui32 count = MaxReadMessagesCount; + ui64 size = MaxReadSize; + ui32 maxPartitions = MaxReadPartitionsCount; + ui32 partitionsAsked = 0; + + TFormedReadResponse::TPtr formedResponse = new TFormedReadResponse(Reads.front()->Guid, ctx.Now()); + while (!AvailablePartitions.empty()) { + auto part = *AvailablePartitions.begin(); + AvailablePartitions.erase(AvailablePartitions.begin()); + + auto it = Partitions.find(std::make_pair(part.Topic->GetClientsideName(), part.Partition)); + if (it == Partitions.end() || it->second.Releasing || it->second.Actor != part.Actor) { //this is already released partition + continue; + } + //add this partition to reading + ++partitionsAsked; + + TAutoPtr<TEvPQProxy::TEvRead> read = new TEvPQProxy::TEvRead(Reads.front()->Request, Reads.front()->Guid); + const ui32 ccount = Min<ui32>(part.MsgLag * LAG_GROW_MULTIPLIER, count); + count -= ccount; + const ui64 csize = (ui64)Min<double>(part.SizeLag * LAG_GROW_MULTIPLIER, size); + size -= csize; + + Y_ABORT_UNLESS(csize < Max<i32>()); + auto* readR = read->Request.MutableRead(); + readR->SetMaxCount(ccount); + readR->SetMaxSize(csize); + readR->SetMaxTimeLagMs(MaxTimeLagMs); + readR->SetReadTimestampMs(ReadTimestampMs); + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX + << " performing read request: " << (*readR) << " with guid " << read->Guid + << " from " << part.Topic->GetPrintableString() << ", partition:" << part.Partition + << " count " << ccount << " size " << csize + << " partitionsAsked " << partitionsAsked << " maxTimeLag " << MaxTimeLagMs << "ms"); + + + Y_ABORT_UNLESS(!it->second.Reading); + it->second.Reading = true; + formedResponse->PartitionsTookPartInRead.insert(it->second.Actor); + + RequestedBytes += csize; + 
formedResponse->RequestedBytes += csize; + + ctx.Send(it->second.Actor, read.Release()); + const auto insertResult = PartitionToReadResponse.insert(std::make_pair(it->second.Actor, formedResponse)); + Y_ABORT_UNLESS(insertResult.second); + + if (--maxPartitions == 0 || count == 0 || size == 0) + break; + } + if (partitionsAsked == 0) + break; + ReadsTotal.Inc(); + formedResponse->RequestsInfly = partitionsAsked; + + ReadsInfly++; + + i64 diff = formedResponse->Response.ByteSize(); + BytesInflight_ += diff; + formedResponse->ByteSize = diff; + if (BytesInflight) (*BytesInflight) += diff; + Reads.pop_front(); + } + return true; +} + + +void TReadSessionActor::Handle(TEvPQProxy::TEvPartitionReady::TPtr& ev, const TActorContext& ctx) { + + if (!ActualPartitionActor(ev->Sender)) + return; + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << ev->Get()->Topic->GetPrintableString() + << " partition:" << ev->Get()->Partition << " ready for read with readOffset " + << ev->Get()->ReadOffset << " endOffset " << ev->Get()->EndOffset << " WTime " + << ev->Get()->WTime << " sizeLag " << ev->Get()->SizeLag); + + const auto it = PartitionToReadResponse.find(ev->Sender); // check whether this partition is taking part in read response + auto& container = it != PartitionToReadResponse.end() ? it->second->PartitionsBecameAvailable : AvailablePartitions; + auto res = container.insert({ev->Get()->Topic, ev->Get()->Partition, ev->Get()->WTime, ev->Get()->SizeLag, + ev->Get()->EndOffset - ev->Get()->ReadOffset, ev->Sender}); + Y_ABORT_UNLESS(res.second); + const bool isAlive = ProcessReads(ctx); // returns false if actor died + Y_UNUSED(isAlive); +} + + +void TReadSessionActor::HandlePoison(TEvPQProxy::TEvDieCommand::TPtr& ev, const TActorContext& ctx) { + CloseSession(ev->Get()->Reason, ev->Get()->ErrorCode, ctx); +} + + +void TReadSessionActor::HandleWakeup(const TActorContext& ctx) { + ctx.Schedule(Min(CommitInterval, CHECK_ACL_DELAY), new TEvents::TEvWakeup()); + MakeCommit(ctx); + if (!AuthInflight && (ForceACLCheck || (ctx.Now() - LastACLCheckTimestamp > TDuration::Seconds(AppData(ctx)->PQConfig.GetACLRetryTimeoutSec()) && RequestNotChecked))) { + ForceACLCheck = false; + RequestNotChecked = false; + Y_ABORT_UNLESS(!AuthInitActor); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " checking auth because of timeout"); + + SendAuthRequest(ctx); + } +} + +bool TReadSessionActor::RemoveEmptyMessages(TReadResponse::TBatchedData& data) { + bool hasNonEmptyMessages = false; + auto isMessageEmpty = [&](TReadResponse::TBatchedData::TMessageData& message) -> bool { + if (message.GetData().empty()) { + return true; + } else { + hasNonEmptyMessages = true; + return false; + } + }; + auto batchRemover = [&](TReadResponse::TBatchedData::TBatch& batch) -> bool { + NProtoBuf::RemoveRepeatedFieldItemIf(batch.MutableMessageData(), isMessageEmpty); + return batch.MessageDataSize() == 0; + }; + auto partitionDataRemover = [&](TReadResponse::TBatchedData::TPartitionData& partition) -> bool { + NProtoBuf::RemoveRepeatedFieldItemIf(partition.MutableBatch(), batchRemover); + return partition.BatchSize() == 0; + }; + NProtoBuf::RemoveRepeatedFieldItemIf(data.MutablePartitionData(), partitionDataRemover); + return hasNonEmptyMessages; +} + + +////////////////// PARTITION ACTOR + +TPartitionActor::TPartitionActor( + const TActorId& parentId, const TString& internalClientId, const ui64 cookie, const TString& session, + const ui32 generation, const ui32 step, const 
NPersQueue::TTopicConverterPtr& topic, const ui32 partition, + const ui64 tabletID, const TReadSessionActor::TTopicCounters& counters, const TString& clientDC +) + : ParentId(parentId) + , InternalClientId(internalClientId) + , ClientDC(clientDC) + , Cookie(cookie) + , Session(session) + , Generation(generation) + , Step(step) + , Topic(topic) + , Partition(partition) + , TabletID(tabletID) + , ReadOffset(0) + , ClientReadOffset(0) + , ClientCommitOffset(0) + , ClientVerifyReadOffset(false) + , CommittedOffset(0) + , WriteTimestampEstimateMs(0) + , WTime(0) + , InitDone(false) + , StartReading(false) + , AllPrepareInited(false) + , FirstInit(true) + , PipeClient() + , PipeGeneration(0) + , RequestInfly(false) + , EndOffset(0) + , SizeLag(0) + , NeedRelease(false) + , Released(false) + , WaitDataCookie(0) + , WaitForData(false) + , LockCounted(false) + , Counters(counters) +{ +} + + +TPartitionActor::~TPartitionActor() = default; + + +void TPartitionActor::Bootstrap(const TActorContext&) { + Become(&TThis::StateFunc); +} + + +void TPartitionActor::CheckRelease(const TActorContext& ctx) { + const bool hasUncommittedData = ReadOffset > ClientCommitOffset && ReadOffset > ClientReadOffset; + if (NeedRelease) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() << " partition:" << Partition + << " checking release readOffset " << ReadOffset << " committedOffset " << CommittedOffset << " ReadGuid " << ReadGuid + << " CommitsInfly.size " << CommitsInfly.size() << " Released " << Released); + } + + if (NeedRelease && ReadGuid.empty() && CommitsInfly.empty() && !hasUncommittedData && !Released) { + Released = true; + ctx.Send(ParentId, new TEvPQProxy::TEvPartitionReleased(Topic, Partition)); + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() << " partition:" << Partition + << " check release done - releasing; readOffset " << ReadOffset << " committedOffset " << CommittedOffset << " ReadGuid " << ReadGuid + << " CommitsInfly.size " << CommitsInfly.size() << " Released " << Released); + + } +} + + +void TPartitionActor::SendCommit(const ui64 readId, const ui64 offset, const TActorContext& ctx) { + NKikimrClient::TPersQueueRequest request; + request.MutablePartitionRequest()->SetTopic(Topic->GetClientsideName()); + request.MutablePartitionRequest()->SetPartition(Partition); + request.MutablePartitionRequest()->SetCookie(readId); + + Y_ABORT_UNLESS(PipeClient); + + ActorIdToProto(PipeClient, request.MutablePartitionRequest()->MutablePipeClient()); + auto commit = request.MutablePartitionRequest()->MutableCmdSetClientOffset(); + commit->SetClientId(InternalClientId); + commit->SetOffset(offset); + Y_ABORT_UNLESS(!Session.empty()); + commit->SetSessionId(Session); + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() << " partition:" + << Partition << " committing to position " << offset << " prev " << CommittedOffset + << " end " << EndOffset << " by cookie " << readId); + + TAutoPtr<TEvPersQueue::TEvRequest> req(new TEvPersQueue::TEvRequest); + req->Record.Swap(&request); + + NTabletPipe::SendData(ctx, PipeClient, req.Release()); +} + +void TPartitionActor::RestartPipe(const TActorContext& ctx, const TString& reason, const NPersQueue::NErrorCode::EErrorCode errorCode) { + + if (!PipeClient) + return; + + Counters.Errors.Inc(); + + NTabletPipe::CloseClient(ctx, PipeClient); + PipeClient = TActorId{}; + if (errorCode != NPersQueue::NErrorCode::OVERLOAD) 
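+        // OVERLOAD is treated as transient: the restart budget (PipeGeneration,
+        // capped by MAX_PIPE_RESTARTS below) is only spent on other error codes.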
+        ++PipeGeneration;
+
+    if (PipeGeneration == MAX_PIPE_RESTARTS) {
+        ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("too many attempts to restart pipe", NPersQueue::NErrorCode::ERROR));
+        return;
+    }
+
+    ctx.Schedule(TDuration::MilliSeconds(RESTART_PIPE_DELAY_MS), new TEvPQProxy::TEvRestartPipe());
+
+    LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() << " partition:" << Partition
+                        << " schedule pipe restart attempt " << PipeGeneration << " reason: " << reason);
+}
+
+
+void TPartitionActor::Handle(const TEvPQProxy::TEvRestartPipe::TPtr&, const TActorContext& ctx) {
+
+    Y_ABORT_UNLESS(!PipeClient);
+
+    NTabletPipe::TClientConfig clientConfig;
+    clientConfig.RetryPolicy = {
+        .RetryLimitCount = 6,
+        .MinRetryTime = TDuration::MilliSeconds(10),
+        .MaxRetryTime = TDuration::MilliSeconds(100),
+        .BackoffMultiplier = 2,
+        .DoFirstRetryInstantly = true
+    };
+    PipeClient = ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, TabletID, clientConfig));
+    Y_ABORT_UNLESS(TabletID);
+
+    LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() << " partition:" << Partition
+                        << " pipe restart attempt " << PipeGeneration << " RequestInfly " << RequestInfly << " ReadOffset " << ReadOffset << " EndOffset " << EndOffset
+                        << " InitDone " << InitDone << " WaitForData " << WaitForData);
+
+    if (RequestInfly) { // a read request is in flight - resend it
+        LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() << " partition:" << Partition
+                        << " resend " << CurrentRequest);
+
+        TAutoPtr<TEvPersQueue::TEvRequest> event(new TEvPersQueue::TEvRequest);
+        event->Record = CurrentRequest;
+
+        ActorIdToProto(PipeClient, event->Record.MutablePartitionRequest()->MutablePipeClient());
+
+        NTabletPipe::SendData(ctx, PipeClient, event.Release());
+    }
+    if (InitDone) {
+        for (auto& c : CommitsInfly) { // resend all in-flight commits
+            if (c.second != Max<ui64>())
+                SendCommit(c.first, c.second, ctx);
+        }
+        if (WaitForData) { // resend wait-for-data requests
+            WaitDataInfly.clear();
+            WaitDataInPartition(ctx);
+        }
+    }
+}
+
+void TPartitionActor::Handle(TEvPersQueue::TEvResponse::TPtr& ev, const TActorContext& ctx) {
+
+    if (ev->Get()->Record.HasErrorCode() && ev->Get()->Record.GetErrorCode() != NPersQueue::NErrorCode::OK) {
+        const auto errorCode = ev->Get()->Record.GetErrorCode();
+        if (errorCode == NPersQueue::NErrorCode::WRONG_COOKIE || errorCode == NPersQueue::NErrorCode::BAD_REQUEST) {
+            Counters.Errors.Inc();
+            ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("status is not ok: " + ev->Get()->Record.GetErrorReason(), ev->Get()->Record.GetErrorCode()));
+        } else {
+            RestartPipe(ctx, TStringBuilder() << "status is not ok. Code: " << EErrorCode_Name(errorCode) << ". 
Reason: " << ev->Get()->Record.GetErrorReason(), errorCode); + } + return; + } + + if (ev->Get()->Record.GetStatus() != NMsgBusProxy::MSTATUS_OK) { //this is incorrect answer, die + Y_ABORT_UNLESS(!ev->Get()->Record.HasErrorCode()); + Counters.Errors.Inc(); + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("status is not ok: " + ev->Get()->Record.GetErrorReason(), NPersQueue::NErrorCode::ERROR)); + return; + } + if (!ev->Get()->Record.HasPartitionResponse()) { //this is incorrect answer, die + Counters.Errors.Inc(); + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("empty partition in response", NPersQueue::NErrorCode::ERROR)); + return; + } + + const auto& result = ev->Get()->Record.GetPartitionResponse(); + + if (!result.HasCookie()) { //this is incorrect answer, die + Counters.Errors.Inc(); + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("no cookie in response", NPersQueue::NErrorCode::ERROR)); + return; + } + + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() + << " partition:" << Partition + << " initDone " << InitDone << " event " << PartitionResponseToLog(result)); + + + if (!InitDone) { + if (result.GetCookie() != INIT_COOKIE) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() + << " partition:" << Partition + << " unwaited response in init with cookie " << result.GetCookie()); + return; + } + Y_ABORT_UNLESS(RequestInfly); + CurrentRequest.Clear(); + RequestInfly = false; + + Y_ABORT_UNLESS(result.HasCmdGetClientOffsetResult()); + const auto& resp = result.GetCmdGetClientOffsetResult(); + Y_ABORT_UNLESS(resp.HasEndOffset()); + EndOffset = resp.GetEndOffset(); + SizeLag = resp.GetSizeLag(); + + ClientCommitOffset = ReadOffset = CommittedOffset = resp.HasOffset() ? 
resp.GetOffset() : 0; + Y_ABORT_UNLESS(EndOffset >= CommittedOffset); + + if (resp.HasWriteTimestampMS()) + WTime = resp.GetWriteTimestampMS(); + WriteTimestampEstimateMs = resp.GetWriteTimestampEstimateMS(); + InitDone = true; + PipeGeneration = 0; //reset tries counter - all ok + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " INIT DONE " << Topic->GetPrintableString() + << " partition:" << Partition + << " EndOffset " << EndOffset << " readOffset " << ReadOffset << " committedOffset " << CommittedOffset); + + + if (!StartReading) { + ctx.Send(ParentId, new TEvPQProxy::TEvPartitionStatus(Topic, Partition, CommittedOffset, EndOffset, WriteTimestampEstimateMs, true)); + } else { + InitStartReading(ctx); + } + return; + } + + if (!result.HasCmdReadResult()) { //this is commit response + if (CommitsInfly.empty()) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() + << " partition:" << Partition + << " unwaited commit-response with cookie " << result.GetCookie() << "; waiting for nothing"); + return; + } + ui64 readId = CommitsInfly.front().first; + + if (result.GetCookie() != readId) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() + << " partition:" << Partition + << " unwaited commit-response with cookie " << result.GetCookie() << "; waiting for " << readId); + return; + } + + Counters.Commits.Inc(); + + CommittedOffset = CommitsInfly.front().second; + CommitsInfly.pop_front(); + if (readId != Max<ui64>()) //this readId is reserved for upcommits on client skipping with ClientCommitOffset + ctx.Send(ParentId, new TEvPQProxy::TEvCommitDone(readId, Topic, Partition)); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() + << " partition:" << Partition + << " commit done to position " << CommittedOffset << " endOffset " << EndOffset << " with cookie " << readId); + + while (!CommitsInfly.empty() && CommitsInfly.front().second == Max<ui64>()) { //this is cookies that have no effect on this partition + readId = CommitsInfly.front().first; + CommitsInfly.pop_front(); + ctx.Send(ParentId, new TEvPQProxy::TEvCommitDone(readId, Topic, Partition)); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() + << "partition :" << Partition + << " commit done with no effect with cookie " << readId); + } + + CheckRelease(ctx); + PipeGeneration = 0; //reset tries counter - all ok + return; + } + + //This is read + + Y_ABORT_UNLESS(result.HasCmdReadResult()); + const auto& res = result.GetCmdReadResult(); + + if (result.GetCookie() != (ui64)ReadOffset) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << Topic->GetPrintableString() + << "partition :" << Partition + << " unwaited read-response with cookie " << result.GetCookie() << "; waiting for " << ReadOffset << "; current read guid is " << ReadGuid); + return; + } + + Y_ABORT_UNLESS(res.HasMaxOffset()); + EndOffset = res.GetMaxOffset(); + SizeLag = res.GetSizeLag(); + + const ui64 realReadOffset = res.HasRealReadOffset() ? 
res.GetRealReadOffset() : 0; + + TReadResponse response; + + auto* data = response.MutableBatchedData(); + auto* partitionData = data->AddPartitionData(); + partitionData->SetTopic(Topic->GetClientsideName()); + partitionData->SetPartition(Partition); + + bool hasOffset = false; + + TReadResponse::TBatchedData::TBatch* currentBatch = nullptr; + for (ui32 i = 0; i < res.ResultSize(); ++i) { + const auto& r = res.GetResult(i); + + WTime = r.GetWriteTimestampMS(); + WriteTimestampEstimateMs = Max(WriteTimestampEstimateMs, WTime); + Y_ABORT_UNLESS(r.GetOffset() >= ReadOffset); + ReadOffset = r.GetOffset() + 1; + hasOffset = true; + + auto proto(GetDeserializedData(r.GetData())); + if (proto.GetChunkType() != NKikimrPQClient::TDataChunk::REGULAR) { + continue; //TODO - no such chunks must be on prod + } + TString sourceId = ""; + if (!r.GetSourceId().empty()) { + if (!NPQ::NSourceIdEncoding::IsValidEncoded(r.GetSourceId())) { + LOG_ERROR_S( + ctx, NKikimrServices::PQ_READ_PROXY, + PQ_LOG_PREFIX << "read bad sourceId from topic " << Topic->GetPrintableString() + << " partition:" << Partition + << " offset " << r.GetOffset() << " seqNo " << r.GetSeqNo() << " sourceId '" << r.GetSourceId() << "' ReadGuid " << ReadGuid); + } + sourceId = NPQ::NSourceIdEncoding::Decode(r.GetSourceId()); + } + + if (!currentBatch || currentBatch->GetWriteTimeMs() != r.GetWriteTimestampMS() || currentBatch->GetSourceId() != sourceId) { + // If write time and source id are the same, the rest fields will be the same too. + currentBatch = partitionData->AddBatch(); + currentBatch->SetWriteTimeMs(r.GetWriteTimestampMS()); + currentBatch->SetSourceId(sourceId); + + if (proto.HasMeta()) { + const auto& header = proto.GetMeta(); + if (header.HasServer()) { + auto* item = currentBatch->MutableExtraFields()->AddItems(); + item->SetKey("server"); + item->SetValue(header.GetServer()); + } + if (header.HasFile()) { + auto* item = currentBatch->MutableExtraFields()->AddItems(); + item->SetKey("file"); + item->SetValue(header.GetFile()); + } + if (header.HasIdent()) { + auto* item = currentBatch->MutableExtraFields()->AddItems(); + item->SetKey("ident"); + item->SetValue(header.GetIdent()); + } + if (header.HasLogType()) { + auto* item = currentBatch->MutableExtraFields()->AddItems(); + item->SetKey("logtype"); + item->SetValue(header.GetLogType()); + } + } + + if (proto.HasExtraFields()) { + const auto& map = proto.GetExtraFields(); + for (const auto& kv : map.GetItems()) { + auto* item = currentBatch->MutableExtraFields()->AddItems(); + item->SetKey(kv.GetKey()); + item->SetValue(kv.GetValue()); + } + } + + if (proto.HasIp() && IsUtf(proto.GetIp())) { + currentBatch->SetIp(proto.GetIp()); + } + } + + auto* message = currentBatch->AddMessageData(); + message->SetSeqNo(r.GetSeqNo()); + message->SetCreateTimeMs(r.GetCreateTimestampMS()); + message->SetOffset(r.GetOffset()); + message->SetUncompressedSize(r.GetUncompressedSize()); + if (proto.HasCodec()) { + const auto codec = proto.GetCodec(); + if (codec < Min<int>() || codec > Max<int>() || !NPersQueueCommon::ECodec_IsValid(codec)) { + LOG_ERROR_S( + ctx, NKikimrServices::PQ_READ_PROXY, + PQ_LOG_PREFIX << "data chunk (topic " << Topic->GetInternalName() << ", partition " << Partition + << ", offset " << r.GetOffset() << ", seqNo " << r.GetSeqNo() << ", sourceId " + << r.GetSourceId() << ") codec (id " << codec + << ") is not valid NPersQueueCommon::ECodec, loss of data compression codec information" + ); + } + message->SetCodec((NPersQueueCommon::ECodec)proto.GetCodec()); + } + 
message->SetData(proto.GetData());
+    }
+
+    if (!hasOffset) { // no data could be read from the partition at offset ReadOffset - the partition may be empty
+        ReadOffset = Min(Max(ReadOffset + 1, realReadOffset + 1), EndOffset);
+    }
+
+    CurrentRequest.Clear();
+    RequestInfly = false;
+
+    Y_ABORT_UNLESS(!WaitForData);
+
+    if (EndOffset > ReadOffset) {
+        ctx.Send(ParentId, new TEvPQProxy::TEvPartitionReady(Topic, Partition, WTime, SizeLag, ReadOffset, EndOffset));
+    } else {
+        WaitForData = true;
+        if (PipeClient) // pipe will be recreated soon
+            WaitDataInPartition(ctx);
+    }
+
+    LOG_DEBUG_S(
+            ctx, NKikimrServices::PQ_READ_PROXY,
+            PQ_LOG_PREFIX << " after read state " << Topic->GetPrintableString()
+                << " partition:" << Partition
+                << " EndOffset " << EndOffset << " ReadOffset " << ReadOffset << " ReadGuid " << ReadGuid);
+
+    ReadGuid = TString();
+
+    auto readResponse = MakeHolder<TEvPQProxy::TEvReadResponse>(
+        std::move(response),
+        ReadOffset,
+        res.GetBlobsFromDisk() > 0,
+        TDuration::MilliSeconds(res.GetWaitQuotaTimeMs())
+    );
+    ctx.Send(ParentId, readResponse.Release());
+    CheckRelease(ctx);
+
+    PipeGeneration = 0; // reset retry counter - all ok
+}
+
+void TPartitionActor::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) {
+    TEvTabletPipe::TEvClientConnected *msg = ev->Get();
+
+    LOG_INFO_S(
+            ctx, NKikimrServices::PQ_READ_PROXY,
+            PQ_LOG_PREFIX << " " << Topic->GetPrintableString() << " partition:" << Partition
+                << " pipe restart attempt " << PipeGeneration << " pipe creation result: " << msg->Status);
+
+    if (msg->Status != NKikimrProto::OK) {
+        RestartPipe(ctx, TStringBuilder() << "pipe to tablet is dead " << msg->TabletId, NPersQueue::NErrorCode::ERROR);
+        return;
+    }
+}
+
+void TPartitionActor::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) {
+    RestartPipe(ctx, TStringBuilder() << "pipe to tablet is dead " << ev->Get()->TabletId, NPersQueue::NErrorCode::ERROR);
+}
+
+
+void TPartitionActor::Handle(TEvPQProxy::TEvReleasePartition::TPtr&, const TActorContext& ctx) {
+    LOG_INFO_S(
+            ctx, NKikimrServices::PQ_READ_PROXY,
+            PQ_LOG_PREFIX << " (partition) releasing " << Topic->GetPrintableString() << " partition:" << Partition
+                << " ReadOffset " << ReadOffset << " ClientCommitOffset " << ClientCommitOffset
+                << " CommittedOffset " << CommittedOffset
+    );
+    NeedRelease = true;
+    CheckRelease(ctx);
+}
+
+
+void TPartitionActor::Handle(TEvPQProxy::TEvGetStatus::TPtr&, const TActorContext& ctx) {
+    ctx.Send(ParentId, new TEvPQProxy::TEvPartitionStatus(Topic, Partition, CommittedOffset, EndOffset, WriteTimestampEstimateMs, false));
+}
+
+
+void TPartitionActor::Handle(TEvPQProxy::TEvLockPartition::TPtr& ev, const TActorContext& ctx) {
+    ClientReadOffset = ev->Get()->ReadOffset;
+    ClientCommitOffset = ev->Get()->CommitOffset;
+    ClientVerifyReadOffset = ev->Get()->VerifyReadOffset;
+
+    if (StartReading) {
+        Y_ABORT_UNLESS(ev->Get()->StartReading); // otherwise it is a signal from the actor, which cannot happen here
+        ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("double partition locking", NPersQueue::NErrorCode::BAD_REQUEST));
+        return;
+    }
+
+    StartReading = ev->Get()->StartReading;
+    InitLockPartition(ctx);
+}
+
+void TPartitionActor::InitStartReading(const TActorContext& ctx) {
+
+    Y_ABORT_UNLESS(AllPrepareInited);
+    Y_ABORT_UNLESS(!WaitForData);
+    LOG_INFO_S(
+            ctx, NKikimrServices::PQ_READ_PROXY,
+            PQ_LOG_PREFIX << " Start reading " << Topic->GetPrintableString() << " partition:" << Partition
+                << " EndOffset " << EndOffset << " 
readOffset " << ReadOffset << " committedOffset " + << CommittedOffset << " clientCommittedOffset " << ClientCommitOffset + << " clientReadOffset " << ClientReadOffset + ); + + Counters.PartitionsToBeLocked.Dec(); + LockCounted = false; + + ReadOffset = Max<ui64>(CommittedOffset, ClientReadOffset); + + if (ClientVerifyReadOffset) { + if (ClientReadOffset < CommittedOffset) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() + << "trying to read from position that is less than committed: read " << ClientReadOffset << " committed " << CommittedOffset, + NPersQueue::NErrorCode::BAD_REQUEST)); + return; + } + } + + if (ClientCommitOffset > CommittedOffset) { + if (ClientCommitOffset > ReadOffset) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() + << "trying to read from position that is less than provided to commit: read " << ReadOffset << " commit " << ClientCommitOffset, + NPersQueue::NErrorCode::BAD_REQUEST)); + return; + } + if (ClientCommitOffset > EndOffset) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession(TStringBuilder() + << "trying to commit to future: commit " << ClientCommitOffset << " endOffset " << EndOffset, + NPersQueue::NErrorCode::BAD_REQUEST)); + return; + } + Y_ABORT_UNLESS(CommitsInfly.empty()); + CommitsInfly.push_back(std::pair<ui64, ui64>(Max<ui64>(), ClientCommitOffset)); + if (PipeClient) //pipe will be recreated soon + SendCommit(CommitsInfly.back().first, CommitsInfly.back().second, ctx); + } else { + ClientCommitOffset = CommittedOffset; + } + + if (EndOffset > ReadOffset) { + ctx.Send(ParentId, new TEvPQProxy::TEvPartitionReady(Topic, Partition, WTime, SizeLag, ReadOffset, EndOffset)); + } else { + WaitForData = true; + if (PipeClient) //pipe will be recreated soon + WaitDataInPartition(ctx); + } +} + +void TPartitionActor::InitLockPartition(const TActorContext& ctx) { + if (PipeClient && AllPrepareInited) { + ctx.Send(ParentId, new TEvPQProxy::TEvCloseSession("double partition locking", NPersQueue::NErrorCode::BAD_REQUEST)); + return; + } + if (!LockCounted) { + Counters.PartitionsToBeLocked.Inc(); + LockCounted = true; + } + if (StartReading) + AllPrepareInited = true; + + if (FirstInit) { + Y_ABORT_UNLESS(!PipeClient); + FirstInit = false; + NTabletPipe::TClientConfig clientConfig; + clientConfig.RetryPolicy = { + .RetryLimitCount = 6, + .MinRetryTime = TDuration::MilliSeconds(10), + .MaxRetryTime = TDuration::MilliSeconds(100), + .BackoffMultiplier = 2, + .DoFirstRetryInstantly = true + }; + PipeClient = ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, TabletID, clientConfig)); + + NKikimrClient::TPersQueueRequest request; + + request.MutablePartitionRequest()->SetTopic(Topic->GetClientsideName()); + request.MutablePartitionRequest()->SetPartition(Partition); + request.MutablePartitionRequest()->SetCookie(INIT_COOKIE); + + ActorIdToProto(PipeClient, request.MutablePartitionRequest()->MutablePipeClient()); + + auto cmd = request.MutablePartitionRequest()->MutableCmdCreateSession(); + cmd->SetClientId(InternalClientId); + cmd->SetSessionId(Session); + cmd->SetGeneration(Generation); + cmd->SetStep(Step); + + LOG_INFO_S( + ctx, NKikimrServices::PQ_READ_PROXY, + PQ_LOG_PREFIX << " INITING " << Topic->GetPrintableString() << " partition:" << Partition); + + TAutoPtr<TEvPersQueue::TEvRequest> req(new TEvPersQueue::TEvRequest); + Y_ABORT_UNLESS(!RequestInfly); + CurrentRequest = request; + RequestInfly = true; + req->Record.Swap(&request); + + NTabletPipe::SendData(ctx, PipeClient, req.Release()); + } 
else {
+        Y_ABORT_UNLESS(StartReading); // otherwise it is double locking from the actor, not the client - the client always locks with StartReading == true
+        Y_ABORT_UNLESS(InitDone);
+        InitStartReading(ctx);
+    }
+}
+
+
+void TPartitionActor::WaitDataInPartition(const TActorContext& ctx) {
+
+    if (WaitDataInfly.size() > 1) // already got 2 requests in flight
+        return;
+    Y_ABORT_UNLESS(InitDone);
+
+    Y_ABORT_UNLESS(PipeClient);
+
+    if (!WaitForData)
+        return;
+
+    Y_ABORT_UNLESS(ReadOffset >= EndOffset);
+
+    TAutoPtr<TEvPersQueue::TEvHasDataInfo> event(new TEvPersQueue::TEvHasDataInfo());
+    event->Record.SetPartition(Partition);
+    event->Record.SetOffset(ReadOffset);
+    event->Record.SetCookie(++WaitDataCookie);
+    ui64 deadline = (ctx.Now() + WAIT_DATA - WAIT_DELTA).MilliSeconds();
+    event->Record.SetDeadline(deadline);
+    event->Record.SetClientId(InternalClientId);
+
+    LOG_DEBUG_S(
+            ctx, NKikimrServices::PQ_READ_PROXY,
+            PQ_LOG_PREFIX << " " << Topic->GetPrintableString() << " partition:" << Partition
+                << " wait data in partition inited, cookie " << WaitDataCookie
+    );
+
+    NTabletPipe::SendData(ctx, PipeClient, event.Release());
+
+    ctx.Schedule(PREWAIT_DATA, new TEvents::TEvWakeup());
+
+    ctx.Schedule(WAIT_DATA, new TEvPQProxy::TEvDeadlineExceeded(WaitDataCookie));
+
+    WaitDataInfly.insert(WaitDataCookie);
+}
+
+void TPartitionActor::Handle(TEvPersQueue::TEvHasDataInfoResponse::TPtr& ev, const TActorContext& ctx) {
+    const auto& record = ev->Get()->Record;
+
+    WriteTimestampEstimateMs = Max(WriteTimestampEstimateMs, record.GetWriteTimestampEstimateMS());
+
+    auto it = WaitDataInfly.find(ev->Get()->Record.GetCookie());
+    if (it == WaitDataInfly.end()) {
+        LOG_DEBUG_S(
+                ctx, NKikimrServices::PQ_READ_PROXY,
+                PQ_LOG_PREFIX << " " << Topic->GetPrintableString() << " partition:" << Partition
+                    << " unwaited response for WaitData " << ev->Get()->Record);
+        return;
+    }
+    WaitDataInfly.erase(it);
+    if (!WaitForData) {
+        return;
+    }
+
+    Counters.WaitsForData.Inc();
+
+    Y_ABORT_UNLESS(record.HasEndOffset());
+    Y_ABORT_UNLESS(EndOffset <= record.GetEndOffset()); // the end offset cannot decrease; if no data arrived the signal is still sent after the timeout
+    Y_ABORT_UNLESS(ReadOffset >= EndOffset); // otherwise no WaitData was needed
+
+    LOG_DEBUG_S(
+            ctx, NKikimrServices::PQ_READ_PROXY,
+            PQ_LOG_PREFIX << " " << Topic->GetPrintableString() << " partition:" << Partition
+                << " wait for data done: " << " readOffset " << ReadOffset << " EndOffset " << EndOffset
+                << " newEndOffset " << record.GetEndOffset() << " commitOffset " << CommittedOffset
+                << " clientCommitOffset " << ClientCommitOffset << " cookie " << ev->Get()->Record.GetCookie()
+    );
+
+    EndOffset = record.GetEndOffset();
+    SizeLag = record.GetSizeLag();
+
+    if (ReadOffset < EndOffset) {
+        WaitForData = false;
+        WaitDataInfly.clear();
+        ctx.Send(ParentId, new TEvPQProxy::TEvPartitionReady(Topic, Partition, WTime, SizeLag, ReadOffset, EndOffset));
+        LOG_DEBUG_S(
+                ctx, NKikimrServices::PQ_READ_PROXY,
+                PQ_LOG_PREFIX << " " << Topic->GetPrintableString() << " partition:" << Partition
+                    << " ready for read with readOffset " << ReadOffset << " endOffset " << EndOffset
+        );
+    } else {
+        if (PipeClient)
+            WaitDataInPartition(ctx);
+    }
+    CheckRelease(ctx); // just for logging purposes
+}
+
+
+void TPartitionActor::Handle(TEvPQProxy::TEvRead::TPtr& ev, const TActorContext& ctx) {
+    LOG_DEBUG_S(
+            ctx, NKikimrServices::PQ_READ_PROXY,
+            PQ_LOG_PREFIX << " READ FROM " << Topic->GetPrintableString() << " partition:" << Partition
+                << " event << 
ev->Get()->Request << " readOffset " << ReadOffset
+                << " EndOffset " << EndOffset << " ClientCommitOffset " << ClientCommitOffset
+                << " committedOffset " << CommittedOffset << " Guid " << ev->Get()->Guid
+    );
+
+    Y_ABORT_UNLESS(!NeedRelease);
+    Y_ABORT_UNLESS(!Released);
+
+    Y_ABORT_UNLESS(ReadGuid.empty());
+    Y_ABORT_UNLESS(!RequestInfly);
+
+    ReadGuid = ev->Get()->Guid;
+
+    const auto& req = ev->Get()->Request.GetRead();
+
+    NKikimrClient::TPersQueueRequest request;
+
+    request.MutablePartitionRequest()->SetTopic(Topic->GetClientsideName());
+
+    request.MutablePartitionRequest()->SetPartition(Partition);
+    request.MutablePartitionRequest()->SetCookie((ui64)ReadOffset);
+
+    ActorIdToProto(PipeClient, request.MutablePartitionRequest()->MutablePipeClient());
+    auto read = request.MutablePartitionRequest()->MutableCmdRead();
+    read->SetClientId(InternalClientId);
+    read->SetClientDC(ClientDC);
+    if (req.GetMaxCount()) {
+        read->SetCount(req.GetMaxCount());
+    }
+    if (req.GetMaxSize()) {
+        read->SetBytes(req.GetMaxSize());
+    }
+    if (req.GetMaxTimeLagMs()) {
+        read->SetMaxTimeLagMs(req.GetMaxTimeLagMs());
+    }
+    if (req.GetReadTimestampMs()) {
+        read->SetReadTimestampMs(req.GetReadTimestampMs());
+    }
+
+    read->SetOffset(ReadOffset);
+    read->SetTimeoutMs(READ_TIMEOUT_DURATION.MilliSeconds());
+    RequestInfly = true;
+    CurrentRequest = request;
+
+    if (!PipeClient) // pipe will be recreated soon
+        return;
+
+    TAutoPtr<TEvPersQueue::TEvRequest> event(new TEvPersQueue::TEvRequest);
+    event->Record.Swap(&request);
+
+    NTabletPipe::SendData(ctx, PipeClient, event.Release());
+}
+
+
+void TPartitionActor::Handle(TEvPQProxy::TEvCommit::TPtr& ev, const TActorContext& ctx) {
+    const ui64 readId = ev->Get()->ReadId;
+    const ui64 offset = ev->Get()->Offset;
+    Y_ABORT_UNLESS(offset != Max<ui64>()); // has a concrete offset
+    if (offset < ClientCommitOffset) {
+        LOG_ERROR_S(
+                ctx, NKikimrServices::PQ_READ_PROXY,
+                PQ_LOG_PREFIX << " " << Topic->GetPrintableString() << " partition:" << Partition
+                    << " commit done to too small position " << offset
+                    << " committedOffset " << ClientCommitOffset << " cookie " << readId
+        );
+    }
+    Y_ABORT_UNLESS(offset >= ClientCommitOffset);
+
+    const bool hasProgress = offset > ClientCommitOffset;
+
+    if (!hasProgress) { // nothing to commit for this partition
+        if (CommitsInfly.empty()) {
+            LOG_DEBUG_S(
+                    ctx, NKikimrServices::PQ_READ_PROXY,
+                    PQ_LOG_PREFIX << " " << Topic->GetPrintableString() << " partition:" << Partition
+                        << " commit done with no effect with cookie " << readId
+            );
+            ctx.Send(ParentId, new TEvPQProxy::TEvCommitDone(readId, Topic, Partition));
+            CheckRelease(ctx);
+        } else {
+            CommitsInfly.push_back(std::pair<ui64, ui64>(readId, Max<ui64>()));
+        }
+        return;
+    }
+
+    ClientCommitOffset = offset;
+    CommitsInfly.push_back(std::pair<ui64, ui64>(readId, offset));
+
+    if (PipeClient) // if not, the pipe will be recreated soon and SendCommit will be done then
+        SendCommit(readId, offset, ctx);
+}
+
+
+void TPartitionActor::Die(const TActorContext& ctx) {
+    if (PipeClient)
+        NTabletPipe::CloseClient(ctx, PipeClient);
+    TActorBootstrapped<TPartitionActor>::Die(ctx);
+}
+
+void TPartitionActor::HandlePoison(TEvents::TEvPoisonPill::TPtr&, const TActorContext& ctx) {
+    if (LockCounted)
+        Counters.PartitionsToBeLocked.Dec();
+    Die(ctx);
+}
+
+void TPartitionActor::Handle(TEvPQProxy::TEvDeadlineExceeded::TPtr& ev, const TActorContext& ctx) {
+
+    WaitDataInfly.erase(ev->Get()->Cookie);
+    if (ReadOffset >= EndOffset && WaitDataInfly.size() <= 1 && PipeClient) {
+
Y_ABORT_UNLESS(WaitForData); + WaitDataInPartition(ctx); + } + +} + +void TPartitionActor::HandleWakeup(const TActorContext& ctx) { + if (ReadOffset >= EndOffset && WaitDataInfly.size() <= 1 && PipeClient) { + Y_ABORT_UNLESS(WaitForData); + WaitDataInPartition(ctx); + } +} +} +} diff --git a/ydb/services/deprecated/persqueue_v0/grpc_pq_session.h b/ydb/services/deprecated/persqueue_v0/grpc_pq_session.h new file mode 100644 index 0000000000..dd2fd96e0a --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/grpc_pq_session.h @@ -0,0 +1,321 @@ +#pragma once + +#include "ydb/core/client/server/grpc_base.h" +#include <library/cpp/grpc/server/grpc_server.h> +#include <library/cpp/string_utils/quote/quote.h> +#include <util/generic/queue.h> + +using grpc::Status; + + +namespace NKikimr { +namespace NGRpcProxy { + +/////////////////////////////////////////////////////////////////////////////// + +using namespace NKikimrClient; + +template<class TResponse> +class ISessionHandler : public TAtomicRefCount<ISessionHandler<TResponse>> { +public: + virtual ~ISessionHandler() + { } + + /// Finish session. + virtual void Finish() = 0; + + /// Send reply to client. + virtual void Reply(const TResponse& resp) = 0; + + virtual void ReadyForNextRead() = 0; + + virtual bool IsShuttingDown() const = 0; +}; + +template<class TResponse> +using ISessionHandlerRef = TIntrusivePtr<ISessionHandler<TResponse>>; + + +template <class TRequest, class TResponse> +class ISession : public ISessionHandler<TResponse> +{ + + using ISessionRef = TIntrusivePtr<ISession<TRequest, TResponse>>; + +protected: + class TRequestCreated : public NGrpc::IQueueEvent { + public: + TRequestCreated(ISessionRef session) + : Session(session) + { } + + bool Execute(bool ok) override { + if (!ok) { + Session->DestroyStream("waiting stream creating failed"); + return false; + } + + Session->OnCreated(); + return false; + } + + void DestroyRequest() override { + if (!Session->Context.c_call() && Session->ClientDone) { + // AsyncNotifyWhenDone will not appear on the queue. 
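+                // The TClientDone tag is allocated in the ISession constructor via
+                // Context.AsyncNotifyWhenDone(); since the call object was never created,
+                // gRPC will not complete that tag, so it has to be freed manually here.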
+                delete Session->ClientDone;
+                Session->ClientDone = nullptr;
+            }
+            delete this;
+        }
+
+        ISessionRef Session;
+    };
+
+    class TReadDone : public NGrpc::IQueueEvent {
+    public:
+        TReadDone(ISessionRef session)
+            : Session(session)
+        { }
+
+        bool Execute(bool ok) override {
+            if (ok) {
+                Session->OnRead(Request);
+            } else {
+                if (Session->IsCancelled()) {
+                    Session->DestroyStream("reading from stream failed");
+                } else {
+                    Session->OnDone();
+                }
+            }
+            return false;
+        }
+
+        void DestroyRequest() override {
+            delete this;
+        }
+
+        TRequest Request;
+        ISessionRef Session;
+    };
+
+    class TWriteDone : public NGrpc::IQueueEvent {
+    public:
+        TWriteDone(ISessionRef session, ui64 size)
+            : Session(session)
+            , Size(size)
+        { }
+
+        bool Execute(bool ok) override {
+            Session->OnWriteDone(Size);
+            if (!ok) {
+                Session->DestroyStream("writing to stream failed");
+                return false;
+            }
+
+            TGuard<TSpinLock> lock(Session->Lock);
+            if (Session->Responses.empty()) {
+                Session->HaveWriteInflight = false;
+                if (Session->NeedFinish) {
+                    lock.Release();
+                    Session->Stream.Finish(Status::OK, new TFinishDone(Session));
+                }
+            } else {
+                auto resp = Session->Responses.front();
+                Session->Responses.pop();
+                lock.Release();
+                ui64 sz = resp.ByteSize();
+                Session->Stream.Write(resp, new TWriteDone(Session, sz));
+            }
+
+            return false;
+        }
+
+        void DestroyRequest() override {
+            delete this;
+        }
+
+        ISessionRef Session;
+        ui64 Size;
+    };
+
+    class TFinishDone : public NGrpc::IQueueEvent {
+    public:
+        TFinishDone(ISessionRef session)
+            : Session(session)
+        { }
+
+        bool Execute(bool) override {
+            Session->DestroyStream("some stream finished");
+            return false;
+        }
+
+        void DestroyRequest() override {
+            delete this;
+        }
+
+        ISessionRef Session;
+    };
+
+    class TClientDone : public NGrpc::IQueueEvent {
+    public:
+        TClientDone(ISessionRef session)
+            : Session(session)
+        {
+            Session->ClientDone = this;
+        }
+
+        bool Execute(bool) override {
+            Session->ClientIsDone = true;
+            Session->DestroyStream("session closed");
+            return false;
+        }
+
+        void DestroyRequest() override {
+            Y_ABORT_UNLESS(Session->ClientDone);
+            Session->ClientDone = nullptr;
+            delete this;
+        }
+
+        ISessionRef Session;
+    };
+
+public:
+    ISession(grpc::ServerCompletionQueue* cq)
+        : CQ(cq)
+        , Stream(&Context)
+        , HaveWriteInflight(false)
+        , NeedFinish(false)
+        , ClientIsDone(false)
+    {
+        Context.AsyncNotifyWhenDone(new TClientDone(this));
+    }
+
+    TString GetDatabase() const {
+        TString key = "x-ydb-database";
+        const auto& clientMetadata = Context.client_metadata();
+        const auto range = clientMetadata.equal_range(grpc::string_ref{key.data(), key.size()});
+        if (range.first == range.second) {
+            return "";
+        }
+        // Only the first value of the header is used.
+        return TString(range.first->second.data(), range.first->second.size());
+    }
+
+    TString GetPeerName() const {
+        auto res = Context.peer();
+        // Remove percent-encoding
+        CGIUnescape(res);
+
+        if (res.StartsWith("ipv4:[") || res.StartsWith("ipv6:[")) {
+            size_t pos = res.find(']');
+            Y_ABORT_UNLESS(pos != TString::npos);
+            res = res.substr(6, pos - 6);
+        } else if (res.StartsWith("ipv4:")) {
+            size_t pos = res.rfind(':');
+            if (pos == TString::npos) { // no port
+                res = res.substr(5);
+            } else {
+                res = res.substr(5, pos - 5);
+            }
+        } else {
+            size_t pos = res.rfind(":"); // port
+            if (pos != TString::npos) {
+                res = res.substr(0, pos);
+            }
+        }
+        return res;
+    }
+
+protected:
+
+    virtual void OnCreated() = 0;
+
virtual void OnRead(const TRequest& request) = 0; + virtual void OnDone() = 0; + virtual void OnWriteDone(ui64 size) = 0; + + virtual void DestroyStream(const TString& reason, NPersQueue::NErrorCode::EErrorCode code = NPersQueue::NErrorCode::BAD_REQUEST) = 0; + + /// Start accepting session's requests. + virtual void Start() = 0; + + bool IsCancelled() const { + return ClientIsDone && Context.IsCancelled(); + } + + void ReplyWithError(const TString& description, NPersQueue::NErrorCode::EErrorCode code) + { + TResponse response; + response.MutableError()->SetDescription(description); + response.MutableError()->SetCode(code); + Reply(response); + Finish(); + } + + /// Finish session. + void Finish() override { + { + TGuard<TSpinLock> lock(Lock); + if (NeedFinish) + return; + if (HaveWriteInflight || !Responses.empty()) { + NeedFinish = true; + return; + } + HaveWriteInflight = true; + } + + Stream.Finish(Status::OK, new TFinishDone(this)); + } + + /// Send reply to client. + void Reply(const TResponse& resp) override { + { + TGuard<TSpinLock> lock(Lock); + if (NeedFinish) //ignore responses after finish + return; + if (HaveWriteInflight || !Responses.empty()) { + Responses.push(resp); + return; + } else { + HaveWriteInflight = true; + } + } + + ui64 size = resp.ByteSize(); + Stream.Write(resp, new TWriteDone(this, size)); + } + + void ReadyForNextRead() override { + { + TGuard<TSpinLock> lock(Lock); + if (NeedFinish) { + return; + } + } + + auto read = new TReadDone(this); + Stream.Read(&read->Request, read); + } + +protected: + grpc::ServerCompletionQueue* const CQ; + grpc::ServerContext Context; + grpc::ServerAsyncReaderWriter<TResponse, TRequest> + Stream; +private: + TSpinLock Lock; + bool HaveWriteInflight; + bool NeedFinish; + std::atomic<bool> ClientIsDone; + TClientDone* ClientDone; + TQueue<TResponse> Responses; //TODO: if Responses total size is too big - fail this session; +}; + +} +} diff --git a/ydb/services/deprecated/persqueue_v0/grpc_pq_write.cpp b/ydb/services/deprecated/persqueue_v0/grpc_pq_write.cpp new file mode 100644 index 0000000000..36ba3fa8f6 --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/grpc_pq_write.cpp @@ -0,0 +1,221 @@ +#include "grpc_pq_write.h" +#include "grpc_pq_actor.h" +#include "grpc_pq_session.h" +#include "ydb/core/client/server/grpc_proxy_status.h" + +#include <ydb/core/base/appdata.h> +#include <util/generic/queue.h> + +using namespace NActors; +using namespace NKikimrClient; + +using grpc::Status; + +namespace NKikimr { +namespace NGRpcProxy { + +using namespace NPersQueue; + +/////////////////////////////////////////////////////////////////////////////// + + +void TPQWriteServiceImpl::TSession::OnCreated() { // Start waiting for new session. 
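+    // Called from the gRPC completion queue once the new-session event is delivered:
+    // re-arm the acceptor first so further sessions keep being accepted, then apply
+    // backpressure by rejecting this session if the proxy already holds too many.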
+ Proxy->WaitWriteSession(); + if (Proxy->TooMuchSessions()) { + ReplyWithError("proxy overloaded", NPersQueue::NErrorCode::OVERLOAD); + return; + } + TMaybe<TString> localCluster = Proxy->AvailableLocalCluster(); + if (NeedDiscoverClusters) { + if (!localCluster.Defined()) { + ReplyWithError("initializing", NPersQueue::NErrorCode::INITIALIZING); + return; + } else if (localCluster->empty()) { + ReplyWithError("cluster disabled", NPersQueue::NErrorCode::CLUSTER_DISABLED); + return; + } else { + CreateActor(*localCluster); + } + } else { + CreateActor(TString()); + } + ReadyForNextRead(); +} + +void TPQWriteServiceImpl::TSession::OnRead(const TWriteRequest& request) { + + switch (request.GetRequestCase()) { + case TWriteRequest::kInit: { + SendEvent(new TEvPQProxy::TEvWriteInit(request, GetPeerName(), GetDatabase())); + break; + } + case TWriteRequest::kDataBatch: + case TWriteRequest::kData: { + SendEvent(new TEvPQProxy::TEvWrite(request)); + break; + } + default: { + ReplyWithError("unsupported request", NPersQueue::NErrorCode::BAD_REQUEST); + } + } +} + +void TPQWriteServiceImpl::TSession::OnDone() { + SendEvent(new TEvPQProxy::TEvDone()); +} + +TPQWriteServiceImpl::TSession::TSession(std::shared_ptr<TPQWriteServiceImpl> proxy, + grpc::ServerCompletionQueue* cq, ui64 cookie, const TActorId& schemeCache, + TIntrusivePtr<NMonitoring::TDynamicCounters> counters, bool needDiscoverClusters) + : ISession(cq) + , Proxy(proxy) + , Cookie(cookie) + , SchemeCache(schemeCache) + , Counters(counters) + , NeedDiscoverClusters(needDiscoverClusters) +{ +} + +void TPQWriteServiceImpl::TSession::Start() { + if (!Proxy->IsShuttingDown()) { + Proxy->RequestSession(&Context, &Stream, CQ, CQ, new TRequestCreated(this)); + } +} + +ui64 TPQWriteServiceImpl::TSession::GetCookie() const { + return Cookie; +} + +void TPQWriteServiceImpl::TSession::DestroyStream(const TString& reason, const NPersQueue::NErrorCode::EErrorCode errorCode) { + // Send poison pill to the actor(if it is alive) + SendEvent(new TEvPQProxy::TEvDieCommand("write-session " + ToString<ui64>(Cookie) + ": " + reason, errorCode)); + // Remove reference to session from "cookie -> session" map. + Proxy->ReleaseSession(this); +} + +bool TPQWriteServiceImpl::TSession::IsShuttingDown() const { + return Proxy->IsShuttingDown(); +} + +void TPQWriteServiceImpl::TSession::CreateActor(const TString &localCluster) { + + auto classifier = Proxy->GetClassifier(); + ActorId = Proxy->ActorSystem->Register( + new TWriteSessionActor(this, Cookie, SchemeCache, Counters, localCluster, + classifier ? 
classifier->ClassifyAddress(GetPeerName()) + : "unknown"), TMailboxType::Simple, 0 + ); +} + +void TPQWriteServiceImpl::TSession::SendEvent(IEventBase* ev) { + Proxy->ActorSystem->Send(ActorId, ev); +} + +/////////////////////////////////////////////////////////////////////////////// + + +TPQWriteServiceImpl::TPQWriteServiceImpl(grpc::ServerCompletionQueue* cq, + NActors::TActorSystem* as, const TActorId& schemeCache, + TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const ui32 maxSessions) + : CQ(cq) + , ActorSystem(as) + , SchemeCache(schemeCache) + , Counters(counters) + , MaxSessions(maxSessions) + , NeedDiscoverClusters(false) +{ +} + +void TPQWriteServiceImpl::InitClustersUpdater() +{ + TAppData* appData = ActorSystem->AppData<TAppData>(); + NeedDiscoverClusters = !appData->PQConfig.GetTopicsAreFirstClassCitizen(); + if (NeedDiscoverClusters) { + ActorSystem->Register(new TClustersUpdater(this)); + } +} + + +ui64 TPQWriteServiceImpl::NextCookie() { + return AtomicIncrement(LastCookie); +} + + +void TPQWriteServiceImpl::ReleaseSession(TSessionRef session) { + with_lock (Lock) { + bool erased = Sessions.erase(session->GetCookie()); + if (erased) { + ActorSystem->Send(MakeGRpcProxyStatusID(ActorSystem->NodeId), new TEvGRpcProxyStatus::TEvUpdateStatus(0, 0, -1, 0)); + } + } +} + + +void TPQWriteServiceImpl::SetupIncomingRequests() { + WaitWriteSession(); +} + + +void TPQWriteServiceImpl::WaitWriteSession() { + + const ui64 cookie = NextCookie(); + + ActorSystem->Send(MakeGRpcProxyStatusID(ActorSystem->NodeId), new TEvGRpcProxyStatus::TEvUpdateStatus(0,0,1,0)); + + TSessionRef session(new TSession(shared_from_this(), CQ, cookie, SchemeCache, Counters, NeedDiscoverClusters)); + { + with_lock (Lock) { + Sessions[cookie] = session; + } + } + + session->Start(); +} + + +bool TPQWriteServiceImpl::TooMuchSessions() { + with_lock (Lock) { + return Sessions.size() >= MaxSessions; + } +} + + +TMaybe<TString> TPQWriteServiceImpl::AvailableLocalCluster() { + with_lock (Lock) { + return AvailableLocalClusterName; + } +} + + +void TPQWriteServiceImpl::NetClassifierUpdated(NAddressClassifier::TLabeledAddressClassifier::TConstPtr classifier) { + auto g(Guard(Lock)); + if (!DatacenterClassifier) { + for (auto it = Sessions.begin(); it != Sessions.end();) { + auto jt = it++; + jt->second->DestroyStream("datacenter classifier initialized, restart session please", NPersQueue::NErrorCode::INITIALIZING); + } + } + + DatacenterClassifier = classifier; +} + + + +void TPQWriteServiceImpl::CheckClusterChange(const TString &localCluster, const bool enabled) { + with_lock (Lock) { + AvailableLocalClusterName = enabled ? 
localCluster : TString();
+
+        if (!enabled) {
+            for (auto it = Sessions.begin(); it != Sessions.end();) {
+                auto jt = it++;
+                jt->second->DestroyStream("cluster disabled", NPersQueue::NErrorCode::CLUSTER_DISABLED);
+            }
+        }
+    }
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+
+}
+}
diff --git a/ydb/services/deprecated/persqueue_v0/grpc_pq_write.h b/ydb/services/deprecated/persqueue_v0/grpc_pq_write.h
new file mode 100644
index 0000000000..35f1dd5a41
--- /dev/null
+++ b/ydb/services/deprecated/persqueue_v0/grpc_pq_write.h
@@ -0,0 +1,148 @@
+#pragma once
+
+#include "grpc_pq_clusters_updater_actor.h"
+#include "grpc_pq_session.h"
+
+#include <ydb/core/client/server/grpc_base.h>
+
+#include <ydb/services/deprecated/persqueue_v0/api/grpc/persqueue.grpc.pb.h>
+
+#include <library/cpp/actors/core/actorsystem.h>
+
+#include <util/generic/hash.h>
+#include <util/generic/maybe.h>
+#include <util/system/mutex.h>
+
+namespace NKikimr {
+namespace NGRpcProxy {
+
+// Class responsible for handling write requests.
+
+class TPQWriteServiceImpl : public IPQClustersUpdaterCallback, public std::enable_shared_from_this<TPQWriteServiceImpl> {
+
+    class TSession : public ISession<NPersQueue::TWriteRequest, NPersQueue::TWriteResponse>
+    {
+
+        void OnCreated() override;
+        void OnRead(const NPersQueue::TWriteRequest& request) override;
+        void OnDone() override;
+        void OnWriteDone(ui64) override {};
+
+    public:
+        TSession(std::shared_ptr<TPQWriteServiceImpl> proxy,
+             grpc::ServerCompletionQueue* cq, ui64 cookie, const TActorId& schemeCache,
+             TIntrusivePtr<NMonitoring::TDynamicCounters> counters, bool needDiscoverClusters);
+        void Start() override;
+        ui64 GetCookie() const;
+        void DestroyStream(const TString& reason, const NPersQueue::NErrorCode::EErrorCode errorCode) override;
+        bool IsShuttingDown() const override;
+
+    private:
+        void CreateActor(const TString& localCluster);
+        void SendEvent(NActors::IEventBase* ev);
+
+    private:
+        std::shared_ptr<TPQWriteServiceImpl> Proxy;
+        const ui64 Cookie;
+
+        NActors::TActorId ActorId;
+
+        const NActors::TActorId SchemeCache;
+
+        TIntrusivePtr<NMonitoring::TDynamicCounters> Counters;
+
+        bool NeedDiscoverClusters;
+    };
+    using TSessionRef = TIntrusivePtr<TSession>;
+
+public:
+    TPQWriteServiceImpl(grpc::ServerCompletionQueue* cq,
+                     NActors::TActorSystem* as, const NActors::TActorId& schemeCache, TIntrusivePtr<NMonitoring::TDynamicCounters> counters,
+                     const ui32 maxSessions);
+    virtual ~TPQWriteServiceImpl() = default;
+
+    void SetupIncomingRequests();
+
+    virtual void RequestSession(::grpc::ServerContext* context, ::grpc::ServerAsyncReaderWriter< ::NPersQueue::TWriteResponse, ::NPersQueue::TWriteRequest>* stream,
+                            ::grpc::CompletionQueue* new_call_cq, ::grpc::ServerCompletionQueue* notification_cq, void *tag) = 0;
+
+    void StopService() {
+        AtomicSet(ShuttingDown_, 1);
+    }
+
+    bool IsShuttingDown() const {
+        return AtomicGet(ShuttingDown_);
+    }
+    void InitClustersUpdater();
+
+private:
+    ui64 NextCookie();
+
+    //! Unregister a session object.
+    void ReleaseSession(TSessionRef session);
+
+    //! Start listening for incoming connections.
+ void WaitWriteSession(); + bool TooMuchSessions(); + TMaybe<TString> AvailableLocalCluster(); + NAddressClassifier::TLabeledAddressClassifier::TConstPtr GetClassifier() const { + auto g(Guard(Lock)); + return DatacenterClassifier; + } + void CheckClusterChange(const TString& localCluster, const bool enabled) override; + void NetClassifierUpdated(NAddressClassifier::TLabeledAddressClassifier::TConstPtr classifier) override; + +private: + grpc::ServerContext Context; + grpc::ServerCompletionQueue* CQ; + + NActors::TActorSystem* ActorSystem; + NActors::TActorId SchemeCache; + + TAtomic LastCookie = 0; + + TMutex Lock; + THashMap<ui64, TSessionRef> Sessions; + + TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; + + ui32 MaxSessions; + TMaybe<TString> AvailableLocalClusterName; + TString SelectSourceIdQuery; + TString UpdateSourceIdQuery; + TString DeleteSourceIdQuery; + + TAtomic ShuttingDown_ = 0; + + bool NeedDiscoverClusters; // Legacy mode OR account-mode in multi-cluster setup; + + NAddressClassifier::TLabeledAddressClassifier::TConstPtr DatacenterClassifier; // Detects client's datacenter by IP. May be null +}; + + +class TPQWriteService : public TPQWriteServiceImpl { +public: + TPQWriteService(NPersQueue::PersQueueService::AsyncService* service, + grpc::ServerCompletionQueue* cq, + NActors::TActorSystem* as, const NActors::TActorId& schemeCache, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, + const ui32 maxSessions) + : TPQWriteServiceImpl(cq, as, schemeCache, counters, maxSessions) + , Service(service) + {} + + virtual ~TPQWriteService() + {} + + void RequestSession(::grpc::ServerContext* context, ::grpc::ServerAsyncReaderWriter< ::NPersQueue::TWriteResponse, ::NPersQueue::TWriteRequest>* stream, + ::grpc::CompletionQueue* new_call_cq, ::grpc::ServerCompletionQueue* notification_cq, void *tag) override + { + Service->RequestWriteSession(context, stream, new_call_cq, notification_cq, tag); + } + +private: + NPersQueue::PersQueueService::AsyncService* Service; +}; + + +} +} diff --git a/ydb/services/deprecated/persqueue_v0/grpc_pq_write_actor.cpp b/ydb/services/deprecated/persqueue_v0/grpc_pq_write_actor.cpp new file mode 100644 index 0000000000..deac184e2f --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/grpc_pq_write_actor.cpp @@ -0,0 +1,1206 @@ +#include "grpc_pq_actor.h" + +#include <ydb/services/metadata/manager/common.h> +#include <ydb/core/persqueue/writer/metadata_initializers.h> + +#include <ydb/core/persqueue/pq_database.h> +#include <ydb/core/persqueue/write_meta.h> +#include <ydb/library/services/services.pb.h> +#include <ydb/public/lib/deprecated/kicli/kicli.h> +#include <ydb/library/persqueue/topic_parser/topic_parser.h> +#include <ydb/library/persqueue/topic_parser/counters.h> +#include <ydb/services/lib/sharding/sharding.h> + +#include <library/cpp/actors/core/log.h> +#include <util/string/hex.h> +#include <util/string/vector.h> +#include <util/string/escape.h> + +using namespace NActors; +using namespace NKikimrClient; + + +namespace NKikimr { +using namespace NMsgBusProxy::NPqMetaCacheV2; +using namespace NSchemeCache; +using namespace NPQ; + +template <> +void FillChunkDataFromReq(NKikimrPQClient::TDataChunk& proto, const NPersQueue::TWriteRequest::TData& data) { + proto.SetData(data.GetData()); + proto.SetSeqNo(data.GetSeqNo()); + proto.SetCreateTime(data.GetCreateTimeMs()); + proto.SetCodec(data.GetCodec()); +} + +template <> +void FillExtraFieldsForDataChunk( + const NPersQueue::TWriteRequest::TInit& init, + NKikimrPQClient::TDataChunk& data, 
+ TString& server, + TString& ident, + TString& logType, + TString& file +) { + for (ui32 i = 0; i < init.GetExtraFields().ItemsSize(); ++i) { + const auto& item = init.GetExtraFields().GetItems(i); + if (item.GetKey() == "server") { + server = item.GetValue(); + } else if (item.GetKey() == "ident") { + ident = item.GetValue(); + } else if (item.GetKey() == "logtype") { + logType = item.GetValue(); + } else if (item.GetKey() == "file") { + file = item.GetValue(); + } else { + auto res = data.MutableExtraFields()->AddItems(); + res->SetKey(item.GetKey()); + res->SetValue(item.GetValue()); + } + } +} + +namespace NGRpcProxy { + +using namespace NPersQueue; + +static const ui32 MAX_RESERVE_REQUESTS_INFLIGHT = 5; + +static const ui32 MAX_BYTES_INFLIGHT = 1_MB; +static const TDuration SOURCEID_UPDATE_PERIOD = TDuration::Hours(1); + +//TODO: add here tracking of bytes in/out + +TWriteSessionActor::TWriteSessionActor(IWriteSessionHandlerRef handler, const ui64 cookie, const TActorId& schemeCache, + TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const TString& localDC, + const TMaybe<TString> clientDC) + : Handler(handler) + , State(ES_CREATED) + , SchemeCache(schemeCache) + , PeerName("") + , Cookie(cookie) + , Partition(0) + , PreferedPartition(Max<ui32>()) + , NumReserveBytesRequests(0) + , WritesDone(false) + , Counters(counters) + , BytesInflight_(0) + , BytesInflightTotal_(0) + , NextRequestInited(false) + , NextRequestCookie(0) + , Token(nullptr) + , ACLCheckInProgress(true) + , FirstACLCheck(true) + , ForceACLCheck(false) + , RequestNotChecked(true) + , LastACLCheckTimestamp(TInstant::Zero()) + , LogSessionDeadline(TInstant::Zero()) + , BalancerTabletId(0) + , PipeToBalancer() + , LocalDC(localDC) + , ClientDC(clientDC ? *clientDC : "other") + , LastSourceIdUpdate(TInstant::Zero()) + , SourceIdCreateTime(0) + , SourceIdUpdatesInflight(0) + +{ + Y_ASSERT(Handler); +} + +TWriteSessionActor::~TWriteSessionActor() = default; + + +void TWriteSessionActor::Bootstrap(const TActorContext& ctx) { + if (!AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { + ++(*GetServiceCounters(Counters, "pqproxy|writeSession")->GetCounter("SessionsCreatedTotal", true)); + } + Become(&TThis::StateFunc); + const auto& pqConfig = AppData(ctx)->PQConfig; + SrcIdTableGeneration = pqConfig.GetTopicsAreFirstClassCitizen() ? 
ESourceIdTableGeneration::PartitionMapping + : ESourceIdTableGeneration::SrcIdMeta2; + + Database = NKikimr::NPQ::GetDatabaseFromConfig(pqConfig); + const auto& root = pqConfig.GetRoot(); + SelectSourceIdQuery = GetSelectSourceIdQuery(root, SrcIdTableGeneration); + UpdateSourceIdQuery = GetUpdateSourceIdQuery(root, SrcIdTableGeneration); + ConverterFactory = MakeHolder<NPersQueue::TTopicNamesConverterFactory>( + pqConfig, LocalDC + ); + StartTime = ctx.Now(); +} + + +void TWriteSessionActor::Die(const TActorContext& ctx) { + if (State == ES_DYING) + return; + if (Writer) + ctx.Send(Writer, new TEvents::TEvPoisonPill()); + + if (PipeToBalancer) + NTabletPipe::CloseClient(ctx, PipeToBalancer); + + if (SessionsActive) { + SessionsActive.Dec(); + BytesInflight.Dec(BytesInflight_); + BytesInflightTotal.Dec(BytesInflightTotal_); + } + + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session cookie: " << Cookie << " sessionId: " << OwnerCookie << " is DEAD"); + + TryCloseSession(ctx); + + if (!Handler->IsShuttingDown()) + Handler->Finish(); + + if (State == ES_WAIT_SESSION) { // final die will be done later, on session discover + State = ES_DYING; + return; + } + + State = ES_DYING; + + TryCloseSession(ctx); + + TActorBootstrapped<TWriteSessionActor>::Die(ctx); +} + +void TWriteSessionActor::TryCloseSession(const TActorContext& ctx) { + if (KqpSessionId) { + auto ev = MakeHolder<NKqp::TEvKqp::TEvCloseSessionRequest>(); + ev->Record.MutableRequest()->SetSessionId(KqpSessionId); + ctx.Send(NKqp::MakeKqpProxyID(ctx.SelfID.NodeId()), ev.Release()); + KqpSessionId = ""; + } +} + +void TWriteSessionActor::CheckFinish(const TActorContext& ctx) { + if (!WritesDone) + return; + if (State != ES_INITED) { + CloseSession("out of order Writes done before initialization", NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + if (Writes.empty() && FormedWrites.empty() && SentMessages.empty()) { + CloseSession("", NPersQueue::NErrorCode::OK, ctx); + return; + } +} + +void TWriteSessionActor::Handle(TEvPQProxy::TEvDone::TPtr&, const TActorContext& ctx) { + WritesDone = true; + CheckFinish(ctx); +} + +void TWriteSessionActor::CheckACL(const TActorContext& ctx) { + Y_ABORT_UNLESS(ACLCheckInProgress); + Y_ABORT_UNLESS(SecurityObject); + NACLib::EAccessRights rights = NACLib::EAccessRights::UpdateRow; + if (!AppData(ctx)->PQConfig.GetCheckACL() || SecurityObject->CheckAccess(rights, *Token)) { + ACLCheckInProgress = false; + if (FirstACLCheck) { + FirstACLCheck = false; + DiscoverPartition(ctx); + } + } else { + TString errorReason = Sprintf("access to topic '%s' denied for '%s' due to 'no WriteTopic rights', Marker# PQ1125", + DiscoveryConverter->GetPrintableString().c_str(), + Token->GetUserSID().c_str()); + CloseSession(errorReason, NPersQueue::NErrorCode::ACCESS_DENIED, ctx); + } +} + +void TWriteSessionActor::Handle(TEvPQProxy::TEvWriteInit::TPtr& ev, const TActorContext& ctx) { + THolder<TEvPQProxy::TEvWriteInit> event(ev->Release()); + + if (State != ES_CREATED) { + //answer error + CloseSession("got second init request", NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + const auto& init = event->Request.GetInit(); + + if (init.GetTopic().empty() || init.GetSourceId().empty()) { + CloseSession("no topic or SourceId in init request", NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + + if (init.GetProxyCookie() != ctx.SelfID.NodeId() && init.GetProxyCookie() != MAGIC_COOKIE_VALUE) { + CloseSession("you must perform ChooseProxy request at first and go to ProxyName server with 
ProxyCookie", + NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + + //1. Database - !(Root or empty) (Need to bring root DB(s) list to PQConfig) - ONLY search modern path /Database/Path + //2. No database. Try parse and resolve account to database. If possible, try search this path. + //3. Fallback from 2 - legacy mode. + + DiscoveryConverter = ConverterFactory->MakeDiscoveryConverter(init.GetTopic(), true, LocalDC, Database); + if (!DiscoveryConverter->IsValid()) { + CloseSession( + TStringBuilder() << "incorrect topic \"" << DiscoveryConverter->GetOriginalTopic() + << "\": " << DiscoveryConverter->GetReason(), + NPersQueue::NErrorCode::BAD_REQUEST, + ctx + ); + return; + } + PeerName = event->PeerName; + if (!event->Database.empty()) { + Database = event->Database; + } + + SourceId = init.GetSourceId(); + //TODO: check that sourceId does not have characters '"\_% - espace them on client may be? + + Auth = event->Request.GetCredentials(); + event->Request.ClearCredentials(); + Y_PROTOBUF_SUPPRESS_NODISCARD Auth.SerializeToString(&AuthStr); + + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session request cookie: " << Cookie << " " << init << ", Database: '" << Database << "' from " << PeerName); + UserAgent = init.GetVersion(); + LogSession(ctx); + + auto* request = new TEvDescribeTopicsRequest({DiscoveryConverter}); + //TODO: GetNode for /Root/PQ then describe from balancer + ctx.Send(SchemeCache, request); + State = ES_WAIT_SCHEME; + InitRequest = init; + PreferedPartition = init.GetPartitionGroup() > 0 ? init.GetPartitionGroup() - 1 : Max<ui32>(); +} + +void TWriteSessionActor::InitAfterDiscovery(const TActorContext& ctx) { + try { + EncodedSourceId = NSourceIdEncoding::EncodeSrcId(FullConverter->GetTopicForSrcIdHash(), SourceId, + SrcIdTableGeneration); + } catch (yexception& e) { + CloseSession(TStringBuilder() << "incorrect sourceId \"" << SourceId << "\": " << e.what(), NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + + InitMeta = GetInitialDataChunk(InitRequest, FullConverter->GetClientsideName(), PeerName); + + auto subGroup = GetServiceCounters(Counters, "pqproxy|SLI"); + Aggr = {{{{"Account", FullConverter->GetAccount()}}, {"total"}}}; + + SLIErrors = NKikimr::NPQ::TMultiCounter(subGroup, Aggr, {}, {"RequestsError"}, true, "sensor", false); + SLITotal = NKikimr::NPQ::TMultiCounter(subGroup, Aggr, {}, {"RequestsTotal"}, true, "sensor", false); + SLITotal.Inc(); +} + + +void TWriteSessionActor::SetupCounters() +{ + if (SessionsCreated) { + return; + } + + //now topic is checked, can create group for real topic, not garbage + auto subGroup = GetServiceCounters(Counters, "pqproxy|writeSession"); + Y_ABORT_UNLESS(FullConverter); + auto aggr = GetLabels(FullConverter); + + BytesInflight = NKikimr::NPQ::TMultiCounter(subGroup, aggr, {}, {"BytesInflight"}, false); + SessionsWithoutAuth = NKikimr::NPQ::TMultiCounter(subGroup, aggr, {}, {"WithoutAuth"}, true); + BytesInflightTotal = NKikimr::NPQ::TMultiCounter(subGroup, aggr, {}, {"BytesInflightTotal"}, false); + SessionsCreated = NKikimr::NPQ::TMultiCounter(subGroup, aggr, {}, {"SessionsCreated"}, true); + SessionsActive = NKikimr::NPQ::TMultiCounter(subGroup, aggr, {}, {"SessionsActive"}, false); + Errors = NKikimr::NPQ::TMultiCounter(subGroup, aggr, {}, {"Errors"}, true); + + CodecCounters.push_back(NKikimr::NPQ::TMultiCounter(subGroup, aggr, {{"codec", "user"}}, {"MessagesWrittenByCodec"}, true)); + + auto allNames = GetEnumAllCppNames<NPersQueueCommon::ECodec>(); + allNames.pop_back(); + allNames.pop_back(); + 
for (auto &name : allNames) { + auto nm = to_lower(name).substr(18); + CodecCounters.push_back(NKikimr::NPQ::TMultiCounter(subGroup, aggr, {{"codec", nm}}, {"MessagesWrittenByCodec"}, true)); + } + + SessionsCreated.Inc(); + SessionsActive.Inc(); +} + + +void TWriteSessionActor::SetupCounters(const TString& cloudId, const TString& dbId, const TString& dbPath, + bool isServerless, const TString& folderId) +{ + if (SessionsCreated) { + return; + } + + //now topic is checked, can create group for real topic, not garbage + auto subGroup = GetCountersForTopic(Counters, isServerless); + Y_ABORT_UNLESS(FullConverter); + auto subgroups = GetSubgroupsForTopic(FullConverter, cloudId, dbId, dbPath, folderId); + + SessionsCreated = NKikimr::NPQ::TMultiCounter(subGroup, {}, subgroups, {"api.grpc.topic.stream_write.sessions_created"}, true, "name"); + SessionsActive = NKikimr::NPQ::TMultiCounter(subGroup, {}, subgroups, {"api.grpc.topic.stream_write.sessions_active_count"}, false, "name"); + Errors = NKikimr::NPQ::TMultiCounter(subGroup, {}, subgroups, {"api.grpc.topic.stream_write.errors"}, true, "name"); + + SessionsCreated.Inc(); + SessionsActive.Inc(); +} + + +void TWriteSessionActor::Handle(TEvDescribeTopicsResponse::TPtr& ev, const TActorContext& ctx) { + Y_ABORT_UNLESS(State == ES_WAIT_SCHEME || State == ES_INITED); + auto& res = ev->Get()->Result; + Y_ABORT_UNLESS(res->ResultSet.size() == 1); + + auto& entry = res->ResultSet[0]; + TString errorReason; + + auto& path = entry.Path; + auto& topic = ev->Get()->TopicsRequested[0]; + switch (entry.Status) { + case TSchemeCacheNavigate::EStatus::RootUnknown: { + errorReason = Sprintf("path '%s' has incorrect root prefix, Marker# PQ14", JoinPath(path).c_str()); + CloseSession(errorReason, NPersQueue::NErrorCode::UNKNOWN_TOPIC, ctx); + return; + } + case TSchemeCacheNavigate::EStatus::PathErrorUnknown: { + errorReason = Sprintf("no path '%s', Marker# PQ151", JoinPath(path).c_str()); + CloseSession(errorReason, NPersQueue::NErrorCode::UNKNOWN_TOPIC, ctx); + return; + } + case TSchemeCacheNavigate::EStatus::Ok: + break; + default: { + errorReason = Sprintf("topic '%s' describe error, Status# %s, Marker# PQ1", path.back().c_str(), + ToString(entry.Status).c_str()); + CloseSession(errorReason, NPersQueue::NErrorCode::ERROR, ctx); + break; + } + } + if (!entry.PQGroupInfo) { + + errorReason = Sprintf("topic '%s' describe error, reason - could not retrieve topic metadata, Marker# PQ99", + topic->GetPrintableString().c_str()); + CloseSession(errorReason, NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + PQInfo = entry.PQGroupInfo; + const auto& description = PQInfo->Description; + //const TString topicName = description.GetName(); + + if (entry.Kind != TSchemeCacheNavigate::EKind::KindTopic) { + errorReason = Sprintf("item '%s' is not a topic, Marker# PQ13", DiscoveryConverter->GetPrintableString().c_str()); + CloseSession(errorReason, NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + if (!DiscoveryConverter->IsValid()) { + errorReason = Sprintf("Internal server error with topic '%s', Marker# PQ503", DiscoveryConverter->GetPrintableString().c_str()); + CloseSession(errorReason, NPersQueue::NErrorCode::ERROR, ctx); + return; + } + FullConverter = DiscoveryConverter->UpgradeToFullConverter(description.GetPQTabletConfig(), + AppData(ctx)->PQConfig.GetTestDatabaseRoot()); + InitAfterDiscovery(ctx); + SecurityObject = entry.SecurityObject; + + Y_ABORT_UNLESS(description.PartitionsSize() > 0); + + for (ui32 i = 0; i < description.PartitionsSize(); 
++i) { + const auto& pi = description.GetPartitions(i); + PartitionToTablet[pi.GetPartitionId()] = pi.GetTabletId(); + } + BalancerTabletId = description.GetBalancerTabletID(); + DatabaseId = description.GetPQTabletConfig().GetYdbDatabaseId(); + FolderId = description.GetPQTabletConfig().GetYcFolderId(); + + if (AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { + const auto& tabletConfig = description.GetPQTabletConfig(); + SetupCounters(tabletConfig.GetYcCloudId(), tabletConfig.GetYdbDatabaseId(), + tabletConfig.GetYdbDatabasePath(), entry.DomainInfo->IsServerless(), + tabletConfig.GetYcFolderId()); + } else { + SetupCounters(); + } + + if (!PipeToBalancer) { + NTabletPipe::TClientConfig clientConfig; + clientConfig.RetryPolicy = { + .RetryLimitCount = 6, + .MinRetryTime = TDuration::MilliSeconds(10), + .MaxRetryTime = TDuration::MilliSeconds(100), + .BackoffMultiplier = 2, + .DoFirstRetryInstantly = true + }; + PipeToBalancer = ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, BalancerTabletId, clientConfig)); + } + + if (Auth.GetCredentialsCase() == NPersQueueCommon::TCredentials::CREDENTIALS_NOT_SET) { + //ACLCheckInProgress is still true - no recheck will be done + LOG_WARN_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session without AuthInfo : " << DiscoveryConverter->GetPrintableString() + << " sourceId " << SourceId << " from " << PeerName); + SessionsWithoutAuth.Inc(); + if (AppData(ctx)->PQConfig.GetRequireCredentialsInNewProtocol()) { + CloseSession("Unauthenticated access is forbidden, please provide credentials", NPersQueue::NErrorCode::ACCESS_DENIED, ctx); + return; + } + if (FirstACLCheck) { + FirstACLCheck = false; + DiscoverPartition(ctx); + return; + } + } + + InitCheckACL(ctx); +} + +void TWriteSessionActor::InitCheckACL(const TActorContext& ctx) { + + Y_ABORT_UNLESS(ACLCheckInProgress); + + TString ticket; + switch (Auth.GetCredentialsCase()) { + case NPersQueueCommon::TCredentials::kTvmServiceTicket: + ticket = Auth.GetTvmServiceTicket(); + break; + case NPersQueueCommon::TCredentials::kOauthToken: + ticket = Auth.GetOauthToken(); + break; + default: + CloseSession("Uknown Credentials case", NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + + auto entries = NKikimr::NGRpcProxy::V1::GetTicketParserEntries(DatabaseId, FolderId); + ctx.Send(MakeTicketParserID(), new TEvTicketParser::TEvAuthorizeTicket({ + .Database = Database, + .Ticket = ticket, + .PeerName = PeerName, + .Entries = entries + })); +} + +void TWriteSessionActor::Handle(TEvTicketParser::TEvAuthorizeTicketResult::TPtr& ev, const TActorContext& ctx) { + Y_ABORT_UNLESS(ACLCheckInProgress); + TString ticket = ev->Get()->Ticket; + TString maskedTicket = ticket.size() > 5 ? (ticket.substr(0, 5) + "***" + ticket.substr(ticket.size() - 5)) : "***"; + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "CheckACL ticket " << maskedTicket << " got result from TICKET_PARSER response: error: " + << ev->Get()->Error << " user: " + << (ev->Get()->Error.empty() ? 
ev->Get()->Token->GetUserSID() : "")); + + if (!ev->Get()->Error.empty()) { + CloseSession(TStringBuilder() << "Ticket parsing error: " << ev->Get()->Error, NPersQueue::NErrorCode::ACCESS_DENIED, ctx); + return; + } + Token = ev->Get()->Token; + + + Y_ABORT_UNLESS(ACLCheckInProgress); + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session cookie: " << Cookie << " sessionId: " << OwnerCookie << " describe result for acl check"); + CheckACL(ctx); +} + + +void TWriteSessionActor::DiscoverPartition(const NActors::TActorContext& ctx) { + const auto &pqConfig = AppData(ctx)->PQConfig; + if (pqConfig.GetTopicsAreFirstClassCitizen()) { + if (pqConfig.GetUseSrcIdMetaMappingInFirstClass()) { + return SendCreateManagerRequest(ctx); + } + auto partitionId = PreferedPartition < Max<ui32>() + ? PreferedPartition + : NDataStreams::V1::CalculateShardFromSrcId(SourceId, PartitionToTablet.size()); + + ProceedPartition(partitionId, ctx); + return; + } + else { + StartSession(ctx); + } +} + +TString TWriteSessionActor::GetDatabaseName(const NActors::TActorContext& ctx) { + switch (SrcIdTableGeneration) { + case ESourceIdTableGeneration::SrcIdMeta2: + return NKikimr::NPQ::GetDatabaseFromConfig(AppData(ctx)->PQConfig); + case ESourceIdTableGeneration::PartitionMapping: + return AppData(ctx)->TenantName; + } +} + +void TWriteSessionActor::StartSession(const NActors::TActorContext& ctx) { + + auto ev = MakeHolder<NKqp::TEvKqp::TEvCreateSessionRequest>(); + ev->Record.MutableRequest()->SetDatabase(GetDatabaseName(ctx)); + ctx.Send(NKqp::MakeKqpProxyID(ctx.SelfID.NodeId()), ev.Release()); + + State = ES_WAIT_SESSION; +} + +void TWriteSessionActor::SendCreateManagerRequest(const TActorContext& ctx) { + ctx.Send( + NMetadata::NProvider::MakeServiceId(ctx.SelfID.NodeId()), + new NMetadata::NProvider::TEvPrepareManager(V1::TSrcIdMetaInitManager::GetInstant()) + ); +} + +void TWriteSessionActor::Handle( + NMetadata::NProvider::TEvManagerPrepared::TPtr&, const TActorContext& ctx +) { + StartSession(ctx); +} + +void TWriteSessionActor::Handle(NKqp::TEvKqp::TEvCreateSessionResponse::TPtr &ev, const NActors::TActorContext& ctx) +{ + Y_ABORT_UNLESS(State == ES_WAIT_SESSION || State == ES_DYING); + + const auto& record = ev->Get()->Record; + + KqpSessionId = record.GetResponse().GetSessionId(); + + if (State == ES_DYING) { + TryCloseSession(ctx); + TActorBootstrapped<TWriteSessionActor>::Die(ctx); + return; + } + + State = ES_WAIT_TABLE_REQUEST_1; + + if (record.GetYdbStatus() != Ydb::StatusIds::SUCCESS) { + TStringBuilder errorReason; + errorReason << "kqp error Marker# PQ53 : " << record; + CloseSession(errorReason, NPersQueue::NErrorCode::ERROR, ctx); + return; + } + + KqpSessionId = record.GetResponse().GetSessionId(); + Y_ABORT_UNLESS(!KqpSessionId.empty()); + + //read from DS + // Hash was always valid here, so new and old are the same + //topic was like "rt3.dc--account--topic" + SendSelectPartitionRequest(FullConverter->GetTopicForSrcId(), ctx); +} + + +void TWriteSessionActor::SendSelectPartitionRequest(const TString &topic, const NActors::TActorContext &ctx) { + auto ev = MakeHolder<NKqp::TEvKqp::TEvQueryRequest>(); + ev->Record.MutableRequest()->SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); + ev->Record.MutableRequest()->SetType(NKikimrKqp::QUERY_TYPE_SQL_DML); + ev->Record.MutableRequest()->SetSessionId(KqpSessionId); + ev->Record.MutableRequest()->SetQuery(SelectSourceIdQuery); + ev->Record.MutableRequest()->SetDatabase(GetDatabaseName(ctx)); + 
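+    // Note: the select below runs in a serializable read-write transaction that is
+    // deliberately left open (commit_tx = false); MakeUpdateSourceIdMetadataRequest()
+    // later commits the same TxId, so reading and updating the source-id mapping
+    // happen atomically.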
ev->Record.MutableRequest()->MutableTxControl()->set_commit_tx(false); + ev->Record.MutableRequest()->MutableTxControl()->mutable_begin_tx()->mutable_serializable_read_write(); + ev->Record.MutableRequest()->MutableQueryCachePolicy()->set_keep_in_cache(true); + + NYdb::TParamsBuilder paramsBuilder = NYdb::TParamsBuilder(); + + SetHashToTParamsBuilder(paramsBuilder, EncodedSourceId); + + paramsBuilder + .AddParam("$Topic") + .Utf8(topic) + .Build() + .AddParam("$SourceId") + .Utf8(EncodedSourceId.EscapedSourceId) + .Build(); + + NYdb::TParams params = paramsBuilder.Build(); + + ev->Record.MutableRequest()->MutableYdbParameters()->swap(*(NYdb::TProtoAccessor::GetProtoMapPtr(params))); + ctx.Send(NKqp::MakeKqpProxyID(ctx.SelfID.NodeId()), ev.Release()); + SelectReqsInflight++; +} + + +void TWriteSessionActor::UpdatePartition(const TActorContext& ctx) { + Y_ABORT_UNLESS(State == ES_WAIT_TABLE_REQUEST_1 || State == ES_WAIT_NEXT_PARTITION); + //Previously Topic contained legacy name with DC (rt3.dc1--acc--topic) + auto ev = MakeUpdateSourceIdMetadataRequest(FullConverter->GetTopicForSrcId(), ctx); + ctx.Send(NKqp::MakeKqpProxyID(ctx.SelfID.NodeId()), ev.Release()); + SourceIdUpdatesInflight++; + + State = ES_WAIT_TABLE_REQUEST_2; +} + +void TWriteSessionActor::RequestNextPartition(const TActorContext& ctx) { + Y_ABORT_UNLESS(State == ES_WAIT_TABLE_REQUEST_1); + State = ES_WAIT_NEXT_PARTITION; + THolder<TEvPersQueue::TEvGetPartitionIdForWrite> x(new TEvPersQueue::TEvGetPartitionIdForWrite); + Y_ABORT_UNLESS(PipeToBalancer); + + NTabletPipe::SendData(ctx, PipeToBalancer, x.Release()); +} + +void TWriteSessionActor::Handle(TEvPersQueue::TEvGetPartitionIdForWriteResponse::TPtr& ev, const TActorContext& ctx) { + Y_ABORT_UNLESS(State == ES_WAIT_NEXT_PARTITION); + Partition = ev->Get()->Record.GetPartitionId(); + UpdatePartition(ctx); +} + +void TWriteSessionActor::Handle(NKqp::TEvKqp::TEvQueryResponse::TPtr &ev, const TActorContext &ctx) { + auto& record = ev->Get()->Record.GetRef(); + const auto& pqConfig = AppData(ctx)->PQConfig; + + if (record.GetYdbStatus() == Ydb::StatusIds::ABORTED) { + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session cookie: " << Cookie << " sessionId: " << OwnerCookie << " sourceID " + << SourceId << " escaped " << EncodedSourceId.EscapedSourceId << " discover partition race, retrying"); + DiscoverPartition(ctx); + return; + } + + if (record.GetYdbStatus() != Ydb::StatusIds::SUCCESS) { + TStringBuilder errorReason; + errorReason << "kqp error Marker# PQ50 : " << record; + if (State == EState::ES_INITED) { + LOG_WARN_S(ctx, NKikimrServices::PQ_WRITE_PROXY, errorReason); + SourceIdUpdatesInflight--; + } else { + CloseSession(errorReason, NPersQueue::NErrorCode::ERROR, ctx); + } + return; + } + + if (State == EState::ES_WAIT_TABLE_REQUEST_1) { + SelectReqsInflight--; + auto& t = record.GetResponse().GetResults(0).GetValue().GetStruct(0); + + TxId = record.GetResponse().GetTxMeta().id(); + + if (t.ListSize() != 0) { + auto& tt = t.GetList(0).GetStruct(0); + if (tt.HasOptional() && tt.GetOptional().HasUint32()) { //already got partition + auto accessTime = t.GetList(0).GetStruct(2).GetOptional().GetUint64(); + if (accessTime > MaxSrcIdAccessTime) { // AccessTime + Partition = tt.GetOptional().GetUint32(); + PartitionFound = true; + SourceIdCreateTime = t.GetList(0).GetStruct(1).GetOptional().GetUint64(); + MaxSrcIdAccessTime = accessTime; + } + } + } + if (SelectReqsInflight != 0) { + return; + } + if (SourceIdCreateTime == 0) { + SourceIdCreateTime = 
TInstant::Now().MilliSeconds(); + } + if (PartitionFound && PreferedPartition < Max<ui32>() && Partition != PreferedPartition) { + CloseSession(TStringBuilder() << "SourceId " << SourceId << " is already bound to PartitionGroup " + << (Partition + 1) << ", but client provided " << (PreferedPartition + 1) + << ". SourceId->PartitionGroup binding cannot be changed, either use another SourceId, specify PartitionGroup " + << (Partition + 1) << ", or do not specify PartitionGroup at all.", + NPersQueue::NErrorCode::BAD_REQUEST, ctx); + return; + } + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session cookie: " << Cookie << " sessionId: " << OwnerCookie << " sourceID " + << SourceId << " escaped " << EncodedSourceId.EscapedSourceId << " hash " << EncodedSourceId.Hash << " partition " << Partition << " partitions " + << PartitionToTablet.size() << "(" << EncodedSourceId.Hash % PartitionToTablet.size() << ") create " << SourceIdCreateTime << " result " << t); + + if (!PartitionFound) { + auto partition = V1::GetPartitionFromConfigOptions(PreferedPartition, EncodedSourceId, + PartitionToTablet.size(), + pqConfig.GetTopicsAreFirstClassCitizen(), + pqConfig.GetRoundRobinPartitionMapping()); + if (partition.Defined()) { + PartitionFound = true; + Partition = *partition; + } + } + if (PartitionFound) { + UpdatePartition(ctx); + } else { + RequestNextPartition(ctx); + } + return; + } else if (State == EState::ES_WAIT_TABLE_REQUEST_2) { + Y_ABORT_UNLESS(SourceIdUpdatesInflight > 0); + SourceIdUpdatesInflight--; + if (SourceIdUpdatesInflight == 0) { + LastSourceIdUpdate = ctx.Now(); + TryCloseSession(ctx); + ProceedPartition(Partition, ctx); + } + } else if (State == EState::ES_INITED) { + Y_ABORT_UNLESS(SourceIdUpdatesInflight > 0); + SourceIdUpdatesInflight--; + if (SourceIdUpdatesInflight == 0) { + LastSourceIdUpdate = ctx.Now(); + } + } else { + Y_ABORT("Wrong state"); + } +} + +THolder<NKqp::TEvKqp::TEvQueryRequest> TWriteSessionActor::MakeUpdateSourceIdMetadataRequest( + const TString& topic, const TActorContext& ctx +) { + auto ev = MakeHolder<NKqp::TEvKqp::TEvQueryRequest>(); + + ev->Record.MutableRequest()->SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); + ev->Record.MutableRequest()->SetType(NKikimrKqp::QUERY_TYPE_SQL_DML); + ev->Record.MutableRequest()->SetQuery(UpdateSourceIdQuery); + ev->Record.MutableRequest()->SetDatabase(GetDatabaseName(ctx)); + if (KqpSessionId) { + ev->Record.MutableRequest()->SetSessionId(KqpSessionId); + } + ev->Record.MutableRequest()->MutableTxControl()->set_commit_tx(true); + if (TxId) { + ev->Record.MutableRequest()->MutableTxControl()->set_tx_id(TxId); + TxId = ""; + } else { + ev->Record.MutableRequest()->MutableTxControl()->mutable_begin_tx()->mutable_serializable_read_write(); + } + ev->Record.MutableRequest()->MutableQueryCachePolicy()->set_keep_in_cache(true); + + NYdb::TParamsBuilder paramsBuilder = NYdb::TParamsBuilder(); + + SetHashToTParamsBuilder(paramsBuilder, EncodedSourceId); + + paramsBuilder + .AddParam("$Topic") //Previously Topic contained legacy name with DC (rt3.dc1--acc--topic) + .Utf8(topic) + .Build() + .AddParam("$SourceId") + .Utf8(EncodedSourceId.EscapedSourceId) + .Build() + .AddParam("$CreateTime") + .Uint64(SourceIdCreateTime) + .Build() + .AddParam("$AccessTime") + .Uint64(TInstant::Now().MilliSeconds()) + .Build() + .AddParam("$Partition") + .Uint32(Partition) + .Build(); + + NYdb::TParams params = paramsBuilder.Build(); + + ev->Record.MutableRequest()->MutableYdbParameters()->swap(*(NYdb::TProtoAccessor::GetProtoMapPtr(params))); 
+ return ev; +} + + +void TWriteSessionActor::Handle(NKqp::TEvKqp::TEvProcessResponse::TPtr &ev, const TActorContext &ctx) { + auto& record = ev->Get()->Record; + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session cookie: " << Cookie << " sessionId: " << OwnerCookie << " sourceID " + << SourceId << " escaped " << EncodedSourceId.EscapedSourceId << " discover partition error - " << record); + CloseSession("Internal error on discovering partition", NPersQueue::NErrorCode::ERROR, ctx); +} + + +void TWriteSessionActor::ProceedPartition(const ui32 partition, const TActorContext& ctx) { + Partition = partition; + auto it = PartitionToTablet.find(Partition); + + ui64 tabletId = it != PartitionToTablet.end() ? it->second : 0; + + if (!tabletId) { + CloseSession( + Sprintf("no partition %u in topic '%s', Marker# PQ4", Partition, DiscoveryConverter->GetPrintableString().c_str()), + NPersQueue::NErrorCode::UNKNOWN_TOPIC, ctx + ); + return; + } + + Writer = ctx.RegisterWithSameMailbox(NPQ::CreatePartitionWriter(ctx.SelfID, {/* topicPath */}, tabletId, Partition, { /* expectedGeneration */}, SourceId)); + State = ES_WAIT_WRITER_INIT; + + ui32 border = AppData(ctx)->PQConfig.GetWriteInitLatencyBigMs(); + auto subGroup = GetServiceCounters(Counters, "pqproxy|SLI"); + + InitLatency = NKikimr::NPQ::CreateSLIDurationCounter(subGroup, Aggr, "WriteInit", border, {100, 200, 500, 1000, 1500, 2000, 5000, 10000, 30000, 99999999}); + SLIBigLatency = NKikimr::NPQ::TMultiCounter(subGroup, Aggr, {}, {"RequestsBigLatency"}, true, "sensor", false); + + ui32 initDurationMs = (ctx.Now() - StartTime).MilliSeconds(); + InitLatency.IncFor(initDurationMs, 1); + if (initDurationMs >= border) { + SLIBigLatency.Inc(); + } +} + +void TWriteSessionActor::CloseSession(const TString& errorReason, const NPersQueue::NErrorCode::EErrorCode errorCode, const NActors::TActorContext& ctx) { + if (errorCode != NPersQueue::NErrorCode::OK) { + if (InternalErrorCode(errorCode)) { + SLIErrors.Inc(); + } + + if (Errors) { + Errors.Inc(); + } else if (!AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { + ++(*GetServiceCounters(Counters, "pqproxy|writeSession")->GetCounter("Errors", true)); + } + + TWriteResponse result; + + auto error = result.MutableError(); + error->SetDescription(errorReason); + error->SetCode(errorCode); + + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, + "session error cookie: " << Cookie << " reason: \"" << errorReason << "\" code: " + << EErrorCode_Name(errorCode) << " sessionId: " << OwnerCookie); + + Handler->Reply(result); + } else { + LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session closed cookie: " << Cookie << " sessionId: " << OwnerCookie); + } + + Die(ctx); +} + +void TWriteSessionActor::Handle(NPQ::TEvPartitionWriter::TEvInitResult::TPtr& ev, const TActorContext& ctx) { + if (State != ES_WAIT_WRITER_INIT) { + return CloseSession("got init result but not wait for it", NPersQueue::NErrorCode::ERROR, ctx); + } + + const auto& result = *ev->Get(); + if (!result.IsSuccess()) { + const auto& error = result.GetError(); + if (error.Response.HasErrorCode()) { + return CloseSession("status is not ok: " + error.Response.GetErrorReason(), error.Response.GetErrorCode(), ctx); + } else { + return CloseSession("error at writer init: " + error.Reason, NPersQueue::NErrorCode::ERROR, ctx); + } + } + + OwnerCookie = result.GetResult().OwnerCookie; + const auto& maxSeqNo = result.GetResult().SourceIdInfo.GetSeqNo(); + + TWriteResponse response; + auto init = response.MutableInit(); + 
init->SetSessionId(EscapeC(OwnerCookie));
+    init->SetMaxSeqNo(maxSeqNo);
+    init->SetPartition(Partition);
+    Y_ABORT_UNLESS(FullConverter);
+    init->SetTopic(FullConverter->GetClientsideName());
+
+    LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session inited cookie: " << Cookie << " partition: " << Partition
+                                                        << " MaxSeqNo: " << maxSeqNo << " sessionId: " << OwnerCookie);
+
+    Handler->Reply(response);
+
+    State = ES_INITED;
+
+    ctx.Schedule(CHECK_ACL_DELAY, new TEvents::TEvWakeup());
+
+    //init completed; wait for the first data chunk
+    NextRequestInited = true;
+    Handler->ReadyForNextRead();
+}
+
+void TWriteSessionActor::Handle(NPQ::TEvPartitionWriter::TEvWriteAccepted::TPtr& ev, const TActorContext& ctx) {
+    if (State != ES_INITED) {
+        return CloseSession("got write permission but was not waiting for it", NPersQueue::NErrorCode::ERROR, ctx);
+    }
+
+    Y_ABORT_UNLESS(!FormedWrites.empty());
+    TWriteRequestBatchInfo::TPtr writeRequest = std::move(FormedWrites.front());
+
+    if (ev->Get()->Cookie != writeRequest->Cookie) {
+        return CloseSession("out of order reserve bytes response from server, maybe the previous one was lost", NPersQueue::NErrorCode::ERROR, ctx);
+    }
+
+    FormedWrites.pop_front();
+
+    ui64 diff = writeRequest->ByteSize;
+
+    SentMessages.emplace_back(std::move(writeRequest));
+
+    BytesInflight_ -= diff;
+    BytesInflight.Dec(diff);
+
+    if (!NextRequestInited && BytesInflight_ < MAX_BYTES_INFLIGHT) { //allow only one big request to be read but not yet sent
+        NextRequestInited = true;
+        Handler->ReadyForNextRead();
+    }
+
+    --NumReserveBytesRequests;
+    if (!Writes.empty())
+        GenerateNextWriteRequest(ctx);
+}
+
+void TWriteSessionActor::Handle(NPQ::TEvPartitionWriter::TEvWriteResponse::TPtr& ev, const TActorContext& ctx) {
+    if (State != ES_INITED) {
+        return CloseSession("got write response but was not waiting for it", NPersQueue::NErrorCode::ERROR, ctx);
+    }
+
+    const auto& result = *ev->Get();
+    if (!result.IsSuccess()) {
+        const auto& record = result.Record;
+        if (record.HasErrorCode()) {
+            return CloseSession("status is not ok: " + record.GetErrorReason(), record.GetErrorCode(), ctx);
+        } else {
+            return CloseSession("error at write: " + result.GetError().Reason, NPersQueue::NErrorCode::ERROR, ctx);
+        }
+    }
+
+    const auto& resp = result.Record.GetPartitionResponse();
+
+    if (SentMessages.empty()) {
+        CloseSession("got too many replies from server, internal error", NPersQueue::NErrorCode::ERROR, ctx);
+        return;
+    }
+
+    TWriteRequestBatchInfo::TPtr writeRequest = std::move(SentMessages.front());
+    SentMessages.pop_front();
+
+    if (resp.GetCookie() != writeRequest->Cookie) {
+        return CloseSession("out of order write response from server, maybe the previous one was lost", NPersQueue::NErrorCode::ERROR, ctx);
+    }
+
+    auto addAck = [](const TPersQueuePartitionResponse::TCmdWriteResult& res, TWriteResponse::TAck* ack, TWriteResponse::TStat* stat) {
+        ack->SetSeqNo(res.GetSeqNo());
+        ack->SetOffset(res.GetOffset());
+        ack->SetAlreadyWritten(res.GetAlreadyWritten());
+
+        stat->SetTotalTimeInPartitionQueueMs(
+            Max(res.GetTotalTimeInPartitionQueueMs(), stat->GetTotalTimeInPartitionQueueMs()));
+        stat->SetPartitionQuotedTimeMs(
+            Max(res.GetPartitionQuotedTimeMs(), stat->GetPartitionQuotedTimeMs()));
+        stat->SetTopicQuotedTimeMs(
+            Max(res.GetTopicQuotedTimeMs(), stat->GetTopicQuotedTimeMs()));
+        stat->SetWriteTimeMs(
+            Max(res.GetWriteTimeMs(), stat->GetWriteTimeMs()));
+    };
+
+    size_t cmdWriteResultIndex = 0;
+    for (const auto& userWriteRequest : writeRequest->UserWriteRequests) {
+        TWriteResponse result;
+        if (userWriteRequest->Request.HasDataBatch()) {
+            if (resp.CmdWriteResultSize() - cmdWriteResultIndex < userWriteRequest->Request.GetDataBatch().DataSize()) {
+                CloseSession("too few responses from server", NPersQueue::NErrorCode::ERROR, ctx);
+                return;
+            }
+            for (size_t endIndex = cmdWriteResultIndex + userWriteRequest->Request.GetDataBatch().DataSize(); cmdWriteResultIndex < endIndex; ++cmdWriteResultIndex) {
+                addAck(resp.GetCmdWriteResult(cmdWriteResultIndex),
+                       result.MutableAckBatch()->AddAck(),
+                       result.MutableAckBatch()->MutableStat());
+            }
+        } else {
+            Y_ABORT_UNLESS(userWriteRequest->Request.HasData());
+            if (cmdWriteResultIndex >= resp.CmdWriteResultSize()) {
+                CloseSession("too few responses from server", NPersQueue::NErrorCode::ERROR, ctx);
+                return;
+            }
+            auto* ack = result.MutableAck();
+            addAck(resp.GetCmdWriteResult(cmdWriteResultIndex), ack, ack->MutableStat());
+            ++cmdWriteResultIndex;
+        }
+        Handler->Reply(result);
+    }
+
+    ui64 diff = writeRequest->ByteSize;
+
+    BytesInflightTotal_ -= diff;
+    BytesInflightTotal.Dec(diff);
+
+    CheckFinish(ctx);
+}
+
+void TWriteSessionActor::Handle(NPQ::TEvPartitionWriter::TEvDisconnected::TPtr&, const TActorContext& ctx) {
+    CloseSession("pipe to partition's tablet is dead", NPersQueue::NErrorCode::ERROR, ctx);
+}
+
+void TWriteSessionActor::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) {
+    TEvTabletPipe::TEvClientConnected *msg = ev->Get();
+    if (msg->Status != NKikimrProto::OK) {
+        CloseSession(TStringBuilder() << "pipe to tablet is dead " << msg->TabletId, NPersQueue::NErrorCode::ERROR, ctx);
+        return;
+    }
+}
+
+void TWriteSessionActor::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) {
+    CloseSession(TStringBuilder() << "pipe to tablet is dead " << ev->Get()->TabletId, NPersQueue::NErrorCode::ERROR, ctx);
+}
+
+void TWriteSessionActor::GenerateNextWriteRequest(const TActorContext& ctx) {
+    TWriteRequestBatchInfo::TPtr writeRequest = new TWriteRequestBatchInfo();
+
+    auto ev = MakeHolder<NPQ::TEvPartitionWriter::TEvWriteRequest>(++NextRequestCookie);
+    NKikimrClient::TPersQueueRequest& request = ev->Record;
+
+    writeRequest->UserWriteRequests = std::move(Writes);
+    Writes.clear();
+
+    i64 diff = 0;
+    auto addData = [&](const TWriteRequest::TData& data) {
+        auto w = request.MutablePartitionRequest()->AddCmdWrite();
+        w->SetData(GetSerializedData(InitMeta, data));
+        w->SetClientDC(ClientDC);
+        w->SetSeqNo(data.GetSeqNo());
+        w->SetSourceId(NPQ::NSourceIdEncoding::EncodeSimple(SourceId)); // EncodeSimple is needed for compatibility with LB
+        //TODO: add clientId into SourceId when TVM is ready
+        w->SetCreateTimeMS(data.GetCreateTimeMs());
+        w->SetUncompressedSize(data.GetUncompressedSize());
+        w->SetIgnoreQuotaDeadline(true);
+    };
+
+    for (const auto& write : writeRequest->UserWriteRequests) {
+        diff -= write->Request.ByteSize();
+        if (write->Request.HasDataBatch()) {
+            for (const TWriteRequest::TData& data : write->Request.GetDataBatch().GetData()) {
+                addData(data);
+            }
+        } else { // single data
+            Y_ABORT_UNLESS(write->Request.HasData());
+            addData(write->Request.GetData());
+        }
+    }
+
+    writeRequest->Cookie = request.GetPartitionRequest().GetCookie();
+
+    Y_ABORT_UNLESS(-diff <= (i64)BytesInflight_);
+    diff += request.ByteSize();
+    BytesInflight_ += diff;
+    BytesInflightTotal_ += diff;
+    BytesInflight.Inc(diff);
+    BytesInflightTotal.Inc(diff);
+
+    writeRequest->ByteSize = request.ByteSize();
+    FormedWrites.push_back(writeRequest);
+
+    ctx.Send(Writer, std::move(ev));
+
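+    // One more reserve-bytes request is now in flight: TEvWriteAccepted decrements
+    // the counter, and TEvWrite forms new batches only while fewer than
+    // MAX_RESERVE_REQUESTS_INFLIGHT requests are outstanding.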
+    ++NumReserveBytesRequests;
+}
+
+TString TWriteSessionActor::CheckSupportedCodec(const ui32 codecId) {
+    TString err;
+    const auto& description = PQInfo->Description;
+    if (!description.GetPQTabletConfig().HasCodecs() || description.GetPQTabletConfig().GetCodecs().IdsSize() == 0)
+        return "";
+
+    Y_ABORT_UNLESS(description.PartitionsSize() > 0);
+    for (const auto& codec : description.GetPQTabletConfig().GetCodecs().GetIds()) {
+        if (codecId == codec) {
+            return "";
+        }
+    }
+    err = "Unsupported codec provided. Supported codecs for this topic are:";
+    bool first = true;
+    for (const auto& codec : description.GetPQTabletConfig().GetCodecs().GetCodecs()) {
+        if (first) {
+            first = false;
+        } else {
+            err += ",";
+        }
+        err += " " + codec;
+    }
+    return err;
+}
+
+
+void TWriteSessionActor::Handle(TEvPQProxy::TEvWrite::TPtr& ev, const TActorContext& ctx) {
+
+    RequestNotChecked = true;
+
+    if (State != ES_INITED) {
+        //answer error
+        CloseSession("write to a session that is not initialized", NPersQueue::NErrorCode::BAD_REQUEST, ctx);
+        return;
+    }
+
+    auto auth = ev->Get()->Request.GetCredentials();
+    ev->Get()->Request.ClearCredentials();
+    TString tmp;
+    Y_PROTOBUF_SUPPRESS_NODISCARD auth.SerializeToString(&tmp);
+    if (auth.GetCredentialsCase() != NPersQueueCommon::TCredentials::CREDENTIALS_NOT_SET && tmp != AuthStr) {
+        Auth = auth;
+        AuthStr = tmp;
+        ForceACLCheck = true;
+    }
+    auto dataCheck = [&](const TWriteRequest::TData& data) -> bool {
+        if (!data.GetSeqNo()) {
+            CloseSession("bad write request - SeqNo must be positive", NPersQueue::NErrorCode::BAD_REQUEST, ctx);
+            return false;
+        }
+
+        if (data.GetData().empty()) {
+            CloseSession("bad write request - data must be non-empty", NPersQueue::NErrorCode::BAD_REQUEST, ctx);
+            return false;
+        }
+        TString err = CheckSupportedCodec((ui32)data.GetCodec());
+        if (!err.empty()) {
+            CloseSession(err, NPersQueue::NErrorCode::BAD_REQUEST, ctx);
+            return false;
+        }
+        ui32 intCodec = ((ui32)data.GetCodec() + 1) < CodecCounters.size() ? ((ui32)data.GetCodec() + 1) : 0;
+        if (CodecCounters.size() > intCodec) {
+            CodecCounters[intCodec].Inc();
+        }
+
+        return true;
+    };
+    if (ev->Get()->Request.HasDataBatch()) {
+        for (const auto& data : ev->Get()->Request.GetDataBatch().GetData()) {
+            if (!dataCheck(data)) {
+                return;
+            }
+        }
+    } else {
+        Y_ABORT_UNLESS(ev->Get()->Request.HasData());
+        if (!dataCheck(ev->Get()->Request.GetData())) {
+            return;
+        }
+    }
+
+    THolder<TEvPQProxy::TEvWrite> event(ev->Release());
+    Writes.push_back(std::move(event));
+
+    ui64 diff = Writes.back()->Request.ByteSize();
+    BytesInflight_ += diff;
+    BytesInflightTotal_ += diff;
+    BytesInflight.Inc(diff);
+    BytesInflightTotal.Inc(diff);
+
+    if (BytesInflight_ < MAX_BYTES_INFLIGHT) { //allow only one big request to be read but not yet sent
+        Y_ABORT_UNLESS(NextRequestInited);
+        Handler->ReadyForNextRead();
+    } else {
+        NextRequestInited = false;
+    }
+
+    if (NumReserveBytesRequests < MAX_RESERVE_REQUESTS_INFLIGHT) {
+        GenerateNextWriteRequest(ctx);
+    }
+}
+
+
+void TWriteSessionActor::HandlePoison(TEvPQProxy::TEvDieCommand::TPtr& ev, const TActorContext& ctx) {
+    CloseSession(ev->Get()->Reason, ev->Get()->ErrorCode, ctx);
+}
+
+
+void TWriteSessionActor::LogSession(const TActorContext& ctx) {
+
+    LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "write session: cookie=" << Cookie << " sessionId=" << OwnerCookie
+            << " userAgent=\"" << UserAgent << "\" ip=" << PeerName << " proto=v0"
+            << " topic=" << DiscoveryConverter->GetPrintableString() << " durationSec=" << (ctx.Now() - StartTime).Seconds());
+
+    LogSessionDeadline = ctx.Now() + TDuration::Hours(1) + TDuration::Seconds(rand() % 60);
+}
+
+void TWriteSessionActor::HandleWakeup(const TActorContext& ctx) {
+    Y_ABORT_UNLESS(State == ES_INITED);
+    ctx.Schedule(CHECK_ACL_DELAY, new TEvents::TEvWakeup());
+    const auto& pqConfig = AppData(ctx)->PQConfig;
+    if (!ACLCheckInProgress && (ForceACLCheck || (ctx.Now() - LastACLCheckTimestamp > TDuration::Seconds(pqConfig.GetACLRetryTimeoutSec()) && RequestNotChecked))) {
+        ForceACLCheck = false;
+        RequestNotChecked = false;
+        if (Auth.GetCredentialsCase() != NPersQueueCommon::TCredentials::CREDENTIALS_NOT_SET) {
+            ACLCheckInProgress = true;
+            auto* request = new TEvDescribeTopicsRequest({DiscoveryConverter});
+            ctx.Send(SchemeCache, request);
+        }
+    }
+    if ((!pqConfig.GetTopicsAreFirstClassCitizen() || pqConfig.GetUseSrcIdMetaMappingInFirstClass())
+        && !SourceIdUpdatesInflight
+        && ctx.Now() - LastSourceIdUpdate > SOURCEID_UPDATE_PERIOD
+    ) {
+        SourceIdUpdatesInflight++;
+        Y_ABORT_UNLESS(FullConverter);
+        auto ev = MakeUpdateSourceIdMetadataRequest(FullConverter->GetTopicForSrcId(), ctx);
+        ctx.Send(NKqp::MakeKqpProxyID(ctx.SelfID.NodeId()), ev.Release());
+    }
+    if (ctx.Now() >= LogSessionDeadline) {
+        LogSession(ctx);
+    }
+}
+
+} // namespace NGRpcProxy
+} // namespace NKikimr
diff --git a/ydb/services/deprecated/persqueue_v0/move_topic_actor.cpp b/ydb/services/deprecated/persqueue_v0/move_topic_actor.cpp
new file mode 100644
index 0000000000..89b1c15579
--- /dev/null
+++ b/ydb/services/deprecated/persqueue_v0/move_topic_actor.cpp
@@ -0,0 +1,418 @@
+#include "move_topic_actor.h"
+#include <ydb/core/tx/tx_proxy/proxy.h>
+#include <ydb/core/cms/console/util.h>
+#include <ydb/core/protos/flat_scheme_op.pb.h>
+
+#include <google/protobuf/util/message_differencer.h>
+
+using namespace NKikimrSchemeOp;
+using namespace NKikimr::NSchemeShard;
+
+namespace NKikimr {
+namespace NGRpcService {
+using namespace NPersQueue;
+
+std::pair<TString, TString> SplitPathToDirAndName(TStringBuf path) {
+    TStringBuf fullPath(path);
+    TStringBuf dir, name;
+    auto res = fullPath.TryRSplit("/", dir, name);
+    if (!res || dir.empty() || name.empty()) {
+        return {};
+    } else {
+        return {TString(dir), TString(name)};
+    }
+}
+
+#define RESET_DOUBLE(name, value) \
+    first.Set##name(value); \
+    second.Set##name(value); \
+
+#define RESET_DOUBLE_OPT(proto, name, value) \
+    first##proto->Set##name(value); \
+    second##proto->Set##name(value); \
+
+
+bool ComparePQDescriptions(NKikimrSchemeOp::TPersQueueGroupDescription first,
+                           NKikimrSchemeOp::TPersQueueGroupDescription second) {
+
+    RESET_DOUBLE(Name, "");
+    RESET_DOUBLE(PathId, 0);
+    RESET_DOUBLE(AlterVersion, 0);
+
+    auto* firstTabletConfig = first.MutablePQTabletConfig();
+    auto* secondTabletConfig = second.MutablePQTabletConfig();
+    RESET_DOUBLE_OPT(TabletConfig, TopicName, "");
+    RESET_DOUBLE_OPT(TabletConfig, Version, 0);
+    RESET_DOUBLE_OPT(TabletConfig, YdbDatabaseId, "");
+    RESET_DOUBLE_OPT(TabletConfig, YdbDatabasePath, "");
+
+    NKikimrSchemeOp::TPersQueueGroupAllocate emptyAllocate{};
+    first.MutableAllocate()->CopyFrom(emptyAllocate);
+    second.MutableAllocate()->CopyFrom(emptyAllocate);
+    return google::protobuf::util::MessageDifferencer::Equals(first, second);
+}
+
+#undef RESET_DOUBLE
+#undef RESET_DOUBLE_OPT
+
+TMoveTopicActor::TMoveTopicActor(NGrpc::IRequestContextBase* request)
+    : TxPending(E_UNDEFINED)
+    , RequestCtx(request)
+{
+    const auto* req = GetProtoRequest();
+    SrcPath = req->source_path();
+    DstPath = req->destination_path();
+}
+
+const TMoveTopicRequest* TMoveTopicActor::GetProtoRequest() {
+    auto request = dynamic_cast<const TMoveTopicRequest*>(RequestCtx->GetRequest());
+    Y_ABORT_UNLESS(request != nullptr);
+    return request;
+}
+
+void TMoveTopicActor::Bootstrap(const NActors::TActorContext& ctx) {
+    Become(&TThis::StateWork);
+    if (SrcPath == DstPath) {
+        Reply(Ydb::StatusIds::BAD_REQUEST,
+              TStringBuilder() << "Source and destination path are equal in request: '"
+                               << GetProtoRequest()->GetSourcePath() << "' and '"
+                               << GetProtoRequest()->GetDestinationPath() << "'");
+        return; // Reply() destroys the actor, do not continue with the ACL request
+    }
+    SendAclRequest(ctx);
+}
+
+void TMoveTopicActor::SendDescribeRequest(const TString& path, const TActorContext& ctx) {
+    LOG_DEBUG_S(ctx, NKikimrServices::PQ_MOVE_TOPIC, "Describe path: " << path);
+
+    std::unique_ptr<TEvTxUserProxy::TEvNavigate> navigateRequest(new TEvTxUserProxy::TEvNavigate());
+    NKikimrSchemeOp::TDescribePath* record = navigateRequest->Record.MutableDescribePath();
+    record->SetPath(path);
+    ctx.Send(MakeTxProxyID(), navigateRequest.release());
+}
+
+void TMoveTopicActor::HandleDescribeResponse(
+        TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev, const TActorContext& ctx
+) {
+
+    auto& record = ev->Get()->GetRecord();
+    const auto& path = record.GetPath();
+
+    LOG_DEBUG_S(ctx, NKikimrServices::PQ_MOVE_TOPIC, "Got describe path result for path: " << path);
+    bool isSrc;
+    if (path == SrcPath) {
+        isSrc = true;
+    } else if (path == DstPath) {
+        isSrc = false;
+    } else {
+        Verify(false, TStringBuilder() << "Got DescribeResponse for unknown path: " << path, ctx);
+        return;
+    }
+    auto& exists = isSrc ? SrcExists : DstExists;
+    const auto status = record.GetStatus();
+    switch (status) {
+        case NKikimrScheme::StatusSuccess:
+        {
+            exists = true;
+            if (isSrc) {
+                SrcDescription = ev;
+            } else {
+                DstDescription = ev;
+            }
+            if (!record.GetPathDescription().HasPersQueueGroup()) {
+                Reply(Ydb::StatusIds::BAD_REQUEST,
+                      TStringBuilder() << "Path exists, but it is not a topic: " << path);
+                return;
+            }
+            if (!isSrc) {
+                break;
+            }
+            auto& pqDescr = record.GetPathDescription().GetPersQueueGroup();
+            if (!pqDescr.HasAllocate() || !pqDescr.GetAllocate().HasBalancerTabletID()
+                                       || pqDescr.GetAllocate().GetBalancerTabletID() == 0) {
+                Reply(Ydb::StatusIds::UNSUPPORTED,
+                      TStringBuilder() << "Could not get PQAllocate for topic: " << path);
+                return;
+            }
+            if (pqDescr.GetAllocate().GetPQTabletConfig().GetFederationAccount().empty()) {
+                Reply(Ydb::StatusIds::PRECONDITION_FAILED,
+                      TStringBuilder() << "Cannot move topic with no federation account specified: " << path);
+                return;
+            }
+            if (pqDescr.GetAllocate().GetAlterVersion() != pqDescr.GetAlterVersion()) {
+                Reply(Ydb::StatusIds::INTERNAL_ERROR,
+                      TStringBuilder() << "Pq allocate alter version mismatch for path: " << path);
+                return;
+            }
+            break;
+        }
+        case NKikimrScheme::StatusPathDoesNotExist:
+        case NKikimrScheme::StatusSchemeError:
+            exists = false;
+            break;
+        default:
+            Reply(Ydb::StatusIds::GENERIC_ERROR, TStringBuilder() << "Failed to describe path: " << path);
+            return;
+    }
+    if (SrcExists.Defined() && DstExists.Defined()) {
+        return ProcessDescriptions(ctx);
+    }
+}
+
+void TMoveTopicActor::ProcessDescriptions(const TActorContext& ctx) {
+    if (*SrcExists) {
+        if (!Verify(SrcDescription != nullptr,
+                    TStringBuilder() << "No src path description when it exists: " << SrcPath, ctx))
+            return;
+
+        auto& srcPqDescr = SrcDescription->Get()->GetRecord().GetPathDescription().GetPersQueueGroup();
+        if (*DstExists) {
+            if (!Verify(DstDescription != nullptr,
+                        TStringBuilder() << "No dst path description when it exists: " << DstPath, ctx))
+                return;
+
+            auto& dstPqDescr = DstDescription->Get()->GetRecord().GetPathDescription().GetPersQueueGroup();
+            if (!GetProtoRequest()->GetSkipDestinationCheck() && !ComparePQDescriptions(srcPqDescr, dstPqDescr)) {
+                Reply(Ydb::StatusIds::BAD_REQUEST,
+                      TStringBuilder() << "Both source and destination exist but differ: " << DstPath);
+                return;
+            } else {
+                /** Destination already exists, deallocate the source */
+                return SendDeallocateRequest(ctx);
+            }
+        } else {
+            return SendAllocateRequest(srcPqDescr.GetAllocate(), ctx);
+        }
+    } else {
+        if (*DstExists) {
+            /** Source doesn't exist, destination exists.
Suppose already moved */ + Reply(Ydb::StatusIds::ALREADY_EXISTS); + return; + } else { + Reply(Ydb::StatusIds::NOT_FOUND, TStringBuilder() << "Source path " << SrcPath << " doesn't exist"); + return; + } + } +} + +void TMoveTopicActor::SendAllocateRequest( + const TPersQueueGroupAllocate& pqAllocate, const TActorContext& ctx +) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_MOVE_TOPIC, "Send allocate PQ Group on path: " << DstPath); + + TxPending = E_ALLOCATE; + std::unique_ptr <TEvTxUserProxy::TEvProposeTransaction> proposal(new TEvTxUserProxy::TEvProposeTransaction()); + + auto pathPair = SplitPathToDirAndName(DstPath); + if (pathPair.first.empty()) { + Reply(Ydb::StatusIds::BAD_REQUEST, TStringBuilder() << "Bad destination path: " << DstPath); + return; + + } + auto* modifyScheme = proposal->Record.MutableTransaction()->MutableModifyScheme(); + modifyScheme->SetOperationType(NKikimrSchemeOp::EOperationType::ESchemeOpAllocatePersQueueGroup); + auto* allocate = modifyScheme->MutableAllocatePersQueueGroup(); + allocate->CopyFrom(pqAllocate); + + modifyScheme->SetWorkingDir(pathPair.first); + allocate->SetName(pathPair.second); + + ctx.Send(MakeTxProxyID(), proposal.release()); +} + +void TMoveTopicActor::SendDeallocateRequest(const TActorContext& ctx) { + if (GetProtoRequest()->GetDoNotDeallocate()) { + Reply(Ydb::StatusIds::SUCCESS); + return; + } + TxPending = E_DEALLOCATE; + LOG_DEBUG_S(ctx, NKikimrServices::PQ_MOVE_TOPIC, "Send deallocate PQ Group on path: " << SrcPath); + + auto pathPair = SplitPathToDirAndName(SrcPath); + if (pathPair.first.empty()) { + Reply(Ydb::StatusIds::BAD_REQUEST, + TStringBuilder() << "Bad source path: " << SrcPath); + return; + } + + std::unique_ptr <TEvTxUserProxy::TEvProposeTransaction> proposal(new TEvTxUserProxy::TEvProposeTransaction()); + auto* modifyScheme = proposal->Record.MutableTransaction()->MutableModifyScheme(); + modifyScheme->SetOperationType(NKikimrSchemeOp::EOperationType::ESchemeOpDeallocatePersQueueGroup); + modifyScheme->SetWorkingDir(pathPair.first); + auto* deallocate = modifyScheme->MutableDeallocatePersQueueGroup(); + deallocate->SetName(pathPair.second); + ctx.Send(MakeTxProxyID(), proposal.release()); +} + +void TMoveTopicActor::HandleProposeStatus(TEvTxUserProxy::TEvProposeTransactionStatus::TPtr& ev, + const TActorContext& ctx) { + auto& rec = ev->Get()->Record; + switch (rec.GetStatus()) { + case TEvTxUserProxy::TEvProposeTransactionStatus::EStatus::ExecComplete: + return OnTxComplete(rec.GetTxId(), ctx); + case TEvTxUserProxy::TEvProposeTransactionStatus::EStatus::ExecInProgress: + TxId = rec.GetTxId(); + SchemeShardTabletId = rec.GetSchemeShardTabletId(); + SendNotifyRequest(ctx); + break; + case TEvTxUserProxy::TEvProposeTransactionStatus::EStatus::ExecError: + switch (TxPending) { + case E_DEALLOCATE: + // Check if removal finished or in-progress. 
+ Reply(Ydb::StatusIds::GENERIC_ERROR, + TStringBuilder() << "Source path deallocate failed: " << SrcPath << " with status: " + << ev->Get()->Record.DebugString()); + return; + case E_ALLOCATE: + Reply(Ydb::StatusIds::BAD_REQUEST, + TStringBuilder() << "Cannot allocate path: " << DstPath); + return; + default: + Verify(false, "Unknown TxState (HandleProposeStatus)", ctx); + return; + } + default: + { + Reply(Ydb::StatusIds::GENERIC_ERROR, + TStringBuilder() << "Scheme shard tx failed"); + return; + } + } +} + +void TMoveTopicActor::HandleTxComplete(TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& ev, const TActorContext& ctx) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_MOVE_TOPIC, "Got TEvNotifyTxCompletionResult: " + << ev->Get()->Record.ShortDebugString()); + + OnTxComplete(ev->Get()->Record.GetTxId(), ctx); +} + +void TMoveTopicActor::OnTxComplete(ui64 txId, const TActorContext& ctx) { + if (!Verify(txId, "Empty tx id on TxCompleteHandler.", ctx)) + return; + + switch (TxPending) { + case E_ALLOCATE: + if (!Verify(!AllocateTxId, "Non-empty AllocateTxId on first TxComplete Handler.", ctx)) return; + AllocateTxId = txId; + return SendDeallocateRequest(ctx); + + case E_DEALLOCATE: + if (txId != AllocateTxId) { + Reply(Ydb::StatusIds::SUCCESS); + } else { + LOG_WARN_S(ctx, NKikimrServices::PQ_MOVE_TOPIC, "Duplicate completion for TxId: " << txId << ", ignored"); + } + return; + default: + Verify(false, "Unknown TxState - OnTxComplete", ctx); + return; + } +} + +void TMoveTopicActor::SendNotifyRequest(const TActorContext &ctx) { + if (!Pipe) + OpenPipe(ctx); + + auto request = MakeHolder<TEvSchemeShard::TEvNotifyTxCompletion>(); + request->Record.SetTxId(TxId); + + NTabletPipe::SendData(ctx, Pipe, request.Release()); +} + +void TMoveTopicActor::OpenPipe(const TActorContext &ctx) +{ + if (!Verify(SchemeShardTabletId, "Empty SchemeShardTabletId", ctx)) return; + NTabletPipe::TClientConfig pipeConfig; + pipeConfig.RetryPolicy = NConsole::FastConnectRetryPolicy(); + auto pipe = NTabletPipe::CreateClient(ctx.SelfID, SchemeShardTabletId, pipeConfig); + Pipe = ctx.ExecutorThread.RegisterActor(pipe); +} + +void TMoveTopicActor::OnPipeDestroyed(const TActorContext &ctx) +{ + if (Pipe) { + NTabletPipe::CloseClient(ctx, Pipe); + Pipe = TActorId(); + } + SendNotifyRequest(ctx); +} + +void TMoveTopicActor::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) { + if (ev->Get()->Status != NKikimrProto::OK) { + OnPipeDestroyed(ctx); + } +} + +void TMoveTopicActor::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr&, const TActorContext& ctx) { + OnPipeDestroyed(ctx); +} + +void TMoveTopicActor::Reply(const Ydb::StatusIds::StatusCode status, const TString& error) { + TMoveTopicResponse response; + response.set_status(status); + if (error) { + auto* issue = response.mutable_issues()->Add(); + //google::protobuf::RepeatedPtrField<Ydb::Issue::IssueMessage> issues; + //auto& issue = *issues.Add(); + issue->set_severity(NYql::TSeverityIds::S_ERROR); + issue->set_message(error); + } + RequestCtx->Reply(&response); + PassAway(); + //Reply(status, ErrorToIssues(error)); +} + +bool TMoveTopicActor::Verify(bool condition, const TString& error, const TActorContext& ctx) { + if (condition) { + return true; + } + LOG_ALERT_S(ctx, NKikimrServices::PQ_MOVE_TOPIC, error << "(THIS IS A BUG)"); + Reply(Ydb::StatusIds::INTERNAL_ERROR, + TStringBuilder() << error << " This is a bug"); + return false; + +} + +// Auth +void TMoveTopicActor::SendAclRequest(const TActorContext& ctx) { + const auto req = 
GetProtoRequest(); + TString ticket = req->token(); + ctx.Send(MakeTicketParserID(), new TEvTicketParser::TEvAuthorizeTicket(ticket)); +} + +void TMoveTopicActor::HandleAclResponse(TEvTicketParser::TEvAuthorizeTicketResult::TPtr& ev, const TActorContext& ctx) { + TString ticket = ev->Get()->Ticket; + TString maskedTicket = ticket.size() > 5 ? (ticket.substr(0, 5) + "***" + ticket.substr(ticket.size() - 5)) : "***"; + LOG_INFO_S(ctx, NKikimrServices::PQ_MOVE_TOPIC, "CheckACL ticket " << maskedTicket << " got result from TICKET_PARSER response: error: " + << ev->Get()->Error << " user: " + << (ev->Get()->Error.empty() ? ev->Get()->Token->GetUserSID() : "")); + + if (!ev->Get()->Error.empty()) { + Reply(Ydb::StatusIds::UNAUTHORIZED, + TStringBuilder() << "Ticket parsing error: " << ev->Get()->Error); + return; + } + auto sid = ev->Get()->Token->GetUserSID(); + auto& pqConfig = AppData(ctx)->PQConfig; + bool authRes = false; + if (pqConfig.HasMoveTopicActorConfig()) { + for (auto& allowed : pqConfig.GetMoveTopicActorConfig().GetAllowedUserSIDs()) { + if (allowed == sid) { + authRes = true; + break; + } + } + } + if (!authRes) { + Reply(Ydb::StatusIds::UNAUTHORIZED, + TStringBuilder() << "User: " << sid << " is not authorized to make this request"); + return; + } + SendDescribeRequest(SrcPath, ctx); + SendDescribeRequest(DstPath, ctx); +} + + +} //namespace NGRpcProxy +} // namespace NKikimr diff --git a/ydb/services/deprecated/persqueue_v0/move_topic_actor.h b/ydb/services/deprecated/persqueue_v0/move_topic_actor.h new file mode 100644 index 0000000000..bf8a1fbfa6 --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/move_topic_actor.h @@ -0,0 +1,82 @@ +#pragma once + +#include "grpc_pq_actor.h" +#include <ydb/core/grpc_services/rpc_operation_request_base.h> +#include <ydb/core/grpc_services/base/base.h> +#include <ydb/services/deprecated/persqueue_v0/api/protos/persqueue.pb.h> + +namespace NKikimr { +namespace NGRpcService { + +class TMoveTopicActor : public TActorBootstrapped<TMoveTopicActor> { +public: + + TMoveTopicActor(NGrpc::IRequestContextBase* request); + + ~TMoveTopicActor() = default; + + void Bootstrap(const NActors::TActorContext& ctx); + + STRICT_STFUNC(StateWork, { + HFunc(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult, HandleDescribeResponse) + HFunc(TEvTxUserProxy::TEvProposeTransactionStatus, HandleProposeStatus) + HFunc(NSchemeShard::TEvSchemeShard::TEvNotifyTxCompletionResult, HandleTxComplete) + HFunc(TEvTicketParser::TEvAuthorizeTicketResult, HandleAclResponse) + HFunc(TEvTabletPipe::TEvClientConnected, Handle) + HFunc(TEvTabletPipe::TEvClientDestroyed, Handle) + IgnoreFunc(NSchemeShard::TEvSchemeShard::TEvNotifyTxCompletionRegistered) + }) +private: + //SS requests + void SendAllocateRequest(const NKikimrSchemeOp::TPersQueueGroupAllocate& pqAllocate, const TActorContext& ctx); + void SendDeallocateRequest(const TActorContext& ctx); + void SendDescribeRequest(const TString& path, const TActorContext& ctx); + + // Handlers + void HandleDescribeResponse(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev, + const TActorContext& ctx); + + void HandleProposeStatus(TEvTxUserProxy::TEvProposeTransactionStatus::TPtr& ev, const TActorContext& ctx); + void HandleTxComplete(NSchemeShard::TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& ev, const TActorContext& ctx); + + void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx); + void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr&, const TActorContext& ctx); + + //Helpers + void 
ProcessDescriptions(const TActorContext& ctx); + void OnTxComplete(ui64 txId, const TActorContext& ctx); + void OpenPipe(const TActorContext &ctx); + void OnPipeDestroyed(const TActorContext &ctx); + void SendNotifyRequest(const TActorContext &ctx); + const NPersQueue::TMoveTopicRequest* GetProtoRequest(); + void Reply(const Ydb::StatusIds::StatusCode status, const TString& error = TString()); + bool Verify(bool condition, const TString& error, const TActorContext& ctx); + + //Auth + void SendAclRequest(const TActorContext& ctx); + void HandleAclResponse(TEvTicketParser::TEvAuthorizeTicketResult::TPtr& ev, const TActorContext& ctx); + +private: + enum { + E_UNDEFINED, + E_ALLOCATE, + E_DEALLOCATE + } TxPending; + + ui64 TxId = 0; + ui64 AllocateTxId = 0; + ui64 SchemeShardTabletId; + TActorId Pipe; + + TString SrcPath; + TString DstPath; + NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr SrcDescription; + NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr DstDescription; + TMaybe<bool> SrcExists; + TMaybe<bool> DstExists; + + TIntrusivePtr<NGrpc::IRequestContextBase> RequestCtx; +}; + +} //namespace NGRpcProxy +} // namespace NKikimr diff --git a/ydb/services/deprecated/persqueue_v0/persqueue.cpp b/ydb/services/deprecated/persqueue_v0/persqueue.cpp new file mode 100644 index 0000000000..67b796e6c4 --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/persqueue.cpp @@ -0,0 +1,80 @@ +#include "persqueue.h" +#include "grpc_pq_read.h" +#include "grpc_pq_write.h" + +#include <ydb/core/base/appdata.h> +#include <ydb/core/base/counters.h> +#include "move_topic_actor.h" + +namespace NKikimr { +namespace NGRpcService { + +static const ui32 PersQueueWriteSessionsMaxCount = 1000000; +static const ui32 PersQueueReadSessionsMaxCount = 100000; + +void DoMovePersQueueTopic(TActorSystem* actorSystem, NGrpc::IRequestContextBase* ctx) { + actorSystem->Register(new TMoveTopicActor(ctx)); +} + +TGRpcPersQueueService::TGRpcPersQueueService(NActors::TActorSystem *system, + TIntrusivePtr<NMonitoring::TDynamicCounters> counters, + const NActors::TActorId& schemeCache) + : ActorSystem(system) + , Counters(counters) + , SchemeCache(schemeCache) +{ } + +void TGRpcPersQueueService::InitService(grpc::ServerCompletionQueue *cq, NGrpc::TLoggerPtr logger) { + CQ = cq; + if (ActorSystem->AppData<TAppData>()->PQConfig.GetEnabled()) { + WriteService.reset(new NGRpcProxy::TPQWriteService(GetService(), CQ, ActorSystem, SchemeCache, Counters, PersQueueWriteSessionsMaxCount)); + WriteService->InitClustersUpdater(); + ReadService.reset(new NGRpcProxy::TPQReadService(this, CQ, ActorSystem, SchemeCache, Counters, PersQueueReadSessionsMaxCount)); + SetupIncomingRequests(logger); + } +} + +void TGRpcPersQueueService::SetGlobalLimiterHandle(NGrpc::TGlobalLimiter* limiter) { + Limiter = limiter; +} + +bool TGRpcPersQueueService::IncRequest() { + return Limiter->Inc(); +} + +void TGRpcPersQueueService::DecRequest() { + Limiter->Dec(); +} + +#ifdef ADD_REQUEST +#error ADD_REQUEST macro already defined +#endif +#define ADD_REQUEST(NAME, CB) \ + MakeIntrusive<TGRpcRequest<NPersQueue::NAME##Request, NPersQueue::NAME##Response, TGRpcPersQueueService>> \ + (this, &Service_, CQ, \ + [this](NGrpc::IRequestContextBase* ctx) { \ + NGRpcService::ReportGrpcReqToMon(*ActorSystem, ctx->GetPeer()); \ + CB(this->ActorSystem, ctx); \ + }, &NPersQueue::PersQueueService::AsyncService::Request##NAME , \ + #NAME, logger, getCounterBlock("operation", #NAME))->Run(); + + +void 
TGRpcPersQueueService::SetupIncomingRequests(NGrpc::TLoggerPtr logger) { + WriteService->SetupIncomingRequests(); + ReadService->SetupIncomingRequests(); + auto getCounterBlock = CreateCounterCb(Counters, ActorSystem); + ADD_REQUEST(MoveTopic, DoMovePersQueueTopic); +} + +void TGRpcPersQueueService::StopService() noexcept { + TGrpcServiceBase::StopService(); + if (WriteService.get() != nullptr) { + WriteService->StopService(); + } + if (ReadService.get() != nullptr) { + ReadService->StopService(); + } +} + +} // namespace NGRpcService +} // namespace NKikimr diff --git a/ydb/services/deprecated/persqueue_v0/persqueue.h b/ydb/services/deprecated/persqueue_v0/persqueue.h new file mode 100644 index 0000000000..93e6a65808 --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/persqueue.h @@ -0,0 +1,49 @@ +#pragma once + +#include <library/cpp/actors/core/actorsystem.h> + +#include <ydb/services/deprecated/persqueue_v0/api/grpc/persqueue.grpc.pb.h> + +#include <library/cpp/grpc/server/grpc_server.h> + + +namespace NKikimr { + +namespace NGRpcProxy { + class TPQWriteService; + class TPQReadService; +} + +namespace NGRpcService { + +class TGRpcPersQueueService + : public NGrpc::TGrpcServiceBase<NPersQueue::PersQueueService> +{ +public: + TGRpcPersQueueService(NActors::TActorSystem* system, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const NActors::TActorId& schemeCache); + + void InitService(grpc::ServerCompletionQueue* cq, NGrpc::TLoggerPtr logger) override; + void SetGlobalLimiterHandle(NGrpc::TGlobalLimiter* limiter) override; + void StopService() noexcept override; + + using NGrpc::TGrpcServiceBase<NPersQueue::PersQueueService>::GetService; + + bool IncRequest(); + void DecRequest(); + +private: + void SetupIncomingRequests(NGrpc::TLoggerPtr logger); + + NActors::TActorSystem* ActorSystem; + grpc::ServerCompletionQueue* CQ = nullptr; + + TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; + NGrpc::TGlobalLimiter* Limiter = nullptr; + NActors::TActorId SchemeCache; + + std::shared_ptr<NGRpcProxy::TPQWriteService> WriteService; + std::shared_ptr<NGRpcProxy::TPQReadService> ReadService; +}; + +} // namespace NGRpcService +} // namespace NKikimr diff --git a/ydb/services/deprecated/persqueue_v0/ya.make b/ydb/services/deprecated/persqueue_v0/ya.make new file mode 100644 index 0000000000..540ae764dc --- /dev/null +++ b/ydb/services/deprecated/persqueue_v0/ya.make @@ -0,0 +1,35 @@ +LIBRARY() + +SRCS( + grpc_pq_clusters_updater_actor.cpp + grpc_pq_read.cpp + grpc_pq_read_actor.cpp + grpc_pq_write.cpp + grpc_pq_write_actor.cpp + move_topic_actor.cpp + persqueue.cpp +) + +PEERDIR( + ydb/services/deprecated/persqueue_v0/api/grpc + ydb/services/deprecated/persqueue_v0/api/protos + ydb/library/persqueue/deprecated/read_batch_converter + ydb/core/base + ydb/core/tx/tx_proxy + ydb/core/client/server + ydb/core/grpc_services + ydb/core/mind/address_classification + ydb/core/persqueue + ydb/core/persqueue/events + ydb/core/persqueue/writer + ydb/core/protos + ydb/library/aclib + ydb/library/persqueue/topic_parser + ydb/services/lib/actors + ydb/services/lib/sharding + ydb/services/persqueue_v1 + ydb/services/metadata +) + +END() + diff --git a/ydb/services/ya.make b/ydb/services/ya.make index da97baa73c..3928a57970 100644 --- a/ydb/services/ya.make +++ b/ydb/services/ya.make @@ -15,6 +15,8 @@ RECURSE( monitoring persqueue_cluster_discovery persqueue_v1 + deprecated/persqueue_v0 + deprecated/persqueue_v0/api rate_limiter ext_index ydb |
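For orientation, a minimal sketch of how the deprecated v0 service could be wired into an embedding gRPC server, based only on the constructor and InitService() signatures added above; the grpcServer, actorSystem, counters, and schemeCacheId variables are hypothetical and not part of this commit:

    // Hypothetical wiring: register the deprecated persqueue v0 service so the
    // server later invokes InitService() with its completion queue. The three
    // constructor arguments match the declaration in persqueue.h above.
    grpcServer.AddService(new NKikimr::NGRpcService::TGRpcPersQueueService(
        actorSystem, counters, schemeCacheId));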