aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDmitry Kardymon <kardymon-d@ydb.tech>2024-09-23 14:36:20 +0300
committerGitHub <noreply@github.com>2024-09-23 14:36:20 +0300
commit350eccc5ec38c8d683e9f062da542a40c4a01aac (patch)
tree6591b3c92d94a8de4b2251bf4c425011e1db1dd1
parentb69d55544f8699966642bd3a31cacae325d38ffa (diff)
downloadydb-350eccc5ec38c8d683e9f062da542a40c4a01aac.tar.gz
YQ-3322 Row dispatcher (#5544)
-rw-r--r--ydb/core/fq/libs/actors/clusters_from_connections.cpp1
-rw-r--r--ydb/core/fq/libs/actors/logging/log.h7
-rw-r--r--ydb/core/fq/libs/checkpointing/checkpoint_coordinator.h2
-rw-r--r--ydb/core/fq/libs/config/protos/fq_config.proto2
-rw-r--r--ydb/core/fq/libs/config/protos/row_dispatcher.proto23
-rw-r--r--ydb/core/fq/libs/config/protos/ya.make1
-rw-r--r--ydb/core/fq/libs/events/event_subspace.h2
-rw-r--r--ydb/core/fq/libs/events/ya.make2
-rw-r--r--ydb/core/fq/libs/init/init.cpp13
-rw-r--r--ydb/core/fq/libs/init/ya.make1
-rw-r--r--ydb/core/fq/libs/row_dispatcher/actors_factory.cpp37
-rw-r--r--ydb/core/fq/libs/row_dispatcher/actors_factory.h25
-rw-r--r--ydb/core/fq/libs/row_dispatcher/coordinator.cpp304
-rw-r--r--ydb/core/fq/libs/row_dispatcher/coordinator.h19
-rw-r--r--ydb/core/fq/libs/row_dispatcher/events/data_plane.cpp10
-rw-r--r--ydb/core/fq/libs/row_dispatcher/events/data_plane.h125
-rw-r--r--ydb/core/fq/libs/row_dispatcher/events/ya.make14
-rw-r--r--ydb/core/fq/libs/row_dispatcher/json_filter.cpp300
-rw-r--r--ydb/core/fq/libs/row_dispatcher/json_filter.h34
-rw-r--r--ydb/core/fq/libs/row_dispatcher/json_parser.cpp302
-rw-r--r--ydb/core/fq/libs/row_dispatcher/json_parser.h30
-rw-r--r--ydb/core/fq/libs/row_dispatcher/leader_election.cpp482
-rw-r--r--ydb/core/fq/libs/row_dispatcher/leader_election.h21
-rw-r--r--ydb/core/fq/libs/row_dispatcher/protos/events.proto78
-rw-r--r--ydb/core/fq/libs/row_dispatcher/protos/ya.make15
-rw-r--r--ydb/core/fq/libs/row_dispatcher/row_dispatcher.cpp608
-rw-r--r--ydb/core/fq/libs/row_dispatcher/row_dispatcher.h27
-rw-r--r--ydb/core/fq/libs/row_dispatcher/row_dispatcher_service.cpp32
-rw-r--r--ydb/core/fq/libs/row_dispatcher/row_dispatcher_service.h26
-rw-r--r--ydb/core/fq/libs/row_dispatcher/topic_session.cpp776
-rw-r--r--ydb/core/fq/libs/row_dispatcher/topic_session.h24
-rw-r--r--ydb/core/fq/libs/row_dispatcher/ut/coordinator_ut.cpp166
-rw-r--r--ydb/core/fq/libs/row_dispatcher/ut/json_filter_ut.cpp91
-rw-r--r--ydb/core/fq/libs/row_dispatcher/ut/json_parser_ut.cpp110
-rw-r--r--ydb/core/fq/libs/row_dispatcher/ut/leader_election_ut.cpp140
-rw-r--r--ydb/core/fq/libs/row_dispatcher/ut/row_dispatcher_ut.cpp342
-rw-r--r--ydb/core/fq/libs/row_dispatcher/ut/topic_session_ut.cpp357
-rw-r--r--ydb/core/fq/libs/row_dispatcher/ut/ya.make29
-rw-r--r--ydb/core/fq/libs/row_dispatcher/ya.make39
-rw-r--r--ydb/core/fq/libs/ya.make1
-rw-r--r--ydb/core/kqp/query_compiler/kqp_query_compiler.cpp2
-rw-r--r--ydb/library/services/services.proto1
-rw-r--r--ydb/library/yql/dq/actors/common/retry_queue.cpp (renamed from ydb/library/yql/dq/actors/compute/retry_queue.cpp)73
-rw-r--r--ydb/library/yql/dq/actors/common/retry_queue.h (renamed from ydb/library/yql/dq/actors/compute/retry_queue.h)36
-rw-r--r--ydb/library/yql/dq/actors/common/ut/retry_events_queue_ut.cpp191
-rw-r--r--ydb/library/yql/dq/actors/common/ut/ya.make16
-rw-r--r--ydb/library/yql/dq/actors/common/ya.make19
-rw-r--r--ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.h2
-rw-r--r--ydb/library/yql/dq/actors/compute/ya.make4
-rw-r--r--ydb/library/yql/dq/integration/yql_dq_integration.h2
-rw-r--r--ydb/library/yql/providers/clickhouse/provider/yql_clickhouse_dq_integration.cpp2
-rw-r--r--ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp2
-rw-r--r--ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h2
-rw-r--r--ydb/library/yql/providers/common/proto/gateways_config.proto1
-rw-r--r--ydb/library/yql/providers/common/pushdown/physical_opt.cpp73
-rw-r--r--ydb/library/yql/providers/common/pushdown/physical_opt.h11
-rw-r--r--ydb/library/yql/providers/common/pushdown/type_ann.cpp36
-rw-r--r--ydb/library/yql/providers/common/pushdown/type_ann.h17
-rw-r--r--ydb/library/yql/providers/common/pushdown/ya.make2
-rw-r--r--ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.h2
-rw-r--r--ydb/library/yql/providers/dq/planner/execution_planner.cpp2
-rw-r--r--ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp2
-rw-r--r--ydb/library/yql/providers/generic/provider/ya.make1
-rw-r--r--ydb/library/yql/providers/generic/provider/yql_generic_datasource_type_ann.cpp33
-rw-r--r--ydb/library/yql/providers/generic/provider/yql_generic_dq_integration.cpp2
-rw-r--r--ydb/library/yql/providers/generic/provider/yql_generic_physical_opt.cpp61
-rw-r--r--ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.cpp239
-rw-r--r--ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.h2
-rw-r--r--ydb/library/yql/providers/pq/async_io/dq_pq_rd_read_actor.cpp697
-rw-r--r--ydb/library/yql/providers/pq/async_io/dq_pq_rd_read_actor.h39
-rw-r--r--ydb/library/yql/providers/pq/async_io/dq_pq_read_actor.cpp114
-rw-r--r--ydb/library/yql/providers/pq/async_io/dq_pq_read_actor_base.cpp86
-rw-r--r--ydb/library/yql/providers/pq/async_io/dq_pq_read_actor_base.h51
-rw-r--r--ydb/library/yql/providers/pq/async_io/ya.make6
-rw-r--r--ydb/library/yql/providers/pq/common/yql_names.h2
-rw-r--r--ydb/library/yql/providers/pq/expr_nodes/yql_pq_expr_nodes.json4
-rw-r--r--ydb/library/yql/providers/pq/proto/dq_io.proto4
-rw-r--r--ydb/library/yql/providers/pq/provider/ya.make12
-rw-r--r--ydb/library/yql/providers/pq/provider/yql_pq_datasource_type_ann.cpp10
-rw-r--r--ydb/library/yql/providers/pq/provider/yql_pq_dq_integration.cpp88
-rw-r--r--ydb/library/yql/providers/pq/provider/yql_pq_logical_opt.cpp78
-rw-r--r--ydb/library/yql/providers/pq/provider/yql_pq_settings.cpp1
-rw-r--r--ydb/library/yql/providers/pq/provider/yql_pq_settings.h1
-rw-r--r--ydb/library/yql/providers/s3/actors/yql_s3_raw_read_actor.cpp2
-rw-r--r--ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp2
-rw-r--r--ydb/library/yql/providers/s3/actors/yql_s3_source_queue.cpp2
-rw-r--r--ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp3
-rw-r--r--ydb/library/yql/providers/solomon/provider/yql_solomon_dq_integration.cpp2
-rw-r--r--ydb/library/yql/providers/ydb/provider/yql_ydb_dq_integration.cpp2
-rw-r--r--ydb/tests/fq/pq_async_io/ut/dq_pq_rd_read_actor_ut.cpp359
-rw-r--r--ydb/tests/fq/pq_async_io/ut/dq_pq_read_actor_ut.cpp (renamed from ydb/tests/fq/pq_async_io/dq_pq_read_actor_ut.cpp)2
-rw-r--r--ydb/tests/fq/pq_async_io/ut/dq_pq_write_actor_ut.cpp (renamed from ydb/tests/fq/pq_async_io/dq_pq_write_actor_ut.cpp)2
-rw-r--r--ydb/tests/fq/pq_async_io/ut/ya.make28
-rw-r--r--ydb/tests/fq/pq_async_io/ut_helpers.h1
-rw-r--r--ydb/tests/fq/pq_async_io/ya.make13
-rw-r--r--ydb/tests/fq/yds/test_row_dispatcher.py681
-rw-r--r--ydb/tests/fq/yds/ya.make1
-rw-r--r--ydb/tests/tools/fq_runner/fq_client.py4
-rw-r--r--ydb/tests/tools/fq_runner/kikimr_runner.py14
99 files changed, 7519 insertions, 246 deletions
diff --git a/ydb/core/fq/libs/actors/clusters_from_connections.cpp b/ydb/core/fq/libs/actors/clusters_from_connections.cpp
index c9a6e31f952..99c0a6c82ca 100644
--- a/ydb/core/fq/libs/actors/clusters_from_connections.cpp
+++ b/ydb/core/fq/libs/actors/clusters_from_connections.cpp
@@ -51,6 +51,7 @@ void FillPqClusterConfig(NYql::TPqClusterConfig& clusterConfig,
clusterConfig.SetUseSsl(ds.secure());
clusterConfig.SetAddBearerToToken(useBearerForYdb);
clusterConfig.SetClusterType(TPqClusterConfig::CT_DATA_STREAMS);
+ clusterConfig.SetSharedReading(ds.shared_reading());
FillClusterAuth(clusterConfig, ds.auth(), authToken, accountIdSignatures);
}
diff --git a/ydb/core/fq/libs/actors/logging/log.h b/ydb/core/fq/libs/actors/logging/log.h
index d5513bc49b7..a5e79b85706 100644
--- a/ydb/core/fq/libs/actors/logging/log.h
+++ b/ydb/core/fq/libs/actors/logging/log.h
@@ -47,6 +47,13 @@
#define LOG_STREAMS_STORAGE_SERVICE_AS_WARN(actorSystem, logRecordStream) LOG_STREAMS_IMPL_AS(actorSystem, WARN, STREAMS_STORAGE_SERVICE, logRecordStream)
#define LOG_STREAMS_STORAGE_SERVICE_AS_ERROR(actorSystem, logRecordStream) LOG_STREAMS_IMPL_AS(actorSystem, ERROR, STREAMS_STORAGE_SERVICE, logRecordStream)
+// Component: FQ_ROW_DISPATCHER.
+#define LOG_ROW_DISPATCHER_TRACE(logRecordStream) LOG_STREAMS_IMPL(TRACE, FQ_ROW_DISPATCHER, LogPrefix << logRecordStream)
+#define LOG_ROW_DISPATCHER_DEBUG(logRecordStream) LOG_STREAMS_IMPL(DEBUG, FQ_ROW_DISPATCHER, LogPrefix << logRecordStream)
+#define LOG_ROW_DISPATCHER_INFO(logRecordStream) LOG_STREAMS_IMPL(INFO, FQ_ROW_DISPATCHER, LogPrefix << logRecordStream)
+#define LOG_ROW_DISPATCHER_WARN(logRecordStream) LOG_STREAMS_IMPL(WARN, FQ_ROW_DISPATCHER, LogPrefix << logRecordStream)
+#define LOG_ROW_DISPATCHER_ERROR(logRecordStream) LOG_STREAMS_IMPL(ERROR, FQ_ROW_DISPATCHER, LogPrefix << logRecordStream)
+
// Component: STREAMS_SCHEDULER_SERVICE.
#define LOG_STREAMS_SCHEDULER_SERVICE_EMERG(logRecordStream) LOG_STREAMS_IMPL(EMERG, STREAMS_SCHEDULER_SERVICE, logRecordStream)
#define LOG_STREAMS_SCHEDULER_SERVICE_ALERT(logRecordStream) LOG_STREAMS_IMPL(ALERT, STREAMS_SCHEDULER_SERVICE, logRecordStream)
diff --git a/ydb/core/fq/libs/checkpointing/checkpoint_coordinator.h b/ydb/core/fq/libs/checkpointing/checkpoint_coordinator.h
index fd683ba62d8..280130f3816 100644
--- a/ydb/core/fq/libs/checkpointing/checkpoint_coordinator.h
+++ b/ydb/core/fq/libs/checkpointing/checkpoint_coordinator.h
@@ -11,7 +11,7 @@
#include <ydb/public/api/protos/draft/fq.pb.h>
#include <ydb/library/yql/dq/actors/compute/dq_compute_actor.h>
-#include <ydb/library/yql/dq/actors/compute/retry_queue.h>
+#include <ydb/library/yql/dq/actors/common/retry_queue.h>
#include <ydb/library/yql/providers/dq/actors/events.h>
#include <ydb/library/yql/providers/dq/actors/task_controller_impl.h>
diff --git a/ydb/core/fq/libs/config/protos/fq_config.proto b/ydb/core/fq/libs/config/protos/fq_config.proto
index fcca1dafe28..edbf50b40b6 100644
--- a/ydb/core/fq/libs/config/protos/fq_config.proto
+++ b/ydb/core/fq/libs/config/protos/fq_config.proto
@@ -22,6 +22,7 @@ import "ydb/core/fq/libs/config/protos/quotas_manager.proto";
import "ydb/core/fq/libs/config/protos/rate_limiter.proto";
import "ydb/core/fq/libs/config/protos/read_actors_factory.proto";
import "ydb/core/fq/libs/config/protos/resource_manager.proto";
+import "ydb/core/fq/libs/config/protos/row_dispatcher.proto";
import "ydb/core/fq/libs/config/protos/test_connection.proto";
import "ydb/core/fq/libs/config/protos/token_accessor.proto";
import "ydb/library/folder_service/proto/config.proto";
@@ -53,4 +54,5 @@ message TConfig {
TRateLimiterConfig RateLimiter = 22;
bool EnableTaskCounters = 23;
TComputeConfig Compute = 24;
+ TRowDispatcherConfig RowDispatcher = 25;
}
diff --git a/ydb/core/fq/libs/config/protos/row_dispatcher.proto b/ydb/core/fq/libs/config/protos/row_dispatcher.proto
new file mode 100644
index 00000000000..10ca10285ea
--- /dev/null
+++ b/ydb/core/fq/libs/config/protos/row_dispatcher.proto
@@ -0,0 +1,23 @@
+syntax = "proto3";
+option cc_enable_arenas = true;
+
+package NFq.NConfig;
+option java_package = "ru.yandex.kikimr.proto";
+
+import "ydb/core/fq/libs/config/protos/storage.proto";
+
+////////////////////////////////////////////////////////////
+
+message TRowDispatcherCoordinatorConfig {
+ TYdbStorageConfig Database = 1;
+ string CoordinationNodePath = 2;
+}
+message TRowDispatcherConfig {
+ bool Enabled = 1;
+ uint64 TimeoutBeforeStartSessionSec = 2;
+ uint64 SendStatusPeriodSec = 3;
+ uint64 MaxSessionUsedMemory = 4;
+ bool WithoutConsumer = 5;
+ TRowDispatcherCoordinatorConfig Coordinator = 6;
+
+}
diff --git a/ydb/core/fq/libs/config/protos/ya.make b/ydb/core/fq/libs/config/protos/ya.make
index 2bf50a9a41f..92acb431961 100644
--- a/ydb/core/fq/libs/config/protos/ya.make
+++ b/ydb/core/fq/libs/config/protos/ya.make
@@ -22,6 +22,7 @@ SRCS(
rate_limiter.proto
read_actors_factory.proto
resource_manager.proto
+ row_dispatcher.proto
storage.proto
test_connection.proto
token_accessor.proto
diff --git a/ydb/core/fq/libs/events/event_subspace.h b/ydb/core/fq/libs/events/event_subspace.h
index 5bef4dc1ea4..d88d44d1a8e 100644
--- a/ydb/core/fq/libs/events/event_subspace.h
+++ b/ydb/core/fq/libs/events/event_subspace.h
@@ -32,7 +32,7 @@ struct TYqEventSubspace {
ControlPlaneConfig,
YdbCompute,
TableOverFq,
-
+ RowDispatcher,
SubspacesEnd,
};
diff --git a/ydb/core/fq/libs/events/ya.make b/ydb/core/fq/libs/events/ya.make
index ad44506d869..21f47a99f65 100644
--- a/ydb/core/fq/libs/events/ya.make
+++ b/ydb/core/fq/libs/events/ya.make
@@ -8,9 +8,11 @@ PEERDIR(
ydb/library/actors/core
ydb/core/fq/libs/graph_params/proto
ydb/core/fq/libs/protos
+ ydb/core/fq/libs/row_dispatcher/protos
ydb/library/yql/core/facade
ydb/library/yql/providers/common/db_id_async_resolver
ydb/library/yql/providers/dq/provider
+ ydb/library/yql/providers/pq/proto
ydb/library/yql/public/issue
ydb/public/api/protos
ydb/public/sdk/cpp/client/ydb_table
diff --git a/ydb/core/fq/libs/init/init.cpp b/ydb/core/fq/libs/init/init.cpp
index f7af6efea34..69657d1cc2f 100644
--- a/ydb/core/fq/libs/init/init.cpp
+++ b/ydb/core/fq/libs/init/init.cpp
@@ -17,6 +17,7 @@
#include <ydb/core/fq/libs/rate_limiter/events/control_plane_events.h>
#include <ydb/core/fq/libs/rate_limiter/events/data_plane.h>
#include <ydb/core/fq/libs/rate_limiter/quoter_service/quoter_service.h>
+#include <ydb/core/fq/libs/row_dispatcher/row_dispatcher_service.h>
#include <ydb/core/fq/libs/shared_resources/shared_resources.h>
#include <ydb/core/fq/libs/test_connection/test_connection.h>
@@ -187,6 +188,18 @@ void Init(
credentialsFactory = NYql::CreateSecuredServiceAccountCredentialsOverTokenAccessorFactory(tokenAccessorConfig.GetEndpoint(), tokenAccessorConfig.GetUseSsl(), caContent, tokenAccessorConfig.GetConnectionPoolSize());
}
+ if (protoConfig.GetRowDispatcher().GetEnabled()) {
+ auto rowDispatcher = NFq::NewRowDispatcherService(
+ protoConfig.GetRowDispatcher(),
+ protoConfig.GetCommon(),
+ NKikimr::CreateYdbCredentialsProviderFactory,
+ yqSharedResources,
+ credentialsFactory,
+ tenant,
+ yqCounters->GetSubgroup("subsystem", "row_dispatcher"));
+ actorRegistrator(NFq::RowDispatcherServiceActorId(), rowDispatcher.release());
+ }
+
auto s3ActorsFactory = NYql::NDq::CreateS3ActorsFactory();
if (protoConfig.GetPrivateApi().GetEnabled()) {
diff --git a/ydb/core/fq/libs/init/ya.make b/ydb/core/fq/libs/init/ya.make
index b8e26738074..3ea6b535087 100644
--- a/ydb/core/fq/libs/init/ya.make
+++ b/ydb/core/fq/libs/init/ya.make
@@ -22,6 +22,7 @@ PEERDIR(
ydb/core/fq/libs/quota_manager
ydb/core/fq/libs/rate_limiter/control_plane_service
ydb/core/fq/libs/rate_limiter/quoter_service
+ ydb/core/fq/libs/row_dispatcher
ydb/core/fq/libs/shared_resources
ydb/core/fq/libs/test_connection
ydb/core/protos
diff --git a/ydb/core/fq/libs/row_dispatcher/actors_factory.cpp b/ydb/core/fq/libs/row_dispatcher/actors_factory.cpp
new file mode 100644
index 00000000000..b3b4d8517c7
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/actors_factory.cpp
@@ -0,0 +1,37 @@
+#include <ydb/core/fq/libs/row_dispatcher/actors_factory.h>
+
+#include <ydb/core/fq/libs/row_dispatcher/topic_session.h>
+
+namespace NFq::NRowDispatcher {
+
+
+struct TActorFactory : public IActorFactory {
+ TActorFactory() {}
+
+ NActors::TActorId RegisterTopicSession(
+ const TString& topicPath,
+ const NConfig::TRowDispatcherConfig& config,
+ NActors::TActorId rowDispatcherActorId,
+ ui32 partitionId,
+ NYdb::TDriver driver,
+ std::shared_ptr<NYdb::ICredentialsProviderFactory> credentialsProviderFactory,
+ const ::NMonitoring::TDynamicCounterPtr& counters) const override {
+
+ auto actorPtr = NFq::NewTopicSession(
+ topicPath,
+ config,
+ rowDispatcherActorId,
+ partitionId,
+ std::move(driver),
+ credentialsProviderFactory,
+ counters
+ );
+ return NActors::TlsActivationContext->ExecutorThread.RegisterActor(actorPtr.release(), NActors::TMailboxType::HTSwap, Max<ui32>());
+ }
+};
+
+IActorFactory::TPtr CreateActorFactory() {
+ return MakeIntrusive<TActorFactory>();
+}
+
+}
diff --git a/ydb/core/fq/libs/row_dispatcher/actors_factory.h b/ydb/core/fq/libs/row_dispatcher/actors_factory.h
new file mode 100644
index 00000000000..6cc718b41cd
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/actors_factory.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include <ydb/core/fq/libs/config/protos/row_dispatcher.pb.h>
+#include <util/generic/ptr.h>
+#include <ydb/library/actors/core/actor.h>
+#include <ydb/public/sdk/cpp/client/ydb_driver/driver.h>
+
+namespace NFq::NRowDispatcher {
+
+struct IActorFactory : public TThrRefBase {
+ using TPtr = TIntrusivePtr<IActorFactory>;
+
+ virtual NActors::TActorId RegisterTopicSession(
+ const TString& topicPath,
+ const NConfig::TRowDispatcherConfig& config,
+ NActors::TActorId rowDispatcherActorId,
+ ui32 partitionId,
+ NYdb::TDriver driver,
+ std::shared_ptr<NYdb::ICredentialsProviderFactory> credentialsProviderFactory,
+ const ::NMonitoring::TDynamicCounterPtr& counters) const = 0;
+};
+
+IActorFactory::TPtr CreateActorFactory();
+
+}
diff --git a/ydb/core/fq/libs/row_dispatcher/coordinator.cpp b/ydb/core/fq/libs/row_dispatcher/coordinator.cpp
new file mode 100644
index 00000000000..dfc483ec939
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/coordinator.cpp
@@ -0,0 +1,304 @@
+#include "coordinator.h"
+
+#include <ydb/core/fq/libs/actors/logging/log.h>
+#include <ydb/core/fq/libs/ydb/ydb.h>
+#include <ydb/core/fq/libs/ydb/schema.h>
+#include <ydb/core/fq/libs/ydb/util.h>
+#include <ydb/core/fq/libs/events/events.h>
+#include <ydb/core/fq/libs/row_dispatcher/events/data_plane.h>
+#include <ydb/library/actors/core/interconnect.h>
+
+#include <ydb/library/actors/core/actor_bootstrapped.h>
+#include <ydb/library/actors/core/hfunc.h>
+#include <ydb/library/actors/protos/actors.pb.h>
+
+namespace NFq {
+
+using namespace NActors;
+using namespace NThreading;
+
+using NYql::TIssues;
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+
+struct TCoordinatorMetrics {
+ explicit TCoordinatorMetrics(const ::NMonitoring::TDynamicCounterPtr& counters)
+ : Counters(counters) {
+ IncomingRequests = Counters->GetCounter("IncomingRequests", true);
+ LeaderChangedCount = Counters->GetCounter("LeaderChangedCount");
+ }
+
+ ::NMonitoring::TDynamicCounterPtr Counters;
+ ::NMonitoring::TDynamicCounters::TCounterPtr IncomingRequests;
+ ::NMonitoring::TDynamicCounters::TCounterPtr LeaderChangedCount;
+
+};
+
+class TActorCoordinator : public TActorBootstrapped<TActorCoordinator> {
+
+ struct TPartitionKey {
+ TString Endpoint;
+ TString Database;
+ TString TopicName;
+ ui64 PartitionId;
+
+ size_t Hash() const noexcept {
+ ui64 hash = std::hash<TString>()(Endpoint);
+ hash = CombineHashes<ui64>(hash, std::hash<TString>()(Database));
+ hash = CombineHashes<ui64>(hash, std::hash<TString>()(TopicName));
+ hash = CombineHashes<ui64>(hash, std::hash<ui64>()(PartitionId));
+ return hash;
+ }
+ bool operator==(const TPartitionKey& other) const {
+ return Endpoint == other.Endpoint && Database == other.Database
+ && TopicName == other.TopicName && PartitionId == other.PartitionId;
+ }
+ };
+
+ struct TPartitionKeyHash {
+ int operator()(const TPartitionKey& k) const {
+ return k.Hash();
+ }
+ };
+
+ struct RowDispatcherInfo {
+ RowDispatcherInfo(bool connected, bool isLocal)
+ : Connected(connected)
+ , IsLocal(isLocal) {}
+ bool Connected = false;
+ bool IsLocal = false;
+ THashSet<TPartitionKey, TPartitionKeyHash> Locations;
+ };
+
+ NConfig::TRowDispatcherCoordinatorConfig Config;
+ TYqSharedResources::TPtr YqSharedResources;
+ TActorId LocalRowDispatcherId;
+ const TString LogPrefix;
+ const TString Tenant;
+ TMap<NActors::TActorId, RowDispatcherInfo> RowDispatchers;
+ THashMap<TPartitionKey, TActorId, TPartitionKeyHash> PartitionLocations;
+ TCoordinatorMetrics Metrics;
+ ui64 LocationRandomCounter = 0;
+
+public:
+ TActorCoordinator(
+ NActors::TActorId localRowDispatcherId,
+ const NConfig::TRowDispatcherCoordinatorConfig& config,
+ const TYqSharedResources::TPtr& yqSharedResources,
+ const TString& tenant,
+ const ::NMonitoring::TDynamicCounterPtr& counters);
+
+ void Bootstrap();
+
+ static constexpr char ActorName[] = "FQ_RD_COORDINATOR";
+
+ void Handle(NActors::TEvents::TEvPing::TPtr& ev);
+ void HandleDisconnected(TEvInterconnect::TEvNodeDisconnected::TPtr& ev);
+ void HandleConnected(TEvInterconnect::TEvNodeConnected::TPtr& ev);
+ void Handle(NActors::TEvents::TEvUndelivered::TPtr& ev);
+ void Handle(NFq::TEvRowDispatcher::TEvCoordinatorChanged::TPtr& ev);
+ void Handle(NFq::TEvRowDispatcher::TEvCoordinatorRequest::TPtr& ev);
+
+ STRICT_STFUNC(
+ StateFunc, {
+ hFunc(NActors::TEvents::TEvPing, Handle);
+ hFunc(TEvInterconnect::TEvNodeConnected, HandleConnected);
+ hFunc(TEvInterconnect::TEvNodeDisconnected, HandleDisconnected);
+ hFunc(NActors::TEvents::TEvUndelivered, Handle);
+ hFunc(NFq::TEvRowDispatcher::TEvCoordinatorChanged, Handle);
+ hFunc(NFq::TEvRowDispatcher::TEvCoordinatorRequest, Handle);
+ })
+
+private:
+
+ void AddRowDispatcher(NActors::TActorId actorId, bool isLocal);
+ void PrintInternalState();
+ NActors::TActorId GetAndUpdateLocation(const TPartitionKey& key);
+};
+
+TActorCoordinator::TActorCoordinator(
+ NActors::TActorId localRowDispatcherId,
+ const NConfig::TRowDispatcherCoordinatorConfig& config,
+ const TYqSharedResources::TPtr& yqSharedResources,
+ const TString& tenant,
+ const ::NMonitoring::TDynamicCounterPtr& counters)
+ : Config(config)
+ , YqSharedResources(yqSharedResources)
+ , LocalRowDispatcherId(localRowDispatcherId)
+ , LogPrefix("Coordinator: ")
+ , Tenant(tenant)
+ , Metrics(counters) {
+ AddRowDispatcher(localRowDispatcherId, true);
+}
+
+void TActorCoordinator::Bootstrap() {
+ Become(&TActorCoordinator::StateFunc);
+ Send(LocalRowDispatcherId, new NFq::TEvRowDispatcher::TEvCoordinatorChangesSubscribe());
+ LOG_ROW_DISPATCHER_DEBUG("Successfully bootstrapped coordinator, id " << SelfId());
+}
+
+void TActorCoordinator::AddRowDispatcher(NActors::TActorId actorId, bool isLocal) {
+ auto it = RowDispatchers.find(actorId);
+ if (it != RowDispatchers.end()) {
+ it->second.Connected = true;
+ return;
+ }
+
+ for (auto& [oldActorId, info] : RowDispatchers) {
+ if (oldActorId.NodeId() != actorId.NodeId()) {
+ continue;
+ }
+
+ LOG_ROW_DISPATCHER_TRACE("Move all Locations from old actor " << oldActorId.ToString() << " to new " << actorId.ToString());
+ for (auto& key : info.Locations) {
+ PartitionLocations[key] = actorId;
+ }
+ info.Connected = true;
+ auto node = RowDispatchers.extract(oldActorId);
+ node.key() = actorId;
+ RowDispatchers.insert(std::move(node));
+ return;
+ }
+
+ RowDispatchers.emplace(actorId, RowDispatcherInfo{true, isLocal});
+}
+
+void TActorCoordinator::Handle(NActors::TEvents::TEvPing::TPtr& ev) {
+ LOG_ROW_DISPATCHER_TRACE("TEvPing received, " << ev->Sender);
+ AddRowDispatcher(ev->Sender, false);
+ PrintInternalState();
+ LOG_ROW_DISPATCHER_TRACE("Send TEvPong to " << ev->Sender);
+ Send(ev->Sender, new NActors::TEvents::TEvPong(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession);
+}
+
+void TActorCoordinator::PrintInternalState() {
+ TStringStream str;
+ str << "Known row dispatchers:\n";
+
+ for (const auto& [actorId, info] : RowDispatchers) {
+ str << " " << actorId << ", connected " << info.Connected << "\n";
+ }
+
+ str << "\nLocations:\n";
+ for (auto& [key, actorId] : PartitionLocations) {
+ str << " " << key.Endpoint << " / " << key.Database << " / " << key.TopicName << ", partId " << key.PartitionId << ", row dispatcher actor id: " << actorId << "\n";
+ }
+ LOG_ROW_DISPATCHER_DEBUG(str.Str());
+}
+
+void TActorCoordinator::HandleConnected(TEvInterconnect::TEvNodeConnected::TPtr& ev) {
+ LOG_ROW_DISPATCHER_DEBUG("EvNodeConnected " << ev->Get()->NodeId);
+    // Don't set Connected = true.
+    // Wait for TEvPing from row dispatchers.
+}
+
+void TActorCoordinator::HandleDisconnected(TEvInterconnect::TEvNodeDisconnected::TPtr& ev) {
+ LOG_ROW_DISPATCHER_DEBUG("TEvNodeDisconnected, node id " << ev->Get()->NodeId);
+
+ for (auto& [actorId, info] : RowDispatchers) {
+ if (ev->Get()->NodeId != actorId.NodeId()) {
+ continue;
+ }
+ Y_ENSURE(!info.IsLocal, "EvNodeDisconnected from local row dispatcher");
+ info.Connected = false;
+ }
+}
+
+void TActorCoordinator::Handle(NActors::TEvents::TEvUndelivered::TPtr& ev) {
+ LOG_ROW_DISPATCHER_DEBUG("TEvUndelivered, ev: " << ev->Get()->ToString());
+
+ for (auto& [actorId, info] : RowDispatchers) {
+ if (ev->Sender != actorId) {
+ continue;
+ }
+ info.Connected = false;
+ return;
+ }
+}
+
+void TActorCoordinator::Handle(NFq::TEvRowDispatcher::TEvCoordinatorChanged::TPtr& ev) {
+ LOG_ROW_DISPATCHER_DEBUG("New leader " << ev->Get()->CoordinatorActorId << ", SelfId " << SelfId());
+ Metrics.LeaderChangedCount->Inc();
+}
+
+NActors::TActorId TActorCoordinator::GetAndUpdateLocation(const TPartitionKey& key) {
+ Y_ENSURE(!PartitionLocations.contains(key));
+ auto rand = LocationRandomCounter++ % RowDispatchers.size();
+
+ auto it = std::begin(RowDispatchers);
+ std::advance(it, rand);
+
+ for (size_t i = 0; i < RowDispatchers.size(); ++i) {
+ auto& info = it->second;
+ if (!info.Connected) {
+ it++;
+ if (it == std::end(RowDispatchers)) {
+ it = std::begin(RowDispatchers);
+ }
+ continue;
+ }
+ PartitionLocations[key] = it->first;
+ it->second.Locations.insert(key);
+ return it->first;
+ }
+ Y_ENSURE(false, "Local row dispatcher should always be connected");
+}
+
+void TActorCoordinator::Handle(NFq::TEvRowDispatcher::TEvCoordinatorRequest::TPtr& ev) {
+ const auto source = ev->Get()->Record.GetSource();
+
+ TStringStream str;
+ str << "TEvCoordinatorRequest from " << ev->Sender.ToString() << ", " << source.GetTopicPath() << ", partIds: ";
+ for (auto& partitionId : ev->Get()->Record.GetPartitionId()) {
+ str << partitionId << ", ";
+ }
+ LOG_ROW_DISPATCHER_DEBUG(str.Str());
+ Metrics.IncomingRequests->Inc();
+ Y_ENSURE(!RowDispatchers.empty());
+
+ TMap<NActors::TActorId, TSet<ui64>> tmpResult;
+
+ for (auto& partitionId : ev->Get()->Record.GetPartitionId()) {
+ TPartitionKey key{source.GetEndpoint(), source.GetDatabase(), source.GetTopicPath(), partitionId};
+ auto locationIt = PartitionLocations.find(key);
+ NActors::TActorId rowDispatcherId;
+ if (locationIt != PartitionLocations.end()) {
+ rowDispatcherId = locationIt->second;
+ } else {
+ rowDispatcherId = GetAndUpdateLocation(key);
+ }
+ tmpResult[rowDispatcherId].insert(partitionId);
+ }
+
+ auto response = std::make_unique<TEvRowDispatcher::TEvCoordinatorResult>();
+ for (const auto& [actorId, partitions] : tmpResult) {
+ auto* partitionsProto = response->Record.AddPartitions();
+ ActorIdToProto(actorId, partitionsProto->MutableActorId());
+ for (auto partitionId : partitions) {
+ partitionsProto->AddPartitionId(partitionId);
+ }
+ }
+
+ LOG_ROW_DISPATCHER_DEBUG("Send TEvCoordinatorResult to " << ev->Sender);
+ Send(ev->Sender, response.release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ev->Cookie);
+ PrintInternalState();
+}
+
+
+} // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+
+std::unique_ptr<NActors::IActor> NewCoordinator(
+ NActors::TActorId rowDispatcherId,
+ const NConfig::TRowDispatcherCoordinatorConfig& config,
+ const TYqSharedResources::TPtr& yqSharedResources,
+ const TString& tenant,
+ const ::NMonitoring::TDynamicCounterPtr& counters)
+{
+ return std::unique_ptr<NActors::IActor>(new TActorCoordinator(rowDispatcherId, config, yqSharedResources, tenant, counters));
+}
+
+} // namespace NFq
diff --git a/ydb/core/fq/libs/row_dispatcher/coordinator.h b/ydb/core/fq/libs/row_dispatcher/coordinator.h
new file mode 100644
index 00000000000..60bd2f4f0ad
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/coordinator.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include <ydb/core/fq/libs/config/protos/row_dispatcher.pb.h>
+
+#include <ydb/library/actors/core/actor.h>
+#include <ydb/core/fq/libs/shared_resources/shared_resources.h>
+
+namespace NFq {
+
+////////////////////////////////////////////////////////////////////////////////
+
+std::unique_ptr<NActors::IActor> NewCoordinator(
+ NActors::TActorId rowDispatcherId,
+ const NConfig::TRowDispatcherCoordinatorConfig& config,
+ const TYqSharedResources::TPtr& yqSharedResources,
+ const TString& tenant,
+ const ::NMonitoring::TDynamicCounterPtr& counters);
+
+} // namespace NFq
diff --git a/ydb/core/fq/libs/row_dispatcher/events/data_plane.cpp b/ydb/core/fq/libs/row_dispatcher/events/data_plane.cpp
new file mode 100644
index 00000000000..dce6540d44a
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/events/data_plane.cpp
@@ -0,0 +1,10 @@
+#include "data_plane.h"
+
+namespace NFq {
+
+NActors::TActorId RowDispatcherServiceActorId() {
+ constexpr TStringBuf name = "ROW_DISP_DP";
+ return NActors::TActorId(0, name);
+}
+
+} // namespace NFq
diff --git a/ydb/core/fq/libs/row_dispatcher/events/data_plane.h b/ydb/core/fq/libs/row_dispatcher/events/data_plane.h
new file mode 100644
index 00000000000..5cecb525167
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/events/data_plane.h
@@ -0,0 +1,125 @@
+#pragma once
+
+#include <ydb/library/actors/core/actorid.h>
+#include <ydb/library/actors/core/event_local.h>
+#include <ydb/core/fq/libs/events/event_subspace.h>
+
+#include <ydb/core/fq/libs/row_dispatcher/protos/events.pb.h>
+#include <ydb/library/yql/providers/pq/proto/dq_io.pb.h>
+
+namespace NFq {
+
+NActors::TActorId RowDispatcherServiceActorId();
+
+struct TEvRowDispatcher {
+ // Event ids.
+ enum EEv : ui32 {
+ EvCoordinatorChanged = YqEventSubspaceBegin(TYqEventSubspace::RowDispatcher),
+ EvStartSession,
+ EvStartSessionAck,
+ EvNewDataArrived,
+ EvGetNextBatch,
+ EvMessageBatch,
+ EvStatus,
+ EvStopSession,
+ EvSessionError,
+ EvCoordinatorChangesSubscribe,
+ EvCoordinatorRequest,
+ EvCoordinatorResult,
+ EvEnd,
+ };
+
+ struct TEvCoordinatorChanged : NActors::TEventLocal<TEvCoordinatorChanged, EEv::EvCoordinatorChanged> {
+ TEvCoordinatorChanged(NActors::TActorId coordinatorActorId)
+ : CoordinatorActorId(coordinatorActorId) {
+ }
+ NActors::TActorId CoordinatorActorId;
+ };
+
+ struct TEvCoordinatorChangesSubscribe : public NActors::TEventLocal<TEvCoordinatorChangesSubscribe, EEv::EvCoordinatorChangesSubscribe> {};
+
+ struct TEvCoordinatorRequest : public NActors::TEventPB<TEvCoordinatorRequest,
+ NFq::NRowDispatcherProto::TEvGetAddressRequest, EEv::EvCoordinatorRequest> {
+ TEvCoordinatorRequest() = default;
+ TEvCoordinatorRequest(
+ const NYql::NPq::NProto::TDqPqTopicSource& sourceParams,
+ const std::vector<ui64>& partitionIds) {
+ *Record.MutableSource() = sourceParams;
+ for (const auto& id : partitionIds) {
+ Record.AddPartitionId(id);
+ }
+ }
+ };
+
+ struct TEvCoordinatorResult : public NActors::TEventPB<TEvCoordinatorResult,
+ NFq::NRowDispatcherProto::TEvGetAddressResponse, EEv::EvCoordinatorResult> {
+ TEvCoordinatorResult() = default;
+ };
+
+ struct TEvStartSession : public NActors::TEventPB<TEvStartSession,
+ NFq::NRowDispatcherProto::TEvStartSession, EEv::EvStartSession> {
+
+ TEvStartSession() = default;
+ TEvStartSession(
+ const NYql::NPq::NProto::TDqPqTopicSource& sourceParams,
+ ui64 partitionId,
+ const TString token,
+ TMaybe<ui64> readOffset,
+ ui64 startingMessageTimestampMs,
+ const TString& queryId) {
+ *Record.MutableSource() = sourceParams;
+ Record.SetPartitionId(partitionId);
+ Record.SetToken(token);
+ if (readOffset) {
+ Record.SetOffset(*readOffset);
+ }
+ Record.SetStartingMessageTimestampMs(startingMessageTimestampMs);
+ Record.SetQueryId(queryId);
+ }
+ };
+
+ struct TEvStartSessionAck : public NActors::TEventPB<TEvStartSessionAck,
+ NFq::NRowDispatcherProto::TEvStartSessionAck, EEv::EvStartSessionAck> {
+ TEvStartSessionAck() = default;
+ explicit TEvStartSessionAck(
+ const NFq::NRowDispatcherProto::TEvStartSession& consumer) {
+ *Record.MutableConsumer() = consumer;
+ }
+ };
+
+ struct TEvNewDataArrived : public NActors::TEventPB<TEvNewDataArrived,
+ NFq::NRowDispatcherProto::TEvNewDataArrived, EEv::EvNewDataArrived> {
+ TEvNewDataArrived() = default;
+ NActors::TActorId ReadActorId;
+ };
+
+ struct TEvGetNextBatch : public NActors::TEventPB<TEvGetNextBatch,
+ NFq::NRowDispatcherProto::TEvGetNextBatch, EEv::EvGetNextBatch> {
+ TEvGetNextBatch() = default;
+ };
+
+ struct TEvStopSession : public NActors::TEventPB<TEvStopSession,
+ NFq::NRowDispatcherProto::TEvStopSession, EEv::EvStopSession> {
+ TEvStopSession() = default;
+ };
+
+ struct TEvMessageBatch : public NActors::TEventPB<TEvMessageBatch,
+ NFq::NRowDispatcherProto::TEvMessageBatch, EEv::EvMessageBatch> {
+ TEvMessageBatch() = default;
+ NActors::TActorId ReadActorId;
+ };
+
+ struct TEvStatus : public NActors::TEventPB<TEvStatus,
+ NFq::NRowDispatcherProto::TEvStatus, EEv::EvStatus> {
+ TEvStatus() = default;
+ NActors::TActorId ReadActorId;
+ };
+
+ struct TEvSessionError : public NActors::TEventPB<TEvSessionError,
+ NFq::NRowDispatcherProto::TEvSessionError, EEv::EvSessionError> {
+ TEvSessionError() = default;
+ NActors::TActorId ReadActorId;
+ };
+};
+
+} // namespace NFq
diff --git a/ydb/core/fq/libs/row_dispatcher/events/ya.make b/ydb/core/fq/libs/row_dispatcher/events/ya.make
new file mode 100644
index 00000000000..60f0b00e7e9
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/events/ya.make
@@ -0,0 +1,14 @@
+LIBRARY()
+
+SRCS(
+ data_plane.cpp
+)
+
+PEERDIR(
+ ydb/core/fq/libs/events
+ ydb/core/fq/libs/row_dispatcher/protos
+ ydb/library/actors/core
+ ydb/library/yql/providers/pq/provider
+)
+
+END()
diff --git a/ydb/core/fq/libs/row_dispatcher/json_filter.cpp b/ydb/core/fq/libs/row_dispatcher/json_filter.cpp
new file mode 100644
index 00000000000..8b7d46a690f
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/json_filter.cpp
@@ -0,0 +1,300 @@
+#include <ydb/library/yql/public/udf/udf_version.h>
+#include <ydb/library/yql/public/purecalc/purecalc.h>
+#include <ydb/library/yql/public/purecalc/io_specs/mkql/spec.h>
+#include <ydb/library/yql/minikql/mkql_alloc.h>
+#include <ydb/library/yql/minikql/computation/mkql_computation_node_holders.h>
+#include <ydb/library/yql/minikql/mkql_terminator.h>
+
+#include <ydb/core/fq/libs/row_dispatcher/json_filter.h>
+#include <ydb/core/fq/libs/actors/logging/log.h>
+
+
+namespace {
+
+using TCallback = NFq::TJsonFilter::TCallback;
+// Name of the service column that carries the message offset through the purecalc program.
+const char* OffsetFieldName = "_offset";
+TString LogPrefix = "JsonFilter: ";
+
+// Appends one (fieldName, DataType fieldType) member pair to a StructType member list.
+void AddField(NYT::TNode& node, const TString& fieldName, const TString& fieldType) {
+ node.Add(
+ NYT::TNode::CreateList()
+ .Add(fieldName)
+ .Add(NYT::TNode::CreateList().Add("DataType").Add(fieldType))
+ );
+}
+
+// Input schema: _offset:Uint64 followed by every user column typed as String.
+NYT::TNode MakeInputSchema(const TVector<TString>& columns) {
+ auto structMembers = NYT::TNode::CreateList();
+ AddField(structMembers, OffsetFieldName, "Uint64");
+ for (const auto& col : columns) {
+ AddField(structMembers, col, "String");
+ }
+ return NYT::TNode::CreateList().Add("StructType").Add(std::move(structMembers));
+}
+
+// Output schema: _offset:Uint64 plus a single "data":String column holding the serialized row.
+NYT::TNode MakeOutputSchema() {
+ auto structMembers = NYT::TNode::CreateList();
+ AddField(structMembers, OffsetFieldName, "Uint64");
+ AddField(structMembers, "data", "String");
+ return NYT::TNode::CreateList().Add("StructType").Add(std::move(structMembers));
+}
+
+// Input spec for the filter's push-stream program; wraps exactly one schema.
+class TFilterInputSpec : public NYql::NPureCalc::TInputSpecBase {
+public:
+ TFilterInputSpec(const NYT::TNode& schema)
+ : Schemas({schema}) {
+ }
+
+ const TVector<NYT::TNode>& GetSchemas() const override {
+ return Schemas;
+ }
+
+private:
+ TVector<NYT::TNode> Schemas;
+};
+
+// Converts (offset, column values) pairs into MKQL struct values and pushes them into the worker.
+class TFilterInputConsumer : public NYql::NPureCalc::IConsumer<std::pair<ui64, TList<TString>>> {
+public:
+ TFilterInputConsumer(
+ const TFilterInputSpec& spec,
+ NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPushStreamWorker> worker)
+ : Worker(std::move(worker)) {
+ const NKikimr::NMiniKQL::TStructType* structType = Worker->GetInputType();
+ const auto count = structType->GetMembersCount();
+
+ // Map each member name of the compiled input struct to its position.
+ THashMap<TString, size_t> schemaPositions;
+ for (ui32 i = 0; i < count; ++i) {
+ const auto name = structType->GetMemberName(i);
+ if (name == OffsetFieldName) {
+ OffsetPosition = i;
+ continue;
+ }
+ schemaPositions[name] = i;
+ }
+
+ // For the i-th user column (schema order, offset skipped) record its struct position,
+ // so OnObject can fill fields regardless of member ordering in the compiled struct.
+ const NYT::TNode& schema = spec.GetSchemas()[0];
+ const auto& fields = schema[1];
+ Y_ENSURE(count == fields.Size());
+ Y_ENSURE(fields.IsList());
+ for (size_t i = 0; i < fields.Size(); ++i) {
+ auto name = fields[i][0].AsString();
+ if (name == OffsetFieldName) {
+ continue;
+ }
+ FieldsPositions.push_back(schemaPositions[name]);
+ }
+ }
+
+ ~TFilterInputConsumer() override {
+ // Cache holds MKQL allocations; clearing requires the worker's scoped alloc.
+ with_lock(Worker->GetScopedAlloc()) {
+ Cache.Clear();
+ }
+ }
+
+ void OnObject(std::pair<ui64, TList<TString>> value) override {
+ NKikimr::NMiniKQL::TThrowingBindTerminator bind;
+
+ // Build the unboxed struct (offset + columns) under the worker's alloc and push it.
+ with_lock (Worker->GetScopedAlloc()) {
+ auto& holderFactory = Worker->GetGraph().GetHolderFactory();
+ NYql::NUdf::TUnboxedValue* items = nullptr;
+
+ NYql::NUdf::TUnboxedValue result = Cache.NewArray(
+ holderFactory,
+ static_cast<ui32>(value.second.size() + 1),
+ items);
+
+ items[OffsetPosition] = NYql::NUdf::TUnboxedValuePod(value.first);
+
+ Y_ENSURE(FieldsPositions.size() == value.second.size());
+
+ size_t i = 0;
+ for (const auto& v : value.second) {
+ NYql::NUdf::TStringValue str(v);
+ items[FieldsPositions[i++]] = NYql::NUdf::TUnboxedValuePod(std::move(str));
+ }
+ Worker->Push(std::move(result));
+ }
+ }
+
+ void OnFinish() override {
+ NKikimr::NMiniKQL::TBindTerminator bind(Worker->GetGraph().GetTerminator());
+ with_lock(Worker->GetScopedAlloc()) {
+ Worker->OnFinish();
+ }
+ }
+
+private:
+ NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPushStreamWorker> Worker;
+ NKikimr::NMiniKQL::TPlainContainerCache Cache;
+ size_t OffsetPosition = 0;
+ TVector<size_t> FieldsPositions;
+};
+
+// Terminal consumer: forwards each (offset, serialized row) pair to the user callback.
+class TFilterOutputConsumer: public NYql::NPureCalc::IConsumer<std::pair<ui64, TString>> {
+public:
+ TFilterOutputConsumer(TCallback callback)
+ : Callback(callback) {
+ }
+
+ void OnObject(std::pair<ui64, TString> value) override {
+ Callback(value.first, value.second);
+ }
+
+ void OnFinish() override {
+ // The filter stream is never finished from this side.
+ Y_UNREACHABLE();
+ }
+private:
+ TCallback Callback;
+};
+
+// Output spec wrapping the single output schema of the filter program.
+class TFilterOutputSpec: public NYql::NPureCalc::TOutputSpecBase {
+public:
+ explicit TFilterOutputSpec(const NYT::TNode& schema)
+ : Schema(schema)
+ {}
+
+public:
+ const NYT::TNode& GetSchema() const override {
+ return Schema;
+ }
+
+private:
+ NYT::TNode Schema;
+};
+
+// Unpacks the worker's 2-element output row (offset, data) into plain values and
+// hands them to the underlying consumer outside the MKQL alloc scope (Unguard).
+class TFilterPushRelayImpl: public NYql::NPureCalc::IConsumer<const NYql::NUdf::TUnboxedValue*> {
+public:
+ TFilterPushRelayImpl(const TFilterOutputSpec& /*outputSpec*/, NYql::NPureCalc::IPushStreamWorker* worker, THolder<NYql::NPureCalc::IConsumer<std::pair<ui64, TString>>> underlying)
+ : Underlying(std::move(underlying))
+ , Worker(worker)
+ {}
+public:
+ void OnObject(const NYql::NUdf::TUnboxedValue* value) override {
+ auto unguard = Unguard(Worker->GetScopedAlloc());
+ // Output schema is fixed: element 0 is _offset, element 1 is the "data" string.
+ Y_ENSURE(value->GetListLength() == 2);
+ ui64 offset = value->GetElement(0).Get<ui64>();
+ const auto& cell = value->GetElement(1);
+ Y_ENSURE(cell);
+ TString str(cell.AsStringRef());
+ Underlying->OnObject(std::make_pair(offset, str));
+ }
+
+ void OnFinish() override {
+ auto unguard = Unguard(Worker->GetScopedAlloc());
+ Underlying->OnFinish();
+ }
+
+private:
+ THolder<NYql::NPureCalc::IConsumer<std::pair<ui64, TString>>> Underlying;
+ NYql::NPureCalc::IWorker* Worker;
+};
+
+}
+
+// Trait specialization wiring TFilterInputSpec into purecalc's push-stream machinery.
+template <>
+struct NYql::NPureCalc::TInputSpecTraits<TFilterInputSpec> {
+ static constexpr bool IsPartial = false;
+ static constexpr bool SupportPushStreamMode = true;
+
+ using TConsumerType = THolder<NYql::NPureCalc::IConsumer<std::pair<ui64, TList<TString>>>>;
+
+ static TConsumerType MakeConsumer(
+ const TFilterInputSpec& spec,
+ NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPushStreamWorker> worker)
+ {
+ return MakeHolder<TFilterInputConsumer>(spec, std::move(worker));
+ }
+};
+
+// Trait specialization attaching the relay consumer to the worker's output.
+template <>
+struct NYql::NPureCalc::TOutputSpecTraits<TFilterOutputSpec> {
+ static const constexpr bool IsPartial = false;
+ static const constexpr bool SupportPushStreamMode = true;
+
+ static void SetConsumerToWorker(const TFilterOutputSpec& outputSpec, NYql::NPureCalc::IPushStreamWorker* worker, THolder<NYql::NPureCalc::IConsumer<std::pair<ui64, TString>>> consumer) {
+ worker->SetConsumer(MakeHolder<TFilterPushRelayImpl>(outputSpec, worker, std::move(consumer)));
+ }
+};
+
+namespace NFq {
+
+// Pimpl: compiles the generated SQL into a purecalc push-stream program and
+// feeds rows through it; matches pass to the output callback as JSON strings.
+class TJsonFilter::TImpl {
+public:
+ TImpl(const TVector<TString>& columns,
+ const TVector<TString>& types,
+ const TString& whereFilter,
+ TCallback callback)
+ : Sql(GenerateSql(columns, types, whereFilter)) {
+ auto factory = NYql::NPureCalc::MakeProgramFactory(NYql::NPureCalc::TProgramFactoryOptions());
+
+ LOG_ROW_DISPATCHER_DEBUG("Creating program...");
+ Program = factory->MakePushStreamProgram(
+ TFilterInputSpec(MakeInputSchema(columns)),
+ TFilterOutputSpec(MakeOutputSchema()),
+ Sql,
+ NYql::NPureCalc::ETranslationMode::SQL
+ );
+ InputConsumer = Program->Apply(MakeHolder<TFilterOutputConsumer>(callback));
+ LOG_ROW_DISPATCHER_DEBUG("Program created");
+ }
+
+ void Push(ui64 offset, const TList<TString>& value) {
+ InputConsumer->OnObject(std::make_pair(offset, value));
+ }
+
+ TString GetSql() const {
+ return Sql;
+ }
+
+private:
+ // Builds: $fields casts raw strings to the declared types; $filtered applies the
+ // user predicate; final SELECT re-serializes the row (minus _offset) to JSON.
+ // NOTE(review): an empty columnNames list would emit "SELECT _offset,  FROM" —
+ // appears to assume at least one column; confirm against callers.
+ TString GenerateSql(const TVector<TString>& columnNames, const TVector<TString>& columnTypes, const TString& whereFilter) {
+ TStringStream str;
+ str << "$fields = SELECT ";
+ Y_ABORT_UNLESS(columnNames.size() == columnTypes.size());
+ str << OffsetFieldName << ", ";
+ for (size_t i = 0; i < columnNames.size(); ++i) {
+ str << "CAST(" << columnNames[i] << " as " << columnTypes[i] << ") as " << columnNames[i] << ((i != columnNames.size() - 1) ? "," : "");
+ }
+ str << " FROM Input;\n";
+ str << "$filtered = SELECT * FROM $fields " << whereFilter << ";\n";
+
+ str << "SELECT " << OffsetFieldName << ", Unwrap(Json::SerializeJson(Yson::From(RemoveMembers(TableRow(), [\"" << OffsetFieldName;
+ str << "\"])))) as data FROM $filtered";
+ LOG_ROW_DISPATCHER_DEBUG("Generated sql: " << str.Str());
+ return str.Str();
+ }
+
+private:
+ THolder<NYql::NPureCalc::TPushStreamProgram<TFilterInputSpec, TFilterOutputSpec>> Program;
+ THolder<NYql::NPureCalc::IConsumer<std::pair<ui64, TList<TString>>>> InputConsumer;
+ const TString Sql;
+};
+
+// Public TJsonFilter surface: thin forwarders into the pimpl.
+TJsonFilter::TJsonFilter(
+ const TVector<TString>& columns,
+ const TVector<TString>& types,
+ const TString& whereFilter,
+ TCallback callback)
+ : Impl(std::make_unique<TJsonFilter::TImpl>(columns, types, whereFilter, callback)) {
+}
+
+// Out-of-line dtor so std::unique_ptr<TImpl> can destroy the incomplete-in-header type.
+TJsonFilter::~TJsonFilter() {
+}
+
+void TJsonFilter::Push(ui64 offset, const TList<TString>& value) {
+ Impl->Push(offset, value);
+}
+
+TString TJsonFilter::GetSql() {
+ return Impl->GetSql();
+}
+
+// Factory helper returning an owned filter instance.
+std::unique_ptr<TJsonFilter> NewJsonFilter(
+ const TVector<TString>& columns,
+ const TVector<TString>& types,
+ const TString& whereFilter,
+ TCallback callback) {
+ return std::unique_ptr<TJsonFilter>(new TJsonFilter(columns, types, whereFilter, callback));
+}
+
+} // namespace NFq
diff --git a/ydb/core/fq/libs/row_dispatcher/json_filter.h b/ydb/core/fq/libs/row_dispatcher/json_filter.h
new file mode 100644
index 00000000000..f1694a277fb
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/json_filter.h
@@ -0,0 +1,34 @@
+
+#pragma once
+
+// Fix: these #include directives were inside `namespace NFq`, which would splice
+// the included headers' declarations into NFq and break lookups; includes must
+// precede the namespace.
+#include <ydb/library/yql/public/udf/udf_data_type.h>
+#include <ydb/library/yql/public/udf/udf_value.h>
+
+namespace NFq {
+
+// Filters rows with a compiled purecalc SQL program and reports each passing
+// row to `callback` as (offset, JSON string). See json_filter.cpp for details.
+class TJsonFilter {
+public:
+ using TCallback = std::function<void(ui64, const TString&)>;
+
+public:
+ TJsonFilter(
+ const TVector<TString>& columns,
+ const TVector<TString>& types,
+ const TString& whereFilter,
+ TCallback callback);
+ ~TJsonFilter();
+ void Push(ui64 offset, const TList<TString>& value);
+ TString GetSql();
+
+private:
+ class TImpl;
+ const std::unique_ptr<TImpl> Impl;
+};
+
+std::unique_ptr<TJsonFilter> NewJsonFilter(
+ const TVector<TString>& columns,
+ const TVector<TString>& types,
+ const TString& whereFilter,
+ TJsonFilter::TCallback callback);
+
+} // namespace NFq
diff --git a/ydb/core/fq/libs/row_dispatcher/json_parser.cpp b/ydb/core/fq/libs/row_dispatcher/json_parser.cpp
new file mode 100644
index 00000000000..2c3a3d38f02
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/json_parser.cpp
@@ -0,0 +1,302 @@
+#include <ydb/core/fq/libs/row_dispatcher/json_parser.h>
+
+#include <ydb/library/yql/public/purecalc/purecalc.h>
+#include <ydb/library/yql/public/purecalc/io_specs/mkql/spec.h>
+#include <ydb/library/yql/minikql/computation/mkql_computation_node_holders.h>
+#include <ydb/library/yql/minikql/mkql_terminator.h>
+#include <ydb/core/fq/libs/actors/logging/log.h>
+
+
+namespace {
+
+using TCallback = NFq::TJsonParser::TCallback;
+using TInputConsumerArg = std::pair<ui64, TString>;
+// Name of the service column that carries the message offset through the purecalc program.
+const char* OffsetFieldName = "_offset";
+TString LogPrefix = "JsonParser: ";
+
+// Appends one (fieldName, DataType fieldType) member pair to a StructType member list.
+void AddField(NYT::TNode& node, const TString& fieldName, const TString& fieldType) {
+ node.Add(
+ NYT::TNode::CreateList()
+ .Add(fieldName)
+ .Add(NYT::TNode::CreateList().Add("DataType").Add(fieldType))
+ );
+}
+
+// Input schema: _offset:Uint64 plus a single "data":String column with the raw JSON payload.
+NYT::TNode MakeInputSchema() {
+ auto structMembers = NYT::TNode::CreateList();
+ AddField(structMembers, OffsetFieldName, "Uint64");
+ AddField(structMembers, "data", "String");
+ return NYT::TNode::CreateList().Add("StructType").Add(std::move(structMembers));
+}
+
+// Output schema: _offset:Uint64 followed by every requested column as String.
+NYT::TNode MakeOutputSchema(const TVector<TString>& columns) {
+ auto structMembers = NYT::TNode::CreateList();
+ AddField(structMembers, OffsetFieldName, "Uint64");
+ for (const auto& col : columns) {
+ AddField(structMembers, col, "String");
+ }
+ return NYT::TNode::CreateList().Add("StructType").Add(std::move(structMembers));
+}
+
+// Wraps each (offset, raw JSON string) pair into a 2-field MKQL struct and pushes it into the worker.
+class TParserInputConsumer : public NYql::NPureCalc::IConsumer<TInputConsumerArg> {
+public:
+ explicit TParserInputConsumer(NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPushStreamWorker> worker)
+ : Worker(std::move(worker)) {
+ }
+
+ ~TParserInputConsumer() override {
+ // Cache holds MKQL allocations; clearing requires the worker's scoped alloc.
+ with_lock(Worker->GetScopedAlloc()) {
+ Cache.Clear();
+ }
+ }
+
+ void OnObject(std::pair<ui64, TString> value) override {
+ NKikimr::NMiniKQL::TThrowingBindTerminator bind;
+
+ with_lock (Worker->GetScopedAlloc()) {
+ auto& holderFactory = Worker->GetGraph().GetHolderFactory();
+ NYql::NUdf::TUnboxedValue* items = nullptr;
+
+ // Fixed layout matching MakeInputSchema(): [0] = _offset, [1] = data.
+ NYql::NUdf::TUnboxedValue result = Cache.NewArray(
+ holderFactory,
+ static_cast<ui32>(2),
+ items);
+
+ items[0] = NYql::NUdf::TUnboxedValuePod(value.first);
+ // Copy the payload into an MKQL-owned string value.
+ NYql::NUdf::TStringValue str(value.second.Size());
+ std::memcpy(str.Data(), value.second.Data(), value.second.Size());
+ items[1] = NYql::NUdf::TUnboxedValuePod(std::move(str));
+ Worker->Push(std::move(result));
+ }
+ }
+
+ void OnFinish() override {
+ NKikimr::NMiniKQL::TBindTerminator bind(Worker->GetGraph().GetTerminator());
+ with_lock(Worker->GetScopedAlloc()) {
+ Worker->OnFinish();
+ }
+ }
+
+private:
+ NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPushStreamWorker> Worker;
+ NKikimr::NMiniKQL::TPlainContainerCache Cache;
+};
+
+
+// Input spec for the parser's push-stream program; always uses the fixed (offset, data) schema.
+class TParserInputSpec : public NYql::NPureCalc::TInputSpecBase {
+public:
+ TParserInputSpec() {
+ Schemas = {MakeInputSchema()};
+ }
+
+ const TVector<NYT::TNode>& GetSchemas() const override {
+ return Schemas;
+ }
+
+private:
+ TVector<NYT::TNode> Schemas;
+};
+
+
+// Terminal consumer: forwards each (offset, parsed column values) pair to the user callback.
+class TParserOutputConsumer: public NYql::NPureCalc::IConsumer<std::pair<ui64, TList<TString>>> {
+public:
+ TParserOutputConsumer(TCallback callback)
+ : Callback(callback) {
+ }
+
+ void OnObject(std::pair<ui64, TList<TString>> value) override {
+ Callback(value.first, std::move(value.second));
+ }
+
+ void OnFinish() override {
+ // The parser stream is never finished from this side.
+ Y_UNREACHABLE();
+ }
+private:
+ TCallback Callback;
+};
+
+// Output spec wrapping the single output schema of the parser program.
+class TParserOutputSpec: public NYql::NPureCalc::TOutputSpecBase {
+public:
+ explicit TParserOutputSpec(const NYT::TNode& schema)
+ : Schema(schema)
+ {}
+
+public:
+ const NYT::TNode& GetSchema() const override {
+ return Schema;
+ }
+
+private:
+ NYT::TNode Schema;
+};
+
+// Maps the output schema's field order onto member positions of the worker's
+// compiled output struct, and records where the _offset column lives.
+struct TFieldsMapping{
+ TVector<size_t> FieldsPositions;
+ // Fix: previously uninitialized — it is only assigned inside the loop when the
+ // schema actually contains OffsetFieldName, so a schema without it would leave
+ // an indeterminate value that TParserPushRelayImpl later reads. Default to 0,
+ // consistent with TFilterInputConsumer::OffsetPosition.
+ size_t OffsetPosition = 0;
+
+ TFieldsMapping(const NYT::TNode& schema, const NKikimr::NMiniKQL::TType* outputType) {
+ THashMap<TString, size_t> outputPositions;
+ Y_ENSURE(outputType->IsStruct());
+ const auto structType = static_cast<const NKikimr::NMiniKQL::TStructType*>(outputType);
+ const auto count = structType->GetMembersCount();
+
+ for (ui32 i = 1; i < count; ++i) { // 0 index - OffsetFieldName
+ const auto name = structType->GetMemberName(i);
+ outputPositions[name] = i;
+ }
+
+ const auto& fields = schema[1];
+ Y_ENSURE(fields.IsList());
+ Y_ENSURE(count == fields.Size());
+ for (size_t i = 0; i < fields.Size(); ++i) {
+ auto name = fields[i][0].AsString();
+ if (name == OffsetFieldName) {
+ OffsetPosition = i;
+ continue;
+ }
+ FieldsPositions.push_back(outputPositions[name]);
+ }
+ }
+};
+
+// Unpacks the worker's output struct into (offset, column strings) using TFieldsMapping
+// and hands the result to the underlying consumer outside the MKQL alloc scope (Unguard).
+class TParserPushRelayImpl: public NYql::NPureCalc::IConsumer<const NYql::NUdf::TUnboxedValue*> {
+public:
+ TParserPushRelayImpl(const TParserOutputSpec& outputSpec, NYql::NPureCalc::IPushStreamWorker* worker, THolder<NYql::NPureCalc::IConsumer<std::pair<ui64, TList<TString>>>> underlying)
+ : Underlying(std::move(underlying))
+ , Worker(worker)
+ , FieldsMapping(outputSpec.GetSchema(), Worker->GetOutputType())
+ { }
+
+public:
+ void OnObject(const NYql::NUdf::TUnboxedValue* value) override {
+ auto unguard = Unguard(Worker->GetScopedAlloc());
+ TList<TString> result;
+
+ // Row = offset column + one element per mapped user column.
+ Y_ENSURE(value->GetListLength() == FieldsMapping.FieldsPositions.size() + 1);
+ ui64 offset = value->GetElement(FieldsMapping.OffsetPosition).Get<ui64>();
+
+ // Copy each cell out of the unboxed row in schema order.
+ for (auto pos : FieldsMapping.FieldsPositions) {
+ const auto& cell = value->GetElement(pos);
+
+ NYql::NUdf::TStringRef strRef(cell.AsStringRef());
+ result.emplace_back(strRef.Data(), strRef.Size());
+ }
+
+ Underlying->OnObject(std::make_pair(offset, std::move(result)));
+ }
+
+ void OnFinish() override {
+ auto unguard = Unguard(Worker->GetScopedAlloc());
+ Underlying->OnFinish();
+ }
+
+private:
+ THolder<NYql::NPureCalc::IConsumer<std::pair<ui64, TList<TString>>>> Underlying;
+ NYql::NPureCalc::IWorker* Worker;
+ TFieldsMapping FieldsMapping;
+};
+
+}
+
+// Trait specialization wiring TParserInputSpec into purecalc's push-stream machinery.
+template <>
+struct NYql::NPureCalc::TInputSpecTraits<TParserInputSpec> {
+ static constexpr bool IsPartial = false;
+ static constexpr bool SupportPushStreamMode = true;
+
+ using TConsumerType = THolder<NYql::NPureCalc::IConsumer<TInputConsumerArg>>;
+
+ static TConsumerType MakeConsumer(
+ const TParserInputSpec& spec,
+ NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPushStreamWorker> worker
+ ) {
+ Y_UNUSED(spec);
+ return MakeHolder<TParserInputConsumer>(std::move(worker));
+ }
+};
+
+// Trait specialization attaching the relay consumer to the worker's output.
+template <>
+struct NYql::NPureCalc::TOutputSpecTraits<TParserOutputSpec> {
+ static const constexpr bool IsPartial = false;
+ static const constexpr bool SupportPushStreamMode = true;
+
+ static void SetConsumerToWorker(const TParserOutputSpec& outputSpec, NYql::NPureCalc::IPushStreamWorker* worker, THolder<NYql::NPureCalc::IConsumer<std::pair<ui64, TList<TString>>>> consumer) {
+ worker->SetConsumer(MakeHolder<TParserPushRelayImpl>(outputSpec, worker, std::move(consumer)));
+ }
+};
+
+namespace NFq {
+
+// Pimpl: compiles a purecalc SQL program that JSON_VALUE-extracts each requested
+// column (as String) from the "data" payload and streams the results to the callback.
+class TJsonParser::TImpl {
+public:
+ TImpl(
+ const TVector<TString>& columns,
+ TCallback callback)
+ : Sql(GenerateSql(columns)) {
+ auto options = NYql::NPureCalc::TProgramFactoryOptions();
+ auto factory = NYql::NPureCalc::MakeProgramFactory(options);
+
+ LOG_ROW_DISPATCHER_DEBUG("Creating program...");
+ Program = factory->MakePushStreamProgram(
+ TParserInputSpec(),
+ TParserOutputSpec(MakeOutputSchema(columns)),
+ Sql,
+ NYql::NPureCalc::ETranslationMode::SQL
+ );
+ LOG_ROW_DISPATCHER_DEBUG("Program created");
+ InputConsumer = Program->Apply(MakeHolder<TParserOutputConsumer>(callback));
+ LOG_ROW_DISPATCHER_DEBUG("InputConsumer created");
+ }
+
+ void Push( ui64 offset, const TString& value) {
+ LOG_ROW_DISPATCHER_TRACE("Push " << value);
+ InputConsumer->OnObject(std::make_pair(offset, value));
+ }
+
+ TString GetSql() const {
+ return Sql;
+ }
+
+private:
+ // Builds: $json casts the payload to Json; the final SELECT extracts every column
+ // via JSON_VALUE with Unwrap (i.e. a missing key fails the row at runtime).
+ // NOTE(review): an empty `columns` would emit a trailing comma before FROM —
+ // appears to assume at least one column; confirm against callers.
+ TString GenerateSql(const TVector<TString>& columns) {
+ TStringStream str;
+ str << "$json = SELECT CAST(data AS Json) as `Json`, " << OffsetFieldName << " FROM Input;";
+ str << "\nSELECT " << OffsetFieldName << ", ";
+ for (auto it = columns.begin(); it != columns.end(); ++it) {
+ str << R"(CAST(Unwrap(JSON_VALUE(`Json`, "$.)" << *it << "\")) as String) as "
+ << *it << ((it != columns.end() - 1) ? "," : "");
+ }
+ str << " FROM $json;";
+ LOG_ROW_DISPATCHER_DEBUG("GenerateSql " << str.Str());
+ return str.Str();
+ }
+
+private:
+ THolder<NYql::NPureCalc::TPushStreamProgram<TParserInputSpec, TParserOutputSpec>> Program;
+ THolder<NYql::NPureCalc::IConsumer<TInputConsumerArg>> InputConsumer;
+ const TString Sql;
+};
+
+// Public TJsonParser surface: thin forwarders into the pimpl.
+TJsonParser::TJsonParser(
+ const TVector<TString>& columns,
+ TCallback callback)
+ : Impl(std::make_unique<TJsonParser::TImpl>(columns, callback)) {
+}
+
+// Out-of-line dtor so std::unique_ptr<TImpl> can destroy the incomplete-in-header type.
+TJsonParser::~TJsonParser() {
+}
+
+void TJsonParser::Push(ui64 offset, const TString& value) {
+ Impl->Push(offset, value);
+}
+
+TString TJsonParser::GetSql() {
+ return Impl->GetSql();
+}
+
+// Factory helper returning an owned parser instance.
+std::unique_ptr<TJsonParser> NewJsonParser(
+ const TVector<TString>& columns,
+ TCallback callback) {
+ return std::unique_ptr<TJsonParser>(new TJsonParser(columns, callback));
+}
diff --git a/ydb/core/fq/libs/row_dispatcher/json_parser.h b/ydb/core/fq/libs/row_dispatcher/json_parser.h
new file mode 100644
index 00000000000..d67761b401c
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/json_parser.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include <functional>
+
+#include <util/generic/string.h>
+
+namespace NFq {
+
+// Parses raw JSON payloads into per-column string values and invokes `callback`
+// with (offset, values) per message; implementation in json_parser.cpp.
+// NOTE(review): this header uses TVector/TList/std::unique_ptr but only includes
++// <functional> and util/generic/string.h — presumably relies on transitive
+// includes; verify against the build.
+class TJsonParser {
+public:
+ using TCallback = std::function<void(ui64, TList<TString>&&)>;
+
+public:
+ TJsonParser(
+ const TVector<TString>& columns,
+ TCallback callback);
+ ~TJsonParser();
+ void Push(ui64 offset, const TString& value);
+ TString GetSql();
+
+private:
+ class TImpl;
+ const std::unique_ptr<TImpl> Impl;
+};
+
+std::unique_ptr<TJsonParser> NewJsonParser(
+ const TVector<TString>& columns,
+ TJsonParser::TCallback callback);
+
+} // namespace NFq
diff --git a/ydb/core/fq/libs/row_dispatcher/leader_election.cpp b/ydb/core/fq/libs/row_dispatcher/leader_election.cpp
new file mode 100644
index 00000000000..6817cfc292c
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/leader_election.cpp
@@ -0,0 +1,482 @@
+#include "coordinator.h"
+
+#include <ydb/core/fq/libs/actors/logging/log.h>
+#include <ydb/core/fq/libs/ydb/ydb.h>
+#include <ydb/core/fq/libs/ydb/schema.h>
+#include <ydb/core/fq/libs/ydb/util.h>
+#include <ydb/core/fq/libs/events/events.h>
+#include <ydb/core/fq/libs/row_dispatcher/events/data_plane.h>
+
+#include <ydb/library/actors/core/actor_bootstrapped.h>
+#include <ydb/library/actors/core/hfunc.h>
+#include <ydb/library/actors/protos/actors.pb.h>
+
+namespace NFq {
+
+using namespace NActors;
+using namespace NThreading;
+using NYql::TIssues;
+
+namespace {
+
+// Retry/backoff delay used by ResetState before re-driving the state machine.
+const ui64 TimeoutDurationSec = 3;
+const TString SemaphoreName = "RowDispatcher";
+
+// Local (intra-node) events that deliver coordination-SDK future results back to the actor.
+struct TEvPrivate {
+ // Event ids
+ enum EEv : ui32 {
+ EvBegin = EventSpaceBegin(NActors::TEvents::ES_PRIVATE),
+ EvCreateSemaphoreResult = EvBegin,
+ EvCreateSessionResult,
+ EvAcquireSemaphoreResult,
+ EvDescribeSemaphoreResult,
+ EvSessionStopped,
+ EvTimeout,
+ EvOnChangedResult,
+ EvEnd
+ };
+
+ static_assert(EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE)");
+
+ // Events
+ struct TEvCreateSemaphoreResult : NActors::TEventLocal<TEvCreateSemaphoreResult, EvCreateSemaphoreResult> {
+ NYdb::NCoordination::TAsyncResult<void> Result;
+ // NOTE(review): std::move on a const& degrades to a copy here.
+ explicit TEvCreateSemaphoreResult(const NYdb::NCoordination::TAsyncResult<void>& future)
+ : Result(std::move(future)) {}
+ };
+ struct TEvCreateSessionResult : NActors::TEventLocal<TEvCreateSessionResult, EvCreateSessionResult> {
+ NYdb::NCoordination::TAsyncSessionResult Result;
+ explicit TEvCreateSessionResult(NYdb::NCoordination::TAsyncSessionResult future)
+ : Result(std::move(future)) {}
+ };
+
+ struct TEvOnChangedResult : NActors::TEventLocal<TEvOnChangedResult, EvOnChangedResult> {
+ bool Result;
+ explicit TEvOnChangedResult(bool result)
+ : Result(result) {}
+ };
+
+ struct TEvDescribeSemaphoreResult : NActors::TEventLocal<TEvDescribeSemaphoreResult, EvDescribeSemaphoreResult> {
+ NYdb::NCoordination::TAsyncDescribeSemaphoreResult Result;
+ explicit TEvDescribeSemaphoreResult(NYdb::NCoordination::TAsyncDescribeSemaphoreResult future)
+ : Result(std::move(future)) {}
+ };
+
+ struct TEvAcquireSemaphoreResult : NActors::TEventLocal<TEvAcquireSemaphoreResult, EvAcquireSemaphoreResult> {
+ NYdb::NCoordination::TAsyncResult<bool> Result;
+ explicit TEvAcquireSemaphoreResult(NYdb::NCoordination::TAsyncResult<bool> future)
+ : Result(std::move(future)) {}
+ };
+ struct TEvSessionStopped : NActors::TEventLocal<TEvSessionStopped, EvSessionStopped> {};
+ struct TEvTimeout : NActors::TEventLocal<TEvTimeout, EvTimeout> {};
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Monitoring counters for the leader-election actor.
+struct TLeaderElectionMetrics {
+ explicit TLeaderElectionMetrics(const ::NMonitoring::TDynamicCounterPtr& counters)
+ : Counters(counters) {
+ Errors = Counters->GetCounter("LeaderElectionErrors", true); // derivative (rate) counter
+ LeaderChangedCount = Counters->GetCounter("LeaderElectionChangedCount");
+ }
+
+ ::NMonitoring::TDynamicCounterPtr Counters;
+ ::NMonitoring::TDynamicCounters::TCounterPtr Errors;
+ ::NMonitoring::TDynamicCounters::TCounterPtr LeaderChangedCount;
+};
+
+// Elects a single coordinator across row-dispatcher nodes via a YDB coordination-node
+// semaphore: creates the node, opens a session, creates/acquires the semaphore, and
+// watches it to learn the current leader. Any failure resets the state machine and
+// retries after a timeout.
+class TLeaderElection: public TActorBootstrapped<TLeaderElection> {
+
+ // Linear setup pipeline; ProcessState() falls through completed stages.
+ enum class EState {
+ Init,
+ WaitNodeCreated,
+ WaitSessionCreated,
+ WaitSemaphoreCreated,
+ Started
+ };
+ NFq::NConfig::TRowDispatcherCoordinatorConfig Config;
+ const NKikimr::TYdbCredentialsProviderFactory& CredentialsProviderFactory;
+ TYqSharedResources::TPtr YqSharedResources;
+ TYdbConnectionPtr YdbConnection;
+ TString TablePathPrefix;
+ TString CoordinationNodePath;
+ TMaybe<NYdb::NCoordination::TSession> Session;
+ TActorId ParentId;
+ TActorId CoordinatorId;
+ TString LogPrefix;
+ const TString Tenant;
+ EState State = EState::Init;
+ // Idempotency flags: each stage / in-flight async call is started at most once.
+ bool CoordinationNodeCreated = false;
+ bool SemaphoreCreated = false;
+ bool TimeoutScheduled = false;
+ bool PendingDescribe = false;
+ bool PendingAcquire = false;
+
+ TMaybe<TActorId> LeaderActorId;
+
+ struct NodeInfo {
+ bool Connected = false;
+ };
+ std::map<ui32, NodeInfo> RowDispatchersByNode;
+ TLeaderElectionMetrics Metrics;
+
+public:
+ TLeaderElection(
+ NActors::TActorId parentId,
+ NActors::TActorId coordinatorId,
+ const NConfig::TRowDispatcherCoordinatorConfig& config,
+ const NKikimr::TYdbCredentialsProviderFactory& credentialsProviderFactory,
+ const TYqSharedResources::TPtr& yqSharedResources,
+ const TString& tenant,
+ const ::NMonitoring::TDynamicCounterPtr& counters);
+
+ void Bootstrap();
+ void PassAway() override;
+
+ static constexpr char ActorName[] = "YQ_LEADER_EL";
+
+ void Handle(NFq::TEvents::TEvSchemaCreated::TPtr& ev);
+ void Handle(TEvPrivate::TEvCreateSessionResult::TPtr& ev);
+ void Handle(TEvPrivate::TEvCreateSemaphoreResult::TPtr& ev);
+ void Handle(TEvPrivate::TEvAcquireSemaphoreResult::TPtr& ev);
+ void Handle(TEvPrivate::TEvSessionStopped::TPtr& ev);
+ void Handle(TEvPrivate::TEvTimeout::TPtr&);
+ void Handle(TEvPrivate::TEvDescribeSemaphoreResult::TPtr& ev);
+ void Handle(TEvPrivate::TEvOnChangedResult::TPtr& ev);
+ void HandleException(const std::exception& e);
+
+ // Exceptions from handlers are routed to HandleException instead of crashing the actor system.
+ STRICT_STFUNC_EXC(StateFunc,
+ hFunc(NFq::TEvents::TEvSchemaCreated, Handle);
+ hFunc(TEvPrivate::TEvCreateSessionResult, Handle);
+ hFunc(TEvPrivate::TEvCreateSemaphoreResult, Handle);
+ hFunc(TEvPrivate::TEvAcquireSemaphoreResult, Handle);
+ hFunc(TEvPrivate::TEvOnChangedResult, Handle);
+ hFunc(TEvPrivate::TEvSessionStopped, Handle);
+ hFunc(TEvPrivate::TEvTimeout, Handle);
+ hFunc(TEvPrivate::TEvDescribeSemaphoreResult, Handle);
+ cFunc(NActors::TEvents::TSystem::Poison, PassAway);,
+ ExceptionFunc(std::exception, HandleException)
+ )
+
+private:
+ void CreateSemaphore();
+ void AcquireSemaphore();
+ void DebugPrint();
+ void StartSession();
+ void DescribeSemaphore();
+ void ProcessState();
+ void ResetState();
+ void SetTimeout();
+};
+
+// Builds the YDB connection and derives the coordination node path as
+// <database>/<CoordinationNodePath>/<tenant>.
+TLeaderElection::TLeaderElection(
+ NActors::TActorId parentId,
+ NActors::TActorId coordinatorId,
+ const NConfig::TRowDispatcherCoordinatorConfig& config,
+ const NKikimr::TYdbCredentialsProviderFactory& credentialsProviderFactory,
+ const TYqSharedResources::TPtr& yqSharedResources,
+ const TString& tenant,
+ const ::NMonitoring::TDynamicCounterPtr& counters)
+ : Config(config)
+ , CredentialsProviderFactory(credentialsProviderFactory)
+ , YqSharedResources(yqSharedResources)
+ , YdbConnection(NewYdbConnection(config.GetDatabase(), credentialsProviderFactory, yqSharedResources->UserSpaceYdbDriver))
+ , TablePathPrefix(JoinPath(config.GetDatabase().GetDatabase(), config.GetCoordinationNodePath()))
+ , CoordinationNodePath(JoinPath(TablePathPrefix, tenant))
+ , ParentId(parentId)
+ , CoordinatorId(coordinatorId)
+ , Tenant(tenant)
+ , Metrics(counters) {
+}
+
+// Classifies a YDB status for the schema-creation retry policy:
+// transient statuses get a short retry, OVERLOADED a long one, the rest no retry.
+ERetryErrorClass RetryFunc(const NYdb::TStatus& status) {
+ if (status.IsSuccess()) {
+ return ERetryErrorClass::NoRetry;
+ }
+
+ if (status.IsTransportError()) {
+ return ERetryErrorClass::ShortRetry;
+ }
+
+ const NYdb::EStatus st = status.GetStatus();
+ if (st == NYdb::EStatus::INTERNAL_ERROR || st == NYdb::EStatus::UNAVAILABLE ||
+ st == NYdb::EStatus::TIMEOUT || st == NYdb::EStatus::BAD_SESSION ||
+ st == NYdb::EStatus::SESSION_EXPIRED ||
+ st == NYdb::EStatus::SESSION_BUSY) {
+ return ERetryErrorClass::ShortRetry;
+ }
+
+ if (st == NYdb::EStatus::OVERLOADED) {
+ return ERetryErrorClass::LongRetry;
+ }
+
+ return ERetryErrorClass::NoRetry;
+}
+
+// Returns a process-wide shared exponential-backoff policy (function-local static).
+TYdbSdkRetryPolicy::TPtr MakeSchemaRetryPolicy() {
+ static auto policy = TYdbSdkRetryPolicy::GetExponentialBackoffPolicy(RetryFunc, TDuration::MilliSeconds(10), TDuration::Seconds(1), TDuration::Seconds(5));
+ return policy;
+}
+
+// Entry point: switch to the handler table and start driving the setup pipeline.
+void TLeaderElection::Bootstrap() {
+ Become(&TLeaderElection::StateFunc);
+ LogPrefix = "TLeaderElection " + SelfId().ToString() + " ";
+ LOG_ROW_DISPATCHER_DEBUG("Successfully bootstrapped, local coordinator id " << CoordinatorId.ToString());
+ ProcessState();
+}
+
+// Advances the pipeline as far as current progress allows; each stage kicks off its
+// async step at most once (guarded by the Created/Session flags) and returns while
+// the result is pending. Re-entered from every completion handler and from TEvTimeout.
+void TLeaderElection::ProcessState() {
+ switch (State) {
+ case EState::Init:
+ if (!CoordinationNodeCreated) {
+ Register(MakeCreateCoordinationNodeActor(
+ SelfId(),
+ NKikimrServices::FQ_ROW_DISPATCHER,
+ YdbConnection,
+ CoordinationNodePath,
+ MakeSchemaRetryPolicy()));
+ }
+ State = EState::WaitNodeCreated;
+ [[fallthrough]];
+ case EState::WaitNodeCreated:
+ if (!CoordinationNodeCreated) {
+ return;
+ }
+ if (!Session) {
+ StartSession();
+ }
+ State = EState::WaitSessionCreated;
+ [[fallthrough]];
+ case EState::WaitSessionCreated:
+ if (!Session) {
+ return;
+ }
+ if (!SemaphoreCreated) {
+ CreateSemaphore();
+ }
+ State = EState::WaitSemaphoreCreated;
+ [[fallthrough]];
+ case EState::WaitSemaphoreCreated:
+ if (!SemaphoreCreated) {
+ return;
+ }
+ State = EState::Started;
+ [[fallthrough]];
+ case EState::Started:
+ AcquireSemaphore();
+ DescribeSemaphore();
+ break;
+ }
+ }
+}
+
+// Restarts the pipeline from scratch after an error; the actual re-drive happens
+// when the scheduled TEvTimeout fires.
+void TLeaderElection::ResetState() {
+ State = EState::Init;
+ SetTimeout();
+}
+
+// Creates the election semaphore (limit 1 => single leader); completion arrives
+// as TEvCreateSemaphoreResult.
+void TLeaderElection::CreateSemaphore() {
+ Session->CreateSemaphore(SemaphoreName, 1 /* limit */)
+ .Subscribe(
+ [actorId = this->SelfId(), actorSystem = TActivationContext::ActorSystem()](const NYdb::NCoordination::TAsyncResult<void>& future) {
+ actorSystem->Send(actorId, new TEvPrivate::TEvCreateSemaphoreResult(future));
+ });
+}
+
+// Tries to become leader: acquires the semaphore with the local coordinator's
+// serialized TActorId as its data, so watchers can learn who the leader is.
+// Completion arrives as TEvAcquireSemaphoreResult.
+void TLeaderElection::AcquireSemaphore() {
+ if (PendingAcquire) {
+ return;
+ }
+ LOG_ROW_DISPATCHER_DEBUG("Try to acquire semaphore");
+
+ NActorsProto::TActorId protoId;
+ ActorIdToProto(CoordinatorId, &protoId);
+ TString strActorId;
+ if (!protoId.SerializeToString(&strActorId)) {
+ Y_ABORT("SerializeToString");
+ }
+ PendingAcquire = true;
+ Session->AcquireSemaphore(
+ SemaphoreName,
+ NYdb::NCoordination::TAcquireSemaphoreSettings().Count(1).Data(strActorId))
+ .Subscribe(
+ [actorId = this->SelfId(), actorSystem = TActivationContext::ActorSystem()](const NYdb::NCoordination::TAsyncResult<bool>& future) {
+ actorSystem->Send(actorId, new TEvPrivate::TEvAcquireSemaphoreResult(future));
+ });
+}
+
+// Opens a coordination session; OnStopped posts TEvSessionStopped so the actor
+// can rebuild the session, and the creation result arrives as TEvCreateSessionResult.
+void TLeaderElection::StartSession() {
+ LOG_ROW_DISPATCHER_DEBUG("Start session");
+
+ YdbConnection->CoordinationClient
+ .StartSession(
+ CoordinationNodePath,
+ NYdb::NCoordination::TSessionSettings().OnStopped(
+ [actorId = this->SelfId(), actorSystem = TActivationContext::ActorSystem()]() {
+ actorSystem->Send(actorId, new TEvPrivate::TEvSessionStopped());
+ }))
+ .Subscribe([actorId = this->SelfId(), actorSystem = TActivationContext::ActorSystem()](const NYdb::NCoordination::TAsyncSessionResult& future) {
+ actorSystem->Send(actorId, new TEvPrivate::TEvCreateSessionResult(future));
+ });
+}
+
+void TLeaderElection::Handle(NFq::TEvents::TEvSchemaCreated::TPtr& ev) {
+ if (!IsTableCreated(ev->Get()->Result)) {
+ LOG_ROW_DISPATCHER_ERROR("Schema creation error " << ev->Get()->Result.GetIssues());
+ Metrics.Errors->Inc();
+ ResetState();
+ return;
+ }
+ LOG_ROW_DISPATCHER_DEBUG("Coordination node successfully created");
+ CoordinationNodeCreated = true;
+ ProcessState();
+}
+
+void TLeaderElection::Handle(TEvPrivate::TEvCreateSessionResult::TPtr& ev) {
+    // Coordination session established (or failed); store the session handle
+    // and continue the election state machine.
+    auto result = ev->Get()->Result.GetValue();
+    if (!result.IsSuccess()) {
+        LOG_ROW_DISPATCHER_ERROR("CreateSession failed, " << result.GetIssues());
+        Metrics.Errors->Inc();
+        ResetState();
+        return;
+    }
+    Session = result.GetResult();
+    LOG_ROW_DISPATCHER_DEBUG("Session successfully created");
+    ProcessState();
+}
+
+void TLeaderElection::Handle(TEvPrivate::TEvCreateSemaphoreResult::TPtr& ev) {
+    // Semaphore creation result. IsTableCreated() treats "already exists"
+    // as success, so concurrent creators converge here.
+    auto result = ev->Get()->Result.GetValue();
+    if (!IsTableCreated(result)) {
+        LOG_ROW_DISPATCHER_ERROR("Semaphore creating error " << result.GetIssues());
+        Metrics.Errors->Inc();
+        ResetState();
+        return;
+    }
+    SemaphoreCreated = true;
+    LOG_ROW_DISPATCHER_DEBUG("Semaphore successfully created");
+    ProcessState();
+}
+
+void TLeaderElection::Handle(TEvPrivate::TEvAcquireSemaphoreResult::TPtr& ev) {
+    // Acquire attempt finished; clear the in-flight flag either way.
+    // Note: who actually became leader is learned through DescribeSemaphore,
+    // not from this result.
+    auto result = ev->Get()->Result.GetValue();
+    PendingAcquire = false;
+
+    if (!result.IsSuccess()) {
+        LOG_ROW_DISPATCHER_ERROR("Failed to acquire semaphore, " << result.GetIssues());
+        Metrics.Errors->Inc();
+        ResetState();
+        return;
+    }
+    LOG_ROW_DISPATCHER_DEBUG("Semaphore successfully acquired");
+}
+
+void TLeaderElection::PassAway() {
+    // Actor teardown; the coordination session is owned by Session and is
+    // released with the actor.
+    LOG_ROW_DISPATCHER_DEBUG("PassAway");
+    TActorBootstrapped::PassAway();
+}
+
+void TLeaderElection::Handle(TEvPrivate::TEvSessionStopped::TPtr&) {
+    // The coordination session died; drop it together with any in-flight
+    // acquire/describe bookkeeping and restart the state machine.
+    LOG_ROW_DISPATCHER_DEBUG("TEvSessionStopped");
+    Session.Clear();
+    PendingAcquire = false;
+    PendingDescribe = false;
+    ResetState();
+}
+
+void TLeaderElection::SetTimeout() {
+    // Arm a single retry timer; idempotent while one is already pending.
+    if (TimeoutScheduled) {
+        return;
+    }
+    TimeoutScheduled = true;
+    Schedule(TDuration::Seconds(TimeoutDurationSec), new TEvPrivate::TEvTimeout());
+}
+
+void TLeaderElection::Handle(TEvPrivate::TEvTimeout::TPtr&) {
+    // Retry timer fired; re-drive the state machine.
+    TimeoutScheduled = false;
+    LOG_ROW_DISPATCHER_DEBUG("TEvTimeout");
+    ProcessState();
+}
+
+void TLeaderElection::DescribeSemaphore() {
+    // Ask who currently owns the semaphore and subscribe (WatchData/WatchOwners)
+    // so leader changes arrive as TEvOnChangedResult without polling.
+    if (PendingDescribe) {
+        return; // a describe request is already in flight
+    }
+    LOG_ROW_DISPATCHER_DEBUG("Describe semaphore");
+    PendingDescribe = true;
+    Session->DescribeSemaphore(
+        SemaphoreName,
+        NYdb::NCoordination::TDescribeSemaphoreSettings()
+            .WatchData()
+            .WatchOwners()
+            .IncludeOwners()
+            .OnChanged([actorId = this->SelfId(), actorSystem = TActivationContext::ActorSystem()](bool isChanged) {
+                actorSystem->Send(actorId, new TEvPrivate::TEvOnChangedResult(isChanged));
+            }))
+        .Subscribe(
+            [actorId = this->SelfId(), actorSystem = TActivationContext::ActorSystem()](const NYdb::NCoordination::TAsyncDescribeSemaphoreResult& future) {
+                actorSystem->Send(actorId, new TEvPrivate::TEvDescribeSemaphoreResult(future));
+            });
+}
+
+void TLeaderElection::Handle(TEvPrivate::TEvOnChangedResult::TPtr& /*ev*/) {
+    // Watch fired (ownership/data changed); the watch is one-shot, so clear
+    // PendingDescribe and let ProcessState() issue a fresh Describe.
+    LOG_ROW_DISPATCHER_DEBUG("Semaphore changed");
+    PendingDescribe = false;
+    ProcessState();
+}
+
+void TLeaderElection::Handle(TEvPrivate::TEvDescribeSemaphoreResult::TPtr& ev) {
+    // Process the semaphore description: the single owner's data carries the
+    // serialized actor id of the current leader coordinator. Notify ParentId
+    // only when the leader actually changed.
+    PendingDescribe = false;
+    auto result = ev->Get()->Result.GetValue();
+    if (!result.IsSuccess()) {
+        LOG_ROW_DISPATCHER_ERROR("Semaphore describe fail, " << result.GetIssues());
+        Metrics.Errors->Inc();
+        ResetState();
+        return;
+    }
+
+    const NYdb::NCoordination::TSemaphoreDescription& description = result.GetResult();
+    // AcquireSemaphore uses Count(1), so more than one owner is impossible.
+    // (Fixed message typo: "To many" -> "Too many".)
+    Y_ABORT_UNLESS(description.GetOwners().size() <= 1, "Too many owners");
+    if (description.GetOwners().empty()) {
+        LOG_ROW_DISPATCHER_DEBUG("Empty owners");
+        // No leader yet; the OnChanged watch will wake us up.
+        return;
+    }
+    TString data = description.GetOwners()[0].GetData();
+    NActorsProto::TActorId protoId;
+    if (!protoId.ParseFromString(data)) {
+        Y_ABORT("ParseFromString");
+    }
+
+    NActors::TActorId id = ActorIdFromProto(protoId);
+    LOG_ROW_DISPATCHER_DEBUG("Semaphore successfully described: coordinator id " << id);
+    if (!LeaderActorId || (*LeaderActorId != id)) {
+        LOG_ROW_DISPATCHER_INFO("Send TEvCoordinatorChanged to " << ParentId);
+        TActivationContext::ActorSystem()->Send(ParentId, new NFq::TEvRowDispatcher::TEvCoordinatorChanged(id));
+        Metrics.LeaderChangedCount->Inc();
+    }
+    LeaderActorId = id;
+}
+
+void TLeaderElection::HandleException(const std::exception& e) {
+    // Last-resort handler: log, count, and restart the state machine rather
+    // than crash the actor system.
+    LOG_ROW_DISPATCHER_ERROR("Internal error: exception:" << e.what());
+    Metrics.Errors->Inc();
+    ResetState();
+}
+
+} // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Factory for the leader-election actor (implementation is in the anonymous
+// namespace above); callers register the returned actor themselves.
+std::unique_ptr<NActors::IActor> NewLeaderElection(
+    NActors::TActorId rowDispatcherId,
+    NActors::TActorId coordinatorId,
+    const NConfig::TRowDispatcherCoordinatorConfig& config,
+    const NKikimr::TYdbCredentialsProviderFactory& credentialsProviderFactory,
+    const TYqSharedResources::TPtr& yqSharedResources,
+    const TString& tenant,
+    const ::NMonitoring::TDynamicCounterPtr& counters)
+{
+    return std::unique_ptr<NActors::IActor>(new TLeaderElection(rowDispatcherId, coordinatorId, config, credentialsProviderFactory, yqSharedResources, tenant, counters));
+}
+
+} // namespace NFq
diff --git a/ydb/core/fq/libs/row_dispatcher/leader_election.h b/ydb/core/fq/libs/row_dispatcher/leader_election.h
new file mode 100644
index 00000000000..536eecbe65a
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/leader_election.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <ydb/core/fq/libs/config/protos/row_dispatcher.pb.h>
+
+#include <ydb/library/actors/core/actor.h>
+#include <ydb/core/fq/libs/shared_resources/shared_resources.h>
+
+namespace NFq {
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Creates the leader-election actor. It maintains a session to a YDB
+// coordination node and reports the current leader coordinator to
+// rowDispatcherId via TEvCoordinatorChanged.
+std::unique_ptr<NActors::IActor> NewLeaderElection(
+    NActors::TActorId rowDispatcherId,
+    NActors::TActorId coordinatorId,
+    const NConfig::TRowDispatcherCoordinatorConfig& config,
+    const NKikimr::TYdbCredentialsProviderFactory& credentialsProviderFactory,
+    const TYqSharedResources::TPtr& yqSharedResources,
+    const TString& tenant,
+    const ::NMonitoring::TDynamicCounterPtr& counters);
+
+} // namespace NFq
diff --git a/ydb/core/fq/libs/row_dispatcher/protos/events.proto b/ydb/core/fq/libs/row_dispatcher/protos/events.proto
new file mode 100644
index 00000000000..e832c26c275
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/protos/events.proto
@@ -0,0 +1,78 @@
+syntax = "proto3";
+
+// Wire-level payloads for row dispatcher control-plane and data-plane events.
+// Messages carrying TMessageTransportMeta (field 100) travel through the
+// retry events queue and use its seq-no based delivery tracking.
+package NFq.NRowDispatcherProto;
+option cc_enable_arenas = true;
+
+import "ydb/library/actors/protos/actors.proto";
+import "ydb/library/yql/providers/pq/proto/dq_io.proto";
+import "ydb/library/yql/dq/actors/protos/dq_events.proto";
+
+// Ask the coordinator which row dispatcher serves each partition of a topic.
+message TEvGetAddressRequest {
+    NYql.NPq.NProto.TDqPqTopicSource Source = 1;
+    repeated uint32 PartitionId = 2;
+}
+
+// One (partitions -> row dispatcher actor) assignment.
+message TEvPartitionAddress {
+    repeated uint32 PartitionId = 1;
+    NActorsProto.TActorId ActorId = 2;
+}
+
+message TEvGetAddressResponse {
+    repeated TEvPartitionAddress Partitions = 1;
+}
+
+// Read actor asks a row dispatcher to start streaming one topic partition.
+message TEvStartSession {
+    NYql.NPq.NProto.TDqPqTopicSource Source = 1;
+    uint32 PartitionId = 2;
+    string Token = 3;
+    optional uint64 Offset = 4;       // resume offset; absent = start fresh
+    uint64 StartingMessageTimestampMs = 5;
+    string QueryId = 6;
+    optional NYql.NDqProto.TMessageTransportMeta TransportMeta = 100;
+}
+
+// Ack echoes the original request so the consumer can match it.
+message TEvStartSessionAck {
+    TEvStartSession Consumer = 1;
+    optional NYql.NDqProto.TMessageTransportMeta TransportMeta = 100;
+}
+
+message TEvGetNextBatch {
+    uint32 PartitionId = 1;
+    optional NYql.NDqProto.TMessageTransportMeta TransportMeta = 100;
+}
+
+// Notification that a partition has data ready to be fetched.
+message TEvNewDataArrived {
+    uint32 PartitionId = 1;
+    optional NYql.NDqProto.TMessageTransportMeta TransportMeta = 100;
+}
+
+message TEvStopSession {
+    NYql.NPq.NProto.TDqPqTopicSource Source = 1;
+    uint32 PartitionId = 2;
+    optional NYql.NDqProto.TMessageTransportMeta TransportMeta = 100;
+}
+
+// A single topic message (JSON payload) with its partition offset.
+message TEvMessage {
+    string Json = 1;
+    uint64 Offset = 2;
+}
+
+message TEvMessageBatch {
+    repeated TEvMessage Messages = 1;
+    uint32 PartitionId = 2;
+    uint64 NextMessageOffset = 3;     // offset to request next
+    optional NYql.NDqProto.TMessageTransportMeta TransportMeta = 100;
+}
+
+// Periodic per-partition progress report.
+message TEvStatus {
+    uint32 PartitionId = 1;
+    uint64 NextMessageOffset = 2;
+    optional NYql.NDqProto.TMessageTransportMeta TransportMeta = 100;
+}
+
+// Fatal session error; the consumer is expected to stop reading.
+message TEvSessionError {
+    string Message = 1;
+    uint32 PartitionId = 2;
+    optional NYql.NDqProto.TMessageTransportMeta TransportMeta = 100;
+}
+
diff --git a/ydb/core/fq/libs/row_dispatcher/protos/ya.make b/ydb/core/fq/libs/row_dispatcher/protos/ya.make
new file mode 100644
index 00000000000..c2d06e23266
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/protos/ya.make
@@ -0,0 +1,15 @@
+# Protobuf library with the row dispatcher event payloads.
+PROTO_LIBRARY()
+
+SRCS(
+    events.proto
+)
+
+PEERDIR(
+    ydb/library/actors/protos
+    ydb/library/yql/dq/actors/protos
+    ydb/library/yql/providers/pq/proto
+)
+
+# Go code generation is not needed for these internal messages.
+EXCLUDE_TAGS(GO_PROTO)
+
+END()
diff --git a/ydb/core/fq/libs/row_dispatcher/row_dispatcher.cpp b/ydb/core/fq/libs/row_dispatcher/row_dispatcher.cpp
new file mode 100644
index 00000000000..3d327385cf0
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/row_dispatcher.cpp
@@ -0,0 +1,608 @@
+#include "row_dispatcher.h"
+#include "coordinator.h"
+
+#include <ydb/library/actors/core/actorid.h>
+#include <ydb/library/actors/core/actor_bootstrapped.h>
+#include <ydb/library/actors/core/hfunc.h>
+#include <ydb/library/actors/core/interconnect.h>
+#include <ydb/library/yql/dq/actors/common/retry_queue.h>
+
+#include <ydb/core/fq/libs/actors/logging/log.h>
+#include <ydb/core/fq/libs/events/events.h>
+
+#include <ydb/core/fq/libs/row_dispatcher/actors_factory.h>
+#include <ydb/core/fq/libs/row_dispatcher/events/data_plane.h>
+#include <ydb/core/fq/libs/row_dispatcher/leader_election.h>
+#include <ydb/core/fq/libs/row_dispatcher/protos/events.pb.h>
+
+#include <util/generic/queue.h>
+
+
+namespace NFq {
+
+using namespace NActors;
+
+namespace {
+
+const ui64 CoordinatorPingPeriodSec = 2;
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Monitoring counters for the row dispatcher actor.
+// RowsSent is a rate (derivative) counter; the others are gauges.
+struct TRowDispatcherMetrics {
+    explicit TRowDispatcherMetrics(const ::NMonitoring::TDynamicCounterPtr& counters)
+        : Counters(counters) {
+        ErrorsCount = Counters->GetCounter("ErrorsCount");
+        ClientsCount = Counters->GetCounter("ClientsCount");
+        RowsSent = Counters->GetCounter("RowsSent", true);
+    }
+
+    ::NMonitoring::TDynamicCounterPtr Counters;
+    ::NMonitoring::TDynamicCounters::TCounterPtr ErrorsCount;
+    ::NMonitoring::TDynamicCounters::TCounterPtr ClientsCount;
+    ::NMonitoring::TDynamicCounters::TCounterPtr RowsSent;
+};
+
+
+// Private (in-process only) events of the row dispatcher actor.
+struct TEvPrivate {
+    // Event ids
+    enum EEv : ui32 {
+        EvBegin = EventSpaceBegin(NActors::TEvents::ES_PRIVATE),
+        EvCoordinatorPing = EvBegin + 20, // +20 leaves room for other private events in this space
+        EvPrintState,
+        EvEnd
+    };
+
+    static_assert(EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE)");
+    struct TEvCoordinatorPing : NActors::TEventLocal<TEvCoordinatorPing, EvCoordinatorPing> {};
+    struct TEvPrintState : public NActors::TEventLocal<TEvPrintState, EvPrintState> {};
+};
+
+ui64 PrintStatePeriodSec = 60;
+
+// Per-node row dispatcher: routes read actors (consumers) to shared topic
+// sessions, tracks the coordinator leader, and relays data-plane events
+// through per-consumer retry queues.
+class TRowDispatcher : public TActorBootstrapped<TRowDispatcher> {
+
+    // Identifies one consumer: a read actor reading one partition.
+    struct ConsumerSessionKey {
+        TActorId ReadActorId;
+        ui32 PartitionId;
+
+        size_t Hash() const noexcept {
+            ui64 hash = std::hash<TActorId>()(ReadActorId);
+            hash = CombineHashes<ui64>(hash, std::hash<ui32>()(PartitionId));
+            return hash;
+        }
+        bool operator==(const ConsumerSessionKey& other) const {
+            return ReadActorId == other.ReadActorId && PartitionId == other.PartitionId;
+        }
+    };
+
+    // NOTE(review): returns int, truncating the size_t hash; size_t would be safer.
+    struct ConsumerSessionKeyHash {
+        int operator()(const ConsumerSessionKey& k) const {
+            return k.Hash();
+        }
+    };
+
+    // Identifies one topic partition stream shared by all its consumers.
+    struct TopicSessionKey {
+        TString Endpoint;
+        TString Database;
+        TString TopicName;
+        ui64 PartitionId;
+
+        size_t Hash() const noexcept {
+            ui64 hash = std::hash<TString>()(Endpoint);
+            hash = CombineHashes<ui64>(hash, std::hash<TString>()(Database));
+            hash = CombineHashes<ui64>(hash, std::hash<TString>()(TopicName));
+            hash = CombineHashes<ui64>(hash, std::hash<ui64>()(PartitionId));
+            return hash;
+        }
+        bool operator==(const TopicSessionKey& other) const {
+            return Endpoint == other.Endpoint && Database == other.Database
+                && TopicName == other.TopicName && PartitionId == other.PartitionId;
+        }
+    };
+
+    // NOTE(review): returns int, truncating the size_t hash; size_t would be safer.
+    struct TopicSessionKeyHash {
+        int operator()(const TopicSessionKey& k) const {
+            return k.Hash();
+        }
+    };
+
+
+    NConfig::TRowDispatcherConfig Config;
+    NConfig::TCommonConfig CommonConfig;
+    NKikimr::TYdbCredentialsProviderFactory CredentialsProviderFactory;
+    TYqSharedResources::TPtr YqSharedResources;
+    TMaybe<TActorId> CoordinatorActorId;           // current leader, if known
+    TSet<TActorId> CoordinatorChangedSubscribers;  // notified on leader change
+    NYql::ISecuredServiceAccountCredentialsFactory::TPtr CredentialsFactory;
+    const TString LogPrefix;
+    ui64 NextEventQueueId = 0;                     // monotonically increasing queue id per consumer
+    TString Tenant;
+    NFq::NRowDispatcher::IActorFactory::TPtr ActorFactory;
+    const ::NMonitoring::TDynamicCounterPtr Counters;
+    TRowDispatcherMetrics Metrics;
+
+    // Per-consumer traffic statistics (used by PrintInternalState).
+    struct ConsumerCounters {
+        ui64 NewDataArrived = 0;
+        ui64 GetNextBatch = 0;
+        ui64 MessageBatch = 0;
+    };
+
+    struct ConsumerInfo {
+        ConsumerInfo(
+            NActors::TActorId readActorId,
+            NActors::TActorId selfId,
+            ui64 eventQueueId,
+            NFq::NRowDispatcherProto::TEvStartSession& proto,
+            TActorId topicSessionId)
+            : ReadActorId(readActorId)
+            , SourceParams(proto.GetSource())
+            , PartitionId(proto.GetPartitionId())
+            , EventQueueId(eventQueueId)
+            , Proto(proto)
+            , TopicSessionId(topicSessionId)
+            , QueryId(proto.GetQueryId()) {
+            EventsQueue.Init("txId", selfId, selfId, eventQueueId, /* KeepAlive */ true);
+            EventsQueue.OnNewRecipientId(readActorId);
+        }
+
+        NActors::TActorId ReadActorId;
+        NYql::NPq::NProto::TDqPqTopicSource SourceParams;
+        ui64 PartitionId;
+        NYql::NDq::TRetryEventsQueue EventsQueue; // reliable delivery to the read actor
+        ui64 EventQueueId;
+        NFq::NRowDispatcherProto::TEvStartSession Proto;
+        TActorId TopicSessionId;                  // topic session serving this consumer
+        const TString QueryId;
+        ConsumerCounters Counters;
+    };
+
+    struct SessionInfo {
+        TMap<TActorId, TAtomicSharedPtr<ConsumerInfo>> Consumers; // key - ReadActor actor id
+    };
+
+    struct TopicSessionInfo {
+        TMap<TActorId, SessionInfo> Sessions; // key - TopicSession actor id
+    };
+
+    THashMap<ConsumerSessionKey, TAtomicSharedPtr<ConsumerInfo>, ConsumerSessionKeyHash> Consumers;
+    TMap<ui64, TAtomicSharedPtr<ConsumerInfo>> ConsumersByEventQueueId;
+    THashMap<TopicSessionKey, TopicSessionInfo, TopicSessionKeyHash> TopicSessions;
+
+public:
+    explicit TRowDispatcher(
+        const NConfig::TRowDispatcherConfig& config,
+        const NConfig::TCommonConfig& commonConfig,
+        const NKikimr::TYdbCredentialsProviderFactory& credentialsProviderFactory,
+        const TYqSharedResources::TPtr& yqSharedResources,
+        NYql::ISecuredServiceAccountCredentialsFactory::TPtr credentialsFactory,
+        const TString& tenant,
+        const NFq::NRowDispatcher::IActorFactory::TPtr& actorFactory,
+        const ::NMonitoring::TDynamicCounterPtr& counters);
+
+    void Bootstrap();
+
+    static constexpr char ActorName[] = "FQ_ROW_DISPATCHER";
+
+    void Handle(NFq::TEvRowDispatcher::TEvCoordinatorChanged::TPtr& ev);
+    void HandleDisconnected(TEvInterconnect::TEvNodeDisconnected::TPtr& ev);
+    void HandleConnected(TEvInterconnect::TEvNodeConnected::TPtr& ev);
+
+    void Handle(NActors::TEvents::TEvUndelivered::TPtr& ev) ;
+    void Handle(TEvPrivate::TEvCoordinatorPing::TPtr& ev);
+    void Handle(NActors::TEvents::TEvPong::TPtr& ev);
+    void Handle(NFq::TEvRowDispatcher::TEvCoordinatorChangesSubscribe::TPtr& ev);
+    void Handle(NFq::TEvRowDispatcher::TEvStartSession::TPtr& ev);
+    void Handle(NFq::TEvRowDispatcher::TEvStopSession::TPtr& ev);
+    void Handle(NFq::TEvRowDispatcher::TEvGetNextBatch::TPtr& ev);
+    void Handle(NFq::TEvRowDispatcher::TEvNewDataArrived::TPtr& ev);
+    void Handle(NFq::TEvRowDispatcher::TEvMessageBatch::TPtr& ev);
+    void Handle(NFq::TEvRowDispatcher::TEvSessionError::TPtr& ev);
+    void Handle(NFq::TEvRowDispatcher::TEvStatus::TPtr& ev);
+
+    void Handle(NActors::TEvents::TEvPing::TPtr& ev);
+    void Handle(const NYql::NDq::TEvRetryQueuePrivate::TEvRetry::TPtr&);
+    void Handle(const NYql::NDq::TEvRetryQueuePrivate::TEvPing::TPtr&);
+    void Handle(const NYql::NDq::TEvRetryQueuePrivate::TEvSessionClosed::TPtr&);
+    void Handle(NFq::TEvPrivate::TEvPrintState::TPtr&);
+
+    void DeleteConsumer(const ConsumerSessionKey& key);
+    void PrintInternalState();
+
+    STRICT_STFUNC(
+        StateFunc, {
+        hFunc(NFq::TEvRowDispatcher::TEvCoordinatorChanged, Handle);
+        hFunc(TEvInterconnect::TEvNodeConnected, HandleConnected);
+        hFunc(TEvInterconnect::TEvNodeDisconnected, HandleDisconnected);
+        hFunc(NActors::TEvents::TEvUndelivered, Handle);
+        hFunc(TEvPrivate::TEvCoordinatorPing, Handle) // NOTE(review): missing ';' here — harmless within the macro, but inconsistent
+        hFunc(NActors::TEvents::TEvPong, Handle);
+        hFunc(NFq::TEvRowDispatcher::TEvCoordinatorChangesSubscribe, Handle);
+        hFunc(NFq::TEvRowDispatcher::TEvGetNextBatch, Handle);
+        hFunc(NFq::TEvRowDispatcher::TEvMessageBatch, Handle);
+        hFunc(NFq::TEvRowDispatcher::TEvStartSession, Handle);
+        hFunc(NFq::TEvRowDispatcher::TEvStopSession, Handle);
+        hFunc(NFq::TEvRowDispatcher::TEvSessionError, Handle);
+        hFunc(NFq::TEvRowDispatcher::TEvStatus, Handle);
+        hFunc(NYql::NDq::TEvRetryQueuePrivate::TEvRetry, Handle);
+        hFunc(NYql::NDq::TEvRetryQueuePrivate::TEvPing, Handle);
+        hFunc(NYql::NDq::TEvRetryQueuePrivate::TEvSessionClosed, Handle);
+        hFunc(NActors::TEvents::TEvPing, Handle);
+        hFunc(NFq::TEvRowDispatcher::TEvNewDataArrived, Handle);
+        hFunc(NFq::TEvPrivate::TEvPrintState, Handle);
+    })
+};
+
+// Stores configuration and shared resources; no I/O happens until Bootstrap().
+TRowDispatcher::TRowDispatcher(
+    const NConfig::TRowDispatcherConfig& config,
+    const NConfig::TCommonConfig& commonConfig,
+    const NKikimr::TYdbCredentialsProviderFactory& credentialsProviderFactory,
+    const TYqSharedResources::TPtr& yqSharedResources,
+    NYql::ISecuredServiceAccountCredentialsFactory::TPtr credentialsFactory,
+    const TString& tenant,
+    const NFq::NRowDispatcher::IActorFactory::TPtr& actorFactory,
+    const ::NMonitoring::TDynamicCounterPtr& counters)
+    : Config(config)
+    , CommonConfig(commonConfig)
+    , CredentialsProviderFactory(credentialsProviderFactory)
+    , YqSharedResources(yqSharedResources)
+    , CredentialsFactory(credentialsFactory)
+    , LogPrefix("RowDispatcher: ")
+    , Tenant(tenant)
+    , ActorFactory(actorFactory)
+    , Counters(counters)
+    , Metrics(counters) {
+}
+
+void TRowDispatcher::Bootstrap() {
+    // Spawn the coordinator and leader-election companion actors and arm the
+    // periodic ping / state-dump timers.
+    Become(&TRowDispatcher::StateFunc);
+    LOG_ROW_DISPATCHER_DEBUG("Successfully bootstrapped row dispatcher, id " << SelfId() << ", tenant " << Tenant);
+
+    const auto& config = Config.GetCoordinator();
+    auto coordinatorId = Register(NewCoordinator(SelfId(), config, YqSharedResources, Tenant, Counters).release());
+    Register(NewLeaderElection(SelfId(), coordinatorId, config, CredentialsProviderFactory, YqSharedResources, Tenant, Counters).release());
+    Schedule(TDuration::Seconds(CoordinatorPingPeriodSec), new TEvPrivate::TEvCoordinatorPing());
+    Schedule(TDuration::Seconds(PrintStatePeriodSec), new NFq::TEvPrivate::TEvPrintState());
+}
+
+void TRowDispatcher::Handle(NFq::TEvRowDispatcher::TEvCoordinatorChanged::TPtr& ev) {
+    // Leader election reported a (possibly new) coordinator: remember it,
+    // ping it, and fan the change out to all subscribers.
+    LOG_ROW_DISPATCHER_DEBUG("Coordinator changed, old leader " << CoordinatorActorId << ", new " << ev->Get()->CoordinatorActorId);
+
+    CoordinatorActorId = ev->Get()->CoordinatorActorId;
+    Send(*CoordinatorActorId, new NActors::TEvents::TEvPing(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession);
+    for (auto actorId : CoordinatorChangedSubscribers) {
+        Send(
+            actorId,
+            new NFq::TEvRowDispatcher::TEvCoordinatorChanged(ev->Get()->CoordinatorActorId),
+            IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession);
+    }
+}
+
+void TRowDispatcher::HandleConnected(TEvInterconnect::TEvNodeConnected::TPtr& ev) {
+    // Propagate interconnect node (re)connection to every consumer's retry queue.
+    LOG_ROW_DISPATCHER_DEBUG("EvNodeConnected, node id " << ev->Get()->NodeId);
+    for (auto& [actorId, consumer] : Consumers) {
+        consumer->EventsQueue.HandleNodeConnected(ev->Get()->NodeId);
+    }
+}
+
+void TRowDispatcher::HandleDisconnected(TEvInterconnect::TEvNodeDisconnected::TPtr& ev) {
+    // Propagate interconnect node loss to every consumer's retry queue.
+    LOG_ROW_DISPATCHER_DEBUG("TEvNodeDisconnected, node id " << ev->Get()->NodeId);
+    for (auto& [actorId, consumer] : Consumers) {
+        consumer->EventsQueue.HandleNodeDisconnected(ev->Get()->NodeId);
+    }
+}
+
+void TRowDispatcher::Handle(NActors::TEvents::TEvUndelivered::TPtr& ev) {
+    // Let each retry queue decide whether the undelivered event was its own.
+    LOG_ROW_DISPATCHER_DEBUG("TEvUndelivered, ev: " << ev->Get()->ToString() << ", reason " << ev->Get()->Reason);
+    for (auto& [actorId, consumer] : Consumers) {
+        consumer->EventsQueue.HandleUndelivered(ev);
+    }
+}
+
+void TRowDispatcher::Handle(TEvPrivate::TEvCoordinatorPing::TPtr&) {
+    // Periodic liveness ping to the coordinator; always reschedule first so
+    // the timer keeps running even when no coordinator is known yet.
+    Schedule(TDuration::Seconds(CoordinatorPingPeriodSec), new TEvPrivate::TEvCoordinatorPing());
+    if (!CoordinatorActorId) {
+        return;
+    }
+    LOG_ROW_DISPATCHER_DEBUG("Send ping to " << *CoordinatorActorId);
+    Send(*CoordinatorActorId, new NActors::TEvents::TEvPing(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession);
+}
+
+void TRowDispatcher::Handle(NActors::TEvents::TEvPong::TPtr&) {
+    // Coordinator answered the ping; nothing to update, just trace it.
+    LOG_ROW_DISPATCHER_TRACE("NActors::TEvents::TEvPong ");
+}
+
+void TRowDispatcher::Handle(NFq::TEvRowDispatcher::TEvCoordinatorChangesSubscribe::TPtr& ev) {
+    // Register a subscriber for leader changes; if a leader is already known,
+    // deliver it immediately.
+    LOG_ROW_DISPATCHER_DEBUG("TEvCoordinatorChangesSubscribe from " << ev->Sender);
+    CoordinatorChangedSubscribers.insert(ev->Sender);
+    if (!CoordinatorActorId) {
+        return;
+    }
+    Send(ev->Sender, new NFq::TEvRowDispatcher::TEvCoordinatorChanged(*CoordinatorActorId), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession);
+}
+
+void TRowDispatcher::PrintInternalState() {
+    // Dump consumers, their retry-queue state, and the topic-session tree to
+    // the debug log; skipped entirely when there are no consumers.
+    if (Consumers.empty()) {
+        return;
+    }
+    TStringStream str;
+    str << "Consumers:\n";
+    for (auto& [key, consumerInfo] : Consumers) {
+        str << "    query id " << consumerInfo->QueryId << ", partId: " << key.PartitionId << ", read actor id: " << key.ReadActorId
+            << ", queueId " << consumerInfo->EventQueueId << ", get next " << consumerInfo->Counters.GetNextBatch
+            << ", data arrived " << consumerInfo->Counters.NewDataArrived << ", message batch " << consumerInfo->Counters.MessageBatch << "\n";
+        str << "        ";
+        consumerInfo->EventsQueue.PrintInternalState(str);
+    }
+
+    str << "\nSessions:\n";
+    for (auto& [key, sessionInfo1] : TopicSessions) {
+        str << "  " << key.Endpoint << " / " << key.Database << " / " << key.TopicName << ", id: " << key.PartitionId << "\n";
+        for (auto& [actorId, sessionInfo2] : sessionInfo1.Sessions) {
+            str << "    session id: " << actorId << "\n";
+            for (auto& [actorId2, consumer] : sessionInfo2.Consumers) {
+                str << "      read actor id: " << actorId2 << "\n";
+            }
+        }
+    }
+    LOG_ROW_DISPATCHER_DEBUG(str.Str());
+}
+
+void TRowDispatcher::Handle(NFq::TEvRowDispatcher::TEvStartSession::TPtr& ev) {
+    // Register a new consumer (read actor + partition): attach it to the
+    // existing topic session for this partition or spawn a new one, ack the
+    // start, and forward the request to the session actor.
+    LOG_ROW_DISPATCHER_DEBUG("TEvStartSession from " << ev->Sender << ", topicPath " << ev->Get()->Record.GetSource().GetTopicPath() <<
+        " partitionId " << ev->Get()->Record.GetPartitionId());
+
+    TMaybe<ui64> readOffset;
+    if (ev->Get()->Record.HasOffset()) {
+        readOffset = ev->Get()->Record.GetOffset();
+    }
+
+    ConsumerSessionKey key{ev->Sender, ev->Get()->Record.GetPartitionId()};
+    auto it = Consumers.find(key);
+    if (it != Consumers.end()) {
+        // Fixed log message: first letter was a Cyrillic 'С' (U+0421), which
+        // broke grepping for "Consumer".
+        LOG_ROW_DISPATCHER_ERROR("Consumer already exists, ignore StartSession");
+        return;
+    }
+    const auto& source = ev->Get()->Record.GetSource();
+
+    TActorId sessionActorId;
+    TopicSessionKey topicKey{source.GetEndpoint(), source.GetDatabase(), source.GetTopicPath(), ev->Get()->Record.GetPartitionId()};
+    TopicSessionInfo& topicSessionInfo = TopicSessions[topicKey];
+    LOG_ROW_DISPATCHER_DEBUG("Topic session count " << topicSessionInfo.Sessions.size());
+    Y_ENSURE(topicSessionInfo.Sessions.size() <= 1);
+
+    auto consumerInfo = MakeAtomicShared<ConsumerInfo>(ev->Sender, SelfId(), NextEventQueueId++, ev->Get()->Record, TActorId());
+    Consumers[key] = consumerInfo;
+    ConsumersByEventQueueId[consumerInfo->EventQueueId] = consumerInfo;
+    if (!consumerInfo->EventsQueue.OnEventReceived(ev)) {
+        const NYql::NDqProto::TMessageTransportMeta& meta = ev->Get()->Record.GetTransportMeta();
+        const ui64 seqNo = meta.GetSeqNo();
+        LOG_ROW_DISPATCHER_ERROR("TEvStartSession: wrong seq num from " << ev->Sender.ToString() << ", seqNo " << seqNo << ", ignore message");
+    }
+
+    if (topicSessionInfo.Sessions.empty()) {
+        // First consumer of this partition: spawn a dedicated topic session.
+        LOG_ROW_DISPATCHER_DEBUG("Create new session " << readOffset);
+        sessionActorId = ActorFactory->RegisterTopicSession(
+            source.GetTopicPath(),
+            Config,
+            SelfId(),
+            ev->Get()->Record.GetPartitionId(),
+            YqSharedResources->UserSpaceYdbDriver,
+            CreateCredentialsProviderFactoryForStructuredToken(
+                CredentialsFactory,
+                ev->Get()->Record.GetToken(),
+                source.GetAddBearerToToken()),
+            Counters);
+        SessionInfo& sessionInfo = topicSessionInfo.Sessions[sessionActorId];
+        sessionInfo.Consumers[ev->Sender] = consumerInfo;
+    } else {
+        // Reuse the single existing session for this partition.
+        auto sessionIt = topicSessionInfo.Sessions.begin();
+        SessionInfo& sessionInfo = sessionIt->second;
+        sessionInfo.Consumers[ev->Sender] = consumerInfo;
+        sessionActorId = sessionIt->first;
+    }
+    consumerInfo->TopicSessionId = sessionActorId;
+    consumerInfo->EventsQueue.Send(new NFq::TEvRowDispatcher::TEvStartSessionAck(consumerInfo->Proto));
+
+    Forward(ev, sessionActorId);
+    Metrics.ClientsCount->Set(Consumers.size());
+    PrintInternalState();
+}
+
+void TRowDispatcher::Handle(NFq::TEvRowDispatcher::TEvGetNextBatch::TPtr& ev) {
+    // Validate the consumer and the retry-queue sequence number, then forward
+    // the batch request to the owning topic session.
+    const NYql::NDqProto::TMessageTransportMeta& meta = ev->Get()->Record.GetTransportMeta();
+    LOG_ROW_DISPATCHER_TRACE("TEvGetNextBatch from " << ev->Sender << ", partId " << ev->Get()->Record.GetPartitionId() << ", seqNo " << meta.GetSeqNo() << ", ConfirmedSeqNo " << meta.GetConfirmedSeqNo());
+
+    ConsumerSessionKey key{ev->Sender, ev->Get()->Record.GetPartitionId()};
+    auto it = Consumers.find(key);
+    if (it == Consumers.end()) {
+        LOG_ROW_DISPATCHER_WARN("Ignore TEvGetNextBatch, no such session");
+        return;
+    }
+    if (!it->second->EventsQueue.OnEventReceived(ev)) {
+        // Reuse the outer `meta` reference; the original re-declared it here,
+        // shadowing the identical variable above.
+        LOG_ROW_DISPATCHER_ERROR("TEvGetNextBatch: wrong seq num from " << ev->Sender.ToString() << ", seqNo " << meta.GetSeqNo() << ", ignore message");
+        return;
+    }
+    it->second->Counters.GetNextBatch++;
+    Forward(ev, it->second->TopicSessionId);
+}
+
+void TRowDispatcher::Handle(NActors::TEvents::TEvPing::TPtr& ev) {
+    // Liveness probe from peers (e.g. the coordinator); answer with pong.
+    LOG_ROW_DISPATCHER_TRACE("TEvPing from " << ev->Sender);
+    Send(ev->Sender, new NActors::TEvents::TEvPong());
+}
+
+void TRowDispatcher::Handle(NFq::TEvRowDispatcher::TEvStopSession::TPtr& ev) {
+    // Consumer asked to stop reading its partition; after seq-no validation
+    // tear the consumer down via DeleteConsumer.
+    LOG_ROW_DISPATCHER_DEBUG("TEvStopSession, topicPath " << ev->Get()->Record.GetSource().GetTopicPath() <<
+        " partitionId " << ev->Get()->Record.GetPartitionId());
+
+    ConsumerSessionKey key{ev->Sender, ev->Get()->Record.GetPartitionId()};
+    auto it = Consumers.find(key);
+    if (it == Consumers.end()) {
+        LOG_ROW_DISPATCHER_WARN("Wrong consumer, sender " << ev->Sender << ", part id " << ev->Get()->Record.GetPartitionId());
+        return;
+    }
+    if (!it->second->EventsQueue.OnEventReceived(ev)) {
+        const NYql::NDqProto::TMessageTransportMeta& meta = ev->Get()->Record.GetTransportMeta();
+        const ui64 seqNo = meta.GetSeqNo();
+
+        LOG_ROW_DISPATCHER_ERROR("TEvStopSession: wrong seq num from " << ev->Sender.ToString() << ", seqNo " << seqNo << ", ignore message");
+        return;
+    }
+    DeleteConsumer(key);
+}
+
+void TRowDispatcher::DeleteConsumer(const ConsumerSessionKey& key) {
+    // Remove one consumer: notify its topic session with TEvStopSession,
+    // detach it from the session tree, and kill the session when it served
+    // its last consumer.
+    LOG_ROW_DISPATCHER_DEBUG("DeleteConsumer, readActorId " << key.ReadActorId <<
+        " partitionId " << key.PartitionId);
+
+    auto consumerIt = Consumers.find(key);
+    if (consumerIt == Consumers.end()) {
+        LOG_ROW_DISPATCHER_WARN("Ignore DeleteConsumer, no such session");
+        return;
+    }
+    const auto& consumer = consumerIt->second;
+    // Stop notification is sent on behalf of the read actor so the session
+    // can identify which consumer is leaving.
+    auto event = std::make_unique<NFq::TEvRowDispatcher::TEvStopSession>();
+    *event->Record.MutableSource() = consumer->SourceParams;
+    event->Record.SetPartitionId(consumer->PartitionId);
+    Send(new IEventHandle(consumerIt->second->TopicSessionId, consumer->ReadActorId, event.release(), 0));
+
+    TopicSessionKey topicKey{
+        consumer->SourceParams.GetEndpoint(),
+        consumer->SourceParams.GetDatabase(),
+        consumer->SourceParams.GetTopicPath(),
+        consumer->PartitionId};
+    TopicSessionInfo& topicSessionInfo = TopicSessions[topicKey];
+    SessionInfo& sessionInfo = topicSessionInfo.Sessions[consumerIt->second->TopicSessionId];
+    Y_ENSURE(sessionInfo.Consumers.count(consumer->ReadActorId));
+    sessionInfo.Consumers.erase(consumer->ReadActorId);
+    if (sessionInfo.Consumers.empty()) {
+        LOG_ROW_DISPATCHER_DEBUG("Session is not used, sent TEvPoisonPill");
+        topicSessionInfo.Sessions.erase(consumerIt->second->TopicSessionId);
+        Send(consumerIt->second->TopicSessionId, new NActors::TEvents::TEvPoisonPill());
+        if (topicSessionInfo.Sessions.empty()) {
+            TopicSessions.erase(topicKey);
+        }
+    }
+    ConsumersByEventQueueId.erase(consumerIt->second->EventQueueId);
+    Consumers.erase(consumerIt);
+    Metrics.ClientsCount->Set(Consumers.size());
+    PrintInternalState();
+}
+
+void TRowDispatcher::Handle(const NYql::NDq::TEvRetryQueuePrivate::TEvSessionClosed::TPtr& ev) {
+    // A retry queue gave up on its recipient; drop the matching consumer.
+    // The `break` right after DeleteConsumer keeps the loop iterator safe.
+    LOG_ROW_DISPATCHER_WARN("Session closed, event queue id " << ev->Get()->EventQueueId);
+    for (auto& [consumerKey, consumer] : Consumers) {
+        if (consumer->EventQueueId != ev->Get()->EventQueueId) {
+            continue;
+        }
+        DeleteConsumer(consumerKey);
+        break;
+    }
+}
+
+void TRowDispatcher::Handle(const NYql::NDq::TEvRetryQueuePrivate::TEvRetry::TPtr& ev) {
+    // Retry timer of a consumer's events queue fired; re-send unacked events.
+    LOG_ROW_DISPATCHER_TRACE("TEvRetry " << ev->Get()->EventQueueId);
+    auto it = ConsumersByEventQueueId.find(ev->Get()->EventQueueId);
+    if (it == ConsumersByEventQueueId.end()) {
+        LOG_ROW_DISPATCHER_WARN("No consumer with EventQueueId = " << ev->Get()->EventQueueId);
+        return;
+    }
+    it->second->EventsQueue.Retry();
+}
+
+void TRowDispatcher::Handle(const NYql::NDq::TEvRetryQueuePrivate::TEvPing::TPtr& ev) {
+    // Keep-alive timer of a consumer's events queue fired; ping the recipient.
+    LOG_ROW_DISPATCHER_TRACE("TEvRetryQueuePrivate::TEvPing " << ev->Get()->EventQueueId);
+    auto it = ConsumersByEventQueueId.find(ev->Get()->EventQueueId);
+    if (it == ConsumersByEventQueueId.end()) {
+        LOG_ROW_DISPATCHER_WARN("No consumer with EventQueueId = " << ev->Get()->EventQueueId);
+        return;
+    }
+    it->second->EventsQueue.Ping();
+}
+
+void TRowDispatcher::Handle(NFq::TEvRowDispatcher::TEvNewDataArrived::TPtr& ev) {
+    // Topic session signals data availability; relay it to the read actor
+    // through its retry queue for reliable delivery.
+    LOG_ROW_DISPATCHER_TRACE("TEvNewDataArrived from " << ev->Sender);
+    ConsumerSessionKey key{ev->Get()->ReadActorId, ev->Get()->Record.GetPartitionId()};
+    auto it = Consumers.find(key);
+    if (it == Consumers.end()) {
+        LOG_ROW_DISPATCHER_WARN("Ignore TEvNewDataArrived, no such session");
+        return;
+    }
+    LOG_ROW_DISPATCHER_TRACE("Forward TEvNewDataArrived to " << ev->Get()->ReadActorId);
+    it->second->Counters.NewDataArrived++;
+    // Release().Release() detaches the typed event from the handle so the
+    // queue takes ownership.
+    it->second->EventsQueue.Send(ev.Release()->Release().Release());
+}
+
+void TRowDispatcher::Handle(NFq::TEvRowDispatcher::TEvMessageBatch::TPtr& ev) {
+    // A batch of rows from the topic session; count the rows and relay the
+    // batch to the read actor via its retry queue.
+    LOG_ROW_DISPATCHER_TRACE("TEvMessageBatch from " << ev->Sender);
+    ConsumerSessionKey key{ev->Get()->ReadActorId, ev->Get()->Record.GetPartitionId()};
+    auto it = Consumers.find(key);
+    if (it == Consumers.end()) {
+        LOG_ROW_DISPATCHER_WARN("Ignore MessageBatch, no such session");
+        return;
+    }
+    Metrics.RowsSent->Add(ev->Get()->Record.MessagesSize());
+    LOG_ROW_DISPATCHER_TRACE("Forward TEvMessageBatch to " << ev->Get()->ReadActorId);
+    it->second->Counters.MessageBatch++;
+    it->second->EventsQueue.Send(ev.Release()->Release().Release());
+}
+
+void TRowDispatcher::Handle(NFq::TEvRowDispatcher::TEvSessionError::TPtr& ev) {
+    // Fatal error from a topic session: forward it to the read actor, then
+    // drop the consumer.
+    LOG_ROW_DISPATCHER_TRACE("TEvSessionError from " << ev->Sender);
+    ConsumerSessionKey key{ev->Get()->ReadActorId, ev->Get()->Record.GetPartitionId()};
+    auto it = Consumers.find(key);
+    if (it == Consumers.end()) {
+        // Fixed copy-pasted log text: this handler processes TEvSessionError,
+        // not MessageBatch.
+        LOG_ROW_DISPATCHER_WARN("Ignore TEvSessionError, no such session");
+        return;
+    }
+    Metrics.ErrorsCount->Inc();
+    LOG_ROW_DISPATCHER_TRACE("Forward TEvSessionError to " << ev->Get()->ReadActorId);
+    it->second->EventsQueue.Send(ev.Release()->Release().Release());
+    DeleteConsumer(key);
+}
+
+void TRowDispatcher::Handle(NFq::TEvRowDispatcher::TEvStatus::TPtr& ev) {
+    // Periodic partition progress report from a topic session; relay to the
+    // read actor via its retry queue.
+    LOG_ROW_DISPATCHER_TRACE("TEvStatus from " << ev->Sender);
+    ConsumerSessionKey key{ev->Get()->ReadActorId, ev->Get()->Record.GetPartitionId()};
+    auto it = Consumers.find(key);
+    if (it == Consumers.end()) {
+        LOG_ROW_DISPATCHER_WARN("Ignore TEvStatus, no such session");
+        return;
+    }
+    LOG_ROW_DISPATCHER_TRACE("Forward TEvStatus to " << ev->Get()->ReadActorId);
+    it->second->EventsQueue.Send(ev.Release()->Release().Release());
+}
+
+void TRowDispatcher::Handle(NFq::TEvPrivate::TEvPrintState::TPtr&) {
+    // Periodic internal-state dump; reschedule first to keep the timer alive.
+    Schedule(TDuration::Seconds(PrintStatePeriodSec), new NFq::TEvPrivate::TEvPrintState());
+    PrintInternalState();
+}
+
+} // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Factory for the row dispatcher actor (implementation lives in the anonymous
+// namespace above); callers register the returned actor themselves.
+std::unique_ptr<NActors::IActor> NewRowDispatcher(
+    const NConfig::TRowDispatcherConfig& config,
+    const NConfig::TCommonConfig& commonConfig,
+    const NKikimr::TYdbCredentialsProviderFactory& credentialsProviderFactory,
+    const TYqSharedResources::TPtr& yqSharedResources,
+    NYql::ISecuredServiceAccountCredentialsFactory::TPtr credentialsFactory,
+    const TString& tenant,
+    const NFq::NRowDispatcher::IActorFactory::TPtr& actorFactory,
+    const ::NMonitoring::TDynamicCounterPtr& counters)
+{
+    return std::unique_ptr<NActors::IActor>(new TRowDispatcher(
+        config,
+        commonConfig,
+        credentialsProviderFactory,
+        yqSharedResources,
+        credentialsFactory,
+        tenant,
+        actorFactory,
+        counters));
+}
+
+} // namespace NFq
diff --git a/ydb/core/fq/libs/row_dispatcher/row_dispatcher.h b/ydb/core/fq/libs/row_dispatcher/row_dispatcher.h
new file mode 100644
index 00000000000..54c3b1521af
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/row_dispatcher.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <ydb/core/fq/libs/config/protos/row_dispatcher.pb.h>
+#include <ydb/core/fq/libs/config/protos/common.pb.h>
+#include <ydb/core/fq/libs/shared_resources/shared_resources.h>
+
+#include <ydb/core/fq/libs/row_dispatcher/actors_factory.h>
+
+#include <ydb/library/security/ydb_credentials_provider_factory.h>
+#include <ydb/library/yql/providers/common/token_accessor/client/factory.h>
+#include <ydb/library/actors/core/actor.h>
+
+#include <memory>
+
+namespace NFq {
+
+// Creates the Row Dispatcher actor. The returned actor is not yet registered
+// with an actor system; the caller owns it until registration. actorFactory
+// is used to spawn per-partition topic-session actors.
+std::unique_ptr<NActors::IActor> NewRowDispatcher(
+ const NConfig::TRowDispatcherConfig& config,
+ const NConfig::TCommonConfig& commonConfig,
+ const NKikimr::TYdbCredentialsProviderFactory& credentialsProviderFactory,
+ const TYqSharedResources::TPtr& yqSharedResources,
+ NYql::ISecuredServiceAccountCredentialsFactory::TPtr credentialsFactory,
+ const TString& tenant,
+ const NFq::NRowDispatcher::IActorFactory::TPtr& actorFactory,
+ const ::NMonitoring::TDynamicCounterPtr& counters);
+
+} // namespace NFq
diff --git a/ydb/core/fq/libs/row_dispatcher/row_dispatcher_service.cpp b/ydb/core/fq/libs/row_dispatcher/row_dispatcher_service.cpp
new file mode 100644
index 00000000000..1300f419d7d
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/row_dispatcher_service.cpp
@@ -0,0 +1,32 @@
+#include "row_dispatcher_service.h"
+#include "actors_factory.h"
+
+#include "row_dispatcher.h"
+
+namespace NFq {
+
+using namespace NActors;
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Convenience wrapper over NewRowDispatcher: supplies the default
+// production actor factory so callers outside this library do not need to
+// know about NRowDispatcher::IActorFactory.
+std::unique_ptr<NActors::IActor> NewRowDispatcherService(
+ const NConfig::TRowDispatcherConfig& config,
+ const NConfig::TCommonConfig& commonConfig,
+ const NKikimr::TYdbCredentialsProviderFactory& credentialsProviderFactory,
+ const TYqSharedResources::TPtr& yqSharedResources,
+ NYql::ISecuredServiceAccountCredentialsFactory::TPtr credentialsFactory,
+ const TString& tenant,
+ const ::NMonitoring::TDynamicCounterPtr& counters)
+{
+ return NewRowDispatcher(
+ config,
+ commonConfig,
+ credentialsProviderFactory,
+ yqSharedResources,
+ credentialsFactory,
+ tenant,
+ NFq::NRowDispatcher::CreateActorFactory(),
+ counters);
+}
+
+} // namespace NFq
diff --git a/ydb/core/fq/libs/row_dispatcher/row_dispatcher_service.h b/ydb/core/fq/libs/row_dispatcher/row_dispatcher_service.h
new file mode 100644
index 00000000000..ef8a9f29099
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/row_dispatcher_service.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include <ydb/core/fq/libs/config/protos/row_dispatcher.pb.h>
+#include <ydb/core/fq/libs/config/protos/common.pb.h>
+#include <ydb/core/fq/libs/shared_resources/shared_resources.h>
+
+#include <ydb/library/security/ydb_credentials_provider_factory.h>
+#include <ydb/library/yql/providers/common/token_accessor/client/factory.h>
+#include "events/data_plane.h"
+
+#include <ydb/library/actors/core/actor.h>
+
+#include <memory>
+
+namespace NFq {
+
+// Creates the Row Dispatcher service actor with the default (production)
+// topic-session actor factory. See NewRowDispatcher for the injectable-factory
+// variant used by tests.
+std::unique_ptr<NActors::IActor> NewRowDispatcherService(
+ const NConfig::TRowDispatcherConfig& config,
+ const NConfig::TCommonConfig& commonConfig,
+ const NKikimr::TYdbCredentialsProviderFactory& credentialsProviderFactory,
+ const TYqSharedResources::TPtr& yqSharedResources,
+ NYql::ISecuredServiceAccountCredentialsFactory::TPtr credentialsFactory,
+ const TString& tenant,
+ const ::NMonitoring::TDynamicCounterPtr& counters);
+
+} // namespace NFq
diff --git a/ydb/core/fq/libs/row_dispatcher/topic_session.cpp b/ydb/core/fq/libs/row_dispatcher/topic_session.cpp
new file mode 100644
index 00000000000..2294f097a5f
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/topic_session.cpp
@@ -0,0 +1,776 @@
+#include "topic_session.h"
+
+#include <ydb/core/fq/libs/actors/logging/log.h>
+
+#include <ydb/core/fq/libs/row_dispatcher/events/data_plane.h>
+#include <ydb/library/yql/providers/pq/proto/dq_io.pb.h>
+#include <ydb/library/actors/core/actor_bootstrapped.h>
+#include <ydb/library/actors/core/hfunc.h>
+#include <ydb/library/yql/minikql/mkql_string_util.h>
+#include <ydb/library/yql/minikql/computation/mkql_computation_node_holders.h>
+#include <ydb/library/yql/dq/runtime/dq_async_stats.h>
+#include <ydb/public/sdk/cpp/client/ydb_topic/topic.h>
+#include <util/generic/queue.h>
+
+#include <ydb/core/fq/libs/row_dispatcher/json_parser.h>
+#include <ydb/core/fq/libs/row_dispatcher/json_filter.h>
+#include <ydb/library/yql/public/purecalc/purecalc.h>
+
+namespace NFq {
+
+using namespace NActors;
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Per-actor monitoring counters for one topic session. Init() must be called
+// once (from Bootstrap) to attach the counters under an "actor_id" subgroup;
+// the destructor detaches that subgroup again.
+struct TTopicSessionMetrics {
+    void Init(const ::NMonitoring::TDynamicCounterPtr& counters, NActors::TActorId selfId) {
+        SelfId = selfId;
+        SubGroup = counters->GetSubgroup("actor_id", SelfId.ToString());
+        InFlyAsyncInputData = SubGroup->GetCounter("InFlyAsyncInputData");
+        RowsRead = SubGroup->GetCounter("RowsRead", true);   // derivative counter
+        InFlySubscribe = SubGroup->GetCounter("InFlySubscribe");
+    }
+
+    ~TTopicSessionMetrics() {
+        // Bug fix: SubGroup is null until Init() runs. If the actor is
+        // destroyed before Bootstrap (and thus before Init), the original
+        // unconditional dereference crashed here.
+        if (SubGroup) {
+            SubGroup->RemoveSubgroup("actor_id", SelfId.ToString());
+        }
+    }
+    NActors::TActorId SelfId;
+    ::NMonitoring::TDynamicCounterPtr SubGroup;              // null before Init()
+    ::NMonitoring::TDynamicCounters::TCounterPtr InFlyAsyncInputData;
+    ::NMonitoring::TDynamicCounters::TCounterPtr RowsRead;
+    ::NMonitoring::TDynamicCounters::TCounterPtr InFlySubscribe;
+};
+
+// Private (intra-actor) events of TTopicSession: SDK readiness notifications,
+// deferred session creation, periodic ticks, and the parse/filter pipeline
+// hand-offs (parsed -> filtered -> delivered).
+struct TEvPrivate {
+ // Event ids
+ enum EEv : ui32 {
+ EvBegin = EventSpaceBegin(NActors::TEvents::ES_PRIVATE),
+ EvPqEventsReady = EvBegin + 10,
+ EvCreateSession,
+ EvStatus,
+ EvDataParsed,
+ EvDataAfterFilteration,
+ EvDataFiltered,
+ EvPrintState,
+ EvEnd
+ };
+ static_assert(EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE)");
+
+ // Events
+ struct TEvPqEventsReady : public NActors::TEventLocal<TEvPqEventsReady, EvPqEventsReady> {};
+ struct TEvCreateSession : public NActors::TEventLocal<TEvCreateSession, EvCreateSession> {};
+ struct TEvPrintState : public NActors::TEventLocal<TEvPrintState, EvPrintState> {};
+ struct TEvStatus : public NActors::TEventLocal<TEvStatus, EvStatus> {};
+ // One message parsed into per-column string values.
+ struct TEvDataParsed : public NActors::TEventLocal<TEvDataParsed, EvDataParsed> {
+ TEvDataParsed(ui64 offset, TList<TString>&& value)
+ : Offset(offset)
+ , Value(std::move(value))
+ {}
+ ui64 Offset = 0;
+ TList<TString> Value;
+ };
+
+ // All filters have been fed the message at Offset; offsets may advance.
+ struct TEvDataFiltered : public NActors::TEventLocal<TEvDataFiltered, EvDataFiltered> {
+ TEvDataFiltered(ui64 offset)
+ : Offset(offset)
+ {}
+ ui64 Offset = 0;
+ };
+
+ // A row that passed a client's predicate, addressed to that client.
+ struct TEvDataAfterFilteration : public NActors::TEventLocal<TEvDataAfterFilteration, EvDataAfterFilteration> {
+ TEvDataAfterFilteration(ui64 offset, const TString& json, TActorId readActorId)
+ : Offset(offset)
+ , Json(json)
+ , ReadActorId(readActorId) { }
+ ui64 Offset;
+ TString Json;
+ TActorId ReadActorId;
+ };
+};
+
+// File-scope tuning constants, never modified at runtime — made constexpr so
+// accidental assignment is a compile error (the originals were mutable globals).
+constexpr ui64 PrintStatePeriodSec = 60;      // period of TEvPrintState self-ticks
+constexpr ui64 MaxBatchSizeBytes = 10000000;  // soft cap on one TEvMessageBatch payload
+
+// Copies a protobuf repeated string field into a TVector<TString>.
+TVector<TString> GetVector(const google::protobuf::RepeatedPtrField<TString>& value) {
+    TVector<TString> result(value.begin(), value.end());
+    return result;
+}
+
+// Actor owning a single YDB topic read session for one partition. It parses
+// incoming JSON messages, applies each client's optional predicate filter and
+// buffers matching rows until the client's read actor pulls them with
+// TEvGetNextBatch (delivery goes through the parent row dispatcher).
+class TTopicSession : public TActorBootstrapped<TTopicSession> {
+
+private:
+ using TParserInputType = std::pair< TVector<TString>, TVector<TString>>; // TODO: remove after YQ-3594
+
+ // Per-client (read actor) state: its start-session settings, the optional
+ // compiled predicate, and rows buffered for delivery.
+ struct ClientsInfo {
+ ClientsInfo(const NFq::TEvRowDispatcher::TEvStartSession::TPtr& ev)
+ : Settings(ev->Get()->Record)
+ , ReadActorId(ev->Sender)
+ {
+ if (Settings.HasOffset()) {
+ NextMessageOffset = Settings.GetOffset();
+ }
+ }
+ NFq::NRowDispatcherProto::TEvStartSession Settings;
+ NActors::TActorId ReadActorId;
+ std::unique_ptr<TJsonFilter> Filter; // empty if no predicate
+ TQueue<std::pair<ui64, TString>> Buffer; // (offset, json) rows awaiting TEvGetNextBatch
+ ui64 UsedSize = 0; // bytes currently held in Buffer
+ bool DataArrivedSent = false; // TEvNewDataArrived sent, pull not yet received
+ TMaybe<ui64> NextMessageOffset; // offset the client expects next
+ ui64 LastSendedNextMessageOffset = 0;
+ };
+
+ // Visitor over the SDK read-session event variant (see HandleNewEvents).
+ struct TTopicEventProcessor {
+ void operator()(NYdb::NTopic::TReadSessionEvent::TDataReceivedEvent& event);
+ void operator()(NYdb::NTopic::TSessionClosedEvent& event);
+ void operator()(NYdb::NTopic::TReadSessionEvent::TStartPartitionSessionEvent& event);
+ void operator()(NYdb::NTopic::TReadSessionEvent::TStopPartitionSessionEvent& event);
+ void operator()(NYdb::NTopic::TReadSessionEvent::TEndPartitionSessionEvent& event);
+ void operator()(NYdb::NTopic::TReadSessionEvent::TPartitionSessionClosedEvent& event);
+ void operator()(NYdb::NTopic::TReadSessionEvent::TCommitOffsetAcknowledgementEvent&) {}
+ void operator()(NYdb::NTopic::TReadSessionEvent::TPartitionSessionStatusEvent&) { }
+
+ TTopicSession& Self;
+ const TString& LogPrefix;
+ };
+
+ const TString TopicPath;
+ NActors::TActorId RowDispatcherActorId;
+ ui32 PartitionId;
+ NYdb::TDriver Driver;
+ std::shared_ptr<NYdb::ICredentialsProviderFactory> CredentialsProviderFactory;
+ std::unique_ptr<NYdb::NTopic::TTopicClient> TopicClient; // created lazily in GetTopicClient()
+ std::shared_ptr<NYdb::NTopic::IReadSession> ReadSession; // null until CreateTopicSession()
+ const i64 BufferSize;
+ TString LogPrefix;
+ NYql::NDq::TDqAsyncStats IngressStats;
+ ui64 LastMessageOffset = 0;
+ bool IsWaitingEvents = false; // a WaitEvent() subscription is in flight
+ THashMap<NActors::TActorId, ClientsInfo> Clients;
+ THashSet<NActors::TActorId> ClientsWithoutPredicate; // subset of Clients receiving raw messages
+ std::unique_ptr<TJsonParser> Parser;
+ NConfig::TRowDispatcherConfig Config;
+ ui64 UsedSize = 0; // total buffered bytes over all clients
+ TMaybe<TParserInputType> CurrentParserTypes;
+ const ::NMonitoring::TDynamicCounterPtr Counters;
+ TTopicSessionMetrics Metrics;
+
+public:
+ explicit TTopicSession(
+ const TString& topicPath,
+ const NConfig::TRowDispatcherConfig& config,
+ NActors::TActorId rowDispatcherActorId,
+ ui32 partitionId,
+ NYdb::TDriver driver,
+ std::shared_ptr<NYdb::ICredentialsProviderFactory> credentialsProviderFactory,
+ const ::NMonitoring::TDynamicCounterPtr& counters);
+
+ void Bootstrap();
+ void PassAway() override;
+
+ static constexpr char ActorName[] = "FQ_ROW_DISPATCHER_SESSION";
+
+private:
+ NYdb::NTopic::TTopicClientSettings GetTopicClientSettings(const NYql::NPq::NProto::TDqPqTopicSource& sourceParams) const;
+ NYdb::NTopic::TTopicClient& GetTopicClient(const NYql::NPq::NProto::TDqPqTopicSource& sourceParams);
+ NYdb::NTopic::TReadSessionSettings GetReadSessionSettings(const NYql::NPq::NProto::TDqPqTopicSource& sourceParams) const;
+ void CreateTopicSession();
+ void CloseTopicSession();
+ void SubscribeOnNextEvent();
+ void SendToParsing(ui64 offset, const TString& message);
+ void SendData(ClientsInfo& info);
+ void InitParser(const NYql::NPq::NProto::TDqPqTopicSource& sourceParams);
+ void FatalError(const TString& message, const std::unique_ptr<TJsonFilter>* filter = nullptr);
+ void SendDataArrived(ClientsInfo& client);
+ void StopReadSession();
+ TString GetSessionId() const;
+ void HandleNewEvents();
+ TInstant GetMinStartingMessageTimestamp() const;
+ void AddDataToClient(ClientsInfo& client, ui64 offset, const TString& json);
+
+ std::pair<NYql::NUdf::TUnboxedValuePod, i64> CreateItem(const NYdb::NTopic::TReadSessionEvent::TDataReceivedEvent::TMessage& message);
+
+ void Handle(NFq::TEvPrivate::TEvPqEventsReady::TPtr&);
+ void Handle(NFq::TEvPrivate::TEvCreateSession::TPtr&);
+ void Handle(NFq::TEvPrivate::TEvDataParsed::TPtr&);
+ void Handle(NFq::TEvPrivate::TEvDataAfterFilteration::TPtr&);
+ void Handle(NFq::TEvPrivate::TEvStatus::TPtr&);
+ void Handle(NFq::TEvPrivate::TEvDataFiltered::TPtr&);
+ void Handle(NFq::TEvPrivate::TEvPrintState::TPtr&);
+ void Handle(TEvRowDispatcher::TEvGetNextBatch::TPtr&);
+ void Handle(NFq::TEvRowDispatcher::TEvStopSession::TPtr& ev);
+ void Handle(NFq::TEvRowDispatcher::TEvStartSession::TPtr& ev);
+ void HandleException(const std::exception& err);
+
+ void PrintInternalState();
+ void SendSessionError(NActors::TActorId readActorId, const TString& message);
+
+private:
+
+ // Main state. Any exception escaping a handler goes to HandleException,
+ // which routes into FatalError/ErrorState.
+ STRICT_STFUNC_EXC(StateFunc,
+ hFunc(NFq::TEvPrivate::TEvPqEventsReady, Handle);
+ hFunc(NFq::TEvPrivate::TEvCreateSession, Handle);
+ hFunc(NFq::TEvPrivate::TEvDataParsed, Handle);
+ hFunc(NFq::TEvPrivate::TEvDataAfterFilteration, Handle);
+ hFunc(NFq::TEvPrivate::TEvStatus, Handle);
+ hFunc(NFq::TEvPrivate::TEvDataFiltered, Handle);
+ hFunc(NFq::TEvPrivate::TEvPrintState, Handle);
+ hFunc(TEvRowDispatcher::TEvGetNextBatch, Handle);
+ hFunc(NFq::TEvRowDispatcher::TEvStartSession, Handle);
+ cFunc(NActors::TEvents::TEvPoisonPill::EventType, PassAway);
+ hFunc(NFq::TEvRowDispatcher::TEvStopSession, Handle);,
+ ExceptionFunc(std::exception, HandleException)
+ )
+
+ // Terminal state after FatalError: only poison is honoured, all other
+ // traffic is ignored.
+ STRICT_STFUNC(ErrorState, {
+ cFunc(NActors::TEvents::TEvPoisonPill::EventType, PassAway);
+ IgnoreFunc(NFq::TEvPrivate::TEvPqEventsReady);
+ IgnoreFunc(NFq::TEvPrivate::TEvCreateSession);
+ IgnoreFunc(NFq::TEvPrivate::TEvDataAfterFilteration);
+ IgnoreFunc(NFq::TEvPrivate::TEvStatus);
+ IgnoreFunc(NFq::TEvPrivate::TEvDataFiltered);
+ IgnoreFunc(TEvRowDispatcher::TEvGetNextBatch);
+ IgnoreFunc(NFq::TEvRowDispatcher::TEvStartSession);
+ IgnoreFunc(NFq::TEvRowDispatcher::TEvStopSession);
+ IgnoreFunc(NFq::TEvPrivate::TEvPrintState);
+ })
+};
+
+// Stores static session parameters only; the topic client and read session
+// are created lazily once the first client registers (see CreateTopicSession).
+TTopicSession::TTopicSession(
+ const TString& topicPath,
+ const NConfig::TRowDispatcherConfig& config,
+ NActors::TActorId rowDispatcherActorId,
+ ui32 partitionId,
+ NYdb::TDriver driver,
+ std::shared_ptr<NYdb::ICredentialsProviderFactory> credentialsProviderFactory,
+ const ::NMonitoring::TDynamicCounterPtr& counters)
+ : TopicPath(topicPath)
+ , RowDispatcherActorId(rowDispatcherActorId)
+ , PartitionId(partitionId)
+ , Driver(std::move(driver))
+ , CredentialsProviderFactory(credentialsProviderFactory)
+ , BufferSize(16_MB)
+ , LogPrefix("TopicSession")
+ , Config(config)
+ , Counters(counters)
+{
+}
+
+// Switches to the main state function, attaches metrics and arms the two
+// periodic timers (status reports to clients, internal state dump).
+void TTopicSession::Bootstrap() {
+ Become(&TTopicSession::StateFunc);
+ Metrics.Init(Counters, SelfId());
+ LogPrefix = LogPrefix + " " + SelfId().ToString() + " ";
+ LOG_ROW_DISPATCHER_DEBUG("Bootstrap " << ", PartitionId " << PartitionId
+ << ", Timeout " << Config.GetTimeoutBeforeStartSessionSec() << " sec, StatusPeriod " << Config.GetSendStatusPeriodSec() << " sec");
+ Y_ENSURE(Config.GetSendStatusPeriodSec() > 0);
+ Schedule(TDuration::Seconds(Config.GetSendStatusPeriodSec()), new NFq::TEvPrivate::TEvStatus());
+ Schedule(TDuration::Seconds(PrintStatePeriodSec), new NFq::TEvPrivate::TEvPrintState());
+}
+
+// Actor teardown: close the SDK read session before the base-class PassAway.
+void TTopicSession::PassAway() {
+ LOG_ROW_DISPATCHER_DEBUG("PassAway");
+ StopReadSession();
+ NActors::TActorBootstrapped<TTopicSession>::PassAway();
+}
+
+// Arms exactly one WaitEvent() subscription on the read session. Skipped when
+// no session exists, a subscription is already in flight, or the configured
+// memory budget is exceeded (backpressure: reading resumes after SendData).
+void TTopicSession::SubscribeOnNextEvent() {
+ if (!ReadSession || IsWaitingEvents) {
+ return;
+ }
+
+ if (Config.GetMaxSessionUsedMemory() && UsedSize > Config.GetMaxSessionUsedMemory()) {
+ LOG_ROW_DISPATCHER_TRACE("Too much used memory (" << UsedSize << " bytes), skip subscribing to WaitEvent()");
+ return;
+ }
+
+ LOG_ROW_DISPATCHER_TRACE("SubscribeOnNextEvent");
+ IsWaitingEvents = true;
+ Metrics.InFlySubscribe->Inc();
+ NActors::TActorSystem* actorSystem = NActors::TActivationContext::ActorSystem();
+ // The continuation runs on an SDK thread, so it must re-enter the actor
+ // system via Send rather than touch this actor's state directly.
+ ReadSession->WaitEvent().Subscribe([actorSystem, selfId = SelfId()](const auto&){
+ actorSystem->Send(selfId, new NFq::TEvPrivate::TEvPqEventsReady());
+ });
+}
+
+// Builds topic client settings from the query's source parameters.
+NYdb::NTopic::TTopicClientSettings TTopicSession::GetTopicClientSettings(const NYql::NPq::NProto::TDqPqTopicSource& sourceParams) const {
+ NYdb::NTopic::TTopicClientSettings opts;
+ opts.Database(sourceParams.GetDatabase())
+ .DiscoveryEndpoint(sourceParams.GetEndpoint())
+ .SslCredentials(NYdb::TSslCredentials(sourceParams.GetUseSsl()))
+ .CredentialsProviderFactory(CredentialsProviderFactory);
+ return opts;
+}
+
+// Lazily constructs the topic client on first use.
+NYdb::NTopic::TTopicClient& TTopicSession::GetTopicClient(const NYql::NPq::NProto::TDqPqTopicSource& sourceParams) {
+ if (!TopicClient) {
+ TopicClient = std::make_unique<NYdb::NTopic::TTopicClient>(Driver, GetTopicClientSettings(sourceParams));
+ }
+ return *TopicClient;
+}
+
+// Earliest StartingMessageTimestamp over all registered clients; requires at
+// least one client (enforced by Y_ENSURE).
+TInstant TTopicSession::GetMinStartingMessageTimestamp() const {
+ auto result = TInstant::Max();
+ Y_ENSURE(!Clients.empty());
+ for (const auto& [actorId, info] : Clients) {
+ ui64 time = info.Settings.GetStartingMessageTimestampMs();
+ result = std::min(result, TInstant::MilliSeconds(time));
+ }
+ return result;
+}
+
+// Builds read-session settings for this partition: single partition, shared
+// buffer size, read start at the minimum client timestamp, and either a named
+// consumer or consumer-less mode depending on configuration.
+NYdb::NTopic::TReadSessionSettings TTopicSession::GetReadSessionSettings(const NYql::NPq::NProto::TDqPqTopicSource& sourceParams) const {
+ NYdb::NTopic::TTopicReadSettings topicReadSettings;
+ topicReadSettings.Path(TopicPath);
+ topicReadSettings.AppendPartitionIds(PartitionId);
+
+ TInstant minTime = GetMinStartingMessageTimestamp();
+ LOG_ROW_DISPATCHER_INFO("Create topic session, Path " << TopicPath
+ << ", StartingMessageTimestamp " << minTime
+ << ", BufferSize " << BufferSize << ", WithoutConsumer " << Config.GetWithoutConsumer());
+
+ auto settings = NYdb::NTopic::TReadSessionSettings()
+ .AppendTopics(topicReadSettings)
+ .MaxMemoryUsageBytes(BufferSize)
+ .ReadFromTimestamp(minTime);
+ if (Config.GetWithoutConsumer()) {
+ settings.WithoutConsumer();
+ } else {
+ settings.ConsumerName(sourceParams.GetConsumerName());
+ }
+ return settings;
+}
+
+// Creates the SDK read session (at most once). Source params are taken from
+// an arbitrary client — all clients must share columns/types, which
+// Handle(TEvStartSession) enforces. No-op while there are no clients.
+void TTopicSession::CreateTopicSession() {
+ if (Clients.empty()) {
+ return;
+ }
+
+ // Use any sourceParams.
+ const NYql::NPq::NProto::TDqPqTopicSource& sourceParams = Clients.begin()->second.Settings.GetSource();
+
+ if (!ReadSession) {
+ InitParser(sourceParams);
+ ReadSession = GetTopicClient(sourceParams).CreateReadSession(GetReadSessionSettings(sourceParams));
+ SubscribeOnNextEvent();
+ }
+}
+
+// The SDK signalled that events are available: drain them and re-subscribe.
+void TTopicSession::Handle(NFq::TEvPrivate::TEvPqEventsReady::TPtr&) {
+ LOG_ROW_DISPATCHER_TRACE("TEvPqEventsReady");
+ Metrics.InFlySubscribe->Dec();
+ IsWaitingEvents = false;
+ HandleNewEvents();
+ SubscribeOnNextEvent();
+}
+
+// Deferred session creation (scheduled from Handle(TEvStartSession)).
+void TTopicSession::Handle(NFq::TEvPrivate::TEvCreateSession::TPtr&) {
+ CreateTopicSession();
+}
+
+// Handles one parsed row coming back from the JSON parser: traces the column
+// values, feeds the row into every client's predicate filter, then notifies
+// itself that filtering for this offset is complete (TEvDataFiltered advances
+// the clients' NextMessageOffset).
+void TTopicSession::Handle(NFq::TEvPrivate::TEvDataParsed::TPtr& ev) {
+    LOG_ROW_DISPATCHER_TRACE("TEvDataParsed, offset " << ev->Get()->Offset);
+
+    // Fix: iterate by const reference — the original `for (auto v : ...)`
+    // copied every parsed string just to trace it, on the per-message hot path.
+    for (const auto& v : ev->Get()->Value) {
+        LOG_ROW_DISPATCHER_TRACE("v " << v);
+    }
+
+    for (auto& [actorId, info] : Clients) {
+        try {
+            if (!info.Filter) {
+                continue;   // client gets raw messages via SendToParsing, no predicate here
+            }
+            info.Filter->Push(ev->Get()->Offset, ev->Get()->Value);
+        } catch (const std::exception& e) {
+            // FatalError throws to unwind into HandleException, so the loop ends here.
+            FatalError(e.what(), &info.Filter);
+        }
+    }
+    auto event = std::make_unique<TEvPrivate::TEvDataFiltered>(ev->Get()->Offset);
+    Send(SelfId(), event.release());
+}
+
+// A row passed some client's predicate: buffer it for that client.
+void TTopicSession::Handle(NFq::TEvPrivate::TEvDataAfterFilteration::TPtr& ev) {
+ LOG_ROW_DISPATCHER_TRACE("TEvDataAfterFilteration, read actor id " << ev->Get()->ReadActorId.ToString());
+ auto it = Clients.find(ev->Get()->ReadActorId);
+ if (it == Clients.end()) {
+ LOG_ROW_DISPATCHER_ERROR("Skip DataAfterFilteration, wrong read actor, id " << ev->Get()->ReadActorId.ToString());
+ return;
+ }
+ AddDataToClient(it->second, ev->Get()->Offset, ev->Get()->Json);
+}
+
+// Periodic status tick: report each client's NextMessageOffset to the row
+// dispatcher, but only when it advanced since the last report.
+void TTopicSession::Handle(NFq::TEvPrivate::TEvStatus::TPtr&) {
+ LOG_ROW_DISPATCHER_TRACE("TEvStatus");
+ Schedule(TDuration::Seconds(Config.GetSendStatusPeriodSec()), new NFq::TEvPrivate::TEvStatus());
+ for (auto& [actorId, info] : Clients) {
+ if (!info.NextMessageOffset) {
+ continue;
+ }
+ if (*info.NextMessageOffset <= info.LastSendedNextMessageOffset) {
+ continue;
+ }
+ auto event = std::make_unique<TEvRowDispatcher::TEvStatus>();
+ event->Record.SetPartitionId(PartitionId);
+ event->Record.SetNextMessageOffset(*info.NextMessageOffset);
+ info.LastSendedNextMessageOffset = *info.NextMessageOffset;
+ event->ReadActorId = info.ReadActorId;
+ LOG_ROW_DISPATCHER_TRACE("Send status to " << info.ReadActorId << ", offset " << *info.NextMessageOffset);
+ Send(RowDispatcherActorId, event.release());
+ }
+}
+
+// Filtering for Offset finished: advance every client's NextMessageOffset to
+// at least Offset + 1.
+void TTopicSession::Handle(NFq::TEvPrivate::TEvDataFiltered::TPtr& ev) {
+ LOG_ROW_DISPATCHER_TRACE("TEvDataFiltered, offset " << ev->Get()->Offset);
+ for (auto& [actorId, info] : Clients) {
+ if (!info.NextMessageOffset
+ || *info.NextMessageOffset < ev->Get()->Offset + 1) {
+ info.NextMessageOffset = ev->Get()->Offset + 1;
+ }
+ }
+}
+
+// A read actor pulls its buffered rows; afterwards reading may resume if the
+// memory budget allows (SubscribeOnNextEvent re-checks it).
+void TTopicSession::Handle(TEvRowDispatcher::TEvGetNextBatch::TPtr& ev) {
+ LOG_ROW_DISPATCHER_TRACE("TEvGetNextBatch from " << ev->Sender.ToString());
+ Metrics.InFlyAsyncInputData->Set(0);
+ auto it = Clients.find(ev->Sender);
+ if (it == Clients.end()) {
+ LOG_ROW_DISPATCHER_ERROR("Wrong client, sender " << ev->Sender);
+ return;
+ }
+ SendData(it->second);
+ SubscribeOnNextEvent();
+}
+
+// Drains all currently available SDK events, stopping early when the session
+// disappears (e.g. FatalError) or the memory budget is exhausted.
+void TTopicSession::HandleNewEvents() {
+ while (true) {
+ if (!ReadSession) {
+ return;
+ }
+ if (Config.GetMaxSessionUsedMemory() && UsedSize > Config.GetMaxSessionUsedMemory()) {
+ LOG_ROW_DISPATCHER_TRACE("Too much used memory (" << UsedSize << " bytes), stop reading from yds");
+ break;
+ }
+ TMaybe<NYdb::NTopic::TReadSessionEvent::TEvent> event = ReadSession->GetEvent(false);
+ if (!event) {
+ break;
+ }
+ std::visit(TTopicEventProcessor{*this, LogPrefix}, *event);
+ }
+}
+
+// Closes and releases the SDK read session (keeps the topic client).
+void TTopicSession::CloseTopicSession() {
+ if (!ReadSession) {
+ return;
+ }
+ LOG_ROW_DISPATCHER_DEBUG("Close session");
+ ReadSession->Close(TDuration::Zero());
+ ReadSession.reset();
+}
+
+// New messages arrived: route each payload into the parse/filter pipeline and
+// remember the highest offset seen.
+void TTopicSession::TTopicEventProcessor::operator()(NYdb::NTopic::TReadSessionEvent::TDataReceivedEvent& event) {
+ Self.Metrics.RowsRead->Add(event.GetMessages().size());
+ for (const auto& message : event.GetMessages()) {
+ const TString& data = message.GetData();
+ Self.IngressStats.Bytes += data.size();
+ LOG_ROW_DISPATCHER_TRACE("Data received: " << message.DebugString(true));
+
+ // Detach() makes the string own its buffer, so it stays valid after the
+ // SDK message is released.
+ TString item = message.GetData();
+ item.Detach();
+ Self.SendToParsing(message.GetOffset(), item);
+ Self.LastMessageOffset = message.GetOffset();
+ }
+}
+
+// The SDK closed the session: treat as fatal, all clients get TEvSessionError.
+void TTopicSession::TTopicEventProcessor::operator()(NYdb::NTopic::TSessionClosedEvent& ev) {
+ TString message = TStringBuilder() << "Read session to topic \"" << Self.TopicPath << "\" was closed: " << ev.DebugString();
+ LOG_ROW_DISPATCHER_DEBUG(message);
+ NYql::TIssues issues;
+ issues.AddIssue(message);
+ Self.FatalError(issues.ToOneLineString());
+}
+
+// Confirm partition start with the smallest NextMessageOffset among clients
+// that have one (unset if none do), so no client misses messages.
+void TTopicSession::TTopicEventProcessor::operator()(NYdb::NTopic::TReadSessionEvent::TStartPartitionSessionEvent& event) {
+ LOG_ROW_DISPATCHER_DEBUG("StartPartitionSessionEvent received");
+
+ TMaybe<ui64> minOffset;
+ for (const auto& [actorId, info] : Self.Clients) {
+ if (!minOffset
+ || (info.NextMessageOffset && (info.NextMessageOffset < *minOffset))) {
+ minOffset = info.NextMessageOffset;
+ }
+ }
+ LOG_ROW_DISPATCHER_DEBUG("Confirm StartPartitionSession with offset " << minOffset);
+ event.Confirm(minOffset);
+}
+
+void TTopicSession::TTopicEventProcessor::operator()(NYdb::NTopic::TReadSessionEvent::TStopPartitionSessionEvent& event) {
+ LOG_ROW_DISPATCHER_DEBUG("SessionId: " << Self.GetSessionId() << " StopPartitionSessionEvent received");
+ event.Confirm();
+}
+
+void TTopicSession::TTopicEventProcessor::operator()(NYdb::NTopic::TReadSessionEvent::TEndPartitionSessionEvent& /*event*/) {
+ LOG_ROW_DISPATCHER_WARN("TEndPartitionSessionEvent");
+}
+
+void TTopicSession::TTopicEventProcessor::operator()(NYdb::NTopic::TReadSessionEvent::TPartitionSessionClosedEvent& /*event*/) {
+ LOG_ROW_DISPATCHER_WARN("TPartitionSessionClosedEvent");
+}
+
+// Wraps a message payload into an unboxed string value plus its byte size.
+// NOTE(review): not referenced from any code visible in this file — verify it
+// is still needed.
+std::pair<NYql::NUdf::TUnboxedValuePod, i64> TTopicSession::CreateItem(const NYdb::NTopic::TReadSessionEvent::TDataReceivedEvent::TMessage& message) {
+ const TString& data = message.GetData();
+ i64 usedSpace = data.Size();
+ NYql::NUdf::TUnboxedValuePod item = NKikimr::NMiniKQL::MakeString(NYql::NUdf::TStringRef(data.Data(), data.Size()));
+ return std::make_pair(item, usedSpace);
+}
+
+// Session id of the underlying SDK read session, or "empty" before one exists.
+TString TTopicSession::GetSessionId() const {
+    if (!ReadSession) {
+        return TString{"empty"};
+    }
+    return ReadSession->GetSessionId();
+}
+
+// Entry point of the per-message pipeline: clients without a predicate get the
+// raw payload immediately; the message is then handed to the JSON parser,
+// whose callback re-enters the actor via TEvDataParsed.
+void TTopicSession::SendToParsing(ui64 offset, const TString& message) {
+ LOG_ROW_DISPATCHER_TRACE("SendToParsing, message " << message);
+
+ for (auto& readActorId : ClientsWithoutPredicate) {
+ auto it = Clients.find(readActorId);
+ Y_ENSURE(it != Clients.end(), "Internal error: unknown client");
+ auto& info = it->second;
+ if (!info.Filter) {
+ LOG_ROW_DISPATCHER_TRACE("Send message to client without parsing/filtering");
+ AddDataToClient(info, offset, message);
+ }
+ }
+
+ try {
+ Parser->Push(offset, message);
+ } catch (const std::exception& e) {
+ FatalError(e.what());
+ }
+}
+
+// Flushes a client's buffer to the row dispatcher in batches of at most
+// ~MaxBatchSizeBytes. At least one TEvMessageBatch is always sent (possibly
+// empty) so the client still learns the current NextMessageOffset.
+void TTopicSession::SendData(ClientsInfo& info) {
+ info.DataArrivedSent = false;
+ if (info.Buffer.empty()) {
+ LOG_ROW_DISPATCHER_TRACE("Buffer empty");
+ }
+
+ do {
+ auto event = std::make_unique<TEvRowDispatcher::TEvMessageBatch>();
+ event->Record.SetPartitionId(PartitionId);
+ Y_ENSURE(info.NextMessageOffset);
+ event->ReadActorId = info.ReadActorId;
+
+ ui64 batchSize = 0;
+ while (!info.Buffer.empty()) {
+ const auto& [offset, json] = info.Buffer.front();
+ info.UsedSize -= json.size();
+ UsedSize -= json.size();
+ batchSize += json.size();
+ NFq::NRowDispatcherProto::TEvMessage message;
+ message.SetJson(json);
+ message.SetOffset(offset);
+ event->Record.AddMessages()->CopyFrom(message);
+ event->Record.SetNextMessageOffset(offset + 1);
+ info.Buffer.pop();
+
+ if (batchSize > MaxBatchSizeBytes) {
+ break;
+ }
+ }
+ if (info.Buffer.empty()) {
+ // Last batch of this flush: report the real next offset (it may be
+ // ahead of the last buffered row, e.g. after filtered-out messages).
+ event->Record.SetNextMessageOffset(*info.NextMessageOffset);
+ }
+ LOG_ROW_DISPATCHER_TRACE("SendData to " << info.ReadActorId << ", batch size " << event->Record.MessagesSize());
+ Send(RowDispatcherActorId, event.release());
+ } while(!info.Buffer.empty());
+ info.LastSendedNextMessageOffset = *info.NextMessageOffset;
+}
+
+// Registers a new client (read actor): validates that its columns/types match
+// the already-initialized parser, compiles its predicate into a TJsonFilter if
+// present, and (re)creates the read session when the client needs messages
+// older than what has already been read.
+void TTopicSession::Handle(NFq::TEvRowDispatcher::TEvStartSession::TPtr& ev) {
+ auto it = Clients.find(ev->Sender);
+ if (it != Clients.end()) {
+ FatalError("Internal error: sender " + ev->Sender.ToString());
+ return;
+ }
+
+ LOG_ROW_DISPATCHER_INFO("New client, read actor id " << ev->Sender.ToString());
+
+ auto columns = GetVector(ev->Get()->Record.GetSource().GetColumns());
+ auto types = GetVector(ev->Get()->Record.GetSource().GetColumnTypes());
+ auto parserType = std::make_pair(columns, types);
+ // All clients of one session must share the parser schema (TODO YQ-3594).
+ if (CurrentParserTypes && *CurrentParserTypes != parserType) {
+ SendSessionError(ev->Sender, "Different columns/types, use same in all queries");
+ return;
+ }
+
+ try {
+ auto& clientInfo = Clients.emplace(
+ std::piecewise_construct,
+ std::forward_as_tuple(ev->Sender),
+ std::forward_as_tuple(ev)).first->second;
+
+ TString predicate = clientInfo.Settings.GetSource().GetPredicate();
+ if (!predicate.empty()) {
+ // The filter callback re-enters this actor with the matching row.
+ clientInfo.Filter = NewJsonFilter(
+ columns,
+ types,
+ predicate,
+ [&, actorId = clientInfo.ReadActorId](ui64 offset, const TString& json){
+ Send(SelfId(), new NFq::TEvPrivate::TEvDataAfterFilteration(offset, json, actorId));
+ });
+ } else {
+ ClientsWithoutPredicate.insert(ev->Sender);
+ }
+
+ LOG_ROW_DISPATCHER_INFO("New client: offset " << clientInfo.NextMessageOffset << ", predicate: " << clientInfo.Settings.GetSource().GetPredicate());
+
+ if (ReadSession) {
+ // Restart so ReadFromTimestamp/offset can go back for this client.
+ if (clientInfo.Settings.HasOffset() && (clientInfo.Settings.GetOffset() <= LastMessageOffset)) {
+ LOG_ROW_DISPATCHER_INFO("New client has less offset than the last message, stop (restart) topic session");
+ StopReadSession();
+ }
+ }
+ } catch (const NYql::NPureCalc::TCompileError& e) {
+ FatalError("Adding new client failed: CompileError: sql: " + e.GetYql() + ", error: " + e.GetIssues());
+ } catch (const yexception &ex) {
+ FatalError(TString{"Adding new client failed: "} + ex.what());
+ } catch (...) {
+ FatalError("Adding new client failed, " + CurrentExceptionMessage());
+ }
+
+ PrintInternalState();
+ if (!ReadSession) {
+ Schedule(TDuration::Seconds(Config.GetTimeoutBeforeStartSessionSec()), new NFq::TEvPrivate::TEvCreateSession());
+ }
+}
+
+// Buffers one row for a client (dropping rows it has already seen), accounts
+// memory, and wakes the client with TEvNewDataArrived if needed.
+void TTopicSession::AddDataToClient(ClientsInfo& info, ui64 offset, const TString& json) {
+ if (info.NextMessageOffset && offset < info.NextMessageOffset) {
+ return;
+ }
+ info.NextMessageOffset = offset + 1;
+ info.Buffer.push(std::make_pair(offset, json));
+ info.UsedSize += json.size();
+ UsedSize += json.size();
+ SendDataArrived(info);
+}
+
+// Unregisters a client. Note: buffered bytes of the removed client are not
+// subtracted from the session-wide UsedSize here — TODO confirm intended.
+void TTopicSession::Handle(NFq::TEvRowDispatcher::TEvStopSession::TPtr& ev) {
+ LOG_ROW_DISPATCHER_DEBUG("TEvStopSession, topicPath " << ev->Get()->Record.GetSource().GetTopicPath() <<
+ " partitionId " << ev->Get()->Record.GetPartitionId());
+
+ auto it = Clients.find(ev->Sender);
+ if (it == Clients.end()) {
+ LOG_ROW_DISPATCHER_DEBUG("Wrong ClientSettings"); // TODO
+ return;
+ }
+ Clients.erase(it);
+ ClientsWithoutPredicate.erase(ev->Sender);
+}
+
+// Compiles the JSON parser (at most once) for the session-wide column schema;
+// the parser callback re-enters the actor with TEvDataParsed from an SDK thread.
+void TTopicSession::InitParser(const NYql::NPq::NProto::TDqPqTopicSource& sourceParams) {
+ if (Parser) {
+ return;
+ }
+ try {
+ CurrentParserTypes = std::make_pair(GetVector(sourceParams.GetColumns()), GetVector(sourceParams.GetColumnTypes()));
+ NActors::TActorSystem* actorSystem = NActors::TActivationContext::ActorSystem();
+ Parser = NewJsonParser(
+ GetVector(sourceParams.GetColumns()),
+ [actorSystem, selfId = SelfId()](ui64 offset, TList<TString>&& value){
+ actorSystem->Send(selfId, new NFq::TEvPrivate::TEvDataParsed(offset, std::move(value)));
+ });
+ } catch (const NYql::NPureCalc::TCompileError& e) {
+ FatalError(e.GetIssues());
+ }
+}
+
+// Unrecoverable failure: notify every client, stop reading, switch to
+// ErrorState and throw so the current handler unwinds into HandleException.
+void TTopicSession::FatalError(const TString& message, const std::unique_ptr<TJsonFilter>* filter) {
+ TStringStream str;
+ str << message;
+ if (Parser) {
+ str << ", parser sql: " << Parser->GetSql();
+ }
+ if (filter) {
+ str << ", filter sql:" << (*filter)->GetSql();
+ }
+ LOG_ROW_DISPATCHER_ERROR("FatalError: " << str.Str());
+
+ for (auto& [readActorId, info] : Clients) {
+ LOG_ROW_DISPATCHER_DEBUG("Send TEvSessionError to " << readActorId);
+ SendSessionError(readActorId, str.Str());
+ }
+ StopReadSession();
+ Become(&TTopicSession::ErrorState);
+ ythrow yexception() << "FatalError: " << str.Str(); // To exit from current stack and call once PassAway() in HandleException().
+}
+
+// Sends a TEvSessionError for one client via the row dispatcher.
+void TTopicSession::SendSessionError(NActors::TActorId readActorId, const TString& message) {
+ auto event = std::make_unique<TEvRowDispatcher::TEvSessionError>();
+ event->Record.SetMessage(message);
+ event->Record.SetPartitionId(PartitionId);
+ event->ReadActorId = readActorId;
+ Send(RowDispatcherActorId, event.release());
+}
+
+// Closes the read session and drops the topic client (cf. CloseTopicSession,
+// which keeps the client).
+void TTopicSession::StopReadSession() {
+ if (ReadSession) {
+ LOG_ROW_DISPATCHER_DEBUG("Close read session");
+ ReadSession->Close(TDuration::Zero());
+ ReadSession.reset();
+ }
+ TopicClient.reset();
+}
+
+// Notifies a client that data is buffered; sent at most once until the client
+// answers with TEvGetNextBatch (DataArrivedSent acts as the latch).
+void TTopicSession::SendDataArrived(ClientsInfo& info) {
+ if (info.Buffer.empty() || info.DataArrivedSent) {
+ return;
+ }
+ info.DataArrivedSent = true;
+ LOG_ROW_DISPATCHER_TRACE("Send TEvNewDataArrived to " << info.ReadActorId);
+ Metrics.InFlyAsyncInputData->Set(1);
+ auto event = std::make_unique<TEvRowDispatcher::TEvNewDataArrived>();
+ event->Record.SetPartitionId(PartitionId);
+ event->ReadActorId = info.ReadActorId;
+ Send(RowDispatcherActorId, event.release());
+}
+
+// Catch-all for exceptions escaping handlers (including the ythrow from
+// FatalError); already-failed sessions are ignored.
+void TTopicSession::HandleException(const std::exception& e) {
+ if (CurrentStateFunc() == &TThis::ErrorState) {
+ return;
+ }
+ FatalError(TString("Internal error: exception: ") + e.what());
+}
+
+// Dumps per-client buffer/offset accounting to the debug log.
+void TTopicSession::PrintInternalState() {
+ TStringStream str;
+ str << "Clients:\n";
+ str << "UsedSize: " << UsedSize << "\n";
+ for (auto& [readActorId, info] : Clients) {
+ str << " read actor id " << readActorId << ", buffer size " << info.Buffer.size()
+ << ", used size: " << info.UsedSize << ", data arrived sent " << info.DataArrivedSent
+ << ", next offset " << info.NextMessageOffset << "\n";
+ }
+ LOG_ROW_DISPATCHER_DEBUG(str.Str());
+}
+
+// Periodic self-tick: re-arm the timer and dump state.
+void TTopicSession::Handle(NFq::TEvPrivate::TEvPrintState::TPtr&) {
+ Schedule(TDuration::Seconds(PrintStatePeriodSec), new NFq::TEvPrivate::TEvPrintState());
+ PrintInternalState();
+}
+
+} // namespace
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Factory: builds the concrete TTopicSession actor (declared in the anonymous
+// namespace above) and returns it behind the opaque IActor interface.
+std::unique_ptr<NActors::IActor> NewTopicSession(
+    const TString& topicPath,
+    const NConfig::TRowDispatcherConfig& config,
+    NActors::TActorId rowDispatcherActorId,
+    ui32 partitionId,
+    NYdb::TDriver driver,
+    std::shared_ptr<NYdb::ICredentialsProviderFactory> credentialsProviderFactory,
+    const ::NMonitoring::TDynamicCounterPtr& counters) {
+    auto session = std::make_unique<TTopicSession>(
+        topicPath,
+        config,
+        rowDispatcherActorId,
+        partitionId,
+        std::move(driver),
+        credentialsProviderFactory,
+        counters);
+    return session;    // unique_ptr<TTopicSession> converts to unique_ptr<IActor>
+}
+
+} // namespace NFq
diff --git a/ydb/core/fq/libs/row_dispatcher/topic_session.h b/ydb/core/fq/libs/row_dispatcher/topic_session.h
new file mode 100644
index 00000000000..b3980cce826
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/topic_session.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include <ydb/core/fq/libs/config/protos/row_dispatcher.pb.h>
+#include <ydb/core/fq/libs/config/protos/common.pb.h>
+#include <ydb/core/fq/libs/shared_resources/shared_resources.h>
+
+#include <ydb/library/security/ydb_credentials_provider_factory.h>
+#include <ydb/library/yql/providers/pq/proto/dq_io.pb.h>
+#include <ydb/library/actors/core/actor.h>
+
+#include <memory>
+
+namespace NFq {
+
+// Creates the actor that reads one partition of a PQ topic on behalf of the
+// row dispatcher identified by rowDispatcherActorId. Ownership of the
+// returned actor is transferred to the caller for registration.
+std::unique_ptr<NActors::IActor> NewTopicSession(
+    const TString& topicPath,
+    const NConfig::TRowDispatcherConfig& config,
+    NActors::TActorId rowDispatcherActorId,
+    ui32 partitionId,
+    NYdb::TDriver driver,
+    std::shared_ptr<NYdb::ICredentialsProviderFactory> credentialsProviderFactory,
+    const ::NMonitoring::TDynamicCounterPtr& counters);
+
+} // namespace NFq
diff --git a/ydb/core/fq/libs/row_dispatcher/ut/coordinator_ut.cpp b/ydb/core/fq/libs/row_dispatcher/ut/coordinator_ut.cpp
new file mode 100644
index 00000000000..478326acf53
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/ut/coordinator_ut.cpp
@@ -0,0 +1,166 @@
+#include <ydb/core/fq/libs/ydb/ydb.h>
+#include <ydb/core/fq/libs/events/events.h>
+#include <ydb/core/fq/libs/row_dispatcher/coordinator.h>
+#include <ydb/core/fq/libs/row_dispatcher/events/data_plane.h>
+#include <ydb/core/testlib/actors/test_runtime.h>
+#include <ydb/core/testlib/basics/helpers.h>
+#include <ydb/core/testlib/actor_helpers.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <google/protobuf/util/message_differencer.h>
+
+namespace {
+
+using namespace NKikimr;
+using namespace NFq;
+
+// Fixture for coordinator tests: starts a 4-node test actor runtime,
+// registers a coordinator actor wired to a fake local row dispatcher
+// (an edge actor), and provides helpers to ping the coordinator as a
+// row dispatcher and to request partition addresses as a read actor.
+class TFixture : public NUnitTest::TBaseFixture {
+
+public:
+    TFixture()
+    : Runtime(4) {}
+
+    void SetUp(NUnitTest::TTestContext&) override {
+        TAutoPtr<TAppPrepare> app = new TAppPrepare();
+        Runtime.Initialize(app->Unwrap());
+        Runtime.SetLogPriority(NKikimrServices::FQ_ROW_DISPATCHER, NLog::PRI_TRACE);
+        auto credFactory = NKikimr::CreateYdbCredentialsProviderFactory;
+        auto yqSharedResources = NFq::TYqSharedResources::Cast(NFq::CreateYqSharedResourcesImpl({}, credFactory, MakeIntrusive<NMonitoring::TDynamicCounters>()));
+
+        // Edge actors stand in for row dispatchers / read actors; the
+        // coordinator only sees their actor ids.
+        LocalRowDispatcherId = Runtime.AllocateEdgeActor(0);
+        RowDispatcher1Id = Runtime.AllocateEdgeActor(1);
+        RowDispatcher2Id = Runtime.AllocateEdgeActor(2);
+        ReadActor1 = Runtime.AllocateEdgeActor(0);
+        ReadActor2 = Runtime.AllocateEdgeActor(0);
+
+        NConfig::TRowDispatcherCoordinatorConfig config;
+        config.SetCoordinationNodePath("RowDispatcher");
+        auto& database = *config.MutableDatabase();
+        database.SetEndpoint("YDB_ENDPOINT");
+        database.SetDatabase("YDB_DATABASE");
+        database.SetToken("");
+
+        Coordinator = Runtime.Register(NewCoordinator(
+            LocalRowDispatcherId,
+            config,
+            yqSharedResources,
+            "Tenant",
+            MakeIntrusive<NMonitoring::TDynamicCounters>()
+        ).release());
+
+        Runtime.EnableScheduleForActor(Coordinator);
+
+        // Wait until the coordinator actor has bootstrapped.
+        TDispatchOptions options;
+        options.FinalEvents.emplace_back(NActors::TEvents::TSystem::Bootstrap, 1);
+        Runtime.DispatchEvents(options);
+    }
+
+    void TearDown(NUnitTest::TTestContext& /* context */) override {
+    }
+
+    // Builds minimal PQ topic source settings for the given topic path.
+    NYql::NPq::NProto::TDqPqTopicSource BuildPqTopicSourceSettings(
+        TString topic)
+    {
+        NYql::NPq::NProto::TDqPqTopicSource settings;
+        settings.SetTopicPath(topic);
+        settings.SetConsumerName("PqConsumer");
+        settings.SetEndpoint("Endpoint");
+        settings.MutableToken()->SetName("token");
+        settings.SetDatabase("Database");
+        return settings;
+    }
+
+    // The coordinator must subscribe the local row dispatcher to
+    // coordinator-change notifications right after start.
+    void ExpectCoordinatorChangesSubscribe() {
+        auto eventHolder = Runtime.GrabEdgeEvent<NFq::TEvRowDispatcher::TEvCoordinatorChangesSubscribe>(LocalRowDispatcherId, TDuration::Seconds(5));
+        UNIT_ASSERT(eventHolder.Get() != nullptr);
+    }
+
+    // Announces a row dispatcher to the coordinator via TEvPing.
+    void Ping(NActors::TActorId rowDispatcherId) {
+        auto event = new NActors::TEvents::TEvPing();
+        Runtime.Send(new NActors::IEventHandle(Coordinator, rowDispatcherId, event));
+
+        // TODO: GrabEdgeEvent does not work with events on other nodes.
+        //auto eventHolder = Runtime.GrabEdgeEvent<NActors::TEvents::TEvPong>(rowDispatcherId, TDuration::Seconds(5));
+        //UNIT_ASSERT(eventHolder.Get() != nullptr);
+    }
+
+    // Sends a partition-address request on behalf of readActorId.
+    void MockRequest(NActors::TActorId readActorId, TString topicName, const std::vector<ui64>& partitionId) {
+        auto event = new NFq::TEvRowDispatcher::TEvCoordinatorRequest(
+            BuildPqTopicSourceSettings(topicName),
+            partitionId);
+        Runtime.Send(new NActors::IEventHandle(Coordinator, readActorId, event));
+    }
+
+    // Grabs the coordinator's reply and returns its protobuf payload.
+    NFq::NRowDispatcherProto::TEvGetAddressResponse ExpectResult(NActors::TActorId readActorId) {
+        auto eventPtr = Runtime.GrabEdgeEvent<NFq::TEvRowDispatcher::TEvCoordinatorResult>(readActorId, TDuration::Seconds(5));
+        UNIT_ASSERT(eventPtr.Get() != nullptr);
+        NFq::NRowDispatcherProto::TEvGetAddressResponse result;
+        result.CopyFrom(eventPtr->Get()->Record);
+        return result;
+    }
+
+    TActorSystemStub actorSystemStub;
+    NActors::TTestActorRuntime Runtime;
+    NActors::TActorId Coordinator;
+    NActors::TActorId LocalRowDispatcherId;
+    NActors::TActorId RowDispatcher1Id;
+    NActors::TActorId RowDispatcher2Id;
+    NActors::TActorId ReadActor1;
+    NActors::TActorId ReadActor2;
+
+    NYql::NPq::NProto::TDqPqTopicSource Source1 = BuildPqTopicSourceSettings("Source1");
+};
+
+Y_UNIT_TEST_SUITE(CoordinatorTests) {
+    // Routing invariants: the same (topic, partition) always resolves to the
+    // same row dispatcher, different partitions may be spread across
+    // dispatchers, and the routing changes after dispatchers are replaced.
+    Y_UNIT_TEST_F(Route, TFixture) {
+
+        ExpectCoordinatorChangesSubscribe();
+
+        TSet<NActors::TActorId> rowDispatcherIds{RowDispatcher1Id, RowDispatcher2Id, LocalRowDispatcherId};
+        for (auto id : rowDispatcherIds) {
+            Ping(id);
+        }
+
+        // Two read actors asking for the same partition must get identical
+        // answers.
+        MockRequest(ReadActor1, "topic1", {0});
+        auto result1 = ExpectResult(ReadActor1);
+
+        MockRequest(ReadActor2, "topic1", {0});
+        auto result2 = ExpectResult(ReadActor2);
+
+        UNIT_ASSERT(result1.PartitionsSize() == 1);
+        UNIT_ASSERT(result2.PartitionsSize() == 1);
+        UNIT_ASSERT(google::protobuf::util::MessageDifferencer::Equals(result1, result2));
+
+        MockRequest(ReadActor2, "topic1", {1});
+        auto result3 = ExpectResult(ReadActor2);
+
+        TActorId actualRowDispatcher1 = ActorIdFromProto(result1.GetPartitions(0).GetActorId());
+        TActorId actualRowDispatcher2 = ActorIdFromProto(result2.GetPartitions(0).GetActorId());
+        TActorId actualRowDispatcher3 = ActorIdFromProto(result3.GetPartitions(0).GetActorId());
+
+        UNIT_ASSERT(rowDispatcherIds.contains(actualRowDispatcher1));
+        UNIT_ASSERT(rowDispatcherIds.contains(actualRowDispatcher2));
+        UNIT_ASSERT(rowDispatcherIds.contains(actualRowDispatcher3));
+        // Partitions 0 and 1 must land on different dispatchers.
+        UNIT_ASSERT(actualRowDispatcher1 != actualRowDispatcher3);
+
+        // RowDispatchers are restarted.
+        // Skip Disconnected/Connected in test.
+        auto newDispatcher1Id = Runtime.AllocateEdgeActor(1);
+        Ping(newDispatcher1Id);
+
+        auto newDispatcher2Id = Runtime.AllocateEdgeActor(1);
+        Ping(newDispatcher2Id);
+
+        MockRequest(ReadActor1, "topic1", {0});
+        auto result4 = ExpectResult(ReadActor1);
+
+        MockRequest(ReadActor2, "topic1", {1});
+        auto result5 = ExpectResult(ReadActor2);
+
+        // At least one previously issued address must change after the
+        // restart.
+        UNIT_ASSERT(!google::protobuf::util::MessageDifferencer::Equals(result1, result4)
+            || !google::protobuf::util::MessageDifferencer::Equals(result3, result5));
+    }
+}
+
+}
+
diff --git a/ydb/core/fq/libs/row_dispatcher/ut/json_filter_ut.cpp b/ydb/core/fq/libs/row_dispatcher/ut/json_filter_ut.cpp
new file mode 100644
index 00000000000..1645f521051
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/ut/json_filter_ut.cpp
@@ -0,0 +1,91 @@
+#include <ydb/core/fq/libs/ydb/ydb.h>
+#include <ydb/core/fq/libs/events/events.h>
+
+#include <ydb/core/fq/libs/row_dispatcher/json_filter.h>
+
+#include <ydb/core/testlib/actors/test_runtime.h>
+#include <ydb/core/testlib/basics/helpers.h>
+#include <ydb/core/testlib/actor_helpers.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+namespace {
+
+using namespace NKikimr;
+using namespace NFq;
+
+// Fixture for TJsonFilter tests: provides a test actor runtime and a helper
+// to build a filter over (column, type) pairs with a WHERE predicate and a
+// callback that receives rows passing the predicate.
+class TFixture : public NUnitTest::TBaseFixture {
+
+public:
+    TFixture()
+    : Runtime(true) {}
+
+    void SetUp(NUnitTest::TTestContext&) override {
+        TAutoPtr<TAppPrepare> app = new TAppPrepare();
+        Runtime.Initialize(app->Unwrap());
+        Runtime.SetLogPriority(NKikimrServices::FQ_ROW_DISPATCHER, NLog::PRI_DEBUG);
+    }
+
+    void TearDown(NUnitTest::TTestContext& /* context */) override {
+        // Destroy the filter before the runtime is torn down.
+        Filter.reset();
+    }
+
+    void MakeFilter(
+        const TVector<TString>& columns,
+        const TVector<TString>& types,
+        const TString& whereFilter,
+        NFq::TJsonFilter::TCallback callback) {
+        Filter = NFq::NewJsonFilter(
+            columns,
+            types,
+            whereFilter,
+            callback);
+    }
+
+    TActorSystemStub actorSystemStub;
+    NActors::TTestActorRuntime Runtime;
+    std::unique_ptr<NFq::TJsonFilter> Filter;
+};
+
+Y_UNIT_TEST_SUITE(TJsonFilterTests) {
+    // Filter passes only rows matching "a2 > 100" and serializes them back
+    // to JSON keyed by offset.
+    Y_UNIT_TEST_F(Simple1, TFixture) {
+        TMap<ui64, TString> result;
+        MakeFilter(
+            {"a1", "a2"},
+            {"String", "UInt64"},
+            "where a2 > 100",
+            [&](ui64 offset, const TString& json) {
+                result[offset] = json;
+            });
+        Filter->Push(5, {"hello1", "99"});
+        Filter->Push(6, {"hello2", "101"});
+        UNIT_ASSERT_VALUES_EQUAL(1, result.size());
+        UNIT_ASSERT_VALUES_EQUAL(R"({"a1":"hello2","a2":101})", result[6]);
+    }
+
+    // Same as Simple1 but with the column order reversed; output JSON keys
+    // are unchanged.
+    Y_UNIT_TEST_F(Simple2, TFixture) {
+        TMap<ui64, TString> result;
+        MakeFilter(
+            {"a2", "a1"},
+            {"UInt64", "String"},
+            "where a2 > 100",
+            [&](ui64 offset, const TString& json) {
+                result[offset] = json;
+            });
+        Filter->Push(5, {"99", "hello1"});
+        Filter->Push(6, {"101", "hello2"});
+        UNIT_ASSERT_VALUES_EQUAL(1, result.size());
+        UNIT_ASSERT_VALUES_EQUAL(R"({"a1":"hello2","a2":101})", result[6]);
+    }
+
+    // A predicate that fails at runtime (Unwrap of empty optional) must
+    // surface as an exception from Push.
+    Y_UNIT_TEST_F(ThrowExceptionByError, TFixture) {
+        MakeFilter(
+            {"a1", "a2"},
+            {"String", "UInt64"},
+            "where Unwrap(a2) = 1",
+            [&](ui64, const TString&) { });
+        UNIT_ASSERT_EXCEPTION_CONTAINS(Filter->Push(5, {"99", "hello1"}), yexception, "Failed to unwrap empty optional");
+    }
+}
+
+}
+
diff --git a/ydb/core/fq/libs/row_dispatcher/ut/json_parser_ut.cpp b/ydb/core/fq/libs/row_dispatcher/ut/json_parser_ut.cpp
new file mode 100644
index 00000000000..58f19a4ee26
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/ut/json_parser_ut.cpp
@@ -0,0 +1,110 @@
+#include <ydb/core/fq/libs/ydb/ydb.h>
+#include <ydb/core/fq/libs/events/events.h>
+
+#include <ydb/core/fq/libs/row_dispatcher/json_parser.h>
+
+#include <ydb/core/testlib/actors/test_runtime.h>
+#include <ydb/core/testlib/basics/helpers.h>
+#include <ydb/core/testlib/actor_helpers.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+namespace {
+
+using namespace NKikimr;
+using namespace NFq;
+
+// Fixture for TJsonParser tests: provides a test actor runtime and a helper
+// to build a parser that extracts the given columns from JSON messages and
+// delivers them to a callback together with the message offset.
+class TFixture : public NUnitTest::TBaseFixture {
+
+public:
+    TFixture()
+    : Runtime(true) {}
+
+    void SetUp(NUnitTest::TTestContext&) override {
+        TAutoPtr<TAppPrepare> app = new TAppPrepare();
+        Runtime.Initialize(app->Unwrap());
+        Runtime.SetLogPriority(NKikimrServices::FQ_ROW_DISPATCHER, NLog::PRI_DEBUG);
+    }
+
+    void TearDown(NUnitTest::TTestContext& /* context */) override {
+        // Destroy the parser before the runtime is torn down.
+        if (Parser) {
+            Parser.reset();
+        }
+    }
+
+    void MakeParser(TVector<TString> columns, NFq::TJsonParser::TCallback callback) {
+        Parser = NFq::NewJsonParser(
+            columns,
+            callback);
+    }
+
+    TActorSystemStub actorSystemStub;
+    NActors::TTestActorRuntime Runtime;
+    std::unique_ptr<NFq::TJsonParser> Parser;
+};
+
+Y_UNIT_TEST_SUITE(TJsonParserTests) {
+    // The four Simple tests cover every combination of requested-column
+    // order vs. key order in the JSON document: extracted values must follow
+    // the requested column order, and extra keys ("event") are ignored.
+    // NOTE(review): resultOffset is read before any guaranteed write only if
+    // the callback fires; the asserts rely on Push invoking it synchronously.
+    Y_UNIT_TEST_F(Simple1, TFixture) {
+        TList<TString> result;
+        ui64 resultOffset;
+        MakeParser({"a1", "a2"}, [&](ui64 offset, TList<TString>&& value){
+            resultOffset = offset;
+            result = std::move(value);
+        });
+        Parser->Push(5, R"({"a1": "hello1", "a2": "101", "event": "event1"})");
+        UNIT_ASSERT_VALUES_EQUAL(5, resultOffset);
+        UNIT_ASSERT_VALUES_EQUAL(2, result.size());
+        UNIT_ASSERT_VALUES_EQUAL("hello1", result.front());
+        UNIT_ASSERT_VALUES_EQUAL("101", result.back());
+    }
+
+    Y_UNIT_TEST_F(Simple2, TFixture) {
+        TList<TString> result;
+        ui64 resultOffset;
+        MakeParser({"a2", "a1"}, [&](ui64 offset, TList<TString>&& value){
+            resultOffset = offset;
+            result = std::move(value);
+        });
+        Parser->Push(5, R"({"a1": "hello1", "a2": "101", "event": "event1"})");
+        UNIT_ASSERT_VALUES_EQUAL(5, resultOffset);
+        UNIT_ASSERT_VALUES_EQUAL(2, result.size());
+        UNIT_ASSERT_VALUES_EQUAL("101", result.front());
+        UNIT_ASSERT_VALUES_EQUAL("hello1", result.back());
+    }
+
+    Y_UNIT_TEST_F(Simple3, TFixture) {
+        TList<TString> result;
+        ui64 resultOffset;
+        MakeParser({"a1", "a2"}, [&](ui64 offset, TList<TString>&& value){
+            resultOffset = offset;
+            result = std::move(value);
+        });
+        Parser->Push(5, R"({"a2": "hello1", "a1": "101", "event": "event1"})");
+        UNIT_ASSERT_VALUES_EQUAL(5, resultOffset);
+        UNIT_ASSERT_VALUES_EQUAL(2, result.size());
+        UNIT_ASSERT_VALUES_EQUAL("101", result.front());
+        UNIT_ASSERT_VALUES_EQUAL("hello1", result.back());
+    }
+
+    Y_UNIT_TEST_F(Simple4, TFixture) {
+        TList<TString> result;
+        ui64 resultOffset;
+        MakeParser({"a2", "a1"}, [&](ui64 offset, TList<TString>&& value){
+            resultOffset = offset;
+            result = std::move(value);
+        });
+        Parser->Push(5, R"({"a2": "hello1", "a1": "101", "event": "event1"})");
+        UNIT_ASSERT_VALUES_EQUAL(5, resultOffset);
+        UNIT_ASSERT_VALUES_EQUAL(2, result.size());
+        UNIT_ASSERT_VALUES_EQUAL("hello1", result.front());
+        UNIT_ASSERT_VALUES_EQUAL("101", result.back());
+    }
+
+    // Non-JSON input must surface as an exception from Push.
+    Y_UNIT_TEST_F(ThrowExceptionByError, TFixture) {
+
+        MakeParser({"a2", "a1"}, [&](ui64, TList<TString>&&){ });
+        UNIT_ASSERT_EXCEPTION_CONTAINS(Parser->Push(5, R"(ydb)"), yexception, " Failed to unwrap empty optional");
+    }
+}
+
+}
+
diff --git a/ydb/core/fq/libs/row_dispatcher/ut/leader_election_ut.cpp b/ydb/core/fq/libs/row_dispatcher/ut/leader_election_ut.cpp
new file mode 100644
index 00000000000..93ccaa8c151
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/ut/leader_election_ut.cpp
@@ -0,0 +1,140 @@
+#include <ydb/core/fq/libs/ydb/ydb.h>
+#include <ydb/core/fq/libs/events/events.h>
+#include <ydb/core/fq/libs/row_dispatcher/leader_election.h>
+#include <ydb/core/fq/libs/row_dispatcher/events/data_plane.h>
+#include <ydb/core/testlib/actors/test_runtime.h>
+#include <ydb/core/testlib/basics/helpers.h>
+#include <ydb/core/testlib/actor_helpers.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+namespace {
+
+using namespace NKikimr;
+using namespace NFq;
+
+// Fixture for leader-election tests: registers three leader-election actors
+// that compete for leadership over the same coordination node path; each
+// reports the elected coordinator to the shared RowDispatcher edge actor.
+class TFixture : public NUnitTest::TBaseFixture {
+
+public:
+    TFixture()
+    : Runtime(1, false) {}
+
+    void SetUp(NUnitTest::TTestContext&) override {
+        TAutoPtr<TAppPrepare> app = new TAppPrepare();
+        Runtime.Initialize(app->Unwrap());
+        Runtime.SetLogPriority(NKikimrServices::FQ_ROW_DISPATCHER, NLog::PRI_DEBUG);
+        auto credFactory = NKikimr::CreateYdbCredentialsProviderFactory;
+        auto yqSharedResources = NFq::TYqSharedResources::Cast(NFq::CreateYqSharedResourcesImpl({}, credFactory, MakeIntrusive<NMonitoring::TDynamicCounters>()));
+
+        RowDispatcher = Runtime.AllocateEdgeActor();
+        Coordinator1 = Runtime.AllocateEdgeActor();
+        Coordinator2 = Runtime.AllocateEdgeActor();
+        Coordinator3 = Runtime.AllocateEdgeActor();
+
+        NConfig::TRowDispatcherCoordinatorConfig config;
+        config.SetCoordinationNodePath("row_dispatcher");
+        // Connects to the YDB instance provided by the test environment.
+        auto& database = *config.MutableDatabase();
+        database.SetEndpoint(GetEnv("YDB_ENDPOINT"));
+        database.SetDatabase(GetEnv("YDB_DATABASE"));
+        database.SetToken("");
+
+        // Three contenders, one per coordinator candidate.
+        LeaderElection1 = Runtime.Register(NewLeaderElection(
+            RowDispatcher,
+            Coordinator1,
+            config,
+            NKikimr::CreateYdbCredentialsProviderFactory,
+            yqSharedResources,
+            "/tenant",
+            MakeIntrusive<NMonitoring::TDynamicCounters>()
+        ).release());
+
+        LeaderElection2 = Runtime.Register(NewLeaderElection(
+            RowDispatcher,
+            Coordinator2,
+            config,
+            NKikimr::CreateYdbCredentialsProviderFactory,
+            yqSharedResources,
+            "/tenant",
+            MakeIntrusive<NMonitoring::TDynamicCounters>()
+        ).release());
+
+        LeaderElection3 = Runtime.Register(NewLeaderElection(
+            RowDispatcher,
+            Coordinator3,
+            config,
+            NKikimr::CreateYdbCredentialsProviderFactory,
+            yqSharedResources,
+            "/tenant",
+            MakeIntrusive<NMonitoring::TDynamicCounters>()
+        ).release());
+
+        Runtime.EnableScheduleForActor(LeaderElection1);
+        Runtime.EnableScheduleForActor(LeaderElection2);
+        Runtime.EnableScheduleForActor(LeaderElection3);
+
+        // Wait until all three election actors have bootstrapped.
+        TDispatchOptions options;
+        options.FinalEvents.emplace_back(NActors::TEvents::TSystem::Bootstrap, 3);
+        Runtime.DispatchEvents(options);
+    }
+
+    void TearDown(NUnitTest::TTestContext& /* context */) override {
+    }
+
+    // Waits for the next TEvCoordinatorChanged notification and returns the
+    // coordinator actor id it announces.
+    NActors::TActorId ExpectCoordinatorChanged() {
+        auto eventHolder = Runtime.GrabEdgeEvent<NFq::TEvRowDispatcher::TEvCoordinatorChanged>(RowDispatcher);
+        UNIT_ASSERT(eventHolder.Get() != nullptr);
+        return eventHolder.Get()->Get()->CoordinatorActorId;
+    }
+
+    TActorSystemStub actorSystemStub;
+    NActors::TTestActorRuntime Runtime;
+    NActors::TActorId RowDispatcher;
+    NActors::TActorId LeaderElection1;
+    NActors::TActorId LeaderElection2;
+    NActors::TActorId LeaderElection3;
+    NActors::TActorId Coordinator1;
+    NActors::TActorId Coordinator2;
+    NActors::TActorId Coordinator3;
+    NActors::TActorId LeaderDetector;
+};
+
+Y_UNIT_TEST_SUITE(LeaderElectionTests) {
+    // All contenders must agree on one leader; killing the current leader
+    // must trigger re-election of a different one, twice in a row.
+    Y_UNIT_TEST_F(Test1, TFixture) {
+
+        // Each of the three election actors reports the same coordinator.
+        auto coordinatorId1 = ExpectCoordinatorChanged();
+        auto coordinatorId2 = ExpectCoordinatorChanged();
+        auto coordinatorId3 = ExpectCoordinatorChanged();
+        UNIT_ASSERT(coordinatorId1 == coordinatorId2);
+        UNIT_ASSERT(coordinatorId2 == coordinatorId3);
+
+        NActors::TActorId currentLeader;
+        NActors::TActorId notActive;
+        if (coordinatorId1 == Coordinator1) {
+            currentLeader = LeaderElection1;
+        } else if (coordinatorId1 == Coordinator2) {
+            currentLeader = LeaderElection2;
+        } else {
+            currentLeader = LeaderElection3;
+        }
+
+        // Kill the leader's election actor; the two survivors must elect a
+        // new (different) coordinator.
+        Runtime.Send(new IEventHandle(currentLeader, RowDispatcher, new NActors::TEvents::TEvPoisonPill()));
+        auto coordinatorId4 = ExpectCoordinatorChanged();
+        auto coordinatorId5 = ExpectCoordinatorChanged();
+        UNIT_ASSERT(coordinatorId4 == coordinatorId5);
+        UNIT_ASSERT(coordinatorId4 != coordinatorId1);
+
+        if (coordinatorId4 == Coordinator1) {
+            currentLeader = LeaderElection1;
+        } else if (coordinatorId4 == Coordinator2) {
+            currentLeader = LeaderElection2;
+        } else {
+            currentLeader = LeaderElection3;
+        }
+
+        // Kill the new leader as well; the last survivor takes over.
+        Runtime.Send(new IEventHandle(currentLeader, RowDispatcher, new NActors::TEvents::TEvPoisonPill()));
+        auto coordinatorId6 = ExpectCoordinatorChanged();
+        UNIT_ASSERT(coordinatorId6 != coordinatorId4);
+    }
+}
+
+}
+
diff --git a/ydb/core/fq/libs/row_dispatcher/ut/row_dispatcher_ut.cpp b/ydb/core/fq/libs/row_dispatcher/ut/row_dispatcher_ut.cpp
new file mode 100644
index 00000000000..f5641e81553
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/ut/row_dispatcher_ut.cpp
@@ -0,0 +1,342 @@
+#include <ydb/core/fq/libs/ydb/ydb.h>
+#include <ydb/core/fq/libs/events/events.h>
+#include <ydb/core/fq/libs/row_dispatcher/row_dispatcher.h>
+#include <ydb/core/fq/libs/row_dispatcher/actors_factory.h>
+#include <ydb/core/fq/libs/row_dispatcher/events/data_plane.h>
+#include <ydb/core/testlib/actors/test_runtime.h>
+#include <ydb/core/testlib/basics/helpers.h>
+#include <ydb/core/testlib/actor_helpers.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+namespace {
+
+using namespace NKikimr;
+using namespace NFq;
+
+// Test double for the topic-session actor factory: instead of creating real
+// topic sessions it allocates edge actors and records their ids in a queue
+// so the test can pop the id that the row dispatcher received.
+struct TTestActorFactory : public NFq::NRowDispatcher::IActorFactory {
+    TTestActorFactory(NActors::TTestActorRuntime& runtime)
+        : Runtime(runtime)
+    {}
+
+    // Returns (and removes) the oldest actor id handed out by
+    // RegisterTopicSession; fails if none is pending.
+    NActors::TActorId PopActorId() {
+        UNIT_ASSERT(!ActorIds.empty());
+        auto result = ActorIds.front();
+        ActorIds.pop();
+        return result;
+    }
+
+    NActors::TActorId RegisterTopicSession(
+        const TString& /*topicPath*/,
+        const NConfig::TRowDispatcherConfig& /*config*/,
+        NActors::TActorId /*rowDispatcherActorId*/,
+        ui32 /*partitionId*/,
+        NYdb::TDriver /*driver*/,
+        std::shared_ptr<NYdb::ICredentialsProviderFactory> /*credentialsProviderFactory*/,
+        const ::NMonitoring::TDynamicCounterPtr& /*counters*/) const override {
+        auto actorId = Runtime.AllocateEdgeActor();
+        ActorIds.push(actorId);
+        return actorId;
+    }
+
+    NActors::TTestActorRuntime& Runtime;
+    // mutable: RegisterTopicSession is const in the interface but must
+    // record the allocated id.
+    mutable TQueue<NActors::TActorId> ActorIds;
+};
+
+// Fixture for row-dispatcher tests: registers a row dispatcher wired to the
+// TTestActorFactory above, plus edge actors standing in for the coordinator,
+// read actors and topic sessions. Mock* helpers inject events into the
+// dispatcher; Expect* helpers assert the events it forwards.
+class TFixture : public NUnitTest::TBaseFixture {
+
+public:
+    TFixture()
+    : Runtime(1) {}
+
+    void SetUp(NUnitTest::TTestContext&) override {
+        TAutoPtr<TAppPrepare> app = new TAppPrepare();
+        Runtime.Initialize(app->Unwrap());
+        Runtime.SetLogPriority(NKikimrServices::FQ_ROW_DISPATCHER, NLog::PRI_TRACE);
+        NConfig::TRowDispatcherConfig config;
+        config.SetEnabled(true);
+        NConfig::TRowDispatcherCoordinatorConfig& coordinatorConfig = *config.MutableCoordinator();
+        coordinatorConfig.SetCoordinationNodePath("RowDispatcher");
+        auto& database = *coordinatorConfig.MutableDatabase();
+        database.SetEndpoint("YDB_ENDPOINT");
+        database.SetDatabase("YDB_DATABASE");
+        database.SetToken("");
+
+        NConfig::TCommonConfig commonConfig;
+        auto credFactory = NKikimr::CreateYdbCredentialsProviderFactory;
+        auto yqSharedResources = NFq::TYqSharedResources::Cast(NFq::CreateYqSharedResourcesImpl({}, credFactory, MakeIntrusive<NMonitoring::TDynamicCounters>()));
+
+        NYql::ISecuredServiceAccountCredentialsFactory::TPtr credentialsFactory;
+        Coordinator = Runtime.AllocateEdgeActor();
+        EdgeActor = Runtime.AllocateEdgeActor();
+        ReadActorId1 = Runtime.AllocateEdgeActor();
+        ReadActorId2 = Runtime.AllocateEdgeActor();
+        TestActorFactory = MakeIntrusive<TTestActorFactory>(Runtime);
+
+        RowDispatcher = Runtime.Register(NewRowDispatcher(
+            config,
+            commonConfig,
+            NKikimr::CreateYdbCredentialsProviderFactory,
+            yqSharedResources,
+            credentialsFactory,
+            "Tenant",
+            TestActorFactory,
+            MakeIntrusive<NMonitoring::TDynamicCounters>()
+        ).release());
+
+        Runtime.EnableScheduleForActor(RowDispatcher);
+
+        // Wait until the row dispatcher has bootstrapped.
+        TDispatchOptions options;
+        options.FinalEvents.emplace_back(NActors::TEvents::TSystem::Bootstrap, 1);
+        Runtime.DispatchEvents(options);
+    }
+
+    void TearDown(NUnitTest::TTestContext& /* context */) override {
+    }
+
+    // Builds minimal PQ topic source settings; endpoint/database distinguish
+    // sources that must map to different topic sessions.
+    NYql::NPq::NProto::TDqPqTopicSource BuildPqTopicSourceSettings(
+        TString endpoint,
+        TString database,
+        TString topic)
+    {
+        NYql::NPq::NProto::TDqPqTopicSource settings;
+        settings.SetTopicPath(topic);
+        settings.SetConsumerName("PqConsumer");
+        settings.SetEndpoint(endpoint);
+        settings.MutableToken()->SetName("token");
+        settings.SetDatabase(database);
+        return settings;
+    }
+
+    // Read actor asks the dispatcher to start a session on a partition.
+    void MockAddSession(const NYql::NPq::NProto::TDqPqTopicSource& source, ui64 partitionId, TActorId readActorId) {
+        auto event = new NFq::TEvRowDispatcher::TEvStartSession(
+            source,
+            partitionId, // partitionId
+            "Token",
+            Nothing(), // readOffset,
+            0, // StartingMessageTimestamp;
+            "QueryId");
+        Runtime.Send(new IEventHandle(RowDispatcher, readActorId, event));
+    }
+
+    // Read actor asks the dispatcher to stop its session on a partition.
+    void MockStopSession(const NYql::NPq::NProto::TDqPqTopicSource& source, ui64 partitionId, TActorId readActorId) {
+        auto event = std::make_unique<NFq::TEvRowDispatcher::TEvStopSession>();
+        event->Record.MutableSource()->CopyFrom(source);
+        event->Record.SetPartitionId(partitionId);
+        Runtime.Send(new IEventHandle(RowDispatcher, readActorId, event.release()));
+    }
+
+    // Topic session notifies the dispatcher that data is available for
+    // readActorId.
+    void MockNewDataArrived(ui64 partitionId, TActorId topicSessionId, TActorId readActorId) {
+        auto event = std::make_unique<NFq::TEvRowDispatcher::TEvNewDataArrived>();
+        event->Record.SetPartitionId(partitionId);
+        event->ReadActorId = readActorId;
+        Runtime.Send(new IEventHandle(RowDispatcher, topicSessionId, event.release()));
+    }
+
+    // Topic session delivers a (payload-less) message batch for readActorId.
+    void MockMessageBatch(ui64 partitionId, TActorId topicSessionId, TActorId readActorId) {
+        auto event = std::make_unique<NFq::TEvRowDispatcher::TEvMessageBatch>();
+        event->Record.SetPartitionId(partitionId);
+        event->ReadActorId = readActorId;
+        Runtime.Send(new IEventHandle(RowDispatcher, topicSessionId, event.release()));
+    }
+
+    // Topic session reports a session error destined for readActorId.
+    void MockSessionError(ui64 partitionId, TActorId topicSessionId, TActorId readActorId) {
+        auto event = std::make_unique<NFq::TEvRowDispatcher::TEvSessionError>();
+        event->Record.SetPartitionId(partitionId);
+        event->ReadActorId = readActorId;
+        Runtime.Send(new IEventHandle(RowDispatcher, topicSessionId, event.release()));
+    }
+
+    // Read actor requests the next batch for a partition.
+    void MockGetNextBatch(ui64 partitionId, TActorId readActorId) {
+        auto event = std::make_unique<NFq::TEvRowDispatcher::TEvGetNextBatch>();
+        event->Record.SetPartitionId(partitionId);
+        Runtime.Send(new IEventHandle(RowDispatcher, readActorId, event.release()));
+    }
+
+    void ExpectStartSession(NActors::TActorId actorId) {
+        auto eventHolder = Runtime.GrabEdgeEvent<NFq::TEvRowDispatcher::TEvStartSession>(actorId);
+        UNIT_ASSERT(eventHolder.Get() != nullptr);
+    }
+
+    void ExpectStopSession(NActors::TActorId actorId, ui64 partitionId) {
+        auto eventHolder = Runtime.GrabEdgeEvent<NFq::TEvRowDispatcher::TEvStopSession>(actorId);
+        UNIT_ASSERT(eventHolder.Get() != nullptr);
+        UNIT_ASSERT(eventHolder->Get()->Record.GetPartitionId() == partitionId);
+    }
+
+    void ExpectGetNextBatch(NActors::TActorId topicSessionId, ui64 partitionId) {
+        auto eventHolder = Runtime.GrabEdgeEvent<NFq::TEvRowDispatcher::TEvGetNextBatch>(topicSessionId);
+        UNIT_ASSERT(eventHolder.Get() != nullptr);
+        UNIT_ASSERT(eventHolder->Get()->Record.GetPartitionId() == partitionId);
+    }
+
+    void ExpectNewDataArrived(NActors::TActorId readActorId, ui64 partitionId) {
+        auto eventHolder = Runtime.GrabEdgeEvent<NFq::TEvRowDispatcher::TEvNewDataArrived>(readActorId);
+        UNIT_ASSERT(eventHolder.Get() != nullptr);
+        UNIT_ASSERT(eventHolder->Get()->Record.GetPartitionId() == partitionId);
+    }
+
+    void ExpectStartSessionAck(NActors::TActorId readActorId) {
+        auto eventHolder = Runtime.GrabEdgeEvent<NFq::TEvRowDispatcher::TEvStartSessionAck>(readActorId);
+        UNIT_ASSERT(eventHolder.Get() != nullptr);
+    }
+
+    void ExpectMessageBatch(NActors::TActorId readActorId) {
+        auto eventHolder = Runtime.GrabEdgeEvent<NFq::TEvRowDispatcher::TEvMessageBatch>(readActorId);
+        UNIT_ASSERT(eventHolder.Get() != nullptr);
+    }
+
+    void ExpectSessionError(NActors::TActorId readActorId, ui64 partitionId) {
+        auto eventHolder = Runtime.GrabEdgeEvent<NFq::TEvRowDispatcher::TEvSessionError>(readActorId);
+        UNIT_ASSERT(eventHolder.Get() != nullptr);
+        UNIT_ASSERT(eventHolder->Get()->Record.GetPartitionId() == partitionId);
+    }
+
+    // Pops the id of the topic-session actor most recently created by the
+    // dispatcher through the test factory.
+    NActors::TActorId ExpectRegisterTopicSession() {
+        auto actorId = TestActorFactory->PopActorId();
+        return actorId;
+    }
+
+    // Drives one full data round-trip:
+    // session -> NewDataArrived -> read actor -> GetNextBatch -> session
+    // -> MessageBatch -> read actor.
+    void ProcessData(NActors::TActorId readActorId, ui64 partId, NActors::TActorId topicSessionActorId) {
+        MockNewDataArrived(partId, topicSessionActorId, readActorId);
+        ExpectNewDataArrived(readActorId, partId);
+
+        MockGetNextBatch(partId, readActorId);
+        ExpectGetNextBatch(topicSessionActorId, partId);
+
+        MockMessageBatch(partId, topicSessionActorId, readActorId);
+        ExpectMessageBatch(readActorId);
+    }
+
+    TActorSystemStub actorSystemStub;
+    NActors::TTestActorRuntime Runtime;
+    NActors::TActorId RowDispatcher;
+    NActors::TActorId Coordinator;
+    NActors::TActorId EdgeActor;
+    NActors::TActorId ReadActorId1;
+    NActors::TActorId ReadActorId2;
+    TIntrusivePtr<TTestActorFactory> TestActorFactory;
+
+    NYql::NPq::NProto::TDqPqTopicSource Source1 = BuildPqTopicSourceSettings("Endpoint1", "Database1", "topic");
+    NYql::NPq::NProto::TDqPqTopicSource Source2 = BuildPqTopicSourceSettings("Endpoint2", "Database1", "topic");
+
+    ui64 PartitionId0 = 0;
+    ui64 PartitionId1 = 1;
+};
+
+Y_UNIT_TEST_SUITE(RowDispatcherTests) {
+    // Single read actor, single session: start, one data round-trip, stop.
+    Y_UNIT_TEST_F(OneClientOneSession, TFixture) {
+        MockAddSession(Source1, PartitionId0, ReadActorId1);
+        auto topicSessionId = ExpectRegisterTopicSession();
+        ExpectStartSessionAck(ReadActorId1);
+        ExpectStartSession(topicSessionId);
+
+        ProcessData(ReadActorId1, PartitionId0, topicSessionId);
+
+        MockStopSession(Source1, PartitionId0, ReadActorId1);
+        ExpectStopSession(topicSessionId, PartitionId0);
+    }
+
+    // Two read actors share one topic session (same source + partition);
+    // a session error is fanned out to each client independently.
+    Y_UNIT_TEST_F(TwoClientOneSession, TFixture) {
+        MockAddSession(Source1, PartitionId0, ReadActorId1);
+        auto topicSessionId = ExpectRegisterTopicSession();
+        ExpectStartSessionAck(ReadActorId1);
+        ExpectStartSession(topicSessionId);
+
+        // Second client attaches to the existing session: no new
+        // RegisterTopicSession call.
+        MockAddSession(Source1, PartitionId0, ReadActorId2);
+        ExpectStartSessionAck(ReadActorId2);
+        ExpectStartSession(topicSessionId);
+
+        ProcessData(ReadActorId1, PartitionId0, topicSessionId);
+        ProcessData(ReadActorId2, PartitionId0, topicSessionId);
+
+        MockSessionError(PartitionId0, topicSessionId, ReadActorId1);
+        ExpectSessionError(ReadActorId1, PartitionId0);
+
+        MockSessionError(PartitionId0, topicSessionId, ReadActorId2);
+        ExpectSessionError(ReadActorId2, PartitionId0);
+    }
+
+    // A session error is forwarded to the owning read actor.
+    Y_UNIT_TEST_F(SessionError, TFixture) {
+        MockAddSession(Source1, PartitionId0, ReadActorId1);
+        auto topicSessionId = ExpectRegisterTopicSession();
+        ExpectStartSessionAck(ReadActorId1);
+        ExpectStartSession(topicSessionId);
+
+        MockSessionError(PartitionId0, topicSessionId, ReadActorId1);
+        ExpectSessionError(ReadActorId1, PartitionId0);
+    }
+
+    // Subscribing after the coordinator is known yields an immediate
+    // TEvCoordinatorChanged notification.
+    Y_UNIT_TEST_F(CoordinatorSubscribe, TFixture) {
+        Runtime.Send(new IEventHandle(RowDispatcher, EdgeActor, new NFq::TEvRowDispatcher::TEvCoordinatorChanged(Coordinator)));
+        Runtime.Send(new IEventHandle(RowDispatcher, ReadActorId1, new NFq::TEvRowDispatcher::TEvCoordinatorChangesSubscribe));
+
+        auto eventHolder = Runtime.GrabEdgeEvent<NFq::TEvRowDispatcher::TEvCoordinatorChanged>(ReadActorId1);
+        UNIT_ASSERT(eventHolder.Get() != nullptr);
+        UNIT_ASSERT(eventHolder->Get()->CoordinatorActorId == Coordinator);
+    }
+
+    // Subscriptions registered before the coordinator is known are served
+    // once the coordinator appears.
+    Y_UNIT_TEST_F(CoordinatorSubscribeBeforeCoordinatorChanged, TFixture) {
+        Runtime.Send(new IEventHandle(RowDispatcher, ReadActorId1, new NFq::TEvRowDispatcher::TEvCoordinatorChangesSubscribe));
+        Runtime.Send(new IEventHandle(RowDispatcher, ReadActorId2, new NFq::TEvRowDispatcher::TEvCoordinatorChangesSubscribe));
+
+        Runtime.Send(new IEventHandle(RowDispatcher, EdgeActor, new NFq::TEvRowDispatcher::TEvCoordinatorChanged(Coordinator)));
+
+        auto eventHolder = Runtime.GrabEdgeEvent<NFq::TEvRowDispatcher::TEvCoordinatorChanged>(ReadActorId1);
+        UNIT_ASSERT(eventHolder.Get() != nullptr);
+        UNIT_ASSERT(eventHolder->Get()->CoordinatorActorId == Coordinator);
+
+        eventHolder = Runtime.GrabEdgeEvent<NFq::TEvRowDispatcher::TEvCoordinatorChanged>(ReadActorId2);
+        UNIT_ASSERT(eventHolder.Get() != nullptr);
+        UNIT_ASSERT(eventHolder->Get()->CoordinatorActorId == Coordinator);
+    }
+
+    // Two read actors, two sources x two partitions = four independent
+    // sessions; an error on one session leaves the others functional, and
+    // data after StopSession is ignored.
+    Y_UNIT_TEST_F(TwoClients4Sessions, TFixture) {
+
+        MockAddSession(Source1, PartitionId0, ReadActorId1);
+        auto topicSession1 = ExpectRegisterTopicSession();
+        ExpectStartSessionAck(ReadActorId1);
+        ExpectStartSession(topicSession1);
+
+        MockAddSession(Source1, PartitionId1, ReadActorId1);
+        auto topicSession2 = ExpectRegisterTopicSession();
+        ExpectStartSessionAck(ReadActorId1);
+        ExpectStartSession(topicSession2);
+
+        MockAddSession(Source2, PartitionId0, ReadActorId2);
+        auto topicSession3 = ExpectRegisterTopicSession();
+        ExpectStartSessionAck(ReadActorId2);
+        ExpectStartSession(topicSession3);
+
+        MockAddSession(Source2, PartitionId1, ReadActorId2);
+        auto topicSession4 = ExpectRegisterTopicSession();
+        ExpectStartSessionAck(ReadActorId2);
+        ExpectStartSession(topicSession4);
+
+        ProcessData(ReadActorId1, PartitionId0, topicSession1);
+        ProcessData(ReadActorId1, PartitionId1, topicSession2);
+        ProcessData(ReadActorId2, PartitionId0, topicSession3);
+        ProcessData(ReadActorId2, PartitionId1, topicSession4);
+
+        MockSessionError(PartitionId0, topicSession1, ReadActorId1);
+        ExpectSessionError(ReadActorId1, PartitionId0);
+
+        // The remaining three sessions keep working after session 1 failed.
+        ProcessData(ReadActorId1, PartitionId1, topicSession2);
+        ProcessData(ReadActorId2, PartitionId0, topicSession3);
+        ProcessData(ReadActorId2, PartitionId1, topicSession4);
+
+        MockStopSession(Source1, PartitionId1, ReadActorId1);
+        ExpectStopSession(topicSession2, PartitionId1);
+
+        MockStopSession(Source2, PartitionId0, ReadActorId2);
+        ExpectStopSession(topicSession3, PartitionId0);
+
+        MockStopSession(Source2, PartitionId1, ReadActorId2);
+        ExpectStopSession(topicSession4, PartitionId1);
+
+        // Ignore data after StopSession
+        MockMessageBatch(PartitionId1, topicSession4, ReadActorId2);
+    }
+}
+
+}
+
diff --git a/ydb/core/fq/libs/row_dispatcher/ut/topic_session_ut.cpp b/ydb/core/fq/libs/row_dispatcher/ut/topic_session_ut.cpp
new file mode 100644
index 00000000000..8e7036f057b
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/ut/topic_session_ut.cpp
@@ -0,0 +1,357 @@
+#include <ydb/core/fq/libs/ydb/ydb.h>
+#include <ydb/core/fq/libs/events/events.h>
+
+#include <ydb/core/fq/libs/row_dispatcher/topic_session.h>
+#include <ydb/core/fq/libs/row_dispatcher/events/data_plane.h>
+
+#include <ydb/core/testlib/actors/test_runtime.h>
+#include <ydb/core/testlib/basics/helpers.h>
+#include <ydb/core/testlib/actor_helpers.h>
+#include <library/cpp/testing/unittest/registar.h>
+#include <ydb/tests/fq/pq_async_io/ut_helpers.h>
+
+namespace {
+
+using namespace NKikimr;
+using namespace NFq;
+using namespace NYql::NDq;
+
+const ui64 TimeoutBeforeStartSessionSec = 3;
+const ui64 GrabTimeoutSec = 4 * TimeoutBeforeStartSessionSec;
+
+class TFixture : public NUnitTest::TBaseFixture {
+
+public:
+ TFixture()
+ : Runtime(true) {}
+
+ void SetUp(NUnitTest::TTestContext&) override {
+ TAutoPtr<TAppPrepare> app = new TAppPrepare();
+ Runtime.Initialize(app->Unwrap());
+ Runtime.SetLogPriority(NKikimrServices::FQ_ROW_DISPATCHER, NLog::PRI_TRACE);
+ Runtime.SetDispatchTimeout(TDuration::Seconds(5));
+
+ ReadActorId1 = Runtime.AllocateEdgeActor();
+ ReadActorId2 = Runtime.AllocateEdgeActor();
+ RowDispatcherActorId = Runtime.AllocateEdgeActor();
+ }
+
+ void Init(const TString& topicPath, ui64 maxSessionUsedMemory = std::numeric_limits<ui64>::max()) {
+ Config.SetTimeoutBeforeStartSessionSec(TimeoutBeforeStartSessionSec);
+ Config.SetMaxSessionUsedMemory(maxSessionUsedMemory);
+ Config.SetSendStatusPeriodSec(2);
+ Config.SetWithoutConsumer(true);
+
+ TopicSession = Runtime.Register(NewTopicSession(
+ topicPath,
+ Config,
+ RowDispatcherActorId,
+ 0,
+ Driver,
+ CredentialsProviderFactory,
+ MakeIntrusive<NMonitoring::TDynamicCounters>()
+ ).release());
+ Runtime.EnableScheduleForActor(TopicSession);
+
+ TDispatchOptions options;
+ options.FinalEvents.emplace_back(NActors::TEvents::TSystem::Bootstrap, 1);
+ UNIT_ASSERT(Runtime.DispatchEvents(options));
+ }
+
+ void TearDown(NUnitTest::TTestContext& /* context */) override {
+ }
+
+ void StartSession(TActorId readActorId, const NYql::NPq::NProto::TDqPqTopicSource& source, TMaybe<ui64> readOffset = Nothing()) {
+ auto event = new NFq::TEvRowDispatcher::TEvStartSession(
+ source,
+ PartitionId,
+ "Token",
+ readOffset, // readOffset,
+ 0, // StartingMessageTimestamp;
+ "QueryId");
+ Runtime.Send(new IEventHandle(TopicSession, readActorId, event));
+ }
+
+ NYql::NPq::NProto::TDqPqTopicSource BuildSource(TString topic, bool emptyPredicate = false) {
+ NYql::NPq::NProto::TDqPqTopicSource settings;
+ settings.SetEndpoint(GetDefaultPqEndpoint());
+ settings.SetTopicPath(topic);
+ settings.SetConsumerName("PqConsumer");
+ settings.MutableToken()->SetName("token");
+ settings.SetDatabase(GetDefaultPqDatabase());
+ settings.AddColumns("dt");
+ settings.AddColumns("value");
+ settings.AddColumnTypes("UInt64");
+ settings.AddColumnTypes("String");
+ if (!emptyPredicate) {
+ settings.SetPredicate("WHERE true");
+ }
+ return settings;
+ }
+
+ void StopSession(NActors::TActorId readActorId, const NYql::NPq::NProto::TDqPqTopicSource& source) {
+ auto event = std::make_unique<NFq::TEvRowDispatcher::TEvStopSession>();
+ *event->Record.MutableSource() = source;
+ event->Record.SetPartitionId(PartitionId);
+ Runtime.Send(new IEventHandle(TopicSession, readActorId, event.release()));
+ }
+
+ void ExpectMessageBatch(NActors::TActorId readActorId, const std::vector<TString>& expected) {
+ auto eventHolder = Runtime.GrabEdgeEvent<TEvRowDispatcher::TEvMessageBatch>(RowDispatcherActorId, TDuration::Seconds(GrabTimeoutSec));
+ UNIT_ASSERT(eventHolder.Get() != nullptr);
+ UNIT_ASSERT(eventHolder->Get()->ReadActorId == readActorId);
+ UNIT_ASSERT(expected.size() == eventHolder->Get()->Record.MessagesSize());
+ for (size_t i = 0; i < expected.size(); ++i) {
+ NFq::NRowDispatcherProto::TEvMessage message = eventHolder->Get()->Record.GetMessages(i);
+ std::cerr << "message.GetJson() " << message.GetJson() << std::endl;
+ UNIT_ASSERT(expected[i] == message.GetJson());
+ }
+ }
+
+ void ExpectSessionError(NActors::TActorId readActorId, TString message) {
+ auto eventHolder = Runtime.GrabEdgeEvent<TEvRowDispatcher::TEvSessionError>(RowDispatcherActorId, TDuration::Seconds(GrabTimeoutSec));
+ UNIT_ASSERT(eventHolder.Get() != nullptr);
+ UNIT_ASSERT(eventHolder->Get()->ReadActorId == readActorId);
+ UNIT_ASSERT(TString(eventHolder->Get()->Record.GetMessage()).Contains(message));
+ }
+
+ void ExpectNewDataArrived(TSet<NActors::TActorId> readActorIds) {
+ size_t count = readActorIds.size();
+ for (size_t i = 0; i < count; ++i) {
+ auto eventHolder = Runtime.GrabEdgeEvent<TEvRowDispatcher::TEvNewDataArrived>(RowDispatcherActorId, TDuration::Seconds(GrabTimeoutSec));
+ UNIT_ASSERT(eventHolder.Get() != nullptr);
+ UNIT_ASSERT(readActorIds.contains(eventHolder->Get()->ReadActorId));
+ readActorIds.erase(eventHolder->Get()->ReadActorId);
+ }
+ }
+
+ size_t ReadMessages(NActors::TActorId readActorId) {
+ Runtime.Send(new IEventHandle(TopicSession, readActorId, new TEvRowDispatcher::TEvGetNextBatch()));
+ auto eventHolder = Runtime.GrabEdgeEvent<TEvRowDispatcher::TEvMessageBatch>(RowDispatcherActorId, TDuration::Seconds(GrabTimeoutSec));
+ UNIT_ASSERT(eventHolder.Get() != nullptr);
+ UNIT_ASSERT(eventHolder->Get()->ReadActorId == readActorId);
+ return eventHolder->Get()->Record.MessagesSize();
+ }
+
+ TActorSystemStub actorSystemStub;
+ NActors::TTestActorRuntime Runtime;
+ NActors::TActorId TopicSession;
+ NActors::TActorId RowDispatcherActorId;
+ NYdb::TDriver Driver = NYdb::TDriver(NYdb::TDriverConfig().SetLog(CreateLogBackend("cerr")));
+ std::shared_ptr<NYdb::ICredentialsProviderFactory> CredentialsProviderFactory;
+ NActors::TActorId ReadActorId1;
+ NActors::TActorId ReadActorId2;
+ ui64 PartitionId = 0;
+ NConfig::TRowDispatcherConfig Config;
+
+ const TString Json1 = "{\"dt\":100,\"value\":\"value1\"}";
+ const TString Json2 = "{\"dt\":200,\"value\":\"value2\"}";
+ const TString Json3 = "{\"dt\":300,\"value\":\"value3\"}";
+ const TString Json4 = "{\"dt\":400,\"value\":\"value4\"}";
+};
+
+Y_UNIT_TEST_SUITE(TopicSessionTests) {
+ Y_UNIT_TEST_F(TwoSessionsWithoutOffsets, TFixture) {
+ const TString topicName = "topic1";
+ PQCreateStream(topicName);
+ Init(topicName);
+ auto source = BuildSource(topicName);
+ StartSession(ReadActorId1, source);
+ StartSession(ReadActorId2, source);
+
+ const std::vector<TString> data = { Json1 };
+ PQWrite(data, topicName);
+ ExpectNewDataArrived({ReadActorId1, ReadActorId2});
+ Runtime.Send(new IEventHandle(TopicSession, ReadActorId1, new TEvRowDispatcher::TEvGetNextBatch()));
+ Runtime.Send(new IEventHandle(TopicSession, ReadActorId2, new TEvRowDispatcher::TEvGetNextBatch()));
+ ExpectMessageBatch(ReadActorId1, { Json1 });
+ ExpectMessageBatch(ReadActorId2, { Json1 });
+
+ StopSession(ReadActorId1, source);
+ StopSession(ReadActorId2, source);
+ }
+
+ Y_UNIT_TEST_F(SessionWithPredicateAndSessionWithoutPredicate, TFixture) {
+ const TString topicName = "topic2";
+ PQCreateStream(topicName);
+ Init(topicName);
+ auto source1 = BuildSource(topicName, false);
+ auto source2 = BuildSource(topicName, true);
+ StartSession(ReadActorId1, source1);
+ StartSession(ReadActorId2, source2);
+
+ const std::vector<TString> data = { Json1 };
+ PQWrite(data, topicName);
+ ExpectNewDataArrived({ReadActorId1, ReadActorId2});
+ Runtime.Send(new IEventHandle(TopicSession, ReadActorId1, new TEvRowDispatcher::TEvGetNextBatch()));
+ Runtime.Send(new IEventHandle(TopicSession, ReadActorId2, new TEvRowDispatcher::TEvGetNextBatch()));
+ ExpectMessageBatch(ReadActorId1, { Json1 });
+ ExpectMessageBatch(ReadActorId2, { Json1 });
+
+ StopSession(ReadActorId1, source1);
+ StopSession(ReadActorId2, source2);
+ }
+
+ Y_UNIT_TEST_F(SecondSessionWithoutOffsetsAfterSessionConnected, TFixture) {
+ const TString topicName = "topic3";
+ PQCreateStream(topicName);
+ Init(topicName);
+ auto source = BuildSource(topicName);
+ StartSession(ReadActorId1, source);
+
+ const std::vector<TString> data = { Json1 };
+ PQWrite(data, topicName);
+ ExpectNewDataArrived({ReadActorId1});
+ Runtime.Send(new IEventHandle(TopicSession, ReadActorId1, new TEvRowDispatcher::TEvGetNextBatch()));
+ ExpectMessageBatch(ReadActorId1, data);
+
+ StartSession(ReadActorId2, source);
+
+ const std::vector<TString> data2 = { Json2 };
+ PQWrite(data2, topicName);
+ ExpectNewDataArrived({ReadActorId1, ReadActorId2});
+
+ Runtime.Send(new IEventHandle(TopicSession, ReadActorId1, new TEvRowDispatcher::TEvGetNextBatch()));
+ ExpectMessageBatch(ReadActorId1, data2);
+ Runtime.Send(new IEventHandle(TopicSession, ReadActorId2, new TEvRowDispatcher::TEvGetNextBatch()));
+ ExpectMessageBatch(ReadActorId2, data2);
+
+ StopSession(ReadActorId1, source);
+ StopSession(ReadActorId2, source);
+ }
+
+ Y_UNIT_TEST_F(TwoSessionsWithOffsets, TFixture) {
+ const TString topicName = "topic4";
+ PQCreateStream(topicName);
+ Init(topicName);
+ auto source = BuildSource(topicName);
+ const std::vector<TString> data = { Json1, Json2, Json3};
+ PQWrite(data, topicName);
+
+ StartSession(ReadActorId1, source, 1);
+ StartSession(ReadActorId2, source, 2);
+
+ ExpectNewDataArrived({ReadActorId1, ReadActorId2});
+ Runtime.Send(new IEventHandle(TopicSession, ReadActorId1, new TEvRowDispatcher::TEvGetNextBatch()));
+ std::vector<TString> expected1 = { Json2, Json3};
+ ExpectMessageBatch(ReadActorId1, expected1);
+
+ Runtime.Send(new IEventHandle(TopicSession, ReadActorId2, new TEvRowDispatcher::TEvGetNextBatch()));
+ std::vector<TString> expected2 = { Json3 };
+ ExpectMessageBatch(ReadActorId2, expected2);
+
+ const std::vector<TString> data2 = { Json4 };
+ PQWrite(data2, topicName);
+ ExpectNewDataArrived({ReadActorId1, ReadActorId2});
+ Runtime.Send(new IEventHandle(TopicSession, ReadActorId1, new TEvRowDispatcher::TEvGetNextBatch()));
+ ExpectMessageBatch(ReadActorId1, data2);
+
+ Runtime.Send(new IEventHandle(TopicSession, ReadActorId2, new TEvRowDispatcher::TEvGetNextBatch()));
+ ExpectMessageBatch(ReadActorId2, data2);
+
+ StopSession(ReadActorId1, source);
+ StopSession(ReadActorId2, source);
+ }
+
+ Y_UNIT_TEST_F(BadDataSessionError, TFixture) {
+ const TString topicName = "topic5";
+ PQCreateStream(topicName);
+ Init(topicName);
+ auto source = BuildSource(topicName);
+ StartSession(ReadActorId1, source);
+
+ const std::vector<TString> data = { "not json", "noch einmal / nicht json" };
+ PQWrite(data, topicName);
+
+ ExpectSessionError(ReadActorId1, "Failed to unwrap empty optional");
+ StopSession(ReadActorId1, source);
+ }
+
+ Y_UNIT_TEST_F(RestartSessionIfNewClientWithOffset, TFixture) {
+ const TString topicName = "topic6";
+ PQCreateStream(topicName);
+ Init(topicName);
+ auto source = BuildSource(topicName);
+ StartSession(ReadActorId1, source);
+
+ const std::vector<TString> data = { Json1, Json2 }; // offset 0, 1
+ PQWrite(data, topicName);
+ ExpectNewDataArrived({ReadActorId1});
+ Runtime.Send(new IEventHandle(TopicSession, ReadActorId1, new TEvRowDispatcher::TEvGetNextBatch()));
+ ExpectMessageBatch(ReadActorId1, data);
+
+ // Restart topic session.
+ StartSession(ReadActorId2, source, 1);
+ ExpectNewDataArrived({ReadActorId2});
+
+ PQWrite({ Json3 }, topicName);
+ ExpectNewDataArrived({ReadActorId1});
+
+ Runtime.Send(new IEventHandle(TopicSession, ReadActorId1, new TEvRowDispatcher::TEvGetNextBatch()));
+ ExpectMessageBatch(ReadActorId1, { Json3 });
+
+ Runtime.Send(new IEventHandle(TopicSession, ReadActorId2, new TEvRowDispatcher::TEvGetNextBatch()));
+ ExpectMessageBatch(ReadActorId2, { Json2, Json3 });
+
+ StopSession(ReadActorId1, source);
+ StopSession(ReadActorId2, source);
+ }
+
+ Y_UNIT_TEST_F(ReadNonExistentTopic, TFixture) {
+ const TString topicName = "topic7";
+ Init(topicName);
+ auto source = BuildSource(topicName);
+ StartSession(ReadActorId1, source);
+ ExpectSessionError(ReadActorId1, "no path");
+ StopSession(ReadActorId1, source);
+ }
+
+ Y_UNIT_TEST_F(SlowSession, TFixture) {
+ const TString topicName = "topic8";
+ PQCreateStream(topicName);
+ Init(topicName, 50);
+ auto source = BuildSource(topicName);
+ StartSession(ReadActorId1, source);
+ StartSession(ReadActorId2, source);
+
+ size_t messagesSize = 5;
+ for (size_t i = 0; i < messagesSize; ++i) {
+ const std::vector<TString> data = { Json1 };
+ PQWrite(data, topicName);
+ }
+ ExpectNewDataArrived({ReadActorId1, ReadActorId2});
+
+ auto readMessages = ReadMessages(ReadActorId1);
+ UNIT_ASSERT(readMessages == messagesSize);
+
+ // Reading from yds is stopped.
+
+ for (size_t i = 0; i < messagesSize; ++i) {
+ const std::vector<TString> data = { Json1 };
+ PQWrite(data, topicName);
+ }
+ Sleep(TDuration::MilliSeconds(100));
+ Runtime.DispatchEvents({}, Runtime.GetCurrentTime() - TDuration::MilliSeconds(1));
+
+ readMessages = ReadMessages(ReadActorId1);
+ UNIT_ASSERT(readMessages == 0);
+
+ readMessages = ReadMessages(ReadActorId2);
+ UNIT_ASSERT(readMessages == messagesSize);
+
+ Sleep(TDuration::MilliSeconds(100));
+ Runtime.DispatchEvents({}, Runtime.GetCurrentTime() - TDuration::MilliSeconds(1));
+
+ readMessages = ReadMessages(ReadActorId1);
+ UNIT_ASSERT(readMessages == messagesSize);
+
+ readMessages = ReadMessages(ReadActorId2);
+ UNIT_ASSERT(readMessages == messagesSize);
+
+ StopSession(ReadActorId1, source);
+ StopSession(ReadActorId2, source);
+ }
+}
+
+}
+
diff --git a/ydb/core/fq/libs/row_dispatcher/ut/ya.make b/ydb/core/fq/libs/row_dispatcher/ut/ya.make
new file mode 100644
index 00000000000..bb66ec57798
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/ut/ya.make
@@ -0,0 +1,29 @@
+UNITTEST_FOR(ydb/core/fq/libs/row_dispatcher)
+
+INCLUDE(${ARCADIA_ROOT}/ydb/tests/tools/fq_runner/ydb_runner_with_datastreams.inc)
+
+SRCS(
+ coordinator_ut.cpp
+ json_filter_ut.cpp
+ json_parser_ut.cpp
+ leader_election_ut.cpp
+ row_dispatcher_ut.cpp
+ topic_session_ut.cpp
+)
+
+PEERDIR(
+ library/cpp/testing/unittest
+ ydb/core/fq/libs/row_dispatcher
+ ydb/core/testlib
+ ydb/core/testlib/actors
+ ydb/library/yql/udfs/common/json2
+ ydb/library/yql/udfs/common/yson2
+ ydb/tests/fq/pq_async_io
+ ydb/library/yql/sql/pg_dummy
+)
+
+SIZE(MEDIUM)
+
+YQL_LAST_ABI_VERSION()
+
+END()
diff --git a/ydb/core/fq/libs/row_dispatcher/ya.make b/ydb/core/fq/libs/row_dispatcher/ya.make
new file mode 100644
index 00000000000..f1f036d20dc
--- /dev/null
+++ b/ydb/core/fq/libs/row_dispatcher/ya.make
@@ -0,0 +1,39 @@
+LIBRARY()
+
+SRCS(
+ actors_factory.cpp
+ coordinator.cpp
+ json_filter.cpp
+ json_parser.cpp
+ leader_election.cpp
+ row_dispatcher_service.cpp
+ row_dispatcher.cpp
+ topic_session.cpp
+)
+
+PEERDIR(
+ contrib/libs/fmt
+ ydb/core/fq/libs/actors/logging
+ ydb/core/fq/libs/config/protos
+ ydb/core/fq/libs/control_plane_storage
+ ydb/core/fq/libs/row_dispatcher/events
+ ydb/core/fq/libs/shared_resources
+ ydb/core/fq/libs/ydb
+ ydb/library/actors/core
+ ydb/library/security
+ ydb/library/yql/dq/actors/common
+ ydb/library/yql/dq/actors/compute
+ ydb/library/yql/dq/proto
+ ydb/library/yql/providers/pq/provider
+ ydb/library/yql/public/purecalc/common/no_pg_wrapper
+ ydb/public/sdk/cpp/client/ydb_scheme
+ ydb/public/sdk/cpp/client/ydb_table
+)
+
+YQL_LAST_ABI_VERSION()
+
+END()
+
+RECURSE_FOR_TESTS(
+ ut
+)
diff --git a/ydb/core/fq/libs/ya.make b/ydb/core/fq/libs/ya.make
index cf1038e7f40..34d75300f89 100644
--- a/ydb/core/fq/libs/ya.make
+++ b/ydb/core/fq/libs/ya.make
@@ -29,6 +29,7 @@ RECURSE(
rate_limiter
read_rule
result_formatter
+ row_dispatcher
shared_resources
signer
tasks_packer
diff --git a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp
index d0ee18636b8..0392fe10886 100644
--- a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp
+++ b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp
@@ -1052,7 +1052,7 @@ private:
google::protobuf::Any& settings = *externalSource.MutableSettings();
TString& sourceType = *externalSource.MutableType();
- dqIntegration->FillSourceSettings(source.Ref(), settings, sourceType, maxTasksPerStage);
+ dqIntegration->FillSourceSettings(source.Ref(), settings, sourceType, maxTasksPerStage, ctx);
YQL_ENSURE(!settings.type_url().empty(), "Data source provider \"" << dataSourceCategory << "\" didn't fill dq source settings for its dq source node");
YQL_ENSURE(sourceType, "Data source provider \"" << dataSourceCategory << "\" didn't fill dq source settings type for its dq source node");
}
diff --git a/ydb/library/services/services.proto b/ydb/library/services/services.proto
index 075cae1c12e..ba3a64077ad 100644
--- a/ydb/library/services/services.proto
+++ b/ydb/library/services/services.proto
@@ -323,6 +323,7 @@ enum EServiceKikimr {
YQ_CONTROL_PLANE_STORAGE = 1021;
YQ_CONTROL_PLANE_PROXY= 1022;
YQ_TEST_CONNECTION = 1023;
+ FQ_ROW_DISPATCHER = 1024;
YQ_AUDIT = 1150;
YQ_AUDIT_EVENT_SENDER = 1151;
YQ_HEALTH = 1152;
diff --git a/ydb/library/yql/dq/actors/compute/retry_queue.cpp b/ydb/library/yql/dq/actors/common/retry_queue.cpp
index 243510dda24..1209f954d5e 100644
--- a/ydb/library/yql/dq/actors/compute/retry_queue.cpp
+++ b/ydb/library/yql/dq/actors/common/retry_queue.cpp
@@ -1,15 +1,24 @@
#include "retry_queue.h"
#include <util/generic/utility.h>
+#include <ydb/library/actors/core/log.h>
namespace NYql::NDq {
-void TRetryEventsQueue::Init(const TTxId& txId, const NActors::TActorId& senderId, const NActors::TActorId& selfId, ui64 eventQueueId) {
+const ui64 PingPeriodSeconds = 2;
+
+void TRetryEventsQueue::Init(
+ const TTxId& txId,
+ const NActors::TActorId& senderId,
+ const NActors::TActorId& selfId,
+ ui64 eventQueueId,
+ bool keepAlive) {
TxId = txId;
SenderId = senderId;
SelfId = selfId;
Y_ASSERT(SelfId.NodeId() == SenderId.NodeId());
EventQueueId = eventQueueId;
+ KeepAlive = keepAlive;
}
void TRetryEventsQueue::OnNewRecipientId(const NActors::TActorId& recipientId, bool unsubscribe) {
@@ -44,6 +53,9 @@ void TRetryEventsQueue::HandleNodeConnected(ui32 nodeId) {
SendRetryable(ev);
}
}
+ if (KeepAlive) {
+ SchedulePing();
+ }
}
}
@@ -54,6 +66,14 @@ bool TRetryEventsQueue::HandleUndelivered(NActors::TEvents::TEvUndelivered::TPtr
return true;
}
+ if (ev->Sender == RecipientId && ev->Get()->Reason == NActors::TEvents::TEvUndelivered::ReasonActorUnknown) {
+ if (KeepAlive) {
+ NActors::TActivationContext::Send(
+ new NActors::IEventHandle(SelfId, SelfId, new TEvRetryQueuePrivate::TEvSessionClosed(EventQueueId), 0, 0));
+ }
+ return true;
+ }
+
return false;
}
@@ -64,10 +84,28 @@ void TRetryEventsQueue::Retry() {
}
}
+void TRetryEventsQueue::Ping() {
+ PingScheduled = false;
+
+ if (!Connected) {
+ return;
+ }
+
+ if (TInstant::Now() - LastReceivedDataTime < TDuration::Seconds(PingPeriodSeconds)) {
+ SchedulePing();
+ return;
+ }
+
+ auto ev = MakeHolder<NActors::TEvents::TEvPing>();
+ NActors::TActivationContext::Send(new NActors::IEventHandle(RecipientId, SenderId, ev.Release(), NActors::IEventHandle::FlagTrackDelivery));
+ SchedulePing();
+}
+
void TRetryEventsQueue::Connect() {
auto connectEvent = MakeHolder<NActors::TEvInterconnect::TEvConnectNode>();
+ auto proxyId = NActors::TActivationContext::InterconnectProxy(RecipientId.NodeId());
NActors::TActivationContext::Send(
- new NActors::IEventHandle(NActors::TActivationContext::InterconnectProxy(RecipientId.NodeId()), SenderId, connectEvent.Release(), 0, 0));
+ new NActors::IEventHandle(proxyId, SenderId, connectEvent.Release(), 0, 0));
}
void TRetryEventsQueue::Unsubscribe() {
@@ -97,14 +135,25 @@ void TRetryEventsQueue::SendRetryable(const IRetryableEvent::TPtr& ev) {
}
void TRetryEventsQueue::ScheduleRetry() {
- if (!RetryScheduled && !Events.empty()) {
- RetryScheduled = true;
- if (!RetryState) {
- RetryState.ConstructInPlace();
- }
- auto ev = MakeHolder<TEvRetryQueuePrivate::TEvRetry>(EventQueueId);
- NActors::TActivationContext::Schedule(RetryState->GetNextDelay(), new NActors::IEventHandle(SelfId, SelfId, ev.Release()));
+ if (RetryScheduled) {
+ return;
+ }
+ RetryScheduled = true;
+ if (!RetryState) {
+ RetryState.ConstructInPlace();
}
+ auto ev = MakeHolder<TEvRetryQueuePrivate::TEvRetry>(EventQueueId);
+ NActors::TActivationContext::Schedule(RetryState->GetNextDelay(), new NActors::IEventHandle(SelfId, SelfId, ev.Release()));
+}
+
+void TRetryEventsQueue::SchedulePing() {
+ if (!KeepAlive || PingScheduled) {
+ return;
+ }
+
+ PingScheduled = true;
+ auto ev = MakeHolder<TEvRetryQueuePrivate::TEvPing>(EventQueueId);
+ NActors::TActivationContext::Schedule(TDuration::Seconds(PingPeriodSeconds), new NActors::IEventHandle(SelfId, SelfId, ev.Release()));
}
TDuration TRetryEventsQueue::TRetryState::GetNextDelay() {
@@ -120,4 +169,10 @@ TDuration TRetryEventsQueue::TRetryState::RandomizeDelay(TDuration baseDelay) {
return TDuration::FromValue(half + RandomNumber<TDuration::TValue>(half));
}
+void TRetryEventsQueue::PrintInternalState(TStringStream& stream) const {
+ stream << "RetryQueue: id " << EventQueueId << ", NextSeqNo "
+ << NextSeqNo << ", MyConfirmedSeqNo " << MyConfirmedSeqNo << ", SeqNos " << ReceivedEventsSeqNos.size() << ", events size " << Events.size() << "\n";
+}
+
+
} // namespace NYql::NDq
diff --git a/ydb/library/yql/dq/actors/compute/retry_queue.h b/ydb/library/yql/dq/actors/common/retry_queue.h
index 875aef00c99..5c9a8f317bf 100644
--- a/ydb/library/yql/dq/actors/compute/retry_queue.h
+++ b/ydb/library/yql/dq/actors/common/retry_queue.h
@@ -9,6 +9,7 @@
#include <util/generic/yexception.h>
#include <util/system/types.h>
+#include <util/datetime/base.h>
namespace NYql::NDq {
@@ -16,9 +17,9 @@ struct TEvRetryQueuePrivate {
// Event ids.
enum EEv : ui32 {
EvBegin = EventSpaceBegin(NActors::TEvents::ES_PRIVATE),
-
EvRetry = EvBegin,
-
+ EvPing,
+ EvSessionClosed, // recipientId does not exist anymore
EvEnd
};
@@ -29,9 +30,22 @@ struct TEvRetryQueuePrivate {
struct TEvRetry : NActors::TEventLocal<TEvRetry, EvRetry> {
explicit TEvRetry(ui64 eventQueueId)
: EventQueueId(eventQueueId)
- {
- }
+ { }
+ const ui64 EventQueueId;
+ };
+ struct TEvPing : NActors::TEventLocal<TEvPing, EvPing> {
+ explicit TEvPing(ui64 eventQueueId)
+ : EventQueueId(eventQueueId)
+ { }
+ const ui64 EventQueueId;
+ };
+
+
+ struct TEvSessionClosed : NActors::TEventLocal<TEvSessionClosed, EvSessionClosed> {
+ explicit TEvSessionClosed(ui64 eventQueueId)
+ : EventQueueId(eventQueueId)
+ { }
const ui64 EventQueueId;
};
@@ -55,6 +69,7 @@ template <class T>
concept TProtobufEventWithTransportMeta = TProtobufEvent<T> && THasTransportMeta<T>;
class TRetryEventsQueue {
+
public:
class IRetryableEvent : public TSimpleRefCount<IRetryableEvent> {
public:
@@ -64,7 +79,9 @@ public:
virtual ui64 GetSeqNo() const = 0;
};
- void Init(const TTxId& txId, const NActors::TActorId& senderId, const NActors::TActorId& selfId, ui64 eventQueueId = 0);
+ TRetryEventsQueue() {}
+
+ void Init(const TTxId& txId, const NActors::TActorId& senderId, const NActors::TActorId& selfId, ui64 eventQueueId = 0, bool keepAlive = false);
template <TProtobufEventWithTransportMeta T>
void Send(T* ev, ui64 cookie = 0) {
@@ -93,6 +110,7 @@ public:
template <TProtobufEventWithTransportMeta T>
bool OnEventReceived(const T* ev) { // Returns true if event was not processed (== it was received first time).
+ LastReceivedDataTime = TInstant::Now();
if (LocalRecipient) {
return true;
}
@@ -120,7 +138,7 @@ public:
}
return false;
}
-
+
bool RemoveConfirmedEvents() {
RemoveConfirmedEvents(MyConfirmedSeqNo);
return !Events.empty();
@@ -131,7 +149,9 @@ public:
void HandleNodeDisconnected(ui32 nodeId);
bool HandleUndelivered(NActors::TEvents::TEvUndelivered::TPtr& ev);
void Retry();
+ void Ping();
void Unsubscribe();
+ void PrintInternalState(TStringStream& stream) const;
private:
template <TProtobufEventWithTransportMeta T>
@@ -144,6 +164,7 @@ private:
void RemoveConfirmedEvents(ui64 confirmedSeqNo);
void SendRetryable(const IRetryableEvent::TPtr& ev);
void ScheduleRetry();
+ void SchedulePing();
void Connect();
private:
@@ -199,8 +220,11 @@ private:
std::set<ui64> ReceivedEventsSeqNos;
bool Connected = false;
bool RetryScheduled = false;
+ bool PingScheduled = false;
TMaybe<TRetryState> RetryState;
TTxId TxId;
+ bool KeepAlive = false;
+ TInstant LastReceivedDataTime = TInstant::Now();
};
} // namespace NYql::NDq
diff --git a/ydb/library/yql/dq/actors/common/ut/retry_events_queue_ut.cpp b/ydb/library/yql/dq/actors/common/ut/retry_events_queue_ut.cpp
new file mode 100644
index 00000000000..a1b45ccf34b
--- /dev/null
+++ b/ydb/library/yql/dq/actors/common/ut/retry_events_queue_ut.cpp
@@ -0,0 +1,191 @@
+#include <library/cpp/testing/unittest/registar.h>
+#include <ydb/core/testlib/actors/test_runtime.h>
+#include <ydb/core/testlib/actor_helpers.h>
+#include <ydb/library/actors/core/actor_bootstrapped.h>
+#include <ydb/library/yql/dq/actors/common/retry_queue.h>
+#include <ydb/core/testlib/basics/appdata.h>
+#include <ydb/library/actors/interconnect/interconnect_impl.h>
+#include <ydb/library/yql/dq/actors/compute/dq_compute_actor.h>
+#include <ydb/core/testlib/tablet_helpers.h>
+#include <chrono>
+#include <thread>
+
+
+using namespace NActors;
+using namespace NYql::NDq;
+
+namespace {
+
+const ui64 EventQueueId = 777;
+
+struct TEvPrivate {
+ // Event ids
+ enum EEv : ui32 {
+ EvBegin = EventSpaceBegin(NActors::TEvents::ES_PRIVATE),
+ EvSend = EvBegin + 10,
+ EvData,
+ EvDisconnect,
+ EvEnd
+ };
+ static_assert(EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE)");
+ struct TEvSend : public TEventLocal<TEvSend, EvSend> {};
+ struct TEvData : public TEventLocal<TEvData, EvData> {};
+ struct TEvDisconnect : public TEventLocal<TEvDisconnect, EvDisconnect> {};
+};
+
+
+class ClientActor : public TActorBootstrapped<ClientActor> {
+public:
+ ClientActor(
+ NActors::TActorId clientEdgeActorId,
+ NActors::TActorId serverActorId)
+ : ServerActorId(serverActorId)
+ , ClientEdgeActorId(clientEdgeActorId) {}
+
+ void Bootstrap() {
+ Become(&ClientActor::StateFunc);
+ Init();
+ }
+
+ void Handle(const NYql::NDq::TEvRetryQueuePrivate::TEvRetry::TPtr& ) {
+ EventsQueue.Retry();
+ }
+
+ void Handle(const NYql::NDq::TEvRetryQueuePrivate::TEvPing::TPtr& ) {
+ EventsQueue.Ping();
+ }
+
+ void Handle(const NYql::NDq::TEvRetryQueuePrivate::TEvSessionClosed::TPtr& ) {
+ Send(ClientEdgeActorId, new TEvPrivate::TEvDisconnect());
+ }
+
+ void Handle(const TEvPrivate::TEvSend::TPtr& ) {
+ EventsQueue.Send(new TEvDqCompute::TEvInjectCheckpoint());
+ }
+
+ void HandleDisconnected(TEvInterconnect::TEvNodeDisconnected::TPtr& ev) {
+ EventsQueue.HandleNodeDisconnected(ev->Get()->NodeId);
+ }
+
+ void HandleConnected(TEvInterconnect::TEvNodeConnected::TPtr& ev) {
+ EventsQueue.HandleNodeConnected(ev->Get()->NodeId);
+ }
+
+ void Handle(NActors::TEvents::TEvUndelivered::TPtr& ev) {
+ EventsQueue.HandleUndelivered(ev);
+ }
+
+ STRICT_STFUNC(StateFunc,
+ hFunc(NYql::NDq::TEvRetryQueuePrivate::TEvRetry, Handle);
+ hFunc(NYql::NDq::TEvRetryQueuePrivate::TEvPing, Handle);
+ hFunc(NYql::NDq::TEvRetryQueuePrivate::TEvSessionClosed, Handle);
+ hFunc(TEvPrivate::TEvSend, Handle);
+ hFunc(TEvInterconnect::TEvNodeConnected, HandleConnected);
+ hFunc(TEvInterconnect::TEvNodeDisconnected, HandleDisconnected);
+ hFunc(NActors::TEvents::TEvUndelivered, Handle);
+ )
+
+ void Init() {
+ EventsQueue.Init("TxId", SelfId(), SelfId(), EventQueueId, true /*KeepAlive*/);
+ EventsQueue.OnNewRecipientId(ServerActorId);
+ }
+
+ NYql::NDq::TRetryEventsQueue EventsQueue;
+ NActors::TActorId ServerActorId;
+ NActors::TActorId ClientEdgeActorId;
+};
+
+class ServerActor : public TActorBootstrapped<ServerActor> {
+public:
+ ServerActor(NActors::TActorId serverEdgeActorId)
+ : ServerEdgeActorId(serverEdgeActorId) {}
+
+ void Bootstrap() {
+ Become(&ServerActor::StateFunc);
+ }
+
+ STRICT_STFUNC(StateFunc,
+ hFunc(NYql::NDq::TEvRetryQueuePrivate::TEvRetry, Handle);
+ hFunc(TEvInterconnect::TEvNodeConnected, HandleConnected);
+ hFunc(TEvInterconnect::TEvNodeDisconnected, HandleDisconnected);
+ hFunc(NActors::TEvents::TEvUndelivered, Handle);
+ hFunc(TEvDqCompute::TEvInjectCheckpoint, Handle);
+ hFunc(TEvents::TEvPoisonPill, Handle);
+ )
+
+ void Handle(const TEvents::TEvPoisonPill::TPtr& ) {
+ PassAway();
+ }
+
+ void Handle(const NYql::NDq::TEvRetryQueuePrivate::TEvRetry::TPtr& ) {
+ EventsQueue.Retry();
+ }
+
+ void Handle(const TEvDqCompute::TEvInjectCheckpoint::TPtr& /*ev*/) {
+ Send(ServerEdgeActorId, new TEvDqCompute::TEvInjectCheckpoint());
+ }
+
+ void HandleDisconnected(TEvInterconnect::TEvNodeDisconnected::TPtr& ev) {
+ EventsQueue.HandleNodeDisconnected(ev->Get()->NodeId);
+ }
+
+ void HandleConnected(TEvInterconnect::TEvNodeConnected::TPtr& ev) {
+ EventsQueue.HandleNodeConnected(ev->Get()->NodeId);
+ }
+
+ void Handle(NActors::TEvents::TEvUndelivered::TPtr& ev) {
+ EventsQueue.HandleUndelivered(ev);
+ }
+
+ NYql::NDq::TRetryEventsQueue EventsQueue;
+ NActors::TActorId ServerEdgeActorId;
+};
+
+struct TRuntime: public NActors::TTestBasicRuntime
+{
+public:
+ TRuntime()
+ : NActors::TTestBasicRuntime(2, true){
+ Initialize(NKikimr::TAppPrepare().Unwrap());
+ SetLogPriority(NKikimrServices::FQ_ROW_DISPATCHER, NLog::PRI_DEBUG);
+
+ ClientEdgeActorId = AllocateEdgeActor(0);
+ ServerEdgeActorId = AllocateEdgeActor(1);
+
+ Server = new ServerActor(ServerEdgeActorId);
+ ServerActorId = Register(Server, 1);
+ EnableScheduleForActor(ServerActorId, true);
+
+ Client = new ClientActor(ClientEdgeActorId, ServerActorId);
+ ClientActorId = Register(Client, 0);
+ EnableScheduleForActor(ClientActorId, true);
+ }
+
+ ClientActor* Client;
+ ServerActor* Server;
+ NActors::TActorId ClientActorId;
+ NActors::TActorId ServerActorId;
+ NActors::TActorId ClientEdgeActorId;
+ NActors::TActorId ServerEdgeActorId;
+};
+
+Y_UNIT_TEST_SUITE(TRetryEventsQueueTest) {
+ Y_UNIT_TEST(SendDisconnectAfterPoisonPill) {
+ TRuntime runtime;
+
+ runtime.Send(new IEventHandle(
+ runtime.ClientActorId,
+ runtime.ClientEdgeActorId,
+ new TEvPrivate::TEvSend()));
+
+ TEvDqCompute::TEvInjectCheckpoint::TPtr event = runtime.GrabEdgeEvent<TEvDqCompute::TEvInjectCheckpoint>(runtime.ServerEdgeActorId);
+ UNIT_ASSERT(event);
+
+ runtime.Send(runtime.ServerActorId, runtime.ServerEdgeActorId, new TEvents::TEvPoisonPill());
+
+ TEvPrivate::TEvDisconnect::TPtr disconnectEvent = runtime.GrabEdgeEvent<TEvPrivate::TEvDisconnect>(runtime.ClientEdgeActorId);
+ UNIT_ASSERT(disconnectEvent);
+ }
+}
+
+}
diff --git a/ydb/library/yql/dq/actors/common/ut/ya.make b/ydb/library/yql/dq/actors/common/ut/ya.make
new file mode 100644
index 00000000000..bdc87264c39
--- /dev/null
+++ b/ydb/library/yql/dq/actors/common/ut/ya.make
@@ -0,0 +1,16 @@
+UNITTEST_FOR(ydb/library/yql/dq/actors/common)
+
+SRCS(
+ retry_events_queue_ut.cpp
+)
+
+PEERDIR(
+ library/cpp/testing/unittest
+ ydb/core/testlib/actors
+ ydb/core/testlib
+ ydb/library/yql/sql/pg_dummy
+)
+
+YQL_LAST_ABI_VERSION()
+
+END()
diff --git a/ydb/library/yql/dq/actors/common/ya.make b/ydb/library/yql/dq/actors/common/ya.make
new file mode 100644
index 00000000000..9311daae3a9
--- /dev/null
+++ b/ydb/library/yql/dq/actors/common/ya.make
@@ -0,0 +1,19 @@
+LIBRARY()
+
+SRCS(
+ retry_queue.cpp
+)
+
+PEERDIR(
+ ydb/library/actors/core
+ ydb/library/yql/dq/actors/protos
+ ydb/library/yql/public/issue
+)
+
+YQL_LAST_ABI_VERSION()
+
+END()
+
+RECURSE_FOR_TESTS(
+ ut
+)
diff --git a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.h b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.h
index 21bc494fef8..27a4ad7a840 100644
--- a/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.h
+++ b/ydb/library/yql/dq/actors/compute/dq_compute_actor_checkpoints.h
@@ -2,8 +2,8 @@
#include "dq_compute_actor.h"
#include "dq_compute_actor_async_io.h"
-#include "retry_queue.h"
+#include <ydb/library/yql/dq/actors/common/retry_queue.h>
#include <ydb/library/yql/dq/common/dq_common.h>
#include <ydb/library/actors/core/log.h>
diff --git a/ydb/library/yql/dq/actors/compute/ya.make b/ydb/library/yql/dq/actors/compute/ya.make
index 3cec159c246..018af068e57 100644
--- a/ydb/library/yql/dq/actors/compute/ya.make
+++ b/ydb/library/yql/dq/actors/compute/ya.make
@@ -11,7 +11,6 @@ SRCS(
dq_compute_actor_watermarks.cpp
dq_compute_actor.cpp
dq_compute_issues_buffer.cpp
- retry_queue.cpp
dq_request_context.h
dq_request_context.cpp
)
@@ -21,11 +20,12 @@ PEERDIR(
ydb/library/actors/wilson/protos
ydb/library/services
ydb/library/ydb_issue/proto
+ ydb/library/yql/dq/actors/common
+ ydb/library/yql/dq/actors/spilling
ydb/library/yql/dq/common
ydb/library/yql/dq/proto
ydb/library/yql/dq/runtime
ydb/library/yql/dq/tasks
- ydb/library/yql/dq/actors/spilling
ydb/library/yql/minikql
ydb/library/yql/minikql/comp_nodes
ydb/library/yql/public/issue
diff --git a/ydb/library/yql/dq/integration/yql_dq_integration.h b/ydb/library/yql/dq/integration/yql_dq_integration.h
index 202486f6555..2534ad0e993 100644
--- a/ydb/library/yql/dq/integration/yql_dq_integration.h
+++ b/ydb/library/yql/dq/integration/yql_dq_integration.h
@@ -63,7 +63,7 @@ public:
virtual bool CanBlockRead(const NNodes::TExprBase& node, TExprContext& ctx, TTypeAnnotationContext& typesCtx) = 0;
virtual void RegisterMkqlCompiler(NCommon::TMkqlCallableCompilerBase& compiler) = 0;
virtual bool CanFallback() = 0;
- virtual void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& settings, TString& sourceType, size_t maxPartitions) = 0;
+ virtual void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& settings, TString& sourceType, size_t maxPartitions, TExprContext& ctx) = 0;
virtual void FillLookupSourceSettings(const TExprNode& node, ::google::protobuf::Any& settings, TString& sourceType) = 0;
virtual void FillSinkSettings(const TExprNode& node, ::google::protobuf::Any& settings, TString& sinkType) = 0;
virtual void FillTransformSettings(const TExprNode& node, ::google::protobuf::Any& settings) = 0;
diff --git a/ydb/library/yql/providers/clickhouse/provider/yql_clickhouse_dq_integration.cpp b/ydb/library/yql/providers/clickhouse/provider/yql_clickhouse_dq_integration.cpp
index f5aa18638eb..0f997f1e449 100644
--- a/ydb/library/yql/providers/clickhouse/provider/yql_clickhouse_dq_integration.cpp
+++ b/ydb/library/yql/providers/clickhouse/provider/yql_clickhouse_dq_integration.cpp
@@ -75,7 +75,7 @@ public:
return 0ULL;
}
- void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType, size_t) override {
+ void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType, size_t, TExprContext&) override {
const TDqSource source(&node);
if (const auto maySettings = source.Settings().Maybe<TClSourceSettings>()) {
const auto settings = maySettings.Cast();
diff --git a/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp b/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp
index 630181553d5..600e3c72e25 100644
--- a/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp
+++ b/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.cpp
@@ -56,7 +56,7 @@ bool TDqIntegrationBase::CanFallback() {
return false;
}
-void TDqIntegrationBase::FillSourceSettings(const TExprNode&, ::google::protobuf::Any&, TString&, size_t) {
+void TDqIntegrationBase::FillSourceSettings(const TExprNode&, ::google::protobuf::Any&, TString&, size_t, TExprContext&) {
}
void TDqIntegrationBase::FillLookupSourceSettings(const TExprNode& node, ::google::protobuf::Any& settings, TString& sourceType) {
diff --git a/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h b/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h
index 9372ebcbeba..5e1c77cce94 100644
--- a/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h
+++ b/ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h
@@ -19,7 +19,7 @@ public:
bool CanBlockRead(const NNodes::TExprBase& node, TExprContext& ctx, TTypeAnnotationContext& typesCtx) override;
TExprNode::TPtr WrapWrite(const TExprNode::TPtr& write, TExprContext& ctx) override;
bool CanFallback() override;
- void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& settings, TString& sourceType, size_t) override;
+ void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& settings, TString& sourceType, size_t, TExprContext&) override;
void FillLookupSourceSettings(const TExprNode& node, ::google::protobuf::Any& settings, TString& sourceType) override;
void FillSinkSettings(const TExprNode& node, ::google::protobuf::Any& settings, TString& sinkType) override;
void FillTransformSettings(const TExprNode& node, ::google::protobuf::Any& settings) override;
diff --git a/ydb/library/yql/providers/common/proto/gateways_config.proto b/ydb/library/yql/providers/common/proto/gateways_config.proto
index 61d9082f528..dbb433a4e2f 100644
--- a/ydb/library/yql/providers/common/proto/gateways_config.proto
+++ b/ydb/library/yql/providers/common/proto/gateways_config.proto
@@ -326,6 +326,7 @@ message TPqClusterConfig {
optional bool AddBearerToToken = 11; // whether to use prefix "Bearer " in token
optional string DatabaseId = 12;
repeated TAttr Settings = 100;
+ optional bool SharedReading = 101;
}
message TPqGatewayConfig {
diff --git a/ydb/library/yql/providers/common/pushdown/physical_opt.cpp b/ydb/library/yql/providers/common/pushdown/physical_opt.cpp
new file mode 100644
index 00000000000..602ab6c8f0b
--- /dev/null
+++ b/ydb/library/yql/providers/common/pushdown/physical_opt.cpp
@@ -0,0 +1,73 @@
+#include "predicate_node.h"
+
+#include <ydb/library/yql/utils/log/log.h>
+#include <ydb/library/yql/core/expr_nodes/yql_expr_nodes.h>
+#include <ydb/library/yql/providers/common/provider/yql_provider.h>
+#include <ydb/library/yql/providers/common/pushdown/collection.h>
+
+namespace NYql::NPushdown {
+
+using namespace NNodes;
+
+namespace {
+
+TPredicateNode SplitForPartialPushdown(
+ const NPushdown::TPredicateNode& predicateTree,
+ TExprContext& ctx,
+ TPositionHandle pos) {
+ if (predicateTree.CanBePushed) {
+ return predicateTree;
+ }
+
+ if (predicateTree.Op != NPushdown::EBoolOp::And) {
+ return NPushdown::TPredicateNode(); // Invalid predicate => optimizer keeps the original node unchanged
+ }
+
+ std::vector<NPushdown::TPredicateNode> pushable;
+ for (auto& predicate : predicateTree.Children) {
+ if (predicate.CanBePushed) {
+ pushable.emplace_back(predicate);
+ }
+ }
+ NPushdown::TPredicateNode predicateToPush;
+ predicateToPush.SetPredicates(pushable, ctx, pos);
+ return predicateToPush;
+}
+
+}
+
+TMaybeNode<TCoLambda> MakePushdownPredicate(const TCoLambda& lambda, TExprContext& ctx, const TPositionHandle& pos, const TSettings& settings) {
+ auto lambdaArg = lambda.Args().Arg(0).Ptr();
+
+ YQL_LOG(TRACE) << "Push filter. Initial filter lambda: " << NCommon::ExprToPrettyString(ctx, lambda.Ref());
+
+ auto maybeOptionalIf = lambda.Body().Maybe<TCoOptionalIf>();
+ if (!maybeOptionalIf.IsValid()) { // Nothing to push
+ return {};
+ }
+
+ TCoOptionalIf optionalIf = maybeOptionalIf.Cast();
+ NPushdown::TPredicateNode predicateTree(optionalIf.Predicate());
+ NPushdown::CollectPredicates(optionalIf.Predicate(), predicateTree, lambdaArg.Get(), TExprBase(lambdaArg), settings);
+ YQL_ENSURE(predicateTree.IsValid(), "Collected filter predicates are invalid");
+
+ NPushdown::TPredicateNode predicateToPush = SplitForPartialPushdown(predicateTree, ctx, pos);
+ if (!predicateToPush.IsValid()) {
+ return {};
+ }
+
+ // clang-format off
+ auto newFilterLambda = Build<TCoLambda>(ctx, pos)
+ .Args({"filter_row"})
+ .Body<TExprApplier>()
+ .Apply(predicateToPush.ExprNode.Cast())
+ .With(TExprBase(lambdaArg), "filter_row")
+ .Build()
+ .Done();
+ // clang-format on
+
+ YQL_LOG(INFO) << "Push filter lambda: " << NCommon::ExprToPrettyString(ctx, *newFilterLambda.Ptr());
+ return newFilterLambda;
+}
+
+} // namespace NYql::NPushdown
diff --git a/ydb/library/yql/providers/common/pushdown/physical_opt.h b/ydb/library/yql/providers/common/pushdown/physical_opt.h
new file mode 100644
index 00000000000..f4a9bbe1906
--- /dev/null
+++ b/ydb/library/yql/providers/common/pushdown/physical_opt.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <ydb/library/yql/ast/yql_expr.h>
+#include <ydb/library/yql/ast/yql_pos_handle.h>
+#include <ydb/library/yql/core/expr_nodes_gen/yql_expr_nodes_gen.h>
+
+namespace NYql::NPushdown {
+
+NNodes::TMaybeNode<NNodes::TCoLambda> MakePushdownPredicate(const NNodes::TCoLambda& lambda, TExprContext& ctx, const TPositionHandle& pos, const TSettings& settings);
+
+} // namespace NYql::NPushdown
diff --git a/ydb/library/yql/providers/common/pushdown/type_ann.cpp b/ydb/library/yql/providers/common/pushdown/type_ann.cpp
new file mode 100644
index 00000000000..5ba21286a39
--- /dev/null
+++ b/ydb/library/yql/providers/common/pushdown/type_ann.cpp
@@ -0,0 +1,36 @@
+#include "predicate_node.h"
+
+#include <ydb/library/yql/core/expr_nodes/yql_expr_nodes.h>
+#include <ydb/library/yql/core/yql_expr_type_annotation.h>
+
+namespace NYql::NPushdown {
+
+IGraphTransformer::TStatus AnnotateFilterPredicate(const TExprNode::TPtr& input, size_t childIndex, const TStructExprType* itemType, TExprContext& ctx) {
+ if (childIndex >= input->ChildrenSize()) {
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ auto& filterLambda = input->ChildRef(childIndex);
+ if (!EnsureLambda(*filterLambda, ctx)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ if (!UpdateLambdaAllArgumentsTypes(filterLambda, {itemType}, ctx)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ if (const auto* filterLambdaType = filterLambda->GetTypeAnn()) {
+ if (filterLambdaType->GetKind() != ETypeAnnotationKind::Data) {
+ return IGraphTransformer::TStatus::Error;
+ }
+ const TDataExprType* dataExprType = static_cast<const TDataExprType*>(filterLambdaType);
+ if (dataExprType->GetSlot() != EDataSlot::Bool) {
+ return IGraphTransformer::TStatus::Error;
+ }
+ } else {
+ return IGraphTransformer::TStatus::Repeat;
+ }
+ return IGraphTransformer::TStatus::Ok;
+}
+
+} // namespace NYql::NPushdown
diff --git a/ydb/library/yql/providers/common/pushdown/type_ann.h b/ydb/library/yql/providers/common/pushdown/type_ann.h
new file mode 100644
index 00000000000..4d879674fb0
--- /dev/null
+++ b/ydb/library/yql/providers/common/pushdown/type_ann.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <ydb/library/yql/ast/yql_expr.h>
+#include <ydb/library/yql/ast/yql_pos_handle.h>
+#include <ydb/library/yql/core/expr_nodes_gen/yql_expr_nodes_gen.h>
+#include <ydb/library/yql/core/yql_graph_transformer.h>
+
+namespace NYql::NPushdown {
+
+IGraphTransformer::TStatus AnnotateFilterPredicate(
+ const TExprNode::TPtr& input,
+ size_t childIndex,
+ const TStructExprType* itemType,
+ TExprContext& ctx);
+
+
+} // namespace NYql::NPushdown
diff --git a/ydb/library/yql/providers/common/pushdown/ya.make b/ydb/library/yql/providers/common/pushdown/ya.make
index f488c383a94..626babe45a1 100644
--- a/ydb/library/yql/providers/common/pushdown/ya.make
+++ b/ydb/library/yql/providers/common/pushdown/ya.make
@@ -4,6 +4,8 @@ SRCS(
collection.cpp
predicate_node.cpp
settings.cpp
+ type_ann.cpp
+ physical_opt.cpp
)
PEERDIR(
diff --git a/ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.h b/ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.h
index c74c9bb201b..86c6d49eab5 100644
--- a/ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.h
+++ b/ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.h
@@ -127,6 +127,7 @@ public:
public:
IDqComputeActorAsyncInput* DqAsyncInput = nullptr;
IDqComputeActorAsyncOutput* DqAsyncOutput = nullptr;
+ std::optional<NActors::TActorId> DqAsyncInputActorId;
private:
STRICT_STFUNC(StateFunc,
@@ -164,7 +165,6 @@ public:
NKikimr::NMiniKQL::TDefaultValueBuilder ValueBuilder;
private:
- std::optional<NActors::TActorId> DqAsyncInputActorId;
IActor* DqAsyncInputAsActor = nullptr;
std::optional<NActors::TActorId> DqAsyncOutputActorId;
diff --git a/ydb/library/yql/providers/dq/planner/execution_planner.cpp b/ydb/library/yql/providers/dq/planner/execution_planner.cpp
index 793ab143295..5d713dc1eff 100644
--- a/ydb/library/yql/providers/dq/planner/execution_planner.cpp
+++ b/ydb/library/yql/providers/dq/planner/execution_planner.cpp
@@ -549,7 +549,7 @@ namespace NYql::NDqs {
TString sourceType;
if (dqSource) {
sourceSettings.ConstructInPlace();
- dqIntegration->FillSourceSettings(*read, *sourceSettings, sourceType, maxPartitions);
+ dqIntegration->FillSourceSettings(*read, *sourceSettings, sourceType, maxPartitions, ExprContext);
YQL_ENSURE(!sourceSettings->type_url().empty(), "Data source provider \"" << dataSourceName << "\" did't fill dq source settings for its dq source node");
YQL_ENSURE(sourceType, "Data source provider \"" << dataSourceName << "\" did't fill dq source settings type for its dq source node");
}
diff --git a/ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp b/ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp
index 48bb17d5267..937b5b0c60a 100644
--- a/ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp
+++ b/ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp
@@ -180,7 +180,7 @@ public:
.Ptr();
::google::protobuf::Any settings;
TString sourceType;
- dqIntegration->FillSourceSettings(*dqSourceNode, settings, sourceType, 1);
+ dqIntegration->FillSourceSettings(*dqSourceNode, settings, sourceType, 1, ctx);
UNIT_ASSERT_STRINGS_EQUAL(sourceType, "PostgreSqlGeneric");
UNIT_ASSERT(settings.Is<Generic::TSource>());
settings.UnpackTo(DqSourceSettings_);
diff --git a/ydb/library/yql/providers/generic/provider/ya.make b/ydb/library/yql/providers/generic/provider/ya.make
index 7d50ad5b716..4d21e4fb004 100644
--- a/ydb/library/yql/providers/generic/provider/ya.make
+++ b/ydb/library/yql/providers/generic/provider/ya.make
@@ -32,6 +32,7 @@ PEERDIR(
library/cpp/json
library/cpp/random_provider
library/cpp/time_provider
+ ydb/core/fq/libs/common
ydb/core/fq/libs/result_formatter
ydb/library/yql/ast
ydb/library/yql/core
diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_datasource_type_ann.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_datasource_type_ann.cpp
index c306d44b7ed..791c9ecb7f8 100644
--- a/ydb/library/yql/providers/generic/provider/yql_generic_datasource_type_ann.cpp
+++ b/ydb/library/yql/providers/generic/provider/yql_generic_datasource_type_ann.cpp
@@ -6,6 +6,7 @@
#include <ydb/library/yql/providers/common/provider/yql_data_provider_impl.h>
#include <ydb/library/yql/providers/common/provider/yql_provider.h>
#include <ydb/library/yql/providers/common/provider/yql_provider_names.h>
+#include <ydb/library/yql/providers/common/pushdown/type_ann.h>
#include <ydb/library/yql/providers/generic/expr_nodes/yql_generic_expr_nodes.h>
#include <ydb/library/yql/utils/log/log.h>
@@ -47,34 +48,6 @@ namespace NYql {
return TStatus::Ok;
}
- TStatus AnnotateFilterPredicate(const TExprNode::TPtr& input, size_t childIndex, const TStructExprType* itemType, TExprContext& ctx) {
- if (childIndex >= input->ChildrenSize()) {
- return TStatus::Error;
- }
-
- auto& filterLambda = input->ChildRef(childIndex);
- if (!EnsureLambda(*filterLambda, ctx)) {
- return TStatus::Error;
- }
-
- if (!UpdateLambdaAllArgumentsTypes(filterLambda, {itemType}, ctx)) {
- return IGraphTransformer::TStatus::Error;
- }
-
- if (const auto* filterLambdaType = filterLambda->GetTypeAnn()) {
- if (filterLambdaType->GetKind() != ETypeAnnotationKind::Data) {
- return IGraphTransformer::TStatus::Error;
- }
- const TDataExprType* dataExprType = static_cast<const TDataExprType*>(filterLambdaType);
- if (dataExprType->GetSlot() != EDataSlot::Bool) {
- return IGraphTransformer::TStatus::Error;
- }
- } else {
- return IGraphTransformer::TStatus::Repeat;
- }
- return TStatus::Ok;
- }
-
TStatus HandleSourceSettings(const TExprNode::TPtr& input, TExprContext& ctx) {
if (!EnsureArgsCount(*input, 5, ctx)) {
return TStatus::Error;
@@ -123,7 +96,7 @@ namespace NYql {
}
// Filter
- const TStatus filterAnnotationStatus = AnnotateFilterPredicate(input, TGenSourceSettings::idx_FilterPredicate, structExprType, ctx);
+ const TStatus filterAnnotationStatus = NYql::NPushdown::AnnotateFilterPredicate(input, TGenSourceSettings::idx_FilterPredicate, structExprType, ctx);
if (filterAnnotationStatus != TStatus::Ok) {
return filterAnnotationStatus;
}
@@ -204,7 +177,7 @@ namespace NYql {
}
// Filter
- const TStatus filterAnnotationStatus = AnnotateFilterPredicate(input, TGenReadTable::idx_FilterPredicate, itemType, ctx);
+ const TStatus filterAnnotationStatus = NYql::NPushdown::AnnotateFilterPredicate(input, TGenReadTable::idx_FilterPredicate, itemType, ctx);
if (filterAnnotationStatus != TStatus::Ok) {
return filterAnnotationStatus;
}
diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_dq_integration.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_dq_integration.cpp
index 7d1fd5df73c..777cf1b6c7c 100644
--- a/ydb/library/yql/providers/generic/provider/yql_generic_dq_integration.cpp
+++ b/ydb/library/yql/providers/generic/provider/yql_generic_dq_integration.cpp
@@ -114,7 +114,7 @@ namespace NYql {
}
void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings,
- TString& sourceType, size_t) override {
+ TString& sourceType, size_t, TExprContext&) override {
const TDqSource source(&node);
if (const auto maybeSettings = source.Settings().Maybe<TGenSourceSettings>()) {
const auto settings = maybeSettings.Cast();
diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_physical_opt.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_physical_opt.cpp
index 517bede88df..8dbe6b5693e 100644
--- a/ydb/library/yql/providers/generic/provider/yql_generic_physical_opt.cpp
+++ b/ydb/library/yql/providers/generic/provider/yql_generic_physical_opt.cpp
@@ -8,6 +8,7 @@
#include <ydb/library/yql/providers/common/provider/yql_provider.h>
#include <ydb/library/yql/providers/common/provider/yql_provider_names.h>
#include <ydb/library/yql/providers/common/pushdown/collection.h>
+#include <ydb/library/yql/providers/common/pushdown/physical_opt.h>
#include <ydb/library/yql/providers/common/pushdown/predicate_node.h>
#include <ydb/library/yql/providers/common/transform/yql_optimize.h>
#include <ydb/library/yql/providers/generic/expr_nodes/yql_generic_expr_nodes.h>
@@ -105,62 +106,6 @@ namespace NYql {
return node;
}
- static NPushdown::TPredicateNode SplitForPartialPushdown(const NPushdown::TPredicateNode& predicateTree,
- TExprContext& ctx, TPositionHandle pos)
- {
- if (predicateTree.CanBePushed) {
- return predicateTree;
- }
-
- if (predicateTree.Op != NPushdown::EBoolOp::And) {
- return NPushdown::TPredicateNode(); // Not valid, => return the same node from optimizer
- }
-
- std::vector<NPushdown::TPredicateNode> pushable;
- for (auto& predicate : predicateTree.Children) {
- if (predicate.CanBePushed) {
- pushable.emplace_back(predicate);
- }
- }
- NPushdown::TPredicateNode predicateToPush;
- predicateToPush.SetPredicates(pushable, ctx, pos);
- return predicateToPush;
- }
-
- TMaybeNode<TCoLambda> MakePushdownPredicate(const TCoLambda& lambda, TExprContext& ctx, const TPositionHandle& pos) const {
- auto lambdaArg = lambda.Args().Arg(0).Ptr();
-
- YQL_CLOG(TRACE, ProviderGeneric) << "Push filter. Initial filter lambda: " << NCommon::ExprToPrettyString(ctx, lambda.Ref());
-
- auto maybeOptionalIf = lambda.Body().Maybe<TCoOptionalIf>();
- if (!maybeOptionalIf.IsValid()) { // Nothing to push
- return {};
- }
-
- TCoOptionalIf optionalIf = maybeOptionalIf.Cast();
- NPushdown::TPredicateNode predicateTree(optionalIf.Predicate());
- NPushdown::CollectPredicates(optionalIf.Predicate(), predicateTree, lambdaArg.Get(), TExprBase(lambdaArg), TPushdownSettings());
- YQL_ENSURE(predicateTree.IsValid(), "Collected filter predicates are invalid");
-
- NPushdown::TPredicateNode predicateToPush = SplitForPartialPushdown(predicateTree, ctx, pos);
- if (!predicateToPush.IsValid()) {
- return {};
- }
-
- // clang-format off
- auto newFilterLambda = Build<TCoLambda>(ctx, pos)
- .Args({"filter_row"})
- .Body<TExprApplier>()
- .Apply(predicateToPush.ExprNode.Cast())
- .With(TExprBase(lambdaArg), "filter_row")
- .Build()
- .Done();
- // clang-format on
-
- YQL_CLOG(INFO, ProviderGeneric) << "Push filter lambda: " << NCommon::ExprToPrettyString(ctx, *newFilterLambda.Ptr());
- return newFilterLambda;
- }
-
TMaybeNode<TExprBase> PushFilterToReadTable(TExprBase node, TExprContext& ctx) const {
if (!State_->Configuration->UsePredicatePushdown.Get().GetOrElse(TGenericSettings::TDefault::UsePredicatePushdown)) {
return node;
@@ -182,7 +127,7 @@ namespace NYql {
return node;
}
- auto newFilterLambda = MakePushdownPredicate(flatmap.Lambda(), ctx, node.Pos());
+ auto newFilterLambda = NPushdown::MakePushdownPredicate(flatmap.Lambda(), ctx, node.Pos(), TPushdownSettings());
if (!newFilterLambda) {
return node;
}
@@ -223,7 +168,7 @@ namespace NYql {
return node;
}
- auto newFilterLambda = MakePushdownPredicate(flatmap.Lambda(), ctx, node.Pos());
+ auto newFilterLambda = NPushdown::MakePushdownPredicate(flatmap.Lambda(), ctx, node.Pos(), TPushdownSettings());
if (!newFilterLambda) {
return node;
}
diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.cpp
index 0d151689f4a..e4b99205d4e 100644
--- a/ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.cpp
+++ b/ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.cpp
@@ -1,7 +1,7 @@
#include "yql_generic_predicate_pushdown.h"
#include <ydb/library/yql/providers/generic/connector/api/service/protos/connector.pb.h>
-
+#include <ydb/core/fq/libs/common/util.h>
#include <util/string/cast.h>
namespace NYql {
@@ -9,6 +9,19 @@ namespace NYql {
using namespace NNodes;
using namespace NConnector::NApi;
+ TString FormatColumn(const TString& value);
+ TString FormatValue(const Ydb::TypedValue& value);
+ TString FormatNull(const TExpression_TNull&);
+ TString FormatExpression(const TExpression& expression);
+ TString FormatArithmeticalExpression(const TExpression_TArithmeticalExpression& expression);
+ TString FormatNegation(const TPredicate_TNegation& negation);
+ TString FormatComparison(const TPredicate_TComparison comparison);
+ TString FormatConjunction(const TPredicate_TConjunction& conjunction, bool topLevel);
+ TString FormatDisjunction(const TPredicate_TDisjunction& disjunction);
+ TString FormatIsNull(const TPredicate_TIsNull& isNull);
+ TString FormatIsNotNull(const TPredicate_TIsNotNull& isNotNull);
+ TString FormatPredicate(const TPredicate& predicate, bool topLevel);
+
namespace {
bool SerializeMember(const TCoMember& member, TExpression* proto, const TCoArgument& arg, TStringBuilder& err) {
@@ -185,8 +198,223 @@ namespace NYql {
err << "unknown predicate: " << predicate.Raw()->Content();
return false;
}
+ }
+
+ TString FormatColumn(const TString& value) {
+ return NFq::EncloseAndEscapeString(value, '`');
+ }
+
+ TString FormatValue(const Ydb::TypedValue& value) {
+ switch (value.value().value_case()) {
+ case Ydb::Value::kBoolValue:
+ return ToString(value.value().bool_value());
+ case Ydb::Value::kInt32Value:
+ return ToString(value.value().int32_value());
+ case Ydb::Value::kUint32Value:
+ return ToString(value.value().uint32_value());
+ case Ydb::Value::kInt64Value:
+ return ToString(value.value().int64_value());
+ case Ydb::Value::kUint64Value:
+ return ToString(value.value().uint64_value());
+ case Ydb::Value::kFloatValue:
+ return ToString(value.value().float_value());
+ case Ydb::Value::kDoubleValue:
+ return ToString(value.value().double_value());
+ case Ydb::Value::kBytesValue:
+ return NFq::EncloseAndEscapeString(value.value().bytes_value(), '"');
+ case Ydb::Value::kTextValue:
+ return NFq::EncloseAndEscapeString(value.value().text_value(), '"');
+ default:
+ throw yexception() << "ErrUnimplementedTypedValue, value case " << static_cast<ui64>(value.value().value_case());
+ }
+ }
+
+ TString FormatNull(const TExpression_TNull&) {
+ return "NULL";
+ }
+
+ TString FormatExpression(const TExpression& expression) {
+ switch (expression.payload_case()) {
+ case TExpression::kColumn:
+ return FormatColumn(expression.column());
+ case TExpression::kTypedValue:
+ return FormatValue(expression.typed_value());
+ case TExpression::kArithmeticalExpression:
+ return FormatArithmeticalExpression(expression.arithmetical_expression());
+ case TExpression::kNull:
+ return FormatNull(expression.null());
+ default:
+ throw yexception() << "UnimplementedExpression, payload_case " << static_cast<ui64>(expression.payload_case());
+ }
+ }
+
+ TString FormatArithmeticalExpression(const TExpression_TArithmeticalExpression& expression) {
+ TString operation;
+ switch (expression.operation()) {
+ case TExpression_TArithmeticalExpression::MUL:
+ operation = " * ";
+ break;
+ case TExpression_TArithmeticalExpression::ADD:
+ operation = " + ";
+ break;
+ case TExpression_TArithmeticalExpression::SUB:
+ operation = " - ";
+ break;
+ case TExpression_TArithmeticalExpression::BIT_AND:
+ operation = " & ";
+ break;
+ case TExpression_TArithmeticalExpression::BIT_OR:
+ operation = " | ";
+ break;
+ case TExpression_TArithmeticalExpression::BIT_XOR:
+ operation = " ^ ";
+ break;
+ default:
+ throw yexception() << "ErrUnimplementedArithmeticalExpression, operation " << static_cast<ui64>(expression.operation());
+ }
+
+ auto left = FormatExpression(expression.left_value());
+ auto right = FormatExpression(expression.right_value());
+ return left + operation + right;
+ }
+
+ TString FormatNegation(const TPredicate_TNegation& negation) {
+ auto pred = FormatPredicate(negation.operand(), false);
+ return "(NOT " + pred + ")";
+ }
+
+ TString FormatConjunction(const TPredicate_TConjunction& conjunction, bool /*topLevel*/) {
+ ui32 succeeded = 0;
+ TStringStream stream;
+ TString first;
+
+ for (const auto& predicate : conjunction.operands()) {
+ auto statement = FormatPredicate(predicate, false);
+
+ if (succeeded > 0) {
+ if (succeeded == 1) {
+ stream << "(";
+ stream << first;
+ }
+ stream << " AND ";
+ stream << statement;
+ } else {
+ first = statement;
+ }
+ succeeded++;
+ }
+
+ if (succeeded == 0) {
+ throw yexception() << "failed to format AND statement, no operands";
+ }
+
+ if (succeeded == 1) {
+ stream << first;
+ } else {
+ stream << ")";
+ }
+ return stream.Str();
+ }
+
+ TString FormatDisjunction(const TPredicate_TDisjunction& disjunction) {
+ TStringStream stream;
+ TString first;
+ ui32 cnt = 0;
+
+ for (const auto& predicate : disjunction.operands()) {
+ auto statement = FormatPredicate(predicate, false);
+
+ if (cnt > 0) {
+ if (cnt == 1) {
+ stream << "(";
+ stream << first;
+ }
+
+ stream << " OR ";
+ stream << statement;
+ } else {
+ first = statement;
+ }
+ cnt++;
+ }
+
+ if (cnt == 0) {
+ throw yexception() << "failed to format OR statement: no operands";
+ }
+
+ if (cnt == 1) {
+ stream << first;
+ } else {
+ stream << ")";
+ }
+
+ return stream.Str();
+ }
+
+ TString FormatIsNull(const TPredicate_TIsNull& isNull) {
+ auto statement = FormatExpression(isNull.value());
+ return "(" + statement + " IS NULL)";
+ }
+
+ TString FormatIsNotNull(const TPredicate_TIsNotNull& isNotNull) {
+ auto statement = FormatExpression(isNotNull.value());
+ return "(" + statement + " IS NOT NULL)";
+ }
+
+ TString FormatComparison(TPredicate_TComparison comparison) {
+ TString operation;
+
+ switch (comparison.operation()) {
+ case TPredicate_TComparison::L:
+ operation = " < ";
+ break;
+ case TPredicate_TComparison::LE:
+ operation = " <= ";
+ break;
+ case TPredicate_TComparison::EQ:
+ operation = " = ";
+ break;
+ case TPredicate_TComparison::NE:
+ operation = " <> ";
+ break;
+ case TPredicate_TComparison::GE:
+ operation = " >= ";
+ break;
+ case TPredicate_TComparison::G:
+ operation = " > ";
+ break;
+ default:
+ throw yexception() << "UnimplementedOperation, operation " << static_cast<ui64>(comparison.operation());
+ }
- } // namespace
+ auto left = FormatExpression(comparison.left_value());
+ auto right = FormatExpression(comparison.right_value());
+
+ return left + operation + right;
+ }
+
+ TString FormatPredicate(const TPredicate& predicate, bool topLevel ) {
+ switch (predicate.payload_case()) {
+ case TPredicate::PAYLOAD_NOT_SET:
+ return {};
+ case TPredicate::kNegation:
+ return FormatNegation(predicate.negation());
+ case TPredicate::kConjunction:
+ return FormatConjunction(predicate.conjunction(), topLevel);
+ case TPredicate::kDisjunction:
+ return FormatDisjunction(predicate.disjunction());
+ case TPredicate::kIsNull:
+ return FormatIsNull(predicate.is_null());
+ case TPredicate::kIsNotNull:
+ return FormatIsNotNull(predicate.is_not_null());
+ case TPredicate::kComparison:
+ return FormatComparison(predicate.comparison());
+ case TPredicate::kBoolExpression:
+ return FormatExpression(predicate.bool_expression().value());
+ default:
+ throw yexception() << "UnimplementedPredicateType, payload_case " << static_cast<ui64>(predicate.payload_case());
+ }
+ }
bool IsEmptyFilterPredicate(const TCoLambda& lambda) {
auto maybeBool = lambda.Body().Maybe<TCoBool>();
@@ -200,4 +428,11 @@ namespace NYql {
return SerializePredicate(predicate.Body(), proto, predicate.Args().Arg(0), err);
}
+ TString FormatWhere(const TPredicate& predicate) {
+ auto stream = FormatPredicate(predicate, true);
+ if (stream.empty()) {
+ return "";
+ }
+ return "WHERE " + stream;
+ }
} // namespace NYql
diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.h b/ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.h
index 121ab505278..b798e483b8a 100644
--- a/ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.h
+++ b/ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.h
@@ -10,5 +10,5 @@ namespace NYql {
bool IsEmptyFilterPredicate(const NNodes::TCoLambda& lambda);
bool SerializeFilterPredicate(const NNodes::TCoLambda& predicate, NConnector::NApi::TPredicate* proto, TStringBuilder& err);
-
+ TString FormatWhere(const NConnector::NApi::TPredicate& predicate);
} // namespace NYql
diff --git a/ydb/library/yql/providers/pq/async_io/dq_pq_rd_read_actor.cpp b/ydb/library/yql/providers/pq/async_io/dq_pq_rd_read_actor.cpp
new file mode 100644
index 00000000000..74fdb094923
--- /dev/null
+++ b/ydb/library/yql/providers/pq/async_io/dq_pq_rd_read_actor.cpp
@@ -0,0 +1,697 @@
+#include "dq_pq_rd_read_actor.h"
+#include "probes.h"
+
+#include <ydb/library/yql/dq/common/dq_common.h>
+#include <ydb/library/yql/dq/actors/protos/dq_events.pb.h>
+#include <ydb/library/yql/dq/actors/compute/dq_compute_actor_async_io_factory.h>
+#include <ydb/library/yql/dq/actors/compute/dq_compute_actor_async_io.h>
+#include <ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h>
+#include <ydb/library/yql/dq/actors/compute/dq_source_watermark_tracker.h>
+#include <ydb/library/yql/dq/actors/common/retry_queue.h>
+
+#include <ydb/library/yql/minikql/comp_nodes/mkql_saveload.h>
+#include <ydb/library/yql/minikql/mkql_alloc.h>
+#include <ydb/library/yql/minikql/mkql_string_util.h>
+#include <ydb/library/yql/providers/pq/async_io/dq_pq_meta_extractor.h>
+#include <ydb/library/yql/providers/pq/async_io/dq_pq_read_actor_base.h>
+#include <ydb/library/yql/providers/pq/common/pq_meta_fields.h>
+#include <ydb/library/yql/providers/pq/proto/dq_io_state.pb.h>
+#include <ydb/library/yql/utils/log/log.h>
+#include <ydb/library/yql/utils/yql_panic.h>
+#include <ydb/core/fq/libs/events/events.h>
+#include <ydb/core/fq/libs/row_dispatcher/events/data_plane.h>
+
+#include <ydb/public/sdk/cpp/client/ydb_topic/topic.h>
+#include <ydb/public/sdk/cpp/client/ydb_types/credentials/credentials.h>
+
+#include <ydb/library/actors/core/actor_bootstrapped.h>
+#include <ydb/library/actors/core/event_local.h>
+#include <ydb/library/actors/core/events.h>
+#include <ydb/library/actors/core/hfunc.h>
+#include <ydb/library/actors/core/log.h>
+#include <ydb/library/actors/log_backend/actor_log_backend.h>
+#include <library/cpp/lwtrace/mon/mon_lwtrace.h>
+
+#include <util/generic/algorithm.h>
+#include <util/generic/hash.h>
+#include <util/generic/utility.h>
+#include <util/string/join.h>
+#include <ydb/library/actors/core/interconnect.h>
+
+#include <queue>
+#include <variant>
+
+#define SRC_LOG_T(s) \
+ LOG_TRACE_S(*NActors::TlsActivationContext, NKikimrServices::KQP_COMPUTE, LogPrefix << s)
+#define SRC_LOG_D(s) \
+ LOG_DEBUG_S(*NActors::TlsActivationContext, NKikimrServices::KQP_COMPUTE, LogPrefix << s)
+#define SRC_LOG_I(s) \
+ LOG_INFO_S(*NActors::TlsActivationContext, NKikimrServices::KQP_COMPUTE, LogPrefix << s)
+#define SRC_LOG_W(s) \
+ LOG_WARN_S(*NActors::TlsActivationContext, NKikimrServices::KQP_COMPUTE, LogPrefix << s)
+#define SRC_LOG_N(s) \
+ LOG_NOTICE_S(*NActors::TlsActivationContext, NKikimrServices::KQP_COMPUTE, LogPrefix << s)
+#define SRC_LOG_E(s) \
+ LOG_ERROR_S(*NActors::TlsActivationContext, NKikimrServices::KQP_COMPUTE, LogPrefix << s)
+#define SRC_LOG_C(s) \
+ LOG_CRIT_S(*NActors::TlsActivationContext, NKikimrServices::KQP_COMPUTE, LogPrefix << s)
+#define SRC_LOG(prio, s) \
+ LOG_LOG_S(*NActors::TlsActivationContext, prio, NKikimrServices::KQP_COMPUTE, LogPrefix << s)
+
+namespace NYql::NDq {
+
+using namespace NActors;
+using namespace NLog;
+using namespace NKikimr::NMiniKQL;
+
+namespace {
+
+LWTRACE_USING(DQ_PQ_PROVIDER);
+
+} // namespace
+
+struct TRowDispatcherReadActorMetrics { // Per-(tx, task) monitoring counters of the row-dispatcher read actor.
+    explicit TRowDispatcherReadActorMetrics(const TTxId& txId, ui64 taskId, const ::NMonitoring::TDynamicCounterPtr& counters)
+        : TxId(std::visit([](auto arg) { return ToString(arg); }, txId))
+        , Counters(counters) {
+        SubGroup = Counters->GetSubgroup("sink", "RdPqRead");
+        auto sink = SubGroup->GetSubgroup("tx_id", TxId);
+        auto task = sink->GetSubgroup("task_id", ToString(taskId));
+        InFlyGetNextBatch = task->GetCounter("InFlyGetNextBatch");
+    }
+
+    ~TRowDispatcherReadActorMetrics() {
+        SubGroup->RemoveSubgroup("tx_id", TxId); // was RemoveSubgroup("id", ...): key never registered, so the per-tx subgroup leaked
+    }
+
+    TString TxId;                                            // stringified transaction id, used as the subgroup key
+    ::NMonitoring::TDynamicCounterPtr Counters;
+    ::NMonitoring::TDynamicCounterPtr SubGroup;              // "sink"/"RdPqRead" subgroup owning the per-tx counters
+    ::NMonitoring::TDynamicCounters::TCounterPtr InFlyGetNextBatch;
+};
+
+struct TEvPrivate { // Actor-local (private) events of TDqPqRdReadActor.
+    enum EEv : ui32 {
+        EvBegin = EventSpaceBegin(NActors::TEvents::ES_PRIVATE),
+        EvPrintState = EvBegin + 20, // +20 offset presumably reserves ids shared with sibling pq actors -- TODO confirm
+        EvEnd
+    };
+    static_assert(EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE)");
+    struct TEvPrintState : public NActors::TEventLocal<TEvPrintState, EvPrintState> {};
+};
+
+ui64 PrintStatePeriodSec = 60; // period of the self-scheduled TEvPrintState (see Handle(TEvPrivate::TEvPrintState))
+
+class TDqPqRdReadActor : public NActors::TActor<TDqPqRdReadActor>, public NYql::NDq::NInternal::TDqPqReadActorBase { // PQ source that reads rows via remote row-dispatcher actors instead of a direct topic read session.
+public:
+    using TDebugOffsets = TMaybe<std::pair<ui64, ui64>>;
+
+    struct TReadyBatch { // One TEvMessageBatch worth of rows, queued until the compute actor drains it.
+    public:
+        TReadyBatch(ui64 partitionId, ui32 dataCapacity)
+            : PartitionId(partitionId) {
+            Data.reserve(dataCapacity);
+        }
+
+    public:
+        TVector<TString> Data;     // serialized (JSON) rows
+        i64 UsedSpace = 0;         // total byte size of Data, counted against freeSpace
+        ui64 NextOffset = 0;       // offset to resume from after this batch is consumed
+        ui64 PartitionId;
+    };
+
+    enum class EState { // lifecycle of the coordinator/session handshake, driven by ProcessState()
+        INIT,
+        WAIT_COORDINATOR_ID,
+        WAIT_PARTITIONS_ADDRES, // (sic) waiting for TEvCoordinatorResult with row-dispatcher addresses
+        STARTED
+    };
+private:
+    std::vector<std::tuple<TString, TPqMetaExtractor::TPqMetaExtractorLambda>> MetadataFields; // extractors for requested metadata columns
+    const TString Token;
+    TMaybe<NActors::TActorId> CoordinatorActorId;     // current coordinator, Nothing until first TEvCoordinatorChanged
+    NActors::TActorId LocalRowDispatcherActorId;
+    std::queue<TReadyBatch> ReadyBuffer;              // batches ready to hand to GetAsyncInputData
+    EState State = EState::INIT;
+    ui64 CoordinatorRequestCookie = 0;                // matches TEvCoordinatorRequest with its TEvCoordinatorResult
+    TRowDispatcherReadActorMetrics Metrics;
+    bool SchedulePrintStatePeriod = false;            // ensures periodic TEvPrintState is scheduled only once
+
+    struct SessionInfo { // per-partition reading session hosted by a remote row dispatcher
+        enum class ESessionStatus {
+            NoSession,
+            Started,
+        };
+        SessionInfo(
+            const TTxId& txId,
+            const NActors::TActorId selfId,
+            TActorId rowDispatcherActorId,
+            ui64 eventQueueId)
+            : RowDispatcherActorId(rowDispatcherActorId) {
+            EventsQueue.Init(txId, selfId, selfId, eventQueueId, /* KeepAlive */ true);
+            EventsQueue.OnNewRecipientId(rowDispatcherActorId);
+        }
+
+        ESessionStatus Status = ESessionStatus::NoSession;
+        ui64 NextOffset = 0;                          // next expected message offset in this partition
+        bool IsWaitingRowDispatcherResponse = false;
+        NYql::NDq::TRetryEventsQueue EventsQueue;     // reliable (seq-numbered, retried) transport to the row dispatcher
+        bool NewDataArrived = false;
+        TActorId RowDispatcherActorId;
+    };
+
+    TMap<ui64, SessionInfo> Sessions; // partition id -> session
+
+public:
+    TDqPqRdReadActor(
+        ui64 inputIndex,
+        TCollectStatsLevel statsLevel,
+        const TTxId& txId,
+        ui64 taskId,
+        const THolderFactory& holderFactory,
+        NPq::NProto::TDqPqTopicSource&& sourceParams,
+        NPq::NProto::TDqReadTaskParams&& readParams,
+        const NActors::TActorId& computeActorId,
+        const NActors::TActorId& localRowDispatcherActorId,
+        const TString& token,
+        const ::NMonitoring::TDynamicCounterPtr& counters);
+
+    void Handle(NFq::TEvRowDispatcher::TEvCoordinatorChanged::TPtr& ev);
+    void Handle(NFq::TEvRowDispatcher::TEvCoordinatorResult::TPtr& ev);
+    void Handle(NFq::TEvRowDispatcher::TEvMessageBatch::TPtr& ev);
+    void Handle(NFq::TEvRowDispatcher::TEvStartSessionAck::TPtr& ev);
+    void Handle(NFq::TEvRowDispatcher::TEvNewDataArrived::TPtr& ev);
+    void Handle(NFq::TEvRowDispatcher::TEvSessionError::TPtr& ev);
+    void Handle(NFq::TEvRowDispatcher::TEvStatus::TPtr& ev);
+
+    void HandleDisconnected(TEvInterconnect::TEvNodeDisconnected::TPtr& ev);
+    void HandleConnected(TEvInterconnect::TEvNodeConnected::TPtr& ev);
+    void Handle(NActors::TEvents::TEvUndelivered::TPtr& ev);
+    void Handle(const NYql::NDq::TEvRetryQueuePrivate::TEvRetry::TPtr&);
+    void Handle(const NYql::NDq::TEvRetryQueuePrivate::TEvPing::TPtr&);
+    void Handle(const NYql::NDq::TEvRetryQueuePrivate::TEvSessionClosed::TPtr&);
+    void Handle(NActors::TEvents::TEvPong::TPtr& ev);
+    void Handle(const NActors::TEvents::TEvPing::TPtr&);
+    void Handle(TEvPrivate::TEvPrintState::TPtr&);
+
+    STRICT_STFUNC(StateFunc, {
+        hFunc(NFq::TEvRowDispatcher::TEvCoordinatorChanged, Handle);
+        hFunc(NFq::TEvRowDispatcher::TEvCoordinatorResult, Handle);
+        hFunc(NFq::TEvRowDispatcher::TEvNewDataArrived, Handle);
+        hFunc(NFq::TEvRowDispatcher::TEvMessageBatch, Handle);
+        hFunc(NFq::TEvRowDispatcher::TEvStartSessionAck, Handle);
+        hFunc(NFq::TEvRowDispatcher::TEvSessionError, Handle);
+        hFunc(NFq::TEvRowDispatcher::TEvStatus, Handle);
+
+        hFunc(NActors::TEvents::TEvPong, Handle);
+        hFunc(TEvInterconnect::TEvNodeConnected, HandleConnected);
+        hFunc(TEvInterconnect::TEvNodeDisconnected, HandleDisconnected);
+        hFunc(NActors::TEvents::TEvUndelivered, Handle);
+        hFunc(NYql::NDq::TEvRetryQueuePrivate::TEvRetry, Handle);
+        hFunc(NYql::NDq::TEvRetryQueuePrivate::TEvPing, Handle);
+        hFunc(NYql::NDq::TEvRetryQueuePrivate::TEvSessionClosed, Handle);
+        hFunc(NActors::TEvents::TEvPing, Handle);
+        hFunc(TEvPrivate::TEvPrintState, Handle);
+    })
+
+    static constexpr char ActorName[] = "DQ_PQ_READ_ACTOR"; // NOTE(review): same name as the non-RD TDqPqReadActor -- confirm this is intentional
+
+    void CommitState(const NDqProto::TCheckpoint& checkpoint) override;
+    void PassAway() override;
+    i64 GetAsyncInputData(NKikimr::NMiniKQL::TUnboxedValueBatch& buffer, TMaybe<TInstant>& watermark, bool&, i64 freeSpace) override;
+    std::vector<ui64> GetPartitionsToRead() const;
+    std::pair<NUdf::TUnboxedValuePod, i64> CreateItem(const TString& data);
+    void ProcessState();
+    void Stop(const TString& message);
+    void StopSessions();
+    void ReInit();
+    void PrintInternalState();
+};
+
+TDqPqRdReadActor::TDqPqRdReadActor( // Builds the actor; the coordinator handshake itself starts lazily from ProcessState().
+    ui64 inputIndex,
+    TCollectStatsLevel statsLevel,
+    const TTxId& txId,
+    ui64 taskId,
+    const THolderFactory& /*holderFactory*/,
+    NPq::NProto::TDqPqTopicSource&& sourceParams,
+    NPq::NProto::TDqReadTaskParams&& readParams,
+    const NActors::TActorId& computeActorId,
+    const NActors::TActorId& localRowDispatcherActorId,
+    const TString& token,
+    const ::NMonitoring::TDynamicCounterPtr& counters)
+    : TActor<TDqPqRdReadActor>(&TDqPqRdReadActor::StateFunc)
+    , TDqPqReadActorBase(inputIndex, taskId, this->SelfId(), txId, std::move(sourceParams), std::move(readParams), computeActorId) // NOTE(review): SelfId() in a constructor -- same pattern as TDqPqReadActor, confirm it is valid before Register()
+    , Token(token)
+    , LocalRowDispatcherActorId(localRowDispatcherActorId)
+    , Metrics(txId, taskId, counters)
+{
+    MetadataFields.reserve(SourceParams.MetadataFieldsSize());
+    TPqMetaExtractor fieldsExtractor;
+    for (const auto& fieldName : SourceParams.GetMetadataFields()) {
+        MetadataFields.emplace_back(fieldName, fieldsExtractor.FindExtractorLambda(fieldName)); // resolve one extractor per requested metadata column
+    }
+
+    IngressStats.Level = statsLevel;
+    SRC_LOG_D("Start read actor, local row dispatcher " << LocalRowDispatcherActorId.ToString());
+}
+
+void TDqPqRdReadActor::ProcessState() { // Drives the INIT -> WAIT_COORDINATOR_ID -> WAIT_PARTITIONS_ADDRES -> STARTED handshake; safe to call repeatedly, advances as far as current knowledge allows.
+    switch (State) {
+    case EState::INIT:
+        if (!ReadyBuffer.empty()) { // defer (re)initialization until buffered data has been drained by the compute actor
+            return;
+        }
+        if (!CoordinatorActorId) {
+            SRC_LOG_D("Send TEvCoordinatorChangesSubscribe to local row dispatcher, self id " << SelfId());
+            Send(LocalRowDispatcherActorId, new NFq::TEvRowDispatcher::TEvCoordinatorChangesSubscribe());
+            if (!SchedulePrintStatePeriod) { // schedule the periodic state dump exactly once per actor lifetime
+                SchedulePrintStatePeriod = true;
+                Schedule(TDuration::Seconds(PrintStatePeriodSec), new TEvPrivate::TEvPrintState());
+            }
+        }
+        State = EState::WAIT_COORDINATOR_ID;
+        [[fallthrough]];
+    case EState::WAIT_COORDINATOR_ID: {
+        if (!CoordinatorActorId) { // still waiting for TEvCoordinatorChanged
+            return;
+        }
+        State = EState::WAIT_PARTITIONS_ADDRES;
+        auto partitionToRead = GetPartitionsToRead();
+        SRC_LOG_D("Send TEvCoordinatorRequest to coordinator " << CoordinatorActorId->ToString() << ", partIds: " << JoinSeq(", ", partitionToRead));
+        Send(
+            *CoordinatorActorId,
+            new NFq::TEvRowDispatcher::TEvCoordinatorRequest(SourceParams, partitionToRead),
+            IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession,
+            ++CoordinatorRequestCookie); // cookie distinguishes stale TEvCoordinatorResult replies
+        return;
+    }
+    case EState::WAIT_PARTITIONS_ADDRES:
+        if (Sessions.empty()) { // TEvCoordinatorResult not processed yet
+            return;
+        }
+
+        for (auto& [partitionId, sessionInfo] : Sessions) {
+            if (sessionInfo.Status == SessionInfo::ESessionStatus::NoSession) {
+                TMaybe<ui64> readOffset;
+                TPartitionKey partitionKey{TString{}, partitionId};
+                const auto offsetIt = PartitionToOffset.find(partitionKey); // resume from a checkpointed offset when one exists
+                if (offsetIt != PartitionToOffset.end()) {
+                    SRC_LOG_D("readOffset found" );
+                    readOffset = offsetIt->second;
+                }
+
+                SRC_LOG_D("Send TEvStartSession to " << sessionInfo.RowDispatcherActorId
+                    << ", offset " << readOffset
+                    << ", partitionId " << partitionId);
+
+                auto event = new NFq::TEvRowDispatcher::TEvStartSession(
+                    SourceParams,
+                    partitionId,
+                    Token,
+                    readOffset,
+                    StartingMessageTimestamp.MilliSeconds(),
+                    std::visit([](auto arg) { return ToString(arg); }, TxId));
+                sessionInfo.EventsQueue.Send(event);
+                sessionInfo.IsWaitingRowDispatcherResponse = true;
+                sessionInfo.Status = SessionInfo::ESessionStatus::Started;
+            }
+        }
+        State = EState::STARTED;
+        return;
+    case EState::STARTED:
+        return;
+    }
+}
+
+
+void TDqPqRdReadActor::CommitState(const NDqProto::TCheckpoint& /*checkpoint*/) { // Intentionally empty: nothing to commit here -- offsets presumably travel via the base-class SaveState/LoadState (TODO confirm)
+}
+
+void TDqPqRdReadActor::StopSessions() { // Sends TEvStopSession to every row dispatcher with a started session; does not clear Sessions itself.
+    SRC_LOG_I("Stop all session");
+    for (auto& [partitionId, sessionInfo] : Sessions) {
+        if (sessionInfo.Status == SessionInfo::ESessionStatus::NoSession) { // never started -- nothing to stop
+            continue;
+        }
+        auto event = std::make_unique<NFq::TEvRowDispatcher::TEvStopSession>();
+        *event->Record.MutableSource() = SourceParams;
+        event->Record.SetPartitionId(partitionId);
+        SRC_LOG_D("Send StopSession to " << sessionInfo.RowDispatcherActorId);
+        sessionInfo.EventsQueue.Send(event.release());
+    }
+}
+
+// IActor & IDqComputeActorAsyncInput
+// IActor & IDqComputeActorAsyncInput
+void TDqPqRdReadActor::PassAway() { // Is called from Compute Actor; stops remote sessions before the actor dies.
+    SRC_LOG_D("PassAway");
+    PrintInternalState();
+    StopSessions();
+    TActor<TDqPqRdReadActor>::PassAway();
+
+    // TODO: RetryQueue::Unsubscribe()
+}
+
+i64 TDqPqRdReadActor::GetAsyncInputData(NKikimr::NMiniKQL::TUnboxedValueBatch& buffer, TMaybe<TInstant>& /*watermark*/, bool&, i64 freeSpace) { // Drains ReadyBuffer into `buffer` until freeSpace is exhausted; returns bytes handed over.
+    SRC_LOG_T("GetAsyncInputData freeSpace = " << freeSpace);
+
+    ProcessState();
+    if (ReadyBuffer.empty() || !freeSpace) {
+        return 0;
+    }
+    i64 usedSpace = 0;
+    buffer.clear();
+    do { // a whole batch is always emitted, so freeSpace may go negative by at most one batch
+        auto& readyBatch = ReadyBuffer.front();
+        SRC_LOG_T("Return " << readyBatch.Data.size() << " items");
+
+        for (const auto& message : readyBatch.Data) {
+            auto [item, size] = CreateItem(message);
+            buffer.push_back(std::move(item));
+        }
+        usedSpace += readyBatch.UsedSpace;
+        freeSpace -= readyBatch.UsedSpace;
+        SRC_LOG_T("usedSpace " << usedSpace);
+        SRC_LOG_T("freeSpace " << freeSpace);
+
+        TPartitionKey partitionKey{TString{}, readyBatch.PartitionId};
+        PartitionToOffset[partitionKey] = readyBatch.NextOffset; // advance the checkpointable offset only after handing data over
+        SRC_LOG_T("NextOffset " << readyBatch.NextOffset);
+        ReadyBuffer.pop();
+    } while (freeSpace > 0 && !ReadyBuffer.empty());
+
+    ProcessState(); // buffer may now be empty, letting a deferred re-init proceed
+    return usedSpace;
+}
+
+std::vector<ui64> TDqPqRdReadActor::GetPartitionsToRead() const { // Partitions owned by this task: GroupId, GroupId + DqPartitionsCount, ... below TopicPartitionsCount.
+    std::vector<ui64> res;
+
+    ui64 currentPartition = ReadParams.GetPartitioningParams().GetEachTopicPartitionGroupId();
+    do {
+        res.emplace_back(currentPartition); // 0-based in topic API; do/while always yields the first id -- assumes GroupId < TopicPartitionsCount (TODO confirm)
+        currentPartition += ReadParams.GetPartitioningParams().GetDqPartitionsCount();
+    } while (currentPartition < ReadParams.GetPartitioningParams().GetTopicPartitionsCount());
+    return res;
+}
+
+void TDqPqRdReadActor::Handle(NFq::TEvRowDispatcher::TEvStartSessionAck::TPtr& ev) { // Session-start confirmation; only advances the retry queue's seq-number bookkeeping.
+    const NYql::NDqProto::TMessageTransportMeta& meta = ev->Get()->Record.GetTransportMeta();
+    SRC_LOG_D("TEvStartSessionAck from " << ev->Sender << ", seqNo " << meta.GetSeqNo() << ", ConfirmedSeqNo " << meta.GetConfirmedSeqNo());
+
+    ui64 partitionId = ev->Get()->Record.GetConsumer().GetPartitionId();
+    auto sessionIt = Sessions.find(partitionId);
+    YQL_ENSURE(sessionIt != Sessions.end(), "Unknown partition id");
+    auto& sessionInfo = sessionIt->second;
+    if (!sessionInfo.EventsQueue.OnEventReceived(ev)) { // stale/duplicate transport message
+        SRC_LOG_W("Wrong seq num ignore message, seqNo " << meta.GetSeqNo());
+        return;
+    }
+}
+
+void TDqPqRdReadActor::Handle(NFq::TEvRowDispatcher::TEvSessionError::TPtr& ev) { // Remote session failed: propagate as a fatal source error to the compute actor.
+    const NYql::NDqProto::TMessageTransportMeta& meta = ev->Get()->Record.GetTransportMeta();
+    SRC_LOG_D("TEvSessionError from " << ev->Sender << ", seqNo " << meta.GetSeqNo() << ", ConfirmedSeqNo " << meta.GetConfirmedSeqNo());
+
+    ui64 partitionId = ev->Get()->Record.GetPartitionId();
+    auto sessionIt = Sessions.find(partitionId);
+    YQL_ENSURE(sessionIt != Sessions.end(), "Unknown partition id");
+
+    auto& sessionInfo = sessionIt->second;
+    if (!sessionInfo.EventsQueue.OnEventReceived(ev)) { // stale/duplicate transport message
+        SRC_LOG_W("Wrong seq num ignore message, seqNo " << meta.GetSeqNo());
+        return;
+    }
+    Stop(ev->Get()->Record.GetMessage());
+}
+
+void TDqPqRdReadActor::Handle(NFq::TEvRowDispatcher::TEvStatus::TPtr& ev) { // Periodic offset report from the row dispatcher; keeps checkpoint offsets fresh while no data flows.
+    const NYql::NDqProto::TMessageTransportMeta& meta = ev->Get()->Record.GetTransportMeta();
+    SRC_LOG_D("TEvStatus from " << ev->Sender << ", offset " << ev->Get()->Record.GetNextMessageOffset() << ", seqNo " << meta.GetSeqNo() << ", ConfirmedSeqNo " << meta.GetConfirmedSeqNo());
+
+    ui64 partitionId = ev->Get()->Record.GetPartitionId();
+    auto sessionIt = Sessions.find(partitionId);
+    YQL_ENSURE(sessionIt != Sessions.end(), "Unknown partition id");
+    auto& sessionInfo = sessionIt->second;
+
+    if (!sessionInfo.EventsQueue.OnEventReceived(ev)) { // stale/duplicate transport message
+        SRC_LOG_W("Wrong seq num ignore message, seqNo " << meta.GetSeqNo());
+        return;
+    }
+
+    if (ReadyBuffer.empty()) { // only safe when no undelivered batches would be skipped by jumping the offset forward
+        TPartitionKey partitionKey{TString{}, partitionId};
+        PartitionToOffset[partitionKey] = ev->Get()->Record.GetNextMessageOffset();
+    }
+}
+
+void TDqPqRdReadActor::Handle(NFq::TEvRowDispatcher::TEvNewDataArrived::TPtr& ev) { // Data is available remotely: immediately request it with TEvGetNextBatch.
+    const NYql::NDqProto::TMessageTransportMeta& meta = ev->Get()->Record.GetTransportMeta();
+    SRC_LOG_T("TEvNewDataArrived from " << ev->Sender << ", part id " << ev->Get()->Record.GetPartitionId() << ", seqNo " << meta.GetSeqNo() << ", ConfirmedSeqNo " << meta.GetConfirmedSeqNo());
+
+    ui64 partitionId = ev->Get()->Record.GetPartitionId();
+    auto sessionIt = Sessions.find(partitionId);
+    if (sessionIt == Sessions.end()) { // fail the query gracefully rather than assert
+        Stop("Internal error: unknown partition id " + ToString(partitionId));
+        return;
+    }
+
+    auto& sessionInfo = sessionIt->second;
+    if (!sessionInfo.EventsQueue.OnEventReceived(ev)) { // stale/duplicate transport message
+        SRC_LOG_W("Wrong seq num ignore message, seqNo " << meta.GetSeqNo());
+        return;
+    }
+    sessionInfo.NewDataArrived = true;
+    Metrics.InFlyGetNextBatch->Inc(); // decremented when the matching TEvMessageBatch arrives
+    auto event = std::make_unique<NFq::TEvRowDispatcher::TEvGetNextBatch>();
+    event->Record.SetPartitionId(partitionId);
+    sessionInfo.EventsQueue.Send(event.release());
+}
+
+void TDqPqRdReadActor::Handle(const NYql::NDq::TEvRetryQueuePrivate::TEvRetry::TPtr& ev) { // Retry-queue timer: resend unconfirmed events of one partition's queue (EventQueueId == partition id).
+    SRC_LOG_D("TEvRetry");
+    ui64 partitionId = ev->Get()->EventQueueId;
+
+    auto sessionIt = Sessions.find(partitionId);
+    if (sessionIt == Sessions.end()) { // session may have been dropped by ReInit between scheduling and delivery
+        SRC_LOG_W("Unknown partition id " << partitionId << ", skip TEvRetry");
+        return;
+    }
+    sessionIt->second.EventsQueue.Retry();
+}
+
+void TDqPqRdReadActor::Handle(const NYql::NDq::TEvRetryQueuePrivate::TEvPing::TPtr& ev) { // Keep-alive timer of one partition's retry queue (EventQueueId == partition id).
+    SRC_LOG_T("TEvRetryQueuePrivate::TEvPing");
+    ui64 partitionId = ev->Get()->EventQueueId;
+
+    auto sessionIt = Sessions.find(partitionId);
+    if (sessionIt == Sessions.end()) { // session may have been dropped by ReInit between scheduling and delivery
+        SRC_LOG_W("Unknown partition id " << partitionId << ", skip TEvPing");
+        return;
+    }
+    sessionIt->second.EventsQueue.Ping();
+}
+
+void TDqPqRdReadActor::Handle(const NActors::TEvents::TEvPing::TPtr& ev) { // Answer a peer's keep-alive ping.
+    SRC_LOG_T("NActors::TEvents::TEvPing");
+    Send(ev->Sender, new NActors::TEvents::TEvPong());
+}
+
+void TDqPqRdReadActor::Handle(NFq::TEvRowDispatcher::TEvCoordinatorChanged::TPtr& ev) { // New leader elected: remember it; a changed (vs first-seen) coordinator forces a full session re-init.
+    SRC_LOG_D("TEvCoordinatorChanged, new coordinator " << ev->Get()->CoordinatorActorId);
+
+    if (CoordinatorActorId
+        && CoordinatorActorId == ev->Get()->CoordinatorActorId) { // same leader -- nothing to do
+        return;
+    }
+
+    if (!CoordinatorActorId) { // first announcement: just continue the handshake
+        CoordinatorActorId = ev->Get()->CoordinatorActorId;
+        ProcessState();
+        return;
+    }
+
+    CoordinatorActorId = ev->Get()->CoordinatorActorId;
+    SRC_LOG_I("Coordinator is changed, reinit all sessions");
+    ReInit();
+    ProcessState();
+}
+
+void TDqPqRdReadActor::ReInit() { // Tears down all sessions and restarts the handshake from INIT; already-buffered data is still offered to the compute actor.
+    SRC_LOG_I("ReInit state");
+    StopSessions();
+    Sessions.clear();
+    State = EState::INIT;
+    if (!ReadyBuffer.empty()) { // let the compute actor drain the buffer -- INIT waits for it to empty
+        Send(ComputeActorId, new TEvNewAsyncInputDataArrived(InputIndex));
+    }
+    ProcessState();
+}
+
+void TDqPqRdReadActor::Stop(const TString& message) { // Fails the whole query: reports a fatal source error to the compute actor.
+    NYql::TIssues issues;
+    issues.AddIssue(NYql::TIssue{message});
+    SRC_LOG_E("Stop read actor, error: " << message);
+    Send(ComputeActorId, new TEvAsyncInputError(InputIndex, issues, NYql::NDqProto::StatusIds::BAD_REQUEST)); // TODO: use UNAVAILABLE ?
+}
+
+void TDqPqRdReadActor::Handle(NFq::TEvRowDispatcher::TEvCoordinatorResult::TPtr& ev) { // Partition -> row-dispatcher assignment arrived: create SessionInfo entries, then start sessions via ProcessState().
+    SRC_LOG_D("TEvCoordinatorResult from " << ev->Sender.ToString() << ", cookie " << ev->Cookie);
+    if (ev->Cookie != CoordinatorRequestCookie) { // reply to an outdated TEvCoordinatorRequest
+        SRC_LOG_W("Ignore TEvCoordinatorResult. wrong cookie");
+        return;
+    }
+    for (auto& p : ev->Get()->Record.GetPartitions()) {
+        TActorId rowDispatcherActorId = ActorIdFromProto(p.GetActorId());
+        SRC_LOG_D(" rowDispatcherActorId:" << rowDispatcherActorId);
+
+        for (auto partitionId : p.GetPartitionId()) {
+            SRC_LOG_D(" partitionId:" << partitionId);
+            if (!Sessions.contains(partitionId)) { // TODO
+                Sessions.emplace(
+                    std::piecewise_construct,
+                    std::forward_as_tuple(partitionId),
+                    std::forward_as_tuple(TxId, SelfId(), rowDispatcherActorId, partitionId)); // event queue id == partition id
+            }
+        }
+    }
+    ProcessState();
+}
+
+void TDqPqRdReadActor::HandleConnected(TEvInterconnect::TEvNodeConnected::TPtr& ev) { // Interconnect session (re)established: let every retry queue resume delivery to that node.
+    SRC_LOG_D("EvNodeConnected " << ev->Get()->NodeId);
+    for (auto& [partitionId, sessionInfo] : Sessions) {
+        sessionInfo.EventsQueue.HandleNodeConnected(ev->Get()->NodeId);
+    }
+}
+
+void TDqPqRdReadActor::HandleDisconnected(TEvInterconnect::TEvNodeDisconnected::TPtr& ev) { // Interconnect to a node lost: notify retry queues; deliberately do not stop the actor.
+    SRC_LOG_D("TEvNodeDisconnected, node id " << ev->Get()->NodeId);
+    for (auto& [partitionId, sessionInfo] : Sessions) {
+        sessionInfo.EventsQueue.HandleNodeDisconnected(ev->Get()->NodeId);
+    }
+    // In case of row dispatcher disconnection: wait connected or SessionClosed(). TODO: Stop actor after timeout.
+    // In case of coordinator disconnection: wait CoordinatorChanged().
+    //Stop(TString{"Node disconnected, nodeId "} + ToString(ev->Get()->NodeId));
+}
+
+void TDqPqRdReadActor::Handle(NActors::TEvents::TEvUndelivered::TPtr& ev) { // An event could not be delivered: inform all retry queues; an undeliverable coordinator request forces re-init.
+    SRC_LOG_D("TEvUndelivered, " << ev->Get()->ToString() << " from " << ev->Sender.ToString());
+    for (auto& [partitionId, sessionInfo] : Sessions) {
+        sessionInfo.EventsQueue.HandleUndelivered(ev);
+    }
+
+    if (CoordinatorActorId && *CoordinatorActorId == ev->Sender) { // our TEvCoordinatorRequest bounced
+        SRC_LOG_D("TEvUndelivered to coordinator, reinit");
+        ReInit();
+    }
+}
+
+void TDqPqRdReadActor::Handle(NFq::TEvRowDispatcher::TEvMessageBatch::TPtr& ev) { // Ingests a batch of JSON rows for one partition and notifies the compute actor.
+    const NYql::NDqProto::TMessageTransportMeta& meta = ev->Get()->Record.GetTransportMeta();
+    SRC_LOG_T("TEvMessageBatch from " << ev->Sender << ", seqNo " << meta.GetSeqNo() << ", ConfirmedSeqNo " << meta.GetConfirmedSeqNo());
+    ui64 partitionId = ev->Get()->Record.GetPartitionId();
+    auto it = Sessions.find(partitionId);
+    if (it == Sessions.end()) { // unknown partition: fail gracefully (the former YQL_ENSURE above threw first and made this branch unreachable)
+        Stop("Wrong session data");
+        return;
+    }
+
+    Metrics.InFlyGetNextBatch->Dec();
+    auto& sessionInfo = it->second;
+    if (!sessionInfo.EventsQueue.OnEventReceived(ev)) { // stale/duplicate transport message
+        SRC_LOG_W("Wrong seq num ignore message, seqNo " << meta.GetSeqNo());
+        return;
+    }
+    ReadyBuffer.emplace(partitionId, ev->Get()->Record.MessagesSize());
+    TReadyBatch& activeBatch = ReadyBuffer.back();
+
+    ui64 bytes = 0;
+    for (const auto& message : ev->Get()->Record.GetMessages()) {
+        SRC_LOG_T("Json: " << message.GetJson());
+        activeBatch.Data.emplace_back(message.GetJson());
+        activeBatch.UsedSpace += message.GetJson().size();
+        sessionInfo.NextOffset = message.GetOffset() + 1;
+        bytes += message.GetJson().size();
+        SRC_LOG_T("TEvMessageBatch NextOffset " << sessionInfo.NextOffset);
+    }
+    IngressStats.Bytes += bytes;
+    IngressStats.Chunks++;
+    activeBatch.NextOffset = ev->Get()->Record.GetNextMessageOffset();
+    Send(ComputeActorId, new TEvNewAsyncInputDataArrived(InputIndex));
+}
+
+std::pair<NUdf::TUnboxedValuePod, i64> TDqPqRdReadActor::CreateItem(const TString& data) { // Wraps one serialized row into an unboxed string value; returns {value, accounted bytes}.
+    i64 usedSpace = 0;
+    NUdf::TUnboxedValuePod item;
+    item = NKikimr::NMiniKQL::MakeString(NUdf::TStringRef(data.Data(), data.Size()));
+    usedSpace += data.Size();
+    return std::make_pair(item, usedSpace);
+}
+
+void TDqPqRdReadActor::Handle(const NYql::NDq::TEvRetryQueuePrivate::TEvSessionClosed::TPtr& ev) { // The retry queue gave up on its peer: rebuild everything from scratch.
+    SRC_LOG_D("Session closed, event queue id " << ev->Get()->EventQueueId);
+    ReInit();
+}
+
+void TDqPqRdReadActor::Handle(NActors::TEvents::TEvPong::TPtr& ev) { // Keep-alive reply; logging only.
+    SRC_LOG_T("TEvPong from " << ev->Sender);
+}
+
+void TDqPqRdReadActor::Handle(TEvPrivate::TEvPrintState::TPtr&) { // Periodic self-message: dump state and re-arm the timer.
+    Schedule(TDuration::Seconds(PrintStatePeriodSec), new TEvPrivate::TEvPrintState());
+    PrintInternalState();
+}
+
+void TDqPqRdReadActor::PrintInternalState() { // Logs per-partition retry-queue state for debugging.
+    TStringStream str;
+    str << "State:\n";
+    for (auto& [partitionId, sessionInfo] : Sessions) {
+        str << "   partId " << partitionId << " ";
+        sessionInfo.EventsQueue.PrintInternalState(str);
+    }
+    SRC_LOG_D(str.Str());
+}
+
+std::pair<IDqComputeActorAsyncInput*, NActors::IActor*> CreateDqPqRdReadActor( // Factory: parses task params, resolves the auth token and builds the actor (caller registers it).
+    NPq::NProto::TDqPqTopicSource&& settings,
+    ui64 inputIndex,
+    TCollectStatsLevel statsLevel,
+    TTxId txId,
+    ui64 taskId,
+    const THashMap<TString, TString>& secureParams,
+    const THashMap<TString, TString>& taskParams,
+    const NActors::TActorId& computeActorId,
+    const NActors::TActorId& localRowDispatcherActorId,
+    const NKikimr::NMiniKQL::THolderFactory& holderFactory,
+    const ::NMonitoring::TDynamicCounterPtr& counters,
+    i64 /*bufferSize*/) // TODO
+{
+    auto taskParamsIt = taskParams.find("pq");
+    YQL_ENSURE(taskParamsIt != taskParams.end(), "Failed to get pq task params");
+
+    NPq::NProto::TDqReadTaskParams readTaskParamsMsg;
+    YQL_ENSURE(readTaskParamsMsg.ParseFromString(taskParamsIt->second), "Failed to parse DqPqRead task params");
+
+    const TString& tokenName = settings.GetToken().GetName();
+    const TString token = secureParams.Value(tokenName, TString()); // empty when no token configured
+
+    TDqPqRdReadActor* actor = new TDqPqRdReadActor(
+        inputIndex,
+        statsLevel,
+        txId,
+        taskId,
+        holderFactory,
+        std::move(settings),
+        std::move(readTaskParamsMsg),
+        computeActorId,
+        localRowDispatcherActorId,
+        token,
+        counters
+    );
+
+    return {actor, actor}; // same object serves as both the async input and the actor
+}
+
+} // namespace NYql::NDq
diff --git a/ydb/library/yql/providers/pq/async_io/dq_pq_rd_read_actor.h b/ydb/library/yql/providers/pq/async_io/dq_pq_rd_read_actor.h
new file mode 100644
index 00000000000..d1131fd7a76
--- /dev/null
+++ b/ydb/library/yql/providers/pq/async_io/dq_pq_rd_read_actor.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <ydb/library/yql/dq/actors/compute/dq_compute_actor_async_io_factory.h>
+#include <ydb/library/yql/dq/actors/compute/dq_compute_actor_async_io.h>
+
+#include <ydb/library/yql/providers/common/token_accessor/client/factory.h>
+#include <ydb/library/yql/minikql/computation/mkql_computation_node_holders.h>
+
+#include <ydb/library/yql/providers/pq/proto/dq_io.pb.h>
+#include <ydb/library/yql/providers/pq/proto/dq_task_params.pb.h>
+
+#include <ydb/public/sdk/cpp/client/ydb_driver/driver.h>
+
+#include <ydb/library/actors/core/actor.h>
+
+#include <util/generic/size_literals.h>
+#include <util/system/types.h>
+#include <ydb/library/security/ydb_credentials_provider_factory.h>
+
+namespace NYql::NDq {
+class TDqAsyncIoFactory;
+
+const i64 PQRdReadDefaultFreeSpace = 16_MB;
+
+std::pair<IDqComputeActorAsyncInput*, NActors::IActor*> CreateDqPqRdReadActor(
+ NPq::NProto::TDqPqTopicSource&& settings,
+ ui64 inputIndex,
+ TCollectStatsLevel statsLevel,
+ TTxId txId,
+ ui64 taskId,
+ const THashMap<TString, TString>& secureParams,
+ const THashMap<TString, TString>& taskParams,
+ const NActors::TActorId& computeActorId,
+ const NActors::TActorId& localRowDispatcherActorId,
+ const NKikimr::NMiniKQL::THolderFactory& holderFactory,
+ const ::NMonitoring::TDynamicCounterPtr& counters,
+ i64 bufferSize = PQRdReadDefaultFreeSpace);
+
+} // namespace NYql::NDq
diff --git a/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor.cpp b/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor.cpp
index 112187cd72b..b57f80c8478 100644
--- a/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor.cpp
+++ b/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor.cpp
@@ -12,6 +12,8 @@
#include <ydb/library/yql/minikql/mkql_alloc.h>
#include <ydb/library/yql/minikql/mkql_string_util.h>
#include <ydb/library/yql/providers/pq/async_io/dq_pq_meta_extractor.h>
+#include <ydb/library/yql/providers/pq/async_io/dq_pq_rd_read_actor.h>
+#include <ydb/library/yql/providers/pq/async_io/dq_pq_read_actor_base.h>
#include <ydb/library/yql/providers/pq/common/pq_meta_fields.h>
#include <ydb/library/yql/providers/pq/proto/dq_io_state.pb.h>
#include <ydb/library/yql/utils/log/log.h>
@@ -28,11 +30,14 @@
#include <ydb/library/actors/log_backend/actor_log_backend.h>
#include <library/cpp/lwtrace/mon/mon_lwtrace.h>
+#include <ydb/core/fq/libs/row_dispatcher/events/data_plane.h>
+
#include <util/generic/algorithm.h>
#include <util/generic/hash.h>
#include <util/generic/utility.h>
#include <util/string/join.h>
+
#include <queue>
#include <variant>
@@ -59,8 +64,6 @@ using namespace NActors;
using namespace NLog;
using namespace NKikimr::NMiniKQL;
-constexpr ui32 StateVersion = 1;
-
namespace {
LWTRACE_USING(DQ_PQ_PROVIDER);
@@ -84,7 +87,7 @@ struct TEvPrivate {
} // namespace
-class TDqPqReadActor : public NActors::TActor<TDqPqReadActor>, public IDqComputeActorAsyncInput {
+class TDqPqReadActor : public NActors::TActor<TDqPqReadActor>, public NYql::NDq::NInternal::TDqPqReadActorBase {
struct TMetrics {
TMetrics(const TTxId& txId, ui64 taskId, const ::NMonitoring::TDynamicCounterPtr& counters)
: TxId(std::visit([](auto arg) { return ToString(arg); }, txId))
@@ -127,18 +130,12 @@ public:
const ::NMonitoring::TDynamicCounterPtr& counters,
i64 bufferSize)
: TActor<TDqPqReadActor>(&TDqPqReadActor::StateFunc)
- , InputIndex(inputIndex)
- , TxId(txId)
+ , TDqPqReadActorBase(inputIndex, taskId, this->SelfId(), txId, std::move(sourceParams), std::move(readParams), computeActorId)
, Metrics(txId, taskId, counters)
, BufferSize(bufferSize)
, HolderFactory(holderFactory)
- , LogPrefix(TStringBuilder() << "SelfId: " << this->SelfId() << ", TxId: " << TxId << ", task: " << taskId << ". PQ source. ")
, Driver(std::move(driver))
, CredentialsProviderFactory(std::move(credentialsProviderFactory))
- , SourceParams(std::move(sourceParams))
- , ReadParams(std::move(readParams))
- , StartingMessageTimestamp(TInstant::MilliSeconds(TInstant::Now().MilliSeconds())) // this field is serialized as milliseconds, so drop microseconds part to be consistent with storage
- , ComputeActorId(computeActorId)
{
MetadataFields.reserve(SourceParams.MetadataFieldsSize());
TPqMetaExtractor fieldsExtractor;
@@ -164,64 +161,13 @@ public:
public:
void SaveState(const NDqProto::TCheckpoint& checkpoint, TSourceState& state) override {
- NPq::NProto::TDqPqTopicSourceState stateProto;
-
- NPq::NProto::TDqPqTopicSourceState::TTopicDescription* topic = stateProto.AddTopics();
- topic->SetDatabaseId(SourceParams.GetDatabaseId());
- topic->SetEndpoint(SourceParams.GetEndpoint());
- topic->SetDatabase(SourceParams.GetDatabase());
- topic->SetTopicPath(SourceParams.GetTopicPath());
-
- for (const auto& [clusterAndPartition, offset] : PartitionToOffset) {
- const auto& [cluster, partition] = clusterAndPartition;
- NPq::NProto::TDqPqTopicSourceState::TPartitionReadState* partitionState = stateProto.AddPartitions();
- partitionState->SetTopicIndex(0); // Now we are supporting only one topic per source.
- partitionState->SetCluster(cluster);
- partitionState->SetPartition(partition);
- partitionState->SetOffset(offset);
- }
-
- stateProto.SetStartingMessageTimestampMs(StartingMessageTimestamp.MilliSeconds());
- stateProto.SetIngressBytes(IngressStats.Bytes);
-
- TString stateBlob;
- YQL_ENSURE(stateProto.SerializeToString(&stateBlob));
-
- state.Data.emplace_back(stateBlob, StateVersion);
-
+ TDqPqReadActorBase::SaveState(checkpoint, state);
DeferredCommits.emplace(checkpoint.GetId(), std::move(CurrentDeferredCommit));
CurrentDeferredCommit = NYdb::NTopic::TDeferredCommit();
}
void LoadState(const TSourceState& state) override {
- TInstant minStartingMessageTs = state.DataSize() ? TInstant::Max() : StartingMessageTimestamp;
- ui64 ingressBytes = 0;
- for (const auto& data : state.Data) {
- if (data.Version == StateVersion) { // Current version
- NPq::NProto::TDqPqTopicSourceState stateProto;
- YQL_ENSURE(stateProto.ParseFromString(data.Blob), "Serialized state is corrupted");
- YQL_ENSURE(stateProto.TopicsSize() == 1, "One topic per source is expected");
- PartitionToOffset.reserve(PartitionToOffset.size() + stateProto.PartitionsSize());
- for (const NPq::NProto::TDqPqTopicSourceState::TPartitionReadState& partitionProto : stateProto.GetPartitions()) {
- ui64& offset = PartitionToOffset[TPartitionKey{partitionProto.GetCluster(), partitionProto.GetPartition()}];
- if (offset) {
- offset = Min(offset, partitionProto.GetOffset());
- } else {
- offset = partitionProto.GetOffset();
- }
- }
- minStartingMessageTs = Min(minStartingMessageTs, TInstant::MilliSeconds(stateProto.GetStartingMessageTimestampMs()));
- ingressBytes += stateProto.GetIngressBytes();
- } else {
- ythrow yexception() << "Invalid state version " << data.Version;
- }
- }
- for (const auto& [key, value] : PartitionToOffset) {
- SRC_LOG_D("SessionId: " << GetSessionId() << " Restoring offset: cluster " << key.first << ", partition id " << key.second << ", offset: " << value);
- }
- StartingMessageTimestamp = minStartingMessageTs;
- IngressStats.Bytes += ingressBytes;
- IngressStats.Chunks++;
+ TDqPqReadActorBase::LoadState(state);
InitWatermarkTracker();
if (ReadSession) {
@@ -239,14 +185,6 @@ public:
}
}
- ui64 GetInputIndex() const override {
- return InputIndex;
- }
-
- const TDqAsyncStats& GetIngressStats() const override {
- return IngressStats;
- }
-
NYdb::NTopic::TTopicClient& GetTopicClient() {
if (!TopicClient) {
TopicClient = std::make_unique<NYdb::NTopic::TTopicClient>(Driver, GetTopicClientSettings());
@@ -262,7 +200,7 @@ public:
return *ReadSession;
}
- TString GetSessionId() const {
+ TString GetSessionId() const override {
return ReadSession ? ReadSession->GetSessionId() : TString{"empty"};
}
@@ -625,23 +563,14 @@ private:
};
private:
- const ui64 InputIndex;
- TDqAsyncStats IngressStats;
- const TTxId TxId;
TMetrics Metrics;
const i64 BufferSize;
const THolderFactory& HolderFactory;
- const TString LogPrefix;
NYdb::TDriver Driver;
std::shared_ptr<NYdb::ICredentialsProviderFactory> CredentialsProviderFactory;
- const NPq::NProto::TDqPqTopicSource SourceParams;
- const NPq::NProto::TDqReadTaskParams ReadParams;
std::unique_ptr<NYdb::NTopic::TTopicClient> TopicClient;
std::shared_ptr<NYdb::NTopic::IReadSession> ReadSession;
NThreading::TFuture<void> EventFuture;
- THashMap<TPartitionKey, ui64> PartitionToOffset; // {cluster, partition} -> offset of next event.
- TInstant StartingMessageTimestamp;
- const NActors::TActorId ComputeActorId;
std::queue<std::pair<ui64, NYdb::NTopic::TDeferredCommit>> DeferredCommits;
NYdb::NTopic::TDeferredCommit CurrentDeferredCommit;
bool SubscribedOnEvent = false;
@@ -702,7 +631,25 @@ void RegisterDqPqReadActorFactory(TDqAsyncIoFactory& factory, NYdb::TDriver driv
IDqAsyncIoFactory::TSourceArguments&& args)
{
NLwTraceMonPage::ProbeRegistry().AddProbesList(LWTRACE_GET_PROBES(DQ_PQ_PROVIDER));
- return CreateDqPqReadActor(
+
+ if (!settings.GetSharedReading()) {
+ return CreateDqPqReadActor(
+ std::move(settings),
+ args.InputIndex,
+ args.StatsLevel,
+ args.TxId,
+ args.TaskId,
+ args.SecureParams,
+ args.TaskParams,
+ driver,
+ credentialsFactory,
+ args.ComputeActorId,
+ args.HolderFactory,
+ counters,
+ PQReadDefaultFreeSpace);
+ }
+
+ return CreateDqPqRdReadActor(
std::move(settings),
args.InputIndex,
args.StatsLevel,
@@ -710,9 +657,8 @@ void RegisterDqPqReadActorFactory(TDqAsyncIoFactory& factory, NYdb::TDriver driv
args.TaskId,
args.SecureParams,
args.TaskParams,
- driver,
- credentialsFactory,
args.ComputeActorId,
+ NFq::RowDispatcherServiceActorId(),
args.HolderFactory,
counters,
PQReadDefaultFreeSpace);
diff --git a/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor_base.cpp b/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor_base.cpp
new file mode 100644
index 00000000000..c1bc837f4cb
--- /dev/null
+++ b/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor_base.cpp
@@ -0,0 +1,86 @@
+#include "dq_pq_read_actor.h"
+
+#include <ydb/library/yql/dq/actors/compute/dq_compute_actor_async_io_factory.h>
+#include <ydb/library/yql/dq/actors/compute/dq_compute_actor_async_io.h>
+#include <ydb/library/yql/dq/actors/protos/dq_events.pb.h>
+#include <ydb/library/yql/dq/common/dq_common.h>
+#include <ydb/library/yql/dq/actors/compute/dq_checkpoints_states.h>
+
+#include <ydb/library/yql/minikql/comp_nodes/mkql_saveload.h>
+#include <ydb/library/yql/providers/pq/async_io/dq_pq_read_actor_base.h>
+#include <ydb/library/yql/providers/pq/proto/dq_io_state.pb.h>
+#include <ydb/library/yql/utils/log/log.h>
+
+#include <ydb/library/actors/core/log.h>
+
+using namespace NYql::NDq::NInternal;
+
+constexpr ui32 StateVersion = 1;
+
+#define SRC_LOG_D(s) \
+ LOG_DEBUG_S(*NActors::TlsActivationContext, NKikimrServices::KQP_COMPUTE, LogPrefix << s)
+
+void TDqPqReadActorBase::SaveState(const NDqProto::TCheckpoint& /*checkpoint*/, TSourceState& state) {
+ NPq::NProto::TDqPqTopicSourceState stateProto;
+
+ NPq::NProto::TDqPqTopicSourceState::TTopicDescription* topic = stateProto.AddTopics();
+ topic->SetDatabaseId(SourceParams.GetDatabaseId());
+ topic->SetEndpoint(SourceParams.GetEndpoint());
+ topic->SetDatabase(SourceParams.GetDatabase());
+ topic->SetTopicPath(SourceParams.GetTopicPath());
+
+ for (const auto& [clusterAndPartition, offset] : PartitionToOffset) {
+ const auto& [cluster, partition] = clusterAndPartition;
+ NPq::NProto::TDqPqTopicSourceState::TPartitionReadState* partitionState = stateProto.AddPartitions();
+ partitionState->SetTopicIndex(0); // Now we are supporting only one topic per source.
+ partitionState->SetCluster(cluster);
+ partitionState->SetPartition(partition);
+ partitionState->SetOffset(offset);
+ }
+
+ stateProto.SetStartingMessageTimestampMs(StartingMessageTimestamp.MilliSeconds());
+ stateProto.SetIngressBytes(IngressStats.Bytes);
+
+ TString stateBlob;
+ YQL_ENSURE(stateProto.SerializeToString(&stateBlob));
+
+ state.Data.emplace_back(stateBlob, StateVersion);
+}
+
+void TDqPqReadActorBase::LoadState(const TSourceState& state) {
+ TInstant minStartingMessageTs = state.DataSize() ? TInstant::Max() : StartingMessageTimestamp;
+ ui64 ingressBytes = 0;
+ for (const auto& data : state.Data) {
+ if (data.Version != StateVersion) {
+ ythrow yexception() << "Invalid state version, expected " << StateVersion << ", actual " << data.Version;
+ }
+ NPq::NProto::TDqPqTopicSourceState stateProto;
+ YQL_ENSURE(stateProto.ParseFromString(data.Blob), "Serialized state is corrupted");
+ YQL_ENSURE(stateProto.TopicsSize() == 1, "One topic per source is expected");
+ PartitionToOffset.reserve(PartitionToOffset.size() + stateProto.PartitionsSize());
+ for (const NPq::NProto::TDqPqTopicSourceState::TPartitionReadState& partitionProto : stateProto.GetPartitions()) {
+ ui64& offset = PartitionToOffset[TPartitionKey{partitionProto.GetCluster(), partitionProto.GetPartition()}];
+ if (offset) {
+ offset = Min(offset, partitionProto.GetOffset());
+ } else {
+ offset = partitionProto.GetOffset();
+ }
+ }
+ minStartingMessageTs = Min(minStartingMessageTs, TInstant::MilliSeconds(stateProto.GetStartingMessageTimestampMs()));
+ ingressBytes += stateProto.GetIngressBytes();
+ }
+ for (const auto& [key, value] : PartitionToOffset) {
+ SRC_LOG_D("SessionId: " << GetSessionId() << " Restoring offset: cluster " << key.first << ", partition id " << key.second << ", offset: " << value);
+ }
+ StartingMessageTimestamp = minStartingMessageTs;
+ IngressStats.Bytes += ingressBytes;
+ IngressStats.Chunks++;
+}
+
+ui64 TDqPqReadActorBase::GetInputIndex() const {
+ return InputIndex;
+}
+
+const NYql::NDq::TDqAsyncStats& TDqPqReadActorBase::GetIngressStats() const {
+ return IngressStats;
+} \ No newline at end of file
diff --git a/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor_base.h b/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor_base.h
new file mode 100644
index 00000000000..f00176b8ab9
--- /dev/null
+++ b/ydb/library/yql/providers/pq/async_io/dq_pq_read_actor_base.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#include <ydb/library/yql/dq/actors/compute/dq_compute_actor_async_io.h>
+
+namespace NYql::NDq::NInternal {
+
+class TDqPqReadActorBase : public IDqComputeActorAsyncInput {
+
+public:
+ using TPartitionKey = std::pair<TString, ui64>; // Cluster, partition id.
+
+ const ui64 InputIndex;
+ THashMap<TPartitionKey, ui64> PartitionToOffset; // {cluster, partition} -> offset of next event.
+ const TTxId TxId;
+ const NPq::NProto::TDqPqTopicSource SourceParams;
+ TDqAsyncStats IngressStats;
+ TInstant StartingMessageTimestamp;
+ const TString LogPrefix;
+ const NPq::NProto::TDqReadTaskParams ReadParams;
+ const NActors::TActorId ComputeActorId;
+
+ TDqPqReadActorBase(
+ ui64 inputIndex,
+ ui64 taskId,
+ NActors::TActorId selfId,
+ const TTxId& txId,
+ NPq::NProto::TDqPqTopicSource&& sourceParams,
+ NPq::NProto::TDqReadTaskParams&& readParams,
+ const NActors::TActorId& computeActorId)
+ : InputIndex(inputIndex)
+ , TxId(txId)
+ , SourceParams(std::move(sourceParams))
+ , StartingMessageTimestamp(TInstant::MilliSeconds(TInstant::Now().MilliSeconds())) // this field is serialized as milliseconds, so drop microseconds part to be consistent with storage
+ , LogPrefix(TStringBuilder() << "SelfId: " << selfId << ", TxId: " << txId << ", task: " << taskId << ". PQ source. ")
+ , ReadParams(std::move(readParams))
+ , ComputeActorId(computeActorId) {
+ }
+
+public:
+ void SaveState(const NDqProto::TCheckpoint& checkpoint, TSourceState& state) override;
+ void LoadState(const TSourceState& state) override;
+
+ ui64 GetInputIndex() const override;
+ const TDqAsyncStats& GetIngressStats() const override;
+
+ virtual TString GetSessionId() const {
+ return TString{"empty"};
+ }
+};
+
+} // namespace NYql::NDq
diff --git a/ydb/library/yql/providers/pq/async_io/ya.make b/ydb/library/yql/providers/pq/async_io/ya.make
index 46fec067e4a..5f35c84043a 100644
--- a/ydb/library/yql/providers/pq/async_io/ya.make
+++ b/ydb/library/yql/providers/pq/async_io/ya.make
@@ -2,16 +2,22 @@ LIBRARY()
SRCS(
dq_pq_meta_extractor.cpp
+ dq_pq_rd_read_actor.cpp
dq_pq_read_actor.cpp
+ dq_pq_read_actor_base.cpp
dq_pq_write_actor.cpp
probes.cpp
)
PEERDIR(
+ ydb/core/fq/libs/graph_params/proto
+ ydb/core/fq/libs/protos
+ ydb/core/fq/libs/row_dispatcher
ydb/library/actors/log_backend
ydb/library/yql/dq/actors/compute
ydb/library/yql/minikql/computation
ydb/library/yql/providers/common/token_accessor/client
+ ydb/library/yql/providers/dq/api/protos
ydb/library/yql/providers/pq/common
ydb/library/yql/providers/pq/proto
ydb/library/yql/public/types
diff --git a/ydb/library/yql/providers/pq/common/yql_names.h b/ydb/library/yql/providers/pq/common/yql_names.h
index 268944e9bbe..c4f6eeb3cc4 100644
--- a/ydb/library/yql/providers/pq/common/yql_names.h
+++ b/ydb/library/yql/providers/pq/common/yql_names.h
@@ -7,6 +7,8 @@ namespace NYql {
constexpr TStringBuf PartitionsCountProp = "PartitionsCount";
constexpr TStringBuf ConsumerSetting = "Consumer";
constexpr TStringBuf EndpointSetting = "Endpoint";
+constexpr TStringBuf SharedReading = "SharedReading";
+constexpr TStringBuf Format = "Format";
constexpr TStringBuf UseSslSetting = "UseSsl";
constexpr TStringBuf AddBearerToTokenSetting = "AddBearerToToken";
constexpr TStringBuf WatermarksEnableSetting = "WatermarksEnable";
diff --git a/ydb/library/yql/providers/pq/expr_nodes/yql_pq_expr_nodes.json b/ydb/library/yql/providers/pq/expr_nodes/yql_pq_expr_nodes.json
index e43a69ba175..8a8f172d307 100644
--- a/ydb/library/yql/providers/pq/expr_nodes/yql_pq_expr_nodes.json
+++ b/ydb/library/yql/providers/pq/expr_nodes/yql_pq_expr_nodes.json
@@ -70,7 +70,9 @@
{"Index": 0, "Name": "Topic", "Type": "TPqTopic"},
{"Index": 1, "Name": "Columns", "Type": "TExprBase"},
{"Index": 2, "Name": "Settings", "Type": "TCoNameValueTupleList"},
- {"Index": 3, "Name": "Token", "Type": "TCoSecureParam"}
+ {"Index": 3, "Name": "Token", "Type": "TCoSecureParam"},
+ {"Index": 4, "Name": "FilterPredicate", "Type": "TCoLambda"},
+ {"Index": 5, "Name": "ColumnTypes", "Type": "TExprBase"}
]
},
{
diff --git a/ydb/library/yql/providers/pq/proto/dq_io.proto b/ydb/library/yql/providers/pq/proto/dq_io.proto
index 09342088851..1f9a17b7178 100644
--- a/ydb/library/yql/providers/pq/proto/dq_io.proto
+++ b/ydb/library/yql/providers/pq/proto/dq_io.proto
@@ -33,6 +33,10 @@ message TDqPqTopicSource {
string DatabaseId = 9;
repeated string MetadataFields = 10;
TWatermarks Watermarks = 11;
+ repeated string Columns = 12;
+ repeated string ColumnTypes = 13;
+ string Predicate = 14;
+ bool SharedReading = 15;
}
message TDqPqTopicSink {
diff --git a/ydb/library/yql/providers/pq/provider/ya.make b/ydb/library/yql/providers/pq/provider/ya.make
index 5aee5e3863c..92186b68b75 100644
--- a/ydb/library/yql/providers/pq/provider/ya.make
+++ b/ydb/library/yql/providers/pq/provider/ya.make
@@ -24,29 +24,31 @@ PEERDIR(
library/cpp/random_provider
library/cpp/time_provider
ydb/library/yql/ast
- ydb/library/yql/minikql/comp_nodes
- ydb/library/yql/providers/common/db_id_async_resolver
- ydb/library/yql/providers/common/structured_token
- ydb/library/yql/public/udf
- ydb/public/sdk/cpp/client/ydb_driver
ydb/library/yql/core
ydb/library/yql/core/type_ann
ydb/library/yql/dq/expr_nodes
ydb/library/yql/dq/integration
ydb/library/yql/dq/opt
+ ydb/library/yql/minikql/comp_nodes
ydb/library/yql/providers/common/config
+ ydb/library/yql/providers/common/db_id_async_resolver
ydb/library/yql/providers/common/dq
ydb/library/yql/providers/common/proto
ydb/library/yql/providers/common/provider
+ ydb/library/yql/providers/common/pushdown
+ ydb/library/yql/providers/common/structured_token
ydb/library/yql/providers/common/transform
ydb/library/yql/providers/dq/common
ydb/library/yql/providers/dq/expr_nodes
ydb/library/yql/providers/dq/provider/exec
+ ydb/library/yql/providers/generic/provider
ydb/library/yql/providers/pq/cm_client
ydb/library/yql/providers/pq/common
ydb/library/yql/providers/pq/expr_nodes
ydb/library/yql/providers/pq/proto
ydb/library/yql/providers/result/expr_nodes
+ ydb/library/yql/public/udf
+ ydb/public/sdk/cpp/client/ydb_driver
)
YQL_LAST_ABI_VERSION()
diff --git a/ydb/library/yql/providers/pq/provider/yql_pq_datasource_type_ann.cpp b/ydb/library/yql/providers/pq/provider/yql_pq_datasource_type_ann.cpp
index e9ad7d5baba..11d6194e51c 100644
--- a/ydb/library/yql/providers/pq/provider/yql_pq_datasource_type_ann.cpp
+++ b/ydb/library/yql/providers/pq/provider/yql_pq_datasource_type_ann.cpp
@@ -5,6 +5,7 @@
#include <ydb/library/yql/providers/common/provider/yql_provider.h>
#include <ydb/library/yql/providers/common/provider/yql_provider_names.h>
+#include <ydb/library/yql/providers/common/pushdown/type_ann.h>
#include <ydb/library/yql/providers/pq/common/pq_meta_fields.h>
#include <ydb/library/yql/providers/common/provider/yql_data_provider_impl.h>
@@ -131,7 +132,7 @@ public:
}
TStatus HandleDqTopicSource(TExprBase input, TExprContext& ctx) {
- if (!EnsureArgsCount(input.Ref(), 4, ctx)) {
+ if (!EnsureArgsCount(input.Ref(), 6, ctx)) {
return TStatus::Error;
}
@@ -150,6 +151,13 @@ public:
return TStatus::Error;
}
+ auto rowSchema = topic.RowSpec().Ref().GetTypeAnn()->Cast<TTypeExprType>()->GetType()->Cast<TStructExprType>();
+
+ const TStatus filterAnnotationStatus = NYql::NPushdown::AnnotateFilterPredicate(input.Ptr(), TDqPqTopicSource::idx_FilterPredicate, rowSchema, ctx);
+ if (filterAnnotationStatus != TStatus::Ok) {
+ return filterAnnotationStatus;
+ }
+
if (topic.Metadata().Empty()) {
input.Ptr()->SetTypeAnn(ctx.MakeType<TStreamExprType>(ctx.MakeType<TDataExprType>(EDataSlot::String)));
return TStatus::Ok;
diff --git a/ydb/library/yql/providers/pq/provider/yql_pq_dq_integration.cpp b/ydb/library/yql/providers/pq/provider/yql_pq_dq_integration.cpp
index f98c58d173d..bac0ba92fbc 100644
--- a/ydb/library/yql/providers/pq/provider/yql_pq_dq_integration.cpp
+++ b/ydb/library/yql/providers/pq/provider/yql_pq_dq_integration.cpp
@@ -7,6 +7,8 @@
#include <ydb/library/yql/providers/common/dq/yql_dq_integration_impl.h>
#include <ydb/library/yql/providers/dq/common/yql_dq_settings.h>
#include <ydb/library/yql/providers/dq/expr_nodes/dqs_expr_nodes.h>
+#include <ydb/library/yql/providers/generic/connector/api/service/protos/connector.pb.h>
+#include <ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.h>
#include <ydb/library/yql/providers/pq/common/pq_meta_fields.h>
#include <ydb/library/yql/providers/pq/common/yql_names.h>
#include <ydb/library/yql/providers/pq/expr_nodes/yql_pq_expr_nodes.h>
@@ -85,6 +87,8 @@ public:
.Value(pqReadTopic.Format())
.Done());
+ auto format = pqReadTopic.Format().Ref().Content();
+
TVector<TCoNameValueTuple> innerSettings;
if (pqReadTopic.Compression() != "") {
innerSettings.push_back(Build<TCoNameValueTuple>(ctx, pqReadTopic.Pos())
@@ -119,24 +123,47 @@ public:
.Done());
const auto token = "cluster:default_" + clusterName;
- auto columns = pqReadTopic.Columns().Ptr();
- if (!columns->IsList()) {
- const auto pos = columns->Pos();
- const auto& items = rowType->GetItems();
- TExprNode::TListType cols;
- cols.reserve(items.size());
- std::transform(items.cbegin(), items.cend(), std::back_inserter(cols), [&](const TItemExprType* item) { return ctx.NewAtom(pos, item->GetName()); });
- columns = ctx.NewList(pos, std::move(cols));
- }
+
+ auto rowSchema = pqReadTopic.Topic().RowSpec().Ref().GetTypeAnn()->Cast<TTypeExprType>()->GetType()->Cast<TStructExprType>();
+ TExprNode::TListType colTypes;
+ const auto& typeItems = rowSchema->GetItems();
+ colTypes.reserve(typeItems.size());
+ const auto pos = read->Pos(); // TODO
+ std::transform(typeItems.cbegin(), typeItems.cend(), std::back_inserter(colTypes),
+ [&](const TItemExprType* item) {
+ return ctx.NewAtom(pos, FormatType(item->GetItemType()));
+ });
+ auto columnTypes = ctx.NewList(pos, std::move(colTypes));
+
+ TExprNode::TListType colNames;
+ colNames.reserve(typeItems.size());
+ std::transform(typeItems.cbegin(), typeItems.cend(), std::back_inserter(colNames),
+ [&](const TItemExprType* item) {
+ return ctx.NewAtom(pos, item->GetName());
+ });
+ auto columnNames = ctx.NewList(pos, std::move(colNames));
+
+ auto row = Build<TCoArgument>(ctx, read->Pos())
+ .Name("row")
+ .Done();
+ auto emptyPredicate = Build<TCoLambda>(ctx, read->Pos())
+ .Args({row})
+ .Body<TCoBool>()
+ .Literal().Build("true")
+ .Build()
+ .Done().Ptr();
+
return Build<TDqSourceWrap>(ctx, read->Pos())
.Input<TDqPqTopicSource>()
.Topic(pqReadTopic.Topic())
- .Columns(std::move(columns))
- .Settings(BuildTopicReadSettings(clusterName, dqSettings, read->Pos(), ctx))
+ .Columns(std::move(columnNames))
+ .Settings(BuildTopicReadSettings(clusterName, dqSettings, read->Pos(), format, ctx))
.Token<TCoSecureParam>()
.Name().Build(token)
.Build()
+ .FilterPredicate(emptyPredicate)
+ .ColumnTypes(std::move(columnTypes))
.Build()
.RowType(ExpandType(pqReadTopic.Pos(), *rowType, ctx))
.DataSource(pqReadTopic.DataSource().Cast<TCoDataSource>())
@@ -179,7 +206,7 @@ public:
}
}
- void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType, size_t) override {
+ void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType, size_t, TExprContext& ctx) override {
if (auto maybeDqSource = TMaybeNode<TDqSource>(&node)) {
auto settings = maybeDqSource.Cast().Settings();
if (auto maybeTopicSource = TMaybeNode<TDqPqTopicSource>(settings.Raw())) {
@@ -195,6 +222,8 @@ public:
srcDesc.SetClusterType(ToClusterType(clusterDesc->ClusterType));
srcDesc.SetDatabaseId(clusterDesc->DatabaseId);
+ bool sharedReading = false;
+ TString format;
size_t const settingsCount = topicSource.Settings().Size();
for (size_t i = 0; i < settingsCount; ++i) {
TCoNameValueTuple setting = topicSource.Settings().Item(i);
@@ -203,6 +232,10 @@ public:
srcDesc.SetConsumerName(TString(Value(setting)));
} else if (name == EndpointSetting) {
srcDesc.SetEndpoint(TString(Value(setting)));
+ } else if (name == SharedReading) {
+ sharedReading = FromString<bool>(Value(setting));
+ } else if (name == Format) {
+ format = TString(Value(setting));
} else if (name == UseSslSetting) {
srcDesc.SetUseSsl(FromString<bool>(Value(setting)));
} else if (name == AddBearerToTokenSetting) {
@@ -230,7 +263,33 @@ public:
srcDesc.AddMetadataFields(metadata.Value().Maybe<TCoAtom>().Cast().StringValue());
}
+ for (const auto& column : topicSource.Columns().Cast<TCoAtomList>()) {
+ srcDesc.AddColumns(column.StringValue());
+ }
+
+ for (const auto& columnTypes : topicSource.ColumnTypes().Cast<TCoAtomList>()) {
+ srcDesc.AddColumnTypes(columnTypes.StringValue());
+ }
+
+ NYql::NConnector::NApi::TPredicate predicateProto;
+ if (auto predicate = topicSource.FilterPredicate(); !NYql::IsEmptyFilterPredicate(predicate)) {
+ TStringBuilder err;
+ if (!NYql::SerializeFilterPredicate(predicate, &predicateProto, err)) {
+ ythrow yexception() << "Failed to serialize filter predicate for source: " << err;
+ }
+ }
+
+ //sharedReading = true;
+ sharedReading = sharedReading && (format == "json_each_row");
+ TString predicateSql = NYql::FormatWhere(predicateProto);
+ if (sharedReading) {
+ srcDesc.SetPredicate(predicateSql);
+ srcDesc.SetSharedReading(true);
+ }
protoSettings.PackFrom(srcDesc);
+ if (sharedReading && !predicateSql.empty()) {
+ ctx.AddWarning(TIssue(ctx.GetPosition(node.Pos()), "Row dispatcher will use the predicate: " + predicateSql));
+ }
sourceType = "PqSource";
}
}
@@ -278,6 +337,7 @@ public:
const TString& cluster,
const TDqSettings& dqSettings,
TPositionHandle pos,
+ std::string_view format,
TExprContext& ctx) const
{
TVector<TCoNameValueTuple> props;
@@ -295,6 +355,10 @@ public:
}
Add(props, EndpointSetting, clusterConfiguration->Endpoint, pos, ctx);
+ Add(props, SharedReading, ToString(clusterConfiguration->SharedReading), pos, ctx);
+ Add(props, Format, format, pos, ctx);
+
+
if (clusterConfiguration->UseSsl) {
Add(props, UseSslSetting, "1", pos, ctx);
}
diff --git a/ydb/library/yql/providers/pq/provider/yql_pq_logical_opt.cpp b/ydb/library/yql/providers/pq/provider/yql_pq_logical_opt.cpp
index 8650ae9b2f8..92964948185 100644
--- a/ydb/library/yql/providers/pq/provider/yql_pq_logical_opt.cpp
+++ b/ydb/library/yql/providers/pq/provider/yql_pq_logical_opt.cpp
@@ -10,13 +10,25 @@
#include <ydb/library/yql/providers/pq/common/pq_meta_fields.h>
#include <ydb/library/yql/providers/pq/expr_nodes/yql_pq_expr_nodes.h>
#include <ydb/library/yql/utils/log/log.h>
+#include <ydb/library/yql/utils/plan/plan_utils.h>
+#include <ydb/library/yql/providers/common/pushdown/collection.h>
+#include <ydb/library/yql/providers/common/pushdown/physical_opt.h>
+#include <ydb/library/yql/providers/common/pushdown/predicate_node.h>
namespace NYql {
using namespace NNodes;
namespace {
+ struct TPushdownSettings: public NPushdown::TSettings {
+ TPushdownSettings()
+ : NPushdown::TSettings(NLog::EComponent::ProviderGeneric)
+ {
+ using EFlag = NPushdown::TSettings::EFeatureFlag;
+ Enable(EFlag::ExpressionAsPredicate | EFlag::ArithmeticalExpressions | EFlag::ImplicitConversionToInt64 | EFlag::StringTypes | EFlag::LikeOperator);
+ }
+ };
std::unordered_set<TString> GetUsedMetadataFields(const TCoExtractMembers& extract) {
std::unordered_set<TString> usedMetadataFields;
@@ -123,6 +135,7 @@ public:
#define HNDL(name) "LogicalOptimizer-"#name, Hndl(&TPqLogicalOptProposalTransformer::name)
// AddHandler(0, &TCoExtractMembers::Match, HNDL(ExtractMembers));
AddHandler(0, &TCoExtractMembers::Match, HNDL(ExtractMembersOverDqWrap));
+ AddHandler(0, &TCoFlatMap::Match, HNDL(PushFilterToPqTopicSource));
#undef HNDL
}
@@ -200,6 +213,71 @@ public:
.Input(ctx.ReplaceNode(input.Ptr(), dqSourceWrap.Ref(), newDqSourceWrap))
.Done();
}
+
+ bool IsEmptyFilterPredicate(const TCoLambda& lambda) const {
+ auto maybeBool = lambda.Body().Maybe<TCoBool>();
+ if (!maybeBool) {
+ return false;
+ }
+ return TStringBuf(maybeBool.Cast().Literal()) == "true"sv;
+ }
+
+ TMaybeNode<TExprBase> PushFilterToPqTopicSource(TExprBase node, TExprContext& ctx) const {
+ auto flatmap = node.Cast<TCoFlatMap>();
+ auto maybeExtractMembers = flatmap.Input().Maybe<TCoExtractMembers>();
+
+ auto maybeDqSourceWrap =
+ maybeExtractMembers
+ ? maybeExtractMembers.Cast().Input().Maybe<TDqSourceWrap>()
+ : flatmap.Input().Maybe<TDqSourceWrap>();
+ ;
+ if (!maybeDqSourceWrap) {
+ return node;
+ }
+ TDqSourceWrap dqSourceWrap = maybeDqSourceWrap.Cast();
+ auto maybeDqPqTopicSource = dqSourceWrap.Input().Maybe<TDqPqTopicSource>();
+ if (!maybeDqPqTopicSource) {
+ return node;
+ }
+ TDqPqTopicSource dqPqTopicSource = maybeDqPqTopicSource.Cast();
+ if (!IsEmptyFilterPredicate(dqPqTopicSource.FilterPredicate())) {
+ YQL_CLOG(TRACE, ProviderPq) << "Push filter. Lambda is already not empty";
+ return node;
+ }
+
+ auto newFilterLambda = MakePushdownPredicate(flatmap.Lambda(), ctx, node.Pos(), TPushdownSettings());
+ if (!newFilterLambda) {
+ ctx.AddWarning(TIssue(ctx.GetPosition(node.Pos()), "No predicate to pushdown"));
+ return node;
+ }
+ YQL_CLOG(INFO, ProviderPq) << "Build new TCoFlatMap with predicate";
+
+ if (maybeExtractMembers) {
+ return Build<TCoFlatMap>(ctx, flatmap.Pos())
+ .InitFrom(flatmap)
+ .Input<TCoExtractMembers>()
+ .InitFrom(maybeExtractMembers.Cast())
+ .Input<TDqSourceWrap>()
+ .InitFrom(dqSourceWrap)
+ .Input<TDqPqTopicSource>()
+ .InitFrom(dqPqTopicSource)
+ .FilterPredicate(newFilterLambda.Cast())
+ .Build()
+ .Build()
+ .Build()
+ .Done();
+ }
+ return Build<TCoFlatMap>(ctx, flatmap.Pos())
+ .InitFrom(flatmap)
+ .Input<TDqSourceWrap>()
+ .InitFrom(dqSourceWrap)
+ .Input<TDqPqTopicSource>()
+ .InitFrom(dqPqTopicSource)
+ .FilterPredicate(newFilterLambda.Cast())
+ .Build()
+ .Build()
+ .Done();
+ }
private:
TPqState::TPtr State_;
diff --git a/ydb/library/yql/providers/pq/provider/yql_pq_settings.cpp b/ydb/library/yql/providers/pq/provider/yql_pq_settings.cpp
index c424fa9d0e9..5b97002b9ad 100644
--- a/ydb/library/yql/providers/pq/provider/yql_pq_settings.cpp
+++ b/ydb/library/yql/providers/pq/provider/yql_pq_settings.cpp
@@ -42,6 +42,7 @@ void TPqConfiguration::Init(
clusterSettings.TvmId = cluster.GetTvmId();
clusterSettings.UseSsl = cluster.GetUseSsl();
clusterSettings.AddBearerToToken = cluster.GetAddBearerToToken();
+ clusterSettings.SharedReading = cluster.GetSharedReading();
const TString authToken = typeCtx->Credentials->FindCredentialContent("cluster:default_" + clusterSettings.ClusterName, "default_pq", cluster.GetToken());
clusterSettings.AuthToken = authToken;
diff --git a/ydb/library/yql/providers/pq/provider/yql_pq_settings.h b/ydb/library/yql/providers/pq/provider/yql_pq_settings.h
index a720506ef12..672effd42fc 100644
--- a/ydb/library/yql/providers/pq/provider/yql_pq_settings.h
+++ b/ydb/library/yql/providers/pq/provider/yql_pq_settings.h
@@ -29,6 +29,7 @@ struct TPqClusterConfigurationSettings {
ui32 TvmId = 0;
TString AuthToken;
bool AddBearerToToken = false;
+ bool SharedReading = false;
};
struct TPqConfiguration : public TPqSettings, public NCommon::TSettingDispatcher {
diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_raw_read_actor.cpp b/ydb/library/yql/providers/s3/actors/yql_s3_raw_read_actor.cpp
index 5b223ec6921..8015c0f416f 100644
--- a/ydb/library/yql/providers/s3/actors/yql_s3_raw_read_actor.cpp
+++ b/ydb/library/yql/providers/s3/actors/yql_s3_raw_read_actor.cpp
@@ -8,7 +8,7 @@
#include <ydb/library/actors/core/log.h>
#include <ydb/library/services/services.pb.h>
-#include <ydb/library/yql/dq/actors/compute/retry_queue.h>
+#include <ydb/library/yql/dq/actors/common/retry_queue.h>
#include <ydb/library/yql/minikql/mkql_string_util.h>
#include <ydb/library/yql/providers/s3/common/util.h>
diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp
index c386da7affa..f01d76faf59 100644
--- a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp
+++ b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp
@@ -48,7 +48,7 @@
#include <ydb/library/services/services.pb.h>
#include <ydb/library/yql/core/yql_expr_type_annotation.h>
-#include <ydb/library/yql/dq/actors/compute/retry_queue.h>
+#include <ydb/library/yql/dq/actors/common/retry_queue.h>
#include <ydb/library/yql/minikql/mkql_string_util.h>
#include <ydb/library/yql/minikql/computation/mkql_computation_node_impl.h>
#include <ydb/library/yql/minikql/mkql_program_builder.h>
diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_source_queue.cpp b/ydb/library/yql/providers/s3/actors/yql_s3_source_queue.cpp
index 5adab401133..7c0bf3a692f 100644
--- a/ydb/library/yql/providers/s3/actors/yql_s3_source_queue.cpp
+++ b/ydb/library/yql/providers/s3/actors/yql_s3_source_queue.cpp
@@ -45,7 +45,7 @@
#include <ydb/library/services/services.pb.h>
#include <ydb/library/yql/core/yql_expr_type_annotation.h>
-#include <ydb/library/yql/dq/actors/compute/retry_queue.h>
+#include <ydb/library/yql/dq/actors/common/retry_queue.h>
#include <ydb/library/yql/minikql/mkql_string_util.h>
#include <ydb/library/yql/minikql/computation/mkql_computation_node_impl.h>
#include <ydb/library/yql/minikql/mkql_program_builder.h>
diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp
index 39f984ab5a6..9cc5b024c6e 100644
--- a/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp
+++ b/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp
@@ -9,7 +9,6 @@
#include <ydb/library/yql/providers/dq/expr_nodes/dqs_expr_nodes.h>
#include <ydb/library/yql/providers/generic/connector/api/service/protos/connector.pb.h>
#include <ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.h>
-#include <ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.h>
#include <ydb/library/yql/providers/s3/actors/yql_s3_read_actor.h>
#include <ydb/library/yql/providers/s3/expr_nodes/yql_s3_expr_nodes.h>
#include <ydb/library/yql/providers/s3/proto/range.pb.h>
@@ -358,7 +357,7 @@ public:
return read;
}
- void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType, size_t maxPartitions) override {
+ void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType, size_t maxPartitions, TExprContext&) override {
const TDqSource source(&node);
if (const auto maySettings = source.Settings().Maybe<TS3SourceSettingsBase>()) {
const auto settings = maySettings.Cast();
diff --git a/ydb/library/yql/providers/solomon/provider/yql_solomon_dq_integration.cpp b/ydb/library/yql/providers/solomon/provider/yql_solomon_dq_integration.cpp
index 29a65f6377f..7cf47fa189b 100644
--- a/ydb/library/yql/providers/solomon/provider/yql_solomon_dq_integration.cpp
+++ b/ydb/library/yql/providers/solomon/provider/yql_solomon_dq_integration.cpp
@@ -227,7 +227,7 @@ public:
return TSoWrite::Match(&write);
}
- void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType, size_t) override {
+ void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType, size_t, TExprContext&) override {
const TDqSource dqSource(&node);
const auto maybeSettings = dqSource.Settings().Maybe<TSoSourceSettings>();
if (!maybeSettings) {
diff --git a/ydb/library/yql/providers/ydb/provider/yql_ydb_dq_integration.cpp b/ydb/library/yql/providers/ydb/provider/yql_ydb_dq_integration.cpp
index 28d4aebde0f..7ff7fc1c5cc 100644
--- a/ydb/library/yql/providers/ydb/provider/yql_ydb_dq_integration.cpp
+++ b/ydb/library/yql/providers/ydb/provider/yql_ydb_dq_integration.cpp
@@ -114,7 +114,7 @@ public:
return read;
}
- void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType, size_t) override {
+ void FillSourceSettings(const TExprNode& node, ::google::protobuf::Any& protoSettings, TString& sourceType, size_t, TExprContext&) override {
const TDqSource source(&node);
if (const auto maySettings = source.Settings().Maybe<TYdbSourceSettings>()) {
const auto settings = maySettings.Cast();
diff --git a/ydb/tests/fq/pq_async_io/ut/dq_pq_rd_read_actor_ut.cpp b/ydb/tests/fq/pq_async_io/ut/dq_pq_rd_read_actor_ut.cpp
new file mode 100644
index 00000000000..350c0bd5b40
--- /dev/null
+++ b/ydb/tests/fq/pq_async_io/ut/dq_pq_rd_read_actor_ut.cpp
@@ -0,0 +1,359 @@
+#include <ydb/tests/fq/pq_async_io/ut_helpers.h>
+
+#include <ydb/library/yql/utils/yql_panic.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+#include <ydb/core/fq/libs/row_dispatcher/events/data_plane.h>
+
+#include <thread>
+
+namespace NYql::NDq {
+
+const ui64 PartitionId = 666;
+
+struct TFixture : public TPqIoTestFixture {
+
+ TFixture() {
+ LocalRowDispatcherId = CaSetup->Runtime->AllocateEdgeActor();
+ Coordinator1Id = CaSetup->Runtime->AllocateEdgeActor();
+ Coordinator2Id = CaSetup->Runtime->AllocateEdgeActor();
+ RowDispatcher1 = CaSetup->Runtime->AllocateEdgeActor();
+ RowDispatcher2 = CaSetup->Runtime->AllocateEdgeActor();
+ }
+
+ void InitRdSource(
+ NYql::NPq::NProto::TDqPqTopicSource&& settings,
+ i64 freeSpace = 1_MB)
+ {
+ CaSetup->Execute([&](TFakeActor& actor) {
+ NPq::NProto::TDqReadTaskParams params;
+ auto* partitioninigParams = params.MutablePartitioningParams();
+ partitioninigParams->SetTopicPartitionsCount(1);
+ partitioninigParams->SetEachTopicPartitionGroupId(PartitionId);
+ partitioninigParams->SetDqPartitionsCount(1);
+
+ TString serializedParams;
+ Y_PROTOBUF_SUPPRESS_NODISCARD params.SerializeToString(&serializedParams);
+
+ const THashMap<TString, TString> secureParams;
+ const THashMap<TString, TString> taskParams { {"pq", serializedParams} };
+
+ auto [dqSource, dqSourceAsActor] = CreateDqPqRdReadActor(
+ std::move(settings),
+ 0,
+ NYql::NDq::TCollectStatsLevel::None,
+ "query_1",
+ 0,
+ secureParams,
+ taskParams,
+ actor.SelfId(), // computeActorId
+ LocalRowDispatcherId,
+ actor.GetHolderFactory(),
+ MakeIntrusive<NMonitoring::TDynamicCounters>(),
+ freeSpace);
+
+ actor.InitAsyncInput(dqSource, dqSourceAsActor);
+ });
+ }
+
+ void ExpectCoordinatorChangesSubscribe() {
+ auto eventHolder = CaSetup->Runtime->GrabEdgeEvent<NFq::TEvRowDispatcher::TEvCoordinatorChangesSubscribe>(LocalRowDispatcherId, TDuration::Seconds(5));
+ UNIT_ASSERT(eventHolder.Get() != nullptr);
+ }
+
+ auto ExpectCoordinatorRequest(NActors::TActorId coordinatorId) {
+ auto eventHolder = CaSetup->Runtime->GrabEdgeEvent<NFq::TEvRowDispatcher::TEvCoordinatorRequest>(coordinatorId, TDuration::Seconds(5));
+ UNIT_ASSERT(eventHolder.Get() != nullptr);
+ return eventHolder.Get();
+ }
+
+ void ExpectStartSession(ui64 expectedOffset, NActors::TActorId rowDispatcherId) {
+ auto eventHolder = CaSetup->Runtime->GrabEdgeEvent<NFq::TEvRowDispatcher::TEvStartSession>(rowDispatcherId, TDuration::Seconds(5));
+ UNIT_ASSERT(eventHolder.Get() != nullptr);
+ UNIT_ASSERT(eventHolder->Get()->Record.GetOffset() == expectedOffset);
+ }
+
+ void ExpectStopSession(NActors::TActorId rowDispatcherId) {
+ auto eventHolder = CaSetup->Runtime->GrabEdgeEvent<NFq::TEvRowDispatcher::TEvStopSession>(rowDispatcherId, TDuration::Seconds(5));
+ UNIT_ASSERT(eventHolder.Get() != nullptr);
+ }
+
+ void ExpectGetNextBatch(NActors::TActorId rowDispatcherId) {
+ auto eventHolder = CaSetup->Runtime->GrabEdgeEvent<NFq::TEvRowDispatcher::TEvGetNextBatch>(rowDispatcherId, TDuration::Seconds(5));
+ UNIT_ASSERT(eventHolder.Get() != nullptr);
+ UNIT_ASSERT(eventHolder->Get()->Record.GetPartitionId() == PartitionId);
+ }
+
+ void MockCoordinatorChanged(NActors::TActorId coordinatorId) {
+ CaSetup->Execute([&](TFakeActor& actor) {
+ auto event = new NFq::TEvRowDispatcher::TEvCoordinatorChanged(coordinatorId);
+ CaSetup->Runtime->Send(new NActors::IEventHandle(*actor.DqAsyncInputActorId, LocalRowDispatcherId, event));
+ });
+ }
+
+ void MockCoordinatorResult(NActors::TActorId rowDispatcherId, ui64 cookie = 0) {
+ CaSetup->Execute([&](TFakeActor& actor) {
+ auto event = new NFq::TEvRowDispatcher::TEvCoordinatorResult();
+ auto* partitions = event->Record.AddPartitions();
+ partitions->AddPartitionId(PartitionId);
+ ActorIdToProto(rowDispatcherId, partitions->MutableActorId());
+ CaSetup->Runtime->Send(new NActors::IEventHandle(*actor.DqAsyncInputActorId, Coordinator1Id, event, 0, cookie));
+ });
+ }
+
+ void MockAck(NActors::TActorId rowDispatcherId) {
+ CaSetup->Execute([&](TFakeActor& actor) {
+ NFq::NRowDispatcherProto::TEvStartSession proto;
+ proto.SetPartitionId(PartitionId);
+ auto event = new NFq::TEvRowDispatcher::TEvStartSessionAck(proto);
+ CaSetup->Runtime->Send(new NActors::IEventHandle(*actor.DqAsyncInputActorId, rowDispatcherId, event));
+ });
+ }
+
+ void MockNewDataArrived(NActors::TActorId rowDispatcherId) {
+ CaSetup->Execute([&](TFakeActor& actor) {
+ auto event = new NFq::TEvRowDispatcher::TEvNewDataArrived();
+ event->Record.SetPartitionId(PartitionId);
+ CaSetup->Runtime->Send(new NActors::IEventHandle(*actor.DqAsyncInputActorId, rowDispatcherId, event));
+ });
+ }
+
+ void MockMessageBatch(ui64 offset, const std::vector<TString>& jsons, NActors::TActorId rowDispatcherId) {
+ CaSetup->Execute([&](TFakeActor& actor) {
+ auto event = new NFq::TEvRowDispatcher::TEvMessageBatch();
+ for (const auto& json :jsons) {
+ NFq::NRowDispatcherProto::TEvMessage message;
+ message.SetJson(json);
+ message.SetOffset(offset++);
+ *event->Record.AddMessages() = message;
+ }
+ event->Record.SetPartitionId(PartitionId);
+ event->Record.SetNextMessageOffset(offset);
+ CaSetup->Runtime->Send(new NActors::IEventHandle(*actor.DqAsyncInputActorId, rowDispatcherId, event));
+ });
+ }
+
+ void MockSessionError() {
+ CaSetup->Execute([&](TFakeActor& actor) {
+ auto event = new NFq::TEvRowDispatcher::TEvSessionError();
+ event->Record.SetMessage("A problem has been detected and session has been shut down to prevent damage your life");
+ event->Record.SetPartitionId(PartitionId);
+ CaSetup->Runtime->Send(new NActors::IEventHandle(*actor.DqAsyncInputActorId, RowDispatcher1, event));
+ });
+ }
+
+ template<typename T>
+ void AssertDataWithWatermarks(
+ const std::vector<std::variant<T, TInstant>>& actual,
+ const std::vector<T>& expected,
+ const std::vector<ui32>& watermarkBeforePositions)
+ {
+ auto expectedPos = 0U;
+ auto watermarksBeforeIter = watermarkBeforePositions.begin();
+
+ for (auto item : actual) {
+ if (std::holds_alternative<TInstant>(item)) {
+ if (watermarksBeforeIter != watermarkBeforePositions.end()) {
+ watermarksBeforeIter++;
+ }
+ continue;
+ } else {
+ UNIT_ASSERT_C(expectedPos < expected.size(), "Too many data items");
+ UNIT_ASSERT_C(
+ watermarksBeforeIter == watermarkBeforePositions.end() ||
+ *watermarksBeforeIter > expectedPos,
+ "Watermark before item on position " << expectedPos << " was expected");
+ UNIT_ASSERT_EQUAL(std::get<T>(item), expected.at(expectedPos));
+ expectedPos++;
+ }
+ }
+ }
+
+ void MockDisconnected() {
+ CaSetup->Execute([&](TFakeActor& actor) {
+ auto event = new NActors::TEvInterconnect::TEvNodeDisconnected(CaSetup->Runtime->GetNodeId(0));
+ CaSetup->Runtime->Send(new NActors::IEventHandle(*actor.DqAsyncInputActorId, RowDispatcher1, event));
+ });
+ }
+
+ void MockConnected() {
+ CaSetup->Execute([&](TFakeActor& actor) {
+ auto event = new NActors::TEvInterconnect::TEvNodeConnected(CaSetup->Runtime->GetNodeId(0));
+ CaSetup->Runtime->Send(new NActors::IEventHandle(*actor.DqAsyncInputActorId, RowDispatcher1, event));
+ });
+ }
+
+ void MockUndelivered() {
+ CaSetup->Execute([&](TFakeActor& actor) {
+ auto event = new NActors::TEvents::TEvUndelivered(0, NActors::TEvents::TEvUndelivered::ReasonActorUnknown);
+ CaSetup->Runtime->Send(new NActors::IEventHandle(*actor.DqAsyncInputActorId, RowDispatcher1, event));
+ });
+ }
+
+
+ void StartSession() {
+ InitRdSource(BuildPqTopicSourceSettings("topicName"));
+ SourceRead<TString>(UVParser);
+ ExpectCoordinatorChangesSubscribe();
+
+ MockCoordinatorChanged(Coordinator1Id);
+ auto req =ExpectCoordinatorRequest(Coordinator1Id);
+
+ MockCoordinatorResult(RowDispatcher1, req->Cookie);
+ ExpectStartSession(0, RowDispatcher1);
+ MockAck(RowDispatcher1);
+ }
+
+ void ProcessSomeJsons(ui64 offset, const std::vector<TString>& jsons, NActors::TActorId rowDispatcherId) {
+ MockNewDataArrived(rowDispatcherId);
+ ExpectGetNextBatch(rowDispatcherId);
+
+ MockMessageBatch(offset, jsons, rowDispatcherId);
+
+ auto result = SourceReadDataUntil<TString>(UVParser, jsons.size());
+ AssertDataWithWatermarks(result, jsons, {});
+ }
+
+ const TString Json1 = "{\"dt\":100,\"value\":\"value1\"}";
+ const TString Json2 = "{\"dt\":200,\"value\":\"value2\"}";
+ const TString Json3 = "{\"dt\":300,\"value\":\"value3\"}";
+ const TString Json4 = "{\"dt\":400,\"value\":\"value4\"}";
+
+ NActors::TActorId LocalRowDispatcherId;
+ NActors::TActorId Coordinator1Id;
+ NActors::TActorId Coordinator2Id;
+ NActors::TActorId RowDispatcher1;
+ NActors::TActorId RowDispatcher2;
+};
+
+Y_UNIT_TEST_SUITE(TDqPqRdReadActorTests) {
+ Y_UNIT_TEST_F(TestReadFromTopic, TFixture) {
+ StartSession();
+ ProcessSomeJsons(0, {Json1, Json2}, RowDispatcher1);
+ }
+
+ Y_UNIT_TEST_F(SessionError, TFixture) {
+ StartSession();
+
+ TInstant deadline = Now() + TDuration::Seconds(5);
+ auto future = CaSetup->AsyncInputPromises.FatalError.GetFuture();
+ MockSessionError();
+
+ bool failured = false;
+ while (Now() < deadline) {
+ SourceRead<TString>(UVParser);
+ if (future.HasValue()) {
+ UNIT_ASSERT_STRING_CONTAINS(future.GetValue().ToOneLineString(), "damage your life");
+ failured = true;
+ break;
+ }
+ }
+ UNIT_ASSERT_C(failured, "Failure timeout");
+ }
+
+ Y_UNIT_TEST_F(ReadWithFreeSpace, TFixture) {
+ StartSession();
+
+ MockNewDataArrived(RowDispatcher1);
+ ExpectGetNextBatch(RowDispatcher1);
+
+ const std::vector<TString> data1 = {Json1, Json2};
+ MockMessageBatch(0, data1, RowDispatcher1);
+
+ const std::vector<TString> data2 = {Json3, Json4};
+ MockMessageBatch(2, data2, RowDispatcher1);
+
+ auto result = SourceReadDataUntil<TString>(UVParser, 1, 1);
+ std::vector<TString> expected{data1};
+ AssertDataWithWatermarks(result, expected, {});
+
+ UNIT_ASSERT_EQUAL(SourceRead<TString>(UVParser, 0).size(), 0);
+ }
+
+ Y_UNIT_TEST(TestSaveLoadPqRdRead) {
+ TSourceState state;
+
+ {
+ TFixture f;
+ f.StartSession();
+ f.ProcessSomeJsons(0, {f.Json1, f.Json2}, f.RowDispatcher1); // offsets: 0, 1
+
+ f.SaveSourceState(CreateCheckpoint(), state);
+ Cerr << "State saved" << Endl;
+ }
+ {
+ TFixture f;
+ f.InitRdSource(BuildPqTopicSourceSettings("topicName"));
+ f.SourceRead<TString>(UVParser);
+ f.LoadSource(state);
+ f.SourceRead<TString>(UVParser);
+ f.ExpectCoordinatorChangesSubscribe();
+
+ f.MockCoordinatorChanged(f.Coordinator1Id);
+ auto req = f.ExpectCoordinatorRequest(f.Coordinator1Id);
+
+ f.MockCoordinatorResult(f.RowDispatcher1, req->Cookie);
+ f.ExpectStartSession(2, f.RowDispatcher1);
+ f.MockAck(f.RowDispatcher1);
+
+ f.ProcessSomeJsons(2, {f.Json3}, f.RowDispatcher1); // offsets: 2
+ state.Data.clear();
+ f.SaveSourceState(CreateCheckpoint(), state);
+ Cerr << "State saved" << Endl;
+ }
+ {
+ TFixture f;
+ f.InitRdSource(BuildPqTopicSourceSettings("topicName"));
+ f.SourceRead<TString>(UVParser);
+ f.LoadSource(state);
+ f.SourceRead<TString>(UVParser);
+ f.ExpectCoordinatorChangesSubscribe();
+
+ f.MockCoordinatorChanged(f.Coordinator1Id);
+ auto req = f.ExpectCoordinatorRequest(f.Coordinator1Id);
+
+ f.MockCoordinatorResult(f.RowDispatcher1, req->Cookie);
+ f.ExpectStartSession(3, f.RowDispatcher1);
+ f.MockAck(f.RowDispatcher1);
+
+ f.ProcessSomeJsons(3, {f.Json4}, f.RowDispatcher1); // offsets: 3
+ }
+ }
+
+ Y_UNIT_TEST_F(CoordinatorChanged, TFixture) {
+ StartSession();
+ ProcessSomeJsons(0, {Json1, Json2}, RowDispatcher1);
+ MockMessageBatch(2, {Json3}, RowDispatcher1);
+
+ // change active Coordinator
+ MockCoordinatorChanged(Coordinator2Id);
+ ExpectStopSession(RowDispatcher1);
+
+ auto result = SourceReadDataUntil<TString>(UVParser, 1);
+ AssertDataWithWatermarks(result, {Json3}, {});
+
+ auto req = ExpectCoordinatorRequest(Coordinator2Id);
+ MockCoordinatorResult(RowDispatcher2, req->Cookie);
+
+ ExpectStartSession(3, RowDispatcher2);
+ MockAck(RowDispatcher2);
+
+ ProcessSomeJsons(3, {Json4}, RowDispatcher2);
+ }
+
+ Y_UNIT_TEST_F(RowDispatcherIsRestarted, TFixture) {
+ StartSession();
+ ProcessSomeJsons(0, {Json1, Json2}, RowDispatcher1);
+ MockDisconnected();
+ MockConnected();
+ MockUndelivered();
+
+ auto req = ExpectCoordinatorRequest(Coordinator1Id);
+ MockCoordinatorResult(RowDispatcher1, req->Cookie);
+ ExpectStartSession(2, RowDispatcher1);
+ MockAck(RowDispatcher1);
+
+ ProcessSomeJsons(2, {Json3}, RowDispatcher1);
+ }
+}
+} // NYql::NDq
diff --git a/ydb/tests/fq/pq_async_io/dq_pq_read_actor_ut.cpp b/ydb/tests/fq/pq_async_io/ut/dq_pq_read_actor_ut.cpp
index fbc1cfd6104..37438882c23 100644
--- a/ydb/tests/fq/pq_async_io/dq_pq_read_actor_ut.cpp
+++ b/ydb/tests/fq/pq_async_io/ut/dq_pq_read_actor_ut.cpp
@@ -1,4 +1,4 @@
-#include "ut_helpers.h"
+#include <ydb/tests/fq/pq_async_io/ut_helpers.h>
#include <ydb/library/yql/utils/yql_panic.h>
diff --git a/ydb/tests/fq/pq_async_io/dq_pq_write_actor_ut.cpp b/ydb/tests/fq/pq_async_io/ut/dq_pq_write_actor_ut.cpp
index 8bb3be26c27..9094c7b9f58 100644
--- a/ydb/tests/fq/pq_async_io/dq_pq_write_actor_ut.cpp
+++ b/ydb/tests/fq/pq_async_io/ut/dq_pq_write_actor_ut.cpp
@@ -1,4 +1,4 @@
-#include "ut_helpers.h"
+#include <ydb/tests/fq/pq_async_io/ut_helpers.h>
#include <ydb/library/yql/utils/yql_panic.h>
diff --git a/ydb/tests/fq/pq_async_io/ut/ya.make b/ydb/tests/fq/pq_async_io/ut/ya.make
new file mode 100644
index 00000000000..82f2450a647
--- /dev/null
+++ b/ydb/tests/fq/pq_async_io/ut/ya.make
@@ -0,0 +1,28 @@
+UNITTEST_FOR(ydb/library/yql/providers/pq/async_io)
+
+SIZE(MEDIUM)
+
+INCLUDE(${ARCADIA_ROOT}/ydb/tests/tools/fq_runner/ydb_runner_with_datastreams.inc)
+
+SRCS(
+ dq_pq_rd_read_actor_ut.cpp
+ dq_pq_read_actor_ut.cpp
+ dq_pq_write_actor_ut.cpp
+)
+
+PEERDIR(
+ ydb/core/testlib/basics/default
+ ydb/library/yql/minikql/comp_nodes/llvm14
+ ydb/library/yql/minikql/computation/llvm14
+ ydb/library/yql/providers/common/comp_nodes
+ ydb/library/yql/providers/common/ut_helpers
+ ydb/library/yql/public/udf/service/exception_policy
+ ydb/library/yql/sql
+ ydb/public/sdk/cpp/client/ydb_datastreams
+ ydb/public/sdk/cpp/client/ydb_persqueue_public
+ ydb/tests/fq/pq_async_io
+)
+
+YQL_LAST_ABI_VERSION()
+
+END()
diff --git a/ydb/tests/fq/pq_async_io/ut_helpers.h b/ydb/tests/fq/pq_async_io/ut_helpers.h
index 96fd267ecf6..6e9f92007d2 100644
--- a/ydb/tests/fq/pq_async_io/ut_helpers.h
+++ b/ydb/tests/fq/pq_async_io/ut_helpers.h
@@ -2,6 +2,7 @@
#include <ydb/library/yql/providers/common/ut_helpers/dq_fake_ca.h>
#include <ydb/library/yql/providers/pq/async_io/dq_pq_read_actor.h>
+#include <ydb/library/yql/providers/pq/async_io/dq_pq_rd_read_actor.h>
#include <ydb/library/yql/providers/pq/async_io/dq_pq_write_actor.h>
#include <ydb/library/yql/minikql/computation/mkql_computation_node_holders.h>
#include <ydb/library/yql/dq/actors/compute/dq_compute_actor_async_io.h>
diff --git a/ydb/tests/fq/pq_async_io/ya.make b/ydb/tests/fq/pq_async_io/ya.make
index d8a793a0a04..c27e93ce4ce 100644
--- a/ydb/tests/fq/pq_async_io/ya.make
+++ b/ydb/tests/fq/pq_async_io/ya.make
@@ -1,25 +1,14 @@
-UNITTEST_FOR(ydb/library/yql/providers/pq/async_io)
-
-SIZE(MEDIUM)
-
-INCLUDE(${ARCADIA_ROOT}/ydb/tests/tools/fq_runner/ydb_runner_with_datastreams.inc)
+LIBRARY()
SRCS(
- dq_pq_read_actor_ut.cpp
- dq_pq_write_actor_ut.cpp
ut_helpers.cpp
)
PEERDIR(
- ydb/core/testlib/basics/default
ydb/library/yql/minikql/computation/llvm14
- ydb/library/yql/public/udf/service/exception_policy
- ydb/library/yql/providers/common/comp_nodes
ydb/library/yql/providers/common/ut_helpers
- ydb/library/yql/sql
ydb/public/sdk/cpp/client/ydb_datastreams
ydb/public/sdk/cpp/client/ydb_persqueue_public
- ydb/library/yql/minikql/comp_nodes/llvm14
)
YQL_LAST_ABI_VERSION()
diff --git a/ydb/tests/fq/yds/test_row_dispatcher.py b/ydb/tests/fq/yds/test_row_dispatcher.py
new file mode 100644
index 00000000000..7cafe48661c
--- /dev/null
+++ b/ydb/tests/fq/yds/test_row_dispatcher.py
@@ -0,0 +1,681 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os
+import pytest
+import logging
+import time
+
+from ydb.tests.tools.fq_runner.kikimr_utils import yq_v1
+from ydb.tests.tools.datastreams_helpers.test_yds_base import TestYdsBase
+
+from ydb.tests.tools.fq_runner.kikimr_runner import StreamingOverKikimr
+from ydb.tests.tools.fq_runner.kikimr_runner import StreamingOverKikimrConfig
+from ydb.tests.tools.fq_runner.kikimr_runner import TenantConfig
+
+from ydb.tests.tools.datastreams_helpers.control_plane import list_read_rules
+from ydb.tests.tools.datastreams_helpers.control_plane import create_stream, create_read_rule
+from ydb.tests.tools.datastreams_helpers.data_plane import read_stream, write_stream
+from ydb.tests.tools.fq_runner.fq_client import StreamingDisposition
+
+import ydb.public.api.protos.draft.fq_pb2 as fq
+
+YDS_CONNECTION = "yds"
+
+
+@pytest.fixture
+def kikimr(request):
+ kikimr_conf = StreamingOverKikimrConfig(
+ cloud_mode=True, node_count={"/cp": TenantConfig(1), "/compute": TenantConfig(2)}
+ )
+ kikimr = StreamingOverKikimr(kikimr_conf)
+ kikimr.compute_plane.fq_config['row_dispatcher']['enabled'] = True
+ kikimr.compute_plane.fq_config['row_dispatcher']['without_consumer'] = True
+ kikimr.start_mvp_mock_server()
+ kikimr.start()
+ yield kikimr
+ kikimr.stop_mvp_mock_server()
+ kikimr.stop()
+
+
+def start_yds_query(kikimr, client, sql) -> str:
+ query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.STREAMING).result.query_id
+ client.wait_query_status(query_id, fq.QueryMeta.RUNNING)
+ kikimr.compute_plane.wait_zero_checkpoint(query_id)
+ return query_id
+
+
+def stop_yds_query(client, query_id):
+ client.abort_query(query_id)
+ client.wait_query(query_id)
+
+
+def wait_actor_count(kikimr, activity, expected_count):
+ deadline = time.time() + 60
+ while True:
+ count = 0
+ for node_index in kikimr.compute_plane.kikimr_cluster.nodes:
+ count = count + kikimr.compute_plane.get_actor_count(node_index, activity)
+ if count == expected_count:
+ break
+ assert time.time() < deadline, f"Waiting actor {activity} count failed, current count {count}"
+ time.sleep(1)
+ pass
+
+
+def wait_row_dispatcher_sensor_value(kikimr, sensor, expected_count, exact_match=True):
+ deadline = time.time() + 60
+ while True:
+ count = 0
+ for node_index in kikimr.compute_plane.kikimr_cluster.nodes:
+ value = kikimr.compute_plane.get_sensors(node_index, "yq").find_sensor(
+ {"subsystem": "row_dispatcher", "sensor": sensor})
+ count += value if value is not None else 0
+ if count == expected_count:
+ break
+ if not exact_match and count > expected_count:
+ break
+ assert time.time() < deadline, f"Waiting sensor {sensor} value failed, current count {count}"
+ time.sleep(1)
+ pass
+
+
+class TestPqRowDispatcher(TestYdsBase):
+
+ @yq_v1
+ def test_read_raw_format_without_row_dispatcher(self, kikimr, client):
+ client.create_yds_connection(
+ YDS_CONNECTION, os.getenv("YDB_DATABASE"), os.getenv("YDB_ENDPOINT"), shared_reading=True
+ )
+ self.init_topics("test_read_raw_format_without_row_dispatcher", create_output=False)
+
+ output_topic = "pq_test_pq_read_write_output"
+
+ create_stream(output_topic, partitions_count=1)
+ create_read_rule(output_topic, self.consumer_name)
+
+ sql = Rf'''
+ INSERT INTO {YDS_CONNECTION}.`{output_topic}`
+ SELECT * FROM {YDS_CONNECTION}.`{self.input_topic}`;'''
+
+ query_id = start_yds_query(kikimr, client, sql)
+ data = ['{"time" = 101;}', '{"time" = 102;}']
+
+ self.write_stream(data)
+ expected = data
+ assert self.read_stream(len(expected), topic_path=output_topic) == expected
+
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 0)
+ stop_yds_query(client, query_id)
+
+ @yq_v1
+ def test_simple_not_null(self, kikimr, client):
+ client.create_yds_connection(
+ YDS_CONNECTION, os.getenv("YDB_DATABASE"), os.getenv("YDB_ENDPOINT"), shared_reading=True
+ )
+ self.init_topics("test_simple_not_null")
+
+ sql = Rf'''
+ INSERT INTO {YDS_CONNECTION}.`{self.output_topic}`
+ SELECT Cast(time as String) FROM {YDS_CONNECTION}.`{self.input_topic}`
+ WITH (format=json_each_row, SCHEMA (time Int32 NOT NULL, data String NOT NULL));'''
+
+ query_id = start_yds_query(kikimr, client, sql)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 1)
+ time.sleep(10)
+
+ data = [
+ '{"time": 101, "data": "hello1", "event": "event1"}',
+ '{"time": 102, "data": "hello2", "event": "event2"}',
+ '{"time": 103, "data": "hello3", "event": "event3"}',
+ ]
+
+ self.write_stream(data)
+ expected = ['101', '102', '103']
+ assert self.read_stream(len(expected), topic_path=self.output_topic) == expected
+
+ wait_actor_count(kikimr, "DQ_PQ_READ_ACTOR", 1)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 1)
+
+ stop_yds_query(client, query_id)
+ # Assert that all read rules were removed after query stops
+ read_rules = list_read_rules(self.input_topic)
+ assert len(read_rules) == 0, read_rules
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 0)
+
+ @yq_v1
+ @pytest.mark.skip(reason="Is not implemented")
+ def test_simple_optional(self, kikimr, client):
+ client.create_yds_connection(
+ YDS_CONNECTION, os.getenv("YDB_DATABASE"), os.getenv("YDB_ENDPOINT"), shared_reading=True
+ )
+ self.init_topics("test_simple_optional")
+
+ sql = Rf'''
+ INSERT INTO {YDS_CONNECTION}.`{self.output_topic}`
+ SELECT Cast(time as String) FROM {YDS_CONNECTION}.`{self.input_topic}`
+ WITH (format=json_each_row, SCHEMA (time Int32 NOT NULL, data String));'''
+
+ query_id = start_yds_query(kikimr, client, sql)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 1)
+
+ data = ['{"time": 101, "data": "hello1", "event": "event1"}', '{"time": 102, "event": "event2"}']
+
+ self.write_stream(data)
+ expected = ['101', '102']
+ assert self.read_stream(len(expected), topic_path=self.output_topic) == expected
+
+ wait_actor_count(kikimr, "DQ_PQ_READ_ACTOR", 1)
+
+ stop_yds_query(client, query_id)
+ # Assert that all read rules were removed after query stops
+ read_rules = list_read_rules(self.input_topic)
+ assert len(read_rules) == 0, read_rules
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 0)
+
+ @yq_v1
+ def test_scheme_error(self, kikimr, client):
+ client.create_yds_connection(
+ YDS_CONNECTION, os.getenv("YDB_DATABASE"), os.getenv("YDB_ENDPOINT"), shared_reading=True
+ )
+ self.init_topics("test_scheme_error")
+
+ sql = Rf'''
+ INSERT INTO {YDS_CONNECTION}.`{self.output_topic}`
+ SELECT Cast(time as String) FROM {YDS_CONNECTION}.`{self.input_topic}`
+ WITH (format=json_each_row, SCHEMA (time Int32 NOT NULL, data String NOT NULL));'''
+
+ query_id = start_yds_query(kikimr, client, sql)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 1)
+
+ data = ['{"this": "is", not json}', '{"noch einmal / nicht json"}']
+ self.write_stream(data)
+
+ client.wait_query_status(query_id, fq.QueryMeta.FAILED)
+ issues = str(client.describe_query(query_id).result.query.issue)
+ assert "Failed to unwrap empty optional" in issues, "Incorrect Issues: " + issues
+
+ wait_actor_count(kikimr, "DQ_PQ_READ_ACTOR", 0)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 0)
+
+ query_id = start_yds_query(kikimr, client, sql)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 1)
+ data = ['{"time": 101, "data": "hello1", "event": "event1"}']
+ self.write_stream(data)
+ expected = ['101']
+ assert self.read_stream(len(expected), topic_path=self.output_topic) == expected
+ stop_yds_query(client, query_id)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 0)
+
+ @yq_v1
+ def test_filter(self, kikimr, client):
+ client.create_yds_connection(
+ YDS_CONNECTION, os.getenv("YDB_DATABASE"), os.getenv("YDB_ENDPOINT"), shared_reading=True
+ )
+ self.init_topics("test_filter")
+
+ sql = Rf'''
+ INSERT INTO {YDS_CONNECTION}.`{self.output_topic}`
+ SELECT Cast(time as String) FROM {YDS_CONNECTION}.`{self.input_topic}`
+ WITH (format=json_each_row, SCHEMA (time UInt64 NOT NULL, data String NOT NULL, event String NOT NULL))
+ WHERE time > 101UL or event = "event666";'''
+
+ query_id = start_yds_query(kikimr, client, sql)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 1)
+
+ data = [
+ '{"time": 101, "data": "hello1", "event": "event1"}',
+ '{"time": 102, "data": "hello2", "event": "event2"}',
+ ]
+
+ self.write_stream(data)
+ expected = ['102']
+ assert self.read_stream(len(expected), topic_path=self.output_topic) == expected
+
+ wait_actor_count(kikimr, "DQ_PQ_READ_ACTOR", 1)
+
+ stop_yds_query(client, query_id)
+ # Assert that all read rules were removed after query stops
+ read_rules = list_read_rules(self.input_topic)
+ assert len(read_rules) == 0, read_rules
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 0)
+
+ issues = str(client.describe_query(query_id).result.query.transient_issue)
+ assert "Row dispatcher will use the predicate: WHERE (`time` > 101" in issues, "Incorrect Issues: " + issues
+
+ @yq_v1
+ def test_filter_with_mr(self, kikimr, client):
+ client.create_yds_connection(
+ YDS_CONNECTION, os.getenv("YDB_DATABASE"), os.getenv("YDB_ENDPOINT"), shared_reading=True
+ )
+ self.init_topics("test_filter_with_mr")
+
+ sql = Rf'''
+ pragma FeatureR010="prototype";
+ pragma config.flags("TimeOrderRecoverDelay", "-10");
+ pragma config.flags("TimeOrderRecoverAhead", "10");
+
+ $data =
+ SELECT * FROM {YDS_CONNECTION}.`{self.input_topic}`
+ WITH (format=json_each_row, SCHEMA (time UInt64 NOT NULL, event_class String NOT NULL, event_type UInt64 NOT NULL))
+ WHERE event_class = "event_class2";
+
+ $match =
+ SELECT * FROM $data
+ MATCH_RECOGNIZE(
+ ORDER BY CAST(time as Timestamp)
+ MEASURES
+ LAST(M1.event_type) as event_type
+ ONE ROW PER MATCH
+ PATTERN ( M1 )
+ DEFINE
+ M1 as
+ M1.event_class = "event_class2"
+ );
+
+ INSERT INTO {YDS_CONNECTION}.`{self.output_topic}`
+ SELECT ToBytes(Unwrap(Json::SerializeJson(Yson::From(TableRow())))) FROM $match;
+ '''
+
+ query_id = start_yds_query(kikimr, client, sql)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 1)
+
+ data = [
+ '{"time": 100, "event_class": "event_class1", "event_type": 1}',
+ '{"time": 105, "event_class": "event_class2", "event_type": 2}',
+ '{"time": 110, "event_class": "event_class2", "event_type": 3}',
+ '{"time": 116, "event_class": "event_class2", "event_type": 4}'
+ ]
+
+ self.write_stream(data)
+ expected = ['{"event_type":2}']
+ assert self.read_stream(len(expected), topic_path=self.output_topic) == expected
+
+ stop_yds_query(client, query_id)
+
+ issues = str(client.describe_query(query_id).result.query.transient_issue)
+ assert "Row dispatcher will use the predicate: WHERE `event_class` =" in issues, "Incorrect Issues: " + issues
+
+ @yq_v1
+ def test_start_new_query(self, kikimr, client):
+ client.create_yds_connection(
+ YDS_CONNECTION, os.getenv("YDB_DATABASE"), os.getenv("YDB_ENDPOINT"), shared_reading=True
+ )
+ self.init_topics("test_start_new_query", create_output=False)
+
+ output_topic1 = "pq_test_pq_read_write_output1"
+ output_topic2 = "pq_test_pq_read_write_output2"
+ output_topic3 = "pq_test_pq_read_write_output3"
+ create_stream(output_topic1, partitions_count=1)
+ create_read_rule(output_topic1, self.consumer_name)
+
+ create_stream(output_topic2, partitions_count=1)
+ create_read_rule(output_topic2, self.consumer_name)
+
+ create_stream(output_topic3, partitions_count=1)
+ create_read_rule(output_topic3, self.consumer_name)
+
+ sql1 = Rf'''
+ INSERT INTO {YDS_CONNECTION}.`{output_topic1}`
+ SELECT Cast(time as String) FROM {YDS_CONNECTION}.`{self.input_topic}`
+ WITH (format=json_each_row, SCHEMA (time Int32 NOT NULL, data String NOT NULL));'''
+ sql2 = Rf'''
+ INSERT INTO {YDS_CONNECTION}.`{output_topic2}`
+ SELECT Cast(time as String) FROM {YDS_CONNECTION}.`{self.input_topic}`
+ WITH (format=json_each_row, SCHEMA (time Int32 NOT NULL, data String NOT NULL));'''
+ query_id1 = start_yds_query(kikimr, client, sql1)
+ query_id2 = start_yds_query(kikimr, client, sql2)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 1)
+
+ data = [
+ '{"time": 101, "data": "hello1", "event": "event1"}',
+ '{"time": 102, "data": "hello2", "event": "event2"}',
+ ]
+
+ self.write_stream(data)
+ expected = ['101', '102']
+ assert self.read_stream(len(expected), topic_path=output_topic1) == expected
+ assert self.read_stream(len(expected), topic_path=output_topic2) == expected
+
+ wait_actor_count(kikimr, "DQ_PQ_READ_ACTOR", 2)
+
+ # nothing unnecessary...
+ assert not read_stream(output_topic1, 1, True, self.consumer_name, timeout=1)
+ assert not read_stream(output_topic2, 1, True, self.consumer_name, timeout=1)
+
+ sql3 = Rf'''
+ INSERT INTO {YDS_CONNECTION}.`{output_topic3}`
+ SELECT Cast(time as String) FROM {YDS_CONNECTION}.`{self.input_topic}`
+ WITH (format=json_each_row, SCHEMA (time Int32 NOT NULL, data String NOT NULL));'''
+ query_id3 = start_yds_query(kikimr, client, sql3)
+
+ data = [
+ '{"time": 103, "data": "hello3", "event": "event3"}',
+ '{"time": 104, "data": "hello4", "event": "event4"}',
+ ]
+
+ self.write_stream(data)
+ expected = ['103', '104']
+
+ assert self.read_stream(len(expected), topic_path=output_topic1) == expected
+ assert self.read_stream(len(expected), topic_path=output_topic2) == expected
+ assert self.read_stream(len(expected), topic_path=output_topic3) == expected
+
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 1)
+
+ assert not read_stream(output_topic1, 1, True, self.consumer_name, timeout=1)
+ assert not read_stream(output_topic2, 1, True, self.consumer_name, timeout=1)
+ assert not read_stream(output_topic3, 1, True, self.consumer_name, timeout=1)
+
+ stop_yds_query(client, query_id1)
+ stop_yds_query(client, query_id2)
+ stop_yds_query(client, query_id3)
+
+ # Assert that all read rules were removed after query stops
+ read_rules = list_read_rules(self.input_topic)
+ assert len(read_rules) == 0, read_rules
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 0)
+
+ @yq_v1
+ def test_stop_start(self, kikimr, client):
+ client.create_yds_connection(
+ YDS_CONNECTION, os.getenv("YDB_DATABASE"), os.getenv("YDB_ENDPOINT"), shared_reading=True
+ )
+ self.init_topics("test_stop_start", create_output=False)
+
+ output_topic = "test_stop_start"
+ create_stream(output_topic, partitions_count=1)
+ create_read_rule(output_topic, self.consumer_name)
+
+ sql1 = Rf'''
+ INSERT INTO {YDS_CONNECTION}.`{output_topic}`
+ SELECT Cast(time as String) FROM {YDS_CONNECTION}.`{self.input_topic}`
+ WITH (format=json_each_row, SCHEMA (time Int32 NOT NULL));'''
+
+ query_id = start_yds_query(kikimr, client, sql1)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 1)
+
+ data = ['{"time": 101}', '{"time": 102}']
+ self.write_stream(data)
+ expected = ['101', '102']
+ assert self.read_stream(len(expected), topic_path=output_topic) == expected
+
+ kikimr.compute_plane.wait_completed_checkpoints(
+ query_id, kikimr.compute_plane.get_completed_checkpoints(query_id) + 1
+ )
+ stop_yds_query(client, query_id)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 0)
+
+ client.modify_query(
+ query_id,
+ "continue",
+ sql1,
+ type=fq.QueryContent.QueryType.STREAMING,
+ state_load_mode=fq.StateLoadMode.EMPTY,
+ streaming_disposition=StreamingDisposition.from_last_checkpoint(),
+ )
+ client.wait_query_status(query_id, fq.QueryMeta.RUNNING)
+
+ data = ['{"time": 103}', '{"time": 104}']
+
+ self.write_stream(data)
+ expected = ['103', '104']
+ assert self.read_stream(len(expected), topic_path=output_topic) == expected
+
+ stop_yds_query(client, query_id)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 0)
+
+ @yq_v1
+ def test_stop_start_with_filter(self, kikimr, client):
+ # Verifies stop/continue of a shared-reading query whose first run uses a
+ # predicate (WHERE time > 200UL) and whose resumed run has no predicate;
+ # rows written during the filtered run (101/102, below the threshold)
+ # must not re-appear after the restart.
+ client.create_yds_connection(
+ YDS_CONNECTION, os.getenv("YDB_DATABASE"), os.getenv("YDB_ENDPOINT"), shared_reading=True
+ )
+ # NOTE(review): topic prefix "test_stop_start" duplicates the one used by
+ # test_stop_start rather than matching this test's name — presumably a
+ # copy-paste; confirm the tests cannot collide on shared topics.
+ self.init_topics("test_stop_start", create_output=False)
+
+ output_topic = "test_stop_start"
+ create_stream(output_topic, partitions_count=1)
+ create_read_rule(output_topic, self.consumer_name)
+
+ sql = Rf'''
+ INSERT INTO {YDS_CONNECTION}.`{output_topic}`
+ SELECT Cast(time as String) FROM {YDS_CONNECTION}.`{self.input_topic}`
+ WITH (format=json_each_row, SCHEMA (time UInt64 NOT NULL))
+ WHERE time > 200UL;'''
+
+ query_id = start_yds_query(kikimr, client, sql)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 1)
+
+ # These rows are below the 200 threshold, so nothing reaches the output;
+ # they only advance the input offsets.
+ data = ['{"time": 101}', '{"time": 102}']
+ self.write_stream(data)
+
+ # NOTE(review): waits for 10 further checkpoints (other tests wait for 1)
+ # — presumably to guarantee the filtered rows' offsets are committed
+ # before the stop; confirm 10 is intentional and not a typo.
+ kikimr.compute_plane.wait_completed_checkpoints(
+ query_id, kikimr.compute_plane.get_completed_checkpoints(query_id) + 10
+ )
+ stop_yds_query(client, query_id)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 0)
+
+ # Resume with the filter removed; reading continues from the checkpoint,
+ # so 101/102 are already past and must not be emitted now.
+ sql = Rf'''
+ INSERT INTO {YDS_CONNECTION}.`{output_topic}`
+ SELECT Cast(time as String) FROM {YDS_CONNECTION}.`{self.input_topic}`
+ WITH (format=json_each_row, SCHEMA (time UInt64 NOT NULL));'''
+
+ client.modify_query(
+ query_id,
+ "continue",
+ sql,
+ type=fq.QueryContent.QueryType.STREAMING,
+ state_load_mode=fq.StateLoadMode.EMPTY,
+ streaming_disposition=StreamingDisposition.from_last_checkpoint(),
+ )
+ client.wait_query_status(query_id, fq.QueryMeta.RUNNING)
+
+ data = ['{"time": 203}', '{"time": 204}']
+ self.write_stream(data)
+ expected = ['203', '204']
+ assert self.read_stream(len(expected), topic_path=output_topic) == expected
+
+ stop_yds_query(client, query_id)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 0)
+
+ @yq_v1
+ def test_restart_compute_node(self, kikimr, client):
+ # Verifies that a shared-reading query survives a restart of a compute
+ # node (node 2) and then of a control node (node 1): after each restart
+ # the query keeps producing results and a checkpoint completes.
+ client.create_yds_connection(
+ YDS_CONNECTION, os.getenv("YDB_DATABASE"), os.getenv("YDB_ENDPOINT"), shared_reading=True
+ )
+ self.init_topics("test_restart_compute_node")
+
+ sql = Rf'''
+ INSERT INTO {YDS_CONNECTION}.`{self.output_topic}`
+ SELECT Cast(time as String) FROM {YDS_CONNECTION}.`{self.input_topic}`
+ WITH (format=json_each_row, SCHEMA (time Int32 NOT NULL));'''
+
+ query_id = start_yds_query(kikimr, client, sql)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 1)
+
+ # Extra "data" field is not in the SCHEMA; only "time" is projected.
+ data = ['{"time": 101, "data": "hello1"}', '{"time": 102, "data": "hello2"}']
+
+ self.write_stream(data)
+ expected = ['101', '102']
+ assert self.read_stream(len(expected), topic_path=self.output_topic) == expected
+
+ # Persist progress so the query can recover after the node restart.
+ kikimr.compute_plane.wait_completed_checkpoints(
+ query_id, kikimr.compute_plane.get_completed_checkpoints(query_id) + 1
+ )
+
+ # Both the read actor and the dispatcher session must be alive before
+ # the restart so we know we are exercising recovery, not initial start.
+ wait_actor_count(kikimr, "DQ_PQ_READ_ACTOR", 1)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 1)
+
+ node_index = 2
+ logging.debug("Restart compute node {}".format(node_index))
+ kikimr.compute_plane.kikimr_cluster.nodes[node_index].stop()
+ kikimr.compute_plane.kikimr_cluster.nodes[node_index].start()
+ kikimr.compute_plane.wait_bootstrap(node_index)
+
+ # The query must keep delivering rows after the compute-node restart.
+ data = ['{"time": 103, "data": "hello3"}', '{"time": 104, "data": "hello4"}']
+ self.write_stream(data)
+ expected = ['103', '104']
+ assert self.read_stream(len(expected), topic_path=self.output_topic) == expected
+ kikimr.compute_plane.wait_completed_checkpoints(
+ query_id, kikimr.compute_plane.get_completed_checkpoints(query_id) + 1
+ )
+
+ # Now restart a control-plane node and repeat the check.
+ node_index = 1
+ logging.debug("Restart compute node {}".format(node_index))
+ kikimr.control_plane.kikimr_cluster.nodes[node_index].stop()
+ kikimr.control_plane.kikimr_cluster.nodes[node_index].start()
+ kikimr.control_plane.wait_bootstrap(node_index)
+
+ data = ['{"time": 105, "data": "hello5"}', '{"time": 106, "data": "hello6"}']
+ self.write_stream(data)
+ expected = ['105', '106']
+ assert self.read_stream(len(expected), topic_path=self.output_topic) == expected
+
+ stop_yds_query(client, query_id)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 0)
+
+ @yq_v1
+ def test_3_sessions(self, kikimr, client):
+ # Verifies that three queries reading the same input topic share a single
+ # row-dispatcher session (wait_actor_count expects 1, not 3), that the
+ # shared session survives one query stopping and resuming, and that all
+ # sessions are gone once every query is stopped.
+ client.create_yds_connection(
+ YDS_CONNECTION, os.getenv("YDB_DATABASE"), os.getenv("YDB_ENDPOINT"), shared_reading=True
+ )
+ self.init_topics("test_3_session", create_output=False)
+
+ # One output topic per query; all three consume the same input topic.
+ output_topic1 = "test_3_session1"
+ output_topic2 = "test_3_session2"
+ output_topic3 = "test_3_session3"
+ create_stream(output_topic1, partitions_count=1)
+ create_read_rule(output_topic1, self.consumer_name)
+
+ create_stream(output_topic2, partitions_count=1)
+ create_read_rule(output_topic2, self.consumer_name)
+
+ create_stream(output_topic3, partitions_count=1)
+ create_read_rule(output_topic3, self.consumer_name)
+
+ # Each query re-serializes the whole row to JSON, so the output is
+ # expected to equal the input messages verbatim.
+ sql1 = Rf'''
+ INSERT INTO {YDS_CONNECTION}.`{output_topic1}`
+ SELECT Unwrap(Json::SerializeJson(Yson::From(TableRow()))) FROM {YDS_CONNECTION}.`{self.input_topic}`
+ WITH (format=json_each_row, SCHEMA (time Int32 NOT NULL));'''
+ sql2 = Rf'''
+ INSERT INTO {YDS_CONNECTION}.`{output_topic2}`
+ SELECT Unwrap(Json::SerializeJson(Yson::From(TableRow()))) FROM {YDS_CONNECTION}.`{self.input_topic}`
+ WITH (format=json_each_row, SCHEMA (time Int32 NOT NULL));'''
+
+ sql3 = Rf'''
+ INSERT INTO {YDS_CONNECTION}.`{output_topic3}`
+ SELECT Unwrap(Json::SerializeJson(Yson::From(TableRow()))) FROM {YDS_CONNECTION}.`{self.input_topic}`
+ WITH (format=json_each_row, SCHEMA (time Int32 NOT NULL));'''
+ query_id1 = start_yds_query(kikimr, client, sql1)
+ query_id2 = start_yds_query(kikimr, client, sql2)
+ query_id3 = start_yds_query(kikimr, client, sql3)
+
+ data = ['{"time":101}', '{"time":102}']
+
+ self.write_stream(data)
+ expected = data
+ assert self.read_stream(len(expected), topic_path=output_topic1) == expected
+ assert self.read_stream(len(expected), topic_path=output_topic2) == expected
+ assert self.read_stream(len(expected), topic_path=output_topic3) == expected
+ # Key assertion: three readers of one topic share a single session.
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 1)
+
+ kikimr.compute_plane.wait_completed_checkpoints(
+ query_id1, kikimr.compute_plane.get_completed_checkpoints(query_id1) + 1
+ )
+ stop_yds_query(client, query_id1)
+
+ data = ['{"time":103}', '{"time":104}']
+ self.write_stream(data)
+ expected = data
+ # The stopped query must not produce output, while the other two keep
+ # receiving rows from the still-shared session.
+ assert not read_stream(output_topic1, 1, True, self.consumer_name, timeout=1)
+ assert self.read_stream(len(expected), topic_path=output_topic2) == expected
+ assert self.read_stream(len(expected), topic_path=output_topic3) == expected
+
+ # Resume the first query from its last checkpoint; it should catch up
+ # on the rows written while it was stopped.
+ client.modify_query(
+ query_id1,
+ "continue",
+ sql1,
+ type=fq.QueryContent.QueryType.STREAMING,
+ state_load_mode=fq.StateLoadMode.EMPTY,
+ streaming_disposition=StreamingDisposition.from_last_checkpoint(),
+ )
+ client.wait_query_status(query_id1, fq.QueryMeta.RUNNING)
+
+ assert self.read_stream(len(expected), topic_path=output_topic1) == expected
+
+ data = ['{"time":105}', '{"time":106}']
+ self.write_stream(data)
+ expected = data
+ assert self.read_stream(len(expected), topic_path=output_topic1) == expected
+ assert self.read_stream(len(expected), topic_path=output_topic2) == expected
+ assert self.read_stream(len(expected), topic_path=output_topic3) == expected
+
+ # Still one shared session after the rejoin.
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 1)
+
+ stop_yds_query(client, query_id1)
+ stop_yds_query(client, query_id2)
+ stop_yds_query(client, query_id3)
+
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 0)
+
+ @yq_v1
+ def test_many_partitions(self, kikimr, client):
+ # Verifies that reading a 4-partition input topic spawns one dispatcher
+ # session per partition (4 total) and that rows written with different
+ # partition keys are all delivered (order across partitions is not
+ # guaranteed, hence the sorted() comparison below).
+ client.create_yds_connection(
+ YDS_CONNECTION, os.getenv("YDB_DATABASE"), os.getenv("YDB_ENDPOINT"), shared_reading=True
+ )
+ # NOTE(review): topic prefix "test_simple_not_null" does not match this
+ # test's name — presumably copied from another test; confirm it does not
+ # clash with a test that uses the same prefix.
+ self.init_topics("test_simple_not_null", partitions_count=4)
+
+ sql = Rf'''
+ INSERT INTO {YDS_CONNECTION}.`{self.output_topic}`
+ SELECT Cast(time as String) FROM {YDS_CONNECTION}.`{self.input_topic}`
+ WITH (format=json_each_row, SCHEMA (time Int32 NOT NULL));'''
+
+ query_id = start_yds_query(kikimr, client, sql)
+ # One session per partition.
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 4)
+
+ # Four batches of 10 messages, each routed by a distinct partition key.
+ input_messages1 = [Rf'''{{"time": {c}}}''' for c in range(100, 110)]
+ write_stream(self.input_topic, input_messages1, "partition_key1")
+
+ input_messages2 = [Rf'''{{"time": {c}}}''' for c in range(110, 120)]
+ write_stream(self.input_topic, input_messages2, "partition_key2")
+
+ input_messages3 = [Rf'''{{"time": {c}}}''' for c in range(120, 130)]
+ write_stream(self.input_topic, input_messages3, "partition_key3")
+
+ input_messages4 = [Rf'''{{"time": {c}}}''' for c in range(130, 140)]
+ write_stream(self.input_topic, input_messages4, "partition_key4")
+
+ # 100..139 as strings; sorted because partition interleaving is arbitrary.
+ expected = [Rf'''{c}''' for c in range(100, 140)]
+ assert sorted(self.read_stream(len(expected), topic_path=self.output_topic)) == expected
+
+ stop_yds_query(client, query_id)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 0)
+
+ @yq_v1
+ def test_sensors(self, kikimr, client):
+ # Verifies the row-dispatcher monitoring counters: after one row flows
+ # through a shared-reading query, ClientsCount is exactly 1 and the
+ # RowsSent/IncomingRequests/RowsRead counters are at least 1
+ # (exact_match=False); after the query stops, ClientsCount drops to 0.
+ client.create_yds_connection(
+ YDS_CONNECTION, os.getenv("YDB_DATABASE"), os.getenv("YDB_ENDPOINT"), shared_reading=True
+ )
+ self.init_topics("test_sensors")
+
+ sql = Rf'''
+ INSERT INTO {YDS_CONNECTION}.`{self.output_topic}`
+ SELECT Cast(time as String) FROM {YDS_CONNECTION}.`{self.input_topic}`
+ WITH (format=json_each_row, SCHEMA (time Int32 NOT NULL));'''
+
+ query_id = start_yds_query(kikimr, client, sql)
+
+ # Push one row end-to-end so every counter below has been touched.
+ self.write_stream(['{"time": 101}'])
+ assert self.read_stream(1, topic_path=self.output_topic) == ['101']
+
+ wait_actor_count(kikimr, "DQ_PQ_READ_ACTOR", 1)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 1)
+ wait_row_dispatcher_sensor_value(kikimr, "ClientsCount", 1)
+ wait_row_dispatcher_sensor_value(kikimr, "RowsSent", 1, exact_match=False)
+ wait_row_dispatcher_sensor_value(kikimr, "IncomingRequests", 1, exact_match=False)
+ wait_row_dispatcher_sensor_value(kikimr, "RowsRead", 1, exact_match=False)
+
+ stop_yds_query(client, query_id)
+
+ # Everything must be released after the stop.
+ wait_actor_count(kikimr, "DQ_PQ_READ_ACTOR", 0)
+ wait_actor_count(kikimr, "FQ_ROW_DISPATCHER_SESSION", 0)
+ wait_row_dispatcher_sensor_value(kikimr, "ClientsCount", 0)
diff --git a/ydb/tests/fq/yds/ya.make b/ydb/tests/fq/yds/ya.make
index 16d33167e21..233b52353d6 100644
--- a/ydb/tests/fq/yds/ya.make
+++ b/ydb/tests/fq/yds/ya.make
@@ -42,6 +42,7 @@ TEST_SRCS(
test_recovery_match_recognize.py
test_recovery_mz.py
test_restart_query.py
+ test_row_dispatcher.py
test_select_1.py
test_select_limit_db_id.py
test_select_limit.py
diff --git a/ydb/tests/tools/fq_runner/fq_client.py b/ydb/tests/tools/fq_runner/fq_client.py
index 001b7cf3788..7043657cf6a 100644
--- a/ydb/tests/tools/fq_runner/fq_client.py
+++ b/ydb/tests/tools/fq_runner/fq_client.py
@@ -405,7 +405,7 @@ class FederatedQueryClient(object):
@retry.retry_intrusive
def create_yds_connection(self, name, database=None, endpoint=None, database_id=None,
visibility=fq.Acl.Visibility.PRIVATE, auth_method=AuthMethod.no_auth(),
- check_issues=True):
+ check_issues=True, shared_reading=False):
assert (database_id is not None and database is None and endpoint is None) or (
database_id is None and database is not None and endpoint is not None)
request = fq.CreateConnectionRequest()
@@ -417,6 +417,8 @@ class FederatedQueryClient(object):
yds.database = database
yds.endpoint = endpoint
+ yds.shared_reading = shared_reading
+
yds.auth.CopyFrom(auth_method)
request.content.acl.visibility = visibility
return self.create_connection(request, check_issues)
diff --git a/ydb/tests/tools/fq_runner/kikimr_runner.py b/ydb/tests/tools/fq_runner/kikimr_runner.py
index a10bde35d16..3cbdd565d4a 100644
--- a/ydb/tests/tools/fq_runner/kikimr_runner.py
+++ b/ydb/tests/tools/fq_runner/kikimr_runner.py
@@ -117,6 +117,7 @@ class BaseTenant(abc.ABC):
self.enable_logging("FQ_QUOTA_PROXY")
self.enable_logging("PUBLIC_HTTP")
self.enable_logging("FQ_CONTROL_PLANE_CONFIG")
+ self.enable_logging("FQ_ROW_DISPATCHER", LogLevels.TRACE)
# self.enable_logging("GRPC_SERVER")
@abc.abstractclassmethod
@@ -363,7 +364,7 @@ class BaseTenant(abc.ABC):
completed = self.get_completed_checkpoints(query_id, expect_counters_exist=expect_counters_exist)
if completed >= checkpoints_count:
break
- assert time.time() < deadline, "Wait zero checkpoint failed"
+ assert time.time() < deadline, "Wait zero checkpoint failed, actual completed: " + str(completed)
time.sleep(yatest_common.plain_or_under_sanitizer(0.5, 2))
def wait_zero_checkpoint(self, query_id, timeout=yatest_common.plain_or_under_sanitizer(30, 150),
@@ -514,6 +515,17 @@ class YqTenant(BaseTenant):
self.fill_storage_config(fq_config['checkpoint_coordinator']['storage'],
"CheckpointCoordinatorStorage_" + self.uuid)
+ fq_config['row_dispatcher'] = {
+ 'enabled': True,
+ 'timeout_before_start_session_sec': 2,
+ 'send_status_period_sec': 2,
+ 'max_session_used_memory': 1000000,
+ 'without_consumer': True}
+ fq_config['row_dispatcher']['coordinator'] = {'coordination_node_path': "row_dispatcher"}
+ fq_config['row_dispatcher']['coordinator']['database'] = {}
+ self.fill_storage_config(fq_config['row_dispatcher']['coordinator']['database'],
+ "RowDispatcher_" + self.uuid)
+
fq_config['quotas_manager'] = {'enabled': True}
fq_config['rate_limiter'] = {'enabled': True}