aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniil Cherednik <dan.cherednik@gmail.com>2023-07-20 22:11:42 +0300
committerDaniil Cherednik <dan.cherednik@gmail.com>2023-07-20 22:11:42 +0300
commitd63f0523399ab2d93c1c6ca6c2dca082be5e52ba (patch)
tree1123a7aa3ac1d42f3ceaae288e639931d9dca92a
parent068d4453cf9fc68c875eee73f5c637bb076f6a71 (diff)
downloadydb-d63f0523399ab2d93c1c6ca6c2dca082be5e52ba.tar.gz
Ydb stable 23-2-1123.2.11
x-stable-origin-commit: 758ace972646c843c5e0785d75c8f4fe044580a1
-rw-r--r--library/cpp/actors/interconnect/CMakeLists.darwin.txt1
-rw-r--r--library/cpp/actors/interconnect/CMakeLists.linux-aarch64.txt1
-rw-r--r--library/cpp/actors/interconnect/CMakeLists.linux.txt1
-rw-r--r--library/cpp/actors/interconnect/events_local.h15
-rw-r--r--library/cpp/actors/interconnect/handshake_broker.h157
-rw-r--r--library/cpp/actors/interconnect/interconnect_common.h2
-rw-r--r--library/cpp/actors/interconnect/interconnect_handshake.cpp35
-rw-r--r--library/cpp/actors/interconnect/interconnect_handshake.h2
-rw-r--r--library/cpp/actors/interconnect/interconnect_tcp_server.cpp3
-rw-r--r--library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h7
-rw-r--r--library/cpp/actors/interconnect/ut/lib/node.h61
-rw-r--r--library/cpp/actors/interconnect/ut/lib/test_events.h5
-rw-r--r--library/cpp/actors/interconnect/ut/protos/interconnect_test.proto3
-rw-r--r--library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.darwin.txt45
-rw-r--r--library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.linux-aarch64.txt48
-rw-r--r--library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.linux.txt50
-rw-r--r--library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.txt15
-rw-r--r--library/cpp/actors/interconnect/ut_huge_cluster/huge_cluster.cpp167
-rw-r--r--library/cpp/actors/util/rc_buf.h7
-rw-r--r--library/cpp/monlib/service/pages/index_mon_page.cpp23
-rw-r--r--library/cpp/monlib/service/pages/index_mon_page.h8
-rw-r--r--library/cpp/threading/CMakeLists.txt1
-rw-r--r--library/cpp/threading/hot_swap/CMakeLists.darwin.txt18
-rw-r--r--library/cpp/threading/hot_swap/CMakeLists.linux-aarch64.txt19
-rw-r--r--library/cpp/threading/hot_swap/CMakeLists.linux.txt19
-rw-r--r--library/cpp/threading/hot_swap/CMakeLists.txt15
-rw-r--r--ydb/core/cms/json_proxy_proto.h2
-rw-r--r--ydb/core/driver_lib/run/kikimr_services_initializers.cpp314
-rw-r--r--ydb/core/grpc_services/CMakeLists.darwin.txt1
-rw-r--r--ydb/core/grpc_services/CMakeLists.linux-aarch64.txt1
-rw-r--r--ydb/core/grpc_services/CMakeLists.linux.txt1
-rw-r--r--ydb/core/grpc_services/rpc_keyvalue.cpp1048
-rw-r--r--ydb/core/grpc_services/service_keyvalue.h23
-rw-r--r--ydb/core/health_check/health_check.cpp1
-rw-r--r--ydb/core/health_check/health_check_ut.cpp263
-rw-r--r--ydb/core/http_proxy/custom_metrics.h4
-rw-r--r--ydb/core/http_proxy/events.h20
-rw-r--r--ydb/core/http_proxy/http_req.cpp130
-rw-r--r--ydb/core/http_proxy/http_req.h2
-rw-r--r--ydb/core/http_proxy/http_service.cpp2
-rw-r--r--ydb/core/kqp/compile_service/kqp_compile_service.cpp6
-rw-r--r--ydb/core/kqp/executer_actor/kqp_data_executer.cpp13
-rw-r--r--ydb/core/kqp/executer_actor/kqp_executer_impl.h11
-rw-r--r--ydb/core/kqp/executer_actor/kqp_executer_stats.h4
-rw-r--r--ydb/core/kqp/executer_actor/kqp_planner.cpp9
-rw-r--r--ydb/core/kqp/executer_actor/kqp_planner.h2
-rw-r--r--ydb/core/kqp/executer_actor/kqp_result_channel.cpp22
-rw-r--r--ydb/core/kqp/executer_actor/kqp_result_channel.h4
-rw-r--r--ydb/core/kqp/executer_actor/kqp_scan_executer.cpp4
-rw-r--r--ydb/core/kqp/host/kqp_host.cpp4
-rw-r--r--ydb/core/kqp/node_service/kqp_node_service.cpp29
-rw-r--r--ydb/core/kqp/provider/yql_kikimr_gateway.cpp27
-rw-r--r--ydb/core/kqp/provider/yql_kikimr_gateway.h12
-rw-r--r--ydb/core/kqp/runtime/CMakeLists.darwin.txt1
-rw-r--r--ydb/core/kqp/runtime/CMakeLists.linux-aarch64.txt1
-rw-r--r--ydb/core/kqp/runtime/CMakeLists.linux.txt1
-rw-r--r--ydb/core/kqp/runtime/kqp_read_actor.cpp81
-rw-r--r--ydb/core/kqp/runtime/kqp_read_actor.h30
-rw-r--r--ydb/core/kqp/ut/olap/kqp_olap_ut.cpp124
-rw-r--r--ydb/core/kqp/ut/scan/kqp_split_ut.cpp7
-rw-r--r--ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp39
-rw-r--r--ydb/core/mind/node_broker.cpp22
-rw-r--r--ydb/core/mind/node_broker__load_state.cpp2
-rw-r--r--ydb/core/mind/node_broker_impl.h5
-rw-r--r--ydb/core/mon/async_http_mon.cpp16
-rw-r--r--ydb/core/mon/async_http_mon.h2
-rw-r--r--ydb/core/mon/mon.cpp3
-rw-r--r--ydb/core/mon/mon.h3
-rw-r--r--ydb/core/mon/sync_http_mon.cpp4
-rw-r--r--ydb/core/protos/config.proto47
-rw-r--r--ydb/core/protos/console_config.proto3
-rw-r--r--ydb/core/protos/flat_scheme_op.proto8
-rw-r--r--ydb/core/protos/tx_datashard.proto3
-rw-r--r--ydb/core/tablet/tablet_counters_aggregator.cpp43
-rw-r--r--ydb/core/tablet/tablet_counters_aggregator_ut.cpp154
-rw-r--r--ydb/core/tx/columnshard/blob_manager.cpp42
-rw-r--r--ydb/core/tx/columnshard/blob_manager.h6
-rw-r--r--ydb/core/tx/columnshard/columnshard.cpp3
-rw-r--r--ydb/core/tx/columnshard/columnshard__export.cpp29
-rw-r--r--ydb/core/tx/columnshard/columnshard__forget.cpp6
-rw-r--r--ydb/core/tx/columnshard/columnshard__write_index.cpp54
-rw-r--r--ydb/core/tx/columnshard/columnshard_impl.cpp115
-rw-r--r--ydb/core/tx/columnshard/columnshard_impl.h7
-rw-r--r--ydb/core/tx/columnshard/columnshard_private_events.h32
-rw-r--r--ydb/core/tx/columnshard/columnshard_ut_common.h24
-rw-r--r--ydb/core/tx/columnshard/engines/column_engine.h19
-rw-r--r--ydb/core/tx/columnshard/engines/column_engine_logs.cpp128
-rw-r--r--ydb/core/tx/columnshard/engines/column_engine_logs.h11
-rw-r--r--ydb/core/tx/columnshard/export_actor.cpp18
-rw-r--r--ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp518
-rw-r--r--ydb/core/tx/coordinator/coordinator__init.cpp40
-rw-r--r--ydb/core/tx/coordinator/coordinator__schema_upgrade.cpp14
-rw-r--r--ydb/core/tx/coordinator/coordinator_impl.cpp2
-rw-r--r--ydb/core/tx/coordinator/coordinator_impl.h44
-rw-r--r--ydb/core/tx/datashard/cdc_stream_scan.cpp34
-rw-r--r--ydb/core/tx/datashard/change_collector_cdc_stream.cpp2
-rw-r--r--ydb/core/tx/datashard/change_sender_async_index.cpp42
-rw-r--r--ydb/core/tx/datashard/change_sender_common_ops.cpp21
-rw-r--r--ydb/core/tx/datashard/change_sender_common_ops.h9
-rw-r--r--ydb/core/tx/datashard/datashard.cpp2
-rw-r--r--ydb/core/tx/datashard/datashard__read_iterator.cpp41
-rw-r--r--ydb/core/tx/datashard/datashard_impl.h2
-rw-r--r--ydb/core/tx/datashard/datashard_ut_change_exchange.cpp200
-rw-r--r--ydb/core/tx/datashard/datashard_ut_read_iterator.cpp84
-rw-r--r--ydb/core/tx/datashard/export_s3_base_uploader.h50
-rw-r--r--ydb/core/tx/datashard/read_iterator.h4
-rw-r--r--ydb/core/tx/scheme_board/cache.cpp4
-rw-r--r--ydb/core/tx/scheme_cache/scheme_cache.h1
-rw-r--r--ydb/core/tx/schemeshard/schemeshard__init.cpp4
-rw-r--r--ydb/core/tx/schemeshard/schemeshard__operation.cpp18
-rw-r--r--ydb/core/tx/schemeshard/schemeshard__operation_alter_solomon.cpp8
-rw-r--r--ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp2
-rw-r--r--ydb/core/tx/schemeshard/schemeshard__operation_create_olap_table.cpp31
-rw-r--r--ydb/core/tx/schemeshard/schemeshard__operation_create_solomon.cpp22
-rw-r--r--ydb/core/tx/schemeshard/schemeshard__operation_mkdir.cpp8
-rw-r--r--ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp22
-rw-r--r--ydb/core/tx/schemeshard/schemeshard_export__create.cpp57
-rw-r--r--ydb/core/tx/schemeshard/schemeshard_impl.cpp45
-rw-r--r--ydb/core/tx/schemeshard/schemeshard_impl.h8
-rw-r--r--ydb/core/tx/schemeshard/schemeshard_import__create.cpp20
-rw-r--r--ydb/core/tx/schemeshard/ut_export.cpp179
-rw-r--r--ydb/core/tx/schemeshard/ut_olap.cpp81
-rw-r--r--ydb/core/tx/schemeshard/ut_olap_reboots.cpp8
-rw-r--r--ydb/core/tx/tiering/manager.cpp4
-rw-r--r--ydb/core/tx/tiering/manager.h7
-rw-r--r--ydb/core/tx/tiering/s3_actor.cpp116
-rw-r--r--ydb/core/viewer/json_cluster.h216
-rw-r--r--ydb/core/viewer/json_storage.h13
-rw-r--r--ydb/library/yql/sql/v1/SQLv1.g.in10
-rw-r--r--ydb/library/yql/sql/v1/format/sql_format_ut.cpp8
-rw-r--r--ydb/library/yql/sql/v1/node.cpp3
-rw-r--r--ydb/library/yql/sql/v1/node.h10
-rw-r--r--ydb/library/yql/sql/v1/query.cpp32
-rw-r--r--ydb/library/yql/sql/v1/sql.cpp12
-rw-r--r--ydb/library/yql/sql/v1/sql_ut.cpp68
-rw-r--r--ydb/public/api/grpc/CMakeLists.darwin.txt1
-rw-r--r--ydb/public/api/grpc/CMakeLists.linux-aarch64.txt1
-rw-r--r--ydb/public/api/grpc/CMakeLists.linux.txt1
-rw-r--r--ydb/public/api/grpc/ydb_keyvalue_v1.proto49
-rw-r--r--ydb/public/api/protos/CMakeLists.darwin.txt1
-rw-r--r--ydb/public/api/protos/CMakeLists.linux-aarch64.txt1
-rw-r--r--ydb/public/api/protos/CMakeLists.linux.txt1
-rw-r--r--ydb/public/api/protos/ydb_keyvalue.proto544
-rw-r--r--ydb/services/CMakeLists.txt1
-rw-r--r--ydb/services/datastreams/datastreams_ut.cpp2
-rw-r--r--ydb/services/datastreams/put_records_actor.h9
-rw-r--r--ydb/services/keyvalue/CMakeLists.darwin.txt24
-rw-r--r--ydb/services/keyvalue/CMakeLists.linux-aarch64.txt25
-rw-r--r--ydb/services/keyvalue/CMakeLists.linux.txt25
-rw-r--r--ydb/services/keyvalue/CMakeLists.txt15
-rw-r--r--ydb/services/keyvalue/grpc_service.cpp81
-rw-r--r--ydb/services/keyvalue/grpc_service.h37
-rw-r--r--ydb/services/keyvalue/grpc_service_ut.cpp838
-rw-r--r--ydb/services/keyvalue/ut/CMakeLists.darwin.txt49
-rw-r--r--ydb/services/keyvalue/ut/CMakeLists.linux-aarch64.txt52
-rw-r--r--ydb/services/keyvalue/ut/CMakeLists.linux.txt54
-rw-r--r--ydb/services/keyvalue/ut/CMakeLists.txt15
-rw-r--r--ydb/tests/functional/autoconfig/test_actorsystem.py36
-rw-r--r--ydb/tests/library/harness/kikimr_config.py4
159 files changed, 6707 insertions, 1065 deletions
diff --git a/library/cpp/actors/interconnect/CMakeLists.darwin.txt b/library/cpp/actors/interconnect/CMakeLists.darwin.txt
index c0b4981c37..4d13e9ec54 100644
--- a/library/cpp/actors/interconnect/CMakeLists.darwin.txt
+++ b/library/cpp/actors/interconnect/CMakeLists.darwin.txt
@@ -10,6 +10,7 @@ find_package(OpenSSL REQUIRED)
add_subdirectory(mock)
add_subdirectory(ut)
add_subdirectory(ut_fat)
+add_subdirectory(ut_huge_cluster)
add_library(cpp-actors-interconnect)
target_link_libraries(cpp-actors-interconnect PUBLIC
diff --git a/library/cpp/actors/interconnect/CMakeLists.linux-aarch64.txt b/library/cpp/actors/interconnect/CMakeLists.linux-aarch64.txt
index ada1e68d25..4142c4c6b5 100644
--- a/library/cpp/actors/interconnect/CMakeLists.linux-aarch64.txt
+++ b/library/cpp/actors/interconnect/CMakeLists.linux-aarch64.txt
@@ -10,6 +10,7 @@ find_package(OpenSSL REQUIRED)
add_subdirectory(mock)
add_subdirectory(ut)
add_subdirectory(ut_fat)
+add_subdirectory(ut_huge_cluster)
add_library(cpp-actors-interconnect)
target_link_libraries(cpp-actors-interconnect PUBLIC
diff --git a/library/cpp/actors/interconnect/CMakeLists.linux.txt b/library/cpp/actors/interconnect/CMakeLists.linux.txt
index ada1e68d25..4142c4c6b5 100644
--- a/library/cpp/actors/interconnect/CMakeLists.linux.txt
+++ b/library/cpp/actors/interconnect/CMakeLists.linux.txt
@@ -10,6 +10,7 @@ find_package(OpenSSL REQUIRED)
add_subdirectory(mock)
add_subdirectory(ut)
add_subdirectory(ut_fat)
+add_subdirectory(ut_huge_cluster)
add_library(cpp-actors-interconnect)
target_link_libraries(cpp-actors-interconnect PUBLIC
diff --git a/library/cpp/actors/interconnect/events_local.h b/library/cpp/actors/interconnect/events_local.h
index b1b8ae0c75..966cdb763e 100644
--- a/library/cpp/actors/interconnect/events_local.h
+++ b/library/cpp/actors/interconnect/events_local.h
@@ -52,6 +52,9 @@ namespace NActors {
EvProcessPingRequest,
EvGetSecureSocket,
EvSecureSocket,
+ HandshakeBrokerTake,
+ HandshakeBrokerFree,
+ HandshakeBrokerPermit,
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// nonlocal messages; their indices must be preserved in order to work properly while doing rolling update
@@ -98,6 +101,18 @@ namespace NActors {
}
};
+ struct TEvHandshakeBrokerTake: TEventLocal<TEvHandshakeBrokerTake, ui32(ENetwork::HandshakeBrokerTake)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeBrokerTake, "Network: TEvHandshakeBrokerTake")
+ };
+
+ struct TEvHandshakeBrokerFree: TEventLocal<TEvHandshakeBrokerFree, ui32(ENetwork::HandshakeBrokerFree)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeBrokerFree, "Network: TEvHandshakeBrokerFree")
+ };
+
+ struct TEvHandshakeBrokerPermit: TEventLocal<TEvHandshakeBrokerPermit, ui32(ENetwork::HandshakeBrokerPermit)> {
+ DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeBrokerPermit, "Network: TEvHandshakeBrokerPermit")
+ };
+
struct TEvHandshakeAsk: public TEventLocal<TEvHandshakeAsk, ui32(ENetwork::HandshakeAsk)> {
DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeAsk, "Network: TEvHandshakeAsk")
TEvHandshakeAsk(const TActorId& self,
diff --git a/library/cpp/actors/interconnect/handshake_broker.h b/library/cpp/actors/interconnect/handshake_broker.h
new file mode 100644
index 0000000000..9910fb4b71
--- /dev/null
+++ b/library/cpp/actors/interconnect/handshake_broker.h
@@ -0,0 +1,157 @@
+#pragma once
+
+#include <library/cpp/actors/core/actor.h>
+#include <library/cpp/actors/core/hfunc.h>
+#include <library/cpp/actors/interconnect/events_local.h>
+
+#include <deque>
+
+namespace NActors {
+ class TBrokerLeaseHolder {
+ public:
+ TBrokerLeaseHolder(TActorSystem* actorSystem, TActorId waiterId, TActorId brokerId)
+ : ActorSystem(actorSystem)
+ , WaiterId(waiterId)
+ , BrokerId(brokerId) {
+ if (ActorSystem->Send(new IEventHandle(BrokerId, WaiterId, new TEvHandshakeBrokerTake()))) {
+ LeaseRequested = true;
+ }
+ }
+
+ ~TBrokerLeaseHolder() {
+ if (LeaseRequested) {
+ ActorSystem->Send(new IEventHandle(BrokerId, WaiterId, new TEvHandshakeBrokerFree()));
+ }
+ }
+
+ bool IsLeaseRequested() {
+ return LeaseRequested;
+ }
+
+ void ForgetLease() {
+ // only call when TDtorException was caught
+ LeaseRequested = false;
+ }
+
+ private:
+ TActorSystem* ActorSystem;
+ TActorId WaiterId;
+ TActorId BrokerId;
+ bool LeaseRequested = false;
+ };
+
+ class THandshakeBroker : public TActor<THandshakeBroker> {
+ private:
+ enum class ESelectionStrategy {
+ FIFO = 0,
+ LIFO,
+ Random,
+ };
+
+ private:
+ void PermitNext() {
+ if (Capacity == 0 && !Waiters.empty()) {
+ TActorId waiter;
+
+ switch (SelectionStrategy) {
+ case ESelectionStrategy::FIFO:
+ waiter = Waiters.front();
+ Waiters.pop_front();
+ SelectionStrategy = ESelectionStrategy::LIFO;
+ break;
+
+ case ESelectionStrategy::LIFO:
+ waiter = Waiters.back();
+ Waiters.pop_back();
+ SelectionStrategy = ESelectionStrategy::Random;
+ break;
+
+ case ESelectionStrategy::Random: {
+ const auto it = WaiterLookup.begin();
+ waiter = it->first;
+ Waiters.erase(it->second);
+ SelectionStrategy = ESelectionStrategy::FIFO;
+ break;
+ }
+
+ default:
+ Y_FAIL("Unimplimented selection strategy");
+ }
+
+ const size_t n = WaiterLookup.erase(waiter);
+ Y_VERIFY(n == 1);
+
+ Send(waiter, new TEvHandshakeBrokerPermit());
+ PermittedLeases.insert(waiter);
+ } else {
+ Capacity += 1;
+ }
+ }
+
+ private:
+ using TWaiters = std::list<TActorId>;
+ TWaiters Waiters;
+ std::unordered_map<TActorId, TWaiters::iterator> WaiterLookup;
+ std::unordered_set<TActorId> PermittedLeases;
+
+ ESelectionStrategy SelectionStrategy = ESelectionStrategy::FIFO;
+
+ ui32 Capacity;
+
+ void Handle(TEvHandshakeBrokerTake::TPtr &ev) {
+ const TActorId sender = ev->Sender;
+ if (Capacity > 0) {
+ Capacity -= 1;
+ PermittedLeases.insert(sender);
+ Send(sender, new TEvHandshakeBrokerPermit());
+ } else {
+ const auto [it, inserted] = WaiterLookup.try_emplace(sender,
+ Waiters.insert(Waiters.end(), sender));
+ Y_VERIFY(inserted);
+ }
+ }
+
+ void Handle(TEvHandshakeBrokerFree::TPtr& ev) {
+ const TActorId sender = ev->Sender;
+ if (!PermittedLeases.erase(sender)) {
+ // Lease was not permitted yet, remove sender from Waiters queue
+ const auto it = WaiterLookup.find(sender);
+ Y_VERIFY(it != WaiterLookup.end());
+ Waiters.erase(it->second);
+ WaiterLookup.erase(it);
+ }
+ PermitNext();
+ }
+
+ public:
+ THandshakeBroker(ui32 inflightLimit)
+ : TActor(&TThis::StateFunc)
+ , Capacity(inflightLimit)
+ {
+ }
+
+ STFUNC(StateFunc) {
+ Y_UNUSED(ctx);
+ switch (ev->GetTypeRewrite()) {
+ hFunc(TEvHandshakeBrokerTake, Handle);
+ hFunc(TEvHandshakeBrokerFree, Handle);
+
+ default:
+ Y_FAIL("unexpected event 0x%08" PRIx32, ev->GetTypeRewrite());
+ }
+ }
+
+ void Bootstrap() {
+ Become(&TThis::StateFunc);
+ };
+ };
+
+ inline IActor* CreateHandshakeBroker(ui32 maxCapacity) {
+ return new THandshakeBroker(maxCapacity);
+ }
+
+ inline TActorId MakeHandshakeBrokerOutId() {
+ char x[12] = {'I', 'C', 'H', 's', 'h', 'k', 'B', 'r', 'k', 'O', 'u', 't'};
+ return TActorId(0, TStringBuf(std::begin(x), std::end(x)));
+ }
+};
diff --git a/library/cpp/actors/interconnect/interconnect_common.h b/library/cpp/actors/interconnect/interconnect_common.h
index ea6a5310d4..d526621491 100644
--- a/library/cpp/actors/interconnect/interconnect_common.h
+++ b/library/cpp/actors/interconnect/interconnect_common.h
@@ -48,6 +48,7 @@ namespace NActors {
ui32 MaxSerializedEventSize = NActors::EventMaxByteSize;
ui32 PreallocatedBufferSize = 8 << 10; // 8 KB
ui32 NumPreallocatedBuffers = 16;
+ ui32 SocketBacklogSize = 0; // SOMAXCONN if zero
ui32 GetSendBufferSize() const {
ui32 res = 512 * 1024; // 512 kb is the default value for send buffer
@@ -94,6 +95,7 @@ namespace NActors {
std::shared_ptr<TEventFilter> EventFilter;
TString Cookie; // unique random identifier of a node instance (generated randomly at every start)
std::unordered_map<ui16, TString> ChannelName;
+ std::optional<ui32> OutgoingHandshakeInflightLimit;
struct TVersionInfo {
TString Tag; // version tag for this node
diff --git a/library/cpp/actors/interconnect/interconnect_handshake.cpp b/library/cpp/actors/interconnect/interconnect_handshake.cpp
index dc651f3762..8d281ae52e 100644
--- a/library/cpp/actors/interconnect/interconnect_handshake.cpp
+++ b/library/cpp/actors/interconnect/interconnect_handshake.cpp
@@ -1,4 +1,5 @@
#include "interconnect_handshake.h"
+#include "handshake_broker.h"
#include "interconnect_tcp_proxy.h"
#include <library/cpp/actors/core/actor_coroutine.h>
@@ -96,6 +97,8 @@ namespace NActors {
THashMap<ui32, TInstant> LastLogNotice;
const TDuration MuteDuration = TDuration::Seconds(15);
TInstant Deadline;
+ TActorId HandshakeBroker;
+ std::optional<TBrokerLeaseHolder> BrokerLeaseHolder;
public:
THandshakeActor(TInterconnectProxyCommon::TPtr common, const TActorId& self, const TActorId& peer,
@@ -113,6 +116,7 @@ namespace NActors {
Y_VERIFY(SelfVirtualId);
Y_VERIFY(SelfVirtualId.NodeId());
Y_VERIFY(PeerNodeId);
+ HandshakeBroker = MakeHandshakeBrokerOutId();
}
THandshakeActor(TInterconnectProxyCommon::TPtr common, TSocketPtr socket)
@@ -135,14 +139,42 @@ namespace NActors {
}
void Run() override {
+ try {
+ RunImpl();
+ } catch (const TDtorException&) {
+ if (BrokerLeaseHolder) {
+ BrokerLeaseHolder->ForgetLease();
+ }
+ throw;
+ } catch (...) {
+ throw;
+ }
+ }
+
+ void RunImpl() {
UpdatePrefix();
+ if (!Socket && Common->OutgoingHandshakeInflightLimit) {
+ // Create holder, which sends request to broker and automatically frees the place when destroyed
+ BrokerLeaseHolder.emplace(GetActorSystem(), SelfActorId, HandshakeBroker);
+ }
+
+ if (BrokerLeaseHolder && BrokerLeaseHolder->IsLeaseRequested()) {
+ WaitForSpecificEvent<TEvHandshakeBrokerPermit>("HandshakeBrokerPermit");
+ }
+
// set up overall handshake process timer
TDuration timeout = Common->Settings.Handshake;
if (timeout == TDuration::Zero()) {
timeout = DEFAULT_HANDSHAKE_TIMEOUT;
}
timeout += ResolveTimeout * 2;
+
+ if (Socket) {
+ // Incoming handshakes have shorter timeout than outgoing
+ timeout *= 0.9;
+ }
+
Deadline = Now() + timeout;
Schedule(Deadline, new TEvents::TEvWakeup);
@@ -176,6 +208,7 @@ namespace NActors {
*NextPacketFromPeer, ProgramInfo->Release(), std::move(Params)));
}
+ BrokerLeaseHolder.reset();
Socket.Reset();
}
@@ -850,7 +883,7 @@ namespace NActors {
addresses.emplace_back(r.GetAddress(), static_cast<ui16>(r.GetPort()));
} else {
Y_VERIFY(ev->GetTypeRewrite() == ui32(ENetwork::ResolveError));
- Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "DNS resolve error: " + ev->Get<TEvResolveError>()->Explain
+ Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "DNS resolve error: " + ev->Get<TEvResolveError>()->Explain
+ ", Unresolved host# " + ev->Get<TEvResolveError>()->Host, true);
}
diff --git a/library/cpp/actors/interconnect/interconnect_handshake.h b/library/cpp/actors/interconnect/interconnect_handshake.h
index b3c0db6c5d..fc37f11251 100644
--- a/library/cpp/actors/interconnect/interconnect_handshake.h
+++ b/library/cpp/actors/interconnect/interconnect_handshake.h
@@ -10,7 +10,7 @@
#include "events_local.h"
namespace NActors {
- static constexpr TDuration DEFAULT_HANDSHAKE_TIMEOUT = TDuration::Seconds(1);
+ static constexpr TDuration DEFAULT_HANDSHAKE_TIMEOUT = TDuration::Seconds(5);
static constexpr ui64 INTERCONNECT_PROTOCOL_VERSION = 2;
using TSocketPtr = TIntrusivePtr<NInterconnect::TStreamSocket>;
diff --git a/library/cpp/actors/interconnect/interconnect_tcp_server.cpp b/library/cpp/actors/interconnect/interconnect_tcp_server.cpp
index aad8677ca4..ede35b0b8b 100644
--- a/library/cpp/actors/interconnect/interconnect_tcp_server.cpp
+++ b/library/cpp/actors/interconnect/interconnect_tcp_server.cpp
@@ -46,9 +46,10 @@ namespace NActors {
if (addr.GetFamily() == AF_INET6) {
SetSockOpt(*Listener, IPPROTO_IPV6, IPV6_V6ONLY, 0);
}
+ const ui32 backlog = ProxyCommonCtx->Settings.SocketBacklogSize;
if (const auto e = -Listener->Bind(addr)) {
return e;
- } else if (const auto e = -Listener->Listen(SOMAXCONN)) {
+ } else if (const auto e = -Listener->Listen(backlog ? backlog : SOMAXCONN)) {
return e;
} else {
return 0;
diff --git a/library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h b/library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h
index 2b6d27cd3f..dd2557e25e 100644
--- a/library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h
+++ b/library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h
@@ -26,13 +26,15 @@ private:
TList<TTrafficInterrupter> interrupters;
NActors::TChannelsConfig ChannelsConfig;
TPortManager PortManager;
+ TIntrusivePtr<NLog::TSettings> LoggerSettings;
public:
TTestICCluster(ui32 numNodes = 1, NActors::TChannelsConfig channelsConfig = NActors::TChannelsConfig(),
- TTrafficInterrupterSettings* tiSettings = nullptr)
+ TTrafficInterrupterSettings* tiSettings = nullptr, TIntrusivePtr<NLog::TSettings> loggerSettings = nullptr)
: NumNodes(numNodes)
, Counters(new NMonitoring::TDynamicCounters)
, ChannelsConfig(channelsConfig)
+ , LoggerSettings(loggerSettings)
{
THashMap<ui32, ui16> nodeToPortMap;
THashMap<ui32, THashMap<ui32, ui16>> specificNodePortMap;
@@ -59,7 +61,8 @@ public:
for (ui32 i = 1; i <= NumNodes; ++i) {
auto& portMap = tiSettings ? specificNodePortMap[i] : nodeToPortMap;
- Nodes.emplace(i, MakeHolder<TNode>(i, NumNodes, portMap, Address, Counters, DeadPeerTimeout, ChannelsConfig));
+ Nodes.emplace(i, MakeHolder<TNode>(i, NumNodes, portMap, Address, Counters, DeadPeerTimeout, ChannelsConfig,
+ /*numDynamicNodes=*/0, /*numThreads=*/1, LoggerSettings));
}
}
diff --git a/library/cpp/actors/interconnect/ut/lib/node.h b/library/cpp/actors/interconnect/ut/lib/node.h
index ff30b1445e..0b538cdb1c 100644
--- a/library/cpp/actors/interconnect/ut/lib/node.h
+++ b/library/cpp/actors/interconnect/ut/lib/node.h
@@ -6,6 +6,7 @@
#include <library/cpp/actors/core/mailbox.h>
#include <library/cpp/actors/dnsresolver/dnsresolver.h>
+#include <library/cpp/actors/interconnect/handshake_broker.h>
#include <library/cpp/actors/interconnect/interconnect_tcp_server.h>
#include <library/cpp/actors/interconnect/interconnect_tcp_proxy.h>
#include <library/cpp/actors/interconnect/interconnect_proxy_wrapper.h>
@@ -19,7 +20,8 @@ public:
TNode(ui32 nodeId, ui32 numNodes, const THashMap<ui32, ui16>& nodeToPort, const TString& address,
NMonitoring::TDynamicCounterPtr counters, TDuration deadPeerTimeout,
TChannelsConfig channelsSettings = TChannelsConfig(),
- ui32 numDynamicNodes = 0, ui32 numThreads = 1) {
+ ui32 numDynamicNodes = 0, ui32 numThreads = 1,
+ TIntrusivePtr<NLog::TSettings> loggerSettings = nullptr) {
TActorSystemSetup setup;
setup.NodeId = nodeId;
setup.ExecutorsCount = 1;
@@ -43,6 +45,7 @@ public:
common->Settings.SendBufferDieLimitInMB = 512;
common->Settings.TotalInflightAmountOfData = 512 * 1024;
common->Settings.TCPSocketBufferSize = 2048 * 1024;
+ common->OutgoingHandshakeInflightLimit = 3;
setup.Interconnect.ProxyActors.resize(numNodes + 1 - numDynamicNodes);
setup.Interconnect.ProxyWrapperFactory = CreateProxyWrapperFactory(common, interconnectPoolId);
@@ -62,29 +65,31 @@ public:
setup.LocalServices.emplace_back(MakePollerActorId(), TActorSetupCmd(CreatePollerActor(),
TMailboxType::ReadAsFilled, 0));
- const TActorId loggerActorId(0, "logger");
- constexpr ui32 LoggerComponentId = 410; // NKikimrServices::LOGGER
-
- auto loggerSettings = MakeIntrusive<NLog::TSettings>(
- loggerActorId,
- (NLog::EComponent)LoggerComponentId,
- NLog::PRI_INFO,
- NLog::PRI_DEBUG,
- 0U);
-
- loggerSettings->Append(
- NActorsServices::EServiceCommon_MIN,
- NActorsServices::EServiceCommon_MAX,
- NActorsServices::EServiceCommon_Name
- );
-
- constexpr ui32 WilsonComponentId = 430; // NKikimrServices::WILSON
- static const TString WilsonComponentName = "WILSON";
-
- loggerSettings->Append(
- (NLog::EComponent)WilsonComponentId,
- (NLog::EComponent)WilsonComponentId + 1,
- [](NLog::EComponent) -> const TString & { return WilsonComponentName; });
+ const TActorId loggerActorId = loggerSettings ? loggerSettings->LoggerActorId : TActorId(0, "logger");
+
+ if (!loggerSettings) {
+ constexpr ui32 LoggerComponentId = 410; // NKikimrServices::LOGGER
+ loggerSettings = MakeIntrusive<NLog::TSettings>(
+ loggerActorId,
+ (NLog::EComponent)LoggerComponentId,
+ NLog::PRI_INFO,
+ NLog::PRI_DEBUG,
+ 0U);
+
+ loggerSettings->Append(
+ NActorsServices::EServiceCommon_MIN,
+ NActorsServices::EServiceCommon_MAX,
+ NActorsServices::EServiceCommon_Name
+ );
+
+ constexpr ui32 WilsonComponentId = 430; // NKikimrServices::WILSON
+ static const TString WilsonComponentName = "WILSON";
+
+ loggerSettings->Append(
+ (NLog::EComponent)WilsonComponentId,
+ (NLog::EComponent)WilsonComponentId + 1,
+ [](NLog::EComponent) -> const TString & { return WilsonComponentName; });
+ }
// register nameserver table
auto names = MakeIntrusive<TTableNameserverSetup>();
@@ -105,6 +110,14 @@ public:
CreateStderrBackend(), counters->GetSubgroup("subsystem", "logger")),
TMailboxType::ReadAsFilled, interconnectPoolId));
+
+ if (common->OutgoingHandshakeInflightLimit) {
+ // create handshake broker actor
+ setup.LocalServices.emplace_back(MakeHandshakeBrokerOutId(), TActorSetupCmd(
+ CreateHandshakeBroker(*common->OutgoingHandshakeInflightLimit),
+ TMailboxType::ReadAsFilled, interconnectPoolId));
+ }
+
auto sp = MakeHolder<TActorSystemSetup>(std::move(setup));
ActorSystem.Reset(new TActorSystem(sp, nullptr, loggerSettings));
ActorSystem->Start();
diff --git a/library/cpp/actors/interconnect/ut/lib/test_events.h b/library/cpp/actors/interconnect/ut/lib/test_events.h
index cd0d9e0152..1bb5eb7d38 100644
--- a/library/cpp/actors/interconnect/ut/lib/test_events.h
+++ b/library/cpp/actors/interconnect/ut/lib/test_events.h
@@ -9,6 +9,7 @@ namespace NActors {
EvTestSmall,
EvTestLarge,
EvTestResponse,
+ EvTestStartPolling,
};
struct TEvTest : TEventPB<TEvTest, NInterconnectTest::TEvTest, EvTest> {
@@ -46,4 +47,8 @@ namespace NActors {
}
};
+ struct TEvTestStartPolling : TEventPB<TEvTestStartPolling, NInterconnectTest::TEvTestStartPolling, EvTestStartPolling> {
+ TEvTestStartPolling() = default;
+ };
+
}
diff --git a/library/cpp/actors/interconnect/ut/protos/interconnect_test.proto b/library/cpp/actors/interconnect/ut/protos/interconnect_test.proto
index b9b2bd6a4e..b74d068a8b 100644
--- a/library/cpp/actors/interconnect/ut/protos/interconnect_test.proto
+++ b/library/cpp/actors/interconnect/ut/protos/interconnect_test.proto
@@ -23,3 +23,6 @@ message TEvTestSmall {
message TEvTestResponse {
optional uint64 ConfirmedSequenceNumber = 1;
}
+
+message TEvTestStartPolling {
+}
diff --git a/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.darwin.txt b/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.darwin.txt
new file mode 100644
index 0000000000..89c38824c2
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.darwin.txt
@@ -0,0 +1,45 @@
+
+# This file was gererated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(library-cpp-actors-interconnect-ut_huge_cluster)
+target_link_libraries(library-cpp-actors-interconnect-ut_huge_cluster PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ cpp-testing-unittest_main
+ cpp-actors-core
+ cpp-actors-interconnect
+ interconnect-ut-lib
+ interconnect-ut-protos
+ cpp-testing-unittest
+ cpp-actors-testlib
+)
+target_link_options(library-cpp-actors-interconnect-ut_huge_cluster PRIVATE
+ -Wl,-no_deduplicate
+ -Wl,-sdk_version,10.15
+ -fPIC
+ -fPIC
+ -framework
+ CoreFoundation
+)
+target_sources(library-cpp-actors-interconnect-ut_huge_cluster PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/actors/interconnect/ut_huge_cluster/huge_cluster.cpp
+)
+add_test(
+ NAME
+ library-cpp-actors-interconnect-ut_huge_cluster
+ COMMAND
+ library-cpp-actors-interconnect-ut_huge_cluster
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+vcs_info(library-cpp-actors-interconnect-ut_huge_cluster)
diff --git a/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.linux-aarch64.txt b/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..33064b5008
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,48 @@
+
+# This file was gererated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(library-cpp-actors-interconnect-ut_huge_cluster)
+target_link_libraries(library-cpp-actors-interconnect-ut_huge_cluster PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-lfalloc
+ cpp-testing-unittest_main
+ cpp-actors-core
+ cpp-actors-interconnect
+ interconnect-ut-lib
+ interconnect-ut-protos
+ cpp-testing-unittest
+ cpp-actors-testlib
+)
+target_link_options(library-cpp-actors-interconnect-ut_huge_cluster PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_sources(library-cpp-actors-interconnect-ut_huge_cluster PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/actors/interconnect/ut_huge_cluster/huge_cluster.cpp
+)
+add_test(
+ NAME
+ library-cpp-actors-interconnect-ut_huge_cluster
+ COMMAND
+ library-cpp-actors-interconnect-ut_huge_cluster
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+vcs_info(library-cpp-actors-interconnect-ut_huge_cluster)
diff --git a/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.linux.txt b/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.linux.txt
new file mode 100644
index 0000000000..5b08a947cf
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.linux.txt
@@ -0,0 +1,50 @@
+
+# This file was gererated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(library-cpp-actors-interconnect-ut_huge_cluster)
+target_link_libraries(library-cpp-actors-interconnect-ut_huge_cluster PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ cpp-malloc-tcmalloc
+ libs-tcmalloc-no_percpu_cache
+ library-cpp-cpuid_check
+ cpp-testing-unittest_main
+ cpp-actors-core
+ cpp-actors-interconnect
+ interconnect-ut-lib
+ interconnect-ut-protos
+ cpp-testing-unittest
+ cpp-actors-testlib
+)
+target_link_options(library-cpp-actors-interconnect-ut_huge_cluster PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_sources(library-cpp-actors-interconnect-ut_huge_cluster PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/actors/interconnect/ut_huge_cluster/huge_cluster.cpp
+)
+add_test(
+ NAME
+ library-cpp-actors-interconnect-ut_huge_cluster
+ COMMAND
+ library-cpp-actors-interconnect-ut_huge_cluster
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+vcs_info(library-cpp-actors-interconnect-ut_huge_cluster)
diff --git a/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.txt b/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.txt
new file mode 100644
index 0000000000..3e0811fb22
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.txt
@@ -0,0 +1,15 @@
+
+# This file was gererated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND UNIX AND NOT APPLE AND NOT ANDROID)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (APPLE)
+ include(CMakeLists.darwin.txt)
+elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND UNIX AND NOT APPLE AND NOT ANDROID)
+ include(CMakeLists.linux.txt)
+endif()
diff --git a/library/cpp/actors/interconnect/ut_huge_cluster/huge_cluster.cpp b/library/cpp/actors/interconnect/ut_huge_cluster/huge_cluster.cpp
new file mode 100644
index 0000000000..458ead3459
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut_huge_cluster/huge_cluster.cpp
@@ -0,0 +1,167 @@
+#include <library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h>
+#include <library/cpp/actors/interconnect/ut/lib/test_events.h>
+#include <library/cpp/actors/interconnect/ut/lib/test_actors.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <vector>
+
+Y_UNIT_TEST_SUITE(HugeCluster) {
+ using namespace NActors;
+
+ class TPoller: public TActor<TPoller> {
+ const std::vector<TActorId>& Targets;
+ std::unordered_map<TActorId, TManualEvent>& Connected;
+
+ public:
+ TPoller(const std::vector<TActorId>& targets, std::unordered_map<TActorId, TManualEvent>& events)
+ : TActor(&TPoller::StateFunc)
+ , Targets(targets)
+ , Connected(events)
+ {}
+
+ void Handle(TEvTestStartPolling::TPtr /*ev*/, const TActorContext& ctx) {
+ for (ui32 i = 0; i < Targets.size(); ++i) {
+ ctx.Send(Targets[i], new TEvTest(), IEventHandle::FlagTrackDelivery, i);
+ }
+ }
+
+ void Handle(TEvents::TEvUndelivered::TPtr ev, const TActorContext& ctx) {
+ const ui32 cookie = ev->Cookie;
+ // Cerr << "TEvUndelivered ping from node# " << SelfId().NodeId() << " to node# " << cookie + 1 << Endl;
+ ctx.Send(Targets[cookie], new TEvTest(), IEventHandle::FlagTrackDelivery, cookie);
+ }
+
+ void Handle(TEvTest::TPtr ev, const TActorContext& /*ctx*/) {
+ // Cerr << "Polled from " << ev->Sender.ToString() << Endl;
+ Connected[ev->Sender].Signal();
+ }
+
+ void Handle(TEvents::TEvPoisonPill::TPtr& /*ev*/, const TActorContext& ctx) {
+ Die(ctx);
+ }
+
+ STRICT_STFUNC(StateFunc,
+ HFunc(TEvents::TEvUndelivered, Handle)
+ HFunc(TEvTestStartPolling, Handle)
+ HFunc(TEvTest, Handle)
+ HFunc(TEvents::TEvPoisonPill, Handle)
+ )
+ };
+
+ class TStartPollers : public TActorBootstrapped<TStartPollers> {
+ const std::vector<TActorId>& Pollers;
+
+ public:
+ TStartPollers(const std::vector<TActorId>& pollers)
+ : Pollers(pollers)
+ {}
+
+ void Bootstrap(const TActorContext& ctx) {
+ Become(&TThis::StateFunc);
+ for (ui32 i = 0; i < Pollers.size(); ++i) {
+ ctx.Send(Pollers[i], new TEvTestStartPolling(), IEventHandle::FlagTrackDelivery, i);
+ }
+ }
+
+ void Handle(TEvents::TEvUndelivered::TPtr ev, const TActorContext& ctx) {
+ const ui32 cookie = ev->Cookie;
+ // Cerr << "TEvUndelivered start poller message to node# " << cookie + 1 << Endl;
+ ctx.Send(Pollers[cookie], new TEvTestStartPolling(), IEventHandle::FlagTrackDelivery, cookie);
+ }
+
+ void Handle(TEvents::TEvPoisonPill::TPtr& /*ev*/, const TActorContext& ctx) {
+ Die(ctx);
+ }
+
+ STRICT_STFUNC(StateFunc,
+ HFunc(TEvents::TEvUndelivered, Handle)
+ HFunc(TEvents::TEvPoisonPill, Handle)
+ )
+ };
+
+ TIntrusivePtr<NLog::TSettings> MakeLogConfigs(NLog::EPriority priority) {
+ // custom logger settings
+ auto loggerSettings = MakeIntrusive<NLog::TSettings>(
+ TActorId(0, "logger"),
+ (NLog::EComponent)410,
+ priority,
+ priority,
+ 0U);
+
+ loggerSettings->Append(
+ NActorsServices::EServiceCommon_MIN,
+ NActorsServices::EServiceCommon_MAX,
+ NActorsServices::EServiceCommon_Name
+ );
+
+ constexpr ui32 WilsonComponentId = 430; // NKikimrServices::WILSON
+ static const TString WilsonComponentName = "WILSON";
+
+ loggerSettings->Append(
+ (NLog::EComponent)WilsonComponentId,
+ (NLog::EComponent)WilsonComponentId + 1,
+ [](NLog::EComponent) -> const TString & { return WilsonComponentName; });
+
+ return loggerSettings;
+ }
+
+ Y_UNIT_TEST(AllToAll) {
+ ui32 nodesNum = 120;
+ std::vector<TActorId> pollers(nodesNum);
+ std::vector<std::unordered_map<TActorId, TManualEvent>> events(nodesNum);
+
+ // Must destroy actor system before shared arrays
+ {
+ TTestICCluster testCluster(nodesNum, NActors::TChannelsConfig(), nullptr, MakeLogConfigs(NLog::PRI_EMERG));
+
+ for (ui32 i = 0; i < nodesNum; ++i) {
+ pollers[i] = testCluster.RegisterActor(new TPoller(pollers, events[i]), i + 1);
+ }
+
+ for (ui32 i = 0; i < nodesNum; ++i) {
+ for (const auto& actor : pollers) {
+ events[i][actor] = TManualEvent();
+ }
+ }
+
+ testCluster.RegisterActor(new TStartPollers(pollers), 1);
+
+ for (ui32 i = 0; i < nodesNum; ++i) {
+ for (auto& [_, ev] : events[i]) {
+ ev.WaitI();
+ }
+ }
+ }
+ }
+
+
+ Y_UNIT_TEST(AllToOne) {
+ ui32 nodesNum = 500;
+ std::vector<TActorId> listeners;
+ std::vector<TActorId> pollers(nodesNum - 1);
+ std::unordered_map<TActorId, TManualEvent> events;
+ std::unordered_map<TActorId, TManualEvent> emptyEventList;
+
+ // Must destroy actor system before shared arrays
+ {
+ TTestICCluster testCluster(nodesNum, NActors::TChannelsConfig(), nullptr, MakeLogConfigs(NLog::PRI_EMERG));
+
+ const TActorId listener = testCluster.RegisterActor(new TPoller({}, events), nodesNum);
+ listeners = { listener };
+ for (ui32 i = 0; i < nodesNum - 1; ++i) {
+ pollers[i] = testCluster.RegisterActor(new TPoller(listeners, emptyEventList), i + 1);
+ }
+
+ for (const auto& actor : pollers) {
+ events[actor] = TManualEvent();
+ }
+
+ testCluster.RegisterActor(new TStartPollers(pollers), 1);
+
+ for (auto& [_, ev] : events) {
+ ev.WaitI();
+ }
+ }
+ }
+}
diff --git a/library/cpp/actors/util/rc_buf.h b/library/cpp/actors/util/rc_buf.h
index a2bce33fba..5d4517ade2 100644
--- a/library/cpp/actors/util/rc_buf.h
+++ b/library/cpp/actors/util/rc_buf.h
@@ -306,9 +306,12 @@ class TRcBuf {
struct TBackendHolder {
uintptr_t Data[2];
- operator bool() const noexcept {
+ explicit operator bool() const noexcept {
return Data[0] || Data[1];
}
+ friend bool operator ==(const TBackendHolder& x, const TBackendHolder& y) {
+ return x.Data[0] == y.Data[0] && x.Data[1] == y.Data[1];
+ }
};
constexpr static TBackendHolder Empty = {0, 0};
@@ -592,7 +595,7 @@ class TRcBuf {
}
explicit operator bool() const {
- return Owner;
+ return static_cast<bool>(Owner);
}
private:
diff --git a/library/cpp/monlib/service/pages/index_mon_page.cpp b/library/cpp/monlib/service/pages/index_mon_page.cpp
index 2bfa0faca8..c9b2f82cc0 100644
--- a/library/cpp/monlib/service/pages/index_mon_page.cpp
+++ b/library/cpp/monlib/service/pages/index_mon_page.cpp
@@ -28,9 +28,8 @@ void TIndexMonPage::Output(IMonHttpRequest& request) {
TGuard<TMutex> g(Mtx);
TStringBuf pathTmp = request.GetPathInfo();
for (;;) {
- TPagesByPath::iterator i = PagesByPath.find(pathTmp);
- if (i != PagesByPath.end()) {
- found = i->second;
+ if (TPagesByPath::iterator i = PagesByPath.find(pathTmp); i != PagesByPath.end()) {
+ found = *i->second;
pathInfo = request.GetPathInfo().substr(pathTmp.size());
Y_VERIFY(pathInfo.empty() || pathInfo.StartsWith('/'));
break;
@@ -67,18 +66,12 @@ void TIndexMonPage::OutputIndex(IOutputStream& out, bool pathEndsWithSlash) {
void TIndexMonPage::Register(TMonPagePtr page) {
TGuard<TMutex> g(Mtx);
- auto insres = PagesByPath.insert(std::make_pair("/" + page->GetPath(), page));
- if (insres.second) {
- // new unique page just inserted, update Pages
- Pages.push_back(page);
+ if (auto [it, inserted] = PagesByPath.try_emplace("/" + page->GetPath()); inserted) {
+ // new unique page just inserted, insert it to the end
+ it->second = Pages.insert(Pages.end(), page);
} else {
// a page with the given path is already present, replace it with the new page
-
- // find old page, sorry for O(n)
- auto it = std::find(Pages.begin(), Pages.end(), insres.first->second);
- *it = page;
- // this already present, replace it
- insres.first->second = page;
+ *it->second = page;
}
page->Parent = this;
}
@@ -101,7 +94,7 @@ IMonPage* TIndexMonPage::FindPage(const TString& relativePath) {
if (i == PagesByPath.end()) {
return nullptr;
} else {
- return i->second.Get();
+ return i->second->Get();
}
}
@@ -171,7 +164,7 @@ void TIndexMonPage::OutputBody(IMonHttpRequest& req) {
void TIndexMonPage::SortPages() {
TGuard<TMutex> g(Mtx);
- std::sort(Pages.begin(), Pages.end(), [](const TMonPagePtr& a, const TMonPagePtr& b) {
+ Pages.sort([](const TMonPagePtr& a, const TMonPagePtr& b) {
return AsciiCompareIgnoreCase(a->GetTitle(), b->GetTitle()) < 0;
});
}
diff --git a/library/cpp/monlib/service/pages/index_mon_page.h b/library/cpp/monlib/service/pages/index_mon_page.h
index af96bcd2b9..0aaf826d46 100644
--- a/library/cpp/monlib/service/pages/index_mon_page.h
+++ b/library/cpp/monlib/service/pages/index_mon_page.h
@@ -2,12 +2,14 @@
#include "mon_page.h"
+#include <list>
+
namespace NMonitoring {
struct TIndexMonPage: public IMonPage {
TMutex Mtx;
- typedef TVector<TMonPagePtr> TPages;
- TPages Pages;
- typedef THashMap<TString, TMonPagePtr> TPagesByPath;
+ using TPages = std::list<TMonPagePtr>;
+ TPages Pages; // a list of pages to maintain specific order
+ using TPagesByPath = THashMap<TString, TPages::iterator>;
TPagesByPath PagesByPath;
TIndexMonPage(const TString& path, const TString& title)
diff --git a/library/cpp/threading/CMakeLists.txt b/library/cpp/threading/CMakeLists.txt
index 1246829e67..6a92c755cd 100644
--- a/library/cpp/threading/CMakeLists.txt
+++ b/library/cpp/threading/CMakeLists.txt
@@ -10,6 +10,7 @@ add_subdirectory(atomic)
add_subdirectory(chunk_queue)
add_subdirectory(equeue)
add_subdirectory(future)
+add_subdirectory(hot_swap)
add_subdirectory(light_rw_lock)
add_subdirectory(local_executor)
add_subdirectory(poor_man_openmp)
diff --git a/library/cpp/threading/hot_swap/CMakeLists.darwin.txt b/library/cpp/threading/hot_swap/CMakeLists.darwin.txt
new file mode 100644
index 0000000000..fb3d6d7710
--- /dev/null
+++ b/library/cpp/threading/hot_swap/CMakeLists.darwin.txt
@@ -0,0 +1,18 @@
+
+# This file was gererated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-threading-hot_swap)
+target_link_libraries(cpp-threading-hot_swap PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ cpp-deprecated-atomic
+)
+target_sources(cpp-threading-hot_swap PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/threading/hot_swap/hot_swap.cpp
+)
diff --git a/library/cpp/threading/hot_swap/CMakeLists.linux-aarch64.txt b/library/cpp/threading/hot_swap/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..48692e2319
--- /dev/null
+++ b/library/cpp/threading/hot_swap/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,19 @@
+
+# This file was gererated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-threading-hot_swap)
+target_link_libraries(cpp-threading-hot_swap PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ cpp-deprecated-atomic
+)
+target_sources(cpp-threading-hot_swap PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/threading/hot_swap/hot_swap.cpp
+)
diff --git a/library/cpp/threading/hot_swap/CMakeLists.linux.txt b/library/cpp/threading/hot_swap/CMakeLists.linux.txt
new file mode 100644
index 0000000000..48692e2319
--- /dev/null
+++ b/library/cpp/threading/hot_swap/CMakeLists.linux.txt
@@ -0,0 +1,19 @@
+
+# This file was gererated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-threading-hot_swap)
+target_link_libraries(cpp-threading-hot_swap PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ cpp-deprecated-atomic
+)
+target_sources(cpp-threading-hot_swap PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/threading/hot_swap/hot_swap.cpp
+)
diff --git a/library/cpp/threading/hot_swap/CMakeLists.txt b/library/cpp/threading/hot_swap/CMakeLists.txt
new file mode 100644
index 0000000000..3e0811fb22
--- /dev/null
+++ b/library/cpp/threading/hot_swap/CMakeLists.txt
@@ -0,0 +1,15 @@
+
+# This file was gererated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND UNIX AND NOT APPLE AND NOT ANDROID)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (APPLE)
+ include(CMakeLists.darwin.txt)
+elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND UNIX AND NOT APPLE AND NOT ANDROID)
+ include(CMakeLists.linux.txt)
+endif()
diff --git a/ydb/core/cms/json_proxy_proto.h b/ydb/core/cms/json_proxy_proto.h
index ebff56630c..88ecfa1a72 100644
--- a/ydb/core/cms/json_proxy_proto.h
+++ b/ydb/core/cms/json_proxy_proto.h
@@ -76,6 +76,8 @@ protected:
return ReplyWithTypeDescription(*NKikimrConfig::TImmediateControlsConfig::TCoordinatorControls::descriptor(), ctx);
else if (name == ".NKikimrConfig.TImmediateControlsConfig.TSchemeShardControls")
return ReplyWithTypeDescription(*NKikimrConfig::TImmediateControlsConfig::TSchemeShardControls::descriptor(), ctx);
+ else if (name == ".NKikimrConfig.TImmediateControlsConfig.TTCMallocControls")
+ return ReplyWithTypeDescription(*NKikimrConfig::TImmediateControlsConfig::TTCMallocControls::descriptor(), ctx);
}
ctx.Send(RequestEvent->Sender,
diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp
index 29278be5ad..256acbf804 100644
--- a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp
+++ b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp
@@ -183,6 +183,7 @@
#include <library/cpp/actors/interconnect/interconnect_tcp_proxy.h>
#include <library/cpp/actors/interconnect/interconnect_proxy_wrapper.h>
#include <library/cpp/actors/interconnect/interconnect_tcp_server.h>
+#include <library/cpp/actors/interconnect/handshake_broker.h>
#include <library/cpp/actors/interconnect/load.h>
#include <library/cpp/actors/interconnect/poller_actor.h>
#include <library/cpp/actors/interconnect/poller_tcp.h>
@@ -558,17 +559,19 @@ static TInterconnectSettings GetInterconnectSettings(const NKikimrConfig::TInter
if (config.HasNumPreallocatedBuffers()) {
result.NumPreallocatedBuffers = config.GetNumPreallocatedBuffers();
}
+ result.SocketBacklogSize = config.GetSocketBacklogSize();
return result;
}
namespace {
- enum class EPoolType : i8 {
+ enum class EPoolKind : i8 {
System = 0,
User = 1,
Batch = 2,
- IC = 3,
+ IO = 3,
+ IC = 4,
};
struct TShortPoolCfg {
@@ -584,116 +587,116 @@ namespace {
constexpr i16 GRpcHandlersPerCompletionQueueInMaxPreparedCpuCase = 1000;
constexpr i16 GRpcHandlersPerCompletionQueuePerCpu = GRpcHandlersPerCompletionQueueInMaxPreparedCpuCase / MaxPreparedCpuCount;
- TShortPoolCfg ComputeCpuTable[MaxPreparedCpuCount + 1][4] {
- { {0, 0}, {0, 0}, {0, 0}, {0, 0} }, // 0
- { {1, 1}, {0, 1}, {0, 1}, {0, 0} }, // 1
- { {1, 1}, {1, 2}, {0, 1}, {0, 1} }, // 2
- { {1, 2}, {1, 3}, {1, 1}, {0, 1} }, // 3
- { {1, 2}, {1, 4}, {1, 1}, {1, 2} }, // 4
- { {1, 3}, {2, 5}, {1, 1}, {1, 2} }, // 5
- { {1, 3}, {3, 6}, {1, 1}, {1, 2} }, // 6
- { {2, 4}, {3, 7}, {1, 2}, {1, 3} }, // 7
- { {2, 4}, {4, 8}, {1, 2}, {1, 3} }, // 8
- { {2, 5}, {4, 9}, {2, 3}, {1, 3} }, // 9
- { {2, 5}, {5, 10}, {2, 3}, {1, 3} }, // 10
- { {2, 6}, {6, 11}, {2, 3}, {2, 4} }, // 11
- { {2, 6}, {7, 12}, {2, 3}, {2, 5} }, // 12
- { {3, 7}, {7, 13}, {2, 3}, {2, 5} }, // 13
- { {3, 7}, {7, 14}, {2, 3}, {3, 6} }, // 14
- { {3, 8}, {8, 15}, {2, 4}, {3, 6} }, // 15
- { {3, 8}, {9, 16}, {2, 4}, {3, 6} }, // 16
- { {3, 9}, {10, 17}, {2, 4}, {3, 7} }, // 17
- { {3, 9}, {10, 18}, {3, 5}, {3, 7} }, // 18
- { {4, 10}, {10, 19}, {3, 5}, {4, 8} }, // 19
- { {4, 10}, {10, 20}, {3, 5}, {4, 8} }, // 20
- { {4, 11}, {11, 21}, {3, 5}, {4, 8} }, // 21
- { {4, 11}, {12, 22}, {3, 5}, {4, 9} }, // 22
- { {4, 12}, {13, 23}, {3, 6}, {4, 9} }, // 23
- { {4, 12}, {13, 24}, {3, 6}, {5, 10} }, // 24
- { {5, 13}, {13, 25}, {3, 6}, {5, 10} }, // 25
- { {5, 13}, {13, 26}, {4, 7}, {5, 10} }, // 26
- { {5, 14}, {14, 27}, {4, 7}, {5, 11} }, // 27
- { {5, 14}, {14, 28}, {4, 7}, {5, 11} }, // 28
- { {5, 15}, {15, 29}, {4, 8}, {6, 12} }, // 29
- { {5, 15}, {16, 30}, {4, 8}, {6, 12} }, // 30
- { {6, 18}, {16, 31}, {4, 8}, {6, 12} }, // 31
+ TShortPoolCfg ComputeCpuTable[MaxPreparedCpuCount + 1][5] {
+ { {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} }, // 0
+ { {1, 1}, {0, 1}, {0, 1}, {0, 0}, {0, 0} }, // 1
+ { {1, 1}, {0, 2}, {0, 1}, {0, 0}, {1, 1} }, // 2
+ { {1, 2}, {0, 3}, {1, 1}, {0, 0}, {1, 1} }, // 3
+ { {1, 2}, {1, 4}, {1, 1}, {0, 0}, {1, 2} }, // 4
+ { {1, 3}, {2, 5}, {1, 1}, {0, 0}, {1, 2} }, // 5
+ { {1, 3}, {3, 6}, {1, 1}, {0, 0}, {1, 2} }, // 6
+ { {2, 4}, {3, 7}, {1, 2}, {0, 0}, {1, 3} }, // 7
+ { {2, 4}, {4, 8}, {1, 2}, {0, 0}, {1, 3} }, // 8
+ { {2, 5}, {4, 9}, {2, 3}, {0, 0}, {1, 3} }, // 9
+ { {2, 5}, {5, 10}, {2, 3}, {0, 0}, {1, 3} }, // 10
+ { {2, 6}, {6, 11}, {2, 3}, {0, 0}, {2, 4} }, // 11
+ { {2, 6}, {7, 12}, {2, 3}, {0, 0}, {2, 5} }, // 12
+ { {3, 7}, {7, 13}, {2, 3}, {0, 0}, {2, 5} }, // 13
+ { {3, 7}, {7, 14}, {2, 3}, {0, 0}, {3, 6} }, // 14
+ { {3, 8}, {8, 15}, {2, 4}, {0, 0}, {3, 6} }, // 15
+ { {3, 8}, {9, 16}, {2, 4}, {0, 0}, {3, 6} }, // 16
+ { {3, 9}, {10, 17}, {2, 4}, {0, 0}, {3, 7} }, // 17
+ { {3, 9}, {10, 18}, {3, 5}, {0, 0}, {3, 7} }, // 18
+ { {4, 10}, {10, 19}, {3, 5}, {0, 0}, {4, 8} }, // 19
+ { {4, 10}, {10, 20}, {3, 5}, {0, 0}, {4, 8} }, // 20
+ { {4, 11}, {11, 21}, {3, 5}, {0, 0}, {4, 8} }, // 21
+ { {4, 11}, {12, 22}, {3, 5}, {0, 0}, {4, 9} }, // 22
+ { {4, 12}, {13, 23}, {3, 6}, {0, 0}, {4, 9} }, // 23
+ { {4, 12}, {13, 24}, {3, 6}, {0, 0}, {5, 10} }, // 24
+ { {5, 13}, {13, 25}, {3, 6}, {0, 0}, {5, 10} }, // 25
+ { {5, 13}, {13, 26}, {4, 7}, {0, 0}, {5, 10} }, // 26
+ { {5, 14}, {14, 27}, {4, 7}, {0, 0}, {5, 11} }, // 27
+ { {5, 14}, {14, 28}, {4, 7}, {0, 0}, {5, 11} }, // 28
+ { {5, 15}, {15, 29}, {4, 8}, {0, 0}, {6, 12} }, // 29
+ { {5, 15}, {16, 30}, {4, 8}, {0, 0}, {6, 12} }, // 30
+ { {6, 18}, {16, 31}, {4, 8}, {0, 0}, {6, 12} }, // 31
};
- TShortPoolCfg HybridCpuTable[MaxPreparedCpuCount + 1][4] {
- { {0, 0}, {0, 0}, {0, 0}, {0, 0} }, // 0
- { {1, 1}, {0, 1}, {0, 1}, {0, 0} }, // 1
- { {1, 1}, {1, 2}, {0, 1}, {0, 1} }, // 2
- { {1, 2}, {1, 3}, {1, 1}, {0, 1} }, // 3
- { {1, 2}, {1, 4}, {1, 1}, {1, 2} }, // 4
- { {1, 2}, {2, 5}, {1, 1}, {1, 2} }, // 5
- { {1, 2}, {2, 6}, {1, 1}, {2, 2} }, // 6
- { {2, 3}, {2, 7}, {1, 2}, {2, 3} }, // 7
- { {2, 3}, {3, 8}, {1, 2}, {2, 3} }, // 8
- { {2, 4}, {3, 9}, {1, 2}, {3, 4} }, // 9
- { {3, 4}, {3, 10}, {1, 2}, {3, 4} }, // 10
- { {3, 5}, {4, 11}, {1, 2}, {3, 5} }, // 11
- { {3, 5}, {4, 12}, {1, 3}, {4, 5} }, // 12
- { {4, 6}, {4, 13}, {1, 3}, {4, 6} }, // 13
- { {4, 6}, {5, 14}, {1, 3}, {4, 6} }, // 14
- { {4, 7}, {5, 15}, {1, 3}, {5, 7} }, // 15
- { {5, 7}, {5, 16}, {1, 3}, {5, 7} }, // 16
- { {5, 8}, {6, 17}, {1, 4}, {5, 8} }, // 17
- { {5, 8}, {6, 18}, {1, 4}, {6, 8} }, // 18
- { {6, 9}, {6, 19}, {1, 4}, {6, 9} }, // 19
- { {6, 9}, {7, 20}, {1, 4}, {6, 9} }, // 20
- { {6, 10}, {7, 21}, {1, 4}, {7, 10} }, // 21
- { {7, 10}, {7, 22}, {1, 5}, {7, 10} }, // 22
- { {7, 11}, {8, 23}, {1, 5}, {7, 11} }, // 23
- { {7, 11}, {8, 24}, {1, 5}, {8, 11} }, // 24
- { {8, 12}, {8, 25}, {1, 5}, {8, 12} }, // 25
- { {8, 12}, {9, 26}, {1, 5}, {8, 12} }, // 26
- { {8, 13}, {9, 27}, {1, 6}, {9, 13} }, // 27
- { {9, 13}, {9, 28}, {1, 6}, {9, 13} }, // 28
- { {9, 14}, {10, 29}, {1, 6}, {9, 14} }, // 29
- { {9, 14}, {10, 30}, {1, 6}, {10, 14} }, // 30
- { {10, 15}, {10, 31}, {1, 6}, {10, 15} }, // 31
+ TShortPoolCfg HybridCpuTable[MaxPreparedCpuCount + 1][5] {
+ { {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} }, // 0
+ { {1, 1}, {0, 1}, {0, 1}, {0, 0}, {0, 0} }, // 1
+ { {1, 1}, {0, 2}, {0, 1}, {0, 0}, {1, 1} }, // 2
+ { {1, 2}, {0, 3}, {1, 1}, {0, 0}, {1, 1} }, // 3
+ { {1, 2}, {1, 4}, {1, 1}, {0, 0}, {1, 2} }, // 4
+ { {1, 2}, {2, 5}, {1, 1}, {0, 0}, {1, 2} }, // 5
+ { {1, 2}, {2, 6}, {1, 1}, {0, 0}, {2, 2} }, // 6
+ { {2, 3}, {2, 7}, {1, 2}, {0, 0}, {2, 3} }, // 7
+ { {2, 3}, {3, 8}, {1, 2}, {0, 0}, {2, 3} }, // 8
+ { {2, 4}, {3, 9}, {1, 2}, {0, 0}, {3, 4} }, // 9
+ { {3, 4}, {3, 10}, {1, 2}, {0, 0}, {3, 4} }, // 10
+ { {3, 5}, {4, 11}, {1, 2}, {0, 0}, {3, 5} }, // 11
+ { {3, 5}, {4, 12}, {1, 3}, {0, 0}, {4, 5} }, // 12
+ { {4, 6}, {4, 13}, {1, 3}, {0, 0}, {4, 6} }, // 13
+ { {4, 6}, {5, 14}, {1, 3}, {0, 0}, {4, 6} }, // 14
+ { {4, 7}, {5, 15}, {1, 3}, {0, 0}, {5, 7} }, // 15
+ { {5, 7}, {5, 16}, {1, 3}, {0, 0}, {5, 7} }, // 16
+ { {5, 8}, {6, 17}, {1, 4}, {0, 0}, {5, 8} }, // 17
+ { {5, 8}, {6, 18}, {1, 4}, {0, 0}, {6, 8} }, // 18
+ { {6, 9}, {6, 19}, {1, 4}, {0, 0}, {6, 9} }, // 19
+ { {6, 9}, {7, 20}, {1, 4}, {0, 0}, {6, 9} }, // 20
+ { {6, 10}, {7, 21}, {1, 4}, {0, 0}, {7, 10} }, // 21
+ { {7, 10}, {7, 22}, {1, 5}, {0, 0}, {7, 10} }, // 22
+ { {7, 11}, {8, 23}, {1, 5}, {0, 0}, {7, 11} }, // 23
+ { {7, 11}, {8, 24}, {1, 5}, {0, 0}, {8, 11} }, // 24
+ { {8, 12}, {8, 25}, {1, 5}, {0, 0}, {8, 12} }, // 25
+ { {8, 12}, {9, 26}, {1, 5}, {0, 0}, {8, 12} }, // 26
+ { {8, 13}, {9, 27}, {1, 6}, {0, 0}, {9, 13} }, // 27
+ { {9, 13}, {9, 28}, {1, 6}, {0, 0}, {9, 13} }, // 28
+ { {9, 14}, {10, 29}, {1, 6}, {0, 0}, {9, 14} }, // 29
+ { {9, 14}, {10, 30}, {1, 6}, {0, 0}, {10, 14} }, // 30
+ { {10, 15}, {10, 31}, {1, 6}, {0, 0}, {10, 15} }, // 31
};
- TShortPoolCfg StorageCpuTable[MaxPreparedCpuCount + 1][4] {
- { {0, 0}, {0, 0}, {0, 0}, {0, 0} }, // 0
- { {1, 1}, {0, 1}, {0, 1}, {0, 0} }, // 1
- { {2, 2}, {0, 2}, {0, 1}, {0, 1} }, // 2
- { {1, 3}, {1, 3}, {1, 1}, {0, 1} }, // 3
- { {1, 4}, {1, 4}, {1, 1}, {1, 2} }, // 4
- { {2, 5}, {1, 5}, {1, 1}, {1, 2} }, // 5
- { {3, 6}, {1, 6}, {1, 1}, {1, 2} }, // 6
- { {4, 7}, {1, 7}, {1, 2}, {1, 3} }, // 7
- { {5, 8}, {1, 8}, {1, 2}, {1, 3} }, // 8
- { {5, 9}, {1, 9}, {1, 2}, {2, 4} }, // 9
- { {6, 10}, {1, 10}, {1, 2}, {2, 4} }, // 10
- { {6, 11}, {1, 11}, {2, 3}, {2, 4} }, // 11
- { {7, 12}, {1, 12}, {2, 3}, {2, 5} }, // 12
- { {8, 13}, {1, 13}, {2, 3}, {2, 5} }, // 13
- { {8, 14}, {1, 14}, {2, 3}, {3, 6} }, // 14
- { {9, 15}, {1, 15}, {2, 4}, {3, 6} }, // 15
- { {10, 16}, {1, 16}, {2, 4}, {3, 6} }, // 16
- { {11, 17}, {1, 17}, {2, 4}, {3, 7} }, // 17
- { {11, 18}, {1, 18}, {3, 5}, {3, 7} }, // 18
- { {11, 19}, {1, 19}, {3, 5}, {4, 8} }, // 19
- { {12, 20}, {1, 20}, {3, 5}, {4, 8} }, // 20
- { {13, 21}, {1, 21}, {3, 5}, {4, 8} }, // 21
- { {14, 22}, {1, 22}, {3, 6}, {4, 9} }, // 22
- { {15, 23}, {1, 23}, {3, 6}, {4, 9} }, // 23
- { {15, 24}, {1, 24}, {3, 6}, {5, 10} }, // 24
- { {16, 25}, {1, 25}, {3, 6}, {5, 10} }, // 25
- { {16, 26}, {1, 26}, {4, 7}, {5, 10} }, // 26
- { {17, 27}, {1, 27}, {4, 7}, {5, 11} }, // 27
- { {18, 28}, {1, 28}, {4, 7}, {5, 11} }, // 28
- { {18, 29}, {1, 29}, {4, 7}, {6, 12} }, // 29
- { {19, 30}, {1, 30}, {4, 8}, {6, 12} }, // 30
- { {20, 31}, {1, 31}, {4, 8}, {6, 12} }, // 31
+ TShortPoolCfg StorageCpuTable[MaxPreparedCpuCount + 1][5] {
+ { {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} }, // 0
+ { {1, 1}, {0, 1}, {0, 1}, {0, 0}, {0, 0} }, // 1
+ { {1, 2}, {0, 2}, {0, 1}, {0, 0}, {1, 1} }, // 2
+ { {1, 3}, {0, 3}, {1, 1}, {0, 0}, {1, 1} }, // 3
+ { {1, 4}, {1, 4}, {1, 1}, {0, 0}, {1, 2} }, // 4
+ { {2, 5}, {1, 5}, {1, 1}, {0, 0}, {1, 2} }, // 5
+ { {3, 6}, {1, 6}, {1, 1}, {0, 0}, {1, 2} }, // 6
+ { {4, 7}, {1, 7}, {1, 2}, {0, 0}, {1, 3} }, // 7
+ { {5, 8}, {1, 8}, {1, 2}, {0, 0}, {1, 3} }, // 8
+ { {5, 9}, {1, 9}, {1, 2}, {0, 0}, {2, 4} }, // 9
+ { {6, 10}, {1, 10}, {1, 2}, {0, 0}, {2, 4} }, // 10
+ { {6, 11}, {1, 11}, {2, 3}, {0, 0}, {2, 4} }, // 11
+ { {7, 12}, {1, 12}, {2, 3}, {0, 0}, {2, 5} }, // 12
+ { {8, 13}, {1, 13}, {2, 3}, {0, 0}, {2, 5} }, // 13
+ { {8, 14}, {1, 14}, {2, 3}, {0, 0}, {3, 6} }, // 14
+ { {9, 15}, {1, 15}, {2, 4}, {0, 0}, {3, 6} }, // 15
+ { {10, 16}, {1, 16}, {2, 4}, {0, 0}, {3, 6} }, // 16
+ { {11, 17}, {1, 17}, {2, 4}, {0, 0}, {3, 7} }, // 17
+ { {11, 18}, {1, 18}, {3, 5}, {0, 0}, {3, 7} }, // 18
+ { {11, 19}, {1, 19}, {3, 5}, {0, 0}, {4, 8} }, // 19
+ { {12, 20}, {1, 20}, {3, 5}, {0, 0}, {4, 8} }, // 20
+ { {13, 21}, {1, 21}, {3, 5}, {0, 0}, {4, 8} }, // 21
+ { {14, 22}, {1, 22}, {3, 6}, {0, 0}, {4, 9} }, // 22
+ { {15, 23}, {1, 23}, {3, 6}, {0, 0}, {4, 9} }, // 23
+ { {15, 24}, {1, 24}, {3, 6}, {0, 0}, {5, 10} }, // 24
+ { {16, 25}, {1, 25}, {3, 6}, {0, 0}, {5, 10} }, // 25
+ { {16, 26}, {1, 26}, {4, 7}, {0, 0}, {5, 10} }, // 26
+ { {17, 27}, {1, 27}, {4, 7}, {0, 0}, {5, 11} }, // 27
+ { {18, 28}, {1, 28}, {4, 7}, {0, 0}, {5, 11} }, // 28
+ { {18, 29}, {1, 29}, {4, 7}, {0, 0}, {6, 12} }, // 29
+ { {19, 30}, {1, 30}, {4, 8}, {0, 0}, {6, 12} }, // 30
+ { {20, 31}, {1, 31}, {4, 8}, {0, 0}, {6, 12} }, // 31
};
i16 GetIOThreadCount(i16 cpuCount) {
return (cpuCount - 1) / (MaxPreparedCpuCount * 2) + 1;
}
- TShortPoolCfg GetShortPoolChg(EPoolType pool, i16 cpuCount, TShortPoolCfg cpuTable[][4]) {
+ TShortPoolCfg GetShortPoolChg(EPoolKind pool, i16 cpuCount, TShortPoolCfg cpuTable[][5]) {
i16 k = cpuCount / MaxPreparedCpuCount;
i16 mod = cpuCount % MaxPreparedCpuCount;
ui8 poolIdx = static_cast<i8>(pool);
@@ -743,72 +746,100 @@ void TBasicServicesInitializer::InitializeServices(NActors::TActorSystemSetup* s
scheduler->SetProgressThreshold(10'000);
}
- NKikimrConfig::TActorSystemConfig::TExecutor *executors[] = {
- mutableSystemConfig->AddExecutor(),
- mutableSystemConfig->AddExecutor(),
- mutableSystemConfig->AddExecutor(),
- mutableSystemConfig->AddExecutor(),
- mutableSystemConfig->AddExecutor()
- };
- mutableSystemConfig->SetIoExecutor(0);
- auto *ioExecutor = executors[3];
- ioExecutor->SetType(NKikimrConfig::TActorSystemConfig::TExecutor::IO);
- ioExecutor->SetThreads(GetIOThreadCount(cpuCount));
- ioExecutor->SetName("IO");
-
ui16 poolCount = Min(5, cpuCount + 1);
- ui32 executorIds[4] = {0, 1, 2, 4};
- TVector<TString> names = {"System", "User", "Batch", "IC"};
- TVector<ui32> priorities = {30, 20, 10, 40};
+ TVector<TString> names = {"System", "User", "Batch", "IO", "IC"};
+ TVector<ui32> priorities = {30, 20, 10, 0, 40};
+ TVector<ui32> executorIds = {0, 1, 2, 3, 4};
+
+ auto *serviceExecutor = mutableSystemConfig->AddServiceExecutor();
+ serviceExecutor->SetServiceName("Interconnect");
switch (cpuCount) {
case 1:
mutableSystemConfig->SetUserExecutor(1);
mutableSystemConfig->SetSysExecutor(1);
mutableSystemConfig->SetBatchExecutor(1);
- names = {"Common"};
- priorities = {40,};
+ mutableSystemConfig->SetIoExecutor(2);
+ serviceExecutor->SetExecutorId(1);
+
+ poolCount = 2;
+ names = {"Common", "IO"};
+ priorities = {40, 0};
+ executorIds = {0, 0, 0, 1, 0};
break;
case 2:
mutableSystemConfig->SetUserExecutor(1);
mutableSystemConfig->SetSysExecutor(1);
mutableSystemConfig->SetBatchExecutor(1);
- names = {"Common"};
- priorities = {40,};
+ mutableSystemConfig->SetIoExecutor(2);
+ serviceExecutor->SetExecutorId(1);
+
poolCount = 2;
+ names = {"Common", "IO"};
+ priorities = {40, 0};
+ executorIds = {0, 0, 0, 1, 0};
break;
case 3:
mutableSystemConfig->SetUserExecutor(1);
mutableSystemConfig->SetSysExecutor(1);
mutableSystemConfig->SetBatchExecutor(2);
- names = {"Common", "Batch", "IC"};
- priorities = {30, 10, 40,};
+ mutableSystemConfig->SetIoExecutor(3);
+ serviceExecutor->SetExecutorId(4);
+
+ poolCount = 4;
+ names = {"Common", "Batch", "IO", "IC"};
+ priorities = {30, 10, 0, 40,};
+ executorIds = {0, 0, 1, 2, 3};
break;
default:
mutableSystemConfig->SetUserExecutor(1);
mutableSystemConfig->SetSysExecutor(2);
mutableSystemConfig->SetBatchExecutor(3);
+ mutableSystemConfig->SetIoExecutor(4);
+ serviceExecutor->SetExecutorId(5);
break;
}
- auto *serviceExecutor = mutableSystemConfig->AddServiceExecutor();
- serviceExecutor->SetServiceName("Interconnect");
- serviceExecutor->SetExecutorId(poolCount - 1);
+
+ TVector<NKikimrConfig::TActorSystemConfig::TExecutor *> executors;
+ for (ui32 poolIdx = 0; poolIdx < poolCount; ++poolIdx) {
+ executors.push_back(mutableSystemConfig->AddExecutor());
+ }
auto &cpuTable = (mutableSystemConfig->GetNodeType() == NKikimrConfig::TActorSystemConfig::STORAGE ? StorageCpuTable :
mutableSystemConfig->GetNodeType() == NKikimrConfig::TActorSystemConfig::COMPUTE ? ComputeCpuTable :
HybridCpuTable );
- for (ui32 poolType = 0; poolType < poolCount - 1; ++poolType) {
- TShortPoolCfg cfg = GetShortPoolChg(static_cast<EPoolType>(poolType), cpuCount, cpuTable);
- auto *executor = executors[executorIds[poolType]];
+
+ for (ui32 poolIdx = 0; poolIdx < poolCount; ++poolIdx) {
+ auto *executor = executors[poolIdx];
+ if (names[poolIdx] == "IO") {
+ executor->SetType(NKikimrConfig::TActorSystemConfig::TExecutor::IO);
+ executor->SetThreads(GetIOThreadCount(cpuCount));
+ executor->SetName(names[poolIdx]);
+ continue;
+ }
+ EPoolKind poolKind = EPoolKind::System;
+ if (names[poolIdx] == "User") {
+ poolKind = EPoolKind::User;
+ } else if (names[poolIdx] == "Batch") {
+ poolKind = EPoolKind::Batch;
+ } else if (names[poolIdx] == "IC") {
+ poolKind = EPoolKind::IC;
+ }
+ TShortPoolCfg cfg = GetShortPoolChg(poolKind, cpuCount, cpuTable);
+ i16 threadsCount = cfg.ThreadCount;
+ if (poolCount == 2) {
+ threadsCount = cpuCount;
+ }
executor->SetType(NKikimrConfig::TActorSystemConfig::TExecutor::BASIC);
- executor->SetThreads(cpuCount == 2 ? 2 : cfg.ThreadCount);
- executor->SetMaxThreads(cpuCount == 2 ? 2 : cfg.MaxThreadCount);
- executor->SetPriority(priorities[poolType]);
- executor->SetName(names[poolType]);
- if (cpuCount == 1 || cpuCount == 2) {
+ executor->SetThreads(threadsCount);
+ executor->SetThreads(Max(cfg.MaxThreadCount, threadsCount));
+ executor->SetPriority(priorities[poolIdx]);
+ executor->SetName(names[poolIdx]);
+
+ if (names[poolIdx] == "Common") {
executor->SetSpinThreshold(0);
executor->SetTimePerMailboxMicroSecs(100);
- } else if (poolType == poolCount - 2) { // IC pool
+ } else if (names[poolIdx] == "IC") {
executor->SetSpinThreshold(10);
executor->SetTimePerMailboxMicroSecs(100);
executor->SetMaxAvgPingDeviation(500);
@@ -949,6 +980,15 @@ void TBasicServicesInitializer::InitializeServices(NActors::TActorSystemSetup* s
icCommon->LocalScopeId = ScopeId.GetInterconnectScopeId();
icCommon->Cookie = icConfig.GetSuppressConnectivityCheck() ? TString() : CreateGuidAsString();
+ if (icConfig.HasOutgoingHandshakeInflightLimit()) {
+ icCommon->OutgoingHandshakeInflightLimit = icConfig.GetOutgoingHandshakeInflightLimit();
+
+ // create handshake broker actor
+ setup->LocalServices.emplace_back(MakeHandshakeBrokerOutId(), TActorSetupCmd(
+ CreateHandshakeBroker(*icCommon->OutgoingHandshakeInflightLimit),
+ TMailboxType::ReadAsFilled, systemPoolId));
+ }
+
#define CHANNEL(NAME) {TInterconnectChannels::NAME, #NAME}
icCommon->ChannelName = {
CHANNEL(IC_COMMON),
@@ -986,7 +1026,7 @@ void TBasicServicesInitializer::InitializeServices(NActors::TActorSystemSetup* s
if (const auto& mon = appData->Mon) {
icCommon->RegisterMonPage = [mon](const TString& path, const TString& title, TActorSystem *actorSystem, const TActorId& actorId) {
NMonitoring::TIndexMonPage *page = mon->RegisterIndexPage("actors", "Actors")->RegisterIndexPage("interconnect", "Interconnect");
- mon->RegisterActorPage(page, path, title, false, actorSystem, actorId);
+ mon->RegisterActorPage(page, path, title, false, actorSystem, actorId, /*useAuth=*/true, /*sortPages=*/false);
};
setup->LocalServices.emplace_back(NInterconnect::MakeInterconnectMonActorId(NodeId), TActorSetupCmd(
NInterconnect::CreateInterconnectMonActor(icCommon), TMailboxType::ReadAsFilled, systemPoolId));
diff --git a/ydb/core/grpc_services/CMakeLists.darwin.txt b/ydb/core/grpc_services/CMakeLists.darwin.txt
index 209341cbd3..2816a8a926 100644
--- a/ydb/core/grpc_services/CMakeLists.darwin.txt
+++ b/ydb/core/grpc_services/CMakeLists.darwin.txt
@@ -100,6 +100,7 @@ target_sources(ydb-core-grpc_services PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_import.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_import_data.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_keep_alive.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_keyvalue.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_kh_describe.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_kh_snapshots.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_kqp_base.cpp
diff --git a/ydb/core/grpc_services/CMakeLists.linux-aarch64.txt b/ydb/core/grpc_services/CMakeLists.linux-aarch64.txt
index fcf80c2b10..a25eb23a5f 100644
--- a/ydb/core/grpc_services/CMakeLists.linux-aarch64.txt
+++ b/ydb/core/grpc_services/CMakeLists.linux-aarch64.txt
@@ -101,6 +101,7 @@ target_sources(ydb-core-grpc_services PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_import.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_import_data.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_keep_alive.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_keyvalue.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_kh_describe.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_kh_snapshots.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_kqp_base.cpp
diff --git a/ydb/core/grpc_services/CMakeLists.linux.txt b/ydb/core/grpc_services/CMakeLists.linux.txt
index fcf80c2b10..a25eb23a5f 100644
--- a/ydb/core/grpc_services/CMakeLists.linux.txt
+++ b/ydb/core/grpc_services/CMakeLists.linux.txt
@@ -101,6 +101,7 @@ target_sources(ydb-core-grpc_services PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_import.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_import_data.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_keep_alive.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_keyvalue.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_kh_describe.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_kh_snapshots.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_kqp_base.cpp
diff --git a/ydb/core/grpc_services/rpc_keyvalue.cpp b/ydb/core/grpc_services/rpc_keyvalue.cpp
new file mode 100644
index 0000000000..3f6746d7f5
--- /dev/null
+++ b/ydb/core/grpc_services/rpc_keyvalue.cpp
@@ -0,0 +1,1048 @@
+#include "service_keyvalue.h"
+
+#include <ydb/public/api/protos/ydb_keyvalue.pb.h>
+
+#include <ydb/core/base/path.h>
+#include <ydb/core/grpc_services/rpc_scheme_base.h>
+#include <ydb/core/grpc_services/rpc_common.h>
+#include <ydb/core/keyvalue/keyvalue_events.h>
+#include <ydb/core/tx/scheme_cache/scheme_cache.h>
+#include <ydb/core/mind/local.h>
+#include <ydb/core/protos/local.pb.h>
+
+
+namespace NKikimr::NGRpcService {
+
+using namespace NActors;
+using namespace Ydb;
+
+using TEvCreateVolumeKeyValueRequest =
+ TGrpcRequestOperationCall<Ydb::KeyValue::CreateVolumeRequest,
+ Ydb::KeyValue::CreateVolumeResponse>;
+using TEvDropVolumeKeyValueRequest =
+ TGrpcRequestOperationCall<Ydb::KeyValue::DropVolumeRequest,
+ Ydb::KeyValue::DropVolumeResponse>;
+using TEvAlterVolumeKeyValueRequest =
+ TGrpcRequestOperationCall<Ydb::KeyValue::AlterVolumeRequest,
+ Ydb::KeyValue::AlterVolumeResponse>;
+using TEvDescribeVolumeKeyValueRequest =
+ TGrpcRequestOperationCall<Ydb::KeyValue::DescribeVolumeRequest,
+ Ydb::KeyValue::DescribeVolumeResponse>;
+using TEvListLocalPartitionsKeyValueRequest =
+ TGrpcRequestOperationCall<Ydb::KeyValue::ListLocalPartitionsRequest,
+ Ydb::KeyValue::ListLocalPartitionsResponse>;
+
+using TEvAcquireLockKeyValueRequest =
+ TGrpcRequestOperationCall<Ydb::KeyValue::AcquireLockRequest,
+ Ydb::KeyValue::AcquireLockResponse>;
+using TEvExecuteTransactionKeyValueRequest =
+ TGrpcRequestOperationCall<Ydb::KeyValue::ExecuteTransactionRequest,
+ Ydb::KeyValue::ExecuteTransactionResponse>;
+using TEvReadKeyValueRequest =
+ TGrpcRequestOperationCall<Ydb::KeyValue::ReadRequest,
+ Ydb::KeyValue::ReadResponse>;
+using TEvReadRangeKeyValueRequest =
+ TGrpcRequestOperationCall<Ydb::KeyValue::ReadRangeRequest,
+ Ydb::KeyValue::ReadRangeResponse>;
+using TEvListRangeKeyValueRequest =
+ TGrpcRequestOperationCall<Ydb::KeyValue::ListRangeRequest,
+ Ydb::KeyValue::ListRangeResponse>;
+using TEvGetStorageChannelStatusKeyValueRequest =
+ TGrpcRequestOperationCall<Ydb::KeyValue::GetStorageChannelStatusRequest,
+ Ydb::KeyValue::GetStorageChannelStatusResponse>;
+
+} // namespace NKikimr::NGRpcService
+
+
+namespace NKikimr::NGRpcService {
+
+using namespace NActors;
+using namespace Ydb;
+
+#define COPY_PRIMITIVE_FIELD(name) \
+ to->set_ ## name(static_cast<decltype(to->name())>(from.name())) \
+// COPY_PRIMITIVE_FIELD
+
+#define COPY_PRIMITIVE_OPTIONAL_FIELD(name) \
+ if (from.has_ ## name()) { \
+ to->set_ ## name(static_cast<decltype(to->name())>(from.name())); \
+ } \
+// COPY_PRIMITIVE_FIELD
+
+namespace {
+
+void CopyProtobuf(const Ydb::KeyValue::AcquireLockRequest &/*from*/,
+ NKikimrKeyValue::AcquireLockRequest */*to*/)
+{
+}
+
+void CopyProtobuf(const NKikimrKeyValue::AcquireLockResult &from,
+ Ydb::KeyValue::AcquireLockResult *to)
+{
+ COPY_PRIMITIVE_FIELD(lock_generation);
+ COPY_PRIMITIVE_FIELD(node_id);
+}
+
+
+void CopyProtobuf(const Ydb::KeyValue::ExecuteTransactionRequest::Command::Rename &from,
+ NKikimrKeyValue::ExecuteTransactionRequest::Command::Rename *to)
+{
+ COPY_PRIMITIVE_FIELD(old_key);
+ COPY_PRIMITIVE_FIELD(new_key);
+}
+
+void CopyProtobuf(const Ydb::KeyValue::ExecuteTransactionRequest::Command::Concat &from,
+ NKikimrKeyValue::ExecuteTransactionRequest::Command::Concat *to)
+{
+ *to->mutable_input_keys() = from.input_keys();
+ COPY_PRIMITIVE_FIELD(output_key);
+ COPY_PRIMITIVE_FIELD(keep_inputs);
+}
+
+void CopyProtobuf(const Ydb::KeyValue::KeyRange &from, NKikimrKeyValue::KVRange *to) {
+#define CHECK_AND_SET(name) \
+ if (from.has_ ## name()) { \
+ COPY_PRIMITIVE_FIELD(name); \
+ } \
+// CHECK_AND_SET
+
+ CHECK_AND_SET(from_key_inclusive)
+ CHECK_AND_SET(from_key_exclusive)
+ CHECK_AND_SET(to_key_inclusive)
+ CHECK_AND_SET(to_key_exclusive)
+
+#undef CHECK_AND_SET
+}
+
+void CopyProtobuf(const Ydb::KeyValue::ExecuteTransactionRequest::Command::CopyRange &from,
+ NKikimrKeyValue::ExecuteTransactionRequest::Command::CopyRange *to)
+{
+ CopyProtobuf(from.range(), to->mutable_range());
+ COPY_PRIMITIVE_FIELD(prefix_to_remove);
+ COPY_PRIMITIVE_FIELD(prefix_to_add);
+}
+
+template <typename TProtoFrom, typename TProtoTo>
+void CopyPriority(TProtoFrom &&from, TProtoTo *to) {
+ switch(from.priority()) {
+ case Ydb::KeyValue::Priorities::PRIORITY_REALTIME:
+ to->set_priority(NKikimrKeyValue::Priorities::PRIORITY_REALTIME);
+ break;
+ case Ydb::KeyValue::Priorities::PRIORITY_BACKGROUND:
+ to->set_priority(NKikimrKeyValue::Priorities::PRIORITY_BACKGROUND);
+ break;
+ default:
+ to->set_priority(NKikimrKeyValue::Priorities::PRIORITY_UNSPECIFIED);
+ break;
+ }
+}
+
+void CopyProtobuf(const Ydb::KeyValue::ExecuteTransactionRequest::Command::Write &from,
+ NKikimrKeyValue::ExecuteTransactionRequest::Command::Write *to)
+{
+ COPY_PRIMITIVE_FIELD(key);
+ COPY_PRIMITIVE_FIELD(value);
+ COPY_PRIMITIVE_FIELD(storage_channel);
+ CopyPriority(from, to);
+ switch(from.tactic()) {
+ case Ydb::KeyValue::ExecuteTransactionRequest::Command::Write::TACTIC_MAX_THROUGHPUT:
+ to->set_tactic(NKikimrKeyValue::ExecuteTransactionRequest::Command::Write::TACTIC_MAX_THROUGHPUT);
+ break;
+ case Ydb::KeyValue::ExecuteTransactionRequest::Command::Write::TACTIC_MIN_LATENCY:
+ to->set_tactic(NKikimrKeyValue::ExecuteTransactionRequest::Command::Write::TACTIC_MIN_LATENCY);
+ break;
+ default:
+ to->set_tactic(NKikimrKeyValue::ExecuteTransactionRequest::Command::Write::TACTIC_UNSPECIFIED);
+ break;
+ }
+}
+
+void CopyProtobuf(const Ydb::KeyValue::ExecuteTransactionRequest::Command::DeleteRange &from,
+ NKikimrKeyValue::ExecuteTransactionRequest::Command::DeleteRange *to)
+{
+ CopyProtobuf(from.range(), to->mutable_range());
+}
+
+void CopyProtobuf(const Ydb::KeyValue::ExecuteTransactionRequest::Command &from,
+ NKikimrKeyValue::ExecuteTransactionRequest::Command *to)
+{
+#define CHECK_AND_COPY(name) \
+ if (from.has_ ## name()) { \
+ CopyProtobuf(from.name(), to->mutable_ ## name()); \
+ } \
+// CHECK_AND_COPY
+
+ CHECK_AND_COPY(rename)
+ CHECK_AND_COPY(concat)
+ CHECK_AND_COPY(copy_range)
+ CHECK_AND_COPY(write)
+ CHECK_AND_COPY(delete_range)
+
+#undef CHECK_AND_COPY
+}
+
+void CopyProtobuf(const Ydb::KeyValue::ExecuteTransactionRequest &from,
+ NKikimrKeyValue::ExecuteTransactionRequest *to)
+{
+ COPY_PRIMITIVE_OPTIONAL_FIELD(lock_generation);
+ for (auto &cmd : from.commands()) {
+ CopyProtobuf(cmd, to->add_commands());
+ }
+}
+
+void CopyProtobuf(const NKikimrKeyValue::StorageChannel &from, Ydb::KeyValue::StorageChannelInfo *to) {
+ COPY_PRIMITIVE_FIELD(storage_channel);
+ COPY_PRIMITIVE_FIELD(status_flag);
+}
+
+void CopyProtobuf(const NKikimrKeyValue::ExecuteTransactionResult &from,
+ Ydb::KeyValue::ExecuteTransactionResult *to)
+{
+ COPY_PRIMITIVE_FIELD(node_id);
+ for (auto &channel : from.storage_channel()) {
+ CopyProtobuf(channel, to->add_storage_channel_info());
+ }
+}
+
+void CopyProtobuf(const Ydb::KeyValue::ReadRequest &from, NKikimrKeyValue::ReadRequest *to) {
+ COPY_PRIMITIVE_OPTIONAL_FIELD(lock_generation);
+ COPY_PRIMITIVE_FIELD(key);
+ COPY_PRIMITIVE_FIELD(offset);
+ COPY_PRIMITIVE_FIELD(size);
+ CopyPriority(from, to);
+ COPY_PRIMITIVE_FIELD(limit_bytes);
+}
+
+void CopyProtobuf(const NKikimrKeyValue::ReadResult &from, Ydb::KeyValue::ReadResult *to) {
+ COPY_PRIMITIVE_FIELD(requested_key);
+ COPY_PRIMITIVE_FIELD(requested_offset);
+ COPY_PRIMITIVE_FIELD(requested_size);
+ COPY_PRIMITIVE_FIELD(value);
+ COPY_PRIMITIVE_FIELD(node_id);
+ switch (from.status()) {
+ case NKikimrKeyValue::Statuses::RSTATUS_OVERRUN:
+ to->set_is_overrun(true);
+ break;
+ default:
+ break;
+ }
+}
+
+void CopyProtobuf(const Ydb::KeyValue::ReadRangeRequest &from, NKikimrKeyValue::ReadRangeRequest *to) {
+ COPY_PRIMITIVE_OPTIONAL_FIELD(lock_generation);
+ CopyProtobuf(from.range(), to->mutable_range());
+ to->set_include_data(true);
+ COPY_PRIMITIVE_FIELD(limit_bytes);
+ CopyPriority(from, to);
+}
+
+void CopyProtobuf(const Ydb::KeyValue::ListRangeRequest &from, NKikimrKeyValue::ReadRangeRequest *to) {
+ COPY_PRIMITIVE_OPTIONAL_FIELD(lock_generation);
+ CopyProtobuf(from.range(), to->mutable_range());
+ to->set_include_data(false);
+ COPY_PRIMITIVE_FIELD(limit_bytes);
+}
+
+void CopyProtobuf(const NKikimrKeyValue::ReadRangeResult::KeyValuePair &from,
+ Ydb::KeyValue::ReadRangeResult::KeyValuePair *to)
+{
+ COPY_PRIMITIVE_FIELD(key);
+ COPY_PRIMITIVE_FIELD(value);
+ COPY_PRIMITIVE_FIELD(creation_unix_time);
+ COPY_PRIMITIVE_FIELD(storage_channel);
+}
+
+void CopyProtobuf(const NKikimrKeyValue::ReadRangeResult &from,
+ Ydb::KeyValue::ReadRangeResult *to)
+{
+ for (auto &pair : from.pair()) {
+ CopyProtobuf(pair, to->add_pair());
+ }
+ if (from.status() == NKikimrKeyValue::Statuses::RSTATUS_OVERRUN) {
+ to->set_is_overrun(true);
+ }
+ COPY_PRIMITIVE_FIELD(node_id);
+}
+
+void CopyProtobuf(const NKikimrKeyValue::ReadRangeResult::KeyValuePair &from,
+ Ydb::KeyValue::ListRangeResult::KeyInfo *to)
+{
+ COPY_PRIMITIVE_FIELD(key);
+ COPY_PRIMITIVE_FIELD(value_size);
+ COPY_PRIMITIVE_FIELD(creation_unix_time);
+ COPY_PRIMITIVE_FIELD(storage_channel);
+}
+
+void CopyProtobuf(const NKikimrKeyValue::ReadRangeResult &from,
+ Ydb::KeyValue::ListRangeResult *to)
+{
+ for (auto &pair : from.pair()) {
+ CopyProtobuf(pair, to->add_key());
+ }
+ if (from.status() == NKikimrKeyValue::Statuses::RSTATUS_OVERRUN) {
+ to->set_is_overrun(true);
+ }
+ COPY_PRIMITIVE_FIELD(node_id);
+}
+
+void CopyProtobuf(const Ydb::KeyValue::GetStorageChannelStatusRequest &from,
+ NKikimrKeyValue::GetStorageChannelStatusRequest *to)
+{
+ COPY_PRIMITIVE_OPTIONAL_FIELD(lock_generation);
+ *to->mutable_storage_channel() = from.storage_channel();
+}
+
+
+void CopyProtobuf(const NKikimrKeyValue::GetStorageChannelStatusResult &from,
+ Ydb::KeyValue::GetStorageChannelStatusResult *to)
+{
+ for (auto &channel : from.storage_channel()) {
+ CopyProtobuf(channel, to->add_storage_channel_info());
+ }
+ COPY_PRIMITIVE_FIELD(node_id);
+}
+
+
+Ydb::StatusIds::StatusCode PullStatus(const NKikimrKeyValue::AcquireLockResult &) {
+ return Ydb::StatusIds::SUCCESS;
+}
+
+template <typename TResult>
+Ydb::StatusIds::StatusCode PullStatus(const TResult &result) {
+ switch (result.status()) {
+ case NKikimrKeyValue::Statuses::RSTATUS_OK:
+ case NKikimrKeyValue::Statuses::RSTATUS_OVERRUN:
+ return Ydb::StatusIds::SUCCESS;
+ case NKikimrKeyValue::Statuses::RSTATUS_ERROR:
+ return Ydb::StatusIds::GENERIC_ERROR;
+ case NKikimrKeyValue::Statuses::RSTATUS_TIMEOUT:
+ return Ydb::StatusIds::TIMEOUT;
+ case NKikimrKeyValue::Statuses::RSTATUS_NOT_FOUND:
+ return Ydb::StatusIds::NOT_FOUND;
+ case NKikimrKeyValue::Statuses::RSTATUS_WRONG_LOCK_GENERATION:
+ return Ydb::StatusIds::PRECONDITION_FAILED;
+ default:
+ return Ydb::StatusIds::INTERNAL_ERROR;
+ }
+}
+
+namespace {
+ void AssignPoolKinds(auto &storageConfig, auto *internalStorageConfig) {
+ ui32 size = storageConfig.channel_size();
+
+ for (ui32 channelIdx = 0; channelIdx < size; ++channelIdx) {
+ internalStorageConfig->AddChannel()->SetPreferredPoolKind(storageConfig.channel(channelIdx).media());
+ }
+ }
+}
+
+
+class TCreateVolumeRequest : public TRpcSchemeRequestActor<TCreateVolumeRequest, TEvCreateVolumeKeyValueRequest> {
+public:
+ using TBase = TRpcSchemeRequestActor<TCreateVolumeRequest, TEvCreateVolumeKeyValueRequest>;
+ using TBase::TBase;
+
+ void Bootstrap(const TActorContext& ctx) {
+ TBase::Bootstrap(ctx);
+ Become(&TCreateVolumeRequest::StateFunc);
+ SendProposeRequest(ctx);
+ }
+
+ void SendProposeRequest(const TActorContext &ctx) {
+ const auto req = this->GetProtoRequest();
+
+ std::pair<TString, TString> pathPair;
+ try {
+ pathPair = SplitPath(Request_->GetDatabaseName(), req->path());
+ } catch (const std::exception& ex) {
+ Request_->RaiseIssue(NYql::ExceptionToIssue(ex));
+ return Reply(StatusIds::BAD_REQUEST, ctx);
+ }
+ const auto& workingDir = pathPair.first;
+ const auto& name = pathPair.second;
+
+ std::unique_ptr<TEvTxUserProxy::TEvProposeTransaction> proposeRequest = this->CreateProposeTransaction();
+ NKikimrTxUserProxy::TEvProposeTransaction& record = proposeRequest->Record;
+ NKikimrSchemeOp::TModifyScheme* modifyScheme = record.MutableTransaction()->MutableModifyScheme();
+ modifyScheme->SetWorkingDir(workingDir);
+ NKikimrSchemeOp::TCreateSolomonVolume* tableDesc = nullptr;
+
+ modifyScheme->SetOperationType(NKikimrSchemeOp::EOperationType::ESchemeOpCreateSolomonVolume);
+ tableDesc = modifyScheme->MutableCreateSolomonVolume();
+ tableDesc->SetName(name);
+ tableDesc->SetPartitionCount(req->partition_count());
+
+ if (GetProtoRequest()->has_storage_config()) {
+ auto &storageConfig = GetProtoRequest()->storage_config();
+ auto *internalStorageConfig = tableDesc->MutableStorageConfig();
+ AssignPoolKinds(storageConfig, internalStorageConfig);
+ } else {
+ tableDesc->SetChannelProfileId(GetProtoRequest()->partition_count());
+ }
+
+ ctx.Send(MakeTxProxyID(), proposeRequest.release());
+ }
+
+ STFUNC(StateFunc) {
+ return TBase::StateWork(ev, ctx);
+ }
+};
+
+
+class TDropVolumeRequest : public TRpcSchemeRequestActor<TDropVolumeRequest, TEvDropVolumeKeyValueRequest> {
+public:
+ using TBase = TRpcSchemeRequestActor<TDropVolumeRequest, TEvDropVolumeKeyValueRequest>;
+ using TBase::TBase;
+
+ void Bootstrap(const TActorContext& ctx) {
+ TBase::Bootstrap(ctx);
+ Become(&TDropVolumeRequest::StateFunc);
+ SendProposeRequest(ctx);
+ }
+
+ void SendProposeRequest(const TActorContext &ctx) {
+ const auto req = this->GetProtoRequest();
+
+ std::pair<TString, TString> pathPair;
+ try {
+ pathPair = SplitPath(req->path());
+ } catch (const std::exception& ex) {
+ Request_->RaiseIssue(NYql::ExceptionToIssue(ex));
+ return Reply(StatusIds::BAD_REQUEST, ctx);
+ }
+ const auto& workingDir = pathPair.first;
+ const auto& name = pathPair.second;
+
+ std::unique_ptr<TEvTxUserProxy::TEvProposeTransaction> proposeRequest = this->CreateProposeTransaction();
+ NKikimrTxUserProxy::TEvProposeTransaction& record = proposeRequest->Record;
+ NKikimrSchemeOp::TModifyScheme* modifyScheme = record.MutableTransaction()->MutableModifyScheme();
+ modifyScheme->SetWorkingDir(workingDir);
+ NKikimrSchemeOp::TDrop* drop = nullptr;
+
+ modifyScheme->SetOperationType(NKikimrSchemeOp::EOperationType::ESchemeOpDropSolomonVolume);
+ drop = modifyScheme->MutableDrop();
+ drop->SetName(name);
+
+ ctx.Send(MakeTxProxyID(), proposeRequest.release());
+ }
+
+ STFUNC(StateFunc) {
+ return TBase::StateWork(ev, ctx);
+ }
+};
+
+class TAlterVolumeRequest : public TRpcSchemeRequestActor<TAlterVolumeRequest, TEvAlterVolumeKeyValueRequest> {
+public:
+ using TBase = TRpcSchemeRequestActor<TAlterVolumeRequest, TEvAlterVolumeKeyValueRequest>;
+ using TBase::TBase;
+
+ void Bootstrap(const TActorContext& ctx) {
+ TBase::Bootstrap(ctx);
+ Become(&TAlterVolumeRequest::StateFunc);
+ SendProposeRequest(ctx);
+ }
+
+ void SendProposeRequest(const TActorContext &ctx) {
+ const auto req = this->GetProtoRequest();
+
+ std::pair<TString, TString> pathPair;
+ try {
+ pathPair = SplitPath(req->path());
+ } catch (const std::exception& ex) {
+ Request_->RaiseIssue(NYql::ExceptionToIssue(ex));
+ return Reply(StatusIds::BAD_REQUEST, ctx);
+ }
+ const auto& workingDir = pathPair.first;
+ const auto& name = pathPair.second;
+
+ std::unique_ptr<TEvTxUserProxy::TEvProposeTransaction> proposeRequest = this->CreateProposeTransaction();
+ NKikimrTxUserProxy::TEvProposeTransaction& record = proposeRequest->Record;
+ NKikimrSchemeOp::TModifyScheme* modifyScheme = record.MutableTransaction()->MutableModifyScheme();
+ modifyScheme->SetWorkingDir(workingDir);
+ NKikimrSchemeOp::TAlterSolomonVolume* tableDesc = nullptr;
+
+ modifyScheme->SetOperationType(NKikimrSchemeOp::EOperationType::ESchemeOpAlterSolomonVolume);
+ tableDesc = modifyScheme->MutableAlterSolomonVolume();
+ tableDesc->SetName(name);
+ tableDesc->SetPartitionCount(req->alter_partition_count());
+
+ if (GetProtoRequest()->has_storage_config()) {
+ tableDesc->SetUpdateChannelsBinding(true);
+ auto &storageConfig = GetProtoRequest()->storage_config();
+ auto *internalStorageConfig = tableDesc->MutableStorageConfig();
+ AssignPoolKinds(storageConfig, internalStorageConfig);
+ } else {
+ tableDesc->SetUpdateChannelsBinding(false);
+ tableDesc->SetChannelProfileId(0);
+ }
+
+ ctx.Send(MakeTxProxyID(), proposeRequest.release());
+ }
+
+ STFUNC(StateFunc) {
+ return TBase::StateWork(ev, ctx);
+ }
+};
+
+template <typename TDerived>
+class TBaseKeyValueRequest {
+protected:
+ void OnBootstrap() {
+ auto self = static_cast<TDerived*>(this);
+ Ydb::StatusIds::StatusCode status = Ydb::StatusIds::STATUS_CODE_UNSPECIFIED;
+ NYql::TIssues issues;
+ if (!self->ValidateRequest(status, issues)) {
+ self->Reply(status, issues, TActivationContext::AsActorContext());
+ return;
+ }
+ if (const auto& userToken = self->Request_->GetSerializedToken()) {
+ UserToken = new NACLib::TUserToken(userToken);
+ }
+ SendNavigateRequest();
+ }
+
+ void SendNavigateRequest() {
+ auto self = static_cast<TDerived*>(this);
+ auto &rec = *self->GetProtoRequest();
+ auto req = MakeHolder<NSchemeCache::TSchemeCacheNavigate>();
+ auto& entry = req->ResultSet.emplace_back();
+ entry.Path = ::NKikimr::SplitPath(rec.path());
+ entry.RequestType = NSchemeCache::TSchemeCacheNavigate::TEntry::ERequestType::ByPath;
+ entry.ShowPrivatePath = true;
+ entry.SyncVersion = false;
+ req->UserToken = UserToken;
+ req->DatabaseName = self->Request_->GetDatabaseName().GetOrElse("");
+ auto ev = new TEvTxProxySchemeCache::TEvNavigateKeySet(req.Release());
+ self->Send(MakeSchemeCacheID(), ev);
+ }
+
+ bool OnNavigateKeySetResult(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr &ev, ui32 access) {
+ auto self = static_cast<TDerived*>(this);
+ TEvTxProxySchemeCache::TEvNavigateKeySetResult* res = ev->Get();
+ NSchemeCache::TSchemeCacheNavigate *request = res->Request.Get();
+
+ auto ctx = TActivationContext::AsActorContext();
+
+ if (res->Request->ResultSet.size() != 1) {
+ self->Reply(StatusIds::INTERNAL_ERROR, "Received an incorrect answer from SchemeCache.", NKikimrIssues::TIssuesIds::UNEXPECTED, ctx);
+ return false;
+ }
+
+ switch (request->ResultSet[0].Status) {
+ case NSchemeCache::TSchemeCacheNavigate::EStatus::Ok:
+ break;
+ case NSchemeCache::TSchemeCacheNavigate::EStatus::RootUnknown:
+ case NSchemeCache::TSchemeCacheNavigate::EStatus::PathErrorUnknown:
+ self->Reply(StatusIds::SCHEME_ERROR, "Path isn't exist.", NKikimrIssues::TIssuesIds::PATH_NOT_EXIST, ctx);
+ return false;
+ case NSchemeCache::TSchemeCacheNavigate::EStatus::LookupError:
+ case NSchemeCache::TSchemeCacheNavigate::EStatus::RedirectLookupError:
+ self->Reply(StatusIds::UNAVAILABLE, "Database resolve failed with no certain result.", NKikimrIssues::TIssuesIds::RESOLVE_LOOKUP_ERROR, ctx);
+ return false;
+ default:
+ self->Reply(StatusIds::UNAVAILABLE, "Resolve error", NKikimrIssues::TIssuesIds::GENERIC_RESOLVE_ERROR, ctx);
+ return false;
+ }
+
+ if (!self->CheckAccess(CanonizePath(res->Request->ResultSet[0].Path), res->Request->ResultSet[0].SecurityObject, access)) {
+ return false;
+ }
+ if (!request->ResultSet[0].SolomonVolumeInfo) {
+ self->Reply(StatusIds::SCHEME_ERROR, "Table isn't keyvalue.", NKikimrIssues::TIssuesIds::DEFAULT_ERROR, ctx);
+ return false;
+ }
+
+ return true;
+ }
+
+ bool CheckAccess(const TString& path, TIntrusivePtr<TSecurityObject> securityObject, ui32 access) {
+ auto self = static_cast<TDerived*>(this);
+ if (!UserToken || !securityObject) {
+ return true;
+ }
+
+ if (securityObject->CheckAccess(access, *UserToken)) {
+ return true;
+ }
+
+ self->Reply(Ydb::StatusIds::UNAUTHORIZED,
+ TStringBuilder() << "Access denied"
+ << ": for# " << UserToken->GetUserSID()
+ << ", path# " << path
+ << ", access# " << NACLib::AccessRightsToString(access),
+ NKikimrIssues::TIssuesIds::ACCESS_DENIED,
+ TActivationContext::AsActorContext());
+ return false;
+ }
+
+private:
+ TIntrusiveConstPtr<NACLib::TUserToken> UserToken;
+};
+
+class TDescribeVolumeRequest
+ : public TRpcOperationRequestActor<TDescribeVolumeRequest, TEvDescribeVolumeKeyValueRequest>
+ , public TBaseKeyValueRequest<TDescribeVolumeRequest>
+{
+public:
+ using TBase = TRpcOperationRequestActor<TDescribeVolumeRequest, TEvDescribeVolumeKeyValueRequest>;
+ using TBase::TBase;
+
+ friend class TBaseKeyValueRequest<TDescribeVolumeRequest>;
+
+ void Bootstrap(const TActorContext& ctx) {
+ TBase::Bootstrap(ctx);
+ OnBootstrap();
+ Become(&TDescribeVolumeRequest::StateFunc);
+ }
+
+
+protected:
+ STFUNC(StateFunc) {
+ switch (ev->GetTypeRewrite()) {
+ hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle);
+ default:
+ return TBase::StateFuncBase(ev, ctx);
+ }
+ }
+
+ void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr &ev) {
+ TEvTxProxySchemeCache::TEvNavigateKeySetResult* res = ev->Get();
+ NSchemeCache::TSchemeCacheNavigate *request = res->Request.Get();
+
+ if (!OnNavigateKeySetResult(ev, NACLib::DescribeSchema)) {
+ return;
+ }
+
+ const NKikimrSchemeOp::TSolomonVolumeDescription &desc = request->ResultSet[0].SolomonVolumeInfo->Description;
+ Ydb::KeyValue::DescribeVolumeResult result;
+ result.set_path(this->GetProtoRequest()->path());
+ result.set_partition_count(desc.PartitionsSize());
+ this->ReplyWithResult(Ydb::StatusIds::SUCCESS, result, TActivationContext::AsActorContext());
+ }
+
+ bool ValidateRequest(Ydb::StatusIds::StatusCode& /*status*/, NYql::TIssues& /*issues*/) {
+ return true;
+ }
+
+private:
+ TIntrusiveConstPtr<NACLib::TUserToken> UserToken;
+};
+
+
+class TListLocalPartitionsRequest
+ : public TRpcOperationRequestActor<TListLocalPartitionsRequest, TEvListLocalPartitionsKeyValueRequest>
+ , public TBaseKeyValueRequest<TListLocalPartitionsRequest>
+{
+public:
+ using TBase = TRpcOperationRequestActor<TListLocalPartitionsRequest, TEvListLocalPartitionsKeyValueRequest>;
+ using TBase::TBase;
+
+ friend class TBaseKeyValueRequest<TListLocalPartitionsRequest>;
+
+ void Bootstrap(const TActorContext& ctx) {
+ TBase::Bootstrap(ctx);
+ OnBootstrap();
+ Become(&TListLocalPartitionsRequest::StateFunc);
+ }
+
+protected:
+ STFUNC(StateFunc) {
+ switch (ev->GetTypeRewrite()) {
+ hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle);
+ hFunc(TEvLocal::TEvEnumerateTabletsResult, Handle);
+ default:
+ return TBase::StateFuncBase(ev, ctx);
+ }
+ }
+
+ void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr &ev) {
+ TEvTxProxySchemeCache::TEvNavigateKeySetResult* res = ev->Get();
+ NSchemeCache::TSchemeCacheNavigate *request = res->Request.Get();
+
+ if (!OnNavigateKeySetResult(ev, NACLib::DescribeSchema)) {
+ return;
+ }
+
+ const NKikimrSchemeOp::TSolomonVolumeDescription &desc = request->ResultSet[0].SolomonVolumeInfo->Description;
+ for (const NKikimrSchemeOp::TSolomonVolumeDescription::TPartition &partition : desc.GetPartitions()) {
+ TabletIdToPartitionId[partition.GetTabletId()] = partition.GetPartitionId();
+ }
+
+ if (TabletIdToPartitionId.empty()) {
+ Ydb::KeyValue::ListLocalPartitionsResult result;
+ result.set_path(this->GetProtoRequest()->path());
+ result.set_node_id(SelfId().NodeId());
+ this->ReplyWithResult(Ydb::StatusIds::SUCCESS, result, TActivationContext::AsActorContext());
+ return;
+ }
+
+ SendRequest();
+ }
+
+ TActorId MakeLocalRegistrarID() {
+ auto &ctx = TActivationContext::AsActorContext();
+ auto &domainsInfo = *AppData(ctx)->DomainsInfo;
+ auto domainIt = domainsInfo.Domains.find(1);
+ if (domainIt == domainsInfo.Domains.end()) {
+ TActorId invalidId;
+ return invalidId;
+ }
+ auto &rec = *this->GetProtoRequest();
+ ui32 nodeId = rec.node_id() ? rec.node_id() : ctx.SelfID.NodeId();
+ ui32 hiveUid = domainsInfo.GetDefaultHiveUid(1);
+ ui64 hiveId = domainsInfo.GetHive(hiveUid);
+ return ::NKikimr::MakeLocalRegistrarID(nodeId, hiveId);
+ }
+
+ TEvLocal::TEvEnumerateTablets* MakeRequest() {
+ return new TEvLocal::TEvEnumerateTablets(TTabletTypes::KeyValue);
+ }
+
+ void SendRequest() {
+ Send(MakeLocalRegistrarID(), MakeRequest(), IEventHandle::FlagTrackDelivery, 0);
+ }
+
+ void Handle(TEvLocal::TEvEnumerateTabletsResult::TPtr &ev) {
+ const NKikimrLocal::TEvEnumerateTabletsResult &record = ev->Get()->Record;
+ if (!record.HasStatus() || record.GetStatus() != NKikimrProto::OK) {
+ this->Reply(StatusIds::INTERNAL_ERROR, "Received an incorrect answer from Local.", NKikimrIssues::TIssuesIds::UNEXPECTED, TActivationContext::AsActorContext());
+ return;
+ }
+
+ Ydb::KeyValue::ListLocalPartitionsResult result;
+ result.set_path(this->GetProtoRequest()->path());
+ result.set_node_id(SelfId().NodeId());
+ for (auto &item : record.GetTabletInfo()) {
+ if (!item.HasTabletId()) {
+ continue;
+ }
+ auto it = TabletIdToPartitionId.find(item.GetTabletId());
+ if (it != TabletIdToPartitionId.end()) {
+ result.add_partition_ids(it->second);
+ }
+ }
+ this->ReplyWithResult(Ydb::StatusIds::SUCCESS, result, TActivationContext::AsActorContext());
+ }
+
+ bool ValidateRequest(Ydb::StatusIds::StatusCode& /*status*/, NYql::TIssues& /*issues*/) {
+ return true;
+ }
+
+private:
+ THashMap<ui64, ui64> TabletIdToPartitionId;
+};
+
+
+template <typename TDerived, typename TRequest, typename TResultRecord, typename TKVRequest>
+class TKeyValueRequestGrpc
+ : public TRpcOperationRequestActor<TDerived, TRequest>
+ , public TBaseKeyValueRequest<TKeyValueRequestGrpc<TDerived, TRequest, TResultRecord, TKVRequest>>
+{
+public:
+ using TBase = TRpcOperationRequestActor<TDerived, TRequest>;
+ using TBase::TBase;
+
+ friend class TBaseKeyValueRequest<TKeyValueRequestGrpc<TDerived, TRequest, TResultRecord, TKVRequest>>;
+
+ void Bootstrap(const TActorContext& ctx) {
+ TBase::Bootstrap(ctx);
+ this->OnBootstrap();
+ this->Become(&TKeyValueRequestGrpc::StateFunc);
+ }
+
+
+protected:
+ STFUNC(StateFunc) {
+ switch (ev->GetTypeRewrite()) {
+ hFunc(TEvTabletPipe::TEvClientConnected, Handle);
+ hFunc(TEvTabletPipe::TEvClientDestroyed, Handle);
+ hFunc(TKVRequest::TResponse, Handle);
+ hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle);
+ default:
+ return TBase::StateFuncBase(ev, ctx);
+ }
+ }
+
+ void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr &ev) {
+ TEvTxProxySchemeCache::TEvNavigateKeySetResult* res = ev->Get();
+ NSchemeCache::TSchemeCacheNavigate *request = res->Request.Get();
+
+ if (!this->OnNavigateKeySetResult(ev, static_cast<TDerived*>(this)->GetRequiredAccessRights())) {
+ return;
+ }
+
+ auto &rec = *this->GetProtoRequest();
+ const NKikimrSchemeOp::TSolomonVolumeDescription &desc = request->ResultSet[0].SolomonVolumeInfo->Description;
+
+ if (rec.partition_id() >= desc.PartitionsSize()) {
+ this->Reply(StatusIds::SCHEME_ERROR, "The partition wasn't found. Partition ID was larger or equal partition count.", NKikimrIssues::TIssuesIds::DEFAULT_ERROR, TActivationContext::AsActorContext());
+ return;
+ }
+
+ ui64 partitionId = rec.partition_id();
+ if (const auto &partition = desc.GetPartitions(rec.partition_id()); partition.GetPartitionId() == partitionId) {
+ KVTabletId = partition.GetTabletId();
+ } else {
+ Y_VERIFY_DEBUG(false);
+ for (const NKikimrSchemeOp::TSolomonVolumeDescription::TPartition &partition : desc.GetPartitions()) {
+ if (partition.GetPartitionId() == partitionId) {
+ KVTabletId = partition.GetTabletId();
+ break;
+ }
+ }
+ }
+
+ if (!KVTabletId) {
+ this->Reply(StatusIds::INTERNAL_ERROR, "Partition wasn't found.", NKikimrIssues::TIssuesIds::DEFAULT_ERROR, TActivationContext::AsActorContext());
+ return;
+ }
+
+ CreatePipe();
+ SendRequest();
+ }
+
+ void SendRequest() {
+ std::unique_ptr<TKVRequest> req = std::make_unique<TKVRequest>();
+ auto &rec = *this->GetProtoRequest();
+ CopyProtobuf(rec, &req->Record);
+ req->Record.set_tablet_id(KVTabletId);
+ NTabletPipe::SendData(this->SelfId(), KVPipeClient, req.release(), 0);
+ }
+
+ void Handle(typename TKVRequest::TResponse::TPtr &ev) {
+ TResultRecord result;
+ CopyProtobuf(ev->Get()->Record, &result);
+ auto status = PullStatus(ev->Get()->Record);
+ this->ReplyWithResult(status, result, TActivationContext::AsActorContext());
+ }
+
+ NTabletPipe::TClientConfig GetPipeConfig() {
+ NTabletPipe::TClientConfig cfg;
+ cfg.RetryPolicy = {
+ .RetryLimitCount = 3u
+ };
+ return cfg;
+ }
+
+ void CreatePipe() {
+ KVPipeClient = this->Register(NTabletPipe::CreateClient(this->SelfId(), KVTabletId, GetPipeConfig()));
+ }
+
+ void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev) {
+ if (ev->Get()->Status != NKikimrProto::OK) {
+ this->Reply(StatusIds::UNAVAILABLE, "Failed to connect to coordination node.", NKikimrIssues::TIssuesIds::SHARD_NOT_AVAILABLE, TActivationContext::AsActorContext());
+ }
+ }
+
+ void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr&) {
+ this->Reply(StatusIds::UNAVAILABLE, "Connection to coordination node was lost.", NKikimrIssues::TIssuesIds::SHARD_NOT_AVAILABLE, TActivationContext::AsActorContext());
+ }
+
+ virtual bool ValidateRequest(Ydb::StatusIds::StatusCode& status, NYql::TIssues& issues) = 0;
+
+ void PassAway() override {
+ if (KVPipeClient) {
+ NTabletPipe::CloseClient(this->SelfId(), KVPipeClient);
+ KVPipeClient = {};
+ }
+ TBase::PassAway();
+ }
+
+protected:
+ ui64 KVTabletId = 0;
+ TActorId KVPipeClient;
+};
+
+class TAcquireLockRequest
+ : public TKeyValueRequestGrpc<TAcquireLockRequest, TEvAcquireLockKeyValueRequest,
+ Ydb::KeyValue::AcquireLockResult, TEvKeyValue::TEvAcquireLock>
+{
+public:
+ using TBase = TKeyValueRequestGrpc<TAcquireLockRequest, TEvAcquireLockKeyValueRequest,
+ Ydb::KeyValue::AcquireLockResult, TEvKeyValue::TEvAcquireLock>;
+ using TBase::TBase;
+
+ bool ValidateRequest(Ydb::StatusIds::StatusCode& /*status*/, NYql::TIssues& /*issues*/) override {
+ return true;
+ }
+ NACLib::EAccessRights GetRequiredAccessRights() const {
+ return NACLib::UpdateRow;
+ }
+};
+
+
+class TExecuteTransactionRequest
+ : public TKeyValueRequestGrpc<TExecuteTransactionRequest, TEvExecuteTransactionKeyValueRequest,
+ Ydb::KeyValue::ExecuteTransactionResult, TEvKeyValue::TEvExecuteTransaction> {
+public:
+ using TBase = TKeyValueRequestGrpc<TExecuteTransactionRequest, TEvExecuteTransactionKeyValueRequest,
+ Ydb::KeyValue::ExecuteTransactionResult, TEvKeyValue::TEvExecuteTransaction>;
+ using TBase::TBase;
+
+ bool ValidateRequest(Ydb::StatusIds::StatusCode& /*status*/, NYql::TIssues& /*issues*/) override {
+ return true;
+ }
+
+ NACLib::EAccessRights GetRequiredAccessRights() const {
+ ui32 accessRights = 0;
+ auto &rec = *this->GetProtoRequest();
+ for (auto &command : rec.commands()) {
+ if (command.has_delete_range()) {
+ accessRights |= NACLib::EraseRow;
+ }
+ if (command.has_rename()) {
+ accessRights |= NACLib::UpdateRow | NACLib::EraseRow;
+ }
+ if (command.has_copy_range()) {
+ accessRights |= NACLib::UpdateRow;
+ }
+ if (command.has_concat() && !command.concat().keep_inputs()) {
+ accessRights |= NACLib::UpdateRow | NACLib::EraseRow;
+ }
+ if (command.has_concat() && command.concat().keep_inputs()) {
+ accessRights |= NACLib::UpdateRow;
+ }
+ if (command.has_write()) {
+ accessRights |= NACLib::UpdateRow;
+ }
+ }
+ return static_cast<NACLib::EAccessRights>(accessRights);
+ }
+};
+
+class TReadRequest
+ : public TKeyValueRequestGrpc<TReadRequest, TEvReadKeyValueRequest,
+ Ydb::KeyValue::ReadResult, TEvKeyValue::TEvRead> {
+public:
+ using TBase = TKeyValueRequestGrpc<TReadRequest, TEvReadKeyValueRequest,
+ Ydb::KeyValue::ReadResult, TEvKeyValue::TEvRead>;
+ using TBase::TBase;
+ using TBase::Handle;
+ STFUNC(StateFunc) {
+ switch (ev->GetTypeRewrite()) {
+ default:
+ return TBase::StateFunc(ev, ctx);
+ }
+ }
+ bool ValidateRequest(Ydb::StatusIds::StatusCode& /*status*/, NYql::TIssues& /*issues*/) override {
+ return true;
+ }
+ NACLib::EAccessRights GetRequiredAccessRights() const {
+ return NACLib::SelectRow;
+ }
+};
+
+class TReadRangeRequest
+ : public TKeyValueRequestGrpc<TReadRangeRequest, TEvReadRangeKeyValueRequest,
+ Ydb::KeyValue::ReadRangeResult, TEvKeyValue::TEvReadRange> {
+public:
+ using TBase = TKeyValueRequestGrpc<TReadRangeRequest, TEvReadRangeKeyValueRequest,
+ Ydb::KeyValue::ReadRangeResult, TEvKeyValue::TEvReadRange>;
+ using TBase::TBase;
+ using TBase::Handle;
+ STFUNC(StateFunc) {
+ switch (ev->GetTypeRewrite()) {
+ default:
+ return TBase::StateFunc(ev, ctx);
+ }
+ }
+ bool ValidateRequest(Ydb::StatusIds::StatusCode& /*status*/, NYql::TIssues& /*issues*/) override {
+ return true;
+ }
+ NACLib::EAccessRights GetRequiredAccessRights() const {
+ return NACLib::SelectRow;
+ }
+};
+
+class TListRangeRequest
+ : public TKeyValueRequestGrpc<TListRangeRequest, TEvListRangeKeyValueRequest,
+ Ydb::KeyValue::ListRangeResult, TEvKeyValue::TEvReadRange> {
+public:
+ using TBase = TKeyValueRequestGrpc<TListRangeRequest, TEvListRangeKeyValueRequest,
+ Ydb::KeyValue::ListRangeResult, TEvKeyValue::TEvReadRange>;
+ using TBase::TBase;
+ using TBase::Handle;
+ STFUNC(StateFunc) {
+ switch (ev->GetTypeRewrite()) {
+ default:
+ return TBase::StateFunc(ev, ctx);
+ }
+ }
+ bool ValidateRequest(Ydb::StatusIds::StatusCode& /*status*/, NYql::TIssues& /*issues*/) override {
+ return true;
+ }
+ NACLib::EAccessRights GetRequiredAccessRights() const {
+ return NACLib::SelectRow;
+ }
+};
+
+class TGetStorageChannelStatusRequest
+ : public TKeyValueRequestGrpc<TGetStorageChannelStatusRequest, TEvGetStorageChannelStatusKeyValueRequest,
+ Ydb::KeyValue::GetStorageChannelStatusResult, TEvKeyValue::TEvGetStorageChannelStatus> {
+public:
+ using TBase = TKeyValueRequestGrpc<TGetStorageChannelStatusRequest, TEvGetStorageChannelStatusKeyValueRequest,
+ Ydb::KeyValue::GetStorageChannelStatusResult, TEvKeyValue::TEvGetStorageChannelStatus>;
+ using TBase::TBase;
+ using TBase::Handle;
+ STFUNC(StateFunc) {
+ switch (ev->GetTypeRewrite()) {
+ default:
+ return TBase::StateFunc(ev, ctx);
+ }
+ }
+ bool ValidateRequest(Ydb::StatusIds::StatusCode& /*status*/, NYql::TIssues& /*issues*/) override {
+ return true;
+ }
+ NACLib::EAccessRights GetRequiredAccessRights() const {
+ return NACLib::DescribeSchema;
+ }
+};
+
+}
+
+
+void DoCreateVolumeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) {
+ TActivationContext::AsActorContext().Register(new TCreateVolumeRequest(p.release()));
+}
+
+void DoDropVolumeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) {
+ TActivationContext::AsActorContext().Register(new TDropVolumeRequest(p.release()));
+}
+
+void DoAlterVolumeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) {
+ TActivationContext::AsActorContext().Register(new TAlterVolumeRequest(p.release()));
+}
+
+void DoDescribeVolumeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) {
+ TActivationContext::AsActorContext().Register(new TDescribeVolumeRequest(p.release()));
+}
+
+void DoListLocalPartitionsKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) {
+ TActivationContext::AsActorContext().Register(new TListLocalPartitionsRequest(p.release()));
+}
+
+void DoAcquireLockKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) {
+ TActivationContext::AsActorContext().Register(new TAcquireLockRequest(p.release()));
+}
+
+void DoExecuteTransactionKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) {
+ TActivationContext::AsActorContext().Register(new TExecuteTransactionRequest(p.release()));
+}
+
+void DoReadKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) {
+ TActivationContext::AsActorContext().Register(new TReadRequest(p.release()));
+}
+
+void DoReadRangeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) {
+ TActivationContext::AsActorContext().Register(new TReadRangeRequest(p.release()));
+}
+
+void DoListRangeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) {
+ TActivationContext::AsActorContext().Register(new TListRangeRequest(p.release()));
+}
+
+void DoGetStorageChannelStatusKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) {
+ TActivationContext::AsActorContext().Register(new TGetStorageChannelStatusRequest(p.release()));
+}
+
+} // namespace NKikimr::NGRpcService
diff --git a/ydb/core/grpc_services/service_keyvalue.h b/ydb/core/grpc_services/service_keyvalue.h
new file mode 100644
index 0000000000..9bbb430178
--- /dev/null
+++ b/ydb/core/grpc_services/service_keyvalue.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <memory>
+
+namespace NKikimr::NGRpcService {
+
+ class IRequestOpCtx;
+ class IFacilityProvider;
+
+ void DoCreateVolumeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&);
+ void DoDropVolumeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&);
+ void DoAlterVolumeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&);
+ void DoDescribeVolumeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&);
+ void DoListLocalPartitionsKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&);
+
+ void DoAcquireLockKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&);
+ void DoExecuteTransactionKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&);
+ void DoReadKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&);
+ void DoReadRangeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&);
+ void DoListRangeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&);
+ void DoGetStorageChannelStatusKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&);
+
+} // NKikimr::NGRpcService
diff --git a/ydb/core/health_check/health_check.cpp b/ydb/core/health_check/health_check.cpp
index 409b3594db..2154f6b56a 100644
--- a/ydb/core/health_check/health_check.cpp
+++ b/ydb/core/health_check/health_check.cpp
@@ -1639,6 +1639,7 @@ public:
if (!vDiskInfo.GetReplicated()) {
context.IssueRecords.clear();
context.ReportStatus(Ydb::Monitoring::StatusFlag::BLUE, "Replication in progress", ETags::VDiskState);
+ storageVDiskStatus.set_overall(context.GetOverallStatus());
return;
}
diff --git a/ydb/core/health_check/health_check_ut.cpp b/ydb/core/health_check/health_check_ut.cpp
index b402d39f2d..f7c5716209 100644
--- a/ydb/core/health_check/health_check_ut.cpp
+++ b/ydb/core/health_check/health_check_ut.cpp
@@ -47,8 +47,8 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
}
int const GROUP_START_ID = 1200;
- int const VCARD_START_ID = 5500;
-
+ int const VCARD_START_ID = 124;
+
void ChangeDescribeSchemeResult(TEvSchemeShard::TEvDescribeSchemeResult::TPtr* ev, ui64 size = 20000000, ui64 quota = 90000000) {
auto record = (*ev)->Get()->MutableRecord();
auto pool = record->mutable_pathdescription()->mutable_domaindescription()->add_storagepools();
@@ -79,7 +79,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
}
};
- void AddGroupVSlotInControllerConfigResponse(TEvBlobStorage::TEvControllerConfigResponse::TPtr* ev, int groupCount, int vslotCount) {
+ void AddGroupVSlotInControllerConfigResponse(TEvBlobStorage::TEvControllerConfigResponse::TPtr* ev, int groupCount, int vslotCount, TString erasurespecies = NHealthCheck::TSelfCheckRequest::BLOCK_4_2) {
auto& pbRecord = (*ev)->Get()->Record;
auto pbConfig = pbRecord.mutable_response()->mutable_status(0)->mutable_baseconfig();
@@ -88,6 +88,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
auto vslotIdSample = pbConfig->group(0).vslotid(0);
pbConfig->clear_group();
pbConfig->clear_vslot();
+ pbConfig->clear_pdisk();
auto groupId = GROUP_START_ID;
auto vslotId = VCARD_START_ID;
@@ -96,6 +97,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
auto group = pbConfig->add_group();
group->CopyFrom(groupSample);
group->set_groupid(groupId);
+ group->set_erasurespecies(erasurespecies);
group->clear_vslotid();
for (int j = 0; j < vslotCount; j++) {
@@ -134,6 +136,51 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
}
}
+ void AddVSlotInVDiskStateResponse(NNodeWhiteboard::TEvWhiteboard::TEvVDiskStateResponse::TPtr* ev, const TVector<Ydb::Monitoring::StatusFlag::Status>& vdiskStatuses) {
+ auto& pbRecord = (*ev)->Get()->Record;
+
+ auto sample = pbRecord.vdiskstateinfo(0);
+ pbRecord.clear_vdiskstateinfo();
+
+ auto groupId = GROUP_START_ID;
+ auto vslotId = VCARD_START_ID;
+
+ for (auto status: vdiskStatuses) {
+ switch (status) {
+ case Ydb::Monitoring::StatusFlag::RED: {
+ auto state = pbRecord.add_vdiskstateinfo();
+ state->CopyFrom(sample);
+ state->mutable_vdiskid()->set_vdisk(vslotId++);
+ state->mutable_vdiskid()->set_groupid(groupId);
+ state->set_pdiskid(100);
+ state->set_vdiskstate(NKikimrWhiteboard::EVDiskState::PDiskError);
+ break;
+ }
+ case Ydb::Monitoring::StatusFlag::BLUE: {
+ auto state = pbRecord.add_vdiskstateinfo();
+ state->CopyFrom(sample);
+ state->mutable_vdiskid()->set_vdisk(vslotId++);
+ state->mutable_vdiskid()->set_groupid(groupId);
+ state->set_pdiskid(100);
+ state->set_vdiskstate(NKikimrWhiteboard::EVDiskState::OK);
+ state->set_replicated(false);
+ break;
+ }
+ case Ydb::Monitoring::StatusFlag::YELLOW: {
+ auto state = pbRecord.add_vdiskstateinfo();
+ state->CopyFrom(sample);
+ state->mutable_vdiskid()->set_vdisk(vslotId++);
+ state->mutable_vdiskid()->set_groupid(groupId);
+ state->set_pdiskid(100);
+ state->set_vdiskstate(NKikimrWhiteboard::EVDiskState::SyncGuidRecovery);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ }
+
void ListingTest(int const groupNumber, int const vdiscPerGroupNumber) {
TPortManager tp;
ui16 port = tp.GetPort(2134);
@@ -145,7 +192,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
TServer server(settings);
server.EnableGRpc(grpcPort);
TClient client(settings);
- TTestActorRuntime &runtime = *server.GetRuntime();
+ TTestActorRuntime& runtime = *server.GetRuntime();
TActorId sender = runtime.AllocateEdgeActor();
TAutoPtr<IEventHandle> handle;
@@ -214,48 +261,66 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
UNIT_ASSERT_VALUES_EQUAL(issueVdiscCount, issueVdiscNumber);
}
- Y_UNIT_TEST(IssuesGroupsListing) {
- int groupNumber = NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE;
- ListingTest(groupNumber, 1);
- }
-
- Y_UNIT_TEST(IssuesVCardListing) {
- int vcardNumber = NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE;
- ListingTest(1, vcardNumber);
- }
-
- Y_UNIT_TEST(IssuesGroupsVCardListing) {
- int groupNumber = NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE;
- int vcardNumber = NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE;
- ListingTest(groupNumber, vcardNumber);
- }
-
- Y_UNIT_TEST(IssuesGroupsMerging) {
- int groupNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT;
- ListingTest(groupNumber, 1);
- }
-
- Y_UNIT_TEST(IssuesVCardMerging) {
- int vcardNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT;
- ListingTest(1, vcardNumber);
- }
+ void CheckGroupStatusDependsOnVdisks(TString erasurespecies, const Ydb::Monitoring::StatusFlag::Status expectiongGroupStatus, const TVector<Ydb::Monitoring::StatusFlag::Status>& vdiskStatuses) {
+ TPortManager tp;
+ ui16 port = tp.GetPort(2134);
+ ui16 grpcPort = tp.GetPort(2135);
+ auto settings = TServerSettings(port)
+ .SetNodeCount(2)
+ .SetUseRealThreads(false)
+ .SetDomainName("Root");
+ TServer server(settings);
+ server.EnableGRpc(grpcPort);
+ TClient client(settings);
+ TTestActorRuntime& runtime = *server.GetRuntime();
- Y_UNIT_TEST(IssuesGroupsVCardMerging) {
- int groupNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT;
- int vcardNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT;
- ListingTest(groupNumber, vcardNumber);
- }
+ TActorId sender = runtime.AllocateEdgeActor();
+ TAutoPtr<IEventHandle> handle;
- Y_UNIT_TEST(IssuesGroupsDeleting) {
- ListingTest(100, 1);
- }
+ auto observerFunc = [&](TTestActorRuntimeBase&, TAutoPtr<IEventHandle>& ev) {
+ switch (ev->GetTypeRewrite()) {
+ case TEvSchemeShard::EvDescribeSchemeResult: {
+ auto *x = reinterpret_cast<NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr*>(&ev);
+ ChangeDescribeSchemeResult(x);
+ break;
+ }
+ case TEvBlobStorage::EvControllerSelectGroupsResult: {
+ auto *x = reinterpret_cast<TEvBlobStorage::TEvControllerSelectGroupsResult::TPtr*>(&ev);
+ AddGroupsInControllerSelectGroupsResult(x, 1);
+ break;
+ }
+ case TEvBlobStorage::EvControllerConfigResponse: {
+ auto *x = reinterpret_cast<TEvBlobStorage::TEvControllerConfigResponse::TPtr*>(&ev);
+ AddGroupVSlotInControllerConfigResponse(x, 1, vdiskStatuses.size(), erasurespecies);
+ break;
+ }
+ case NNodeWhiteboard::TEvWhiteboard::EvVDiskStateResponse: {
+ auto *x = reinterpret_cast<NNodeWhiteboard::TEvWhiteboard::TEvVDiskStateResponse::TPtr*>(&ev);
+ AddVSlotInVDiskStateResponse(x, vdiskStatuses);
+ break;
+ }
+ case NNodeWhiteboard::TEvWhiteboard::EvPDiskStateResponse: {
+ auto *x = reinterpret_cast<NNodeWhiteboard::TEvWhiteboard::TEvPDiskStateResponse::TPtr*>(&ev);
+ (*x)->Get()->Record.clear_pdiskstateinfo();
+ break;
+ }
+ }
- Y_UNIT_TEST(IssuesVCardDeleting) {
- ListingTest(1, 100);
- }
+ return TTestActorRuntime::EEventAction::PROCESS;
+ };
+ runtime.SetObserverFunc(observerFunc);
- Y_UNIT_TEST(IssuesGroupsVCardDeleting) {
- ListingTest(100, 100);
+ auto *request = new NHealthCheck::TEvSelfCheckRequest;
+ runtime.Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, request, 0));
+ NHealthCheck::TEvSelfCheckResult* result = runtime.GrabEdgeEvent<NHealthCheck::TEvSelfCheckResult>(handle);
+ int groupIssuesCount = 0;
+ for (const auto& issue_log : result->Result.Getissue_log()) {
+ if (issue_log.type() == "STORAGE_GROUP" && issue_log.location().storage().pool().name() == "/Root:test") {
+ UNIT_ASSERT_VALUES_EQUAL((int)issue_log.status(), (int)expectiongGroupStatus);
+ groupIssuesCount++;
+ }
+ }
+ UNIT_ASSERT_VALUES_EQUAL(groupIssuesCount, 1);
}
void ChangeUsageDescribeSchemeResult(TEvSchemeShard::TEvDescribeSchemeResult::TPtr* ev, ui64 size, ui64 quota) {
@@ -280,7 +345,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
TServer server(settings);
server.EnableGRpc(grpcPort);
TClient client(settings);
- TTestActorRuntime &runtime = *server.GetRuntime();
+ TTestActorRuntime& runtime = *server.GetRuntime();
TActorId sender = runtime.AllocateEdgeActor();
TAutoPtr<IEventHandle> handle;
@@ -312,6 +377,118 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
UNIT_ASSERT_VALUES_EQUAL(storageIssuesCount, storageIssuesNumber);
}
+ Y_UNIT_TEST(IssuesGroupsListing) {
+ int groupNumber = NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE;
+ ListingTest(groupNumber, 1);
+ }
+
+ Y_UNIT_TEST(IssuesVCardListing) {
+ int vcardNumber = NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE;
+ ListingTest(1, vcardNumber);
+ }
+
+ Y_UNIT_TEST(IssuesGroupsVCardListing) {
+ int groupNumber = NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE;
+ int vcardNumber = NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE;
+ ListingTest(groupNumber, vcardNumber);
+ }
+
+ Y_UNIT_TEST(IssuesGroupsMerging) {
+ int groupNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT;
+ ListingTest(groupNumber, 1);
+ }
+
+ Y_UNIT_TEST(IssuesVCardMerging) {
+ int vcardNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT;
+ ListingTest(1, vcardNumber);
+ }
+
+ Y_UNIT_TEST(IssuesGroupsVCardMerging) {
+ int groupNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT;
+ int vcardNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT;
+ ListingTest(groupNumber, vcardNumber);
+ }
+
+ Y_UNIT_TEST(IssuesGroupsDeleting) {
+ ListingTest(100, 1);
+ }
+
+ Y_UNIT_TEST(IssuesVCardDeleting) {
+ ListingTest(1, 100);
+ }
+
+ Y_UNIT_TEST(IssuesGroupsVCardDeleting) {
+ ListingTest(100, 100);
+ }
+
+ Y_UNIT_TEST(NoneRedGroupWhenRedVdisk) {
+ CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::NONE, Ydb::Monitoring::StatusFlag::RED, {Ydb::Monitoring::StatusFlag::RED});
+ }
+
+ Y_UNIT_TEST(NoneRedGroupWhenBlueVdisk) {
+ CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::NONE, Ydb::Monitoring::StatusFlag::RED, {Ydb::Monitoring::StatusFlag::BLUE});
+ }
+
+ Y_UNIT_TEST(NoneYellowGroupWhenYellowVdisk) {
+ CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::NONE, Ydb::Monitoring::StatusFlag::YELLOW, {Ydb::Monitoring::StatusFlag::YELLOW});
+ }
+
+ Y_UNIT_TEST(Block42RedGroupWhen3RedVdisks) {
+ CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::BLOCK_4_2, Ydb::Monitoring::StatusFlag::RED, {Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::RED});
+ }
+
+ Y_UNIT_TEST(Block42RedGroupWhen2RedBlueVdisks) {
+ CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::BLOCK_4_2, Ydb::Monitoring::StatusFlag::RED, {Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::BLUE});
+ }
+
+ Y_UNIT_TEST(Block42OrangeGroupWhen2RedVdisks) {
+ CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::BLOCK_4_2, Ydb::Monitoring::StatusFlag::ORANGE, {Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::RED});
+ }
+
+ Y_UNIT_TEST(Block42OrangeGroupWhenRedBlueVdisks) {
+ CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::BLOCK_4_2, Ydb::Monitoring::StatusFlag::ORANGE, {Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::BLUE});
+ }
+
+ Y_UNIT_TEST(Block42YellowGroupWhenRedVdisk) {
+ CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::BLOCK_4_2, Ydb::Monitoring::StatusFlag::YELLOW, {Ydb::Monitoring::StatusFlag::RED});
+ }
+
+ Y_UNIT_TEST(Block42BlueGroupWhenBlueVdisk) {
+ CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::BLOCK_4_2, Ydb::Monitoring::StatusFlag::BLUE, {Ydb::Monitoring::StatusFlag::BLUE});
+ }
+
+ Y_UNIT_TEST(Block42YellowGroupWhenYellowVdisk) {
+ CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::BLOCK_4_2, Ydb::Monitoring::StatusFlag::YELLOW, {Ydb::Monitoring::StatusFlag::YELLOW});
+ }
+
+ Y_UNIT_TEST(Mirrot3dcYellowGroupWhen3RedVdisks) {
+ CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::MIRROR_3_DC, Ydb::Monitoring::StatusFlag::YELLOW, {Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::RED});
+ }
+
+ Y_UNIT_TEST(Mirrot3dcYellowGroupWhen2RedBlueVdisks) {
+ CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::MIRROR_3_DC, Ydb::Monitoring::StatusFlag::YELLOW, {Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::BLUE});
+ }
+
+ Y_UNIT_TEST(Mirrot3dcYellowGroupWhen2RedVdisks) {
+ CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::MIRROR_3_DC, Ydb::Monitoring::StatusFlag::YELLOW, {Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::RED});
+ }
+
+ Y_UNIT_TEST(Mirrot3dcYellowGroupWhenRedBlueVdisks) {
+ CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::MIRROR_3_DC, Ydb::Monitoring::StatusFlag::YELLOW, {Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::BLUE});
+ }
+
+ Y_UNIT_TEST(Mirrot3dcYellowGroupWhenRedVdisk) {
+ CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::MIRROR_3_DC, Ydb::Monitoring::StatusFlag::YELLOW, {Ydb::Monitoring::StatusFlag::RED});
+ }
+
+ Y_UNIT_TEST(Mirrot3dcBlueGroupWhenBlueVdisk) {
+ CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::MIRROR_3_DC, Ydb::Monitoring::StatusFlag::BLUE, {Ydb::Monitoring::StatusFlag::BLUE});
+ }
+
+ Y_UNIT_TEST(Mirrot3dcYellowGroupWhenYellowVdisk) {
+ CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::MIRROR_3_DC, Ydb::Monitoring::StatusFlag::YELLOW, {Ydb::Monitoring::StatusFlag::YELLOW});
+ }
+
Y_UNIT_TEST(StorageLimit95) {
StorageTest(95, 100, 1, Ydb::Monitoring::StatusFlag::RED);
}
diff --git a/ydb/core/http_proxy/custom_metrics.h b/ydb/core/http_proxy/custom_metrics.h
index 089e9190ac..a25a2989c2 100644
--- a/ydb/core/http_proxy/custom_metrics.h
+++ b/ydb/core/http_proxy/custom_metrics.h
@@ -28,12 +28,12 @@ TVector<std::pair<TString, TString>> BuildLabels(const TString& method, const TH
{"name", name}};
}
if (method.empty()) {
- return {{"database", httpContext.DatabaseName}, {"cloud_id", httpContext.CloudId},
+ return {{"database", httpContext.DatabasePath}, {"cloud_id", httpContext.CloudId},
{"folder_id", httpContext.FolderId}, {"database_id", httpContext.DatabaseId},
{"topic", httpContext.StreamName}, {"name", name}};
}
- return {{"database", httpContext.DatabaseName}, {"method", method}, {"cloud_id", httpContext.CloudId},
+ return {{"database", httpContext.DatabasePath}, {"method", method}, {"cloud_id", httpContext.CloudId},
{"folder_id", httpContext.FolderId}, {"database_id", httpContext.DatabaseId},
{"topic", httpContext.StreamName}, {"name", name}};
}
diff --git a/ydb/core/http_proxy/events.h b/ydb/core/http_proxy/events.h
index 4acb6376ed..010a18e73b 100644
--- a/ydb/core/http_proxy/events.h
+++ b/ydb/core/http_proxy/events.h
@@ -41,7 +41,6 @@ namespace NKikimr::NHttpProxy {
EvUpdateDatabasesEvent,
EvListEndpointsRequest,
EvListEndpointsResponse,
- EvError,
EvErrorWithIssue,
EvCounter,
EvHistCounter,
@@ -120,10 +119,13 @@ namespace NKikimr::NHttpProxy {
TString SerializedUserToken;
- TEvToken(const TString& serviceAccountId, const TString& iamToken, const TString& serializedUserToken = "")
+ TDatabase Database;
+
+ TEvToken(const TString& serviceAccountId, const TString& iamToken, const TString& serializedUserToken, const TDatabase& database)
: ServiceAccountId(serviceAccountId)
, IamToken(iamToken)
, SerializedUserToken(serializedUserToken)
+ , Database(database)
{}
};
@@ -131,25 +133,17 @@ namespace NKikimr::NHttpProxy {
TEvClientReady() {}
};
- struct TEvError : public TEventLocal<TEvError, EvError> {
- NYdb::EStatus Status;
- TString Response;
-
- TEvError(const NYdb::EStatus status, const TString& response)
- : Status(status)
- , Response(response)
- {}
- };
-
struct TEvErrorWithIssue : public TEventLocal<TEvErrorWithIssue, EvErrorWithIssue> {
NYdb::EStatus Status;
size_t IssueCode;
TString Response;
+ TDatabase Database;
- TEvErrorWithIssue(const NYdb::EStatus status, const TString& response, size_t issueCode=0)
+ TEvErrorWithIssue(const NYdb::EStatus status, const TString& response, const TDatabase& database, size_t issueCode)
: Status(status)
, IssueCode(issueCode)
, Response(response)
+ , Database(database)
{}
};
};
diff --git a/ydb/core/http_proxy/http_req.cpp b/ydb/core/http_proxy/http_req.cpp
index 2ca59fbb56..f67688f5da 100644
--- a/ydb/core/http_proxy/http_req.cpp
+++ b/ydb/core/http_proxy/http_req.cpp
@@ -143,17 +143,17 @@ namespace NKikimr::NHttpProxy {
}
template<class TProto>
- TString TruncateStreamName(const TProto& req, const TString& database)
+ TString TruncateStreamName(const TProto& req, const TString& databasePath)
{
constexpr bool has_stream_name = requires(const TProto& t) {
t.stream_name();
};
if constexpr (has_stream_name) {
- Y_VERIFY(req.stream_name().StartsWith(database));
- return req.stream_name().substr(database.size(), -1);
+ Y_VERIFY(req.stream_name().StartsWith(databasePath));
+ return req.stream_name().substr(databasePath.size(), -1);
}
- return ExtractStreamNameWithoutProtoField<TProto>(req).substr(database.size(), -1);
+ return ExtractStreamNameWithoutProtoField<TProto>(req).substr(databasePath.size(), -1);
}
constexpr TStringBuf IAM_HEADER = "x-yacloud-subjecttoken";
@@ -231,7 +231,6 @@ namespace NKikimr::NHttpProxy {
HFunc(TEvents::TEvWakeup, HandleTimeout);
HFunc(TEvServerlessProxy::TEvClientReady, HandleClientReady);
HFunc(TEvServerlessProxy::TEvDiscoverDatabaseEndpointResult, Handle);
- HFunc(TEvServerlessProxy::TEvError, HandleError);
HFunc(TEvServerlessProxy::TEvErrorWithIssue, HandleErrorWithIssue);
HFunc(TEvServerlessProxy::TEvGrpcRequestResult, HandleGrpcResponse);
HFunc(TEvServerlessProxy::TEvToken, HandleToken);
@@ -247,7 +246,7 @@ namespace NKikimr::NHttpProxy {
RequestState = StateAuthorization;
auto request = MakeHolder<TEvServerlessProxy::TEvDiscoverDatabaseEndpointRequest>();
- request->DatabasePath = HttpContext.DatabaseName;
+ request->DatabasePath = HttpContext.DatabasePath;
ctx.Send(MakeTenantDiscoveryID(), std::move(request));
}
@@ -256,17 +255,17 @@ namespace NKikimr::NHttpProxy {
RequestState = StateListEndpoints;
LOG_SP_INFO_S(ctx, NKikimrServices::HTTP_PROXY,
"create client to '" << HttpContext.DiscoveryEndpoint <<
- "' database: '" << HttpContext.DatabaseName <<
+ "' database: '" << HttpContext.DatabasePath <<
"' iam token size: " << HttpContext.IamToken.size());
auto clientSettings = NYdb::TCommonClientSettings()
.DiscoveryEndpoint(HttpContext.DiscoveryEndpoint)
- .Database(HttpContext.DatabaseName)
+ .Database(HttpContext.DatabasePath)
.AuthToken(HttpContext.IamToken)
.DiscoveryMode(NYdb::EDiscoveryMode::Async);
- if (!HttpContext.DatabaseName.empty() && !HttpContext.ServiceConfig.GetTestMode()) {
- clientSettings.Database(HttpContext.DatabaseName);
+ if (!HttpContext.DatabasePath.empty() && !HttpContext.ServiceConfig.GetTestMode()) {
+ clientSettings.Database(HttpContext.DatabasePath);
}
Y_VERIFY(!Client);
Client.Reset(new TDataStreamsClient(*HttpContext.Driver, clientSettings));
@@ -285,10 +284,10 @@ namespace NKikimr::NHttpProxy {
RequestState = StateGrpcRequest;
LOG_SP_INFO_S(ctx, NKikimrServices::HTTP_PROXY,
"sending grpc request to '" << HttpContext.DiscoveryEndpoint <<
- "' database: '" << HttpContext.DatabaseName <<
+ "' database: '" << HttpContext.DatabasePath <<
"' iam token size: " << HttpContext.IamToken.size());
- RpcFuture = NRpcService::DoLocalRpc<TRpcEv>(std::move(Request), HttpContext.DatabaseName,
+ RpcFuture = NRpcService::DoLocalRpc<TRpcEv>(std::move(Request), HttpContext.DatabasePath,
HttpContext.SerializedUserToken, ctx.ActorSystem());
RpcFuture.Subscribe([actorId = ctx.SelfID, actorSystem = ctx.ActorSystem()]
(const NThreading::TFuture<TProtoResponse>& future) {
@@ -313,7 +312,7 @@ namespace NKikimr::NHttpProxy {
RequestState = StateGrpcRequest;
LOG_SP_INFO_S(ctx, NKikimrServices::HTTP_PROXY,
"sending grpc request to '" << HttpContext.DiscoveryEndpoint <<
- "' database: '" << HttpContext.DatabaseName <<
+ "' database: '" << HttpContext.DatabasePath <<
"' iam token size: " << HttpContext.IamToken.size());
Y_VERIFY(Client);
@@ -345,6 +344,23 @@ namespace NKikimr::NHttpProxy {
Y_UNUSED(ev, ctx);
}
+ void TryUpdateDbInfo(const TDatabase& db, const TActorContext& ctx) {
+ if (db.Path) {
+ HttpContext.DatabasePath = db.Path;
+ HttpContext.DatabaseId = db.Id;
+ HttpContext.CloudId = db.CloudId;
+ HttpContext.FolderId = db.FolderId;
+ if (ExtractStreamName<TProtoRequest>(Request).StartsWith(HttpContext.DatabasePath + "/")) {
+ HttpContext.StreamName =
+ TruncateStreamName<TProtoRequest>(Request, HttpContext.DatabasePath + "/");
+ } else {
+ HttpContext.StreamName = ExtractStreamName<TProtoRequest>(Request);
+ }
+
+ }
+ ReportInputCounters(ctx);
+ }
+
void HandleToken(TEvServerlessProxy::TEvToken::TPtr& ev, const TActorContext& ctx) {
HttpContext.ServiceAccountId = ev->Get()->ServiceAccountId;
HttpContext.IamToken = ev->Get()->IamToken;
@@ -353,15 +369,14 @@ namespace NKikimr::NHttpProxy {
if (HttpContext.Driver) {
SendYdbDriverRequest(ctx);
} else {
+ TryUpdateDbInfo(ev->Get()->Database, ctx);
SendGrpcRequestNoDriver(ctx);
}
}
- void HandleError(TEvServerlessProxy::TEvError::TPtr& ev, const TActorContext& ctx) {
- ReplyWithError(ctx, ev->Get()->Status, ev->Get()->Response);
- }
void HandleErrorWithIssue(TEvServerlessProxy::TEvErrorWithIssue::TPtr& ev, const TActorContext& ctx) {
+ TryUpdateDbInfo(ev->Get()->Database, ctx);
ReplyWithError(ctx, ev->Get()->Status, ev->Get()->Response, ev->Get()->IssueCode);
}
@@ -377,6 +392,20 @@ namespace NKikimr::NHttpProxy {
{"code", TStringBuilder() << (int)MapToException(status, Method, issueCode).second},
{"name", "api.http.errors_per_second"}}
});
+
+ ctx.Send(MakeMetricsServiceID(),
+ new TEvServerlessProxy::TEvCounter{
+ 1, true, true,
+ {{"database", HttpContext.DatabasePath},
+ {"method", Method},
+ {"cloud_id", HttpContext.CloudId},
+ {"folder_id", HttpContext.FolderId},
+ {"database_id", HttpContext.DatabaseId},
+ {"topic", HttpContext.StreamName},
+ {"code", TStringBuilder() << (int)MapToException(status, Method, issueCode).second},
+ {"name", "api.http.data_streams.response.count"}}
+ });
+
HttpContext.ResponseData.Status = status;
HttpContext.ResponseData.ErrorText = errorText;
HttpContext.DoReply(ctx, issueCode);
@@ -386,6 +415,24 @@ namespace NKikimr::NHttpProxy {
TBase::Die(ctx);
}
+ void ReportInputCounters(const TActorContext& ctx) {
+
+ if (InputCountersReported) {
+ return;
+ }
+ InputCountersReported = true;
+
+ FillInputCustomMetrics<TProtoRequest>(Request, HttpContext, ctx);
+ /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(),
+ new TEvServerlessProxy::TEvCounter{1, true, true,
+ BuildLabels(Method, HttpContext, "api.http.requests_per_second", setStreamPrefix)
+ });
+ ctx.Send(MakeMetricsServiceID(),
+ new TEvServerlessProxy::TEvCounter{1, true, true,
+ BuildLabels(Method, HttpContext, "api.http.data_streams.request.count")
+ });
+ }
+
void Handle(TEvServerlessProxy::TEvDiscoverDatabaseEndpointResult::TPtr ev,
const TActorContext& ctx) {
if (ev->Get()->DatabaseInfo) {
@@ -394,25 +441,15 @@ namespace NKikimr::NHttpProxy {
HttpContext.CloudId = db->CloudId;
HttpContext.DatabaseId = db->Id;
HttpContext.DiscoveryEndpoint = db->Endpoint;
- HttpContext.DatabaseName = db->Path;
+ HttpContext.DatabasePath = db->Path;
- if (ExtractStreamName<TProtoRequest>(Request).StartsWith(HttpContext.DatabaseName + "/")) {
+ if (ExtractStreamName<TProtoRequest>(Request).StartsWith(HttpContext.DatabasePath + "/")) {
HttpContext.StreamName =
- TruncateStreamName<TProtoRequest>(Request, HttpContext.DatabaseName + "/");
+ TruncateStreamName<TProtoRequest>(Request, HttpContext.DatabasePath + "/");
} else {
HttpContext.StreamName = ExtractStreamName<TProtoRequest>(Request);
}
-
- FillInputCustomMetrics<TProtoRequest>(Request, HttpContext, ctx);
- /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(),
- new TEvServerlessProxy::TEvCounter{1, true, true,
- BuildLabels(Method, HttpContext, "api.http.requests_per_second", setStreamPrefix)
- });
- ctx.Send(MakeMetricsServiceID(),
- new TEvServerlessProxy::TEvCounter{1, true, true,
- BuildLabels(Method, HttpContext, "api.http.data_streams.request.count")
- });
- //TODO: add api.http.request.count
+ ReportInputCounters(ctx);
CreateClient(ctx);
return;
}
@@ -440,7 +477,6 @@ namespace NKikimr::NHttpProxy {
FillOutputCustomMetrics<TProtoResult>(
*(dynamic_cast<TProtoResult*>(ev->Get()->Message.Get())), HttpContext, ctx);
ReportLatencyCounters(ctx);
-
/* deprecated metric: */ ctx.Send(MakeMetricsServiceID(),
new TEvServerlessProxy::TEvCounter{1, true, true,
BuildLabels(Method, HttpContext, "api.http.success_per_second", setStreamPrefix)
@@ -448,7 +484,7 @@ namespace NKikimr::NHttpProxy {
ctx.Send(MakeMetricsServiceID(),
new TEvServerlessProxy::TEvCounter{
1, true, true,
- {{"database", HttpContext.DatabaseName},
+ {{"database", HttpContext.DatabasePath},
{"method", Method},
{"cloud_id", HttpContext.CloudId},
{"folder_id", HttpContext.FolderId},
@@ -505,17 +541,17 @@ namespace NKikimr::NHttpProxy {
} catch (const std::exception& e) {
LOG_SP_WARN_S(ctx, NKikimrServices::HTTP_PROXY,
"got new request with incorrect json from [" << HttpContext.SourceAddress << "] " <<
- "database '" << HttpContext.DatabaseName << "'");
+ "database '" << HttpContext.DatabasePath << "'");
return ReplyWithError(ctx, NYdb::EStatus::BAD_REQUEST, e.what(), static_cast<size_t>(NYds::EErrorCodes::INVALID_ARGUMENT));
}
- if (HttpContext.DatabaseName.empty()) {
- HttpContext.DatabaseName = ExtractStreamName<TProtoRequest>(Request);
+ if (HttpContext.DatabasePath.empty()) {
+ HttpContext.DatabasePath = ExtractStreamName<TProtoRequest>(Request);
}
LOG_SP_INFO_S(ctx, NKikimrServices::HTTP_PROXY,
"got new request from [" << HttpContext.SourceAddress << "] " <<
- "database '" << HttpContext.DatabaseName << "' " <<
+ "database '" << HttpContext.DatabasePath << "' " <<
"stream '" << ExtractStreamName<TProtoRequest>(Request) << "'");
// Use Signature or no sdk mode - then need to auth anyway
@@ -553,6 +589,7 @@ namespace NKikimr::NHttpProxy {
THolder<TDataStreamsClient> Client;
TActorId AuthActor;
+ bool InputCountersReported = false;
};
private:
@@ -652,9 +689,9 @@ namespace NKikimr::NHttpProxy {
SourceAddress = address;
}
- DatabaseName = Request->URL;
- if (DatabaseName == "/") {
- DatabaseName = "";
+ DatabasePath = Request->URL;
+ if (DatabasePath == "/") {
+ DatabasePath = "";
}
//TODO: find out databaseId
ParseHeaders(Request->Headers);
@@ -837,7 +874,7 @@ namespace NKikimr::NHttpProxy {
, ServiceConfig(context.ServiceConfig)
, IamToken(context.IamToken)
, Authorize(!context.Driver)
- , Database(context.DatabaseName)
+ , DatabasePath(context.DatabasePath)
, StreamName(context.StreamName)
{
}
@@ -864,7 +901,7 @@ namespace NKikimr::NHttpProxy {
void SendDescribeRequest(const TActorContext& ctx) {
auto schemeCacheRequest = std::make_unique<NSchemeCache::TSchemeCacheNavigate>();
NSchemeCache::TSchemeCacheNavigate::TEntry entry;
- entry.Path = NKikimr::SplitPath(Database);
+ entry.Path = NKikimr::SplitPath(DatabasePath);
entry.Operation = NSchemeCache::TSchemeCacheNavigate::OpPath;
entry.SyncVersion = false;
schemeCacheRequest->ResultSet.emplace_back(entry);
@@ -875,7 +912,7 @@ namespace NKikimr::NHttpProxy {
const NSchemeCache::TSchemeCacheNavigate* navigate = ev->Get()->Request.Get();
if (navigate->ErrorCount) {
return ReplyWithError(
- ctx, NYdb::EStatus::SCHEME_ERROR, TStringBuilder() << "Database with path '" << Database << "' doesn't exists",
+ ctx, NYdb::EStatus::SCHEME_ERROR, TStringBuilder() << "Database with path '" << DatabasePath << "' doesn't exists",
NYds::EErrorCodes::NOT_FOUND
);
}
@@ -885,6 +922,7 @@ namespace NKikimr::NHttpProxy {
FolderId = description.GetPQTabletConfig().GetYcFolderId();
CloudId = description.GetPQTabletConfig().GetYcCloudId();
DatabaseId = description.GetPQTabletConfig().GetYdbDatabaseId();
+ DatabasePath = description.GetPQTabletConfig().GetYdbDatabasePath();
}
for (const auto& attr : navigate->ResultSet.front().Attributes) {
if (attr.first == "folder_id") FolderId = attr.second;
@@ -903,7 +941,7 @@ namespace NKikimr::NHttpProxy {
if (ev->Get()->Error) {
return ReplyWithError(ctx, NYdb::EStatus::UNAUTHORIZED, ev->Get()->Error.Message);
};
- ctx.Send(Sender, new TEvServerlessProxy::TEvToken(ev->Get()->Token->GetUserSID(), "", ev->Get()->SerializedToken));
+ ctx.Send(Sender, new TEvServerlessProxy::TEvToken(ev->Get()->Token->GetUserSID(), "", ev->Get()->SerializedToken, {"", DatabaseId, DatabasePath, CloudId, FolderId}));
LOG_SP_DEBUG_S(ctx, NKikimrServices::HTTP_PROXY, "Authorized successfully");
@@ -1035,7 +1073,7 @@ namespace NKikimr::NHttpProxy {
void ReplyWithError(const TActorContext& ctx, NYdb::EStatus status, const TString& errorText,
NYds::EErrorCodes issueCode = NYds::EErrorCodes::GENERIC_ERROR) {
- ctx.Send(Sender, new TEvServerlessProxy::TEvErrorWithIssue(status, errorText, static_cast<size_t>(issueCode)));
+ ctx.Send(Sender, new TEvServerlessProxy::TEvErrorWithIssue(status, errorText, {"", DatabaseId, DatabasePath, CloudId, FolderId}, static_cast<size_t>(issueCode)));
TBase::Die(ctx);
}
@@ -1058,7 +1096,7 @@ namespace NKikimr::NHttpProxy {
Y_VERIFY(!ev->Get()->Response.iam_token().empty());
ctx.Send(Sender,
- new TEvServerlessProxy::TEvToken(ServiceAccountId, ev->Get()->Response.iam_token()));
+ new TEvServerlessProxy::TEvToken(ServiceAccountId, ev->Get()->Response.iam_token(), "", {}));
LOG_SP_DEBUG_S(ctx, NKikimrServices::HTTP_PROXY, "IAM token generated");
@@ -1094,7 +1132,7 @@ namespace NKikimr::NHttpProxy {
TString FolderId;
TString CloudId;
TString DatabaseId;
- TString Database;
+ TString DatabasePath;
TString StreamName;
};
diff --git a/ydb/core/http_proxy/http_req.h b/ydb/core/http_proxy/http_req.h
index 60437ff3b2..6effd74297 100644
--- a/ydb/core/http_proxy/http_req.h
+++ b/ydb/core/http_proxy/http_req.h
@@ -75,7 +75,7 @@ struct THttpRequestContext {
TString ServiceAccountId;
TString RequestId;
TString DiscoveryEndpoint;
- TString DatabaseName;
+ TString DatabasePath;
TString DatabaseId; // not in context
TString FolderId; // not in context
TString CloudId; // not in context
diff --git a/ydb/core/http_proxy/http_service.cpp b/ydb/core/http_proxy/http_service.cpp
index a08d764bc5..656ec9be03 100644
--- a/ydb/core/http_proxy/http_service.cpp
+++ b/ydb/core/http_proxy/http_service.cpp
@@ -94,7 +94,7 @@ namespace NKikimr::NHttpProxy {
" incoming request from [" << context.SourceAddress << "]" <<
" request [" << context.MethodName << "]" <<
" url [" << context.Request->URL << "]" <<
- " database [" << context.DatabaseName << "]" <<
+ " database [" << context.DatabasePath << "]" <<
" requestId: " << context.RequestId);
try {
diff --git a/ydb/core/kqp/compile_service/kqp_compile_service.cpp b/ydb/core/kqp/compile_service/kqp_compile_service.cpp
index 5388386fa8..8cd2a925c3 100644
--- a/ydb/core/kqp/compile_service/kqp_compile_service.cpp
+++ b/ydb/core/kqp/compile_service/kqp_compile_service.cpp
@@ -367,6 +367,7 @@ private:
bool enableKqpScanQueryPredicateExtract = Config.GetEnablePredicateExtractForScanQueries();
bool defaultSyntaxVersion = Config.GetSqlVersion();
+ bool enableKqpImmediateEffects = Config.GetEnableKqpImmediateEffects();
Config.Swap(event.MutableConfig()->MutableTableServiceConfig());
LOG_INFO(*TlsActivationContext, NKikimrServices::KQP_COMPILE_SERVICE, "Updated config");
@@ -380,8 +381,9 @@ private:
Config.GetEnableKqpScanQueryStreamIdxLookupJoin() != enableKqpScanQueryStreamIdxLookupJoin ||
Config.GetEnableKqpDataQuerySourceRead() != enableKqpDataQuerySourceRead ||
Config.GetEnableKqpScanQuerySourceRead() != enableKqpScanQuerySourceRead ||
- Config.GetEnablePredicateExtractForDataQueries() != enableKqpDataQueryPredicateExtract ||
- Config.GetEnablePredicateExtractForScanQueries() != enableKqpScanQueryPredicateExtract)
+ Config.GetEnablePredicateExtractForDataQueries() != enableKqpDataQueryPredicateExtract ||
+ Config.GetEnablePredicateExtractForScanQueries() != enableKqpScanQueryPredicateExtract ||
+ Config.GetEnableKqpImmediateEffects() != enableKqpImmediateEffects)
{
LOG_NOTICE_S(*TlsActivationContext, NKikimrServices::KQP_COMPILE_SERVICE,
diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp
index 561dccef84..722b9f1d9c 100644
--- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp
+++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp
@@ -1302,6 +1302,11 @@ private:
break;
}
+ case NKqpProto::TKqpPhyTableOperation::kReadOlapRange: {
+ YQL_ENSURE(false, "The previous check did not work! Data query read does not support column shard tables." << Endl
+ << this->DebugString());
+ }
+
default: {
YQL_ENSURE(false, "Unexpected table operation: " << (ui32) op.GetTypeCase() << Endl
<< this->DebugString());
@@ -1528,6 +1533,14 @@ private:
}
}
+ if (stageInfo.Meta.IsOlap() && tx.Body->GetType() == NKqpProto::TKqpPhyTx::TYPE_DATA) {
+ auto error = TStringBuilder() << "Data manipulation queries do not support column shard tables";
+ LOG_E(error);
+ ReplyErrorAndDie(Ydb::StatusIds::PRECONDITION_FAILED,
+ YqlIssue({}, NYql::TIssuesIds::KIKIMR_PRECONDITION_FAILED, error));
+ return;
+ }
+
LOG_D("Stage " << stageInfo.Id << " AST: " << stage.GetProgramAst());
if (stage.SourcesSize() > 0) {
diff --git a/ydb/core/kqp/executer_actor/kqp_executer_impl.h b/ydb/core/kqp/executer_actor/kqp_executer_impl.h
index 7da9ef93cc..00b3bae793 100644
--- a/ydb/core/kqp/executer_actor/kqp_executer_impl.h
+++ b/ydb/core/kqp/executer_actor/kqp_executer_impl.h
@@ -1069,7 +1069,7 @@ protected:
}
proxy = CreateResultStreamChannelProxy(TxId, channel.Id, ResponseEv->TxResults[0].MkqlItemType,
- ResponseEv->TxResults[0].ColumnOrder, Target, Stats.get(), this->SelfId());
+ ResponseEv->TxResults[0].ColumnOrder, Target, Stats, this->SelfId());
} else {
YQL_ENSURE(channel.DstInputIndex < ResponseEv->ResultsSize());
@@ -1079,7 +1079,7 @@ protected:
return channelIt->second;
}
- proxy = CreateResultDataChannelProxy(TxId, channel.Id, Stats.get(), this->SelfId(),
+ proxy = CreateResultDataChannelProxy(TxId, channel.Id, Stats, this->SelfId(),
channel.DstInputIndex, ResponseEv.get());
}
@@ -1101,6 +1101,11 @@ protected:
if (KqpShardsResolverId) {
this->Send(KqpShardsResolverId, new TEvents::TEvPoison);
}
+
+ if (Planner) {
+ Planner->Unsubscribe();
+ }
+
if (KqpTableResolverId) {
this->Send(KqpTableResolverId, new TEvents::TEvPoison);
this->Send(this->SelfId(), new TEvents::TEvPoison);
@@ -1149,7 +1154,7 @@ protected:
const TString Database;
const TIntrusiveConstPtr<NACLib::TUserToken> UserToken;
TKqpRequestCounters::TPtr Counters;
- std::unique_ptr<TQueryExecutionStats> Stats;
+ std::shared_ptr<TQueryExecutionStats> Stats;
TInstant StartTime;
TMaybe<TInstant> Deadline;
TActorId DeadlineActor;
diff --git a/ydb/core/kqp/executer_actor/kqp_executer_stats.h b/ydb/core/kqp/executer_actor/kqp_executer_stats.h
index 4c936207bc..2b919e519f 100644
--- a/ydb/core/kqp/executer_actor/kqp_executer_stats.h
+++ b/ydb/core/kqp/executer_actor/kqp_executer_stats.h
@@ -21,8 +21,8 @@ struct TQueryExecutionStats {
// basic stats
std::unordered_set<ui64> AffectedShards;
ui32 TotalTasks = 0;
- ui64 ResultBytes = 0;
- ui64 ResultRows = 0;
+ std::atomic<ui64> ResultBytes = 0;
+ std::atomic<ui64> ResultRows = 0;
TDuration ExecuterCpuTime;
TInstant StartTs;
diff --git a/ydb/core/kqp/executer_actor/kqp_planner.cpp b/ydb/core/kqp/executer_actor/kqp_planner.cpp
index 753bc532bd..106ecdf459 100644
--- a/ydb/core/kqp/executer_actor/kqp_planner.cpp
+++ b/ydb/core/kqp/executer_actor/kqp_planner.cpp
@@ -78,7 +78,7 @@ bool TKqpPlanner::SendStartKqpTasksRequest(ui32 requestId, const TActorId& targe
LOG_D("Try to retry to another node, nodeId: " << *targetNode << ", requestId: " << requestId);
auto anotherTarget = MakeKqpNodeServiceID(*targetNode);
TlsActivationContext->Send(std::make_unique<NActors::IEventHandle>(anotherTarget, ExecuterId, ev.Release(),
- IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, requestId, nullptr, ExecuterSpan.GetTraceId()));
+ CalcSendMessageFlagsForNode(*targetNode), requestId, nullptr, ExecuterSpan.GetTraceId()));
requestData.RetryNumber++;
return true;
}
@@ -234,6 +234,13 @@ void TKqpPlanner::ProcessTasksForScanExecuter() {
}
}
+void TKqpPlanner::Unsubscribe() {
+ for(ui64 nodeId: TrackingNodes) {
+ TlsActivationContext->Send(std::make_unique<NActors::IEventHandle>(
+ TActivationContext::InterconnectProxy(nodeId), ExecuterId, new TEvents::TEvUnsubscribe()));
+ }
+}
+
void TKqpPlanner::PrepareToProcess() {
auto rmConfig = GetKqpResourceManager()->GetConfig();
diff --git a/ydb/core/kqp/executer_actor/kqp_planner.h b/ydb/core/kqp/executer_actor/kqp_planner.h
index 9c422c3903..1ea7c26722 100644
--- a/ydb/core/kqp/executer_actor/kqp_planner.h
+++ b/ydb/core/kqp/executer_actor/kqp_planner.h
@@ -34,6 +34,8 @@ public:
TVector<NKikimrKqp::TKqpNodeResources>&& resourcesSnapshot, const NKikimrConfig::TTableServiceConfig::TExecuterRetriesConfig& executerRetriesConfig);
bool SendStartKqpTasksRequest(ui32 requestId, const TActorId& target);
+ void Unsubscribe();
+
void ProcessTasksForScanExecuter();
void ProcessTasksForDataExecuter();
diff --git a/ydb/core/kqp/executer_actor/kqp_result_channel.cpp b/ydb/core/kqp/executer_actor/kqp_result_channel.cpp
index 3d83981ca7..261ef8ff48 100644
--- a/ydb/core/kqp/executer_actor/kqp_result_channel.cpp
+++ b/ydb/core/kqp/executer_actor/kqp_result_channel.cpp
@@ -20,11 +20,11 @@ public:
return NKikimrServices::TActivity::KQP_RESULT_CHANNEL_PROXY;
}
- TResultCommonChannelProxy(ui64 txId, ui64 channelId, TQueryExecutionStats* stats, TActorId executer)
+ TResultCommonChannelProxy(ui64 txId, ui64 channelId, std::shared_ptr<TQueryExecutionStats> stats, TActorId executer)
: TActor(&TResultCommonChannelProxy::WorkState)
, TxId(txId)
, ChannelId(channelId)
- , Stats(stats)
+ , Stats(std::move(stats))
, Executer(executer) {}
protected:
@@ -112,7 +112,7 @@ private:
private:
const ui64 TxId;
const ui64 ChannelId;
- TQueryExecutionStats* Stats; // owned by KqpExecuter
+ std::shared_ptr<TQueryExecutionStats> Stats; // owned by KqpExecuter
const NActors::TActorId Executer;
NActors::TActorId ComputeActor;
};
@@ -120,9 +120,9 @@ private:
class TResultStreamChannelProxy : public TResultCommonChannelProxy {
public:
TResultStreamChannelProxy(ui64 txId, ui64 channelId, NKikimr::NMiniKQL::TType* itemType,
- const TVector<ui32>* columnOrder, TActorId target, TQueryExecutionStats* stats,
+ const TVector<ui32>* columnOrder, TActorId target, std::shared_ptr<TQueryExecutionStats> stats,
TActorId executer)
- : TResultCommonChannelProxy(txId, channelId, stats, executer)
+ : TResultCommonChannelProxy(txId, channelId, std::move(stats), executer)
, ColumnOrder(columnOrder)
, ItemType(itemType)
, Target(target) {}
@@ -154,9 +154,9 @@ private:
class TResultDataChannelProxy : public TResultCommonChannelProxy {
public:
- TResultDataChannelProxy(ui64 txId, ui64 channelId, TQueryExecutionStats* stats, TActorId executer,
+ TResultDataChannelProxy(ui64 txId, ui64 channelId, std::shared_ptr<TQueryExecutionStats> stats, TActorId executer,
ui32 inputIndex, TEvKqpExecuter::TEvTxResponse* resultReceiver)
- : TResultCommonChannelProxy(txId, channelId, stats, executer)
+ : TResultCommonChannelProxy(txId, channelId, std::move(stats), executer)
, InputIndex(inputIndex)
, ResultReceiver(resultReceiver) {}
@@ -184,18 +184,18 @@ private:
} // anonymous namespace end
NActors::IActor* CreateResultStreamChannelProxy(ui64 txId, ui64 channelId, NKikimr::NMiniKQL::TType* itemType,
- const TVector<ui32>* columnOrder, TActorId target, TQueryExecutionStats* stats, TActorId executer)
+ const TVector<ui32>* columnOrder, TActorId target, std::shared_ptr<TQueryExecutionStats> stats, TActorId executer)
{
LOG_DEBUG_S(*NActors::TlsActivationContext, NKikimrServices::KQP_EXECUTER,
"CreateResultStreamChannelProxy: TxId: " << txId <<
", channelId: " << channelId
);
- return new TResultStreamChannelProxy(txId, channelId, itemType, columnOrder, target, stats, executer);
+ return new TResultStreamChannelProxy(txId, channelId, itemType, columnOrder, target, std::move(stats), executer);
}
NActors::IActor* CreateResultDataChannelProxy(ui64 txId, ui64 channelId,
- TQueryExecutionStats* stats, TActorId executer,
+ std::shared_ptr<TQueryExecutionStats> stats, TActorId executer,
ui32 inputIndex, TEvKqpExecuter::TEvTxResponse* resultsReceiver)
{
LOG_DEBUG_S(*NActors::TlsActivationContext, NKikimrServices::KQP_EXECUTER,
@@ -203,7 +203,7 @@ NActors::IActor* CreateResultDataChannelProxy(ui64 txId, ui64 channelId,
", channelId: " << channelId
);
- return new TResultDataChannelProxy(txId, channelId, stats, executer, inputIndex, resultsReceiver);
+ return new TResultDataChannelProxy(txId, channelId, std::move(stats), executer, inputIndex, resultsReceiver);
}
} // namespace NKqp
diff --git a/ydb/core/kqp/executer_actor/kqp_result_channel.h b/ydb/core/kqp/executer_actor/kqp_result_channel.h
index 5cc8c54fb0..6441e288c6 100644
--- a/ydb/core/kqp/executer_actor/kqp_result_channel.h
+++ b/ydb/core/kqp/executer_actor/kqp_result_channel.h
@@ -26,10 +26,10 @@ struct TQueryExecutionStats;
struct TKqpExecuterTxResult;
NActors::IActor* CreateResultStreamChannelProxy(ui64 txId, ui64 channelId, NKikimr::NMiniKQL::TType* itemType,
- const TVector<ui32>* columnOrder, NActors::TActorId target, TQueryExecutionStats* stats,
+ const TVector<ui32>* columnOrder, NActors::TActorId target, std::shared_ptr<TQueryExecutionStats> stats,
NActors::TActorId executer);
-NActors::IActor* CreateResultDataChannelProxy(ui64 txId, ui64 channelId, TQueryExecutionStats* stats,
+NActors::IActor* CreateResultDataChannelProxy(ui64 txId, ui64 channelId, std::shared_ptr<TQueryExecutionStats> stats,
NActors::TActorId executer, ui32 inputIndex, TEvKqpExecuter::TEvTxResponse* receiver);
} // namespace NKikimr::NKqp
diff --git a/ydb/core/kqp/executer_actor/kqp_scan_executer.cpp b/ydb/core/kqp/executer_actor/kqp_scan_executer.cpp
index 6e8103fc2c..c375cfc98e 100644
--- a/ydb/core/kqp/executer_actor/kqp_scan_executer.cpp
+++ b/ydb/core/kqp/executer_actor/kqp_scan_executer.cpp
@@ -730,10 +730,6 @@ private:
channelPair.second->Receive(ev, TActivationContext::AsActorContext());
}
- for (auto& [shardId, nodeId] : ShardIdToNodeId) {
- Send(TActivationContext::InterconnectProxy(nodeId), new TEvents::TEvUnsubscribe());
- }
-
auto totalTime = TInstant::Now() - StartTime;
Counters->Counters->ScanTxTotalTimeHistogram->Collect(totalTime.MilliSeconds());
diff --git a/ydb/core/kqp/host/kqp_host.cpp b/ydb/core/kqp/host/kqp_host.cpp
index d8e4979926..401861e22f 100644
--- a/ydb/core/kqp/host/kqp_host.cpp
+++ b/ydb/core/kqp/host/kqp_host.cpp
@@ -810,8 +810,8 @@ private:
for (const auto& operation : queryBlock.Operations()) {
auto& tableData = SessionCtx->Tables().ExistingTable(operation.Cluster(), operation.Table());
- if (!tableData.Metadata->SysView.empty()) {
- // Always use ScanQuery for queries with system tables.
+ if (tableData.Metadata->IsOlap() || !tableData.Metadata->SysView.empty()) {
+ // Always use ScanQuery for queries with OLAP and system tables.
return true;
}
}
diff --git a/ydb/core/kqp/node_service/kqp_node_service.cpp b/ydb/core/kqp/node_service/kqp_node_service.cpp
index dd57aadc36..7f89570b5e 100644
--- a/ydb/core/kqp/node_service/kqp_node_service.cpp
+++ b/ydb/core/kqp/node_service/kqp_node_service.cpp
@@ -11,6 +11,7 @@
#include <ydb/core/kqp/compute_actor/kqp_compute_actor.h>
#include <ydb/core/kqp/rm_service/kqp_resource_estimation.h>
#include <ydb/core/kqp/rm_service/kqp_rm_service.h>
+#include <ydb/core/kqp/runtime/kqp_read_actor.h>
#include <ydb/core/kqp/common/kqp_resolve.h>
#include <ydb/core/base/wilson.h>
@@ -76,7 +77,12 @@ public:
IKqpNodeComputeActorFactory* caFactory)
: Config(config.GetResourceManager())
, Counters(counters)
- , CaFactory(caFactory) {}
+ , CaFactory(caFactory)
+ {
+ if (config.HasIteratorReadsRetrySettings()) {
+ SetIteratorReadsRetrySettings(config.GetIteratorReadsRetrySettings());
+ }
+ }
void Bootstrap() {
LOG_I("Starting KQP Node service");
@@ -444,10 +450,31 @@ private:
LOG_I("Updated table service config: " << Config.DebugString());
}
+ if (event.GetConfig().GetTableServiceConfig().HasIteratorReadsRetrySettings()) {
+ SetIteratorReadsRetrySettings(event.GetConfig().GetTableServiceConfig().GetIteratorReadsRetrySettings());
+ }
+
auto responseEv = MakeHolder<NConsole::TEvConsole::TEvConfigNotificationResponse>(event);
Send(ev->Sender, responseEv.Release(), IEventHandle::FlagTrackDelivery, ev->Cookie);
}
+ void SetIteratorReadsRetrySettings(const NKikimrConfig::TTableServiceConfig::TIteratorReadsRetrySettings& settings) {
+ auto ptr = MakeIntrusive<NKikimr::NKqp::TIteratorReadBackoffSettings>();
+ ptr->StartRetryDelay = TDuration::MilliSeconds(settings.GetStartDelayMs());
+ ptr->MaxShardAttempts = settings.GetMaxShardRetries();
+ ptr->MaxShardResolves = settings.GetMaxShardResolves();
+ ptr->UnsertaintyRatio = settings.GetUnsertaintyRatio();
+ ptr->Multiplier = settings.GetMultiplier();
+ if (settings.GetMaxTotalRetries()) {
+ ptr->MaxTotalRetries = settings.GetMaxTotalRetries();
+ }
+ if (settings.GetIteratorResponseTimeoutMs()) {
+ ptr->ReadResponseTimeout = TDuration::MilliSeconds(settings.GetIteratorResponseTimeoutMs());
+ }
+ ptr->MaxRetryDelay = TDuration::MilliSeconds(settings.GetMaxDelayMs());
+ SetReadIteratorBackoffSettings(ptr);
+ }
+
void HandleWork(TEvents::TEvUndelivered::TPtr& ev) {
switch (ev->Get()->SourceType) {
case TEvKqpNode::TEvStartKqpTasksResponse::EventType: {
diff --git a/ydb/core/kqp/provider/yql_kikimr_gateway.cpp b/ydb/core/kqp/provider/yql_kikimr_gateway.cpp
index d251aad27e..066749c709 100644
--- a/ydb/core/kqp/provider/yql_kikimr_gateway.cpp
+++ b/ydb/core/kqp/provider/yql_kikimr_gateway.cpp
@@ -160,6 +160,21 @@ bool TTtlSettings::TryParse(const NNodes::TCoNameValueTupleList& node, TTtlSetti
}
settings.ExpireAfter = TDuration::FromValue(value);
+ } else if (name == "columnUnit") {
+ YQL_ENSURE(field.Value().Maybe<TCoAtom>());
+ auto value = field.Value().Cast<TCoAtom>().StringValue();
+ if (value == "seconds") {
+ settings.ColumnUnit = EUnit::Seconds;
+ } else if (value == "milliseconds") {
+ settings.ColumnUnit = EUnit::Milliseconds;
+ } else if (value == "microseconds") {
+ settings.ColumnUnit = EUnit::Microseconds;
+ } else if (value == "nanoseconds") {
+ settings.ColumnUnit = EUnit::Nanoseconds;
+ } else {
+ error = TStringBuilder() << "Invalid unit: " << value;
+ return false;
+ }
} else {
error = TStringBuilder() << "Unknown field: " << name;
return false;
@@ -292,8 +307,16 @@ bool ConvertReadReplicasSettingsToProto(const TString settings, Ydb::Table::Read
}
void ConvertTtlSettingsToProto(const NYql::TTtlSettings& settings, Ydb::Table::TtlSettings& proto) {
- proto.mutable_date_type_column()->set_column_name(settings.ColumnName);
- proto.mutable_date_type_column()->set_expire_after_seconds(settings.ExpireAfter.Seconds());
+ if (!settings.ColumnUnit) {
+ auto& opts = *proto.mutable_date_type_column();
+ opts.set_column_name(settings.ColumnName);
+ opts.set_expire_after_seconds(settings.ExpireAfter.Seconds());
+ } else {
+ auto& opts = *proto.mutable_value_since_unix_epoch();
+ opts.set_column_name(settings.ColumnName);
+ opts.set_column_unit(static_cast<Ydb::Table::ValueSinceUnixEpochModeSettings::Unit>(*settings.ColumnUnit));
+ opts.set_expire_after_seconds(settings.ExpireAfter.Seconds());
+ }
}
Ydb::FeatureFlag::Status GetFlagValue(const TMaybe<bool>& value) {
diff --git a/ydb/core/kqp/provider/yql_kikimr_gateway.h b/ydb/core/kqp/provider/yql_kikimr_gateway.h
index a0f5a0b3b4..3ae5ab535f 100644
--- a/ydb/core/kqp/provider/yql_kikimr_gateway.h
+++ b/ydb/core/kqp/provider/yql_kikimr_gateway.h
@@ -151,8 +151,16 @@ struct TColumnFamily {
};
struct TTtlSettings {
+ enum class EUnit: ui32 {
+ Seconds = 1,
+ Milliseconds = 2,
+ Microseconds = 3,
+ Nanoseconds = 4,
+ };
+
TString ColumnName;
TDuration ExpireAfter;
+ TMaybe<EUnit> ColumnUnit;
static bool TryParse(const NNodes::TCoNameValueTupleList& node, TTtlSettings& settings, TString& error);
};
@@ -439,6 +447,10 @@ struct TKikimrTableMetadata : public TThrRefBase {
}
return {nullptr, TIndexDescription::EIndexState::Invalid};
}
+
+ bool IsOlap() const {
+ return Kind == EKikimrTableKind::Olap;
+ }
};
struct TCreateUserSettings {
diff --git a/ydb/core/kqp/runtime/CMakeLists.darwin.txt b/ydb/core/kqp/runtime/CMakeLists.darwin.txt
index 3f0643e6e8..88cc8947ec 100644
--- a/ydb/core/kqp/runtime/CMakeLists.darwin.txt
+++ b/ydb/core/kqp/runtime/CMakeLists.darwin.txt
@@ -31,6 +31,7 @@ target_link_libraries(core-kqp-runtime PUBLIC
library-yql-utils
dq-actors-protos
yql-dq-runtime
+ cpp-threading-hot_swap
tools-enum_parser-enum_serialization_runtime
)
target_sources(core-kqp-runtime PRIVATE
diff --git a/ydb/core/kqp/runtime/CMakeLists.linux-aarch64.txt b/ydb/core/kqp/runtime/CMakeLists.linux-aarch64.txt
index 7dee34ab15..7d5bf657f4 100644
--- a/ydb/core/kqp/runtime/CMakeLists.linux-aarch64.txt
+++ b/ydb/core/kqp/runtime/CMakeLists.linux-aarch64.txt
@@ -32,6 +32,7 @@ target_link_libraries(core-kqp-runtime PUBLIC
library-yql-utils
dq-actors-protos
yql-dq-runtime
+ cpp-threading-hot_swap
tools-enum_parser-enum_serialization_runtime
)
target_sources(core-kqp-runtime PRIVATE
diff --git a/ydb/core/kqp/runtime/CMakeLists.linux.txt b/ydb/core/kqp/runtime/CMakeLists.linux.txt
index 7dee34ab15..7d5bf657f4 100644
--- a/ydb/core/kqp/runtime/CMakeLists.linux.txt
+++ b/ydb/core/kqp/runtime/CMakeLists.linux.txt
@@ -32,6 +32,7 @@ target_link_libraries(core-kqp-runtime PUBLIC
library-yql-utils
dq-actors-protos
yql-dq-runtime
+ cpp-threading-hot_swap
tools-enum_parser-enum_serialization_runtime
)
target_sources(core-kqp-runtime PRIVATE
diff --git a/ydb/core/kqp/runtime/kqp_read_actor.cpp b/ydb/core/kqp/runtime/kqp_read_actor.cpp
index fa9feecb0f..bfa8a24f6c 100644
--- a/ydb/core/kqp/runtime/kqp_read_actor.cpp
+++ b/ydb/core/kqp/runtime/kqp_read_actor.cpp
@@ -14,6 +14,7 @@
#include <ydb/library/yql/dq/actors/compute/dq_compute_actor_impl.h>
+#include <library/cpp/threading/hot_swap/hot_swap.h>
#include <library/cpp/actors/core/interconnect.h>
#include <library/cpp/actors/core/actorsystem.h>
@@ -21,9 +22,6 @@
namespace {
-static constexpr ui64 MAX_SHARD_RETRIES = 5;
-static constexpr ui64 MAX_SHARD_RESOLVES = 3;
-
bool IsDebugLogEnabled(const NActors::TActorSystem* actorSystem, NActors::NLog::EComponent component) {
auto* settings = actorSystem->LoggerSettings();
return settings && settings->Satisfies(NActors::NLog::EPriority::PRI_DEBUG, component);
@@ -64,7 +62,33 @@ THolder<NKikimr::TEvDataShard::TEvReadAck> DefaultAckSettings() {
NActors::TActorId MainPipeCacheId = NKikimr::MakePipePeNodeCacheID(false);
NActors::TActorId FollowersPipeCacheId = NKikimr::MakePipePeNodeCacheID(true);
-TDuration StartRetryDelay = TDuration::MilliSeconds(250);
+struct TBackoffStorage {
+ THotSwap<NKikimr::NKqp::TIteratorReadBackoffSettings> SettingsPtr;
+
+ TBackoffStorage() {
+ SettingsPtr.AtomicStore(new NKikimr::NKqp::TIteratorReadBackoffSettings());
+ }
+};
+
+TDuration CalcDelay(size_t attempt, bool allowInstantRetry) {
+ return Singleton<::TBackoffStorage>()->SettingsPtr.AtomicLoad()->CalcShardDelay(attempt, allowInstantRetry);
+}
+
+size_t MaxShardResolves() {
+ return Singleton<::TBackoffStorage>()->SettingsPtr.AtomicLoad()->MaxShardResolves;
+}
+
+size_t MaxShardRetries() {
+ return Singleton<::TBackoffStorage>()->SettingsPtr.AtomicLoad()->MaxShardAttempts;
+}
+
+TMaybe<size_t> MaxTotalRetries() {
+ return Singleton<::TBackoffStorage>()->SettingsPtr.AtomicLoad()->MaxTotalRetries;
+}
+
+TMaybe<TDuration> ShardTimeout() {
+ return Singleton<::TBackoffStorage>()->SettingsPtr.AtomicLoad()->ReadResponseTimeout;
+}
}
@@ -441,6 +465,7 @@ public:
state.AddRange(TSerializedTableRange(range));
}
} else {
+ YQL_ENSURE(Settings.GetRanges().KeyPointsSize() > 0);
for (const auto& point : Settings.GetRanges().GetKeyPoints()) {
state.AddPoint(TSerializedCellVec(point));
}
@@ -485,7 +510,7 @@ public:
}
void ResolveShard(TShardState* state) {
- if (state->ResolveAttempt >= MAX_SHARD_RESOLVES) {
+ if (state->ResolveAttempt >= ::MaxShardResolves()) {
RuntimeError(TStringBuilder() << "Table '" << Settings.GetTable().GetTablePath() << "' resolve limit exceeded",
NDqProto::StatusIds::UNAVAILABLE);
return;
@@ -723,12 +748,23 @@ public:
}
auto state = Reads[id].Shard;
- if (state->RetryAttempt == 0 && allowInstantRetry) { // instant retry
- return DoRetryRead(id);
+
+ TotalRetries += 1;
+ auto limit = ::MaxTotalRetries();
+ if (limit && TotalRetries > *limit) {
+ return RuntimeError(TStringBuilder() << "Table '" << Settings.GetTable().GetTablePath() << "' retry limit exceeded",
+ NDqProto::StatusIds::UNAVAILABLE);
+ }
+
+ state->RetryAttempt += 1;
+ if (state->RetryAttempt > ::MaxShardRetries()) {
+ ResetRead(id);
+ return ResolveShard(state);
}
- auto delay = ::StartRetryDelay;
- for (size_t i = 0; i < state->RetryAttempt; ++i) {
- delay *= 2;
+
+ auto delay = ::CalcDelay(state->RetryAttempt, allowInstantRetry);
+ if (delay == TDuration::Zero()) {
+ return DoRetryRead(id);
}
CA_LOG_D("schedule retry #" << id << " after " << delay);
@@ -741,12 +777,6 @@ public:
}
auto state = Reads[id].Shard;
-
- state->RetryAttempt += 1;
- if (state->RetryAttempt >= MAX_SHARD_RETRIES) {
- ResetRead(id);
- return ResolveShard(state);
- }
CA_LOG_D("Retrying read #" << id);
ResetRead(id);
@@ -811,6 +841,7 @@ public:
record.SetReverse(Settings.GetReverse());
if (limit) {
record.SetMaxRows(*limit);
+ record.SetTotalRowsLimit(*limit);
}
record.SetMaxBytes(Min<ui64>(record.GetMaxBytes(), BufSize));
@@ -836,6 +867,10 @@ public:
ReadIdByTabletId[state->TabletId].push_back(id);
Send(PipeCacheId, new TEvPipeCache::TEvForward(ev.Release(), state->TabletId, true),
IEventHandle::FlagTrackDelivery);
+
+ if (auto delay = ShardTimeout()) {
+ TlsActivationContext->Schedule(*delay, new IEventHandle(SelfId(), SelfId(), new TEvRetryShard(id, Reads[id].LastSeqNo)));
+ }
}
void NotifyCA() {
@@ -1169,6 +1204,10 @@ public:
CA_LOG_D("sending ack for read #" << id << " limit " << limit << " seqno = " << record.GetSeqNo());
Send(PipeCacheId, new TEvPipeCache::TEvForward(request.Release(), Reads[id].Shard->TabletId, true),
IEventHandle::FlagTrackDelivery);
+
+ if (auto delay = ShardTimeout()) {
+ TlsActivationContext->Schedule(*delay, new IEventHandle(SelfId(), SelfId(), new TEvRetryShard(id, Reads[id].LastSeqNo)));
+ }
} else {
Reads[id].Finished = true;
}
@@ -1205,10 +1244,6 @@ public:
<< " has limit " << (Settings.GetItemsLimit() != 0)
<< " limit reached " << LimitReached());
- if (!Results.empty()) {
- NotifyCA();
- }
-
return bytes;
}
@@ -1339,6 +1374,8 @@ private:
TIntrusivePtr<TKqpCounters> Counters;
bool UseFollowers;
NActors::TActorId PipeCacheId;
+
+ size_t TotalRetries = 0;
};
@@ -1363,5 +1400,9 @@ void InterceptReadActorPipeCache(NActors::TActorId id) {
::MainPipeCacheId = id;
}
+void SetReadIteratorBackoffSettings(TIntrusivePtr<TIteratorReadBackoffSettings> ptr) {
+ Singleton<::TBackoffStorage>()->SettingsPtr.AtomicStore(ptr);
+}
+
} // namespace NKqp
} // namespace NKikimr
diff --git a/ydb/core/kqp/runtime/kqp_read_actor.h b/ydb/core/kqp/runtime/kqp_read_actor.h
index 22c4e05d5c..12da2c844c 100644
--- a/ydb/core/kqp/runtime/kqp_read_actor.h
+++ b/ydb/core/kqp/runtime/kqp_read_actor.h
@@ -12,6 +12,36 @@ class TEvReadAck;
namespace NKikimr {
namespace NKqp {
+struct TIteratorReadBackoffSettings : TAtomicRefCount<TIteratorReadBackoffSettings> {
+ TDuration StartRetryDelay = TDuration::MilliSeconds(5);
+ size_t MaxShardAttempts = 10;
+ size_t MaxShardResolves = 3;
+ double UnsertaintyRatio = 0.5;
+ double Multiplier = 2.0;
+ TDuration MaxRetryDelay = TDuration::Seconds(1);
+
+ TMaybe<size_t> MaxTotalRetries;
+ TMaybe<TDuration> ReadResponseTimeout;
+
+ TDuration CalcShardDelay(size_t attempt, bool allowInstantRetry) {
+ if (allowInstantRetry && attempt == 1) {
+ return TDuration::Zero();
+ }
+
+ auto delay = StartRetryDelay;
+ for (size_t i = 0; i < attempt; ++i) {
+ delay *= Multiplier;
+ delay = Min(delay, MaxRetryDelay);
+ }
+
+ delay *= (1 - UnsertaintyRatio * RandomNumber<double>());
+
+ return delay;
+ }
+};
+
+void SetReadIteratorBackoffSettings(TIntrusivePtr<TIteratorReadBackoffSettings>);
+
void RegisterKqpReadActor(NYql::NDq::TDqAsyncIoFactory&, TIntrusivePtr<TKqpCounters>);
void InjectRangeEvReadSettings(const NKikimrTxDataShard::TEvRead&);
diff --git a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp
index 3940519fb8..94b585b4f1 100644
--- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp
+++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp
@@ -761,7 +761,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
session.Close();
}
- Y_UNIT_TEST(QueryOltpAndOlap) {
+ Y_UNIT_TEST(ScanQueryOltpAndOlap) {
auto settings = TKikimrSettings()
.SetWithSampleTables(false);
TKikimrRunner kikimr(settings);
@@ -793,6 +793,36 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
}
}
+ Y_UNIT_TEST(YqlScriptOltpAndOlap) {
+ auto settings = TKikimrSettings()
+ .SetWithSampleTables(false);
+ TKikimrRunner kikimr(settings);
+
+ // EnableDebugLogging(kikimr);
+
+ TLocalHelper(kikimr).CreateTestOlapTable();
+ WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000, 3);
+
+ CreateSampleOltpTable(kikimr);
+
+ {
+ NScripting::TScriptingClient client(kikimr.GetDriver());
+ auto it = client.ExecuteYqlScript(R"(
+ --!syntax_v1
+
+ SELECT a.`resource_id`, a.`timestamp`, t.*
+ FROM `/Root/OltpTable` AS t
+ JOIN `/Root/olapStore/olapTable` AS a ON CAST(t.Key AS Utf8) = a.resource_id
+ ORDER BY a.`resource_id`, a.`timestamp`
+ )").GetValueSync();
+
+ UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString());
+ TString result = FormatResultSetYson(it.GetResultSet(0));
+ Cout << result << Endl;
+ CompareYson(result, R"([[[1u];["Value-001"];["1"];["1"];1000001u];[[2u];["Value-002"];["2"];["2"];1000002u]])");
+ }
+ }
+
Y_UNIT_TEST(EmptyRange) {
auto settings = TKikimrSettings()
.SetWithSampleTables(false);
@@ -4156,6 +4186,98 @@ Y_UNIT_TEST_SUITE(KqpOlap) {
TestTableWithNulls({ testCase });
}
+
+ Y_UNIT_TEST(Olap_InsertFails) {
+ auto settings = TKikimrSettings()
+ .SetWithSampleTables(false)
+ .SetEnableOlapSchemaOperations(true);
+ TKikimrRunner kikimr(settings);
+
+ EnableDebugLogging(kikimr);
+ TTableWithNullsHelper(kikimr).CreateTableWithNulls();
+
+ auto tableClient = kikimr.GetTableClient();
+
+ auto session = tableClient.CreateSession().GetValueSync().GetSession();
+
+ auto result = session.ExecuteDataQuery(R"(
+ INSERT INTO `/Root/tableWithNulls`(id, resource_id, level) VALUES(1, "1", 1);
+ )", TTxControl::BeginTx().CommitTx()).GetValueSync();
+
+ UNIT_ASSERT_C(!result.IsSuccess(), result.GetIssues().ToString());
+ }
+
+ Y_UNIT_TEST(OlapRead_FailsOnDataQuery) {
+ auto settings = TKikimrSettings()
+ .SetWithSampleTables(false)
+ .SetEnableOlapSchemaOperations(true);
+ TKikimrRunner kikimr(settings);
+
+ EnableDebugLogging(kikimr);
+ TTableWithNullsHelper(kikimr).CreateTableWithNulls();
+ TLocalHelper(kikimr).CreateTestOlapTable();
+
+ auto tableClient = kikimr.GetTableClient();
+
+ {
+ WriteTestDataForTableWithNulls(kikimr, "/Root/tableWithNulls");
+ WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000, 2);
+ }
+
+ auto session = tableClient.CreateSession().GetValueSync().GetSession();
+
+ auto result = session.ExecuteDataQuery(R"(
+ SELECT * FROM `/Root/tableWithNulls`;
+ )", TTxControl::BeginTx().CommitTx()).GetValueSync();
+
+ UNIT_ASSERT_C(!result.IsSuccess(), result.GetIssues().ToString());
+ }
+
+ Y_UNIT_TEST(OlapRead_UsesScanOnJoin) {
+ auto settings = TKikimrSettings()
+ .SetWithSampleTables(false)
+ .SetEnableOlapSchemaOperations(true);
+ TKikimrRunner kikimr(settings);
+
+ EnableDebugLogging(kikimr);
+ TTableWithNullsHelper(kikimr).CreateTableWithNulls();
+ TLocalHelper(kikimr).CreateTestOlapTable();
+
+ {
+ WriteTestDataForTableWithNulls(kikimr, "/Root/tableWithNulls");
+ WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000, 2);
+ }
+
+ NScripting::TScriptingClient client(kikimr.GetDriver());
+ auto result = client.ExecuteYqlScript(R"(
+ SELECT * FROM `/Root/olapStore/olapTable` WHERE resource_id IN (SELECT CAST(id AS Utf8) FROM `/Root/tableWithNulls`);
+ )").GetValueSync();
+
+ UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString());
+ }
+
+ Y_UNIT_TEST(OlapRead_UsesScanOnJoinWithDataShardTable) {
+ auto settings = TKikimrSettings()
+ .SetWithSampleTables(false)
+ .SetEnableOlapSchemaOperations(true);
+ TKikimrRunner kikimr(settings);
+
+ EnableDebugLogging(kikimr);
+ TTableWithNullsHelper(kikimr).CreateTableWithNulls();
+ TLocalHelper(kikimr).CreateTestOlapTable();
+
+ {
+ WriteTestDataForTableWithNulls(kikimr, "/Root/tableWithNulls");
+ WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000, 2);
+ }
+
+ NScripting::TScriptingClient client(kikimr.GetDriver());
+ auto result = client.ExecuteYqlScript(R"(
+ SELECT * FROM `/Root/olapStore/olapTable` WHERE resource_id IN (SELECT CAST(id AS Utf8) FROM `/Root/tableWithNulls`);
+ )").GetValueSync();
+
+ UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString());
+ }
}
} // namespace NKqp
diff --git a/ydb/core/kqp/ut/scan/kqp_split_ut.cpp b/ydb/core/kqp/ut/scan/kqp_split_ut.cpp
index 0cf0526973..bfae34d9d5 100644
--- a/ydb/core/kqp/ut/scan/kqp_split_ut.cpp
+++ b/ydb/core/kqp/ut/scan/kqp_split_ut.cpp
@@ -392,6 +392,13 @@ Y_UNIT_TEST_SUITE(KqpSplit) {
Runtime = Server->GetRuntime();
KqpProxy = MakeKqpProxyID(Runtime->GetNodeId(0));
+ {
+ auto settings = MakeIntrusive<TIteratorReadBackoffSettings>();
+ settings->StartRetryDelay = TDuration::MilliSeconds(250);
+ settings->MaxShardAttempts = 4;
+ SetReadIteratorBackoffSettings(settings);
+ }
+
Sender = Runtime->AllocateEdgeActor();
CollectKeysTo(&CollectedKeys, Runtime, Sender);
diff --git a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp
index 5697cd6470..cdb02bfa51 100644
--- a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp
+++ b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp
@@ -1558,6 +1558,45 @@ Y_UNIT_TEST_SUITE(KqpScheme) {
CreateTableWithTtlSettings(true);
}
+ void CreateTableWithTtlOnIntColumn(TValueSinceUnixEpochModeSettings::EUnit unit) {
+ TKikimrRunner kikimr;
+ auto db = kikimr.GetTableClient();
+ auto session = db.CreateSession().GetValueSync().GetSession();
+ TString tableName = "/Root/TableWithTtlSettings";
+
+ auto query = TStringBuilder() << R"(
+ --!syntax_v1
+ CREATE TABLE `)" << tableName << R"(` (
+ Key Uint64,
+ IntColumn Uint64,
+ PRIMARY KEY (Key)
+ ) WITH (
+ TTL = Interval("P1D") ON IntColumn AS )" << unit << R"(
+ ))";
+ {
+ auto result = session.ExecuteSchemeQuery(query).ExtractValueSync();
+ UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString());
+ }
+ {
+ auto result = session.DescribeTable(tableName).ExtractValueSync();
+ UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString());
+ UNIT_ASSERT_VALUES_EQUAL(result.GetTableDescription().GetTtlSettings()->GetValueSinceUnixEpoch().GetColumnUnit(), unit);
+ }
+ }
+
+ Y_UNIT_TEST(CreateTableWithTtlOnIntColumn) {
+ const auto cases = TVector<TValueSinceUnixEpochModeSettings::EUnit>{
+ TValueSinceUnixEpochModeSettings::EUnit::Seconds,
+ TValueSinceUnixEpochModeSettings::EUnit::MilliSeconds,
+ TValueSinceUnixEpochModeSettings::EUnit::MicroSeconds,
+ TValueSinceUnixEpochModeSettings::EUnit::NanoSeconds,
+ };
+
+ for (auto unit : cases) {
+ CreateTableWithTtlOnIntColumn(unit);
+ }
+ }
+
void CreateTableWithUniformPartitions(bool compat) {
TKikimrRunner kikimr;
auto db = kikimr.GetTableClient();
diff --git a/ydb/core/mind/node_broker.cpp b/ydb/core/mind/node_broker.cpp
index 829717d598..6d49f9015c 100644
--- a/ydb/core/mind/node_broker.cpp
+++ b/ydb/core/mind/node_broker.cpp
@@ -68,18 +68,9 @@ void TNodeBroker::OnTabletDead(TEvTablet::TEvTabletDead::TPtr &ev,
Die(ctx);
}
-void TNodeBroker::Enqueue(TAutoPtr<IEventHandle> &ev,
- const TActorContext &ctx)
-{
- switch (ev->GetTypeRewrite()) {
- case TEvNodeBroker::EvListNodes:
- case TEvNodeBroker::EvResolveNode:
- case TEvNodeBroker::EvRegistrationRequest:
- EnqueuedEvents.push_back(ev);
- [[fallthrough]]; // AUTOGENERATED_FALLTHROUGH_FIXME
- default:
- TTabletExecutedFlat::Enqueue(ev, ctx);
- }
+void TNodeBroker::DefaultSignalTabletActive(const TActorContext &ctx)
+{
+ Y_UNUSED(ctx);
}
bool TNodeBroker::OnRenderAppHtmlPage(NMon::TEvRemoteHttpInfo::TPtr ev,
@@ -286,13 +277,6 @@ void TNodeBroker::ScheduleEpochUpdate(const TActorContext &ctx)
}
}
-void TNodeBroker::ProcessEnqueuedEvents(const TActorContext &ctx)
-{
- for (auto &ev : EnqueuedEvents)
- Receive(ev, ctx);
- EnqueuedEvents.clear();
-}
-
void TNodeBroker::FillNodeInfo(const TNodeInfo &node,
NKikimrNodeBroker::TNodeInfo &info) const
{
diff --git a/ydb/core/mind/node_broker__load_state.cpp b/ydb/core/mind/node_broker__load_state.cpp
index 3518e14781..7cca5bea2c 100644
--- a/ydb/core/mind/node_broker__load_state.cpp
+++ b/ydb/core/mind/node_broker__load_state.cpp
@@ -40,7 +40,7 @@ public:
Self->SubscribeForConfigUpdates(ctx);
Self->ScheduleEpochUpdate(ctx);
Self->PrepareEpochCache();
- Self->ProcessEnqueuedEvents(ctx);
+ Self->SignalTabletActive(ctx);
Self->TxCompleted(this, ctx);
}
diff --git a/ydb/core/mind/node_broker_impl.h b/ydb/core/mind/node_broker_impl.h
index cbdb461832..884a06fafb 100644
--- a/ydb/core/mind/node_broker_impl.h
+++ b/ydb/core/mind/node_broker_impl.h
@@ -135,8 +135,7 @@ private:
void OnDetach(const TActorContext &ctx) override;
void OnTabletDead(TEvTablet::TEvTabletDead::TPtr &ev,
const TActorContext &ctx) override;
- void Enqueue(TAutoPtr<IEventHandle> &ev,
- const TActorContext &ctx) override;
+ void DefaultSignalTabletActive(const TActorContext &ctx) override;
bool OnRenderAppHtmlPage(NMon::TEvRemoteHttpInfo::TPtr ev,
const TActorContext &ctx) override;
void Cleanup(const TActorContext &ctx);
@@ -220,7 +219,6 @@ private:
void ProcessDelayedListNodesRequests();
void ScheduleEpochUpdate(const TActorContext &ctx);
- void ProcessEnqueuedEvents(const TActorContext &ctx);
void FillNodeInfo(const TNodeInfo &node,
NKikimrNodeBroker::TNodeInfo &info) const;
@@ -310,7 +308,6 @@ private:
ui64 ConfigSubscriptionId;
// Events collected during initialization phase.
- TVector<TAutoPtr<IEventHandle>> EnqueuedEvents;
TMultiMap<ui64, TEvNodeBroker::TEvListNodes::TPtr> DelayedListNodesRequests;
// Transactions queue.
TTxProcessor::TPtr TxProcessor;
diff --git a/ydb/core/mon/async_http_mon.cpp b/ydb/core/mon/async_http_mon.cpp
index 7805201ee7..fa5d69165a 100644
--- a/ydb/core/mon/async_http_mon.cpp
+++ b/ydb/core/mon/async_http_mon.cpp
@@ -394,6 +394,7 @@ public:
STATEFN(StateWork) {
switch (ev->GetTypeRewrite()) {
hFunc(NHttp::TEvHttpProxy::TEvHttpIncomingRequest, Handle);
+ cFunc(TEvents::TSystem::Poison, PassAway);
}
}
@@ -458,6 +459,7 @@ public:
STATEFN(StateWork) {
switch (ev->GetTypeRewrite()) {
hFunc(NHttp::TEvHttpProxy::TEvHttpIncomingRequest, Handle);
+ cFunc(TEvents::TSystem::Poison, PassAway);
}
}
@@ -660,6 +662,7 @@ public:
switch (ev->GetTypeRewrite()) {
hFunc(NHttp::TEvHttpProxy::TEvHttpIncomingRequest, Handle);
hFunc(TEvMon::TEvMonitoringRequest, Handle);
+ cFunc(TEvents::TSystem::Poison, PassAway);
}
}
@@ -729,7 +732,7 @@ void TAsyncHttpMon::Stop() {
IndexMonPage->ClearPages(); // it's required to avoid loop-reference
if (ActorSystem) {
TGuard<TMutex> g(Mutex);
- for (const TActorId& actorId : ActorServices) {
+ for (const auto& [path, actorId] : ActorServices) {
ActorSystem->Send(actorId, new TEvents::TEvPoisonPill);
}
ActorSystem->Send(NodeProxyServiceActorId, new TEvents::TEvPoisonPill);
@@ -752,12 +755,15 @@ NMonitoring::TIndexMonPage* TAsyncHttpMon::RegisterIndexPage(const TString& path
void TAsyncHttpMon::RegisterActorMonPage(const TActorMonPageInfo& pageInfo) {
if (ActorSystem) {
TActorMonPage* actorMonPage = static_cast<TActorMonPage*>(pageInfo.Page.Get());
- auto actorId = ActorSystem->Register(
+ auto& actorId = ActorServices[pageInfo.Path];
+ if (actorId) {
+ ActorSystem->Send(new IEventHandle(TEvents::TSystem::Poison, 0, actorId, {}, nullptr, 0));
+ }
+ actorId = ActorSystem->Register(
new THttpMonServiceLegacyActor(actorMonPage),
TMailboxType::ReadAsFilled,
ActorSystem->AppData<NKikimr::TAppData>()->UserPoolId);
ActorSystem->Send(HttpProxyActorId, new NHttp::TEvHttpProxy::TEvRegisterHandler(pageInfo.Path, actorId));
- ActorServices.push_back(actorId);
}
}
@@ -774,7 +780,9 @@ NMonitoring::IMonPage* TAsyncHttpMon::RegisterActorPage(TRegisterActorPageFields
fields.UseAuth ? Config.Authorizer : TRequestAuthorizer());
if (fields.Index) {
fields.Index->Register(page);
- fields.Index->SortPages();
+ if (fields.SortPages) {
+ fields.Index->SortPages();
+ }
} else {
Register(page.Get());
}
diff --git a/ydb/core/mon/async_http_mon.h b/ydb/core/mon/async_http_mon.h
index aff58155b4..3bed45c106 100644
--- a/ydb/core/mon/async_http_mon.h
+++ b/ydb/core/mon/async_http_mon.h
@@ -41,7 +41,7 @@ protected:
TMutex Mutex;
std::vector<TActorMonPageInfo> ActorMonPages;
- std::vector<TActorId> ActorServices;
+ THashMap<TString, TActorId> ActorServices;
void RegisterActorMonPage(const TActorMonPageInfo& pageInfo);
};
diff --git a/ydb/core/mon/mon.cpp b/ydb/core/mon/mon.cpp
index b32266d911..30874e675c 100644
--- a/ydb/core/mon/mon.cpp
+++ b/ydb/core/mon/mon.cpp
@@ -8,7 +8,7 @@ namespace NActors {
using namespace NMonitoring;
IMonPage* TMon::RegisterActorPage(TIndexMonPage* index, const TString& relPath,
- const TString& title, bool preTag, TActorSystem* actorSystem, const TActorId& actorId, bool useAuth) {
+ const TString& title, bool preTag, TActorSystem* actorSystem, const TActorId& actorId, bool useAuth, bool sortPages) {
return RegisterActorPage({
.Title = title,
.RelPath = relPath,
@@ -17,6 +17,7 @@ IMonPage* TMon::RegisterActorPage(TIndexMonPage* index, const TString& relPath,
.PreTag = preTag,
.ActorId = actorId,
.UseAuth = useAuth,
+ .SortPages = sortPages,
});
}
diff --git a/ydb/core/mon/mon.h b/ydb/core/mon/mon.h
index 8bf82ca724..ab69f0c3f8 100644
--- a/ydb/core/mon/mon.h
+++ b/ydb/core/mon/mon.h
@@ -47,11 +47,12 @@ public:
TActorId ActorId;
bool UseAuth = true;
TVector<TString> AllowedSIDs;
+ bool SortPages = true;
};
virtual NMonitoring::IMonPage* RegisterActorPage(TRegisterActorPageFields fields) = 0;
NMonitoring::IMonPage* RegisterActorPage(NMonitoring::TIndexMonPage* index, const TString& relPath,
- const TString& title, bool preTag, TActorSystem* actorSystem, const TActorId& actorId, bool useAuth = true);
+ const TString& title, bool preTag, TActorSystem* actorSystem, const TActorId& actorId, bool useAuth = true, bool sortPages = true);
virtual NMonitoring::IMonPage* RegisterCountersPage(const TString& path, const TString& title, TIntrusivePtr<::NMonitoring::TDynamicCounters> counters) = 0;
virtual NMonitoring::IMonPage* FindPage(const TString& relPath) = 0;
};
diff --git a/ydb/core/mon/sync_http_mon.cpp b/ydb/core/mon/sync_http_mon.cpp
index 6c863c55c1..8506344933 100644
--- a/ydb/core/mon/sync_http_mon.cpp
+++ b/ydb/core/mon/sync_http_mon.cpp
@@ -77,7 +77,9 @@ namespace NActors {
fields.UseAuth ? Config.Authorizer : TRequestAuthorizer());
if (fields.Index) {
fields.Index->Register(page);
- fields.Index->SortPages();
+ if (fields.SortPages) {
+ fields.Index->SortPages();
+ }
} else {
Register(page);
}
diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto
index 78ac72adc7..158dbc32f5 100644
--- a/ydb/core/protos/config.proto
+++ b/ydb/core/protos/config.proto
@@ -437,6 +437,7 @@ message TInterconnectConfig {
optional bool SuppressConnectivityCheck = 39 [default = false];
optional uint32 PreallocatedBufferSize = 40;
optional uint32 NumPreallocatedBuffers = 41;
+ optional uint32 SocketBacklogSize = 45; // SOMAXCONN if not set or zero
// ballast is added to IC handshake frames to ensure correctness of jumbo frames transmission over network
optional uint32 HandshakeBallastSize = 14;
@@ -453,6 +454,8 @@ message TInterconnectConfig {
optional NKikimrConfigUnits.TDuration ForceConfirmPeriodDuration = 27;
optional NKikimrConfigUnits.TDuration LostConnectionDuration = 28;
optional NKikimrConfigUnits.TDuration BatchPeriodDuration = 29;
+
+ optional uint32 OutgoingHandshakeInflightLimit = 43;
}
message TChannelProfileConfig {
@@ -1211,6 +1214,18 @@ message TTableServiceConfig {
optional uint32 MaxRetryNumber = 3 [default = 10];
}
+ message TIteratorReadsRetrySettings {
+ optional uint32 StartDelayMs = 1;
+ optional uint32 MaxDelayMs = 8;
+
+ optional uint32 MaxShardRetries = 2;
+ optional uint32 MaxShardResolves = 3;
+ optional double UnsertaintyRatio = 4;
+ optional double Multiplier = 5;
+ optional uint32 IteratorResponseTimeoutMs = 6;
+ optional uint32 MaxTotalRetries = 7;
+ }
+
optional uint32 QueryLimitBytes = 1;
optional uint32 ParametersLimitBytes = 2;
optional uint32 SessionsLimitPerNode = 3;
@@ -1249,6 +1264,7 @@ message TTableServiceConfig {
optional bool EnablePredicateExtractForScanQueries = 36 [default = true];
optional bool EnablePredicateExtractForDataQueries = 37 [default = true];
optional bool EnableKqpImmediateEffects = 38 [default = false];
+ optional TIteratorReadsRetrySettings IteratorReadsRetrySettings = 41;
};
// Config describes immediate controls and allows
@@ -1363,7 +1379,7 @@ message TImmediateControlsConfig {
Description: "Enables experimental persistent locked writes",
MinValue: 0,
MaxValue: 1,
- DefaultValue: 0 }];
+ DefaultValue: 1 }];
optional uint64 MaxLockedWritesPerKey = 15 [(ControlOptions) = {
Description: "Maximum number of uncommitted locked writes per key",
MinValue: 0,
@@ -1420,10 +1436,39 @@ message TImmediateControlsConfig {
DefaultValue: 0 }];
}
+ message TTCMallocControls {
+ optional uint64 ProfileSamplingRate = 1 [(ControlOptions) = {
+ Description: "Sets the sampling rate for heap profiles. TCMalloc samples approximately every rate bytes allocated.",
+ MinValue: 65536,
+ MaxValue: 4294967296,
+ DefaultValue: 2097152 }];
+ optional uint64 GuardedSamplingRate = 2 [(ControlOptions) = {
+ Description: "Sets the guarded sampling rate for sampled allocations. TCMalloc samples approximately every rate bytes allocated, subject to implementation limitations in GWP-ASan.",
+ MinValue: 65536,
+ MaxValue: 4294967296,
+ DefaultValue: 4294967296 }];
+ optional uint64 MemoryLimit = 3 [(ControlOptions) = {
+ Description: "Make a best effort attempt to prevent more than limit bytes of memory from being allocated by the system.",
+ MinValue: 0,
+ MaxValue: 9223372036854775807,
+ DefaultValue: 0 }];
+ optional uint64 PageCacheTargetSize = 4 [(ControlOptions) = {
+ Description: "Page Cache Target Size.",
+ MinValue: 0,
+ MaxValue: 137438953472,
+ DefaultValue: 536870912 }];
+ optional uint64 PageCacheReleaseRate = 5 [(ControlOptions) = {
+ Description: "Page Cache Release Rate.",
+ MinValue: 0,
+ MaxValue: 134217728,
+ DefaultValue: 8388608 }];
+ }
+
optional TDataShardControls DataShardControls = 1;
optional TTxLimitControls TxLimitControls = 2;
optional TCoordinatorControls CoordinatorControls = 3;
optional TSchemeShardControls SchemeShardControls = 4;
+ optional TTCMallocControls TCMallocControls = 5;
};
message TMeteringConfig {
diff --git a/ydb/core/protos/console_config.proto b/ydb/core/protos/console_config.proto
index 5c1d0c039d..7309041aeb 100644
--- a/ydb/core/protos/console_config.proto
+++ b/ydb/core/protos/console_config.proto
@@ -270,7 +270,8 @@ message TGetAllConfigsRequest {
}
message TGetAllConfigsResponse {
- optional Ydb.DynamicConfig.GetConfigResult Response = 1;
+ reserved 1;
+ optional Ydb.DynamicConfig.GetConfigResult Response = 2;
}
message TGetNodeLabelsRequest {
diff --git a/ydb/core/protos/flat_scheme_op.proto b/ydb/core/protos/flat_scheme_op.proto
index 1ab1a3eaeb..52ad83dfe8 100644
--- a/ydb/core/protos/flat_scheme_op.proto
+++ b/ydb/core/protos/flat_scheme_op.proto
@@ -101,6 +101,10 @@ message TStorageConfig {
optional uint32 ExternalThreshold = 6;
}
+message TKeyValueStorageConfig {
+ repeated TStorageSettings Channel = 3;
+}
+
message TFamilyDescription {
optional uint32 Id = 1;
optional uint32 Room = 2; // Used by datashard, must not be used by users
@@ -1089,6 +1093,8 @@ message TCreateSolomonVolume {
optional uint64 PartitionCount = 3; // it is a mutually exclusive parametr
repeated TAdoptedPartition AdoptedPartitions = 4; // with this one
+
+ optional TKeyValueStorageConfig StorageConfig = 5;
}
message TAlterSolomonVolume {
@@ -1099,6 +1105,8 @@ message TAlterSolomonVolume {
optional uint64 PartitionCount = 3;
optional bool UpdateChannelsBinding = 4 [default = false];
+
+ optional TKeyValueStorageConfig StorageConfig = 5;
}
message TBlockStoreAssignOp {
diff --git a/ydb/core/protos/tx_datashard.proto b/ydb/core/protos/tx_datashard.proto
index 7fe189aab0..1adf90944f 100644
--- a/ydb/core/protos/tx_datashard.proto
+++ b/ydb/core/protos/tx_datashard.proto
@@ -1643,6 +1643,9 @@ message TEvRead {
// When specified requests are handled in reverse order as well as range reads
optional bool Reverse = 10;
+ // Limits the total number of rows the iterator can read.
+ optional uint64 TotalRowsLimit = 12;
+
// Request must contain either keys, queries or program
// mixed requests are not supported
diff --git a/ydb/core/tablet/tablet_counters_aggregator.cpp b/ydb/core/tablet/tablet_counters_aggregator.cpp
index a88826428d..8b28e47e69 100644
--- a/ydb/core/tablet/tablet_counters_aggregator.cpp
+++ b/ydb/core/tablet/tablet_counters_aggregator.cpp
@@ -315,7 +315,8 @@ public:
TTabletTypes::DataShard, CountersByTabletType);
auto hasSchemeshard = (bool)FindCountersByTabletType(
TTabletTypes::SchemeShard, CountersByTabletType);
- YdbCounters->Initialize(Counters, hasDatashard, hasSchemeshard);
+ bool hasColumnShard = static_cast<bool>(FindCountersByTabletType(TTabletTypes::ColumnShard, CountersByTabletType));
+ YdbCounters->Initialize(Counters, hasDatashard, hasSchemeshard, hasColumnShard);
YdbCounters->Transform();
}
}
@@ -758,6 +759,10 @@ private:
TCounterPtr ScanBytes;
TCounterPtr DatashardRowCount;
TCounterPtr DatashardSizeBytes;
+ TCounterPtr ColumnShardScanRows_;
+ TCounterPtr ColumnShardScanBytes_;
+ TCounterPtr ColumnShardBulkUpsertRows_;
+ TCounterPtr ColumnShardBulkUpsertBytes_;
TCounterPtr ResourcesStorageUsedBytes;
TCounterPtr ResourcesStorageLimitBytes;
TCounterPtr ResourcesStorageTableUsedBytes;
@@ -787,6 +792,11 @@ private:
TCounterPtr DbUniqueDataBytes;
THistogramPtr ConsumedCpuHistogram;
+ TCounterPtr ColumnShardScannedBytes_;
+ TCounterPtr ColumnShardScannedRows_;
+ TCounterPtr ColumnShardUpsertBlobsWritten_;
+ TCounterPtr ColumnShardUpsertBytesWritten_;
+
TCounterPtr DiskSpaceTablesTotalBytes;
TCounterPtr DiskSpaceTopicsTotalBytes;
TCounterPtr DiskSpaceSoftQuotaBytes;
@@ -826,6 +836,15 @@ private:
DatashardSizeBytes = ydbGroup->GetNamedCounter("name",
"table.datashard.size_bytes", false);
+ ColumnShardScanRows_ = ydbGroup->GetNamedCounter("name",
+ "table.columnshard.scan.rows", false);
+ ColumnShardScanBytes_ = ydbGroup->GetNamedCounter("name",
+ "table.columnshard.scan.bytes", false);
+ ColumnShardBulkUpsertRows_ = ydbGroup->GetNamedCounter("name",
+ "table.columnshard.bulk_upsert.rows", false);
+ ColumnShardBulkUpsertBytes_ = ydbGroup->GetNamedCounter("name",
+ "table.columnshard.bulk_upsert.bytes", false);
+
ResourcesStorageUsedBytes = ydbGroup->GetNamedCounter("name",
"resources.storage.used_bytes", false);
ResourcesStorageLimitBytes = ydbGroup->GetNamedCounter("name",
@@ -856,7 +875,7 @@ private:
"table.datashard.used_core_percents", NMonitoring::LinearHistogram(12, 0, 10), false);
};
- void Initialize(::NMonitoring::TDynamicCounterPtr counters, bool hasDatashard, bool hasSchemeshard) {
+ void Initialize(::NMonitoring::TDynamicCounterPtr counters, bool hasDatashard, bool hasSchemeshard, bool hasColumnShard) {
if (hasDatashard && !RowUpdates) {
auto datashardGroup = counters->GetSubgroup("type", "DataShard");
auto appGroup = datashardGroup->GetSubgroup("category", "app");
@@ -881,6 +900,16 @@ private:
ConsumedCpuHistogram = execGroup->FindHistogram("HIST(ConsumedCPU)");
}
+ if (hasColumnShard && !ColumnShardScannedBytes_) {
+ auto columnshardGroup = counters->GetSubgroup("type", "ColumnShard");
+ auto appGroup = columnshardGroup->GetSubgroup("category", "app");
+
+ ColumnShardScannedBytes_ = appGroup->GetCounter("ColumnShard/ScannedBytes");
+ ColumnShardScannedRows_ = appGroup->GetCounter("ColumnShard/ScannedRows");
+ ColumnShardUpsertBlobsWritten_ = appGroup->GetCounter("ColumnShard/UpsertBlobsWritten");
+ ColumnShardUpsertBytesWritten_ = appGroup->GetCounter("ColumnShard/UpsertBytesWritten");
+ }
+
if (hasSchemeshard && !DiskSpaceTablesTotalBytes) {
auto schemeshardGroup = counters->GetSubgroup("type", "SchemeShard");
auto appGroup = schemeshardGroup->GetSubgroup("category", "app");
@@ -917,6 +946,13 @@ private:
}
}
+ if (ColumnShardScannedBytes_) {
+ ColumnShardScanRows_->Set(ColumnShardScannedRows_->Val());
+ ColumnShardScanBytes_->Set(ColumnShardScannedBytes_->Val());
+ ColumnShardBulkUpsertRows_->Set(ColumnShardUpsertBlobsWritten_->Val());
+ ColumnShardBulkUpsertBytes_->Set(ColumnShardUpsertBytesWritten_->Val());
+ }
+
if (DiskSpaceTablesTotalBytes) {
ResourcesStorageLimitBytes->Set(DiskSpaceSoftQuotaBytes->Val());
ResourcesStorageTableUsedBytes->Set(DiskSpaceTablesTotalBytes->Val());
@@ -1000,7 +1036,8 @@ public:
if (YdbCounters) {
auto hasDatashard = (bool)GetCounters(TTabletTypes::DataShard);
auto hasSchemeshard = (bool)GetCounters(TTabletTypes::SchemeShard);
- YdbCounters->Initialize(SolomonCounters, hasDatashard, hasSchemeshard);
+ auto hasColumnshard = static_cast<bool>(GetCounters(TTabletTypes::ColumnShard));
+ YdbCounters->Initialize(SolomonCounters, hasDatashard, hasSchemeshard, hasColumnshard);
YdbCounters->Transform();
}
}
diff --git a/ydb/core/tablet/tablet_counters_aggregator_ut.cpp b/ydb/core/tablet/tablet_counters_aggregator_ut.cpp
index 8d9318696c..99b03f2ac2 100644
--- a/ydb/core/tablet/tablet_counters_aggregator_ut.cpp
+++ b/ydb/core/tablet/tablet_counters_aggregator_ut.cpp
@@ -91,10 +91,11 @@ void TestHeavy(const ui32 v, ui32 numWorkers) {
Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
struct TTabletWithHist {
- TTabletWithHist(ui64 tabletId)
+ TTabletWithHist(ui64 tabletId, const TTabletTypes::EType tabletType)
: TabletId(tabletId)
, TenantPathId(1113, 1001)
, CounterEventsInFlight(new TEvTabletCounters::TInFlightCookie)
+ , TabletType(tabletType)
, ExecutorCounters(new TTabletCountersBase)
{
auto simpleCount = sizeof(SimpleCountersMetaInfo) / sizeof(SimpleCountersMetaInfo[0]);
@@ -157,11 +158,11 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
}
public:
- static ::NMonitoring::TDynamicCounterPtr GetAppCounters(TTestBasicRuntime& runtime) {
+ static ::NMonitoring::TDynamicCounterPtr GetAppCounters(TTestBasicRuntime& runtime, const TTabletTypes::EType tabletType) {
::NMonitoring::TDynamicCounterPtr counters = runtime.GetAppData(0).Counters;
UNIT_ASSERT(counters);
- TString tabletTypeStr = TTabletTypes::TypeToStr(TabletType);
+ TString tabletTypeStr = TTabletTypes::TypeToStr(tabletType);
auto dsCounters = counters->GetSubgroup("counters", "tablets")->GetSubgroup("type", tabletTypeStr);
return dsCounters->GetSubgroup("category", "app");
}
@@ -185,12 +186,12 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
return StringToIndex(name, PercentileCountersMetaInfo);
}
- static NMonitoring::THistogramPtr GetHistogram(TTestBasicRuntime& runtime, const char* name) {
+ static NMonitoring::THistogramPtr GetHistogram(TTestBasicRuntime& runtime, const char* name, const TTabletTypes::EType tabletType) {
size_t index = PercentileNameToIndex(name);
- return GetAppCounters(runtime)->FindHistogram(PercentileCountersMetaInfo[index]);
+ return GetAppCounters(runtime, tabletType)->FindHistogram(PercentileCountersMetaInfo[index]);
}
- static std::vector<ui64> GetOldHistogram(TTestBasicRuntime& runtime, const char* name) {
+ static std::vector<ui64> GetOldHistogram(TTestBasicRuntime& runtime, const char* name, const TTabletTypes::EType tabletType) {
size_t index = PercentileNameToIndex(name);
auto rangesArray = RangeDefs[index].first;
auto rangeCount = RangeDefs[index].second;
@@ -200,7 +201,7 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
ranges.back().RangeName = "inf";
ranges.back().RangeVal = Max<ui64>();
- auto appCounters = GetAppCounters(runtime);
+ auto appCounters = GetAppCounters(runtime, tabletType);
std::vector<ui64> buckets;
for (auto i: xrange(ranges.size())) {
auto subGroup = appCounters->GetSubgroup("range", ranges[i].RangeName);
@@ -217,10 +218,12 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
TTestBasicRuntime& runtime,
const char* name,
const std::vector<ui64>& goldValuesNew,
- const std::vector<ui64>& goldValuesOld)
+ const std::vector<ui64>& goldValuesOld,
+ const TTabletTypes::EType tabletType
+ )
{
// new stype histogram
- auto histogram = TTabletWithHist::GetHistogram(runtime, name);
+ auto histogram = TTabletWithHist::GetHistogram(runtime, name, tabletType);
UNIT_ASSERT(histogram);
auto snapshot = histogram->Snapshot();
UNIT_ASSERT(snapshot);
@@ -236,7 +239,7 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
}
// old histogram
- auto values = TTabletWithHist::GetOldHistogram(runtime, name);
+ auto values = TTabletWithHist::GetOldHistogram(runtime, name, tabletType);
UNIT_ASSERT_VALUES_EQUAL(values.size(), goldValuesOld.size());
UNIT_ASSERT_VALUES_EQUAL(values, goldValuesOld);
}
@@ -245,6 +248,7 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
ui64 TabletId;
TPathId TenantPathId;
TIntrusivePtr<TEvTabletCounters::TInFlightCookie> CounterEventsInFlight;
+ const TTabletTypes::EType TabletType;
std::unique_ptr<TTabletCountersBase> ExecutorCounters;
std::unique_ptr<TTabletCountersBase> ExecutorCountersBaseline;
@@ -253,8 +257,6 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
std::unique_ptr<TTabletCountersBase> AppCountersBaseline;
public:
- static constexpr TTabletTypes::EType TabletType = TTabletTypes::DataShard;
-
static constexpr TTabletPercentileCounter::TRangeDef RangeDefs1[] = {
{0, "0"}
};
@@ -305,12 +307,12 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
options.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, 1);
runtime.DispatchEvents(options);
- TTabletWithHist tablet1(1);
+ TTabletWithHist tablet1(1, TTabletTypes::DataShard);
tablet1.SetSimpleCount("CountSingleBucket", 1);
tablet1.SendUpdate(runtime, aggregatorId, edge);
- TTabletWithHist tablet2(2);
+ TTabletWithHist tablet2(2, TTabletTypes::DataShard);
tablet2.SetSimpleCount("CountSingleBucket", 13);
tablet2.SendUpdate(runtime, aggregatorId, edge);
@@ -318,7 +320,8 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
runtime,
"HIST(CountSingleBucket)",
{0, 2},
- {0, 2}
+ {0, 2},
+ TTabletTypes::DataShard
);
// sanity check we didn't mess other histograms
@@ -327,21 +330,24 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
runtime,
"MyHist",
{0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0}
+ {0, 0, 0, 0, 0},
+ TTabletTypes::DataShard
);
TTabletWithHist::CheckHistogram(
runtime,
"HIST(Count)",
{2, 0, 0, 0, 0},
- {2, 0, 0, 0, 0}
+ {2, 0, 0, 0, 0},
+ TTabletTypes::DataShard
);
TTabletWithHist::CheckHistogram(
runtime,
"MyHistSingleBucket",
{0, 0},
- {0, 0}
+ {0, 0},
+ TTabletTypes::DataShard
);
}
@@ -361,7 +367,7 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
options.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, 1);
runtime.DispatchEvents(options);
- TTabletWithHist tablet1(1);
+ TTabletWithHist tablet1(1, TTabletTypes::DataShard);
tablet1.SetSimpleCount("Count", 1);
tablet1.SendUpdate(runtime, aggregatorId, edge);
@@ -370,10 +376,11 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
runtime,
"HIST(Count)",
{0, 1, 0, 0, 0},
- {0, 1, 0, 0, 0}
+ {0, 1, 0, 0, 0},
+ TTabletTypes::DataShard
);
- TTabletWithHist tablet2(2);
+ TTabletWithHist tablet2(2, TTabletTypes::DataShard);
tablet2.SetSimpleCount("Count", 13);
tablet2.SendUpdate(runtime, aggregatorId, edge);
@@ -381,10 +388,11 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
runtime,
"HIST(Count)",
{0, 1, 1, 0, 0},
- {0, 1, 1, 0, 0}
+ {0, 1, 1, 0, 0},
+ TTabletTypes::DataShard
);
- TTabletWithHist tablet3(3);
+ TTabletWithHist tablet3(3, TTabletTypes::DataShard);
tablet3.SetSimpleCount("Count", 1);
tablet3.SendUpdate(runtime, aggregatorId, edge);
@@ -392,7 +400,8 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
runtime,
"HIST(Count)",
{0, 2, 1, 0, 0},
- {0, 2, 1, 0, 0}
+ {0, 2, 1, 0, 0},
+ TTabletTypes::DataShard
);
tablet3.SetSimpleCount("Count", 13);
@@ -402,7 +411,8 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
runtime,
"HIST(Count)",
{0, 1, 2, 0, 0},
- {0, 1, 2, 0, 0}
+ {0, 1, 2, 0, 0},
+ TTabletTypes::DataShard
);
tablet3.ForgetTablet(runtime, aggregatorId, edge);
@@ -411,7 +421,8 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
runtime,
"HIST(Count)",
{0, 1, 1, 0, 0},
- {0, 1, 1, 0, 0}
+ {0, 1, 1, 0, 0},
+ TTabletTypes::DataShard
);
// sanity check we didn't mess other histograms
@@ -420,21 +431,24 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
runtime,
"MyHist",
{0, 0, 0, 0, 0},
- {0, 0, 0, 0, 0}
+ {0, 0, 0, 0, 0},
+ TTabletTypes::DataShard
);
TTabletWithHist::CheckHistogram(
runtime,
"HIST(CountSingleBucket)",
{2, 0},
- {2, 0}
+ {2, 0},
+ TTabletTypes::DataShard
);
TTabletWithHist::CheckHistogram(
runtime,
"MyHistSingleBucket",
{0, 0},
- {0, 0}
+ {0, 0},
+ TTabletTypes::DataShard
);
}
@@ -457,7 +471,7 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
options.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, 1);
runtime.DispatchEvents(options);
- TTabletWithHist tablet1(1);
+ TTabletWithHist tablet1(1, TTabletTypes::DataShard);
tablet1.SetSimpleCount("Count", Max<i64>() - 100UL);
tablet1.SendUpdate(runtime, aggregatorId, edge);
@@ -466,10 +480,11 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
runtime,
"HIST(Count)",
{0, 0, 0, 0, 1},
- {0, 0, 0, 0, 1}
+ {0, 0, 0, 0, 1},
+ TTabletTypes::DataShard
);
- TTabletWithHist tablet2(2);
+ TTabletWithHist tablet2(2, TTabletTypes::DataShard);
tablet2.SetSimpleCount("Count", 100);
tablet2.SendUpdate(runtime, aggregatorId, edge);
@@ -477,7 +492,8 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
runtime,
"HIST(Count)",
{0, 0, 0, 0, 2},
- {0, 0, 0, 0, 2}
+ {0, 0, 0, 0, 2},
+ TTabletTypes::DataShard
);
}
@@ -498,7 +514,7 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
options.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, 1);
runtime.DispatchEvents(options);
- TTabletWithHist tablet1(1);
+ TTabletWithHist tablet1(1, TTabletTypes::DataShard);
tablet1.UpdatePercentile("MyHist", 1);
tablet1.SendUpdate(runtime, aggregatorId, edge);
tablet1.SendUpdate(runtime, aggregatorId, edge);
@@ -507,7 +523,8 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
runtime,
"MyHist",
{0, 1, 0, 0, 0},
- {0, 1, 0, 0, 0}
+ {0, 1, 0, 0, 0},
+ TTabletTypes::DataShard
);
tablet1.UpdatePercentile("MyHist", 13);
@@ -518,7 +535,8 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
runtime,
"MyHist",
{0, 1, 1, 0, 0},
- {0, 1, 1, 0, 0}
+ {0, 1, 1, 0, 0},
+ TTabletTypes::DataShard
);
tablet1.UpdatePercentile("MyHist", 1);
@@ -531,7 +549,8 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
runtime,
"MyHist",
{0, 3, 1, 0, 1},
- {0, 3, 1, 0, 1}
+ {0, 3, 1, 0, 1},
+ TTabletTypes::DataShard
);
}
@@ -551,15 +570,15 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
options.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, 1);
runtime.DispatchEvents(options);
- TTabletWithHist tablet1(1);
+ TTabletWithHist tablet1(1, TTabletTypes::DataShard);
tablet1.UpdatePercentile("MyHist", 1);
tablet1.SendUpdate(runtime, aggregatorId, edge);
- TTabletWithHist tablet2(2);
+ TTabletWithHist tablet2(2, TTabletTypes::DataShard);
tablet2.UpdatePercentile("MyHist", 1);
tablet2.SendUpdate(runtime, aggregatorId, edge);
- TTabletWithHist tablet3(3);
+ TTabletWithHist tablet3(3, TTabletTypes::DataShard);
tablet3.UpdatePercentile("MyHist", 1);
tablet3.UpdatePercentile("MyHist", 13);
tablet3.SendUpdate(runtime, aggregatorId, edge);
@@ -568,7 +587,8 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
runtime,
"MyHist",
{0, 3, 1, 0, 0},
- {0, 3, 1, 0, 0}
+ {0, 3, 1, 0, 0},
+ TTabletTypes::DataShard
);
tablet3.ForgetTablet(runtime, aggregatorId, edge);
@@ -577,7 +597,8 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
runtime,
"MyHist",
{0, 2, 0, 0, 0},
- {0, 2, 0, 0, 0}
+ {0, 2, 0, 0, 0},
+ TTabletTypes::DataShard
);
// sanity check we didn't mess other histograms
@@ -586,21 +607,24 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
runtime,
"HIST(Count)",
{2, 0, 0, 0, 0},
- {2, 0, 0, 0, 0}
+ {2, 0, 0, 0, 0},
+ TTabletTypes::DataShard
);
TTabletWithHist::CheckHistogram(
runtime,
"MyHistSingleBucket",
{0, 0},
- {0, 0}
+ {0, 0},
+ TTabletTypes::DataShard
);
TTabletWithHist::CheckHistogram(
runtime,
"HIST(CountSingleBucket)",
{2, 0},
- {2, 0}
+ {2, 0},
+ TTabletTypes::DataShard
);
}
@@ -619,15 +643,15 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
options.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, 1);
runtime.DispatchEvents(options);
- TTabletWithHist tablet1(1);
+ TTabletWithHist tablet1(1, TTabletTypes::DataShard);
tablet1.UpdatePercentile("MyHist", 10, Max<i64>() - 100);
tablet1.SendUpdate(runtime, aggregatorId, edge);
- TTabletWithHist tablet2(2);
+ TTabletWithHist tablet2(2, TTabletTypes::DataShard);
tablet2.UpdatePercentile("MyHist", 10, 25);
tablet2.SendUpdate(runtime, aggregatorId, edge);
- TTabletWithHist tablet3(3);
+ TTabletWithHist tablet3(3, TTabletTypes::DataShard);
tablet3.UpdatePercentile("MyHist", 10, 5);
tablet3.SendUpdate(runtime, aggregatorId, edge);
@@ -636,7 +660,8 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
runtime,
"MyHist",
{0, 0, v, 0, 0},
- {0, 0, v, 0, 0}
+ {0, 0, v, 0, 0},
+ TTabletTypes::DataShard
);
tablet1.ForgetTablet(runtime, aggregatorId, edge);
@@ -644,7 +669,36 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) {
runtime,
"MyHist",
{0, 0, 30, 0, 0},
- {0, 0, 30, 0, 0}
+ {0, 0, 30, 0, 0},
+ TTabletTypes::DataShard
+ );
+ }
+
+ Y_UNIT_TEST(ColumnShardCounters) {
+ TTestBasicRuntime runtime(1);
+
+ runtime.Initialize(TAppPrepare().Unwrap());
+ TActorId edge = runtime.AllocateEdgeActor();
+
+ auto aggregator = CreateTabletCountersAggregator(false);
+ auto aggregatorId = runtime.Register(aggregator);
+ runtime.EnableScheduleForActor(aggregatorId);
+
+ TDispatchOptions options;
+ options.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, 1);
+ runtime.DispatchEvents(options);
+
+ TTabletWithHist tablet1(1, TTabletTypes::ColumnShard);
+
+ tablet1.SetSimpleCount("Count", 1);
+ tablet1.SendUpdate(runtime, aggregatorId, edge);
+
+ TTabletWithHist::CheckHistogram(
+ runtime,
+ "HIST(Count)",
+ {0, 1, 0, 0, 0},
+ {0, 1, 0, 0, 0},
+ tablet1.TabletType
);
}
}
diff --git a/ydb/core/tx/columnshard/blob_manager.cpp b/ydb/core/tx/columnshard/blob_manager.cpp
index ae25078d77..115881be0f 100644
--- a/ydb/core/tx/columnshard/blob_manager.cpp
+++ b/ydb/core/tx/columnshard/blob_manager.cpp
@@ -516,7 +516,10 @@ void TBlobManager::DeleteBlob(const TUnifiedBlobId& blobId, IBlobManagerDb& db)
LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delete Blob " << blobId);
TLogoBlobID logoBlobId = blobId.GetLogoBlobId();
BlobsToDelete.insert(logoBlobId);
- NBlobCache::ForgetBlob(blobId);
+
+ if (!EvictedBlobs.contains(TEvictedBlob{.Blob = blobId})) {
+ NBlobCache::ForgetBlob(blobId);
+ }
} else {
LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delay Delete Blob " << blobId);
BlobsToDeleteDelayed.insert(blobId.GetLogoBlobId());
@@ -602,8 +605,13 @@ bool TBlobManager::UpdateOneToOne(TEvictedBlob&& evict, IBlobManagerDb& db, bool
}
bool TBlobManager::EraseOneToOne(const TEvictedBlob& evict, IBlobManagerDb& db) {
- db.EraseEvictBlob(evict);
- return DroppedEvictedBlobs.erase(evict);
+ Y_VERIFY_DEBUG(!EvictedBlobs.contains(evict)); // erase before drop
+
+ if (DroppedEvictedBlobs.erase(evict)) {
+ db.EraseEvictBlob(evict);
+ return true;
+ }
+ return false;
}
bool TBlobManager::LoadOneToOneExport(IBlobManagerDb& db, THashSet<TUnifiedBlobId>& droppedEvicting) {
@@ -656,16 +664,26 @@ TEvictedBlob TBlobManager::GetDropped(const TUnifiedBlobId& blobId, TEvictMetada
return {};
}
-void TBlobManager::GetCleanupBlobs(THashSet<TEvictedBlob>& cleanup) const {
- TString strBlobs;
- for (auto& [evict, _] : DroppedEvictedBlobs) {
+void TBlobManager::GetCleanupBlobs(THashMap<TString, THashSet<TEvictedBlob>>& tierBlobs) const {
+ TStringBuilder strBlobs;
+ for (auto& [evict, meta] : DroppedEvictedBlobs) {
if (evict.State != EEvictState::EVICTING) {
- strBlobs += "'" + evict.Blob.ToStringNew() + "' ";
- cleanup.insert(evict);
+ strBlobs << "'" << evict.Blob.ToStringNew() << "' ";
+ auto& tierName = meta.GetTierName();
+ tierBlobs[tierName].emplace(evict);
}
}
if (!strBlobs.empty()) {
- LOG_S_NOTICE("Cleanup evicted blobs " << strBlobs << "at tablet " << TabletInfo->TabletID);
+ LOG_S_DEBUG("Cleanup evicted blobs " << strBlobs << "at tablet " << TabletInfo->TabletID);
+ }
+}
+
+void TBlobManager::GetReexportBlobs(THashMap<TString, THashSet<TEvictedBlob>>& tierBlobs) const {
+ for (auto& [evict, meta] : EvictedBlobs) {
+ if (evict.State == EEvictState::EVICTING) {
+ auto& tierName = meta.GetTierName();
+ tierBlobs[tierName].emplace(evict);
+ }
}
}
@@ -704,6 +722,7 @@ void TBlobManager::SetBlobInUse(const TUnifiedBlobId& blobId, bool inUse) {
return;
}
+ LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Blob " << blobId << " is no longer in use");
BlobsUseCount.erase(useIt);
// Check if the blob is marked for delayed deletion
@@ -718,7 +737,10 @@ void TBlobManager::SetBlobInUse(const TUnifiedBlobId& blobId, bool inUse) {
if (BlobsToDeleteDelayed.erase(logoBlobId)) {
LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delete Delayed Blob " << blobId);
BlobsToDelete.insert(logoBlobId);
- NBlobCache::ForgetBlob(blobId);
+
+ if (!EvictedBlobs.contains(TEvictedBlob{.Blob = blobId})) {
+ NBlobCache::ForgetBlob(blobId);
+ }
}
}
}
diff --git a/ydb/core/tx/columnshard/blob_manager.h b/ydb/core/tx/columnshard/blob_manager.h
index f9922a928d..0347d36722 100644
--- a/ydb/core/tx/columnshard/blob_manager.h
+++ b/ydb/core/tx/columnshard/blob_manager.h
@@ -97,7 +97,8 @@ public:
virtual bool LoadOneToOneExport(IBlobManagerDb& db, THashSet<TUnifiedBlobId>& droppedEvicting) = 0;
virtual TEvictedBlob GetEvicted(const TUnifiedBlobId& blob, TEvictMetadata& meta) = 0;
virtual TEvictedBlob GetDropped(const TUnifiedBlobId& blobId, TEvictMetadata& meta) = 0;
- virtual void GetCleanupBlobs(THashSet<TEvictedBlob>& cleanup) const = 0;
+ virtual void GetCleanupBlobs(THashMap<TString, THashSet<TEvictedBlob>>& tierBlobs) const = 0;
+ virtual void GetReexportBlobs(THashMap<TString, THashSet<TEvictedBlob>>& tierBlobs) const = 0;
virtual bool HasExternBlobs() const = 0;
};
@@ -241,7 +242,8 @@ public:
bool LoadOneToOneExport(IBlobManagerDb& db, THashSet<TUnifiedBlobId>& droppedEvicting) override;
TEvictedBlob GetEvicted(const TUnifiedBlobId& blobId, TEvictMetadata& meta) override;
TEvictedBlob GetDropped(const TUnifiedBlobId& blobId, TEvictMetadata& meta) override;
- void GetCleanupBlobs(THashSet<TEvictedBlob>& cleanup) const override;
+ void GetCleanupBlobs(THashMap<TString, THashSet<TEvictedBlob>>& tierBlobs) const override;
+ void GetReexportBlobs(THashMap<TString, THashSet<TEvictedBlob>>& tierBlobs) const override;
bool HasExternBlobs() const override {
return EvictedBlobs.size() || DroppedEvictedBlobs.size();
diff --git a/ydb/core/tx/columnshard/columnshard.cpp b/ydb/core/tx/columnshard/columnshard.cpp
index 0739271b2c..a9ec91271c 100644
--- a/ydb/core/tx/columnshard/columnshard.cpp
+++ b/ydb/core/tx/columnshard/columnshard.cpp
@@ -121,6 +121,9 @@ void TColumnShard::Handle(TEvPrivate::TEvReadFinished::TPtr& ev, const TActorCon
ScanTxInFlight.erase(txId);
SetCounter(COUNTER_SCAN_IN_FLY, ScanTxInFlight.size());
}
+
+ // Cleanup just freed dropped exported blobs
+ CleanForgottenBlobs(ctx);
}
void TColumnShard::Handle(TEvPrivate::TEvPeriodicWakeup::TPtr& ev, const TActorContext& ctx) {
diff --git a/ydb/core/tx/columnshard/columnshard__export.cpp b/ydb/core/tx/columnshard/columnshard__export.cpp
index 92ee25ad99..2d6db8dc28 100644
--- a/ydb/core/tx/columnshard/columnshard__export.cpp
+++ b/ydb/core/tx/columnshard/columnshard__export.cpp
@@ -19,7 +19,7 @@ public:
private:
TEvPrivate::TEvExport::TPtr Ev;
- THashSet<NOlap::TEvictedBlob> BlobsToForget;
+ THashMap<TString, THashSet<NOlap::TEvictedBlob>> BlobsToForget;
};
@@ -47,14 +47,8 @@ bool TTxExportFinish::Execute(TTransactionContext& txc, const TActorContext&) {
continue; // not exported
}
-#if 0 // TODO: SELF_CACHED logic
- NOlap::TEvictedBlob evict{
- .State = EEvictState::SELF_CACHED,
- .Blob = blobId,
- .ExternBlob = externId
- };
- Self->BlobManager->UpdateOneToOne(std::move(evict), blobManagerDb, dropped);
-#else
+ // TODO: SELF_CACHED logic
+
NOlap::TEvictedBlob evict{
.State = EEvictState::EXTERN,
.Blob = blobId,
@@ -75,13 +69,10 @@ bool TTxExportFinish::Execute(TTransactionContext& txc, const TActorContext&) {
evict = Self->BlobManager->GetDropped(blobId, meta);
Y_VERIFY(evict.State == EEvictState::EXTERN);
- BlobsToForget.emplace(std::move(evict));
+ BlobsToForget[meta.GetTierName()].emplace(std::move(evict));
} else {
LOG_S_ERROR("Unknown blob exported '" << blobId.ToStringNew() << "' at tablet " << Self->TabletID());
}
-
- // TODO: delete not present in S3 for sure (avoid race between export and forget)
-#endif
}
}
@@ -101,28 +92,20 @@ void TTxExportFinish::Complete(const TActorContext& ctx) {
if (!BlobsToForget.empty()) {
Self->ForgetBlobs(ctx, BlobsToForget);
}
-
- Y_VERIFY(Self->ActiveEvictions, "Unexpected active evictions count at tablet %lu", Self->TabletID());
- --Self->ActiveEvictions;
}
void TColumnShard::Handle(TEvPrivate::TEvExport::TPtr& ev, const TActorContext& ctx) {
auto& msg = *ev->Get();
auto status = msg.Status;
+ Y_VERIFY(status != NKikimrProto::UNKNOWN);
- Y_VERIFY(ActiveEvictions, "Unexpected active evictions count at tablet %lu", TabletID());
ui64 exportNo = msg.ExportNo;
auto& tierName = msg.TierName;
- ui64 pathId = msg.PathId;
- if (status == NKikimrProto::UNKNOWN) {
- LOG_S_DEBUG("Export (write): id " << exportNo << " tier '" << tierName << "' at tablet " << TabletID());
- ExportBlobs(ctx, exportNo, tierName, pathId, std::move(msg.Blobs));
- } else if (status == NKikimrProto::ERROR && msg.Blobs.empty()) {
+ if (status == NKikimrProto::ERROR && msg.Blobs.empty()) {
LOG_S_WARN("Export (fail): id " << exportNo << " tier '" << tierName << "' error: "
<< ev->Get()->SerializeErrorsToString() << "' at tablet " << TabletID());
- --ActiveEvictions;
} else {
// There's no atomicity needed here. Allow partial export
if (status == NKikimrProto::ERROR) {
diff --git a/ydb/core/tx/columnshard/columnshard__forget.cpp b/ydb/core/tx/columnshard/columnshard__forget.cpp
index 02cdbfb25b..12d6a38919 100644
--- a/ydb/core/tx/columnshard/columnshard__forget.cpp
+++ b/ydb/core/tx/columnshard/columnshard__forget.cpp
@@ -35,15 +35,17 @@ bool TTxForget::Execute(TTransactionContext& txc, const TActorContext&) {
TBlobManagerDb blobManagerDb(txc.DB);
TString strBlobs;
+ TString unknownBlobs;
for (auto& evict : msg.Evicted) {
bool erased = Self->BlobManager->EraseOneToOne(evict, blobManagerDb);
if (erased) {
strBlobs += "'" + evict.Blob.ToStringNew() + "' ";
} else {
- LOG_S_ERROR("Forget unknown blob " << evict.Blob << " at tablet " << Self->TabletID());
+ unknownBlobs += "'" + evict.Blob.ToStringNew() + "' ";
}
}
- LOG_S_NOTICE("Forget evicted blobs " << strBlobs << "at tablet " << Self->TabletID());
+ LOG_S_INFO("TTxForget forget evicted blobs " << strBlobs
+ << (unknownBlobs.size() ? ", forget unknown blobs " : "") << unknownBlobs << "at tablet " << Self->TabletID());
Self->IncCounter(COUNTER_FORGET_SUCCESS);
} else {
diff --git a/ydb/core/tx/columnshard/columnshard__write_index.cpp b/ydb/core/tx/columnshard/columnshard__write_index.cpp
index 61149c87fa..dce954cb24 100644
--- a/ydb/core/tx/columnshard/columnshard__write_index.cpp
+++ b/ydb/core/tx/columnshard/columnshard__write_index.cpp
@@ -23,18 +23,11 @@ public:
TTxType GetTxType() const override { return TXTYPE_WRITE_INDEX; }
private:
- struct TPathIdBlobs {
- THashMap<TUnifiedBlobId, TString> Blobs;
- ui64 PathId;
- TPathIdBlobs(const ui64 pathId)
- : PathId(pathId) {
-
- }
- };
+ using TPathIdBlobs = THashMap<ui64, THashSet<TUnifiedBlobId>>;
TEvPrivate::TEvWriteIndex::TPtr Ev;
THashMap<TString, TPathIdBlobs> ExportTierBlobs;
- THashSet<NOlap::TEvictedBlob> BlobsToForget;
+ THashMap<TString, THashSet<NOlap::TEvictedBlob>> BlobsToForget;
ui64 ExportNo = 0;
TBackgroundActivity TriggerActivity = TBackgroundActivity::All();
};
@@ -197,7 +190,7 @@ bool TTxWriteIndex::Execute(TTransactionContext& txc, const TActorContext& ctx)
auto evict = Self->BlobManager->GetDropped(blobId, meta);
Y_VERIFY(evict.State != EEvictState::UNKNOWN);
- BlobsToForget.emplace(std::move(evict));
+ BlobsToForget[meta.GetTierName()].emplace(std::move(evict));
if (NOlap::IsDeleted(evict.State)) {
LOG_S_DEBUG("Skip delete blob '" << blobId.ToStringNew() << "' at tablet " << Self->TabletID());
@@ -228,21 +221,22 @@ bool TTxWriteIndex::Execute(TTransactionContext& txc, const TActorContext& ctx)
}
if (blobsToExport.size()) {
- size_t numBlobs = blobsToExport.size();
for (auto& [blobId, evFeatures] : blobsToExport) {
- auto it = ExportTierBlobs.find(evFeatures.TargetTierName);
- if (it == ExportTierBlobs.end()) {
- it = ExportTierBlobs.emplace(evFeatures.TargetTierName, TPathIdBlobs(evFeatures.PathId)).first;
- }
- it->second.Blobs.emplace(blobId, TString());
+ ExportTierBlobs[evFeatures.TargetTierName][evFeatures.PathId].emplace(blobId);
}
blobsToExport.clear();
- ExportNo = Self->LastExportNo + 1;
- Self->LastExportNo += ExportTierBlobs.size();
+ ui32 numExports = 0;
+ for (auto& [tierName, pathBlobs] : ExportTierBlobs) {
+ numExports += pathBlobs.size();
+ }
+
+ ExportNo = Self->LastExportNo;
+ Self->LastExportNo += numExports;
- LOG_S_DEBUG("TTxWriteIndex init export " << ExportNo << " of " << numBlobs << " blobs in "
- << ExportTierBlobs.size() << " tiers at tablet " << Self->TabletID());
+ // Do not start new TTL till we finish current tx. TODO: check if this protection needed
+ Y_VERIFY(!Self->ActiveEvictions, "Unexpected active evictions count at tablet %lu", Self->TabletID());
+ Self->ActiveEvictions += numExports;
NIceDb::TNiceDb db(txc.DB);
Schema::SaveSpecialValue(db, Schema::EValueIds::LastExportNumber, Self->LastExportNo);
@@ -284,10 +278,6 @@ bool TTxWriteIndex::Execute(TTransactionContext& txc, const TActorContext& ctx)
Self->ActiveTtl = false;
//TriggerActivity = changes->NeedRepeat ? TBackgroundActivity::Ttl() : TBackgroundActivity::None();
- // Do not start new TTL till we evict current PortionsToEvict. We could evict them twice otherwise
- Y_VERIFY(!Self->ActiveEvictions, "Unexpected active evictions count at tablet %lu", Self->TabletID());
- Self->ActiveEvictions = ExportTierBlobs.size();
-
Self->IncCounter(ok ? COUNTER_TTL_SUCCESS : COUNTER_TTL_FAIL);
Self->IncCounter(COUNTER_EVICTION_BLOBS_WRITTEN, blobsWritten);
Self->IncCounter(COUNTER_EVICTION_BYTES_WRITTEN, bytesWritten);
@@ -308,12 +298,16 @@ void TTxWriteIndex::Complete(const TActorContext& ctx) {
}
for (auto& [tierName, pathBlobs] : ExportTierBlobs) {
- Y_VERIFY(ExportNo);
- Y_VERIFY(pathBlobs.PathId);
-
- ctx.Send(Self->SelfId(),
- new TEvPrivate::TEvExport(ExportNo, tierName, pathBlobs.PathId, std::move(pathBlobs.Blobs)));
- ++ExportNo;
+ for (auto& [pathId, blobs] : pathBlobs) {
+ ++ExportNo;
+ Y_VERIFY(pathId);
+ auto event = std::make_unique<TEvPrivate::TEvExport>(ExportNo, tierName, pathId, std::move(blobs));
+ Self->ExportBlobs(ctx, std::move(event));
+ }
+ Self->ActiveEvictions -= pathBlobs.size();
+ }
+ if (ExportTierBlobs.size()) {
+ Y_VERIFY(!Self->ActiveEvictions, "Unexpected active evictions count at tablet %lu", Self->TabletID());
}
Self->ForgetBlobs(ctx, BlobsToForget);
diff --git a/ydb/core/tx/columnshard/columnshard_impl.cpp b/ydb/core/tx/columnshard/columnshard_impl.cpp
index b9128271bf..1d059d97fc 100644
--- a/ydb/core/tx/columnshard/columnshard_impl.cpp
+++ b/ydb/core/tx/columnshard/columnshard_impl.cpp
@@ -177,7 +177,9 @@ bool TColumnShard::WaitPlanStep(ui64 step) {
}
void TColumnShard::SendWaitPlanStep(ui64 step) {
- Send(MakeMediatorTimecastProxyID(), new TEvMediatorTimecast::TEvWaitPlanStep(TabletID(), step));
+ if (MediatorTimeCastRegistered) {
+ Send(MakeMediatorTimecastProxyID(), new TEvMediatorTimecast::TEvWaitPlanStep(TabletID(), step));
+ }
}
void TColumnShard::RescheduleWaitingReads() {
@@ -737,9 +739,7 @@ void TColumnShard::EnqueueBackgroundActivities(bool periodic, TBackgroundActivit
ctx.Send(SelfId(), event.release());
} else {
// Small cleanup (no index changes)
- THashSet<NOlap::TEvictedBlob> blobsToForget;
- BlobManager->GetCleanupBlobs(blobsToForget);
- ForgetBlobs(ctx, blobsToForget);
+ CleanForgottenBlobs(ctx);
}
}
@@ -1063,19 +1063,44 @@ void TColumnShard::MapExternBlobs(const TActorContext& /*ctx*/, NOlap::TReadMeta
}
}
-void TColumnShard::ExportBlobs(const TActorContext& ctx, ui64 exportNo, const TString& tierName, ui64 pathId,
- TEvPrivate::TEvExport::TBlobDataMap&& blobsInfo) const {
- Y_VERIFY(blobsInfo.size());
+void TColumnShard::CleanForgottenBlobs(const TActorContext& ctx) {
+ THashMap<TString, THashSet<NOlap::TEvictedBlob>> tierBlobsToForget;
+ BlobManager->GetCleanupBlobs(tierBlobsToForget);
+ ForgetBlobs(ctx, tierBlobsToForget);
+}
+
+void TColumnShard::Reexport(const TActorContext& ctx) {
+ THashMap<TString, THashSet<NOlap::TEvictedBlob>> tierBlobsToReexport;
+ BlobManager->GetReexportBlobs(tierBlobsToReexport);
+
+ ui64 exportNo = LastExportNo;
+ LastExportNo += tierBlobsToReexport.size(); // TODO: persist it?
- TString strBlobs;
- for (auto& [blobId, _] : blobsInfo) {
- strBlobs += "'" + blobId.ToStringNew() + "' ";
+ for (auto& [tierName, evictSet] : tierBlobsToReexport) {
+ ++exportNo;
+ LOG_S_INFO("Reexport " << exportNo << " at tablet " << TabletID());
+ ExportBlobs(ctx, std::make_unique<TEvPrivate::TEvExport>(exportNo, tierName, evictSet));
}
- LOG_S_NOTICE("Export blobs " << strBlobs << "at tablet " << TabletID());
+}
+void TColumnShard::ExportBlobs(const TActorContext& ctx, std::unique_ptr<TEvPrivate::TEvExport>&& event) {
+ Y_VERIFY(event);
+ Y_VERIFY(event->ExportNo);
+ Y_VERIFY(event->Blobs.size());
+ Y_VERIFY(event->SrcToDstBlobs.size() == event->Blobs.size());
+
+ const auto& tierName = event->TierName;
if (auto s3 = GetS3ActorForTier(tierName)) {
- auto event = std::make_unique<TEvPrivate::TEvExport>(exportNo, tierName, pathId, s3, std::move(blobsInfo));
- ctx.Register(CreateExportActor(TabletID(), ctx.SelfID, event.release()));
+ TStringBuilder strBlobs;
+ for (auto& [blobId, _] : event->Blobs) {
+ strBlobs << "'" << blobId.ToStringNew() << "' ";
+ }
+
+ event->DstActor = s3;
+ LOG_S_NOTICE("Export blobs " << strBlobs << "(tier '" << tierName << "') at tablet " << TabletID());
+ ctx.Register(CreateExportActor(TabletID(), SelfId(), event.release()));
+ } else {
+ LOG_S_INFO("Cannot export blobs (no S3 actor for tier '" << tierName << "') at tablet " << TabletID());
}
}
@@ -1088,32 +1113,42 @@ void TColumnShard::ForgetTierBlobs(const TActorContext& ctx, const TString& tier
}
}
-void TColumnShard::ForgetBlobs(const TActorContext& ctx, const THashSet<NOlap::TEvictedBlob>& evictedBlobs) {
- THashMap<TString, std::vector<NOlap::TEvictedBlob>> tierBlobs;
+void TColumnShard::ForgetBlobs(const TActorContext& ctx, const THashMap<TString, THashSet<NOlap::TEvictedBlob>>& evictedBlobs) {
+ TStringBuilder strBlobs;
+ TStringBuilder strBlobsDelayed;
- TString strBlobs;
- TString strBlobsDelayed;
+ for (const auto& [tierName, evictSet] : evictedBlobs) {
+ std::vector<NOlap::TEvictedBlob> tierBlobs;
- for (const auto& ev : evictedBlobs) {
- auto& blobId = ev.Blob;
- if (BlobManager->BlobInUse(blobId)) {
- LOG_S_DEBUG("Blob '" << blobId.ToStringNew() << "' in use at tablet " << TabletID());
- strBlobsDelayed += "'" + blobId.ToStringNew() + "' ";
- continue;
+ for (const auto& ev : evictSet) {
+ auto& blobId = ev.Blob;
+ if (BlobManager->BlobInUse(blobId)) {
+ LOG_S_DEBUG("Blob '" << blobId.ToStringNew() << "' is in use at tablet " << TabletID());
+ strBlobsDelayed << "'" << blobId.ToStringNew() << "' ";
+ continue;
+ }
+
+ TEvictMetadata meta;
+ auto evict = BlobManager->GetDropped(blobId, meta);
+ if (tierName != meta.GetTierName()) {
+ LOG_S_ERROR("Forget with unexpected tier name '" << meta.GetTierName() << "' at tablet " << TabletID());
+ continue;
+ }
+
+ if (evict.State == EEvictState::UNKNOWN) {
+ LOG_S_ERROR("Forget unknown blob '" << blobId.ToStringNew() << "' at tablet " << TabletID());
+ } else if (NOlap::CouldBeExported(evict.State)) {
+ Y_VERIFY(evict.Blob == blobId);
+ strBlobs << "'" << blobId.ToStringNew() << "' ";
+ tierBlobs.emplace_back(std::move(evict));
+ } else {
+ Y_VERIFY(evict.Blob == blobId);
+ strBlobsDelayed << "'" << blobId.ToStringNew() << "' ";
+ }
}
- TEvictMetadata meta;
- auto evict = BlobManager->GetDropped(blobId, meta);
-
- if (evict.State == EEvictState::UNKNOWN) {
- LOG_S_ERROR("Forget unknown blob '" << blobId.ToStringNew() << "' at tablet " << TabletID());
- } else if (NOlap::CouldBeExported(evict.State)) {
- Y_VERIFY(evict.Blob == blobId);
- strBlobs += "'" + blobId.ToStringNew() + "' ";
- tierBlobs[meta.GetTierName()].emplace_back(std::move(evict));
- } else {
- Y_VERIFY(evict.Blob == blobId);
- strBlobsDelayed += "'" + blobId.ToStringNew() + "' ";
+ if (tierBlobs.size()) {
+ ForgetTierBlobs(ctx, tierName, std::move(tierBlobs));
}
}
@@ -1123,10 +1158,6 @@ void TColumnShard::ForgetBlobs(const TActorContext& ctx, const THashSet<NOlap::T
if (strBlobsDelayed.size()) {
LOG_S_NOTICE("Forget blobs (deleyed) " << strBlobsDelayed << "at tablet " << TabletID());
}
-
- for (auto& [tierName, blobs] : tierBlobs) {
- ForgetTierBlobs(ctx, tierName, std::move(blobs));
- }
}
bool TColumnShard::GetExportedBlob(const TActorContext& ctx, TActorId dst, ui64 cookie, const TString& tierName,
@@ -1169,7 +1200,11 @@ void TColumnShard::Handle(NMetadata::NProvider::TEvRefreshSubscriberData::TPtr&
void TColumnShard::ActivateTiering(const ui64 pathId, const TString& useTiering) {
if (!Tiers) {
- Tiers = std::make_shared<TTiersManager>(TabletID(), SelfId());
+ Tiers = std::make_shared<TTiersManager>(TabletID(), SelfId(),
+ [this](const TActorContext& ctx){
+ CleanForgottenBlobs(ctx);
+ Reexport(ctx);
+ });
Tiers->Start(Tiers);
}
if (!!Tiers) {
diff --git a/ydb/core/tx/columnshard/columnshard_impl.h b/ydb/core/tx/columnshard/columnshard_impl.h
index 9d015332bd..ff37a95e20 100644
--- a/ydb/core/tx/columnshard/columnshard_impl.h
+++ b/ydb/core/tx/columnshard/columnshard_impl.h
@@ -466,6 +466,7 @@ private:
void EnqueueProgressTx(const TActorContext& ctx);
void EnqueueBackgroundActivities(bool periodic = false, TBackgroundActivity activity = TBackgroundActivity::All());
+ void CleanForgottenBlobs(const TActorContext& ctx);
void UpdateSchemaSeqNo(const TMessageSeqNo& seqNo, NTabletFlatExecutor::TTransactionContext& txc);
void ProtectSchemaSeqNo(const NKikimrTxColumnShard::TSchemaSeqNo& seqNoProto, NTabletFlatExecutor::TTransactionContext& txc);
@@ -488,10 +489,10 @@ private:
NOlap::TIndexInfo ConvertSchema(const NKikimrSchemeOp::TColumnTableSchema& schema);
void MapExternBlobs(const TActorContext& ctx, NOlap::TReadMetadata& metadata);
TActorId GetS3ActorForTier(const TString& tierId) const;
- void ExportBlobs(const TActorContext& ctx, ui64 exportNo, const TString& tierName, ui64 pathId,
- TEvPrivate::TEvExport::TBlobDataMap&& blobsInfo) const;
+ void Reexport(const TActorContext& ctx);
+ void ExportBlobs(const TActorContext& ctx, std::unique_ptr<TEvPrivate::TEvExport>&& ev);
void ForgetTierBlobs(const TActorContext& ctx, const TString& tierName, std::vector<NOlap::TEvictedBlob>&& blobs) const;
- void ForgetBlobs(const TActorContext& ctx, const THashSet<NOlap::TEvictedBlob>& blobs);
+ void ForgetBlobs(const TActorContext& ctx, const THashMap<TString, THashSet<NOlap::TEvictedBlob>>& evictedBlobs);
bool GetExportedBlob(const TActorContext& ctx, TActorId dst, ui64 cookie, const TString& tierName,
NOlap::TEvictedBlob&& evicted, std::vector<NOlap::TBlobRange>&& ranges);
diff --git a/ydb/core/tx/columnshard/columnshard_private_events.h b/ydb/core/tx/columnshard/columnshard_private_events.h
index b614ca9d7a..820a43962b 100644
--- a/ydb/core/tx/columnshard/columnshard_private_events.h
+++ b/ydb/core/tx/columnshard/columnshard_private_events.h
@@ -129,36 +129,42 @@ struct TEvPrivate {
NKikimrProto::EReplyStatus Status = NKikimrProto::UNKNOWN;
ui64 ExportNo = 0;
TString TierName;
- ui64 PathId = 0;
TActorId DstActor;
TBlobDataMap Blobs; // src: blobId -> data map; dst: exported blobIds set
THashMap<TUnifiedBlobId, TUnifiedBlobId> SrcToDstBlobs;
TMap<TString, TString> ErrorStrings;
- explicit TEvExport(ui64 exportNo, const TString& tierName, ui64 pathId, TBlobDataMap&& tierBlobs)
+ explicit TEvExport(ui64 exportNo, const TString& tierName, ui64 pathId,
+ const THashSet<TUnifiedBlobId>& blobIds)
: ExportNo(exportNo)
, TierName(tierName)
- , PathId(pathId)
- , Blobs(std::move(tierBlobs))
{
Y_VERIFY(ExportNo);
Y_VERIFY(!TierName.empty());
- Y_VERIFY(PathId);
- Y_VERIFY(!Blobs.empty());
+ Y_VERIFY(pathId);
+ Y_VERIFY(!blobIds.empty());
+
+ for (auto& blobId : blobIds) {
+ Blobs.emplace(blobId, TString());
+ SrcToDstBlobs[blobId] = blobId.MakeS3BlobId(pathId);
+ }
}
- TEvExport(ui64 exportNo, const TString& tierName, ui64 pathId, TActorId dstActor, TBlobDataMap&& blobs)
+ explicit TEvExport(ui64 exportNo, const TString& tierName, const THashSet<NOlap::TEvictedBlob>& evictSet)
: ExportNo(exportNo)
, TierName(tierName)
- , PathId(pathId)
- , DstActor(dstActor)
- , Blobs(std::move(blobs))
{
Y_VERIFY(ExportNo);
Y_VERIFY(!TierName.empty());
- Y_VERIFY(PathId);
- Y_VERIFY(DstActor);
- Y_VERIFY(!Blobs.empty());
+ Y_VERIFY(!evictSet.empty());
+
+ for (auto& evict : evictSet) {
+ Y_VERIFY(evict.IsEvicting());
+ Y_VERIFY(evict.ExternBlob.IsS3Blob());
+
+ Blobs.emplace(evict.Blob, TString());
+ SrcToDstBlobs[evict.Blob] = evict.ExternBlob;
+ }
}
void AddResult(const TUnifiedBlobId& blobId, const TString& key, const bool hasError, const TString& errStr) {
diff --git a/ydb/core/tx/columnshard/columnshard_ut_common.h b/ydb/core/tx/columnshard/columnshard_ut_common.h
index f7df60895d..cb5bd7dcb1 100644
--- a/ydb/core/tx/columnshard/columnshard_ut_common.h
+++ b/ydb/core/tx/columnshard/columnshard_ut_common.h
@@ -77,6 +77,30 @@ struct TTestSchema {
TtlColumn = columnName;
return *this;
}
+
+ static NKikimrSchemeOp::TS3Settings FakeS3() {
+ const TString bucket = "tiering-test-01";
+
+ NKikimrSchemeOp::TS3Settings s3Config;
+ s3Config.SetScheme(NKikimrSchemeOp::TS3Settings::HTTP);
+ s3Config.SetVerifySSL(false);
+ s3Config.SetBucket(bucket);
+//#define S3_TEST_USAGE
+#ifdef S3_TEST_USAGE
+ s3Config.SetEndpoint("storage.cloud-preprod.yandex.net");
+ s3Config.SetAccessKey("...");
+ s3Config.SetSecretKey("...");
+ s3Config.SetProxyHost("localhost");
+ s3Config.SetProxyPort(8080);
+ s3Config.SetProxyScheme(NKikimrSchemeOp::TS3Settings::HTTP);
+#else
+ s3Config.SetEndpoint("fake");
+#endif
+ s3Config.SetRequestTimeoutMs(10000);
+ s3Config.SetHttpRequestTimeoutMs(10000);
+ s3Config.SetConnectionTimeoutMs(10000);
+ return s3Config;
+ }
};
struct TTableSpecials : public TStorageTier {
diff --git a/ydb/core/tx/columnshard/engines/column_engine.h b/ydb/core/tx/columnshard/engines/column_engine.h
index 30ab8d9f9b..9b862f1f3e 100644
--- a/ydb/core/tx/columnshard/engines/column_engine.h
+++ b/ydb/core/tx/columnshard/engines/column_engine.h
@@ -282,6 +282,25 @@ struct TColumnEngineStats {
void Clear() {
*this = {};
}
+
+ TPortionsStats& StatsByType(TPortionMeta::EProduced produced) {
+ switch (produced) {
+ case NOlap::TPortionMeta::INSERTED:
+ return Inserted;
+ case NOlap::TPortionMeta::COMPACTED:
+ return Compacted;
+ case NOlap::TPortionMeta::SPLIT_COMPACTED:
+ return SplitCompacted;
+ case NOlap::TPortionMeta::INACTIVE:
+ return Inactive;
+ case NOlap::TPortionMeta::EVICTED:
+ return Evicted;
+ case NOlap::TPortionMeta::UNSPECIFIED:
+ default:
+ break;
+ }
+ Y_VERIFY(false);
+ }
};
class IColumnEngine {
diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp
index de42fde260..c1348c5f6f 100644
--- a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp
+++ b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp
@@ -442,8 +442,9 @@ const TColumnEngineStats& TColumnEngineForLogs::GetTotalStats() {
return Counters;
}
-void TColumnEngineForLogs::UpdatePortionStats(const TPortionInfo& portionInfo, EStatsUpdateType updateType) {
- UpdatePortionStats(Counters, portionInfo, updateType);
+void TColumnEngineForLogs::UpdatePortionStats(const TPortionInfo& portionInfo, EStatsUpdateType updateType,
+ const TPortionInfo* exPortionInfo) {
+ UpdatePortionStats(Counters, portionInfo, updateType, exPortionInfo);
ui64 granule = portionInfo.Granule();
Y_VERIFY(granule);
@@ -455,11 +456,12 @@ void TColumnEngineForLogs::UpdatePortionStats(const TPortionInfo& portionInfo, E
stats = std::make_shared<TColumnEngineStats>();
stats->Tables = 1;
}
- UpdatePortionStats(*PathStats[pathId], portionInfo, updateType);
+ UpdatePortionStats(*PathStats[pathId], portionInfo, updateType, exPortionInfo);
}
void TColumnEngineForLogs::UpdatePortionStats(TColumnEngineStats& engineStats, const TPortionInfo& portionInfo,
- EStatsUpdateType updateType) const {
+ EStatsUpdateType updateType,
+ const TPortionInfo* exPortionInfo) const {
ui64 columnRecords = portionInfo.Records.size();
ui64 metadataBytes = 0;
THashSet<TUnifiedBlobId> blobs;
@@ -470,72 +472,76 @@ void TColumnEngineForLogs::UpdatePortionStats(TColumnEngineStats& engineStats, c
ui32 rows = portionInfo.NumRows();
ui64 rawBytes = portionInfo.RawBytesSum();
+ ui64 numBlobs = blobs.size();
ui64 bytes = 0;
for (auto& blobId : blobs) {
bytes += blobId.BlobSize();
}
+ blobs = {};
- TColumnEngineStats::TPortionsStats* srcStats = nullptr;
- switch (portionInfo.Meta.Produced) {
- case NOlap::TPortionMeta::UNSPECIFIED:
- Y_VERIFY(false); // unexpected
- case NOlap::TPortionMeta::INSERTED:
- srcStats = &engineStats.Inserted;
- break;
- case NOlap::TPortionMeta::COMPACTED:
- srcStats = &engineStats.Compacted;
- break;
- case NOlap::TPortionMeta::SPLIT_COMPACTED:
- srcStats = &engineStats.SplitCompacted;
- break;
- case NOlap::TPortionMeta::INACTIVE:
- Y_VERIFY_DEBUG(false); // Stale portions are not set INACTIVE. They have IsActive() property instead.
- srcStats = &engineStats.Inactive;
- break;
- case NOlap::TPortionMeta::EVICTED:
- srcStats = &engineStats.Evicted;
- break;
- }
- Y_VERIFY(srcStats);
- auto* stats = (updateType == EStatsUpdateType::EVICT)
- ? &engineStats.Evicted
- : (portionInfo.IsActive() ? srcStats : &engineStats.Inactive);
+ Y_VERIFY(!exPortionInfo || exPortionInfo->Meta.Produced != TPortionMeta::EProduced::UNSPECIFIED);
+ Y_VERIFY(portionInfo.Meta.Produced != TPortionMeta::EProduced::UNSPECIFIED);
- bool isErase = updateType == EStatsUpdateType::ERASE;
- bool isLoad = updateType == EStatsUpdateType::LOAD;
- bool isAppended = portionInfo.IsActive() && (updateType != EStatsUpdateType::EVICT);
+ TColumnEngineStats::TPortionsStats* srcStats = exPortionInfo
+ ? (exPortionInfo->IsActive()
+ ? &engineStats.StatsByType(exPortionInfo->Meta.Produced)
+ : &engineStats.StatsByType(TPortionMeta::EProduced::INACTIVE))
+ : &engineStats.StatsByType(portionInfo.Meta.Produced);
+ TColumnEngineStats::TPortionsStats* stats = portionInfo.IsActive()
+ ? &engineStats.StatsByType(portionInfo.Meta.Produced)
+ : &engineStats.StatsByType(TPortionMeta::EProduced::INACTIVE);
+
+ const bool isErase = updateType == EStatsUpdateType::ERASE;
+ const bool isAdd = updateType == EStatsUpdateType::ADD;
if (isErase) { // PortionsToDrop
engineStats.ColumnRecords -= columnRecords;
engineStats.ColumnMetadataBytes -= metadataBytes;
--stats->Portions;
- stats->Blobs -= blobs.size();
+ stats->Blobs -= numBlobs;
stats->Rows -= rows;
stats->Bytes -= bytes;
stats->RawBytes -= rawBytes;
- } else if (isLoad || isAppended) { // AppendedPortions
+ } else if (isAdd) { // AppendedPortions
engineStats.ColumnRecords += columnRecords;
engineStats.ColumnMetadataBytes += metadataBytes;
++stats->Portions;
- stats->Blobs += blobs.size();
+ stats->Blobs += numBlobs;
stats->Rows += rows;
stats->Bytes += bytes;
stats->RawBytes += rawBytes;
- } else { // SwitchedPortions || PortionsToEvict
+ } else if (srcStats != stats || exPortionInfo) { // SwitchedPortions || PortionsToEvict
--srcStats->Portions;
- srcStats->Blobs -= blobs.size();
- srcStats->Rows -= rows;
- srcStats->Bytes -= bytes;
- srcStats->RawBytes -= rawBytes;
+ if (exPortionInfo) {
+ blobs = {};
+ for (auto& rec : exPortionInfo->Records) {
+ blobs.insert(rec.BlobRange.BlobId);
+ }
+
+ srcStats->Rows -= exPortionInfo->NumRows();
+ srcStats->RawBytes -= exPortionInfo->RawBytesSum();
+ srcStats->Blobs -= blobs.size();
+ for (auto& blobId : blobs) {
+ srcStats->Bytes -= blobId.BlobSize();
+ }
+ blobs = {};
+ } else {
+ srcStats->Blobs -= numBlobs;
+ srcStats->Rows -= rows;
+ srcStats->Bytes -= bytes;
+ srcStats->RawBytes -= rawBytes;
+ }
++stats->Portions;
- stats->Blobs += blobs.size();
+ stats->Blobs += numBlobs;
stats->Rows += rows;
stats->Bytes += bytes;
stats->RawBytes += rawBytes;
}
+
+ Y_VERIFY_DEBUG(stats->Bytes >= 0);
}
void TColumnEngineForLogs::UpdateDefaultSchema(const TSnapshot& snapshot, TIndexInfo&& info) {
@@ -585,7 +591,7 @@ bool TColumnEngineForLogs::Load(IDbWrapper& db, THashSet<TUnifiedBlobId>& lostBl
CleanupGranules.insert(granule);
}
for (auto& [_, portionInfo] : spg->Portions) {
- UpdatePortionStats(portionInfo, EStatsUpdateType::LOAD);
+ UpdatePortionStats(portionInfo, EStatsUpdateType::ADD);
}
}
@@ -1115,7 +1121,10 @@ bool TColumnEngineForLogs::ApplyChanges(IDbWrapper& db, const TChanges& changes,
}
}
- if (!UpsertPortion(portionInfo, apply)) {
+ // In case of race with eviction portion could become evicted
+ const TPortionInfo& oldInfo = Granules[granule]->Portions[portion];
+
+ if (!UpsertPortion(portionInfo, apply, &oldInfo)) {
LOG_S_ERROR("Cannot update portion " << portionInfo << " at tablet " << TabletId);
return false;
}
@@ -1152,11 +1161,7 @@ bool TColumnEngineForLogs::ApplyChanges(IDbWrapper& db, const TChanges& changes,
}
Y_VERIFY(portionInfo.TierName != oldInfo.TierName);
- if (apply) {
- UpdatePortionStats(oldInfo, EStatsUpdateType::EVICT);
- }
-
- if (!UpsertPortion(portionInfo, apply, false)) {
+ if (!UpsertPortion(portionInfo, apply, &oldInfo)) {
LOG_S_ERROR("Cannot evict portion " << portionInfo << " at tablet " << TabletId);
return false;
}
@@ -1170,15 +1175,27 @@ bool TColumnEngineForLogs::ApplyChanges(IDbWrapper& db, const TChanges& changes,
// Move portions in granules (zero-copy switch + append into new granules)
- for (auto& [info, granule] : changes.PortionsToMove) {
+ for (auto& [info, dstGranule] : changes.PortionsToMove) {
const auto& portionInfo = info;
+
+ ui64 granule = portionInfo.Granule();
+ ui64 portion = portionInfo.Portion();
+ if (!Granules.contains(granule) || !Granules[granule]->Portions.contains(portion)) {
+ LOG_S_ERROR("Cannot move unknown portion " << portionInfo << " at tablet " << TabletId);
+ return false;
+ }
+
+ // In case of race with eviction portion could become evicted
+ const TPortionInfo oldInfo = Granules[granule]->Portions[portion];
+
if (!ErasePortion(portionInfo, apply, false)) {
LOG_S_ERROR("Cannot erase moved portion " << portionInfo << " at tablet " << TabletId);
return false;
}
+
TPortionInfo moved = portionInfo;
- moved.SetGranule(granule);
- if (!UpsertPortion(moved, apply, false)) {
+ moved.SetGranule(dstGranule);
+ if (!UpsertPortion(moved, apply, &oldInfo)) {
LOG_S_ERROR("Cannot insert moved portion " << moved << " at tablet " << TabletId);
return false;
}
@@ -1307,7 +1324,7 @@ void TColumnEngineForLogs::EraseGranule(ui64 pathId, ui64 granule, const TMark&
PathGranules[pathId].erase(mark);
}
-bool TColumnEngineForLogs::UpsertPortion(const TPortionInfo& portionInfo, bool apply, bool updateStats) {
+bool TColumnEngineForLogs::UpsertPortion(const TPortionInfo& portionInfo, bool apply, const TPortionInfo* exInfo) {
ui64 granule = portionInfo.Granule();
if (!apply) {
@@ -1323,8 +1340,11 @@ bool TColumnEngineForLogs::UpsertPortion(const TPortionInfo& portionInfo, bool a
ui64 portion = portionInfo.Portion();
auto& spg = Granules[granule];
Y_VERIFY(spg);
- if (updateStats) {
- UpdatePortionStats(portionInfo);
+
+ if (exInfo) {
+ UpdatePortionStats(portionInfo, EStatsUpdateType::DEFAULT, exInfo);
+ } else {
+ UpdatePortionStats(portionInfo, EStatsUpdateType::ADD);
}
spg->Portions[portion] = portionInfo;
return true; // It must return true if (apply == true)
diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.h b/ydb/core/tx/columnshard/engines/column_engine_logs.h
index e408c9283e..190e626b9c 100644
--- a/ydb/core/tx/columnshard/engines/column_engine_logs.h
+++ b/ydb/core/tx/columnshard/engines/column_engine_logs.h
@@ -218,8 +218,7 @@ public:
enum class EStatsUpdateType {
DEFAULT = 0,
ERASE,
- LOAD,
- EVICT
+ ADD,
};
TColumnEngineForLogs(TIndexInfo&& info, ui64 tabletId, const TCompactionLimits& limits = {});
@@ -339,12 +338,14 @@ private:
void EraseGranule(ui64 pathId, ui64 granule, const TMark& mark);
bool SetGranule(const TGranuleRecord& rec, bool apply);
- bool UpsertPortion(const TPortionInfo& portionInfo, bool apply, bool updateStats = true);
+ bool UpsertPortion(const TPortionInfo& portionInfo, bool apply, const TPortionInfo* exInfo = nullptr);
bool ErasePortion(const TPortionInfo& portionInfo, bool apply, bool updateStats = true);
void AddColumnRecord(const TColumnRecord& row);
- void UpdatePortionStats(const TPortionInfo& portionInfo, EStatsUpdateType updateType = EStatsUpdateType::DEFAULT);
+ void UpdatePortionStats(const TPortionInfo& portionInfo, EStatsUpdateType updateType = EStatsUpdateType::DEFAULT,
+ const TPortionInfo* exPortionInfo = nullptr);
void UpdatePortionStats(TColumnEngineStats& engineStats, const TPortionInfo& portionInfo,
- EStatsUpdateType updateType) const;
+ EStatsUpdateType updateType,
+ const TPortionInfo* exPortionInfo = nullptr) const;
bool CanInsert(const TChanges& changes, const TSnapshot& commitSnap) const;
TMap<TSnapshot, TVector<ui64>> GetOrderedPortions(ui64 granule, const TSnapshot& snapshot = TSnapshot::Max()) const;
diff --git a/ydb/core/tx/columnshard/export_actor.cpp b/ydb/core/tx/columnshard/export_actor.cpp
index 8dff625bf1..87fed18189 100644
--- a/ydb/core/tx/columnshard/export_actor.cpp
+++ b/ydb/core/tx/columnshard/export_actor.cpp
@@ -32,10 +32,8 @@ public:
<< " at tablet " << TabletId << " (export)");
BlobsToRead.erase(blobId);
- Event->Status = event.Status;
- if (Event->Status == NKikimrProto::UNKNOWN) {
- Event->Status = NKikimrProto::ERROR;
- }
+ Event->AddResult(blobId, blobId.ToStringNew(), true,
+ TStringBuilder() << "cannot read, status " << NKikimrProto::EReplyStatus_Name(event.Status));
return;
}
@@ -102,9 +100,15 @@ private:
}
void SendResultAndDie(const TActorContext& ctx) {
- auto s3Actor = Event->DstActor;
- Event->DstActor = Parent;
- ctx.Send(s3Actor, Event.release());
+ if (Event->Status == NKikimrProto::UNKNOWN) {
+ auto s3Actor = Event->DstActor;
+ Event->DstActor = Parent;
+ ctx.Send(s3Actor, Event.release());
+ } else {
+ Y_VERIFY(Event->Status == NKikimrProto::ERROR);
+ Event->DstActor = Parent;
+ ctx.Send(Parent, Event.release());
+ }
Die(ctx);
}
};
diff --git a/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp b/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp
index a5847ac677..928fd3ec67 100644
--- a/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp
+++ b/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp
@@ -1,4 +1,5 @@
#include "columnshard_ut_common.h"
+#include <ydb/core/base/tablet.h>
#include <ydb/core/wrappers/ut_helpers/s3_mock.h>
#include <ydb/core/wrappers/s3_wrapper.h>
#include <ydb/services/metadata/service.h>
@@ -168,18 +169,23 @@ bool TestCreateTable(const TString& txBody, ui64 planStep = 1000, ui64 txId = 10
return ProposeSchemaTx(runtime, sender, txBody, {++planStep, ++txId});
}
-TString GetReadResult(NKikimrTxColumnShard::TEvReadResult& resRead,
- std::optional<ui32> batchNo = 0,
- std::optional<bool> finished = true)
+enum class EExpectedResult {
+ OK_FINISHED,
+ OK,
+ ERROR
+};
+
+TString GetReadResult(NKikimrTxColumnShard::TEvReadResult& resRead, EExpectedResult expected = EExpectedResult::OK_FINISHED)
{
UNIT_ASSERT_EQUAL(resRead.GetOrigin(), TTestTxConfig::TxTablet0);
UNIT_ASSERT_EQUAL(resRead.GetTxInitiator(), TTestTxConfig::TxTablet1);
- UNIT_ASSERT_EQUAL(resRead.GetStatus(), NKikimrTxColumnShard::EResultStatus::SUCCESS);
- if (batchNo) {
- UNIT_ASSERT_VALUES_EQUAL(resRead.GetBatch(), *batchNo);
+ if (expected == EExpectedResult::ERROR) {
+ UNIT_ASSERT_EQUAL(resRead.GetStatus(), NKikimrTxColumnShard::EResultStatus::ERROR);
+ } else {
+ UNIT_ASSERT_EQUAL(resRead.GetStatus(), NKikimrTxColumnShard::EResultStatus::SUCCESS);
}
- if (finished) {
- UNIT_ASSERT_EQUAL(resRead.GetFinished(), *finished);
+ if (expected == EExpectedResult::OK_FINISHED) {
+ UNIT_ASSERT_EQUAL(resRead.GetFinished(), true);
}
return resRead.GetData();
}
@@ -368,46 +374,68 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {},
class TCountersContainer {
private:
- ui32 SuccessCounterStart = 0;
+ struct TCounters {
+ ui32 Attempt = 0;
+ ui32 Request = 0;
+ ui32 Response = 0;
+ ui32 Success = 0;
+
+ void Clear() {
+ Attempt = 0;
+ Request = 0;
+ Response = 0;
+ Success = 0;
+ }
+
+ TString ToString() const {
+ return TStringBuilder() << Attempt << "/" << Request << "/" << Response << "/" << Success;
+ }
+ };
+
+ ui32 WaitNo = 0;
+
public:
- ui32 UnknownsCounter = 0;
- ui32 SuccessCounter = 0;
- ui32 ErrorsCounter = 0;
- ui32 ResponsesCounter = 0;
+ TCounters ExportCounters;
+ TCounters ForgetCounters;
ui32 CaptureReadEvents = 0;
std::vector<TAutoPtr<IEventHandle>> CapturedReads;
+ ui32 CaptureEvictResponse = 0;
+ ui32 CaptureForgetResponse = 0;
+ std::vector<TAutoPtr<IEventHandle>> CapturedResponses;
+ bool BlockForgets = false;
- TString SerializeToString() const {
- TStringBuilder sb;
- sb << "EXPORTS INFO: " << SuccessCounter << "/" << ErrorsCounter << "/" << UnknownsCounter << "/" << ResponsesCounter;
- return sb;
- }
-
- void WaitEvents(TTestBasicRuntime& runtime, const ui32 attemption, const ui32 expectedDeltaSuccess, const TDuration timeout) {
+ void WaitEvents(TTestBasicRuntime& runtime, const TDuration& timeout, ui32 waitExports, ui32 waitForgets,
+ const TString& promo = "START_WAITING") {
const TInstant startInstant = TAppData::TimeProvider->Now();
const TInstant deadline = startInstant + timeout;
- Cerr << "START_WAITING(" << attemption << "): " << SerializeToString() << Endl;
+ Cerr << promo << "(" << WaitNo << "): "
+ << "E" << ExportCounters.ToString() << " F" << ForgetCounters.ToString() << Endl;
while (TAppData::TimeProvider->Now() < deadline) {
- Cerr << "IN_WAITING(" << attemption << "):" << SerializeToString() << Endl;
+ Cerr << "IN_WAITING(" << WaitNo << "): "
+ << "E" << ExportCounters.ToString() << " F" << ForgetCounters.ToString() << Endl;
runtime.SimulateSleep(TDuration::Seconds(1));
- UNIT_ASSERT(ErrorsCounter == 0);
- if (expectedDeltaSuccess) {
- if (SuccessCounter >= SuccessCounterStart + expectedDeltaSuccess) {
- break;
- }
- } else {
- if (SuccessCounter > SuccessCounterStart) {
- break;
- }
+
+ if (!waitExports && ExportCounters.Success
+ || !waitForgets && ForgetCounters.Success
+ || !waitForgets && ExportCounters.Success >= waitExports
+ || !waitExports && ForgetCounters.Success >= waitForgets
+ || waitExports && waitForgets
+ && ExportCounters.Success >= waitExports && ForgetCounters.Success >= waitForgets) {
+ break;
}
}
- if (expectedDeltaSuccess) {
- UNIT_ASSERT(SuccessCounter >= SuccessCounterStart + expectedDeltaSuccess);
- } else {
- UNIT_ASSERT_VALUES_EQUAL(SuccessCounter, SuccessCounterStart);
- }
- Cerr << "FINISH_WAITING(" << attemption << "): " << SerializeToString() << Endl;
- SuccessCounterStart = SuccessCounter;
+ Cerr << "FINISH_WAITING(" << WaitNo << "): "
+ << "E" << ExportCounters.ToString() << " F" << ForgetCounters.ToString() << Endl;
+ UNIT_ASSERT_VALUES_EQUAL(ExportCounters.Success, waitExports);
+ UNIT_ASSERT_VALUES_EQUAL(ForgetCounters.Success, waitForgets);
+ ExportCounters.Clear();
+ ForgetCounters.Clear();
+ ++WaitNo;
+ }
+
+ void WaitMoreEvents(TTestBasicRuntime& runtime, const TDuration& timeout, ui32 waitExports, ui32 waitForgets) {
+ --WaitNo;
+ WaitEvents(runtime, timeout, waitExports, waitForgets, "CONTINUE_WAITING");
}
void WaitReadsCaptured(TTestBasicRuntime& runtime) const {
@@ -429,6 +457,18 @@ public:
}
CapturedReads.clear();
}
+
+ void ResendCapturedResponses(TTestBasicRuntime& runtime) {
+ for (auto& cev : CapturedResponses) {
+ Cerr << "RESEND S3_RESPONSE" << Endl;
+ runtime.Send(cev.Release());
+ }
+ CapturedResponses.clear();
+ }
+
+ void BlockForgetsTillReboot() {
+ BlockForgets = true;
+ }
};
class TEventsCounter {
@@ -446,13 +486,52 @@ public:
bool operator()(TTestActorRuntimeBase&, TAutoPtr<IEventHandle>& ev) {
TStringBuilder ss;
- if (auto* msg = TryGetPrivateEvent<NColumnShard::TEvPrivate::TEvExport>(ev)) {
- ss << "EXPORT(" << ++Counters->SuccessCounter << "): " << NKikimrProto::EReplyStatus_Name(msg->Status);
+ if (ev->GetTypeRewrite() == TEvTablet::EvBoot) {
+ Counters->BlockForgets = false;
+ return false;
+ } else if (auto* msg = TryGetPrivateEvent<NColumnShard::TEvPrivate::TEvExport>(ev)) {
+ if (msg->Status == NKikimrProto::OK) {
+ ss << "EXPORT(done " << ++Counters->ExportCounters.Success << "): ";
+ } else {
+ ss << "EXPORT(attempt " << ++Counters->ExportCounters.Attempt << "): "
+ << NKikimrProto::EReplyStatus_Name(msg->Status);
+ }
+ } else if (auto* msg = TryGetPrivateEvent<NColumnShard::TEvPrivate::TEvForget>(ev)) {
+ if (Counters->BlockForgets) {
+ ss << "FORGET(ignore " << NKikimrProto::EReplyStatus_Name(msg->Status) << "): ";
+ ss << " " << ev->Sender << "->" << ev->Recipient;
+ Cerr << ss << Endl;
+ return true;
+ }
+
+ if (msg->Status == NKikimrProto::OK) {
+ ss << "FORGET(done " << ++Counters->ForgetCounters.Success << "): ";
+ } else {
+ ss << "FORGET(attempt " << ++Counters->ForgetCounters.Attempt << "): "
+ << NKikimrProto::EReplyStatus_Name(msg->Status);
+ }
+ } else if (auto* msg = TryGetPrivateEvent<NWrappers::NExternalStorage::TEvPutObjectRequest>(ev)) {
+ ss << "S3_REQ(put " << ++Counters->ExportCounters.Request << "):";
} else if (auto* msg = TryGetPrivateEvent<NWrappers::NExternalStorage::TEvPutObjectResponse>(ev)) {
- ss << "S3_RESPONSE(put " << ++Counters->ResponsesCounter << "):";
+ if (Counters->CaptureEvictResponse) {
+ Cerr << "CAPTURE S3_RESPONSE(put)" << Endl;
+ --Counters->CaptureEvictResponse;
+ Counters->CapturedResponses.push_back(ev.Release());
+ return true;
+ }
+
+ ss << "S3_RESPONSE(put " << ++Counters->ExportCounters.Response << "):";
+ } else if (auto* msg = TryGetPrivateEvent<NWrappers::NExternalStorage::TEvDeleteObjectRequest>(ev)) {
+ ss << "S3_REQ(delete " << ++Counters->ForgetCounters.Request << "):";
} else if (auto* msg = TryGetPrivateEvent<NWrappers::NExternalStorage::TEvDeleteObjectResponse>(ev)) {
- ss << "(" << ++Counters->SuccessCounter << "): DELETE SUCCESS";
- ss << "S3_RESPONSE(delete " << ++Counters->ResponsesCounter << "):";
+ if (Counters->CaptureForgetResponse) {
+ Cerr << "CAPTURE S3_RESPONSE(delete)" << Endl;
+ --Counters->CaptureForgetResponse;
+ Counters->CapturedResponses.push_back(ev.Release());
+ return true;
+ }
+
+ ss << "S3_RESPONSE(delete " << ++Counters->ForgetCounters.Response << "):";
} else if (auto* msg = TryGetPrivateEvent<NBlobCache::TEvBlobCache::TEvReadBlobRange>(ev)) {
if (Counters->CaptureReadEvents) {
Cerr << "CAPTURE " << msg->BlobRange.ToString() << " "
@@ -463,23 +542,28 @@ public:
} else {
return false;
}
+ } else if (auto* msg = TryGetPrivateEvent<TEvColumnShard::TEvReadResult>(ev)) {
+ ss << "Got TEvReadResult " << NKikimrTxColumnShard::EResultStatus_Name(Proto(msg).GetStatus()) << Endl;
} else {
return false;
}
ss << " " << ev->Sender << "->" << ev->Recipient;
Cerr << ss << Endl;
return false;
- };
+ }
};
std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TString>& blobs,
const std::vector<TTestSchema::TTableSpecials>& specs,
- const ui32 initialEviction)
+ const THashSet<ui32>& exportSteps,
+ const THashSet<ui32>& forgetSteps,
+ std::optional<ui32> eventLoss = {})
{
TTestBasicRuntime runtime;
TTester::Setup(runtime);
- runtime.SetLogPriority(NKikimrServices::TX_COLUMNSHARD, NActors::NLog::PRI_DEBUG);
+ runtime.SetLogPriority(NKikimrServices::TX_COLUMNSHARD, NActors::NLog::PRI_INFO);
+ runtime.SetLogPriority(NKikimrServices::TX_COLUMNSHARD_SCAN, NActors::NLog::PRI_INFO);
TActorId sender = runtime.AllocateEdgeActor();
CreateTestBootstrapper(runtime,
@@ -509,6 +593,7 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt
ui64 tableId = 1;
ui64 planStep = 1000000000; // greater then delays
ui64 txId = 100;
+ const TDuration exportTimeout = TDuration::Seconds(40);
UNIT_ASSERT(specs.size() > 0);
{
@@ -532,18 +617,37 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt
RebootTablet(runtime, TTestTxConfig::TxTablet0, sender);
}
+ runtime.SetLogPriority(NKikimrServices::TX_COLUMNSHARD, NActors::NLog::PRI_DEBUG);
+
TAutoPtr<IEventHandle> handle;
std::vector<std::pair<ui32, ui64>> specRowsBytes;
specRowsBytes.reserve(specs.size());
+ ui32 deplayedExports = 0;
+ ui32 deplayedForgets = 0;
TCountersContainer counter;
runtime.SetEventFilter(TEventsCounter(counter, runtime));
for (ui32 i = 0; i < specs.size(); ++i) {
+ ui32 numExports = exportSteps.contains(i) ? 1 : 0;
+ ui32 numForgets = forgetSteps.contains(i) ? 1 : 0;
bool hasColdEviction = false;
- for (auto&& i : specs[i].Tiers) {
- if (!!i.S3) {
+ bool misconfig = false;
+ auto expectedReadResult = EExpectedResult::OK;
+ for (auto&& spec : specs[i].Tiers) {
+ if (!!spec.S3) {
hasColdEviction = true;
+ if (spec.S3->GetEndpoint() != "fake") {
+ misconfig = true;
+ // misconfig in export => OK, misconfig after export => ERROR
+ if (i > 1) {
+ expectedReadResult = EExpectedResult::ERROR;
+ }
+ deplayedExports += numExports;
+ deplayedForgets += numForgets;
+ numExports = 0;
+ numForgets = 0;
+ }
break;
}
}
@@ -557,12 +661,37 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt
PlanSchemaTx(runtime, sender, { planStep, txId });
}
}
- if (specs[i].HasTiers()) {
+ if (specs[i].HasTiers() || reboots) {
ProvideTieringSnapshot(runtime, sender, TTestSchema::BuildSnapshot(specs[i]));
}
+ if (!misconfig && (deplayedExports || deplayedForgets)) {
+ UNIT_ASSERT(hasColdEviction);
+ // continue waiting: finish previous step
+ counter.WaitMoreEvents(runtime, exportTimeout, deplayedExports, deplayedForgets);
+ deplayedExports = 0;
+ deplayedForgets = 0;
+ }
+
+ if (eventLoss) {
+ if (*eventLoss == i) {
+ if (numExports) {
+ counter.CaptureEvictResponse = 1;
+ deplayedExports += numExports;
+ numExports = 0;
+ } else if (numForgets) {
+ counter.CaptureForgetResponse = reboots ? 2 : 1;
+ deplayedForgets += numForgets;
+ numForgets = 0;
+ }
+ } else {
+ // Check there would be no troubles with delayed responses
+ counter.ResendCapturedResponses(runtime);
+ }
+ }
+
// Read crossed with eviction (start)
- {
+ if (!misconfig) {
auto read = std::make_unique<TEvColumnShard::TEvRead>(sender, metaShard, planStep-1, Max<ui64>(), tableId);
Proto(read.get()).AddColumnNames(specs[i].TtlColumn);
@@ -575,24 +704,26 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt
TriggerTTL(runtime, sender, { ++planStep, ++txId }, {}, 0, specs[i].TtlColumn);
- Cerr << (hasColdEviction ? "Cold" : "Hot")
- << " tiering, spec " << i << ", num tiers: " << specs[i].Tiers.size() << "\n";
+ Cerr << "-- " << (hasColdEviction ? "COLD" : "HOT")
+ << " TIERING(" << i << ") num tiers: " << specs[i].Tiers.size()
+ << ", exports: " << numExports << ", forgets: " << numForgets
+ << ", delayed exports: " << deplayedExports << ", delayed forgets: " << deplayedForgets << Endl;
- if (hasColdEviction) {
- if (i > initialEviction) {
- counter.WaitEvents(runtime, i, 1, TDuration::Seconds(40));
- } else {
- counter.WaitEvents(runtime, i, 0, TDuration::Seconds(20));
- }
+ if (numExports) {
+ UNIT_ASSERT(hasColdEviction);
+ counter.WaitEvents(runtime, exportTimeout, numExports, 0);
} else {
- counter.WaitEvents(runtime, i, 0, TDuration::Seconds(4));
+ TDuration timeout = hasColdEviction ? TDuration::Seconds(10) : TDuration::Seconds(4);
+ counter.WaitEvents(runtime, timeout, 0, 0);
}
- if (reboots) {
- ProvideTieringSnapshot(runtime, sender, TTestSchema::BuildSnapshot(specs[i]));
+
+ if (numForgets && reboots) {
+ // Do not finish forget before reboot. Check forget would happen after it.
+ counter.BlockForgetsTillReboot();
}
// Read crossed with eviction (finish)
- {
+ if (!misconfig) {
counter.ResendCapturedReads(runtime);
ui32 numBatches = 0;
THashSet<ui32> batchNumbers;
@@ -601,7 +732,8 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt
UNIT_ASSERT(event);
auto& resRead = Proto(event);
- TString data = GetReadResult(resRead, {}, {});
+ TString data = GetReadResult(resRead, EExpectedResult::OK);
+
batchNumbers.insert(resRead.GetBatch());
if (resRead.GetFinished()) {
numBatches = resRead.GetBatch() + 1;
@@ -609,6 +741,16 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt
}
}
+ if (numForgets) {
+ UNIT_ASSERT(hasColdEviction);
+ if (reboots) {
+ Cerr << "INTERMEDIATE REBOOT(" << i << ")" << Endl;
+ RebootTablet(runtime, TTestTxConfig::TxTablet0, sender);
+ ProvideTieringSnapshot(runtime, sender, TTestSchema::BuildSnapshot(specs[i]));
+ }
+ counter.WaitMoreEvents(runtime, exportTimeout, 0, numForgets);
+ }
+
// Read data after eviction
auto read = std::make_unique<TEvColumnShard::TEvRead>(sender, metaShard, planStep-1, Max<ui64>(), tableId);
@@ -616,12 +758,17 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt
ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, read.release());
specRowsBytes.emplace_back(0, 0);
- while (true) {
+ ui32 numBatches = 0;
+ ui32 numExpected = (expectedReadResult == EExpectedResult::ERROR) ? 1 : 100;
+ for (; numBatches < numExpected; ++numBatches) {
auto event = runtime.GrabEdgeEvent<TEvColumnShard::TEvReadResult>(handle);
UNIT_ASSERT(event);
auto& resRead = Proto(event);
- TString data = GetReadResult(resRead, {}, {});
+ TString data = GetReadResult(resRead, expectedReadResult);
+ if (expectedReadResult == EExpectedResult::ERROR) {
+ break;
+ }
if (!data.size()) {
break;
}
@@ -642,6 +789,7 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt
}
if (reboots) {
+ Cerr << "REBOOT(" << i << ")" << Endl;
RebootTablet(runtime, TTestTxConfig::TxTablet0, sender);
}
}
@@ -747,13 +895,32 @@ std::vector<std::pair<ui32, ui64>> TestTiersAndTtl(const TTestSchema::TTableSpec
size_t initialEviction = alters.size();
TEvictionChanges changes;
+ THashSet<ui32> exports;
+ THashSet<ui32> forgets;
if (testTtl) {
changes.AddTtlAlters(spec, {allowBoth, allowOne, allowNone}, alters);
} else {
changes.AddTierAlters(spec, {allowBoth, allowOne, allowNone}, alters);
+
+ for (ui32 i = initialEviction + 1; i < alters.size() - 1; ++i) {
+ for (auto& tier : alters[i].Tiers) {
+ if (tier.S3) {
+ exports.emplace(i);
+ break;
+ }
+ }
+ }
+ for (ui32 i = initialEviction + 2; i < alters.size(); ++i) {
+ for (auto& tier : alters[i].Tiers) {
+ if (tier.S3) {
+ forgets.emplace(i);
+ break;
+ }
+ }
+ }
}
- auto rowsBytes = TestTiers(reboots, blobs, alters, initialEviction);
+ auto rowsBytes = TestTiers(reboots, blobs, alters, exports, forgets);
for (auto&& i : rowsBytes) {
Cerr << i.first << "/" << i.second << Endl;
}
@@ -766,12 +933,50 @@ std::vector<std::pair<ui32, ui64>> TestTiersAndTtl(const TTestSchema::TTableSpec
return rowsBytes;
}
-void TestTwoHotTiers(bool reboot, bool changeTtl, const EInitialEviction initial = EInitialEviction::None) {
+std::vector<std::pair<ui32, ui64>> TestOneTierExport(const TTestSchema::TTableSpecials& spec, bool reboots,
+ std::optional<ui32> misconfig, std::optional<ui32> loss) {
+ const std::vector<ui64> ts = { 1600000000, 1620000000 };
+
+ ui32 overlapSize = 0;
+ std::vector<TString> blobs = MakeData(ts, PORTION_ROWS, overlapSize, spec.TtlColumn);
+
+ TInstant now = TAppData::TimeProvider->Now();
+ TDuration allowBoth = TDuration::Seconds(now.Seconds() - ts[0] + 600);
+ TDuration allowOne = TDuration::Seconds(now.Seconds() - ts[1] + 600);
+ TDuration allowNone = TDuration::Seconds(now.Seconds() - ts[1] - 600);
+
+ std::vector<TTestSchema::TTableSpecials> alters = { TTestSchema::TTableSpecials() };
+
+ TEvictionChanges changes;
+ changes.AddTierAlters(spec, {allowBoth, allowOne, allowNone}, alters);
+ UNIT_ASSERT_VALUES_EQUAL(alters.size(), 4);
+
+ if (misconfig) {
+ // Add error in config => eviction + not finished export
+ UNIT_ASSERT_VALUES_EQUAL(alters[*misconfig].Tiers.size(), 1);
+ UNIT_ASSERT(alters[*misconfig].Tiers[0].S3);
+ alters[*misconfig].Tiers[0].S3->SetEndpoint("nowhere"); // clear special "fake" endpoint
+ }
+
+ auto rowsBytes = TestTiers(reboots, blobs, alters, {1}, {2, 3}, loss);
+ for (auto&& i : rowsBytes) {
+ Cerr << i.first << "/" << i.second << Endl;
+ }
+
+ UNIT_ASSERT_EQUAL(rowsBytes.size(), alters.size());
+ if (!misconfig) {
+ changes.Assert(spec, rowsBytes, 1);
+ }
+ return rowsBytes;
+}
+
+void TestTwoHotTiers(bool reboot, bool changeTtl, const EInitialEviction initial = EInitialEviction::None,
+ bool revCompaction = false) {
TTestSchema::TTableSpecials spec;
spec.SetTtlColumn("timestamp");
spec.Tiers.emplace_back(TTestSchema::TStorageTier("tier0").SetTtlColumn("timestamp"));
spec.Tiers.emplace_back(TTestSchema::TStorageTier("tier1").SetTtlColumn("timestamp"));
- spec.Tiers.back().SetCodec("zstd");
+ spec.Tiers[(revCompaction ? 0 : 1)].SetCodec("zstd");
auto rowsBytes = TestTiersAndTtl(spec, reboot, initial, changeTtl);
if (changeTtl) {
@@ -792,12 +997,16 @@ void TestTwoHotTiers(bool reboot, bool changeTtl, const EInitialEviction initial
UNIT_ASSERT_VALUES_EQUAL(rowsBytes[3].first, PORTION_ROWS);
UNIT_ASSERT_VALUES_EQUAL(rowsBytes[4].first, 0);
- UNIT_ASSERT(rowsBytes[1].second > rowsBytes[2].second); // compression works
+ // compression works
+ if (revCompaction) {
+ UNIT_ASSERT(rowsBytes[1].second < rowsBytes[2].second);
+ } else {
+ UNIT_ASSERT(rowsBytes[1].second > rowsBytes[2].second);
+ }
}
}
void TestHotAndColdTiers(bool reboot, const EInitialEviction initial) {
- const TString bucket = "tiering-test-01";
TPortManager portManager;
const ui16 port = portManager.GetPort();
@@ -808,32 +1017,26 @@ void TestHotAndColdTiers(bool reboot, const EInitialEviction initial) {
spec.SetTtlColumn("timestamp");
spec.Tiers.emplace_back(TTestSchema::TStorageTier("tier0").SetTtlColumn("timestamp"));
spec.Tiers.emplace_back(TTestSchema::TStorageTier("tier1").SetTtlColumn("timestamp"));
- spec.Tiers.back().S3 = NKikimrSchemeOp::TS3Settings();
- auto& s3Config = *spec.Tiers.back().S3;
- {
-
- s3Config.SetScheme(NKikimrSchemeOp::TS3Settings::HTTP);
- s3Config.SetVerifySSL(false);
- s3Config.SetBucket(bucket);
-//#define S3_TEST_USAGE
-#ifdef S3_TEST_USAGE
- s3Config.SetEndpoint("storage.cloud-preprod.yandex.net");
- s3Config.SetAccessKey("...");
- s3Config.SetSecretKey("...");
- s3Config.SetProxyHost("localhost");
- s3Config.SetProxyPort(8080);
- s3Config.SetProxyScheme(NKikimrSchemeOp::TS3Settings::HTTP);
-#else
- s3Config.SetEndpoint("fake");
-#endif
- s3Config.SetRequestTimeoutMs(10000);
- s3Config.SetHttpRequestTimeoutMs(10000);
- s3Config.SetConnectionTimeoutMs(10000);
- }
+ spec.Tiers.back().S3 = TTestSchema::TStorageTier::FakeS3();
TestTiersAndTtl(spec, reboot, initial);
}
+void TestExport(bool reboot, std::optional<ui32> misconfig = {}, std::optional<ui32> loss = {}) {
+ TPortManager portManager;
+ const ui16 port = portManager.GetPort();
+
+ TS3Mock s3Mock({}, TS3Mock::TSettings(port));
+ UNIT_ASSERT(s3Mock.Start());
+
+ TTestSchema::TTableSpecials spec;
+ spec.SetTtlColumn("timestamp");
+ spec.Tiers.emplace_back(TTestSchema::TStorageTier("cold").SetTtlColumn("timestamp"));
+ spec.Tiers.back().S3 = TTestSchema::TStorageTier::FakeS3();
+
+ TestOneTierExport(spec, reboot, misconfig, loss);
+}
+
void TestDrop(bool reboots) {
TTestBasicRuntime runtime;
TTester::Setup(runtime);
@@ -954,6 +1157,79 @@ void TestDropWriteRace() {
PlanCommit(runtime, sender, ++planStep, commitTxId);
}
+void TestCompaction(std::optional<ui32> numWrites = {}) {
+ TTestBasicRuntime runtime;
+ TTester::Setup(runtime);
+
+ TActorId sender = runtime.AllocateEdgeActor();
+ CreateTestBootstrapper(runtime,
+ CreateTestTabletInfo(TTestTxConfig::TxTablet0, TTabletTypes::ColumnShard),
+ &CreateColumnShard);
+
+ TDispatchOptions options;
+ options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot));
+ runtime.DispatchEvents(options);
+
+ // Create table
+
+ ui64 metaShard = TTestTxConfig::TxTablet1;
+ ui64 writeId = 0;
+ ui64 tableId = 1;
+ ui64 planStep = 100;
+ ui64 txId = 100;
+
+ bool ok = ProposeSchemaTx(runtime, sender, TTestSchema::CreateTableTxBody(tableId, testYdbSchema, testYdbPk),
+ {++planStep, ++txId});
+ UNIT_ASSERT(ok);
+ PlanSchemaTx(runtime, sender, {planStep, txId});
+
+ // Set tiering
+
+ ui64 ts = 1620000000;
+ TInstant now = TAppData::TimeProvider->Now();
+ TDuration allow = TDuration::Seconds(now.Seconds() - ts + 3600);
+ TDuration disallow = TDuration::Seconds(now.Seconds() - ts - 3600);
+
+ TTestSchema::TTableSpecials spec;
+ spec.SetTtlColumn("timestamp");
+ spec.Tiers.emplace_back(TTestSchema::TStorageTier("hot").SetTtlColumn("timestamp"));
+ spec.Tiers.back().EvictAfter = disallow;
+ spec.Tiers.emplace_back(TTestSchema::TStorageTier("cold").SetTtlColumn("timestamp"));
+ spec.Tiers.back().EvictAfter = allow;
+ spec.Tiers.back().S3 = TTestSchema::TStorageTier::FakeS3();
+
+ ok = ProposeSchemaTx(runtime, sender, TTestSchema::AlterTableTxBody(tableId, 1, spec),
+ {++planStep, ++txId});
+ UNIT_ASSERT(ok);
+ PlanSchemaTx(runtime, sender, {planStep, txId});
+
+ ProvideTieringSnapshot(runtime, sender, TTestSchema::BuildSnapshot(spec));
+
+ // Writes
+
+ std::vector<TString> blobs = MakeData({ts, ts}, PORTION_ROWS, 0, spec.TtlColumn);
+ const TString& triggerData = blobs[0];
+ //UNIT_ASSERT(triggerData.size() > NColumnShard::TLimits::MIN_BYTES_TO_INSERT);
+ //UNIT_ASSERT(triggerData.size() < NColumnShard::TLimits::GetMaxBlobSize());
+
+ if (!numWrites) {
+ numWrites = 4 * NOlap::TCompactionLimits().GranuleExpectedSize / triggerData.size();
+ }
+
+ ++planStep;
+ ++txId;
+ for (ui32 i = 0; i < *numWrites; ++i, ++writeId, ++planStep, ++txId) {
+ UNIT_ASSERT(WriteData(runtime, sender, metaShard, writeId, tableId, triggerData));
+
+ ProposeCommit(runtime, sender, metaShard, txId, {writeId});
+ PlanCommit(runtime, sender, planStep, txId);
+
+ if (i % 2 == 0) {
+ TriggerTTL(runtime, sender, {++planStep, ++txId}, {}, 0, spec.TtlColumn);
+ }
+ }
+}
+
}
namespace NColumnShard {
@@ -1117,6 +1393,14 @@ Y_UNIT_TEST_SUITE(TColumnShardTestSchema) {
TestTwoHotTiers(true, false);
}
+ Y_UNIT_TEST(HotTiersRevCompression) {
+ TestTwoHotTiers(false, false, EInitialEviction::None, true);
+ }
+
+ Y_UNIT_TEST(RebootHotTiersRevCompression) {
+ TestTwoHotTiers(true, false, EInitialEviction::None, true);
+ }
+
Y_UNIT_TEST(HotTiersTtl) {
NColumnShard::gAllowLogBatchingDefaultValue = false;
TestTwoHotTiers(false, true);
@@ -1162,7 +1446,55 @@ Y_UNIT_TEST_SUITE(TColumnShardTestSchema) {
TestHotAndColdTiers(true, EInitialEviction::Ttl);
}
- // TODO: EnableTtlAfterColdTiers
+ Y_UNIT_TEST(OneColdTier) {
+ TestExport(false);
+ }
+
+ Y_UNIT_TEST(RebootOneColdTier) {
+ TestExport(true);
+ }
+
+ Y_UNIT_TEST(ExportAfterFail) {
+ TestExport(false, 1);
+ }
+
+ Y_UNIT_TEST(RebootExportAfterFail) {
+ TestExport(true, 1);
+ }
+
+ Y_UNIT_TEST(ForgetAfterFail) {
+ TestExport(false, 2);
+ }
+
+ Y_UNIT_TEST(RebootForgetAfterFail) {
+ TestExport(true, 2);
+ }
+
+ Y_UNIT_TEST(ExportWithLostAnswer) {
+ TestExport(false, {}, 1);
+ }
+
+ Y_UNIT_TEST(RebootExportWithLostAnswer) {
+ TestExport(true, {}, 1);
+ }
+
+ Y_UNIT_TEST(ForgetWithLostAnswer) {
+ TestExport(false, {}, 2);
+ }
+
+ Y_UNIT_TEST(RebootForgettWithLostAnswer) {
+ TestExport(true, {}, 2);
+ }
+
+ // TODO: LastTierBorderIsTtl = false
+
+ // TODO: DisableTierAfterExport
+ // TODO: ReenableTierAfterExport
+ // TODO: AlterTierBorderAfterExport
+
+ Y_UNIT_TEST(ColdCompactionSmoke) {
+ TestCompaction();
+ }
Y_UNIT_TEST(Drop) {
TestDrop(false);
diff --git a/ydb/core/tx/coordinator/coordinator__init.cpp b/ydb/core/tx/coordinator/coordinator__init.cpp
index beb9975b42..0a06269b58 100644
--- a/ydb/core/tx/coordinator/coordinator__init.cpp
+++ b/ydb/core/tx/coordinator/coordinator__init.cpp
@@ -13,6 +13,8 @@ struct TTxCoordinator::TTxInit : public TTransactionBase<TTxCoordinator> {
ui64 PlanResolution;
ui64 LastPlanned = 0;
ui64 LastAcquired = 0;
+ TActorId LastBlockedActor;
+ ui64 LastBlockedStep = 0;
TTxInit(TSelf *coordinator)
: TBase(coordinator)
@@ -27,6 +29,7 @@ struct TTxCoordinator::TTxInit : public TTransactionBase<TTxCoordinator> {
ready &= LoadDomainConfiguration(db);
ready &= LoadLastPlanned(db);
ready &= LoadLastAcquired(db);
+ ready &= LoadLastBlocked(db);
return ready;
}
@@ -63,26 +66,29 @@ struct TTxCoordinator::TTxInit : public TTransactionBase<TTxCoordinator> {
}
bool LoadLastPlanned(NIceDb::TNiceDb &db) {
- auto rowset = db.Table<Schema::State>().Key(Schema::State::KeyLastPlanned).Select<Schema::State::StateValue>();
-
- if (!rowset.IsReady())
- return false;
-
- if (rowset.IsValid())
- LastPlanned = rowset.GetValue<Schema::State::StateValue>();
-
- return true;
+ return Schema::LoadState(db, Schema::State::KeyLastPlanned, LastPlanned);
}
bool LoadLastAcquired(NIceDb::TNiceDb &db) {
- auto rowset = db.Table<Schema::State>().Key(Schema::State::AcquireReadStepLast).Select<Schema::State::StateValue>();
+ return Schema::LoadState(db, Schema::State::AcquireReadStepLast, LastAcquired);
+ }
- if (!rowset.IsReady())
- return false;
+ bool LoadLastBlocked(NIceDb::TNiceDb &db) {
+ ui64 x1 = 0;
+ ui64 x2 = 0;
+ ui64 step = 0;
- if (rowset.IsValid())
- LastAcquired = rowset.GetValue<Schema::State::StateValue>();
+ bool ready = true;
+ ready &= Schema::LoadState(db, Schema::State::LastBlockedActorX1, x1);
+ ready &= Schema::LoadState(db, Schema::State::LastBlockedActorX2, x2);
+ ready &= Schema::LoadState(db, Schema::State::LastBlockedStep, step);
+ if (!ready) {
+ return false;
+ }
+
+ LastBlockedActor = TActorId(x1, x2);
+ LastBlockedStep = step;
return true;
}
@@ -101,6 +107,12 @@ struct TTxCoordinator::TTxInit : public TTransactionBase<TTxCoordinator> {
}
void Complete(const TActorContext &ctx) override {
+ // Assume worst case, everything up to LastBlockedStep was planned
+ LastPlanned = Max(LastPlanned, LastBlockedStep);
+
+ // Assume worst case, last planned step was also acquired
+ LastAcquired = Max(LastAcquired, LastPlanned);
+
Self->VolatileState.LastPlanned = LastPlanned;
Self->VolatileState.LastSentStep = LastPlanned;
Self->VolatileState.LastAcquired = LastAcquired;
diff --git a/ydb/core/tx/coordinator/coordinator__schema_upgrade.cpp b/ydb/core/tx/coordinator/coordinator__schema_upgrade.cpp
index 931a7c993e..02980bf2bc 100644
--- a/ydb/core/tx/coordinator/coordinator__schema_upgrade.cpp
+++ b/ydb/core/tx/coordinator/coordinator__schema_upgrade.cpp
@@ -18,19 +18,17 @@ struct TTxCoordinator::TTxUpgrade : public TTransactionBase<TTxCoordinator> {
bool Execute(TTransactionContext &txc, const TActorContext& ctx) override {
NIceDb::TNiceDb db(txc.DB);
-
- auto row = db.Table<Schema::State>().Key(Schema::State::DatabaseVersion).Select<Schema::State::StateValue>();
- if (!row.IsReady()) {
+ std::optional<ui64> databaseVersion;
+ if (!Schema::LoadState(db, Schema::State::DatabaseVersion, databaseVersion)) {
return false;
}
- if (!row.IsValid()) {
- db.Table<Schema::State>().Key(Schema::State::DatabaseVersion).Update(NIceDb::TUpdate<Schema::State::StateValue>(Schema::CurrentVersion));
+ if (!databaseVersion) {
+ Schema::SaveState(db, Schema::State::DatabaseVersion, Schema::CurrentVersion);
return true;
}
- Schema::State::StateValue::Type databaseVersion = row.GetValue<Schema::State::StateValue>();
- if (Schema::CurrentVersion == databaseVersion) {
+ if (*databaseVersion == Schema::CurrentVersion) {
return true;
}
@@ -38,7 +36,7 @@ struct TTxCoordinator::TTxUpgrade : public TTransactionBase<TTxCoordinator> {
FLOG_LOG_S(ctx, NActors::NLog::PRI_CRIT, NKikimrServices::TX_COORDINATOR,
"tablet# " << Self->Tablet() <<
" SEND to self TEvents::TEvPoisonPill" <<
- " databaseVersion# " << databaseVersion <<
+ " databaseVersion# " << *databaseVersion <<
" CurrentDataBaseVersion# " << Schema::CurrentVersion <<
" reason# no realisation for upgrade scheme present");
return true;
diff --git a/ydb/core/tx/coordinator/coordinator_impl.cpp b/ydb/core/tx/coordinator/coordinator_impl.cpp
index 64e01eeaf4..b48ef4615e 100644
--- a/ydb/core/tx/coordinator/coordinator_impl.cpp
+++ b/ydb/core/tx/coordinator/coordinator_impl.cpp
@@ -45,8 +45,6 @@ static TAutoPtr<TTransactionProposal> MakeTransactionProposal(TEvTxProxy::TEvPro
return proposal;
}
-const ui32 TTxCoordinator::Schema::CurrentVersion = 1;
-
TTxCoordinator::TTxCoordinator(TTabletStorageInfo *info, const TActorId &tablet)
: TActor(&TThis::StateInit)
, TTabletExecutedFlat(info, tablet, new NMiniKQL::TMiniKQLFactory)
diff --git a/ydb/core/tx/coordinator/coordinator_impl.h b/ydb/core/tx/coordinator/coordinator_impl.h
index 075fd89e81..02ef0c8118 100644
--- a/ydb/core/tx/coordinator/coordinator_impl.h
+++ b/ydb/core/tx/coordinator/coordinator_impl.h
@@ -386,7 +386,7 @@ class TTxCoordinator : public TActor<TTxCoordinator>, public TTabletExecutedFlat
public:
struct Schema : NIceDb::Schema {
- static const ui32 CurrentVersion;
+ static constexpr ui64 CurrentVersion = 1;
struct Transaction : Table<0> {
struct ID : Column<0, NScheme::NTypeIds::Uint64> {}; // PK
@@ -407,10 +407,13 @@ public:
};
struct State : Table<2> {
- enum EKeyType {
- KeyLastPlanned,
- DatabaseVersion,
- AcquireReadStepLast,
+ enum EKeyType : ui64 {
+ KeyLastPlanned = 0,
+ DatabaseVersion = 1,
+ AcquireReadStepLast = 2,
+ LastBlockedActorX1 = 3,
+ LastBlockedActorX2 = 4,
+ LastBlockedStep = 5,
};
struct StateKey : Column<0, NScheme::NTypeIds::Uint64> { using Type = EKeyType; }; // PK
@@ -431,6 +434,37 @@ public:
};
using TTables = SchemaTables<Transaction, AffectedSet, State, DomainConfiguration>;
+
+ template<class TCallback>
+ static bool LoadState(NIceDb::TNiceDb& db, State::EKeyType key, TCallback&& callback) {
+ auto rowset = db.Table<State>().Key(key).Select<State::StateValue>();
+
+ if (!rowset.IsReady()) {
+ return false;
+ }
+
+ if (rowset.IsValid()) {
+ callback(rowset.GetValue<State::StateValue>());
+ }
+
+ return true;
+ }
+
+ static bool LoadState(NIceDb::TNiceDb& db, State::EKeyType key, std::optional<ui64>& out) {
+ return LoadState(db, key, [&out](ui64 value) {
+ out.emplace(value);
+ });
+ }
+
+ static bool LoadState(NIceDb::TNiceDb& db, State::EKeyType key, ui64& out) {
+ return LoadState(db, key, [&out](ui64 value) {
+ out = value;
+ });
+ }
+
+ static void SaveState(NIceDb::TNiceDb& db, State::EKeyType key, ui64 value) {
+ db.Table<State>().Key(key).Update<State::StateValue>(value);
+ }
};
private:
diff --git a/ydb/core/tx/datashard/cdc_stream_scan.cpp b/ydb/core/tx/datashard/cdc_stream_scan.cpp
index 8d65d8ba5d..aff7bd01f4 100644
--- a/ydb/core/tx/datashard/cdc_stream_scan.cpp
+++ b/ydb/core/tx/datashard/cdc_stream_scan.cpp
@@ -163,6 +163,7 @@ class TDataShard::TTxCdcStreamScanProgress
TDataShard::TEvPrivate::TEvCdcStreamScanProgress::TPtr Request;
THolder<TDataShard::TEvPrivate::TEvCdcStreamScanContinue> Response;
TVector<IDataShardChangeCollector::TChange> ChangeRecords;
+ bool Reschedule = false;
static TVector<TRawTypeValue> MakeKey(TArrayRef<const TCell> cells, TUserTable::TCPtr table) {
TVector<TRawTypeValue> key(Reserve(cells.size()));
@@ -219,17 +220,30 @@ public:
LOG_D("Progress"
<< ": streamPathId# " << streamPathId);
- if (Self->CheckChangesQueueOverflow()) {
+ if (!Self->GetUserTables().contains(tablePathId.LocalPathId)) {
+ LOG_W("Cannot progress on unknown table"
+ << ": tablePathId# " << tablePathId);
return true;
}
- Y_VERIFY(Self->GetUserTables().contains(tablePathId.LocalPathId));
auto table = Self->GetUserTables().at(tablePathId.LocalPathId);
auto it = table->CdcStreams.find(streamPathId);
- Y_VERIFY(it != table->CdcStreams.end());
+ if (it == table->CdcStreams.end()) {
+ LOG_W("Cannot progress on unknown cdc stream"
+ << ": streamPathId# " << streamPathId);
+ return true;
+ }
+
+ ChangeRecords.clear();
+ if (Self->CheckChangesQueueOverflow()) {
+ Reschedule = true;
+ return true;
+ }
NIceDb::TNiceDb db(txc.DB);
+ bool pageFault = false;
+
for (const auto& [k, v] : ev.Rows) {
const auto key = MakeKey(k.GetCells(), table);
const auto& keyTags = table->KeyColumnIds;
@@ -238,10 +252,10 @@ public:
TSelectStats stats;
auto ready = txc.DB.Select(table->LocalTid, key, {}, row, stats, 0, readVersion);
if (ready == EReady::Page) {
- return false;
+ pageFault = true;
}
- if (ready == EReady::Gone || stats.InvisibleRowSkips) {
+ if (pageFault || ready == EReady::Gone || stats.InvisibleRowSkips) {
continue;
}
@@ -293,6 +307,10 @@ public:
Self->PersistChangeRecord(db, record);
}
+ if (pageFault) {
+ return false;
+ }
+
if (ev.Rows) {
const auto& [key, _] = ev.Rows.back();
@@ -315,12 +333,12 @@ public:
Self->EnqueueChangeRecords(std::move(ChangeRecords));
ctx.Send(Request->Sender, Response.Release());
- } else {
- LOG_I("Re-run progress tx"
+ } else if (Reschedule) {
+ LOG_I("Re-schedule progress tx"
<< ": streamPathId# " << Request->Get()->StreamPathId);
// re-schedule tx
- ctx.Schedule(TDuration::Seconds(1), Request->Release().Release());
+ ctx.TActivationContext::Schedule(TDuration::Seconds(1), Request->Forward(ctx.SelfID));
}
}
diff --git a/ydb/core/tx/datashard/change_collector_cdc_stream.cpp b/ydb/core/tx/datashard/change_collector_cdc_stream.cpp
index 56d000a623..326abd1a89 100644
--- a/ydb/core/tx/datashard/change_collector_cdc_stream.cpp
+++ b/ydb/core/tx/datashard/change_collector_cdc_stream.cpp
@@ -209,7 +209,7 @@ bool TCdcStreamChangeCollector::Collect(const TTableId& tableId, ERowOp rop,
}
}
} else {
- Y_FAIL_S("Cannot retrieve cdc stream scan info: " << pathId);
+ // nop, scan is completed
}
break;
default:
diff --git a/ydb/core/tx/datashard/change_sender_async_index.cpp b/ydb/core/tx/datashard/change_sender_async_index.cpp
index e90343c601..7b19d72dfd 100644
--- a/ydb/core/tx/datashard/change_sender_async_index.cpp
+++ b/ydb/core/tx/datashard/change_sender_async_index.cpp
@@ -188,12 +188,33 @@ class TAsyncIndexChangeSenderShard: public TActorBootstrapped<TAsyncIndexChangeS
}
}
+ bool CanRetry() const {
+ return Attempt < MaxAttempts;
+ }
+
+ void Retry() {
+ ++Attempt;
+ Delay = Min(2 * Delay, MaxDelay);
+
+ LOG_N("Retry"
+ << ": attempt# " << Attempt
+ << ", delay# " << Delay);
+
+ const auto random = TDuration::FromValue(TAppData::RandomProvider->GenRand64() % Delay.MicroSeconds());
+ Schedule(Delay + random, new TEvents::TEvWakeup());
+ }
+
void Handle(TEvPipeCache::TEvDeliveryProblem::TPtr& ev) {
if (ShardId != ev->Get()->TabletId) {
return;
}
- Leave();
+ if (CanRetry()) {
+ Unlink();
+ Retry();
+ } else {
+ Leave();
+ }
}
void Handle(NMon::TEvRemoteHttpInfo::TPtr& ev) {
@@ -222,11 +243,14 @@ class TAsyncIndexChangeSenderShard: public TActorBootstrapped<TAsyncIndexChangeS
PassAway();
}
- void PassAway() override {
+ void Unlink() {
if (LeaderPipeCache) {
Send(LeaderPipeCache, new TEvPipeCache::TEvUnlink(ShardId));
}
+ }
+ void PassAway() override {
+ Unlink();
TActorBootstrapped::PassAway();
}
@@ -254,6 +278,7 @@ public:
switch (ev->GetTypeRewrite()) {
hFunc(TEvPipeCache::TEvDeliveryProblem, Handle);
hFunc(NMon::TEvRemoteHttpInfo, Handle);
+ sFunc(TEvents::TEvWakeup, Handshake);
sFunc(TEvents::TEvPoison, PassAway);
}
}
@@ -269,6 +294,12 @@ private:
TActorId LeaderPipeCache;
ui64 LastRecordOrder;
+ // Retry on delivery problem
+ static constexpr ui32 MaxAttempts = 3;
+ static constexpr auto MaxDelay = TDuration::MilliSeconds(50);
+ ui32 Attempt = 0;
+ TDuration Delay = TDuration::MilliSeconds(10);
+
}; // TAsyncIndexChangeSenderShard
class TAsyncIndexChangeSenderMain
@@ -624,8 +655,11 @@ class TAsyncIndexChangeSenderMain
return Retry();
}
+ const bool versionChanged = !IndexTableVersion || IndexTableVersion != entry.GeneralVersion;
+ IndexTableVersion = entry.GeneralVersion;
+
KeyDesc = std::move(entry.KeyDescription);
- CreateSenders(MakePartitionIds(KeyDesc->GetPartitions()));
+ CreateSenders(MakePartitionIds(KeyDesc->GetPartitions()), versionChanged);
Become(&TThis::StateMain);
}
@@ -723,6 +757,7 @@ public:
: TActorBootstrapped()
, TBaseChangeSender(this, this, dataShard, indexPathId)
, UserTableId(userTableId)
+ , IndexTableVersion(0)
{
}
@@ -751,6 +786,7 @@ private:
TMap<TTag, TTag> TagMap; // from main to index
TPathId IndexTablePathId;
+ ui64 IndexTableVersion;
THolder<TKeyDesc> KeyDesc;
}; // TAsyncIndexChangeSenderMain
diff --git a/ydb/core/tx/datashard/change_sender_common_ops.cpp b/ydb/core/tx/datashard/change_sender_common_ops.cpp
index 68094c6dad..4d8ab339b8 100644
--- a/ydb/core/tx/datashard/change_sender_common_ops.cpp
+++ b/ydb/core/tx/datashard/change_sender_common_ops.cpp
@@ -8,7 +8,7 @@
namespace NKikimr::NDataShard {
-void TBaseChangeSender::CreateSenders(const TVector<ui64>& partitionIds) {
+void TBaseChangeSender::CreateMissingSenders(const TVector<ui64>& partitionIds) {
THashMap<ui64, TSender> senders;
for (const auto& partitionId : partitionIds) {
@@ -32,6 +32,24 @@ void TBaseChangeSender::CreateSenders(const TVector<ui64>& partitionIds) {
}
Senders = std::move(senders);
+}
+
+void TBaseChangeSender::RecreateSenders(const TVector<ui64>& partitionIds) {
+ for (const auto& partitionId : partitionIds) {
+ Y_VERIFY(!Senders.contains(partitionId));
+ auto& sender = Senders[partitionId];
+ sender.ActorId = ActorOps->Register(CreateSender(partitionId));
+ }
+}
+
+void TBaseChangeSender::CreateSenders(const TVector<ui64>& partitionIds, bool partitioningChanged) {
+ if (partitioningChanged) {
+ CreateMissingSenders(partitionIds);
+ } else {
+ RecreateSenders(GonePartitions);
+ }
+
+ GonePartitions.clear();
if (!Enqueued || !RequestRecords()) {
SendRecords();
@@ -199,6 +217,7 @@ void TBaseChangeSender::OnGone(ui64 partitionId) {
}
Senders.erase(it);
+ GonePartitions.push_back(partitionId);
if (Resolver->IsResolving()) {
return;
diff --git a/ydb/core/tx/datashard/change_sender_common_ops.h b/ydb/core/tx/datashard/change_sender_common_ops.h
index a5de292ecd..26f5f6efed 100644
--- a/ydb/core/tx/datashard/change_sender_common_ops.h
+++ b/ydb/core/tx/datashard/change_sender_common_ops.h
@@ -57,7 +57,7 @@ class IChangeSender {
public:
virtual ~IChangeSender() = default;
- virtual void CreateSenders(const TVector<ui64>& partitionIds) = 0;
+ virtual void CreateSenders(const TVector<ui64>& partitionIds, bool partitioningChanged = true) = 0;
virtual void KillSenders() = 0;
virtual IActor* CreateSender(ui64 partitionId) = 0;
virtual void RemoveRecords() = 0;
@@ -89,6 +89,9 @@ class TBaseChangeSender: public IChangeSender {
TVector<TEnqueuedRecord> Pending;
};
+ void CreateMissingSenders(const TVector<ui64>& partitionIds);
+ void RecreateSenders(const TVector<ui64>& partitionIds);
+
bool RequestRecords();
void SendRecords();
@@ -103,7 +106,7 @@ protected:
ActorOps->Send(DataShard.ActorId, new TEvChangeExchange::TEvRemoveRecords(std::move(remove)));
}
- void CreateSenders(const TVector<ui64>& partitionIds) override;
+ void CreateSenders(const TVector<ui64>& partitionIds, bool partitioningChanged = true) override;
void KillSenders() override;
void RemoveRecords() override;
@@ -135,6 +138,8 @@ private:
TSet<TRequestedRecord> PendingBody;
TMap<ui64, TChangeRecord> PendingSent; // ui64 is order
+ TVector<ui64> GonePartitions;
+
}; // TBaseChangeSender
struct TSchemeCacheHelpers {
diff --git a/ydb/core/tx/datashard/datashard.cpp b/ydb/core/tx/datashard/datashard.cpp
index 43f1b96c91..c7e64c504a 100644
--- a/ydb/core/tx/datashard/datashard.cpp
+++ b/ydb/core/tx/datashard/datashard.cpp
@@ -147,7 +147,7 @@ TDataShard::TDataShard(const TActorId &tablet, TTabletStorageInfo *info)
, TtlReadAheadHi(0, 0, 128*1024*1024)
, EnablePrioritizedMvccSnapshotReads(1, 0, 1)
, EnableUnprotectedMvccSnapshotReads(1, 0, 1)
- , EnableLockedWrites(0, 0, 1)
+ , EnableLockedWrites(1, 0, 1)
, MaxLockedWritesPerKey(1000, 0, 1000000)
, EnableLeaderLeases(1, 0, 1)
, MinLeaderLeaseDurationUs(250000, 1000, 5000000)
diff --git a/ydb/core/tx/datashard/datashard__read_iterator.cpp b/ydb/core/tx/datashard/datashard__read_iterator.cpp
index b5d62ba07b..01587ce4bd 100644
--- a/ydb/core/tx/datashard/datashard__read_iterator.cpp
+++ b/ydb/core/tx/datashard/datashard__read_iterator.cpp
@@ -435,6 +435,11 @@ public:
// note that FirstUnprocessedQuery is unsigned and if we do reverse iteration,
// then it will also become less than size() when finished
while (FirstUnprocessedQuery < State.Request->Ranges.size()) {
+ if (ReachedTotalRowsLimit()) {
+ FirstUnprocessedQuery = -1;
+ return true;
+ }
+
if (ShouldStop())
return true;
@@ -464,6 +469,11 @@ public:
// note that FirstUnprocessedQuery is unsigned and if we do reverse iteration,
// then it will also become less than size() when finished
while (FirstUnprocessedQuery < State.Request->Keys.size()) {
+ if (ReachedTotalRowsLimit()) {
+ FirstUnprocessedQuery = -1;
+ return true;
+ }
+
if (ShouldStop())
return true;
@@ -631,6 +641,7 @@ public:
}
void UpdateState(TReadIteratorState& state) {
+ state.TotalRows += RowsRead;
state.FirstUnprocessedQuery = FirstUnprocessedQuery;
state.LastProcessedKey = LastProcessedKey;
state.ConsumeSeqNo(RowsRead, BytesInResult);
@@ -665,6 +676,27 @@ private:
return RowsRead >= State.MaxRowsInResult;
}
+ bool ReachedTotalRowsLimit() const {
+ if (State.TotalRowsLimit == Max<ui64>()) {
+ return false;
+ }
+
+ return State.TotalRows + RowsRead >= State.TotalRowsLimit;
+ }
+
+ ui64 GetTotalRowsLeft() const {
+ if (State.TotalRowsLimit == Max<ui64>()) {
+ return Max<ui64>();
+ }
+
+ if (State.TotalRows + RowsRead >= State.TotalRowsLimit) {
+ return 0;
+ }
+
+
+ return State.TotalRowsLimit - State.TotalRows - RowsRead;
+ }
+
bool ShouldStop() {
if (!CanResume()) {
return false;
@@ -690,6 +722,8 @@ private:
bytesLeft = State.Quota.Bytes - BlockBuilder.Bytes();
}
+ rowsLeft = Min(rowsLeft, GetTotalRowsLeft());
+
auto direction = reverse ? NTable::EDirection::Reverse : NTable::EDirection::Forward;
return db.Precharge(TableInfo.LocalTid,
keyFrom,
@@ -721,6 +755,10 @@ private:
Self->GetKeyAccessSampler()->AddSample(TableId, rowKey.Cells());
+ if (ReachedTotalRowsLimit()) {
+ break;
+ }
+
if (ShouldStop()) {
return EReadStatus::StoppedByLimit;
}
@@ -1207,6 +1245,9 @@ public:
if (record.HasMaxRowsInResult())
state.MaxRowsInResult = record.GetMaxRowsInResult();
+ if (record.HasTotalRowsLimit())
+ state.TotalRowsLimit = record.GetTotalRowsLimit();
+
if (record.HasSnapshot()) {
state.ReadVersion.Step = record.GetSnapshot().GetStep();
state.ReadVersion.TxId = record.GetSnapshot().GetTxId();
diff --git a/ydb/core/tx/datashard/datashard_impl.h b/ydb/core/tx/datashard/datashard_impl.h
index 5742488661..38531d3d82 100644
--- a/ydb/core/tx/datashard/datashard_impl.h
+++ b/ydb/core/tx/datashard/datashard_impl.h
@@ -330,7 +330,7 @@ class TDataShard
EvMediatorRestoreBackup,
EvRemoveLockChangeRecords,
EvCdcStreamScanRegistered,
- EvCdcStreamScanProgress,
+ EvCdcStreamScanProgress, // WARNING: tests use ES_PRIVATE + 24
EvCdcStreamScanContinue,
EvRestartOperation, // used to restart after an aborted scan (e.g. backup)
EvChangeExchangeExecuteHandshakes,
diff --git a/ydb/core/tx/datashard/datashard_ut_change_exchange.cpp b/ydb/core/tx/datashard/datashard_ut_change_exchange.cpp
index dc5249d84c..ec8ba239ab 100644
--- a/ydb/core/tx/datashard/datashard_ut_change_exchange.cpp
+++ b/ydb/core/tx/datashard/datashard_ut_change_exchange.cpp
@@ -2366,6 +2366,206 @@ Y_UNIT_TEST_SUITE(Cdc) {
});
}
+ Y_UNIT_TEST(InitialScanAndLimits) {
+ TPortManager portManager;
+ TServer::TPtr server = new TServer(TServerSettings(portManager.GetPort(2134), {}, DefaultPQConfig())
+ .SetUseRealThreads(false)
+ .SetDomainName("Root")
+ .SetEnableChangefeedInitialScan(true)
+ .SetChangesQueueItemsLimit(1)
+ );
+
+ auto& runtime = *server->GetRuntime();
+ const auto edgeActor = runtime.AllocateEdgeActor();
+
+ SetupLogging(runtime);
+ InitRoot(server, edgeActor);
+ CreateShardedTable(server, edgeActor, "/Root", "Table", SimpleTable());
+
+ ExecSQL(server, edgeActor, R"(
+ UPSERT INTO `/Root/Table` (key, value) VALUES
+ (1, 10),
+ (2, 20),
+ (3, 30);
+ )");
+
+ TVector<THolder<IEventHandle>> delayed;
+ ui32 progressCount = 0;
+
+ auto prevObserver = runtime.SetObserverFunc([&](TTestActorRuntimeBase&, TAutoPtr<IEventHandle>& ev) {
+ static constexpr ui32 EvCdcStreamScanProgress = EventSpaceBegin(TKikimrEvents::ES_PRIVATE) + 24;
+
+ switch (ev->GetTypeRewrite()) {
+ case TEvDataShard::EvCdcStreamScanRequest:
+ if (auto* msg = ev->Get<TEvDataShard::TEvCdcStreamScanRequest>()) {
+ msg->Record.MutableLimits()->SetBatchMaxRows(1);
+ } else {
+ UNIT_ASSERT(false);
+ }
+ break;
+
+ case TEvChangeExchange::EvEnqueueRecords:
+ delayed.emplace_back(ev.Release());
+ return TTestActorRuntime::EEventAction::DROP;
+
+ case EvCdcStreamScanProgress:
+ ++progressCount;
+ break;
+ }
+
+ return TTestActorRuntime::EEventAction::PROCESS;
+ });
+
+ WaitTxNotification(server, edgeActor, AsyncAlterAddStream(server, "/Root", "Table",
+ WithInitialScan(Updates(NKikimrSchemeOp::ECdcStreamFormatJson))));
+
+ if (delayed.empty()) {
+ TDispatchOptions opts;
+ opts.FinalEvents.emplace_back([&delayed, &progressCount](IEventHandle&) {
+ return !delayed.empty() && progressCount >= 2;
+ });
+ runtime.DispatchEvents(opts);
+ }
+
+ runtime.SetObserverFunc(prevObserver);
+ for (auto& ev : std::exchange(delayed, TVector<THolder<IEventHandle>>())) {
+ runtime.Send(ev.Release(), 0, true);
+ }
+
+ WaitForContent(server, edgeActor, "/Root/Table/Stream", {
+ R"({"update":{"value":10},"key":[1]})",
+ R"({"update":{"value":20},"key":[2]})",
+ R"({"update":{"value":30},"key":[3]})",
+ });
+ }
+
+ Y_UNIT_TEST(InitialScanComplete) {
+ TPortManager portManager;
+ TServer::TPtr server = new TServer(TServerSettings(portManager.GetPort(2134), {}, DefaultPQConfig())
+ .SetUseRealThreads(false)
+ .SetDomainName("Root")
+ .SetEnableChangefeedInitialScan(true)
+ );
+
+ auto& runtime = *server->GetRuntime();
+ const auto edgeActor = runtime.AllocateEdgeActor();
+
+ SetupLogging(runtime);
+ InitRoot(server, edgeActor);
+ CreateShardedTable(server, edgeActor, "/Root", "Table", SimpleTable());
+
+ ExecSQL(server, edgeActor, R"(
+ UPSERT INTO `/Root/Table` (key, value) VALUES
+ (1, 10),
+ (2, 20);
+ )");
+
+ THolder<IEventHandle> delayed;
+ auto prevObserver = runtime.SetObserverFunc([&](TTestActorRuntimeBase&, TAutoPtr<IEventHandle>& ev) {
+ if (ev->GetTypeRewrite() == NSchemeShard::TEvSchemeShard::EvModifySchemeTransaction) {
+ auto* msg = ev->Get<NSchemeShard::TEvSchemeShard::TEvModifySchemeTransaction>();
+ const auto& tx = msg->Record.GetTransaction(0);
+ if (tx.HasAlterCdcStream() && tx.GetAlterCdcStream().HasGetReady()) {
+ delayed.Reset(ev.Release());
+ return TTestActorRuntime::EEventAction::DROP;
+ }
+ }
+
+ return TTestActorRuntime::EEventAction::PROCESS;
+ });
+
+ WaitTxNotification(server, edgeActor, AsyncAlterAddStream(server, "/Root", "Table",
+ WithInitialScan(Updates(NKikimrSchemeOp::ECdcStreamFormatJson))));
+
+ if (!delayed) {
+ TDispatchOptions opts;
+ opts.FinalEvents.emplace_back([&delayed](IEventHandle&) {
+ return bool(delayed);
+ });
+ runtime.DispatchEvents(opts);
+ }
+
+ ExecSQL(server, edgeActor, R"(
+ UPSERT INTO `/Root/Table` (key, value) VALUES
+ (3, 30),
+ (4, 40);
+ )");
+
+ runtime.SetObserverFunc(prevObserver);
+ runtime.Send(delayed.Release(), 0, true);
+
+ WaitForContent(server, edgeActor, "/Root/Table/Stream", {
+ R"({"update":{"value":10},"key":[1]})",
+ R"({"update":{"value":20},"key":[2]})",
+ R"({"update":{"value":30},"key":[3]})",
+ R"({"update":{"value":40},"key":[4]})",
+ });
+ }
+
+ Y_UNIT_TEST(InitialScanRacyProgressAndDrop) {
+ TPortManager portManager;
+ TServer::TPtr server = new TServer(TServerSettings(portManager.GetPort(2134), {}, DefaultPQConfig())
+ .SetUseRealThreads(false)
+ .SetDomainName("Root")
+ .SetEnableChangefeedInitialScan(true)
+ .SetChangesQueueItemsLimit(1)
+ );
+
+ auto& runtime = *server->GetRuntime();
+ const auto edgeActor = runtime.AllocateEdgeActor();
+
+ SetupLogging(runtime);
+ InitRoot(server, edgeActor);
+ CreateShardedTable(server, edgeActor, "/Root", "Table", SimpleTable());
+
+ ExecSQL(server, edgeActor, R"(
+ UPSERT INTO `/Root/Table` (key, value) VALUES
+ (1, 10),
+ (2, 20),
+ (3, 30);
+ )");
+
+ bool delayProgress = true;
+ ui32 progressCount = 0;
+ TVector<THolder<IEventHandle>> delayed;
+
+ auto prevObserver = runtime.SetObserverFunc([&](TTestActorRuntimeBase&, TAutoPtr<IEventHandle>& ev) {
+ static constexpr ui32 EvCdcStreamScanProgress = EventSpaceBegin(TKikimrEvents::ES_PRIVATE) + 24;
+ if (ev->GetTypeRewrite() == EvCdcStreamScanProgress) {
+ ++progressCount;
+ if (delayProgress) {
+ delayed.emplace_back(ev.Release());
+ return TTestActorRuntime::EEventAction::DROP;
+ }
+ }
+
+ return TTestActorRuntime::EEventAction::PROCESS;
+ });
+
+ auto waitProgress = [&](ui32 count) {
+ if (progressCount != count) {
+ TDispatchOptions opts;
+ opts.FinalEvents.emplace_back([&progressCount, count](IEventHandle&) {
+ return progressCount == count;
+ });
+ runtime.DispatchEvents(opts);
+ }
+ };
+
+ WaitTxNotification(server, edgeActor, AsyncAlterAddStream(server, "/Root", "Table",
+ WithInitialScan(Updates(NKikimrSchemeOp::ECdcStreamFormatJson))));
+
+ waitProgress(1);
+ WaitTxNotification(server, edgeActor, AsyncAlterDropStream(server, "/Root", "Table", "Stream"));
+
+ delayProgress = false;
+ for (auto& ev : std::exchange(delayed, TVector<THolder<IEventHandle>>())) {
+ runtime.Send(ev.Release(), 0, true);
+ }
+
+ waitProgress(2);
+ }
+
Y_UNIT_TEST(AwsRegion) {
TPortManager portManager;
TServer::TPtr server = new TServer(TServerSettings(portManager.GetPort(2134), {}, DefaultPQConfig())
diff --git a/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp b/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp
index 81c7efd31c..26fb3a7b0e 100644
--- a/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp
+++ b/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp
@@ -640,20 +640,32 @@ struct TTestHelper {
|| newLock->GetGeneration() != prevLock->GetGeneration());
}
- void TestChunkRead(ui32 chunkSize, ui32 rowCount) {
+ void TestChunkRead(ui32 chunkSize, ui32 rowCount, ui32 ranges = 1, ui32 limit = Max<ui32>()) {
UpsertMany(1, rowCount);
auto request = GetBaseReadRequest("table-1-many", 1, NKikimrTxDataShard::CELLVEC, TRowVersion::Max());
request->Record.ClearSnapshot();
- AddRangeQuery<ui32>(
- *request,
- {1, 1, 1},
- true,
- {rowCount + 1, 1, 1},
- true
- );
+
+ ui32 base = 1;
+ for (ui32 i = 0; i < ranges; ++i) {
+ ui32 count = rowCount / ranges;
+ if (i < (rowCount % ranges)) {
+ ++count;
+ }
+ AddRangeQuery<ui32>(
+ *request,
+ {base, 1, 1},
+ true,
+ {base + count - 1, Max<ui32>(), Max<ui32>()},
+ true
+ );
+ base += count;
+ }
request->Record.SetMaxRowsInResult(chunkSize);
+ if (limit != Max<ui32>()) {
+ request->Record.SetTotalRowsLimit(limit);
+ }
auto readResult = SendRead("table-1-many", request.release());
UNIT_ASSERT(readResult);
@@ -664,10 +676,12 @@ struct TTestHelper {
while (!readResult->Record.GetFinished()) {
readResult = WaitReadResult();
UNIT_ASSERT(readResult);
- rowsRead += readResult->GetRowsCount();
+ ui32 count = readResult->GetRowsCount();
+ UNIT_ASSERT_C(count > 0 || readResult->Record.GetFinished(), "Unexpected empty intermediate result");
+ rowsRead += count;
}
- UNIT_ASSERT_VALUES_EQUAL(rowsRead, rowCount);
+ UNIT_ASSERT_VALUES_EQUAL(rowsRead, Min(rowCount, limit));
}
struct THangedReturn {
@@ -1911,6 +1925,56 @@ Y_UNIT_TEST_SUITE(DataShardReadIterator) {
helper.TestChunkRead(99, 10000);
}
+ Y_UNIT_TEST(ShouldLimitReadRangeChunk1Limit100) {
+ TTestHelper helper;
+ helper.TestChunkRead(1, 1000, 1, 100);
+ }
+
+ Y_UNIT_TEST(ShouldLimitRead10RangesChunk99Limit98) {
+ TTestHelper helper;
+ helper.TestChunkRead(99, 1000, 10, 98);
+ }
+
+ Y_UNIT_TEST(ShouldLimitRead10RangesChunk99Limit99) {
+ TTestHelper helper;
+ helper.TestChunkRead(99, 1000, 10, 99);
+ }
+
+ Y_UNIT_TEST(ShouldLimitRead10RangesChunk99Limit100) {
+ TTestHelper helper;
+ helper.TestChunkRead(99, 1000, 10, 100);
+ }
+
+ Y_UNIT_TEST(ShouldLimitRead10RangesChunk99Limit101) {
+ TTestHelper helper;
+ helper.TestChunkRead(99, 1000, 10, 101);
+ }
+
+ Y_UNIT_TEST(ShouldLimitRead10RangesChunk99Limit198) {
+ TTestHelper helper;
+ helper.TestChunkRead(99, 1000, 10, 198);
+ }
+
+ Y_UNIT_TEST(ShouldLimitRead10RangesChunk99Limit900) {
+ TTestHelper helper;
+ helper.TestChunkRead(99, 1000, 10, 900);
+ }
+
+ Y_UNIT_TEST(ShouldLimitRead10RangesChunk100Limit900) {
+ TTestHelper helper;
+ helper.TestChunkRead(100, 1000, 10, 900);
+ }
+
+ Y_UNIT_TEST(ShouldLimitRead10RangesChunk100Limit1000) {
+ TTestHelper helper;
+ helper.TestChunkRead(100, 1000, 10, 1000);
+ }
+
+ Y_UNIT_TEST(ShouldLimitRead10RangesChunk100Limit1001) {
+ TTestHelper helper;
+ helper.TestChunkRead(100, 1000, 10, 1001);
+ }
+
Y_UNIT_TEST(ShouldReadKeyPrefix1) {
TTestHelper helper;
diff --git a/ydb/core/tx/datashard/export_s3_base_uploader.h b/ydb/core/tx/datashard/export_s3_base_uploader.h
index 8ebc1a79ff..5e84ae1920 100644
--- a/ydb/core/tx/datashard/export_s3_base_uploader.h
+++ b/ydb/core/tx/datashard/export_s3_base_uploader.h
@@ -269,14 +269,21 @@ protected:
<< ": self# " << this->SelfId()
<< ", result# " << result);
- if (!result.IsSuccess()) {
- const auto& error = result.GetError();
- if (error.GetErrorType() != Aws::S3::S3Errors::NO_SUCH_UPLOAD) {
- Error = error.GetMessage().c_str();
- }
+ if (result.IsSuccess()) {
+ return PassAway();
+ }
+
+ const auto& error = result.GetError();
+ if (error.GetErrorType() == Aws::S3::S3Errors::NO_SUCH_UPLOAD) {
+ return PassAway();
}
- PassAway();
+ if (CanRetry(error)) {
+ Retry();
+ } else {
+ Error = error.GetMessage().c_str();
+ PassAway();
+ }
}
void Handle(TEvExternalStorage::TEvAbortMultipartUploadResponse::TPtr& ev) {
@@ -286,13 +293,19 @@ protected:
<< ": self# " << this->SelfId()
<< ", result# " << result);
- if (!result.IsSuccess()) {
+ if (result.IsSuccess()) {
+ return PassAway();
+ }
+
+ const auto& error = result.GetError();
+ if (CanRetry(error)) {
+ Retry();
+ } else {
Y_VERIFY(Error);
Error = TStringBuilder() << *Error << " Additionally, 'AbortMultipartUpload' has failed: "
- << result.GetError().GetMessage();
+ << error.GetMessage();
+ PassAway();
}
-
- PassAway();
}
template <typename TResult>
@@ -321,12 +334,19 @@ protected:
return false;
}
- void RetryOrFinish(const Aws::S3::S3Error& error) {
- if (Attempt++ < Retries && ShouldRetry(error)) {
- Delay = Min(Delay * Attempt, TDuration::Minutes(10));
- const TDuration random = TDuration::FromValue(TAppData::RandomProvider->GenRand64() % Delay.MicroSeconds());
+ bool CanRetry(const Aws::S3::S3Error& error) const {
+ return Attempt < Retries && ShouldRetry(error);
+ }
- this->Schedule(Delay + random, new TEvents::TEvWakeup());
+ void Retry() {
+ Delay = Min(Delay * ++Attempt, TDuration::Minutes(10));
+ const TDuration random = TDuration::FromValue(TAppData::RandomProvider->GenRand64() % Delay.MicroSeconds());
+ this->Schedule(Delay + random, new TEvents::TEvWakeup());
+ }
+
+ void RetryOrFinish(const Aws::S3::S3Error& error) {
+ if (CanRetry(error)) {
+ Retry();
} else {
Finish(false, TStringBuilder() << "S3 error: " << error.GetMessage().c_str());
}
diff --git a/ydb/core/tx/datashard/read_iterator.h b/ydb/core/tx/datashard/read_iterator.h
index c2fae3e0cd..aeda03decb 100644
--- a/ydb/core/tx/datashard/read_iterator.h
+++ b/ydb/core/tx/datashard/read_iterator.h
@@ -182,6 +182,10 @@ public:
TQuota Quota;
+ // Number of rows processed so far; reads stop early once TotalRowsLimit rows have been returned
+ ui64 TotalRows = 0;
+ ui64 TotalRowsLimit = Max<ui64>();
+
// items are running total,
// first item corresponds to SeqNo = LastAckSeqNo + 1,
// i.e. [LastAckSeqNo + 1; SeqNo]
diff --git a/ydb/core/tx/scheme_board/cache.cpp b/ydb/core/tx/scheme_board/cache.cpp
index 016a9a40b9..31fc79312b 100644
--- a/ydb/core/tx/scheme_board/cache.cpp
+++ b/ydb/core/tx/scheme_board/cache.cpp
@@ -1887,6 +1887,10 @@ class TSchemeCache: public TMonitorableActor<TSchemeCache> {
entry.Kind = TableKind;
entry.DomainInfo = DomainInfo;
+ if (Self) {
+ entry.GeneralVersion = Self->Info.GetVersion().GetGeneralVersion();
+ }
+
if (!CheckColumns(context, entry, KeyColumnTypes, Columns)) {
return;
}
diff --git a/ydb/core/tx/scheme_cache/scheme_cache.h b/ydb/core/tx/scheme_cache/scheme_cache.h
index d8a174d221..62fdbb0ca0 100644
--- a/ydb/core/tx/scheme_cache/scheme_cache.h
+++ b/ydb/core/tx/scheme_cache/scheme_cache.h
@@ -319,6 +319,7 @@ struct TSchemeCacheRequest {
EStatus Status = EStatus::Unknown;
EKind Kind = EKind::KindUnknown;
TIntrusivePtr<TDomainInfo> DomainInfo;
+ ui64 GeneralVersion = 0;
explicit TEntry(THolder<TKeyDesc> keyDesc)
: KeyDescription(std::move(keyDesc))
diff --git a/ydb/core/tx/schemeshard/schemeshard__init.cpp b/ydb/core/tx/schemeshard/schemeshard__init.cpp
index 539320d087..132c62ba12 100644
--- a/ydb/core/tx/schemeshard/schemeshard__init.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard__init.cpp
@@ -2958,6 +2958,10 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> {
} else {
stream->DoneShards.insert(shardIdx);
}
+
+ if (!rowset.Next()) {
+ return false;
+ }
}
}
diff --git a/ydb/core/tx/schemeshard/schemeshard__operation.cpp b/ydb/core/tx/schemeshard/schemeshard__operation.cpp
index f597e74b1e..ca3d30b0ea 100644
--- a/ydb/core/tx/schemeshard/schemeshard__operation.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard__operation.cpp
@@ -656,14 +656,7 @@ TOperation::TSplitTransactionsResult TOperation::SplitIntoTransactions(const TTx
}
if (checks && !exists) {
- checks
- .IsValidLeafName()
- .DepthLimit()
- .PathsLimit();
- }
-
- if (checks && !exists && path.Parent().IsResolved()) {
- checks.DirChildrenLimit();
+ checks.IsValidLeafName();
}
if (!checks) {
@@ -764,14 +757,7 @@ TOperation::TSplitTransactionsResult TOperation::SplitIntoTransactions(const TTx
}
if (checks) {
- checks
- .IsValidLeafName()
- .DepthLimit()
- .PathsLimit(result.Transactions.size() + 1);
- }
-
- if (checks && path.Parent().IsResolved()) {
- checks.DirChildrenLimit();
+ checks.IsValidLeafName();
}
if (!checks) {
diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_solomon.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_solomon.cpp
index 1c46c00733..86b49463d1 100644
--- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_solomon.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_solomon.cpp
@@ -259,7 +259,13 @@ public:
}
TChannelsBindings channelsBinding;
- if (!context.SS->ResolveSolomonChannels(channelProfileId, path.GetPathIdForDomain(), channelsBinding)) {
+ bool isResolved = false;
+ if (alter.HasStorageConfig()) {
+ isResolved = context.SS->ResolveSolomonChannels(alter.GetStorageConfig(), path.GetPathIdForDomain(), channelsBinding);
+ } else {
+ isResolved = context.SS->ResolveSolomonChannels(channelProfileId, path.GetPathIdForDomain(), channelsBinding);
+ }
+ if (!isResolved) {
result->SetError(NKikimrScheme::StatusInvalidParameter, "Unable to construct channel binding with the storage pool");
return result;
}
diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp
index cf474204e2..df4b974b9b 100644
--- a/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp
@@ -366,7 +366,7 @@ public:
}
if (checks) {
- if (!parent.Base()->IsTableIndex()) {
+ if (!parent.Base()->IsTableIndex() && !isBackup) {
checks.DepthLimit();
}
diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_olap_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_olap_table.cpp
index 5f216399ac..58851357a9 100644
--- a/ydb/core/tx/schemeshard/schemeshard__operation_create_olap_table.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_olap_table.cpp
@@ -23,19 +23,27 @@ bool PrepareSchema(NKikimrSchemeOp::TColumnTableSchema& proto, TOlapSchema& sche
return schema.Parse(proto, errStr, allowNullableKeys);
}
+NKikimrSchemeOp::TColumnTableSharding DefaultSharding() {
+ NKikimrSchemeOp::TColumnTableSharding sharding;
+ auto* hashSharding = sharding.MutableHashSharding();
+ hashSharding->SetFunction(NKikimrSchemeOp::TColumnTableSharding::THashSharding::HASH_FUNCTION_MODULO_N);
+ return sharding;
+}
+
bool SetSharding(const TOlapSchema& schema, NKikimrSchemeOp::TColumnTableDescription& op,
TColumnTableInfo::TPtr tableInfo,
TEvSchemeShard::EStatus& status, TString& errStr)
{
- ui32 shardsCount = Max(ui32(1), op.GetColumnShardCount());
+ ui32 shardsCount = op.GetColumnShardCount();
+ if (!shardsCount) {
+ status = NKikimrScheme::StatusSchemeError;
+ errStr = Sprintf("Shards count is zero");
+ return false;
+ }
if (op.HasSharding()) {
tableInfo->Sharding = std::move(*op.MutableSharding());
- } else if (shardsCount < 2) {
- tableInfo->Sharding.MutableRandomSharding();
} else {
- status = NKikimrScheme::StatusSchemeError;
- errStr = Sprintf("Sharding is not set");
- return false;
+ tableInfo->Sharding = DefaultSharding();
}
op.ClearSharding();
@@ -51,8 +59,11 @@ bool SetSharding(const TOlapSchema& schema, NKikimrSchemeOp::TColumnTableDescrip
case NKikimrSchemeOp::TColumnTableSharding::kHashSharding: {
auto& sharding = *tableInfo->Sharding.MutableHashSharding();
if (sharding.ColumnsSize() == 0) {
+ sharding.MutableColumns()->CopyFrom(tableInfo->Description.GetSchema().GetKeyColumnNames());
+ }
+ if (shardsCount > 1 && sharding.ColumnsSize() == 0) {
status = NKikimrScheme::StatusSchemeError;
- errStr = Sprintf("Hash sharding requires a non-empty list of columns");
+ errStr = Sprintf("Hash sharding requires a non-empty list of columns or primary key specified");
return false;
}
bool keysOnly = true;
@@ -591,7 +602,11 @@ public:
const auto acceptExisted = !Transaction.GetFailOnExist();
const TString& parentPathStr = Transaction.GetWorkingDir();
- auto& createDescription = Transaction.GetCreateColumnTable();
+ auto createDescription = Transaction.GetCreateColumnTable();
+ if (!createDescription.HasColumnShardCount()) {
+ static constexpr ui32 DEFAULT_SHARDS_COUNT = 64;
+ createDescription.SetColumnShardCount(DEFAULT_SHARDS_COUNT);
+ }
const TString& name = createDescription.GetName();
const ui32 shardsCount = Max(ui32(1), createDescription.GetColumnShardCount());
auto opTxId = OperationId.GetTxId();
diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_solomon.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_solomon.cpp
index e742ead739..9d9f30ad45 100644
--- a/ydb/core/tx/schemeshard/schemeshard__operation_create_solomon.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_solomon.cpp
@@ -19,15 +19,15 @@ bool ValidateConfig(const NKikimrSchemeOp::TCreateSolomonVolume& op,
return false;
}
if (op.GetPartitionCount()) {
- if (!op.HasChannelProfileId()) {
- errStr = "set channel profile id, please";
+ if (!op.HasChannelProfileId() && !op.HasStorageConfig()) {
+ errStr = "set storage config, please";
status = TEvSchemeShard::EStatus::StatusInvalidParameter;
}
return true;
}
- if (op.HasChannelProfileId()) {
- errStr = "don't set channel profile id, please. We are going to adopt already created tablets";
+ if (op.HasChannelProfileId() || op.HasStorageConfig()) {
+ errStr = "don't set channel profile id or storage config, please. We are going to adopt already created tablets";
status = TEvSchemeShard::EStatus::StatusInvalidParameter;
}
@@ -330,9 +330,17 @@ public:
const bool adoptingTablets = solomonDescription.AdoptedPartitionsSize() > 0;
TChannelsBindings channelsBinding;
- if (!adoptingTablets && !context.SS->ResolveSolomonChannels(channelProfileId, dstPath.GetPathIdForDomain(), channelsBinding)) {
- result->SetError(NKikimrScheme::StatusInvalidParameter, "Unable to construct channel binding with the storage pool");
- return result;
+ if (!adoptingTablets) {
+ bool isResolved = false;
+ if (solomonDescription.HasStorageConfig()) {
+ isResolved = context.SS->ResolveSolomonChannels(solomonDescription.GetStorageConfig(), dstPath.GetPathIdForDomain(), channelsBinding);
+ } else {
+ isResolved = context.SS->ResolveSolomonChannels(channelProfileId, dstPath.GetPathIdForDomain(), channelsBinding);
+ }
+ if (!isResolved) {
+ result->SetError(NKikimrScheme::StatusInvalidParameter, "Unable to construct channel binding with the storage pool");
+ return result;
+ }
}
dstPath.MaterializeLeaf(owner);
diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_mkdir.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_mkdir.cpp
index 3fa3d86e86..8ca973d696 100644
--- a/ydb/core/tx/schemeshard/schemeshard__operation_mkdir.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard__operation_mkdir.cpp
@@ -158,10 +158,14 @@ public:
if (checks) {
checks
.IsValidLeafName()
+ .IsValidACL(acl);
+ }
+
+ if (checks && !context.SS->SystemBackupSIDs.contains(owner)) {
+ checks
.DepthLimit()
.PathsLimit()
- .DirChildrenLimit()
- .IsValidACL(acl);
+ .DirChildrenLimit();
}
if (!checks) {
diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp
index b58d3a500f..6d51b93e7e 100644
--- a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp
@@ -481,7 +481,7 @@ struct TSchemeShard::TIndexBuilder::TTxReply: public TSchemeShard::TIndexBuilder
private:
TEvTxAllocatorClient::TEvAllocateResult::TPtr AllocateResult;
TEvSchemeShard::TEvModifySchemeTransactionResult::TPtr ModifyResult;
- TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr Notification;
+ TTxId CompletedTxId = InvalidTxId;
TEvDataShard::TEvBuildIndexProgressResponse::TPtr ShardProgress;
struct {
TIndexBuildId BuildIndexId;
@@ -502,9 +502,9 @@ public:
{
}
- explicit TTxReply(TSelf* self, TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& notification)
+ explicit TTxReply(TSelf* self, TTxId completedTxId)
: TSchemeShard::TIndexBuilder::TTxBase(self)
- , Notification(notification)
+ , CompletedTxId(completedTxId)
{
}
@@ -530,7 +530,7 @@ public:
return OnAllocation(txc, ctx);
} else if (ModifyResult) {
return OnModifyResult(txc, ctx);
- } else if (Notification) {
+ } else if (CompletedTxId) {
return OnNotification(txc, ctx);
} else if (ShardProgress) {
return OnProgress(txc, ctx);
@@ -773,12 +773,10 @@ public:
}
bool OnNotification(TTransactionContext& txc, const TActorContext&) {
- const auto& record = Notification->Get()->Record;
-
- const auto txId = TTxId(record.GetTxId());
+ const auto txId = CompletedTxId;
if (!Self->TxIdToIndexBuilds.contains(txId)) {
LOG_I("TTxReply : TEvNotifyTxCompletionResult superfluous message"
- << ", txId: " << record.GetTxId()
+ << ", txId: " << txId
<< ", buildInfoId not found");
return true;
}
@@ -788,10 +786,10 @@ public:
TIndexBuildInfo::TPtr buildInfo = Self->IndexBuilds.at(buildId);
LOG_I("TTxReply : TEvNotifyTxCompletionResult"
- << ", txId# " << record.GetTxId()
+ << ", txId# " << txId
<< ", buildInfoId: " << buildInfo->Id);
LOG_D("TTxReply : TEvNotifyTxCompletionResult"
- << ", txId# " << record.GetTxId()
+ << ", txId# " << txId
<< ", buildInfo: " << *buildInfo);
switch (buildInfo->State) {
@@ -1280,8 +1278,8 @@ ITransaction* TSchemeShard::CreateTxReply(TEvSchemeShard::TEvModifySchemeTransac
return new TIndexBuilder::TTxReply(this, modifyResult);
}
-ITransaction* TSchemeShard::CreateTxReply(TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& notification) {
- return new TIndexBuilder::TTxReply(this, notification);
+ITransaction* TSchemeShard::CreateTxReply(TTxId completedTxId) {
+ return new TIndexBuilder::TTxReply(this, completedTxId);
}
ITransaction* TSchemeShard::CreateTxReply(TEvDataShard::TEvBuildIndexProgressResponse::TPtr& progress) {
diff --git a/ydb/core/tx/schemeshard/schemeshard_export__create.cpp b/ydb/core/tx/schemeshard/schemeshard_export__create.cpp
index 210c80b84b..30ac978ff0 100644
--- a/ydb/core/tx/schemeshard/schemeshard_export__create.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard_export__create.cpp
@@ -226,7 +226,7 @@ struct TSchemeShard::TExport::TTxProgress: public TSchemeShard::TXxport::TTxBase
ui64 Id;
TEvTxAllocatorClient::TEvAllocateResult::TPtr AllocateResult = nullptr;
TEvSchemeShard::TEvModifySchemeTransactionResult::TPtr ModifyResult = nullptr;
- TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr NotifyResult = nullptr;
+ TTxId CompletedTxId = InvalidTxId;
explicit TTxProgress(TSelf* self, ui64 id)
: TXxport::TTxBase(self)
@@ -246,9 +246,9 @@ struct TSchemeShard::TExport::TTxProgress: public TSchemeShard::TXxport::TTxBase
{
}
- explicit TTxProgress(TSelf* self, TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& ev)
+ explicit TTxProgress(TSelf* self, TTxId completedTxId)
: TXxport::TTxBase(self)
- , NotifyResult(ev)
+ , CompletedTxId(completedTxId)
{
}
@@ -263,7 +263,7 @@ struct TSchemeShard::TExport::TTxProgress: public TSchemeShard::TXxport::TTxBase
OnAllocateResult(txc, ctx);
} else if (ModifyResult) {
OnModifyResult(txc, ctx);
- } else if (NotifyResult) {
+ } else if (CompletedTxId) {
OnNotifyResult(txc, ctx);
} else {
Resume(txc, ctx);
@@ -427,6 +427,10 @@ private:
return InvalidTxId;
}
+ if (!ItemPathId(Self, exportInfo, 0)) {
+ return InvalidTxId;
+ }
+
return path->LastTxId;
}
@@ -782,7 +786,7 @@ private:
SubscribeTx(path->LastTxId);
Y_VERIFY_DEBUG(itemIdx == Max<ui32>());
- Self->TxIdToExport[path->LastTxId] = {exportInfo->Id, itemIdx};
+ Self->TxIdToDependentExport[path->LastTxId].insert(exportInfo->Id);
}
}
@@ -854,30 +858,47 @@ private:
}
void OnNotifyResult(TTransactionContext& txc, const TActorContext&) {
- Y_VERIFY(NotifyResult);
- const auto& record = NotifyResult->Get()->Record;
-
+ Y_VERIFY(CompletedTxId);
LOG_D("TExport::TTxProgress: OnNotifyResult"
- << ": txId# " << record.GetTxId());
+ << ": txId# " << CompletedTxId);
- const auto txId = TTxId(record.GetTxId());
- if (!Self->TxIdToExport.contains(txId)) {
+ const auto txId = CompletedTxId;
+ if (!Self->TxIdToExport.contains(txId) && !Self->TxIdToDependentExport.contains(txId)) {
LOG_E("TExport::TTxProgress: OnNotifyResult received unknown txId"
<< ": txId# " << txId);
return;
}
- ui64 id;
- ui32 itemIdx;
- std::tie(id, itemIdx) = Self->TxIdToExport.at(txId);
+ if (Self->TxIdToExport.contains(txId)) {
+ ui64 id;
+ ui32 itemIdx;
+ std::tie(id, itemIdx) = Self->TxIdToExport.at(txId);
+
+ OnNotifyResult(txId, id, itemIdx, txc);
+ Self->TxIdToExport.erase(txId);
+ }
+
+ if (Self->TxIdToDependentExport.contains(txId)) {
+ for (const auto id : Self->TxIdToDependentExport.at(txId)) {
+ OnNotifyResult(txId, id, Max<ui32>(), txc);
+ }
+
+ Self->TxIdToDependentExport.erase(txId);
+ }
+ }
+
+ void OnNotifyResult(TTxId txId, ui64 id, ui32 itemIdx, TTransactionContext& txc) {
+ LOG_D("TExport::TTxProgress: OnNotifyResult"
+ << ": txId# " << txId
+ << ", id# " << id
+ << ", itemIdx# " << itemIdx);
+
if (!Self->Exports.contains(id)) {
LOG_E("TExport::TTxProgress: OnNotifyResult received unknown id"
<< ": id# " << id);
return;
}
- Self->TxIdToExport.erase(txId);
-
TExportInfo::TPtr exportInfo = Self->Exports.at(id);
NIceDb::TNiceDb db(txc.DB);
@@ -977,8 +998,8 @@ ITransaction* TSchemeShard::CreateTxProgressExport(TEvSchemeShard::TEvModifySche
return new TExport::TTxProgress(this, ev);
}
-ITransaction* TSchemeShard::CreateTxProgressExport(TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& ev) {
- return new TExport::TTxProgress(this, ev);
+ITransaction* TSchemeShard::CreateTxProgressExport(TTxId completedTxId) {
+ return new TExport::TTxProgress(this, completedTxId);
}
} // NSchemeShard
diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.cpp b/ydb/core/tx/schemeshard/schemeshard_impl.cpp
index d2074340a7..77d7f40952 100644
--- a/ydb/core/tx/schemeshard/schemeshard_impl.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard_impl.cpp
@@ -824,6 +824,29 @@ bool TSchemeShard::ResolveRtmrChannels(const TPathId domainId, TChannelsBindings
return ResolveChannelCommon(profileId, domainId, channelsBinding, &ResolveChannelsDetailsAsIs);
}
+bool TSchemeShard::ResolveSolomonChannels(const NKikimrSchemeOp::TKeyValueStorageConfig &config, const TPathId domainId, TChannelsBindings& channelsBinding) const
+{
+ TSubDomainInfo::TPtr domainInfo = SubDomains.at(domainId);
+ auto& storagePools = domainInfo->EffectiveStoragePools();
+
+ if (!storagePools) {
+ // no storage pools configured — no channel binding can be built (NOTE(review): comment said "it's Ok" but we return false, which the caller reports as an error — confirm intent)
+ channelsBinding.clear();
+ return false;
+ }
+
+ auto getPoolKind = [&] (ui32 channel) {
+ return TStringBuf(config.GetChannel(channel).GetPreferredPoolKind());
+ };
+
+ return ResolvePoolNames(
+ config.ChannelSize(),
+ getPoolKind,
+ storagePools,
+ channelsBinding
+ );
+}
+
bool TSchemeShard::ResolveSolomonChannels(ui32 profileId, const TPathId domainId, TChannelsBindings &channelsBinding) const
{
return ResolveChannelCommon(profileId, domainId, channelsBinding, &ResolveChannelsDetailsAsIs);
@@ -5847,13 +5870,23 @@ void TSchemeShard::Handle(TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& ev,
"Message:\n" << ev->Get()->Record.ShortDebugString());
const auto txId = TTxId(ev->Get()->Record.GetTxId());
+ bool executed = false;
+
+ if (TxIdToExport.contains(txId) || TxIdToDependentExport.contains(txId)) {
+ Execute(CreateTxProgressExport(txId), ctx);
+ executed = true;
+ }
+ if (TxIdToImport.contains(txId)) {
+ Execute(CreateTxProgressImport(txId), ctx);
+ executed = true;
+ }
+ if (TxIdToIndexBuilds.contains(txId)) {
+ Execute(CreateTxReply(txId), ctx);
+ executed = true;
+ }
- if (TxIdToExport.contains(txId)) {
- return Execute(CreateTxProgressExport(ev), ctx);
- } else if (TxIdToImport.contains(txId)) {
- return Execute(CreateTxProgressImport(ev), ctx);
- } else if (TxIdToIndexBuilds.contains(txId)) {
- return Execute(CreateTxReply(ev), ctx);
+ if (executed) {
+ return;
}
LOG_WARN_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD,
diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.h b/ydb/core/tx/schemeshard/schemeshard_impl.h
index 5299fcb42b..a64beea609 100644
--- a/ydb/core/tx/schemeshard/schemeshard_impl.h
+++ b/ydb/core/tx/schemeshard/schemeshard_impl.h
@@ -438,6 +438,7 @@ public:
bool ResolveTabletChannels(ui32 profileId, const TPathId domainId, TChannelsBindings& channelsBinding) const;
bool ResolveRtmrChannels(const TPathId domainId, TChannelsBindings& channelsBinding) const;
bool ResolveSolomonChannels(ui32 profileId, const TPathId domainId, TChannelsBindings& channelsBinding) const;
+ bool ResolveSolomonChannels(const NKikimrSchemeOp::TKeyValueStorageConfig &config, const TPathId domainId, TChannelsBindings& channelsBinding) const;
bool ResolvePqChannels(ui32 profileId, const TPathId domainId, TChannelsBindings& channelsBinding) const;
bool ResolveChannelsByPoolKinds(
const TVector<TStringBuf>& channelPoolKinds,
@@ -1019,6 +1020,7 @@ public:
THashMap<ui64, TExportInfo::TPtr> Exports;
THashMap<TString, TExportInfo::TPtr> ExportsByUid;
THashMap<TTxId, std::pair<ui64, ui32>> TxIdToExport;
+ THashMap<TTxId, THashSet<ui64>> TxIdToDependentExport;
void FromXxportInfo(NKikimrExport::TExport& exprt, const TExportInfo::TPtr exportInfo);
@@ -1049,7 +1051,7 @@ public:
NTabletFlatExecutor::ITransaction* CreateTxProgressExport(ui64 id);
NTabletFlatExecutor::ITransaction* CreateTxProgressExport(TEvTxAllocatorClient::TEvAllocateResult::TPtr& ev);
NTabletFlatExecutor::ITransaction* CreateTxProgressExport(TEvSchemeShard::TEvModifySchemeTransactionResult::TPtr& ev);
- NTabletFlatExecutor::ITransaction* CreateTxProgressExport(TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& ev);
+ NTabletFlatExecutor::ITransaction* CreateTxProgressExport(TTxId completedTxId);
void Handle(TEvExport::TEvCreateExportRequest::TPtr& ev, const TActorContext& ctx);
void Handle(TEvExport::TEvGetExportRequest::TPtr& ev, const TActorContext& ctx);
@@ -1098,7 +1100,7 @@ public:
NTabletFlatExecutor::ITransaction* CreateTxProgressImport(TEvTxAllocatorClient::TEvAllocateResult::TPtr& ev);
NTabletFlatExecutor::ITransaction* CreateTxProgressImport(TEvSchemeShard::TEvModifySchemeTransactionResult::TPtr& ev);
NTabletFlatExecutor::ITransaction* CreateTxProgressImport(TEvIndexBuilder::TEvCreateResponse::TPtr& ev);
- NTabletFlatExecutor::ITransaction* CreateTxProgressImport(TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& ev);
+ NTabletFlatExecutor::ITransaction* CreateTxProgressImport(TTxId completedTxId);
void Handle(TEvImport::TEvCreateImportRequest::TPtr& ev, const TActorContext& ctx);
void Handle(TEvImport::TEvGetImportRequest::TPtr& ev, const TActorContext& ctx);
@@ -1174,7 +1176,7 @@ public:
NTabletFlatExecutor::ITransaction* CreateTxProgress(TIndexBuildId id);
NTabletFlatExecutor::ITransaction* CreateTxReply(TEvTxAllocatorClient::TEvAllocateResult::TPtr& allocateResult);
NTabletFlatExecutor::ITransaction* CreateTxReply(TEvSchemeShard::TEvModifySchemeTransactionResult::TPtr& modifyResult);
- NTabletFlatExecutor::ITransaction* CreateTxReply(TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& modifyResult);
+ NTabletFlatExecutor::ITransaction* CreateTxReply(TTxId completedTxId);
NTabletFlatExecutor::ITransaction* CreateTxReply(TEvDataShard::TEvBuildIndexProgressResponse::TPtr& progress);
NTabletFlatExecutor::ITransaction* CreatePipeRetry(TIndexBuildId indexBuildId, TTabletId tabletId);
NTabletFlatExecutor::ITransaction* CreateTxBilling(TEvPrivate::TEvIndexBuildingMakeABill::TPtr& ev);
diff --git a/ydb/core/tx/schemeshard/schemeshard_import__create.cpp b/ydb/core/tx/schemeshard/schemeshard_import__create.cpp
index a3a879f1d3..b47201c311 100644
--- a/ydb/core/tx/schemeshard/schemeshard_import__create.cpp
+++ b/ydb/core/tx/schemeshard/schemeshard_import__create.cpp
@@ -224,7 +224,7 @@ struct TSchemeShard::TImport::TTxProgress: public TSchemeShard::TXxport::TTxBase
TEvTxAllocatorClient::TEvAllocateResult::TPtr AllocateResult = nullptr;
TEvSchemeShard::TEvModifySchemeTransactionResult::TPtr ModifyResult = nullptr;
TEvIndexBuilder::TEvCreateResponse::TPtr CreateIndexResult = nullptr;
- TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr NotifyResult = nullptr;
+ TTxId CompletedTxId = InvalidTxId;
explicit TTxProgress(TSelf* self, ui64 id, const TMaybe<ui32>& itemIdx)
: TXxport::TTxBase(self)
@@ -257,9 +257,9 @@ struct TSchemeShard::TImport::TTxProgress: public TSchemeShard::TXxport::TTxBase
{
}
- explicit TTxProgress(TSelf* self, TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& ev)
+ explicit TTxProgress(TSelf* self, TTxId completedTxId)
: TXxport::TTxBase(self)
- , NotifyResult(ev)
+ , CompletedTxId(completedTxId)
{
}
@@ -278,7 +278,7 @@ struct TSchemeShard::TImport::TTxProgress: public TSchemeShard::TXxport::TTxBase
OnModifyResult(txc, ctx);
} else if (CreateIndexResult) {
OnCreateIndexResult(txc, ctx);
- } else if (NotifyResult) {
+ } else if (CompletedTxId) {
OnNotifyResult(txc, ctx);
} else {
Resume(txc, ctx);
@@ -908,13 +908,11 @@ private:
}
void OnNotifyResult(TTransactionContext& txc, const TActorContext&) {
- Y_VERIFY(NotifyResult);
- const auto& record = NotifyResult->Get()->Record;
-
+ Y_VERIFY(CompletedTxId);
LOG_D("TImport::TTxProgress: OnNotifyResult"
- << ": txId# " << record.GetTxId());
+ << ": txId# " << CompletedTxId);
- const auto txId = TTxId(record.GetTxId());
+ const auto txId = CompletedTxId;
if (!Self->TxIdToImport.contains(txId)) {
LOG_E("TImport::TTxProgress: OnNotifyResult received unknown txId"
<< ": txId# " << txId);
@@ -1018,8 +1016,8 @@ ITransaction* TSchemeShard::CreateTxProgressImport(TEvIndexBuilder::TEvCreateRes
return new TImport::TTxProgress(this, ev);
}
-ITransaction* TSchemeShard::CreateTxProgressImport(TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& ev) {
- return new TImport::TTxProgress(this, ev);
+ITransaction* TSchemeShard::CreateTxProgressImport(TTxId completedTxId) {
+ return new TImport::TTxProgress(this, completedTxId);
}
} // NSchemeShard
diff --git a/ydb/core/tx/schemeshard/ut_export.cpp b/ydb/core/tx/schemeshard/ut_export.cpp
index 907e564aef..4c17ba3d2a 100644
--- a/ydb/core/tx/schemeshard/ut_export.cpp
+++ b/ydb/core/tx/schemeshard/ut_export.cpp
@@ -1071,7 +1071,173 @@ partitioning_settings {
TestGetExport(runtime, exportId, "/MyRoot", Ydb::StatusIds::SUCCESS);
}
- Y_UNIT_TEST(ShouldCheckQuotas) {
+ Y_UNIT_TEST(ShouldSucceedOnConcurrentExport) {
+ TTestBasicRuntime runtime;
+ TTestEnv env(runtime);
+ ui64 txId = 100;
+
+ TestCreateTable(runtime, ++txId, "/MyRoot", R"(
+ Name: "Table"
+ Columns { Name: "key" Type: "Utf8" }
+ Columns { Name: "value" Type: "Utf8" }
+ KeyColumnNames: ["key"]
+ )");
+ env.TestWaitNotification(runtime, txId);
+
+ TPortManager portManager;
+ const ui16 port = portManager.GetPort();
+
+ TS3Mock s3Mock({}, TS3Mock::TSettings(port));
+ UNIT_ASSERT(s3Mock.Start());
+
+ TVector<THolder<IEventHandle>> copyTables;
+ auto origObserver = runtime.SetObserverFunc([&](TTestActorRuntimeBase&, TAutoPtr<IEventHandle>& ev) {
+ if (ev->GetTypeRewrite() == TEvSchemeShard::EvModifySchemeTransaction) {
+ const auto& record = ev->Get<TEvSchemeShard::TEvModifySchemeTransaction>()->Record;
+ if (record.GetTransaction(0).GetOperationType() == NKikimrSchemeOp::ESchemeOpCreateConsistentCopyTables) {
+ copyTables.emplace_back(ev.Release());
+ return TTestActorRuntime::EEventAction::DROP;
+ }
+ }
+ return TTestActorRuntime::EEventAction::PROCESS;
+ });
+ auto waitCopyTables = [&runtime, &copyTables](ui32 size) {
+ if (copyTables.size() != size) {
+ TDispatchOptions opts;
+ opts.FinalEvents.emplace_back([&copyTables, size](IEventHandle&) -> bool {
+ return copyTables.size() == size;
+ });
+ runtime.DispatchEvents(opts);
+ }
+ };
+
+ TVector<ui64> exportIds;
+ for (ui32 i = 1; i <= 3; ++i) {
+ exportIds.push_back(++txId);
+ TestExport(runtime, exportIds[i - 1], "/MyRoot", Sprintf(R"(
+ ExportToS3Settings {
+ endpoint: "localhost:%d"
+ scheme: HTTP
+ items {
+ source_path: "/MyRoot/Table"
+ destination_prefix: "Table%u"
+ }
+ }
+ )", port, i));
+ waitCopyTables(i);
+ }
+
+ runtime.SetObserverFunc(origObserver);
+ for (auto& ev : copyTables) {
+ runtime.Send(ev.Release(), 0, true);
+ }
+
+ for (ui64 exportId : exportIds) {
+ env.TestWaitNotification(runtime, exportId);
+ TestGetExport(runtime, exportId, "/MyRoot", Ydb::StatusIds::SUCCESS);
+ }
+ }
+
+ Y_UNIT_TEST(ShouldSucceedOnConcurrentImport) {
+ TTestBasicRuntime runtime;
+ TTestEnv env(runtime);
+ ui64 txId = 100;
+
+ TestCreateTable(runtime, ++txId, "/MyRoot", R"(
+ Name: "Table"
+ Columns { Name: "key" Type: "Utf8" }
+ Columns { Name: "value" Type: "Utf8" }
+ KeyColumnNames: ["key"]
+ )");
+ env.TestWaitNotification(runtime, txId);
+
+ TPortManager portManager;
+ const ui16 port = portManager.GetPort();
+
+ TS3Mock s3Mock({}, TS3Mock::TSettings(port));
+ UNIT_ASSERT(s3Mock.Start());
+
+ // prepare backup data
+ TestExport(runtime, ++txId, "/MyRoot", Sprintf(R"(
+ ExportToS3Settings {
+ endpoint: "localhost:%d"
+ scheme: HTTP
+ items {
+ source_path: "/MyRoot/Table"
+ destination_prefix: "Backup1"
+ }
+ }
+ )", port));
+ env.TestWaitNotification(runtime, txId);
+ TestGetExport(runtime, txId, "/MyRoot");
+
+ TVector<THolder<IEventHandle>> delayed;
+ auto origObserver = runtime.SetObserverFunc([&](TTestActorRuntimeBase&, TAutoPtr<IEventHandle>& ev) {
+ if (ev->GetTypeRewrite() == TEvSchemeShard::EvModifySchemeTransaction) {
+ const auto& record = ev->Get<TEvSchemeShard::TEvModifySchemeTransaction>()->Record;
+ const auto opType = record.GetTransaction(0).GetOperationType();
+ switch (opType) {
+ case NKikimrSchemeOp::ESchemeOpRestore:
+ case NKikimrSchemeOp::ESchemeOpCreateConsistentCopyTables:
+ delayed.emplace_back(ev.Release());
+ return TTestActorRuntime::EEventAction::DROP;
+ default:
+ break;
+ }
+ }
+ return TTestActorRuntime::EEventAction::PROCESS;
+ });
+
+ auto waitForDelayed = [&runtime, &delayed](ui32 size) {
+ if (delayed.size() != size) {
+ TDispatchOptions opts;
+ opts.FinalEvents.emplace_back([&delayed, size](IEventHandle&) -> bool {
+ return delayed.size() == size;
+ });
+ runtime.DispatchEvents(opts);
+ }
+ };
+
+ const auto importId = ++txId;
+ TestImport(runtime, importId, "/MyRoot", Sprintf(R"(
+ ImportFromS3Settings {
+ endpoint: "localhost:%d"
+ scheme: HTTP
+ items {
+ source_prefix: "Backup1"
+ destination_path: "/MyRoot/Restored"
+ }
+ }
+ )", port));
+ // wait for restore op
+ waitForDelayed(1);
+
+ const auto exportId = ++txId;
+ TestExport(runtime, exportId, "/MyRoot", Sprintf(R"(
+ ExportToS3Settings {
+ endpoint: "localhost:%d"
+ scheme: HTTP
+ items {
+ source_path: "/MyRoot/Restored"
+ destination_prefix: "Backup2"
+ }
+ }
+ )", port));
+ // wait for copy table op
+ waitForDelayed(2);
+
+ runtime.SetObserverFunc(origObserver);
+ for (auto& ev : delayed) {
+ runtime.Send(ev.Release(), 0, true);
+ }
+
+ env.TestWaitNotification(runtime, importId);
+ TestGetImport(runtime, importId, "/MyRoot");
+ env.TestWaitNotification(runtime, exportId);
+ TestGetExport(runtime, exportId, "/MyRoot");
+ }
+
+ void ShouldCheckQuotas(const TSchemeLimits& limits, Ydb::StatusIds::StatusCode expectedFailStatus) {
TPortManager portManager;
const ui16 port = portManager.GetPort();
@@ -1082,9 +1248,7 @@ partitioning_settings {
TTestBasicRuntime runtime;
TTestEnv env(runtime, TTestEnvOptions().SystemBackupSIDs({userSID}));
- TSchemeLimits lowLimits;
- lowLimits.MaxExports = 0;
- SetSchemeshardSchemaLimits(runtime, lowLimits);
+ SetSchemeshardSchemaLimits(runtime, limits);
const TVector<TString> tables = {
R"(
@@ -1105,7 +1269,12 @@ partitioning_settings {
}
)", port);
- Run(runtime, env, tables, request, Ydb::StatusIds::PRECONDITION_FAILED);
+ Run(runtime, env, tables, request, expectedFailStatus);
Run(runtime, env, tables, request, Ydb::StatusIds::SUCCESS, "/MyRoot", false, userSID);
}
+
+ Y_UNIT_TEST(ShouldCheckQuotas) {
+ ShouldCheckQuotas(TSchemeLimits{.MaxExports = 0}, Ydb::StatusIds::PRECONDITION_FAILED);
+ ShouldCheckQuotas(TSchemeLimits{.MaxChildrenInDir = 1}, Ydb::StatusIds::CANCELLED);
+ }
}
diff --git a/ydb/core/tx/schemeshard/ut_olap.cpp b/ydb/core/tx/schemeshard/ut_olap.cpp
index bf2d24cf5e..84a972baaa 100644
--- a/ydb/core/tx/schemeshard/ut_olap.cpp
+++ b/ydb/core/tx/schemeshard/ut_olap.cpp
@@ -211,6 +211,7 @@ Y_UNIT_TEST_SUITE(TOlap) {
TString tableSchema = R"(
Name: "ColumnTable"
+ ColumnShardCount: 1
)";
TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore/MyDir", tableSchema);
@@ -223,6 +224,7 @@ Y_UNIT_TEST_SUITE(TOlap) {
// Missing column from schema preset
TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore/MyDir", R"(
Name: "ColumnTableMissingDataColumn"
+ ColumnShardCount: 1
Schema {
Columns { Name: "timestamp" Type: "Timestamp" }
KeyColumnNames: "timestamp"
@@ -233,6 +235,7 @@ Y_UNIT_TEST_SUITE(TOlap) {
// Extra column not in schema preset
TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore/MyDir", R"(
Name: "ColumnTableExtraColumn"
+ ColumnShardCount: 1
Schema {
Columns { Name: "timestamp" Type: "Timestamp" }
Columns { Name: "data" Type: "Utf8" }
@@ -245,6 +248,7 @@ Y_UNIT_TEST_SUITE(TOlap) {
// Different column order
TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore/MyDir", R"(
Name: "ColumnTableDifferentColumnOrder"
+ ColumnShardCount: 1
Schema {
Columns { Name: "data" Type: "Utf8" }
Columns { Name: "timestamp" Type: "Timestamp" }
@@ -256,6 +260,7 @@ Y_UNIT_TEST_SUITE(TOlap) {
// Extra key column
TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore/MyDir", R"(
Name: "ColumnTableExtraKeyColumn"
+ ColumnShardCount: 1
Schema {
Columns { Name: "timestamp" Type: "Timestamp" }
Columns { Name: "data" Type: "Utf8" }
@@ -268,6 +273,7 @@ Y_UNIT_TEST_SUITE(TOlap) {
// Unknown key column
TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore/MyDir", R"(
Name: "ColumnTableUnknownKeyColumn"
+ ColumnShardCount: 1
Schema {
Columns { Name: "timestamp" Type: "Timestamp" }
Columns { Name: "data" Type: "Utf8" }
@@ -279,6 +285,7 @@ Y_UNIT_TEST_SUITE(TOlap) {
// Different data column type
TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore/MyDir", R"(
Name: "ColumnTableDataColumnType"
+ ColumnShardCount: 1
Schema {
Columns { Name: "timestamp" Type: "Timestamp" }
Columns { Name: "data" Type: "String" }
@@ -290,6 +297,7 @@ Y_UNIT_TEST_SUITE(TOlap) {
// Repeating preset schema should succeed
TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore/MyDir", R"(
Name: "ColumnTableExplicitSchema"
+ ColumnShardCount: 1
Schema {
Columns { Name: "timestamp" Type: "Timestamp" }
Columns { Name: "data" Type: "Utf8" }
@@ -302,6 +310,7 @@ Y_UNIT_TEST_SUITE(TOlap) {
// Creating table with directories should succeed
TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore", R"(
Name: "DirA/DirB/NestedTable"
+ ColumnShardCount: 1
)");
env.TestWaitNotification(runtime, txId);
@@ -312,6 +321,7 @@ Y_UNIT_TEST_SUITE(TOlap) {
// Additional storage tier in schema
TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore/MyDir", R"(
Name: "TableWithTiers"
+ ColumnShardCount: 1
Schema {
Columns { Name: "timestamp" Type: "Timestamp" }
Columns { Name: "data" Type: "Utf8" }
@@ -333,6 +343,7 @@ Y_UNIT_TEST_SUITE(TOlap) {
TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore", R"(
Name: "ColumnTable"
+ ColumnShardCount: 1
SchemaPresetName: "default"
)");
env.TestWaitNotification(runtime, txId);
@@ -362,6 +373,7 @@ Y_UNIT_TEST_SUITE(TOlap) {
TString tableSchema = R"(
Name: "ColumnTable"
+ ColumnShardCount: 1
)";
TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore/MyDir", tableSchema);
@@ -452,6 +464,67 @@ Y_UNIT_TEST_SUITE(TOlap) {
TestLsPathId(runtime, 4, NLs::PathStringEqual(""));
}
+ Y_UNIT_TEST(CreateDropStandaloneTableDefaultSharding) {
+ TTestBasicRuntime runtime;
+ TTestEnv env(runtime);
+ ui64 txId = 100;
+
+ TestMkDir(runtime, ++txId, "/MyRoot", "MyDir");
+ env.TestWaitNotification(runtime, txId);
+
+ TestLs(runtime, "/MyRoot/MyDir", false, NLs::PathExist);
+
+ TestCreateColumnTable(runtime, ++txId, "/MyRoot/MyDir", defaultTableSchema);
+ env.TestWaitNotification(runtime, txId);
+
+ TestLsPathId(runtime, 3, NLs::PathStringEqual("/MyRoot/MyDir/ColumnTable"));
+
+ TestDropColumnTable(runtime, ++txId, "/MyRoot/MyDir", "ColumnTable");
+ env.TestWaitNotification(runtime, txId);
+
+ TestLs(runtime, "/MyRoot/MyDir/ColumnTable", false, NLs::PathNotExist);
+ TestLsPathId(runtime, 3, NLs::PathStringEqual(""));
+
+ TString otherSchema = R"(
+ Name: "ColumnTable"
+ Schema {
+ Columns { Name: "timestamp" Type: "Timestamp" NotNull: true }
+ Columns { Name: "some" Type: "Uint64" NotNull: true }
+ Columns { Name: "data" Type: "Utf8" NotNull: true }
+ KeyColumnNames: "some"
+ KeyColumnNames: "data"
+ }
+ )";
+
+ TestCreateColumnTable(runtime, ++txId, "/MyRoot/MyDir", otherSchema);
+ env.TestWaitNotification(runtime, txId);
+
+ auto checkFn = [&](const NKikimrScheme::TEvDescribeSchemeResult& record) {
+ UNIT_ASSERT_VALUES_EQUAL(record.GetPath(), "/MyRoot/MyDir/ColumnTable");
+
+ auto& description = record.GetPathDescription().GetColumnTableDescription();
+ UNIT_ASSERT_VALUES_EQUAL(description.GetColumnShardCount(), 64);
+
+ auto& sharding = description.GetSharding();
+ UNIT_ASSERT_VALUES_EQUAL(sharding.ColumnShardsSize(), 64);
+ UNIT_ASSERT(sharding.HasHashSharding());
+ auto& hashSharding = sharding.GetHashSharding();
+ UNIT_ASSERT_VALUES_EQUAL(hashSharding.ColumnsSize(), 2);
+ UNIT_ASSERT_EQUAL(hashSharding.GetFunction(),
+ NKikimrSchemeOp::TColumnTableSharding::THashSharding::HASH_FUNCTION_MODULO_N);
+ UNIT_ASSERT_VALUES_EQUAL(hashSharding.GetColumns()[0], "some");
+ UNIT_ASSERT_VALUES_EQUAL(hashSharding.GetColumns()[1], "data");
+ };
+
+ TestLsPathId(runtime, 4, checkFn);
+
+ TestDropColumnTable(runtime, ++txId, "/MyRoot/MyDir", "ColumnTable");
+ env.TestWaitNotification(runtime, txId);
+
+ TestLs(runtime, "/MyRoot/MyDir/ColumnTable", false, NLs::PathNotExist);
+ TestLsPathId(runtime, 4, NLs::PathStringEqual(""));
+ }
+
Y_UNIT_TEST(CreateTableTtl) {
TTestBasicRuntime runtime;
TTestEnv env(runtime);
@@ -462,6 +535,7 @@ Y_UNIT_TEST_SUITE(TOlap) {
TString tableSchema1 = R"(
Name: "Table1"
+ ColumnShardCount: 1
TtlSettings {
Enabled { ColumnName: "timestamp" ExpireAfterSeconds: 300 }
}
@@ -478,6 +552,7 @@ Y_UNIT_TEST_SUITE(TOlap) {
TString tableSchema2 = R"(
Name: "Table2"
+ ColumnShardCount: 1
TtlSettings {
Disabled {}
}
@@ -494,6 +569,7 @@ Y_UNIT_TEST_SUITE(TOlap) {
TString tableSchema3 = R"(
Name: "Table3"
+ ColumnShardCount: 1
TtlSettings {
UseTiering : "Tiering1"
}
@@ -510,6 +586,7 @@ Y_UNIT_TEST_SUITE(TOlap) {
TString tableSchema4 = R"(
Name: "Table4"
+ ColumnShardCount: 1
TtlSettings {
UseTiering : "Tiering1"
}
@@ -531,6 +608,7 @@ Y_UNIT_TEST_SUITE(TOlap) {
TString tableSchemaX = R"(
Name: "ColumnTable"
+ ColumnShardCount: 1
TtlSettings {
Enabled {
ExpireAfterSeconds: 300
@@ -543,6 +621,7 @@ Y_UNIT_TEST_SUITE(TOlap) {
TString tableSchema = R"(
Name: "ColumnTable"
+ ColumnShardCount: 1
TtlSettings {
Enabled {
ColumnName: "timestamp"
@@ -595,6 +674,7 @@ Y_UNIT_TEST_SUITE(TOlap) {
TString tableSchema = R"(
Name: "ColumnTable"
+ ColumnShardCount: 1
TtlSettings {
Enabled {
ColumnName: "timestamp"
@@ -678,6 +758,7 @@ Y_UNIT_TEST_SUITE(TOlap) {
TString tableSchema = R"(
Name: "ColumnTable"
+ ColumnShardCount: 1
)";
TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore", tableSchema);
diff --git a/ydb/core/tx/schemeshard/ut_olap_reboots.cpp b/ydb/core/tx/schemeshard/ut_olap_reboots.cpp
index 64bad70f3d..47ee8880c5 100644
--- a/ydb/core/tx/schemeshard/ut_olap_reboots.cpp
+++ b/ydb/core/tx/schemeshard/ut_olap_reboots.cpp
@@ -66,6 +66,7 @@ Y_UNIT_TEST_SUITE(TOlapReboots) {
TestCreateColumnTable(runtime, ++t.TxId, "/MyRoot/OlapStore", R"(
Name: "ColumnTable"
+ ColumnShardCount: 1
)");
t.TestEnv->TestWaitNotification(runtime, t.TxId);
@@ -111,6 +112,7 @@ Y_UNIT_TEST_SUITE(TOlapReboots) {
TestCreateColumnTable(runtime, ++t.TxId, "/MyRoot/OlapStore", R"(
Name: "ColumnTable"
+ ColumnShardCount: 1
)");
t.TestEnv->TestWaitNotification(runtime, t.TxId);
@@ -163,11 +165,13 @@ Y_UNIT_TEST_SUITE(TOlapReboots) {
t.TestEnv->ReliablePropose(runtime,
CreateColumnTableRequest(t.TxId += 2, "/MyRoot/OlapStore", R"(
Name: "ColumnTable1"
+ ColumnShardCount: 1
)"),
{NKikimrScheme::StatusAccepted, NKikimrScheme::StatusAlreadyExists, NKikimrScheme::StatusMultipleModifications});
t.TestEnv->ReliablePropose(runtime,
CreateColumnTableRequest(t.TxId - 1, "/MyRoot/OlapStore", R"(
Name: "ColumnTable2"
+ ColumnShardCount: 1
)"),
{NKikimrScheme::StatusAccepted, NKikimrScheme::StatusAlreadyExists, NKikimrScheme::StatusMultipleModifications});
t.TestEnv->TestWaitNotification(runtime, {t.TxId - 1, t.TxId});
@@ -221,11 +225,13 @@ Y_UNIT_TEST_SUITE(TOlapReboots) {
TestCreateColumnTable(runtime, ++t.TxId, "/MyRoot/OlapStore", R"(
Name: "ColumnTable1"
+ ColumnShardCount: 1
)");
t.TestEnv->TestWaitNotification(runtime, t.TxId);
TestCreateColumnTable(runtime, ++t.TxId, "/MyRoot/OlapStore", R"(
Name: "ColumnTable2"
+ ColumnShardCount: 1
)");
t.TestEnv->TestWaitNotification(runtime, t.TxId);
}
@@ -319,6 +325,7 @@ Y_UNIT_TEST_SUITE(TOlapReboots) {
TestCreateColumnTable(runtime, ++t.TxId, "/MyRoot/OlapStore", R"(
Name: "ColumnTable"
+ ColumnShardCount: 1
)");
t.TestEnv->TestWaitNotification(runtime, t.TxId);
}
@@ -354,6 +361,7 @@ Y_UNIT_TEST_SUITE(TOlapReboots) {
TestCreateColumnTable(runtime, ++t.TxId, "/MyRoot/OlapStore", R"(
Name: "ColumnTable"
+ ColumnShardCount: 1
SchemaPresetName: "default"
TtlSettings {
Enabled {
diff --git a/ydb/core/tx/tiering/manager.cpp b/ydb/core/tx/tiering/manager.cpp
index 8c79c96693..0632df554a 100644
--- a/ydb/core/tx/tiering/manager.cpp
+++ b/ydb/core/tx/tiering/manager.cpp
@@ -162,6 +162,10 @@ void TTiersManager::TakeConfigs(NMetadata::NFetcher::ISnapshot::TPtr snapshotExt
auto& manager = Managers.emplace(i.second.GetTierName(), std::move(localManager)).first->second;
manager.Start(Secrets);
}
+
+ if (ShardCallback && TlsActivationContext) {
+ ShardCallback(TActivationContext::AsActorContext());
+ }
}
TActorId TTiersManager::GetStorageActorId(const TString& tierId) {
diff --git a/ydb/core/tx/tiering/manager.h b/ydb/core/tx/tiering/manager.h
index 258058fe27..1c6662e870 100644
--- a/ydb/core/tx/tiering/manager.h
+++ b/ydb/core/tx/tiering/manager.h
@@ -1,6 +1,8 @@
#pragma once
#include "external_data.h"
+#include <functional>
+
#include <library/cpp/actors/core/actor_bootstrapped.h>
#include <library/cpp/actors/core/actor.h>
@@ -38,6 +40,7 @@ private:
using TManagers = std::unordered_map<TString, NTiers::TManager>;
ui64 TabletId = 0;
const TActorId TabletActorId;
+ std::function<void(const TActorContext& ctx)> ShardCallback;
TActor* Actor = nullptr;
std::unordered_map<ui64, TString> PathIdTiering;
YDB_READONLY_DEF(TManagers, Managers);
@@ -47,9 +50,11 @@ private:
mutable NMetadata::NFetcher::ISnapshotsFetcher::TPtr ExternalDataManipulation;
public:
- TTiersManager(const ui64 tabletId, const TActorId& tabletActorId)
+ TTiersManager(const ui64 tabletId, const TActorId& tabletActorId,
+ std::function<void(const TActorContext& ctx)> shardCallback = {})
: TabletId(tabletId)
, TabletActorId(tabletActorId)
+ , ShardCallback(shardCallback)
{
}
TActorId GetActorId() const;
diff --git a/ydb/core/tx/tiering/s3_actor.cpp b/ydb/core/tx/tiering/s3_actor.cpp
index 080be0cb6f..13913254fe 100644
--- a/ydb/core/tx/tiering/s3_actor.cpp
+++ b/ydb/core/tx/tiering/s3_actor.cpp
@@ -35,9 +35,9 @@ public:
return Event->Blobs;
}
- TUnifiedBlobId AddExported(const TUnifiedBlobId& srcBlob, const ui64 pathId) {
- Event->SrcToDstBlobs[srcBlob] = srcBlob.MakeS3BlobId(pathId);
- return Event->SrcToDstBlobs[srcBlob];
+ TString GetS3Key(const TUnifiedBlobId& srcBlob) const {
+ Y_VERIFY(Event->SrcToDstBlobs.contains(srcBlob));
+ return Event->SrcToDstBlobs.find(srcBlob)->second.GetS3Key();
}
bool ExtractionFinished() const {
@@ -52,6 +52,18 @@ public:
auto node = KeysToWrite.extract(key);
return node.mapped();
}
+
+ void RemoveBlobs(const THashSet<TUnifiedBlobId>& blobIds) {
+ for (auto& blobId : blobIds) {
+ Event->Blobs.erase(blobId);
+ Event->SrcToDstBlobs.erase(blobId);
+ }
+ }
+
+ bool IsNotFinished(const TString& key) const {
+ return KeysToWrite.contains(key);
+ }
+
private:
std::unordered_map<TString, TUnifiedBlobId> KeysToWrite;
};
@@ -125,47 +137,75 @@ public:
Exports[exportNo] = TS3Export(ev->Release());
auto& ex = Exports[exportNo];
+ THashSet<TUnifiedBlobId> retryes;
for (auto& [blobId, blobData] : ex.Blobs()) {
- TString key = ex.AddExported(blobId, msg.PathId).GetS3Key();
- Y_VERIFY(!ExportingKeys.count(key)); // TODO: allow reexport?
+ const TString key = ex.GetS3Key(blobId);
+ Y_VERIFY(!key.empty());
+
+ if (ExportingKeys.contains(key)) {
+ retryes.insert(blobId);
+ auto strBlobId = blobId.ToStringNew();
+
+ const auto& prevExport = Exports[ExportingKeys[key]];
+ if (prevExport.IsNotFinished(key)) {
+ LOG_S_INFO("[S3] Retry export blob '" << strBlobId << "' at tablet " << TabletId);
+ } else {
+ LOG_S_INFO("[S3] Avoid export retry for blob '" << strBlobId << "' at tablet " << TabletId);
+ blobData = {};
+ }
+ } else {
+ ex.RegisterKey(key, blobId);
+ ExportingKeys[key] = exportNo;
+ }
- ex.RegisterKey(key, blobId);
- ExportingKeys[key] = exportNo;
+ if (!blobData.empty()) {
+ SendPutObjectIfNotExists(key, std::move(blobData));
+ }
+ }
- SendPutObjectIfNotExists(key, std::move(blobData));
+ ex.RemoveBlobs(retryes);
+ if (ex.ExtractionFinished()) {
+ Exports.erase(exportNo);
+ LOG_S_DEBUG("[S3] Empty export " << exportNo << " at tablet " << TabletId);
}
}
void Handle(TEvPrivate::TEvForget::TPtr& ev) {
- // It's possible to get several forgets for the same blob (remove + cleanup)
- for (auto& evict : ev->Get()->Evicted) {
- if (evict.ExternBlob.IsS3Blob()) {
- const TString& key = evict.ExternBlob.GetS3Key();
- if (ForgettingKeys.count(key)) {
- LOG_S_NOTICE("[S3] Ignore forget '" << evict.Blob.ToStringNew() << "' at tablet " << TabletId);
- return; // TODO: return an error?
- }
- }
- }
-
ui64 forgetNo = ++ForgetNo;
-
Forgets[forgetNo] = TS3Forget(ev->Release());
auto& forget = Forgets[forgetNo];
- for (auto& evict : forget.Event->Evicted) {
+ auto& eventEvicted = forget.Event->Evicted;
+ Y_VERIFY(!eventEvicted.empty());
+
+ std::vector<NOlap::TEvictedBlob> newEvicted;
+ newEvicted.reserve(eventEvicted.size());
+
+ for (auto&& evict : forget.Event->Evicted) {
if (!evict.ExternBlob.IsS3Blob()) {
LOG_S_ERROR("[S3] Forget not exported '" << evict.Blob.ToStringNew() << "' at tablet " << TabletId);
continue;
}
- const TString& key = evict.ExternBlob.GetS3Key();
- Y_VERIFY(!ForgettingKeys.count(key));
+ const TString key = evict.ExternBlob.GetS3Key();
+
+ if (ForgettingKeys.contains(key)) {
+ auto strBlobId = evict.Blob.ToStringNew();
+ LOG_S_INFO("[S3] Retry forget blob '" << strBlobId << "' at tablet " << TabletId);
+ } else {
+ newEvicted.emplace_back(std::move(evict));
+ forget.KeysToDelete.emplace(key);
+ ForgettingKeys[key] = forgetNo;
+ }
- forget.KeysToDelete.emplace(key);
- ForgettingKeys[key] = forgetNo;
SendDeleteObject(key);
}
+
+ eventEvicted.swap(newEvicted);
+ if (eventEvicted.empty()) {
+ Forgets.erase(forgetNo);
+ LOG_S_DEBUG("[S3] Empty forget " << forgetNo << " at tablet " << TabletId);
+ }
}
void Handle(TEvPrivate::TEvGetExported::TPtr& ev) {
@@ -270,7 +310,7 @@ public:
LOG_S_DEBUG("[S3] DeleteObjectResponse '" << key << "' at tablet " << TabletId);
if (!ForgettingKeys.count(key)) {
- LOG_S_DEBUG("[S3] DeleteObjectResponse for unknown key '" << key << "' at tablet " << TabletId);
+ LOG_S_INFO("[S3] DeleteObjectResponse for unknown key '" << key << "' at tablet " << TabletId);
return;
}
@@ -278,7 +318,7 @@ public:
ForgettingKeys.erase(key);
if (!Forgets.count(forgetNo)) {
- LOG_S_DEBUG("[S3] DeleteObjectResponse for unknown forget with key '" << key << "' at tablet " << TabletId);
+ LOG_S_INFO("[S3] DeleteObjectResponse for unknown forget with key '" << key << "' at tablet " << TabletId);
return;
}
@@ -355,28 +395,30 @@ public:
}
void KeyFinished(const TString& key, const bool hasError, const TString& errStr) {
- ui64 exportNo = 0;
- {
- auto itExportKey = ExportingKeys.find(key);
- if (itExportKey == ExportingKeys.end()) {
- LOG_S_DEBUG("[S3] KeyFinished for unknown key '" << key << "' at tablet " << TabletId);
- return;
- }
- exportNo = itExportKey->second;
- ExportingKeys.erase(itExportKey);
+ auto itExportKey = ExportingKeys.find(key);
+ if (itExportKey == ExportingKeys.end()) {
+ LOG_S_INFO("[S3] KeyFinished for unknown key '" << key << "' at tablet " << TabletId);
+ return;
}
+ ui64 exportNo = itExportKey->second;
+
auto it = Exports.find(exportNo);
if (it == Exports.end()) {
- LOG_S_DEBUG("[S3] KeyFinished for unknown export with key '" << key << "' at tablet " << TabletId);
+ LOG_S_INFO("[S3] KeyFinished for unknown export with key '" << key << "' at tablet " << TabletId);
return;
}
+ LOG_S_DEBUG("[S3] KeyFinished for key '" << key << "' at tablet " << TabletId);
auto& ex = it->second;
TUnifiedBlobId blobId = ex.FinishKey(key);
ex.Event->AddResult(blobId, key, hasError, errStr);
if (ex.ExtractionFinished()) {
+ for (auto& [blobId, _] : ex.Blobs()) {
+ ExportingKeys.erase(ex.GetS3Key(blobId));
+ }
+
Y_VERIFY(ex.Event->Finished());
Send(ShardActor, ex.Event.release());
Exports.erase(exportNo);
diff --git a/ydb/core/viewer/json_cluster.h b/ydb/core/viewer/json_cluster.h
index e85ed63ba1..c4732c6c60 100644
--- a/ydb/core/viewer/json_cluster.h
+++ b/ydb/core/viewer/json_cluster.h
@@ -6,6 +6,7 @@
#include <ydb/core/tx/schemeshard/schemeshard.h>
#include <ydb/core/tx/tx_proxy/proxy.h>
#include <ydb/core/viewer/json/json.h>
+#include "json_pipe_req.h"
#include "viewer.h"
namespace NKikimr {
@@ -15,13 +16,10 @@ using namespace NActors;
using namespace NNodeWhiteboard;
using ::google::protobuf::FieldDescriptor;
-class TJsonCluster : public TActorBootstrapped<TJsonCluster> {
+class TJsonCluster : public TViewerPipeClient<TJsonCluster> {
using TThis = TJsonCluster;
- using TBase = TActorBootstrapped<TJsonCluster>;
+ using TBase = TViewerPipeClient<TJsonCluster>;
IViewer* Viewer;
- TActorId Initiator;
- ui32 Requested;
- ui32 Received;
NMon::TEvHttpInfo::TPtr Event;
THolder<TEvInterconnect::TEvNodesInfo> NodesInfo;
TMap<TNodeId, NKikimrWhiteboard::TEvSystemStateResponse> SystemInfo;
@@ -33,6 +31,7 @@ class TJsonCluster : public TActorBootstrapped<TJsonCluster> {
TSet<TNodeId> NodesAlive;
TJsonSettings JsonSettings;
ui32 Timeout;
+ ui32 TenantsNumber;
bool Tablets = false;
public:
@@ -42,58 +41,33 @@ public:
TJsonCluster(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev)
: Viewer(viewer)
- , Initiator(ev->Sender)
- , Requested(0)
- , Received(0)
, Event(ev)
{
const auto& params(Event->Get()->Request.GetParams());
JsonSettings.EnumAsNumbers = !FromStringWithDefault<bool>(params.Get("enums"), true);
JsonSettings.UI64AsString = !FromStringWithDefault<bool>(params.Get("ui64"), false);
+ InitConfig(params);
Tablets = FromStringWithDefault<bool>(params.Get("tablets"), false);
Timeout = FromStringWithDefault<ui32>(params.Get("timeout"), 10000);
}
- void Bootstrap(const TActorContext& ctx) {
- const TActorId nameserviceId = GetNameserviceActorId();
- ctx.Send(nameserviceId, new TEvInterconnect::TEvListNodes());
- TBase::Become(&TThis::StateRequestedBrowse);
- ctx.Schedule(TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup());
+ void Bootstrap(const TActorContext& ) {
+ SendRequest(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes());
+ RequestConsoleListTenants();
+ Become(&TThis::StateRequested, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup());
}
- void Die(const TActorContext& ctx) override {
+ void PassAway() override {
if (NodesInfo != nullptr) {
for (const auto& ni : NodesInfo->Nodes) {
- ctx.Send(TActivationContext::InterconnectProxy(ni.NodeId), new TEvents::TEvUnsubscribe());
+ Send(TActivationContext::InterconnectProxy(ni.NodeId), new TEvents::TEvUnsubscribe);
}
}
- TBase::Die(ctx);
+ TBase::PassAway();
}
- void SendRequest(ui32 nodeId, const TActorContext& ctx) {
- TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(nodeId);
- ctx.Send(whiteboardServiceId, new TEvWhiteboard::TEvSystemStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId);
- ++Requested;
- ctx.Send(whiteboardServiceId, new TEvWhiteboard::TEvVDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId);
- ++Requested;
- ctx.Send(whiteboardServiceId, new TEvWhiteboard::TEvPDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId);
- ++Requested;
- ctx.Send(whiteboardServiceId, new TEvWhiteboard::TEvBSGroupStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId);
- ++Requested;
- }
-
- void SendTabletStateRequest(ui32 nodeId, const TActorContext& ctx, THashSet<TTabletId>& filterTablets) {
- auto request = new TEvWhiteboard::TEvTabletStateRequest();
- for (TTabletId id: filterTablets) {
- request->Record.AddFilterTabletId(id);
- }
- TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(nodeId);
- ctx.Send(whiteboardServiceId, request, IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId);
- ++Requested;
- }
-
- void SendTabletStateRequest(const TActorContext& ctx) {
- TIntrusivePtr<TDomainsInfo> domains = AppData(ctx)->DomainsInfo;
+ void SendWhiteboardTabletStateRequest() {
+ TIntrusivePtr<TDomainsInfo> domains = AppData()->DomainsInfo;
TIntrusivePtr<TDomainsInfo::TDomain> domain = domains->Domains.begin()->second;
THashSet<TTabletId> filterTablets;
for (TTabletId id : domain->Coordinators) {
@@ -124,18 +98,36 @@ public:
TIntrusivePtr<TDynamicNameserviceConfig> dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig;
for (const auto& ni : NodesInfo->Nodes) {
if (ni.NodeId <= dynamicNameserviceConfig->MaxStaticNodeId) {
- SendTabletStateRequest(ni.NodeId, ctx, filterTablets);
+ TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(ni.NodeId);
+ auto request = new TEvWhiteboard::TEvTabletStateRequest();
+ for (TTabletId id: filterTablets) {
+ request->Record.AddFilterTabletId(id);
+ }
+ SendRequest(whiteboardServiceId, request, IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId);
}
}
}
- void HandleBrowse(TEvInterconnect::TEvNodesInfo::TPtr& ev, const TActorContext& ctx) {
+ void SendWhiteboardRequests() {
+ for (const auto& ni : NodesInfo->Nodes) {
+ TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(ni.NodeId);
+ SendRequest(whiteboardServiceId, new TEvWhiteboard::TEvSystemStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId);
+ SendRequest(whiteboardServiceId, new TEvWhiteboard::TEvVDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId);
+ SendRequest(whiteboardServiceId,new TEvWhiteboard::TEvPDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId);
+ SendRequest(whiteboardServiceId, new TEvWhiteboard::TEvBSGroupStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId);
+ }
+ if (Tablets) {
+ SendWhiteboardTabletStateRequest();
+ }
+ }
+
+ void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev) {
if (Tablets) {
THolder<TEvTxUserProxy::TEvNavigate> request = MakeHolder<TEvTxUserProxy::TEvNavigate>();
if (!Event->Get()->UserToken.empty()) {
request->Record.SetUserToken(Event->Get()->UserToken);
}
- TIntrusivePtr<TDomainsInfo> domains = AppData(ctx)->DomainsInfo;
+ TIntrusivePtr<TDomainsInfo> domains = AppData()->DomainsInfo;
TIntrusivePtr<TDomainsInfo::TDomain> domain = domains->Domains.begin()->second;
TString domainPath = "/" + domain->Name;
NKikimrSchemeOp::TDescribePath* record = request->Record.MutableDescribePath();
@@ -143,152 +135,136 @@ public:
record->MutableOptions()->SetReturnPartitioningInfo(false);
record->MutableOptions()->SetReturnPartitionConfig(false);
record->MutableOptions()->SetReturnChildren(false);
- TActorId txproxy = MakeTxProxyID();
- ctx.Send(txproxy, request.Release());
- ++Requested;
+ SendRequest(MakeTxProxyID(), request.Release());
}
NodesInfo = ev->Release();
- for (const auto& ni : NodesInfo->Nodes) {
- SendRequest(ni.NodeId, ctx);
- }
- if (Requested > 0) {
- TBase::Become(&TThis::StateRequestedNodeInfo);
- } else {
- ReplyAndDie(ctx);
- }
+ RequestDone();
}
- void Undelivered(TEvents::TEvUndelivered::TPtr &ev, const TActorContext &ctx) {
+ void Undelivered(TEvents::TEvUndelivered::TPtr &ev) {
ui32 nodeId = ev.Get()->Cookie;
switch (ev->Get()->SourceType) {
case TEvWhiteboard::EvSystemStateRequest:
if (SystemInfo.emplace(nodeId, NKikimrWhiteboard::TEvSystemStateResponse{}).second) {
- RequestDone(ctx);
+ RequestDone();
}
break;
case TEvWhiteboard::EvVDiskStateRequest:
if (VDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvVDiskStateResponse{}).second) {
- RequestDone(ctx);
+ RequestDone();
}
break;
case TEvWhiteboard::EvPDiskStateRequest:
if (PDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvPDiskStateResponse{}).second) {
- RequestDone(ctx);
+ RequestDone();
}
break;
case TEvWhiteboard::EvBSGroupStateRequest:
if (BSGroupInfo.emplace(nodeId, NKikimrWhiteboard::TEvBSGroupStateResponse{}).second) {
- RequestDone(ctx);
+ RequestDone();
}
break;
case TEvWhiteboard::EvTabletStateRequest:
if (TabletInfo.emplace(nodeId, NKikimrWhiteboard::TEvTabletStateResponse{}).second) {
- RequestDone(ctx);
+ RequestDone();
}
break;
}
}
- void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &ev, const TActorContext &ctx) {
+ void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &ev) {
ui32 nodeId = ev->Get()->NodeId;
if (SystemInfo.emplace(nodeId, NKikimrWhiteboard::TEvSystemStateResponse{}).second) {
- RequestDone(ctx);
+ RequestDone();
}
if (VDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvVDiskStateResponse{}).second) {
- RequestDone(ctx);
+ RequestDone();
}
if (PDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvPDiskStateResponse{}).second) {
- RequestDone(ctx);
+ RequestDone();
}
if (BSGroupInfo.emplace(nodeId, NKikimrWhiteboard::TEvBSGroupStateResponse{}).second) {
- RequestDone(ctx);
+ RequestDone();
}
- if (Tablets) {
+ TIntrusivePtr<TDynamicNameserviceConfig> dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig;
+ if (Tablets && nodeId <= dynamicNameserviceConfig->MaxStaticNodeId) {
if (TabletInfo.emplace(nodeId, NKikimrWhiteboard::TEvTabletStateResponse{}).second) {
- RequestDone(ctx);
+ RequestDone();
}
}
}
- void Handle(TEvWhiteboard::TEvSystemStateResponse::TPtr& ev, const TActorContext& ctx) {
+ void Handle(TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) {
ui64 nodeId = ev.Get()->Cookie;
SystemInfo[nodeId] = std::move(ev->Get()->Record);
NodesAlive.insert(nodeId);
- RequestDone(ctx);
+ RequestDone();
}
- void Handle(TEvWhiteboard::TEvVDiskStateResponse::TPtr& ev, const TActorContext& ctx) {
+ void Handle(TEvWhiteboard::TEvVDiskStateResponse::TPtr& ev) {
ui64 nodeId = ev.Get()->Cookie;
VDiskInfo[nodeId] = std::move(ev->Get()->Record);
NodesAlive.insert(nodeId);
- RequestDone(ctx);
+ RequestDone();
}
- void Handle(TEvWhiteboard::TEvPDiskStateResponse::TPtr& ev, const TActorContext& ctx) {
+ void Handle(TEvWhiteboard::TEvPDiskStateResponse::TPtr& ev) {
ui64 nodeId = ev.Get()->Cookie;
PDiskInfo[nodeId] = std::move(ev->Get()->Record);
NodesAlive.insert(nodeId);
- RequestDone(ctx);
+ RequestDone();
}
- void Handle(TEvWhiteboard::TEvBSGroupStateResponse::TPtr& ev, const TActorContext& ctx) {
+ void Handle(TEvWhiteboard::TEvBSGroupStateResponse::TPtr& ev) {
ui64 nodeId = ev.Get()->Cookie;
BSGroupInfo[nodeId] = std::move(ev->Get()->Record);
NodesAlive.insert(nodeId);
- RequestDone(ctx);
+ RequestDone();
}
- void Handle(TEvWhiteboard::TEvTabletStateResponse::TPtr& ev, const TActorContext& ctx) {
+ void Handle(TEvWhiteboard::TEvTabletStateResponse::TPtr& ev) {
ui64 nodeId = ev.Get()->Cookie;
TabletInfo[nodeId] = std::move(ev->Get()->Record);
NodesAlive.insert(nodeId);
- RequestDone(ctx);
+ RequestDone();
}
- void Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev, const TActorContext &ctx) {
- if (ev->Get()->GetRecord().GetStatus() == NKikimrScheme::StatusSuccess) {
- DescribeResult = ev->Release();
-
- if (Tablets) {
- SendTabletStateRequest(ctx);
- }
- }
- RequestDone(ctx);
+ void Handle(NConsole::TEvConsole::TEvListTenantsResponse::TPtr& ev) {
+ Ydb::Cms::ListDatabasesResult listTenantsResult;
+ ev->Get()->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult);
+ TenantsNumber = listTenantsResult.paths().size();
+ RequestDone();
}
- void RequestDone(const TActorContext& ctx) {
- ++Received;
- if (Received == Requested) {
- ReplyAndDie(ctx);
+ void Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev) {
+ if (ev->Get()->GetRecord().GetStatus() == NKikimrScheme::StatusSuccess) {
+ DescribeResult = ev->Release();
+ SendWhiteboardRequests();
}
+ RequestDone();
}
- void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) {
+ void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev) {
if (ev->Get()->Status != NKikimrProto::OK) {
- RequestDone(ctx);
+ RequestDone();
}
}
- STFUNC(StateRequestedBrowse) {
+ STATEFN(StateRequested) {
switch (ev->GetTypeRewrite()) {
- HFunc(TEvInterconnect::TEvNodesInfo, HandleBrowse);
- CFunc(TEvents::TSystem::Wakeup, HandleTimeout);
- }
- }
-
- STFUNC(StateRequestedNodeInfo) {
- switch (ev->GetTypeRewrite()) {
- HFunc(TEvWhiteboard::TEvSystemStateResponse, Handle);
- HFunc(TEvWhiteboard::TEvVDiskStateResponse, Handle);
- HFunc(TEvWhiteboard::TEvPDiskStateResponse, Handle);
- HFunc(TEvWhiteboard::TEvBSGroupStateResponse, Handle);
- HFunc(TEvWhiteboard::TEvTabletStateResponse, Handle);
- HFunc(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult, Handle);
- HFunc(TEvents::TEvUndelivered, Undelivered);
- HFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected);
- HFunc(TEvTabletPipe::TEvClientConnected, Handle);
- CFunc(TEvents::TSystem::Wakeup, HandleTimeout);
+ hFunc(TEvInterconnect::TEvNodesInfo, Handle);
+ hFunc(TEvWhiteboard::TEvSystemStateResponse, Handle);
+ hFunc(TEvWhiteboard::TEvVDiskStateResponse, Handle);
+ hFunc(TEvWhiteboard::TEvPDiskStateResponse, Handle);
+ hFunc(TEvWhiteboard::TEvBSGroupStateResponse, Handle);
+ hFunc(TEvWhiteboard::TEvTabletStateResponse, Handle);
+ hFunc(NConsole::TEvConsole::TEvListTenantsResponse, Handle);
+ hFunc(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult, Handle);
+ hFunc(TEvents::TEvUndelivered, Undelivered);
+ hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected);
+ hFunc(TEvTabletPipe::TEvClientConnected, Handle);
+ cFunc(TEvents::TSystem::Wakeup, HandleTimeout);
}
}
@@ -299,7 +275,7 @@ public:
TMap<NKikimrBlobStorage::TVDiskID, const NKikimrWhiteboard::TVDiskStateInfo&> VDisksIndex;
TMap<std::pair<ui32, ui32>, const NKikimrWhiteboard::TPDiskStateInfo&> PDisksIndex;
- void ReplyAndDie(const TActorContext& ctx) {
+ void ReplyAndPassAway() {
TStringStream json;
MergeWhiteboardResponses(MergedBSGroupInfo, BSGroupInfo);
MergeWhiteboardResponses(MergedVDiskInfo, VDiskInfo);
@@ -309,7 +285,7 @@ public:
if (Tablets) {
MergeWhiteboardResponses(MergedTabletInfo, TabletInfo);
- TIntrusivePtr<TDomainsInfo> domains = AppData(ctx)->DomainsInfo;
+ TIntrusivePtr<TDomainsInfo> domains = AppData()->DomainsInfo;
TIntrusivePtr<TDomainsInfo::TDomain> domain = domains->Domains.begin()->second;
ui32 hiveDomain = domains->GetHiveDomainUid(domain->DefaultHiveUid);
ui64 defaultStateStorageGroup = domains->GetDefaultStateStorageGroup(hiveDomain);
@@ -396,7 +372,6 @@ public:
NKikimrViewer::TClusterInfo pbCluster;
if (Tablets) {
- std::unordered_set<std::pair<ui64, ui64>> tenants; /// group by tenantid (TDomainKey)
for (const NKikimrWhiteboard::TTabletStateInfo& tabletInfo : MergedTabletInfo.GetTabletStateInfo()) {
if (tablets.contains(tabletInfo.GetTabletId())) {
NKikimrWhiteboard::TTabletStateInfo* tablet = pbCluster.AddSystemTablets();
@@ -405,15 +380,10 @@ public:
tablet->SetOverall(tabletFlag);
flag = Max(flag, GetViewerFlag(tabletFlag));
}
- std::pair<ui64, ui64> tenantId = {0, 0};
- if (tabletInfo.HasTenantId()) {
- tenantId = {tabletInfo.GetTenantId().GetSchemeShard(), tabletInfo.GetTenantId().GetPathId()};
- }
- tenants.emplace(tenantId);
}
pbCluster.SetTablets(MergedTabletInfo.TabletStateInfoSize());
- pbCluster.SetTenants(tenants.size());
}
+ pbCluster.SetTenants(TenantsNumber);
pbCluster.SetOverall(flag);
if (NodesInfo != nullptr) {
@@ -438,12 +408,12 @@ public:
pbCluster.SetName(itMax->first);
}
TProtoToJson::ProtoToJson(json, pbCluster, JsonSettings);
- ctx.Send(Initiator, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), std::move(json.Str())), 0, NMon::IEvHttpInfoRes::EContentType::Custom));
- Die(ctx);
+ Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), std::move(json.Str())), 0, NMon::IEvHttpInfoRes::EContentType::Custom));
+ PassAway();
}
- void HandleTimeout(const TActorContext& ctx) {
- ReplyAndDie(ctx);
+ void HandleTimeout() {
+ ReplyAndPassAway();
}
};
diff --git a/ydb/core/viewer/json_storage.h b/ydb/core/viewer/json_storage.h
index adfc6e3458..652fb9afd3 100644
--- a/ydb/core/viewer/json_storage.h
+++ b/ydb/core/viewer/json_storage.h
@@ -74,6 +74,7 @@ class TJsonStorage : public TViewerPipeClient<TJsonStorage> {
bool NeedGroups = true;
bool NeedDisks = true;
bool NeedDonors = true;
+ bool NeedAdditionalNodesRequests;
enum class EWith {
Everything,
@@ -109,6 +110,7 @@ public:
FilterStoragePools.emplace(filterStoragePool);
}
SplitIds(params.Get("node_id"), ',', FilterNodeIds);
+ NeedAdditionalNodesRequests = !FilterNodeIds.empty();
SplitIds(params.Get("group_id"), ',', FilterGroupIds);
Sort(FilterGroupIds);
NeedGroups = FromStringWithDefault<bool>(params.Get("need_groups"), true);
@@ -325,17 +327,19 @@ public:
}
void Handle(TEvWhiteboard::TEvBSGroupStateResponse::TPtr& ev) {
+ ui64 nodeId = ev.Get()->Cookie;
for (const auto& info : ev->Get()->Record.GetBSGroupStateInfo()) {
TString storagePoolName = info.GetStoragePoolName();
if (storagePoolName.empty()) {
continue;
}
- StoragePoolInfo[storagePoolName].Groups.emplace(ToString(info.GetGroupID()));
+ if (FilterNodeIds.empty() || FilterNodeIds.contains(nodeId)) {
+ StoragePoolInfo[storagePoolName].Groups.emplace(ToString(info.GetGroupID()));
+ }
for (const auto& vDiskNodeId : info.GetVDiskNodeIds()) {
Group2NodeId[info.GetGroupID()].push_back(vDiskNodeId);
}
}
- ui64 nodeId = ev.Get()->Cookie;
BSGroupInfo[nodeId] = std::move(ev->Get()->Record);
RequestDone();
}
@@ -476,7 +480,8 @@ public:
}
void ReplyAndPassAway() {
- if (!FilterNodeIds.empty()) {
+ if (NeedAdditionalNodesRequests) {
+ NeedAdditionalNodesRequests = false;
for (const auto& [nodeId, vDiskInfo] : VDiskInfo) {
if (FilterNodeIds.count(nodeId) == 0) {
continue;
@@ -495,8 +500,6 @@ public:
}
}
- FilterNodeIds.clear(); // we don't need it anymore
-
if (Requests != 0) {
return; // retry requests for neighbours of our groups (when BSC wasn't available)
}
diff --git a/ydb/library/yql/sql/v1/SQLv1.g.in b/ydb/library/yql/sql/v1/SQLv1.g.in
index 21ffa80530..824bb231a1 100644
--- a/ydb/library/yql/sql/v1/SQLv1.g.in
+++ b/ydb/library/yql/sql/v1/SQLv1.g.in
@@ -572,7 +572,7 @@ table_setting_value:
| STRING_VALUE
| integer
| split_boundaries
- | expr ON an_id
+ | expr ON an_id (AS (SECONDS | MILLISECONDS | MICROSECONDS | NANOSECONDS))?
;
family_entry: FAMILY an_id family_settings;
@@ -978,7 +978,10 @@ keyword_compat: (
| LEFT
| LIKE
| MATCH
+ | MICROSECONDS
+ | MILLISECONDS
| NATURAL
+ | NANOSECONDS
| NO
| NOTNULL
| NULLS
@@ -1020,6 +1023,7 @@ keyword_compat: (
| ROW
| SAMPLE
| SAVEPOINT
+ | SECONDS
| SEMI
| SETS
| SUBQUERY
@@ -1278,7 +1282,10 @@ LIMIT: L I M I T;
LIST: L I S T;
LOCAL: L O C A L;
MATCH: M A T C H;
+MICROSECONDS: M I C R O S E C O N D S;
+MILLISECONDS: M I L L I S E C O N D S;
NATURAL: N A T U R A L;
+NANOSECONDS: N A N O S E C O N D S;
NO: N O;
NOT: N O T;
NOTNULL: N O T N U L L;
@@ -1332,6 +1339,7 @@ ROWS: R O W S;
SAMPLE: S A M P L E;
SAVEPOINT: S A V E P O I N T;
SCHEMA: S C H E M A;
+SECONDS: S E C O N D S;
SELECT: S E L E C T;
SEMI: S E M I;
SET: S E T;
diff --git a/ydb/library/yql/sql/v1/format/sql_format_ut.cpp b/ydb/library/yql/sql/v1/format/sql_format_ut.cpp
index 222784f367..73c2b168bd 100644
--- a/ydb/library/yql/sql/v1/format/sql_format_ut.cpp
+++ b/ydb/library/yql/sql/v1/format/sql_format_ut.cpp
@@ -217,6 +217,14 @@ Y_UNIT_TEST_SUITE(CheckSqlFormatter) {
{"create table user(partition by (user,user))","CREATE TABLE user (\n\tPARTITION BY (user, user)\n);\n"},
{"create table user(order by (user asc))","CREATE TABLE user (\n\tORDER BY (user ASC)\n);\n"},
{"create table user(order by (user desc,user))","CREATE TABLE user (\n\tORDER BY (user DESC, user)\n);\n"},
+ {"create table user(user int32) with (ttl=interval('P1D') on user as seconds)",
+ "CREATE TABLE user (\n\tuser int32\n)\nWITH (ttl = interval('P1D') ON user AS SECONDS\n);\n"},
+ {"create table user(user int32) with (ttl=interval('P1D') on user as MilliSeconds)",
+ "CREATE TABLE user (\n\tuser int32\n)\nWITH (ttl = interval('P1D') ON user AS MILLISECONDS\n);\n"},
+ {"create table user(user int32) with (ttl=interval('P1D') on user as microSeconds)",
+ "CREATE TABLE user (\n\tuser int32\n)\nWITH (ttl = interval('P1D') ON user AS MICROSECONDS\n);\n"},
+ {"create table user(user int32) with (ttl=interval('P1D') on user as nAnOsEcOnDs)",
+ "CREATE TABLE user (\n\tuser int32\n)\nWITH (ttl = interval('P1D') ON user AS NANOSECONDS\n);\n"},
{"create table user(index user global unique sync with (user=user,user=user) on (user,user))",
"CREATE TABLE user (\n\tINDEX user GLOBAL UNIQUE SYNC WITH (user = user, user = user) ON (user, user)\n);\n"},
{"create table user(index user global async with (user=user,) on (user))",
diff --git a/ydb/library/yql/sql/v1/node.cpp b/ydb/library/yql/sql/v1/node.cpp
index 9a340e1dd4..96ba11449f 100644
--- a/ydb/library/yql/sql/v1/node.cpp
+++ b/ydb/library/yql/sql/v1/node.cpp
@@ -2331,9 +2331,10 @@ TMaybe<TStringContent> StringContentOrIdContent(TContext& ctx, TPosition pos, co
(ctx.AnsiQuotedIdentifiers && input.StartsWith('"'))? EStringContentMode::AnsiIdent : EStringContentMode::Default);
}
-TTtlSettings::TTtlSettings(const TIdentifier& columnName, const TNodePtr& expr)
+TTtlSettings::TTtlSettings(const TIdentifier& columnName, const TNodePtr& expr, const TMaybe<EUnit>& columnUnit)
: ColumnName(columnName)
, Expr(expr)
+ , ColumnUnit(columnUnit)
{
}
diff --git a/ydb/library/yql/sql/v1/node.h b/ydb/library/yql/sql/v1/node.h
index 3b6db37450..48ca912e93 100644
--- a/ydb/library/yql/sql/v1/node.h
+++ b/ydb/library/yql/sql/v1/node.h
@@ -1111,10 +1111,18 @@ namespace NSQLTranslationV1 {
TMaybe<TStringContent> StringContentOrIdContent(TContext& ctx, TPosition pos, const TString& input);
struct TTtlSettings {
+ enum class EUnit {
+ Seconds /* "seconds" */,
+ Milliseconds /* "milliseconds" */,
+ Microseconds /* "microseconds" */,
+ Nanoseconds /* "nanoseconds" */,
+ };
+
TIdentifier ColumnName;
TNodePtr Expr;
+ TMaybe<EUnit> ColumnUnit;
- TTtlSettings(const TIdentifier& columnName, const TNodePtr& expr);
+ TTtlSettings(const TIdentifier& columnName, const TNodePtr& expr, const TMaybe<EUnit>& columnUnit = {});
};
struct TTableSettings {
diff --git a/ydb/library/yql/sql/v1/query.cpp b/ydb/library/yql/sql/v1/query.cpp
index 9cf580a27e..f85965aea5 100644
--- a/ydb/library/yql/sql/v1/query.cpp
+++ b/ydb/library/yql/sql/v1/query.cpp
@@ -865,12 +865,16 @@ public:
if (const auto& ttl = Params.TableSettings.TtlSettings) {
if (ttl.IsSet()) {
const auto& ttlSettings = ttl.GetValueSet();
- auto columnName = BuildQuotedAtom(ttlSettings.ColumnName.Pos, ttlSettings.ColumnName.Name);
- auto nameValueTuple = Y(
- Q(Y(Q("columnName"), columnName)),
- Q(Y(Q("expireAfter"), ttlSettings.Expr))
- );
- settings = L(settings, Q(Y(Q("setTtlSettings"), Q(nameValueTuple))));
+ auto opts = Y();
+
+ opts = L(opts, Q(Y(Q("columnName"), BuildQuotedAtom(ttlSettings.ColumnName.Pos, ttlSettings.ColumnName.Name))));
+ opts = L(opts, Q(Y(Q("expireAfter"), ttlSettings.Expr)));
+
+ if (ttlSettings.ColumnUnit) {
+ opts = L(opts, Q(Y(Q("columnUnit"), Q(ToString(*ttlSettings.ColumnUnit)))));
+ }
+
+ settings = L(settings, Q(Y(Q("setTtlSettings"), Q(opts))));
} else {
YQL_ENSURE(false, "Can't reset TTL settings");
}
@@ -1049,12 +1053,16 @@ public:
if (const auto& ttl = Params.TableSettings.TtlSettings) {
if (ttl.IsSet()) {
const auto& ttlSettings = ttl.GetValueSet();
- auto columnName = BuildQuotedAtom(ttlSettings.ColumnName.Pos, ttlSettings.ColumnName.Name);
- auto nameValueTuple = Y(
- Q(Y(Q("columnName"), columnName)),
- Q(Y(Q("expireAfter"), ttlSettings.Expr))
- );
- settings = L(settings, Q(Y(Q("setTtlSettings"), Q(nameValueTuple))));
+ auto opts = Y();
+
+ opts = L(opts, Q(Y(Q("columnName"), BuildQuotedAtom(ttlSettings.ColumnName.Pos, ttlSettings.ColumnName.Name))));
+ opts = L(opts, Q(Y(Q("expireAfter"), ttlSettings.Expr)));
+
+ if (ttlSettings.ColumnUnit) {
+ opts = L(opts, Q(Y(Q("columnUnit"), Q(ToString(*ttlSettings.ColumnUnit)))));
+ }
+
+ settings = L(settings, Q(Y(Q("setTtlSettings"), Q(opts))));
} else {
settings = L(settings, Q(Y(Q("resetTtlSettings"), Q(Y()))));
}
diff --git a/ydb/library/yql/sql/v1/sql.cpp b/ydb/library/yql/sql/v1/sql.cpp
index d031f9ca26..1b199625e0 100644
--- a/ydb/library/yql/sql/v1/sql.cpp
+++ b/ydb/library/yql/sql/v1/sql.cpp
@@ -2169,7 +2169,17 @@ namespace {
return false;
}
- to.Set(TTtlSettings(columnName, exprNode));
+ TMaybe<TTtlSettings::EUnit> columnUnit;
+ if (from.GetAlt_table_setting_value5().HasBlock4()) {
+ const TString unit = to_lower(ctx.Token(from.GetAlt_table_setting_value5().GetBlock4().GetToken2()));
+ columnUnit.ConstructInPlace();
+ if (!TryFromString<TTtlSettings::EUnit>(unit, *columnUnit)) {
+ ctx.Error() << "Invalid unit: " << unit;
+ return false;
+ }
+ }
+
+ to.Set(TTtlSettings(columnName, exprNode, columnUnit));
break;
}
default:
diff --git a/ydb/library/yql/sql/v1/sql_ut.cpp b/ydb/library/yql/sql/v1/sql_ut.cpp
index 1894fb54d6..2f4e7a6c88 100644
--- a/ydb/library/yql/sql/v1/sql_ut.cpp
+++ b/ydb/library/yql/sql/v1/sql_ut.cpp
@@ -268,6 +268,26 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) {
UNIT_ASSERT(SqlToYql("USE plato; SELECT CHANGEFEED FROM CHANGEFEED").IsOk());
}
+ Y_UNIT_TEST(SecondsKeywordNotReservedForNames) {
+ UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE SECONDS (SECONDS Uint32, PRIMARY KEY (SECONDS));").IsOk());
+ UNIT_ASSERT(SqlToYql("USE plato; SELECT SECONDS FROM SECONDS").IsOk());
+ }
+
+ Y_UNIT_TEST(MillisecondsKeywordNotReservedForNames) {
+ UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE MILLISECONDS (MILLISECONDS Uint32, PRIMARY KEY (MILLISECONDS));").IsOk());
+ UNIT_ASSERT(SqlToYql("USE plato; SELECT MILLISECONDS FROM MILLISECONDS").IsOk());
+ }
+
+ Y_UNIT_TEST(MicrosecondsKeywordNotReservedForNames) {
+ UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE MICROSECONDS (MICROSECONDS Uint32, PRIMARY KEY (MICROSECONDS));").IsOk());
+ UNIT_ASSERT(SqlToYql("USE plato; SELECT MICROSECONDS FROM MICROSECONDS").IsOk());
+ }
+
+ Y_UNIT_TEST(NanosecondsKeywordNotReservedForNames) {
+ UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE NANOSECONDS (NANOSECONDS Uint32, PRIMARY KEY (NANOSECONDS));").IsOk());
+ UNIT_ASSERT(SqlToYql("USE plato; SELECT NANOSECONDS FROM NANOSECONDS").IsOk());
+ }
+
Y_UNIT_TEST(Jubilee) {
NYql::TAstParseResult res = SqlToYql("USE plato; INSERT INTO Arcadia (r2000000) VALUES (\"2M GET!!!\");");
UNIT_ASSERT(res.Root);
@@ -1659,11 +1679,33 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) {
UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
}
- Y_UNIT_TEST(TtlParseCorrect) {
+ Y_UNIT_TEST(DateTimeTtlParseCorrect) {
NYql::TAstParseResult res = SqlToYql(
R"( USE plato;
CREATE TABLE tableName (Key Uint32, CreatedAt Timestamp, PRIMARY KEY (Key))
- WITH ( TTL = Interval("P1D") On CreatedAt);)"
+ WITH (TTL = Interval("P1D") On CreatedAt);)"
+ );
+ UNIT_ASSERT(res.Root);
+
+ TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) {
+ if (word == "Write") {
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("setTtlSettings"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("expireAfter"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("86400000"));
+ }
+ };
+
+ TWordCountHive elementStat = { {TString("Write"), 0} };
+ VerifyProgram(res, elementStat, verifyLine);
+
+ UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]);
+ }
+
+ Y_UNIT_TEST(IntTtlParseCorrect) {
+ NYql::TAstParseResult res = SqlToYql(
+ R"( USE plato;
+ CREATE TABLE tableName (Key Uint32, CreatedAt Uint32, PRIMARY KEY (Key))
+ WITH (TTL = Interval("P1D") On CreatedAt AS SECONDS);)"
);
UNIT_ASSERT(res.Root);
@@ -1672,6 +1714,8 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) {
UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("setTtlSettings"));
UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("expireAfter"));
UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("86400000"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("columnUnit"));
+ UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("seconds"));
}
};
@@ -1891,6 +1935,7 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) {
Y_UNIT_TEST(AlterTableSetTTLIsCorrect) {
UNIT_ASSERT(SqlToYql("USE plato; ALTER TABLE table SET (TTL = Interval(\"PT3H\") ON column)").IsOk());
+ UNIT_ASSERT(SqlToYql("USE plato; ALTER TABLE table SET (TTL = Interval(\"PT3H\") ON column AS SECONDS)").IsOk());
}
Y_UNIT_TEST(AlterTableSetTieringIsCorrect) {
@@ -3345,16 +3390,27 @@ select FormatType($f());
"<main>:6:39: Error: Unknown correlation name: t\n");
}
- Y_UNIT_TEST(InvalidTtl) {
+ Y_UNIT_TEST(InvalidTtlInterval) {
auto req = R"(
USE plato;
CREATE TABLE tableName (Key Uint32, CreatedAt Timestamp, PRIMARY KEY (Key))
- WITH ( TTL = 1 On ExpireAt );
+ WITH (TTL = 1 On CreatedAt);
+ )";
+ auto res = SqlToYql(req);
+ UNIT_ASSERT(!res.Root);
+ UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:25: Error: Literal of Interval type is expected for TTL\n"
+ "<main>:4:25: Error: Invalid TTL settings\n");
+ }
+
+ Y_UNIT_TEST(InvalidTtlUnit) {
+ auto req = R"(
+ USE plato;
+ CREATE TABLE tableName (Key Uint32, CreatedAt Uint32, PRIMARY KEY (Key))
+ WITH (TTL = Interval("P1D") On CreatedAt AS PICOSECONDS);
)";
auto res = SqlToYql(req);
UNIT_ASSERT(!res.Root);
- UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:26: Error: Literal of Interval type is expected for TTL\n"
- "<main>:4:26: Error: Invalid TTL settings\n");
+ UNIT_ASSERT_STRING_CONTAINS(Err2Str(res), "<main>:4:56: Error: Unexpected token 'PICOSECONDS'");
}
Y_UNIT_TEST(InvalidChangefeedSink) {
diff --git a/ydb/public/api/grpc/CMakeLists.darwin.txt b/ydb/public/api/grpc/CMakeLists.darwin.txt
index a6103cf1ca..e38ea284d2 100644
--- a/ydb/public/api/grpc/CMakeLists.darwin.txt
+++ b/ydb/public/api/grpc/CMakeLists.darwin.txt
@@ -35,6 +35,7 @@ target_proto_messages(api-grpc PRIVATE
${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_scripting_v1.proto
${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_table_v1.proto
${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_topic_v1.proto
+ ${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_keyvalue_v1.proto
${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/yq_v1.proto
)
target_proto_addincls(api-grpc
diff --git a/ydb/public/api/grpc/CMakeLists.linux-aarch64.txt b/ydb/public/api/grpc/CMakeLists.linux-aarch64.txt
index aa76ae23b2..e3126cd3c8 100644
--- a/ydb/public/api/grpc/CMakeLists.linux-aarch64.txt
+++ b/ydb/public/api/grpc/CMakeLists.linux-aarch64.txt
@@ -36,6 +36,7 @@ target_proto_messages(api-grpc PRIVATE
${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_scripting_v1.proto
${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_table_v1.proto
${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_topic_v1.proto
+ ${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_keyvalue_v1.proto
${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/yq_v1.proto
)
target_proto_addincls(api-grpc
diff --git a/ydb/public/api/grpc/CMakeLists.linux.txt b/ydb/public/api/grpc/CMakeLists.linux.txt
index aa76ae23b2..e3126cd3c8 100644
--- a/ydb/public/api/grpc/CMakeLists.linux.txt
+++ b/ydb/public/api/grpc/CMakeLists.linux.txt
@@ -36,6 +36,7 @@ target_proto_messages(api-grpc PRIVATE
${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_scripting_v1.proto
${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_table_v1.proto
${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_topic_v1.proto
+ ${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_keyvalue_v1.proto
${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/yq_v1.proto
)
target_proto_addincls(api-grpc
diff --git a/ydb/public/api/grpc/ydb_keyvalue_v1.proto b/ydb/public/api/grpc/ydb_keyvalue_v1.proto
new file mode 100644
index 0000000000..07f073576d
--- /dev/null
+++ b/ydb/public/api/grpc/ydb_keyvalue_v1.proto
@@ -0,0 +1,49 @@
+syntax = "proto3";
+
+package Ydb.KeyValue.V1;
+
+option java_package = "com.yandex.ydb.keyvalue.v1";
+option java_outer_classname = "KeyValueGrpc";
+option java_multiple_files = true;
+
+import "ydb/public/api/protos/ydb_keyvalue.proto";
+
+// KeyValue tablets provide a simple key-value storage in a low-overhead and easy-to-shoot-your-leg manner.
+// To use KeyValue tablets in an efficient way one must be familiar with the design of both the KeyValue tablet
+// and the Distributed Storage underneath it.
+
+service KeyValueService {
+
+ // Create a volume by path and partition count
+ rpc CreateVolume(KeyValue.CreateVolumeRequest) returns (KeyValue.CreateVolumeResponse);
+
+ // Drop the volume by path
+ rpc DropVolume(KeyValue.DropVolumeRequest) returns (KeyValue.DropVolumeResponse);
+
+ // Alter the volume by path
+ rpc AlterVolume(KeyValue.AlterVolumeRequest) returns (KeyValue.AlterVolumeResponse);
+
+ // Describe the volume by path
+ rpc DescribeVolume(KeyValue.DescribeVolumeRequest) returns (KeyValue.DescribeVolumeResponse);
+
+ // List partitions of a volume at the local node.
+ rpc ListLocalPartitions(KeyValue.ListLocalPartitionsRequest) returns (KeyValue.ListLocalPartitionsResponse);
+
+ // Acquire an exclusive lock for the partition.
+ rpc AcquireLock(KeyValue.AcquireLockRequest) returns (KeyValue.AcquireLockResponse);
+
+ // Perform list of commands to modify the state of the partition as an atomic transaction.
+ rpc ExecuteTransaction(KeyValue.ExecuteTransactionRequest) returns (KeyValue.ExecuteTransactionResponse);
+
+ // Read the value stored in the item with the key specified.
+ rpc Read(KeyValue.ReadRequest) returns (KeyValue.ReadResponse);
+
+ // Read items with keys in the specified range.
+ rpc ReadRange(KeyValue.ReadRangeRequest) returns (KeyValue.ReadRangeResponse);
+
+ // List keys and metadata of items with keys in the specified range.
+ rpc ListRange(KeyValue.ListRangeRequest) returns (KeyValue.ListRangeResponse);
+
+ // Get storage channel status of the partition.
+ rpc GetStorageChannelStatus(KeyValue.GetStorageChannelStatusRequest) returns (KeyValue.GetStorageChannelStatusResponse);
+}
diff --git a/ydb/public/api/protos/CMakeLists.darwin.txt b/ydb/public/api/protos/CMakeLists.darwin.txt
index a2284957e0..173437c25e 100644
--- a/ydb/public/api/protos/CMakeLists.darwin.txt
+++ b/ydb/public/api/protos/CMakeLists.darwin.txt
@@ -50,6 +50,7 @@ target_proto_messages(api-protos PRIVATE
${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_table.proto
${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_topic.proto
${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_value.proto
+ ${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_keyvalue.proto
${CMAKE_SOURCE_DIR}/ydb/public/api/protos/yq.proto
)
generate_enum_serilization(api-protos
diff --git a/ydb/public/api/protos/CMakeLists.linux-aarch64.txt b/ydb/public/api/protos/CMakeLists.linux-aarch64.txt
index 7a83641a53..769499a1ae 100644
--- a/ydb/public/api/protos/CMakeLists.linux-aarch64.txt
+++ b/ydb/public/api/protos/CMakeLists.linux-aarch64.txt
@@ -51,6 +51,7 @@ target_proto_messages(api-protos PRIVATE
${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_table.proto
${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_topic.proto
${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_value.proto
+ ${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_keyvalue.proto
${CMAKE_SOURCE_DIR}/ydb/public/api/protos/yq.proto
)
generate_enum_serilization(api-protos
diff --git a/ydb/public/api/protos/CMakeLists.linux.txt b/ydb/public/api/protos/CMakeLists.linux.txt
index 7a83641a53..769499a1ae 100644
--- a/ydb/public/api/protos/CMakeLists.linux.txt
+++ b/ydb/public/api/protos/CMakeLists.linux.txt
@@ -51,6 +51,7 @@ target_proto_messages(api-protos PRIVATE
${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_table.proto
${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_topic.proto
${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_value.proto
+ ${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_keyvalue.proto
${CMAKE_SOURCE_DIR}/ydb/public/api/protos/yq.proto
)
generate_enum_serilization(api-protos
diff --git a/ydb/public/api/protos/ydb_keyvalue.proto b/ydb/public/api/protos/ydb_keyvalue.proto
new file mode 100644
index 0000000000..e9e58f15a2
--- /dev/null
+++ b/ydb/public/api/protos/ydb_keyvalue.proto
@@ -0,0 +1,544 @@
+syntax = "proto3";
+option cc_enable_arenas = true;
+
+package Ydb.KeyValue;
+
+option java_package = "com.yandex.ydb.keyvalue";
+option java_outer_classname = "KeyValueProtos";
+option java_multiple_files = true;
+
+import "ydb/public/api/protos/ydb_operation.proto";
+
+//
+// KeyValue API.
+//
+
+
+message StorageChannelInfo {
+ enum StatusFlag {
+ // The system was unable to get the storage channel status.
+ STATUS_FLAG_UNSPECIFIED = 0;
+
+ // Enough storage space is available.
+ STATUS_FLAG_GREEN = 10;
+
+    // Free storage space is low, user must stop writing new data. Compaction's writes are allowed.
+ STATUS_FLAG_YELLOW_STOP = 20;
+
+    // No free storage space is available, no writes will succeed.
+ STATUS_FLAG_ORANGE_OUT_SPACE = 30;
+ }
+
+ // Storage channel index.
+ uint32 storage_channel = 1;
+
+ // The status flag of the storage channel.
+ StatusFlag status_flag = 2;
+}
+
+
+message Priorities {
+ enum Priority {
+ // Use default priority (PRIORITY_REALTIME).
+ PRIORITY_UNSPECIFIED = 0;
+
+ // High priority for user-initiated operations, the default priority.
+ PRIORITY_REALTIME = 1;
+
+ // Low priority for background system activity.
+ PRIORITY_BACKGROUND = 2;
+ }
+}
+
+
+message StorageConfig {
+ message ChannelConfig {
+ // Media for the storage channel.
+        // This field specifies the kind of one of the storage_pool_types configured in config.yaml
+ string media = 1;
+ }
+
+ // Channel configs.
+ // Channels 0 and 1 are system channels needed for tablet operation.
+ // Channels starting with 2 are user channels.
+ repeated ChannelConfig channel = 1;
+}
+
+
+message KeyRange {
+ // The lower bound of the key range.
+ // If unspecified, the range begins from the lowest key.
+ oneof from_bound {
+ // Set in order for the range to include the key specified
+ string from_key_inclusive = 1;
+ // Set in order for the range not to include the key specified
+ string from_key_exclusive = 2;
+ }
+
+ // The higher bound of the key range.
+ // If unspecified, the range ends with the highest key.
+ oneof to_bound {
+ // Set in order for the range to include the key specified
+ string to_key_inclusive = 3;
+ // Set in order for the range not to include the key specified
+ string to_key_exclusive = 4;
+ }
+}
+
+// The lock mechanism provides a way to ensure that only one client holds the lock.
+// The client is provided the lock generation.
+// Only operations with matching lock generation and operations with no lock generation are executed.
+// When the lock generation is mismatched, operations will fail with PRECONDITION_FAILED status.
+message AcquireLockRequest {
+ Ydb.Operations.OperationParams operation_params = 1;
+
+ // Volume path.
+ string path = 2;
+ // Partition of the volume.
+ uint64 partition_id = 3;
+}
+
+message AcquireLockResponse {
+ // Operation contains the result of the request. Check the ydb_operation.proto.
+ Ydb.Operations.Operation operation = 1;
+}
+
+message AcquireLockResult {
+ // The generation of the lock to provide as an argument to all the operations the user performs with the partition.
+ uint64 lock_generation = 1;
+
+ // Contains 0 if the request was sent to the node of the partition, node ID of the partition otherwise.
+ uint32 node_id = 2;
+}
+
+message ExecuteTransactionRequest {
+ message Command {
+ message Rename {
+ // The key to change.
+ string old_key = 1;
+
+ // The new key to change the old key to.
+ string new_key = 2;
+ }
+ message Concat {
+ // Keys to use as the source for the concatenation.
+ repeated string input_keys = 1;
+
+ // New key to use for the result of the concatenation.
+ string output_key = 2;
+
+ // Input keys are deleted after the concatenation by default.
+ // In order to keep both the inputs and the output, set keep_inputs to true.
+ bool keep_inputs = 3;
+ }
+
+ // Make a copy of a range of key-value pairs.
+ // New keys are formed by removing a prefix and/or prepending keys with the new prefix.
+ // For example, copy of the key-value pairs [{aaabc,1}, {aaaef,2}, {baaef,3}] can be stripped of the 'aa' prefix
+ // and prepended with the 'x' so that the new pairs that are added are [{xabc, 1}, {xaef, 2}].
+ message CopyRange {
+ // The range of keys to copy
+ KeyRange range = 1;
+
+ // For each source key that begins with the prefix_to_remove, that prefix is removed from the new key before
+ // prepending it with the prefix_to_add.
+ // Acts as filter if not empty.
+ string prefix_to_remove = 2;
+
+ // The prefix to prepend to each new key.
+ string prefix_to_add = 3;
+ }
+ message Write {
+ enum Tactic {
+ // Use default tactic (TACTIC_MAX_THROUGHPUT).
+ TACTIC_UNSPECIFIED = 0;
+
+ // Write minimum required redundant data. Does not affect storage durability. The default tactic.
+ TACTIC_MAX_THROUGHPUT = 1;
+
+ // Write additional redundant data to reduce operation duration. Will use additional space.
+ TACTIC_MIN_LATENCY = 2;
+ }
+ // Key of the key-value pair to write.
+ string key = 1;
+
+ // Value of the key-value pair to write.
+ bytes value = 2;
+
+ // Storage channel to write the value to. Channel numbers begin with 1 and may go up to approximately 250
+ // (depends on the channel configuration of each partition).
+ // Channel 1 is called the INLINE channel (value is stored in the index table).
+ // Channel 2 is called the MAIN channel (value is stored as a separate blob in the Distributed Storage).
+ // Channels 1 and 2 are available for all partitions.
+ // If the storage channel specified is not configured for the partition, the value is stored in
+ // channel 2 (the MAIN channel).
+ uint32 storage_channel = 3; // (default = 0 is same as 2 or MAIN)
+
+ // Priority to use for the Distributed Storage Get operation.
+ // Has no effect for the INLINE storage channel.
+ Priorities.Priority priority = 4;
+
+ // Tactic to use for the Distributed Storage Put operation.
+ // Has no effect for the INLINE storage channel.
+ Tactic tactic = 5;
+ }
+ message DeleteRange {
+ // The range of keys to delete.
+ KeyRange range = 1;
+ }
+
+ oneof action {
+ // Delete key-value pairs with keys in the range specified.
+ DeleteRange delete_range = 1;
+
+ // Change the key of a key-value pair.
+ Rename rename = 2;
+
+ // Create a copy of key-value pairs with keys in the range specified by removing and/or prepending a prefix
+ // to each key.
+ CopyRange copy_range = 3;
+
+ // Create a new key-value pair with key specified by concatenating values of multiple other key-value pairs
+ // with keys specified.
+ Concat concat = 4;
+
+ // Create a new key-value pair with key and value specified.
+ Write write = 5;
+ }
+ }
+
+ Ydb.Operations.OperationParams operation_params = 1;
+
+ // Volume path.
+ string path = 2;
+ // Partition of the volume.
+ uint64 partition_id = 3;
+
+ // Generation of the exclusive lock acquired for the partition as a result of an AcquireLock call.
+ optional uint64 lock_generation = 4;
+
+ // Commands to execute as a single atomic transaction.
+ // The order of execution of commands is the same as the order of commands in the ExecuteTransactionRequest.
+ // The order of execution of different transactions is not specified.
+ repeated Command commands = 5;
+}
+
+message ExecuteTransactionResponse {
+ // Operation contains the result of the request. Check the ydb_operation.proto.
+ Ydb.Operations.Operation operation = 1;
+}
+
+message ExecuteTransactionResult {
+ // Contains status flags for the storage channels used by the transaction.
+ repeated StorageChannelInfo storage_channel_info = 1;
+
+ // Contains 0 if the request was sent to the node of the partition, node ID of the partition otherwise.
+ uint32 node_id = 2;
+}
+
+message ReadRequest {
+ Ydb.Operations.OperationParams operation_params = 1;
+
+ // Volume path.
+ string path = 2;
+ // Partition of the volume.
+ uint64 partition_id = 3;
+
+ // Generation of the exclusive lock acquired for the partition as a result of an AcquireLock call.
+ optional uint64 lock_generation = 4;
+
+ // Key of the key-value pair to read.
+ string key = 5;
+
+ // Offset in bytes from the beginning of the value to read data from.
+ uint64 offset = 6;
+
+ // Size of the data to read in bytes. 0 means "read to the end of the value".
+ uint64 size = 7;
+
+ // Result protobuf size limit.
+ // Overrides the default limit only with a smaller value.
+ // 0 means "use the default limit".
+ uint64 limit_bytes = 8;
+
+ // Priority to use for the Distributed Storage Get operation.
+ // Has no effect for the INLINE storage channel.
+ Priorities.Priority priority = 9;
+}
+
+message ReadResponse {
+ // Operation contains the result of the request. Check the ydb_operation.proto.
+ Ydb.Operations.Operation operation = 1;
+}
+
+message ReadResult {
+ // The key of the requested key-value pair.
+ string requested_key = 1;
+
+ // Offset in bytes from the beginning of the value requested.
+ uint64 requested_offset = 2;
+
+ // Size of the data requested.
+ uint64 requested_size = 3;
+
+ // The bytes of the requested part of the value.
+ bytes value = 4;
+
+ // If requested data size is larger than limit_bytes then result will contain only part of the requested value and
+ // the is_overrun flag will be set.
+ bool is_overrun = 5;
+
+ // Contains 0 if the request was sent to the node of the partition, node ID of the partition otherwise.
+ uint32 node_id = 6;
+}
+
+message ReadRangeRequest {
+ Ydb.Operations.OperationParams operation_params = 1;
+
+ // Volume path.
+ string path = 2;
+ // Partition of the volume.
+ uint64 partition_id = 3;
+
+ // Generation of the exclusive lock acquired for the partition as a result of an AcquireLock call.
+ optional uint64 lock_generation = 4;
+
+ // The range of keys to read.
+ KeyRange range = 5;
+
+ // Result protobuf size limit.
+ // Overrides the default limit only with a smaller value.
+ // 0 means "use the default limit".
+ uint64 limit_bytes = 6;
+
+ // Priority to use for the Distributed Storage Get operation.
+ // Has no effect for the INLINE storage channel.
+ Priorities.Priority priority = 7;
+}
+
+message ReadRangeResponse {
+ // Operation contains the result of the request. Check the ydb_operation.proto.
+ Ydb.Operations.Operation operation = 1;
+}
+
+message ReadRangeResult {
+ message KeyValuePair {
+ // The key of the key-value pair.
+ string key = 1;
+
+ // The value of the key-value pair.
+ bytes value = 2;
+
+ // Unix time of the creation of the key-value pair (in ms).
+ uint64 creation_unix_time = 4;
+
+ // Contains the index of the actually used storage channel. The actually used storage channel may differ from
+ // the value specified in the write request for example if there were no such storage channel at the moment
+ // of execution of the write command.
+ // For values created as a result of concatenation or copy of concatenated values, the storage channel of the first
+ // part of the value is specified.
+ uint32 storage_channel = 5;
+ }
+
+ // List of key-value pairs requested.
+ repeated KeyValuePair pair = 1;
+
+ // If requested data size is larger than limit_bytes then result will contain
+ // only part of the requested key-value pairs and the is_overrun flag will be set.
+ // The pair list contains only full values.
+ // In order to continue reading the client should send another request for the key range
+ // with from_key_exclusive set to the last key read.
+ // If first pair doesn't fit the limit_bytes then the result will be empty and the is_overrun flag will be set.
+ // Use ListRange and Read methods to find and read large key-value pairs.
+ bool is_overrun = 2;
+
+ // Contains 0 if the request was sent to the node of the partition, node ID of the partition otherwise.
+ uint32 node_id = 3;
+}
+
+message ListRangeRequest {
+ Ydb.Operations.OperationParams operation_params = 1;
+
+ // Volume path.
+ string path = 2;
+ // Partition of the volume.
+ uint64 partition_id = 3;
+
+ // Generation of the exclusive lock acquired for the partition as a result of an AcquireLock call.
+ optional uint64 lock_generation = 4;
+
+ // The range of keys to read
+ KeyRange range = 5;
+
+ // Result protobuf size limit. If not 0, overrides the default one only with a smaller value.
+ uint64 limit_bytes = 6;
+}
+
+message ListRangeResponse {
+ // Operation contains the result of the request. Check the ydb_operation.proto.
+ Ydb.Operations.Operation operation = 1;
+}
+
+message ListRangeResult {
+ message KeyInfo {
+ // The key of the key-value pair.
+ string key = 1;
+
+ // Full size of the value of the key-value pair.
+ uint32 value_size = 2;
+
+ // Unix time of the creation of the key-value pair (in ms).
+ uint64 creation_unix_time = 3;
+
+ // Contains the index of the actually used storage channel. The actually used storage channel may differ from
+ // the value specified in the write request for example if there were no such storage channel at the moment
+ // of execution of the write command.
+ // For values created as a result of concatenation or copy of concatenated values, the storage channel of the first
+ // part of the value is specified.
+ uint32 storage_channel = 4;
+ }
+
+ // List of the key-value pairs and metadata requested.
+ repeated KeyInfo key = 1;
+
+ // If requested data size is larger than limit_bytes then result will contain
+ // only part of the requested key-value pairs and the is_overrun flag will be set.
+ bool is_overrun = 2;
+
+ // Contains 0 if the request was sent to the node of the partition, node ID of the partition otherwise.
+ uint32 node_id = 3;
+}
+
+message GetStorageChannelStatusRequest {
+ Ydb.Operations.OperationParams operation_params = 1;
+
+ // Volume path.
+ string path = 2;
+ // Partition of the volume.
+ uint64 partition_id = 3;
+
+ // Generation of the exclusive lock acquired for the partition as a result of an AcquireLock call.
+ optional uint64 lock_generation = 4;
+
+ // List of storage channels to get StorageChannelInfo for.
+ repeated uint32 storage_channel = 5;
+}
+
+message GetStorageChannelStatusResponse {
+ // Operation contains the result of the request. Check the ydb_operation.proto.
+ Ydb.Operations.Operation operation = 1;
+}
+
+message GetStorageChannelStatusResult {
+ // Contains status flags for the requested storage channels.
+ repeated StorageChannelInfo storage_channel_info = 1;
+
+ // Contains 0 if the request was sent to the node of the partition, node ID of the partition otherwise.
+ uint32 node_id = 2;
+}
+
+message CreateVolumeRequest {
+ Ydb.Operations.OperationParams operation_params = 1;
+
+ // Volume path.
+ string path = 2;
+
+ // The partition count of the new volume.
+ uint32 partition_count = 4;
+
+ // Set storage kinds for storage channels.
+ StorageConfig storage_config = 5;
+}
+
+message CreateVolumeResponse {
+ // Operation contains the result of the request. Check the ydb_operation.proto.
+ Ydb.Operations.Operation operation = 1;
+}
+
+message CreateVolumeResult {
+}
+
+message DropVolumeRequest {
+ Ydb.Operations.OperationParams operation_params = 1;
+
+ // Volume path.
+ string path = 2;
+}
+
+message DropVolumeResponse {
+ // Operation contains the result of the request. Check the ydb_operation.proto.
+ Ydb.Operations.Operation operation = 1;
+}
+
+message DropVolumeResult {
+}
+
+message AlterVolumeRequest {
+ Ydb.Operations.OperationParams operation_params = 1;
+
+ // Volume path.
+ string path = 2;
+
+ // Change the partition count of the volume.
+    // The value should be greater than or equal to the current partition count.
+ uint32 alter_partition_count = 3;
+
+ // Set storage kinds for storage channels.
+ // If the field is not present, storage channel settings are not changed.
+ StorageConfig storage_config = 4;
+}
+
+message AlterVolumeResponse {
+ // Operation contains the result of the request. Check the ydb_operation.proto.
+ Ydb.Operations.Operation operation = 1;
+}
+
+message AlterVolumeResult {
+}
+
+message DescribeVolumeRequest {
+ Ydb.Operations.OperationParams operation_params = 1;
+
+ // Volume path.
+ string path = 2;
+}
+
+message DescribeVolumeResponse {
+ // Operation contains the result of the request. Check the ydb_operation.proto.
+ Ydb.Operations.Operation operation = 1;
+}
+
+message DescribeVolumeResult {
+ // Volume path.
+ string path = 1;
+
+ // Count of partitions.
+ uint64 partition_count = 2;
+}
+
+message ListLocalPartitionsRequest {
+ Ydb.Operations.OperationParams operation_params = 1;
+
+ // Volume path.
+ string path = 2;
+
+ // ID of the node to get partitions for.
+    // 0 means the node the request was sent to.
+ uint64 node_id = 3;
+}
+
+message ListLocalPartitionsResponse {
+ // Operation contains the result of the request. Check the ydb_operation.proto.
+ Ydb.Operations.Operation operation = 1;
+}
+
+message ListLocalPartitionsResult {
+ // Volume path.
+ string path = 1;
+
+ // ID of the node.
+ uint64 node_id = 2;
+
+ // List of the partitions of the volume on the node.
+ repeated uint64 partition_ids = 3;
+}
diff --git a/ydb/services/CMakeLists.txt b/ydb/services/CMakeLists.txt
index 03be2fc52a..ec13d419c4 100644
--- a/ydb/services/CMakeLists.txt
+++ b/ydb/services/CMakeLists.txt
@@ -14,6 +14,7 @@ add_subdirectory(discovery)
add_subdirectory(dynamic_config)
add_subdirectory(fq)
add_subdirectory(kesus)
+add_subdirectory(keyvalue)
add_subdirectory(lib)
add_subdirectory(local_discovery)
add_subdirectory(metadata)
diff --git a/ydb/services/datastreams/datastreams_ut.cpp b/ydb/services/datastreams/datastreams_ut.cpp
index 0addc976c4..ad3645ef8c 100644
--- a/ydb/services/datastreams/datastreams_ut.cpp
+++ b/ydb/services/datastreams/datastreams_ut.cpp
@@ -1374,7 +1374,7 @@ Y_UNIT_TEST_SUITE(DataStreams) {
UNIT_ASSERT_VALUES_EQUAL(item.GetData(), item.GetPartitionKey());
auto hashKey = item.GetExplicitHash().empty() ? HexBytesToDecimal(MD5::Calc(item.GetPartitionKey())) : BytesToDecimal(item.GetExplicitHash());
UNIT_ASSERT_VALUES_EQUAL(NKikimr::NDataStreams::V1::ShardFromDecimal(hashKey, 5), item.GetPartitionStream()->GetPartitionId());
- UNIT_ASSERT(!item.GetIp().empty());
+ UNIT_ASSERT(item.GetIp().empty());
if (item.GetData() == dataStr) {
UNIT_ASSERT_VALUES_EQUAL(item.GetExplicitHash(), dataStr);
}
diff --git a/ydb/services/datastreams/put_records_actor.h b/ydb/services/datastreams/put_records_actor.h
index efd1d716b4..20886af766 100644
--- a/ydb/services/datastreams/put_records_actor.h
+++ b/ydb/services/datastreams/put_records_actor.h
@@ -28,7 +28,9 @@ namespace NKikimr::NDataStreams::V1 {
TString GetSerializedData(const TPutRecordsItem& item) {
NKikimrPQClient::TDataChunk proto;
- proto.SetIp(item.Ip);
+ //TODO: get ip from client, not grpc;
+ // proto.SetIp(item.Ip);
+
proto.SetCodec(0); // NPersQueue::CODEC_RAW
proto.SetData(item.Data);
@@ -516,10 +518,11 @@ namespace NKikimr::NDataStreams::V1 {
if (putRecordsResult.records(0).error_code() == "ProvisionedThroughputExceededException"
|| putRecordsResult.records(0).error_code() == "ThrottlingException")
{
- return ReplyWithResult(Ydb::StatusIds::OVERLOADED, ctx);
+ return ReplyWithError(Ydb::StatusIds::OVERLOADED, Ydb::PersQueue::ErrorCode::OVERLOAD, putRecordsResult.records(0).error_message(), ctx);
}
//TODO: other codes - access denied and so on
- return ReplyWithResult(Ydb::StatusIds::INTERNAL_ERROR, ctx);
+ return ReplyWithError(Ydb::StatusIds::INTERNAL_ERROR, Ydb::PersQueue::ErrorCode::ERROR, putRecordsResult.records(0).error_message(), ctx);
+
}
}
diff --git a/ydb/services/keyvalue/CMakeLists.darwin.txt b/ydb/services/keyvalue/CMakeLists.darwin.txt
new file mode 100644
index 0000000000..471d8eac78
--- /dev/null
+++ b/ydb/services/keyvalue/CMakeLists.darwin.txt
@@ -0,0 +1,24 @@
+
+# This file was gererated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(ut)
+
+add_library(ydb-services-keyvalue)
+target_link_libraries(ydb-services-keyvalue PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ api-grpc
+ cpp-grpc-server
+ ydb-core-grpc_services
+ core-grpc_services-base
+ core-kesus-tablet
+ ydb-core-keyvalue
+)
+target_sources(ydb-services-keyvalue PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/services/keyvalue/grpc_service.cpp
+)
diff --git a/ydb/services/keyvalue/CMakeLists.linux-aarch64.txt b/ydb/services/keyvalue/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..a31f491ba7
--- /dev/null
+++ b/ydb/services/keyvalue/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,25 @@
+
+# This file was gererated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(ut)
+
+add_library(ydb-services-keyvalue)
+target_link_libraries(ydb-services-keyvalue PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ api-grpc
+ cpp-grpc-server
+ ydb-core-grpc_services
+ core-grpc_services-base
+ core-kesus-tablet
+ ydb-core-keyvalue
+)
+target_sources(ydb-services-keyvalue PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/services/keyvalue/grpc_service.cpp
+)
diff --git a/ydb/services/keyvalue/CMakeLists.linux.txt b/ydb/services/keyvalue/CMakeLists.linux.txt
new file mode 100644
index 0000000000..a31f491ba7
--- /dev/null
+++ b/ydb/services/keyvalue/CMakeLists.linux.txt
@@ -0,0 +1,25 @@
+
+# This file was gererated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(ut)
+
+add_library(ydb-services-keyvalue)
+target_link_libraries(ydb-services-keyvalue PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ api-grpc
+ cpp-grpc-server
+ ydb-core-grpc_services
+ core-grpc_services-base
+ core-kesus-tablet
+ ydb-core-keyvalue
+)
+target_sources(ydb-services-keyvalue PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/services/keyvalue/grpc_service.cpp
+)
diff --git a/ydb/services/keyvalue/CMakeLists.txt b/ydb/services/keyvalue/CMakeLists.txt
new file mode 100644
index 0000000000..3e0811fb22
--- /dev/null
+++ b/ydb/services/keyvalue/CMakeLists.txt
@@ -0,0 +1,15 @@
+
+# This file was gererated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND UNIX AND NOT APPLE AND NOT ANDROID)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (APPLE)
+ include(CMakeLists.darwin.txt)
+elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND UNIX AND NOT APPLE AND NOT ANDROID)
+ include(CMakeLists.linux.txt)
+endif()
diff --git a/ydb/services/keyvalue/grpc_service.cpp b/ydb/services/keyvalue/grpc_service.cpp
new file mode 100644
index 0000000000..8e3b663027
--- /dev/null
+++ b/ydb/services/keyvalue/grpc_service.cpp
@@ -0,0 +1,81 @@
+#include "grpc_service.h"
+
+#include <ydb/core/grpc_services/grpc_helper.h>
+#include <ydb/core/grpc_services/base/base.h>
+#include <ydb/core/grpc_services/service_keyvalue.h>
+
+
+namespace NKikimr::NGRpcService {
+
+TKeyValueGRpcService::TKeyValueGRpcService(NActors::TActorSystem* actorSystem, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, NActors::TActorId grpcRequestProxyId)
+ : ActorSystem(actorSystem)
+ , Counters(std::move(counters))
+ , GRpcRequestProxyId(grpcRequestProxyId)
+{
+}
+
+TKeyValueGRpcService::~TKeyValueGRpcService() = default;
+
+void TKeyValueGRpcService::InitService(grpc::ServerCompletionQueue* cq, NGrpc::TLoggerPtr logger) {
+ CQ = cq;
+ SetupIncomingRequests(std::move(logger));
+}
+
+void TKeyValueGRpcService::SetGlobalLimiterHandle(NGrpc::TGlobalLimiter* limiter) {
+ Limiter = limiter;
+}
+
+bool TKeyValueGRpcService::IncRequest() {
+ return Limiter->Inc();
+}
+
+void TKeyValueGRpcService::DecRequest() {
+ Limiter->Dec();
+}
+
+void TKeyValueGRpcService::SetupIncomingRequests(NGrpc::TLoggerPtr logger) {
+ auto getCounterBlock = NGRpcService::CreateCounterCb(Counters, ActorSystem);
+
+#ifdef SETUP_METHOD
+#error SETUP_METHOD macro collision
+#endif
+
+#define SETUP_METHOD(methodName, method, rlMode) \
+ MakeIntrusive<NGRpcService::TGRpcRequest< \
+ Ydb::KeyValue::Y_CAT(methodName, Request), \
+ Ydb::KeyValue::Y_CAT(methodName, Response), \
+ TKeyValueGRpcService>> \
+ ( \
+ this, \
+ &Service_, \
+ CQ, \
+ [this](NGrpc::IRequestContextBase* reqCtx) { \
+ NGRpcService::ReportGrpcReqToMon(*ActorSystem, reqCtx->GetPeer()); \
+ ActorSystem->Send(GRpcRequestProxyId, new TGrpcRequestOperationCall< \
+ Ydb::KeyValue::Y_CAT(methodName, Request), \
+ Ydb::KeyValue::Y_CAT(methodName, Response)>(reqCtx, &method, \
+ TRequestAuxSettings{rlMode, nullptr})); \
+ }, \
+ &Ydb::KeyValue::V1::KeyValueService::AsyncService::Y_CAT(Request, methodName), \
+ "KeyValue/" Y_STRINGIZE(methodName), \
+ logger, \
+ getCounterBlock("keyvalue", Y_STRINGIZE(methodName)) \
+ )->Run()
+
+ SETUP_METHOD(CreateVolume, DoCreateVolumeKeyValue, TRateLimiterMode::Rps);
+ SETUP_METHOD(DropVolume, DoDropVolumeKeyValue, TRateLimiterMode::Rps);
+ SETUP_METHOD(AlterVolume, DoAlterVolumeKeyValue, TRateLimiterMode::Rps);
+ SETUP_METHOD(DescribeVolume, DoDescribeVolumeKeyValue, TRateLimiterMode::Rps);
+ SETUP_METHOD(ListLocalPartitions, DoListLocalPartitionsKeyValue, TRateLimiterMode::Rps);
+
+ SETUP_METHOD(AcquireLock, DoAcquireLockKeyValue, TRateLimiterMode::Rps);
+ SETUP_METHOD(ExecuteTransaction, DoExecuteTransactionKeyValue, TRateLimiterMode::Rps);
+ SETUP_METHOD(Read, DoReadKeyValue, TRateLimiterMode::Rps);
+ SETUP_METHOD(ReadRange, DoReadRangeKeyValue, TRateLimiterMode::Rps);
+ SETUP_METHOD(ListRange, DoListRangeKeyValue, TRateLimiterMode::Rps);
+ SETUP_METHOD(GetStorageChannelStatus, DoGetStorageChannelStatusKeyValue, TRateLimiterMode::Rps);
+
+#undef SETUP_METHOD
+}
+
+} // namespace NKikimr::NGRpcService
diff --git a/ydb/services/keyvalue/grpc_service.h b/ydb/services/keyvalue/grpc_service.h
new file mode 100644
index 0000000000..18161acad1
--- /dev/null
+++ b/ydb/services/keyvalue/grpc_service.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include <ydb/public/api/grpc/ydb_keyvalue_v1.grpc.pb.h>
+
+#include <library/cpp/grpc/server/grpc_server.h>
+#include <library/cpp/actors/core/actorsystem.h>
+
+
+namespace NKikimr::NGRpcService {
+
+class TKeyValueGRpcService
+ : public NGrpc::TGrpcServiceBase<Ydb::KeyValue::V1::KeyValueService>
+{
+public:
+ TKeyValueGRpcService(NActors::TActorSystem* actorSystem, TIntrusivePtr<NMonitoring::TDynamicCounters> counters,
+ NActors::TActorId grpcRequestProxyId);
+ ~TKeyValueGRpcService();
+
+ void InitService(grpc::ServerCompletionQueue* cq, NGrpc::TLoggerPtr logger) override;
+ void SetGlobalLimiterHandle(NGrpc::TGlobalLimiter* limiter) override;
+
+ bool IncRequest();
+ void DecRequest();
+
+private:
+ void SetupIncomingRequests(NGrpc::TLoggerPtr logger);
+
+private:
+ NActors::TActorSystem* ActorSystem = nullptr;
+ TIntrusivePtr<NMonitoring::TDynamicCounters> Counters;
+ NActors::TActorId GRpcRequestProxyId;
+
+ grpc::ServerCompletionQueue* CQ = nullptr;
+ NGrpc::TGlobalLimiter* Limiter = nullptr;
+};
+
+} // namespace NKikimr::NGRpcService
diff --git a/ydb/services/keyvalue/grpc_service_ut.cpp b/ydb/services/keyvalue/grpc_service_ut.cpp
new file mode 100644
index 0000000000..131c3ac363
--- /dev/null
+++ b/ydb/services/keyvalue/grpc_service_ut.cpp
@@ -0,0 +1,838 @@
+#include "grpc_service.h"
+
+#include <ydb/core/keyvalue/keyvalue.h>
+#include <ydb/core/keyvalue/keyvalue_events.h>
+#include <ydb/core/protos/config.pb.h>
+#include <ydb/core/testlib/basics/appdata.h>
+#include <ydb/core/testlib/test_client.h>
+#include <ydb/core/tx/scheme_cache/scheme_cache.h>
+
+#include <ydb/public/api/grpc/ydb_scheme_v1.grpc.pb.h>
+
+#include <ydb/public/sdk/cpp/client/resources/ydb_resources.h>
+
+#include <library/cpp/grpc/client/grpc_client_low.h>
+#include <library/cpp/testing/unittest/registar.h>
+#include <library/cpp/testing/unittest/tests_data.h>
+#include <library/cpp/logger/backend.h>
+
+#include <grpc++/client_context.h>
+#include <grpc++/create_channel.h>
+
+#include <util/string/builder.h>
+
+
+TString PrintIssue(const ::google::protobuf::RepeatedPtrField< ::Ydb::Issue::IssueMessage> &issues) {
+ TStringBuilder msg;
+ msg << '{';
+ for (auto &issue : issues) {
+ msg << " issue# " << issue.message();
+ }
+ msg << " }";
+ return msg;
+}
+
+
+#define UNIT_ASSERT_CHECK_STATUS(got, exp) \
+ UNIT_ASSERT_C(got.status() == exp, "exp# " << Ydb::StatusIds::StatusCode_Name(exp) \
+ << " got# " << Ydb::StatusIds::StatusCode_Name(got.status()) << " issues# " << PrintIssue(got.issues())) \
+// UNIT_ASSERT_CHECK_STATUS
+
+
+namespace NKikimr::NGRpcService {
+
+
+struct TKikimrTestSettings {
+ static constexpr bool SSL = false;
+ static constexpr bool AUTH = false;
+ static constexpr bool PrecreatePools = true;
+ static constexpr bool EnableSystemViews = true;
+};
+
+struct TKikimrTestWithAuth : TKikimrTestSettings {
+ static constexpr bool AUTH = true;
+};
+
+struct TKikimrTestWithAuthAndSsl : TKikimrTestWithAuth {
+ static constexpr bool SSL = true;
+};
+
+struct TKikimrTestNoSystemViews : TKikimrTestSettings {
+ static constexpr bool EnableSystemViews = false;
+};
+
+template <typename TestSettings = TKikimrTestSettings>
+class TBasicKikimrWithGrpcAndRootSchema {
+public:
+ TBasicKikimrWithGrpcAndRootSchema(
+ NKikimrConfig::TAppConfig appConfig = {},
+ TAutoPtr<TLogBackend> logBackend = {})
+ {
+ ui16 port = PortManager.GetPort(2134);
+ ui16 grpc = PortManager.GetPort(2135);
+ ServerSettings = new Tests::TServerSettings(port);
+ ServerSettings->SetGrpcPort(grpc);
+ ServerSettings->SetLogBackend(logBackend);
+ ServerSettings->SetDomainName("Root");
+ ServerSettings->SetDynamicNodeCount(1);
+ if (TestSettings::PrecreatePools) {
+ ServerSettings->AddStoragePool("ssd");
+ ServerSettings->AddStoragePool("hdd");
+ ServerSettings->AddStoragePool("hdd1");
+ ServerSettings->AddStoragePool("hdd2");
+ } else {
+ ServerSettings->AddStoragePoolType("ssd");
+ ServerSettings->AddStoragePoolType("hdd");
+ ServerSettings->AddStoragePoolType("hdd1");
+ ServerSettings->AddStoragePoolType("hdd2");
+ }
+ ServerSettings->Formats = new TFormatFactory;
+ ServerSettings->FeatureFlags = appConfig.GetFeatureFlags();
+ ServerSettings->RegisterGrpcService<NKikimr::NGRpcService::TKeyValueGRpcService>("keyvalue");
+
+ Server_.Reset(new Tests::TServer(*ServerSettings));
+ Tenants_.Reset(new Tests::TTenants(Server_));
+
+ //Server_->GetRuntime()->SetLogPriority(NKikimrServices::TX_PROXY_SCHEME_CACHE, NActors::NLog::PRI_DEBUG);
+ //Server_->GetRuntime()->SetLogPriority(NKikimrServices::SCHEME_BOARD_REPLICA, NActors::NLog::PRI_DEBUG);
+ //Server_->GetRuntime()->SetLogPriority(NKikimrServices::SCHEME_BOARD_SUBSCRIBER, NActors::NLog::PRI_TRACE);
+ //Server_->GetRuntime()->SetLogPriority(NKikimrServices::SCHEME_BOARD_POPULATOR, NActors::NLog::PRI_DEBUG);
+ //Server_->GetRuntime()->SetLogPriority(NKikimrServices::FLAT_TX_SCHEMESHARD, NActors::NLog::PRI_DEBUG);
+ //Server_->GetRuntime()->SetLogPriority(NKikimrServices::TX_PROXY, NActors::NLog::PRI_DEBUG);
+ Server_->GetRuntime()->SetLogPriority(NKikimrServices::KEYVALUE, NActors::NLog::PRI_DEBUG);
+ //Server_->GetRuntime()->SetLogPriority(NKikimrServices::BOOTSTRAPPER, NActors::NLog::PRI_DEBUG);
+ //Server_->GetRuntime()->SetLogPriority(NKikimrServices::STATESTORAGE, NActors::NLog::PRI_DEBUG);
+ //Server_->GetRuntime()->SetLogPriority(NKikimrServices::TABLET_EXECUTOR, NActors::NLog::PRI_DEBUG);
+ //Server_->GetRuntime()->SetLogPriority(NKikimrServices::SAUSAGE_BIO, NActors::NLog::PRI_DEBUG);
+ //Server_->GetRuntime()->SetLogPriority(NKikimrServices::TABLET_FLATBOOT, NActors::NLog::PRI_DEBUG);
+ //Server_->GetRuntime()->SetLogPriority(NKikimrServices::TABLET_OPS_HOST, NActors::NLog::PRI_DEBUG);
+ //Server_->GetRuntime()->SetLogPriority(NKikimrServices::TABLET_SAUSAGECACHE, NActors::NLog::PRI_DEBUG);
+ //Server_->GetRuntime()->SetLogPriority(NKikimrServices::TX_OLAPSHARD, NActors::NLog::PRI_DEBUG);
+ //Server_->GetRuntime()->SetLogPriority(NKikimrServices::TX_COLUMNSHARD, NActors::NLog::PRI_DEBUG);
+
+ NGrpc::TServerOptions grpcOption;
+ if (TestSettings::AUTH) {
+ grpcOption.SetUseAuth(true);
+ }
+ grpcOption.SetPort(grpc);
+ Server_->EnableGRpc(grpcOption);
+
+ Tests::TClient annoyingClient(*ServerSettings);
+ if (ServerSettings->AppConfig.GetDomainsConfig().GetSecurityConfig().GetEnforceUserTokenRequirement()) {
+ annoyingClient.SetSecurityToken("root@builtin");
+ }
+ annoyingClient.InitRootScheme("Root");
+ GRpcPort_ = grpc;
+ }
+
+ ui16 GetPort() {
+ return GRpcPort_;
+ }
+
+ TPortManager& GetPortManager() {
+ return PortManager;
+ }
+
+ void ResetSchemeCache(TString path, ui32 nodeIndex = 0) {
+ TTestActorRuntime* runtime = Server_->GetRuntime();
+ Tests::TClient annoyingClient(*ServerSettings);
+ annoyingClient.RefreshPathCache(runtime, path, nodeIndex);
+ }
+
+ TTestActorRuntime* GetRuntime() {
+ return Server_->GetRuntime();
+ }
+
+ Tests::TServer& GetServer() {
+ return *Server_;
+ }
+
+ Tests::TServerSettings::TPtr ServerSettings;
+ Tests::TServer::TPtr Server_;
+ THolder<Tests::TTenants> Tenants_;
+private:
+ TPortManager PortManager;
+ ui16 GRpcPort_;
+};
+
+using TKikimrWithGrpcAndRootSchema = TBasicKikimrWithGrpcAndRootSchema<TKikimrTestSettings>;
+
+Y_UNIT_TEST_SUITE(KeyValueGRPCService) {
+
+ void InitTablet(TKikimrWithGrpcAndRootSchema &server, ui64 tabletId) {
+ server.GetRuntime()->SetScheduledLimit(100);
+ CreateTestBootstrapper(*server.GetRuntime(),
+ CreateTestTabletInfo(tabletId, TTabletTypes::KeyValue),
+ &CreateKeyValueFlat);
+ NanoSleep(3'000'000'000);
+ }
+
+ void CmdWrite(ui64 tabletId, const TDeque<TString> &keys, const TDeque<TString> &values, TKikimrWithGrpcAndRootSchema &server)
+ {
+ Y_VERIFY(keys.size() == values.size());
+ TAutoPtr<IEventHandle> handle;
+ TEvKeyValue::TEvResponse *result;
+ THolder<TEvKeyValue::TEvRequest> request;
+ TActorId edgeActor = server.GetRuntime()->AllocateEdgeActor();
+ for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) {
+ try {
+ server.GetRuntime()->ResetScheduledCount();
+ request.Reset(new TEvKeyValue::TEvRequest);
+ for (ui64 idx = 0; idx < keys.size(); ++idx) {
+ auto write = request->Record.AddCmdWrite();
+ write->SetKey(keys[idx]);
+ write->SetValue(values[idx]);
+ write->SetStorageChannel(NKikimrClient::TKeyValueRequest::MAIN);
+ write->SetPriority(NKikimrClient::TKeyValueRequest::REALTIME);
+ }
+ server.GetRuntime()->SendToPipe(tabletId, edgeActor, request.Release(), 0, GetPipeConfigWithRetries());
+ result = server.GetRuntime()->GrabEdgeEvent<TEvKeyValue::TEvResponse>(handle);
+ UNIT_ASSERT(result);
+ UNIT_ASSERT(result->Record.HasStatus());
+ UNIT_ASSERT_EQUAL(result->Record.GetStatus(), NMsgBusProxy::MSTATUS_OK);
+ UNIT_ASSERT_VALUES_EQUAL(result->Record.WriteResultSize(), values.size());
+ for (ui64 idx = 0; idx < values.size(); ++idx) {
+ const auto &writeResult = result->Record.GetWriteResult(idx);
+ UNIT_ASSERT(writeResult.HasStatus());
+ UNIT_ASSERT_EQUAL(writeResult.GetStatus(), NKikimrProto::OK);
+ UNIT_ASSERT(writeResult.HasStatusFlags());
+ if (values[idx].size()) {
+ UNIT_ASSERT(writeResult.GetStatusFlags() & ui32(NKikimrBlobStorage::StatusIsValid));
+ }
+ }
+ retriesLeft = 0;
+ } catch (NActors::TSchedulingLimitReachedException) {
+ UNIT_ASSERT(retriesLeft == 2);
+ }
+ }
+ }
+
+ template <typename TCtx>
+ void AdjustCtxForDB(TCtx &ctx) {
+ ctx.AddMetadata(NYdb::YDB_AUTH_TICKET_HEADER, "root@builtin");
+ }
+
+ void MakeDirectory(auto &channel, const TString &path) {
+ std::unique_ptr<Ydb::Scheme::V1::SchemeService::Stub> stub;
+ stub = Ydb::Scheme::V1::SchemeService::NewStub(channel);
+
+ Ydb::Scheme::MakeDirectoryRequest makeDirectoryRequest;
+ makeDirectoryRequest.set_path(path);
+ Ydb::Scheme::MakeDirectoryResponse makeDirectoryResponse;
+ grpc::ClientContext makeDirectoryCtx;
+ AdjustCtxForDB(makeDirectoryCtx);
+ stub->MakeDirectory(&makeDirectoryCtx, makeDirectoryRequest, &makeDirectoryResponse);
+ UNIT_ASSERT_CHECK_STATUS(makeDirectoryResponse.operation(), Ydb::StatusIds::SUCCESS);
+ }
+
+ void MakeTable(auto &channel, const TString &path) {
+ std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> stub;
+ stub = Ydb::KeyValue::V1::KeyValueService::NewStub(channel);
+
+ Ydb::KeyValue::CreateVolumeRequest createVolumeRequest;
+ createVolumeRequest.set_path(path);
+ createVolumeRequest.set_partition_count(1);
+ auto *storage_config = createVolumeRequest.mutable_storage_config();
+ storage_config->add_channel()->set_media("ssd");
+ storage_config->add_channel()->set_media("ssd");
+ storage_config->add_channel()->set_media("ssd");
+
+ Ydb::KeyValue::CreateVolumeResponse createVolumeResponse;
+ Ydb::KeyValue::CreateVolumeResult createVolumeResult;
+
+ grpc::ClientContext createVolumeCtx;
+ AdjustCtxForDB(createVolumeCtx);
+ stub->CreateVolume(&createVolumeCtx, createVolumeRequest, &createVolumeResponse);
+ UNIT_ASSERT_CHECK_STATUS(createVolumeResponse.operation(), Ydb::StatusIds::SUCCESS);
+ createVolumeResponse.operation().result().UnpackTo(&createVolumeResult);
+ }
+
+ void AlterVolume(auto &channel, const TString &path, ui32 partition_count = 1) {
+ std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> stub;
+ stub = Ydb::KeyValue::V1::KeyValueService::NewStub(channel);
+
+ Ydb::KeyValue::AlterVolumeRequest alterVolumeRequest;
+ alterVolumeRequest.set_path(path);
+ alterVolumeRequest.set_alter_partition_count(partition_count);
+
+ Ydb::KeyValue::AlterVolumeResponse alterVolumeResponse;
+ Ydb::KeyValue::AlterVolumeResult alterVolumeResult;
+
+ grpc::ClientContext alterVolumeCtx;
+ AdjustCtxForDB(alterVolumeCtx);
+ stub->AlterVolume(&alterVolumeCtx, alterVolumeRequest, &alterVolumeResponse);
+ UNIT_ASSERT_CHECK_STATUS(alterVolumeResponse.operation(), Ydb::StatusIds::SUCCESS);
+ alterVolumeResponse.operation().result().UnpackTo(&alterVolumeResult);
+ }
+
+ void DropVolume(auto &channel, const TString &path) {
+ std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> stub;
+ stub = Ydb::KeyValue::V1::KeyValueService::NewStub(channel);
+
+ Ydb::KeyValue::DropVolumeRequest dropVolumeRequest;
+ dropVolumeRequest.set_path(path);
+
+ Ydb::KeyValue::DropVolumeResponse dropVolumeResponse;
+ Ydb::KeyValue::DropVolumeResult dropVolumeResult;
+
+ grpc::ClientContext dropVolumeCtx;
+ AdjustCtxForDB(dropVolumeCtx);
+ stub->DropVolume(&dropVolumeCtx, dropVolumeRequest, &dropVolumeResponse);
+ UNIT_ASSERT_CHECK_STATUS(dropVolumeResponse.operation(), Ydb::StatusIds::SUCCESS);
+ dropVolumeResponse.operation().result().UnpackTo(&dropVolumeResult);
+ }
+
+ Ydb::KeyValue::DescribeVolumeResult DescribeVolume(auto &channel, const TString &path) {
+ std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> stub;
+ stub = Ydb::KeyValue::V1::KeyValueService::NewStub(channel);
+
+ Ydb::KeyValue::DescribeVolumeRequest describeVolumeRequest;
+ describeVolumeRequest.set_path(path);
+
+ Ydb::KeyValue::DescribeVolumeResponse describeVolumeResponse;
+ Ydb::KeyValue::DescribeVolumeResult describeVolumeResult;
+
+ grpc::ClientContext describeVolumeCtx;
+ AdjustCtxForDB(describeVolumeCtx);
+ stub->DescribeVolume(&describeVolumeCtx, describeVolumeRequest, &describeVolumeResponse);
+ UNIT_ASSERT_CHECK_STATUS(describeVolumeResponse.operation(), Ydb::StatusIds::SUCCESS);
+ describeVolumeResponse.operation().result().UnpackTo(&describeVolumeResult);
+ return describeVolumeResult;
+ }
+
+
+ Ydb::Scheme::ListDirectoryResult ListDirectory(auto &channel, const TString &path) {
+ std::unique_ptr<Ydb::Scheme::V1::SchemeService::Stub> stub;
+ stub = Ydb::Scheme::V1::SchemeService::NewStub(channel);
+ Ydb::Scheme::ListDirectoryRequest listDirectoryRequest;
+ listDirectoryRequest.set_path(path);
+
+ Ydb::Scheme::ListDirectoryResult listDirectoryResult;
+ Ydb::Scheme::ListDirectoryResponse listDirectoryResponse;
+
+ grpc::ClientContext listDirectoryCtx;
+ AdjustCtxForDB(listDirectoryCtx);
+ stub->ListDirectory(&listDirectoryCtx, listDirectoryRequest, &listDirectoryResponse);
+
+ UNIT_ASSERT_CHECK_STATUS(listDirectoryResponse.operation(), Ydb::StatusIds::SUCCESS);
+ listDirectoryResponse.operation().result().UnpackTo(&listDirectoryResult);
+ return listDirectoryResult;
+ }
+
+ ui64 AcquireLock( const TString &path, ui64 partitionId, auto &stub) {
+ Ydb::KeyValue::AcquireLockRequest request;
+ request.set_path(path);
+ request.set_partition_id(partitionId);
+
+ Ydb::KeyValue::AcquireLockResponse response;
+ Ydb::KeyValue::AcquireLockResult result;
+
+ grpc::ClientContext ctx;
+ AdjustCtxForDB(ctx);
+ stub->AcquireLock(&ctx, request, &response);
+ UNIT_ASSERT_CHECK_STATUS(response.operation(), Ydb::StatusIds::SUCCESS);
+ response.operation().result().UnpackTo(&result);
+ return result.lock_generation();
+ }
+
+ void WaitTableCreation(TKikimrWithGrpcAndRootSchema &server, const TString &path) {
+ bool again = true;
+ for (ui32 i = 0; i < 10 && again; ++i) {
+ Cerr << "Wait iteration# " << i << Endl;
+ auto req = MakeHolder<NSchemeCache::TSchemeCacheNavigate>();
+ auto& entry = req->ResultSet.emplace_back();
+ entry.Path = SplitPath(path);
+ entry.RequestType = NSchemeCache::TSchemeCacheNavigate::TEntry::ERequestType::ByPath;
+ entry.ShowPrivatePath = true;
+ entry.SyncVersion = false;
+ req->UserToken = new NACLib::TUserToken("root@builtin", {});
+ UNIT_ASSERT(req->UserToken);
+ TActorId edgeActor = server.GetRuntime()->AllocateEdgeActor();
+ auto ev = new TEvTxProxySchemeCache::TEvNavigateKeySet(req.Release());
+ UNIT_ASSERT(ev->Request->UserToken);
+ auto schemeCache = MakeSchemeCacheID();
+ server.GetRuntime()->Send(new IEventHandle(schemeCache, edgeActor, ev));
+
+ TAutoPtr<IEventHandle> handle;
+ auto *result = server.GetRuntime()->GrabEdgeEvent<TEvTxProxySchemeCache::TEvNavigateKeySetResult>(handle);
+ UNIT_ASSERT_VALUES_EQUAL(result->Request->ResultSet.size(), 1);
+ again = result->Request->ResultSet[0].Status != NSchemeCache::TSchemeCacheNavigate::EStatus::Ok;
+ }
+ }
+
+ void MakeSimpleTest(const TString &tablePath,
+ std::function<void(const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub>&)> func)
+ {
+ TKikimrWithGrpcAndRootSchema server;
+ ui16 grpc = server.GetPort();
+ TString location = TStringBuilder() << "localhost:" << grpc;
+
+ ////////////////////////////////////////////////////////////////////////
+
+ std::shared_ptr<grpc::Channel> channel;
+ std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> stub;
+ channel = grpc::CreateChannel("localhost:" + ToString(grpc), grpc::InsecureChannelCredentials());
+ MakeDirectory(channel, "/Root/mydb");
+ MakeTable(channel, tablePath);
+ auto pr = SplitPath(tablePath);
+ Ydb::Scheme::ListDirectoryResult listDirectoryResult = ListDirectory(channel, "/Root/mydb");
+ UNIT_ASSERT_VALUES_EQUAL(listDirectoryResult.self().name(), "mydb");
+ UNIT_ASSERT_VALUES_EQUAL(listDirectoryResult.children(0).name(), pr.back());
+
+ WaitTableCreation(server, tablePath);
+ stub = Ydb::KeyValue::V1::KeyValueService::NewStub(channel);
+ func(stub);
+ }
+
+ Y_UNIT_TEST(SimpleAcquireLock) {
+ TString tablePath = "/Root/mydb/kvtable";
+ MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){
+ Ydb::KeyValue::AcquireLockRequest request;
+ request.set_path(tablePath);
+ request.set_partition_id(0);
+ Ydb::KeyValue::AcquireLockResponse response;
+ Ydb::KeyValue::AcquireLockResult result;
+
+ grpc::ClientContext ctx1;
+ AdjustCtxForDB(ctx1);
+ stub->AcquireLock(&ctx1, request, &response);
+ UNIT_ASSERT_CHECK_STATUS(response.operation(), Ydb::StatusIds::SUCCESS);
+ response.operation().result().UnpackTo(&result);
+ UNIT_ASSERT(result.lock_generation() == 1);
+
+ grpc::ClientContext ctx2;
+ AdjustCtxForDB(ctx2);
+ stub->AcquireLock(&ctx2, request, &response);
+ UNIT_ASSERT_CHECK_STATUS(response.operation(), Ydb::StatusIds::SUCCESS);
+ response.operation().result().UnpackTo(&result);
+ UNIT_ASSERT(result.lock_generation() == 2);
+ });
+ }
+
+ Y_UNIT_TEST(SimpleExecuteTransaction) {
+ TString tablePath = "/Root/mydb/kvtable";
+ MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){
+ Ydb::KeyValue::ExecuteTransactionRequest request;
+ request.set_path(tablePath);
+ request.set_partition_id(0);
+ Ydb::KeyValue::ExecuteTransactionResponse response;
+
+ grpc::ClientContext ctx;
+ AdjustCtxForDB(ctx);
+ stub->ExecuteTransaction(&ctx, request, &response);
+ UNIT_ASSERT_CHECK_STATUS(response.operation(), Ydb::StatusIds::SUCCESS);
+ });
+ }
+
+ Y_UNIT_TEST(SimpleExecuteTransactionWithWrongGeneration) {
+ TString tablePath = "/Root/mydb/kvtable";
+ MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){
+ Ydb::KeyValue::ExecuteTransactionRequest request;
+ request.set_path(tablePath);
+ request.set_partition_id(0);
+ request.set_lock_generation(42);
+ Ydb::KeyValue::ExecuteTransactionResponse response;
+
+ grpc::ClientContext ctx;
+ AdjustCtxForDB(ctx);
+ stub->ExecuteTransaction(&ctx, request, &response);
+ UNIT_ASSERT_CHECK_STATUS(response.operation(), Ydb::StatusIds::PRECONDITION_FAILED);
+ });
+ }
+
+ Ydb::KeyValue::ExecuteTransactionResult Write(const TString &path, ui64 partitionId, const TString &key, const TString &value, ui64 storageChannel,
+ const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub)
+ {
+ Ydb::KeyValue::ExecuteTransactionRequest writeRequest;
+ writeRequest.set_path(path);
+ writeRequest.set_partition_id(partitionId);
+ auto *cmd = writeRequest.add_commands();
+ auto *write = cmd->mutable_write();
+ write->set_key(key);
+ write->set_value(value);
+ write->set_storage_channel(storageChannel);
+ Ydb::KeyValue::ExecuteTransactionResponse writeResponse;
+
+ grpc::ClientContext writeCtx;
+ AdjustCtxForDB(writeCtx);
+ stub->ExecuteTransaction(&writeCtx, writeRequest, &writeResponse);
+ UNIT_ASSERT_CHECK_STATUS(writeResponse.operation(), Ydb::StatusIds::SUCCESS);
+ Ydb::KeyValue::ExecuteTransactionResult writeResult;
+ writeResponse.operation().result().UnpackTo(&writeResult);
+ return writeResult;
+ }
+
+ void Rename(const TString &path, ui64 partitionId, const TString &oldKey, const TString &newKey,
+ const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub)
+ {
+ Ydb::KeyValue::ExecuteTransactionRequest request;
+ request.set_path(path);
+ request.set_partition_id(partitionId);
+ auto *cmd = request.add_commands();
+ auto *rename = cmd->mutable_rename();
+ rename->set_old_key(oldKey);
+ rename->set_new_key(newKey);
+ Ydb::KeyValue::ExecuteTransactionResponse response;
+
+ grpc::ClientContext ctx;
+ AdjustCtxForDB(ctx);
+ stub->ExecuteTransaction(&ctx, request, &response);
+ UNIT_ASSERT_CHECK_STATUS(response.operation(), Ydb::StatusIds::SUCCESS);
+ }
+
+
+ Y_UNIT_TEST(SimpleRenameUnexistedKey) {
+ TString tablePath = "/Root/mydb/kvtable";
+ MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){
+ Ydb::KeyValue::ExecuteTransactionRequest request;
+ request.set_path(tablePath);
+ request.set_partition_id(0);
+ auto *cmd = request.add_commands();
+ auto *rename = cmd->mutable_rename();
+ rename->set_old_key("key1");
+ rename->set_new_key("key2");
+ Ydb::KeyValue::ExecuteTransactionResponse response;
+
+ grpc::ClientContext ctx;
+ AdjustCtxForDB(ctx);
+ stub->ExecuteTransaction(&ctx, request, &response);
+ UNIT_ASSERT_CHECK_STATUS(response.operation(), Ydb::StatusIds::NOT_FOUND);
+ });
+ }
+
+ Y_UNIT_TEST(SimpleConcatUnexistedKey) {
+ TString tablePath = "/Root/mydb/kvtable";
+ MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){
+ Ydb::KeyValue::ExecuteTransactionRequest request;
+ request.set_path(tablePath);
+ request.set_partition_id(0);
+ auto *cmd = request.add_commands();
+ auto *concat = cmd->mutable_concat();
+ concat->add_input_keys("key1");
+ concat->add_input_keys("key2");
+ concat->set_output_key("key3");
+ Ydb::KeyValue::ExecuteTransactionResponse response;
+
+ grpc::ClientContext ctx;
+ AdjustCtxForDB(ctx);
+ stub->ExecuteTransaction(&ctx, request, &response);
+ UNIT_ASSERT_CHECK_STATUS(response.operation(), Ydb::StatusIds::NOT_FOUND);
+ });
+ }
+
+ Y_UNIT_TEST(SimpleCopyUnexistedKey) {
+ TString tablePath = "/Root/mydb/kvtable";
+ MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){
+ Ydb::KeyValue::ExecuteTransactionRequest request;
+ request.set_path(tablePath);
+ request.set_partition_id(0);
+ auto *cmd = request.add_commands();
+ auto *rename = cmd->mutable_copy_range();
+ auto *range = rename->mutable_range();
+ range->set_from_key_inclusive("key1");
+ range->set_to_key_inclusive("key2");
+ rename->set_prefix_to_add("A");
+ Ydb::KeyValue::ExecuteTransactionResponse response;
+
+ grpc::ClientContext ctx;
+ AdjustCtxForDB(ctx);
+ stub->ExecuteTransaction(&ctx, request, &response);
+ UNIT_ASSERT_CHECK_STATUS(response.operation(), Ydb::StatusIds::SUCCESS);
+ });
+ }
+
+ Y_UNIT_TEST(SimpleWriteRead) {
+ TString tablePath = "/Root/mydb/kvtable";
+ MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){
+ Write(tablePath, 0, "key", "value", 0, stub);
+
+ Ydb::KeyValue::ReadRequest readRequest;
+ readRequest.set_path(tablePath);
+ readRequest.set_partition_id(0);
+ readRequest.set_key("key");
+ Ydb::KeyValue::ReadResponse readResponse;
+ Ydb::KeyValue::ReadResult readResult;
+
+ grpc::ClientContext readCtx;
+ AdjustCtxForDB(readCtx);
+ stub->Read(&readCtx, readRequest, &readResponse);
+ UNIT_ASSERT_CHECK_STATUS(readResponse.operation(), Ydb::StatusIds::SUCCESS);
+ readResponse.operation().result().UnpackTo(&readResult);
+ UNIT_ASSERT(!readResult.is_overrun());
+ UNIT_ASSERT_VALUES_EQUAL(readResult.requested_key(), "key");
+ UNIT_ASSERT_VALUES_EQUAL(readResult.value(), "value");
+ UNIT_ASSERT_VALUES_EQUAL(readResult.requested_offset(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(readResult.requested_size(), 5);
+ });
+ }
+
+ Y_UNIT_TEST(SimpleWriteReadWithIncorreectPath) {
+ TString tablePath = "/Root/mydb/kvtable";
+ MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){
+ Write(tablePath, 0, "key", "value", 0, stub);
+
+ Ydb::KeyValue::ReadRequest readRequest;
+ readRequest.set_path("/Root/mydb/table");
+ readRequest.set_partition_id(0);
+ readRequest.set_key("key");
+ Ydb::KeyValue::ReadResponse readResponse;
+ Ydb::KeyValue::ReadResult readResult;
+
+ grpc::ClientContext readCtx;
+ AdjustCtxForDB(readCtx);
+ stub->Read(&readCtx, readRequest, &readResponse);
+ UNIT_ASSERT_CHECK_STATUS(readResponse.operation(), Ydb::StatusIds::SCHEME_ERROR);
+ });
+ }
+
+ Y_UNIT_TEST(SimpleWriteReadWithoutToken) {
+ TString tablePath = "/Root/mydb/kvtable";
+ MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){
+ return;
+ Write(tablePath, 0, "key", "value", 0, stub);
+
+ Ydb::KeyValue::ReadRequest readRequest;
+ readRequest.set_path("/Root/mydb/kvtable");
+ readRequest.set_partition_id(0);
+ readRequest.set_key("key");
+ Ydb::KeyValue::ReadResponse readResponse;
+ Ydb::KeyValue::ReadResult readResult;
+
+ grpc::ClientContext readCtx;
+ //AdjustCtxForDB(readCtx);
+ stub->Read(&readCtx, readRequest, &readResponse);
+ UNIT_ASSERT_CHECK_STATUS(readResponse.operation(), Ydb::StatusIds::SCHEME_ERROR);
+ });
+ }
+
+ Y_UNIT_TEST(SimpleWriteReadWithoutLockGeneration1) {
+ TString tablePath = "/Root/mydb/kvtable";
+ MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){
+ AcquireLock(tablePath, 0, stub);
+ Write(tablePath, 0, "key", "value", 0, stub);
+ Ydb::KeyValue::ReadRequest readRequest;
+ readRequest.set_path(tablePath);
+ readRequest.set_partition_id(0);
+ readRequest.set_key("key");
+ Ydb::KeyValue::ReadResponse readResponse;
+ Ydb::KeyValue::ReadResult readResult;
+
+ grpc::ClientContext readCtx;
+ AdjustCtxForDB(readCtx);
+ stub->Read(&readCtx, readRequest, &readResponse);
+ UNIT_ASSERT_CHECK_STATUS(readResponse.operation(), Ydb::StatusIds::SUCCESS);
+ });
+ }
+
+ Y_UNIT_TEST(SimpleWriteReadWithoutLockGeneration2) {
+ TString tablePath = "/Root/mydb/kvtable";
+ MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){
+ Write(tablePath, 0, "key", "value", 0, stub);
+ AcquireLock(tablePath, 0, stub);
+ Ydb::KeyValue::ReadRequest readRequest;
+ readRequest.set_path(tablePath);
+ readRequest.set_partition_id(0);
+ readRequest.set_key("key");
+ Ydb::KeyValue::ReadResponse readResponse;
+ Ydb::KeyValue::ReadResult readResult;
+
+ grpc::ClientContext readCtx;
+ AdjustCtxForDB(readCtx);
+ stub->Read(&readCtx, readRequest, &readResponse);
+ UNIT_ASSERT_CHECK_STATUS(readResponse.operation(), Ydb::StatusIds::SUCCESS);
+ });
+ }
+
+ Y_UNIT_TEST(SimpleWriteReadOverrun) {
+ TString tablePath = "/Root/mydb/kvtable";
+ MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){
+ Write(tablePath, 0, "key", "value", 0, stub);
+
+ Ydb::KeyValue::ReadRequest readRequest;
+ readRequest.set_path(tablePath);
+ readRequest.set_partition_id(0);
+ readRequest.set_key("key");
+ ui64 limitBytes = 1 + 5 + 3 // Key id, length
+ + 1 + 5 + 1 // Value id, length, value
+ + 1 + 8 // Offset id, value
+ + 1 + 8 // Size id, value
+ + 1 + 1 // Status id, value
+ ;
+ readRequest.set_limit_bytes(limitBytes);
+ Ydb::KeyValue::ReadResponse readResponse;
+ Ydb::KeyValue::ReadResult readResult;
+
+ grpc::ClientContext readCtx;
+ AdjustCtxForDB(readCtx);
+ stub->Read(&readCtx, readRequest, &readResponse);
+ UNIT_ASSERT_CHECK_STATUS(readResponse.operation(), Ydb::StatusIds::SUCCESS);
+ readResponse.operation().result().UnpackTo(&readResult);
+ UNIT_ASSERT(readResult.is_overrun());
+ UNIT_ASSERT_VALUES_EQUAL(readResult.requested_key(), "key");
+ UNIT_ASSERT_VALUES_EQUAL(readResult.value(), "v");
+ UNIT_ASSERT_VALUES_EQUAL(readResult.requested_offset(), 0);
+ UNIT_ASSERT_VALUES_EQUAL(readResult.requested_size(), 5);
+ });
+ }
+
+ Y_UNIT_TEST(SimpleWriteReadRange) {
+ TString tablePath = "/Root/mydb/kvtable";
+ MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){
+ Write(tablePath, 0, "key1", "value1", 1, stub);
+ Write(tablePath, 0, "key2", "value12", 2, stub);
+
+ Ydb::KeyValue::ReadRangeRequest readRangeRequest;
+ readRangeRequest.set_path(tablePath);
+ readRangeRequest.set_partition_id(0);
+ auto *r = readRangeRequest.mutable_range();
+ r->set_from_key_inclusive("key1");
+ r->set_to_key_inclusive("key3");
+ Ydb::KeyValue::ReadRangeResponse readRangeResponse;
+ Ydb::KeyValue::ReadRangeResult readRangeResult;
+
+ grpc::ClientContext readRangeCtx;
+ AdjustCtxForDB(readRangeCtx);
+ stub->ReadRange(&readRangeCtx, readRangeRequest, &readRangeResponse);
+ UNIT_ASSERT_CHECK_STATUS(readRangeResponse.operation(), Ydb::StatusIds::SUCCESS);
+ readRangeResponse.operation().result().UnpackTo(&readRangeResult);
+
+ UNIT_ASSERT_VALUES_EQUAL(readRangeResult.pair(0).key(), "key1");
+ UNIT_ASSERT_VALUES_EQUAL(readRangeResult.pair(1).key(), "key2");
+
+ UNIT_ASSERT_VALUES_EQUAL(readRangeResult.pair(0).value(), "value1");
+ UNIT_ASSERT_VALUES_EQUAL(readRangeResult.pair(1).value(), "value12");
+
+ UNIT_ASSERT_VALUES_EQUAL(readRangeResult.pair(0).storage_channel(), 1);
+ UNIT_ASSERT_VALUES_EQUAL(readRangeResult.pair(1).storage_channel(), 2);
+ });
+ }
+
+
+ Y_UNIT_TEST(SimpleWriteListRange) {
+ TString tablePath = "/Root/mydb/kvtable";
+ MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){
+ Write(tablePath, 0, "key1", "value1", 1, stub);
+ Write(tablePath, 0, "key2", "value12", 2, stub);
+
+ Ydb::KeyValue::ListRangeRequest listRangeRequest;
+ listRangeRequest.set_path(tablePath);
+ listRangeRequest.set_partition_id(0);
+ auto *r = listRangeRequest.mutable_range();
+ r->set_from_key_inclusive("key1");
+ r->set_to_key_inclusive("key3");
+ Ydb::KeyValue::ListRangeResponse listRangeResponse;
+ Ydb::KeyValue::ListRangeResult listRangeResult;
+
+ grpc::ClientContext listRangeCtx;
+ AdjustCtxForDB(listRangeCtx);
+ stub->ListRange(&listRangeCtx, listRangeRequest, &listRangeResponse);
+ UNIT_ASSERT_CHECK_STATUS(listRangeResponse.operation(), Ydb::StatusIds::SUCCESS);
+ listRangeResponse.operation().result().UnpackTo(&listRangeResult);
+
+ UNIT_ASSERT_VALUES_EQUAL(listRangeResult.key(0).key(), "key1");
+ UNIT_ASSERT_VALUES_EQUAL(listRangeResult.key(1).key(), "key2");
+
+ UNIT_ASSERT_VALUES_EQUAL(listRangeResult.key(0).value_size(), 6);
+ UNIT_ASSERT_VALUES_EQUAL(listRangeResult.key(1).value_size(), 7);
+
+ UNIT_ASSERT_VALUES_EQUAL(listRangeResult.key(0).storage_channel(), 1);
+ UNIT_ASSERT_VALUES_EQUAL(listRangeResult.key(1).storage_channel(), 2);
+ });
+ }
+
+
+ Y_UNIT_TEST(SimpleGetStorageChannelStatus) {
+ TString tablePath = "/Root/mydb/kvtable";
+ MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){
+ Ydb::KeyValue::GetStorageChannelStatusRequest getStatusRequest;
+ getStatusRequest.set_path(tablePath);
+ getStatusRequest.set_partition_id(0);
+ getStatusRequest.add_storage_channel(1);
+ getStatusRequest.add_storage_channel(2);
+ getStatusRequest.add_storage_channel(3);
+ Ydb::KeyValue::GetStorageChannelStatusResponse getStatusResponse;
+ Ydb::KeyValue::GetStorageChannelStatusResult getStatusResult;
+
+ grpc::ClientContext getStatusCtx;
+ AdjustCtxForDB(getStatusCtx);
+ stub->GetStorageChannelStatus(&getStatusCtx, getStatusRequest, &getStatusResponse);
+ UNIT_ASSERT_CHECK_STATUS(getStatusResponse.operation(), Ydb::StatusIds::SUCCESS);
+ getStatusResponse.operation().result().UnpackTo(&getStatusResult);
+ UNIT_ASSERT_VALUES_EQUAL(getStatusResult.storage_channel_info_size(), 3);
+ });
+ }
+
+ Y_UNIT_TEST(SimpleCreateAlterDropVolume) {
+ TKikimrWithGrpcAndRootSchema server;
+ ui16 grpc = server.GetPort();
+ TString location = TStringBuilder() << "localhost:" << grpc;
+
+ std::shared_ptr<grpc::Channel> channel;
+ channel = grpc::CreateChannel("localhost:" + ToString(grpc), grpc::InsecureChannelCredentials());
+
+ TString path = "/Root/mydb/";
+ TString tablePath = "/Root/mydb/mytable";
+ MakeDirectory(channel, path);
+ MakeTable(channel, tablePath);
+
+ Ydb::Scheme::ListDirectoryResult listDirectoryResult = ListDirectory(channel, path);
+ UNIT_ASSERT_VALUES_EQUAL(listDirectoryResult.self().name(), "mydb");
+ UNIT_ASSERT_VALUES_EQUAL(listDirectoryResult.children(0).name(), "mytable");
+
+ UNIT_ASSERT_VALUES_EQUAL(1, DescribeVolume(channel, tablePath).partition_count());
+
+ AlterVolume(channel, tablePath, 2);
+ listDirectoryResult = ListDirectory(channel, path);
+ UNIT_ASSERT_VALUES_EQUAL(listDirectoryResult.self().name(), "mydb");
+ UNIT_ASSERT_VALUES_EQUAL(listDirectoryResult.children(0).name(), "mytable");
+
+
+ UNIT_ASSERT_VALUES_EQUAL(2, DescribeVolume(channel, tablePath).partition_count());
+
+ DropVolume(channel, tablePath);
+ listDirectoryResult = ListDirectory(channel, path);
+ UNIT_ASSERT_VALUES_EQUAL(listDirectoryResult.self().name(), "mydb");
+ UNIT_ASSERT_VALUES_EQUAL(listDirectoryResult.children_size(), 0);
+ }
+
+ Y_UNIT_TEST(SimpleListPartitions) {
+ return; // delete it after adding ydb_token to requests in tests
+ TKikimrWithGrpcAndRootSchema server;
+ ui16 grpc = server.GetPort();
+ TString location = TStringBuilder() << "localhost:" << grpc;
+
+ std::shared_ptr<grpc::Channel> channel;
+ channel = grpc::CreateChannel("localhost:" + ToString(grpc), grpc::InsecureChannelCredentials());
+
+ TString path = "/Root/mydb/";
+ TString tablePath = "/Root/mydb/mytable";
+ MakeDirectory(channel, path);
+ MakeTable(channel, tablePath);
+
+ std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> stub;
+ stub = Ydb::KeyValue::V1::KeyValueService::NewStub(channel);
+
+ Write(tablePath, 0, "key1", "value1", 1, stub);
+
+ Ydb::KeyValue::ListLocalPartitionsRequest enumerateRequest;
+ enumerateRequest.set_path(tablePath);
+ enumerateRequest.set_node_id(2);
+
+ Ydb::KeyValue::ListLocalPartitionsResult enumerateResult;
+ Ydb::KeyValue::ListLocalPartitionsResponse eumerateResponse;
+
+ grpc::ClientContext enumerateCtx;
+ AdjustCtxForDB(enumerateCtx);
+ stub->ListLocalPartitions(&enumerateCtx, enumerateRequest, &eumerateResponse);
+
+ UNIT_ASSERT_CHECK_STATUS(eumerateResponse.operation(), Ydb::StatusIds::SUCCESS);
+ eumerateResponse.operation().result().UnpackTo(&enumerateResult);
+ UNIT_ASSERT_VALUES_EQUAL(enumerateResult.partition_ids_size(), 1);
+
+ auto writeRes = Write(tablePath, enumerateResult.partition_ids(0), "key2", "value2", 1, stub);
+ UNIT_ASSERT_VALUES_EQUAL(writeRes.node_id(), 2);
+ }
+
+} // Y_UNIT_TEST_SUITE(KeyValueGRPCService)
+
+} // NKikimr::NGRpcService
diff --git a/ydb/services/keyvalue/ut/CMakeLists.darwin.txt b/ydb/services/keyvalue/ut/CMakeLists.darwin.txt
new file mode 100644
index 0000000000..6c9a38b222
--- /dev/null
+++ b/ydb/services/keyvalue/ut/CMakeLists.darwin.txt
@@ -0,0 +1,49 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(ydb-services-keyvalue-ut)
+target_compile_options(ydb-services-keyvalue-ut PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_include_directories(ydb-services-keyvalue-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/services/keyvalue
+)
+target_link_libraries(ydb-services-keyvalue-ut PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ cpp-testing-unittest_main
+ ydb-services-keyvalue
+ library-cpp-logger
+ ydb-core-protos
+ core-testlib-default
+)
+target_link_options(ydb-services-keyvalue-ut PRIVATE
+ -Wl,-no_deduplicate
+ -Wl,-sdk_version,10.15
+ -fPIC
+ -fPIC
+ -framework
+ CoreFoundation
+)
+target_sources(ydb-services-keyvalue-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/services/keyvalue/grpc_service_ut.cpp
+)
+add_test(
+ NAME
+ ydb-services-keyvalue-ut
+ COMMAND
+ ydb-services-keyvalue-ut
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+vcs_info(ydb-services-keyvalue-ut)
diff --git a/ydb/services/keyvalue/ut/CMakeLists.linux-aarch64.txt b/ydb/services/keyvalue/ut/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..6b64e4360d
--- /dev/null
+++ b/ydb/services/keyvalue/ut/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,52 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(ydb-services-keyvalue-ut)
+target_compile_options(ydb-services-keyvalue-ut PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_include_directories(ydb-services-keyvalue-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/services/keyvalue
+)
+target_link_libraries(ydb-services-keyvalue-ut PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-lfalloc
+ cpp-testing-unittest_main
+ ydb-services-keyvalue
+ library-cpp-logger
+ ydb-core-protos
+ core-testlib-default
+)
+target_link_options(ydb-services-keyvalue-ut PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_sources(ydb-services-keyvalue-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/services/keyvalue/grpc_service_ut.cpp
+)
+add_test(
+ NAME
+ ydb-services-keyvalue-ut
+ COMMAND
+ ydb-services-keyvalue-ut
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+vcs_info(ydb-services-keyvalue-ut)
diff --git a/ydb/services/keyvalue/ut/CMakeLists.linux.txt b/ydb/services/keyvalue/ut/CMakeLists.linux.txt
new file mode 100644
index 0000000000..d754baa780
--- /dev/null
+++ b/ydb/services/keyvalue/ut/CMakeLists.linux.txt
@@ -0,0 +1,54 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(ydb-services-keyvalue-ut)
+target_compile_options(ydb-services-keyvalue-ut PRIVATE
+ -DUSE_CURRENT_UDF_ABI_VERSION
+)
+target_include_directories(ydb-services-keyvalue-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/services/keyvalue
+)
+target_link_libraries(ydb-services-keyvalue-ut PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ cpp-malloc-tcmalloc
+ libs-tcmalloc-no_percpu_cache
+ library-cpp-cpuid_check
+ cpp-testing-unittest_main
+ ydb-services-keyvalue
+ library-cpp-logger
+ ydb-core-protos
+ core-testlib-default
+)
+target_link_options(ydb-services-keyvalue-ut PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_sources(ydb-services-keyvalue-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/services/keyvalue/grpc_service_ut.cpp
+)
+add_test(
+ NAME
+ ydb-services-keyvalue-ut
+ COMMAND
+ ydb-services-keyvalue-ut
+ --print-before-suite
+ --print-before-test
+ --fork-tests
+ --print-times
+ --show-fails
+)
+vcs_info(ydb-services-keyvalue-ut)
diff --git a/ydb/services/keyvalue/ut/CMakeLists.txt b/ydb/services/keyvalue/ut/CMakeLists.txt
new file mode 100644
index 0000000000..3e0811fb22
--- /dev/null
+++ b/ydb/services/keyvalue/ut/CMakeLists.txt
@@ -0,0 +1,15 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND UNIX AND NOT APPLE AND NOT ANDROID)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (APPLE)
+ include(CMakeLists.darwin.txt)
+elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND UNIX AND NOT APPLE AND NOT ANDROID)
+ include(CMakeLists.linux.txt)
+endif()
diff --git a/ydb/tests/functional/autoconfig/test_actorsystem.py b/ydb/tests/functional/autoconfig/test_actorsystem.py
new file mode 100644
index 0000000000..352c750662
--- /dev/null
+++ b/ydb/tests/functional/autoconfig/test_actorsystem.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import sys
+
+
+from ydb.tests.library.harness.kikimr_cluster import kikimr_cluster_factory
+from ydb.tests.library.harness.kikimr_config import KikimrConfigGenerator
+
+
+def make_test_for_specific_actor_system(node_type, cpu_count):
+ class TestWithSpecificActorSystem(object):
+ @classmethod
+ def setup_class(cls):
+ actor_system_config = {
+ "node_type": node_type,
+ "cpu_count": cpu_count,
+ "use_auto_config": True
+ }
+ configuration = KikimrConfigGenerator(overrided_actor_system_config=actor_system_config)
+ cls.kikimr_cluster = kikimr_cluster_factory(configuration)
+ cls.kikimr_cluster.start()
+
+ @classmethod
+ def teardown_class(cls):
+ cls.kikimr_cluster.stop()
+
+ def test(self):
+ pass
+
+ return TestWithSpecificActorSystem
+
+
+for node_type in ("Compute", "Storage", "Hybrid"):
+ for cpu_count in range(1, 40):
+ test = make_test_for_specific_actor_system(node_type.upper(), cpu_count)
+ setattr(sys.modules[__name__], "TestWith%sNodeWith%dCpu" % (node_type, cpu_count), test)
diff --git a/ydb/tests/library/harness/kikimr_config.py b/ydb/tests/library/harness/kikimr_config.py
index b40fbe3e62..129d4c3f66 100644
--- a/ydb/tests/library/harness/kikimr_config.py
+++ b/ydb/tests/library/harness/kikimr_config.py
@@ -142,6 +142,7 @@ class KikimrConfigGenerator(object):
enable_alter_database_create_hive_first=False,
disable_iterator_reads=False,
disable_iterator_lookups=False,
+ overrided_actor_system_config=None
):
self._version = version
self.use_log_files = use_log_files
@@ -212,6 +213,9 @@ class KikimrConfigGenerator(object):
self.yaml_config = load_default_yaml(self.__node_ids, self.domain_name, self.static_erasure, self.__additional_log_configs)
+ if overrided_actor_system_config:
+ self.yaml_config["actor_system_config"] = overrided_actor_system_config
+
if disable_iterator_reads:
if "table_service_config" not in self.yaml_config:
self.yaml_config["table_service_config"] = {}