diff options
author | Daniil Cherednik <dan.cherednik@gmail.com> | 2023-07-20 22:11:42 +0300 |
---|---|---|
committer | Daniil Cherednik <dan.cherednik@gmail.com> | 2023-07-20 22:11:42 +0300 |
commit | d63f0523399ab2d93c1c6ca6c2dca082be5e52ba (patch) | |
tree | 1123a7aa3ac1d42f3ceaae288e639931d9dca92a | |
parent | 068d4453cf9fc68c875eee73f5c637bb076f6a71 (diff) | |
download | ydb-23.2.11.tar.gz |
Ydb stable 23-2-11 (tag: 23.2.11)
x-stable-origin-commit: 758ace972646c843c5e0785d75c8f4fe044580a1
159 files changed, 6707 insertions, 1065 deletions
diff --git a/library/cpp/actors/interconnect/CMakeLists.darwin.txt b/library/cpp/actors/interconnect/CMakeLists.darwin.txt index c0b4981c37..4d13e9ec54 100644 --- a/library/cpp/actors/interconnect/CMakeLists.darwin.txt +++ b/library/cpp/actors/interconnect/CMakeLists.darwin.txt @@ -10,6 +10,7 @@ find_package(OpenSSL REQUIRED) add_subdirectory(mock) add_subdirectory(ut) add_subdirectory(ut_fat) +add_subdirectory(ut_huge_cluster) add_library(cpp-actors-interconnect) target_link_libraries(cpp-actors-interconnect PUBLIC diff --git a/library/cpp/actors/interconnect/CMakeLists.linux-aarch64.txt b/library/cpp/actors/interconnect/CMakeLists.linux-aarch64.txt index ada1e68d25..4142c4c6b5 100644 --- a/library/cpp/actors/interconnect/CMakeLists.linux-aarch64.txt +++ b/library/cpp/actors/interconnect/CMakeLists.linux-aarch64.txt @@ -10,6 +10,7 @@ find_package(OpenSSL REQUIRED) add_subdirectory(mock) add_subdirectory(ut) add_subdirectory(ut_fat) +add_subdirectory(ut_huge_cluster) add_library(cpp-actors-interconnect) target_link_libraries(cpp-actors-interconnect PUBLIC diff --git a/library/cpp/actors/interconnect/CMakeLists.linux.txt b/library/cpp/actors/interconnect/CMakeLists.linux.txt index ada1e68d25..4142c4c6b5 100644 --- a/library/cpp/actors/interconnect/CMakeLists.linux.txt +++ b/library/cpp/actors/interconnect/CMakeLists.linux.txt @@ -10,6 +10,7 @@ find_package(OpenSSL REQUIRED) add_subdirectory(mock) add_subdirectory(ut) add_subdirectory(ut_fat) +add_subdirectory(ut_huge_cluster) add_library(cpp-actors-interconnect) target_link_libraries(cpp-actors-interconnect PUBLIC diff --git a/library/cpp/actors/interconnect/events_local.h b/library/cpp/actors/interconnect/events_local.h index b1b8ae0c75..966cdb763e 100644 --- a/library/cpp/actors/interconnect/events_local.h +++ b/library/cpp/actors/interconnect/events_local.h @@ -52,6 +52,9 @@ namespace NActors { EvProcessPingRequest, EvGetSecureSocket, EvSecureSocket, + HandshakeBrokerTake, + HandshakeBrokerFree, + 
HandshakeBrokerPermit, //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // nonlocal messages; their indices must be preserved in order to work properly while doing rolling update @@ -98,6 +101,18 @@ namespace NActors { } }; + struct TEvHandshakeBrokerTake: TEventLocal<TEvHandshakeBrokerTake, ui32(ENetwork::HandshakeBrokerTake)> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeBrokerTake, "Network: TEvHandshakeBrokerTake") + }; + + struct TEvHandshakeBrokerFree: TEventLocal<TEvHandshakeBrokerFree, ui32(ENetwork::HandshakeBrokerFree)> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeBrokerFree, "Network: TEvHandshakeBrokerFree") + }; + + struct TEvHandshakeBrokerPermit: TEventLocal<TEvHandshakeBrokerPermit, ui32(ENetwork::HandshakeBrokerPermit)> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeBrokerPermit, "Network: TEvHandshakeBrokerPermit") + }; + struct TEvHandshakeAsk: public TEventLocal<TEvHandshakeAsk, ui32(ENetwork::HandshakeAsk)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeAsk, "Network: TEvHandshakeAsk") TEvHandshakeAsk(const TActorId& self, diff --git a/library/cpp/actors/interconnect/handshake_broker.h b/library/cpp/actors/interconnect/handshake_broker.h new file mode 100644 index 0000000000..9910fb4b71 --- /dev/null +++ b/library/cpp/actors/interconnect/handshake_broker.h @@ -0,0 +1,157 @@ +#pragma once + +#include <library/cpp/actors/core/actor.h> +#include <library/cpp/actors/core/hfunc.h> +#include <library/cpp/actors/interconnect/events_local.h> + +#include <deque> + +namespace NActors { + class TBrokerLeaseHolder { + public: + TBrokerLeaseHolder(TActorSystem* actorSystem, TActorId waiterId, TActorId brokerId) + : ActorSystem(actorSystem) + , WaiterId(waiterId) + , BrokerId(brokerId) { + if (ActorSystem->Send(new IEventHandle(BrokerId, WaiterId, new TEvHandshakeBrokerTake()))) { + LeaseRequested = true; + } + } + + ~TBrokerLeaseHolder() { + if (LeaseRequested) { + ActorSystem->Send(new IEventHandle(BrokerId, 
WaiterId, new TEvHandshakeBrokerFree())); + } + } + + bool IsLeaseRequested() { + return LeaseRequested; + } + + void ForgetLease() { + // only call when TDtorException was caught + LeaseRequested = false; + } + + private: + TActorSystem* ActorSystem; + TActorId WaiterId; + TActorId BrokerId; + bool LeaseRequested = false; + }; + + class THandshakeBroker : public TActor<THandshakeBroker> { + private: + enum class ESelectionStrategy { + FIFO = 0, + LIFO, + Random, + }; + + private: + void PermitNext() { + if (Capacity == 0 && !Waiters.empty()) { + TActorId waiter; + + switch (SelectionStrategy) { + case ESelectionStrategy::FIFO: + waiter = Waiters.front(); + Waiters.pop_front(); + SelectionStrategy = ESelectionStrategy::LIFO; + break; + + case ESelectionStrategy::LIFO: + waiter = Waiters.back(); + Waiters.pop_back(); + SelectionStrategy = ESelectionStrategy::Random; + break; + + case ESelectionStrategy::Random: { + const auto it = WaiterLookup.begin(); + waiter = it->first; + Waiters.erase(it->second); + SelectionStrategy = ESelectionStrategy::FIFO; + break; + } + + default: + Y_FAIL("Unimplimented selection strategy"); + } + + const size_t n = WaiterLookup.erase(waiter); + Y_VERIFY(n == 1); + + Send(waiter, new TEvHandshakeBrokerPermit()); + PermittedLeases.insert(waiter); + } else { + Capacity += 1; + } + } + + private: + using TWaiters = std::list<TActorId>; + TWaiters Waiters; + std::unordered_map<TActorId, TWaiters::iterator> WaiterLookup; + std::unordered_set<TActorId> PermittedLeases; + + ESelectionStrategy SelectionStrategy = ESelectionStrategy::FIFO; + + ui32 Capacity; + + void Handle(TEvHandshakeBrokerTake::TPtr &ev) { + const TActorId sender = ev->Sender; + if (Capacity > 0) { + Capacity -= 1; + PermittedLeases.insert(sender); + Send(sender, new TEvHandshakeBrokerPermit()); + } else { + const auto [it, inserted] = WaiterLookup.try_emplace(sender, + Waiters.insert(Waiters.end(), sender)); + Y_VERIFY(inserted); + } + } + + void 
Handle(TEvHandshakeBrokerFree::TPtr& ev) { + const TActorId sender = ev->Sender; + if (!PermittedLeases.erase(sender)) { + // Lease was not permitted yet, remove sender from Waiters queue + const auto it = WaiterLookup.find(sender); + Y_VERIFY(it != WaiterLookup.end()); + Waiters.erase(it->second); + WaiterLookup.erase(it); + } + PermitNext(); + } + + public: + THandshakeBroker(ui32 inflightLimit) + : TActor(&TThis::StateFunc) + , Capacity(inflightLimit) + { + } + + STFUNC(StateFunc) { + Y_UNUSED(ctx); + switch (ev->GetTypeRewrite()) { + hFunc(TEvHandshakeBrokerTake, Handle); + hFunc(TEvHandshakeBrokerFree, Handle); + + default: + Y_FAIL("unexpected event 0x%08" PRIx32, ev->GetTypeRewrite()); + } + } + + void Bootstrap() { + Become(&TThis::StateFunc); + }; + }; + + inline IActor* CreateHandshakeBroker(ui32 maxCapacity) { + return new THandshakeBroker(maxCapacity); + } + + inline TActorId MakeHandshakeBrokerOutId() { + char x[12] = {'I', 'C', 'H', 's', 'h', 'k', 'B', 'r', 'k', 'O', 'u', 't'}; + return TActorId(0, TStringBuf(std::begin(x), std::end(x))); + } +}; diff --git a/library/cpp/actors/interconnect/interconnect_common.h b/library/cpp/actors/interconnect/interconnect_common.h index ea6a5310d4..d526621491 100644 --- a/library/cpp/actors/interconnect/interconnect_common.h +++ b/library/cpp/actors/interconnect/interconnect_common.h @@ -48,6 +48,7 @@ namespace NActors { ui32 MaxSerializedEventSize = NActors::EventMaxByteSize; ui32 PreallocatedBufferSize = 8 << 10; // 8 KB ui32 NumPreallocatedBuffers = 16; + ui32 SocketBacklogSize = 0; // SOMAXCONN if zero ui32 GetSendBufferSize() const { ui32 res = 512 * 1024; // 512 kb is the default value for send buffer @@ -94,6 +95,7 @@ namespace NActors { std::shared_ptr<TEventFilter> EventFilter; TString Cookie; // unique random identifier of a node instance (generated randomly at every start) std::unordered_map<ui16, TString> ChannelName; + std::optional<ui32> OutgoingHandshakeInflightLimit; struct TVersionInfo { TString 
Tag; // version tag for this node diff --git a/library/cpp/actors/interconnect/interconnect_handshake.cpp b/library/cpp/actors/interconnect/interconnect_handshake.cpp index dc651f3762..8d281ae52e 100644 --- a/library/cpp/actors/interconnect/interconnect_handshake.cpp +++ b/library/cpp/actors/interconnect/interconnect_handshake.cpp @@ -1,4 +1,5 @@ #include "interconnect_handshake.h" +#include "handshake_broker.h" #include "interconnect_tcp_proxy.h" #include <library/cpp/actors/core/actor_coroutine.h> @@ -96,6 +97,8 @@ namespace NActors { THashMap<ui32, TInstant> LastLogNotice; const TDuration MuteDuration = TDuration::Seconds(15); TInstant Deadline; + TActorId HandshakeBroker; + std::optional<TBrokerLeaseHolder> BrokerLeaseHolder; public: THandshakeActor(TInterconnectProxyCommon::TPtr common, const TActorId& self, const TActorId& peer, @@ -113,6 +116,7 @@ namespace NActors { Y_VERIFY(SelfVirtualId); Y_VERIFY(SelfVirtualId.NodeId()); Y_VERIFY(PeerNodeId); + HandshakeBroker = MakeHandshakeBrokerOutId(); } THandshakeActor(TInterconnectProxyCommon::TPtr common, TSocketPtr socket) @@ -135,14 +139,42 @@ namespace NActors { } void Run() override { + try { + RunImpl(); + } catch (const TDtorException&) { + if (BrokerLeaseHolder) { + BrokerLeaseHolder->ForgetLease(); + } + throw; + } catch (...) 
{ + throw; + } + } + + void RunImpl() { UpdatePrefix(); + if (!Socket && Common->OutgoingHandshakeInflightLimit) { + // Create holder, which sends request to broker and automatically frees the place when destroyed + BrokerLeaseHolder.emplace(GetActorSystem(), SelfActorId, HandshakeBroker); + } + + if (BrokerLeaseHolder && BrokerLeaseHolder->IsLeaseRequested()) { + WaitForSpecificEvent<TEvHandshakeBrokerPermit>("HandshakeBrokerPermit"); + } + // set up overall handshake process timer TDuration timeout = Common->Settings.Handshake; if (timeout == TDuration::Zero()) { timeout = DEFAULT_HANDSHAKE_TIMEOUT; } timeout += ResolveTimeout * 2; + + if (Socket) { + // Incoming handshakes have shorter timeout than outgoing + timeout *= 0.9; + } + Deadline = Now() + timeout; Schedule(Deadline, new TEvents::TEvWakeup); @@ -176,6 +208,7 @@ namespace NActors { *NextPacketFromPeer, ProgramInfo->Release(), std::move(Params))); } + BrokerLeaseHolder.reset(); Socket.Reset(); } @@ -850,7 +883,7 @@ namespace NActors { addresses.emplace_back(r.GetAddress(), static_cast<ui16>(r.GetPort())); } else { Y_VERIFY(ev->GetTypeRewrite() == ui32(ENetwork::ResolveError)); - Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "DNS resolve error: " + ev->Get<TEvResolveError>()->Explain + Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "DNS resolve error: " + ev->Get<TEvResolveError>()->Explain + ", Unresolved host# " + ev->Get<TEvResolveError>()->Host, true); } diff --git a/library/cpp/actors/interconnect/interconnect_handshake.h b/library/cpp/actors/interconnect/interconnect_handshake.h index b3c0db6c5d..fc37f11251 100644 --- a/library/cpp/actors/interconnect/interconnect_handshake.h +++ b/library/cpp/actors/interconnect/interconnect_handshake.h @@ -10,7 +10,7 @@ #include "events_local.h" namespace NActors { - static constexpr TDuration DEFAULT_HANDSHAKE_TIMEOUT = TDuration::Seconds(1); + static constexpr TDuration DEFAULT_HANDSHAKE_TIMEOUT = TDuration::Seconds(5); static constexpr ui64 
INTERCONNECT_PROTOCOL_VERSION = 2; using TSocketPtr = TIntrusivePtr<NInterconnect::TStreamSocket>; diff --git a/library/cpp/actors/interconnect/interconnect_tcp_server.cpp b/library/cpp/actors/interconnect/interconnect_tcp_server.cpp index aad8677ca4..ede35b0b8b 100644 --- a/library/cpp/actors/interconnect/interconnect_tcp_server.cpp +++ b/library/cpp/actors/interconnect/interconnect_tcp_server.cpp @@ -46,9 +46,10 @@ namespace NActors { if (addr.GetFamily() == AF_INET6) { SetSockOpt(*Listener, IPPROTO_IPV6, IPV6_V6ONLY, 0); } + const ui32 backlog = ProxyCommonCtx->Settings.SocketBacklogSize; if (const auto e = -Listener->Bind(addr)) { return e; - } else if (const auto e = -Listener->Listen(SOMAXCONN)) { + } else if (const auto e = -Listener->Listen(backlog ? backlog : SOMAXCONN)) { return e; } else { return 0; diff --git a/library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h b/library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h index 2b6d27cd3f..dd2557e25e 100644 --- a/library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h +++ b/library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h @@ -26,13 +26,15 @@ private: TList<TTrafficInterrupter> interrupters; NActors::TChannelsConfig ChannelsConfig; TPortManager PortManager; + TIntrusivePtr<NLog::TSettings> LoggerSettings; public: TTestICCluster(ui32 numNodes = 1, NActors::TChannelsConfig channelsConfig = NActors::TChannelsConfig(), - TTrafficInterrupterSettings* tiSettings = nullptr) + TTrafficInterrupterSettings* tiSettings = nullptr, TIntrusivePtr<NLog::TSettings> loggerSettings = nullptr) : NumNodes(numNodes) , Counters(new NMonitoring::TDynamicCounters) , ChannelsConfig(channelsConfig) + , LoggerSettings(loggerSettings) { THashMap<ui32, ui16> nodeToPortMap; THashMap<ui32, THashMap<ui32, ui16>> specificNodePortMap; @@ -59,7 +61,8 @@ public: for (ui32 i = 1; i <= NumNodes; ++i) { auto& portMap = tiSettings ? 
specificNodePortMap[i] : nodeToPortMap; - Nodes.emplace(i, MakeHolder<TNode>(i, NumNodes, portMap, Address, Counters, DeadPeerTimeout, ChannelsConfig)); + Nodes.emplace(i, MakeHolder<TNode>(i, NumNodes, portMap, Address, Counters, DeadPeerTimeout, ChannelsConfig, + /*numDynamicNodes=*/0, /*numThreads=*/1, LoggerSettings)); } } diff --git a/library/cpp/actors/interconnect/ut/lib/node.h b/library/cpp/actors/interconnect/ut/lib/node.h index ff30b1445e..0b538cdb1c 100644 --- a/library/cpp/actors/interconnect/ut/lib/node.h +++ b/library/cpp/actors/interconnect/ut/lib/node.h @@ -6,6 +6,7 @@ #include <library/cpp/actors/core/mailbox.h> #include <library/cpp/actors/dnsresolver/dnsresolver.h> +#include <library/cpp/actors/interconnect/handshake_broker.h> #include <library/cpp/actors/interconnect/interconnect_tcp_server.h> #include <library/cpp/actors/interconnect/interconnect_tcp_proxy.h> #include <library/cpp/actors/interconnect/interconnect_proxy_wrapper.h> @@ -19,7 +20,8 @@ public: TNode(ui32 nodeId, ui32 numNodes, const THashMap<ui32, ui16>& nodeToPort, const TString& address, NMonitoring::TDynamicCounterPtr counters, TDuration deadPeerTimeout, TChannelsConfig channelsSettings = TChannelsConfig(), - ui32 numDynamicNodes = 0, ui32 numThreads = 1) { + ui32 numDynamicNodes = 0, ui32 numThreads = 1, + TIntrusivePtr<NLog::TSettings> loggerSettings = nullptr) { TActorSystemSetup setup; setup.NodeId = nodeId; setup.ExecutorsCount = 1; @@ -43,6 +45,7 @@ public: common->Settings.SendBufferDieLimitInMB = 512; common->Settings.TotalInflightAmountOfData = 512 * 1024; common->Settings.TCPSocketBufferSize = 2048 * 1024; + common->OutgoingHandshakeInflightLimit = 3; setup.Interconnect.ProxyActors.resize(numNodes + 1 - numDynamicNodes); setup.Interconnect.ProxyWrapperFactory = CreateProxyWrapperFactory(common, interconnectPoolId); @@ -62,29 +65,31 @@ public: setup.LocalServices.emplace_back(MakePollerActorId(), TActorSetupCmd(CreatePollerActor(), TMailboxType::ReadAsFilled, 0)); - 
const TActorId loggerActorId(0, "logger"); - constexpr ui32 LoggerComponentId = 410; // NKikimrServices::LOGGER - - auto loggerSettings = MakeIntrusive<NLog::TSettings>( - loggerActorId, - (NLog::EComponent)LoggerComponentId, - NLog::PRI_INFO, - NLog::PRI_DEBUG, - 0U); - - loggerSettings->Append( - NActorsServices::EServiceCommon_MIN, - NActorsServices::EServiceCommon_MAX, - NActorsServices::EServiceCommon_Name - ); - - constexpr ui32 WilsonComponentId = 430; // NKikimrServices::WILSON - static const TString WilsonComponentName = "WILSON"; - - loggerSettings->Append( - (NLog::EComponent)WilsonComponentId, - (NLog::EComponent)WilsonComponentId + 1, - [](NLog::EComponent) -> const TString & { return WilsonComponentName; }); + const TActorId loggerActorId = loggerSettings ? loggerSettings->LoggerActorId : TActorId(0, "logger"); + + if (!loggerSettings) { + constexpr ui32 LoggerComponentId = 410; // NKikimrServices::LOGGER + loggerSettings = MakeIntrusive<NLog::TSettings>( + loggerActorId, + (NLog::EComponent)LoggerComponentId, + NLog::PRI_INFO, + NLog::PRI_DEBUG, + 0U); + + loggerSettings->Append( + NActorsServices::EServiceCommon_MIN, + NActorsServices::EServiceCommon_MAX, + NActorsServices::EServiceCommon_Name + ); + + constexpr ui32 WilsonComponentId = 430; // NKikimrServices::WILSON + static const TString WilsonComponentName = "WILSON"; + + loggerSettings->Append( + (NLog::EComponent)WilsonComponentId, + (NLog::EComponent)WilsonComponentId + 1, + [](NLog::EComponent) -> const TString & { return WilsonComponentName; }); + } // register nameserver table auto names = MakeIntrusive<TTableNameserverSetup>(); @@ -105,6 +110,14 @@ public: CreateStderrBackend(), counters->GetSubgroup("subsystem", "logger")), TMailboxType::ReadAsFilled, interconnectPoolId)); + + if (common->OutgoingHandshakeInflightLimit) { + // create handshake broker actor + setup.LocalServices.emplace_back(MakeHandshakeBrokerOutId(), TActorSetupCmd( + 
CreateHandshakeBroker(*common->OutgoingHandshakeInflightLimit), + TMailboxType::ReadAsFilled, interconnectPoolId)); + } + auto sp = MakeHolder<TActorSystemSetup>(std::move(setup)); ActorSystem.Reset(new TActorSystem(sp, nullptr, loggerSettings)); ActorSystem->Start(); diff --git a/library/cpp/actors/interconnect/ut/lib/test_events.h b/library/cpp/actors/interconnect/ut/lib/test_events.h index cd0d9e0152..1bb5eb7d38 100644 --- a/library/cpp/actors/interconnect/ut/lib/test_events.h +++ b/library/cpp/actors/interconnect/ut/lib/test_events.h @@ -9,6 +9,7 @@ namespace NActors { EvTestSmall, EvTestLarge, EvTestResponse, + EvTestStartPolling, }; struct TEvTest : TEventPB<TEvTest, NInterconnectTest::TEvTest, EvTest> { @@ -46,4 +47,8 @@ namespace NActors { } }; + struct TEvTestStartPolling : TEventPB<TEvTestStartPolling, NInterconnectTest::TEvTestStartPolling, EvTestStartPolling> { + TEvTestStartPolling() = default; + }; + } diff --git a/library/cpp/actors/interconnect/ut/protos/interconnect_test.proto b/library/cpp/actors/interconnect/ut/protos/interconnect_test.proto index b9b2bd6a4e..b74d068a8b 100644 --- a/library/cpp/actors/interconnect/ut/protos/interconnect_test.proto +++ b/library/cpp/actors/interconnect/ut/protos/interconnect_test.proto @@ -23,3 +23,6 @@ message TEvTestSmall { message TEvTestResponse { optional uint64 ConfirmedSequenceNumber = 1; } + +message TEvTestStartPolling { +} diff --git a/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.darwin.txt b/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.darwin.txt new file mode 100644 index 0000000000..89c38824c2 --- /dev/null +++ b/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.darwin.txt @@ -0,0 +1,45 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). 
These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_executable(library-cpp-actors-interconnect-ut_huge_cluster) +target_link_libraries(library-cpp-actors-interconnect-ut_huge_cluster PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + cpp-testing-unittest_main + cpp-actors-core + cpp-actors-interconnect + interconnect-ut-lib + interconnect-ut-protos + cpp-testing-unittest + cpp-actors-testlib +) +target_link_options(library-cpp-actors-interconnect-ut_huge_cluster PRIVATE + -Wl,-no_deduplicate + -Wl,-sdk_version,10.15 + -fPIC + -fPIC + -framework + CoreFoundation +) +target_sources(library-cpp-actors-interconnect-ut_huge_cluster PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/actors/interconnect/ut_huge_cluster/huge_cluster.cpp +) +add_test( + NAME + library-cpp-actors-interconnect-ut_huge_cluster + COMMAND + library-cpp-actors-interconnect-ut_huge_cluster + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +vcs_info(library-cpp-actors-interconnect-ut_huge_cluster) diff --git a/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.linux-aarch64.txt b/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..33064b5008 --- /dev/null +++ b/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.linux-aarch64.txt @@ -0,0 +1,48 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_executable(library-cpp-actors-interconnect-ut_huge_cluster) +target_link_libraries(library-cpp-actors-interconnect-ut_huge_cluster PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-lfalloc + cpp-testing-unittest_main + cpp-actors-core + cpp-actors-interconnect + interconnect-ut-lib + interconnect-ut-protos + cpp-testing-unittest + cpp-actors-testlib +) +target_link_options(library-cpp-actors-interconnect-ut_huge_cluster PRIVATE + -ldl + -lrt + -Wl,--no-as-needed + -fPIC + -fPIC + -lpthread + -lrt + -ldl +) +target_sources(library-cpp-actors-interconnect-ut_huge_cluster PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/actors/interconnect/ut_huge_cluster/huge_cluster.cpp +) +add_test( + NAME + library-cpp-actors-interconnect-ut_huge_cluster + COMMAND + library-cpp-actors-interconnect-ut_huge_cluster + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +vcs_info(library-cpp-actors-interconnect-ut_huge_cluster) diff --git a/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.linux.txt b/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.linux.txt new file mode 100644 index 0000000000..5b08a947cf --- /dev/null +++ b/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.linux.txt @@ -0,0 +1,50 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_executable(library-cpp-actors-interconnect-ut_huge_cluster) +target_link_libraries(library-cpp-actors-interconnect-ut_huge_cluster PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-malloc-tcmalloc + libs-tcmalloc-no_percpu_cache + library-cpp-cpuid_check + cpp-testing-unittest_main + cpp-actors-core + cpp-actors-interconnect + interconnect-ut-lib + interconnect-ut-protos + cpp-testing-unittest + cpp-actors-testlib +) +target_link_options(library-cpp-actors-interconnect-ut_huge_cluster PRIVATE + -ldl + -lrt + -Wl,--no-as-needed + -fPIC + -fPIC + -lpthread + -lrt + -ldl +) +target_sources(library-cpp-actors-interconnect-ut_huge_cluster PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/actors/interconnect/ut_huge_cluster/huge_cluster.cpp +) +add_test( + NAME + library-cpp-actors-interconnect-ut_huge_cluster + COMMAND + library-cpp-actors-interconnect-ut_huge_cluster + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +vcs_info(library-cpp-actors-interconnect-ut_huge_cluster) diff --git a/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.txt b/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.txt new file mode 100644 index 0000000000..3e0811fb22 --- /dev/null +++ b/library/cpp/actors/interconnect/ut_huge_cluster/CMakeLists.txt @@ -0,0 +1,15 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND UNIX AND NOT APPLE AND NOT ANDROID) + include(CMakeLists.linux-aarch64.txt) +elseif (APPLE) + include(CMakeLists.darwin.txt) +elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND UNIX AND NOT APPLE AND NOT ANDROID) + include(CMakeLists.linux.txt) +endif() diff --git a/library/cpp/actors/interconnect/ut_huge_cluster/huge_cluster.cpp b/library/cpp/actors/interconnect/ut_huge_cluster/huge_cluster.cpp new file mode 100644 index 0000000000..458ead3459 --- /dev/null +++ b/library/cpp/actors/interconnect/ut_huge_cluster/huge_cluster.cpp @@ -0,0 +1,167 @@ +#include <library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h> +#include <library/cpp/actors/interconnect/ut/lib/test_events.h> +#include <library/cpp/actors/interconnect/ut/lib/test_actors.h> + +#include <library/cpp/testing/unittest/registar.h> + +#include <vector> + +Y_UNIT_TEST_SUITE(HugeCluster) { + using namespace NActors; + + class TPoller: public TActor<TPoller> { + const std::vector<TActorId>& Targets; + std::unordered_map<TActorId, TManualEvent>& Connected; + + public: + TPoller(const std::vector<TActorId>& targets, std::unordered_map<TActorId, TManualEvent>& events) + : TActor(&TPoller::StateFunc) + , Targets(targets) + , Connected(events) + {} + + void Handle(TEvTestStartPolling::TPtr /*ev*/, const TActorContext& ctx) { + for (ui32 i = 0; i < Targets.size(); ++i) { + ctx.Send(Targets[i], new TEvTest(), IEventHandle::FlagTrackDelivery, i); + } + } + + void Handle(TEvents::TEvUndelivered::TPtr ev, const TActorContext& ctx) { + const ui32 cookie = ev->Cookie; + // Cerr << "TEvUndelivered ping from node# " << SelfId().NodeId() << " to node# " << cookie + 1 << Endl; + ctx.Send(Targets[cookie], new TEvTest(), IEventHandle::FlagTrackDelivery, cookie); + } + + void Handle(TEvTest::TPtr ev, const TActorContext& /*ctx*/) { + // Cerr << "Polled from " << ev->Sender.ToString() << Endl; + Connected[ev->Sender].Signal(); + } + + void 
Handle(TEvents::TEvPoisonPill::TPtr& /*ev*/, const TActorContext& ctx) { + Die(ctx); + } + + STRICT_STFUNC(StateFunc, + HFunc(TEvents::TEvUndelivered, Handle) + HFunc(TEvTestStartPolling, Handle) + HFunc(TEvTest, Handle) + HFunc(TEvents::TEvPoisonPill, Handle) + ) + }; + + class TStartPollers : public TActorBootstrapped<TStartPollers> { + const std::vector<TActorId>& Pollers; + + public: + TStartPollers(const std::vector<TActorId>& pollers) + : Pollers(pollers) + {} + + void Bootstrap(const TActorContext& ctx) { + Become(&TThis::StateFunc); + for (ui32 i = 0; i < Pollers.size(); ++i) { + ctx.Send(Pollers[i], new TEvTestStartPolling(), IEventHandle::FlagTrackDelivery, i); + } + } + + void Handle(TEvents::TEvUndelivered::TPtr ev, const TActorContext& ctx) { + const ui32 cookie = ev->Cookie; + // Cerr << "TEvUndelivered start poller message to node# " << cookie + 1 << Endl; + ctx.Send(Pollers[cookie], new TEvTestStartPolling(), IEventHandle::FlagTrackDelivery, cookie); + } + + void Handle(TEvents::TEvPoisonPill::TPtr& /*ev*/, const TActorContext& ctx) { + Die(ctx); + } + + STRICT_STFUNC(StateFunc, + HFunc(TEvents::TEvUndelivered, Handle) + HFunc(TEvents::TEvPoisonPill, Handle) + ) + }; + + TIntrusivePtr<NLog::TSettings> MakeLogConfigs(NLog::EPriority priority) { + // custom logger settings + auto loggerSettings = MakeIntrusive<NLog::TSettings>( + TActorId(0, "logger"), + (NLog::EComponent)410, + priority, + priority, + 0U); + + loggerSettings->Append( + NActorsServices::EServiceCommon_MIN, + NActorsServices::EServiceCommon_MAX, + NActorsServices::EServiceCommon_Name + ); + + constexpr ui32 WilsonComponentId = 430; // NKikimrServices::WILSON + static const TString WilsonComponentName = "WILSON"; + + loggerSettings->Append( + (NLog::EComponent)WilsonComponentId, + (NLog::EComponent)WilsonComponentId + 1, + [](NLog::EComponent) -> const TString & { return WilsonComponentName; }); + + return loggerSettings; + } + + Y_UNIT_TEST(AllToAll) { + ui32 nodesNum = 120; + 
std::vector<TActorId> pollers(nodesNum); + std::vector<std::unordered_map<TActorId, TManualEvent>> events(nodesNum); + + // Must destroy actor system before shared arrays + { + TTestICCluster testCluster(nodesNum, NActors::TChannelsConfig(), nullptr, MakeLogConfigs(NLog::PRI_EMERG)); + + for (ui32 i = 0; i < nodesNum; ++i) { + pollers[i] = testCluster.RegisterActor(new TPoller(pollers, events[i]), i + 1); + } + + for (ui32 i = 0; i < nodesNum; ++i) { + for (const auto& actor : pollers) { + events[i][actor] = TManualEvent(); + } + } + + testCluster.RegisterActor(new TStartPollers(pollers), 1); + + for (ui32 i = 0; i < nodesNum; ++i) { + for (auto& [_, ev] : events[i]) { + ev.WaitI(); + } + } + } + } + + + Y_UNIT_TEST(AllToOne) { + ui32 nodesNum = 500; + std::vector<TActorId> listeners; + std::vector<TActorId> pollers(nodesNum - 1); + std::unordered_map<TActorId, TManualEvent> events; + std::unordered_map<TActorId, TManualEvent> emptyEventList; + + // Must destroy actor system before shared arrays + { + TTestICCluster testCluster(nodesNum, NActors::TChannelsConfig(), nullptr, MakeLogConfigs(NLog::PRI_EMERG)); + + const TActorId listener = testCluster.RegisterActor(new TPoller({}, events), nodesNum); + listeners = { listener }; + for (ui32 i = 0; i < nodesNum - 1; ++i) { + pollers[i] = testCluster.RegisterActor(new TPoller(listeners, emptyEventList), i + 1); + } + + for (const auto& actor : pollers) { + events[actor] = TManualEvent(); + } + + testCluster.RegisterActor(new TStartPollers(pollers), 1); + + for (auto& [_, ev] : events) { + ev.WaitI(); + } + } + } +} diff --git a/library/cpp/actors/util/rc_buf.h b/library/cpp/actors/util/rc_buf.h index a2bce33fba..5d4517ade2 100644 --- a/library/cpp/actors/util/rc_buf.h +++ b/library/cpp/actors/util/rc_buf.h @@ -306,9 +306,12 @@ class TRcBuf { struct TBackendHolder { uintptr_t Data[2]; - operator bool() const noexcept { + explicit operator bool() const noexcept { return Data[0] || Data[1]; } + friend bool operator ==(const 
TBackendHolder& x, const TBackendHolder& y) { + return x.Data[0] == y.Data[0] && x.Data[1] == y.Data[1]; + } }; constexpr static TBackendHolder Empty = {0, 0}; @@ -592,7 +595,7 @@ class TRcBuf { } explicit operator bool() const { - return Owner; + return static_cast<bool>(Owner); } private: diff --git a/library/cpp/monlib/service/pages/index_mon_page.cpp b/library/cpp/monlib/service/pages/index_mon_page.cpp index 2bfa0faca8..c9b2f82cc0 100644 --- a/library/cpp/monlib/service/pages/index_mon_page.cpp +++ b/library/cpp/monlib/service/pages/index_mon_page.cpp @@ -28,9 +28,8 @@ void TIndexMonPage::Output(IMonHttpRequest& request) { TGuard<TMutex> g(Mtx); TStringBuf pathTmp = request.GetPathInfo(); for (;;) { - TPagesByPath::iterator i = PagesByPath.find(pathTmp); - if (i != PagesByPath.end()) { - found = i->second; + if (TPagesByPath::iterator i = PagesByPath.find(pathTmp); i != PagesByPath.end()) { + found = *i->second; pathInfo = request.GetPathInfo().substr(pathTmp.size()); Y_VERIFY(pathInfo.empty() || pathInfo.StartsWith('/')); break; @@ -67,18 +66,12 @@ void TIndexMonPage::OutputIndex(IOutputStream& out, bool pathEndsWithSlash) { void TIndexMonPage::Register(TMonPagePtr page) { TGuard<TMutex> g(Mtx); - auto insres = PagesByPath.insert(std::make_pair("/" + page->GetPath(), page)); - if (insres.second) { - // new unique page just inserted, update Pages - Pages.push_back(page); + if (auto [it, inserted] = PagesByPath.try_emplace("/" + page->GetPath()); inserted) { + // new unique page just inserted, insert it to the end + it->second = Pages.insert(Pages.end(), page); } else { // a page with the given path is already present, replace it with the new page - - // find old page, sorry for O(n) - auto it = std::find(Pages.begin(), Pages.end(), insres.first->second); - *it = page; - // this already present, replace it - insres.first->second = page; + *it->second = page; } page->Parent = this; } @@ -101,7 +94,7 @@ IMonPage* TIndexMonPage::FindPage(const TString& 
relativePath) { if (i == PagesByPath.end()) { return nullptr; } else { - return i->second.Get(); + return i->second->Get(); } } @@ -171,7 +164,7 @@ void TIndexMonPage::OutputBody(IMonHttpRequest& req) { void TIndexMonPage::SortPages() { TGuard<TMutex> g(Mtx); - std::sort(Pages.begin(), Pages.end(), [](const TMonPagePtr& a, const TMonPagePtr& b) { + Pages.sort([](const TMonPagePtr& a, const TMonPagePtr& b) { return AsciiCompareIgnoreCase(a->GetTitle(), b->GetTitle()) < 0; }); } diff --git a/library/cpp/monlib/service/pages/index_mon_page.h b/library/cpp/monlib/service/pages/index_mon_page.h index af96bcd2b9..0aaf826d46 100644 --- a/library/cpp/monlib/service/pages/index_mon_page.h +++ b/library/cpp/monlib/service/pages/index_mon_page.h @@ -2,12 +2,14 @@ #include "mon_page.h" +#include <list> + namespace NMonitoring { struct TIndexMonPage: public IMonPage { TMutex Mtx; - typedef TVector<TMonPagePtr> TPages; - TPages Pages; - typedef THashMap<TString, TMonPagePtr> TPagesByPath; + using TPages = std::list<TMonPagePtr>; + TPages Pages; // a list of pages to maintain specific order + using TPagesByPath = THashMap<TString, TPages::iterator>; TPagesByPath PagesByPath; TIndexMonPage(const TString& path, const TString& title) diff --git a/library/cpp/threading/CMakeLists.txt b/library/cpp/threading/CMakeLists.txt index 1246829e67..6a92c755cd 100644 --- a/library/cpp/threading/CMakeLists.txt +++ b/library/cpp/threading/CMakeLists.txt @@ -10,6 +10,7 @@ add_subdirectory(atomic) add_subdirectory(chunk_queue) add_subdirectory(equeue) add_subdirectory(future) +add_subdirectory(hot_swap) add_subdirectory(light_rw_lock) add_subdirectory(local_executor) add_subdirectory(poor_man_openmp) diff --git a/library/cpp/threading/hot_swap/CMakeLists.darwin.txt b/library/cpp/threading/hot_swap/CMakeLists.darwin.txt new file mode 100644 index 0000000000..fb3d6d7710 --- /dev/null +++ b/library/cpp/threading/hot_swap/CMakeLists.darwin.txt @@ -0,0 +1,18 @@ + +# This file was gererated by the build 
system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-threading-hot_swap) +target_link_libraries(cpp-threading-hot_swap PUBLIC + contrib-libs-cxxsupp + yutil + cpp-deprecated-atomic +) +target_sources(cpp-threading-hot_swap PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/threading/hot_swap/hot_swap.cpp +) diff --git a/library/cpp/threading/hot_swap/CMakeLists.linux-aarch64.txt b/library/cpp/threading/hot_swap/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..48692e2319 --- /dev/null +++ b/library/cpp/threading/hot_swap/CMakeLists.linux-aarch64.txt @@ -0,0 +1,19 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-threading-hot_swap) +target_link_libraries(cpp-threading-hot_swap PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-deprecated-atomic +) +target_sources(cpp-threading-hot_swap PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/threading/hot_swap/hot_swap.cpp +) diff --git a/library/cpp/threading/hot_swap/CMakeLists.linux.txt b/library/cpp/threading/hot_swap/CMakeLists.linux.txt new file mode 100644 index 0000000000..48692e2319 --- /dev/null +++ b/library/cpp/threading/hot_swap/CMakeLists.linux.txt @@ -0,0 +1,19 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-threading-hot_swap) +target_link_libraries(cpp-threading-hot_swap PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-deprecated-atomic +) +target_sources(cpp-threading-hot_swap PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/threading/hot_swap/hot_swap.cpp +) diff --git a/library/cpp/threading/hot_swap/CMakeLists.txt b/library/cpp/threading/hot_swap/CMakeLists.txt new file mode 100644 index 0000000000..3e0811fb22 --- /dev/null +++ b/library/cpp/threading/hot_swap/CMakeLists.txt @@ -0,0 +1,15 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND UNIX AND NOT APPLE AND NOT ANDROID) + include(CMakeLists.linux-aarch64.txt) +elseif (APPLE) + include(CMakeLists.darwin.txt) +elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND UNIX AND NOT APPLE AND NOT ANDROID) + include(CMakeLists.linux.txt) +endif() diff --git a/ydb/core/cms/json_proxy_proto.h b/ydb/core/cms/json_proxy_proto.h index ebff56630c..88ecfa1a72 100644 --- a/ydb/core/cms/json_proxy_proto.h +++ b/ydb/core/cms/json_proxy_proto.h @@ -76,6 +76,8 @@ protected: return ReplyWithTypeDescription(*NKikimrConfig::TImmediateControlsConfig::TCoordinatorControls::descriptor(), ctx); else if (name == ".NKikimrConfig.TImmediateControlsConfig.TSchemeShardControls") return ReplyWithTypeDescription(*NKikimrConfig::TImmediateControlsConfig::TSchemeShardControls::descriptor(), ctx); + else if (name == ".NKikimrConfig.TImmediateControlsConfig.TTCMallocControls") + return ReplyWithTypeDescription(*NKikimrConfig::TImmediateControlsConfig::TTCMallocControls::descriptor(), ctx); } ctx.Send(RequestEvent->Sender, diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp index 29278be5ad..256acbf804 100644 --- a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp +++ b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp @@ -183,6 +183,7 @@ #include <library/cpp/actors/interconnect/interconnect_tcp_proxy.h> #include <library/cpp/actors/interconnect/interconnect_proxy_wrapper.h> #include <library/cpp/actors/interconnect/interconnect_tcp_server.h> +#include <library/cpp/actors/interconnect/handshake_broker.h> #include <library/cpp/actors/interconnect/load.h> #include <library/cpp/actors/interconnect/poller_actor.h> #include <library/cpp/actors/interconnect/poller_tcp.h> @@ -558,17 +559,19 @@ static TInterconnectSettings GetInterconnectSettings(const NKikimrConfig::TInter if (config.HasNumPreallocatedBuffers()) { result.NumPreallocatedBuffers = 
config.GetNumPreallocatedBuffers(); } + result.SocketBacklogSize = config.GetSocketBacklogSize(); return result; } namespace { - enum class EPoolType : i8 { + enum class EPoolKind : i8 { System = 0, User = 1, Batch = 2, - IC = 3, + IO = 3, + IC = 4, }; struct TShortPoolCfg { @@ -584,116 +587,116 @@ namespace { constexpr i16 GRpcHandlersPerCompletionQueueInMaxPreparedCpuCase = 1000; constexpr i16 GRpcHandlersPerCompletionQueuePerCpu = GRpcHandlersPerCompletionQueueInMaxPreparedCpuCase / MaxPreparedCpuCount; - TShortPoolCfg ComputeCpuTable[MaxPreparedCpuCount + 1][4] { - { {0, 0}, {0, 0}, {0, 0}, {0, 0} }, // 0 - { {1, 1}, {0, 1}, {0, 1}, {0, 0} }, // 1 - { {1, 1}, {1, 2}, {0, 1}, {0, 1} }, // 2 - { {1, 2}, {1, 3}, {1, 1}, {0, 1} }, // 3 - { {1, 2}, {1, 4}, {1, 1}, {1, 2} }, // 4 - { {1, 3}, {2, 5}, {1, 1}, {1, 2} }, // 5 - { {1, 3}, {3, 6}, {1, 1}, {1, 2} }, // 6 - { {2, 4}, {3, 7}, {1, 2}, {1, 3} }, // 7 - { {2, 4}, {4, 8}, {1, 2}, {1, 3} }, // 8 - { {2, 5}, {4, 9}, {2, 3}, {1, 3} }, // 9 - { {2, 5}, {5, 10}, {2, 3}, {1, 3} }, // 10 - { {2, 6}, {6, 11}, {2, 3}, {2, 4} }, // 11 - { {2, 6}, {7, 12}, {2, 3}, {2, 5} }, // 12 - { {3, 7}, {7, 13}, {2, 3}, {2, 5} }, // 13 - { {3, 7}, {7, 14}, {2, 3}, {3, 6} }, // 14 - { {3, 8}, {8, 15}, {2, 4}, {3, 6} }, // 15 - { {3, 8}, {9, 16}, {2, 4}, {3, 6} }, // 16 - { {3, 9}, {10, 17}, {2, 4}, {3, 7} }, // 17 - { {3, 9}, {10, 18}, {3, 5}, {3, 7} }, // 18 - { {4, 10}, {10, 19}, {3, 5}, {4, 8} }, // 19 - { {4, 10}, {10, 20}, {3, 5}, {4, 8} }, // 20 - { {4, 11}, {11, 21}, {3, 5}, {4, 8} }, // 21 - { {4, 11}, {12, 22}, {3, 5}, {4, 9} }, // 22 - { {4, 12}, {13, 23}, {3, 6}, {4, 9} }, // 23 - { {4, 12}, {13, 24}, {3, 6}, {5, 10} }, // 24 - { {5, 13}, {13, 25}, {3, 6}, {5, 10} }, // 25 - { {5, 13}, {13, 26}, {4, 7}, {5, 10} }, // 26 - { {5, 14}, {14, 27}, {4, 7}, {5, 11} }, // 27 - { {5, 14}, {14, 28}, {4, 7}, {5, 11} }, // 28 - { {5, 15}, {15, 29}, {4, 8}, {6, 12} }, // 29 - { {5, 15}, {16, 30}, {4, 8}, {6, 12} }, // 30 - { {6, 18}, {16, 
31}, {4, 8}, {6, 12} }, // 31 + TShortPoolCfg ComputeCpuTable[MaxPreparedCpuCount + 1][5] { + { {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} }, // 0 + { {1, 1}, {0, 1}, {0, 1}, {0, 0}, {0, 0} }, // 1 + { {1, 1}, {0, 2}, {0, 1}, {0, 0}, {1, 1} }, // 2 + { {1, 2}, {0, 3}, {1, 1}, {0, 0}, {1, 1} }, // 3 + { {1, 2}, {1, 4}, {1, 1}, {0, 0}, {1, 2} }, // 4 + { {1, 3}, {2, 5}, {1, 1}, {0, 0}, {1, 2} }, // 5 + { {1, 3}, {3, 6}, {1, 1}, {0, 0}, {1, 2} }, // 6 + { {2, 4}, {3, 7}, {1, 2}, {0, 0}, {1, 3} }, // 7 + { {2, 4}, {4, 8}, {1, 2}, {0, 0}, {1, 3} }, // 8 + { {2, 5}, {4, 9}, {2, 3}, {0, 0}, {1, 3} }, // 9 + { {2, 5}, {5, 10}, {2, 3}, {0, 0}, {1, 3} }, // 10 + { {2, 6}, {6, 11}, {2, 3}, {0, 0}, {2, 4} }, // 11 + { {2, 6}, {7, 12}, {2, 3}, {0, 0}, {2, 5} }, // 12 + { {3, 7}, {7, 13}, {2, 3}, {0, 0}, {2, 5} }, // 13 + { {3, 7}, {7, 14}, {2, 3}, {0, 0}, {3, 6} }, // 14 + { {3, 8}, {8, 15}, {2, 4}, {0, 0}, {3, 6} }, // 15 + { {3, 8}, {9, 16}, {2, 4}, {0, 0}, {3, 6} }, // 16 + { {3, 9}, {10, 17}, {2, 4}, {0, 0}, {3, 7} }, // 17 + { {3, 9}, {10, 18}, {3, 5}, {0, 0}, {3, 7} }, // 18 + { {4, 10}, {10, 19}, {3, 5}, {0, 0}, {4, 8} }, // 19 + { {4, 10}, {10, 20}, {3, 5}, {0, 0}, {4, 8} }, // 20 + { {4, 11}, {11, 21}, {3, 5}, {0, 0}, {4, 8} }, // 21 + { {4, 11}, {12, 22}, {3, 5}, {0, 0}, {4, 9} }, // 22 + { {4, 12}, {13, 23}, {3, 6}, {0, 0}, {4, 9} }, // 23 + { {4, 12}, {13, 24}, {3, 6}, {0, 0}, {5, 10} }, // 24 + { {5, 13}, {13, 25}, {3, 6}, {0, 0}, {5, 10} }, // 25 + { {5, 13}, {13, 26}, {4, 7}, {0, 0}, {5, 10} }, // 26 + { {5, 14}, {14, 27}, {4, 7}, {0, 0}, {5, 11} }, // 27 + { {5, 14}, {14, 28}, {4, 7}, {0, 0}, {5, 11} }, // 28 + { {5, 15}, {15, 29}, {4, 8}, {0, 0}, {6, 12} }, // 29 + { {5, 15}, {16, 30}, {4, 8}, {0, 0}, {6, 12} }, // 30 + { {6, 18}, {16, 31}, {4, 8}, {0, 0}, {6, 12} }, // 31 }; - TShortPoolCfg HybridCpuTable[MaxPreparedCpuCount + 1][4] { - { {0, 0}, {0, 0}, {0, 0}, {0, 0} }, // 0 - { {1, 1}, {0, 1}, {0, 1}, {0, 0} }, // 1 - { {1, 1}, {1, 2}, {0, 1}, {0, 1} }, // 2 - 
{ {1, 2}, {1, 3}, {1, 1}, {0, 1} }, // 3 - { {1, 2}, {1, 4}, {1, 1}, {1, 2} }, // 4 - { {1, 2}, {2, 5}, {1, 1}, {1, 2} }, // 5 - { {1, 2}, {2, 6}, {1, 1}, {2, 2} }, // 6 - { {2, 3}, {2, 7}, {1, 2}, {2, 3} }, // 7 - { {2, 3}, {3, 8}, {1, 2}, {2, 3} }, // 8 - { {2, 4}, {3, 9}, {1, 2}, {3, 4} }, // 9 - { {3, 4}, {3, 10}, {1, 2}, {3, 4} }, // 10 - { {3, 5}, {4, 11}, {1, 2}, {3, 5} }, // 11 - { {3, 5}, {4, 12}, {1, 3}, {4, 5} }, // 12 - { {4, 6}, {4, 13}, {1, 3}, {4, 6} }, // 13 - { {4, 6}, {5, 14}, {1, 3}, {4, 6} }, // 14 - { {4, 7}, {5, 15}, {1, 3}, {5, 7} }, // 15 - { {5, 7}, {5, 16}, {1, 3}, {5, 7} }, // 16 - { {5, 8}, {6, 17}, {1, 4}, {5, 8} }, // 17 - { {5, 8}, {6, 18}, {1, 4}, {6, 8} }, // 18 - { {6, 9}, {6, 19}, {1, 4}, {6, 9} }, // 19 - { {6, 9}, {7, 20}, {1, 4}, {6, 9} }, // 20 - { {6, 10}, {7, 21}, {1, 4}, {7, 10} }, // 21 - { {7, 10}, {7, 22}, {1, 5}, {7, 10} }, // 22 - { {7, 11}, {8, 23}, {1, 5}, {7, 11} }, // 23 - { {7, 11}, {8, 24}, {1, 5}, {8, 11} }, // 24 - { {8, 12}, {8, 25}, {1, 5}, {8, 12} }, // 25 - { {8, 12}, {9, 26}, {1, 5}, {8, 12} }, // 26 - { {8, 13}, {9, 27}, {1, 6}, {9, 13} }, // 27 - { {9, 13}, {9, 28}, {1, 6}, {9, 13} }, // 28 - { {9, 14}, {10, 29}, {1, 6}, {9, 14} }, // 29 - { {9, 14}, {10, 30}, {1, 6}, {10, 14} }, // 30 - { {10, 15}, {10, 31}, {1, 6}, {10, 15} }, // 31 + TShortPoolCfg HybridCpuTable[MaxPreparedCpuCount + 1][5] { + { {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} }, // 0 + { {1, 1}, {0, 1}, {0, 1}, {0, 0}, {0, 0} }, // 1 + { {1, 1}, {0, 2}, {0, 1}, {0, 0}, {1, 1} }, // 2 + { {1, 2}, {0, 3}, {1, 1}, {0, 0}, {1, 1} }, // 3 + { {1, 2}, {1, 4}, {1, 1}, {0, 0}, {1, 2} }, // 4 + { {1, 2}, {2, 5}, {1, 1}, {0, 0}, {1, 2} }, // 5 + { {1, 2}, {2, 6}, {1, 1}, {0, 0}, {2, 2} }, // 6 + { {2, 3}, {2, 7}, {1, 2}, {0, 0}, {2, 3} }, // 7 + { {2, 3}, {3, 8}, {1, 2}, {0, 0}, {2, 3} }, // 8 + { {2, 4}, {3, 9}, {1, 2}, {0, 0}, {3, 4} }, // 9 + { {3, 4}, {3, 10}, {1, 2}, {0, 0}, {3, 4} }, // 10 + { {3, 5}, {4, 11}, {1, 2}, {0, 0}, {3, 5} }, // 11 + { 
{3, 5}, {4, 12}, {1, 3}, {0, 0}, {4, 5} }, // 12 + { {4, 6}, {4, 13}, {1, 3}, {0, 0}, {4, 6} }, // 13 + { {4, 6}, {5, 14}, {1, 3}, {0, 0}, {4, 6} }, // 14 + { {4, 7}, {5, 15}, {1, 3}, {0, 0}, {5, 7} }, // 15 + { {5, 7}, {5, 16}, {1, 3}, {0, 0}, {5, 7} }, // 16 + { {5, 8}, {6, 17}, {1, 4}, {0, 0}, {5, 8} }, // 17 + { {5, 8}, {6, 18}, {1, 4}, {0, 0}, {6, 8} }, // 18 + { {6, 9}, {6, 19}, {1, 4}, {0, 0}, {6, 9} }, // 19 + { {6, 9}, {7, 20}, {1, 4}, {0, 0}, {6, 9} }, // 20 + { {6, 10}, {7, 21}, {1, 4}, {0, 0}, {7, 10} }, // 21 + { {7, 10}, {7, 22}, {1, 5}, {0, 0}, {7, 10} }, // 22 + { {7, 11}, {8, 23}, {1, 5}, {0, 0}, {7, 11} }, // 23 + { {7, 11}, {8, 24}, {1, 5}, {0, 0}, {8, 11} }, // 24 + { {8, 12}, {8, 25}, {1, 5}, {0, 0}, {8, 12} }, // 25 + { {8, 12}, {9, 26}, {1, 5}, {0, 0}, {8, 12} }, // 26 + { {8, 13}, {9, 27}, {1, 6}, {0, 0}, {9, 13} }, // 27 + { {9, 13}, {9, 28}, {1, 6}, {0, 0}, {9, 13} }, // 28 + { {9, 14}, {10, 29}, {1, 6}, {0, 0}, {9, 14} }, // 29 + { {9, 14}, {10, 30}, {1, 6}, {0, 0}, {10, 14} }, // 30 + { {10, 15}, {10, 31}, {1, 6}, {0, 0}, {10, 15} }, // 31 }; - TShortPoolCfg StorageCpuTable[MaxPreparedCpuCount + 1][4] { - { {0, 0}, {0, 0}, {0, 0}, {0, 0} }, // 0 - { {1, 1}, {0, 1}, {0, 1}, {0, 0} }, // 1 - { {2, 2}, {0, 2}, {0, 1}, {0, 1} }, // 2 - { {1, 3}, {1, 3}, {1, 1}, {0, 1} }, // 3 - { {1, 4}, {1, 4}, {1, 1}, {1, 2} }, // 4 - { {2, 5}, {1, 5}, {1, 1}, {1, 2} }, // 5 - { {3, 6}, {1, 6}, {1, 1}, {1, 2} }, // 6 - { {4, 7}, {1, 7}, {1, 2}, {1, 3} }, // 7 - { {5, 8}, {1, 8}, {1, 2}, {1, 3} }, // 8 - { {5, 9}, {1, 9}, {1, 2}, {2, 4} }, // 9 - { {6, 10}, {1, 10}, {1, 2}, {2, 4} }, // 10 - { {6, 11}, {1, 11}, {2, 3}, {2, 4} }, // 11 - { {7, 12}, {1, 12}, {2, 3}, {2, 5} }, // 12 - { {8, 13}, {1, 13}, {2, 3}, {2, 5} }, // 13 - { {8, 14}, {1, 14}, {2, 3}, {3, 6} }, // 14 - { {9, 15}, {1, 15}, {2, 4}, {3, 6} }, // 15 - { {10, 16}, {1, 16}, {2, 4}, {3, 6} }, // 16 - { {11, 17}, {1, 17}, {2, 4}, {3, 7} }, // 17 - { {11, 18}, {1, 18}, {3, 5}, {3, 7} }, // 18 - { 
{11, 19}, {1, 19}, {3, 5}, {4, 8} }, // 19 - { {12, 20}, {1, 20}, {3, 5}, {4, 8} }, // 20 - { {13, 21}, {1, 21}, {3, 5}, {4, 8} }, // 21 - { {14, 22}, {1, 22}, {3, 6}, {4, 9} }, // 22 - { {15, 23}, {1, 23}, {3, 6}, {4, 9} }, // 23 - { {15, 24}, {1, 24}, {3, 6}, {5, 10} }, // 24 - { {16, 25}, {1, 25}, {3, 6}, {5, 10} }, // 25 - { {16, 26}, {1, 26}, {4, 7}, {5, 10} }, // 26 - { {17, 27}, {1, 27}, {4, 7}, {5, 11} }, // 27 - { {18, 28}, {1, 28}, {4, 7}, {5, 11} }, // 28 - { {18, 29}, {1, 29}, {4, 7}, {6, 12} }, // 29 - { {19, 30}, {1, 30}, {4, 8}, {6, 12} }, // 30 - { {20, 31}, {1, 31}, {4, 8}, {6, 12} }, // 31 + TShortPoolCfg StorageCpuTable[MaxPreparedCpuCount + 1][5] { + { {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} }, // 0 + { {1, 1}, {0, 1}, {0, 1}, {0, 0}, {0, 0} }, // 1 + { {1, 2}, {0, 2}, {0, 1}, {0, 0}, {1, 1} }, // 2 + { {1, 3}, {0, 3}, {1, 1}, {0, 0}, {1, 1} }, // 3 + { {1, 4}, {1, 4}, {1, 1}, {0, 0}, {1, 2} }, // 4 + { {2, 5}, {1, 5}, {1, 1}, {0, 0}, {1, 2} }, // 5 + { {3, 6}, {1, 6}, {1, 1}, {0, 0}, {1, 2} }, // 6 + { {4, 7}, {1, 7}, {1, 2}, {0, 0}, {1, 3} }, // 7 + { {5, 8}, {1, 8}, {1, 2}, {0, 0}, {1, 3} }, // 8 + { {5, 9}, {1, 9}, {1, 2}, {0, 0}, {2, 4} }, // 9 + { {6, 10}, {1, 10}, {1, 2}, {0, 0}, {2, 4} }, // 10 + { {6, 11}, {1, 11}, {2, 3}, {0, 0}, {2, 4} }, // 11 + { {7, 12}, {1, 12}, {2, 3}, {0, 0}, {2, 5} }, // 12 + { {8, 13}, {1, 13}, {2, 3}, {0, 0}, {2, 5} }, // 13 + { {8, 14}, {1, 14}, {2, 3}, {0, 0}, {3, 6} }, // 14 + { {9, 15}, {1, 15}, {2, 4}, {0, 0}, {3, 6} }, // 15 + { {10, 16}, {1, 16}, {2, 4}, {0, 0}, {3, 6} }, // 16 + { {11, 17}, {1, 17}, {2, 4}, {0, 0}, {3, 7} }, // 17 + { {11, 18}, {1, 18}, {3, 5}, {0, 0}, {3, 7} }, // 18 + { {11, 19}, {1, 19}, {3, 5}, {0, 0}, {4, 8} }, // 19 + { {12, 20}, {1, 20}, {3, 5}, {0, 0}, {4, 8} }, // 20 + { {13, 21}, {1, 21}, {3, 5}, {0, 0}, {4, 8} }, // 21 + { {14, 22}, {1, 22}, {3, 6}, {0, 0}, {4, 9} }, // 22 + { {15, 23}, {1, 23}, {3, 6}, {0, 0}, {4, 9} }, // 23 + { {15, 24}, {1, 24}, {3, 6}, {0, 0}, {5, 10} 
}, // 24 + { {16, 25}, {1, 25}, {3, 6}, {0, 0}, {5, 10} }, // 25 + { {16, 26}, {1, 26}, {4, 7}, {0, 0}, {5, 10} }, // 26 + { {17, 27}, {1, 27}, {4, 7}, {0, 0}, {5, 11} }, // 27 + { {18, 28}, {1, 28}, {4, 7}, {0, 0}, {5, 11} }, // 28 + { {18, 29}, {1, 29}, {4, 7}, {0, 0}, {6, 12} }, // 29 + { {19, 30}, {1, 30}, {4, 8}, {0, 0}, {6, 12} }, // 30 + { {20, 31}, {1, 31}, {4, 8}, {0, 0}, {6, 12} }, // 31 }; i16 GetIOThreadCount(i16 cpuCount) { return (cpuCount - 1) / (MaxPreparedCpuCount * 2) + 1; } - TShortPoolCfg GetShortPoolChg(EPoolType pool, i16 cpuCount, TShortPoolCfg cpuTable[][4]) { + TShortPoolCfg GetShortPoolChg(EPoolKind pool, i16 cpuCount, TShortPoolCfg cpuTable[][5]) { i16 k = cpuCount / MaxPreparedCpuCount; i16 mod = cpuCount % MaxPreparedCpuCount; ui8 poolIdx = static_cast<i8>(pool); @@ -743,72 +746,100 @@ void TBasicServicesInitializer::InitializeServices(NActors::TActorSystemSetup* s scheduler->SetProgressThreshold(10'000); } - NKikimrConfig::TActorSystemConfig::TExecutor *executors[] = { - mutableSystemConfig->AddExecutor(), - mutableSystemConfig->AddExecutor(), - mutableSystemConfig->AddExecutor(), - mutableSystemConfig->AddExecutor(), - mutableSystemConfig->AddExecutor() - }; - mutableSystemConfig->SetIoExecutor(0); - auto *ioExecutor = executors[3]; - ioExecutor->SetType(NKikimrConfig::TActorSystemConfig::TExecutor::IO); - ioExecutor->SetThreads(GetIOThreadCount(cpuCount)); - ioExecutor->SetName("IO"); - ui16 poolCount = Min(5, cpuCount + 1); - ui32 executorIds[4] = {0, 1, 2, 4}; - TVector<TString> names = {"System", "User", "Batch", "IC"}; - TVector<ui32> priorities = {30, 20, 10, 40}; + TVector<TString> names = {"System", "User", "Batch", "IO", "IC"}; + TVector<ui32> priorities = {30, 20, 10, 0, 40}; + TVector<ui32> executorIds = {0, 1, 2, 3, 4}; + + auto *serviceExecutor = mutableSystemConfig->AddServiceExecutor(); + serviceExecutor->SetServiceName("Interconnect"); switch (cpuCount) { case 1: mutableSystemConfig->SetUserExecutor(1); 
mutableSystemConfig->SetSysExecutor(1); mutableSystemConfig->SetBatchExecutor(1); - names = {"Common"}; - priorities = {40,}; + mutableSystemConfig->SetIoExecutor(2); + serviceExecutor->SetExecutorId(1); + + poolCount = 2; + names = {"Common", "IO"}; + priorities = {40, 0}; + executorIds = {0, 0, 0, 1, 0}; break; case 2: mutableSystemConfig->SetUserExecutor(1); mutableSystemConfig->SetSysExecutor(1); mutableSystemConfig->SetBatchExecutor(1); - names = {"Common"}; - priorities = {40,}; + mutableSystemConfig->SetIoExecutor(2); + serviceExecutor->SetExecutorId(1); + poolCount = 2; + names = {"Common", "IO"}; + priorities = {40, 0}; + executorIds = {0, 0, 0, 1, 0}; break; case 3: mutableSystemConfig->SetUserExecutor(1); mutableSystemConfig->SetSysExecutor(1); mutableSystemConfig->SetBatchExecutor(2); - names = {"Common", "Batch", "IC"}; - priorities = {30, 10, 40,}; + mutableSystemConfig->SetIoExecutor(3); + serviceExecutor->SetExecutorId(4); + + poolCount = 4; + names = {"Common", "Batch", "IO", "IC"}; + priorities = {30, 10, 0, 40,}; + executorIds = {0, 0, 1, 2, 3}; break; default: mutableSystemConfig->SetUserExecutor(1); mutableSystemConfig->SetSysExecutor(2); mutableSystemConfig->SetBatchExecutor(3); + mutableSystemConfig->SetIoExecutor(4); + serviceExecutor->SetExecutorId(5); break; } - auto *serviceExecutor = mutableSystemConfig->AddServiceExecutor(); - serviceExecutor->SetServiceName("Interconnect"); - serviceExecutor->SetExecutorId(poolCount - 1); + + TVector<NKikimrConfig::TActorSystemConfig::TExecutor *> executors; + for (ui32 poolIdx = 0; poolIdx < poolCount; ++poolIdx) { + executors.push_back(mutableSystemConfig->AddExecutor()); + } auto &cpuTable = (mutableSystemConfig->GetNodeType() == NKikimrConfig::TActorSystemConfig::STORAGE ? StorageCpuTable : mutableSystemConfig->GetNodeType() == NKikimrConfig::TActorSystemConfig::COMPUTE ? 
ComputeCpuTable : HybridCpuTable ); - for (ui32 poolType = 0; poolType < poolCount - 1; ++poolType) { - TShortPoolCfg cfg = GetShortPoolChg(static_cast<EPoolType>(poolType), cpuCount, cpuTable); - auto *executor = executors[executorIds[poolType]]; + + for (ui32 poolIdx = 0; poolIdx < poolCount; ++poolIdx) { + auto *executor = executors[poolIdx]; + if (names[poolIdx] == "IO") { + executor->SetType(NKikimrConfig::TActorSystemConfig::TExecutor::IO); + executor->SetThreads(GetIOThreadCount(cpuCount)); + executor->SetName(names[poolIdx]); + continue; + } + EPoolKind poolKind = EPoolKind::System; + if (names[poolIdx] == "User") { + poolKind = EPoolKind::User; + } else if (names[poolIdx] == "Batch") { + poolKind = EPoolKind::Batch; + } else if (names[poolIdx] == "IC") { + poolKind = EPoolKind::IC; + } + TShortPoolCfg cfg = GetShortPoolChg(poolKind, cpuCount, cpuTable); + i16 threadsCount = cfg.ThreadCount; + if (poolCount == 2) { + threadsCount = cpuCount; + } executor->SetType(NKikimrConfig::TActorSystemConfig::TExecutor::BASIC); - executor->SetThreads(cpuCount == 2 ? 2 : cfg.ThreadCount); - executor->SetMaxThreads(cpuCount == 2 ? 
2 : cfg.MaxThreadCount); - executor->SetPriority(priorities[poolType]); - executor->SetName(names[poolType]); - if (cpuCount == 1 || cpuCount == 2) { + executor->SetThreads(threadsCount); + executor->SetThreads(Max(cfg.MaxThreadCount, threadsCount)); + executor->SetPriority(priorities[poolIdx]); + executor->SetName(names[poolIdx]); + + if (names[poolIdx] == "Common") { executor->SetSpinThreshold(0); executor->SetTimePerMailboxMicroSecs(100); - } else if (poolType == poolCount - 2) { // IC pool + } else if (names[poolIdx] == "IC") { executor->SetSpinThreshold(10); executor->SetTimePerMailboxMicroSecs(100); executor->SetMaxAvgPingDeviation(500); @@ -949,6 +980,15 @@ void TBasicServicesInitializer::InitializeServices(NActors::TActorSystemSetup* s icCommon->LocalScopeId = ScopeId.GetInterconnectScopeId(); icCommon->Cookie = icConfig.GetSuppressConnectivityCheck() ? TString() : CreateGuidAsString(); + if (icConfig.HasOutgoingHandshakeInflightLimit()) { + icCommon->OutgoingHandshakeInflightLimit = icConfig.GetOutgoingHandshakeInflightLimit(); + + // create handshake broker actor + setup->LocalServices.emplace_back(MakeHandshakeBrokerOutId(), TActorSetupCmd( + CreateHandshakeBroker(*icCommon->OutgoingHandshakeInflightLimit), + TMailboxType::ReadAsFilled, systemPoolId)); + } + #define CHANNEL(NAME) {TInterconnectChannels::NAME, #NAME} icCommon->ChannelName = { CHANNEL(IC_COMMON), @@ -986,7 +1026,7 @@ void TBasicServicesInitializer::InitializeServices(NActors::TActorSystemSetup* s if (const auto& mon = appData->Mon) { icCommon->RegisterMonPage = [mon](const TString& path, const TString& title, TActorSystem *actorSystem, const TActorId& actorId) { NMonitoring::TIndexMonPage *page = mon->RegisterIndexPage("actors", "Actors")->RegisterIndexPage("interconnect", "Interconnect"); - mon->RegisterActorPage(page, path, title, false, actorSystem, actorId); + mon->RegisterActorPage(page, path, title, false, actorSystem, actorId, /*useAuth=*/true, /*sortPages=*/false); }; 
setup->LocalServices.emplace_back(NInterconnect::MakeInterconnectMonActorId(NodeId), TActorSetupCmd( NInterconnect::CreateInterconnectMonActor(icCommon), TMailboxType::ReadAsFilled, systemPoolId)); diff --git a/ydb/core/grpc_services/CMakeLists.darwin.txt b/ydb/core/grpc_services/CMakeLists.darwin.txt index 209341cbd3..2816a8a926 100644 --- a/ydb/core/grpc_services/CMakeLists.darwin.txt +++ b/ydb/core/grpc_services/CMakeLists.darwin.txt @@ -100,6 +100,7 @@ target_sources(ydb-core-grpc_services PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_import.cpp ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_import_data.cpp ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_keep_alive.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_keyvalue.cpp ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_kh_describe.cpp ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_kh_snapshots.cpp ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_kqp_base.cpp diff --git a/ydb/core/grpc_services/CMakeLists.linux-aarch64.txt b/ydb/core/grpc_services/CMakeLists.linux-aarch64.txt index fcf80c2b10..a25eb23a5f 100644 --- a/ydb/core/grpc_services/CMakeLists.linux-aarch64.txt +++ b/ydb/core/grpc_services/CMakeLists.linux-aarch64.txt @@ -101,6 +101,7 @@ target_sources(ydb-core-grpc_services PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_import.cpp ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_import_data.cpp ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_keep_alive.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_keyvalue.cpp ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_kh_describe.cpp ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_kh_snapshots.cpp ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_kqp_base.cpp diff --git a/ydb/core/grpc_services/CMakeLists.linux.txt b/ydb/core/grpc_services/CMakeLists.linux.txt index fcf80c2b10..a25eb23a5f 100644 --- a/ydb/core/grpc_services/CMakeLists.linux.txt +++ b/ydb/core/grpc_services/CMakeLists.linux.txt @@ -101,6 +101,7 @@ 
target_sources(ydb-core-grpc_services PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_import.cpp ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_import_data.cpp ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_keep_alive.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_keyvalue.cpp ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_kh_describe.cpp ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_kh_snapshots.cpp ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_kqp_base.cpp diff --git a/ydb/core/grpc_services/rpc_keyvalue.cpp b/ydb/core/grpc_services/rpc_keyvalue.cpp new file mode 100644 index 0000000000..3f6746d7f5 --- /dev/null +++ b/ydb/core/grpc_services/rpc_keyvalue.cpp @@ -0,0 +1,1048 @@ +#include "service_keyvalue.h" + +#include <ydb/public/api/protos/ydb_keyvalue.pb.h> + +#include <ydb/core/base/path.h> +#include <ydb/core/grpc_services/rpc_scheme_base.h> +#include <ydb/core/grpc_services/rpc_common.h> +#include <ydb/core/keyvalue/keyvalue_events.h> +#include <ydb/core/tx/scheme_cache/scheme_cache.h> +#include <ydb/core/mind/local.h> +#include <ydb/core/protos/local.pb.h> + + +namespace NKikimr::NGRpcService { + +using namespace NActors; +using namespace Ydb; + +using TEvCreateVolumeKeyValueRequest = + TGrpcRequestOperationCall<Ydb::KeyValue::CreateVolumeRequest, + Ydb::KeyValue::CreateVolumeResponse>; +using TEvDropVolumeKeyValueRequest = + TGrpcRequestOperationCall<Ydb::KeyValue::DropVolumeRequest, + Ydb::KeyValue::DropVolumeResponse>; +using TEvAlterVolumeKeyValueRequest = + TGrpcRequestOperationCall<Ydb::KeyValue::AlterVolumeRequest, + Ydb::KeyValue::AlterVolumeResponse>; +using TEvDescribeVolumeKeyValueRequest = + TGrpcRequestOperationCall<Ydb::KeyValue::DescribeVolumeRequest, + Ydb::KeyValue::DescribeVolumeResponse>; +using TEvListLocalPartitionsKeyValueRequest = + TGrpcRequestOperationCall<Ydb::KeyValue::ListLocalPartitionsRequest, + Ydb::KeyValue::ListLocalPartitionsResponse>; + +using TEvAcquireLockKeyValueRequest = + 
TGrpcRequestOperationCall<Ydb::KeyValue::AcquireLockRequest, + Ydb::KeyValue::AcquireLockResponse>; +using TEvExecuteTransactionKeyValueRequest = + TGrpcRequestOperationCall<Ydb::KeyValue::ExecuteTransactionRequest, + Ydb::KeyValue::ExecuteTransactionResponse>; +using TEvReadKeyValueRequest = + TGrpcRequestOperationCall<Ydb::KeyValue::ReadRequest, + Ydb::KeyValue::ReadResponse>; +using TEvReadRangeKeyValueRequest = + TGrpcRequestOperationCall<Ydb::KeyValue::ReadRangeRequest, + Ydb::KeyValue::ReadRangeResponse>; +using TEvListRangeKeyValueRequest = + TGrpcRequestOperationCall<Ydb::KeyValue::ListRangeRequest, + Ydb::KeyValue::ListRangeResponse>; +using TEvGetStorageChannelStatusKeyValueRequest = + TGrpcRequestOperationCall<Ydb::KeyValue::GetStorageChannelStatusRequest, + Ydb::KeyValue::GetStorageChannelStatusResponse>; + +} // namespace NKikimr::NGRpcService + + +namespace NKikimr::NGRpcService { + +using namespace NActors; +using namespace Ydb; + +#define COPY_PRIMITIVE_FIELD(name) \ + to->set_ ## name(static_cast<decltype(to->name())>(from.name())) \ +// COPY_PRIMITIVE_FIELD + +#define COPY_PRIMITIVE_OPTIONAL_FIELD(name) \ + if (from.has_ ## name()) { \ + to->set_ ## name(static_cast<decltype(to->name())>(from.name())); \ + } \ +// COPY_PRIMITIVE_FIELD + +namespace { + +void CopyProtobuf(const Ydb::KeyValue::AcquireLockRequest &/*from*/, + NKikimrKeyValue::AcquireLockRequest */*to*/) +{ +} + +void CopyProtobuf(const NKikimrKeyValue::AcquireLockResult &from, + Ydb::KeyValue::AcquireLockResult *to) +{ + COPY_PRIMITIVE_FIELD(lock_generation); + COPY_PRIMITIVE_FIELD(node_id); +} + + +void CopyProtobuf(const Ydb::KeyValue::ExecuteTransactionRequest::Command::Rename &from, + NKikimrKeyValue::ExecuteTransactionRequest::Command::Rename *to) +{ + COPY_PRIMITIVE_FIELD(old_key); + COPY_PRIMITIVE_FIELD(new_key); +} + +void CopyProtobuf(const Ydb::KeyValue::ExecuteTransactionRequest::Command::Concat &from, + NKikimrKeyValue::ExecuteTransactionRequest::Command::Concat *to) +{ + 
*to->mutable_input_keys() = from.input_keys(); + COPY_PRIMITIVE_FIELD(output_key); + COPY_PRIMITIVE_FIELD(keep_inputs); +} + +void CopyProtobuf(const Ydb::KeyValue::KeyRange &from, NKikimrKeyValue::KVRange *to) { +#define CHECK_AND_SET(name) \ + if (from.has_ ## name()) { \ + COPY_PRIMITIVE_FIELD(name); \ + } \ +// CHECK_AND_SET + + CHECK_AND_SET(from_key_inclusive) + CHECK_AND_SET(from_key_exclusive) + CHECK_AND_SET(to_key_inclusive) + CHECK_AND_SET(to_key_exclusive) + +#undef CHECK_AND_SET +} + +void CopyProtobuf(const Ydb::KeyValue::ExecuteTransactionRequest::Command::CopyRange &from, + NKikimrKeyValue::ExecuteTransactionRequest::Command::CopyRange *to) +{ + CopyProtobuf(from.range(), to->mutable_range()); + COPY_PRIMITIVE_FIELD(prefix_to_remove); + COPY_PRIMITIVE_FIELD(prefix_to_add); +} + +template <typename TProtoFrom, typename TProtoTo> +void CopyPriority(TProtoFrom &&from, TProtoTo *to) { + switch(from.priority()) { + case Ydb::KeyValue::Priorities::PRIORITY_REALTIME: + to->set_priority(NKikimrKeyValue::Priorities::PRIORITY_REALTIME); + break; + case Ydb::KeyValue::Priorities::PRIORITY_BACKGROUND: + to->set_priority(NKikimrKeyValue::Priorities::PRIORITY_BACKGROUND); + break; + default: + to->set_priority(NKikimrKeyValue::Priorities::PRIORITY_UNSPECIFIED); + break; + } +} + +void CopyProtobuf(const Ydb::KeyValue::ExecuteTransactionRequest::Command::Write &from, + NKikimrKeyValue::ExecuteTransactionRequest::Command::Write *to) +{ + COPY_PRIMITIVE_FIELD(key); + COPY_PRIMITIVE_FIELD(value); + COPY_PRIMITIVE_FIELD(storage_channel); + CopyPriority(from, to); + switch(from.tactic()) { + case Ydb::KeyValue::ExecuteTransactionRequest::Command::Write::TACTIC_MAX_THROUGHPUT: + to->set_tactic(NKikimrKeyValue::ExecuteTransactionRequest::Command::Write::TACTIC_MAX_THROUGHPUT); + break; + case Ydb::KeyValue::ExecuteTransactionRequest::Command::Write::TACTIC_MIN_LATENCY: + to->set_tactic(NKikimrKeyValue::ExecuteTransactionRequest::Command::Write::TACTIC_MIN_LATENCY); + 
break; + default: + to->set_tactic(NKikimrKeyValue::ExecuteTransactionRequest::Command::Write::TACTIC_UNSPECIFIED); + break; + } +} + +void CopyProtobuf(const Ydb::KeyValue::ExecuteTransactionRequest::Command::DeleteRange &from, + NKikimrKeyValue::ExecuteTransactionRequest::Command::DeleteRange *to) +{ + CopyProtobuf(from.range(), to->mutable_range()); +} + +void CopyProtobuf(const Ydb::KeyValue::ExecuteTransactionRequest::Command &from, + NKikimrKeyValue::ExecuteTransactionRequest::Command *to) +{ +#define CHECK_AND_COPY(name) \ + if (from.has_ ## name()) { \ + CopyProtobuf(from.name(), to->mutable_ ## name()); \ + } \ +// CHECK_AND_COPY + + CHECK_AND_COPY(rename) + CHECK_AND_COPY(concat) + CHECK_AND_COPY(copy_range) + CHECK_AND_COPY(write) + CHECK_AND_COPY(delete_range) + +#undef CHECK_AND_COPY +} + +void CopyProtobuf(const Ydb::KeyValue::ExecuteTransactionRequest &from, + NKikimrKeyValue::ExecuteTransactionRequest *to) +{ + COPY_PRIMITIVE_OPTIONAL_FIELD(lock_generation); + for (auto &cmd : from.commands()) { + CopyProtobuf(cmd, to->add_commands()); + } +} + +void CopyProtobuf(const NKikimrKeyValue::StorageChannel &from, Ydb::KeyValue::StorageChannelInfo *to) { + COPY_PRIMITIVE_FIELD(storage_channel); + COPY_PRIMITIVE_FIELD(status_flag); +} + +void CopyProtobuf(const NKikimrKeyValue::ExecuteTransactionResult &from, + Ydb::KeyValue::ExecuteTransactionResult *to) +{ + COPY_PRIMITIVE_FIELD(node_id); + for (auto &channel : from.storage_channel()) { + CopyProtobuf(channel, to->add_storage_channel_info()); + } +} + +void CopyProtobuf(const Ydb::KeyValue::ReadRequest &from, NKikimrKeyValue::ReadRequest *to) { + COPY_PRIMITIVE_OPTIONAL_FIELD(lock_generation); + COPY_PRIMITIVE_FIELD(key); + COPY_PRIMITIVE_FIELD(offset); + COPY_PRIMITIVE_FIELD(size); + CopyPriority(from, to); + COPY_PRIMITIVE_FIELD(limit_bytes); +} + +void CopyProtobuf(const NKikimrKeyValue::ReadResult &from, Ydb::KeyValue::ReadResult *to) { + COPY_PRIMITIVE_FIELD(requested_key); + 
COPY_PRIMITIVE_FIELD(requested_offset); + COPY_PRIMITIVE_FIELD(requested_size); + COPY_PRIMITIVE_FIELD(value); + COPY_PRIMITIVE_FIELD(node_id); + switch (from.status()) { + case NKikimrKeyValue::Statuses::RSTATUS_OVERRUN: + to->set_is_overrun(true); + break; + default: + break; + } +} + +void CopyProtobuf(const Ydb::KeyValue::ReadRangeRequest &from, NKikimrKeyValue::ReadRangeRequest *to) { + COPY_PRIMITIVE_OPTIONAL_FIELD(lock_generation); + CopyProtobuf(from.range(), to->mutable_range()); + to->set_include_data(true); + COPY_PRIMITIVE_FIELD(limit_bytes); + CopyPriority(from, to); +} + +void CopyProtobuf(const Ydb::KeyValue::ListRangeRequest &from, NKikimrKeyValue::ReadRangeRequest *to) { + COPY_PRIMITIVE_OPTIONAL_FIELD(lock_generation); + CopyProtobuf(from.range(), to->mutable_range()); + to->set_include_data(false); + COPY_PRIMITIVE_FIELD(limit_bytes); +} + +void CopyProtobuf(const NKikimrKeyValue::ReadRangeResult::KeyValuePair &from, + Ydb::KeyValue::ReadRangeResult::KeyValuePair *to) +{ + COPY_PRIMITIVE_FIELD(key); + COPY_PRIMITIVE_FIELD(value); + COPY_PRIMITIVE_FIELD(creation_unix_time); + COPY_PRIMITIVE_FIELD(storage_channel); +} + +void CopyProtobuf(const NKikimrKeyValue::ReadRangeResult &from, + Ydb::KeyValue::ReadRangeResult *to) +{ + for (auto &pair : from.pair()) { + CopyProtobuf(pair, to->add_pair()); + } + if (from.status() == NKikimrKeyValue::Statuses::RSTATUS_OVERRUN) { + to->set_is_overrun(true); + } + COPY_PRIMITIVE_FIELD(node_id); +} + +void CopyProtobuf(const NKikimrKeyValue::ReadRangeResult::KeyValuePair &from, + Ydb::KeyValue::ListRangeResult::KeyInfo *to) +{ + COPY_PRIMITIVE_FIELD(key); + COPY_PRIMITIVE_FIELD(value_size); + COPY_PRIMITIVE_FIELD(creation_unix_time); + COPY_PRIMITIVE_FIELD(storage_channel); +} + +void CopyProtobuf(const NKikimrKeyValue::ReadRangeResult &from, + Ydb::KeyValue::ListRangeResult *to) +{ + for (auto &pair : from.pair()) { + CopyProtobuf(pair, to->add_key()); + } + if (from.status() == 
NKikimrKeyValue::Statuses::RSTATUS_OVERRUN) { + to->set_is_overrun(true); + } + COPY_PRIMITIVE_FIELD(node_id); +} + +void CopyProtobuf(const Ydb::KeyValue::GetStorageChannelStatusRequest &from, + NKikimrKeyValue::GetStorageChannelStatusRequest *to) +{ + COPY_PRIMITIVE_OPTIONAL_FIELD(lock_generation); + *to->mutable_storage_channel() = from.storage_channel(); +} + + +void CopyProtobuf(const NKikimrKeyValue::GetStorageChannelStatusResult &from, + Ydb::KeyValue::GetStorageChannelStatusResult *to) +{ + for (auto &channel : from.storage_channel()) { + CopyProtobuf(channel, to->add_storage_channel_info()); + } + COPY_PRIMITIVE_FIELD(node_id); +} + + +Ydb::StatusIds::StatusCode PullStatus(const NKikimrKeyValue::AcquireLockResult &) { + return Ydb::StatusIds::SUCCESS; +} + +template <typename TResult> +Ydb::StatusIds::StatusCode PullStatus(const TResult &result) { + switch (result.status()) { + case NKikimrKeyValue::Statuses::RSTATUS_OK: + case NKikimrKeyValue::Statuses::RSTATUS_OVERRUN: + return Ydb::StatusIds::SUCCESS; + case NKikimrKeyValue::Statuses::RSTATUS_ERROR: + return Ydb::StatusIds::GENERIC_ERROR; + case NKikimrKeyValue::Statuses::RSTATUS_TIMEOUT: + return Ydb::StatusIds::TIMEOUT; + case NKikimrKeyValue::Statuses::RSTATUS_NOT_FOUND: + return Ydb::StatusIds::NOT_FOUND; + case NKikimrKeyValue::Statuses::RSTATUS_WRONG_LOCK_GENERATION: + return Ydb::StatusIds::PRECONDITION_FAILED; + default: + return Ydb::StatusIds::INTERNAL_ERROR; + } +} + +namespace { + void AssignPoolKinds(auto &storageConfig, auto *internalStorageConfig) { + ui32 size = storageConfig.channel_size(); + + for (ui32 channelIdx = 0; channelIdx < size; ++channelIdx) { + internalStorageConfig->AddChannel()->SetPreferredPoolKind(storageConfig.channel(channelIdx).media()); + } + } +} + + +class TCreateVolumeRequest : public TRpcSchemeRequestActor<TCreateVolumeRequest, TEvCreateVolumeKeyValueRequest> { +public: + using TBase = TRpcSchemeRequestActor<TCreateVolumeRequest, TEvCreateVolumeKeyValueRequest>; 
+ using TBase::TBase; + + void Bootstrap(const TActorContext& ctx) { + TBase::Bootstrap(ctx); + Become(&TCreateVolumeRequest::StateFunc); + SendProposeRequest(ctx); + } + + void SendProposeRequest(const TActorContext &ctx) { + const auto req = this->GetProtoRequest(); + + std::pair<TString, TString> pathPair; + try { + pathPair = SplitPath(Request_->GetDatabaseName(), req->path()); + } catch (const std::exception& ex) { + Request_->RaiseIssue(NYql::ExceptionToIssue(ex)); + return Reply(StatusIds::BAD_REQUEST, ctx); + } + const auto& workingDir = pathPair.first; + const auto& name = pathPair.second; + + std::unique_ptr<TEvTxUserProxy::TEvProposeTransaction> proposeRequest = this->CreateProposeTransaction(); + NKikimrTxUserProxy::TEvProposeTransaction& record = proposeRequest->Record; + NKikimrSchemeOp::TModifyScheme* modifyScheme = record.MutableTransaction()->MutableModifyScheme(); + modifyScheme->SetWorkingDir(workingDir); + NKikimrSchemeOp::TCreateSolomonVolume* tableDesc = nullptr; + + modifyScheme->SetOperationType(NKikimrSchemeOp::EOperationType::ESchemeOpCreateSolomonVolume); + tableDesc = modifyScheme->MutableCreateSolomonVolume(); + tableDesc->SetName(name); + tableDesc->SetPartitionCount(req->partition_count()); + + if (GetProtoRequest()->has_storage_config()) { + auto &storageConfig = GetProtoRequest()->storage_config(); + auto *internalStorageConfig = tableDesc->MutableStorageConfig(); + AssignPoolKinds(storageConfig, internalStorageConfig); + } else { + tableDesc->SetChannelProfileId(GetProtoRequest()->partition_count()); + } + + ctx.Send(MakeTxProxyID(), proposeRequest.release()); + } + + STFUNC(StateFunc) { + return TBase::StateWork(ev, ctx); + } +}; + + +class TDropVolumeRequest : public TRpcSchemeRequestActor<TDropVolumeRequest, TEvDropVolumeKeyValueRequest> { +public: + using TBase = TRpcSchemeRequestActor<TDropVolumeRequest, TEvDropVolumeKeyValueRequest>; + using TBase::TBase; + + void Bootstrap(const TActorContext& ctx) { + TBase::Bootstrap(ctx); 
+ Become(&TDropVolumeRequest::StateFunc); + SendProposeRequest(ctx); + } + + void SendProposeRequest(const TActorContext &ctx) { + const auto req = this->GetProtoRequest(); + + std::pair<TString, TString> pathPair; + try { + pathPair = SplitPath(req->path()); + } catch (const std::exception& ex) { + Request_->RaiseIssue(NYql::ExceptionToIssue(ex)); + return Reply(StatusIds::BAD_REQUEST, ctx); + } + const auto& workingDir = pathPair.first; + const auto& name = pathPair.second; + + std::unique_ptr<TEvTxUserProxy::TEvProposeTransaction> proposeRequest = this->CreateProposeTransaction(); + NKikimrTxUserProxy::TEvProposeTransaction& record = proposeRequest->Record; + NKikimrSchemeOp::TModifyScheme* modifyScheme = record.MutableTransaction()->MutableModifyScheme(); + modifyScheme->SetWorkingDir(workingDir); + NKikimrSchemeOp::TDrop* drop = nullptr; + + modifyScheme->SetOperationType(NKikimrSchemeOp::EOperationType::ESchemeOpDropSolomonVolume); + drop = modifyScheme->MutableDrop(); + drop->SetName(name); + + ctx.Send(MakeTxProxyID(), proposeRequest.release()); + } + + STFUNC(StateFunc) { + return TBase::StateWork(ev, ctx); + } +}; + +class TAlterVolumeRequest : public TRpcSchemeRequestActor<TAlterVolumeRequest, TEvAlterVolumeKeyValueRequest> { +public: + using TBase = TRpcSchemeRequestActor<TAlterVolumeRequest, TEvAlterVolumeKeyValueRequest>; + using TBase::TBase; + + void Bootstrap(const TActorContext& ctx) { + TBase::Bootstrap(ctx); + Become(&TAlterVolumeRequest::StateFunc); + SendProposeRequest(ctx); + } + + void SendProposeRequest(const TActorContext &ctx) { + const auto req = this->GetProtoRequest(); + + std::pair<TString, TString> pathPair; + try { + pathPair = SplitPath(req->path()); + } catch (const std::exception& ex) { + Request_->RaiseIssue(NYql::ExceptionToIssue(ex)); + return Reply(StatusIds::BAD_REQUEST, ctx); + } + const auto& workingDir = pathPair.first; + const auto& name = pathPair.second; + + std::unique_ptr<TEvTxUserProxy::TEvProposeTransaction> 
proposeRequest = this->CreateProposeTransaction(); + NKikimrTxUserProxy::TEvProposeTransaction& record = proposeRequest->Record; + NKikimrSchemeOp::TModifyScheme* modifyScheme = record.MutableTransaction()->MutableModifyScheme(); + modifyScheme->SetWorkingDir(workingDir); + NKikimrSchemeOp::TAlterSolomonVolume* tableDesc = nullptr; + + modifyScheme->SetOperationType(NKikimrSchemeOp::EOperationType::ESchemeOpAlterSolomonVolume); + tableDesc = modifyScheme->MutableAlterSolomonVolume(); + tableDesc->SetName(name); + tableDesc->SetPartitionCount(req->alter_partition_count()); + + if (GetProtoRequest()->has_storage_config()) { + tableDesc->SetUpdateChannelsBinding(true); + auto &storageConfig = GetProtoRequest()->storage_config(); + auto *internalStorageConfig = tableDesc->MutableStorageConfig(); + AssignPoolKinds(storageConfig, internalStorageConfig); + } else { + tableDesc->SetUpdateChannelsBinding(false); + tableDesc->SetChannelProfileId(0); + } + + ctx.Send(MakeTxProxyID(), proposeRequest.release()); + } + + STFUNC(StateFunc) { + return TBase::StateWork(ev, ctx); + } +}; + +template <typename TDerived> +class TBaseKeyValueRequest { +protected: + void OnBootstrap() { + auto self = static_cast<TDerived*>(this); + Ydb::StatusIds::StatusCode status = Ydb::StatusIds::STATUS_CODE_UNSPECIFIED; + NYql::TIssues issues; + if (!self->ValidateRequest(status, issues)) { + self->Reply(status, issues, TActivationContext::AsActorContext()); + return; + } + if (const auto& userToken = self->Request_->GetSerializedToken()) { + UserToken = new NACLib::TUserToken(userToken); + } + SendNavigateRequest(); + } + + void SendNavigateRequest() { + auto self = static_cast<TDerived*>(this); + auto &rec = *self->GetProtoRequest(); + auto req = MakeHolder<NSchemeCache::TSchemeCacheNavigate>(); + auto& entry = req->ResultSet.emplace_back(); + entry.Path = ::NKikimr::SplitPath(rec.path()); + entry.RequestType = NSchemeCache::TSchemeCacheNavigate::TEntry::ERequestType::ByPath; + 
entry.ShowPrivatePath = true; + entry.SyncVersion = false; + req->UserToken = UserToken; + req->DatabaseName = self->Request_->GetDatabaseName().GetOrElse(""); + auto ev = new TEvTxProxySchemeCache::TEvNavigateKeySet(req.Release()); + self->Send(MakeSchemeCacheID(), ev); + } + + bool OnNavigateKeySetResult(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr &ev, ui32 access) { + auto self = static_cast<TDerived*>(this); + TEvTxProxySchemeCache::TEvNavigateKeySetResult* res = ev->Get(); + NSchemeCache::TSchemeCacheNavigate *request = res->Request.Get(); + + auto ctx = TActivationContext::AsActorContext(); + + if (res->Request->ResultSet.size() != 1) { + self->Reply(StatusIds::INTERNAL_ERROR, "Received an incorrect answer from SchemeCache.", NKikimrIssues::TIssuesIds::UNEXPECTED, ctx); + return false; + } + + switch (request->ResultSet[0].Status) { + case NSchemeCache::TSchemeCacheNavigate::EStatus::Ok: + break; + case NSchemeCache::TSchemeCacheNavigate::EStatus::RootUnknown: + case NSchemeCache::TSchemeCacheNavigate::EStatus::PathErrorUnknown: + self->Reply(StatusIds::SCHEME_ERROR, "Path isn't exist.", NKikimrIssues::TIssuesIds::PATH_NOT_EXIST, ctx); + return false; + case NSchemeCache::TSchemeCacheNavigate::EStatus::LookupError: + case NSchemeCache::TSchemeCacheNavigate::EStatus::RedirectLookupError: + self->Reply(StatusIds::UNAVAILABLE, "Database resolve failed with no certain result.", NKikimrIssues::TIssuesIds::RESOLVE_LOOKUP_ERROR, ctx); + return false; + default: + self->Reply(StatusIds::UNAVAILABLE, "Resolve error", NKikimrIssues::TIssuesIds::GENERIC_RESOLVE_ERROR, ctx); + return false; + } + + if (!self->CheckAccess(CanonizePath(res->Request->ResultSet[0].Path), res->Request->ResultSet[0].SecurityObject, access)) { + return false; + } + if (!request->ResultSet[0].SolomonVolumeInfo) { + self->Reply(StatusIds::SCHEME_ERROR, "Table isn't keyvalue.", NKikimrIssues::TIssuesIds::DEFAULT_ERROR, ctx); + return false; + } + + return true; + } + + bool 
CheckAccess(const TString& path, TIntrusivePtr<TSecurityObject> securityObject, ui32 access) { + auto self = static_cast<TDerived*>(this); + if (!UserToken || !securityObject) { + return true; + } + + if (securityObject->CheckAccess(access, *UserToken)) { + return true; + } + + self->Reply(Ydb::StatusIds::UNAUTHORIZED, + TStringBuilder() << "Access denied" + << ": for# " << UserToken->GetUserSID() + << ", path# " << path + << ", access# " << NACLib::AccessRightsToString(access), + NKikimrIssues::TIssuesIds::ACCESS_DENIED, + TActivationContext::AsActorContext()); + return false; + } + +private: + TIntrusiveConstPtr<NACLib::TUserToken> UserToken; +}; + +class TDescribeVolumeRequest + : public TRpcOperationRequestActor<TDescribeVolumeRequest, TEvDescribeVolumeKeyValueRequest> + , public TBaseKeyValueRequest<TDescribeVolumeRequest> +{ +public: + using TBase = TRpcOperationRequestActor<TDescribeVolumeRequest, TEvDescribeVolumeKeyValueRequest>; + using TBase::TBase; + + friend class TBaseKeyValueRequest<TDescribeVolumeRequest>; + + void Bootstrap(const TActorContext& ctx) { + TBase::Bootstrap(ctx); + OnBootstrap(); + Become(&TDescribeVolumeRequest::StateFunc); + } + + +protected: + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); + default: + return TBase::StateFuncBase(ev, ctx); + } + } + + void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr &ev) { + TEvTxProxySchemeCache::TEvNavigateKeySetResult* res = ev->Get(); + NSchemeCache::TSchemeCacheNavigate *request = res->Request.Get(); + + if (!OnNavigateKeySetResult(ev, NACLib::DescribeSchema)) { + return; + } + + const NKikimrSchemeOp::TSolomonVolumeDescription &desc = request->ResultSet[0].SolomonVolumeInfo->Description; + Ydb::KeyValue::DescribeVolumeResult result; + result.set_path(this->GetProtoRequest()->path()); + result.set_partition_count(desc.PartitionsSize()); + this->ReplyWithResult(Ydb::StatusIds::SUCCESS, result, 
TActivationContext::AsActorContext()); + } + + bool ValidateRequest(Ydb::StatusIds::StatusCode& /*status*/, NYql::TIssues& /*issues*/) { + return true; + } + +private: + TIntrusiveConstPtr<NACLib::TUserToken> UserToken; +}; + + +class TListLocalPartitionsRequest + : public TRpcOperationRequestActor<TListLocalPartitionsRequest, TEvListLocalPartitionsKeyValueRequest> + , public TBaseKeyValueRequest<TListLocalPartitionsRequest> +{ +public: + using TBase = TRpcOperationRequestActor<TListLocalPartitionsRequest, TEvListLocalPartitionsKeyValueRequest>; + using TBase::TBase; + + friend class TBaseKeyValueRequest<TListLocalPartitionsRequest>; + + void Bootstrap(const TActorContext& ctx) { + TBase::Bootstrap(ctx); + OnBootstrap(); + Become(&TListLocalPartitionsRequest::StateFunc); + } + +protected: + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); + hFunc(TEvLocal::TEvEnumerateTabletsResult, Handle); + default: + return TBase::StateFuncBase(ev, ctx); + } + } + + void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr &ev) { + TEvTxProxySchemeCache::TEvNavigateKeySetResult* res = ev->Get(); + NSchemeCache::TSchemeCacheNavigate *request = res->Request.Get(); + + if (!OnNavigateKeySetResult(ev, NACLib::DescribeSchema)) { + return; + } + + const NKikimrSchemeOp::TSolomonVolumeDescription &desc = request->ResultSet[0].SolomonVolumeInfo->Description; + for (const NKikimrSchemeOp::TSolomonVolumeDescription::TPartition &partition : desc.GetPartitions()) { + TabletIdToPartitionId[partition.GetTabletId()] = partition.GetPartitionId(); + } + + if (TabletIdToPartitionId.empty()) { + Ydb::KeyValue::ListLocalPartitionsResult result; + result.set_path(this->GetProtoRequest()->path()); + result.set_node_id(SelfId().NodeId()); + this->ReplyWithResult(Ydb::StatusIds::SUCCESS, result, TActivationContext::AsActorContext()); + return; + } + + SendRequest(); + } + + TActorId MakeLocalRegistrarID() { + auto &ctx = 
TActivationContext::AsActorContext(); + auto &domainsInfo = *AppData(ctx)->DomainsInfo; + auto domainIt = domainsInfo.Domains.find(1); + if (domainIt == domainsInfo.Domains.end()) { + TActorId invalidId; + return invalidId; + } + auto &rec = *this->GetProtoRequest(); + ui32 nodeId = rec.node_id() ? rec.node_id() : ctx.SelfID.NodeId(); + ui32 hiveUid = domainsInfo.GetDefaultHiveUid(1); + ui64 hiveId = domainsInfo.GetHive(hiveUid); + return ::NKikimr::MakeLocalRegistrarID(nodeId, hiveId); + } + + TEvLocal::TEvEnumerateTablets* MakeRequest() { + return new TEvLocal::TEvEnumerateTablets(TTabletTypes::KeyValue); + } + + void SendRequest() { + Send(MakeLocalRegistrarID(), MakeRequest(), IEventHandle::FlagTrackDelivery, 0); + } + + void Handle(TEvLocal::TEvEnumerateTabletsResult::TPtr &ev) { + const NKikimrLocal::TEvEnumerateTabletsResult &record = ev->Get()->Record; + if (!record.HasStatus() || record.GetStatus() != NKikimrProto::OK) { + this->Reply(StatusIds::INTERNAL_ERROR, "Received an incorrect answer from Local.", NKikimrIssues::TIssuesIds::UNEXPECTED, TActivationContext::AsActorContext()); + return; + } + + Ydb::KeyValue::ListLocalPartitionsResult result; + result.set_path(this->GetProtoRequest()->path()); + result.set_node_id(SelfId().NodeId()); + for (auto &item : record.GetTabletInfo()) { + if (!item.HasTabletId()) { + continue; + } + auto it = TabletIdToPartitionId.find(item.GetTabletId()); + if (it != TabletIdToPartitionId.end()) { + result.add_partition_ids(it->second); + } + } + this->ReplyWithResult(Ydb::StatusIds::SUCCESS, result, TActivationContext::AsActorContext()); + } + + bool ValidateRequest(Ydb::StatusIds::StatusCode& /*status*/, NYql::TIssues& /*issues*/) { + return true; + } + +private: + THashMap<ui64, ui64> TabletIdToPartitionId; +}; + + +template <typename TDerived, typename TRequest, typename TResultRecord, typename TKVRequest> +class TKeyValueRequestGrpc + : public TRpcOperationRequestActor<TDerived, TRequest> + , public 
TBaseKeyValueRequest<TKeyValueRequestGrpc<TDerived, TRequest, TResultRecord, TKVRequest>> +{ +public: + using TBase = TRpcOperationRequestActor<TDerived, TRequest>; + using TBase::TBase; + + friend class TBaseKeyValueRequest<TKeyValueRequestGrpc<TDerived, TRequest, TResultRecord, TKVRequest>>; + + void Bootstrap(const TActorContext& ctx) { + TBase::Bootstrap(ctx); + this->OnBootstrap(); + this->Become(&TKeyValueRequestGrpc::StateFunc); + } + + +protected: + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvTabletPipe::TEvClientConnected, Handle); + hFunc(TEvTabletPipe::TEvClientDestroyed, Handle); + hFunc(TKVRequest::TResponse, Handle); + hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); + default: + return TBase::StateFuncBase(ev, ctx); + } + } + + void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr &ev) { + TEvTxProxySchemeCache::TEvNavigateKeySetResult* res = ev->Get(); + NSchemeCache::TSchemeCacheNavigate *request = res->Request.Get(); + + if (!this->OnNavigateKeySetResult(ev, static_cast<TDerived*>(this)->GetRequiredAccessRights())) { + return; + } + + auto &rec = *this->GetProtoRequest(); + const NKikimrSchemeOp::TSolomonVolumeDescription &desc = request->ResultSet[0].SolomonVolumeInfo->Description; + + if (rec.partition_id() >= desc.PartitionsSize()) { + this->Reply(StatusIds::SCHEME_ERROR, "The partition wasn't found. 
Partition ID was larger or equal partition count.", NKikimrIssues::TIssuesIds::DEFAULT_ERROR, TActivationContext::AsActorContext()); + return; + } + + ui64 partitionId = rec.partition_id(); + if (const auto &partition = desc.GetPartitions(rec.partition_id()); partition.GetPartitionId() == partitionId) { + KVTabletId = partition.GetTabletId(); + } else { + Y_VERIFY_DEBUG(false); + for (const NKikimrSchemeOp::TSolomonVolumeDescription::TPartition &partition : desc.GetPartitions()) { + if (partition.GetPartitionId() == partitionId) { + KVTabletId = partition.GetTabletId(); + break; + } + } + } + + if (!KVTabletId) { + this->Reply(StatusIds::INTERNAL_ERROR, "Partition wasn't found.", NKikimrIssues::TIssuesIds::DEFAULT_ERROR, TActivationContext::AsActorContext()); + return; + } + + CreatePipe(); + SendRequest(); + } + + void SendRequest() { + std::unique_ptr<TKVRequest> req = std::make_unique<TKVRequest>(); + auto &rec = *this->GetProtoRequest(); + CopyProtobuf(rec, &req->Record); + req->Record.set_tablet_id(KVTabletId); + NTabletPipe::SendData(this->SelfId(), KVPipeClient, req.release(), 0); + } + + void Handle(typename TKVRequest::TResponse::TPtr &ev) { + TResultRecord result; + CopyProtobuf(ev->Get()->Record, &result); + auto status = PullStatus(ev->Get()->Record); + this->ReplyWithResult(status, result, TActivationContext::AsActorContext()); + } + + NTabletPipe::TClientConfig GetPipeConfig() { + NTabletPipe::TClientConfig cfg; + cfg.RetryPolicy = { + .RetryLimitCount = 3u + }; + return cfg; + } + + void CreatePipe() { + KVPipeClient = this->Register(NTabletPipe::CreateClient(this->SelfId(), KVTabletId, GetPipeConfig())); + } + + void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev) { + if (ev->Get()->Status != NKikimrProto::OK) { + this->Reply(StatusIds::UNAVAILABLE, "Failed to connect to coordination node.", NKikimrIssues::TIssuesIds::SHARD_NOT_AVAILABLE, TActivationContext::AsActorContext()); + } + } + + void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr&) { + 
this->Reply(StatusIds::UNAVAILABLE, "Connection to coordination node was lost.", NKikimrIssues::TIssuesIds::SHARD_NOT_AVAILABLE, TActivationContext::AsActorContext()); + } + + virtual bool ValidateRequest(Ydb::StatusIds::StatusCode& status, NYql::TIssues& issues) = 0; + + void PassAway() override { + if (KVPipeClient) { + NTabletPipe::CloseClient(this->SelfId(), KVPipeClient); + KVPipeClient = {}; + } + TBase::PassAway(); + } + +protected: + ui64 KVTabletId = 0; + TActorId KVPipeClient; +}; + +class TAcquireLockRequest + : public TKeyValueRequestGrpc<TAcquireLockRequest, TEvAcquireLockKeyValueRequest, + Ydb::KeyValue::AcquireLockResult, TEvKeyValue::TEvAcquireLock> +{ +public: + using TBase = TKeyValueRequestGrpc<TAcquireLockRequest, TEvAcquireLockKeyValueRequest, + Ydb::KeyValue::AcquireLockResult, TEvKeyValue::TEvAcquireLock>; + using TBase::TBase; + + bool ValidateRequest(Ydb::StatusIds::StatusCode& /*status*/, NYql::TIssues& /*issues*/) override { + return true; + } + NACLib::EAccessRights GetRequiredAccessRights() const { + return NACLib::UpdateRow; + } +}; + + +class TExecuteTransactionRequest + : public TKeyValueRequestGrpc<TExecuteTransactionRequest, TEvExecuteTransactionKeyValueRequest, + Ydb::KeyValue::ExecuteTransactionResult, TEvKeyValue::TEvExecuteTransaction> { +public: + using TBase = TKeyValueRequestGrpc<TExecuteTransactionRequest, TEvExecuteTransactionKeyValueRequest, + Ydb::KeyValue::ExecuteTransactionResult, TEvKeyValue::TEvExecuteTransaction>; + using TBase::TBase; + + bool ValidateRequest(Ydb::StatusIds::StatusCode& /*status*/, NYql::TIssues& /*issues*/) override { + return true; + } + + NACLib::EAccessRights GetRequiredAccessRights() const { + ui32 accessRights = 0; + auto &rec = *this->GetProtoRequest(); + for (auto &command : rec.commands()) { + if (command.has_delete_range()) { + accessRights |= NACLib::EraseRow; + } + if (command.has_rename()) { + accessRights |= NACLib::UpdateRow | NACLib::EraseRow; + } + if (command.has_copy_range()) { + 
accessRights |= NACLib::UpdateRow; + } + if (command.has_concat() && !command.concat().keep_inputs()) { + accessRights |= NACLib::UpdateRow | NACLib::EraseRow; + } + if (command.has_concat() && command.concat().keep_inputs()) { + accessRights |= NACLib::UpdateRow; + } + if (command.has_write()) { + accessRights |= NACLib::UpdateRow; + } + } + return static_cast<NACLib::EAccessRights>(accessRights); + } +}; + +class TReadRequest + : public TKeyValueRequestGrpc<TReadRequest, TEvReadKeyValueRequest, + Ydb::KeyValue::ReadResult, TEvKeyValue::TEvRead> { +public: + using TBase = TKeyValueRequestGrpc<TReadRequest, TEvReadKeyValueRequest, + Ydb::KeyValue::ReadResult, TEvKeyValue::TEvRead>; + using TBase::TBase; + using TBase::Handle; + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + default: + return TBase::StateFunc(ev, ctx); + } + } + bool ValidateRequest(Ydb::StatusIds::StatusCode& /*status*/, NYql::TIssues& /*issues*/) override { + return true; + } + NACLib::EAccessRights GetRequiredAccessRights() const { + return NACLib::SelectRow; + } +}; + +class TReadRangeRequest + : public TKeyValueRequestGrpc<TReadRangeRequest, TEvReadRangeKeyValueRequest, + Ydb::KeyValue::ReadRangeResult, TEvKeyValue::TEvReadRange> { +public: + using TBase = TKeyValueRequestGrpc<TReadRangeRequest, TEvReadRangeKeyValueRequest, + Ydb::KeyValue::ReadRangeResult, TEvKeyValue::TEvReadRange>; + using TBase::TBase; + using TBase::Handle; + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + default: + return TBase::StateFunc(ev, ctx); + } + } + bool ValidateRequest(Ydb::StatusIds::StatusCode& /*status*/, NYql::TIssues& /*issues*/) override { + return true; + } + NACLib::EAccessRights GetRequiredAccessRights() const { + return NACLib::SelectRow; + } +}; + +class TListRangeRequest + : public TKeyValueRequestGrpc<TListRangeRequest, TEvListRangeKeyValueRequest, + Ydb::KeyValue::ListRangeResult, TEvKeyValue::TEvReadRange> { +public: + using TBase = TKeyValueRequestGrpc<TListRangeRequest, 
TEvListRangeKeyValueRequest, + Ydb::KeyValue::ListRangeResult, TEvKeyValue::TEvReadRange>; + using TBase::TBase; + using TBase::Handle; + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + default: + return TBase::StateFunc(ev, ctx); + } + } + bool ValidateRequest(Ydb::StatusIds::StatusCode& /*status*/, NYql::TIssues& /*issues*/) override { + return true; + } + NACLib::EAccessRights GetRequiredAccessRights() const { + return NACLib::SelectRow; + } +}; + +class TGetStorageChannelStatusRequest + : public TKeyValueRequestGrpc<TGetStorageChannelStatusRequest, TEvGetStorageChannelStatusKeyValueRequest, + Ydb::KeyValue::GetStorageChannelStatusResult, TEvKeyValue::TEvGetStorageChannelStatus> { +public: + using TBase = TKeyValueRequestGrpc<TGetStorageChannelStatusRequest, TEvGetStorageChannelStatusKeyValueRequest, + Ydb::KeyValue::GetStorageChannelStatusResult, TEvKeyValue::TEvGetStorageChannelStatus>; + using TBase::TBase; + using TBase::Handle; + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + default: + return TBase::StateFunc(ev, ctx); + } + } + bool ValidateRequest(Ydb::StatusIds::StatusCode& /*status*/, NYql::TIssues& /*issues*/) override { + return true; + } + NACLib::EAccessRights GetRequiredAccessRights() const { + return NACLib::DescribeSchema; + } +}; + +} + + +void DoCreateVolumeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) { + TActivationContext::AsActorContext().Register(new TCreateVolumeRequest(p.release())); +} + +void DoDropVolumeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) { + TActivationContext::AsActorContext().Register(new TDropVolumeRequest(p.release())); +} + +void DoAlterVolumeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) { + TActivationContext::AsActorContext().Register(new TAlterVolumeRequest(p.release())); +} + +void DoDescribeVolumeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) { + TActivationContext::AsActorContext().Register(new 
TDescribeVolumeRequest(p.release())); +} + +void DoListLocalPartitionsKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) { + TActivationContext::AsActorContext().Register(new TListLocalPartitionsRequest(p.release())); +} + +void DoAcquireLockKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) { + TActivationContext::AsActorContext().Register(new TAcquireLockRequest(p.release())); +} + +void DoExecuteTransactionKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) { + TActivationContext::AsActorContext().Register(new TExecuteTransactionRequest(p.release())); +} + +void DoReadKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) { + TActivationContext::AsActorContext().Register(new TReadRequest(p.release())); +} + +void DoReadRangeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) { + TActivationContext::AsActorContext().Register(new TReadRangeRequest(p.release())); +} + +void DoListRangeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) { + TActivationContext::AsActorContext().Register(new TListRangeRequest(p.release())); +} + +void DoGetStorageChannelStatusKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) { + TActivationContext::AsActorContext().Register(new TGetStorageChannelStatusRequest(p.release())); +} + +} // namespace NKikimr::NGRpcService diff --git a/ydb/core/grpc_services/service_keyvalue.h b/ydb/core/grpc_services/service_keyvalue.h new file mode 100644 index 0000000000..9bbb430178 --- /dev/null +++ b/ydb/core/grpc_services/service_keyvalue.h @@ -0,0 +1,23 @@ +#pragma once + +#include <memory> + +namespace NKikimr::NGRpcService { + + class IRequestOpCtx; + class IFacilityProvider; + + void DoCreateVolumeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&); + void DoDropVolumeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&); + void DoAlterVolumeKeyValue(std::unique_ptr<IRequestOpCtx> p, const 
IFacilityProvider&); + void DoDescribeVolumeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&); + void DoListLocalPartitionsKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&); + + void DoAcquireLockKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&); + void DoExecuteTransactionKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&); + void DoReadKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&); + void DoReadRangeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&); + void DoListRangeKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&); + void DoGetStorageChannelStatusKeyValue(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&); + +} // NKikimr::NGRpcService diff --git a/ydb/core/health_check/health_check.cpp b/ydb/core/health_check/health_check.cpp index 409b3594db..2154f6b56a 100644 --- a/ydb/core/health_check/health_check.cpp +++ b/ydb/core/health_check/health_check.cpp @@ -1639,6 +1639,7 @@ public: if (!vDiskInfo.GetReplicated()) { context.IssueRecords.clear(); context.ReportStatus(Ydb::Monitoring::StatusFlag::BLUE, "Replication in progress", ETags::VDiskState); + storageVDiskStatus.set_overall(context.GetOverallStatus()); return; } diff --git a/ydb/core/health_check/health_check_ut.cpp b/ydb/core/health_check/health_check_ut.cpp index b402d39f2d..f7c5716209 100644 --- a/ydb/core/health_check/health_check_ut.cpp +++ b/ydb/core/health_check/health_check_ut.cpp @@ -47,8 +47,8 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } int const GROUP_START_ID = 1200; - int const VCARD_START_ID = 5500; - + int const VCARD_START_ID = 124; + void ChangeDescribeSchemeResult(TEvSchemeShard::TEvDescribeSchemeResult::TPtr* ev, ui64 size = 20000000, ui64 quota = 90000000) { auto record = (*ev)->Get()->MutableRecord(); auto pool = record->mutable_pathdescription()->mutable_domaindescription()->add_storagepools(); @@ -79,7 +79,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) 
{ } }; - void AddGroupVSlotInControllerConfigResponse(TEvBlobStorage::TEvControllerConfigResponse::TPtr* ev, int groupCount, int vslotCount) { + void AddGroupVSlotInControllerConfigResponse(TEvBlobStorage::TEvControllerConfigResponse::TPtr* ev, int groupCount, int vslotCount, TString erasurespecies = NHealthCheck::TSelfCheckRequest::BLOCK_4_2) { auto& pbRecord = (*ev)->Get()->Record; auto pbConfig = pbRecord.mutable_response()->mutable_status(0)->mutable_baseconfig(); @@ -88,6 +88,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { auto vslotIdSample = pbConfig->group(0).vslotid(0); pbConfig->clear_group(); pbConfig->clear_vslot(); + pbConfig->clear_pdisk(); auto groupId = GROUP_START_ID; auto vslotId = VCARD_START_ID; @@ -96,6 +97,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { auto group = pbConfig->add_group(); group->CopyFrom(groupSample); group->set_groupid(groupId); + group->set_erasurespecies(erasurespecies); group->clear_vslotid(); for (int j = 0; j < vslotCount; j++) { @@ -134,6 +136,51 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } } + void AddVSlotInVDiskStateResponse(NNodeWhiteboard::TEvWhiteboard::TEvVDiskStateResponse::TPtr* ev, const TVector<Ydb::Monitoring::StatusFlag::Status>& vdiskStatuses) { + auto& pbRecord = (*ev)->Get()->Record; + + auto sample = pbRecord.vdiskstateinfo(0); + pbRecord.clear_vdiskstateinfo(); + + auto groupId = GROUP_START_ID; + auto vslotId = VCARD_START_ID; + + for (auto status: vdiskStatuses) { + switch (status) { + case Ydb::Monitoring::StatusFlag::RED: { + auto state = pbRecord.add_vdiskstateinfo(); + state->CopyFrom(sample); + state->mutable_vdiskid()->set_vdisk(vslotId++); + state->mutable_vdiskid()->set_groupid(groupId); + state->set_pdiskid(100); + state->set_vdiskstate(NKikimrWhiteboard::EVDiskState::PDiskError); + break; + } + case Ydb::Monitoring::StatusFlag::BLUE: { + auto state = pbRecord.add_vdiskstateinfo(); + state->CopyFrom(sample); + state->mutable_vdiskid()->set_vdisk(vslotId++); + 
state->mutable_vdiskid()->set_groupid(groupId); + state->set_pdiskid(100); + state->set_vdiskstate(NKikimrWhiteboard::EVDiskState::OK); + state->set_replicated(false); + break; + } + case Ydb::Monitoring::StatusFlag::YELLOW: { + auto state = pbRecord.add_vdiskstateinfo(); + state->CopyFrom(sample); + state->mutable_vdiskid()->set_vdisk(vslotId++); + state->mutable_vdiskid()->set_groupid(groupId); + state->set_pdiskid(100); + state->set_vdiskstate(NKikimrWhiteboard::EVDiskState::SyncGuidRecovery); + break; + } + default: + break; + } + } + } + void ListingTest(int const groupNumber, int const vdiscPerGroupNumber) { TPortManager tp; ui16 port = tp.GetPort(2134); @@ -145,7 +192,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { TServer server(settings); server.EnableGRpc(grpcPort); TClient client(settings); - TTestActorRuntime &runtime = *server.GetRuntime(); + TTestActorRuntime& runtime = *server.GetRuntime(); TActorId sender = runtime.AllocateEdgeActor(); TAutoPtr<IEventHandle> handle; @@ -214,48 +261,66 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { UNIT_ASSERT_VALUES_EQUAL(issueVdiscCount, issueVdiscNumber); } - Y_UNIT_TEST(IssuesGroupsListing) { - int groupNumber = NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE; - ListingTest(groupNumber, 1); - } - - Y_UNIT_TEST(IssuesVCardListing) { - int vcardNumber = NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE; - ListingTest(1, vcardNumber); - } - - Y_UNIT_TEST(IssuesGroupsVCardListing) { - int groupNumber = NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE; - int vcardNumber = NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE; - ListingTest(groupNumber, vcardNumber); - } - - Y_UNIT_TEST(IssuesGroupsMerging) { - int groupNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT; - ListingTest(groupNumber, 1); - } - - Y_UNIT_TEST(IssuesVCardMerging) { - int vcardNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT; - ListingTest(1, vcardNumber); - } + void CheckGroupStatusDependsOnVdisks(TString 
erasurespecies, const Ydb::Monitoring::StatusFlag::Status expectiongGroupStatus, const TVector<Ydb::Monitoring::StatusFlag::Status>& vdiskStatuses) { + TPortManager tp; + ui16 port = tp.GetPort(2134); + ui16 grpcPort = tp.GetPort(2135); + auto settings = TServerSettings(port) + .SetNodeCount(2) + .SetUseRealThreads(false) + .SetDomainName("Root"); + TServer server(settings); + server.EnableGRpc(grpcPort); + TClient client(settings); + TTestActorRuntime& runtime = *server.GetRuntime(); - Y_UNIT_TEST(IssuesGroupsVCardMerging) { - int groupNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT; - int vcardNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT; - ListingTest(groupNumber, vcardNumber); - } + TActorId sender = runtime.AllocateEdgeActor(); + TAutoPtr<IEventHandle> handle; - Y_UNIT_TEST(IssuesGroupsDeleting) { - ListingTest(100, 1); - } + auto observerFunc = [&](TTestActorRuntimeBase&, TAutoPtr<IEventHandle>& ev) { + switch (ev->GetTypeRewrite()) { + case TEvSchemeShard::EvDescribeSchemeResult: { + auto *x = reinterpret_cast<NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr*>(&ev); + ChangeDescribeSchemeResult(x); + break; + } + case TEvBlobStorage::EvControllerSelectGroupsResult: { + auto *x = reinterpret_cast<TEvBlobStorage::TEvControllerSelectGroupsResult::TPtr*>(&ev); + AddGroupsInControllerSelectGroupsResult(x, 1); + break; + } + case TEvBlobStorage::EvControllerConfigResponse: { + auto *x = reinterpret_cast<TEvBlobStorage::TEvControllerConfigResponse::TPtr*>(&ev); + AddGroupVSlotInControllerConfigResponse(x, 1, vdiskStatuses.size(), erasurespecies); + break; + } + case NNodeWhiteboard::TEvWhiteboard::EvVDiskStateResponse: { + auto *x = reinterpret_cast<NNodeWhiteboard::TEvWhiteboard::TEvVDiskStateResponse::TPtr*>(&ev); + AddVSlotInVDiskStateResponse(x, vdiskStatuses); + break; + } + case NNodeWhiteboard::TEvWhiteboard::EvPDiskStateResponse: { + auto *x = 
reinterpret_cast<NNodeWhiteboard::TEvWhiteboard::TEvPDiskStateResponse::TPtr*>(&ev); + (*x)->Get()->Record.clear_pdiskstateinfo(); + break; + } + } - Y_UNIT_TEST(IssuesVCardDeleting) { - ListingTest(1, 100); - } + return TTestActorRuntime::EEventAction::PROCESS; + }; + runtime.SetObserverFunc(observerFunc); - Y_UNIT_TEST(IssuesGroupsVCardDeleting) { - ListingTest(100, 100); + auto *request = new NHealthCheck::TEvSelfCheckRequest; + runtime.Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, request, 0)); + NHealthCheck::TEvSelfCheckResult* result = runtime.GrabEdgeEvent<NHealthCheck::TEvSelfCheckResult>(handle); + int groupIssuesCount = 0; + for (const auto& issue_log : result->Result.Getissue_log()) { + if (issue_log.type() == "STORAGE_GROUP" && issue_log.location().storage().pool().name() == "/Root:test") { + UNIT_ASSERT_VALUES_EQUAL((int)issue_log.status(), (int)expectiongGroupStatus); + groupIssuesCount++; + } + } + UNIT_ASSERT_VALUES_EQUAL(groupIssuesCount, 1); } void ChangeUsageDescribeSchemeResult(TEvSchemeShard::TEvDescribeSchemeResult::TPtr* ev, ui64 size, ui64 quota) { @@ -280,7 +345,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { TServer server(settings); server.EnableGRpc(grpcPort); TClient client(settings); - TTestActorRuntime &runtime = *server.GetRuntime(); + TTestActorRuntime& runtime = *server.GetRuntime(); TActorId sender = runtime.AllocateEdgeActor(); TAutoPtr<IEventHandle> handle; @@ -312,6 +377,118 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { UNIT_ASSERT_VALUES_EQUAL(storageIssuesCount, storageIssuesNumber); } + Y_UNIT_TEST(IssuesGroupsListing) { + int groupNumber = NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE; + ListingTest(groupNumber, 1); + } + + Y_UNIT_TEST(IssuesVCardListing) { + int vcardNumber = NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE; + ListingTest(1, vcardNumber); + } + + Y_UNIT_TEST(IssuesGroupsVCardListing) { + int groupNumber = NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE; + int vcardNumber = 
NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE; + ListingTest(groupNumber, vcardNumber); + } + + Y_UNIT_TEST(IssuesGroupsMerging) { + int groupNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT; + ListingTest(groupNumber, 1); + } + + Y_UNIT_TEST(IssuesVCardMerging) { + int vcardNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT; + ListingTest(1, vcardNumber); + } + + Y_UNIT_TEST(IssuesGroupsVCardMerging) { + int groupNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT; + int vcardNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT; + ListingTest(groupNumber, vcardNumber); + } + + Y_UNIT_TEST(IssuesGroupsDeleting) { + ListingTest(100, 1); + } + + Y_UNIT_TEST(IssuesVCardDeleting) { + ListingTest(1, 100); + } + + Y_UNIT_TEST(IssuesGroupsVCardDeleting) { + ListingTest(100, 100); + } + + Y_UNIT_TEST(NoneRedGroupWhenRedVdisk) { + CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::NONE, Ydb::Monitoring::StatusFlag::RED, {Ydb::Monitoring::StatusFlag::RED}); + } + + Y_UNIT_TEST(NoneRedGroupWhenBlueVdisk) { + CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::NONE, Ydb::Monitoring::StatusFlag::RED, {Ydb::Monitoring::StatusFlag::BLUE}); + } + + Y_UNIT_TEST(NoneYellowGroupWhenYellowVdisk) { + CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::NONE, Ydb::Monitoring::StatusFlag::YELLOW, {Ydb::Monitoring::StatusFlag::YELLOW}); + } + + Y_UNIT_TEST(Block42RedGroupWhen3RedVdisks) { + CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::BLOCK_4_2, Ydb::Monitoring::StatusFlag::RED, {Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::RED}); + } + + Y_UNIT_TEST(Block42RedGroupWhen2RedBlueVdisks) { + CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::BLOCK_4_2, Ydb::Monitoring::StatusFlag::RED, {Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::BLUE}); + } + + 
Y_UNIT_TEST(Block42OrangeGroupWhen2RedVdisks) { + CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::BLOCK_4_2, Ydb::Monitoring::StatusFlag::ORANGE, {Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::RED}); + } + + Y_UNIT_TEST(Block42OrangeGroupWhenRedBlueVdisks) { + CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::BLOCK_4_2, Ydb::Monitoring::StatusFlag::ORANGE, {Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::BLUE}); + } + + Y_UNIT_TEST(Block42YellowGroupWhenRedVdisk) { + CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::BLOCK_4_2, Ydb::Monitoring::StatusFlag::YELLOW, {Ydb::Monitoring::StatusFlag::RED}); + } + + Y_UNIT_TEST(Block42BlueGroupWhenBlueVdisk) { + CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::BLOCK_4_2, Ydb::Monitoring::StatusFlag::BLUE, {Ydb::Monitoring::StatusFlag::BLUE}); + } + + Y_UNIT_TEST(Block42YellowGroupWhenYellowVdisk) { + CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::BLOCK_4_2, Ydb::Monitoring::StatusFlag::YELLOW, {Ydb::Monitoring::StatusFlag::YELLOW}); + } + + Y_UNIT_TEST(Mirrot3dcYellowGroupWhen3RedVdisks) { + CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::MIRROR_3_DC, Ydb::Monitoring::StatusFlag::YELLOW, {Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::RED}); + } + + Y_UNIT_TEST(Mirrot3dcYellowGroupWhen2RedBlueVdisks) { + CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::MIRROR_3_DC, Ydb::Monitoring::StatusFlag::YELLOW, {Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::BLUE}); + } + + Y_UNIT_TEST(Mirrot3dcYellowGroupWhen2RedVdisks) { + CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::MIRROR_3_DC, Ydb::Monitoring::StatusFlag::YELLOW, {Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::RED}); + } + + Y_UNIT_TEST(Mirrot3dcYellowGroupWhenRedBlueVdisks) { + 
CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::MIRROR_3_DC, Ydb::Monitoring::StatusFlag::YELLOW, {Ydb::Monitoring::StatusFlag::RED, Ydb::Monitoring::StatusFlag::BLUE}); + } + + Y_UNIT_TEST(Mirrot3dcYellowGroupWhenRedVdisk) { + CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::MIRROR_3_DC, Ydb::Monitoring::StatusFlag::YELLOW, {Ydb::Monitoring::StatusFlag::RED}); + } + + Y_UNIT_TEST(Mirrot3dcBlueGroupWhenBlueVdisk) { + CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::MIRROR_3_DC, Ydb::Monitoring::StatusFlag::BLUE, {Ydb::Monitoring::StatusFlag::BLUE}); + } + + Y_UNIT_TEST(Mirrot3dcYellowGroupWhenYellowVdisk) { + CheckGroupStatusDependsOnVdisks(NHealthCheck::TSelfCheckRequest::MIRROR_3_DC, Ydb::Monitoring::StatusFlag::YELLOW, {Ydb::Monitoring::StatusFlag::YELLOW}); + } + Y_UNIT_TEST(StorageLimit95) { StorageTest(95, 100, 1, Ydb::Monitoring::StatusFlag::RED); } diff --git a/ydb/core/http_proxy/custom_metrics.h b/ydb/core/http_proxy/custom_metrics.h index 089e9190ac..a25a2989c2 100644 --- a/ydb/core/http_proxy/custom_metrics.h +++ b/ydb/core/http_proxy/custom_metrics.h @@ -28,12 +28,12 @@ TVector<std::pair<TString, TString>> BuildLabels(const TString& method, const TH {"name", name}}; } if (method.empty()) { - return {{"database", httpContext.DatabaseName}, {"cloud_id", httpContext.CloudId}, + return {{"database", httpContext.DatabasePath}, {"cloud_id", httpContext.CloudId}, {"folder_id", httpContext.FolderId}, {"database_id", httpContext.DatabaseId}, {"topic", httpContext.StreamName}, {"name", name}}; } - return {{"database", httpContext.DatabaseName}, {"method", method}, {"cloud_id", httpContext.CloudId}, + return {{"database", httpContext.DatabasePath}, {"method", method}, {"cloud_id", httpContext.CloudId}, {"folder_id", httpContext.FolderId}, {"database_id", httpContext.DatabaseId}, {"topic", httpContext.StreamName}, {"name", name}}; } diff --git a/ydb/core/http_proxy/events.h b/ydb/core/http_proxy/events.h index 
4acb6376ed..010a18e73b 100644 --- a/ydb/core/http_proxy/events.h +++ b/ydb/core/http_proxy/events.h @@ -41,7 +41,6 @@ namespace NKikimr::NHttpProxy { EvUpdateDatabasesEvent, EvListEndpointsRequest, EvListEndpointsResponse, - EvError, EvErrorWithIssue, EvCounter, EvHistCounter, @@ -120,10 +119,13 @@ namespace NKikimr::NHttpProxy { TString SerializedUserToken; - TEvToken(const TString& serviceAccountId, const TString& iamToken, const TString& serializedUserToken = "") + TDatabase Database; + + TEvToken(const TString& serviceAccountId, const TString& iamToken, const TString& serializedUserToken, const TDatabase& database) : ServiceAccountId(serviceAccountId) , IamToken(iamToken) , SerializedUserToken(serializedUserToken) + , Database(database) {} }; @@ -131,25 +133,17 @@ namespace NKikimr::NHttpProxy { TEvClientReady() {} }; - struct TEvError : public TEventLocal<TEvError, EvError> { - NYdb::EStatus Status; - TString Response; - - TEvError(const NYdb::EStatus status, const TString& response) - : Status(status) - , Response(response) - {} - }; - struct TEvErrorWithIssue : public TEventLocal<TEvErrorWithIssue, EvErrorWithIssue> { NYdb::EStatus Status; size_t IssueCode; TString Response; + TDatabase Database; - TEvErrorWithIssue(const NYdb::EStatus status, const TString& response, size_t issueCode=0) + TEvErrorWithIssue(const NYdb::EStatus status, const TString& response, const TDatabase& database, size_t issueCode) : Status(status) , IssueCode(issueCode) , Response(response) + , Database(database) {} }; }; diff --git a/ydb/core/http_proxy/http_req.cpp b/ydb/core/http_proxy/http_req.cpp index 2ca59fbb56..f67688f5da 100644 --- a/ydb/core/http_proxy/http_req.cpp +++ b/ydb/core/http_proxy/http_req.cpp @@ -143,17 +143,17 @@ namespace NKikimr::NHttpProxy { } template<class TProto> - TString TruncateStreamName(const TProto& req, const TString& database) + TString TruncateStreamName(const TProto& req, const TString& databasePath) { constexpr bool has_stream_name = 
requires(const TProto& t) { t.stream_name(); }; if constexpr (has_stream_name) { - Y_VERIFY(req.stream_name().StartsWith(database)); - return req.stream_name().substr(database.size(), -1); + Y_VERIFY(req.stream_name().StartsWith(databasePath)); + return req.stream_name().substr(databasePath.size(), -1); } - return ExtractStreamNameWithoutProtoField<TProto>(req).substr(database.size(), -1); + return ExtractStreamNameWithoutProtoField<TProto>(req).substr(databasePath.size(), -1); } constexpr TStringBuf IAM_HEADER = "x-yacloud-subjecttoken"; @@ -231,7 +231,6 @@ namespace NKikimr::NHttpProxy { HFunc(TEvents::TEvWakeup, HandleTimeout); HFunc(TEvServerlessProxy::TEvClientReady, HandleClientReady); HFunc(TEvServerlessProxy::TEvDiscoverDatabaseEndpointResult, Handle); - HFunc(TEvServerlessProxy::TEvError, HandleError); HFunc(TEvServerlessProxy::TEvErrorWithIssue, HandleErrorWithIssue); HFunc(TEvServerlessProxy::TEvGrpcRequestResult, HandleGrpcResponse); HFunc(TEvServerlessProxy::TEvToken, HandleToken); @@ -247,7 +246,7 @@ namespace NKikimr::NHttpProxy { RequestState = StateAuthorization; auto request = MakeHolder<TEvServerlessProxy::TEvDiscoverDatabaseEndpointRequest>(); - request->DatabasePath = HttpContext.DatabaseName; + request->DatabasePath = HttpContext.DatabasePath; ctx.Send(MakeTenantDiscoveryID(), std::move(request)); } @@ -256,17 +255,17 @@ namespace NKikimr::NHttpProxy { RequestState = StateListEndpoints; LOG_SP_INFO_S(ctx, NKikimrServices::HTTP_PROXY, "create client to '" << HttpContext.DiscoveryEndpoint << - "' database: '" << HttpContext.DatabaseName << + "' database: '" << HttpContext.DatabasePath << "' iam token size: " << HttpContext.IamToken.size()); auto clientSettings = NYdb::TCommonClientSettings() .DiscoveryEndpoint(HttpContext.DiscoveryEndpoint) - .Database(HttpContext.DatabaseName) + .Database(HttpContext.DatabasePath) .AuthToken(HttpContext.IamToken) .DiscoveryMode(NYdb::EDiscoveryMode::Async); - if (!HttpContext.DatabaseName.empty() && 
!HttpContext.ServiceConfig.GetTestMode()) { - clientSettings.Database(HttpContext.DatabaseName); + if (!HttpContext.DatabasePath.empty() && !HttpContext.ServiceConfig.GetTestMode()) { + clientSettings.Database(HttpContext.DatabasePath); } Y_VERIFY(!Client); Client.Reset(new TDataStreamsClient(*HttpContext.Driver, clientSettings)); @@ -285,10 +284,10 @@ namespace NKikimr::NHttpProxy { RequestState = StateGrpcRequest; LOG_SP_INFO_S(ctx, NKikimrServices::HTTP_PROXY, "sending grpc request to '" << HttpContext.DiscoveryEndpoint << - "' database: '" << HttpContext.DatabaseName << + "' database: '" << HttpContext.DatabasePath << "' iam token size: " << HttpContext.IamToken.size()); - RpcFuture = NRpcService::DoLocalRpc<TRpcEv>(std::move(Request), HttpContext.DatabaseName, + RpcFuture = NRpcService::DoLocalRpc<TRpcEv>(std::move(Request), HttpContext.DatabasePath, HttpContext.SerializedUserToken, ctx.ActorSystem()); RpcFuture.Subscribe([actorId = ctx.SelfID, actorSystem = ctx.ActorSystem()] (const NThreading::TFuture<TProtoResponse>& future) { @@ -313,7 +312,7 @@ namespace NKikimr::NHttpProxy { RequestState = StateGrpcRequest; LOG_SP_INFO_S(ctx, NKikimrServices::HTTP_PROXY, "sending grpc request to '" << HttpContext.DiscoveryEndpoint << - "' database: '" << HttpContext.DatabaseName << + "' database: '" << HttpContext.DatabasePath << "' iam token size: " << HttpContext.IamToken.size()); Y_VERIFY(Client); @@ -345,6 +344,23 @@ namespace NKikimr::NHttpProxy { Y_UNUSED(ev, ctx); } + void TryUpdateDbInfo(const TDatabase& db, const TActorContext& ctx) { + if (db.Path) { + HttpContext.DatabasePath = db.Path; + HttpContext.DatabaseId = db.Id; + HttpContext.CloudId = db.CloudId; + HttpContext.FolderId = db.FolderId; + if (ExtractStreamName<TProtoRequest>(Request).StartsWith(HttpContext.DatabasePath + "/")) { + HttpContext.StreamName = + TruncateStreamName<TProtoRequest>(Request, HttpContext.DatabasePath + "/"); + } else { + HttpContext.StreamName = 
ExtractStreamName<TProtoRequest>(Request); + } + + } + ReportInputCounters(ctx); + } + void HandleToken(TEvServerlessProxy::TEvToken::TPtr& ev, const TActorContext& ctx) { HttpContext.ServiceAccountId = ev->Get()->ServiceAccountId; HttpContext.IamToken = ev->Get()->IamToken; @@ -353,15 +369,14 @@ namespace NKikimr::NHttpProxy { if (HttpContext.Driver) { SendYdbDriverRequest(ctx); } else { + TryUpdateDbInfo(ev->Get()->Database, ctx); SendGrpcRequestNoDriver(ctx); } } - void HandleError(TEvServerlessProxy::TEvError::TPtr& ev, const TActorContext& ctx) { - ReplyWithError(ctx, ev->Get()->Status, ev->Get()->Response); - } void HandleErrorWithIssue(TEvServerlessProxy::TEvErrorWithIssue::TPtr& ev, const TActorContext& ctx) { + TryUpdateDbInfo(ev->Get()->Database, ctx); ReplyWithError(ctx, ev->Get()->Status, ev->Get()->Response, ev->Get()->IssueCode); } @@ -377,6 +392,20 @@ namespace NKikimr::NHttpProxy { {"code", TStringBuilder() << (int)MapToException(status, Method, issueCode).second}, {"name", "api.http.errors_per_second"}} }); + + ctx.Send(MakeMetricsServiceID(), + new TEvServerlessProxy::TEvCounter{ + 1, true, true, + {{"database", HttpContext.DatabasePath}, + {"method", Method}, + {"cloud_id", HttpContext.CloudId}, + {"folder_id", HttpContext.FolderId}, + {"database_id", HttpContext.DatabaseId}, + {"topic", HttpContext.StreamName}, + {"code", TStringBuilder() << (int)MapToException(status, Method, issueCode).second}, + {"name", "api.http.data_streams.response.count"}} + }); + HttpContext.ResponseData.Status = status; HttpContext.ResponseData.ErrorText = errorText; HttpContext.DoReply(ctx, issueCode); @@ -386,6 +415,24 @@ namespace NKikimr::NHttpProxy { TBase::Die(ctx); } + void ReportInputCounters(const TActorContext& ctx) { + + if (InputCountersReported) { + return; + } + InputCountersReported = true; + + FillInputCustomMetrics<TProtoRequest>(Request, HttpContext, ctx); + /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), + new 
TEvServerlessProxy::TEvCounter{1, true, true, + BuildLabels(Method, HttpContext, "api.http.requests_per_second", setStreamPrefix) + }); + ctx.Send(MakeMetricsServiceID(), + new TEvServerlessProxy::TEvCounter{1, true, true, + BuildLabels(Method, HttpContext, "api.http.data_streams.request.count") + }); + } + void Handle(TEvServerlessProxy::TEvDiscoverDatabaseEndpointResult::TPtr ev, const TActorContext& ctx) { if (ev->Get()->DatabaseInfo) { @@ -394,25 +441,15 @@ namespace NKikimr::NHttpProxy { HttpContext.CloudId = db->CloudId; HttpContext.DatabaseId = db->Id; HttpContext.DiscoveryEndpoint = db->Endpoint; - HttpContext.DatabaseName = db->Path; + HttpContext.DatabasePath = db->Path; - if (ExtractStreamName<TProtoRequest>(Request).StartsWith(HttpContext.DatabaseName + "/")) { + if (ExtractStreamName<TProtoRequest>(Request).StartsWith(HttpContext.DatabasePath + "/")) { HttpContext.StreamName = - TruncateStreamName<TProtoRequest>(Request, HttpContext.DatabaseName + "/"); + TruncateStreamName<TProtoRequest>(Request, HttpContext.DatabasePath + "/"); } else { HttpContext.StreamName = ExtractStreamName<TProtoRequest>(Request); } - - FillInputCustomMetrics<TProtoRequest>(Request, HttpContext, ctx); - /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), - new TEvServerlessProxy::TEvCounter{1, true, true, - BuildLabels(Method, HttpContext, "api.http.requests_per_second", setStreamPrefix) - }); - ctx.Send(MakeMetricsServiceID(), - new TEvServerlessProxy::TEvCounter{1, true, true, - BuildLabels(Method, HttpContext, "api.http.data_streams.request.count") - }); - //TODO: add api.http.request.count + ReportInputCounters(ctx); CreateClient(ctx); return; } @@ -440,7 +477,6 @@ namespace NKikimr::NHttpProxy { FillOutputCustomMetrics<TProtoResult>( *(dynamic_cast<TProtoResult*>(ev->Get()->Message.Get())), HttpContext, ctx); ReportLatencyCounters(ctx); - /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{1, true, true, BuildLabels(Method, 
HttpContext, "api.http.success_per_second", setStreamPrefix) @@ -448,7 +484,7 @@ namespace NKikimr::NHttpProxy { ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{ 1, true, true, - {{"database", HttpContext.DatabaseName}, + {{"database", HttpContext.DatabasePath}, {"method", Method}, {"cloud_id", HttpContext.CloudId}, {"folder_id", HttpContext.FolderId}, @@ -505,17 +541,17 @@ namespace NKikimr::NHttpProxy { } catch (const std::exception& e) { LOG_SP_WARN_S(ctx, NKikimrServices::HTTP_PROXY, "got new request with incorrect json from [" << HttpContext.SourceAddress << "] " << - "database '" << HttpContext.DatabaseName << "'"); + "database '" << HttpContext.DatabasePath << "'"); return ReplyWithError(ctx, NYdb::EStatus::BAD_REQUEST, e.what(), static_cast<size_t>(NYds::EErrorCodes::INVALID_ARGUMENT)); } - if (HttpContext.DatabaseName.empty()) { - HttpContext.DatabaseName = ExtractStreamName<TProtoRequest>(Request); + if (HttpContext.DatabasePath.empty()) { + HttpContext.DatabasePath = ExtractStreamName<TProtoRequest>(Request); } LOG_SP_INFO_S(ctx, NKikimrServices::HTTP_PROXY, "got new request from [" << HttpContext.SourceAddress << "] " << - "database '" << HttpContext.DatabaseName << "' " << + "database '" << HttpContext.DatabasePath << "' " << "stream '" << ExtractStreamName<TProtoRequest>(Request) << "'"); // Use Signature or no sdk mode - then need to auth anyway @@ -553,6 +589,7 @@ namespace NKikimr::NHttpProxy { THolder<TDataStreamsClient> Client; TActorId AuthActor; + bool InputCountersReported = false; }; private: @@ -652,9 +689,9 @@ namespace NKikimr::NHttpProxy { SourceAddress = address; } - DatabaseName = Request->URL; - if (DatabaseName == "/") { - DatabaseName = ""; + DatabasePath = Request->URL; + if (DatabasePath == "/") { + DatabasePath = ""; } //TODO: find out databaseId ParseHeaders(Request->Headers); @@ -837,7 +874,7 @@ namespace NKikimr::NHttpProxy { , ServiceConfig(context.ServiceConfig) , IamToken(context.IamToken) , 
Authorize(!context.Driver) - , Database(context.DatabaseName) + , DatabasePath(context.DatabasePath) , StreamName(context.StreamName) { } @@ -864,7 +901,7 @@ namespace NKikimr::NHttpProxy { void SendDescribeRequest(const TActorContext& ctx) { auto schemeCacheRequest = std::make_unique<NSchemeCache::TSchemeCacheNavigate>(); NSchemeCache::TSchemeCacheNavigate::TEntry entry; - entry.Path = NKikimr::SplitPath(Database); + entry.Path = NKikimr::SplitPath(DatabasePath); entry.Operation = NSchemeCache::TSchemeCacheNavigate::OpPath; entry.SyncVersion = false; schemeCacheRequest->ResultSet.emplace_back(entry); @@ -875,7 +912,7 @@ namespace NKikimr::NHttpProxy { const NSchemeCache::TSchemeCacheNavigate* navigate = ev->Get()->Request.Get(); if (navigate->ErrorCount) { return ReplyWithError( - ctx, NYdb::EStatus::SCHEME_ERROR, TStringBuilder() << "Database with path '" << Database << "' doesn't exists", + ctx, NYdb::EStatus::SCHEME_ERROR, TStringBuilder() << "Database with path '" << DatabasePath << "' doesn't exists", NYds::EErrorCodes::NOT_FOUND ); } @@ -885,6 +922,7 @@ namespace NKikimr::NHttpProxy { FolderId = description.GetPQTabletConfig().GetYcFolderId(); CloudId = description.GetPQTabletConfig().GetYcCloudId(); DatabaseId = description.GetPQTabletConfig().GetYdbDatabaseId(); + DatabasePath = description.GetPQTabletConfig().GetYdbDatabasePath(); } for (const auto& attr : navigate->ResultSet.front().Attributes) { if (attr.first == "folder_id") FolderId = attr.second; @@ -903,7 +941,7 @@ namespace NKikimr::NHttpProxy { if (ev->Get()->Error) { return ReplyWithError(ctx, NYdb::EStatus::UNAUTHORIZED, ev->Get()->Error.Message); }; - ctx.Send(Sender, new TEvServerlessProxy::TEvToken(ev->Get()->Token->GetUserSID(), "", ev->Get()->SerializedToken)); + ctx.Send(Sender, new TEvServerlessProxy::TEvToken(ev->Get()->Token->GetUserSID(), "", ev->Get()->SerializedToken, {"", DatabaseId, DatabasePath, CloudId, FolderId})); LOG_SP_DEBUG_S(ctx, NKikimrServices::HTTP_PROXY, "Authorized 
successfully"); @@ -1035,7 +1073,7 @@ namespace NKikimr::NHttpProxy { void ReplyWithError(const TActorContext& ctx, NYdb::EStatus status, const TString& errorText, NYds::EErrorCodes issueCode = NYds::EErrorCodes::GENERIC_ERROR) { - ctx.Send(Sender, new TEvServerlessProxy::TEvErrorWithIssue(status, errorText, static_cast<size_t>(issueCode))); + ctx.Send(Sender, new TEvServerlessProxy::TEvErrorWithIssue(status, errorText, {"", DatabaseId, DatabasePath, CloudId, FolderId}, static_cast<size_t>(issueCode))); TBase::Die(ctx); } @@ -1058,7 +1096,7 @@ namespace NKikimr::NHttpProxy { Y_VERIFY(!ev->Get()->Response.iam_token().empty()); ctx.Send(Sender, - new TEvServerlessProxy::TEvToken(ServiceAccountId, ev->Get()->Response.iam_token())); + new TEvServerlessProxy::TEvToken(ServiceAccountId, ev->Get()->Response.iam_token(), "", {})); LOG_SP_DEBUG_S(ctx, NKikimrServices::HTTP_PROXY, "IAM token generated"); @@ -1094,7 +1132,7 @@ namespace NKikimr::NHttpProxy { TString FolderId; TString CloudId; TString DatabaseId; - TString Database; + TString DatabasePath; TString StreamName; }; diff --git a/ydb/core/http_proxy/http_req.h b/ydb/core/http_proxy/http_req.h index 60437ff3b2..6effd74297 100644 --- a/ydb/core/http_proxy/http_req.h +++ b/ydb/core/http_proxy/http_req.h @@ -75,7 +75,7 @@ struct THttpRequestContext { TString ServiceAccountId; TString RequestId; TString DiscoveryEndpoint; - TString DatabaseName; + TString DatabasePath; TString DatabaseId; // not in context TString FolderId; // not in context TString CloudId; // not in context diff --git a/ydb/core/http_proxy/http_service.cpp b/ydb/core/http_proxy/http_service.cpp index a08d764bc5..656ec9be03 100644 --- a/ydb/core/http_proxy/http_service.cpp +++ b/ydb/core/http_proxy/http_service.cpp @@ -94,7 +94,7 @@ namespace NKikimr::NHttpProxy { " incoming request from [" << context.SourceAddress << "]" << " request [" << context.MethodName << "]" << " url [" << context.Request->URL << "]" << - " database [" << context.DatabaseName 
<< "]" << + " database [" << context.DatabasePath << "]" << " requestId: " << context.RequestId); try { diff --git a/ydb/core/kqp/compile_service/kqp_compile_service.cpp b/ydb/core/kqp/compile_service/kqp_compile_service.cpp index 5388386fa8..8cd2a925c3 100644 --- a/ydb/core/kqp/compile_service/kqp_compile_service.cpp +++ b/ydb/core/kqp/compile_service/kqp_compile_service.cpp @@ -367,6 +367,7 @@ private: bool enableKqpScanQueryPredicateExtract = Config.GetEnablePredicateExtractForScanQueries(); bool defaultSyntaxVersion = Config.GetSqlVersion(); + bool enableKqpImmediateEffects = Config.GetEnableKqpImmediateEffects(); Config.Swap(event.MutableConfig()->MutableTableServiceConfig()); LOG_INFO(*TlsActivationContext, NKikimrServices::KQP_COMPILE_SERVICE, "Updated config"); @@ -380,8 +381,9 @@ private: Config.GetEnableKqpScanQueryStreamIdxLookupJoin() != enableKqpScanQueryStreamIdxLookupJoin || Config.GetEnableKqpDataQuerySourceRead() != enableKqpDataQuerySourceRead || Config.GetEnableKqpScanQuerySourceRead() != enableKqpScanQuerySourceRead || - Config.GetEnablePredicateExtractForDataQueries() != enableKqpDataQueryPredicateExtract || - Config.GetEnablePredicateExtractForScanQueries() != enableKqpScanQueryPredicateExtract) + Config.GetEnablePredicateExtractForDataQueries() != enableKqpDataQueryPredicateExtract || + Config.GetEnablePredicateExtractForScanQueries() != enableKqpScanQueryPredicateExtract || + Config.GetEnableKqpImmediateEffects() != enableKqpImmediateEffects) { LOG_NOTICE_S(*TlsActivationContext, NKikimrServices::KQP_COMPILE_SERVICE, diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index 561dccef84..722b9f1d9c 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -1302,6 +1302,11 @@ private: break; } + case NKqpProto::TKqpPhyTableOperation::kReadOlapRange: { + YQL_ENSURE(false, "The previous check did not work! 
Data query read does not support column shard tables." << Endl + << this->DebugString()); + } + default: { YQL_ENSURE(false, "Unexpected table operation: " << (ui32) op.GetTypeCase() << Endl << this->DebugString()); @@ -1528,6 +1533,14 @@ private: } } + if (stageInfo.Meta.IsOlap() && tx.Body->GetType() == NKqpProto::TKqpPhyTx::TYPE_DATA) { + auto error = TStringBuilder() << "Data manipulation queries do not support column shard tables"; + LOG_E(error); + ReplyErrorAndDie(Ydb::StatusIds::PRECONDITION_FAILED, + YqlIssue({}, NYql::TIssuesIds::KIKIMR_PRECONDITION_FAILED, error)); + return; + } + LOG_D("Stage " << stageInfo.Id << " AST: " << stage.GetProgramAst()); if (stage.SourcesSize() > 0) { diff --git a/ydb/core/kqp/executer_actor/kqp_executer_impl.h b/ydb/core/kqp/executer_actor/kqp_executer_impl.h index 7da9ef93cc..00b3bae793 100644 --- a/ydb/core/kqp/executer_actor/kqp_executer_impl.h +++ b/ydb/core/kqp/executer_actor/kqp_executer_impl.h @@ -1069,7 +1069,7 @@ protected: } proxy = CreateResultStreamChannelProxy(TxId, channel.Id, ResponseEv->TxResults[0].MkqlItemType, - ResponseEv->TxResults[0].ColumnOrder, Target, Stats.get(), this->SelfId()); + ResponseEv->TxResults[0].ColumnOrder, Target, Stats, this->SelfId()); } else { YQL_ENSURE(channel.DstInputIndex < ResponseEv->ResultsSize()); @@ -1079,7 +1079,7 @@ protected: return channelIt->second; } - proxy = CreateResultDataChannelProxy(TxId, channel.Id, Stats.get(), this->SelfId(), + proxy = CreateResultDataChannelProxy(TxId, channel.Id, Stats, this->SelfId(), channel.DstInputIndex, ResponseEv.get()); } @@ -1101,6 +1101,11 @@ protected: if (KqpShardsResolverId) { this->Send(KqpShardsResolverId, new TEvents::TEvPoison); } + + if (Planner) { + Planner->Unsubscribe(); + } + if (KqpTableResolverId) { this->Send(KqpTableResolverId, new TEvents::TEvPoison); this->Send(this->SelfId(), new TEvents::TEvPoison); @@ -1149,7 +1154,7 @@ protected: const TString Database; const TIntrusiveConstPtr<NACLib::TUserToken> UserToken; 
TKqpRequestCounters::TPtr Counters; - std::unique_ptr<TQueryExecutionStats> Stats; + std::shared_ptr<TQueryExecutionStats> Stats; TInstant StartTime; TMaybe<TInstant> Deadline; TActorId DeadlineActor; diff --git a/ydb/core/kqp/executer_actor/kqp_executer_stats.h b/ydb/core/kqp/executer_actor/kqp_executer_stats.h index 4c936207bc..2b919e519f 100644 --- a/ydb/core/kqp/executer_actor/kqp_executer_stats.h +++ b/ydb/core/kqp/executer_actor/kqp_executer_stats.h @@ -21,8 +21,8 @@ struct TQueryExecutionStats { // basic stats std::unordered_set<ui64> AffectedShards; ui32 TotalTasks = 0; - ui64 ResultBytes = 0; - ui64 ResultRows = 0; + std::atomic<ui64> ResultBytes = 0; + std::atomic<ui64> ResultRows = 0; TDuration ExecuterCpuTime; TInstant StartTs; diff --git a/ydb/core/kqp/executer_actor/kqp_planner.cpp b/ydb/core/kqp/executer_actor/kqp_planner.cpp index 753bc532bd..106ecdf459 100644 --- a/ydb/core/kqp/executer_actor/kqp_planner.cpp +++ b/ydb/core/kqp/executer_actor/kqp_planner.cpp @@ -78,7 +78,7 @@ bool TKqpPlanner::SendStartKqpTasksRequest(ui32 requestId, const TActorId& targe LOG_D("Try to retry to another node, nodeId: " << *targetNode << ", requestId: " << requestId); auto anotherTarget = MakeKqpNodeServiceID(*targetNode); TlsActivationContext->Send(std::make_unique<NActors::IEventHandle>(anotherTarget, ExecuterId, ev.Release(), - IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, requestId, nullptr, ExecuterSpan.GetTraceId())); + CalcSendMessageFlagsForNode(*targetNode), requestId, nullptr, ExecuterSpan.GetTraceId())); requestData.RetryNumber++; return true; } @@ -234,6 +234,13 @@ void TKqpPlanner::ProcessTasksForScanExecuter() { } } +void TKqpPlanner::Unsubscribe() { + for(ui64 nodeId: TrackingNodes) { + TlsActivationContext->Send(std::make_unique<NActors::IEventHandle>( + TActivationContext::InterconnectProxy(nodeId), ExecuterId, new TEvents::TEvUnsubscribe())); + } +} + void TKqpPlanner::PrepareToProcess() { auto rmConfig = 
GetKqpResourceManager()->GetConfig(); diff --git a/ydb/core/kqp/executer_actor/kqp_planner.h b/ydb/core/kqp/executer_actor/kqp_planner.h index 9c422c3903..1ea7c26722 100644 --- a/ydb/core/kqp/executer_actor/kqp_planner.h +++ b/ydb/core/kqp/executer_actor/kqp_planner.h @@ -34,6 +34,8 @@ public: TVector<NKikimrKqp::TKqpNodeResources>&& resourcesSnapshot, const NKikimrConfig::TTableServiceConfig::TExecuterRetriesConfig& executerRetriesConfig); bool SendStartKqpTasksRequest(ui32 requestId, const TActorId& target); + void Unsubscribe(); + void ProcessTasksForScanExecuter(); void ProcessTasksForDataExecuter(); diff --git a/ydb/core/kqp/executer_actor/kqp_result_channel.cpp b/ydb/core/kqp/executer_actor/kqp_result_channel.cpp index 3d83981ca7..261ef8ff48 100644 --- a/ydb/core/kqp/executer_actor/kqp_result_channel.cpp +++ b/ydb/core/kqp/executer_actor/kqp_result_channel.cpp @@ -20,11 +20,11 @@ public: return NKikimrServices::TActivity::KQP_RESULT_CHANNEL_PROXY; } - TResultCommonChannelProxy(ui64 txId, ui64 channelId, TQueryExecutionStats* stats, TActorId executer) + TResultCommonChannelProxy(ui64 txId, ui64 channelId, std::shared_ptr<TQueryExecutionStats> stats, TActorId executer) : TActor(&TResultCommonChannelProxy::WorkState) , TxId(txId) , ChannelId(channelId) - , Stats(stats) + , Stats(std::move(stats)) , Executer(executer) {} protected: @@ -112,7 +112,7 @@ private: private: const ui64 TxId; const ui64 ChannelId; - TQueryExecutionStats* Stats; // owned by KqpExecuter + std::shared_ptr<TQueryExecutionStats> Stats; // owned by KqpExecuter const NActors::TActorId Executer; NActors::TActorId ComputeActor; }; @@ -120,9 +120,9 @@ private: class TResultStreamChannelProxy : public TResultCommonChannelProxy { public: TResultStreamChannelProxy(ui64 txId, ui64 channelId, NKikimr::NMiniKQL::TType* itemType, - const TVector<ui32>* columnOrder, TActorId target, TQueryExecutionStats* stats, + const TVector<ui32>* columnOrder, TActorId target, std::shared_ptr<TQueryExecutionStats> 
stats, TActorId executer) - : TResultCommonChannelProxy(txId, channelId, stats, executer) + : TResultCommonChannelProxy(txId, channelId, std::move(stats), executer) , ColumnOrder(columnOrder) , ItemType(itemType) , Target(target) {} @@ -154,9 +154,9 @@ private: class TResultDataChannelProxy : public TResultCommonChannelProxy { public: - TResultDataChannelProxy(ui64 txId, ui64 channelId, TQueryExecutionStats* stats, TActorId executer, + TResultDataChannelProxy(ui64 txId, ui64 channelId, std::shared_ptr<TQueryExecutionStats> stats, TActorId executer, ui32 inputIndex, TEvKqpExecuter::TEvTxResponse* resultReceiver) - : TResultCommonChannelProxy(txId, channelId, stats, executer) + : TResultCommonChannelProxy(txId, channelId, std::move(stats), executer) , InputIndex(inputIndex) , ResultReceiver(resultReceiver) {} @@ -184,18 +184,18 @@ private: } // anonymous namespace end NActors::IActor* CreateResultStreamChannelProxy(ui64 txId, ui64 channelId, NKikimr::NMiniKQL::TType* itemType, - const TVector<ui32>* columnOrder, TActorId target, TQueryExecutionStats* stats, TActorId executer) + const TVector<ui32>* columnOrder, TActorId target, std::shared_ptr<TQueryExecutionStats> stats, TActorId executer) { LOG_DEBUG_S(*NActors::TlsActivationContext, NKikimrServices::KQP_EXECUTER, "CreateResultStreamChannelProxy: TxId: " << txId << ", channelId: " << channelId ); - return new TResultStreamChannelProxy(txId, channelId, itemType, columnOrder, target, stats, executer); + return new TResultStreamChannelProxy(txId, channelId, itemType, columnOrder, target, std::move(stats), executer); } NActors::IActor* CreateResultDataChannelProxy(ui64 txId, ui64 channelId, - TQueryExecutionStats* stats, TActorId executer, + std::shared_ptr<TQueryExecutionStats> stats, TActorId executer, ui32 inputIndex, TEvKqpExecuter::TEvTxResponse* resultsReceiver) { LOG_DEBUG_S(*NActors::TlsActivationContext, NKikimrServices::KQP_EXECUTER, @@ -203,7 +203,7 @@ NActors::IActor* CreateResultDataChannelProxy(ui64 txId, 
ui64 channelId, ", channelId: " << channelId ); - return new TResultDataChannelProxy(txId, channelId, stats, executer, inputIndex, resultsReceiver); + return new TResultDataChannelProxy(txId, channelId, std::move(stats), executer, inputIndex, resultsReceiver); } } // namespace NKqp diff --git a/ydb/core/kqp/executer_actor/kqp_result_channel.h b/ydb/core/kqp/executer_actor/kqp_result_channel.h index 5cc8c54fb0..6441e288c6 100644 --- a/ydb/core/kqp/executer_actor/kqp_result_channel.h +++ b/ydb/core/kqp/executer_actor/kqp_result_channel.h @@ -26,10 +26,10 @@ struct TQueryExecutionStats; struct TKqpExecuterTxResult; NActors::IActor* CreateResultStreamChannelProxy(ui64 txId, ui64 channelId, NKikimr::NMiniKQL::TType* itemType, - const TVector<ui32>* columnOrder, NActors::TActorId target, TQueryExecutionStats* stats, + const TVector<ui32>* columnOrder, NActors::TActorId target, std::shared_ptr<TQueryExecutionStats> stats, NActors::TActorId executer); -NActors::IActor* CreateResultDataChannelProxy(ui64 txId, ui64 channelId, TQueryExecutionStats* stats, +NActors::IActor* CreateResultDataChannelProxy(ui64 txId, ui64 channelId, std::shared_ptr<TQueryExecutionStats> stats, NActors::TActorId executer, ui32 inputIndex, TEvKqpExecuter::TEvTxResponse* receiver); } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/executer_actor/kqp_scan_executer.cpp b/ydb/core/kqp/executer_actor/kqp_scan_executer.cpp index 6e8103fc2c..c375cfc98e 100644 --- a/ydb/core/kqp/executer_actor/kqp_scan_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_scan_executer.cpp @@ -730,10 +730,6 @@ private: channelPair.second->Receive(ev, TActivationContext::AsActorContext()); } - for (auto& [shardId, nodeId] : ShardIdToNodeId) { - Send(TActivationContext::InterconnectProxy(nodeId), new TEvents::TEvUnsubscribe()); - } - auto totalTime = TInstant::Now() - StartTime; Counters->Counters->ScanTxTotalTimeHistogram->Collect(totalTime.MilliSeconds()); diff --git a/ydb/core/kqp/host/kqp_host.cpp 
b/ydb/core/kqp/host/kqp_host.cpp index d8e4979926..401861e22f 100644 --- a/ydb/core/kqp/host/kqp_host.cpp +++ b/ydb/core/kqp/host/kqp_host.cpp @@ -810,8 +810,8 @@ private: for (const auto& operation : queryBlock.Operations()) { auto& tableData = SessionCtx->Tables().ExistingTable(operation.Cluster(), operation.Table()); - if (!tableData.Metadata->SysView.empty()) { - // Always use ScanQuery for queries with system tables. + if (tableData.Metadata->IsOlap() || !tableData.Metadata->SysView.empty()) { + // Always use ScanQuery for queries with OLAP and system tables. return true; } } diff --git a/ydb/core/kqp/node_service/kqp_node_service.cpp b/ydb/core/kqp/node_service/kqp_node_service.cpp index dd57aadc36..7f89570b5e 100644 --- a/ydb/core/kqp/node_service/kqp_node_service.cpp +++ b/ydb/core/kqp/node_service/kqp_node_service.cpp @@ -11,6 +11,7 @@ #include <ydb/core/kqp/compute_actor/kqp_compute_actor.h> #include <ydb/core/kqp/rm_service/kqp_resource_estimation.h> #include <ydb/core/kqp/rm_service/kqp_rm_service.h> +#include <ydb/core/kqp/runtime/kqp_read_actor.h> #include <ydb/core/kqp/common/kqp_resolve.h> #include <ydb/core/base/wilson.h> @@ -76,7 +77,12 @@ public: IKqpNodeComputeActorFactory* caFactory) : Config(config.GetResourceManager()) , Counters(counters) - , CaFactory(caFactory) {} + , CaFactory(caFactory) + { + if (config.HasIteratorReadsRetrySettings()) { + SetIteratorReadsRetrySettings(config.GetIteratorReadsRetrySettings()); + } + } void Bootstrap() { LOG_I("Starting KQP Node service"); @@ -444,10 +450,31 @@ private: LOG_I("Updated table service config: " << Config.DebugString()); } + if (event.GetConfig().GetTableServiceConfig().HasIteratorReadsRetrySettings()) { + SetIteratorReadsRetrySettings(event.GetConfig().GetTableServiceConfig().GetIteratorReadsRetrySettings()); + } + auto responseEv = MakeHolder<NConsole::TEvConsole::TEvConfigNotificationResponse>(event); Send(ev->Sender, responseEv.Release(), IEventHandle::FlagTrackDelivery, ev->Cookie); } + 
void SetIteratorReadsRetrySettings(const NKikimrConfig::TTableServiceConfig::TIteratorReadsRetrySettings& settings) { + auto ptr = MakeIntrusive<NKikimr::NKqp::TIteratorReadBackoffSettings>(); + ptr->StartRetryDelay = TDuration::MilliSeconds(settings.GetStartDelayMs()); + ptr->MaxShardAttempts = settings.GetMaxShardRetries(); + ptr->MaxShardResolves = settings.GetMaxShardResolves(); + ptr->UnsertaintyRatio = settings.GetUnsertaintyRatio(); + ptr->Multiplier = settings.GetMultiplier(); + if (settings.GetMaxTotalRetries()) { + ptr->MaxTotalRetries = settings.GetMaxTotalRetries(); + } + if (settings.GetIteratorResponseTimeoutMs()) { + ptr->ReadResponseTimeout = TDuration::MilliSeconds(settings.GetIteratorResponseTimeoutMs()); + } + ptr->MaxRetryDelay = TDuration::MilliSeconds(settings.GetMaxDelayMs()); + SetReadIteratorBackoffSettings(ptr); + } + void HandleWork(TEvents::TEvUndelivered::TPtr& ev) { switch (ev->Get()->SourceType) { case TEvKqpNode::TEvStartKqpTasksResponse::EventType: { diff --git a/ydb/core/kqp/provider/yql_kikimr_gateway.cpp b/ydb/core/kqp/provider/yql_kikimr_gateway.cpp index d251aad27e..066749c709 100644 --- a/ydb/core/kqp/provider/yql_kikimr_gateway.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_gateway.cpp @@ -160,6 +160,21 @@ bool TTtlSettings::TryParse(const NNodes::TCoNameValueTupleList& node, TTtlSetti } settings.ExpireAfter = TDuration::FromValue(value); + } else if (name == "columnUnit") { + YQL_ENSURE(field.Value().Maybe<TCoAtom>()); + auto value = field.Value().Cast<TCoAtom>().StringValue(); + if (value == "seconds") { + settings.ColumnUnit = EUnit::Seconds; + } else if (value == "milliseconds") { + settings.ColumnUnit = EUnit::Milliseconds; + } else if (value == "microseconds") { + settings.ColumnUnit = EUnit::Microseconds; + } else if (value == "nanoseconds") { + settings.ColumnUnit = EUnit::Nanoseconds; + } else { + error = TStringBuilder() << "Invalid unit: " << value; + return false; + } } else { error = TStringBuilder() << "Unknown 
field: " << name; return false; @@ -292,8 +307,16 @@ bool ConvertReadReplicasSettingsToProto(const TString settings, Ydb::Table::Read } void ConvertTtlSettingsToProto(const NYql::TTtlSettings& settings, Ydb::Table::TtlSettings& proto) { - proto.mutable_date_type_column()->set_column_name(settings.ColumnName); - proto.mutable_date_type_column()->set_expire_after_seconds(settings.ExpireAfter.Seconds()); + if (!settings.ColumnUnit) { + auto& opts = *proto.mutable_date_type_column(); + opts.set_column_name(settings.ColumnName); + opts.set_expire_after_seconds(settings.ExpireAfter.Seconds()); + } else { + auto& opts = *proto.mutable_value_since_unix_epoch(); + opts.set_column_name(settings.ColumnName); + opts.set_column_unit(static_cast<Ydb::Table::ValueSinceUnixEpochModeSettings::Unit>(*settings.ColumnUnit)); + opts.set_expire_after_seconds(settings.ExpireAfter.Seconds()); + } } Ydb::FeatureFlag::Status GetFlagValue(const TMaybe<bool>& value) { diff --git a/ydb/core/kqp/provider/yql_kikimr_gateway.h b/ydb/core/kqp/provider/yql_kikimr_gateway.h index a0f5a0b3b4..3ae5ab535f 100644 --- a/ydb/core/kqp/provider/yql_kikimr_gateway.h +++ b/ydb/core/kqp/provider/yql_kikimr_gateway.h @@ -151,8 +151,16 @@ struct TColumnFamily { }; struct TTtlSettings { + enum class EUnit: ui32 { + Seconds = 1, + Milliseconds = 2, + Microseconds = 3, + Nanoseconds = 4, + }; + TString ColumnName; TDuration ExpireAfter; + TMaybe<EUnit> ColumnUnit; static bool TryParse(const NNodes::TCoNameValueTupleList& node, TTtlSettings& settings, TString& error); }; @@ -439,6 +447,10 @@ struct TKikimrTableMetadata : public TThrRefBase { } return {nullptr, TIndexDescription::EIndexState::Invalid}; } + + bool IsOlap() const { + return Kind == EKikimrTableKind::Olap; + } }; struct TCreateUserSettings { diff --git a/ydb/core/kqp/runtime/CMakeLists.darwin.txt b/ydb/core/kqp/runtime/CMakeLists.darwin.txt index 3f0643e6e8..88cc8947ec 100644 --- a/ydb/core/kqp/runtime/CMakeLists.darwin.txt +++ 
b/ydb/core/kqp/runtime/CMakeLists.darwin.txt @@ -31,6 +31,7 @@ target_link_libraries(core-kqp-runtime PUBLIC library-yql-utils dq-actors-protos yql-dq-runtime + cpp-threading-hot_swap tools-enum_parser-enum_serialization_runtime ) target_sources(core-kqp-runtime PRIVATE diff --git a/ydb/core/kqp/runtime/CMakeLists.linux-aarch64.txt b/ydb/core/kqp/runtime/CMakeLists.linux-aarch64.txt index 7dee34ab15..7d5bf657f4 100644 --- a/ydb/core/kqp/runtime/CMakeLists.linux-aarch64.txt +++ b/ydb/core/kqp/runtime/CMakeLists.linux-aarch64.txt @@ -32,6 +32,7 @@ target_link_libraries(core-kqp-runtime PUBLIC library-yql-utils dq-actors-protos yql-dq-runtime + cpp-threading-hot_swap tools-enum_parser-enum_serialization_runtime ) target_sources(core-kqp-runtime PRIVATE diff --git a/ydb/core/kqp/runtime/CMakeLists.linux.txt b/ydb/core/kqp/runtime/CMakeLists.linux.txt index 7dee34ab15..7d5bf657f4 100644 --- a/ydb/core/kqp/runtime/CMakeLists.linux.txt +++ b/ydb/core/kqp/runtime/CMakeLists.linux.txt @@ -32,6 +32,7 @@ target_link_libraries(core-kqp-runtime PUBLIC library-yql-utils dq-actors-protos yql-dq-runtime + cpp-threading-hot_swap tools-enum_parser-enum_serialization_runtime ) target_sources(core-kqp-runtime PRIVATE diff --git a/ydb/core/kqp/runtime/kqp_read_actor.cpp b/ydb/core/kqp/runtime/kqp_read_actor.cpp index fa9feecb0f..bfa8a24f6c 100644 --- a/ydb/core/kqp/runtime/kqp_read_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_read_actor.cpp @@ -14,6 +14,7 @@ #include <ydb/library/yql/dq/actors/compute/dq_compute_actor_impl.h> +#include <library/cpp/threading/hot_swap/hot_swap.h> #include <library/cpp/actors/core/interconnect.h> #include <library/cpp/actors/core/actorsystem.h> @@ -21,9 +22,6 @@ namespace { -static constexpr ui64 MAX_SHARD_RETRIES = 5; -static constexpr ui64 MAX_SHARD_RESOLVES = 3; - bool IsDebugLogEnabled(const NActors::TActorSystem* actorSystem, NActors::NLog::EComponent component) { auto* settings = actorSystem->LoggerSettings(); return settings && 
settings->Satisfies(NActors::NLog::EPriority::PRI_DEBUG, component); @@ -64,7 +62,33 @@ THolder<NKikimr::TEvDataShard::TEvReadAck> DefaultAckSettings() { NActors::TActorId MainPipeCacheId = NKikimr::MakePipePeNodeCacheID(false); NActors::TActorId FollowersPipeCacheId = NKikimr::MakePipePeNodeCacheID(true); -TDuration StartRetryDelay = TDuration::MilliSeconds(250); +struct TBackoffStorage { + THotSwap<NKikimr::NKqp::TIteratorReadBackoffSettings> SettingsPtr; + + TBackoffStorage() { + SettingsPtr.AtomicStore(new NKikimr::NKqp::TIteratorReadBackoffSettings()); + } +}; + +TDuration CalcDelay(size_t attempt, bool allowInstantRetry) { + return Singleton<::TBackoffStorage>()->SettingsPtr.AtomicLoad()->CalcShardDelay(attempt, allowInstantRetry); +} + +size_t MaxShardResolves() { + return Singleton<::TBackoffStorage>()->SettingsPtr.AtomicLoad()->MaxShardResolves; +} + +size_t MaxShardRetries() { + return Singleton<::TBackoffStorage>()->SettingsPtr.AtomicLoad()->MaxShardAttempts; +} + +TMaybe<size_t> MaxTotalRetries() { + return Singleton<::TBackoffStorage>()->SettingsPtr.AtomicLoad()->MaxTotalRetries; +} + +TMaybe<TDuration> ShardTimeout() { + return Singleton<::TBackoffStorage>()->SettingsPtr.AtomicLoad()->ReadResponseTimeout; +} } @@ -441,6 +465,7 @@ public: state.AddRange(TSerializedTableRange(range)); } } else { + YQL_ENSURE(Settings.GetRanges().KeyPointsSize() > 0); for (const auto& point : Settings.GetRanges().GetKeyPoints()) { state.AddPoint(TSerializedCellVec(point)); } @@ -485,7 +510,7 @@ public: } void ResolveShard(TShardState* state) { - if (state->ResolveAttempt >= MAX_SHARD_RESOLVES) { + if (state->ResolveAttempt >= ::MaxShardResolves()) { RuntimeError(TStringBuilder() << "Table '" << Settings.GetTable().GetTablePath() << "' resolve limit exceeded", NDqProto::StatusIds::UNAVAILABLE); return; @@ -723,12 +748,23 @@ public: } auto state = Reads[id].Shard; - if (state->RetryAttempt == 0 && allowInstantRetry) { // instant retry - return DoRetryRead(id); + + 
TotalRetries += 1; + auto limit = ::MaxTotalRetries(); + if (limit && TotalRetries > *limit) { + return RuntimeError(TStringBuilder() << "Table '" << Settings.GetTable().GetTablePath() << "' retry limit exceeded", + NDqProto::StatusIds::UNAVAILABLE); + } + + state->RetryAttempt += 1; + if (state->RetryAttempt > ::MaxShardRetries()) { + ResetRead(id); + return ResolveShard(state); } - auto delay = ::StartRetryDelay; - for (size_t i = 0; i < state->RetryAttempt; ++i) { - delay *= 2; + + auto delay = ::CalcDelay(state->RetryAttempt, allowInstantRetry); + if (delay == TDuration::Zero()) { + return DoRetryRead(id); } CA_LOG_D("schedule retry #" << id << " after " << delay); @@ -741,12 +777,6 @@ public: } auto state = Reads[id].Shard; - - state->RetryAttempt += 1; - if (state->RetryAttempt >= MAX_SHARD_RETRIES) { - ResetRead(id); - return ResolveShard(state); - } CA_LOG_D("Retrying read #" << id); ResetRead(id); @@ -811,6 +841,7 @@ public: record.SetReverse(Settings.GetReverse()); if (limit) { record.SetMaxRows(*limit); + record.SetTotalRowsLimit(*limit); } record.SetMaxBytes(Min<ui64>(record.GetMaxBytes(), BufSize)); @@ -836,6 +867,10 @@ public: ReadIdByTabletId[state->TabletId].push_back(id); Send(PipeCacheId, new TEvPipeCache::TEvForward(ev.Release(), state->TabletId, true), IEventHandle::FlagTrackDelivery); + + if (auto delay = ShardTimeout()) { + TlsActivationContext->Schedule(*delay, new IEventHandle(SelfId(), SelfId(), new TEvRetryShard(id, Reads[id].LastSeqNo))); + } } void NotifyCA() { @@ -1169,6 +1204,10 @@ public: CA_LOG_D("sending ack for read #" << id << " limit " << limit << " seqno = " << record.GetSeqNo()); Send(PipeCacheId, new TEvPipeCache::TEvForward(request.Release(), Reads[id].Shard->TabletId, true), IEventHandle::FlagTrackDelivery); + + if (auto delay = ShardTimeout()) { + TlsActivationContext->Schedule(*delay, new IEventHandle(SelfId(), SelfId(), new TEvRetryShard(id, Reads[id].LastSeqNo))); + } } else { Reads[id].Finished = true; } @@ -1205,10 
+1244,6 @@ public: << " has limit " << (Settings.GetItemsLimit() != 0) << " limit reached " << LimitReached()); - if (!Results.empty()) { - NotifyCA(); - } - return bytes; } @@ -1339,6 +1374,8 @@ private: TIntrusivePtr<TKqpCounters> Counters; bool UseFollowers; NActors::TActorId PipeCacheId; + + size_t TotalRetries = 0; }; @@ -1363,5 +1400,9 @@ void InterceptReadActorPipeCache(NActors::TActorId id) { ::MainPipeCacheId = id; } +void SetReadIteratorBackoffSettings(TIntrusivePtr<TIteratorReadBackoffSettings> ptr) { + Singleton<::TBackoffStorage>()->SettingsPtr.AtomicStore(ptr); +} + } // namespace NKqp } // namespace NKikimr diff --git a/ydb/core/kqp/runtime/kqp_read_actor.h b/ydb/core/kqp/runtime/kqp_read_actor.h index 22c4e05d5c..12da2c844c 100644 --- a/ydb/core/kqp/runtime/kqp_read_actor.h +++ b/ydb/core/kqp/runtime/kqp_read_actor.h @@ -12,6 +12,36 @@ class TEvReadAck; namespace NKikimr { namespace NKqp { +struct TIteratorReadBackoffSettings : TAtomicRefCount<TIteratorReadBackoffSettings> { + TDuration StartRetryDelay = TDuration::MilliSeconds(5); + size_t MaxShardAttempts = 10; + size_t MaxShardResolves = 3; + double UnsertaintyRatio = 0.5; + double Multiplier = 2.0; + TDuration MaxRetryDelay = TDuration::Seconds(1); + + TMaybe<size_t> MaxTotalRetries; + TMaybe<TDuration> ReadResponseTimeout; + + TDuration CalcShardDelay(size_t attempt, bool allowInstantRetry) { + if (allowInstantRetry && attempt == 1) { + return TDuration::Zero(); + } + + auto delay = StartRetryDelay; + for (size_t i = 0; i < attempt; ++i) { + delay *= Multiplier; + delay = Min(delay, MaxRetryDelay); + } + + delay *= (1 - UnsertaintyRatio * RandomNumber<double>()); + + return delay; + } +}; + +void SetReadIteratorBackoffSettings(TIntrusivePtr<TIteratorReadBackoffSettings>); + void RegisterKqpReadActor(NYql::NDq::TDqAsyncIoFactory&, TIntrusivePtr<TKqpCounters>); void InjectRangeEvReadSettings(const NKikimrTxDataShard::TEvRead&); diff --git a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp 
b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp index 3940519fb8..94b585b4f1 100644 --- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp +++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp @@ -761,7 +761,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { session.Close(); } - Y_UNIT_TEST(QueryOltpAndOlap) { + Y_UNIT_TEST(ScanQueryOltpAndOlap) { auto settings = TKikimrSettings() .SetWithSampleTables(false); TKikimrRunner kikimr(settings); @@ -793,6 +793,36 @@ Y_UNIT_TEST_SUITE(KqpOlap) { } } + Y_UNIT_TEST(YqlScriptOltpAndOlap) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + + // EnableDebugLogging(kikimr); + + TLocalHelper(kikimr).CreateTestOlapTable(); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000, 3); + + CreateSampleOltpTable(kikimr); + + { + NScripting::TScriptingClient client(kikimr.GetDriver()); + auto it = client.ExecuteYqlScript(R"( + --!syntax_v1 + + SELECT a.`resource_id`, a.`timestamp`, t.* + FROM `/Root/OltpTable` AS t + JOIN `/Root/olapStore/olapTable` AS a ON CAST(t.Key AS Utf8) = a.resource_id + ORDER BY a.`resource_id`, a.`timestamp` + )").GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = FormatResultSetYson(it.GetResultSet(0)); + Cout << result << Endl; + CompareYson(result, R"([[[1u];["Value-001"];["1"];["1"];1000001u];[[2u];["Value-002"];["2"];["2"];1000002u]])"); + } + } + Y_UNIT_TEST(EmptyRange) { auto settings = TKikimrSettings() .SetWithSampleTables(false); @@ -4156,6 +4186,98 @@ Y_UNIT_TEST_SUITE(KqpOlap) { TestTableWithNulls({ testCase }); } + + Y_UNIT_TEST(Olap_InsertFails) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false) + .SetEnableOlapSchemaOperations(true); + TKikimrRunner kikimr(settings); + + EnableDebugLogging(kikimr); + TTableWithNullsHelper(kikimr).CreateTableWithNulls(); + + auto tableClient = kikimr.GetTableClient(); + + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + + auto result = 
session.ExecuteDataQuery(R"( + INSERT INTO `/Root/tableWithNulls`(id, resource_id, level) VALUES(1, "1", 1); + )", TTxControl::BeginTx().CommitTx()).GetValueSync(); + + UNIT_ASSERT_C(!result.IsSuccess(), result.GetIssues().ToString()); + } + + Y_UNIT_TEST(OlapRead_FailsOnDataQuery) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false) + .SetEnableOlapSchemaOperations(true); + TKikimrRunner kikimr(settings); + + EnableDebugLogging(kikimr); + TTableWithNullsHelper(kikimr).CreateTableWithNulls(); + TLocalHelper(kikimr).CreateTestOlapTable(); + + auto tableClient = kikimr.GetTableClient(); + + { + WriteTestDataForTableWithNulls(kikimr, "/Root/tableWithNulls"); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000, 2); + } + + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + + auto result = session.ExecuteDataQuery(R"( + SELECT * FROM `/Root/tableWithNulls`; + )", TTxControl::BeginTx().CommitTx()).GetValueSync(); + + UNIT_ASSERT_C(!result.IsSuccess(), result.GetIssues().ToString()); + } + + Y_UNIT_TEST(OlapRead_UsesScanOnJoin) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false) + .SetEnableOlapSchemaOperations(true); + TKikimrRunner kikimr(settings); + + EnableDebugLogging(kikimr); + TTableWithNullsHelper(kikimr).CreateTableWithNulls(); + TLocalHelper(kikimr).CreateTestOlapTable(); + + { + WriteTestDataForTableWithNulls(kikimr, "/Root/tableWithNulls"); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000, 2); + } + + NScripting::TScriptingClient client(kikimr.GetDriver()); + auto result = client.ExecuteYqlScript(R"( + SELECT * FROM `/Root/olapStore/olapTable` WHERE resource_id IN (SELECT CAST(id AS Utf8) FROM `/Root/tableWithNulls`); + )").GetValueSync(); + + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } + + Y_UNIT_TEST(OlapRead_UsesScanOnJoinWithDataShardTable) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false) + .SetEnableOlapSchemaOperations(true); + 
TKikimrRunner kikimr(settings); + + EnableDebugLogging(kikimr); + TTableWithNullsHelper(kikimr).CreateTableWithNulls(); + TLocalHelper(kikimr).CreateTestOlapTable(); + + { + WriteTestDataForTableWithNulls(kikimr, "/Root/tableWithNulls"); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000, 2); + } + + NScripting::TScriptingClient client(kikimr.GetDriver()); + auto result = client.ExecuteYqlScript(R"( + SELECT * FROM `/Root/olapStore/olapTable` WHERE resource_id IN (SELECT CAST(id AS Utf8) FROM `/Root/tableWithNulls`); + )").GetValueSync(); + + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } } } // namespace NKqp diff --git a/ydb/core/kqp/ut/scan/kqp_split_ut.cpp b/ydb/core/kqp/ut/scan/kqp_split_ut.cpp index 0cf0526973..bfae34d9d5 100644 --- a/ydb/core/kqp/ut/scan/kqp_split_ut.cpp +++ b/ydb/core/kqp/ut/scan/kqp_split_ut.cpp @@ -392,6 +392,13 @@ Y_UNIT_TEST_SUITE(KqpSplit) { Runtime = Server->GetRuntime(); KqpProxy = MakeKqpProxyID(Runtime->GetNodeId(0)); + { + auto settings = MakeIntrusive<TIteratorReadBackoffSettings>(); + settings->StartRetryDelay = TDuration::MilliSeconds(250); + settings->MaxShardAttempts = 4; + SetReadIteratorBackoffSettings(settings); + } + Sender = Runtime->AllocateEdgeActor(); CollectKeysTo(&CollectedKeys, Runtime, Sender); diff --git a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp index 5697cd6470..cdb02bfa51 100644 --- a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp +++ b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp @@ -1558,6 +1558,45 @@ Y_UNIT_TEST_SUITE(KqpScheme) { CreateTableWithTtlSettings(true); } + void CreateTableWithTtlOnIntColumn(TValueSinceUnixEpochModeSettings::EUnit unit) { + TKikimrRunner kikimr; + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + TString tableName = "/Root/TableWithTtlSettings"; + + auto query = TStringBuilder() << R"( + --!syntax_v1 + CREATE TABLE `)" << tableName << R"(` ( + Key Uint64, + 
IntColumn Uint64, + PRIMARY KEY (Key) + ) WITH ( + TTL = Interval("P1D") ON IntColumn AS )" << unit << R"( + ))"; + { + auto result = session.ExecuteSchemeQuery(query).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + { + auto result = session.DescribeTable(tableName).ExtractValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetTableDescription().GetTtlSettings()->GetValueSinceUnixEpoch().GetColumnUnit(), unit); + } + } + + Y_UNIT_TEST(CreateTableWithTtlOnIntColumn) { + const auto cases = TVector<TValueSinceUnixEpochModeSettings::EUnit>{ + TValueSinceUnixEpochModeSettings::EUnit::Seconds, + TValueSinceUnixEpochModeSettings::EUnit::MilliSeconds, + TValueSinceUnixEpochModeSettings::EUnit::MicroSeconds, + TValueSinceUnixEpochModeSettings::EUnit::NanoSeconds, + }; + + for (auto unit : cases) { + CreateTableWithTtlOnIntColumn(unit); + } + } + void CreateTableWithUniformPartitions(bool compat) { TKikimrRunner kikimr; auto db = kikimr.GetTableClient(); diff --git a/ydb/core/mind/node_broker.cpp b/ydb/core/mind/node_broker.cpp index 829717d598..6d49f9015c 100644 --- a/ydb/core/mind/node_broker.cpp +++ b/ydb/core/mind/node_broker.cpp @@ -68,18 +68,9 @@ void TNodeBroker::OnTabletDead(TEvTablet::TEvTabletDead::TPtr &ev, Die(ctx); } -void TNodeBroker::Enqueue(TAutoPtr<IEventHandle> &ev, - const TActorContext &ctx) -{ - switch (ev->GetTypeRewrite()) { - case TEvNodeBroker::EvListNodes: - case TEvNodeBroker::EvResolveNode: - case TEvNodeBroker::EvRegistrationRequest: - EnqueuedEvents.push_back(ev); - [[fallthrough]]; // AUTOGENERATED_FALLTHROUGH_FIXME - default: - TTabletExecutedFlat::Enqueue(ev, ctx); - } +void TNodeBroker::DefaultSignalTabletActive(const TActorContext &ctx) +{ + Y_UNUSED(ctx); } bool TNodeBroker::OnRenderAppHtmlPage(NMon::TEvRemoteHttpInfo::TPtr ev, @@ -286,13 +277,6 @@ void TNodeBroker::ScheduleEpochUpdate(const 
TActorContext &ctx) } } -void TNodeBroker::ProcessEnqueuedEvents(const TActorContext &ctx) -{ - for (auto &ev : EnqueuedEvents) - Receive(ev, ctx); - EnqueuedEvents.clear(); -} - void TNodeBroker::FillNodeInfo(const TNodeInfo &node, NKikimrNodeBroker::TNodeInfo &info) const { diff --git a/ydb/core/mind/node_broker__load_state.cpp b/ydb/core/mind/node_broker__load_state.cpp index 3518e14781..7cca5bea2c 100644 --- a/ydb/core/mind/node_broker__load_state.cpp +++ b/ydb/core/mind/node_broker__load_state.cpp @@ -40,7 +40,7 @@ public: Self->SubscribeForConfigUpdates(ctx); Self->ScheduleEpochUpdate(ctx); Self->PrepareEpochCache(); - Self->ProcessEnqueuedEvents(ctx); + Self->SignalTabletActive(ctx); Self->TxCompleted(this, ctx); } diff --git a/ydb/core/mind/node_broker_impl.h b/ydb/core/mind/node_broker_impl.h index cbdb461832..884a06fafb 100644 --- a/ydb/core/mind/node_broker_impl.h +++ b/ydb/core/mind/node_broker_impl.h @@ -135,8 +135,7 @@ private: void OnDetach(const TActorContext &ctx) override; void OnTabletDead(TEvTablet::TEvTabletDead::TPtr &ev, const TActorContext &ctx) override; - void Enqueue(TAutoPtr<IEventHandle> &ev, - const TActorContext &ctx) override; + void DefaultSignalTabletActive(const TActorContext &ctx) override; bool OnRenderAppHtmlPage(NMon::TEvRemoteHttpInfo::TPtr ev, const TActorContext &ctx) override; void Cleanup(const TActorContext &ctx); @@ -220,7 +219,6 @@ private: void ProcessDelayedListNodesRequests(); void ScheduleEpochUpdate(const TActorContext &ctx); - void ProcessEnqueuedEvents(const TActorContext &ctx); void FillNodeInfo(const TNodeInfo &node, NKikimrNodeBroker::TNodeInfo &info) const; @@ -310,7 +308,6 @@ private: ui64 ConfigSubscriptionId; // Events collected during initialization phase. - TVector<TAutoPtr<IEventHandle>> EnqueuedEvents; TMultiMap<ui64, TEvNodeBroker::TEvListNodes::TPtr> DelayedListNodesRequests; // Transactions queue. 
TTxProcessor::TPtr TxProcessor; diff --git a/ydb/core/mon/async_http_mon.cpp b/ydb/core/mon/async_http_mon.cpp index 7805201ee7..fa5d69165a 100644 --- a/ydb/core/mon/async_http_mon.cpp +++ b/ydb/core/mon/async_http_mon.cpp @@ -394,6 +394,7 @@ public: STATEFN(StateWork) { switch (ev->GetTypeRewrite()) { hFunc(NHttp::TEvHttpProxy::TEvHttpIncomingRequest, Handle); + cFunc(TEvents::TSystem::Poison, PassAway); } } @@ -458,6 +459,7 @@ public: STATEFN(StateWork) { switch (ev->GetTypeRewrite()) { hFunc(NHttp::TEvHttpProxy::TEvHttpIncomingRequest, Handle); + cFunc(TEvents::TSystem::Poison, PassAway); } } @@ -660,6 +662,7 @@ public: switch (ev->GetTypeRewrite()) { hFunc(NHttp::TEvHttpProxy::TEvHttpIncomingRequest, Handle); hFunc(TEvMon::TEvMonitoringRequest, Handle); + cFunc(TEvents::TSystem::Poison, PassAway); } } @@ -729,7 +732,7 @@ void TAsyncHttpMon::Stop() { IndexMonPage->ClearPages(); // it's required to avoid loop-reference if (ActorSystem) { TGuard<TMutex> g(Mutex); - for (const TActorId& actorId : ActorServices) { + for (const auto& [path, actorId] : ActorServices) { ActorSystem->Send(actorId, new TEvents::TEvPoisonPill); } ActorSystem->Send(NodeProxyServiceActorId, new TEvents::TEvPoisonPill); @@ -752,12 +755,15 @@ NMonitoring::TIndexMonPage* TAsyncHttpMon::RegisterIndexPage(const TString& path void TAsyncHttpMon::RegisterActorMonPage(const TActorMonPageInfo& pageInfo) { if (ActorSystem) { TActorMonPage* actorMonPage = static_cast<TActorMonPage*>(pageInfo.Page.Get()); - auto actorId = ActorSystem->Register( + auto& actorId = ActorServices[pageInfo.Path]; + if (actorId) { + ActorSystem->Send(new IEventHandle(TEvents::TSystem::Poison, 0, actorId, {}, nullptr, 0)); + } + actorId = ActorSystem->Register( new THttpMonServiceLegacyActor(actorMonPage), TMailboxType::ReadAsFilled, ActorSystem->AppData<NKikimr::TAppData>()->UserPoolId); ActorSystem->Send(HttpProxyActorId, new NHttp::TEvHttpProxy::TEvRegisterHandler(pageInfo.Path, actorId)); - 
ActorServices.push_back(actorId); } } @@ -774,7 +780,9 @@ NMonitoring::IMonPage* TAsyncHttpMon::RegisterActorPage(TRegisterActorPageFields fields.UseAuth ? Config.Authorizer : TRequestAuthorizer()); if (fields.Index) { fields.Index->Register(page); - fields.Index->SortPages(); + if (fields.SortPages) { + fields.Index->SortPages(); + } } else { Register(page.Get()); } diff --git a/ydb/core/mon/async_http_mon.h b/ydb/core/mon/async_http_mon.h index aff58155b4..3bed45c106 100644 --- a/ydb/core/mon/async_http_mon.h +++ b/ydb/core/mon/async_http_mon.h @@ -41,7 +41,7 @@ protected: TMutex Mutex; std::vector<TActorMonPageInfo> ActorMonPages; - std::vector<TActorId> ActorServices; + THashMap<TString, TActorId> ActorServices; void RegisterActorMonPage(const TActorMonPageInfo& pageInfo); }; diff --git a/ydb/core/mon/mon.cpp b/ydb/core/mon/mon.cpp index b32266d911..30874e675c 100644 --- a/ydb/core/mon/mon.cpp +++ b/ydb/core/mon/mon.cpp @@ -8,7 +8,7 @@ namespace NActors { using namespace NMonitoring; IMonPage* TMon::RegisterActorPage(TIndexMonPage* index, const TString& relPath, - const TString& title, bool preTag, TActorSystem* actorSystem, const TActorId& actorId, bool useAuth) { + const TString& title, bool preTag, TActorSystem* actorSystem, const TActorId& actorId, bool useAuth, bool sortPages) { return RegisterActorPage({ .Title = title, .RelPath = relPath, @@ -17,6 +17,7 @@ IMonPage* TMon::RegisterActorPage(TIndexMonPage* index, const TString& relPath, .PreTag = preTag, .ActorId = actorId, .UseAuth = useAuth, + .SortPages = sortPages, }); } diff --git a/ydb/core/mon/mon.h b/ydb/core/mon/mon.h index 8bf82ca724..ab69f0c3f8 100644 --- a/ydb/core/mon/mon.h +++ b/ydb/core/mon/mon.h @@ -47,11 +47,12 @@ public: TActorId ActorId; bool UseAuth = true; TVector<TString> AllowedSIDs; + bool SortPages = true; }; virtual NMonitoring::IMonPage* RegisterActorPage(TRegisterActorPageFields fields) = 0; NMonitoring::IMonPage* RegisterActorPage(NMonitoring::TIndexMonPage* index, const 
TString& relPath, - const TString& title, bool preTag, TActorSystem* actorSystem, const TActorId& actorId, bool useAuth = true); + const TString& title, bool preTag, TActorSystem* actorSystem, const TActorId& actorId, bool useAuth = true, bool sortPages = true); virtual NMonitoring::IMonPage* RegisterCountersPage(const TString& path, const TString& title, TIntrusivePtr<::NMonitoring::TDynamicCounters> counters) = 0; virtual NMonitoring::IMonPage* FindPage(const TString& relPath) = 0; }; diff --git a/ydb/core/mon/sync_http_mon.cpp b/ydb/core/mon/sync_http_mon.cpp index 6c863c55c1..8506344933 100644 --- a/ydb/core/mon/sync_http_mon.cpp +++ b/ydb/core/mon/sync_http_mon.cpp @@ -77,7 +77,9 @@ namespace NActors { fields.UseAuth ? Config.Authorizer : TRequestAuthorizer()); if (fields.Index) { fields.Index->Register(page); - fields.Index->SortPages(); + if (fields.SortPages) { + fields.Index->SortPages(); + } } else { Register(page); } diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto index 78ac72adc7..158dbc32f5 100644 --- a/ydb/core/protos/config.proto +++ b/ydb/core/protos/config.proto @@ -437,6 +437,7 @@ message TInterconnectConfig { optional bool SuppressConnectivityCheck = 39 [default = false]; optional uint32 PreallocatedBufferSize = 40; optional uint32 NumPreallocatedBuffers = 41; + optional uint32 SocketBacklogSize = 45; // SOMAXCONN if not set or zero // ballast is added to IC handshake frames to ensure correctness of jumbo frames transmission over network optional uint32 HandshakeBallastSize = 14; @@ -453,6 +454,8 @@ message TInterconnectConfig { optional NKikimrConfigUnits.TDuration ForceConfirmPeriodDuration = 27; optional NKikimrConfigUnits.TDuration LostConnectionDuration = 28; optional NKikimrConfigUnits.TDuration BatchPeriodDuration = 29; + + optional uint32 OutgoingHandshakeInflightLimit = 43; } message TChannelProfileConfig { @@ -1211,6 +1214,18 @@ message TTableServiceConfig { optional uint32 MaxRetryNumber = 3 [default = 10]; } + 
message TIteratorReadsRetrySettings { + optional uint32 StartDelayMs = 1; + optional uint32 MaxDelayMs = 8; + + optional uint32 MaxShardRetries = 2; + optional uint32 MaxShardResolves = 3; + optional double UnsertaintyRatio = 4; + optional double Multiplier = 5; + optional uint32 IteratorResponseTimeoutMs = 6; + optional uint32 MaxTotalRetries = 7; + } + optional uint32 QueryLimitBytes = 1; optional uint32 ParametersLimitBytes = 2; optional uint32 SessionsLimitPerNode = 3; @@ -1249,6 +1264,7 @@ message TTableServiceConfig { optional bool EnablePredicateExtractForScanQueries = 36 [default = true]; optional bool EnablePredicateExtractForDataQueries = 37 [default = true]; optional bool EnableKqpImmediateEffects = 38 [default = false]; + optional TIteratorReadsRetrySettings IteratorReadsRetrySettings = 41; }; // Config describes immediate controls and allows @@ -1363,7 +1379,7 @@ message TImmediateControlsConfig { Description: "Enables experimental persistent locked writes", MinValue: 0, MaxValue: 1, - DefaultValue: 0 }]; + DefaultValue: 1 }]; optional uint64 MaxLockedWritesPerKey = 15 [(ControlOptions) = { Description: "Maximum number of uncommitted locked writes per key", MinValue: 0, @@ -1420,10 +1436,39 @@ message TImmediateControlsConfig { DefaultValue: 0 }]; } + message TTCMallocControls { + optional uint64 ProfileSamplingRate = 1 [(ControlOptions) = { + Description: "Sets the sampling rate for heap profiles. TCMalloc samples approximately every rate bytes allocated.", + MinValue: 65536, + MaxValue: 4294967296, + DefaultValue: 2097152 }]; + optional uint64 GuardedSamplingRate = 2 [(ControlOptions) = { + Description: "Sets the guarded sampling rate for sampled allocations. 
TCMalloc samples approximately every rate bytes allocated, subject to implementation limitations in GWP-ASan.", + MinValue: 65536, + MaxValue: 4294967296, + DefaultValue: 4294967296 }]; + optional uint64 MemoryLimit = 3 [(ControlOptions) = { + Description: "Make a best effort attempt to prevent more than limit bytes of memory from being allocated by the system.", + MinValue: 0, + MaxValue: 9223372036854775807, + DefaultValue: 0 }]; + optional uint64 PageCacheTargetSize = 4 [(ControlOptions) = { + Description: "Page Cache Target Size.", + MinValue: 0, + MaxValue: 137438953472, + DefaultValue: 536870912 }]; + optional uint64 PageCacheReleaseRate = 5 [(ControlOptions) = { + Description: "Page Cache Release Rate.", + MinValue: 0, + MaxValue: 134217728, + DefaultValue: 8388608 }]; + } + optional TDataShardControls DataShardControls = 1; optional TTxLimitControls TxLimitControls = 2; optional TCoordinatorControls CoordinatorControls = 3; optional TSchemeShardControls SchemeShardControls = 4; + optional TTCMallocControls TCMallocControls = 5; }; message TMeteringConfig { diff --git a/ydb/core/protos/console_config.proto b/ydb/core/protos/console_config.proto index 5c1d0c039d..7309041aeb 100644 --- a/ydb/core/protos/console_config.proto +++ b/ydb/core/protos/console_config.proto @@ -270,7 +270,8 @@ message TGetAllConfigsRequest { } message TGetAllConfigsResponse { - optional Ydb.DynamicConfig.GetConfigResult Response = 1; + reserved 1; + optional Ydb.DynamicConfig.GetConfigResult Response = 2; } message TGetNodeLabelsRequest { diff --git a/ydb/core/protos/flat_scheme_op.proto b/ydb/core/protos/flat_scheme_op.proto index 1ab1a3eaeb..52ad83dfe8 100644 --- a/ydb/core/protos/flat_scheme_op.proto +++ b/ydb/core/protos/flat_scheme_op.proto @@ -101,6 +101,10 @@ message TStorageConfig { optional uint32 ExternalThreshold = 6; } +message TKeyValueStorageConfig { + repeated TStorageSettings Channel = 3; +} + message TFamilyDescription { optional uint32 Id = 1; optional uint32 Room = 
2; // Used by datashard, must not be used by users @@ -1089,6 +1093,8 @@ message TCreateSolomonVolume { optional uint64 PartitionCount = 3; // it is a mutually exclusive parametr repeated TAdoptedPartition AdoptedPartitions = 4; // with this one + + optional TKeyValueStorageConfig StorageConfig = 5; } message TAlterSolomonVolume { @@ -1099,6 +1105,8 @@ message TAlterSolomonVolume { optional uint64 PartitionCount = 3; optional bool UpdateChannelsBinding = 4 [default = false]; + + optional TKeyValueStorageConfig StorageConfig = 5; } message TBlockStoreAssignOp { diff --git a/ydb/core/protos/tx_datashard.proto b/ydb/core/protos/tx_datashard.proto index 7fe189aab0..1adf90944f 100644 --- a/ydb/core/protos/tx_datashard.proto +++ b/ydb/core/protos/tx_datashard.proto @@ -1643,6 +1643,9 @@ message TEvRead { // When specified requests are handled in reverse order as well as range reads optional bool Reverse = 10; + // Limits total number of rows which iterator can read. + optional uint64 TotalRowsLimit = 12; + // Request must contain either keys, queries or program // mixed requests are not supported diff --git a/ydb/core/tablet/tablet_counters_aggregator.cpp b/ydb/core/tablet/tablet_counters_aggregator.cpp index a88826428d..8b28e47e69 100644 --- a/ydb/core/tablet/tablet_counters_aggregator.cpp +++ b/ydb/core/tablet/tablet_counters_aggregator.cpp @@ -315,7 +315,8 @@ public: TTabletTypes::DataShard, CountersByTabletType); auto hasSchemeshard = (bool)FindCountersByTabletType( TTabletTypes::SchemeShard, CountersByTabletType); - YdbCounters->Initialize(Counters, hasDatashard, hasSchemeshard); + bool hasColumnShard = static_cast<bool>(FindCountersByTabletType(TTabletTypes::ColumnShard, CountersByTabletType)); + YdbCounters->Initialize(Counters, hasDatashard, hasSchemeshard, hasColumnShard); YdbCounters->Transform(); } } @@ -758,6 +759,10 @@ private: TCounterPtr ScanBytes; TCounterPtr DatashardRowCount; TCounterPtr DatashardSizeBytes; + TCounterPtr ColumnShardScanRows_; + 
TCounterPtr ColumnShardScanBytes_; + TCounterPtr ColumnShardBulkUpsertRows_; + TCounterPtr ColumnShardBulkUpsertBytes_; TCounterPtr ResourcesStorageUsedBytes; TCounterPtr ResourcesStorageLimitBytes; TCounterPtr ResourcesStorageTableUsedBytes; @@ -787,6 +792,11 @@ private: TCounterPtr DbUniqueDataBytes; THistogramPtr ConsumedCpuHistogram; + TCounterPtr ColumnShardScannedBytes_; + TCounterPtr ColumnShardScannedRows_; + TCounterPtr ColumnShardUpsertBlobsWritten_; + TCounterPtr ColumnShardUpsertBytesWritten_; + TCounterPtr DiskSpaceTablesTotalBytes; TCounterPtr DiskSpaceTopicsTotalBytes; TCounterPtr DiskSpaceSoftQuotaBytes; @@ -826,6 +836,15 @@ private: DatashardSizeBytes = ydbGroup->GetNamedCounter("name", "table.datashard.size_bytes", false); + ColumnShardScanRows_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.scan.rows", false); + ColumnShardScanBytes_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.scan.bytes", false); + ColumnShardBulkUpsertRows_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.bulk_upsert.rows", false); + ColumnShardBulkUpsertBytes_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.bulk_upsert.bytes", false); + ResourcesStorageUsedBytes = ydbGroup->GetNamedCounter("name", "resources.storage.used_bytes", false); ResourcesStorageLimitBytes = ydbGroup->GetNamedCounter("name", @@ -856,7 +875,7 @@ private: "table.datashard.used_core_percents", NMonitoring::LinearHistogram(12, 0, 10), false); }; - void Initialize(::NMonitoring::TDynamicCounterPtr counters, bool hasDatashard, bool hasSchemeshard) { + void Initialize(::NMonitoring::TDynamicCounterPtr counters, bool hasDatashard, bool hasSchemeshard, bool hasColumnShard) { if (hasDatashard && !RowUpdates) { auto datashardGroup = counters->GetSubgroup("type", "DataShard"); auto appGroup = datashardGroup->GetSubgroup("category", "app"); @@ -881,6 +900,16 @@ private: ConsumedCpuHistogram = execGroup->FindHistogram("HIST(ConsumedCPU)"); } + if (hasColumnShard && 
!ColumnShardScannedBytes_) { + auto columnshardGroup = counters->GetSubgroup("type", "ColumnShard"); + auto appGroup = columnshardGroup->GetSubgroup("category", "app"); + + ColumnShardScannedBytes_ = appGroup->GetCounter("ColumnShard/ScannedBytes"); + ColumnShardScannedRows_ = appGroup->GetCounter("ColumnShard/ScannedRows"); + ColumnShardUpsertBlobsWritten_ = appGroup->GetCounter("ColumnShard/UpsertBlobsWritten"); + ColumnShardUpsertBytesWritten_ = appGroup->GetCounter("ColumnShard/UpsertBytesWritten"); + } + if (hasSchemeshard && !DiskSpaceTablesTotalBytes) { auto schemeshardGroup = counters->GetSubgroup("type", "SchemeShard"); auto appGroup = schemeshardGroup->GetSubgroup("category", "app"); @@ -917,6 +946,13 @@ private: } } + if (ColumnShardScannedBytes_) { + ColumnShardScanRows_->Set(ColumnShardScannedRows_->Val()); + ColumnShardScanBytes_->Set(ColumnShardScannedBytes_->Val()); + ColumnShardBulkUpsertRows_->Set(ColumnShardUpsertBlobsWritten_->Val()); + ColumnShardBulkUpsertBytes_->Set(ColumnShardUpsertBytesWritten_->Val()); + } + if (DiskSpaceTablesTotalBytes) { ResourcesStorageLimitBytes->Set(DiskSpaceSoftQuotaBytes->Val()); ResourcesStorageTableUsedBytes->Set(DiskSpaceTablesTotalBytes->Val()); @@ -1000,7 +1036,8 @@ public: if (YdbCounters) { auto hasDatashard = (bool)GetCounters(TTabletTypes::DataShard); auto hasSchemeshard = (bool)GetCounters(TTabletTypes::SchemeShard); - YdbCounters->Initialize(SolomonCounters, hasDatashard, hasSchemeshard); + auto hasColumnshard = static_cast<bool>(GetCounters(TTabletTypes::ColumnShard)); + YdbCounters->Initialize(SolomonCounters, hasDatashard, hasSchemeshard, hasColumnshard); YdbCounters->Transform(); } } diff --git a/ydb/core/tablet/tablet_counters_aggregator_ut.cpp b/ydb/core/tablet/tablet_counters_aggregator_ut.cpp index 8d9318696c..99b03f2ac2 100644 --- a/ydb/core/tablet/tablet_counters_aggregator_ut.cpp +++ b/ydb/core/tablet/tablet_counters_aggregator_ut.cpp @@ -91,10 +91,11 @@ void TestHeavy(const ui32 v, ui32 
numWorkers) { Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { struct TTabletWithHist { - TTabletWithHist(ui64 tabletId) + TTabletWithHist(ui64 tabletId, const TTabletTypes::EType tabletType) : TabletId(tabletId) , TenantPathId(1113, 1001) , CounterEventsInFlight(new TEvTabletCounters::TInFlightCookie) + , TabletType(tabletType) , ExecutorCounters(new TTabletCountersBase) { auto simpleCount = sizeof(SimpleCountersMetaInfo) / sizeof(SimpleCountersMetaInfo[0]); @@ -157,11 +158,11 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { } public: - static ::NMonitoring::TDynamicCounterPtr GetAppCounters(TTestBasicRuntime& runtime) { + static ::NMonitoring::TDynamicCounterPtr GetAppCounters(TTestBasicRuntime& runtime, const TTabletTypes::EType tabletType) { ::NMonitoring::TDynamicCounterPtr counters = runtime.GetAppData(0).Counters; UNIT_ASSERT(counters); - TString tabletTypeStr = TTabletTypes::TypeToStr(TabletType); + TString tabletTypeStr = TTabletTypes::TypeToStr(tabletType); auto dsCounters = counters->GetSubgroup("counters", "tablets")->GetSubgroup("type", tabletTypeStr); return dsCounters->GetSubgroup("category", "app"); } @@ -185,12 +186,12 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { return StringToIndex(name, PercentileCountersMetaInfo); } - static NMonitoring::THistogramPtr GetHistogram(TTestBasicRuntime& runtime, const char* name) { + static NMonitoring::THistogramPtr GetHistogram(TTestBasicRuntime& runtime, const char* name, const TTabletTypes::EType tabletType) { size_t index = PercentileNameToIndex(name); - return GetAppCounters(runtime)->FindHistogram(PercentileCountersMetaInfo[index]); + return GetAppCounters(runtime, tabletType)->FindHistogram(PercentileCountersMetaInfo[index]); } - static std::vector<ui64> GetOldHistogram(TTestBasicRuntime& runtime, const char* name) { + static std::vector<ui64> GetOldHistogram(TTestBasicRuntime& runtime, const char* name, const TTabletTypes::EType tabletType) { size_t index = PercentileNameToIndex(name); auto rangesArray 
= RangeDefs[index].first; auto rangeCount = RangeDefs[index].second; @@ -200,7 +201,7 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { ranges.back().RangeName = "inf"; ranges.back().RangeVal = Max<ui64>(); - auto appCounters = GetAppCounters(runtime); + auto appCounters = GetAppCounters(runtime, tabletType); std::vector<ui64> buckets; for (auto i: xrange(ranges.size())) { auto subGroup = appCounters->GetSubgroup("range", ranges[i].RangeName); @@ -217,10 +218,12 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { TTestBasicRuntime& runtime, const char* name, const std::vector<ui64>& goldValuesNew, - const std::vector<ui64>& goldValuesOld) + const std::vector<ui64>& goldValuesOld, + const TTabletTypes::EType tabletType + ) { // new stype histogram - auto histogram = TTabletWithHist::GetHistogram(runtime, name); + auto histogram = TTabletWithHist::GetHistogram(runtime, name, tabletType); UNIT_ASSERT(histogram); auto snapshot = histogram->Snapshot(); UNIT_ASSERT(snapshot); @@ -236,7 +239,7 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { } // old histogram - auto values = TTabletWithHist::GetOldHistogram(runtime, name); + auto values = TTabletWithHist::GetOldHistogram(runtime, name, tabletType); UNIT_ASSERT_VALUES_EQUAL(values.size(), goldValuesOld.size()); UNIT_ASSERT_VALUES_EQUAL(values, goldValuesOld); } @@ -245,6 +248,7 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { ui64 TabletId; TPathId TenantPathId; TIntrusivePtr<TEvTabletCounters::TInFlightCookie> CounterEventsInFlight; + const TTabletTypes::EType TabletType; std::unique_ptr<TTabletCountersBase> ExecutorCounters; std::unique_ptr<TTabletCountersBase> ExecutorCountersBaseline; @@ -253,8 +257,6 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { std::unique_ptr<TTabletCountersBase> AppCountersBaseline; public: - static constexpr TTabletTypes::EType TabletType = TTabletTypes::DataShard; - static constexpr TTabletPercentileCounter::TRangeDef RangeDefs1[] = { {0, "0"} }; @@ -305,12 +307,12 @@ 
Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { options.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, 1); runtime.DispatchEvents(options); - TTabletWithHist tablet1(1); + TTabletWithHist tablet1(1, TTabletTypes::DataShard); tablet1.SetSimpleCount("CountSingleBucket", 1); tablet1.SendUpdate(runtime, aggregatorId, edge); - TTabletWithHist tablet2(2); + TTabletWithHist tablet2(2, TTabletTypes::DataShard); tablet2.SetSimpleCount("CountSingleBucket", 13); tablet2.SendUpdate(runtime, aggregatorId, edge); @@ -318,7 +320,8 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { runtime, "HIST(CountSingleBucket)", {0, 2}, - {0, 2} + {0, 2}, + TTabletTypes::DataShard ); // sanity check we didn't mess other histograms @@ -327,21 +330,24 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { runtime, "MyHist", {0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0} + {0, 0, 0, 0, 0}, + TTabletTypes::DataShard ); TTabletWithHist::CheckHistogram( runtime, "HIST(Count)", {2, 0, 0, 0, 0}, - {2, 0, 0, 0, 0} + {2, 0, 0, 0, 0}, + TTabletTypes::DataShard ); TTabletWithHist::CheckHistogram( runtime, "MyHistSingleBucket", {0, 0}, - {0, 0} + {0, 0}, + TTabletTypes::DataShard ); } @@ -361,7 +367,7 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { options.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, 1); runtime.DispatchEvents(options); - TTabletWithHist tablet1(1); + TTabletWithHist tablet1(1, TTabletTypes::DataShard); tablet1.SetSimpleCount("Count", 1); tablet1.SendUpdate(runtime, aggregatorId, edge); @@ -370,10 +376,11 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { runtime, "HIST(Count)", {0, 1, 0, 0, 0}, - {0, 1, 0, 0, 0} + {0, 1, 0, 0, 0}, + TTabletTypes::DataShard ); - TTabletWithHist tablet2(2); + TTabletWithHist tablet2(2, TTabletTypes::DataShard); tablet2.SetSimpleCount("Count", 13); tablet2.SendUpdate(runtime, aggregatorId, edge); @@ -381,10 +388,11 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { runtime, "HIST(Count)", {0, 1, 1, 0, 0}, - {0, 1, 1, 0, 0} + {0, 1, 1, 0, 0}, + 
TTabletTypes::DataShard ); - TTabletWithHist tablet3(3); + TTabletWithHist tablet3(3, TTabletTypes::DataShard); tablet3.SetSimpleCount("Count", 1); tablet3.SendUpdate(runtime, aggregatorId, edge); @@ -392,7 +400,8 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { runtime, "HIST(Count)", {0, 2, 1, 0, 0}, - {0, 2, 1, 0, 0} + {0, 2, 1, 0, 0}, + TTabletTypes::DataShard ); tablet3.SetSimpleCount("Count", 13); @@ -402,7 +411,8 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { runtime, "HIST(Count)", {0, 1, 2, 0, 0}, - {0, 1, 2, 0, 0} + {0, 1, 2, 0, 0}, + TTabletTypes::DataShard ); tablet3.ForgetTablet(runtime, aggregatorId, edge); @@ -411,7 +421,8 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { runtime, "HIST(Count)", {0, 1, 1, 0, 0}, - {0, 1, 1, 0, 0} + {0, 1, 1, 0, 0}, + TTabletTypes::DataShard ); // sanity check we didn't mess other histograms @@ -420,21 +431,24 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { runtime, "MyHist", {0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0} + {0, 0, 0, 0, 0}, + TTabletTypes::DataShard ); TTabletWithHist::CheckHistogram( runtime, "HIST(CountSingleBucket)", {2, 0}, - {2, 0} + {2, 0}, + TTabletTypes::DataShard ); TTabletWithHist::CheckHistogram( runtime, "MyHistSingleBucket", {0, 0}, - {0, 0} + {0, 0}, + TTabletTypes::DataShard ); } @@ -457,7 +471,7 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { options.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, 1); runtime.DispatchEvents(options); - TTabletWithHist tablet1(1); + TTabletWithHist tablet1(1, TTabletTypes::DataShard); tablet1.SetSimpleCount("Count", Max<i64>() - 100UL); tablet1.SendUpdate(runtime, aggregatorId, edge); @@ -466,10 +480,11 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { runtime, "HIST(Count)", {0, 0, 0, 0, 1}, - {0, 0, 0, 0, 1} + {0, 0, 0, 0, 1}, + TTabletTypes::DataShard ); - TTabletWithHist tablet2(2); + TTabletWithHist tablet2(2, TTabletTypes::DataShard); tablet2.SetSimpleCount("Count", 100); tablet2.SendUpdate(runtime, aggregatorId, edge); @@ -477,7 +492,8 @@ 
Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { runtime, "HIST(Count)", {0, 0, 0, 0, 2}, - {0, 0, 0, 0, 2} + {0, 0, 0, 0, 2}, + TTabletTypes::DataShard ); } @@ -498,7 +514,7 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { options.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, 1); runtime.DispatchEvents(options); - TTabletWithHist tablet1(1); + TTabletWithHist tablet1(1, TTabletTypes::DataShard); tablet1.UpdatePercentile("MyHist", 1); tablet1.SendUpdate(runtime, aggregatorId, edge); tablet1.SendUpdate(runtime, aggregatorId, edge); @@ -507,7 +523,8 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { runtime, "MyHist", {0, 1, 0, 0, 0}, - {0, 1, 0, 0, 0} + {0, 1, 0, 0, 0}, + TTabletTypes::DataShard ); tablet1.UpdatePercentile("MyHist", 13); @@ -518,7 +535,8 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { runtime, "MyHist", {0, 1, 1, 0, 0}, - {0, 1, 1, 0, 0} + {0, 1, 1, 0, 0}, + TTabletTypes::DataShard ); tablet1.UpdatePercentile("MyHist", 1); @@ -531,7 +549,8 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { runtime, "MyHist", {0, 3, 1, 0, 1}, - {0, 3, 1, 0, 1} + {0, 3, 1, 0, 1}, + TTabletTypes::DataShard ); } @@ -551,15 +570,15 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { options.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, 1); runtime.DispatchEvents(options); - TTabletWithHist tablet1(1); + TTabletWithHist tablet1(1, TTabletTypes::DataShard); tablet1.UpdatePercentile("MyHist", 1); tablet1.SendUpdate(runtime, aggregatorId, edge); - TTabletWithHist tablet2(2); + TTabletWithHist tablet2(2, TTabletTypes::DataShard); tablet2.UpdatePercentile("MyHist", 1); tablet2.SendUpdate(runtime, aggregatorId, edge); - TTabletWithHist tablet3(3); + TTabletWithHist tablet3(3, TTabletTypes::DataShard); tablet3.UpdatePercentile("MyHist", 1); tablet3.UpdatePercentile("MyHist", 13); tablet3.SendUpdate(runtime, aggregatorId, edge); @@ -568,7 +587,8 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { runtime, "MyHist", {0, 3, 1, 0, 0}, - {0, 3, 1, 0, 0} + {0, 3, 1, 0, 0}, 
+ TTabletTypes::DataShard ); tablet3.ForgetTablet(runtime, aggregatorId, edge); @@ -577,7 +597,8 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { runtime, "MyHist", {0, 2, 0, 0, 0}, - {0, 2, 0, 0, 0} + {0, 2, 0, 0, 0}, + TTabletTypes::DataShard ); // sanity check we didn't mess other histograms @@ -586,21 +607,24 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { runtime, "HIST(Count)", {2, 0, 0, 0, 0}, - {2, 0, 0, 0, 0} + {2, 0, 0, 0, 0}, + TTabletTypes::DataShard ); TTabletWithHist::CheckHistogram( runtime, "MyHistSingleBucket", {0, 0}, - {0, 0} + {0, 0}, + TTabletTypes::DataShard ); TTabletWithHist::CheckHistogram( runtime, "HIST(CountSingleBucket)", {2, 0}, - {2, 0} + {2, 0}, + TTabletTypes::DataShard ); } @@ -619,15 +643,15 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { options.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, 1); runtime.DispatchEvents(options); - TTabletWithHist tablet1(1); + TTabletWithHist tablet1(1, TTabletTypes::DataShard); tablet1.UpdatePercentile("MyHist", 10, Max<i64>() - 100); tablet1.SendUpdate(runtime, aggregatorId, edge); - TTabletWithHist tablet2(2); + TTabletWithHist tablet2(2, TTabletTypes::DataShard); tablet2.UpdatePercentile("MyHist", 10, 25); tablet2.SendUpdate(runtime, aggregatorId, edge); - TTabletWithHist tablet3(3); + TTabletWithHist tablet3(3, TTabletTypes::DataShard); tablet3.UpdatePercentile("MyHist", 10, 5); tablet3.SendUpdate(runtime, aggregatorId, edge); @@ -636,7 +660,8 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { runtime, "MyHist", {0, 0, v, 0, 0}, - {0, 0, v, 0, 0} + {0, 0, v, 0, 0}, + TTabletTypes::DataShard ); tablet1.ForgetTablet(runtime, aggregatorId, edge); @@ -644,7 +669,36 @@ Y_UNIT_TEST_SUITE(TTabletCountersAggregator) { runtime, "MyHist", {0, 0, 30, 0, 0}, - {0, 0, 30, 0, 0} + {0, 0, 30, 0, 0}, + TTabletTypes::DataShard + ); + } + + Y_UNIT_TEST(ColumnShardCounters) { + TTestBasicRuntime runtime(1); + + runtime.Initialize(TAppPrepare().Unwrap()); + TActorId edge = runtime.AllocateEdgeActor(); 
+ + auto aggregator = CreateTabletCountersAggregator(false); + auto aggregatorId = runtime.Register(aggregator); + runtime.EnableScheduleForActor(aggregatorId); + + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, 1); + runtime.DispatchEvents(options); + + TTabletWithHist tablet1(1, TTabletTypes::ColumnShard); + + tablet1.SetSimpleCount("Count", 1); + tablet1.SendUpdate(runtime, aggregatorId, edge); + + TTabletWithHist::CheckHistogram( + runtime, + "HIST(Count)", + {0, 1, 0, 0, 0}, + {0, 1, 0, 0, 0}, + tablet1.TabletType ); } } diff --git a/ydb/core/tx/columnshard/blob_manager.cpp b/ydb/core/tx/columnshard/blob_manager.cpp index ae25078d77..115881be0f 100644 --- a/ydb/core/tx/columnshard/blob_manager.cpp +++ b/ydb/core/tx/columnshard/blob_manager.cpp @@ -516,7 +516,10 @@ void TBlobManager::DeleteBlob(const TUnifiedBlobId& blobId, IBlobManagerDb& db) LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delete Blob " << blobId); TLogoBlobID logoBlobId = blobId.GetLogoBlobId(); BlobsToDelete.insert(logoBlobId); - NBlobCache::ForgetBlob(blobId); + + if (!EvictedBlobs.contains(TEvictedBlob{.Blob = blobId})) { + NBlobCache::ForgetBlob(blobId); + } } else { LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delay Delete Blob " << blobId); BlobsToDeleteDelayed.insert(blobId.GetLogoBlobId()); @@ -602,8 +605,13 @@ bool TBlobManager::UpdateOneToOne(TEvictedBlob&& evict, IBlobManagerDb& db, bool } bool TBlobManager::EraseOneToOne(const TEvictedBlob& evict, IBlobManagerDb& db) { - db.EraseEvictBlob(evict); - return DroppedEvictedBlobs.erase(evict); + Y_VERIFY_DEBUG(!EvictedBlobs.contains(evict)); // erase before drop + + if (DroppedEvictedBlobs.erase(evict)) { + db.EraseEvictBlob(evict); + return true; + } + return false; } bool TBlobManager::LoadOneToOneExport(IBlobManagerDb& db, THashSet<TUnifiedBlobId>& droppedEvicting) { @@ -656,16 +664,26 @@ TEvictedBlob TBlobManager::GetDropped(const TUnifiedBlobId& 
blobId, TEvictMetada return {}; } -void TBlobManager::GetCleanupBlobs(THashSet<TEvictedBlob>& cleanup) const { - TString strBlobs; - for (auto& [evict, _] : DroppedEvictedBlobs) { +void TBlobManager::GetCleanupBlobs(THashMap<TString, THashSet<TEvictedBlob>>& tierBlobs) const { + TStringBuilder strBlobs; + for (auto& [evict, meta] : DroppedEvictedBlobs) { if (evict.State != EEvictState::EVICTING) { - strBlobs += "'" + evict.Blob.ToStringNew() + "' "; - cleanup.insert(evict); + strBlobs << "'" << evict.Blob.ToStringNew() << "' "; + auto& tierName = meta.GetTierName(); + tierBlobs[tierName].emplace(evict); } } if (!strBlobs.empty()) { - LOG_S_NOTICE("Cleanup evicted blobs " << strBlobs << "at tablet " << TabletInfo->TabletID); + LOG_S_DEBUG("Cleanup evicted blobs " << strBlobs << "at tablet " << TabletInfo->TabletID); + } +} + +void TBlobManager::GetReexportBlobs(THashMap<TString, THashSet<TEvictedBlob>>& tierBlobs) const { + for (auto& [evict, meta] : EvictedBlobs) { + if (evict.State == EEvictState::EVICTING) { + auto& tierName = meta.GetTierName(); + tierBlobs[tierName].emplace(evict); + } } } @@ -704,6 +722,7 @@ void TBlobManager::SetBlobInUse(const TUnifiedBlobId& blobId, bool inUse) { return; } + LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Blob " << blobId << " is no longer in use"); BlobsUseCount.erase(useIt); // Check if the blob is marked for delayed deletion @@ -718,7 +737,10 @@ void TBlobManager::SetBlobInUse(const TUnifiedBlobId& blobId, bool inUse) { if (BlobsToDeleteDelayed.erase(logoBlobId)) { LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delete Delayed Blob " << blobId); BlobsToDelete.insert(logoBlobId); - NBlobCache::ForgetBlob(blobId); + + if (!EvictedBlobs.contains(TEvictedBlob{.Blob = blobId})) { + NBlobCache::ForgetBlob(blobId); + } } } } diff --git a/ydb/core/tx/columnshard/blob_manager.h b/ydb/core/tx/columnshard/blob_manager.h index f9922a928d..0347d36722 100644 --- 
a/ydb/core/tx/columnshard/blob_manager.h +++ b/ydb/core/tx/columnshard/blob_manager.h @@ -97,7 +97,8 @@ public: virtual bool LoadOneToOneExport(IBlobManagerDb& db, THashSet<TUnifiedBlobId>& droppedEvicting) = 0; virtual TEvictedBlob GetEvicted(const TUnifiedBlobId& blob, TEvictMetadata& meta) = 0; virtual TEvictedBlob GetDropped(const TUnifiedBlobId& blobId, TEvictMetadata& meta) = 0; - virtual void GetCleanupBlobs(THashSet<TEvictedBlob>& cleanup) const = 0; + virtual void GetCleanupBlobs(THashMap<TString, THashSet<TEvictedBlob>>& tierBlobs) const = 0; + virtual void GetReexportBlobs(THashMap<TString, THashSet<TEvictedBlob>>& tierBlobs) const = 0; virtual bool HasExternBlobs() const = 0; }; @@ -241,7 +242,8 @@ public: bool LoadOneToOneExport(IBlobManagerDb& db, THashSet<TUnifiedBlobId>& droppedEvicting) override; TEvictedBlob GetEvicted(const TUnifiedBlobId& blobId, TEvictMetadata& meta) override; TEvictedBlob GetDropped(const TUnifiedBlobId& blobId, TEvictMetadata& meta) override; - void GetCleanupBlobs(THashSet<TEvictedBlob>& cleanup) const override; + void GetCleanupBlobs(THashMap<TString, THashSet<TEvictedBlob>>& tierBlobs) const override; + void GetReexportBlobs(THashMap<TString, THashSet<TEvictedBlob>>& tierBlobs) const override; bool HasExternBlobs() const override { return EvictedBlobs.size() || DroppedEvictedBlobs.size(); diff --git a/ydb/core/tx/columnshard/columnshard.cpp b/ydb/core/tx/columnshard/columnshard.cpp index 0739271b2c..a9ec91271c 100644 --- a/ydb/core/tx/columnshard/columnshard.cpp +++ b/ydb/core/tx/columnshard/columnshard.cpp @@ -121,6 +121,9 @@ void TColumnShard::Handle(TEvPrivate::TEvReadFinished::TPtr& ev, const TActorCon ScanTxInFlight.erase(txId); SetCounter(COUNTER_SCAN_IN_FLY, ScanTxInFlight.size()); } + + // Cleanup just freed dropped exported blobs + CleanForgottenBlobs(ctx); } void TColumnShard::Handle(TEvPrivate::TEvPeriodicWakeup::TPtr& ev, const TActorContext& ctx) { diff --git a/ydb/core/tx/columnshard/columnshard__export.cpp 
b/ydb/core/tx/columnshard/columnshard__export.cpp index 92ee25ad99..2d6db8dc28 100644 --- a/ydb/core/tx/columnshard/columnshard__export.cpp +++ b/ydb/core/tx/columnshard/columnshard__export.cpp @@ -19,7 +19,7 @@ public: private: TEvPrivate::TEvExport::TPtr Ev; - THashSet<NOlap::TEvictedBlob> BlobsToForget; + THashMap<TString, THashSet<NOlap::TEvictedBlob>> BlobsToForget; }; @@ -47,14 +47,8 @@ bool TTxExportFinish::Execute(TTransactionContext& txc, const TActorContext&) { continue; // not exported } -#if 0 // TODO: SELF_CACHED logic - NOlap::TEvictedBlob evict{ - .State = EEvictState::SELF_CACHED, - .Blob = blobId, - .ExternBlob = externId - }; - Self->BlobManager->UpdateOneToOne(std::move(evict), blobManagerDb, dropped); -#else + // TODO: SELF_CACHED logic + NOlap::TEvictedBlob evict{ .State = EEvictState::EXTERN, .Blob = blobId, @@ -75,13 +69,10 @@ bool TTxExportFinish::Execute(TTransactionContext& txc, const TActorContext&) { evict = Self->BlobManager->GetDropped(blobId, meta); Y_VERIFY(evict.State == EEvictState::EXTERN); - BlobsToForget.emplace(std::move(evict)); + BlobsToForget[meta.GetTierName()].emplace(std::move(evict)); } else { LOG_S_ERROR("Unknown blob exported '" << blobId.ToStringNew() << "' at tablet " << Self->TabletID()); } - - // TODO: delete not present in S3 for sure (avoid race between export and forget) -#endif } } @@ -101,28 +92,20 @@ void TTxExportFinish::Complete(const TActorContext& ctx) { if (!BlobsToForget.empty()) { Self->ForgetBlobs(ctx, BlobsToForget); } - - Y_VERIFY(Self->ActiveEvictions, "Unexpected active evictions count at tablet %lu", Self->TabletID()); - --Self->ActiveEvictions; } void TColumnShard::Handle(TEvPrivate::TEvExport::TPtr& ev, const TActorContext& ctx) { auto& msg = *ev->Get(); auto status = msg.Status; + Y_VERIFY(status != NKikimrProto::UNKNOWN); - Y_VERIFY(ActiveEvictions, "Unexpected active evictions count at tablet %lu", TabletID()); ui64 exportNo = msg.ExportNo; auto& tierName = msg.TierName; - ui64 pathId = 
msg.PathId; - if (status == NKikimrProto::UNKNOWN) { - LOG_S_DEBUG("Export (write): id " << exportNo << " tier '" << tierName << "' at tablet " << TabletID()); - ExportBlobs(ctx, exportNo, tierName, pathId, std::move(msg.Blobs)); - } else if (status == NKikimrProto::ERROR && msg.Blobs.empty()) { + if (status == NKikimrProto::ERROR && msg.Blobs.empty()) { LOG_S_WARN("Export (fail): id " << exportNo << " tier '" << tierName << "' error: " << ev->Get()->SerializeErrorsToString() << "' at tablet " << TabletID()); - --ActiveEvictions; } else { // There's no atomicity needed here. Allow partial export if (status == NKikimrProto::ERROR) { diff --git a/ydb/core/tx/columnshard/columnshard__forget.cpp b/ydb/core/tx/columnshard/columnshard__forget.cpp index 02cdbfb25b..12d6a38919 100644 --- a/ydb/core/tx/columnshard/columnshard__forget.cpp +++ b/ydb/core/tx/columnshard/columnshard__forget.cpp @@ -35,15 +35,17 @@ bool TTxForget::Execute(TTransactionContext& txc, const TActorContext&) { TBlobManagerDb blobManagerDb(txc.DB); TString strBlobs; + TString unknownBlobs; for (auto& evict : msg.Evicted) { bool erased = Self->BlobManager->EraseOneToOne(evict, blobManagerDb); if (erased) { strBlobs += "'" + evict.Blob.ToStringNew() + "' "; } else { - LOG_S_ERROR("Forget unknown blob " << evict.Blob << " at tablet " << Self->TabletID()); + unknownBlobs += "'" + evict.Blob.ToStringNew() + "' "; } } - LOG_S_NOTICE("Forget evicted blobs " << strBlobs << "at tablet " << Self->TabletID()); + LOG_S_INFO("TTxForget forget evicted blobs " << strBlobs + << (unknownBlobs.size() ? 
", forget unknown blobs " : "") << unknownBlobs << "at tablet " << Self->TabletID()); Self->IncCounter(COUNTER_FORGET_SUCCESS); } else { diff --git a/ydb/core/tx/columnshard/columnshard__write_index.cpp b/ydb/core/tx/columnshard/columnshard__write_index.cpp index 61149c87fa..dce954cb24 100644 --- a/ydb/core/tx/columnshard/columnshard__write_index.cpp +++ b/ydb/core/tx/columnshard/columnshard__write_index.cpp @@ -23,18 +23,11 @@ public: TTxType GetTxType() const override { return TXTYPE_WRITE_INDEX; } private: - struct TPathIdBlobs { - THashMap<TUnifiedBlobId, TString> Blobs; - ui64 PathId; - TPathIdBlobs(const ui64 pathId) - : PathId(pathId) { - - } - }; + using TPathIdBlobs = THashMap<ui64, THashSet<TUnifiedBlobId>>; TEvPrivate::TEvWriteIndex::TPtr Ev; THashMap<TString, TPathIdBlobs> ExportTierBlobs; - THashSet<NOlap::TEvictedBlob> BlobsToForget; + THashMap<TString, THashSet<NOlap::TEvictedBlob>> BlobsToForget; ui64 ExportNo = 0; TBackgroundActivity TriggerActivity = TBackgroundActivity::All(); }; @@ -197,7 +190,7 @@ bool TTxWriteIndex::Execute(TTransactionContext& txc, const TActorContext& ctx) auto evict = Self->BlobManager->GetDropped(blobId, meta); Y_VERIFY(evict.State != EEvictState::UNKNOWN); - BlobsToForget.emplace(std::move(evict)); + BlobsToForget[meta.GetTierName()].emplace(std::move(evict)); if (NOlap::IsDeleted(evict.State)) { LOG_S_DEBUG("Skip delete blob '" << blobId.ToStringNew() << "' at tablet " << Self->TabletID()); @@ -228,21 +221,22 @@ bool TTxWriteIndex::Execute(TTransactionContext& txc, const TActorContext& ctx) } if (blobsToExport.size()) { - size_t numBlobs = blobsToExport.size(); for (auto& [blobId, evFeatures] : blobsToExport) { - auto it = ExportTierBlobs.find(evFeatures.TargetTierName); - if (it == ExportTierBlobs.end()) { - it = ExportTierBlobs.emplace(evFeatures.TargetTierName, TPathIdBlobs(evFeatures.PathId)).first; - } - it->second.Blobs.emplace(blobId, TString()); + 
ExportTierBlobs[evFeatures.TargetTierName][evFeatures.PathId].emplace(blobId); } blobsToExport.clear(); - ExportNo = Self->LastExportNo + 1; - Self->LastExportNo += ExportTierBlobs.size(); + ui32 numExports = 0; + for (auto& [tierName, pathBlobs] : ExportTierBlobs) { + numExports += pathBlobs.size(); + } + + ExportNo = Self->LastExportNo; + Self->LastExportNo += numExports; - LOG_S_DEBUG("TTxWriteIndex init export " << ExportNo << " of " << numBlobs << " blobs in " - << ExportTierBlobs.size() << " tiers at tablet " << Self->TabletID()); + // Do not start new TTL till we finish current tx. TODO: check if this protection needed + Y_VERIFY(!Self->ActiveEvictions, "Unexpected active evictions count at tablet %lu", Self->TabletID()); + Self->ActiveEvictions += numExports; NIceDb::TNiceDb db(txc.DB); Schema::SaveSpecialValue(db, Schema::EValueIds::LastExportNumber, Self->LastExportNo); @@ -284,10 +278,6 @@ bool TTxWriteIndex::Execute(TTransactionContext& txc, const TActorContext& ctx) Self->ActiveTtl = false; //TriggerActivity = changes->NeedRepeat ? TBackgroundActivity::Ttl() : TBackgroundActivity::None(); - // Do not start new TTL till we evict current PortionsToEvict. We could evict them twice otherwise - Y_VERIFY(!Self->ActiveEvictions, "Unexpected active evictions count at tablet %lu", Self->TabletID()); - Self->ActiveEvictions = ExportTierBlobs.size(); - Self->IncCounter(ok ? 
COUNTER_TTL_SUCCESS : COUNTER_TTL_FAIL); Self->IncCounter(COUNTER_EVICTION_BLOBS_WRITTEN, blobsWritten); Self->IncCounter(COUNTER_EVICTION_BYTES_WRITTEN, bytesWritten); @@ -308,12 +298,16 @@ void TTxWriteIndex::Complete(const TActorContext& ctx) { } for (auto& [tierName, pathBlobs] : ExportTierBlobs) { - Y_VERIFY(ExportNo); - Y_VERIFY(pathBlobs.PathId); - - ctx.Send(Self->SelfId(), - new TEvPrivate::TEvExport(ExportNo, tierName, pathBlobs.PathId, std::move(pathBlobs.Blobs))); - ++ExportNo; + for (auto& [pathId, blobs] : pathBlobs) { + ++ExportNo; + Y_VERIFY(pathId); + auto event = std::make_unique<TEvPrivate::TEvExport>(ExportNo, tierName, pathId, std::move(blobs)); + Self->ExportBlobs(ctx, std::move(event)); + } + Self->ActiveEvictions -= pathBlobs.size(); + } + if (ExportTierBlobs.size()) { + Y_VERIFY(!Self->ActiveEvictions, "Unexpected active evictions count at tablet %lu", Self->TabletID()); } Self->ForgetBlobs(ctx, BlobsToForget); diff --git a/ydb/core/tx/columnshard/columnshard_impl.cpp b/ydb/core/tx/columnshard/columnshard_impl.cpp index b9128271bf..1d059d97fc 100644 --- a/ydb/core/tx/columnshard/columnshard_impl.cpp +++ b/ydb/core/tx/columnshard/columnshard_impl.cpp @@ -177,7 +177,9 @@ bool TColumnShard::WaitPlanStep(ui64 step) { } void TColumnShard::SendWaitPlanStep(ui64 step) { - Send(MakeMediatorTimecastProxyID(), new TEvMediatorTimecast::TEvWaitPlanStep(TabletID(), step)); + if (MediatorTimeCastRegistered) { + Send(MakeMediatorTimecastProxyID(), new TEvMediatorTimecast::TEvWaitPlanStep(TabletID(), step)); + } } void TColumnShard::RescheduleWaitingReads() { @@ -737,9 +739,7 @@ void TColumnShard::EnqueueBackgroundActivities(bool periodic, TBackgroundActivit ctx.Send(SelfId(), event.release()); } else { // Small cleanup (no index changes) - THashSet<NOlap::TEvictedBlob> blobsToForget; - BlobManager->GetCleanupBlobs(blobsToForget); - ForgetBlobs(ctx, blobsToForget); + CleanForgottenBlobs(ctx); } } @@ -1063,19 +1063,44 @@ void 
TColumnShard::MapExternBlobs(const TActorContext& /*ctx*/, NOlap::TReadMeta } } -void TColumnShard::ExportBlobs(const TActorContext& ctx, ui64 exportNo, const TString& tierName, ui64 pathId, - TEvPrivate::TEvExport::TBlobDataMap&& blobsInfo) const { - Y_VERIFY(blobsInfo.size()); +void TColumnShard::CleanForgottenBlobs(const TActorContext& ctx) { + THashMap<TString, THashSet<NOlap::TEvictedBlob>> tierBlobsToForget; + BlobManager->GetCleanupBlobs(tierBlobsToForget); + ForgetBlobs(ctx, tierBlobsToForget); +} + +void TColumnShard::Reexport(const TActorContext& ctx) { + THashMap<TString, THashSet<NOlap::TEvictedBlob>> tierBlobsToReexport; + BlobManager->GetReexportBlobs(tierBlobsToReexport); + + ui64 exportNo = LastExportNo; + LastExportNo += tierBlobsToReexport.size(); // TODO: persist it? - TString strBlobs; - for (auto& [blobId, _] : blobsInfo) { - strBlobs += "'" + blobId.ToStringNew() + "' "; + for (auto& [tierName, evictSet] : tierBlobsToReexport) { + ++exportNo; + LOG_S_INFO("Reexport " << exportNo << " at tablet " << TabletID()); + ExportBlobs(ctx, std::make_unique<TEvPrivate::TEvExport>(exportNo, tierName, evictSet)); } - LOG_S_NOTICE("Export blobs " << strBlobs << "at tablet " << TabletID()); +} +void TColumnShard::ExportBlobs(const TActorContext& ctx, std::unique_ptr<TEvPrivate::TEvExport>&& event) { + Y_VERIFY(event); + Y_VERIFY(event->ExportNo); + Y_VERIFY(event->Blobs.size()); + Y_VERIFY(event->SrcToDstBlobs.size() == event->Blobs.size()); + + const auto& tierName = event->TierName; if (auto s3 = GetS3ActorForTier(tierName)) { - auto event = std::make_unique<TEvPrivate::TEvExport>(exportNo, tierName, pathId, s3, std::move(blobsInfo)); - ctx.Register(CreateExportActor(TabletID(), ctx.SelfID, event.release())); + TStringBuilder strBlobs; + for (auto& [blobId, _] : event->Blobs) { + strBlobs << "'" << blobId.ToStringNew() << "' "; + } + + event->DstActor = s3; + LOG_S_NOTICE("Export blobs " << strBlobs << "(tier '" << tierName << "') at tablet " << 
TabletID()); + ctx.Register(CreateExportActor(TabletID(), SelfId(), event.release())); + } else { + LOG_S_INFO("Cannot export blobs (no S3 actor for tier '" << tierName << "') at tablet " << TabletID()); } } @@ -1088,32 +1113,42 @@ void TColumnShard::ForgetTierBlobs(const TActorContext& ctx, const TString& tier } } -void TColumnShard::ForgetBlobs(const TActorContext& ctx, const THashSet<NOlap::TEvictedBlob>& evictedBlobs) { - THashMap<TString, std::vector<NOlap::TEvictedBlob>> tierBlobs; +void TColumnShard::ForgetBlobs(const TActorContext& ctx, const THashMap<TString, THashSet<NOlap::TEvictedBlob>>& evictedBlobs) { + TStringBuilder strBlobs; + TStringBuilder strBlobsDelayed; - TString strBlobs; - TString strBlobsDelayed; + for (const auto& [tierName, evictSet] : evictedBlobs) { + std::vector<NOlap::TEvictedBlob> tierBlobs; - for (const auto& ev : evictedBlobs) { - auto& blobId = ev.Blob; - if (BlobManager->BlobInUse(blobId)) { - LOG_S_DEBUG("Blob '" << blobId.ToStringNew() << "' in use at tablet " << TabletID()); - strBlobsDelayed += "'" + blobId.ToStringNew() + "' "; - continue; + for (const auto& ev : evictSet) { + auto& blobId = ev.Blob; + if (BlobManager->BlobInUse(blobId)) { + LOG_S_DEBUG("Blob '" << blobId.ToStringNew() << "' is in use at tablet " << TabletID()); + strBlobsDelayed << "'" << blobId.ToStringNew() << "' "; + continue; + } + + TEvictMetadata meta; + auto evict = BlobManager->GetDropped(blobId, meta); + if (tierName != meta.GetTierName()) { + LOG_S_ERROR("Forget with unexpected tier name '" << meta.GetTierName() << "' at tablet " << TabletID()); + continue; + } + + if (evict.State == EEvictState::UNKNOWN) { + LOG_S_ERROR("Forget unknown blob '" << blobId.ToStringNew() << "' at tablet " << TabletID()); + } else if (NOlap::CouldBeExported(evict.State)) { + Y_VERIFY(evict.Blob == blobId); + strBlobs << "'" << blobId.ToStringNew() << "' "; + tierBlobs.emplace_back(std::move(evict)); + } else { + Y_VERIFY(evict.Blob == blobId); + strBlobsDelayed << "'" 
<< blobId.ToStringNew() << "' "; + } } - TEvictMetadata meta; - auto evict = BlobManager->GetDropped(blobId, meta); - - if (evict.State == EEvictState::UNKNOWN) { - LOG_S_ERROR("Forget unknown blob '" << blobId.ToStringNew() << "' at tablet " << TabletID()); - } else if (NOlap::CouldBeExported(evict.State)) { - Y_VERIFY(evict.Blob == blobId); - strBlobs += "'" + blobId.ToStringNew() + "' "; - tierBlobs[meta.GetTierName()].emplace_back(std::move(evict)); - } else { - Y_VERIFY(evict.Blob == blobId); - strBlobsDelayed += "'" + blobId.ToStringNew() + "' "; + if (tierBlobs.size()) { + ForgetTierBlobs(ctx, tierName, std::move(tierBlobs)); } } @@ -1123,10 +1158,6 @@ void TColumnShard::ForgetBlobs(const TActorContext& ctx, const THashSet<NOlap::T if (strBlobsDelayed.size()) { LOG_S_NOTICE("Forget blobs (deleyed) " << strBlobsDelayed << "at tablet " << TabletID()); } - - for (auto& [tierName, blobs] : tierBlobs) { - ForgetTierBlobs(ctx, tierName, std::move(blobs)); - } } bool TColumnShard::GetExportedBlob(const TActorContext& ctx, TActorId dst, ui64 cookie, const TString& tierName, @@ -1169,7 +1200,11 @@ void TColumnShard::Handle(NMetadata::NProvider::TEvRefreshSubscriberData::TPtr& void TColumnShard::ActivateTiering(const ui64 pathId, const TString& useTiering) { if (!Tiers) { - Tiers = std::make_shared<TTiersManager>(TabletID(), SelfId()); + Tiers = std::make_shared<TTiersManager>(TabletID(), SelfId(), + [this](const TActorContext& ctx){ + CleanForgottenBlobs(ctx); + Reexport(ctx); + }); Tiers->Start(Tiers); } if (!!Tiers) { diff --git a/ydb/core/tx/columnshard/columnshard_impl.h b/ydb/core/tx/columnshard/columnshard_impl.h index 9d015332bd..ff37a95e20 100644 --- a/ydb/core/tx/columnshard/columnshard_impl.h +++ b/ydb/core/tx/columnshard/columnshard_impl.h @@ -466,6 +466,7 @@ private: void EnqueueProgressTx(const TActorContext& ctx); void EnqueueBackgroundActivities(bool periodic = false, TBackgroundActivity activity = TBackgroundActivity::All()); + void 
CleanForgottenBlobs(const TActorContext& ctx); void UpdateSchemaSeqNo(const TMessageSeqNo& seqNo, NTabletFlatExecutor::TTransactionContext& txc); void ProtectSchemaSeqNo(const NKikimrTxColumnShard::TSchemaSeqNo& seqNoProto, NTabletFlatExecutor::TTransactionContext& txc); @@ -488,10 +489,10 @@ private: NOlap::TIndexInfo ConvertSchema(const NKikimrSchemeOp::TColumnTableSchema& schema); void MapExternBlobs(const TActorContext& ctx, NOlap::TReadMetadata& metadata); TActorId GetS3ActorForTier(const TString& tierId) const; - void ExportBlobs(const TActorContext& ctx, ui64 exportNo, const TString& tierName, ui64 pathId, - TEvPrivate::TEvExport::TBlobDataMap&& blobsInfo) const; + void Reexport(const TActorContext& ctx); + void ExportBlobs(const TActorContext& ctx, std::unique_ptr<TEvPrivate::TEvExport>&& ev); void ForgetTierBlobs(const TActorContext& ctx, const TString& tierName, std::vector<NOlap::TEvictedBlob>&& blobs) const; - void ForgetBlobs(const TActorContext& ctx, const THashSet<NOlap::TEvictedBlob>& blobs); + void ForgetBlobs(const TActorContext& ctx, const THashMap<TString, THashSet<NOlap::TEvictedBlob>>& evictedBlobs); bool GetExportedBlob(const TActorContext& ctx, TActorId dst, ui64 cookie, const TString& tierName, NOlap::TEvictedBlob&& evicted, std::vector<NOlap::TBlobRange>&& ranges); diff --git a/ydb/core/tx/columnshard/columnshard_private_events.h b/ydb/core/tx/columnshard/columnshard_private_events.h index b614ca9d7a..820a43962b 100644 --- a/ydb/core/tx/columnshard/columnshard_private_events.h +++ b/ydb/core/tx/columnshard/columnshard_private_events.h @@ -129,36 +129,42 @@ struct TEvPrivate { NKikimrProto::EReplyStatus Status = NKikimrProto::UNKNOWN; ui64 ExportNo = 0; TString TierName; - ui64 PathId = 0; TActorId DstActor; TBlobDataMap Blobs; // src: blobId -> data map; dst: exported blobIds set THashMap<TUnifiedBlobId, TUnifiedBlobId> SrcToDstBlobs; TMap<TString, TString> ErrorStrings; - explicit TEvExport(ui64 exportNo, const TString& tierName, ui64 
pathId, TBlobDataMap&& tierBlobs) + explicit TEvExport(ui64 exportNo, const TString& tierName, ui64 pathId, + const THashSet<TUnifiedBlobId>& blobIds) : ExportNo(exportNo) , TierName(tierName) - , PathId(pathId) - , Blobs(std::move(tierBlobs)) { Y_VERIFY(ExportNo); Y_VERIFY(!TierName.empty()); - Y_VERIFY(PathId); - Y_VERIFY(!Blobs.empty()); + Y_VERIFY(pathId); + Y_VERIFY(!blobIds.empty()); + + for (auto& blobId : blobIds) { + Blobs.emplace(blobId, TString()); + SrcToDstBlobs[blobId] = blobId.MakeS3BlobId(pathId); + } } - TEvExport(ui64 exportNo, const TString& tierName, ui64 pathId, TActorId dstActor, TBlobDataMap&& blobs) + explicit TEvExport(ui64 exportNo, const TString& tierName, const THashSet<NOlap::TEvictedBlob>& evictSet) : ExportNo(exportNo) , TierName(tierName) - , PathId(pathId) - , DstActor(dstActor) - , Blobs(std::move(blobs)) { Y_VERIFY(ExportNo); Y_VERIFY(!TierName.empty()); - Y_VERIFY(PathId); - Y_VERIFY(DstActor); - Y_VERIFY(!Blobs.empty()); + Y_VERIFY(!evictSet.empty()); + + for (auto& evict : evictSet) { + Y_VERIFY(evict.IsEvicting()); + Y_VERIFY(evict.ExternBlob.IsS3Blob()); + + Blobs.emplace(evict.Blob, TString()); + SrcToDstBlobs[evict.Blob] = evict.ExternBlob; + } } void AddResult(const TUnifiedBlobId& blobId, const TString& key, const bool hasError, const TString& errStr) { diff --git a/ydb/core/tx/columnshard/columnshard_ut_common.h b/ydb/core/tx/columnshard/columnshard_ut_common.h index f7df60895d..cb5bd7dcb1 100644 --- a/ydb/core/tx/columnshard/columnshard_ut_common.h +++ b/ydb/core/tx/columnshard/columnshard_ut_common.h @@ -77,6 +77,30 @@ struct TTestSchema { TtlColumn = columnName; return *this; } + + static NKikimrSchemeOp::TS3Settings FakeS3() { + const TString bucket = "tiering-test-01"; + + NKikimrSchemeOp::TS3Settings s3Config; + s3Config.SetScheme(NKikimrSchemeOp::TS3Settings::HTTP); + s3Config.SetVerifySSL(false); + s3Config.SetBucket(bucket); +//#define S3_TEST_USAGE +#ifdef S3_TEST_USAGE + 
s3Config.SetEndpoint("storage.cloud-preprod.yandex.net"); + s3Config.SetAccessKey("..."); + s3Config.SetSecretKey("..."); + s3Config.SetProxyHost("localhost"); + s3Config.SetProxyPort(8080); + s3Config.SetProxyScheme(NKikimrSchemeOp::TS3Settings::HTTP); +#else + s3Config.SetEndpoint("fake"); +#endif + s3Config.SetRequestTimeoutMs(10000); + s3Config.SetHttpRequestTimeoutMs(10000); + s3Config.SetConnectionTimeoutMs(10000); + return s3Config; + } }; struct TTableSpecials : public TStorageTier { diff --git a/ydb/core/tx/columnshard/engines/column_engine.h b/ydb/core/tx/columnshard/engines/column_engine.h index 30ab8d9f9b..9b862f1f3e 100644 --- a/ydb/core/tx/columnshard/engines/column_engine.h +++ b/ydb/core/tx/columnshard/engines/column_engine.h @@ -282,6 +282,25 @@ struct TColumnEngineStats { void Clear() { *this = {}; } + + TPortionsStats& StatsByType(TPortionMeta::EProduced produced) { + switch (produced) { + case NOlap::TPortionMeta::INSERTED: + return Inserted; + case NOlap::TPortionMeta::COMPACTED: + return Compacted; + case NOlap::TPortionMeta::SPLIT_COMPACTED: + return SplitCompacted; + case NOlap::TPortionMeta::INACTIVE: + return Inactive; + case NOlap::TPortionMeta::EVICTED: + return Evicted; + case NOlap::TPortionMeta::UNSPECIFIED: + default: + break; + } + Y_VERIFY(false); + } }; class IColumnEngine { diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp index de42fde260..c1348c5f6f 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp @@ -442,8 +442,9 @@ const TColumnEngineStats& TColumnEngineForLogs::GetTotalStats() { return Counters; } -void TColumnEngineForLogs::UpdatePortionStats(const TPortionInfo& portionInfo, EStatsUpdateType updateType) { - UpdatePortionStats(Counters, portionInfo, updateType); +void TColumnEngineForLogs::UpdatePortionStats(const TPortionInfo& portionInfo, EStatsUpdateType updateType, + 
const TPortionInfo* exPortionInfo) { + UpdatePortionStats(Counters, portionInfo, updateType, exPortionInfo); ui64 granule = portionInfo.Granule(); Y_VERIFY(granule); @@ -455,11 +456,12 @@ void TColumnEngineForLogs::UpdatePortionStats(const TPortionInfo& portionInfo, E stats = std::make_shared<TColumnEngineStats>(); stats->Tables = 1; } - UpdatePortionStats(*PathStats[pathId], portionInfo, updateType); + UpdatePortionStats(*PathStats[pathId], portionInfo, updateType, exPortionInfo); } void TColumnEngineForLogs::UpdatePortionStats(TColumnEngineStats& engineStats, const TPortionInfo& portionInfo, - EStatsUpdateType updateType) const { + EStatsUpdateType updateType, + const TPortionInfo* exPortionInfo) const { ui64 columnRecords = portionInfo.Records.size(); ui64 metadataBytes = 0; THashSet<TUnifiedBlobId> blobs; @@ -470,72 +472,76 @@ void TColumnEngineForLogs::UpdatePortionStats(TColumnEngineStats& engineStats, c ui32 rows = portionInfo.NumRows(); ui64 rawBytes = portionInfo.RawBytesSum(); + ui64 numBlobs = blobs.size(); ui64 bytes = 0; for (auto& blobId : blobs) { bytes += blobId.BlobSize(); } + blobs = {}; - TColumnEngineStats::TPortionsStats* srcStats = nullptr; - switch (portionInfo.Meta.Produced) { - case NOlap::TPortionMeta::UNSPECIFIED: - Y_VERIFY(false); // unexpected - case NOlap::TPortionMeta::INSERTED: - srcStats = &engineStats.Inserted; - break; - case NOlap::TPortionMeta::COMPACTED: - srcStats = &engineStats.Compacted; - break; - case NOlap::TPortionMeta::SPLIT_COMPACTED: - srcStats = &engineStats.SplitCompacted; - break; - case NOlap::TPortionMeta::INACTIVE: - Y_VERIFY_DEBUG(false); // Stale portions are not set INACTIVE. They have IsActive() property instead. - srcStats = &engineStats.Inactive; - break; - case NOlap::TPortionMeta::EVICTED: - srcStats = &engineStats.Evicted; - break; - } - Y_VERIFY(srcStats); - auto* stats = (updateType == EStatsUpdateType::EVICT) - ? &engineStats.Evicted - : (portionInfo.IsActive() ? 
srcStats : &engineStats.Inactive); + Y_VERIFY(!exPortionInfo || exPortionInfo->Meta.Produced != TPortionMeta::EProduced::UNSPECIFIED); + Y_VERIFY(portionInfo.Meta.Produced != TPortionMeta::EProduced::UNSPECIFIED); - bool isErase = updateType == EStatsUpdateType::ERASE; - bool isLoad = updateType == EStatsUpdateType::LOAD; - bool isAppended = portionInfo.IsActive() && (updateType != EStatsUpdateType::EVICT); + TColumnEngineStats::TPortionsStats* srcStats = exPortionInfo + ? (exPortionInfo->IsActive() + ? &engineStats.StatsByType(exPortionInfo->Meta.Produced) + : &engineStats.StatsByType(TPortionMeta::EProduced::INACTIVE)) + : &engineStats.StatsByType(portionInfo.Meta.Produced); + TColumnEngineStats::TPortionsStats* stats = portionInfo.IsActive() + ? &engineStats.StatsByType(portionInfo.Meta.Produced) + : &engineStats.StatsByType(TPortionMeta::EProduced::INACTIVE); + + const bool isErase = updateType == EStatsUpdateType::ERASE; + const bool isAdd = updateType == EStatsUpdateType::ADD; if (isErase) { // PortionsToDrop engineStats.ColumnRecords -= columnRecords; engineStats.ColumnMetadataBytes -= metadataBytes; --stats->Portions; - stats->Blobs -= blobs.size(); + stats->Blobs -= numBlobs; stats->Rows -= rows; stats->Bytes -= bytes; stats->RawBytes -= rawBytes; - } else if (isLoad || isAppended) { // AppendedPortions + } else if (isAdd) { // AppendedPortions engineStats.ColumnRecords += columnRecords; engineStats.ColumnMetadataBytes += metadataBytes; ++stats->Portions; - stats->Blobs += blobs.size(); + stats->Blobs += numBlobs; stats->Rows += rows; stats->Bytes += bytes; stats->RawBytes += rawBytes; - } else { // SwitchedPortions || PortionsToEvict + } else if (srcStats != stats || exPortionInfo) { // SwitchedPortions || PortionsToEvict --srcStats->Portions; - srcStats->Blobs -= blobs.size(); - srcStats->Rows -= rows; - srcStats->Bytes -= bytes; - srcStats->RawBytes -= rawBytes; + if (exPortionInfo) { + blobs = {}; + for (auto& rec : exPortionInfo->Records) { + 
blobs.insert(rec.BlobRange.BlobId); + } + + srcStats->Rows -= exPortionInfo->NumRows(); + srcStats->RawBytes -= exPortionInfo->RawBytesSum(); + srcStats->Blobs -= blobs.size(); + for (auto& blobId : blobs) { + srcStats->Bytes -= blobId.BlobSize(); + } + blobs = {}; + } else { + srcStats->Blobs -= numBlobs; + srcStats->Rows -= rows; + srcStats->Bytes -= bytes; + srcStats->RawBytes -= rawBytes; + } ++stats->Portions; - stats->Blobs += blobs.size(); + stats->Blobs += numBlobs; stats->Rows += rows; stats->Bytes += bytes; stats->RawBytes += rawBytes; } + + Y_VERIFY_DEBUG(stats->Bytes >= 0); } void TColumnEngineForLogs::UpdateDefaultSchema(const TSnapshot& snapshot, TIndexInfo&& info) { @@ -585,7 +591,7 @@ bool TColumnEngineForLogs::Load(IDbWrapper& db, THashSet<TUnifiedBlobId>& lostBl CleanupGranules.insert(granule); } for (auto& [_, portionInfo] : spg->Portions) { - UpdatePortionStats(portionInfo, EStatsUpdateType::LOAD); + UpdatePortionStats(portionInfo, EStatsUpdateType::ADD); } } @@ -1115,7 +1121,10 @@ bool TColumnEngineForLogs::ApplyChanges(IDbWrapper& db, const TChanges& changes, } } - if (!UpsertPortion(portionInfo, apply)) { + // In case of race with eviction portion could become evicted + const TPortionInfo& oldInfo = Granules[granule]->Portions[portion]; + + if (!UpsertPortion(portionInfo, apply, &oldInfo)) { LOG_S_ERROR("Cannot update portion " << portionInfo << " at tablet " << TabletId); return false; } @@ -1152,11 +1161,7 @@ bool TColumnEngineForLogs::ApplyChanges(IDbWrapper& db, const TChanges& changes, } Y_VERIFY(portionInfo.TierName != oldInfo.TierName); - if (apply) { - UpdatePortionStats(oldInfo, EStatsUpdateType::EVICT); - } - - if (!UpsertPortion(portionInfo, apply, false)) { + if (!UpsertPortion(portionInfo, apply, &oldInfo)) { LOG_S_ERROR("Cannot evict portion " << portionInfo << " at tablet " << TabletId); return false; } @@ -1170,15 +1175,27 @@ bool TColumnEngineForLogs::ApplyChanges(IDbWrapper& db, const TChanges& changes, // Move portions in 
granules (zero-copy switch + append into new granules) - for (auto& [info, granule] : changes.PortionsToMove) { + for (auto& [info, dstGranule] : changes.PortionsToMove) { const auto& portionInfo = info; + + ui64 granule = portionInfo.Granule(); + ui64 portion = portionInfo.Portion(); + if (!Granules.contains(granule) || !Granules[granule]->Portions.contains(portion)) { + LOG_S_ERROR("Cannot move unknown portion " << portionInfo << " at tablet " << TabletId); + return false; + } + + // In case of race with eviction portion could become evicted + const TPortionInfo oldInfo = Granules[granule]->Portions[portion]; + if (!ErasePortion(portionInfo, apply, false)) { LOG_S_ERROR("Cannot erase moved portion " << portionInfo << " at tablet " << TabletId); return false; } + TPortionInfo moved = portionInfo; - moved.SetGranule(granule); - if (!UpsertPortion(moved, apply, false)) { + moved.SetGranule(dstGranule); + if (!UpsertPortion(moved, apply, &oldInfo)) { LOG_S_ERROR("Cannot insert moved portion " << moved << " at tablet " << TabletId); return false; } @@ -1307,7 +1324,7 @@ void TColumnEngineForLogs::EraseGranule(ui64 pathId, ui64 granule, const TMark& PathGranules[pathId].erase(mark); } -bool TColumnEngineForLogs::UpsertPortion(const TPortionInfo& portionInfo, bool apply, bool updateStats) { +bool TColumnEngineForLogs::UpsertPortion(const TPortionInfo& portionInfo, bool apply, const TPortionInfo* exInfo) { ui64 granule = portionInfo.Granule(); if (!apply) { @@ -1323,8 +1340,11 @@ bool TColumnEngineForLogs::UpsertPortion(const TPortionInfo& portionInfo, bool a ui64 portion = portionInfo.Portion(); auto& spg = Granules[granule]; Y_VERIFY(spg); - if (updateStats) { - UpdatePortionStats(portionInfo); + + if (exInfo) { + UpdatePortionStats(portionInfo, EStatsUpdateType::DEFAULT, exInfo); + } else { + UpdatePortionStats(portionInfo, EStatsUpdateType::ADD); } spg->Portions[portion] = portionInfo; return true; // It must return true if (apply == true) diff --git 
a/ydb/core/tx/columnshard/engines/column_engine_logs.h b/ydb/core/tx/columnshard/engines/column_engine_logs.h index e408c9283e..190e626b9c 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.h +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.h @@ -218,8 +218,7 @@ public: enum class EStatsUpdateType { DEFAULT = 0, ERASE, - LOAD, - EVICT + ADD, }; TColumnEngineForLogs(TIndexInfo&& info, ui64 tabletId, const TCompactionLimits& limits = {}); @@ -339,12 +338,14 @@ private: void EraseGranule(ui64 pathId, ui64 granule, const TMark& mark); bool SetGranule(const TGranuleRecord& rec, bool apply); - bool UpsertPortion(const TPortionInfo& portionInfo, bool apply, bool updateStats = true); + bool UpsertPortion(const TPortionInfo& portionInfo, bool apply, const TPortionInfo* exInfo = nullptr); bool ErasePortion(const TPortionInfo& portionInfo, bool apply, bool updateStats = true); void AddColumnRecord(const TColumnRecord& row); - void UpdatePortionStats(const TPortionInfo& portionInfo, EStatsUpdateType updateType = EStatsUpdateType::DEFAULT); + void UpdatePortionStats(const TPortionInfo& portionInfo, EStatsUpdateType updateType = EStatsUpdateType::DEFAULT, + const TPortionInfo* exPortionInfo = nullptr); void UpdatePortionStats(TColumnEngineStats& engineStats, const TPortionInfo& portionInfo, - EStatsUpdateType updateType) const; + EStatsUpdateType updateType, + const TPortionInfo* exPortionInfo = nullptr) const; bool CanInsert(const TChanges& changes, const TSnapshot& commitSnap) const; TMap<TSnapshot, TVector<ui64>> GetOrderedPortions(ui64 granule, const TSnapshot& snapshot = TSnapshot::Max()) const; diff --git a/ydb/core/tx/columnshard/export_actor.cpp b/ydb/core/tx/columnshard/export_actor.cpp index 8dff625bf1..87fed18189 100644 --- a/ydb/core/tx/columnshard/export_actor.cpp +++ b/ydb/core/tx/columnshard/export_actor.cpp @@ -32,10 +32,8 @@ public: << " at tablet " << TabletId << " (export)"); BlobsToRead.erase(blobId); - Event->Status = event.Status; - if 
(Event->Status == NKikimrProto::UNKNOWN) { - Event->Status = NKikimrProto::ERROR; - } + Event->AddResult(blobId, blobId.ToStringNew(), true, + TStringBuilder() << "cannot read, status " << NKikimrProto::EReplyStatus_Name(event.Status)); return; } @@ -102,9 +100,15 @@ private: } void SendResultAndDie(const TActorContext& ctx) { - auto s3Actor = Event->DstActor; - Event->DstActor = Parent; - ctx.Send(s3Actor, Event.release()); + if (Event->Status == NKikimrProto::UNKNOWN) { + auto s3Actor = Event->DstActor; + Event->DstActor = Parent; + ctx.Send(s3Actor, Event.release()); + } else { + Y_VERIFY(Event->Status == NKikimrProto::ERROR); + Event->DstActor = Parent; + ctx.Send(Parent, Event.release()); + } Die(ctx); } }; diff --git a/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp b/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp index a5847ac677..928fd3ec67 100644 --- a/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp +++ b/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp @@ -1,4 +1,5 @@ #include "columnshard_ut_common.h" +#include <ydb/core/base/tablet.h> #include <ydb/core/wrappers/ut_helpers/s3_mock.h> #include <ydb/core/wrappers/s3_wrapper.h> #include <ydb/services/metadata/service.h> @@ -168,18 +169,23 @@ bool TestCreateTable(const TString& txBody, ui64 planStep = 1000, ui64 txId = 10 return ProposeSchemaTx(runtime, sender, txBody, {++planStep, ++txId}); } -TString GetReadResult(NKikimrTxColumnShard::TEvReadResult& resRead, - std::optional<ui32> batchNo = 0, - std::optional<bool> finished = true) +enum class EExpectedResult { + OK_FINISHED, + OK, + ERROR +}; + +TString GetReadResult(NKikimrTxColumnShard::TEvReadResult& resRead, EExpectedResult expected = EExpectedResult::OK_FINISHED) { UNIT_ASSERT_EQUAL(resRead.GetOrigin(), TTestTxConfig::TxTablet0); UNIT_ASSERT_EQUAL(resRead.GetTxInitiator(), TTestTxConfig::TxTablet1); - UNIT_ASSERT_EQUAL(resRead.GetStatus(), NKikimrTxColumnShard::EResultStatus::SUCCESS); - if (batchNo) { 
- UNIT_ASSERT_VALUES_EQUAL(resRead.GetBatch(), *batchNo); + if (expected == EExpectedResult::ERROR) { + UNIT_ASSERT_EQUAL(resRead.GetStatus(), NKikimrTxColumnShard::EResultStatus::ERROR); + } else { + UNIT_ASSERT_EQUAL(resRead.GetStatus(), NKikimrTxColumnShard::EResultStatus::SUCCESS); } - if (finished) { - UNIT_ASSERT_EQUAL(resRead.GetFinished(), *finished); + if (expected == EExpectedResult::OK_FINISHED) { + UNIT_ASSERT_EQUAL(resRead.GetFinished(), true); } return resRead.GetData(); } @@ -368,46 +374,68 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, class TCountersContainer { private: - ui32 SuccessCounterStart = 0; + struct TCounters { + ui32 Attempt = 0; + ui32 Request = 0; + ui32 Response = 0; + ui32 Success = 0; + + void Clear() { + Attempt = 0; + Request = 0; + Response = 0; + Success = 0; + } + + TString ToString() const { + return TStringBuilder() << Attempt << "/" << Request << "/" << Response << "/" << Success; + } + }; + + ui32 WaitNo = 0; + public: - ui32 UnknownsCounter = 0; - ui32 SuccessCounter = 0; - ui32 ErrorsCounter = 0; - ui32 ResponsesCounter = 0; + TCounters ExportCounters; + TCounters ForgetCounters; ui32 CaptureReadEvents = 0; std::vector<TAutoPtr<IEventHandle>> CapturedReads; + ui32 CaptureEvictResponse = 0; + ui32 CaptureForgetResponse = 0; + std::vector<TAutoPtr<IEventHandle>> CapturedResponses; + bool BlockForgets = false; - TString SerializeToString() const { - TStringBuilder sb; - sb << "EXPORTS INFO: " << SuccessCounter << "/" << ErrorsCounter << "/" << UnknownsCounter << "/" << ResponsesCounter; - return sb; - } - - void WaitEvents(TTestBasicRuntime& runtime, const ui32 attemption, const ui32 expectedDeltaSuccess, const TDuration timeout) { + void WaitEvents(TTestBasicRuntime& runtime, const TDuration& timeout, ui32 waitExports, ui32 waitForgets, + const TString& promo = "START_WAITING") { const TInstant startInstant = TAppData::TimeProvider->Now(); const TInstant deadline = startInstant + 
timeout; - Cerr << "START_WAITING(" << attemption << "): " << SerializeToString() << Endl; + Cerr << promo << "(" << WaitNo << "): " + << "E" << ExportCounters.ToString() << " F" << ForgetCounters.ToString() << Endl; while (TAppData::TimeProvider->Now() < deadline) { - Cerr << "IN_WAITING(" << attemption << "):" << SerializeToString() << Endl; + Cerr << "IN_WAITING(" << WaitNo << "): " + << "E" << ExportCounters.ToString() << " F" << ForgetCounters.ToString() << Endl; runtime.SimulateSleep(TDuration::Seconds(1)); - UNIT_ASSERT(ErrorsCounter == 0); - if (expectedDeltaSuccess) { - if (SuccessCounter >= SuccessCounterStart + expectedDeltaSuccess) { - break; - } - } else { - if (SuccessCounter > SuccessCounterStart) { - break; - } + + if (!waitExports && ExportCounters.Success + || !waitForgets && ForgetCounters.Success + || !waitForgets && ExportCounters.Success >= waitExports + || !waitExports && ForgetCounters.Success >= waitForgets + || waitExports && waitForgets + && ExportCounters.Success >= waitExports && ForgetCounters.Success >= waitForgets) { + break; } } - if (expectedDeltaSuccess) { - UNIT_ASSERT(SuccessCounter >= SuccessCounterStart + expectedDeltaSuccess); - } else { - UNIT_ASSERT_VALUES_EQUAL(SuccessCounter, SuccessCounterStart); - } - Cerr << "FINISH_WAITING(" << attemption << "): " << SerializeToString() << Endl; - SuccessCounterStart = SuccessCounter; + Cerr << "FINISH_WAITING(" << WaitNo << "): " + << "E" << ExportCounters.ToString() << " F" << ForgetCounters.ToString() << Endl; + UNIT_ASSERT_VALUES_EQUAL(ExportCounters.Success, waitExports); + UNIT_ASSERT_VALUES_EQUAL(ForgetCounters.Success, waitForgets); + ExportCounters.Clear(); + ForgetCounters.Clear(); + ++WaitNo; + } + + void WaitMoreEvents(TTestBasicRuntime& runtime, const TDuration& timeout, ui32 waitExports, ui32 waitForgets) { + --WaitNo; + WaitEvents(runtime, timeout, waitExports, waitForgets, "CONTINUE_WAITING"); } void WaitReadsCaptured(TTestBasicRuntime& runtime) const { @@ -429,6 
+457,18 @@ public: } CapturedReads.clear(); } + + void ResendCapturedResponses(TTestBasicRuntime& runtime) { + for (auto& cev : CapturedResponses) { + Cerr << "RESEND S3_RESPONSE" << Endl; + runtime.Send(cev.Release()); + } + CapturedResponses.clear(); + } + + void BlockForgetsTillReboot() { + BlockForgets = true; + } }; class TEventsCounter { @@ -446,13 +486,52 @@ public: bool operator()(TTestActorRuntimeBase&, TAutoPtr<IEventHandle>& ev) { TStringBuilder ss; - if (auto* msg = TryGetPrivateEvent<NColumnShard::TEvPrivate::TEvExport>(ev)) { - ss << "EXPORT(" << ++Counters->SuccessCounter << "): " << NKikimrProto::EReplyStatus_Name(msg->Status); + if (ev->GetTypeRewrite() == TEvTablet::EvBoot) { + Counters->BlockForgets = false; + return false; + } else if (auto* msg = TryGetPrivateEvent<NColumnShard::TEvPrivate::TEvExport>(ev)) { + if (msg->Status == NKikimrProto::OK) { + ss << "EXPORT(done " << ++Counters->ExportCounters.Success << "): "; + } else { + ss << "EXPORT(attempt " << ++Counters->ExportCounters.Attempt << "): " + << NKikimrProto::EReplyStatus_Name(msg->Status); + } + } else if (auto* msg = TryGetPrivateEvent<NColumnShard::TEvPrivate::TEvForget>(ev)) { + if (Counters->BlockForgets) { + ss << "FORGET(ignore " << NKikimrProto::EReplyStatus_Name(msg->Status) << "): "; + ss << " " << ev->Sender << "->" << ev->Recipient; + Cerr << ss << Endl; + return true; + } + + if (msg->Status == NKikimrProto::OK) { + ss << "FORGET(done " << ++Counters->ForgetCounters.Success << "): "; + } else { + ss << "FORGET(attempt " << ++Counters->ForgetCounters.Attempt << "): " + << NKikimrProto::EReplyStatus_Name(msg->Status); + } + } else if (auto* msg = TryGetPrivateEvent<NWrappers::NExternalStorage::TEvPutObjectRequest>(ev)) { + ss << "S3_REQ(put " << ++Counters->ExportCounters.Request << "):"; } else if (auto* msg = TryGetPrivateEvent<NWrappers::NExternalStorage::TEvPutObjectResponse>(ev)) { - ss << "S3_RESPONSE(put " << ++Counters->ResponsesCounter << "):"; + if 
(Counters->CaptureEvictResponse) { + Cerr << "CAPTURE S3_RESPONSE(put)" << Endl; + --Counters->CaptureEvictResponse; + Counters->CapturedResponses.push_back(ev.Release()); + return true; + } + + ss << "S3_RESPONSE(put " << ++Counters->ExportCounters.Response << "):"; + } else if (auto* msg = TryGetPrivateEvent<NWrappers::NExternalStorage::TEvDeleteObjectRequest>(ev)) { + ss << "S3_REQ(delete " << ++Counters->ForgetCounters.Request << "):"; } else if (auto* msg = TryGetPrivateEvent<NWrappers::NExternalStorage::TEvDeleteObjectResponse>(ev)) { - ss << "(" << ++Counters->SuccessCounter << "): DELETE SUCCESS"; - ss << "S3_RESPONSE(delete " << ++Counters->ResponsesCounter << "):"; + if (Counters->CaptureForgetResponse) { + Cerr << "CAPTURE S3_RESPONSE(delete)" << Endl; + --Counters->CaptureForgetResponse; + Counters->CapturedResponses.push_back(ev.Release()); + return true; + } + + ss << "S3_RESPONSE(delete " << ++Counters->ForgetCounters.Response << "):"; } else if (auto* msg = TryGetPrivateEvent<NBlobCache::TEvBlobCache::TEvReadBlobRange>(ev)) { if (Counters->CaptureReadEvents) { Cerr << "CAPTURE " << msg->BlobRange.ToString() << " " @@ -463,23 +542,28 @@ public: } else { return false; } + } else if (auto* msg = TryGetPrivateEvent<TEvColumnShard::TEvReadResult>(ev)) { + ss << "Got TEvReadResult " << NKikimrTxColumnShard::EResultStatus_Name(Proto(msg).GetStatus()) << Endl; } else { return false; } ss << " " << ev->Sender << "->" << ev->Recipient; Cerr << ss << Endl; return false; - }; + } }; std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TString>& blobs, const std::vector<TTestSchema::TTableSpecials>& specs, - const ui32 initialEviction) + const THashSet<ui32>& exportSteps, + const THashSet<ui32>& forgetSteps, + std::optional<ui32> eventLoss = {}) { TTestBasicRuntime runtime; TTester::Setup(runtime); - runtime.SetLogPriority(NKikimrServices::TX_COLUMNSHARD, NActors::NLog::PRI_DEBUG); + runtime.SetLogPriority(NKikimrServices::TX_COLUMNSHARD, 
NActors::NLog::PRI_INFO); + runtime.SetLogPriority(NKikimrServices::TX_COLUMNSHARD_SCAN, NActors::NLog::PRI_INFO); TActorId sender = runtime.AllocateEdgeActor(); CreateTestBootstrapper(runtime, @@ -509,6 +593,7 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt ui64 tableId = 1; ui64 planStep = 1000000000; // greater then delays ui64 txId = 100; + const TDuration exportTimeout = TDuration::Seconds(40); UNIT_ASSERT(specs.size() > 0); { @@ -532,18 +617,37 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt RebootTablet(runtime, TTestTxConfig::TxTablet0, sender); } + runtime.SetLogPriority(NKikimrServices::TX_COLUMNSHARD, NActors::NLog::PRI_DEBUG); + TAutoPtr<IEventHandle> handle; std::vector<std::pair<ui32, ui64>> specRowsBytes; specRowsBytes.reserve(specs.size()); + ui32 deplayedExports = 0; + ui32 deplayedForgets = 0; TCountersContainer counter; runtime.SetEventFilter(TEventsCounter(counter, runtime)); for (ui32 i = 0; i < specs.size(); ++i) { + ui32 numExports = exportSteps.contains(i) ? 1 : 0; + ui32 numForgets = forgetSteps.contains(i) ? 
1 : 0; bool hasColdEviction = false; - for (auto&& i : specs[i].Tiers) { - if (!!i.S3) { + bool misconfig = false; + auto expectedReadResult = EExpectedResult::OK; + for (auto&& spec : specs[i].Tiers) { + if (!!spec.S3) { hasColdEviction = true; + if (spec.S3->GetEndpoint() != "fake") { + misconfig = true; + // misconfig in export => OK, misconfig after export => ERROR + if (i > 1) { + expectedReadResult = EExpectedResult::ERROR; + } + deplayedExports += numExports; + deplayedForgets += numForgets; + numExports = 0; + numForgets = 0; + } break; } } @@ -557,12 +661,37 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt PlanSchemaTx(runtime, sender, { planStep, txId }); } } - if (specs[i].HasTiers()) { + if (specs[i].HasTiers() || reboots) { ProvideTieringSnapshot(runtime, sender, TTestSchema::BuildSnapshot(specs[i])); } + if (!misconfig && (deplayedExports || deplayedForgets)) { + UNIT_ASSERT(hasColdEviction); + // continue waiting: finish previous step + counter.WaitMoreEvents(runtime, exportTimeout, deplayedExports, deplayedForgets); + deplayedExports = 0; + deplayedForgets = 0; + } + + if (eventLoss) { + if (*eventLoss == i) { + if (numExports) { + counter.CaptureEvictResponse = 1; + deplayedExports += numExports; + numExports = 0; + } else if (numForgets) { + counter.CaptureForgetResponse = reboots ? 2 : 1; + deplayedForgets += numForgets; + numForgets = 0; + } + } else { + // Check there would be no troubles with delayed responses + counter.ResendCapturedResponses(runtime); + } + } + // Read crossed with eviction (start) - { + if (!misconfig) { auto read = std::make_unique<TEvColumnShard::TEvRead>(sender, metaShard, planStep-1, Max<ui64>(), tableId); Proto(read.get()).AddColumnNames(specs[i].TtlColumn); @@ -575,24 +704,26 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt TriggerTTL(runtime, sender, { ++planStep, ++txId }, {}, 0, specs[i].TtlColumn); - Cerr << (hasColdEviction ? 
"Cold" : "Hot") - << " tiering, spec " << i << ", num tiers: " << specs[i].Tiers.size() << "\n"; + Cerr << "-- " << (hasColdEviction ? "COLD" : "HOT") + << " TIERING(" << i << ") num tiers: " << specs[i].Tiers.size() + << ", exports: " << numExports << ", forgets: " << numForgets + << ", delayed exports: " << deplayedExports << ", delayed forgets: " << deplayedForgets << Endl; - if (hasColdEviction) { - if (i > initialEviction) { - counter.WaitEvents(runtime, i, 1, TDuration::Seconds(40)); - } else { - counter.WaitEvents(runtime, i, 0, TDuration::Seconds(20)); - } + if (numExports) { + UNIT_ASSERT(hasColdEviction); + counter.WaitEvents(runtime, exportTimeout, numExports, 0); } else { - counter.WaitEvents(runtime, i, 0, TDuration::Seconds(4)); + TDuration timeout = hasColdEviction ? TDuration::Seconds(10) : TDuration::Seconds(4); + counter.WaitEvents(runtime, timeout, 0, 0); } - if (reboots) { - ProvideTieringSnapshot(runtime, sender, TTestSchema::BuildSnapshot(specs[i])); + + if (numForgets && reboots) { + // Do not finish forget before reboot. Check forget would happen after it. 
+ counter.BlockForgetsTillReboot(); } // Read crossed with eviction (finish) - { + if (!misconfig) { counter.ResendCapturedReads(runtime); ui32 numBatches = 0; THashSet<ui32> batchNumbers; @@ -601,7 +732,8 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt UNIT_ASSERT(event); auto& resRead = Proto(event); - TString data = GetReadResult(resRead, {}, {}); + TString data = GetReadResult(resRead, EExpectedResult::OK); + batchNumbers.insert(resRead.GetBatch()); if (resRead.GetFinished()) { numBatches = resRead.GetBatch() + 1; @@ -609,6 +741,16 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt } } + if (numForgets) { + UNIT_ASSERT(hasColdEviction); + if (reboots) { + Cerr << "INTERMEDIATE REBOOT(" << i << ")" << Endl; + RebootTablet(runtime, TTestTxConfig::TxTablet0, sender); + ProvideTieringSnapshot(runtime, sender, TTestSchema::BuildSnapshot(specs[i])); + } + counter.WaitMoreEvents(runtime, exportTimeout, 0, numForgets); + } + // Read data after eviction auto read = std::make_unique<TEvColumnShard::TEvRead>(sender, metaShard, planStep-1, Max<ui64>(), tableId); @@ -616,12 +758,17 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, read.release()); specRowsBytes.emplace_back(0, 0); - while (true) { + ui32 numBatches = 0; + ui32 numExpected = (expectedReadResult == EExpectedResult::ERROR) ? 
1 : 100; + for (; numBatches < numExpected; ++numBatches) { auto event = runtime.GrabEdgeEvent<TEvColumnShard::TEvReadResult>(handle); UNIT_ASSERT(event); auto& resRead = Proto(event); - TString data = GetReadResult(resRead, {}, {}); + TString data = GetReadResult(resRead, expectedReadResult); + if (expectedReadResult == EExpectedResult::ERROR) { + break; + } if (!data.size()) { break; } @@ -642,6 +789,7 @@ std::vector<std::pair<ui32, ui64>> TestTiers(bool reboots, const std::vector<TSt } if (reboots) { + Cerr << "REBOOT(" << i << ")" << Endl; RebootTablet(runtime, TTestTxConfig::TxTablet0, sender); } } @@ -747,13 +895,32 @@ std::vector<std::pair<ui32, ui64>> TestTiersAndTtl(const TTestSchema::TTableSpec size_t initialEviction = alters.size(); TEvictionChanges changes; + THashSet<ui32> exports; + THashSet<ui32> forgets; if (testTtl) { changes.AddTtlAlters(spec, {allowBoth, allowOne, allowNone}, alters); } else { changes.AddTierAlters(spec, {allowBoth, allowOne, allowNone}, alters); + + for (ui32 i = initialEviction + 1; i < alters.size() - 1; ++i) { + for (auto& tier : alters[i].Tiers) { + if (tier.S3) { + exports.emplace(i); + break; + } + } + } + for (ui32 i = initialEviction + 2; i < alters.size(); ++i) { + for (auto& tier : alters[i].Tiers) { + if (tier.S3) { + forgets.emplace(i); + break; + } + } + } } - auto rowsBytes = TestTiers(reboots, blobs, alters, initialEviction); + auto rowsBytes = TestTiers(reboots, blobs, alters, exports, forgets); for (auto&& i : rowsBytes) { Cerr << i.first << "/" << i.second << Endl; } @@ -766,12 +933,50 @@ std::vector<std::pair<ui32, ui64>> TestTiersAndTtl(const TTestSchema::TTableSpec return rowsBytes; } -void TestTwoHotTiers(bool reboot, bool changeTtl, const EInitialEviction initial = EInitialEviction::None) { +std::vector<std::pair<ui32, ui64>> TestOneTierExport(const TTestSchema::TTableSpecials& spec, bool reboots, + std::optional<ui32> misconfig, std::optional<ui32> loss) { + const std::vector<ui64> ts = { 1600000000, 
1620000000 }; + + ui32 overlapSize = 0; + std::vector<TString> blobs = MakeData(ts, PORTION_ROWS, overlapSize, spec.TtlColumn); + + TInstant now = TAppData::TimeProvider->Now(); + TDuration allowBoth = TDuration::Seconds(now.Seconds() - ts[0] + 600); + TDuration allowOne = TDuration::Seconds(now.Seconds() - ts[1] + 600); + TDuration allowNone = TDuration::Seconds(now.Seconds() - ts[1] - 600); + + std::vector<TTestSchema::TTableSpecials> alters = { TTestSchema::TTableSpecials() }; + + TEvictionChanges changes; + changes.AddTierAlters(spec, {allowBoth, allowOne, allowNone}, alters); + UNIT_ASSERT_VALUES_EQUAL(alters.size(), 4); + + if (misconfig) { + // Add error in config => eviction + not finished export + UNIT_ASSERT_VALUES_EQUAL(alters[*misconfig].Tiers.size(), 1); + UNIT_ASSERT(alters[*misconfig].Tiers[0].S3); + alters[*misconfig].Tiers[0].S3->SetEndpoint("nowhere"); // clear special "fake" endpoint + } + + auto rowsBytes = TestTiers(reboots, blobs, alters, {1}, {2, 3}, loss); + for (auto&& i : rowsBytes) { + Cerr << i.first << "/" << i.second << Endl; + } + + UNIT_ASSERT_EQUAL(rowsBytes.size(), alters.size()); + if (!misconfig) { + changes.Assert(spec, rowsBytes, 1); + } + return rowsBytes; +} + +void TestTwoHotTiers(bool reboot, bool changeTtl, const EInitialEviction initial = EInitialEviction::None, + bool revCompaction = false) { TTestSchema::TTableSpecials spec; spec.SetTtlColumn("timestamp"); spec.Tiers.emplace_back(TTestSchema::TStorageTier("tier0").SetTtlColumn("timestamp")); spec.Tiers.emplace_back(TTestSchema::TStorageTier("tier1").SetTtlColumn("timestamp")); - spec.Tiers.back().SetCodec("zstd"); + spec.Tiers[(revCompaction ? 
0 : 1)].SetCodec("zstd"); auto rowsBytes = TestTiersAndTtl(spec, reboot, initial, changeTtl); if (changeTtl) { @@ -792,12 +997,16 @@ void TestTwoHotTiers(bool reboot, bool changeTtl, const EInitialEviction initial UNIT_ASSERT_VALUES_EQUAL(rowsBytes[3].first, PORTION_ROWS); UNIT_ASSERT_VALUES_EQUAL(rowsBytes[4].first, 0); - UNIT_ASSERT(rowsBytes[1].second > rowsBytes[2].second); // compression works + // compression works + if (revCompaction) { + UNIT_ASSERT(rowsBytes[1].second < rowsBytes[2].second); + } else { + UNIT_ASSERT(rowsBytes[1].second > rowsBytes[2].second); + } } } void TestHotAndColdTiers(bool reboot, const EInitialEviction initial) { - const TString bucket = "tiering-test-01"; TPortManager portManager; const ui16 port = portManager.GetPort(); @@ -808,32 +1017,26 @@ void TestHotAndColdTiers(bool reboot, const EInitialEviction initial) { spec.SetTtlColumn("timestamp"); spec.Tiers.emplace_back(TTestSchema::TStorageTier("tier0").SetTtlColumn("timestamp")); spec.Tiers.emplace_back(TTestSchema::TStorageTier("tier1").SetTtlColumn("timestamp")); - spec.Tiers.back().S3 = NKikimrSchemeOp::TS3Settings(); - auto& s3Config = *spec.Tiers.back().S3; - { - - s3Config.SetScheme(NKikimrSchemeOp::TS3Settings::HTTP); - s3Config.SetVerifySSL(false); - s3Config.SetBucket(bucket); -//#define S3_TEST_USAGE -#ifdef S3_TEST_USAGE - s3Config.SetEndpoint("storage.cloud-preprod.yandex.net"); - s3Config.SetAccessKey("..."); - s3Config.SetSecretKey("..."); - s3Config.SetProxyHost("localhost"); - s3Config.SetProxyPort(8080); - s3Config.SetProxyScheme(NKikimrSchemeOp::TS3Settings::HTTP); -#else - s3Config.SetEndpoint("fake"); -#endif - s3Config.SetRequestTimeoutMs(10000); - s3Config.SetHttpRequestTimeoutMs(10000); - s3Config.SetConnectionTimeoutMs(10000); - } + spec.Tiers.back().S3 = TTestSchema::TStorageTier::FakeS3(); TestTiersAndTtl(spec, reboot, initial); } +void TestExport(bool reboot, std::optional<ui32> misconfig = {}, std::optional<ui32> loss = {}) { + TPortManager 
portManager; + const ui16 port = portManager.GetPort(); + + TS3Mock s3Mock({}, TS3Mock::TSettings(port)); + UNIT_ASSERT(s3Mock.Start()); + + TTestSchema::TTableSpecials spec; + spec.SetTtlColumn("timestamp"); + spec.Tiers.emplace_back(TTestSchema::TStorageTier("cold").SetTtlColumn("timestamp")); + spec.Tiers.back().S3 = TTestSchema::TStorageTier::FakeS3(); + + TestOneTierExport(spec, reboot, misconfig, loss); +} + void TestDrop(bool reboots) { TTestBasicRuntime runtime; TTester::Setup(runtime); @@ -954,6 +1157,79 @@ void TestDropWriteRace() { PlanCommit(runtime, sender, ++planStep, commitTxId); } +void TestCompaction(std::optional<ui32> numWrites = {}) { + TTestBasicRuntime runtime; + TTester::Setup(runtime); + + TActorId sender = runtime.AllocateEdgeActor(); + CreateTestBootstrapper(runtime, + CreateTestTabletInfo(TTestTxConfig::TxTablet0, TTabletTypes::ColumnShard), + &CreateColumnShard); + + TDispatchOptions options; + options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot)); + runtime.DispatchEvents(options); + + // Create table + + ui64 metaShard = TTestTxConfig::TxTablet1; + ui64 writeId = 0; + ui64 tableId = 1; + ui64 planStep = 100; + ui64 txId = 100; + + bool ok = ProposeSchemaTx(runtime, sender, TTestSchema::CreateTableTxBody(tableId, testYdbSchema, testYdbPk), + {++planStep, ++txId}); + UNIT_ASSERT(ok); + PlanSchemaTx(runtime, sender, {planStep, txId}); + + // Set tiering + + ui64 ts = 1620000000; + TInstant now = TAppData::TimeProvider->Now(); + TDuration allow = TDuration::Seconds(now.Seconds() - ts + 3600); + TDuration disallow = TDuration::Seconds(now.Seconds() - ts - 3600); + + TTestSchema::TTableSpecials spec; + spec.SetTtlColumn("timestamp"); + spec.Tiers.emplace_back(TTestSchema::TStorageTier("hot").SetTtlColumn("timestamp")); + spec.Tiers.back().EvictAfter = disallow; + spec.Tiers.emplace_back(TTestSchema::TStorageTier("cold").SetTtlColumn("timestamp")); + spec.Tiers.back().EvictAfter = allow; + 
spec.Tiers.back().S3 = TTestSchema::TStorageTier::FakeS3(); + + ok = ProposeSchemaTx(runtime, sender, TTestSchema::AlterTableTxBody(tableId, 1, spec), + {++planStep, ++txId}); + UNIT_ASSERT(ok); + PlanSchemaTx(runtime, sender, {planStep, txId}); + + ProvideTieringSnapshot(runtime, sender, TTestSchema::BuildSnapshot(spec)); + + // Writes + + std::vector<TString> blobs = MakeData({ts, ts}, PORTION_ROWS, 0, spec.TtlColumn); + const TString& triggerData = blobs[0]; + //UNIT_ASSERT(triggerData.size() > NColumnShard::TLimits::MIN_BYTES_TO_INSERT); + //UNIT_ASSERT(triggerData.size() < NColumnShard::TLimits::GetMaxBlobSize()); + + if (!numWrites) { + numWrites = 4 * NOlap::TCompactionLimits().GranuleExpectedSize / triggerData.size(); + } + + ++planStep; + ++txId; + for (ui32 i = 0; i < *numWrites; ++i, ++writeId, ++planStep, ++txId) { + UNIT_ASSERT(WriteData(runtime, sender, metaShard, writeId, tableId, triggerData)); + + ProposeCommit(runtime, sender, metaShard, txId, {writeId}); + PlanCommit(runtime, sender, planStep, txId); + + if (i % 2 == 0) { + TriggerTTL(runtime, sender, {++planStep, ++txId}, {}, 0, spec.TtlColumn); + } + } +} + } namespace NColumnShard { @@ -1117,6 +1393,14 @@ Y_UNIT_TEST_SUITE(TColumnShardTestSchema) { TestTwoHotTiers(true, false); } + Y_UNIT_TEST(HotTiersRevCompression) { + TestTwoHotTiers(false, false, EInitialEviction::None, true); + } + + Y_UNIT_TEST(RebootHotTiersRevCompression) { + TestTwoHotTiers(true, false, EInitialEviction::None, true); + } + Y_UNIT_TEST(HotTiersTtl) { NColumnShard::gAllowLogBatchingDefaultValue = false; TestTwoHotTiers(false, true); @@ -1162,7 +1446,55 @@ Y_UNIT_TEST_SUITE(TColumnShardTestSchema) { TestHotAndColdTiers(true, EInitialEviction::Ttl); } - // TODO: EnableTtlAfterColdTiers + Y_UNIT_TEST(OneColdTier) { + TestExport(false); + } + + Y_UNIT_TEST(RebootOneColdTier) { + TestExport(true); + } + + Y_UNIT_TEST(ExportAfterFail) { + TestExport(false, 1); + } + + Y_UNIT_TEST(RebootExportAfterFail) { + TestExport(true, 
1); + } + + Y_UNIT_TEST(ForgetAfterFail) { + TestExport(false, 2); + } + + Y_UNIT_TEST(RebootForgetAfterFail) { + TestExport(true, 2); + } + + Y_UNIT_TEST(ExportWithLostAnswer) { + TestExport(false, {}, 1); + } + + Y_UNIT_TEST(RebootExportWithLostAnswer) { + TestExport(true, {}, 1); + } + + Y_UNIT_TEST(ForgetWithLostAnswer) { + TestExport(false, {}, 2); + } + + Y_UNIT_TEST(RebootForgettWithLostAnswer) { + TestExport(true, {}, 2); + } + + // TODO: LastTierBorderIsTtl = false + + // TODO: DisableTierAfterExport + // TODO: ReenableTierAfterExport + // TODO: AlterTierBorderAfterExport + + Y_UNIT_TEST(ColdCompactionSmoke) { + TestCompaction(); + } Y_UNIT_TEST(Drop) { TestDrop(false); diff --git a/ydb/core/tx/coordinator/coordinator__init.cpp b/ydb/core/tx/coordinator/coordinator__init.cpp index beb9975b42..0a06269b58 100644 --- a/ydb/core/tx/coordinator/coordinator__init.cpp +++ b/ydb/core/tx/coordinator/coordinator__init.cpp @@ -13,6 +13,8 @@ struct TTxCoordinator::TTxInit : public TTransactionBase<TTxCoordinator> { ui64 PlanResolution; ui64 LastPlanned = 0; ui64 LastAcquired = 0; + TActorId LastBlockedActor; + ui64 LastBlockedStep = 0; TTxInit(TSelf *coordinator) : TBase(coordinator) @@ -27,6 +29,7 @@ struct TTxCoordinator::TTxInit : public TTransactionBase<TTxCoordinator> { ready &= LoadDomainConfiguration(db); ready &= LoadLastPlanned(db); ready &= LoadLastAcquired(db); + ready &= LoadLastBlocked(db); return ready; } @@ -63,26 +66,29 @@ struct TTxCoordinator::TTxInit : public TTransactionBase<TTxCoordinator> { } bool LoadLastPlanned(NIceDb::TNiceDb &db) { - auto rowset = db.Table<Schema::State>().Key(Schema::State::KeyLastPlanned).Select<Schema::State::StateValue>(); - - if (!rowset.IsReady()) - return false; - - if (rowset.IsValid()) - LastPlanned = rowset.GetValue<Schema::State::StateValue>(); - - return true; + return Schema::LoadState(db, Schema::State::KeyLastPlanned, LastPlanned); } bool LoadLastAcquired(NIceDb::TNiceDb &db) { - auto rowset = 
db.Table<Schema::State>().Key(Schema::State::AcquireReadStepLast).Select<Schema::State::StateValue>(); + return Schema::LoadState(db, Schema::State::AcquireReadStepLast, LastAcquired); + } - if (!rowset.IsReady()) - return false; + bool LoadLastBlocked(NIceDb::TNiceDb &db) { + ui64 x1 = 0; + ui64 x2 = 0; + ui64 step = 0; - if (rowset.IsValid()) - LastAcquired = rowset.GetValue<Schema::State::StateValue>(); + bool ready = true; + ready &= Schema::LoadState(db, Schema::State::LastBlockedActorX1, x1); + ready &= Schema::LoadState(db, Schema::State::LastBlockedActorX2, x2); + ready &= Schema::LoadState(db, Schema::State::LastBlockedStep, step); + if (!ready) { + return false; + } + + LastBlockedActor = TActorId(x1, x2); + LastBlockedStep = step; return true; } @@ -101,6 +107,12 @@ struct TTxCoordinator::TTxInit : public TTransactionBase<TTxCoordinator> { } void Complete(const TActorContext &ctx) override { + // Assume worst case, everything up to LastBlockedStep was planned + LastPlanned = Max(LastPlanned, LastBlockedStep); + + // Assume worst case, last planned step was also acquired + LastAcquired = Max(LastAcquired, LastPlanned); + Self->VolatileState.LastPlanned = LastPlanned; Self->VolatileState.LastSentStep = LastPlanned; Self->VolatileState.LastAcquired = LastAcquired; diff --git a/ydb/core/tx/coordinator/coordinator__schema_upgrade.cpp b/ydb/core/tx/coordinator/coordinator__schema_upgrade.cpp index 931a7c993e..02980bf2bc 100644 --- a/ydb/core/tx/coordinator/coordinator__schema_upgrade.cpp +++ b/ydb/core/tx/coordinator/coordinator__schema_upgrade.cpp @@ -18,19 +18,17 @@ struct TTxCoordinator::TTxUpgrade : public TTransactionBase<TTxCoordinator> { bool Execute(TTransactionContext &txc, const TActorContext& ctx) override { NIceDb::TNiceDb db(txc.DB); - - auto row = db.Table<Schema::State>().Key(Schema::State::DatabaseVersion).Select<Schema::State::StateValue>(); - if (!row.IsReady()) { + std::optional<ui64> databaseVersion; + if (!Schema::LoadState(db, 
Schema::State::DatabaseVersion, databaseVersion)) { return false; } - if (!row.IsValid()) { - db.Table<Schema::State>().Key(Schema::State::DatabaseVersion).Update(NIceDb::TUpdate<Schema::State::StateValue>(Schema::CurrentVersion)); + if (!databaseVersion) { + Schema::SaveState(db, Schema::State::DatabaseVersion, Schema::CurrentVersion); return true; } - Schema::State::StateValue::Type databaseVersion = row.GetValue<Schema::State::StateValue>(); - if (Schema::CurrentVersion == databaseVersion) { + if (*databaseVersion == Schema::CurrentVersion) { return true; } @@ -38,7 +36,7 @@ struct TTxCoordinator::TTxUpgrade : public TTransactionBase<TTxCoordinator> { FLOG_LOG_S(ctx, NActors::NLog::PRI_CRIT, NKikimrServices::TX_COORDINATOR, "tablet# " << Self->Tablet() << " SEND to self TEvents::TEvPoisonPill" << - " databaseVersion# " << databaseVersion << + " databaseVersion# " << *databaseVersion << " CurrentDataBaseVersion# " << Schema::CurrentVersion << " reason# no realisation for upgrade scheme present"); return true; diff --git a/ydb/core/tx/coordinator/coordinator_impl.cpp b/ydb/core/tx/coordinator/coordinator_impl.cpp index 64e01eeaf4..b48ef4615e 100644 --- a/ydb/core/tx/coordinator/coordinator_impl.cpp +++ b/ydb/core/tx/coordinator/coordinator_impl.cpp @@ -45,8 +45,6 @@ static TAutoPtr<TTransactionProposal> MakeTransactionProposal(TEvTxProxy::TEvPro return proposal; } -const ui32 TTxCoordinator::Schema::CurrentVersion = 1; - TTxCoordinator::TTxCoordinator(TTabletStorageInfo *info, const TActorId &tablet) : TActor(&TThis::StateInit) , TTabletExecutedFlat(info, tablet, new NMiniKQL::TMiniKQLFactory) diff --git a/ydb/core/tx/coordinator/coordinator_impl.h b/ydb/core/tx/coordinator/coordinator_impl.h index 075fd89e81..02ef0c8118 100644 --- a/ydb/core/tx/coordinator/coordinator_impl.h +++ b/ydb/core/tx/coordinator/coordinator_impl.h @@ -386,7 +386,7 @@ class TTxCoordinator : public TActor<TTxCoordinator>, public TTabletExecutedFlat public: struct Schema : NIceDb::Schema { 
- static const ui32 CurrentVersion; + static constexpr ui64 CurrentVersion = 1; struct Transaction : Table<0> { struct ID : Column<0, NScheme::NTypeIds::Uint64> {}; // PK @@ -407,10 +407,13 @@ public: }; struct State : Table<2> { - enum EKeyType { - KeyLastPlanned, - DatabaseVersion, - AcquireReadStepLast, + enum EKeyType : ui64 { + KeyLastPlanned = 0, + DatabaseVersion = 1, + AcquireReadStepLast = 2, + LastBlockedActorX1 = 3, + LastBlockedActorX2 = 4, + LastBlockedStep = 5, }; struct StateKey : Column<0, NScheme::NTypeIds::Uint64> { using Type = EKeyType; }; // PK @@ -431,6 +434,37 @@ public: }; using TTables = SchemaTables<Transaction, AffectedSet, State, DomainConfiguration>; + + template<class TCallback> + static bool LoadState(NIceDb::TNiceDb& db, State::EKeyType key, TCallback&& callback) { + auto rowset = db.Table<State>().Key(key).Select<State::StateValue>(); + + if (!rowset.IsReady()) { + return false; + } + + if (rowset.IsValid()) { + callback(rowset.GetValue<State::StateValue>()); + } + + return true; + } + + static bool LoadState(NIceDb::TNiceDb& db, State::EKeyType key, std::optional<ui64>& out) { + return LoadState(db, key, [&out](ui64 value) { + out.emplace(value); + }); + } + + static bool LoadState(NIceDb::TNiceDb& db, State::EKeyType key, ui64& out) { + return LoadState(db, key, [&out](ui64 value) { + out = value; + }); + } + + static void SaveState(NIceDb::TNiceDb& db, State::EKeyType key, ui64 value) { + db.Table<State>().Key(key).Update<State::StateValue>(value); + } }; private: diff --git a/ydb/core/tx/datashard/cdc_stream_scan.cpp b/ydb/core/tx/datashard/cdc_stream_scan.cpp index 8d65d8ba5d..aff7bd01f4 100644 --- a/ydb/core/tx/datashard/cdc_stream_scan.cpp +++ b/ydb/core/tx/datashard/cdc_stream_scan.cpp @@ -163,6 +163,7 @@ class TDataShard::TTxCdcStreamScanProgress TDataShard::TEvPrivate::TEvCdcStreamScanProgress::TPtr Request; THolder<TDataShard::TEvPrivate::TEvCdcStreamScanContinue> Response; TVector<IDataShardChangeCollector::TChange> 
ChangeRecords; + bool Reschedule = false; static TVector<TRawTypeValue> MakeKey(TArrayRef<const TCell> cells, TUserTable::TCPtr table) { TVector<TRawTypeValue> key(Reserve(cells.size())); @@ -219,17 +220,30 @@ public: LOG_D("Progress" << ": streamPathId# " << streamPathId); - if (Self->CheckChangesQueueOverflow()) { + if (!Self->GetUserTables().contains(tablePathId.LocalPathId)) { + LOG_W("Cannot progress on unknown table" + << ": tablePathId# " << tablePathId); return true; } - Y_VERIFY(Self->GetUserTables().contains(tablePathId.LocalPathId)); auto table = Self->GetUserTables().at(tablePathId.LocalPathId); auto it = table->CdcStreams.find(streamPathId); - Y_VERIFY(it != table->CdcStreams.end()); + if (it == table->CdcStreams.end()) { + LOG_W("Cannot progress on unknown cdc stream" + << ": streamPathId# " << streamPathId); + return true; + } + + ChangeRecords.clear(); + if (Self->CheckChangesQueueOverflow()) { + Reschedule = true; + return true; + } NIceDb::TNiceDb db(txc.DB); + bool pageFault = false; + for (const auto& [k, v] : ev.Rows) { const auto key = MakeKey(k.GetCells(), table); const auto& keyTags = table->KeyColumnIds; @@ -238,10 +252,10 @@ public: TSelectStats stats; auto ready = txc.DB.Select(table->LocalTid, key, {}, row, stats, 0, readVersion); if (ready == EReady::Page) { - return false; + pageFault = true; } - if (ready == EReady::Gone || stats.InvisibleRowSkips) { + if (pageFault || ready == EReady::Gone || stats.InvisibleRowSkips) { continue; } @@ -293,6 +307,10 @@ public: Self->PersistChangeRecord(db, record); } + if (pageFault) { + return false; + } + if (ev.Rows) { const auto& [key, _] = ev.Rows.back(); @@ -315,12 +333,12 @@ public: Self->EnqueueChangeRecords(std::move(ChangeRecords)); ctx.Send(Request->Sender, Response.Release()); - } else { - LOG_I("Re-run progress tx" + } else if (Reschedule) { + LOG_I("Re-schedule progress tx" << ": streamPathId# " << Request->Get()->StreamPathId); // re-schedule tx - ctx.Schedule(TDuration::Seconds(1), 
Request->Release().Release()); + ctx.TActivationContext::Schedule(TDuration::Seconds(1), Request->Forward(ctx.SelfID)); } } diff --git a/ydb/core/tx/datashard/change_collector_cdc_stream.cpp b/ydb/core/tx/datashard/change_collector_cdc_stream.cpp index 56d000a623..326abd1a89 100644 --- a/ydb/core/tx/datashard/change_collector_cdc_stream.cpp +++ b/ydb/core/tx/datashard/change_collector_cdc_stream.cpp @@ -209,7 +209,7 @@ bool TCdcStreamChangeCollector::Collect(const TTableId& tableId, ERowOp rop, } } } else { - Y_FAIL_S("Cannot retrieve cdc stream scan info: " << pathId); + // nop, scan is completed } break; default: diff --git a/ydb/core/tx/datashard/change_sender_async_index.cpp b/ydb/core/tx/datashard/change_sender_async_index.cpp index e90343c601..7b19d72dfd 100644 --- a/ydb/core/tx/datashard/change_sender_async_index.cpp +++ b/ydb/core/tx/datashard/change_sender_async_index.cpp @@ -188,12 +188,33 @@ class TAsyncIndexChangeSenderShard: public TActorBootstrapped<TAsyncIndexChangeS } } + bool CanRetry() const { + return Attempt < MaxAttempts; + } + + void Retry() { + ++Attempt; + Delay = Min(2 * Delay, MaxDelay); + + LOG_N("Retry" + << ": attempt# " << Attempt + << ", delay# " << Delay); + + const auto random = TDuration::FromValue(TAppData::RandomProvider->GenRand64() % Delay.MicroSeconds()); + Schedule(Delay + random, new TEvents::TEvWakeup()); + } + void Handle(TEvPipeCache::TEvDeliveryProblem::TPtr& ev) { if (ShardId != ev->Get()->TabletId) { return; } - Leave(); + if (CanRetry()) { + Unlink(); + Retry(); + } else { + Leave(); + } } void Handle(NMon::TEvRemoteHttpInfo::TPtr& ev) { @@ -222,11 +243,14 @@ class TAsyncIndexChangeSenderShard: public TActorBootstrapped<TAsyncIndexChangeS PassAway(); } - void PassAway() override { + void Unlink() { if (LeaderPipeCache) { Send(LeaderPipeCache, new TEvPipeCache::TEvUnlink(ShardId)); } + } + void PassAway() override { + Unlink(); TActorBootstrapped::PassAway(); } @@ -254,6 +278,7 @@ public: switch (ev->GetTypeRewrite()) 
{ hFunc(TEvPipeCache::TEvDeliveryProblem, Handle); hFunc(NMon::TEvRemoteHttpInfo, Handle); + sFunc(TEvents::TEvWakeup, Handshake); sFunc(TEvents::TEvPoison, PassAway); } } @@ -269,6 +294,12 @@ private: TActorId LeaderPipeCache; ui64 LastRecordOrder; + // Retry on delivery problem + static constexpr ui32 MaxAttempts = 3; + static constexpr auto MaxDelay = TDuration::MilliSeconds(50); + ui32 Attempt = 0; + TDuration Delay = TDuration::MilliSeconds(10); + }; // TAsyncIndexChangeSenderShard class TAsyncIndexChangeSenderMain @@ -624,8 +655,11 @@ class TAsyncIndexChangeSenderMain return Retry(); } + const bool versionChanged = !IndexTableVersion || IndexTableVersion != entry.GeneralVersion; + IndexTableVersion = entry.GeneralVersion; + KeyDesc = std::move(entry.KeyDescription); - CreateSenders(MakePartitionIds(KeyDesc->GetPartitions())); + CreateSenders(MakePartitionIds(KeyDesc->GetPartitions()), versionChanged); Become(&TThis::StateMain); } @@ -723,6 +757,7 @@ public: : TActorBootstrapped() , TBaseChangeSender(this, this, dataShard, indexPathId) , UserTableId(userTableId) + , IndexTableVersion(0) { } @@ -751,6 +786,7 @@ private: TMap<TTag, TTag> TagMap; // from main to index TPathId IndexTablePathId; + ui64 IndexTableVersion; THolder<TKeyDesc> KeyDesc; }; // TAsyncIndexChangeSenderMain diff --git a/ydb/core/tx/datashard/change_sender_common_ops.cpp b/ydb/core/tx/datashard/change_sender_common_ops.cpp index 68094c6dad..4d8ab339b8 100644 --- a/ydb/core/tx/datashard/change_sender_common_ops.cpp +++ b/ydb/core/tx/datashard/change_sender_common_ops.cpp @@ -8,7 +8,7 @@ namespace NKikimr::NDataShard { -void TBaseChangeSender::CreateSenders(const TVector<ui64>& partitionIds) { +void TBaseChangeSender::CreateMissingSenders(const TVector<ui64>& partitionIds) { THashMap<ui64, TSender> senders; for (const auto& partitionId : partitionIds) { @@ -32,6 +32,24 @@ void TBaseChangeSender::CreateSenders(const TVector<ui64>& partitionIds) { } Senders = std::move(senders); +} + +void 
TBaseChangeSender::RecreateSenders(const TVector<ui64>& partitionIds) { + for (const auto& partitionId : partitionIds) { + Y_VERIFY(!Senders.contains(partitionId)); + auto& sender = Senders[partitionId]; + sender.ActorId = ActorOps->Register(CreateSender(partitionId)); + } +} + +void TBaseChangeSender::CreateSenders(const TVector<ui64>& partitionIds, bool partitioningChanged) { + if (partitioningChanged) { + CreateMissingSenders(partitionIds); + } else { + RecreateSenders(GonePartitions); + } + + GonePartitions.clear(); if (!Enqueued || !RequestRecords()) { SendRecords(); @@ -199,6 +217,7 @@ void TBaseChangeSender::OnGone(ui64 partitionId) { } Senders.erase(it); + GonePartitions.push_back(partitionId); if (Resolver->IsResolving()) { return; diff --git a/ydb/core/tx/datashard/change_sender_common_ops.h b/ydb/core/tx/datashard/change_sender_common_ops.h index a5de292ecd..26f5f6efed 100644 --- a/ydb/core/tx/datashard/change_sender_common_ops.h +++ b/ydb/core/tx/datashard/change_sender_common_ops.h @@ -57,7 +57,7 @@ class IChangeSender { public: virtual ~IChangeSender() = default; - virtual void CreateSenders(const TVector<ui64>& partitionIds) = 0; + virtual void CreateSenders(const TVector<ui64>& partitionIds, bool partitioningChanged = true) = 0; virtual void KillSenders() = 0; virtual IActor* CreateSender(ui64 partitionId) = 0; virtual void RemoveRecords() = 0; @@ -89,6 +89,9 @@ class TBaseChangeSender: public IChangeSender { TVector<TEnqueuedRecord> Pending; }; + void CreateMissingSenders(const TVector<ui64>& partitionIds); + void RecreateSenders(const TVector<ui64>& partitionIds); + bool RequestRecords(); void SendRecords(); @@ -103,7 +106,7 @@ protected: ActorOps->Send(DataShard.ActorId, new TEvChangeExchange::TEvRemoveRecords(std::move(remove))); } - void CreateSenders(const TVector<ui64>& partitionIds) override; + void CreateSenders(const TVector<ui64>& partitionIds, bool partitioningChanged = true) override; void KillSenders() override; void RemoveRecords() 
override; @@ -135,6 +138,8 @@ private: TSet<TRequestedRecord> PendingBody; TMap<ui64, TChangeRecord> PendingSent; // ui64 is order + TVector<ui64> GonePartitions; + }; // TBaseChangeSender struct TSchemeCacheHelpers { diff --git a/ydb/core/tx/datashard/datashard.cpp b/ydb/core/tx/datashard/datashard.cpp index 43f1b96c91..c7e64c504a 100644 --- a/ydb/core/tx/datashard/datashard.cpp +++ b/ydb/core/tx/datashard/datashard.cpp @@ -147,7 +147,7 @@ TDataShard::TDataShard(const TActorId &tablet, TTabletStorageInfo *info) , TtlReadAheadHi(0, 0, 128*1024*1024) , EnablePrioritizedMvccSnapshotReads(1, 0, 1) , EnableUnprotectedMvccSnapshotReads(1, 0, 1) - , EnableLockedWrites(0, 0, 1) + , EnableLockedWrites(1, 0, 1) , MaxLockedWritesPerKey(1000, 0, 1000000) , EnableLeaderLeases(1, 0, 1) , MinLeaderLeaseDurationUs(250000, 1000, 5000000) diff --git a/ydb/core/tx/datashard/datashard__read_iterator.cpp b/ydb/core/tx/datashard/datashard__read_iterator.cpp index b5d62ba07b..01587ce4bd 100644 --- a/ydb/core/tx/datashard/datashard__read_iterator.cpp +++ b/ydb/core/tx/datashard/datashard__read_iterator.cpp @@ -435,6 +435,11 @@ public: // note that FirstUnprocessedQuery is unsigned and if we do reverse iteration, // then it will also become less than size() when finished while (FirstUnprocessedQuery < State.Request->Ranges.size()) { + if (ReachedTotalRowsLimit()) { + FirstUnprocessedQuery = -1; + return true; + } + if (ShouldStop()) return true; @@ -464,6 +469,11 @@ public: // note that FirstUnprocessedQuery is unsigned and if we do reverse iteration, // then it will also become less than size() when finished while (FirstUnprocessedQuery < State.Request->Keys.size()) { + if (ReachedTotalRowsLimit()) { + FirstUnprocessedQuery = -1; + return true; + } + if (ShouldStop()) return true; @@ -631,6 +641,7 @@ public: } void UpdateState(TReadIteratorState& state) { + state.TotalRows += RowsRead; state.FirstUnprocessedQuery = FirstUnprocessedQuery; state.LastProcessedKey = LastProcessedKey; 
state.ConsumeSeqNo(RowsRead, BytesInResult); @@ -665,6 +676,27 @@ private: return RowsRead >= State.MaxRowsInResult; } + bool ReachedTotalRowsLimit() const { + if (State.TotalRowsLimit == Max<ui64>()) { + return false; + } + + return State.TotalRows + RowsRead >= State.TotalRowsLimit; + } + + ui64 GetTotalRowsLeft() const { + if (State.TotalRowsLimit == Max<ui64>()) { + return Max<ui64>(); + } + + if (State.TotalRows + RowsRead >= State.TotalRowsLimit) { + return 0; + } + + + return State.TotalRowsLimit - State.TotalRows - RowsRead; + } + bool ShouldStop() { if (!CanResume()) { return false; @@ -690,6 +722,8 @@ private: bytesLeft = State.Quota.Bytes - BlockBuilder.Bytes(); } + rowsLeft = Min(rowsLeft, GetTotalRowsLeft()); + auto direction = reverse ? NTable::EDirection::Reverse : NTable::EDirection::Forward; return db.Precharge(TableInfo.LocalTid, keyFrom, @@ -721,6 +755,10 @@ private: Self->GetKeyAccessSampler()->AddSample(TableId, rowKey.Cells()); + if (ReachedTotalRowsLimit()) { + break; + } + if (ShouldStop()) { return EReadStatus::StoppedByLimit; } @@ -1207,6 +1245,9 @@ public: if (record.HasMaxRowsInResult()) state.MaxRowsInResult = record.GetMaxRowsInResult(); + if (record.HasTotalRowsLimit()) + state.TotalRowsLimit = record.GetTotalRowsLimit(); + if (record.HasSnapshot()) { state.ReadVersion.Step = record.GetSnapshot().GetStep(); state.ReadVersion.TxId = record.GetSnapshot().GetTxId(); diff --git a/ydb/core/tx/datashard/datashard_impl.h b/ydb/core/tx/datashard/datashard_impl.h index 5742488661..38531d3d82 100644 --- a/ydb/core/tx/datashard/datashard_impl.h +++ b/ydb/core/tx/datashard/datashard_impl.h @@ -330,7 +330,7 @@ class TDataShard EvMediatorRestoreBackup, EvRemoveLockChangeRecords, EvCdcStreamScanRegistered, - EvCdcStreamScanProgress, + EvCdcStreamScanProgress, // WARNING: tests use ES_PRIVATE + 24 EvCdcStreamScanContinue, EvRestartOperation, // used to restart after an aborted scan (e.g. 
backup) EvChangeExchangeExecuteHandshakes, diff --git a/ydb/core/tx/datashard/datashard_ut_change_exchange.cpp b/ydb/core/tx/datashard/datashard_ut_change_exchange.cpp index dc5249d84c..ec8ba239ab 100644 --- a/ydb/core/tx/datashard/datashard_ut_change_exchange.cpp +++ b/ydb/core/tx/datashard/datashard_ut_change_exchange.cpp @@ -2366,6 +2366,206 @@ Y_UNIT_TEST_SUITE(Cdc) { }); } + Y_UNIT_TEST(InitialScanAndLimits) { + TPortManager portManager; + TServer::TPtr server = new TServer(TServerSettings(portManager.GetPort(2134), {}, DefaultPQConfig()) + .SetUseRealThreads(false) + .SetDomainName("Root") + .SetEnableChangefeedInitialScan(true) + .SetChangesQueueItemsLimit(1) + ); + + auto& runtime = *server->GetRuntime(); + const auto edgeActor = runtime.AllocateEdgeActor(); + + SetupLogging(runtime); + InitRoot(server, edgeActor); + CreateShardedTable(server, edgeActor, "/Root", "Table", SimpleTable()); + + ExecSQL(server, edgeActor, R"( + UPSERT INTO `/Root/Table` (key, value) VALUES + (1, 10), + (2, 20), + (3, 30); + )"); + + TVector<THolder<IEventHandle>> delayed; + ui32 progressCount = 0; + + auto prevObserver = runtime.SetObserverFunc([&](TTestActorRuntimeBase&, TAutoPtr<IEventHandle>& ev) { + static constexpr ui32 EvCdcStreamScanProgress = EventSpaceBegin(TKikimrEvents::ES_PRIVATE) + 24; + + switch (ev->GetTypeRewrite()) { + case TEvDataShard::EvCdcStreamScanRequest: + if (auto* msg = ev->Get<TEvDataShard::TEvCdcStreamScanRequest>()) { + msg->Record.MutableLimits()->SetBatchMaxRows(1); + } else { + UNIT_ASSERT(false); + } + break; + + case TEvChangeExchange::EvEnqueueRecords: + delayed.emplace_back(ev.Release()); + return TTestActorRuntime::EEventAction::DROP; + + case EvCdcStreamScanProgress: + ++progressCount; + break; + } + + return TTestActorRuntime::EEventAction::PROCESS; + }); + + WaitTxNotification(server, edgeActor, AsyncAlterAddStream(server, "/Root", "Table", + WithInitialScan(Updates(NKikimrSchemeOp::ECdcStreamFormatJson)))); + + if (delayed.empty()) { + 
TDispatchOptions opts; + opts.FinalEvents.emplace_back([&delayed, &progressCount](IEventHandle&) { + return !delayed.empty() && progressCount >= 2; + }); + runtime.DispatchEvents(opts); + } + + runtime.SetObserverFunc(prevObserver); + for (auto& ev : std::exchange(delayed, TVector<THolder<IEventHandle>>())) { + runtime.Send(ev.Release(), 0, true); + } + + WaitForContent(server, edgeActor, "/Root/Table/Stream", { + R"({"update":{"value":10},"key":[1]})", + R"({"update":{"value":20},"key":[2]})", + R"({"update":{"value":30},"key":[3]})", + }); + } + + Y_UNIT_TEST(InitialScanComplete) { + TPortManager portManager; + TServer::TPtr server = new TServer(TServerSettings(portManager.GetPort(2134), {}, DefaultPQConfig()) + .SetUseRealThreads(false) + .SetDomainName("Root") + .SetEnableChangefeedInitialScan(true) + ); + + auto& runtime = *server->GetRuntime(); + const auto edgeActor = runtime.AllocateEdgeActor(); + + SetupLogging(runtime); + InitRoot(server, edgeActor); + CreateShardedTable(server, edgeActor, "/Root", "Table", SimpleTable()); + + ExecSQL(server, edgeActor, R"( + UPSERT INTO `/Root/Table` (key, value) VALUES + (1, 10), + (2, 20); + )"); + + THolder<IEventHandle> delayed; + auto prevObserver = runtime.SetObserverFunc([&](TTestActorRuntimeBase&, TAutoPtr<IEventHandle>& ev) { + if (ev->GetTypeRewrite() == NSchemeShard::TEvSchemeShard::EvModifySchemeTransaction) { + auto* msg = ev->Get<NSchemeShard::TEvSchemeShard::TEvModifySchemeTransaction>(); + const auto& tx = msg->Record.GetTransaction(0); + if (tx.HasAlterCdcStream() && tx.GetAlterCdcStream().HasGetReady()) { + delayed.Reset(ev.Release()); + return TTestActorRuntime::EEventAction::DROP; + } + } + + return TTestActorRuntime::EEventAction::PROCESS; + }); + + WaitTxNotification(server, edgeActor, AsyncAlterAddStream(server, "/Root", "Table", + WithInitialScan(Updates(NKikimrSchemeOp::ECdcStreamFormatJson)))); + + if (!delayed) { + TDispatchOptions opts; + opts.FinalEvents.emplace_back([&delayed](IEventHandle&) 
{ + return bool(delayed); + }); + runtime.DispatchEvents(opts); + } + + ExecSQL(server, edgeActor, R"( + UPSERT INTO `/Root/Table` (key, value) VALUES + (3, 30), + (4, 40); + )"); + + runtime.SetObserverFunc(prevObserver); + runtime.Send(delayed.Release(), 0, true); + + WaitForContent(server, edgeActor, "/Root/Table/Stream", { + R"({"update":{"value":10},"key":[1]})", + R"({"update":{"value":20},"key":[2]})", + R"({"update":{"value":30},"key":[3]})", + R"({"update":{"value":40},"key":[4]})", + }); + } + + Y_UNIT_TEST(InitialScanRacyProgressAndDrop) { + TPortManager portManager; + TServer::TPtr server = new TServer(TServerSettings(portManager.GetPort(2134), {}, DefaultPQConfig()) + .SetUseRealThreads(false) + .SetDomainName("Root") + .SetEnableChangefeedInitialScan(true) + .SetChangesQueueItemsLimit(1) + ); + + auto& runtime = *server->GetRuntime(); + const auto edgeActor = runtime.AllocateEdgeActor(); + + SetupLogging(runtime); + InitRoot(server, edgeActor); + CreateShardedTable(server, edgeActor, "/Root", "Table", SimpleTable()); + + ExecSQL(server, edgeActor, R"( + UPSERT INTO `/Root/Table` (key, value) VALUES + (1, 10), + (2, 20), + (3, 30); + )"); + + bool delayProgress = true; + ui32 progressCount = 0; + TVector<THolder<IEventHandle>> delayed; + + auto prevObserver = runtime.SetObserverFunc([&](TTestActorRuntimeBase&, TAutoPtr<IEventHandle>& ev) { + static constexpr ui32 EvCdcStreamScanProgress = EventSpaceBegin(TKikimrEvents::ES_PRIVATE) + 24; + if (ev->GetTypeRewrite() == EvCdcStreamScanProgress) { + ++progressCount; + if (delayProgress) { + delayed.emplace_back(ev.Release()); + return TTestActorRuntime::EEventAction::DROP; + } + } + + return TTestActorRuntime::EEventAction::PROCESS; + }); + + auto waitProgress = [&](ui32 count) { + if (progressCount != count) { + TDispatchOptions opts; + opts.FinalEvents.emplace_back([&progressCount, count](IEventHandle&) { + return progressCount == count; + }); + runtime.DispatchEvents(opts); + } + }; + + 
WaitTxNotification(server, edgeActor, AsyncAlterAddStream(server, "/Root", "Table", + WithInitialScan(Updates(NKikimrSchemeOp::ECdcStreamFormatJson)))); + + waitProgress(1); + WaitTxNotification(server, edgeActor, AsyncAlterDropStream(server, "/Root", "Table", "Stream")); + + delayProgress = false; + for (auto& ev : std::exchange(delayed, TVector<THolder<IEventHandle>>())) { + runtime.Send(ev.Release(), 0, true); + } + + waitProgress(2); + } + Y_UNIT_TEST(AwsRegion) { TPortManager portManager; TServer::TPtr server = new TServer(TServerSettings(portManager.GetPort(2134), {}, DefaultPQConfig()) diff --git a/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp b/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp index 81c7efd31c..26fb3a7b0e 100644 --- a/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp +++ b/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp @@ -640,20 +640,32 @@ struct TTestHelper { || newLock->GetGeneration() != prevLock->GetGeneration()); } - void TestChunkRead(ui32 chunkSize, ui32 rowCount) { + void TestChunkRead(ui32 chunkSize, ui32 rowCount, ui32 ranges = 1, ui32 limit = Max<ui32>()) { UpsertMany(1, rowCount); auto request = GetBaseReadRequest("table-1-many", 1, NKikimrTxDataShard::CELLVEC, TRowVersion::Max()); request->Record.ClearSnapshot(); - AddRangeQuery<ui32>( - *request, - {1, 1, 1}, - true, - {rowCount + 1, 1, 1}, - true - ); + + ui32 base = 1; + for (ui32 i = 0; i < ranges; ++i) { + ui32 count = rowCount / ranges; + if (i < (rowCount % ranges)) { + ++count; + } + AddRangeQuery<ui32>( + *request, + {base, 1, 1}, + true, + {base + count - 1, Max<ui32>(), Max<ui32>()}, + true + ); + base += count; + } request->Record.SetMaxRowsInResult(chunkSize); + if (limit != Max<ui32>()) { + request->Record.SetTotalRowsLimit(limit); + } auto readResult = SendRead("table-1-many", request.release()); UNIT_ASSERT(readResult); @@ -664,10 +676,12 @@ struct TTestHelper { while (!readResult->Record.GetFinished()) { readResult = WaitReadResult(); 
UNIT_ASSERT(readResult); - rowsRead += readResult->GetRowsCount(); + ui32 count = readResult->GetRowsCount(); + UNIT_ASSERT_C(count > 0 || readResult->Record.GetFinished(), "Unexpected empty intermediate result"); + rowsRead += count; } - UNIT_ASSERT_VALUES_EQUAL(rowsRead, rowCount); + UNIT_ASSERT_VALUES_EQUAL(rowsRead, Min(rowCount, limit)); } struct THangedReturn { @@ -1911,6 +1925,56 @@ Y_UNIT_TEST_SUITE(DataShardReadIterator) { helper.TestChunkRead(99, 10000); } + Y_UNIT_TEST(ShouldLimitReadRangeChunk1Limit100) { + TTestHelper helper; + helper.TestChunkRead(1, 1000, 1, 100); + } + + Y_UNIT_TEST(ShouldLimitRead10RangesChunk99Limit98) { + TTestHelper helper; + helper.TestChunkRead(99, 1000, 10, 98); + } + + Y_UNIT_TEST(ShouldLimitRead10RangesChunk99Limit99) { + TTestHelper helper; + helper.TestChunkRead(99, 1000, 10, 99); + } + + Y_UNIT_TEST(ShouldLimitRead10RangesChunk99Limit100) { + TTestHelper helper; + helper.TestChunkRead(99, 1000, 10, 100); + } + + Y_UNIT_TEST(ShouldLimitRead10RangesChunk99Limit101) { + TTestHelper helper; + helper.TestChunkRead(99, 1000, 10, 101); + } + + Y_UNIT_TEST(ShouldLimitRead10RangesChunk99Limit198) { + TTestHelper helper; + helper.TestChunkRead(99, 1000, 10, 198); + } + + Y_UNIT_TEST(ShouldLimitRead10RangesChunk99Limit900) { + TTestHelper helper; + helper.TestChunkRead(99, 1000, 10, 900); + } + + Y_UNIT_TEST(ShouldLimitRead10RangesChunk100Limit900) { + TTestHelper helper; + helper.TestChunkRead(100, 1000, 10, 900); + } + + Y_UNIT_TEST(ShouldLimitRead10RangesChunk100Limit1000) { + TTestHelper helper; + helper.TestChunkRead(100, 1000, 10, 1000); + } + + Y_UNIT_TEST(ShouldLimitRead10RangesChunk100Limit1001) { + TTestHelper helper; + helper.TestChunkRead(100, 1000, 10, 1001); + } + Y_UNIT_TEST(ShouldReadKeyPrefix1) { TTestHelper helper; diff --git a/ydb/core/tx/datashard/export_s3_base_uploader.h b/ydb/core/tx/datashard/export_s3_base_uploader.h index 8ebc1a79ff..5e84ae1920 100644 --- a/ydb/core/tx/datashard/export_s3_base_uploader.h 
+++ b/ydb/core/tx/datashard/export_s3_base_uploader.h @@ -269,14 +269,21 @@ protected: << ": self# " << this->SelfId() << ", result# " << result); - if (!result.IsSuccess()) { - const auto& error = result.GetError(); - if (error.GetErrorType() != Aws::S3::S3Errors::NO_SUCH_UPLOAD) { - Error = error.GetMessage().c_str(); - } + if (result.IsSuccess()) { + return PassAway(); + } + + const auto& error = result.GetError(); + if (error.GetErrorType() == Aws::S3::S3Errors::NO_SUCH_UPLOAD) { + return PassAway(); } - PassAway(); + if (CanRetry(error)) { + Retry(); + } else { + Error = error.GetMessage().c_str(); + PassAway(); + } } void Handle(TEvExternalStorage::TEvAbortMultipartUploadResponse::TPtr& ev) { @@ -286,13 +293,19 @@ protected: << ": self# " << this->SelfId() << ", result# " << result); - if (!result.IsSuccess()) { + if (result.IsSuccess()) { + return PassAway(); + } + + const auto& error = result.GetError(); + if (CanRetry(error)) { + Retry(); + } else { Y_VERIFY(Error); Error = TStringBuilder() << *Error << " Additionally, 'AbortMultipartUpload' has failed: " - << result.GetError().GetMessage(); + << error.GetMessage(); + PassAway(); } - - PassAway(); } template <typename TResult> @@ -321,12 +334,19 @@ protected: return false; } - void RetryOrFinish(const Aws::S3::S3Error& error) { - if (Attempt++ < Retries && ShouldRetry(error)) { - Delay = Min(Delay * Attempt, TDuration::Minutes(10)); - const TDuration random = TDuration::FromValue(TAppData::RandomProvider->GenRand64() % Delay.MicroSeconds()); + bool CanRetry(const Aws::S3::S3Error& error) const { + return Attempt < Retries && ShouldRetry(error); + } - this->Schedule(Delay + random, new TEvents::TEvWakeup()); + void Retry() { + Delay = Min(Delay * ++Attempt, TDuration::Minutes(10)); + const TDuration random = TDuration::FromValue(TAppData::RandomProvider->GenRand64() % Delay.MicroSeconds()); + this->Schedule(Delay + random, new TEvents::TEvWakeup()); + } + + void RetryOrFinish(const Aws::S3::S3Error& error) 
{ + if (CanRetry(error)) { + Retry(); } else { Finish(false, TStringBuilder() << "S3 error: " << error.GetMessage().c_str()); } diff --git a/ydb/core/tx/datashard/read_iterator.h b/ydb/core/tx/datashard/read_iterator.h index c2fae3e0cd..aeda03decb 100644 --- a/ydb/core/tx/datashard/read_iterator.h +++ b/ydb/core/tx/datashard/read_iterator.h @@ -182,6 +182,10 @@ public: TQuota Quota; + // Number of rows processed so far + ui64 TotalRows = 0; + ui64 TotalRowsLimit = Max<ui64>(); + // items are running total, // first item corresponds to SeqNo = LastAckSeqNo + 1, // i.e. [LastAckSeqNo + 1; SeqNo] diff --git a/ydb/core/tx/scheme_board/cache.cpp b/ydb/core/tx/scheme_board/cache.cpp index 016a9a40b9..31fc79312b 100644 --- a/ydb/core/tx/scheme_board/cache.cpp +++ b/ydb/core/tx/scheme_board/cache.cpp @@ -1887,6 +1887,10 @@ class TSchemeCache: public TMonitorableActor<TSchemeCache> { entry.Kind = TableKind; entry.DomainInfo = DomainInfo; + if (Self) { + entry.GeneralVersion = Self->Info.GetVersion().GetGeneralVersion(); + } + if (!CheckColumns(context, entry, KeyColumnTypes, Columns)) { return; } diff --git a/ydb/core/tx/scheme_cache/scheme_cache.h b/ydb/core/tx/scheme_cache/scheme_cache.h index d8a174d221..62fdbb0ca0 100644 --- a/ydb/core/tx/scheme_cache/scheme_cache.h +++ b/ydb/core/tx/scheme_cache/scheme_cache.h @@ -319,6 +319,7 @@ struct TSchemeCacheRequest { EStatus Status = EStatus::Unknown; EKind Kind = EKind::KindUnknown; TIntrusivePtr<TDomainInfo> DomainInfo; + ui64 GeneralVersion = 0; explicit TEntry(THolder<TKeyDesc> keyDesc) : KeyDescription(std::move(keyDesc)) diff --git a/ydb/core/tx/schemeshard/schemeshard__init.cpp b/ydb/core/tx/schemeshard/schemeshard__init.cpp index 539320d087..132c62ba12 100644 --- a/ydb/core/tx/schemeshard/schemeshard__init.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__init.cpp @@ -2958,6 +2958,10 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> { } else { stream->DoneShards.insert(shardIdx); } + + if 
(!rowset.Next()) { + return false; + } } } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation.cpp b/ydb/core/tx/schemeshard/schemeshard__operation.cpp index f597e74b1e..ca3d30b0ea 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation.cpp @@ -656,14 +656,7 @@ TOperation::TSplitTransactionsResult TOperation::SplitIntoTransactions(const TTx } if (checks && !exists) { - checks - .IsValidLeafName() - .DepthLimit() - .PathsLimit(); - } - - if (checks && !exists && path.Parent().IsResolved()) { - checks.DirChildrenLimit(); + checks.IsValidLeafName(); } if (!checks) { @@ -764,14 +757,7 @@ TOperation::TSplitTransactionsResult TOperation::SplitIntoTransactions(const TTx } if (checks) { - checks - .IsValidLeafName() - .DepthLimit() - .PathsLimit(result.Transactions.size() + 1); - } - - if (checks && path.Parent().IsResolved()) { - checks.DirChildrenLimit(); + checks.IsValidLeafName(); } if (!checks) { diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_solomon.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_solomon.cpp index 1c46c00733..86b49463d1 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_solomon.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_solomon.cpp @@ -259,7 +259,13 @@ public: } TChannelsBindings channelsBinding; - if (!context.SS->ResolveSolomonChannels(channelProfileId, path.GetPathIdForDomain(), channelsBinding)) { + bool isResolved = false; + if (alter.HasStorageConfig()) { + isResolved = context.SS->ResolveSolomonChannels(alter.GetStorageConfig(), path.GetPathIdForDomain(), channelsBinding); + } else { + isResolved = context.SS->ResolveSolomonChannels(channelProfileId, path.GetPathIdForDomain(), channelsBinding); + } + if (!isResolved) { result->SetError(NKikimrScheme::StatusInvalidParameter, "Unable to construct channel binding with the storage pool"); return result; } diff --git 
a/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp index cf474204e2..df4b974b9b 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp @@ -366,7 +366,7 @@ public: } if (checks) { - if (!parent.Base()->IsTableIndex()) { + if (!parent.Base()->IsTableIndex() && !isBackup) { checks.DepthLimit(); } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_olap_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_olap_table.cpp index 5f216399ac..58851357a9 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_olap_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_olap_table.cpp @@ -23,19 +23,27 @@ bool PrepareSchema(NKikimrSchemeOp::TColumnTableSchema& proto, TOlapSchema& sche return schema.Parse(proto, errStr, allowNullableKeys); } +NKikimrSchemeOp::TColumnTableSharding DefaultSharding() { + NKikimrSchemeOp::TColumnTableSharding sharding; + auto* hashSharding = sharding.MutableHashSharding(); + hashSharding->SetFunction(NKikimrSchemeOp::TColumnTableSharding::THashSharding::HASH_FUNCTION_MODULO_N); + return sharding; +} + bool SetSharding(const TOlapSchema& schema, NKikimrSchemeOp::TColumnTableDescription& op, TColumnTableInfo::TPtr tableInfo, TEvSchemeShard::EStatus& status, TString& errStr) { - ui32 shardsCount = Max(ui32(1), op.GetColumnShardCount()); + ui32 shardsCount = op.GetColumnShardCount(); + if (!shardsCount) { + status = NKikimrScheme::StatusSchemeError; + errStr = Sprintf("Shards count is zero"); + return false; + } if (op.HasSharding()) { tableInfo->Sharding = std::move(*op.MutableSharding()); - } else if (shardsCount < 2) { - tableInfo->Sharding.MutableRandomSharding(); } else { - status = NKikimrScheme::StatusSchemeError; - errStr = Sprintf("Sharding is not set"); - return false; + tableInfo->Sharding = DefaultSharding(); } 
op.ClearSharding(); @@ -51,8 +59,11 @@ bool SetSharding(const TOlapSchema& schema, NKikimrSchemeOp::TColumnTableDescrip case NKikimrSchemeOp::TColumnTableSharding::kHashSharding: { auto& sharding = *tableInfo->Sharding.MutableHashSharding(); if (sharding.ColumnsSize() == 0) { + sharding.MutableColumns()->CopyFrom(tableInfo->Description.GetSchema().GetKeyColumnNames()); + } + if (shardsCount > 1 && sharding.ColumnsSize() == 0) { status = NKikimrScheme::StatusSchemeError; - errStr = Sprintf("Hash sharding requires a non-empty list of columns"); + errStr = Sprintf("Hash sharding requires a non-empty list of columns or primary key specified"); return false; } bool keysOnly = true; @@ -591,7 +602,11 @@ public: const auto acceptExisted = !Transaction.GetFailOnExist(); const TString& parentPathStr = Transaction.GetWorkingDir(); - auto& createDescription = Transaction.GetCreateColumnTable(); + auto createDescription = Transaction.GetCreateColumnTable(); + if (!createDescription.HasColumnShardCount()) { + static constexpr ui32 DEFAULT_SHARDS_COUNT = 64; + createDescription.SetColumnShardCount(DEFAULT_SHARDS_COUNT); + } const TString& name = createDescription.GetName(); const ui32 shardsCount = Max(ui32(1), createDescription.GetColumnShardCount()); auto opTxId = OperationId.GetTxId(); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_solomon.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_solomon.cpp index e742ead739..9d9f30ad45 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_solomon.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_solomon.cpp @@ -19,15 +19,15 @@ bool ValidateConfig(const NKikimrSchemeOp::TCreateSolomonVolume& op, return false; } if (op.GetPartitionCount()) { - if (!op.HasChannelProfileId()) { - errStr = "set channel profile id, please"; + if (!op.HasChannelProfileId() && !op.HasStorageConfig()) { + errStr = "set storage config, please"; status = TEvSchemeShard::EStatus::StatusInvalidParameter; } 
return true; } - if (op.HasChannelProfileId()) { - errStr = "don't set channel profile id, please. We are going to adopt already created tablets"; + if (op.HasChannelProfileId() || op.HasStorageConfig()) { + errStr = "don't set channel profile id or storage config, please. We are going to adopt already created tablets"; status = TEvSchemeShard::EStatus::StatusInvalidParameter; } @@ -330,9 +330,17 @@ public: const bool adoptingTablets = solomonDescription.AdoptedPartitionsSize() > 0; TChannelsBindings channelsBinding; - if (!adoptingTablets && !context.SS->ResolveSolomonChannels(channelProfileId, dstPath.GetPathIdForDomain(), channelsBinding)) { - result->SetError(NKikimrScheme::StatusInvalidParameter, "Unable to construct channel binding with the storage pool"); - return result; + if (!adoptingTablets) { + bool isResolved = false; + if (solomonDescription.HasStorageConfig()) { + isResolved = context.SS->ResolveSolomonChannels(solomonDescription.GetStorageConfig(), dstPath.GetPathIdForDomain(), channelsBinding); + } else { + isResolved = context.SS->ResolveSolomonChannels(channelProfileId, dstPath.GetPathIdForDomain(), channelsBinding); + } + if (!isResolved) { + result->SetError(NKikimrScheme::StatusInvalidParameter, "Unable to construct channel binding with the storage pool"); + return result; + } } dstPath.MaterializeLeaf(owner); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_mkdir.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_mkdir.cpp index 3fa3d86e86..8ca973d696 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_mkdir.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_mkdir.cpp @@ -158,10 +158,14 @@ public: if (checks) { checks .IsValidLeafName() + .IsValidACL(acl); + } + + if (checks && !context.SS->SystemBackupSIDs.contains(owner)) { + checks .DepthLimit() .PathsLimit() - .DirChildrenLimit() - .IsValidACL(acl); + .DirChildrenLimit(); } if (!checks) { diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp 
b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp index b58d3a500f..6d51b93e7e 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp @@ -481,7 +481,7 @@ struct TSchemeShard::TIndexBuilder::TTxReply: public TSchemeShard::TIndexBuilder private: TEvTxAllocatorClient::TEvAllocateResult::TPtr AllocateResult; TEvSchemeShard::TEvModifySchemeTransactionResult::TPtr ModifyResult; - TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr Notification; + TTxId CompletedTxId = InvalidTxId; TEvDataShard::TEvBuildIndexProgressResponse::TPtr ShardProgress; struct { TIndexBuildId BuildIndexId; @@ -502,9 +502,9 @@ public: { } - explicit TTxReply(TSelf* self, TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& notification) + explicit TTxReply(TSelf* self, TTxId completedTxId) : TSchemeShard::TIndexBuilder::TTxBase(self) - , Notification(notification) + , CompletedTxId(completedTxId) { } @@ -530,7 +530,7 @@ public: return OnAllocation(txc, ctx); } else if (ModifyResult) { return OnModifyResult(txc, ctx); - } else if (Notification) { + } else if (CompletedTxId) { return OnNotification(txc, ctx); } else if (ShardProgress) { return OnProgress(txc, ctx); @@ -773,12 +773,10 @@ public: } bool OnNotification(TTransactionContext& txc, const TActorContext&) { - const auto& record = Notification->Get()->Record; - - const auto txId = TTxId(record.GetTxId()); + const auto txId = CompletedTxId; if (!Self->TxIdToIndexBuilds.contains(txId)) { LOG_I("TTxReply : TEvNotifyTxCompletionResult superfluous message" - << ", txId: " << record.GetTxId() + << ", txId: " << txId << ", buildInfoId not found"); return true; } @@ -788,10 +786,10 @@ public: TIndexBuildInfo::TPtr buildInfo = Self->IndexBuilds.at(buildId); LOG_I("TTxReply : TEvNotifyTxCompletionResult" - << ", txId# " << record.GetTxId() + << ", txId# " << txId << ", buildInfoId: " << buildInfo->Id); LOG_D("TTxReply : TEvNotifyTxCompletionResult" - << ", 
txId# " << record.GetTxId() + << ", txId# " << txId << ", buildInfo: " << *buildInfo); switch (buildInfo->State) { @@ -1280,8 +1278,8 @@ ITransaction* TSchemeShard::CreateTxReply(TEvSchemeShard::TEvModifySchemeTransac return new TIndexBuilder::TTxReply(this, modifyResult); } -ITransaction* TSchemeShard::CreateTxReply(TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& notification) { - return new TIndexBuilder::TTxReply(this, notification); +ITransaction* TSchemeShard::CreateTxReply(TTxId completedTxId) { + return new TIndexBuilder::TTxReply(this, completedTxId); } ITransaction* TSchemeShard::CreateTxReply(TEvDataShard::TEvBuildIndexProgressResponse::TPtr& progress) { diff --git a/ydb/core/tx/schemeshard/schemeshard_export__create.cpp b/ydb/core/tx/schemeshard/schemeshard_export__create.cpp index 210c80b84b..30ac978ff0 100644 --- a/ydb/core/tx/schemeshard/schemeshard_export__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_export__create.cpp @@ -226,7 +226,7 @@ struct TSchemeShard::TExport::TTxProgress: public TSchemeShard::TXxport::TTxBase ui64 Id; TEvTxAllocatorClient::TEvAllocateResult::TPtr AllocateResult = nullptr; TEvSchemeShard::TEvModifySchemeTransactionResult::TPtr ModifyResult = nullptr; - TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr NotifyResult = nullptr; + TTxId CompletedTxId = InvalidTxId; explicit TTxProgress(TSelf* self, ui64 id) : TXxport::TTxBase(self) @@ -246,9 +246,9 @@ struct TSchemeShard::TExport::TTxProgress: public TSchemeShard::TXxport::TTxBase { } - explicit TTxProgress(TSelf* self, TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& ev) + explicit TTxProgress(TSelf* self, TTxId completedTxId) : TXxport::TTxBase(self) - , NotifyResult(ev) + , CompletedTxId(completedTxId) { } @@ -263,7 +263,7 @@ struct TSchemeShard::TExport::TTxProgress: public TSchemeShard::TXxport::TTxBase OnAllocateResult(txc, ctx); } else if (ModifyResult) { OnModifyResult(txc, ctx); - } else if (NotifyResult) { + } else if (CompletedTxId) { OnNotifyResult(txc, 
ctx); } else { Resume(txc, ctx); @@ -427,6 +427,10 @@ private: return InvalidTxId; } + if (!ItemPathId(Self, exportInfo, 0)) { + return InvalidTxId; + } + return path->LastTxId; } @@ -782,7 +786,7 @@ private: SubscribeTx(path->LastTxId); Y_VERIFY_DEBUG(itemIdx == Max<ui32>()); - Self->TxIdToExport[path->LastTxId] = {exportInfo->Id, itemIdx}; + Self->TxIdToDependentExport[path->LastTxId].insert(exportInfo->Id); } } @@ -854,30 +858,47 @@ private: } void OnNotifyResult(TTransactionContext& txc, const TActorContext&) { - Y_VERIFY(NotifyResult); - const auto& record = NotifyResult->Get()->Record; - + Y_VERIFY(CompletedTxId); LOG_D("TExport::TTxProgress: OnNotifyResult" - << ": txId# " << record.GetTxId()); + << ": txId# " << CompletedTxId); - const auto txId = TTxId(record.GetTxId()); - if (!Self->TxIdToExport.contains(txId)) { + const auto txId = CompletedTxId; + if (!Self->TxIdToExport.contains(txId) && !Self->TxIdToDependentExport.contains(txId)) { LOG_E("TExport::TTxProgress: OnNotifyResult received unknown txId" << ": txId# " << txId); return; } - ui64 id; - ui32 itemIdx; - std::tie(id, itemIdx) = Self->TxIdToExport.at(txId); + if (Self->TxIdToExport.contains(txId)) { + ui64 id; + ui32 itemIdx; + std::tie(id, itemIdx) = Self->TxIdToExport.at(txId); + + OnNotifyResult(txId, id, itemIdx, txc); + Self->TxIdToExport.erase(txId); + } + + if (Self->TxIdToDependentExport.contains(txId)) { + for (const auto id : Self->TxIdToDependentExport.at(txId)) { + OnNotifyResult(txId, id, Max<ui32>(), txc); + } + + Self->TxIdToDependentExport.erase(txId); + } + } + + void OnNotifyResult(TTxId txId, ui64 id, ui32 itemIdx, TTransactionContext& txc) { + LOG_D("TExport::TTxProgress: OnNotifyResult" + << ": txId# " << txId + << ", id# " << id + << ", itemIdx# " << itemIdx); + if (!Self->Exports.contains(id)) { LOG_E("TExport::TTxProgress: OnNotifyResult received unknown id" << ": id# " << id); return; } - Self->TxIdToExport.erase(txId); - TExportInfo::TPtr exportInfo = 
Self->Exports.at(id); NIceDb::TNiceDb db(txc.DB); @@ -977,8 +998,8 @@ ITransaction* TSchemeShard::CreateTxProgressExport(TEvSchemeShard::TEvModifySche return new TExport::TTxProgress(this, ev); } -ITransaction* TSchemeShard::CreateTxProgressExport(TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& ev) { - return new TExport::TTxProgress(this, ev); +ITransaction* TSchemeShard::CreateTxProgressExport(TTxId completedTxId) { + return new TExport::TTxProgress(this, completedTxId); } } // NSchemeShard diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.cpp b/ydb/core/tx/schemeshard/schemeshard_impl.cpp index d2074340a7..77d7f40952 100644 --- a/ydb/core/tx/schemeshard/schemeshard_impl.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_impl.cpp @@ -824,6 +824,29 @@ bool TSchemeShard::ResolveRtmrChannels(const TPathId domainId, TChannelsBindings return ResolveChannelCommon(profileId, domainId, channelsBinding, &ResolveChannelsDetailsAsIs); } +bool TSchemeShard::ResolveSolomonChannels(const NKikimrSchemeOp::TKeyValueStorageConfig &config, const TPathId domainId, TChannelsBindings& channelsBinding) const +{ + TSubDomainInfo::TPtr domainInfo = SubDomains.at(domainId); + auto& storagePools = domainInfo->EffectiveStoragePools(); + + if (!storagePools) { + // no storage pool no binding it's Ok + channelsBinding.clear(); + return false; + } + + auto getPoolKind = [&] (ui32 channel) { + return TStringBuf(config.GetChannel(channel).GetPreferredPoolKind()); + }; + + return ResolvePoolNames( + config.ChannelSize(), + getPoolKind, + storagePools, + channelsBinding + ); +} + bool TSchemeShard::ResolveSolomonChannels(ui32 profileId, const TPathId domainId, TChannelsBindings &channelsBinding) const { return ResolveChannelCommon(profileId, domainId, channelsBinding, &ResolveChannelsDetailsAsIs); @@ -5847,13 +5870,23 @@ void TSchemeShard::Handle(TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& ev, "Message:\n" << ev->Get()->Record.ShortDebugString()); const auto txId = 
TTxId(ev->Get()->Record.GetTxId()); + bool executed = false; + + if (TxIdToExport.contains(txId) || TxIdToDependentExport.contains(txId)) { + Execute(CreateTxProgressExport(txId), ctx); + executed = true; + } + if (TxIdToImport.contains(txId)) { + Execute(CreateTxProgressImport(txId), ctx); + executed = true; + } + if (TxIdToIndexBuilds.contains(txId)) { + Execute(CreateTxReply(txId), ctx); + executed = true; + } - if (TxIdToExport.contains(txId)) { - return Execute(CreateTxProgressExport(ev), ctx); - } else if (TxIdToImport.contains(txId)) { - return Execute(CreateTxProgressImport(ev), ctx); - } else if (TxIdToIndexBuilds.contains(txId)) { - return Execute(CreateTxReply(ev), ctx); + if (executed) { + return; } LOG_WARN_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.h b/ydb/core/tx/schemeshard/schemeshard_impl.h index 5299fcb42b..a64beea609 100644 --- a/ydb/core/tx/schemeshard/schemeshard_impl.h +++ b/ydb/core/tx/schemeshard/schemeshard_impl.h @@ -438,6 +438,7 @@ public: bool ResolveTabletChannels(ui32 profileId, const TPathId domainId, TChannelsBindings& channelsBinding) const; bool ResolveRtmrChannels(const TPathId domainId, TChannelsBindings& channelsBinding) const; bool ResolveSolomonChannels(ui32 profileId, const TPathId domainId, TChannelsBindings& channelsBinding) const; + bool ResolveSolomonChannels(const NKikimrSchemeOp::TKeyValueStorageConfig &config, const TPathId domainId, TChannelsBindings& channelsBinding) const; bool ResolvePqChannels(ui32 profileId, const TPathId domainId, TChannelsBindings& channelsBinding) const; bool ResolveChannelsByPoolKinds( const TVector<TStringBuf>& channelPoolKinds, @@ -1019,6 +1020,7 @@ public: THashMap<ui64, TExportInfo::TPtr> Exports; THashMap<TString, TExportInfo::TPtr> ExportsByUid; THashMap<TTxId, std::pair<ui64, ui32>> TxIdToExport; + THashMap<TTxId, THashSet<ui64>> TxIdToDependentExport; void FromXxportInfo(NKikimrExport::TExport& exprt, const TExportInfo::TPtr 
exportInfo); @@ -1049,7 +1051,7 @@ public: NTabletFlatExecutor::ITransaction* CreateTxProgressExport(ui64 id); NTabletFlatExecutor::ITransaction* CreateTxProgressExport(TEvTxAllocatorClient::TEvAllocateResult::TPtr& ev); NTabletFlatExecutor::ITransaction* CreateTxProgressExport(TEvSchemeShard::TEvModifySchemeTransactionResult::TPtr& ev); - NTabletFlatExecutor::ITransaction* CreateTxProgressExport(TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& ev); + NTabletFlatExecutor::ITransaction* CreateTxProgressExport(TTxId completedTxId); void Handle(TEvExport::TEvCreateExportRequest::TPtr& ev, const TActorContext& ctx); void Handle(TEvExport::TEvGetExportRequest::TPtr& ev, const TActorContext& ctx); @@ -1098,7 +1100,7 @@ public: NTabletFlatExecutor::ITransaction* CreateTxProgressImport(TEvTxAllocatorClient::TEvAllocateResult::TPtr& ev); NTabletFlatExecutor::ITransaction* CreateTxProgressImport(TEvSchemeShard::TEvModifySchemeTransactionResult::TPtr& ev); NTabletFlatExecutor::ITransaction* CreateTxProgressImport(TEvIndexBuilder::TEvCreateResponse::TPtr& ev); - NTabletFlatExecutor::ITransaction* CreateTxProgressImport(TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& ev); + NTabletFlatExecutor::ITransaction* CreateTxProgressImport(TTxId completedTxId); void Handle(TEvImport::TEvCreateImportRequest::TPtr& ev, const TActorContext& ctx); void Handle(TEvImport::TEvGetImportRequest::TPtr& ev, const TActorContext& ctx); @@ -1174,7 +1176,7 @@ public: NTabletFlatExecutor::ITransaction* CreateTxProgress(TIndexBuildId id); NTabletFlatExecutor::ITransaction* CreateTxReply(TEvTxAllocatorClient::TEvAllocateResult::TPtr& allocateResult); NTabletFlatExecutor::ITransaction* CreateTxReply(TEvSchemeShard::TEvModifySchemeTransactionResult::TPtr& modifyResult); - NTabletFlatExecutor::ITransaction* CreateTxReply(TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& modifyResult); + NTabletFlatExecutor::ITransaction* CreateTxReply(TTxId completedTxId); NTabletFlatExecutor::ITransaction* 
CreateTxReply(TEvDataShard::TEvBuildIndexProgressResponse::TPtr& progress); NTabletFlatExecutor::ITransaction* CreatePipeRetry(TIndexBuildId indexBuildId, TTabletId tabletId); NTabletFlatExecutor::ITransaction* CreateTxBilling(TEvPrivate::TEvIndexBuildingMakeABill::TPtr& ev); diff --git a/ydb/core/tx/schemeshard/schemeshard_import__create.cpp b/ydb/core/tx/schemeshard/schemeshard_import__create.cpp index a3a879f1d3..b47201c311 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_import__create.cpp @@ -224,7 +224,7 @@ struct TSchemeShard::TImport::TTxProgress: public TSchemeShard::TXxport::TTxBase TEvTxAllocatorClient::TEvAllocateResult::TPtr AllocateResult = nullptr; TEvSchemeShard::TEvModifySchemeTransactionResult::TPtr ModifyResult = nullptr; TEvIndexBuilder::TEvCreateResponse::TPtr CreateIndexResult = nullptr; - TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr NotifyResult = nullptr; + TTxId CompletedTxId = InvalidTxId; explicit TTxProgress(TSelf* self, ui64 id, const TMaybe<ui32>& itemIdx) : TXxport::TTxBase(self) @@ -257,9 +257,9 @@ struct TSchemeShard::TImport::TTxProgress: public TSchemeShard::TXxport::TTxBase { } - explicit TTxProgress(TSelf* self, TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& ev) + explicit TTxProgress(TSelf* self, TTxId completedTxId) : TXxport::TTxBase(self) - , NotifyResult(ev) + , CompletedTxId(completedTxId) { } @@ -278,7 +278,7 @@ struct TSchemeShard::TImport::TTxProgress: public TSchemeShard::TXxport::TTxBase OnModifyResult(txc, ctx); } else if (CreateIndexResult) { OnCreateIndexResult(txc, ctx); - } else if (NotifyResult) { + } else if (CompletedTxId) { OnNotifyResult(txc, ctx); } else { Resume(txc, ctx); @@ -908,13 +908,11 @@ private: } void OnNotifyResult(TTransactionContext& txc, const TActorContext&) { - Y_VERIFY(NotifyResult); - const auto& record = NotifyResult->Get()->Record; - + Y_VERIFY(CompletedTxId); LOG_D("TImport::TTxProgress: OnNotifyResult" - << ": txId# 
" << record.GetTxId()); + << ": txId# " << CompletedTxId); - const auto txId = TTxId(record.GetTxId()); + const auto txId = CompletedTxId; if (!Self->TxIdToImport.contains(txId)) { LOG_E("TImport::TTxProgress: OnNotifyResult received unknown txId" << ": txId# " << txId); @@ -1018,8 +1016,8 @@ ITransaction* TSchemeShard::CreateTxProgressImport(TEvIndexBuilder::TEvCreateRes return new TImport::TTxProgress(this, ev); } -ITransaction* TSchemeShard::CreateTxProgressImport(TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& ev) { - return new TImport::TTxProgress(this, ev); +ITransaction* TSchemeShard::CreateTxProgressImport(TTxId completedTxId) { + return new TImport::TTxProgress(this, completedTxId); } } // NSchemeShard diff --git a/ydb/core/tx/schemeshard/ut_export.cpp b/ydb/core/tx/schemeshard/ut_export.cpp index 907e564aef..4c17ba3d2a 100644 --- a/ydb/core/tx/schemeshard/ut_export.cpp +++ b/ydb/core/tx/schemeshard/ut_export.cpp @@ -1071,7 +1071,173 @@ partitioning_settings { TestGetExport(runtime, exportId, "/MyRoot", Ydb::StatusIds::SUCCESS); } - Y_UNIT_TEST(ShouldCheckQuotas) { + Y_UNIT_TEST(ShouldSucceedOnConcurrentExport) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Utf8" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + TPortManager portManager; + const ui16 port = portManager.GetPort(); + + TS3Mock s3Mock({}, TS3Mock::TSettings(port)); + UNIT_ASSERT(s3Mock.Start()); + + TVector<THolder<IEventHandle>> copyTables; + auto origObserver = runtime.SetObserverFunc([&](TTestActorRuntimeBase&, TAutoPtr<IEventHandle>& ev) { + if (ev->GetTypeRewrite() == TEvSchemeShard::EvModifySchemeTransaction) { + const auto& record = ev->Get<TEvSchemeShard::TEvModifySchemeTransaction>()->Record; + if (record.GetTransaction(0).GetOperationType() == 
NKikimrSchemeOp::ESchemeOpCreateConsistentCopyTables) { + copyTables.emplace_back(ev.Release()); + return TTestActorRuntime::EEventAction::DROP; + } + } + return TTestActorRuntime::EEventAction::PROCESS; + }); + auto waitCopyTables = [&runtime, &copyTables](ui32 size) { + if (copyTables.size() != size) { + TDispatchOptions opts; + opts.FinalEvents.emplace_back([&copyTables, size](IEventHandle&) -> bool { + return copyTables.size() == size; + }); + runtime.DispatchEvents(opts); + } + }; + + TVector<ui64> exportIds; + for (ui32 i = 1; i <= 3; ++i) { + exportIds.push_back(++txId); + TestExport(runtime, exportIds[i - 1], "/MyRoot", Sprintf(R"( + ExportToS3Settings { + endpoint: "localhost:%d" + scheme: HTTP + items { + source_path: "/MyRoot/Table" + destination_prefix: "Table%u" + } + } + )", port, i)); + waitCopyTables(i); + } + + runtime.SetObserverFunc(origObserver); + for (auto& ev : copyTables) { + runtime.Send(ev.Release(), 0, true); + } + + for (ui64 exportId : exportIds) { + env.TestWaitNotification(runtime, exportId); + TestGetExport(runtime, exportId, "/MyRoot", Ydb::StatusIds::SUCCESS); + } + } + + Y_UNIT_TEST(ShouldSucceedOnConcurrentImport) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Utf8" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + TPortManager portManager; + const ui16 port = portManager.GetPort(); + + TS3Mock s3Mock({}, TS3Mock::TSettings(port)); + UNIT_ASSERT(s3Mock.Start()); + + // prepare backup data + TestExport(runtime, ++txId, "/MyRoot", Sprintf(R"( + ExportToS3Settings { + endpoint: "localhost:%d" + scheme: HTTP + items { + source_path: "/MyRoot/Table" + destination_prefix: "Backup1" + } + } + )", port)); + env.TestWaitNotification(runtime, txId); + TestGetExport(runtime, txId, "/MyRoot"); + + TVector<THolder<IEventHandle>> delayed; + auto 
origObserver = runtime.SetObserverFunc([&](TTestActorRuntimeBase&, TAutoPtr<IEventHandle>& ev) { + if (ev->GetTypeRewrite() == TEvSchemeShard::EvModifySchemeTransaction) { + const auto& record = ev->Get<TEvSchemeShard::TEvModifySchemeTransaction>()->Record; + const auto opType = record.GetTransaction(0).GetOperationType(); + switch (opType) { + case NKikimrSchemeOp::ESchemeOpRestore: + case NKikimrSchemeOp::ESchemeOpCreateConsistentCopyTables: + delayed.emplace_back(ev.Release()); + return TTestActorRuntime::EEventAction::DROP; + default: + break; + } + } + return TTestActorRuntime::EEventAction::PROCESS; + }); + + auto waitForDelayed = [&runtime, &delayed](ui32 size) { + if (delayed.size() != size) { + TDispatchOptions opts; + opts.FinalEvents.emplace_back([&delayed, size](IEventHandle&) -> bool { + return delayed.size() == size; + }); + runtime.DispatchEvents(opts); + } + }; + + const auto importId = ++txId; + TestImport(runtime, importId, "/MyRoot", Sprintf(R"( + ImportFromS3Settings { + endpoint: "localhost:%d" + scheme: HTTP + items { + source_prefix: "Backup1" + destination_path: "/MyRoot/Restored" + } + } + )", port)); + // wait for restore op + waitForDelayed(1); + + const auto exportId = ++txId; + TestExport(runtime, exportId, "/MyRoot", Sprintf(R"( + ExportToS3Settings { + endpoint: "localhost:%d" + scheme: HTTP + items { + source_path: "/MyRoot/Restored" + destination_prefix: "Backup2" + } + } + )", port)); + // wait for copy table op + waitForDelayed(2); + + runtime.SetObserverFunc(origObserver); + for (auto& ev : delayed) { + runtime.Send(ev.Release(), 0, true); + } + + env.TestWaitNotification(runtime, importId); + TestGetImport(runtime, importId, "/MyRoot"); + env.TestWaitNotification(runtime, exportId); + TestGetExport(runtime, exportId, "/MyRoot"); + } + + void ShouldCheckQuotas(const TSchemeLimits& limits, Ydb::StatusIds::StatusCode expectedFailStatus) { TPortManager portManager; const ui16 port = portManager.GetPort(); @@ -1082,9 +1248,7 @@ 
partitioning_settings { TTestBasicRuntime runtime; TTestEnv env(runtime, TTestEnvOptions().SystemBackupSIDs({userSID})); - TSchemeLimits lowLimits; - lowLimits.MaxExports = 0; - SetSchemeshardSchemaLimits(runtime, lowLimits); + SetSchemeshardSchemaLimits(runtime, limits); const TVector<TString> tables = { R"( @@ -1105,7 +1269,12 @@ partitioning_settings { } )", port); - Run(runtime, env, tables, request, Ydb::StatusIds::PRECONDITION_FAILED); + Run(runtime, env, tables, request, expectedFailStatus); Run(runtime, env, tables, request, Ydb::StatusIds::SUCCESS, "/MyRoot", false, userSID); } + + Y_UNIT_TEST(ShouldCheckQuotas) { + ShouldCheckQuotas(TSchemeLimits{.MaxExports = 0}, Ydb::StatusIds::PRECONDITION_FAILED); + ShouldCheckQuotas(TSchemeLimits{.MaxChildrenInDir = 1}, Ydb::StatusIds::CANCELLED); + } } diff --git a/ydb/core/tx/schemeshard/ut_olap.cpp b/ydb/core/tx/schemeshard/ut_olap.cpp index bf2d24cf5e..84a972baaa 100644 --- a/ydb/core/tx/schemeshard/ut_olap.cpp +++ b/ydb/core/tx/schemeshard/ut_olap.cpp @@ -211,6 +211,7 @@ Y_UNIT_TEST_SUITE(TOlap) { TString tableSchema = R"( Name: "ColumnTable" + ColumnShardCount: 1 )"; TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore/MyDir", tableSchema); @@ -223,6 +224,7 @@ Y_UNIT_TEST_SUITE(TOlap) { // Missing column from schema preset TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore/MyDir", R"( Name: "ColumnTableMissingDataColumn" + ColumnShardCount: 1 Schema { Columns { Name: "timestamp" Type: "Timestamp" } KeyColumnNames: "timestamp" @@ -233,6 +235,7 @@ Y_UNIT_TEST_SUITE(TOlap) { // Extra column not in schema preset TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore/MyDir", R"( Name: "ColumnTableExtraColumn" + ColumnShardCount: 1 Schema { Columns { Name: "timestamp" Type: "Timestamp" } Columns { Name: "data" Type: "Utf8" } @@ -245,6 +248,7 @@ Y_UNIT_TEST_SUITE(TOlap) { // Different column order TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore/MyDir", R"( Name: 
"ColumnTableDifferentColumnOrder" + ColumnShardCount: 1 Schema { Columns { Name: "data" Type: "Utf8" } Columns { Name: "timestamp" Type: "Timestamp" } @@ -256,6 +260,7 @@ Y_UNIT_TEST_SUITE(TOlap) { // Extra key column TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore/MyDir", R"( Name: "ColumnTableExtraKeyColumn" + ColumnShardCount: 1 Schema { Columns { Name: "timestamp" Type: "Timestamp" } Columns { Name: "data" Type: "Utf8" } @@ -268,6 +273,7 @@ Y_UNIT_TEST_SUITE(TOlap) { // Unknown key column TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore/MyDir", R"( Name: "ColumnTableUnknownKeyColumn" + ColumnShardCount: 1 Schema { Columns { Name: "timestamp" Type: "Timestamp" } Columns { Name: "data" Type: "Utf8" } @@ -279,6 +285,7 @@ Y_UNIT_TEST_SUITE(TOlap) { // Different data column type TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore/MyDir", R"( Name: "ColumnTableDataColumnType" + ColumnShardCount: 1 Schema { Columns { Name: "timestamp" Type: "Timestamp" } Columns { Name: "data" Type: "String" } @@ -290,6 +297,7 @@ Y_UNIT_TEST_SUITE(TOlap) { // Repeating preset schema should succeed TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore/MyDir", R"( Name: "ColumnTableExplicitSchema" + ColumnShardCount: 1 Schema { Columns { Name: "timestamp" Type: "Timestamp" } Columns { Name: "data" Type: "Utf8" } @@ -302,6 +310,7 @@ Y_UNIT_TEST_SUITE(TOlap) { // Creating table with directories should succeed TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore", R"( Name: "DirA/DirB/NestedTable" + ColumnShardCount: 1 )"); env.TestWaitNotification(runtime, txId); @@ -312,6 +321,7 @@ Y_UNIT_TEST_SUITE(TOlap) { // Additional storage tier in schema TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore/MyDir", R"( Name: "TableWithTiers" + ColumnShardCount: 1 Schema { Columns { Name: "timestamp" Type: "Timestamp" } Columns { Name: "data" Type: "Utf8" } @@ -333,6 +343,7 @@ Y_UNIT_TEST_SUITE(TOlap) { TestCreateColumnTable(runtime, ++txId, 
"/MyRoot/OlapStore", R"( Name: "ColumnTable" + ColumnShardCount: 1 SchemaPresetName: "default" )"); env.TestWaitNotification(runtime, txId); @@ -362,6 +373,7 @@ Y_UNIT_TEST_SUITE(TOlap) { TString tableSchema = R"( Name: "ColumnTable" + ColumnShardCount: 1 )"; TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore/MyDir", tableSchema); @@ -452,6 +464,67 @@ Y_UNIT_TEST_SUITE(TOlap) { TestLsPathId(runtime, 4, NLs::PathStringEqual("")); } + Y_UNIT_TEST(CreateDropStandaloneTableDefaultSharding) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestMkDir(runtime, ++txId, "/MyRoot", "MyDir"); + env.TestWaitNotification(runtime, txId); + + TestLs(runtime, "/MyRoot/MyDir", false, NLs::PathExist); + + TestCreateColumnTable(runtime, ++txId, "/MyRoot/MyDir", defaultTableSchema); + env.TestWaitNotification(runtime, txId); + + TestLsPathId(runtime, 3, NLs::PathStringEqual("/MyRoot/MyDir/ColumnTable")); + + TestDropColumnTable(runtime, ++txId, "/MyRoot/MyDir", "ColumnTable"); + env.TestWaitNotification(runtime, txId); + + TestLs(runtime, "/MyRoot/MyDir/ColumnTable", false, NLs::PathNotExist); + TestLsPathId(runtime, 3, NLs::PathStringEqual("")); + + TString otherSchema = R"( + Name: "ColumnTable" + Schema { + Columns { Name: "timestamp" Type: "Timestamp" NotNull: true } + Columns { Name: "some" Type: "Uint64" NotNull: true } + Columns { Name: "data" Type: "Utf8" NotNull: true } + KeyColumnNames: "some" + KeyColumnNames: "data" + } + )"; + + TestCreateColumnTable(runtime, ++txId, "/MyRoot/MyDir", otherSchema); + env.TestWaitNotification(runtime, txId); + + auto checkFn = [&](const NKikimrScheme::TEvDescribeSchemeResult& record) { + UNIT_ASSERT_VALUES_EQUAL(record.GetPath(), "/MyRoot/MyDir/ColumnTable"); + + auto& description = record.GetPathDescription().GetColumnTableDescription(); + UNIT_ASSERT_VALUES_EQUAL(description.GetColumnShardCount(), 64); + + auto& sharding = description.GetSharding(); + 
UNIT_ASSERT_VALUES_EQUAL(sharding.ColumnShardsSize(), 64); + UNIT_ASSERT(sharding.HasHashSharding()); + auto& hashSharding = sharding.GetHashSharding(); + UNIT_ASSERT_VALUES_EQUAL(hashSharding.ColumnsSize(), 2); + UNIT_ASSERT_EQUAL(hashSharding.GetFunction(), + NKikimrSchemeOp::TColumnTableSharding::THashSharding::HASH_FUNCTION_MODULO_N); + UNIT_ASSERT_VALUES_EQUAL(hashSharding.GetColumns()[0], "some"); + UNIT_ASSERT_VALUES_EQUAL(hashSharding.GetColumns()[1], "data"); + }; + + TestLsPathId(runtime, 4, checkFn); + + TestDropColumnTable(runtime, ++txId, "/MyRoot/MyDir", "ColumnTable"); + env.TestWaitNotification(runtime, txId); + + TestLs(runtime, "/MyRoot/MyDir/ColumnTable", false, NLs::PathNotExist); + TestLsPathId(runtime, 4, NLs::PathStringEqual("")); + } + Y_UNIT_TEST(CreateTableTtl) { TTestBasicRuntime runtime; TTestEnv env(runtime); @@ -462,6 +535,7 @@ Y_UNIT_TEST_SUITE(TOlap) { TString tableSchema1 = R"( Name: "Table1" + ColumnShardCount: 1 TtlSettings { Enabled { ColumnName: "timestamp" ExpireAfterSeconds: 300 } } @@ -478,6 +552,7 @@ Y_UNIT_TEST_SUITE(TOlap) { TString tableSchema2 = R"( Name: "Table2" + ColumnShardCount: 1 TtlSettings { Disabled {} } @@ -494,6 +569,7 @@ Y_UNIT_TEST_SUITE(TOlap) { TString tableSchema3 = R"( Name: "Table3" + ColumnShardCount: 1 TtlSettings { UseTiering : "Tiering1" } @@ -510,6 +586,7 @@ Y_UNIT_TEST_SUITE(TOlap) { TString tableSchema4 = R"( Name: "Table4" + ColumnShardCount: 1 TtlSettings { UseTiering : "Tiering1" } @@ -531,6 +608,7 @@ Y_UNIT_TEST_SUITE(TOlap) { TString tableSchemaX = R"( Name: "ColumnTable" + ColumnShardCount: 1 TtlSettings { Enabled { ExpireAfterSeconds: 300 @@ -543,6 +621,7 @@ Y_UNIT_TEST_SUITE(TOlap) { TString tableSchema = R"( Name: "ColumnTable" + ColumnShardCount: 1 TtlSettings { Enabled { ColumnName: "timestamp" @@ -595,6 +674,7 @@ Y_UNIT_TEST_SUITE(TOlap) { TString tableSchema = R"( Name: "ColumnTable" + ColumnShardCount: 1 TtlSettings { Enabled { ColumnName: "timestamp" @@ -678,6 +758,7 @@ 
Y_UNIT_TEST_SUITE(TOlap) { TString tableSchema = R"( Name: "ColumnTable" + ColumnShardCount: 1 )"; TestCreateColumnTable(runtime, ++txId, "/MyRoot/OlapStore", tableSchema); diff --git a/ydb/core/tx/schemeshard/ut_olap_reboots.cpp b/ydb/core/tx/schemeshard/ut_olap_reboots.cpp index 64bad70f3d..47ee8880c5 100644 --- a/ydb/core/tx/schemeshard/ut_olap_reboots.cpp +++ b/ydb/core/tx/schemeshard/ut_olap_reboots.cpp @@ -66,6 +66,7 @@ Y_UNIT_TEST_SUITE(TOlapReboots) { TestCreateColumnTable(runtime, ++t.TxId, "/MyRoot/OlapStore", R"( Name: "ColumnTable" + ColumnShardCount: 1 )"); t.TestEnv->TestWaitNotification(runtime, t.TxId); @@ -111,6 +112,7 @@ Y_UNIT_TEST_SUITE(TOlapReboots) { TestCreateColumnTable(runtime, ++t.TxId, "/MyRoot/OlapStore", R"( Name: "ColumnTable" + ColumnShardCount: 1 )"); t.TestEnv->TestWaitNotification(runtime, t.TxId); @@ -163,11 +165,13 @@ Y_UNIT_TEST_SUITE(TOlapReboots) { t.TestEnv->ReliablePropose(runtime, CreateColumnTableRequest(t.TxId += 2, "/MyRoot/OlapStore", R"( Name: "ColumnTable1" + ColumnShardCount: 1 )"), {NKikimrScheme::StatusAccepted, NKikimrScheme::StatusAlreadyExists, NKikimrScheme::StatusMultipleModifications}); t.TestEnv->ReliablePropose(runtime, CreateColumnTableRequest(t.TxId - 1, "/MyRoot/OlapStore", R"( Name: "ColumnTable2" + ColumnShardCount: 1 )"), {NKikimrScheme::StatusAccepted, NKikimrScheme::StatusAlreadyExists, NKikimrScheme::StatusMultipleModifications}); t.TestEnv->TestWaitNotification(runtime, {t.TxId - 1, t.TxId}); @@ -221,11 +225,13 @@ Y_UNIT_TEST_SUITE(TOlapReboots) { TestCreateColumnTable(runtime, ++t.TxId, "/MyRoot/OlapStore", R"( Name: "ColumnTable1" + ColumnShardCount: 1 )"); t.TestEnv->TestWaitNotification(runtime, t.TxId); TestCreateColumnTable(runtime, ++t.TxId, "/MyRoot/OlapStore", R"( Name: "ColumnTable2" + ColumnShardCount: 1 )"); t.TestEnv->TestWaitNotification(runtime, t.TxId); } @@ -319,6 +325,7 @@ Y_UNIT_TEST_SUITE(TOlapReboots) { TestCreateColumnTable(runtime, ++t.TxId, "/MyRoot/OlapStore", R"( Name: 
"ColumnTable" + ColumnShardCount: 1 )"); t.TestEnv->TestWaitNotification(runtime, t.TxId); } @@ -354,6 +361,7 @@ Y_UNIT_TEST_SUITE(TOlapReboots) { TestCreateColumnTable(runtime, ++t.TxId, "/MyRoot/OlapStore", R"( Name: "ColumnTable" + ColumnShardCount: 1 SchemaPresetName: "default" TtlSettings { Enabled { diff --git a/ydb/core/tx/tiering/manager.cpp b/ydb/core/tx/tiering/manager.cpp index 8c79c96693..0632df554a 100644 --- a/ydb/core/tx/tiering/manager.cpp +++ b/ydb/core/tx/tiering/manager.cpp @@ -162,6 +162,10 @@ void TTiersManager::TakeConfigs(NMetadata::NFetcher::ISnapshot::TPtr snapshotExt auto& manager = Managers.emplace(i.second.GetTierName(), std::move(localManager)).first->second; manager.Start(Secrets); } + + if (ShardCallback && TlsActivationContext) { + ShardCallback(TActivationContext::AsActorContext()); + } } TActorId TTiersManager::GetStorageActorId(const TString& tierId) { diff --git a/ydb/core/tx/tiering/manager.h b/ydb/core/tx/tiering/manager.h index 258058fe27..1c6662e870 100644 --- a/ydb/core/tx/tiering/manager.h +++ b/ydb/core/tx/tiering/manager.h @@ -1,6 +1,8 @@ #pragma once #include "external_data.h" +#include <functional> + #include <library/cpp/actors/core/actor_bootstrapped.h> #include <library/cpp/actors/core/actor.h> @@ -38,6 +40,7 @@ private: using TManagers = std::unordered_map<TString, NTiers::TManager>; ui64 TabletId = 0; const TActorId TabletActorId; + std::function<void(const TActorContext& ctx)> ShardCallback; TActor* Actor = nullptr; std::unordered_map<ui64, TString> PathIdTiering; YDB_READONLY_DEF(TManagers, Managers); @@ -47,9 +50,11 @@ private: mutable NMetadata::NFetcher::ISnapshotsFetcher::TPtr ExternalDataManipulation; public: - TTiersManager(const ui64 tabletId, const TActorId& tabletActorId) + TTiersManager(const ui64 tabletId, const TActorId& tabletActorId, + std::function<void(const TActorContext& ctx)> shardCallback = {}) : TabletId(tabletId) , TabletActorId(tabletActorId) + , ShardCallback(shardCallback) { } TActorId 
GetActorId() const; diff --git a/ydb/core/tx/tiering/s3_actor.cpp b/ydb/core/tx/tiering/s3_actor.cpp index 080be0cb6f..13913254fe 100644 --- a/ydb/core/tx/tiering/s3_actor.cpp +++ b/ydb/core/tx/tiering/s3_actor.cpp @@ -35,9 +35,9 @@ public: return Event->Blobs; } - TUnifiedBlobId AddExported(const TUnifiedBlobId& srcBlob, const ui64 pathId) { - Event->SrcToDstBlobs[srcBlob] = srcBlob.MakeS3BlobId(pathId); - return Event->SrcToDstBlobs[srcBlob]; + TString GetS3Key(const TUnifiedBlobId& srcBlob) const { + Y_VERIFY(Event->SrcToDstBlobs.contains(srcBlob)); + return Event->SrcToDstBlobs.find(srcBlob)->second.GetS3Key(); } bool ExtractionFinished() const { @@ -52,6 +52,18 @@ public: auto node = KeysToWrite.extract(key); return node.mapped(); } + + void RemoveBlobs(const THashSet<TUnifiedBlobId>& blobIds) { + for (auto& blobId : blobIds) { + Event->Blobs.erase(blobId); + Event->SrcToDstBlobs.erase(blobId); + } + } + + bool IsNotFinished(const TString& key) const { + return KeysToWrite.contains(key); + } + private: std::unordered_map<TString, TUnifiedBlobId> KeysToWrite; }; @@ -125,47 +137,75 @@ public: Exports[exportNo] = TS3Export(ev->Release()); auto& ex = Exports[exportNo]; + THashSet<TUnifiedBlobId> retryes; for (auto& [blobId, blobData] : ex.Blobs()) { - TString key = ex.AddExported(blobId, msg.PathId).GetS3Key(); - Y_VERIFY(!ExportingKeys.count(key)); // TODO: allow reexport? 
+ const TString key = ex.GetS3Key(blobId); + Y_VERIFY(!key.empty()); + + if (ExportingKeys.contains(key)) { + retryes.insert(blobId); + auto strBlobId = blobId.ToStringNew(); + + const auto& prevExport = Exports[ExportingKeys[key]]; + if (prevExport.IsNotFinished(key)) { + LOG_S_INFO("[S3] Retry export blob '" << strBlobId << "' at tablet " << TabletId); + } else { + LOG_S_INFO("[S3] Avoid export retry for blob '" << strBlobId << "' at tablet " << TabletId); + blobData = {}; + } + } else { + ex.RegisterKey(key, blobId); + ExportingKeys[key] = exportNo; + } - ex.RegisterKey(key, blobId); - ExportingKeys[key] = exportNo; + if (!blobData.empty()) { + SendPutObjectIfNotExists(key, std::move(blobData)); + } + } - SendPutObjectIfNotExists(key, std::move(blobData)); + ex.RemoveBlobs(retryes); + if (ex.ExtractionFinished()) { + Exports.erase(exportNo); + LOG_S_DEBUG("[S3] Empty export " << exportNo << " at tablet " << TabletId); } } void Handle(TEvPrivate::TEvForget::TPtr& ev) { - // It's possible to get several forgets for the same blob (remove + cleanup) - for (auto& evict : ev->Get()->Evicted) { - if (evict.ExternBlob.IsS3Blob()) { - const TString& key = evict.ExternBlob.GetS3Key(); - if (ForgettingKeys.count(key)) { - LOG_S_NOTICE("[S3] Ignore forget '" << evict.Blob.ToStringNew() << "' at tablet " << TabletId); - return; // TODO: return an error? 
- } - } - } - ui64 forgetNo = ++ForgetNo; - Forgets[forgetNo] = TS3Forget(ev->Release()); auto& forget = Forgets[forgetNo]; - for (auto& evict : forget.Event->Evicted) { + auto& eventEvicted = forget.Event->Evicted; + Y_VERIFY(!eventEvicted.empty()); + + std::vector<NOlap::TEvictedBlob> newEvicted; + newEvicted.reserve(eventEvicted.size()); + + for (auto&& evict : forget.Event->Evicted) { if (!evict.ExternBlob.IsS3Blob()) { LOG_S_ERROR("[S3] Forget not exported '" << evict.Blob.ToStringNew() << "' at tablet " << TabletId); continue; } - const TString& key = evict.ExternBlob.GetS3Key(); - Y_VERIFY(!ForgettingKeys.count(key)); + const TString key = evict.ExternBlob.GetS3Key(); + + if (ForgettingKeys.contains(key)) { + auto strBlobId = evict.Blob.ToStringNew(); + LOG_S_INFO("[S3] Retry forget blob '" << strBlobId << "' at tablet " << TabletId); + } else { + newEvicted.emplace_back(std::move(evict)); + forget.KeysToDelete.emplace(key); + ForgettingKeys[key] = forgetNo; + } - forget.KeysToDelete.emplace(key); - ForgettingKeys[key] = forgetNo; SendDeleteObject(key); } + + eventEvicted.swap(newEvicted); + if (eventEvicted.empty()) { + Forgets.erase(forgetNo); + LOG_S_DEBUG("[S3] Empty forget " << forgetNo << " at tablet " << TabletId); + } } void Handle(TEvPrivate::TEvGetExported::TPtr& ev) { @@ -270,7 +310,7 @@ public: LOG_S_DEBUG("[S3] DeleteObjectResponse '" << key << "' at tablet " << TabletId); if (!ForgettingKeys.count(key)) { - LOG_S_DEBUG("[S3] DeleteObjectResponse for unknown key '" << key << "' at tablet " << TabletId); + LOG_S_INFO("[S3] DeleteObjectResponse for unknown key '" << key << "' at tablet " << TabletId); return; } @@ -278,7 +318,7 @@ public: ForgettingKeys.erase(key); if (!Forgets.count(forgetNo)) { - LOG_S_DEBUG("[S3] DeleteObjectResponse for unknown forget with key '" << key << "' at tablet " << TabletId); + LOG_S_INFO("[S3] DeleteObjectResponse for unknown forget with key '" << key << "' at tablet " << TabletId); return; } @@ -355,28 +395,30 @@ 
public: } void KeyFinished(const TString& key, const bool hasError, const TString& errStr) { - ui64 exportNo = 0; - { - auto itExportKey = ExportingKeys.find(key); - if (itExportKey == ExportingKeys.end()) { - LOG_S_DEBUG("[S3] KeyFinished for unknown key '" << key << "' at tablet " << TabletId); - return; - } - exportNo = itExportKey->second; - ExportingKeys.erase(itExportKey); + auto itExportKey = ExportingKeys.find(key); + if (itExportKey == ExportingKeys.end()) { + LOG_S_INFO("[S3] KeyFinished for unknown key '" << key << "' at tablet " << TabletId); + return; } + ui64 exportNo = itExportKey->second; + auto it = Exports.find(exportNo); if (it == Exports.end()) { - LOG_S_DEBUG("[S3] KeyFinished for unknown export with key '" << key << "' at tablet " << TabletId); + LOG_S_INFO("[S3] KeyFinished for unknown export with key '" << key << "' at tablet " << TabletId); return; } + LOG_S_DEBUG("[S3] KeyFinished for key '" << key << "' at tablet " << TabletId); auto& ex = it->second; TUnifiedBlobId blobId = ex.FinishKey(key); ex.Event->AddResult(blobId, key, hasError, errStr); if (ex.ExtractionFinished()) { + for (auto& [blobId, _] : ex.Blobs()) { + ExportingKeys.erase(ex.GetS3Key(blobId)); + } + Y_VERIFY(ex.Event->Finished()); Send(ShardActor, ex.Event.release()); Exports.erase(exportNo); diff --git a/ydb/core/viewer/json_cluster.h b/ydb/core/viewer/json_cluster.h index e85ed63ba1..c4732c6c60 100644 --- a/ydb/core/viewer/json_cluster.h +++ b/ydb/core/viewer/json_cluster.h @@ -6,6 +6,7 @@ #include <ydb/core/tx/schemeshard/schemeshard.h> #include <ydb/core/tx/tx_proxy/proxy.h> #include <ydb/core/viewer/json/json.h> +#include "json_pipe_req.h" #include "viewer.h" namespace NKikimr { @@ -15,13 +16,10 @@ using namespace NActors; using namespace NNodeWhiteboard; using ::google::protobuf::FieldDescriptor; -class TJsonCluster : public TActorBootstrapped<TJsonCluster> { +class TJsonCluster : public TViewerPipeClient<TJsonCluster> { using TThis = TJsonCluster; - using TBase = 
TActorBootstrapped<TJsonCluster>; + using TBase = TViewerPipeClient<TJsonCluster>; IViewer* Viewer; - TActorId Initiator; - ui32 Requested; - ui32 Received; NMon::TEvHttpInfo::TPtr Event; THolder<TEvInterconnect::TEvNodesInfo> NodesInfo; TMap<TNodeId, NKikimrWhiteboard::TEvSystemStateResponse> SystemInfo; @@ -33,6 +31,7 @@ class TJsonCluster : public TActorBootstrapped<TJsonCluster> { TSet<TNodeId> NodesAlive; TJsonSettings JsonSettings; ui32 Timeout; + ui32 TenantsNumber; bool Tablets = false; public: @@ -42,58 +41,33 @@ public: TJsonCluster(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) : Viewer(viewer) - , Initiator(ev->Sender) - , Requested(0) - , Received(0) , Event(ev) { const auto& params(Event->Get()->Request.GetParams()); JsonSettings.EnumAsNumbers = !FromStringWithDefault<bool>(params.Get("enums"), true); JsonSettings.UI64AsString = !FromStringWithDefault<bool>(params.Get("ui64"), false); + InitConfig(params); Tablets = FromStringWithDefault<bool>(params.Get("tablets"), false); Timeout = FromStringWithDefault<ui32>(params.Get("timeout"), 10000); } - void Bootstrap(const TActorContext& ctx) { - const TActorId nameserviceId = GetNameserviceActorId(); - ctx.Send(nameserviceId, new TEvInterconnect::TEvListNodes()); - TBase::Become(&TThis::StateRequestedBrowse); - ctx.Schedule(TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); + void Bootstrap(const TActorContext& ) { + SendRequest(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes()); + RequestConsoleListTenants(); + Become(&TThis::StateRequested, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); } - void Die(const TActorContext& ctx) override { + void PassAway() override { if (NodesInfo != nullptr) { for (const auto& ni : NodesInfo->Nodes) { - ctx.Send(TActivationContext::InterconnectProxy(ni.NodeId), new TEvents::TEvUnsubscribe()); + Send(TActivationContext::InterconnectProxy(ni.NodeId), new TEvents::TEvUnsubscribe); } } - TBase::Die(ctx); + TBase::PassAway(); } - void 
SendRequest(ui32 nodeId, const TActorContext& ctx) { - TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(nodeId); - ctx.Send(whiteboardServiceId, new TEvWhiteboard::TEvSystemStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - ++Requested; - ctx.Send(whiteboardServiceId, new TEvWhiteboard::TEvVDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - ++Requested; - ctx.Send(whiteboardServiceId, new TEvWhiteboard::TEvPDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - ++Requested; - ctx.Send(whiteboardServiceId, new TEvWhiteboard::TEvBSGroupStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - ++Requested; - } - - void SendTabletStateRequest(ui32 nodeId, const TActorContext& ctx, THashSet<TTabletId>& filterTablets) { - auto request = new TEvWhiteboard::TEvTabletStateRequest(); - for (TTabletId id: filterTablets) { - request->Record.AddFilterTabletId(id); - } - TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(nodeId); - ctx.Send(whiteboardServiceId, request, IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - ++Requested; - } - - void SendTabletStateRequest(const TActorContext& ctx) { - TIntrusivePtr<TDomainsInfo> domains = AppData(ctx)->DomainsInfo; + void SendWhiteboardTabletStateRequest() { + TIntrusivePtr<TDomainsInfo> domains = AppData()->DomainsInfo; TIntrusivePtr<TDomainsInfo::TDomain> domain = domains->Domains.begin()->second; THashSet<TTabletId> filterTablets; for (TTabletId id : domain->Coordinators) { @@ -124,18 +98,36 @@ public: TIntrusivePtr<TDynamicNameserviceConfig> dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig; for (const auto& ni : NodesInfo->Nodes) { if (ni.NodeId <= dynamicNameserviceConfig->MaxStaticNodeId) { - SendTabletStateRequest(ni.NodeId, ctx, filterTablets); + TActorId whiteboardServiceId = 
MakeNodeWhiteboardServiceId(ni.NodeId); + auto request = new TEvWhiteboard::TEvTabletStateRequest(); + for (TTabletId id: filterTablets) { + request->Record.AddFilterTabletId(id); + } + SendRequest(whiteboardServiceId, request, IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId); } } } - void HandleBrowse(TEvInterconnect::TEvNodesInfo::TPtr& ev, const TActorContext& ctx) { + void SendWhiteboardRequests() { + for (const auto& ni : NodesInfo->Nodes) { + TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(ni.NodeId); + SendRequest(whiteboardServiceId, new TEvWhiteboard::TEvSystemStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId); + SendRequest(whiteboardServiceId, new TEvWhiteboard::TEvVDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId); + SendRequest(whiteboardServiceId,new TEvWhiteboard::TEvPDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId); + SendRequest(whiteboardServiceId, new TEvWhiteboard::TEvBSGroupStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId); + } + if (Tablets) { + SendWhiteboardTabletStateRequest(); + } + } + + void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev) { if (Tablets) { THolder<TEvTxUserProxy::TEvNavigate> request = MakeHolder<TEvTxUserProxy::TEvNavigate>(); if (!Event->Get()->UserToken.empty()) { request->Record.SetUserToken(Event->Get()->UserToken); } - TIntrusivePtr<TDomainsInfo> domains = AppData(ctx)->DomainsInfo; + TIntrusivePtr<TDomainsInfo> domains = AppData()->DomainsInfo; TIntrusivePtr<TDomainsInfo::TDomain> domain = domains->Domains.begin()->second; TString domainPath = "/" + domain->Name; NKikimrSchemeOp::TDescribePath* record = request->Record.MutableDescribePath(); @@ -143,152 +135,136 @@ public: record->MutableOptions()->SetReturnPartitioningInfo(false); 
record->MutableOptions()->SetReturnPartitionConfig(false); record->MutableOptions()->SetReturnChildren(false); - TActorId txproxy = MakeTxProxyID(); - ctx.Send(txproxy, request.Release()); - ++Requested; + SendRequest(MakeTxProxyID(), request.Release()); } NodesInfo = ev->Release(); - for (const auto& ni : NodesInfo->Nodes) { - SendRequest(ni.NodeId, ctx); - } - if (Requested > 0) { - TBase::Become(&TThis::StateRequestedNodeInfo); - } else { - ReplyAndDie(ctx); - } + RequestDone(); } - void Undelivered(TEvents::TEvUndelivered::TPtr &ev, const TActorContext &ctx) { + void Undelivered(TEvents::TEvUndelivered::TPtr &ev) { ui32 nodeId = ev.Get()->Cookie; switch (ev->Get()->SourceType) { case TEvWhiteboard::EvSystemStateRequest: if (SystemInfo.emplace(nodeId, NKikimrWhiteboard::TEvSystemStateResponse{}).second) { - RequestDone(ctx); + RequestDone(); } break; case TEvWhiteboard::EvVDiskStateRequest: if (VDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvVDiskStateResponse{}).second) { - RequestDone(ctx); + RequestDone(); } break; case TEvWhiteboard::EvPDiskStateRequest: if (PDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvPDiskStateResponse{}).second) { - RequestDone(ctx); + RequestDone(); } break; case TEvWhiteboard::EvBSGroupStateRequest: if (BSGroupInfo.emplace(nodeId, NKikimrWhiteboard::TEvBSGroupStateResponse{}).second) { - RequestDone(ctx); + RequestDone(); } break; case TEvWhiteboard::EvTabletStateRequest: if (TabletInfo.emplace(nodeId, NKikimrWhiteboard::TEvTabletStateResponse{}).second) { - RequestDone(ctx); + RequestDone(); } break; } } - void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &ev, const TActorContext &ctx) { + void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &ev) { ui32 nodeId = ev->Get()->NodeId; if (SystemInfo.emplace(nodeId, NKikimrWhiteboard::TEvSystemStateResponse{}).second) { - RequestDone(ctx); + RequestDone(); } if (VDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvVDiskStateResponse{}).second) { - RequestDone(ctx); + 
RequestDone(); } if (PDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvPDiskStateResponse{}).second) { - RequestDone(ctx); + RequestDone(); } if (BSGroupInfo.emplace(nodeId, NKikimrWhiteboard::TEvBSGroupStateResponse{}).second) { - RequestDone(ctx); + RequestDone(); } - if (Tablets) { + TIntrusivePtr<TDynamicNameserviceConfig> dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig; + if (Tablets && nodeId <= dynamicNameserviceConfig->MaxStaticNodeId) { if (TabletInfo.emplace(nodeId, NKikimrWhiteboard::TEvTabletStateResponse{}).second) { - RequestDone(ctx); + RequestDone(); } } } - void Handle(TEvWhiteboard::TEvSystemStateResponse::TPtr& ev, const TActorContext& ctx) { + void Handle(TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) { ui64 nodeId = ev.Get()->Cookie; SystemInfo[nodeId] = std::move(ev->Get()->Record); NodesAlive.insert(nodeId); - RequestDone(ctx); + RequestDone(); } - void Handle(TEvWhiteboard::TEvVDiskStateResponse::TPtr& ev, const TActorContext& ctx) { + void Handle(TEvWhiteboard::TEvVDiskStateResponse::TPtr& ev) { ui64 nodeId = ev.Get()->Cookie; VDiskInfo[nodeId] = std::move(ev->Get()->Record); NodesAlive.insert(nodeId); - RequestDone(ctx); + RequestDone(); } - void Handle(TEvWhiteboard::TEvPDiskStateResponse::TPtr& ev, const TActorContext& ctx) { + void Handle(TEvWhiteboard::TEvPDiskStateResponse::TPtr& ev) { ui64 nodeId = ev.Get()->Cookie; PDiskInfo[nodeId] = std::move(ev->Get()->Record); NodesAlive.insert(nodeId); - RequestDone(ctx); + RequestDone(); } - void Handle(TEvWhiteboard::TEvBSGroupStateResponse::TPtr& ev, const TActorContext& ctx) { + void Handle(TEvWhiteboard::TEvBSGroupStateResponse::TPtr& ev) { ui64 nodeId = ev.Get()->Cookie; BSGroupInfo[nodeId] = std::move(ev->Get()->Record); NodesAlive.insert(nodeId); - RequestDone(ctx); + RequestDone(); } - void Handle(TEvWhiteboard::TEvTabletStateResponse::TPtr& ev, const TActorContext& ctx) { + void Handle(TEvWhiteboard::TEvTabletStateResponse::TPtr& ev) { ui64 nodeId = ev.Get()->Cookie; 
TabletInfo[nodeId] = std::move(ev->Get()->Record); NodesAlive.insert(nodeId); - RequestDone(ctx); + RequestDone(); } - void Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev, const TActorContext &ctx) { - if (ev->Get()->GetRecord().GetStatus() == NKikimrScheme::StatusSuccess) { - DescribeResult = ev->Release(); - - if (Tablets) { - SendTabletStateRequest(ctx); - } - } - RequestDone(ctx); + void Handle(NConsole::TEvConsole::TEvListTenantsResponse::TPtr& ev) { + Ydb::Cms::ListDatabasesResult listTenantsResult; + ev->Get()->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult); + TenantsNumber = listTenantsResult.paths().size(); + RequestDone(); } - void RequestDone(const TActorContext& ctx) { - ++Received; - if (Received == Requested) { - ReplyAndDie(ctx); + void Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev) { + if (ev->Get()->GetRecord().GetStatus() == NKikimrScheme::StatusSuccess) { + DescribeResult = ev->Release(); + SendWhiteboardRequests(); } + RequestDone(); } - void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) { + void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev) { if (ev->Get()->Status != NKikimrProto::OK) { - RequestDone(ctx); + RequestDone(); } } - STFUNC(StateRequestedBrowse) { + STATEFN(StateRequested) { switch (ev->GetTypeRewrite()) { - HFunc(TEvInterconnect::TEvNodesInfo, HandleBrowse); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - STFUNC(StateRequestedNodeInfo) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvWhiteboard::TEvSystemStateResponse, Handle); - HFunc(TEvWhiteboard::TEvVDiskStateResponse, Handle); - HFunc(TEvWhiteboard::TEvPDiskStateResponse, Handle); - HFunc(TEvWhiteboard::TEvBSGroupStateResponse, Handle); - HFunc(TEvWhiteboard::TEvTabletStateResponse, Handle); - HFunc(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult, Handle); - HFunc(TEvents::TEvUndelivered, Undelivered); - HFunc(TEvInterconnect::TEvNodeDisconnected, 
Disconnected); - HFunc(TEvTabletPipe::TEvClientConnected, Handle); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); + hFunc(TEvInterconnect::TEvNodesInfo, Handle); + hFunc(TEvWhiteboard::TEvSystemStateResponse, Handle); + hFunc(TEvWhiteboard::TEvVDiskStateResponse, Handle); + hFunc(TEvWhiteboard::TEvPDiskStateResponse, Handle); + hFunc(TEvWhiteboard::TEvBSGroupStateResponse, Handle); + hFunc(TEvWhiteboard::TEvTabletStateResponse, Handle); + hFunc(NConsole::TEvConsole::TEvListTenantsResponse, Handle); + hFunc(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult, Handle); + hFunc(TEvents::TEvUndelivered, Undelivered); + hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); + hFunc(TEvTabletPipe::TEvClientConnected, Handle); + cFunc(TEvents::TSystem::Wakeup, HandleTimeout); } } @@ -299,7 +275,7 @@ public: TMap<NKikimrBlobStorage::TVDiskID, const NKikimrWhiteboard::TVDiskStateInfo&> VDisksIndex; TMap<std::pair<ui32, ui32>, const NKikimrWhiteboard::TPDiskStateInfo&> PDisksIndex; - void ReplyAndDie(const TActorContext& ctx) { + void ReplyAndPassAway() { TStringStream json; MergeWhiteboardResponses(MergedBSGroupInfo, BSGroupInfo); MergeWhiteboardResponses(MergedVDiskInfo, VDiskInfo); @@ -309,7 +285,7 @@ public: if (Tablets) { MergeWhiteboardResponses(MergedTabletInfo, TabletInfo); - TIntrusivePtr<TDomainsInfo> domains = AppData(ctx)->DomainsInfo; + TIntrusivePtr<TDomainsInfo> domains = AppData()->DomainsInfo; TIntrusivePtr<TDomainsInfo::TDomain> domain = domains->Domains.begin()->second; ui32 hiveDomain = domains->GetHiveDomainUid(domain->DefaultHiveUid); ui64 defaultStateStorageGroup = domains->GetDefaultStateStorageGroup(hiveDomain); @@ -396,7 +372,6 @@ public: NKikimrViewer::TClusterInfo pbCluster; if (Tablets) { - std::unordered_set<std::pair<ui64, ui64>> tenants; /// group by tenantid (TDomainKey) for (const NKikimrWhiteboard::TTabletStateInfo& tabletInfo : MergedTabletInfo.GetTabletStateInfo()) { if (tablets.contains(tabletInfo.GetTabletId())) { 
NKikimrWhiteboard::TTabletStateInfo* tablet = pbCluster.AddSystemTablets(); @@ -405,15 +380,10 @@ public: tablet->SetOverall(tabletFlag); flag = Max(flag, GetViewerFlag(tabletFlag)); } - std::pair<ui64, ui64> tenantId = {0, 0}; - if (tabletInfo.HasTenantId()) { - tenantId = {tabletInfo.GetTenantId().GetSchemeShard(), tabletInfo.GetTenantId().GetPathId()}; - } - tenants.emplace(tenantId); } pbCluster.SetTablets(MergedTabletInfo.TabletStateInfoSize()); - pbCluster.SetTenants(tenants.size()); } + pbCluster.SetTenants(TenantsNumber); pbCluster.SetOverall(flag); if (NodesInfo != nullptr) { @@ -438,12 +408,12 @@ public: pbCluster.SetName(itMax->first); } TProtoToJson::ProtoToJson(json, pbCluster, JsonSettings); - ctx.Send(Initiator, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), std::move(json.Str())), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); + Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), std::move(json.Str())), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); + PassAway(); } - void HandleTimeout(const TActorContext& ctx) { - ReplyAndDie(ctx); + void HandleTimeout() { + ReplyAndPassAway(); } }; diff --git a/ydb/core/viewer/json_storage.h b/ydb/core/viewer/json_storage.h index adfc6e3458..652fb9afd3 100644 --- a/ydb/core/viewer/json_storage.h +++ b/ydb/core/viewer/json_storage.h @@ -74,6 +74,7 @@ class TJsonStorage : public TViewerPipeClient<TJsonStorage> { bool NeedGroups = true; bool NeedDisks = true; bool NeedDonors = true; + bool NeedAdditionalNodesRequests; enum class EWith { Everything, @@ -109,6 +110,7 @@ public: FilterStoragePools.emplace(filterStoragePool); } SplitIds(params.Get("node_id"), ',', FilterNodeIds); + NeedAdditionalNodesRequests = !FilterNodeIds.empty(); SplitIds(params.Get("group_id"), ',', FilterGroupIds); Sort(FilterGroupIds); NeedGroups = FromStringWithDefault<bool>(params.Get("need_groups"), true); @@ -325,17 +327,19 @@ public: } void 
Handle(TEvWhiteboard::TEvBSGroupStateResponse::TPtr& ev) { + ui64 nodeId = ev.Get()->Cookie; for (const auto& info : ev->Get()->Record.GetBSGroupStateInfo()) { TString storagePoolName = info.GetStoragePoolName(); if (storagePoolName.empty()) { continue; } - StoragePoolInfo[storagePoolName].Groups.emplace(ToString(info.GetGroupID())); + if (FilterNodeIds.empty() || FilterNodeIds.contains(nodeId)) { + StoragePoolInfo[storagePoolName].Groups.emplace(ToString(info.GetGroupID())); + } for (const auto& vDiskNodeId : info.GetVDiskNodeIds()) { Group2NodeId[info.GetGroupID()].push_back(vDiskNodeId); } } - ui64 nodeId = ev.Get()->Cookie; BSGroupInfo[nodeId] = std::move(ev->Get()->Record); RequestDone(); } @@ -476,7 +480,8 @@ public: } void ReplyAndPassAway() { - if (!FilterNodeIds.empty()) { + if (NeedAdditionalNodesRequests) { + NeedAdditionalNodesRequests = false; for (const auto& [nodeId, vDiskInfo] : VDiskInfo) { if (FilterNodeIds.count(nodeId) == 0) { continue; @@ -495,8 +500,6 @@ public: } } - FilterNodeIds.clear(); // we don't need it anymore - if (Requests != 0) { return; // retry requests for neighbours of our groups (when BSC wasn't available) } diff --git a/ydb/library/yql/sql/v1/SQLv1.g.in b/ydb/library/yql/sql/v1/SQLv1.g.in index 21ffa80530..824bb231a1 100644 --- a/ydb/library/yql/sql/v1/SQLv1.g.in +++ b/ydb/library/yql/sql/v1/SQLv1.g.in @@ -572,7 +572,7 @@ table_setting_value: | STRING_VALUE | integer | split_boundaries - | expr ON an_id + | expr ON an_id (AS (SECONDS | MILLISECONDS | MICROSECONDS | NANOSECONDS))? 
; family_entry: FAMILY an_id family_settings; @@ -978,7 +978,10 @@ keyword_compat: ( | LEFT | LIKE | MATCH + | MICROSECONDS + | MILLISECONDS | NATURAL + | NANOSECONDS | NO | NOTNULL | NULLS @@ -1020,6 +1023,7 @@ keyword_compat: ( | ROW | SAMPLE | SAVEPOINT + | SECONDS | SEMI | SETS | SUBQUERY @@ -1278,7 +1282,10 @@ LIMIT: L I M I T; LIST: L I S T; LOCAL: L O C A L; MATCH: M A T C H; +MICROSECONDS: M I C R O S E C O N D S; +MILLISECONDS: M I L L I S E C O N D S; NATURAL: N A T U R A L; +NANOSECONDS: N A N O S E C O N D S; NO: N O; NOT: N O T; NOTNULL: N O T N U L L; @@ -1332,6 +1339,7 @@ ROWS: R O W S; SAMPLE: S A M P L E; SAVEPOINT: S A V E P O I N T; SCHEMA: S C H E M A; +SECONDS: S E C O N D S; SELECT: S E L E C T; SEMI: S E M I; SET: S E T; diff --git a/ydb/library/yql/sql/v1/format/sql_format_ut.cpp b/ydb/library/yql/sql/v1/format/sql_format_ut.cpp index 222784f367..73c2b168bd 100644 --- a/ydb/library/yql/sql/v1/format/sql_format_ut.cpp +++ b/ydb/library/yql/sql/v1/format/sql_format_ut.cpp @@ -217,6 +217,14 @@ Y_UNIT_TEST_SUITE(CheckSqlFormatter) { {"create table user(partition by (user,user))","CREATE TABLE user (\n\tPARTITION BY (user, user)\n);\n"}, {"create table user(order by (user asc))","CREATE TABLE user (\n\tORDER BY (user ASC)\n);\n"}, {"create table user(order by (user desc,user))","CREATE TABLE user (\n\tORDER BY (user DESC, user)\n);\n"}, + {"create table user(user int32) with (ttl=interval('P1D') on user as seconds)", + "CREATE TABLE user (\n\tuser int32\n)\nWITH (ttl = interval('P1D') ON user AS SECONDS\n);\n"}, + {"create table user(user int32) with (ttl=interval('P1D') on user as MilliSeconds)", + "CREATE TABLE user (\n\tuser int32\n)\nWITH (ttl = interval('P1D') ON user AS MILLISECONDS\n);\n"}, + {"create table user(user int32) with (ttl=interval('P1D') on user as microSeconds)", + "CREATE TABLE user (\n\tuser int32\n)\nWITH (ttl = interval('P1D') ON user AS MICROSECONDS\n);\n"}, + {"create table user(user int32) with (ttl=interval('P1D') on 
user as nAnOsEcOnDs)", + "CREATE TABLE user (\n\tuser int32\n)\nWITH (ttl = interval('P1D') ON user AS NANOSECONDS\n);\n"}, {"create table user(index user global unique sync with (user=user,user=user) on (user,user))", "CREATE TABLE user (\n\tINDEX user GLOBAL UNIQUE SYNC WITH (user = user, user = user) ON (user, user)\n);\n"}, {"create table user(index user global async with (user=user,) on (user))", diff --git a/ydb/library/yql/sql/v1/node.cpp b/ydb/library/yql/sql/v1/node.cpp index 9a340e1dd4..96ba11449f 100644 --- a/ydb/library/yql/sql/v1/node.cpp +++ b/ydb/library/yql/sql/v1/node.cpp @@ -2331,9 +2331,10 @@ TMaybe<TStringContent> StringContentOrIdContent(TContext& ctx, TPosition pos, co (ctx.AnsiQuotedIdentifiers && input.StartsWith('"'))? EStringContentMode::AnsiIdent : EStringContentMode::Default); } -TTtlSettings::TTtlSettings(const TIdentifier& columnName, const TNodePtr& expr) +TTtlSettings::TTtlSettings(const TIdentifier& columnName, const TNodePtr& expr, const TMaybe<EUnit>& columnUnit) : ColumnName(columnName) , Expr(expr) + , ColumnUnit(columnUnit) { } diff --git a/ydb/library/yql/sql/v1/node.h b/ydb/library/yql/sql/v1/node.h index 3b6db37450..48ca912e93 100644 --- a/ydb/library/yql/sql/v1/node.h +++ b/ydb/library/yql/sql/v1/node.h @@ -1111,10 +1111,18 @@ namespace NSQLTranslationV1 { TMaybe<TStringContent> StringContentOrIdContent(TContext& ctx, TPosition pos, const TString& input); struct TTtlSettings { + enum class EUnit { + Seconds /* "seconds" */, + Milliseconds /* "milliseconds" */, + Microseconds /* "microseconds" */, + Nanoseconds /* "nanoseconds" */, + }; + TIdentifier ColumnName; TNodePtr Expr; + TMaybe<EUnit> ColumnUnit; - TTtlSettings(const TIdentifier& columnName, const TNodePtr& expr); + TTtlSettings(const TIdentifier& columnName, const TNodePtr& expr, const TMaybe<EUnit>& columnUnit = {}); }; struct TTableSettings { diff --git a/ydb/library/yql/sql/v1/query.cpp b/ydb/library/yql/sql/v1/query.cpp index 9cf580a27e..f85965aea5 100644 --- 
a/ydb/library/yql/sql/v1/query.cpp +++ b/ydb/library/yql/sql/v1/query.cpp @@ -865,12 +865,16 @@ public: if (const auto& ttl = Params.TableSettings.TtlSettings) { if (ttl.IsSet()) { const auto& ttlSettings = ttl.GetValueSet(); - auto columnName = BuildQuotedAtom(ttlSettings.ColumnName.Pos, ttlSettings.ColumnName.Name); - auto nameValueTuple = Y( - Q(Y(Q("columnName"), columnName)), - Q(Y(Q("expireAfter"), ttlSettings.Expr)) - ); - settings = L(settings, Q(Y(Q("setTtlSettings"), Q(nameValueTuple)))); + auto opts = Y(); + + opts = L(opts, Q(Y(Q("columnName"), BuildQuotedAtom(ttlSettings.ColumnName.Pos, ttlSettings.ColumnName.Name)))); + opts = L(opts, Q(Y(Q("expireAfter"), ttlSettings.Expr))); + + if (ttlSettings.ColumnUnit) { + opts = L(opts, Q(Y(Q("columnUnit"), Q(ToString(*ttlSettings.ColumnUnit))))); + } + + settings = L(settings, Q(Y(Q("setTtlSettings"), Q(opts)))); } else { YQL_ENSURE(false, "Can't reset TTL settings"); } @@ -1049,12 +1053,16 @@ public: if (const auto& ttl = Params.TableSettings.TtlSettings) { if (ttl.IsSet()) { const auto& ttlSettings = ttl.GetValueSet(); - auto columnName = BuildQuotedAtom(ttlSettings.ColumnName.Pos, ttlSettings.ColumnName.Name); - auto nameValueTuple = Y( - Q(Y(Q("columnName"), columnName)), - Q(Y(Q("expireAfter"), ttlSettings.Expr)) - ); - settings = L(settings, Q(Y(Q("setTtlSettings"), Q(nameValueTuple)))); + auto opts = Y(); + + opts = L(opts, Q(Y(Q("columnName"), BuildQuotedAtom(ttlSettings.ColumnName.Pos, ttlSettings.ColumnName.Name)))); + opts = L(opts, Q(Y(Q("expireAfter"), ttlSettings.Expr))); + + if (ttlSettings.ColumnUnit) { + opts = L(opts, Q(Y(Q("columnUnit"), Q(ToString(*ttlSettings.ColumnUnit))))); + } + + settings = L(settings, Q(Y(Q("setTtlSettings"), Q(opts)))); } else { settings = L(settings, Q(Y(Q("resetTtlSettings"), Q(Y())))); } diff --git a/ydb/library/yql/sql/v1/sql.cpp b/ydb/library/yql/sql/v1/sql.cpp index d031f9ca26..1b199625e0 100644 --- a/ydb/library/yql/sql/v1/sql.cpp +++ 
b/ydb/library/yql/sql/v1/sql.cpp @@ -2169,7 +2169,17 @@ namespace { return false; } - to.Set(TTtlSettings(columnName, exprNode)); + TMaybe<TTtlSettings::EUnit> columnUnit; + if (from.GetAlt_table_setting_value5().HasBlock4()) { + const TString unit = to_lower(ctx.Token(from.GetAlt_table_setting_value5().GetBlock4().GetToken2())); + columnUnit.ConstructInPlace(); + if (!TryFromString<TTtlSettings::EUnit>(unit, *columnUnit)) { + ctx.Error() << "Invalid unit: " << unit; + return false; + } + } + + to.Set(TTtlSettings(columnName, exprNode, columnUnit)); break; } default: diff --git a/ydb/library/yql/sql/v1/sql_ut.cpp b/ydb/library/yql/sql/v1/sql_ut.cpp index 1894fb54d6..2f4e7a6c88 100644 --- a/ydb/library/yql/sql/v1/sql_ut.cpp +++ b/ydb/library/yql/sql/v1/sql_ut.cpp @@ -268,6 +268,26 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) { UNIT_ASSERT(SqlToYql("USE plato; SELECT CHANGEFEED FROM CHANGEFEED").IsOk()); } + Y_UNIT_TEST(SecondsKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE SECONDS (SECONDS Uint32, PRIMARY KEY (SECONDS));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT SECONDS FROM SECONDS").IsOk()); + } + + Y_UNIT_TEST(MillisecondsKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE MILLISECONDS (MILLISECONDS Uint32, PRIMARY KEY (MILLISECONDS));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT MILLISECONDS FROM MILLISECONDS").IsOk()); + } + + Y_UNIT_TEST(MicrosecondsKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE MICROSECONDS (MICROSECONDS Uint32, PRIMARY KEY (MICROSECONDS));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT MICROSECONDS FROM MICROSECONDS").IsOk()); + } + + Y_UNIT_TEST(NanosecondsKeywordNotReservedForNames) { + UNIT_ASSERT(SqlToYql("USE plato; CREATE TABLE NANOSECONDS (NANOSECONDS Uint32, PRIMARY KEY (NANOSECONDS));").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; SELECT NANOSECONDS FROM NANOSECONDS").IsOk()); + } + Y_UNIT_TEST(Jubilee) { NYql::TAstParseResult res 
= SqlToYql("USE plato; INSERT INTO Arcadia (r2000000) VALUES (\"2M GET!!!\");"); UNIT_ASSERT(res.Root); @@ -1659,11 +1679,33 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) { UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); } - Y_UNIT_TEST(TtlParseCorrect) { + Y_UNIT_TEST(DateTimeTtlParseCorrect) { NYql::TAstParseResult res = SqlToYql( R"( USE plato; CREATE TABLE tableName (Key Uint32, CreatedAt Timestamp, PRIMARY KEY (Key)) - WITH ( TTL = Interval("P1D") On CreatedAt);)" + WITH (TTL = Interval("P1D") On CreatedAt);)" + ); + UNIT_ASSERT(res.Root); + + TVerifyLineFunc verifyLine = [](const TString& word, const TString& line) { + if (word == "Write") { + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("setTtlSettings")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("expireAfter")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("86400000")); + } + }; + + TWordCountHive elementStat = { {TString("Write"), 0} }; + VerifyProgram(res, elementStat, verifyLine); + + UNIT_ASSERT_VALUES_EQUAL(1, elementStat["Write"]); + } + + Y_UNIT_TEST(IntTtlParseCorrect) { + NYql::TAstParseResult res = SqlToYql( + R"( USE plato; + CREATE TABLE tableName (Key Uint32, CreatedAt Uint32, PRIMARY KEY (Key)) + WITH (TTL = Interval("P1D") On CreatedAt AS SECONDS);)" ); UNIT_ASSERT(res.Root); @@ -1672,6 +1714,8 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) { UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("setTtlSettings")); UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("expireAfter")); UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("86400000")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("columnUnit")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("seconds")); } }; @@ -1891,6 +1935,7 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) { Y_UNIT_TEST(AlterTableSetTTLIsCorrect) { UNIT_ASSERT(SqlToYql("USE plato; ALTER TABLE table SET (TTL = Interval(\"PT3H\") ON column)").IsOk()); + UNIT_ASSERT(SqlToYql("USE plato; ALTER TABLE table SET (TTL = Interval(\"PT3H\") ON column AS 
SECONDS)").IsOk()); } Y_UNIT_TEST(AlterTableSetTieringIsCorrect) { @@ -3345,16 +3390,27 @@ select FormatType($f()); "<main>:6:39: Error: Unknown correlation name: t\n"); } - Y_UNIT_TEST(InvalidTtl) { + Y_UNIT_TEST(InvalidTtlInterval) { auto req = R"( USE plato; CREATE TABLE tableName (Key Uint32, CreatedAt Timestamp, PRIMARY KEY (Key)) - WITH ( TTL = 1 On ExpireAt ); + WITH (TTL = 1 On CreatedAt); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:25: Error: Literal of Interval type is expected for TTL\n" + "<main>:4:25: Error: Invalid TTL settings\n"); + } + + Y_UNIT_TEST(InvalidTtlUnit) { + auto req = R"( + USE plato; + CREATE TABLE tableName (Key Uint32, CreatedAt Uint32, PRIMARY KEY (Key)) + WITH (TTL = Interval("P1D") On CreatedAt AS PICOSECONDS); )"; auto res = SqlToYql(req); UNIT_ASSERT(!res.Root); - UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:4:26: Error: Literal of Interval type is expected for TTL\n" - "<main>:4:26: Error: Invalid TTL settings\n"); + UNIT_ASSERT_STRING_CONTAINS(Err2Str(res), "<main>:4:56: Error: Unexpected token 'PICOSECONDS'"); } Y_UNIT_TEST(InvalidChangefeedSink) { diff --git a/ydb/public/api/grpc/CMakeLists.darwin.txt b/ydb/public/api/grpc/CMakeLists.darwin.txt index a6103cf1ca..e38ea284d2 100644 --- a/ydb/public/api/grpc/CMakeLists.darwin.txt +++ b/ydb/public/api/grpc/CMakeLists.darwin.txt @@ -35,6 +35,7 @@ target_proto_messages(api-grpc PRIVATE ${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_scripting_v1.proto ${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_table_v1.proto ${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_topic_v1.proto + ${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_keyvalue_v1.proto ${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/yq_v1.proto ) target_proto_addincls(api-grpc diff --git a/ydb/public/api/grpc/CMakeLists.linux-aarch64.txt b/ydb/public/api/grpc/CMakeLists.linux-aarch64.txt index aa76ae23b2..e3126cd3c8 100644 --- a/ydb/public/api/grpc/CMakeLists.linux-aarch64.txt +++ 
b/ydb/public/api/grpc/CMakeLists.linux-aarch64.txt @@ -36,6 +36,7 @@ target_proto_messages(api-grpc PRIVATE ${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_scripting_v1.proto ${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_table_v1.proto ${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_topic_v1.proto + ${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_keyvalue_v1.proto ${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/yq_v1.proto ) target_proto_addincls(api-grpc diff --git a/ydb/public/api/grpc/CMakeLists.linux.txt b/ydb/public/api/grpc/CMakeLists.linux.txt index aa76ae23b2..e3126cd3c8 100644 --- a/ydb/public/api/grpc/CMakeLists.linux.txt +++ b/ydb/public/api/grpc/CMakeLists.linux.txt @@ -36,6 +36,7 @@ target_proto_messages(api-grpc PRIVATE ${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_scripting_v1.proto ${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_table_v1.proto ${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_topic_v1.proto + ${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/ydb_keyvalue_v1.proto ${CMAKE_SOURCE_DIR}/ydb/public/api/grpc/yq_v1.proto ) target_proto_addincls(api-grpc diff --git a/ydb/public/api/grpc/ydb_keyvalue_v1.proto b/ydb/public/api/grpc/ydb_keyvalue_v1.proto new file mode 100644 index 0000000000..07f073576d --- /dev/null +++ b/ydb/public/api/grpc/ydb_keyvalue_v1.proto @@ -0,0 +1,49 @@ +syntax = "proto3"; + +package Ydb.KeyValue.V1; + +option java_package = "com.yandex.ydb.keyvalue.v1"; +option java_outer_classname = "KeyValueGrpc"; +option java_multiple_files = true; + +import "ydb/public/api/protos/ydb_keyvalue.proto"; + +// KeyValue tablets provide a simple key-value storage in a low-overhead and easy-to-shoot-your-leg manner. +// To use KeyValue tablets in an efficient way one must be familiar with the design of both the KeyValue tablet +// and the Distributed Storage underneath it. 
+ +service KeyValueService { + + // Create a volume by path and partition count + rpc CreateVolume(KeyValue.CreateVolumeRequest) returns (KeyValue.CreateVolumeResponse); + + // Drop the volume by path + rpc DropVolume(KeyValue.DropVolumeRequest) returns (KeyValue.DropVolumeResponse); + + // Alter the volume by path + rpc AlterVolume(KeyValue.AlterVolumeRequest) returns (KeyValue.AlterVolumeResponse); + + // Describe the volume by path + rpc DescribeVolume(KeyValue.DescribeVolumeRequest) returns (KeyValue.DescribeVolumeResponse); + + // List partitions of a volume at the local node. + rpc ListLocalPartitions(KeyValue.ListLocalPartitionsRequest) returns (KeyValue.ListLocalPartitionsResponse); + + // Acquire an exclusive lock for the partition. + rpc AcquireLock(KeyValue.AcquireLockRequest) returns (KeyValue.AcquireLockResponse); + + // Perform list of commands to modify the state of the partition as an atomic transaction. + rpc ExecuteTransaction(KeyValue.ExecuteTransactionRequest) returns (KeyValue.ExecuteTransactionResponse); + + // Read the value stored in the item with the key specified. + rpc Read(KeyValue.ReadRequest) returns (KeyValue.ReadResponse); + + // Read items with keys in the specified range. + rpc ReadRange(KeyValue.ReadRangeRequest) returns (KeyValue.ReadRangeResponse); + + // List keys and metadata of items with keys in the specified range. + rpc ListRange(KeyValue.ListRangeRequest) returns (KeyValue.ListRangeResponse); + + // Get storage channel status of the partition. 
+ rpc GetStorageChannelStatus(KeyValue.GetStorageChannelStatusRequest) returns (KeyValue.GetStorageChannelStatusResponse); +} diff --git a/ydb/public/api/protos/CMakeLists.darwin.txt b/ydb/public/api/protos/CMakeLists.darwin.txt index a2284957e0..173437c25e 100644 --- a/ydb/public/api/protos/CMakeLists.darwin.txt +++ b/ydb/public/api/protos/CMakeLists.darwin.txt @@ -50,6 +50,7 @@ target_proto_messages(api-protos PRIVATE ${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_table.proto ${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_topic.proto ${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_value.proto + ${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_keyvalue.proto ${CMAKE_SOURCE_DIR}/ydb/public/api/protos/yq.proto ) generate_enum_serilization(api-protos diff --git a/ydb/public/api/protos/CMakeLists.linux-aarch64.txt b/ydb/public/api/protos/CMakeLists.linux-aarch64.txt index 7a83641a53..769499a1ae 100644 --- a/ydb/public/api/protos/CMakeLists.linux-aarch64.txt +++ b/ydb/public/api/protos/CMakeLists.linux-aarch64.txt @@ -51,6 +51,7 @@ target_proto_messages(api-protos PRIVATE ${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_table.proto ${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_topic.proto ${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_value.proto + ${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_keyvalue.proto ${CMAKE_SOURCE_DIR}/ydb/public/api/protos/yq.proto ) generate_enum_serilization(api-protos diff --git a/ydb/public/api/protos/CMakeLists.linux.txt b/ydb/public/api/protos/CMakeLists.linux.txt index 7a83641a53..769499a1ae 100644 --- a/ydb/public/api/protos/CMakeLists.linux.txt +++ b/ydb/public/api/protos/CMakeLists.linux.txt @@ -51,6 +51,7 @@ target_proto_messages(api-protos PRIVATE ${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_table.proto ${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_topic.proto ${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_value.proto + ${CMAKE_SOURCE_DIR}/ydb/public/api/protos/ydb_keyvalue.proto ${CMAKE_SOURCE_DIR}/ydb/public/api/protos/yq.proto ) 
generate_enum_serilization(api-protos diff --git a/ydb/public/api/protos/ydb_keyvalue.proto b/ydb/public/api/protos/ydb_keyvalue.proto new file mode 100644 index 0000000000..e9e58f15a2 --- /dev/null +++ b/ydb/public/api/protos/ydb_keyvalue.proto @@ -0,0 +1,544 @@ +syntax = "proto3"; +option cc_enable_arenas = true; + +package Ydb.KeyValue; + +option java_package = "com.yandex.ydb.keyvalue"; +option java_outer_classname = "KeyValueProtos"; +option java_multiple_files = true; + +import "ydb/public/api/protos/ydb_operation.proto"; + +// +// KeyValue API. +// + + +message StorageChannelInfo { + enum StatusFlag { + // The system was unable to get the storage channel status. + STATUS_FLAG_UNSPECIFIED = 0; + + // Enough storage space is available. + STATUS_FLAG_GREEN = 10; + + // Free storage space is low, user must stop writing new data. Compaction writes are still allowed. + STATUS_FLAG_YELLOW_STOP = 20; + + // No free storage space is available, no writes will succeed. + STATUS_FLAG_ORANGE_OUT_SPACE = 30; + } + + // Storage channel index. + uint32 storage_channel = 1; + + // The status flag of the storage channel. + StatusFlag status_flag = 2; +} + + +message Priorities { + enum Priority { + // Use default priority (PRIORITY_REALTIME). + PRIORITY_UNSPECIFIED = 0; + + // High priority for user-initiated operations, the default priority. + PRIORITY_REALTIME = 1; + + // Low priority for background system activity. + PRIORITY_BACKGROUND = 2; + } +} + + +message StorageConfig { + message ChannelConfig { + // Media for the storage channel. + // This field specifies the kind of one of the storage_pool_types configured in config.yaml + string media = 1; + } + + // Channel configs. + // Channels 0 and 1 are system channels needed for tablet operation. + // Channels starting with 2 are user channels. + repeated ChannelConfig channel = 1; +} + + +message KeyRange { + // The lower bound of the key range. + // If unspecified, the range begins from the lowest key. 
+ oneof from_bound { + // Set in order for the range to include the key specified + string from_key_inclusive = 1; + // Set in order for the range not to include the key specified + string from_key_exclusive = 2; + } + + // The higher bound of the key range. + // If unspecified, the range ends with the highest key. + oneof to_bound { + // Set in order for the range to include the key specified + string to_key_inclusive = 3; + // Set in order for the range not to include the key specified + string to_key_exclusive = 4; + } +} + +// The lock mechanism provides a way to ensure that only one client holds the lock. +// The client is provided the lock generation. +// Only operations with matching lock generation and operations with no lock generation are executed. +// When the lock generation is mismatched, operations fail with the PRECONDITION_FAILED status. +message AcquireLockRequest { + Ydb.Operations.OperationParams operation_params = 1; + + // Volume path. + string path = 2; + // Partition of the volume. + uint64 partition_id = 3; +} + +message AcquireLockResponse { + // Operation contains the result of the request. Check the ydb_operation.proto. + Ydb.Operations.Operation operation = 1; +} + +message AcquireLockResult { + // The generation of the lock to provide as an argument to all the operations the user performs with the partition. + uint64 lock_generation = 1; + + // Contains 0 if the request was sent to the node of the partition, node ID of the partition otherwise. + uint32 node_id = 2; +} + +message ExecuteTransactionRequest { + message Command { + message Rename { + // The key to change. + string old_key = 1; + + // The new key to change the old key to. + string new_key = 2; + } + message Concat { + // Keys to use as the source for the concatenation. + repeated string input_keys = 1; + + // New key to use for the result of the concatenation. + string output_key = 2; + + // Input keys are deleted after the concatenation by default. 
+ // In order to keep both the inputs and the output, set keep_inputs to true. + bool keep_inputs = 3; + } + + // Make a copy of a range of key-value pairs. + // New keys are formed by removing a prefix and/or prepending keys with the new prefix. + // For example, copy of the key-value pairs [{aaabc,1}, {aaaef,2}, {baaef,3}] can be stripped of the 'aa' prefix + // and prepended with the 'x' so that the new pairs that are added are [{xabc, 1}, {xaef, 2}]. + message CopyRange { + // The range of keys to copy + KeyRange range = 1; + + // For each source key that begins with the prefix_to_remove, that prefix is removed from the new key before + // prepending it with the prefix_to_add. + // Acts as filter if not empty. + string prefix_to_remove = 2; + + // The prefix to prepend to each new key. + string prefix_to_add = 3; + } + message Write { + enum Tactic { + // Use default tactic (TACTIC_MAX_THROUGHPUT). + TACTIC_UNSPECIFIED = 0; + + // Write minimum required redundant data. Does not affect storage durability. The default tactic. + TACTIC_MAX_THROUGHPUT = 1; + + // Write additional redundant data to reduce operation duration. Will use additional space. + TACTIC_MIN_LATENCY = 2; + } + // Key of the key-value pair to write. + string key = 1; + + // Value of the key-value pair to write. + bytes value = 2; + + // Storage channel to write the value to. Channel numbers begin with 1 and may go up to approximately 250 + // (depends on the channel configuration of each partition). + // Channel 1 is called the INLINE channel (value is stored in the index table). + // Channel 2 is called the MAIN channel (value is stored as a separate blob in the Distributed Storage). + // Channels 1 and 2 are available for all partitions. + // If the storage channel specified is not configured for the partition, the value is stored in + // channel 2 (the MAIN channel). 
+ uint32 storage_channel = 3; // (default = 0 is same as 2 or MAIN) + + // Priority to use for the Distributed Storage Get operation. + // Has no effect for the INLINE storage channel. + Priorities.Priority priority = 4; + + // Tactic to use for the Distributed Storage Put operation. + // Has no effect for the INLINE storage channel. + Tactic tactic = 5; + } + message DeleteRange { + // The range of keys to delete. + KeyRange range = 1; + } + + oneof action { + // Delete key-value pairs with keys in the range specified. + DeleteRange delete_range = 1; + + // Change the key of a key-value pair. + Rename rename = 2; + + // Create a copy of key-value pairs with keys in the range specified by removing and/or prepending a prefix + // to each key. + CopyRange copy_range = 3; + + // Create a new key-value pair with key specified by concatenating values of multiple other key-value pairs + // with keys specified. + Concat concat = 4; + + // Create a new key-value pair with key and value specified. + Write write = 5; + } + } + + Ydb.Operations.OperationParams operation_params = 1; + + // Volume path. + string path = 2; + // Partition of the volume. + uint64 partition_id = 3; + + // Generation of the exclusive lock acquired for the partition as a result of an AcquireLock call. + optional uint64 lock_generation = 4; + + // Commands to execute as a single atomic transaction. + // The order of execution of commands is the same as the order of commands in the ExecuteTransactionRequest. + // The order of execution of different transactions is not specified. + repeated Command commands = 5; +} + +message ExecuteTransactionResponse { + // Operation contains the result of the request. Check the ydb_operation.proto. + Ydb.Operations.Operation operation = 1; +} + +message ExecuteTransactionResult { + // Contains status flags for the storage channels used by the transaction. 
+ repeated StorageChannelInfo storage_channel_info = 1; + + // Contains 0 if the request was sent to the node of the partition, node ID of the partition otherwise. + uint32 node_id = 2; +} + +message ReadRequest { + Ydb.Operations.OperationParams operation_params = 1; + + // Volume path. + string path = 2; + // Partition of the volume. + uint64 partition_id = 3; + + // Generation of the exclusive lock acquired for the partition as a result of an AcquireLock call. + optional uint64 lock_generation = 4; + + // Key of the key-value pair to read. + string key = 5; + + // Offset in bytes from the beginning of the value to read data from. + uint64 offset = 6; + + // Size of the data to read in bytes. 0 means "read to the end of the value". + uint64 size = 7; + + // Result protobuf size limit. + // Overrides the default limit only with a smaller value. + // 0 means "use the default limit". + uint64 limit_bytes = 8; + + // Priority to use for the Distributed Storage Get operation. + // Has no effect for the INLINE storage channel. + Priorities.Priority priority = 9; +} + +message ReadResponse { + // Operation contains the result of the request. Check the ydb_operation.proto. + Ydb.Operations.Operation operation = 1; +} + +message ReadResult { + // The key of the requested key-value pair. + string requested_key = 1; + + // Offset in bytes from the beginning of the value requested. + uint64 requested_offset = 2; + + // Size of the data requested. + uint64 requested_size = 3; + + // The bytes of the requested part of the value. + bytes value = 4; + + // If requested data size is larger than limit_bytes then result will contain only part of the requested value and + // the is_overrun flag will be set. + bool is_overrun = 5; + + // Contains 0 if the request was sent to the node of the partition, node ID of the partition otherwise. + uint32 node_id = 6; +} + +message ReadRangeRequest { + Ydb.Operations.OperationParams operation_params = 1; + + // Volume path. 
+ string path = 2; + // Partition of the volume. + uint64 partition_id = 3; + + // Generation of the exclusive lock acquired for the partition as a result of an AcquireLock call. + optional uint64 lock_generation = 4; + + // The range of keys to read. + KeyRange range = 5; + + // Result protobuf size limit. + // Overrides the default limit only with a smaller value. + // 0 means "use the default limit". + uint64 limit_bytes = 6; + + // Priority to use for the Distributed Storage Get operation. + // Has no effect for the INLINE storage channel. + Priorities.Priority priority = 7; +} + +message ReadRangeResponse { + // Operation contains the result of the request. Check the ydb_operation.proto. + Ydb.Operations.Operation operation = 1; +} + +message ReadRangeResult { + message KeyValuePair { + // The key of the key-value pair. + string key = 1; + + // The value of the key-value pair. + bytes value = 2; + + // Unix time of the creation of the key-value pair (in ms). + uint64 creation_unix_time = 4; + + // Contains the index of the actually used storage channel. The actually used storage channel may differ from + // the value specified in the write request for example if there were no such storage channel at the moment + // of execution of the write command. + // For values created as a result of concatenation or copy of concatenated values, the storage channel of the first + // part of the value is specified. + uint32 storage_channel = 5; + } + + // List of key-value pairs requested. + repeated KeyValuePair pair = 1; + + // If requested data size is larger than limit_bytes then result will contain + // only part of the requested key-value pairs and the is_overrun flag will be set. + // The pair list contains only full values. + // In order to continue reading the client should send another request for the key range + // with from_key_exclusive set to the last key read. 
+ // If first pair doesn't fit the limit_bytes then the result will be empty and the is_overrun flag will be set. + // Use ListRange and Read methods to find and read large key-value pairs. + bool is_overrun = 2; + + // Contains 0 if the request was sent to the node of the partition, node ID of the partition otherwise. + uint32 node_id = 3; +} + +message ListRangeRequest { + Ydb.Operations.OperationParams operation_params = 1; + + // Volume path. + string path = 2; + // Partition of the volume. + uint64 partition_id = 3; + + // Generation of the exclusive lock acquired for the partition as a result of an AcquireLock call. + optional uint64 lock_generation = 4; + + // The range of keys to read + KeyRange range = 5; + + // Result protobuf size limit. If not 0, overrides the default one only with a smaller value. + uint64 limit_bytes = 6; +} + +message ListRangeResponse { + // Operation contains the result of the request. Check the ydb_operation.proto. + Ydb.Operations.Operation operation = 1; +} + +message ListRangeResult { + message KeyInfo { + // The key of the key-value pair. + string key = 1; + + // Full size of the value of the key-value pair. + uint32 value_size = 2; + + // Unix time of the creation of the key-value pair (in ms). + uint64 creation_unix_time = 3; + + // Contains the index of the actually used storage channel. The actually used storage channel may differ from + // the value specified in the write request for example if there were no such storage channel at the moment + // of execution of the write command. + // For values created as a result of concatenation or copy of concatenated values, the storage channel of the first + // part of the value is specified. + uint32 storage_channel = 4; + } + + // List of the key-value pairs and metadata requested. + repeated KeyInfo key = 1; + + // If requested data size is larger than limit_bytes then result will contain + // only part of the requested key-value pairs and the is_overrun flag will be set. 
+ bool is_overrun = 2; + + // Contains 0 if the request was sent to the node of the partition, node ID of the partition otherwise. + uint32 node_id = 3; +} + +message GetStorageChannelStatusRequest { + Ydb.Operations.OperationParams operation_params = 1; + + // Volume path. + string path = 2; + // Partition of the volume. + uint64 partition_id = 3; + + // Generation of the exclusive lock acquired for the partition as a result of an AcquireLock call. + optional uint64 lock_generation = 4; + + // List of storage channels to get StorageChannelInfo for. + repeated uint32 storage_channel = 5; +} + +message GetStorageChannelStatusResponse { + // Operation contains the result of the request. Check the ydb_operation.proto. + Ydb.Operations.Operation operation = 1; +} + +message GetStorageChannelStatusResult { + // Contains status flags for the requested storage channels. + repeated StorageChannelInfo storage_channel_info = 1; + + // Contains 0 if the request was sent to the node of the partition, node ID of the partition otherwise. + uint32 node_id = 2; +} + +message CreateVolumeRequest { + Ydb.Operations.OperationParams operation_params = 1; + + // Volume path. + string path = 2; + + // The partition count of the new volume. + uint32 partition_count = 4; + + // Set storage kinds for storage channels. + StorageConfig storage_config = 5; +} + +message CreateVolumeResponse { + // Operation contains the result of the request. Check the ydb_operation.proto. + Ydb.Operations.Operation operation = 1; +} + +message CreateVolumeResult { +} + +message DropVolumeRequest { + Ydb.Operations.OperationParams operation_params = 1; + + // Volume path. + string path = 2; +} + +message DropVolumeResponse { + // Operation contains the result of the request. Check the ydb_operation.proto. + Ydb.Operations.Operation operation = 1; +} + +message DropVolumeResult { +} + +message AlterVolumeRequest { + Ydb.Operations.OperationParams operation_params = 1; + + // Volume path. 
+ string path = 2; + + // Change the partition count of the volume. + // The value should be greater than or equal to the current partition count. + uint32 alter_partition_count = 3; + + // Set storage kinds for storage channels. + // If the field is not present, storage channel settings are not changed. + StorageConfig storage_config = 4; +} + +message AlterVolumeResponse { + // Operation contains the result of the request. Check the ydb_operation.proto. + Ydb.Operations.Operation operation = 1; +} + +message AlterVolumeResult { +} + +message DescribeVolumeRequest { + Ydb.Operations.OperationParams operation_params = 1; + + // Volume path. + string path = 2; +} + +message DescribeVolumeResponse { + // Operation contains the result of the request. Check the ydb_operation.proto. + Ydb.Operations.Operation operation = 1; +} + +message DescribeVolumeResult { + // Volume path. + string path = 1; + + // Count of partitions. + uint64 partition_count = 2; +} + +message ListLocalPartitionsRequest { + Ydb.Operations.OperationParams operation_params = 1; + + // Volume path. + string path = 2; + + // ID of the node to get partitions for. + // 0 means the node the request was sent to. + uint64 node_id = 3; +} + +message ListLocalPartitionsResponse { + // Operation contains the result of the request. Check the ydb_operation.proto. + Ydb.Operations.Operation operation = 1; +} + +message ListLocalPartitionsResult { + // Volume path. + string path = 1; + + // ID of the node. + uint64 node_id = 2; + + // List of the partitions of the volume on the node. 
+ repeated uint64 partition_ids = 3; +} diff --git a/ydb/services/CMakeLists.txt b/ydb/services/CMakeLists.txt index 03be2fc52a..ec13d419c4 100644 --- a/ydb/services/CMakeLists.txt +++ b/ydb/services/CMakeLists.txt @@ -14,6 +14,7 @@ add_subdirectory(discovery) add_subdirectory(dynamic_config) add_subdirectory(fq) add_subdirectory(kesus) +add_subdirectory(keyvalue) add_subdirectory(lib) add_subdirectory(local_discovery) add_subdirectory(metadata) diff --git a/ydb/services/datastreams/datastreams_ut.cpp b/ydb/services/datastreams/datastreams_ut.cpp index 0addc976c4..ad3645ef8c 100644 --- a/ydb/services/datastreams/datastreams_ut.cpp +++ b/ydb/services/datastreams/datastreams_ut.cpp @@ -1374,7 +1374,7 @@ Y_UNIT_TEST_SUITE(DataStreams) { UNIT_ASSERT_VALUES_EQUAL(item.GetData(), item.GetPartitionKey()); auto hashKey = item.GetExplicitHash().empty() ? HexBytesToDecimal(MD5::Calc(item.GetPartitionKey())) : BytesToDecimal(item.GetExplicitHash()); UNIT_ASSERT_VALUES_EQUAL(NKikimr::NDataStreams::V1::ShardFromDecimal(hashKey, 5), item.GetPartitionStream()->GetPartitionId()); - UNIT_ASSERT(!item.GetIp().empty()); + UNIT_ASSERT(item.GetIp().empty()); if (item.GetData() == dataStr) { UNIT_ASSERT_VALUES_EQUAL(item.GetExplicitHash(), dataStr); } diff --git a/ydb/services/datastreams/put_records_actor.h b/ydb/services/datastreams/put_records_actor.h index efd1d716b4..20886af766 100644 --- a/ydb/services/datastreams/put_records_actor.h +++ b/ydb/services/datastreams/put_records_actor.h @@ -28,7 +28,9 @@ namespace NKikimr::NDataStreams::V1 { TString GetSerializedData(const TPutRecordsItem& item) { NKikimrPQClient::TDataChunk proto; - proto.SetIp(item.Ip); + //TODO: get ip from client, not grpc; + // proto.SetIp(item.Ip); + proto.SetCodec(0); // NPersQueue::CODEC_RAW proto.SetData(item.Data); @@ -516,10 +518,11 @@ namespace NKikimr::NDataStreams::V1 { if (putRecordsResult.records(0).error_code() == "ProvisionedThroughputExceededException" || putRecordsResult.records(0).error_code() == 
"ThrottlingException") { - return ReplyWithResult(Ydb::StatusIds::OVERLOADED, ctx); + return ReplyWithError(Ydb::StatusIds::OVERLOADED, Ydb::PersQueue::ErrorCode::OVERLOAD, putRecordsResult.records(0).error_message(), ctx); } //TODO: other codes - access denied and so on - return ReplyWithResult(Ydb::StatusIds::INTERNAL_ERROR, ctx); + return ReplyWithError(Ydb::StatusIds::INTERNAL_ERROR, Ydb::PersQueue::ErrorCode::ERROR, putRecordsResult.records(0).error_message(), ctx); + } } diff --git a/ydb/services/keyvalue/CMakeLists.darwin.txt b/ydb/services/keyvalue/CMakeLists.darwin.txt new file mode 100644 index 0000000000..471d8eac78 --- /dev/null +++ b/ydb/services/keyvalue/CMakeLists.darwin.txt @@ -0,0 +1,24 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(ut) + +add_library(ydb-services-keyvalue) +target_link_libraries(ydb-services-keyvalue PUBLIC + contrib-libs-cxxsupp + yutil + api-grpc + cpp-grpc-server + ydb-core-grpc_services + core-grpc_services-base + core-kesus-tablet + ydb-core-keyvalue +) +target_sources(ydb-services-keyvalue PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/services/keyvalue/grpc_service.cpp +) diff --git a/ydb/services/keyvalue/CMakeLists.linux-aarch64.txt b/ydb/services/keyvalue/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..a31f491ba7 --- /dev/null +++ b/ydb/services/keyvalue/CMakeLists.linux-aarch64.txt @@ -0,0 +1,25 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). 
These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(ut) + +add_library(ydb-services-keyvalue) +target_link_libraries(ydb-services-keyvalue PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + api-grpc + cpp-grpc-server + ydb-core-grpc_services + core-grpc_services-base + core-kesus-tablet + ydb-core-keyvalue +) +target_sources(ydb-services-keyvalue PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/services/keyvalue/grpc_service.cpp +) diff --git a/ydb/services/keyvalue/CMakeLists.linux.txt b/ydb/services/keyvalue/CMakeLists.linux.txt new file mode 100644 index 0000000000..a31f491ba7 --- /dev/null +++ b/ydb/services/keyvalue/CMakeLists.linux.txt @@ -0,0 +1,25 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(ut) + +add_library(ydb-services-keyvalue) +target_link_libraries(ydb-services-keyvalue PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + api-grpc + cpp-grpc-server + ydb-core-grpc_services + core-grpc_services-base + core-kesus-tablet + ydb-core-keyvalue +) +target_sources(ydb-services-keyvalue PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/services/keyvalue/grpc_service.cpp +) diff --git a/ydb/services/keyvalue/CMakeLists.txt b/ydb/services/keyvalue/CMakeLists.txt new file mode 100644 index 0000000000..3e0811fb22 --- /dev/null +++ b/ydb/services/keyvalue/CMakeLists.txt @@ -0,0 +1,15 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND UNIX AND NOT APPLE AND NOT ANDROID) + include(CMakeLists.linux-aarch64.txt) +elseif (APPLE) + include(CMakeLists.darwin.txt) +elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND UNIX AND NOT APPLE AND NOT ANDROID) + include(CMakeLists.linux.txt) +endif() diff --git a/ydb/services/keyvalue/grpc_service.cpp b/ydb/services/keyvalue/grpc_service.cpp new file mode 100644 index 0000000000..8e3b663027 --- /dev/null +++ b/ydb/services/keyvalue/grpc_service.cpp @@ -0,0 +1,81 @@ +#include "grpc_service.h" + +#include <ydb/core/grpc_services/grpc_helper.h> +#include <ydb/core/grpc_services/base/base.h> +#include <ydb/core/grpc_services/service_keyvalue.h> + + +namespace NKikimr::NGRpcService { + +TKeyValueGRpcService::TKeyValueGRpcService(NActors::TActorSystem* actorSystem, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, NActors::TActorId grpcRequestProxyId) + : ActorSystem(actorSystem) + , Counters(std::move(counters)) + , GRpcRequestProxyId(grpcRequestProxyId) +{ +} + +TKeyValueGRpcService::~TKeyValueGRpcService() = default; + +void TKeyValueGRpcService::InitService(grpc::ServerCompletionQueue* cq, NGrpc::TLoggerPtr logger) { + CQ = cq; + SetupIncomingRequests(std::move(logger)); +} + +void TKeyValueGRpcService::SetGlobalLimiterHandle(NGrpc::TGlobalLimiter* limiter) { + Limiter = limiter; +} + +bool TKeyValueGRpcService::IncRequest() { + return Limiter->Inc(); +} + +void TKeyValueGRpcService::DecRequest() { + Limiter->Dec(); +} + +void TKeyValueGRpcService::SetupIncomingRequests(NGrpc::TLoggerPtr logger) { + auto getCounterBlock = NGRpcService::CreateCounterCb(Counters, ActorSystem); + +#ifdef 
SETUP_METHOD +#error SETUP_METHOD macro collision +#endif + +#define SETUP_METHOD(methodName, method, rlMode) \ + MakeIntrusive<NGRpcService::TGRpcRequest< \ + Ydb::KeyValue::Y_CAT(methodName, Request), \ + Ydb::KeyValue::Y_CAT(methodName, Response), \ + TKeyValueGRpcService>> \ + ( \ + this, \ + &Service_, \ + CQ, \ + [this](NGrpc::IRequestContextBase* reqCtx) { \ + NGRpcService::ReportGrpcReqToMon(*ActorSystem, reqCtx->GetPeer()); \ + ActorSystem->Send(GRpcRequestProxyId, new TGrpcRequestOperationCall< \ + Ydb::KeyValue::Y_CAT(methodName, Request), \ + Ydb::KeyValue::Y_CAT(methodName, Response)>(reqCtx, &method, \ + TRequestAuxSettings{rlMode, nullptr})); \ + }, \ + &Ydb::KeyValue::V1::KeyValueService::AsyncService::Y_CAT(Request, methodName), \ + "KeyValue/" Y_STRINGIZE(methodName), \ + logger, \ + getCounterBlock("keyvalue", Y_STRINGIZE(methodName)) \ + )->Run() + + SETUP_METHOD(CreateVolume, DoCreateVolumeKeyValue, TRateLimiterMode::Rps); + SETUP_METHOD(DropVolume, DoDropVolumeKeyValue, TRateLimiterMode::Rps); + SETUP_METHOD(AlterVolume, DoAlterVolumeKeyValue, TRateLimiterMode::Rps); + SETUP_METHOD(DescribeVolume, DoDescribeVolumeKeyValue, TRateLimiterMode::Rps); + SETUP_METHOD(ListLocalPartitions, DoListLocalPartitionsKeyValue, TRateLimiterMode::Rps); + + SETUP_METHOD(AcquireLock, DoAcquireLockKeyValue, TRateLimiterMode::Rps); + SETUP_METHOD(ExecuteTransaction, DoExecuteTransactionKeyValue, TRateLimiterMode::Rps); + SETUP_METHOD(Read, DoReadKeyValue, TRateLimiterMode::Rps); + SETUP_METHOD(ReadRange, DoReadRangeKeyValue, TRateLimiterMode::Rps); + SETUP_METHOD(ListRange, DoListRangeKeyValue, TRateLimiterMode::Rps); + SETUP_METHOD(GetStorageChannelStatus, DoGetStorageChannelStatusKeyValue, TRateLimiterMode::Rps); + +#undef SETUP_METHOD +} + +} // namespace NKikimr::NGRpcService diff --git a/ydb/services/keyvalue/grpc_service.h b/ydb/services/keyvalue/grpc_service.h new file mode 100644 index 0000000000..18161acad1 --- /dev/null +++ 
b/ydb/services/keyvalue/grpc_service.h @@ -0,0 +1,37 @@ +#pragma once + +#include <ydb/public/api/grpc/ydb_keyvalue_v1.grpc.pb.h> + +#include <library/cpp/grpc/server/grpc_server.h> +#include <library/cpp/actors/core/actorsystem.h> + + +namespace NKikimr::NGRpcService { + +class TKeyValueGRpcService + : public NGrpc::TGrpcServiceBase<Ydb::KeyValue::V1::KeyValueService> +{ +public: + TKeyValueGRpcService(NActors::TActorSystem* actorSystem, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, + NActors::TActorId grpcRequestProxyId); + ~TKeyValueGRpcService(); + + void InitService(grpc::ServerCompletionQueue* cq, NGrpc::TLoggerPtr logger) override; + void SetGlobalLimiterHandle(NGrpc::TGlobalLimiter* limiter) override; + + bool IncRequest(); + void DecRequest(); + +private: + void SetupIncomingRequests(NGrpc::TLoggerPtr logger); + +private: + NActors::TActorSystem* ActorSystem = nullptr; + TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; + NActors::TActorId GRpcRequestProxyId; + + grpc::ServerCompletionQueue* CQ = nullptr; + NGrpc::TGlobalLimiter* Limiter = nullptr; +}; + +} // namespace NKikimr::NGRpcService diff --git a/ydb/services/keyvalue/grpc_service_ut.cpp b/ydb/services/keyvalue/grpc_service_ut.cpp new file mode 100644 index 0000000000..131c3ac363 --- /dev/null +++ b/ydb/services/keyvalue/grpc_service_ut.cpp @@ -0,0 +1,838 @@ +#include "grpc_service.h" + +#include <ydb/core/keyvalue/keyvalue.h> +#include <ydb/core/keyvalue/keyvalue_events.h> +#include <ydb/core/protos/config.pb.h> +#include <ydb/core/testlib/basics/appdata.h> +#include <ydb/core/testlib/test_client.h> +#include <ydb/core/tx/scheme_cache/scheme_cache.h> + +#include <ydb/public/api/grpc/ydb_scheme_v1.grpc.pb.h> + +#include <ydb/public/sdk/cpp/client/resources/ydb_resources.h> + +#include <library/cpp/grpc/client/grpc_client_low.h> +#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/testing/unittest/tests_data.h> +#include <library/cpp/logger/backend.h> + +#include 
<grpc++/client_context.h> +#include <grpc++/create_channel.h> + +#include <util/string/builder.h> + + +TString PrintIssue(const ::google::protobuf::RepeatedPtrField< ::Ydb::Issue::IssueMessage> &issues) { + TStringBuilder msg; + msg << '{'; + for (auto &issue : issues) { + msg << " issue# " << issue.message(); + } + msg << " }"; + return msg; +} + + +#define UNIT_ASSERT_CHECK_STATUS(got, exp) \ + UNIT_ASSERT_C(got.status() == exp, "exp# " << Ydb::StatusIds::StatusCode_Name(exp) \ + << " got# " << Ydb::StatusIds::StatusCode_Name(got.status()) << " issues# " << PrintIssue(got.issues())) \ +// UNIT_ASSERT_CHECK_STATUS + + +namespace NKikimr::NGRpcService { + + +struct TKikimrTestSettings { + static constexpr bool SSL = false; + static constexpr bool AUTH = false; + static constexpr bool PrecreatePools = true; + static constexpr bool EnableSystemViews = true; +}; + +struct TKikimrTestWithAuth : TKikimrTestSettings { + static constexpr bool AUTH = true; +}; + +struct TKikimrTestWithAuthAndSsl : TKikimrTestWithAuth { + static constexpr bool SSL = true; +}; + +struct TKikimrTestNoSystemViews : TKikimrTestSettings { + static constexpr bool EnableSystemViews = false; +}; + +template <typename TestSettings = TKikimrTestSettings> +class TBasicKikimrWithGrpcAndRootSchema { +public: + TBasicKikimrWithGrpcAndRootSchema( + NKikimrConfig::TAppConfig appConfig = {}, + TAutoPtr<TLogBackend> logBackend = {}) + { + ui16 port = PortManager.GetPort(2134); + ui16 grpc = PortManager.GetPort(2135); + ServerSettings = new Tests::TServerSettings(port); + ServerSettings->SetGrpcPort(grpc); + ServerSettings->SetLogBackend(logBackend); + ServerSettings->SetDomainName("Root"); + ServerSettings->SetDynamicNodeCount(1); + if (TestSettings::PrecreatePools) { + ServerSettings->AddStoragePool("ssd"); + ServerSettings->AddStoragePool("hdd"); + ServerSettings->AddStoragePool("hdd1"); + ServerSettings->AddStoragePool("hdd2"); + } else { + ServerSettings->AddStoragePoolType("ssd"); + 
ServerSettings->AddStoragePoolType("hdd"); + ServerSettings->AddStoragePoolType("hdd1"); + ServerSettings->AddStoragePoolType("hdd2"); + } + ServerSettings->Formats = new TFormatFactory; + ServerSettings->FeatureFlags = appConfig.GetFeatureFlags(); + ServerSettings->RegisterGrpcService<NKikimr::NGRpcService::TKeyValueGRpcService>("keyvalue"); + + Server_.Reset(new Tests::TServer(*ServerSettings)); + Tenants_.Reset(new Tests::TTenants(Server_)); + + //Server_->GetRuntime()->SetLogPriority(NKikimrServices::TX_PROXY_SCHEME_CACHE, NActors::NLog::PRI_DEBUG); + //Server_->GetRuntime()->SetLogPriority(NKikimrServices::SCHEME_BOARD_REPLICA, NActors::NLog::PRI_DEBUG); + //Server_->GetRuntime()->SetLogPriority(NKikimrServices::SCHEME_BOARD_SUBSCRIBER, NActors::NLog::PRI_TRACE); + //Server_->GetRuntime()->SetLogPriority(NKikimrServices::SCHEME_BOARD_POPULATOR, NActors::NLog::PRI_DEBUG); + //Server_->GetRuntime()->SetLogPriority(NKikimrServices::FLAT_TX_SCHEMESHARD, NActors::NLog::PRI_DEBUG); + //Server_->GetRuntime()->SetLogPriority(NKikimrServices::TX_PROXY, NActors::NLog::PRI_DEBUG); + Server_->GetRuntime()->SetLogPriority(NKikimrServices::KEYVALUE, NActors::NLog::PRI_DEBUG); + //Server_->GetRuntime()->SetLogPriority(NKikimrServices::BOOTSTRAPPER, NActors::NLog::PRI_DEBUG); + //Server_->GetRuntime()->SetLogPriority(NKikimrServices::STATESTORAGE, NActors::NLog::PRI_DEBUG); + //Server_->GetRuntime()->SetLogPriority(NKikimrServices::TABLET_EXECUTOR, NActors::NLog::PRI_DEBUG); + //Server_->GetRuntime()->SetLogPriority(NKikimrServices::SAUSAGE_BIO, NActors::NLog::PRI_DEBUG); + //Server_->GetRuntime()->SetLogPriority(NKikimrServices::TABLET_FLATBOOT, NActors::NLog::PRI_DEBUG); + //Server_->GetRuntime()->SetLogPriority(NKikimrServices::TABLET_OPS_HOST, NActors::NLog::PRI_DEBUG); + //Server_->GetRuntime()->SetLogPriority(NKikimrServices::TABLET_SAUSAGECACHE, NActors::NLog::PRI_DEBUG); + //Server_->GetRuntime()->SetLogPriority(NKikimrServices::TX_OLAPSHARD, 
NActors::NLog::PRI_DEBUG); + //Server_->GetRuntime()->SetLogPriority(NKikimrServices::TX_COLUMNSHARD, NActors::NLog::PRI_DEBUG); + + NGrpc::TServerOptions grpcOption; + if (TestSettings::AUTH) { + grpcOption.SetUseAuth(true); + } + grpcOption.SetPort(grpc); + Server_->EnableGRpc(grpcOption); + + Tests::TClient annoyingClient(*ServerSettings); + if (ServerSettings->AppConfig.GetDomainsConfig().GetSecurityConfig().GetEnforceUserTokenRequirement()) { + annoyingClient.SetSecurityToken("root@builtin"); + } + annoyingClient.InitRootScheme("Root"); + GRpcPort_ = grpc; + } + + ui16 GetPort() { + return GRpcPort_; + } + + TPortManager& GetPortManager() { + return PortManager; + } + + void ResetSchemeCache(TString path, ui32 nodeIndex = 0) { + TTestActorRuntime* runtime = Server_->GetRuntime(); + Tests::TClient annoyingClient(*ServerSettings); + annoyingClient.RefreshPathCache(runtime, path, nodeIndex); + } + + TTestActorRuntime* GetRuntime() { + return Server_->GetRuntime(); + } + + Tests::TServer& GetServer() { + return *Server_; + } + + Tests::TServerSettings::TPtr ServerSettings; + Tests::TServer::TPtr Server_; + THolder<Tests::TTenants> Tenants_; +private: + TPortManager PortManager; + ui16 GRpcPort_; +}; + +using TKikimrWithGrpcAndRootSchema = TBasicKikimrWithGrpcAndRootSchema<TKikimrTestSettings>; + +Y_UNIT_TEST_SUITE(KeyValueGRPCService) { + + void InitTablet(TKikimrWithGrpcAndRootSchema &server, ui64 tabletId) { + server.GetRuntime()->SetScheduledLimit(100); + CreateTestBootstrapper(*server.GetRuntime(), + CreateTestTabletInfo(tabletId, TTabletTypes::KeyValue), + &CreateKeyValueFlat); + NanoSleep(3'000'000'000); + } + + void CmdWrite(ui64 tabletId, const TDeque<TString> &keys, const TDeque<TString> &values, TKikimrWithGrpcAndRootSchema &server) + { + Y_VERIFY(keys.size() == values.size()); + TAutoPtr<IEventHandle> handle; + TEvKeyValue::TEvResponse *result; + THolder<TEvKeyValue::TEvRequest> request; + TActorId edgeActor = server.GetRuntime()->AllocateEdgeActor(); + 
for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { + try { + server.GetRuntime()->ResetScheduledCount(); + request.Reset(new TEvKeyValue::TEvRequest); + for (ui64 idx = 0; idx < keys.size(); ++idx) { + auto write = request->Record.AddCmdWrite(); + write->SetKey(keys[idx]); + write->SetValue(values[idx]); + write->SetStorageChannel(NKikimrClient::TKeyValueRequest::MAIN); + write->SetPriority(NKikimrClient::TKeyValueRequest::REALTIME); + } + server.GetRuntime()->SendToPipe(tabletId, edgeActor, request.Release(), 0, GetPipeConfigWithRetries()); + result = server.GetRuntime()->GrabEdgeEvent<TEvKeyValue::TEvResponse>(handle); + UNIT_ASSERT(result); + UNIT_ASSERT(result->Record.HasStatus()); + UNIT_ASSERT_EQUAL(result->Record.GetStatus(), NMsgBusProxy::MSTATUS_OK); + UNIT_ASSERT_VALUES_EQUAL(result->Record.WriteResultSize(), values.size()); + for (ui64 idx = 0; idx < values.size(); ++idx) { + const auto &writeResult = result->Record.GetWriteResult(idx); + UNIT_ASSERT(writeResult.HasStatus()); + UNIT_ASSERT_EQUAL(writeResult.GetStatus(), NKikimrProto::OK); + UNIT_ASSERT(writeResult.HasStatusFlags()); + if (values[idx].size()) { + UNIT_ASSERT(writeResult.GetStatusFlags() & ui32(NKikimrBlobStorage::StatusIsValid)); + } + } + retriesLeft = 0; + } catch (NActors::TSchedulingLimitReachedException) { + UNIT_ASSERT(retriesLeft == 2); + } + } + } + + template <typename TCtx> + void AdjustCtxForDB(TCtx &ctx) { + ctx.AddMetadata(NYdb::YDB_AUTH_TICKET_HEADER, "root@builtin"); + } + + void MakeDirectory(auto &channel, const TString &path) { + std::unique_ptr<Ydb::Scheme::V1::SchemeService::Stub> stub; + stub = Ydb::Scheme::V1::SchemeService::NewStub(channel); + + Ydb::Scheme::MakeDirectoryRequest makeDirectoryRequest; + makeDirectoryRequest.set_path(path); + Ydb::Scheme::MakeDirectoryResponse makeDirectoryResponse; + grpc::ClientContext makeDirectoryCtx; + AdjustCtxForDB(makeDirectoryCtx); + stub->MakeDirectory(&makeDirectoryCtx, makeDirectoryRequest, 
&makeDirectoryResponse); + UNIT_ASSERT_CHECK_STATUS(makeDirectoryResponse.operation(), Ydb::StatusIds::SUCCESS); + } + + void MakeTable(auto &channel, const TString &path) { + std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> stub; + stub = Ydb::KeyValue::V1::KeyValueService::NewStub(channel); + + Ydb::KeyValue::CreateVolumeRequest createVolumeRequest; + createVolumeRequest.set_path(path); + createVolumeRequest.set_partition_count(1); + auto *storage_config = createVolumeRequest.mutable_storage_config(); + storage_config->add_channel()->set_media("ssd"); + storage_config->add_channel()->set_media("ssd"); + storage_config->add_channel()->set_media("ssd"); + + Ydb::KeyValue::CreateVolumeResponse createVolumeResponse; + Ydb::KeyValue::CreateVolumeResult createVolumeResult; + + grpc::ClientContext createVolumeCtx; + AdjustCtxForDB(createVolumeCtx); + stub->CreateVolume(&createVolumeCtx, createVolumeRequest, &createVolumeResponse); + UNIT_ASSERT_CHECK_STATUS(createVolumeResponse.operation(), Ydb::StatusIds::SUCCESS); + createVolumeResponse.operation().result().UnpackTo(&createVolumeResult); + } + + void AlterVolume(auto &channel, const TString &path, ui32 partition_count = 1) { + std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> stub; + stub = Ydb::KeyValue::V1::KeyValueService::NewStub(channel); + + Ydb::KeyValue::AlterVolumeRequest alterVolumeRequest; + alterVolumeRequest.set_path(path); + alterVolumeRequest.set_alter_partition_count(partition_count); + + Ydb::KeyValue::AlterVolumeResponse alterVolumeResponse; + Ydb::KeyValue::AlterVolumeResult alterVolumeResult; + + grpc::ClientContext alterVolumeCtx; + AdjustCtxForDB(alterVolumeCtx); + stub->AlterVolume(&alterVolumeCtx, alterVolumeRequest, &alterVolumeResponse); + UNIT_ASSERT_CHECK_STATUS(alterVolumeResponse.operation(), Ydb::StatusIds::SUCCESS); + alterVolumeResponse.operation().result().UnpackTo(&alterVolumeResult); + } + + void DropVolume(auto &channel, const TString &path) { + 
std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> stub; + stub = Ydb::KeyValue::V1::KeyValueService::NewStub(channel); + + Ydb::KeyValue::DropVolumeRequest dropVolumeRequest; + dropVolumeRequest.set_path(path); + + Ydb::KeyValue::DropVolumeResponse dropVolumeResponse; + Ydb::KeyValue::DropVolumeResult dropVolumeResult; + + grpc::ClientContext dropVolumeCtx; + AdjustCtxForDB(dropVolumeCtx); + stub->DropVolume(&dropVolumeCtx, dropVolumeRequest, &dropVolumeResponse); + UNIT_ASSERT_CHECK_STATUS(dropVolumeResponse.operation(), Ydb::StatusIds::SUCCESS); + dropVolumeResponse.operation().result().UnpackTo(&dropVolumeResult); + } + + Ydb::KeyValue::DescribeVolumeResult DescribeVolume(auto &channel, const TString &path) { + std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> stub; + stub = Ydb::KeyValue::V1::KeyValueService::NewStub(channel); + + Ydb::KeyValue::DescribeVolumeRequest describeVolumeRequest; + describeVolumeRequest.set_path(path); + + Ydb::KeyValue::DescribeVolumeResponse describeVolumeResponse; + Ydb::KeyValue::DescribeVolumeResult describeVolumeResult; + + grpc::ClientContext describeVolumeCtx; + AdjustCtxForDB(describeVolumeCtx); + stub->DescribeVolume(&describeVolumeCtx, describeVolumeRequest, &describeVolumeResponse); + UNIT_ASSERT_CHECK_STATUS(describeVolumeResponse.operation(), Ydb::StatusIds::SUCCESS); + describeVolumeResponse.operation().result().UnpackTo(&describeVolumeResult); + return describeVolumeResult; + } + + + Ydb::Scheme::ListDirectoryResult ListDirectory(auto &channel, const TString &path) { + std::unique_ptr<Ydb::Scheme::V1::SchemeService::Stub> stub; + stub = Ydb::Scheme::V1::SchemeService::NewStub(channel); + Ydb::Scheme::ListDirectoryRequest listDirectoryRequest; + listDirectoryRequest.set_path(path); + + Ydb::Scheme::ListDirectoryResult listDirectoryResult; + Ydb::Scheme::ListDirectoryResponse listDirectoryResponse; + + grpc::ClientContext listDirectoryCtx; + AdjustCtxForDB(listDirectoryCtx); + 
stub->ListDirectory(&listDirectoryCtx, listDirectoryRequest, &listDirectoryResponse); + + UNIT_ASSERT_CHECK_STATUS(listDirectoryResponse.operation(), Ydb::StatusIds::SUCCESS); + listDirectoryResponse.operation().result().UnpackTo(&listDirectoryResult); + return listDirectoryResult; + } + + ui64 AcquireLock( const TString &path, ui64 partitionId, auto &stub) { + Ydb::KeyValue::AcquireLockRequest request; + request.set_path(path); + request.set_partition_id(partitionId); + + Ydb::KeyValue::AcquireLockResponse response; + Ydb::KeyValue::AcquireLockResult result; + + grpc::ClientContext ctx; + AdjustCtxForDB(ctx); + stub->AcquireLock(&ctx, request, &response); + UNIT_ASSERT_CHECK_STATUS(response.operation(), Ydb::StatusIds::SUCCESS); + response.operation().result().UnpackTo(&result); + return result.lock_generation(); + } + + void WaitTableCreation(TKikimrWithGrpcAndRootSchema &server, const TString &path) { + bool again = true; + for (ui32 i = 0; i < 10 && again; ++i) { + Cerr << "Wait iteration# " << i << Endl; + auto req = MakeHolder<NSchemeCache::TSchemeCacheNavigate>(); + auto& entry = req->ResultSet.emplace_back(); + entry.Path = SplitPath(path); + entry.RequestType = NSchemeCache::TSchemeCacheNavigate::TEntry::ERequestType::ByPath; + entry.ShowPrivatePath = true; + entry.SyncVersion = false; + req->UserToken = new NACLib::TUserToken("root@builtin", {}); + UNIT_ASSERT(req->UserToken); + TActorId edgeActor = server.GetRuntime()->AllocateEdgeActor(); + auto ev = new TEvTxProxySchemeCache::TEvNavigateKeySet(req.Release()); + UNIT_ASSERT(ev->Request->UserToken); + auto schemeCache = MakeSchemeCacheID(); + server.GetRuntime()->Send(new IEventHandle(schemeCache, edgeActor, ev)); + + TAutoPtr<IEventHandle> handle; + auto *result = server.GetRuntime()->GrabEdgeEvent<TEvTxProxySchemeCache::TEvNavigateKeySetResult>(handle); + UNIT_ASSERT_VALUES_EQUAL(result->Request->ResultSet.size(), 1); + again = result->Request->ResultSet[0].Status != 
NSchemeCache::TSchemeCacheNavigate::EStatus::Ok; + } + } + + void MakeSimpleTest(const TString &tablePath, + std::function<void(const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub>&)> func) + { + TKikimrWithGrpcAndRootSchema server; + ui16 grpc = server.GetPort(); + TString location = TStringBuilder() << "localhost:" << grpc; + + //////////////////////////////////////////////////////////////////////// + + std::shared_ptr<grpc::Channel> channel; + std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> stub; + channel = grpc::CreateChannel("localhost:" + ToString(grpc), grpc::InsecureChannelCredentials()); + MakeDirectory(channel, "/Root/mydb"); + MakeTable(channel, tablePath); + auto pr = SplitPath(tablePath); + Ydb::Scheme::ListDirectoryResult listDirectoryResult = ListDirectory(channel, "/Root/mydb"); + UNIT_ASSERT_VALUES_EQUAL(listDirectoryResult.self().name(), "mydb"); + UNIT_ASSERT_VALUES_EQUAL(listDirectoryResult.children(0).name(), pr.back()); + + WaitTableCreation(server, tablePath); + stub = Ydb::KeyValue::V1::KeyValueService::NewStub(channel); + func(stub); + } + + Y_UNIT_TEST(SimpleAcquireLock) { + TString tablePath = "/Root/mydb/kvtable"; + MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){ + Ydb::KeyValue::AcquireLockRequest request; + request.set_path(tablePath); + request.set_partition_id(0); + Ydb::KeyValue::AcquireLockResponse response; + Ydb::KeyValue::AcquireLockResult result; + + grpc::ClientContext ctx1; + AdjustCtxForDB(ctx1); + stub->AcquireLock(&ctx1, request, &response); + UNIT_ASSERT_CHECK_STATUS(response.operation(), Ydb::StatusIds::SUCCESS); + response.operation().result().UnpackTo(&result); + UNIT_ASSERT(result.lock_generation() == 1); + + grpc::ClientContext ctx2; + AdjustCtxForDB(ctx2); + stub->AcquireLock(&ctx2, request, &response); + UNIT_ASSERT_CHECK_STATUS(response.operation(), Ydb::StatusIds::SUCCESS); + response.operation().result().UnpackTo(&result); + 
UNIT_ASSERT(result.lock_generation() == 2); + }); + } + + Y_UNIT_TEST(SimpleExecuteTransaction) { + TString tablePath = "/Root/mydb/kvtable"; + MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){ + Ydb::KeyValue::ExecuteTransactionRequest request; + request.set_path(tablePath); + request.set_partition_id(0); + Ydb::KeyValue::ExecuteTransactionResponse response; + + grpc::ClientContext ctx; + AdjustCtxForDB(ctx); + stub->ExecuteTransaction(&ctx, request, &response); + UNIT_ASSERT_CHECK_STATUS(response.operation(), Ydb::StatusIds::SUCCESS); + }); + } + + Y_UNIT_TEST(SimpleExecuteTransactionWithWrongGeneration) { + TString tablePath = "/Root/mydb/kvtable"; + MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){ + Ydb::KeyValue::ExecuteTransactionRequest request; + request.set_path(tablePath); + request.set_partition_id(0); + request.set_lock_generation(42); + Ydb::KeyValue::ExecuteTransactionResponse response; + + grpc::ClientContext ctx; + AdjustCtxForDB(ctx); + stub->ExecuteTransaction(&ctx, request, &response); + UNIT_ASSERT_CHECK_STATUS(response.operation(), Ydb::StatusIds::PRECONDITION_FAILED); + }); + } + + Ydb::KeyValue::ExecuteTransactionResult Write(const TString &path, ui64 partitionId, const TString &key, const TString &value, ui64 storageChannel, + const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub) + { + Ydb::KeyValue::ExecuteTransactionRequest writeRequest; + writeRequest.set_path(path); + writeRequest.set_partition_id(partitionId); + auto *cmd = writeRequest.add_commands(); + auto *write = cmd->mutable_write(); + write->set_key(key); + write->set_value(value); + write->set_storage_channel(storageChannel); + Ydb::KeyValue::ExecuteTransactionResponse writeResponse; + + grpc::ClientContext writeCtx; + AdjustCtxForDB(writeCtx); + stub->ExecuteTransaction(&writeCtx, writeRequest, &writeResponse); + 
UNIT_ASSERT_CHECK_STATUS(writeResponse.operation(), Ydb::StatusIds::SUCCESS); + Ydb::KeyValue::ExecuteTransactionResult writeResult; + writeResponse.operation().result().UnpackTo(&writeResult); + return writeResult; + } + + void Rename(const TString &path, ui64 partitionId, const TString &oldKey, const TString &newKey, + const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub) + { + Ydb::KeyValue::ExecuteTransactionRequest request; + request.set_path(path); + request.set_partition_id(partitionId); + auto *cmd = request.add_commands(); + auto *rename = cmd->mutable_rename(); + rename->set_old_key(oldKey); + rename->set_new_key(newKey); + Ydb::KeyValue::ExecuteTransactionResponse response; + + grpc::ClientContext ctx; + AdjustCtxForDB(ctx); + stub->ExecuteTransaction(&ctx, request, &response); + UNIT_ASSERT_CHECK_STATUS(response.operation(), Ydb::StatusIds::SUCCESS); + } + + + Y_UNIT_TEST(SimpleRenameUnexistedKey) { + TString tablePath = "/Root/mydb/kvtable"; + MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){ + Ydb::KeyValue::ExecuteTransactionRequest request; + request.set_path(tablePath); + request.set_partition_id(0); + auto *cmd = request.add_commands(); + auto *rename = cmd->mutable_rename(); + rename->set_old_key("key1"); + rename->set_new_key("key2"); + Ydb::KeyValue::ExecuteTransactionResponse response; + + grpc::ClientContext ctx; + AdjustCtxForDB(ctx); + stub->ExecuteTransaction(&ctx, request, &response); + UNIT_ASSERT_CHECK_STATUS(response.operation(), Ydb::StatusIds::NOT_FOUND); + }); + } + + Y_UNIT_TEST(SimpleConcatUnexistedKey) { + TString tablePath = "/Root/mydb/kvtable"; + MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){ + Ydb::KeyValue::ExecuteTransactionRequest request; + request.set_path(tablePath); + request.set_partition_id(0); + auto *cmd = request.add_commands(); + auto *concat = cmd->mutable_concat(); + 
concat->add_input_keys("key1"); + concat->add_input_keys("key2"); + concat->set_output_key("key3"); + Ydb::KeyValue::ExecuteTransactionResponse response; + + grpc::ClientContext ctx; + AdjustCtxForDB(ctx); + stub->ExecuteTransaction(&ctx, request, &response); + UNIT_ASSERT_CHECK_STATUS(response.operation(), Ydb::StatusIds::NOT_FOUND); + }); + } + + Y_UNIT_TEST(SimpleCopyUnexistedKey) { + TString tablePath = "/Root/mydb/kvtable"; + MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){ + Ydb::KeyValue::ExecuteTransactionRequest request; + request.set_path(tablePath); + request.set_partition_id(0); + auto *cmd = request.add_commands(); + auto *rename = cmd->mutable_copy_range(); + auto *range = rename->mutable_range(); + range->set_from_key_inclusive("key1"); + range->set_to_key_inclusive("key2"); + rename->set_prefix_to_add("A"); + Ydb::KeyValue::ExecuteTransactionResponse response; + + grpc::ClientContext ctx; + AdjustCtxForDB(ctx); + stub->ExecuteTransaction(&ctx, request, &response); + UNIT_ASSERT_CHECK_STATUS(response.operation(), Ydb::StatusIds::SUCCESS); + }); + } + + Y_UNIT_TEST(SimpleWriteRead) { + TString tablePath = "/Root/mydb/kvtable"; + MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){ + Write(tablePath, 0, "key", "value", 0, stub); + + Ydb::KeyValue::ReadRequest readRequest; + readRequest.set_path(tablePath); + readRequest.set_partition_id(0); + readRequest.set_key("key"); + Ydb::KeyValue::ReadResponse readResponse; + Ydb::KeyValue::ReadResult readResult; + + grpc::ClientContext readCtx; + AdjustCtxForDB(readCtx); + stub->Read(&readCtx, readRequest, &readResponse); + UNIT_ASSERT_CHECK_STATUS(readResponse.operation(), Ydb::StatusIds::SUCCESS); + readResponse.operation().result().UnpackTo(&readResult); + UNIT_ASSERT(!readResult.is_overrun()); + UNIT_ASSERT_VALUES_EQUAL(readResult.requested_key(), "key"); + 
UNIT_ASSERT_VALUES_EQUAL(readResult.value(), "value"); + UNIT_ASSERT_VALUES_EQUAL(readResult.requested_offset(), 0); + UNIT_ASSERT_VALUES_EQUAL(readResult.requested_size(), 5); + }); + } + + Y_UNIT_TEST(SimpleWriteReadWithIncorreectPath) { + TString tablePath = "/Root/mydb/kvtable"; + MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){ + Write(tablePath, 0, "key", "value", 0, stub); + + Ydb::KeyValue::ReadRequest readRequest; + readRequest.set_path("/Root/mydb/table"); + readRequest.set_partition_id(0); + readRequest.set_key("key"); + Ydb::KeyValue::ReadResponse readResponse; + Ydb::KeyValue::ReadResult readResult; + + grpc::ClientContext readCtx; + AdjustCtxForDB(readCtx); + stub->Read(&readCtx, readRequest, &readResponse); + UNIT_ASSERT_CHECK_STATUS(readResponse.operation(), Ydb::StatusIds::SCHEME_ERROR); + }); + } + + Y_UNIT_TEST(SimpleWriteReadWithoutToken) { + TString tablePath = "/Root/mydb/kvtable"; + MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){ + return; + Write(tablePath, 0, "key", "value", 0, stub); + + Ydb::KeyValue::ReadRequest readRequest; + readRequest.set_path("/Root/mydb/kvtable"); + readRequest.set_partition_id(0); + readRequest.set_key("key"); + Ydb::KeyValue::ReadResponse readResponse; + Ydb::KeyValue::ReadResult readResult; + + grpc::ClientContext readCtx; + //AdjustCtxForDB(readCtx); + stub->Read(&readCtx, readRequest, &readResponse); + UNIT_ASSERT_CHECK_STATUS(readResponse.operation(), Ydb::StatusIds::SCHEME_ERROR); + }); + } + + Y_UNIT_TEST(SimpleWriteReadWithoutLockGeneration1) { + TString tablePath = "/Root/mydb/kvtable"; + MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){ + AcquireLock(tablePath, 0, stub); + Write(tablePath, 0, "key", "value", 0, stub); + Ydb::KeyValue::ReadRequest readRequest; + readRequest.set_path(tablePath); + 
readRequest.set_partition_id(0); + readRequest.set_key("key"); + Ydb::KeyValue::ReadResponse readResponse; + Ydb::KeyValue::ReadResult readResult; + + grpc::ClientContext readCtx; + AdjustCtxForDB(readCtx); + stub->Read(&readCtx, readRequest, &readResponse); + UNIT_ASSERT_CHECK_STATUS(readResponse.operation(), Ydb::StatusIds::SUCCESS); + }); + } + + Y_UNIT_TEST(SimpleWriteReadWithoutLockGeneration2) { + TString tablePath = "/Root/mydb/kvtable"; + MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){ + Write(tablePath, 0, "key", "value", 0, stub); + AcquireLock(tablePath, 0, stub); + Ydb::KeyValue::ReadRequest readRequest; + readRequest.set_path(tablePath); + readRequest.set_partition_id(0); + readRequest.set_key("key"); + Ydb::KeyValue::ReadResponse readResponse; + Ydb::KeyValue::ReadResult readResult; + + grpc::ClientContext readCtx; + AdjustCtxForDB(readCtx); + stub->Read(&readCtx, readRequest, &readResponse); + UNIT_ASSERT_CHECK_STATUS(readResponse.operation(), Ydb::StatusIds::SUCCESS); + }); + } + + Y_UNIT_TEST(SimpleWriteReadOverrun) { + TString tablePath = "/Root/mydb/kvtable"; + MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){ + Write(tablePath, 0, "key", "value", 0, stub); + + Ydb::KeyValue::ReadRequest readRequest; + readRequest.set_path(tablePath); + readRequest.set_partition_id(0); + readRequest.set_key("key"); + ui64 limitBytes = 1 + 5 + 3 // Key id, length + + 1 + 5 + 1 // Value id, length, value + + 1 + 8 // Offset id, value + + 1 + 8 // Size id, value + + 1 + 1 // Status id, value + ; + readRequest.set_limit_bytes(limitBytes); + Ydb::KeyValue::ReadResponse readResponse; + Ydb::KeyValue::ReadResult readResult; + + grpc::ClientContext readCtx; + AdjustCtxForDB(readCtx); + stub->Read(&readCtx, readRequest, &readResponse); + UNIT_ASSERT_CHECK_STATUS(readResponse.operation(), Ydb::StatusIds::SUCCESS); + 
readResponse.operation().result().UnpackTo(&readResult); + UNIT_ASSERT(readResult.is_overrun()); + UNIT_ASSERT_VALUES_EQUAL(readResult.requested_key(), "key"); + UNIT_ASSERT_VALUES_EQUAL(readResult.value(), "v"); + UNIT_ASSERT_VALUES_EQUAL(readResult.requested_offset(), 0); + UNIT_ASSERT_VALUES_EQUAL(readResult.requested_size(), 5); + }); + } + + Y_UNIT_TEST(SimpleWriteReadRange) { + TString tablePath = "/Root/mydb/kvtable"; + MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){ + Write(tablePath, 0, "key1", "value1", 1, stub); + Write(tablePath, 0, "key2", "value12", 2, stub); + + Ydb::KeyValue::ReadRangeRequest readRangeRequest; + readRangeRequest.set_path(tablePath); + readRangeRequest.set_partition_id(0); + auto *r = readRangeRequest.mutable_range(); + r->set_from_key_inclusive("key1"); + r->set_to_key_inclusive("key3"); + Ydb::KeyValue::ReadRangeResponse readRangeResponse; + Ydb::KeyValue::ReadRangeResult readRangeResult; + + grpc::ClientContext readRangeCtx; + AdjustCtxForDB(readRangeCtx); + stub->ReadRange(&readRangeCtx, readRangeRequest, &readRangeResponse); + UNIT_ASSERT_CHECK_STATUS(readRangeResponse.operation(), Ydb::StatusIds::SUCCESS); + readRangeResponse.operation().result().UnpackTo(&readRangeResult); + + UNIT_ASSERT_VALUES_EQUAL(readRangeResult.pair(0).key(), "key1"); + UNIT_ASSERT_VALUES_EQUAL(readRangeResult.pair(1).key(), "key2"); + + UNIT_ASSERT_VALUES_EQUAL(readRangeResult.pair(0).value(), "value1"); + UNIT_ASSERT_VALUES_EQUAL(readRangeResult.pair(1).value(), "value12"); + + UNIT_ASSERT_VALUES_EQUAL(readRangeResult.pair(0).storage_channel(), 1); + UNIT_ASSERT_VALUES_EQUAL(readRangeResult.pair(1).storage_channel(), 2); + }); + } + + + Y_UNIT_TEST(SimpleWriteListRange) { + TString tablePath = "/Root/mydb/kvtable"; + MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){ + Write(tablePath, 0, "key1", "value1", 1, stub); + 
Write(tablePath, 0, "key2", "value12", 2, stub); + + Ydb::KeyValue::ListRangeRequest listRangeRequest; + listRangeRequest.set_path(tablePath); + listRangeRequest.set_partition_id(0); + auto *r = listRangeRequest.mutable_range(); + r->set_from_key_inclusive("key1"); + r->set_to_key_inclusive("key3"); + Ydb::KeyValue::ListRangeResponse listRangeResponse; + Ydb::KeyValue::ListRangeResult listRangeResult; + + grpc::ClientContext listRangeCtx; + AdjustCtxForDB(listRangeCtx); + stub->ListRange(&listRangeCtx, listRangeRequest, &listRangeResponse); + UNIT_ASSERT_CHECK_STATUS(listRangeResponse.operation(), Ydb::StatusIds::SUCCESS); + listRangeResponse.operation().result().UnpackTo(&listRangeResult); + + UNIT_ASSERT_VALUES_EQUAL(listRangeResult.key(0).key(), "key1"); + UNIT_ASSERT_VALUES_EQUAL(listRangeResult.key(1).key(), "key2"); + + UNIT_ASSERT_VALUES_EQUAL(listRangeResult.key(0).value_size(), 6); + UNIT_ASSERT_VALUES_EQUAL(listRangeResult.key(1).value_size(), 7); + + UNIT_ASSERT_VALUES_EQUAL(listRangeResult.key(0).storage_channel(), 1); + UNIT_ASSERT_VALUES_EQUAL(listRangeResult.key(1).storage_channel(), 2); + }); + } + + + Y_UNIT_TEST(SimpleGetStorageChannelStatus) { + TString tablePath = "/Root/mydb/kvtable"; + MakeSimpleTest(tablePath, [tablePath](const std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> &stub){ + Ydb::KeyValue::GetStorageChannelStatusRequest getStatusRequest; + getStatusRequest.set_path(tablePath); + getStatusRequest.set_partition_id(0); + getStatusRequest.add_storage_channel(1); + getStatusRequest.add_storage_channel(2); + getStatusRequest.add_storage_channel(3); + Ydb::KeyValue::GetStorageChannelStatusResponse getStatusResponse; + Ydb::KeyValue::GetStorageChannelStatusResult getStatusResult; + + grpc::ClientContext getStatusCtx; + AdjustCtxForDB(getStatusCtx); + stub->GetStorageChannelStatus(&getStatusCtx, getStatusRequest, &getStatusResponse); + UNIT_ASSERT_CHECK_STATUS(getStatusResponse.operation(), Ydb::StatusIds::SUCCESS); + 
getStatusResponse.operation().result().UnpackTo(&getStatusResult); + UNIT_ASSERT_VALUES_EQUAL(getStatusResult.storage_channel_info_size(), 3); + }); + } + + Y_UNIT_TEST(SimpleCreateAlterDropVolume) { + TKikimrWithGrpcAndRootSchema server; + ui16 grpc = server.GetPort(); + TString location = TStringBuilder() << "localhost:" << grpc; + + std::shared_ptr<grpc::Channel> channel; + channel = grpc::CreateChannel("localhost:" + ToString(grpc), grpc::InsecureChannelCredentials()); + + TString path = "/Root/mydb/"; + TString tablePath = "/Root/mydb/mytable"; + MakeDirectory(channel, path); + MakeTable(channel, tablePath); + + Ydb::Scheme::ListDirectoryResult listDirectoryResult = ListDirectory(channel, path); + UNIT_ASSERT_VALUES_EQUAL(listDirectoryResult.self().name(), "mydb"); + UNIT_ASSERT_VALUES_EQUAL(listDirectoryResult.children(0).name(), "mytable"); + + UNIT_ASSERT_VALUES_EQUAL(1, DescribeVolume(channel, tablePath).partition_count()); + + AlterVolume(channel, tablePath, 2); + listDirectoryResult = ListDirectory(channel, path); + UNIT_ASSERT_VALUES_EQUAL(listDirectoryResult.self().name(), "mydb"); + UNIT_ASSERT_VALUES_EQUAL(listDirectoryResult.children(0).name(), "mytable"); + + + UNIT_ASSERT_VALUES_EQUAL(2, DescribeVolume(channel, tablePath).partition_count()); + + DropVolume(channel, tablePath); + listDirectoryResult = ListDirectory(channel, path); + UNIT_ASSERT_VALUES_EQUAL(listDirectoryResult.self().name(), "mydb"); + UNIT_ASSERT_VALUES_EQUAL(listDirectoryResult.children_size(), 0); + } + + Y_UNIT_TEST(SimpleListPartitions) { + return; // delete it after adding ydb_token to requests in tests + TKikimrWithGrpcAndRootSchema server; + ui16 grpc = server.GetPort(); + TString location = TStringBuilder() << "localhost:" << grpc; + + std::shared_ptr<grpc::Channel> channel; + channel = grpc::CreateChannel("localhost:" + ToString(grpc), grpc::InsecureChannelCredentials()); + + TString path = "/Root/mydb/"; + TString tablePath = "/Root/mydb/mytable"; + 
MakeDirectory(channel, path); + MakeTable(channel, tablePath); + + std::unique_ptr<Ydb::KeyValue::V1::KeyValueService::Stub> stub; + stub = Ydb::KeyValue::V1::KeyValueService::NewStub(channel); + + Write(tablePath, 0, "key1", "value1", 1, stub); + + Ydb::KeyValue::ListLocalPartitionsRequest enumerateRequest; + enumerateRequest.set_path(tablePath); + enumerateRequest.set_node_id(2); + + Ydb::KeyValue::ListLocalPartitionsResult enumerateResult; + Ydb::KeyValue::ListLocalPartitionsResponse eumerateResponse; + + grpc::ClientContext enumerateCtx; + AdjustCtxForDB(enumerateCtx); + stub->ListLocalPartitions(&enumerateCtx, enumerateRequest, &eumerateResponse); + + UNIT_ASSERT_CHECK_STATUS(eumerateResponse.operation(), Ydb::StatusIds::SUCCESS); + eumerateResponse.operation().result().UnpackTo(&enumerateResult); + UNIT_ASSERT_VALUES_EQUAL(enumerateResult.partition_ids_size(), 1); + + auto writeRes = Write(tablePath, enumerateResult.partition_ids(0), "key2", "value2", 1, stub); + UNIT_ASSERT_VALUES_EQUAL(writeRes.node_id(), 2); + } + +} // Y_UNIT_TEST_SUITE(KeyValueGRPCService) + +} // NKikimr::NGRpcService diff --git a/ydb/services/keyvalue/ut/CMakeLists.darwin.txt b/ydb/services/keyvalue/ut/CMakeLists.darwin.txt new file mode 100644 index 0000000000..6c9a38b222 --- /dev/null +++ b/ydb/services/keyvalue/ut/CMakeLists.darwin.txt @@ -0,0 +1,49 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_executable(ydb-services-keyvalue-ut) +target_compile_options(ydb-services-keyvalue-ut PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_include_directories(ydb-services-keyvalue-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/services/keyvalue +) +target_link_libraries(ydb-services-keyvalue-ut PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + cpp-testing-unittest_main + ydb-services-keyvalue + library-cpp-logger + ydb-core-protos + core-testlib-default +) +target_link_options(ydb-services-keyvalue-ut PRIVATE + -Wl,-no_deduplicate + -Wl,-sdk_version,10.15 + -fPIC + -fPIC + -framework + CoreFoundation +) +target_sources(ydb-services-keyvalue-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/services/keyvalue/grpc_service_ut.cpp +) +add_test( + NAME + ydb-services-keyvalue-ut + COMMAND + ydb-services-keyvalue-ut + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +vcs_info(ydb-services-keyvalue-ut) diff --git a/ydb/services/keyvalue/ut/CMakeLists.linux-aarch64.txt b/ydb/services/keyvalue/ut/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..6b64e4360d --- /dev/null +++ b/ydb/services/keyvalue/ut/CMakeLists.linux-aarch64.txt @@ -0,0 +1,52 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_executable(ydb-services-keyvalue-ut) +target_compile_options(ydb-services-keyvalue-ut PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_include_directories(ydb-services-keyvalue-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/services/keyvalue +) +target_link_libraries(ydb-services-keyvalue-ut PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-lfalloc + cpp-testing-unittest_main + ydb-services-keyvalue + library-cpp-logger + ydb-core-protos + core-testlib-default +) +target_link_options(ydb-services-keyvalue-ut PRIVATE + -ldl + -lrt + -Wl,--no-as-needed + -fPIC + -fPIC + -lpthread + -lrt + -ldl +) +target_sources(ydb-services-keyvalue-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/services/keyvalue/grpc_service_ut.cpp +) +add_test( + NAME + ydb-services-keyvalue-ut + COMMAND + ydb-services-keyvalue-ut + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +vcs_info(ydb-services-keyvalue-ut) diff --git a/ydb/services/keyvalue/ut/CMakeLists.linux.txt b/ydb/services/keyvalue/ut/CMakeLists.linux.txt new file mode 100644 index 0000000000..d754baa780 --- /dev/null +++ b/ydb/services/keyvalue/ut/CMakeLists.linux.txt @@ -0,0 +1,54 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_executable(ydb-services-keyvalue-ut) +target_compile_options(ydb-services-keyvalue-ut PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_include_directories(ydb-services-keyvalue-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/services/keyvalue +) +target_link_libraries(ydb-services-keyvalue-ut PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-malloc-tcmalloc + libs-tcmalloc-no_percpu_cache + library-cpp-cpuid_check + cpp-testing-unittest_main + ydb-services-keyvalue + library-cpp-logger + ydb-core-protos + core-testlib-default +) +target_link_options(ydb-services-keyvalue-ut PRIVATE + -ldl + -lrt + -Wl,--no-as-needed + -fPIC + -fPIC + -lpthread + -lrt + -ldl +) +target_sources(ydb-services-keyvalue-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/services/keyvalue/grpc_service_ut.cpp +) +add_test( + NAME + ydb-services-keyvalue-ut + COMMAND + ydb-services-keyvalue-ut + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +vcs_info(ydb-services-keyvalue-ut) diff --git a/ydb/services/keyvalue/ut/CMakeLists.txt b/ydb/services/keyvalue/ut/CMakeLists.txt new file mode 100644 index 0000000000..3e0811fb22 --- /dev/null +++ b/ydb/services/keyvalue/ut/CMakeLists.txt @@ -0,0 +1,15 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND UNIX AND NOT APPLE AND NOT ANDROID) + include(CMakeLists.linux-aarch64.txt) +elseif (APPLE) + include(CMakeLists.darwin.txt) +elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND UNIX AND NOT APPLE AND NOT ANDROID) + include(CMakeLists.linux.txt) +endif() diff --git a/ydb/tests/functional/autoconfig/test_actorsystem.py b/ydb/tests/functional/autoconfig/test_actorsystem.py new file mode 100644 index 0000000000..352c750662 --- /dev/null +++ b/ydb/tests/functional/autoconfig/test_actorsystem.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import sys + + +from ydb.tests.library.harness.kikimr_cluster import kikimr_cluster_factory +from ydb.tests.library.harness.kikimr_config import KikimrConfigGenerator + + +def make_test_for_specific_actor_system(node_type, cpu_count):  # builds and returns a test class bound to this node_type / cpu_count pair + class TestWithSpecificActorSystem(object): + @classmethod + def setup_class(cls):  # starts a cluster whose actor_system_config is replaced by the dict below + actor_system_config = { + "node_type": node_type, + "cpu_count": cpu_count, + "use_auto_config": True + } + configuration = KikimrConfigGenerator(overrided_actor_system_config=actor_system_config) + cls.kikimr_cluster = kikimr_cluster_factory(configuration) + cls.kikimr_cluster.start() + + @classmethod + def teardown_class(cls):  # stops the cluster started in setup_class + cls.kikimr_cluster.stop() + + def test(self):  # empty body: the only work happens in setup_class / teardown_class (cluster start and stop) + pass + + return TestWithSpecificActorSystem + + +for node_type in ("Compute", "Storage", "Hybrid"):  # the upper-cased name goes into the config, the original spelling into the class name + for cpu_count in range(1, 40):  # cpu_count takes the values 1..39 + test = make_test_for_specific_actor_system(node_type.upper(), cpu_count) + setattr(sys.modules[__name__], "TestWith%sNodeWith%dCpu" % (node_type, cpu_count), test)  # registers the class as a module attribute under a per-combination name diff --git a/ydb/tests/library/harness/kikimr_config.py b/ydb/tests/library/harness/kikimr_config.py index b40fbe3e62..129d4c3f66 100644 --- a/ydb/tests/library/harness/kikimr_config.py +++ b/ydb/tests/library/harness/kikimr_config.py @@ -142,6 +142,7 @@ class KikimrConfigGenerator(object): enable_alter_database_create_hive_first=False, disable_iterator_reads=False, 
disable_iterator_lookups=False, + overrided_actor_system_config=None ): self._version = version self.use_log_files = use_log_files @@ -212,6 +213,9 @@ class KikimrConfigGenerator(object): self.yaml_config = load_default_yaml(self.__node_ids, self.domain_name, self.static_erasure, self.__additional_log_configs) + if overrided_actor_system_config: + self.yaml_config["actor_system_config"] = overrided_actor_system_config + if disable_iterator_reads: if "table_service_config" not in self.yaml_config: self.yaml_config["table_service_config"] = {} |