diff options
author | Daniil Cherednik <dan.cherednik@gmail.com> | 2023-02-09 11:44:35 +0300 |
---|---|---|
committer | Daniil Cherednik <dan.cherednik@gmail.com> | 2023-02-09 11:46:17 +0300 |
commit | b0967c30d3706b650b679fe119b6bd7b0924d328 (patch) | |
tree | 25579dfda238c2cc5b00324878303b3a05d09f45 | |
parent | 9b78acb9998e4a817a21fe60443c7c5d6a06b947 (diff) | |
download | ydb-stable-22-5.tar.gz |
Ydb stable 22-5-10 (refs: 22.5.10, stable-22-5)
x-stable-origin-commit: f696baac1a4b8d48eb52b52b35930eef6d0eab42
304 files changed, 11843 insertions, 3143 deletions
diff --git a/library/cpp/actors/core/CMakeLists.txt b/library/cpp/actors/core/CMakeLists.txt index 64c617307c..51379561db 100644 --- a/library/cpp/actors/core/CMakeLists.txt +++ b/library/cpp/actors/core/CMakeLists.txt @@ -42,6 +42,7 @@ target_sources(cpp-actors-core PRIVATE ${CMAKE_SOURCE_DIR}/library/cpp/actors/core/executor_pool_io.cpp ${CMAKE_SOURCE_DIR}/library/cpp/actors/core/executor_pool_united.cpp ${CMAKE_SOURCE_DIR}/library/cpp/actors/core/executor_thread.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/actors/core/harmonizer.cpp ${CMAKE_SOURCE_DIR}/library/cpp/actors/core/interconnect.cpp ${CMAKE_SOURCE_DIR}/library/cpp/actors/core/io_dispatcher.cpp ${CMAKE_SOURCE_DIR}/library/cpp/actors/core/log.cpp diff --git a/library/cpp/actors/core/actor_ut.cpp b/library/cpp/actors/core/actor_ut.cpp index a6752f7d4f..ab53e3ec3e 100644 --- a/library/cpp/actors/core/actor_ut.cpp +++ b/library/cpp/actors/core/actor_ut.cpp @@ -543,8 +543,12 @@ Y_UNIT_TEST_SUITE(TestDecorator) { setup->NodeId = 0; setup->ExecutorsCount = 1; setup->Executors.Reset(new TAutoPtr<IExecutorPool>[setup->ExecutorsCount]); + + ui64 ts = GetCycleCountFast(); + THolder<IHarmonizer> harmonizer(MakeHarmonizer(ts)); for (ui32 i = 0; i < setup->ExecutorsCount; ++i) { - setup->Executors[i] = new TBasicExecutorPool(i, 1, 10, "basic"); + setup->Executors[i] = new TBasicExecutorPool(i, 1, 10, "basic", harmonizer.Get()); + harmonizer->AddPool(setup->Executors[i].Get()); } setup->Scheduler = new TBasicSchedulerThread; diff --git a/library/cpp/actors/core/actorsystem.h b/library/cpp/actors/core/actorsystem.h index 40499d7586..4801350067 100644 --- a/library/cpp/actors/core/actorsystem.h +++ b/library/cpp/actors/core/actorsystem.h @@ -124,10 +124,55 @@ namespace NActors { return 1; } + virtual i16 GetPriority() const { + return 0; + } + // generic virtual TAffinity* Affinity() const = 0; virtual void SetRealTimeMode() const {} + + virtual ui32 GetThreadCount() const { + return 1; + }; + + virtual void 
SetThreadCount(ui32 threads) { + Y_UNUSED(threads); + } + + virtual i16 GetBlockingThreadCount() const { + return 0; + } + + virtual i16 GetDefaultThreadCount() const { + return 1; + } + + virtual i16 GetMinThreadCount() const { + return 1; + } + + virtual i16 GetMaxThreadCount() const { + return 1; + + } + + virtual bool IsThreadBeingStopped(i16 threadIdx) const { + Y_UNUSED(threadIdx); + return false; + } + + virtual double GetThreadConsumedUs(i16 threadIdx) { + Y_UNUSED(threadIdx); + return 0.0; + } + + virtual double GetThreadBookedUs(i16 threadIdx) { + Y_UNUSED(threadIdx); + return 0.0; + } + }; // could be proxy to in-pool schedulers (for NUMA-aware executors) diff --git a/library/cpp/actors/core/balancer.cpp b/library/cpp/actors/core/balancer.cpp index 3dcc45c56b..d82701bbfb 100644 --- a/library/cpp/actors/core/balancer.cpp +++ b/library/cpp/actors/core/balancer.cpp @@ -2,8 +2,9 @@ #include "probes.h" -#include <library/cpp/actors/util/intrinsics.h> +#include <library/cpp/actors/util/cpu_load_log.h> #include <library/cpp/actors/util/datetime.h> +#include <library/cpp/actors/util/intrinsics.h> #include <util/system/spinlock.h> @@ -27,11 +28,11 @@ namespace NActors { TLevel() {} - TLevel(const TBalancingConfig& cfg, TPoolId poolId, ui64 currentCpus, double cpuIdle) { + TLevel(const TBalancingConfig& cfg, TPoolId poolId, ui64 currentCpus, double cpuIdle, ui64 addLatencyUs, ui64 worstLatencyUs) { ScaleFactor = double(currentCpus) / cfg.Cpus; - if (cpuIdle > 1.3) { // TODO: add a better underload criterion, based on estimated latency w/o 1 cpu + if ((worstLatencyUs + addLatencyUs) < 2000 && cpuIdle > 1.0) { // Uderload criterion, based on estimated latency w/o 1 cpu LoadClass = Underloaded; - } else if (cpuIdle < 0.2) { // TODO: add a better overload criterion, based on latency + } else if (worstLatencyUs > 2000 || cpuIdle < 0.2) { // Overload criterion, based on latency LoadClass = Overloaded; } else { LoadClass = Moderate; @@ -82,6 +83,8 @@ namespace NActors { 
TBalancerConfig Config; public: + + ui64 GetPeriodUs() override; // Setup TBalancer(const TBalancerConfig& config, const TVector<TUnitedExecutorPoolConfig>& unitedPools, ui64 ts); bool AddCpu(const TCpuAllocation& cpuAlloc, TCpuState* cpu) override; @@ -238,9 +241,12 @@ namespace NActors { } // Compute levels - pool.CurLevel = TLevel(pool.Config, pool.PoolId, pool.CurrentCpus, pool.CpuIdle); - pool.AddLevel = TLevel(pool.Config, pool.PoolId, pool.CurrentCpus + 1, pool.CpuIdle); // we expect taken cpu to became utilized - pool.SubLevel = TLevel(pool.Config, pool.PoolId, pool.CurrentCpus - 1, pool.CpuIdle - 1); + pool.CurLevel = TLevel(pool.Config, pool.PoolId, pool.CurrentCpus, pool.CpuIdle, + pool.Next.ExpectedLatencyIncreaseUs, pool.Next.WorstActivationTimeUs); + pool.AddLevel = TLevel(pool.Config, pool.PoolId, pool.CurrentCpus + 1, pool.CpuIdle, + 0, pool.Next.WorstActivationTimeUs); // we expect taken cpu to became utilized + pool.SubLevel = TLevel(pool.Config, pool.PoolId, pool.CurrentCpus - 1, pool.CpuIdle - 1, + pool.Next.ExpectedLatencyIncreaseUs, pool.Next.WorstActivationTimeUs); // Prepare for balancing pool.PrevCpus = pool.CurrentCpus; @@ -263,7 +269,7 @@ namespace NActors { TPool& from = **fromIter; if (from.CurrentCpus == from.PrevCpus && // if not balanced yet from.CurrentCpus > from.Config.MinCpus && // and constraints would not be violated - from.SubLevel.Importance < to.AddLevel.Importance) // and which of two pools is more important would not change after cpu movement + from.SubLevel.Importance <= to.AddLevel.Importance) // and which of two pools is more important would not change after cpu movement { MoveCpu(from, to); from.CurrentCpus--; @@ -295,6 +301,10 @@ namespace NActors { Lock.Release(); } + ui64 TBalancer::GetPeriodUs() { + return Config.PeriodUs; + } + IBalancer* MakeBalancer(const TBalancerConfig& config, const TVector<TUnitedExecutorPoolConfig>& unitedPools, ui64 ts) { return new TBalancer(config, unitedPools, ts); } diff --git 
a/library/cpp/actors/core/balancer.h b/library/cpp/actors/core/balancer.h index 9763ec79e1..e1f6f33bf3 100644 --- a/library/cpp/actors/core/balancer.h +++ b/library/cpp/actors/core/balancer.h @@ -10,6 +10,8 @@ namespace NActors { ui64 Ts = 0; // Measurement timestamp ui64 CpuUs = 0; // Total cpu microseconds consumed by pool on all cpus since start ui64 IdleUs = ui64(-1); // Total cpu microseconds in spinning or waiting on futex + ui64 WorstActivationTimeUs = 0; + ui64 ExpectedLatencyIncreaseUs = 0; }; // Pool cpu balancer @@ -20,6 +22,7 @@ namespace NActors { virtual void SetPoolStats(TPoolId pool, const TBalancerStats& stats) = 0; virtual void Balance() = 0; virtual void Unlock() = 0; + virtual ui64 GetPeriodUs() = 0; // TODO: add method for reconfiguration on fly }; diff --git a/library/cpp/actors/core/config.h b/library/cpp/actors/core/config.h index 0d65815fd9..0bf4b871d7 100644 --- a/library/cpp/actors/core/config.h +++ b/library/cpp/actors/core/config.h @@ -41,6 +41,10 @@ namespace NActors { ui32 EventsPerMailbox = DEFAULT_EVENTS_PER_MAILBOX; int RealtimePriority = 0; ui32 MaxActivityType = 5; + i16 MinThreadCount = 0; + i16 MaxThreadCount = 0; + i16 DefaultThreadCount = 0; + i16 Priority = 0; }; struct TIOExecutorPoolConfig { @@ -88,11 +92,18 @@ namespace NActors { TBalancerConfig Balancer; }; + struct TSelfPingInfo { + NMonitoring::TDynamicCounters::TCounterPtr AvgPingCounter; + NMonitoring::TDynamicCounters::TCounterPtr AvgPingCounterWithSmallWindow; + ui32 MaxAvgPingUs; + }; + struct TCpuManagerConfig { TUnitedWorkersConfig UnitedWorkers; TVector<TBasicExecutorPoolConfig> Basic; TVector<TIOExecutorPoolConfig> IO; TVector<TUnitedExecutorPoolConfig> United; + TVector<TSelfPingInfo> PingInfoByPool; ui32 GetExecutorsCount() const { return Basic.size() + IO.size() + United.size(); diff --git a/library/cpp/actors/core/cpu_manager.cpp b/library/cpp/actors/core/cpu_manager.cpp index 39089b5d83..0736caa539 100644 --- a/library/cpp/actors/core/cpu_manager.cpp +++ 
b/library/cpp/actors/core/cpu_manager.cpp @@ -16,10 +16,18 @@ namespace NActors { UnitedWorkers.Reset(new TUnitedWorkers(Config.UnitedWorkers, Config.United, allocation, Balancer.Get())); } + ui64 ts = GetCycleCountFast(); + Harmonizer.Reset(MakeHarmonizer(ts)); + Executors.Reset(new TAutoPtr<IExecutorPool>[ExecutorPoolCount]); for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) { Executors[excIdx].Reset(CreateExecutorPool(excIdx)); + if (excIdx < Config.PingInfoByPool.size()) { + Harmonizer->AddPool(Executors[excIdx].Get(), &Config.PingInfoByPool[excIdx]); + } else { + Harmonizer->AddPool(Executors[excIdx].Get()); + } } } @@ -89,7 +97,7 @@ namespace NActors { IExecutorPool* TCpuManager::CreateExecutorPool(ui32 poolId) { for (TBasicExecutorPoolConfig& cfg : Config.Basic) { if (cfg.PoolId == poolId) { - return new TBasicExecutorPool(cfg); + return new TBasicExecutorPool(cfg, Harmonizer.Get()); } } for (TIOExecutorPoolConfig& cfg : Config.IO) { diff --git a/library/cpp/actors/core/cpu_manager.h b/library/cpp/actors/core/cpu_manager.h index 454035477b..42bede91b8 100644 --- a/library/cpp/actors/core/cpu_manager.h +++ b/library/cpp/actors/core/cpu_manager.h @@ -1,6 +1,7 @@ #pragma once #include "actorsystem.h" +#include "harmonizer.h" #include "executor_pool_basic.h" #include "executor_pool_io.h" #include "executor_pool_united.h" @@ -11,6 +12,7 @@ namespace NActors { TArrayHolder<TAutoPtr<IExecutorPool>> Executors; THolder<TUnitedWorkers> UnitedWorkers; THolder<IBalancer> Balancer; + THolder<IHarmonizer> Harmonizer; TCpuManagerConfig Config; public: explicit TCpuManager(THolder<TActorSystemSetup>& setup) diff --git a/library/cpp/actors/core/executor_pool_basic.cpp b/library/cpp/actors/core/executor_pool_basic.cpp index 4dce16939a..00e557fcb4 100644 --- a/library/cpp/actors/core/executor_pool_basic.cpp +++ b/library/cpp/actors/core/executor_pool_basic.cpp @@ -18,11 +18,16 @@ namespace NActors { ui32 threads, ui64 spinThreshold, const TString& poolName, + 
IHarmonizer *harmonizer, TAffinity* affinity, TDuration timePerMailbox, ui32 eventsPerMailbox, int realtimePriority, - ui32 maxActivityType) + ui32 maxActivityType, + i16 minThreadCount, + i16 maxThreadCount, + i16 defaultThreadCount, + i16 priority) : TExecutorPoolBase(poolId, threads, affinity, maxActivityType) , SpinThreshold(spinThreshold) , SpinThresholdCycles(spinThreshold * NHPTimer::GetCyclesPerSecond() * 0.000001) // convert microseconds to cycles @@ -34,21 +39,50 @@ namespace NActors { , ThreadUtilization(0) , MaxUtilizationCounter(0) , MaxUtilizationAccumulator(0) + , WrongWakenedThreadCount(0) , ThreadCount(threads) + , MinThreadCount(minThreadCount) + , MaxThreadCount(maxThreadCount) + , DefaultThreadCount(defaultThreadCount) + , Harmonizer(harmonizer) + , Priority(priority) { + i16 limit = Min(threads, (ui32)Max<i16>()); + if (DefaultThreadCount) { + DefaultThreadCount = Min(DefaultThreadCount, limit); + } else { + DefaultThreadCount = limit; + } + + MaxThreadCount = Min(Max(MaxThreadCount, DefaultThreadCount), limit); + + if (MinThreadCount) { + MinThreadCount = Max((i16)1, Min(MinThreadCount, DefaultThreadCount)); + } else { + MinThreadCount = DefaultThreadCount; + } + ThreadCount = MaxThreadCount; + auto semaphore = TSemaphore(); + semaphore.CurrentThreadCount = ThreadCount; + Semaphore = semaphore.ConverToI64(); } - TBasicExecutorPool::TBasicExecutorPool(const TBasicExecutorPoolConfig& cfg) + TBasicExecutorPool::TBasicExecutorPool(const TBasicExecutorPoolConfig& cfg, IHarmonizer *harmonizer) : TBasicExecutorPool( cfg.PoolId, cfg.Threads, cfg.SpinThreshold, cfg.PoolName, + harmonizer, new TAffinity(cfg.Affinity), cfg.TimePerMailbox, cfg.EventsPerMailbox, cfg.RealtimePriority, - cfg.MaxActivityType + cfg.MaxActivityType, + cfg.MinThreadCount, + cfg.MaxThreadCount, + cfg.DefaultThreadCount, + cfg.Priority ) {} @@ -56,126 +90,166 @@ namespace NActors { Threads.Destroy(); } + bool TBasicExecutorPool::GoToBeBlocked(TThreadCtx& threadCtx, TTimers 
&timers) { + do { + if (AtomicCas(&threadCtx.BlockedFlag, TThreadCtx::BS_BLOCKED, TThreadCtx::BS_BLOCKING)) { + timers.HPNow = GetCycleCountFast(); + timers.Elapsed += timers.HPNow - timers.HPStart; + if (threadCtx.BlockedPad.Park()) // interrupted + return true; + timers.HPStart = GetCycleCountFast(); + timers.Blocked += timers.HPStart - timers.HPNow; + } + } while (AtomicGet(threadCtx.BlockedFlag) != TThreadCtx::BS_NONE && !RelaxedLoad(&StopFlag)); + return false; + } + + bool TBasicExecutorPool::GoToSleep(TThreadCtx& threadCtx, TTimers &timers) { + do { + timers.HPNow = GetCycleCountFast(); + timers.Elapsed += timers.HPNow - timers.HPStart; + if (threadCtx.Pad.Park()) // interrupted + return true; + timers.HPStart = GetCycleCountFast(); + timers.Parked += timers.HPStart - timers.HPNow; + } while (AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_BLOCKED && !RelaxedLoad(&StopFlag)); + return false; + } + + void TBasicExecutorPool::GoToSpin(TThreadCtx& threadCtx) { + ui64 start = GetCycleCountFast(); + bool doSpin = true; + while (true) { + for (ui32 j = 0; doSpin && j < 12; ++j) { + if (GetCycleCountFast() >= (start + SpinThresholdCycles)) { + doSpin = false; + break; + } + for (ui32 i = 0; i < 12; ++i) { + if (AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_ACTIVE) { + SpinLockPause(); + } else { + doSpin = false; + break; + } + } + } + if (!doSpin) { + break; + } + if (RelaxedLoad(&StopFlag)) { + break; + } + } + } + + bool TBasicExecutorPool::GoToWaiting(TThreadCtx& threadCtx, TTimers &timers, bool needToBlock) { +#if defined ACTORSLIB_COLLECT_EXEC_STATS + if (AtomicGetAndIncrement(ThreadUtilization) == 0) { + // Initially counter contains -t0, the pool start timestamp + // When the first thread goes to sleep we add t1, so the counter + // becomes t1-t0 >= 0, or the duration of max utilization so far. + // If the counter was negative and becomes positive, that means + // counter just turned into a duration and we should store that + // duration. 
Otherwise another thread raced with us and + // subtracted some other timestamp t2. + const i64 t = GetCycleCountFast(); + const i64 x = AtomicGetAndAdd(MaxUtilizationCounter, t); + if (x < 0 && x + t > 0) + AtomicStore(&MaxUtilizationAccumulator, x + t); + } +#endif + + Y_VERIFY(AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_NONE); + + if (SpinThreshold > 0 && !needToBlock) { + // spin configured period + AtomicSet(threadCtx.WaitingFlag, TThreadCtx::WS_ACTIVE); + GoToSpin(threadCtx); + // then - sleep + if (AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_ACTIVE) { + if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_BLOCKED, TThreadCtx::WS_ACTIVE)) { + if (GoToSleep(threadCtx, timers)) { // interrupted + return true; + } + } + } + } else { + AtomicSet(threadCtx.WaitingFlag, TThreadCtx::WS_BLOCKED); + if (GoToSleep(threadCtx, timers)) { // interrupted + return true; + } + } + + Y_VERIFY_DEBUG(AtomicLoad(&StopFlag) || AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_RUNNING); + +#if defined ACTORSLIB_COLLECT_EXEC_STATS + if (AtomicDecrement(ThreadUtilization) == 0) { + // When we started sleeping counter contained t1-t0, or the + // last duration of max utilization. Now we subtract t2 >= t1, + // which turns counter negative again, and the next sleep cycle + // at timestamp t3 would be adding some new duration t3-t2. + // If the counter was positive and becomes negative that means + // there are no current races with other threads and we should + // store the last positive duration we observed. Multiple + // threads may be adding and subtracting values in potentially + // arbitrary order, which would cause counter to oscillate + // around zero. When it crosses zero is a good indication of a + // correct value. 
+ const i64 t = GetCycleCountFast(); + const i64 x = AtomicGetAndAdd(MaxUtilizationCounter, -t); + if (x > 0 && x - t < 0) + AtomicStore(&MaxUtilizationAccumulator, x); + } +#endif + return false; + } + ui32 TBasicExecutorPool::GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) { ui32 workerId = wctx.WorkerId; Y_VERIFY_DEBUG(workerId < PoolThreads); - NHPTimer::STime elapsed = 0; - NHPTimer::STime parked = 0; - NHPTimer::STime blocked = 0; - NHPTimer::STime hpstart = GetCycleCountFast(); - NHPTimer::STime hpnow; + TTimers timers; + + if (Harmonizer) { + LWPROBE(TryToHarmonize, PoolId, PoolName); + Harmonizer->Harmonize(timers.HPStart); + } TThreadCtx& threadCtx = Threads[workerId]; AtomicSet(threadCtx.WaitingFlag, TThreadCtx::WS_NONE); if (Y_UNLIKELY(AtomicGet(threadCtx.BlockedFlag) != TThreadCtx::BS_NONE)) { - do { - if (AtomicCas(&threadCtx.BlockedFlag, TThreadCtx::BS_BLOCKED, TThreadCtx::BS_BLOCKING)) { - hpnow = GetCycleCountFast(); - elapsed += hpnow - hpstart; - if (threadCtx.BlockedPad.Park()) // interrupted - return 0; - hpstart = GetCycleCountFast(); - blocked += hpstart - hpnow; - } - } while (AtomicGet(threadCtx.BlockedFlag) != TThreadCtx::BS_NONE && !AtomicLoad(&StopFlag)); + if (GoToBeBlocked(threadCtx, timers)) { // interrupted + return 0; + } } - const TAtomic x = AtomicDecrement(Semaphore); + bool needToWait = false; + bool needToBlock = false; - if (x < 0) { -#if defined ACTORSLIB_COLLECT_EXEC_STATS - if (AtomicGetAndIncrement(ThreadUtilization) == 0) { - // Initially counter contains -t0, the pool start timestamp - // When the first thread goes to sleep we add t1, so the counter - // becomes t1-t0 >= 0, or the duration of max utilization so far. - // If the counter was negative and becomes positive, that means - // counter just turned into a duration and we should store that - // duration. Otherwise another thread raced with us and - // subtracted some other timestamp t2. 
- const i64 t = GetCycleCountFast(); - const i64 x = AtomicGetAndAdd(MaxUtilizationCounter, t); - if (x < 0 && x + t > 0) - AtomicStore(&MaxUtilizationAccumulator, x + t); - } -#endif + TAtomic x = AtomicGet(Semaphore); + do { + i64 oldX = x; + TSemaphore semaphore = TSemaphore::GetSemaphore(x); + needToBlock = semaphore.CurrentSleepThreadCount < 0; + needToWait = needToBlock || semaphore.OldSemaphore <= -semaphore.CurrentSleepThreadCount; - Y_VERIFY(AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_NONE); - - if (SpinThreshold > 0) { - // spin configured period - AtomicSet(threadCtx.WaitingFlag, TThreadCtx::WS_ACTIVE); - ui64 start = GetCycleCountFast(); - bool doSpin = true; - while (true) { - for (ui32 j = 0; doSpin && j < 12; ++j) { - if (GetCycleCountFast() >= (start + SpinThresholdCycles)) { - doSpin = false; - break; - } - for (ui32 i = 0; i < 12; ++i) { - if (AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_ACTIVE) { - SpinLockPause(); - } else { - doSpin = false; - break; - } - } - } - if (!doSpin) { - break; - } - if (RelaxedLoad(&StopFlag)) { - break; - } - } - // then - sleep - if (AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_ACTIVE) { - if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_BLOCKED, TThreadCtx::WS_ACTIVE)) { - do { - hpnow = GetCycleCountFast(); - elapsed += hpnow - hpstart; - if (threadCtx.Pad.Park()) // interrupted - return 0; - hpstart = GetCycleCountFast(); - parked += hpstart - hpnow; - } while (AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_BLOCKED); - } - } - } else { - AtomicSet(threadCtx.WaitingFlag, TThreadCtx::WS_BLOCKED); - do { - hpnow = GetCycleCountFast(); - elapsed += hpnow - hpstart; - if (threadCtx.Pad.Park()) // interrupted - return 0; - hpstart = GetCycleCountFast(); - parked += hpstart - hpnow; - } while (AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_BLOCKED); + semaphore.OldSemaphore--; + if (needToWait) { + semaphore.CurrentSleepThreadCount++; } - Y_VERIFY_DEBUG(AtomicLoad(&StopFlag) || 
AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_RUNNING); + x = AtomicGetAndCas(&Semaphore, semaphore.ConverToI64(), x); + if (x == oldX) { + break; + } + } while (!StopFlag); -#if defined ACTORSLIB_COLLECT_EXEC_STATS - if (AtomicDecrement(ThreadUtilization) == 0) { - // When we started sleeping counter contained t1-t0, or the - // last duration of max utilization. Now we subtract t2 >= t1, - // which turns counter negative again, and the next sleep cycle - // at timestamp t3 would be adding some new duration t3-t2. - // If the counter was positive and becomes negative that means - // there are no current races with other threads and we should - // store the last positive duration we observed. Multiple - // threads may be adding and subtracting values in potentially - // arbitrary order, which would cause counter to oscillate - // around zero. When it crosses zero is a good indication of a - // correct value. - const i64 t = GetCycleCountFast(); - const i64 x = AtomicGetAndAdd(MaxUtilizationCounter, -t); - if (x > 0 && x - t < 0) - AtomicStore(&MaxUtilizationAccumulator, x); + if (needToWait) { + if (GoToWaiting(threadCtx, timers, needToBlock)) { // interrupted + return 0; } -#endif } else { AtomicSet(threadCtx.WaitingFlag, TThreadCtx::WS_RUNNING); } @@ -183,14 +257,14 @@ namespace NActors { // ok, has work suggested, must dequeue while (!RelaxedLoad(&StopFlag)) { if (const ui32 activation = Activations.Pop(++revolvingCounter)) { - hpnow = GetCycleCountFast(); - elapsed += hpnow - hpstart; - wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, elapsed); - if (parked > 0) { - wctx.AddParkedCycles(parked); + timers.HPNow = GetCycleCountFast(); + timers.Elapsed += timers.HPNow - timers.HPStart; + wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, timers.Elapsed); + if (timers.Parked > 0) { + wctx.AddParkedCycles(timers.Parked); } - if (blocked > 0) { - wctx.AddBlockedCycles(blocked); + if (timers.Blocked > 0) { + wctx.AddBlockedCycles(timers.Blocked); } return activation; } @@ 
-201,22 +275,26 @@ namespace NActors { return 0; } - inline void TBasicExecutorPool::WakeUpLoop() { - for (ui32 i = 0;;) { - TThreadCtx& threadCtx = Threads[i % PoolThreads]; - switch (AtomicLoad(&threadCtx.WaitingFlag)) { + inline void TBasicExecutorPool::WakeUpLoop(i16 currentThreadCount) { + for (i16 i = 0;;) { + TThreadCtx& threadCtx = Threads[i]; + TThreadCtx::EWaitState state = static_cast<TThreadCtx::EWaitState>(AtomicLoad(&threadCtx.WaitingFlag)); + switch (state) { case TThreadCtx::WS_NONE: case TThreadCtx::WS_RUNNING: - ++i; - break; - case TThreadCtx::WS_ACTIVE: // in active spin-lock, just set flag - if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_RUNNING, TThreadCtx::WS_ACTIVE)) { - return; + if (++i >= MaxThreadCount) { + i = 0; } break; + case TThreadCtx::WS_ACTIVE: case TThreadCtx::WS_BLOCKED: - if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_RUNNING, TThreadCtx::WS_BLOCKED)) { - threadCtx.Pad.Unpark(); + if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_RUNNING, state)) { + if (state == TThreadCtx::WS_BLOCKED) { + threadCtx.Pad.Unpark(); + } + if (i >= currentThreadCount) { + AtomicIncrement(WrongWakenedThreadCount); + } return; } break; @@ -228,14 +306,42 @@ namespace NActors { void TBasicExecutorPool::ScheduleActivationEx(ui32 activation, ui64 revolvingCounter) { Activations.Push(activation, revolvingCounter); - const TAtomic x = AtomicIncrement(Semaphore); - if (x <= 0) { // we must find someone to wake-up - WakeUpLoop(); + bool needToWakeUp = false; + + TAtomic x = AtomicGet(Semaphore); + TSemaphore semaphore = TSemaphore::GetSemaphore(x); + do { + needToWakeUp = semaphore.CurrentSleepThreadCount > 0; + i64 oldX = semaphore.ConverToI64(); + semaphore.OldSemaphore++; + if (needToWakeUp) { + semaphore.CurrentSleepThreadCount--; + } + x = AtomicGetAndCas(&Semaphore, semaphore.ConverToI64(), oldX); + if (x == oldX) { + break; + } + semaphore = TSemaphore::GetSemaphore(x); + } while (true); + + if (needToWakeUp) { // we must find 
someone to wake-up + WakeUpLoop(semaphore.CurrentThreadCount); } } void TBasicExecutorPool::GetCurrentStats(TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const { poolStats.MaxUtilizationTime = RelaxedLoad(&MaxUtilizationAccumulator) / (i64)(NHPTimer::GetCyclesPerSecond() / 1000); + poolStats.WrongWakenedThreadCount = RelaxedLoad(&WrongWakenedThreadCount); + poolStats.CurrentThreadCount = RelaxedLoad(&ThreadCount); + if (Harmonizer) { + TPoolHarmonizedStats stats = Harmonizer->GetPoolStats(PoolId); + poolStats.IsNeedy = stats.IsNeedy; + poolStats.IsStarved = stats.IsStarved; + poolStats.IsHoggish = stats.IsHoggish; + poolStats.IncreasingThreadsByNeedyState = stats.IncreasingThreadsByNeedyState; + poolStats.DecreasingThreadsByStarvedState = stats.DecreasingThreadsByStarvedState; + poolStats.DecreasingThreadsByHoggishState = stats.DecreasingThreadsByHoggishState; + } statsCopy.resize(PoolThreads + 1); // Save counters from the pool object @@ -345,87 +451,71 @@ namespace NActors { with_lock (ChangeThreadsLock) { size_t prevCount = GetThreadCount(); AtomicSet(ThreadCount, threads); - if (prevCount < threads) { - for (size_t i = prevCount; i < threads; ++i) { - bool repeat = true; - while (repeat) { - switch (AtomicGet(Threads[i].BlockedFlag)) { - case TThreadCtx::BS_BLOCKING: - if (AtomicCas(&Threads[i].BlockedFlag, TThreadCtx::BS_NONE, TThreadCtx::BS_BLOCKING)) { - // thread not entry to blocked loop - repeat = false; - } - break; - case TThreadCtx::BS_BLOCKED: - // thread entry to blocked loop and we wake it - AtomicSet(Threads[i].BlockedFlag, TThreadCtx::BS_NONE); - Threads[i].BlockedPad.Unpark(); - repeat = false; - break; - default: - // thread mustn't has TThreadCtx::BS_NONE because last time it was started to block - Y_FAIL("BlockedFlag is not TThreadCtx::BS_BLOCKING and TThreadCtx::BS_BLOCKED when thread was waked up"); - } - } - } - } else if (prevCount > threads) { - // at first, start to block - for (size_t i = threads; i < 
prevCount; ++i) { - Y_VERIFY(AtomicGet(Threads[i].BlockedFlag) == TThreadCtx::BS_NONE); - AtomicSet(Threads[i].BlockedFlag, TThreadCtx::BS_BLOCKING); - } - // after check need to wake up threads - for (size_t idx = threads; idx < prevCount; ++idx) { - TThreadCtx& threadCtx = Threads[idx]; - auto waitingFlag = AtomicGet(threadCtx.WaitingFlag); - auto blockedFlag = AtomicGet(threadCtx.BlockedFlag); - // while thread has this states (WS_NONE and BS_BLOCKING) we can't guess which way thread will go. - // Either go to sleep and it will have to wake up, - // or go to execute task and after completion will be blocked. - while (waitingFlag == TThreadCtx::WS_NONE && blockedFlag == TThreadCtx::BS_BLOCKING) { - waitingFlag = AtomicGet(threadCtx.WaitingFlag); - blockedFlag = AtomicGet(threadCtx.BlockedFlag); - } - // next states: - // 1) WS_ACTIVE BS_BLOCKING - waiting and start spinig | need wake up to block - // 2) WS_BLOCKED BS_BLOCKING - waiting and start sleep | need wake up to block - // 3) WS_RUNNING BS_BLOCKING - start execute | not need wake up, will block after executing - // 4) WS_NONE BS_BLOCKED - blocked | not need wake up, already blocked - - if (waitingFlag == TThreadCtx::WS_ACTIVE || waitingFlag == TThreadCtx::WS_BLOCKED) { - // need wake up - Y_VERIFY(blockedFlag == TThreadCtx::BS_BLOCKING); - - // creaty empty mailBoxHint, where LineIndex == 1 and LineHint == 0, and activations will be ignored - constexpr auto emptyMailBoxHint = TMailboxTable::LineIndexMask & -TMailboxTable::LineIndexMask; - ui64 revolvingCounter = AtomicGet(ActivationsRevolvingCounter); - - Activations.Push(emptyMailBoxHint, revolvingCounter); - - auto x = AtomicIncrement(Semaphore); - if (x <= 0) { - // try wake up. 
if success then go to next thread - switch (waitingFlag){ - case TThreadCtx::WS_ACTIVE: // in active spin-lock, just set flag - if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_RUNNING, TThreadCtx::WS_ACTIVE)) { - continue; - } - break; - case TThreadCtx::WS_BLOCKED: - if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_RUNNING, TThreadCtx::WS_BLOCKED)) { - threadCtx.Pad.Unpark(); - continue; - } - break; - default: - ; // other thread woke this sleeping thread - } - // if thread has already been awakened then we must awaken the other - WakeUpLoop(); - } - } - } + TSemaphore semaphore = TSemaphore::GetSemaphore(AtomicGet(Semaphore)); + i64 oldX = semaphore.ConverToI64(); + semaphore.CurrentThreadCount = threads; + if (threads > prevCount) { + semaphore.CurrentSleepThreadCount += (i64)threads - prevCount; + semaphore.OldSemaphore -= (i64)threads - prevCount; + } else { + semaphore.CurrentSleepThreadCount -= (i64)prevCount - threads; + semaphore.OldSemaphore += prevCount - threads; } + AtomicAdd(Semaphore, semaphore.ConverToI64() - oldX); + LWPROBE(ThreadCount, PoolId, PoolName, threads, MinThreadCount, MaxThreadCount, DefaultThreadCount); + } + } + + i16 TBasicExecutorPool::GetDefaultThreadCount() const { + return DefaultThreadCount; + } + + i16 TBasicExecutorPool::GetMinThreadCount() const { + return MinThreadCount; + } + + i16 TBasicExecutorPool::GetMaxThreadCount() const { + return MaxThreadCount; + } + + bool TBasicExecutorPool::IsThreadBeingStopped(i16 threadIdx) const { + if ((ui32)threadIdx >= PoolThreads) { + return false; + } + auto blockedFlag = AtomicGet(Threads[threadIdx].BlockedFlag); + if (blockedFlag == TThreadCtx::BS_BLOCKING) { + return true; + } + return false; + } + + double TBasicExecutorPool::GetThreadConsumedUs(i16 threadIdx) { + if ((ui32)threadIdx >= PoolThreads) { + return 0; + } + TThreadCtx& threadCtx = Threads[threadIdx]; + TExecutorThreadStats stats; + threadCtx.Thread->GetCurrentStats(stats); + return Ts2Us(stats.ElapsedTicks); + } 
+ + double TBasicExecutorPool::GetThreadBookedUs(i16 threadIdx) { + if ((ui32)threadIdx >= PoolThreads) { + return 0; } + TThreadCtx& threadCtx = Threads[threadIdx]; + TExecutorThreadStats stats; + threadCtx.Thread->GetCurrentStats(stats); + return stats.CpuNs / 1000.0; + } + + i16 TBasicExecutorPool::GetBlockingThreadCount() const { + TAtomic x = AtomicGet(Semaphore); + TSemaphore semaphore = TSemaphore::GetSemaphore(x); + return -Min<i16>(semaphore.CurrentSleepThreadCount, 0); + } + + i16 TBasicExecutorPool::GetPriority() const { + return Priority; } } diff --git a/library/cpp/actors/core/executor_pool_basic.h b/library/cpp/actors/core/executor_pool_basic.h index 023190f7fe..cd94a998f1 100644 --- a/library/cpp/actors/core/executor_pool_basic.h +++ b/library/cpp/actors/core/executor_pool_basic.h @@ -4,6 +4,7 @@ #include "executor_thread.h" #include "scheduler_queue.h" #include "executor_pool_base.h" +#include "harmonizer.h" #include <library/cpp/actors/util/unordered_cache.h> #include <library/cpp/actors/util/threadparkpad.h> #include <library/cpp/monlib/dynamic_counters/counters.h> @@ -45,6 +46,14 @@ namespace NActors { } }; + struct TTimers { + NHPTimer::STime Elapsed = 0; + NHPTimer::STime Parked = 0; + NHPTimer::STime Blocked = 0; + NHPTimer::STime HPStart = GetCycleCountFast(); + NHPTimer::STime HPNow; + }; + const ui64 SpinThreshold; const ui64 SpinThresholdCycles; @@ -62,11 +71,42 @@ namespace NActors { TAtomic ThreadUtilization; TAtomic MaxUtilizationCounter; TAtomic MaxUtilizationAccumulator; + TAtomic WrongWakenedThreadCount; TAtomic ThreadCount; TMutex ChangeThreadsLock; + i16 MinThreadCount; + i16 MaxThreadCount; + i16 DefaultThreadCount; + IHarmonizer *Harmonizer; + + const i16 Priority = 0; + public: + struct TSemaphore { + i64 OldSemaphore = 0; // 34 bits + // Sign bit + i16 CurrentSleepThreadCount = 0; // 14 bits + // Sign bit + i16 CurrentThreadCount = 0; // 14 bits + + inline i64 ConverToI64() { + i64 value = (1ll << 34) + OldSemaphore; + return 
value + | (((i64)CurrentSleepThreadCount + (1 << 14)) << 35) + | ((i64)CurrentThreadCount << 50); + } + + static inline TSemaphore GetSemaphore(i64 value) { + TSemaphore semaphore; + semaphore.OldSemaphore = (value & 0x7ffffffffll) - (1ll << 34); + semaphore.CurrentSleepThreadCount = ((value >> 35) & 0x7fff) - (1 << 14); + semaphore.CurrentThreadCount = (value >> 50) & 0x3fff; + return semaphore; + } + }; + static constexpr TDuration DEFAULT_TIME_PER_MAILBOX = TBasicExecutorPoolConfig::DEFAULT_TIME_PER_MAILBOX; static constexpr ui32 DEFAULT_EVENTS_PER_MAILBOX = TBasicExecutorPoolConfig::DEFAULT_EVENTS_PER_MAILBOX; @@ -74,12 +114,17 @@ namespace NActors { ui32 threads, ui64 spinThreshold, const TString& poolName = "", + IHarmonizer *harmonizer = nullptr, TAffinity* affinity = nullptr, TDuration timePerMailbox = DEFAULT_TIME_PER_MAILBOX, ui32 eventsPerMailbox = DEFAULT_EVENTS_PER_MAILBOX, int realtimePriority = 0, - ui32 maxActivityType = 1); - explicit TBasicExecutorPool(const TBasicExecutorPoolConfig& cfg); + ui32 maxActivityType = 1, + i16 minThreadCount = 0, + i16 maxThreadCount = 0, + i16 defaultThreadCount = 0, + i16 priority = 0); + explicit TBasicExecutorPool(const TBasicExecutorPoolConfig& cfg, IHarmonizer *harmonizer); ~TBasicExecutorPool(); ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingReadCounter) override; @@ -102,10 +147,22 @@ namespace NActors { void SetRealTimeMode() const override; - ui32 GetThreadCount() const; - void SetThreadCount(ui32 threads); + ui32 GetThreadCount() const override; + void SetThreadCount(ui32 threads) override; + i16 GetDefaultThreadCount() const override; + i16 GetMinThreadCount() const override; + i16 GetMaxThreadCount() const override; + bool IsThreadBeingStopped(i16 threadIdx) const override; + double GetThreadConsumedUs(i16 threadIdx) override; + double GetThreadBookedUs(i16 threadIdx) override; + i16 GetBlockingThreadCount() const override; + i16 GetPriority() const override; private: - void WakeUpLoop(); + 
void WakeUpLoop(i16 currentThreadCount); + bool GoToWaiting(TThreadCtx& threadCtx, TTimers &timers, bool needToBlock); + void GoToSpin(TThreadCtx& threadCtx); + bool GoToSleep(TThreadCtx& threadCtx, TTimers &timers); + bool GoToBeBlocked(TThreadCtx& threadCtx, TTimers &timers); }; } diff --git a/library/cpp/actors/core/executor_pool_basic_ut.cpp b/library/cpp/actors/core/executor_pool_basic_ut.cpp index 76dff693af..067fb30a1c 100644 --- a/library/cpp/actors/core/executor_pool_basic_ut.cpp +++ b/library/cpp/actors/core/executor_pool_basic_ut.cpp @@ -6,10 +6,14 @@ #include <library/cpp/actors/util/should_continue.h> #include <library/cpp/testing/unittest/registar.h> -#include <library/cpp/actors/protos/unittests.pb.h> using namespace NActors; +#define VALUES_EQUAL(a, b, ...) \ + UNIT_ASSERT_VALUES_EQUAL_C((a), (b), (i64)semaphore.OldSemaphore \ + << ' ' << (i64)semaphore.CurrentSleepThreadCount \ + << ' ' << (i64)semaphore.CurrentThreadCount __VA_ARGS__); + //////////////////////////////////////////////////////////////////////////////// struct TEvMsg : public NActors::TEventBase<TEvMsg, 10347> { @@ -90,138 +94,59 @@ THolder<TActorSystemSetup> GetActorSystemSetup(TBasicExecutorPool* pool) Y_UNIT_TEST_SUITE(BasicExecutorPool) { - Y_UNIT_TEST(DecreaseIncreaseThreadsCount) { - const size_t msgCount = 1e4; - const size_t size = 4; - const size_t halfSize = size / 2; - TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50); + Y_UNIT_TEST(Semaphore) { + TBasicExecutorPool::TSemaphore semaphore; + semaphore = TBasicExecutorPool::TSemaphore::GetSemaphore(0); - auto setup = GetActorSystemSetup(executorPool); - TActorSystem actorSystem(setup); - actorSystem.Start(); + VALUES_EQUAL(0, semaphore.ConverToI64()); + semaphore = TBasicExecutorPool::TSemaphore::GetSemaphore(-1); + VALUES_EQUAL(-1, semaphore.ConverToI64()); + semaphore = TBasicExecutorPool::TSemaphore::GetSemaphore(1); + VALUES_EQUAL(1, semaphore.ConverToI64()); - executorPool->SetThreadCount(halfSize); 
- TTestSenderActor* actors[size]; - TActorId actorIds[size]; - for (size_t i = 0; i < size; ++i) { - actors[i] = new TTestSenderActor(); - actorIds[i] = actorSystem.Register(actors[i]); + for (i64 value = -1'000'000; value <= 1'000'000; ++value) { + VALUES_EQUAL(TBasicExecutorPool::TSemaphore::GetSemaphore(value).ConverToI64(), value); } - const int testCount = 2; - - TExecutorPoolStats poolStats[testCount]; - TVector<TExecutorThreadStats> statsCopy[testCount]; - - for (size_t testIdx = 0; testIdx < testCount; ++testIdx) { - for (size_t i = 0; i < size; ++i) { - actors[i]->Start(actors[i]->SelfId(), msgCount); + for (i8 sleepThreads = -10; sleepThreads <= 10; ++sleepThreads) { + + semaphore = TBasicExecutorPool::TSemaphore(); + semaphore.CurrentSleepThreadCount = sleepThreads; + i64 initialValue = semaphore.ConverToI64(); + + semaphore = TBasicExecutorPool::TSemaphore::GetSemaphore(initialValue - 1); + VALUES_EQUAL(-1, semaphore.OldSemaphore); + + i64 value = initialValue; + value -= 100; + for (i32 expected = -100; expected <= 100; ++expected) { + semaphore = TBasicExecutorPool::TSemaphore::GetSemaphore(value); + UNIT_ASSERT_VALUES_EQUAL_C(expected, semaphore.OldSemaphore, (i64)semaphore.OldSemaphore + << ' ' << (i64)semaphore.CurrentSleepThreadCount + << ' ' << (i64)semaphore.CurrentThreadCount); + UNIT_ASSERT_VALUES_EQUAL_C(sleepThreads, semaphore.CurrentSleepThreadCount, (i64)semaphore.OldSemaphore + << ' ' << (i64)semaphore.CurrentSleepThreadCount + << ' ' << (i64)semaphore.CurrentThreadCount); + semaphore = TBasicExecutorPool::TSemaphore(); + semaphore.OldSemaphore = expected; + semaphore.CurrentSleepThreadCount = sleepThreads; + UNIT_ASSERT_VALUES_EQUAL(semaphore.ConverToI64(), value); + value++; } - for (size_t i = 0; i < size; ++i) { - actorSystem.Send(actorIds[i], new TEvMsg()); - } - - Sleep(TDuration::MilliSeconds(100)); - for (size_t i = 0; i < size; ++i) { - actors[i]->Stop(); + for (i32 expected = 101; expected >= -101; --expected) { + semaphore = 
TBasicExecutorPool::TSemaphore::GetSemaphore(value); + UNIT_ASSERT_VALUES_EQUAL_C(expected, semaphore.OldSemaphore, (i64)semaphore.OldSemaphore + << ' ' << (i64)semaphore.CurrentSleepThreadCount + << ' ' << (i64)semaphore.CurrentThreadCount); + UNIT_ASSERT_VALUES_EQUAL_C(sleepThreads, semaphore.CurrentSleepThreadCount, (i64)semaphore.OldSemaphore + << ' ' << (i64)semaphore.CurrentSleepThreadCount + << ' ' << (i64)semaphore.CurrentThreadCount); + value--; } - - executorPool->GetCurrentStats(poolStats[testIdx], statsCopy[testIdx]); } - for (size_t i = 1; i <= halfSize; ++i) { - UNIT_ASSERT_UNEQUAL(statsCopy[0][i].ReceivedEvents, statsCopy[1][i].ReceivedEvents); - } - - for (size_t i = halfSize + 1; i <= size; ++i) { - UNIT_ASSERT_EQUAL(statsCopy[0][i].ReceivedEvents, statsCopy[1][i].ReceivedEvents); - } - - executorPool->SetThreadCount(size); - - for (size_t testIdx = 0; testIdx < testCount; ++testIdx) { - for (size_t i = 0; i < size; ++i) { - actors[i]->Start(actors[i]->SelfId(), msgCount); - } - for (size_t i = 0; i < size; ++i) { - actorSystem.Send(actorIds[i], new TEvMsg()); - } - - Sleep(TDuration::MilliSeconds(100)); - - for (size_t i = 0; i < size; ++i) { - actors[i]->Stop(); - } - - executorPool->GetCurrentStats(poolStats[testIdx], statsCopy[testIdx]); - } - - for (size_t i = 1; i <= size; ++i) { - UNIT_ASSERT_UNEQUAL(statsCopy[0][i].ReceivedEvents, statsCopy[1][i].ReceivedEvents); - } - } - - Y_UNIT_TEST(ChangeCount) { - const size_t msgCount = 1e3; - const size_t size = 4; - const size_t halfSize = size / 2; - TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50); - - auto begin = TInstant::Now(); - - auto setup = GetActorSystemSetup(executorPool); - TActorSystem actorSystem(setup); - actorSystem.Start(); - executorPool->SetThreadCount(halfSize); - - TTestSenderActor* actors[size]; - TActorId actorIds[size]; - for (size_t i = 0; i < size; ++i) { - actors[i] = new TTestSenderActor(); - actorIds[i] = actorSystem.Register(actors[i]); - } - - 
for (size_t i = 0; i < size; ++i) { - actors[i]->Start(actorIds[i], msgCount); - } - for (size_t i = 0; i < size; ++i) { - actorSystem.Send(actorIds[i], new TEvMsg()); - } - - const i32 N = 6; - const i32 threadsCouns[N] = { 1, 3, 2, 3, 1, 4 }; - - ui64 counter = 0; - - TTestSenderActor* changerActor = new TTestSenderActor([&]{ - executorPool->SetThreadCount(threadsCouns[counter]); - counter++; - if (counter == N) { - counter = 0; - } - }); - TActorId changerActorId = actorSystem.Register(changerActor); - changerActor->Start(changerActorId, msgCount); - actorSystem.Send(changerActorId, new TEvMsg()); - - while (true) { - size_t maxCounter = 0; - for (size_t i = 0; i < size; ++i) { - maxCounter = Max(maxCounter, actors[i]->GetCounter()); - } - - if (maxCounter == 0) { - break; - } - - auto now = TInstant::Now(); - UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Max counter is " << maxCounter); - - Sleep(TDuration::MilliSeconds(1)); - } - - changerActor->Stop(); + //UNIT_ASSERT_VALUES_EQUAL_C(-1, TBasicExecutorPool::TSemaphore::GetSemaphore(value-1).OldSemaphore); } Y_UNIT_TEST(CheckCompleteOne) { @@ -433,3 +358,182 @@ Y_UNIT_TEST_SUITE(BasicExecutorPool) { UNIT_ASSERT_VALUES_EQUAL(stats[0].MailboxPushedOutBySoftPreemption, 0); } } + +Y_UNIT_TEST_SUITE(ChangingThreadsCountInBasicExecutorPool) { + + struct TMockState { + void ActorDo() {} + }; + + struct TTestActors { + const size_t Count; + TArrayHolder<TTestSenderActor*> Actors; + TArrayHolder<TActorId> ActorIds; + + TTestActors(size_t count) + : Count(count) + , Actors(new TTestSenderActor*[count]) + , ActorIds(new TActorId[count]) + { } + + void Start(TActorSystem &actorSystem, size_t msgCount) { + for (size_t i = 0; i < Count; ++i) { + Actors[i]->Start(Actors[i]->SelfId(), msgCount); + } + for (size_t i = 0; i < Count; ++i) { + actorSystem.Send(ActorIds[i], new TEvMsg()); + } + } + + void Stop() { + for (size_t i = 0; i < Count; ++i) { + Actors[i]->Stop(); + } + } + }; + + template <typename TState = 
TMockState> + struct TTestCtx { + const size_t MaxThreadCount; + const size_t SendingMessageCount; + std::unique_ptr<TBasicExecutorPool> ExecutorPool; + THolder<TActorSystemSetup> Setup; + TActorSystem ActorSystem; + + TState State; + + TTestCtx(size_t maxThreadCount, size_t sendingMessageCount) + : MaxThreadCount(maxThreadCount) + , SendingMessageCount(sendingMessageCount) + , ExecutorPool(new TBasicExecutorPool(0, MaxThreadCount, 50)) + , Setup(GetActorSystemSetup(ExecutorPool.get())) + , ActorSystem(Setup) + { + } + + TTestCtx(size_t maxThreadCount, size_t sendingMessageCount, const TState &state) + : MaxThreadCount(maxThreadCount) + , SendingMessageCount(sendingMessageCount) + , ExecutorPool(new TBasicExecutorPool(0, MaxThreadCount, 50)) + , Setup(GetActorSystemSetup(ExecutorPool.get())) + , ActorSystem(Setup) + , State(state) + { + } + + ~TTestCtx() { + ExecutorPool.release(); + } + + TTestActors RegisterCheckActors(size_t actorCount) { + TTestActors res(actorCount); + for (size_t i = 0; i < actorCount; ++i) { + res.Actors[i] = new TTestSenderActor([&] { + State.ActorDo(); + }); + res.ActorIds[i] = ActorSystem.Register(res.Actors[i]); + } + return res; + } + }; + + struct TCheckingInFlightState { + TAtomic ExpectedMaximum = 0; + TAtomic CurrentInFlight = 0; + + void ActorStartProcessing() { + ui32 inFlight = AtomicIncrement(CurrentInFlight); + ui32 maximum = AtomicGet(ExpectedMaximum); + if (maximum) { + UNIT_ASSERT_C(inFlight <= maximum, "inFlight# " << inFlight << " maximum# " << maximum); + } + } + + void ActorStopProcessing() { + AtomicDecrement(CurrentInFlight); + } + + void ActorDo() { + ActorStartProcessing(); + NanoSleep(1'000'000); + ActorStopProcessing(); + } + }; + + Y_UNIT_TEST(DecreaseIncreaseThreadCount) { + const size_t msgCount = 1e2; + const size_t size = 4; + const size_t testCount = 2; + TTestCtx<TCheckingInFlightState> ctx(size, msgCount); + ctx.ActorSystem.Start(); + + TVector<TExecutorThreadStats> statsCopy[testCount]; + + TTestActors 
testActors = ctx.RegisterCheckActors(size); + + const size_t N = 6; + const size_t threadsCounts[N] = { 1, 3, 2, 3, 1, 4 }; + for (ui32 idx = 0; idx < 4 * N; ++idx) { + size_t currentThreadCount = threadsCounts[idx]; + ctx.ExecutorPool->SetThreadCount(currentThreadCount); + AtomicSet(ctx.State.ExpectedMaximum, currentThreadCount); + + for (size_t testIdx = 0; testIdx < testCount; ++testIdx) { + testActors.Start(ctx.ActorSystem, msgCount); + Sleep(TDuration::MilliSeconds(100)); + testActors.Stop(); + } + Sleep(TDuration::MilliSeconds(10)); + } + ctx.ActorSystem.Stop(); + } + + Y_UNIT_TEST(ContiniousChangingThreadCount) { + const size_t msgCount = 1e2; + const size_t size = 4; + + auto begin = TInstant::Now(); + TTestCtx<TCheckingInFlightState> ctx(size, msgCount, TCheckingInFlightState{msgCount}); + ctx.ActorSystem.Start(); + TTestActors testActors = ctx.RegisterCheckActors(size); + + testActors.Start(ctx.ActorSystem, msgCount); + + const size_t N = 6; + const size_t threadsCouns[N] = { 1, 3, 2, 3, 1, 4 }; + + ui64 counter = 0; + + TTestSenderActor* changerActor = new TTestSenderActor([&]{ + ctx.State.ActorStartProcessing(); + AtomicSet(ctx.State.ExpectedMaximum, 0); + ctx.ExecutorPool->SetThreadCount(threadsCouns[counter]); + NanoSleep(10'000'000); + AtomicSet(ctx.State.ExpectedMaximum, threadsCouns[counter]); + counter++; + if (counter == N) { + counter = 0; + } + ctx.State.ActorStopProcessing(); + }); + TActorId changerActorId = ctx.ActorSystem.Register(changerActor); + changerActor->Start(changerActorId, msgCount); + ctx.ActorSystem.Send(changerActorId, new TEvMsg()); + + while (true) { + size_t maxCounter = 0; + for (size_t i = 0; i < size; ++i) { + maxCounter = Max(maxCounter, testActors.Actors[i]->GetCounter()); + } + if (maxCounter == 0) { + break; + } + auto now = TInstant::Now(); + UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Max counter is " << maxCounter); + Sleep(TDuration::MilliSeconds(1)); + } + + changerActor->Stop(); + ctx.ActorSystem.Stop(); 
+ } +} diff --git a/library/cpp/actors/core/executor_pool_united.cpp b/library/cpp/actors/core/executor_pool_united.cpp index dac6245635..a2cb269280 100644 --- a/library/cpp/actors/core/executor_pool_united.cpp +++ b/library/cpp/actors/core/executor_pool_united.cpp @@ -7,6 +7,7 @@ #include "mailbox.h" #include "scheduler_queue.h" #include <library/cpp/actors/util/affinity.h> +#include <library/cpp/actors/util/cpu_load_log.h> #include <library/cpp/actors/util/datetime.h> #include <library/cpp/actors/util/futex.h> #include <library/cpp/actors/util/intrinsics.h> @@ -953,6 +954,8 @@ namespace NActors { // Thread-safe per pool stats // NOTE: It's guaranteed that cpu never executes two instance of the same pool TVector<TExecutorThreadStats> PoolStats; + TCpuLoadLog<1024> LoadLog; + // Configuration TCpuId CpuId; @@ -1000,7 +1003,9 @@ namespace NActors { } bool ActiveWait(ui64 spinThresholdTs, TPoolId& result) { - ui64 deadline = GetCycleCountFast() + spinThresholdTs; + ui64 ts = GetCycleCountFast(); + LoadLog.RegisterBusyPeriod(ts); + ui64 deadline = ts + spinThresholdTs; while (GetCycleCountFast() < deadline) { for (ui32 i = 0; i < 12; ++i) { TPoolId current = State.CurrentPool(); @@ -1008,6 +1013,7 @@ namespace NActors { SpinLockPause(); } else { result = current; + LoadLog.RegisterIdlePeriod(GetCycleCountFast()); return true; // wakeup } } @@ -1269,15 +1275,25 @@ namespace NActors { if (Pools[pool].IsUnited()) { ui64 ElapsedTs = 0; ui64 ParkedTs = 0; + TStackVec<TCpuLoadLog<1024>*, 128> logs; + ui64 worstActivationTimeUs = 0; for (TCpu* cpu : Pools[pool].WakeOrderCpus) { - const TExecutorThreadStats& cpuStats = cpu->PoolStats[pool]; + TExecutorThreadStats& cpuStats = cpu->PoolStats[pool]; ElapsedTs += cpuStats.ElapsedTicks; ParkedTs += cpuStats.ParkedTicks; + worstActivationTimeUs = Max(worstActivationTimeUs, cpuStats.WorstActivationTimeUs); + cpuStats.WorstActivationTimeUs = 0; + logs.push_back(&cpu->LoadLog); } + ui64 minPeriodTs = 
Min(ui64(Us2Ts(Balancer->GetPeriodUs())), ui64((1024ull-2ull)*64ull*128ull*1024ull)); + ui64 estimatedTs = MinusOneCpuEstimator.MaxLatencyIncreaseWithOneLessCpu( + &logs[0], logs.size(), ts, minPeriodTs); TBalancerStats stats; stats.Ts = ts; stats.CpuUs = Ts2Us(ElapsedTs); stats.IdleUs = Ts2Us(ParkedTs); + stats.ExpectedLatencyIncreaseUs = Ts2Us(estimatedTs); + stats.WorstActivationTimeUs = worstActivationTimeUs; Balancer->SetPoolStats(pool, stats); } } @@ -1332,11 +1348,13 @@ namespace NActors { return result; } wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, timeTracker.Elapsed()); + cpu.LoadLog.RegisterBusyPeriod(GetCycleCountFast()); bool wakeup; do { wakeup = cpu.BlockedWait(result, Config.Balancer.PeriodUs * 1000); wctx.AddParkedCycles(timeTracker.Elapsed()); } while (!wakeup); + cpu.LoadLog.RegisterIdlePeriod(GetCycleCountFast()); return result; } diff --git a/library/cpp/actors/core/executor_pool_united.h b/library/cpp/actors/core/executor_pool_united.h index a090ba2466..0895b06462 100644 --- a/library/cpp/actors/core/executor_pool_united.h +++ b/library/cpp/actors/core/executor_pool_united.h @@ -8,6 +8,7 @@ #include <library/cpp/actors/util/unordered_cache.h> #include <library/cpp/monlib/dynamic_counters/counters.h> +#include <library/cpp/actors/util/cpu_load_log.h> #include <library/cpp/actors/util/unordered_cache.h> #include <library/cpp/containers/stack_vector/stack_vec.h> @@ -34,6 +35,7 @@ namespace NActors { TCpuAllocationConfig Allocation; volatile bool StopFlag = false; + TMinusOneCpuEstimator<1024> MinusOneCpuEstimator; public: TUnitedWorkers( diff --git a/library/cpp/actors/core/executor_pool_united_ut.cpp b/library/cpp/actors/core/executor_pool_united_ut.cpp index a1595d8588..88b04e6472 100644 --- a/library/cpp/actors/core/executor_pool_united_ut.cpp +++ b/library/cpp/actors/core/executor_pool_united_ut.cpp @@ -111,6 +111,10 @@ struct TRoundRobinBalancer: public IBalancer { State->Load(assigned, current); State->AssignPool(NextPool[assigned]); } + + 
ui64 GetPeriodUs() override { + return 1000; + } }; void AddUnitedPool(THolder<TActorSystemSetup>& setup, ui32 concurrency = 0) { diff --git a/library/cpp/actors/core/executor_thread.cpp b/library/cpp/actors/core/executor_thread.cpp index 446b651efd..4271dadab2 100644 --- a/library/cpp/actors/core/executor_thread.cpp +++ b/library/cpp/actors/core/executor_thread.cpp @@ -87,7 +87,7 @@ namespace NActors { } template <class T> - inline TString SafeTypeName(T* t) { + inline TString SafeTypeName(const T* t) { if (t == nullptr) { return "nullptr"; } @@ -98,11 +98,7 @@ namespace NActors { } } - inline TString ActorTypeName(const IActor* actor, ui32 activityType) { - return actor ? SafeTypeName(actor) : ("activityType_" + ToString(activityType) + " (destroyed)"); - } - - inline void LwTraceSlowDelivery(IEventHandle* ev, const IActor* actor, ui32 poolId, const TActorId& currentRecipient, + inline void LwTraceSlowDelivery(IEventHandle* ev, const std::type_info* actorType, ui32 poolId, const TActorId& currentRecipient, double delivMs, double sinceActivationMs, ui32 eventsExecutedBefore) { const auto baseEv = (ev && ev->HasEvent()) ? ev->GetBase() : nullptr; LWPROBE(EventSlowDelivery, @@ -112,10 +108,10 @@ namespace NActors { eventsExecutedBefore, baseEv ? SafeTypeName(baseEv) : (ev ? ToString(ev->Type) : TString("nullptr")), currentRecipient.ToString(), - SafeTypeName(actor)); + SafeTypeName(actorType)); } - inline void LwTraceSlowEvent(IEventHandle* ev, ui32 evTypeForTracing, const IActor* actor, ui32 poolId, ui32 activityType, + inline void LwTraceSlowEvent(IEventHandle* ev, ui32 evTypeForTracing, const std::type_info* actorType, ui32 poolId, const TActorId& currentRecipient, double eventMs) { // Event could have been destroyed by actor->Receive(); const auto baseEv = (ev && ev->HasEvent()) ? ev->GetBase() : nullptr; @@ -124,7 +120,7 @@ namespace NActors { eventMs, baseEv ? 
SafeTypeName(baseEv) : ToString(evTypeForTracing), currentRecipient.ToString(), - ActorTypeName(actor, activityType)); + SafeTypeName(actorType)); } template <typename TMailbox> @@ -137,6 +133,7 @@ namespace NActors { NHPTimer::STime hpprev = hpstart; IActor* actor = nullptr; + const std::type_info* actorType = nullptr; ui32 prevActivityType = std::numeric_limits<ui32>::max(); TActorId recipient; for (ui32 executed = 0; executed < Ctx.EventsPerMailbox; ++executed) { @@ -148,6 +145,9 @@ namespace NActors { TActorContext ctx(*mailbox, *this, hpprev, recipient); TlsActivationContext = &ctx; + // Since actor is not null there should be no exceptions + actorType = &typeid(*actor); + #ifdef USE_ACTOR_CALLSTACK TCallstack::GetTlsCallstack() = ev->Callstack; TCallstack::GetTlsCallstack().SetLinesToSkip(); @@ -165,7 +165,7 @@ namespace NActors { i64 usecDeliv = Ctx.AddEventDeliveryStats(ev->SendTime, hpprev); if (usecDeliv > 5000) { double sinceActivationMs = NHPTimer::GetSeconds(hpprev - hpstart) * 1000.0; - LwTraceSlowDelivery(ev.Get(), actor, Ctx.PoolId, CurrentRecipient, NHPTimer::GetSeconds(hpprev - ev->SendTime) * 1000.0, sinceActivationMs, executed); + LwTraceSlowDelivery(ev.Get(), actorType, Ctx.PoolId, CurrentRecipient, NHPTimer::GetSeconds(hpprev - ev->SendTime) * 1000.0, sinceActivationMs, executed); } ui32 evTypeForTracing = ev->Type; @@ -191,7 +191,7 @@ namespace NActors { hpnow = GetCycleCountFast(); NHPTimer::STime elapsed = Ctx.AddEventProcessingStats(hpprev, hpnow, activityType, CurrentActorScheduledEventsCounter); if (elapsed > 1000000) { - LwTraceSlowEvent(ev.Get(), evTypeForTracing, actor, Ctx.PoolId, activityType, CurrentRecipient, NHPTimer::GetSeconds(elapsed) * 1000.0); + LwTraceSlowEvent(ev.Get(), evTypeForTracing, actorType, Ctx.PoolId, CurrentRecipient, NHPTimer::GetSeconds(elapsed) * 1000.0); } // The actor might have been destroyed @@ -200,6 +200,8 @@ namespace NActors { CurrentRecipient = TActorId(); } else { + actorType = nullptr; + 
TAutoPtr<IEventHandle> nonDelivered = ev->ForwardOnNondelivery(TEvents::TEvUndelivered::ReasonActorUnknown); if (nonDelivered.Get()) { ActorSystem->Send(nonDelivered); @@ -223,7 +225,7 @@ namespace NActors { CyclesToDuration(hpnow - hpstart), Ctx.WorkerId, recipient.ToString(), - SafeTypeName(actor)); + SafeTypeName(actorType)); break; } @@ -239,7 +241,7 @@ namespace NActors { CyclesToDuration(hpnow - hpstart), Ctx.WorkerId, recipient.ToString(), - SafeTypeName(actor)); + SafeTypeName(actorType)); break; } @@ -254,7 +256,7 @@ namespace NActors { CyclesToDuration(hpnow - hpstart), Ctx.WorkerId, recipient.ToString(), - SafeTypeName(actor)); + SafeTypeName(actorType)); break; } } else { diff --git a/library/cpp/actors/core/harmonizer.cpp b/library/cpp/actors/core/harmonizer.cpp new file mode 100644 index 0000000000..f318d8909c --- /dev/null +++ b/library/cpp/actors/core/harmonizer.cpp @@ -0,0 +1,431 @@ +#include "harmonizer.h" + +#include "probes.h" +#include "actorsystem.h" + +#include <library/cpp/actors/util/cpu_load_log.h> +#include <library/cpp/actors/util/datetime.h> +#include <library/cpp/actors/util/intrinsics.h> + +#include <util/system/spinlock.h> + +#include <algorithm> + +namespace NActors { + +LWTRACE_USING(ACTORLIB_PROVIDER); + +constexpr bool CheckBinaryPower(ui64 value) { + return !(value & (value - 1)); +} + +struct TValueHistory { + static constexpr ui64 HistoryBufferSize = 8; + static_assert(CheckBinaryPower(HistoryBufferSize)); + + double History[HistoryBufferSize] = {0.0}; + ui64 HistoryIdx = 0; + ui64 LastTs = Max<ui64>(); + double LastUs = 0.0; + double AccumulatedUs = 0.0; + ui64 AccumulatedTs = 0; + + template <bool WithTail=false> + double GetAvgPartForLastSeconds(ui8 seconds) { + double sum = AccumulatedUs; + size_t idx = HistoryIdx; + ui8 leftSeconds = seconds; + do { + idx--; + leftSeconds--; + if (idx >= HistoryBufferSize) { + idx = HistoryBufferSize - 1; + } + if (WithTail || leftSeconds) { + sum += History[idx]; + } else { + ui64 
tsInSecond = Us2Ts(1'000'000.0); + sum += History[idx] * (tsInSecond - AccumulatedTs) / tsInSecond; + } + } while (leftSeconds); + double duration = 1'000'000.0 * seconds + (WithTail ? Ts2Us(AccumulatedTs): 0.0); + double avg = sum / duration; + return avg; + } + + double GetAvgPart() { + return GetAvgPartForLastSeconds<true>(HistoryBufferSize); + } + + void Register(ui64 ts, double valueUs) { + if (ts < LastTs) { + LastTs = ts; + LastUs = valueUs; + AccumulatedUs = 0.0; + AccumulatedTs = 0; + return; + } + ui64 lastTs = std::exchange(LastTs, ts); + ui64 dTs = ts - lastTs; + double lastUs = std::exchange(LastUs, valueUs); + double dUs = valueUs - lastUs; + LWPROBE(RegisterValue, ts, lastTs, dTs, Us2Ts(8'000'000.0), valueUs, lastUs, dUs); + + if (dTs > Us2Ts(8'000'000.0)) { + dUs = dUs * 1'000'000.0 / Ts2Us(dTs); + for (size_t idx = 0; idx < HistoryBufferSize; ++idx) { + History[idx] = dUs; + } + AccumulatedUs = 0.0; + AccumulatedTs = 0; + return; + } + + while (dTs > 0) { + if (AccumulatedTs + dTs < Us2Ts(1'000'000.0)) { + AccumulatedTs += dTs; + AccumulatedUs += dUs; + break; + } else { + ui64 addTs = Us2Ts(1'000'000.0) - AccumulatedTs; + double addUs = dUs * addTs / dTs; + dTs -= addTs; + dUs -= addUs; + History[HistoryIdx] = AccumulatedUs + addUs; + HistoryIdx = (HistoryIdx + 1) % HistoryBufferSize; + AccumulatedUs = 0.0; + AccumulatedTs = 0; + } + } + } +}; + +struct TThreadInfo { + TValueHistory Consumed; + TValueHistory Booked; +}; + +struct TPoolInfo { + std::vector<TThreadInfo> ThreadInfo; + IExecutorPool* Pool = nullptr; + i16 DefaultThreadCount = 0; + i16 MinThreadCount = 0; + i16 MaxThreadCount = 0; + i16 Priority = 0; + NMonitoring::TDynamicCounters::TCounterPtr AvgPingCounter; + NMonitoring::TDynamicCounters::TCounterPtr AvgPingCounterWithSmallWindow; + ui32 MaxAvgPingUs = 0; + ui64 LastUpdateTs = 0; + + TAtomic LastFlags = 0; // 0 - isNeedy; 1 - isStarved; 2 - isHoggish + TAtomic IncreasingThreadsByNeedyState = 0; + TAtomic 
DecreasingThreadsByStarvedState = 0; + TAtomic DecreasingThreadsByHoggishState = 0; + + bool IsBeingStopped(i16 threadIdx); + double GetBooked(i16 threadIdx); + double GetlastSecondPoolBooked(i16 threadIdx); + double GetConsumed(i16 threadIdx); + double GetlastSecondPoolConsumed(i16 threadIdx); + void PullStats(ui64 ts); + i16 GetThreadCount(); + void SetThreadCount(i16 threadCount); + bool IsAvgPingGood(); +}; + +bool TPoolInfo::IsBeingStopped(i16 threadIdx) { + return Pool->IsThreadBeingStopped(threadIdx); +} + +double TPoolInfo::GetBooked(i16 threadIdx) { + if ((size_t)threadIdx < ThreadInfo.size()) { + return ThreadInfo[threadIdx].Booked.GetAvgPart(); + } + return 0.0; +} + +double TPoolInfo::GetlastSecondPoolBooked(i16 threadIdx) { + if ((size_t)threadIdx < ThreadInfo.size()) { + return ThreadInfo[threadIdx].Booked.GetAvgPartForLastSeconds(1); + } + return 0.0; +} + +double TPoolInfo::GetConsumed(i16 threadIdx) { + if ((size_t)threadIdx < ThreadInfo.size()) { + return ThreadInfo[threadIdx].Consumed.GetAvgPart(); + } + return 0.0; +} + +double TPoolInfo::GetlastSecondPoolConsumed(i16 threadIdx) { + if ((size_t)threadIdx < ThreadInfo.size()) { + return ThreadInfo[threadIdx].Consumed.GetAvgPartForLastSeconds(1); + } + return 0.0; +} + +#define UNROLL_HISTORY(history) (history)[0], (history)[1], (history)[2], (history)[3], (history)[4], (history)[5], (history)[6], (history)[7] +void TPoolInfo::PullStats(ui64 ts) { + for (i16 threadIdx = 0; threadIdx < MaxThreadCount; ++threadIdx) { + TThreadInfo &threadInfo = ThreadInfo[threadIdx]; + threadInfo.Consumed.Register(ts, Pool->GetThreadConsumedUs(threadIdx)); + LWPROBE(SavedValues, Pool->PoolId, Pool->GetName(), "consumed", UNROLL_HISTORY(threadInfo.Consumed.History)); + threadInfo.Booked.Register(ts, Pool->GetThreadBookedUs(threadIdx)); + LWPROBE(SavedValues, Pool->PoolId, Pool->GetName(), "booked", UNROLL_HISTORY(threadInfo.Booked.History)); + } +} +#undef UNROLL_HISTORY + +i16 TPoolInfo::GetThreadCount() { + return 
Pool->GetThreadCount(); +} + +void TPoolInfo::SetThreadCount(i16 threadCount) { + Pool->SetThreadCount(threadCount); +} + +bool TPoolInfo::IsAvgPingGood() { + bool res = true; + if (AvgPingCounter) { + res &= *AvgPingCounter > MaxAvgPingUs; + } + if (AvgPingCounterWithSmallWindow) { + res &= *AvgPingCounterWithSmallWindow > MaxAvgPingUs; + } + return res; +} + +class THarmonizer: public IHarmonizer { +private: + std::atomic<bool> IsDisabled = false; + TSpinLock Lock; + std::atomic<ui64> NextHarmonizeTs = 0; + std::vector<TPoolInfo> Pools; + std::vector<ui16> PriorityOrder; + + void PullStats(ui64 ts); + void HarmonizeImpl(ui64 ts); + void CalculatePriorityOrder(); +public: + THarmonizer(ui64 ts); + virtual ~THarmonizer(); + double Rescale(double value) const; + void Harmonize(ui64 ts) override; + void DeclareEmergency(ui64 ts) override; + void AddPool(IExecutorPool* pool, TSelfPingInfo *pingInfo) override; + void Enable(bool enable) override; + TPoolHarmonizedStats GetPoolStats(i16 poolId) const override; +}; + +THarmonizer::THarmonizer(ui64 ts) { + NextHarmonizeTs = ts; +} + +THarmonizer::~THarmonizer() { +} + +double THarmonizer::Rescale(double value) const { + return Max(0.0, Min(1.0, value * (1.0/0.9))); +} + +void THarmonizer::PullStats(ui64 ts) { + for (TPoolInfo &pool : Pools) { + pool.PullStats(ts); + } +} + +Y_FORCE_INLINE bool IsStarved(double consumed, double booked) { + return Max(consumed, booked) > 0.1 && consumed < booked * 0.7; +} + +Y_FORCE_INLINE bool IsHoggish(double booked, ui16 currentThreadCount) { + return booked < currentThreadCount - 1; +} + +void THarmonizer::HarmonizeImpl(ui64 ts) { + bool isStarvedPresent = false; + double booked = 0.0; + double consumed = 0.0; + double lastSecondBooked = 0.0; + i64 beingStopped = 0; + i64 total = 0; + TStackVec<size_t, 8> needyPools; + TStackVec<size_t, 8> hoggishPools; + for (size_t poolIdx = 0; poolIdx < Pools.size(); ++poolIdx) { + TPoolInfo& pool = Pools[poolIdx]; + total += pool.DefaultThreadCount; 
+ double poolBooked = 0.0; + double poolConsumed = 0.0; + double lastSecondPoolBooked = 0.0; + double lastSecondPoolConsumed = 0.0; + beingStopped += pool.Pool->GetBlockingThreadCount(); + for (i16 threadIdx = 0; threadIdx < pool.MaxThreadCount; ++threadIdx) { + poolBooked += Rescale(pool.GetBooked(threadIdx)); + lastSecondPoolBooked += Rescale(pool.GetlastSecondPoolBooked(threadIdx)); + poolConsumed += Rescale(pool.GetConsumed(threadIdx)); + lastSecondPoolConsumed += Rescale(pool.GetlastSecondPoolConsumed(threadIdx)); + } + bool isStarved = IsStarved(poolConsumed, poolBooked) || IsStarved(lastSecondPoolConsumed, lastSecondPoolBooked); + if (isStarved) { + isStarvedPresent = true; + } + ui32 currentThreadCount = pool.GetThreadCount(); + bool isNeedy = pool.IsAvgPingGood() && poolBooked >= currentThreadCount; + if (pool.AvgPingCounter) { + if (pool.LastUpdateTs + Us2Ts(3'000'000ull) > ts) { + isNeedy = false; + } else { + pool.LastUpdateTs = ts; + } + } + if (isNeedy) { + needyPools.push_back(poolIdx); + } + bool isHoggish = IsHoggish(poolBooked, currentThreadCount) + || IsHoggish(lastSecondPoolBooked, currentThreadCount); + if (isHoggish) { + hoggishPools.push_back(poolIdx); + } + booked += poolBooked; + consumed += poolConsumed; + AtomicSet(pool.LastFlags, (i64)isNeedy | ((i64)isStarved << 1) | ((i64)isHoggish << 2)); + LWPROBE(HarmonizeCheckPool, poolIdx, pool.Pool->GetName(), poolBooked, poolConsumed, lastSecondPoolBooked, lastSecondPoolConsumed, pool.GetThreadCount(), pool.MaxThreadCount, isStarved, isNeedy, isHoggish); + } + double budget = total - Max(booked, lastSecondBooked); + if (budget < -0.1) { + isStarvedPresent = true; + } + double overbooked = consumed - booked; + if (isStarvedPresent) { + // last_starved_at_consumed_value = сумма по всем пулам consumed; + // TODO(cthulhu): использовать как лимит планвно устремлять этот лимит к total, + // использовать вместо total + if (beingStopped && beingStopped >= overbooked) { + // do nothing + } else { + 
TStackVec<size_t> reorder; + for (size_t i = 0; i < Pools.size(); ++i) { + reorder.push_back(i); + } + for (ui16 poolIdx : PriorityOrder) { + TPoolInfo &pool = Pools[poolIdx]; + i64 threadCount = pool.GetThreadCount(); + if (threadCount > pool.DefaultThreadCount) { + pool.SetThreadCount(threadCount - 1); + AtomicIncrement(pool.DecreasingThreadsByStarvedState); + overbooked--; + LWPROBE(HarmonizeOperation, poolIdx, pool.Pool->GetName(), "decrease", threadCount - 1, pool.DefaultThreadCount, pool.MaxThreadCount); + if (overbooked < 1) { + break; + } + } + } + } + } else { + for (size_t needyPoolIdx : needyPools) { + TPoolInfo &pool = Pools[needyPoolIdx]; + if (budget >= 1.0) { + i64 threadCount = pool.GetThreadCount(); + if (threadCount + 1 <= pool.MaxThreadCount) { + AtomicIncrement(pool.IncreasingThreadsByNeedyState); + pool.SetThreadCount(threadCount + 1); + budget -= 1.0; + LWPROBE(HarmonizeOperation, needyPoolIdx, pool.Pool->GetName(), "increase", threadCount + 1, pool.DefaultThreadCount, pool.MaxThreadCount); + } + } + } + } + for (size_t hoggishPoolIdx : hoggishPools) { + TPoolInfo &pool = Pools[hoggishPoolIdx]; + i64 threadCount = pool.GetThreadCount(); + if (threadCount > pool.MinThreadCount) { + AtomicIncrement(pool.DecreasingThreadsByHoggishState); + LWPROBE(HarmonizeOperation, hoggishPoolIdx, pool.Pool->GetName(), "decrease", threadCount - 1, pool.DefaultThreadCount, pool.MaxThreadCount); + pool.SetThreadCount(threadCount - 1); + } + } +} + +void THarmonizer::CalculatePriorityOrder() { + PriorityOrder.resize(Pools.size()); + Iota(PriorityOrder.begin(), PriorityOrder.end(), 0); + Sort(PriorityOrder.begin(), PriorityOrder.end(), [&] (i16 lhs, i16 rhs) { + if (Pools[lhs].Priority != Pools[rhs].Priority) { + return Pools[lhs].Priority < Pools[rhs].Priority; + } + return Pools[lhs].Pool->PoolId > Pools[rhs].Pool->PoolId; + }); +} + +void THarmonizer::Harmonize(ui64 ts) { + if (IsDisabled || NextHarmonizeTs > ts || !Lock.TryAcquire()) { + 
LWPROBE(TryToHarmonizeFailed, ts, NextHarmonizeTs, IsDisabled, false); + return; + } + // Check again under the lock + if (IsDisabled) { + LWPROBE(TryToHarmonizeFailed, ts, NextHarmonizeTs, IsDisabled, true); + Lock.Release(); + return; + } + // Will never reach this line disabled + ui64 previousNextHarmonizeTs = NextHarmonizeTs.exchange(ts + Us2Ts(1'000'000ull)); + LWPROBE(TryToHarmonizeSuccess, ts, NextHarmonizeTs, previousNextHarmonizeTs); + + if (PriorityOrder.empty()) { + CalculatePriorityOrder(); + } + + PullStats(ts); + HarmonizeImpl(ts); + + Lock.Release(); +} + +void THarmonizer::DeclareEmergency(ui64 ts) { + NextHarmonizeTs = ts; +} + +void THarmonizer::AddPool(IExecutorPool* pool, TSelfPingInfo *pingInfo) { + TGuard<TSpinLock> guard(Lock); + TPoolInfo poolInfo; + poolInfo.Pool = pool; + poolInfo.DefaultThreadCount = pool->GetDefaultThreadCount(); + poolInfo.MinThreadCount = pool->GetMinThreadCount(); + poolInfo.MaxThreadCount = pool->GetMaxThreadCount(); + poolInfo.ThreadInfo.resize(poolInfo.MaxThreadCount); + poolInfo.Priority = pool->GetPriority(); + pool->SetThreadCount(poolInfo.DefaultThreadCount); + if (pingInfo) { + poolInfo.AvgPingCounter = pingInfo->AvgPingCounter; + poolInfo.AvgPingCounterWithSmallWindow = pingInfo->AvgPingCounterWithSmallWindow; + poolInfo.MaxAvgPingUs = pingInfo->MaxAvgPingUs; + } + Pools.push_back(poolInfo); + PriorityOrder.clear(); +}; + +void THarmonizer::Enable(bool enable) { + TGuard<TSpinLock> guard(Lock); + IsDisabled = enable; +} + +IHarmonizer* MakeHarmonizer(ui64 ts) { + return new THarmonizer(ts); +} + +TPoolHarmonizedStats THarmonizer::GetPoolStats(i16 poolId) const { + const TPoolInfo &pool = Pools[poolId]; + ui64 flags = RelaxedLoad(&pool.LastFlags); + return TPoolHarmonizedStats { + .IncreasingThreadsByNeedyState = static_cast<ui64>(RelaxedLoad(&pool.IncreasingThreadsByNeedyState)), + .DecreasingThreadsByStarvedState = static_cast<ui64>(RelaxedLoad(&pool.DecreasingThreadsByStarvedState)), + 
.DecreasingThreadsByHoggishState = static_cast<ui64>(RelaxedLoad(&pool.DecreasingThreadsByHoggishState)), + .IsNeedy = static_cast<bool>(flags & 1), + .IsStarved = static_cast<bool>(flags & 2), + .IsHoggish = static_cast<bool>(flags & 4), + }; +} + +} diff --git a/library/cpp/actors/core/harmonizer.h b/library/cpp/actors/core/harmonizer.h new file mode 100644 index 0000000000..61f13e43ac --- /dev/null +++ b/library/cpp/actors/core/harmonizer.h @@ -0,0 +1,30 @@ +#pragma once + +#include "defs.h" +#include "config.h" + +namespace NActors { + class IExecutorPool; + + struct TPoolHarmonizedStats { + ui64 IncreasingThreadsByNeedyState = 0; + ui64 DecreasingThreadsByStarvedState = 0; + ui64 DecreasingThreadsByHoggishState = 0; + bool IsNeedy = false; + bool IsStarved = false; + bool IsHoggish = false; + }; + + // Pool cpu harmonizer + class IHarmonizer { + public: + virtual ~IHarmonizer() {} + virtual void Harmonize(ui64 ts) = 0; + virtual void DeclareEmergency(ui64 ts) = 0; + virtual void AddPool(IExecutorPool* pool, TSelfPingInfo *pingInfo = nullptr) = 0; + virtual void Enable(bool enable) = 0; + virtual TPoolHarmonizedStats GetPoolStats(i16 poolId) const = 0; + }; + + IHarmonizer* MakeHarmonizer(ui64 ts); +} diff --git a/library/cpp/actors/core/mon_stats.h b/library/cpp/actors/core/mon_stats.h index 6d482926d1..117d2ad41d 100644 --- a/library/cpp/actors/core/mon_stats.h +++ b/library/cpp/actors/core/mon_stats.h @@ -60,6 +60,14 @@ namespace NActors { struct TExecutorPoolStats { ui64 MaxUtilizationTime = 0; + ui64 IncreasingThreadsByNeedyState = 0; + ui64 DecreasingThreadsByStarvedState = 0; + ui64 DecreasingThreadsByHoggishState = 0; + i16 WrongWakenedThreadCount = 0; + i16 CurrentThreadCount = 0; + bool IsNeedy = false; + bool IsStarved = false; + bool IsHoggish = false; }; struct TExecutorThreadStats { @@ -69,6 +77,7 @@ namespace NActors { ui64 NonDeliveredEvents = 0; ui64 EmptyMailboxActivation = 0; ui64 CpuNs = 0; // nanoseconds thread was executing on CPU 
(accounts for preemtion) + ui64 WorstActivationTimeUs = 0; NHPTimer::STime ElapsedTicks = 0; NHPTimer::STime ParkedTicks = 0; NHPTimer::STime BlockedTicks = 0; @@ -111,6 +120,9 @@ namespace NActors { NonDeliveredEvents += RelaxedLoad(&other.NonDeliveredEvents); EmptyMailboxActivation += RelaxedLoad(&other.EmptyMailboxActivation); CpuNs += RelaxedLoad(&other.CpuNs); + RelaxedStore( + &WorstActivationTimeUs, + std::max(RelaxedLoad(&WorstActivationTimeUs), RelaxedLoad(&other.WorstActivationTimeUs))); ElapsedTicks += RelaxedLoad(&other.ElapsedTicks); ParkedTicks += RelaxedLoad(&other.ParkedTicks); BlockedTicks += RelaxedLoad(&other.BlockedTicks); diff --git a/library/cpp/actors/core/probes.h b/library/cpp/actors/core/probes.h index 4912d6dd26..11bbf81287 100644 --- a/library/cpp/actors/core/probes.h +++ b/library/cpp/actors/core/probes.h @@ -166,6 +166,30 @@ PROBE(MoveCpu, GROUPS("PoolCpuBalancer"), \ TYPES(ui32, ui64, TString, TString, ui32), \ NAMES("fromPoolId", "toPoolId", "fromPool", "toPool", "cpu")) \ + PROBE(ThreadCount, GROUPS("BasicThreadPool"), \ + TYPES(ui32, TString, ui32, ui32, ui32, ui32), \ + NAMES("poolId", "pool", "threacCount", "minThreadCount", "maxThreadCount", "defaultThreadCount")) \ + PROBE(HarmonizeCheckPool, GROUPS("Harmonizer"), \ + TYPES(ui32, TString, double, double, double, double, ui32, ui32, bool, bool, bool), \ + NAMES("poolId", "pool", "booked", "consumed", "lastSecondBooked", "lastSecondConsumed", "threadCount", "maxThreadCount", "isStarved", "isNeedy", "isHoggish")) \ + PROBE(HarmonizeOperation, GROUPS("Harmonizer"), \ + TYPES(ui32, TString, TString, ui32, ui32, ui32), \ + NAMES("poolId", "pool", "operation", "newCount", "minCount", "maxCount")) \ + PROBE(TryToHarmonize, GROUPS("Harmonizer"), \ + TYPES(ui32, TString), \ + NAMES("poolId", "pool")) \ + PROBE(SavedValues, GROUPS("Harmonizer"), \ + TYPES(ui32, TString, TString, double, double, double, double, double, double, double, double), \ + NAMES("poolId", "pool", "valueName", 
"[0]", "[1]", "[2]", "[3]", "[4]", "[5]", "[6]", "[7]")) \ + PROBE(RegisterValue, GROUPS("Harmonizer"), \ + TYPES(ui64, ui64, ui64, ui64, double, double, double), \ + NAMES("ts", "lastTs", "dTs", "8sTs", "us", "lastUs", "dUs")) \ + PROBE(TryToHarmonizeFailed, GROUPS("Harmonizer"), \ + TYPES(ui64, ui64, bool, bool), \ + NAMES("ts", "nextHarmonizeTs", "isDisabled", "withLock")) \ + PROBE(TryToHarmonizeSuccess, GROUPS("Harmonizer"), \ + TYPES(ui64, ui64, ui64), \ + NAMES("ts", "nextHarmonizeTs", "previousNextHarmonizeTs")) \ /**/ LWTRACE_DECLARE_PROVIDER(ACTORLIB_PROVIDER) diff --git a/library/cpp/actors/core/worker_context.h b/library/cpp/actors/core/worker_context.h index b4c37a7629..384a13c5ee 100644 --- a/library/cpp/actors/core/worker_context.h +++ b/library/cpp/actors/core/worker_context.h @@ -95,6 +95,7 @@ namespace NActors { i64 ts = deliveredTs > scheduleTs ? deliveredTs - scheduleTs : 0; double usec = NHPTimer::GetSeconds(ts) * 1000000.0; Stats->ActivationTimeHistogram.Add(usec); + Stats->WorstActivationTimeUs = Max(Stats->WorstActivationTimeUs, (ui64)usec); return usec; } diff --git a/library/cpp/actors/helpers/pool_stats_collector.h b/library/cpp/actors/helpers/pool_stats_collector.h index 61d0b45780..b1217b1d63 100644 --- a/library/cpp/actors/helpers/pool_stats_collector.h +++ b/library/cpp/actors/helpers/pool_stats_collector.h @@ -124,6 +124,15 @@ private: NMonitoring::TDynamicCounters::TCounterPtr MailboxPushedOutBySoftPreemption; NMonitoring::TDynamicCounters::TCounterPtr MailboxPushedOutByTime; NMonitoring::TDynamicCounters::TCounterPtr MailboxPushedOutByEventCount; + NMonitoring::TDynamicCounters::TCounterPtr WrongWakenedThreadCount; + NMonitoring::TDynamicCounters::TCounterPtr CurrentThreadCount; + NMonitoring::TDynamicCounters::TCounterPtr IsNeedy; + NMonitoring::TDynamicCounters::TCounterPtr IsStarved; + NMonitoring::TDynamicCounters::TCounterPtr IsHoggish; + NMonitoring::TDynamicCounters::TCounterPtr IncreasingThreadsByNeedyState; + 
NMonitoring::TDynamicCounters::TCounterPtr DecreasingThreadsByStarvedState; + NMonitoring::TDynamicCounters::TCounterPtr DecreasingThreadsByHoggishState; + THistogramCounters LegacyActivationTimeHistogram; NMonitoring::THistogramPtr ActivationTimeHistogram; @@ -167,6 +176,14 @@ private: MailboxPushedOutBySoftPreemption = PoolGroup->GetCounter("MailboxPushedOutBySoftPreemption", true); MailboxPushedOutByTime = PoolGroup->GetCounter("MailboxPushedOutByTime", true); MailboxPushedOutByEventCount = PoolGroup->GetCounter("MailboxPushedOutByEventCount", true); + WrongWakenedThreadCount = PoolGroup->GetCounter("WrongWakenedThreadCount", true); + CurrentThreadCount = PoolGroup->GetCounter("CurrentThreadCount", false); + IsNeedy = PoolGroup->GetCounter("IsNeedy", false); + IsStarved = PoolGroup->GetCounter("IsStarved", false); + IsHoggish = PoolGroup->GetCounter("IsHoggish", false); + IncreasingThreadsByNeedyState = PoolGroup->GetCounter("IncreasingThreadsByNeedyState", true); + DecreasingThreadsByStarvedState = PoolGroup->GetCounter("DecreasingThreadsByStarvedState", true); + DecreasingThreadsByHoggishState = PoolGroup->GetCounter("DecreasingThreadsByHoggishState", true); LegacyActivationTimeHistogram.Init(PoolGroup.Get(), "ActivationTime", "usec", 5*1000*1000); ActivationTimeHistogram = PoolGroup->GetHistogram( @@ -203,6 +220,14 @@ private: *MailboxPushedOutBySoftPreemption = stats.MailboxPushedOutBySoftPreemption; *MailboxPushedOutByTime = stats.MailboxPushedOutByTime; *MailboxPushedOutByEventCount = stats.MailboxPushedOutByEventCount; + *WrongWakenedThreadCount = poolStats.WrongWakenedThreadCount; + *CurrentThreadCount = poolStats.CurrentThreadCount; + *IsNeedy = poolStats.IsNeedy; + *IsStarved = poolStats.IsStarved; + *IsHoggish = poolStats.IsHoggish; + *IncreasingThreadsByNeedyState = poolStats.IncreasingThreadsByNeedyState; + *DecreasingThreadsByStarvedState = poolStats.DecreasingThreadsByStarvedState; + *DecreasingThreadsByHoggishState = 
poolStats.DecreasingThreadsByHoggishState; LegacyActivationTimeHistogram.Set(stats.ActivationTimeHistogram); ActivationTimeHistogram->Reset(); diff --git a/library/cpp/actors/helpers/selfping_actor.cpp b/library/cpp/actors/helpers/selfping_actor.cpp index f9bfaf8dc0..dc383f8c4c 100644 --- a/library/cpp/actors/helpers/selfping_actor.cpp +++ b/library/cpp/actors/helpers/selfping_actor.cpp @@ -61,10 +61,14 @@ struct TAvgOperation { class TSelfPingActor : public TActorBootstrapped<TSelfPingActor> { private: const TDuration SendInterval; - const NMonitoring::TDynamicCounters::TCounterPtr Counter; + const NMonitoring::TDynamicCounters::TCounterPtr MaxPingCounter; + const NMonitoring::TDynamicCounters::TCounterPtr AvgPingCounter; + const NMonitoring::TDynamicCounters::TCounterPtr AvgPingCounterWithSmallWindow; const NMonitoring::TDynamicCounters::TCounterPtr CalculationTimeCounter; - NSlidingWindow::TSlidingWindow<NSlidingWindow::TMaxOperation<ui64>> SlidingWindow; + NSlidingWindow::TSlidingWindow<NSlidingWindow::TMaxOperation<ui64>> MaxPingSlidingWindow; + NSlidingWindow::TSlidingWindow<TAvgOperation<ui64>> AvgPingSlidingWindow; + NSlidingWindow::TSlidingWindow<TAvgOperation<ui64>> AvgPingSmallSlidingWindow; NSlidingWindow::TSlidingWindow<TAvgOperation<ui64>> CalculationSlidingWindow; THPTimer Timer; @@ -74,12 +78,19 @@ public: return SELF_PING_ACTOR; } - TSelfPingActor(TDuration sendInterval, const NMonitoring::TDynamicCounters::TCounterPtr& counter, + TSelfPingActor(TDuration sendInterval, + const NMonitoring::TDynamicCounters::TCounterPtr& maxPingCounter, + const NMonitoring::TDynamicCounters::TCounterPtr& avgPingCounter, + const NMonitoring::TDynamicCounters::TCounterPtr& avgPingSmallWindowCounter, const NMonitoring::TDynamicCounters::TCounterPtr& calculationTimeCounter) : SendInterval(sendInterval) - , Counter(counter) + , MaxPingCounter(maxPingCounter) + , AvgPingCounter(avgPingCounter) + , AvgPingCounterWithSmallWindow(avgPingSmallWindowCounter) , 
CalculationTimeCounter(calculationTimeCounter) - , SlidingWindow(TDuration::Seconds(15), 100) + , MaxPingSlidingWindow(TDuration::Seconds(15), 100) + , AvgPingSlidingWindow(TDuration::Seconds(15), 100) + , AvgPingSmallSlidingWindow(TDuration::Seconds(1), 100) , CalculationSlidingWindow(TDuration::Seconds(15), 100) { } @@ -154,11 +165,23 @@ public: const double passedTime = hpNow - e.TimeStart; const ui64 delayUs = passedTime > 0.0 ? static_cast<ui64>(passedTime * 1e6) : 0; - *Counter = SlidingWindow.Update(delayUs, now); + if (MaxPingCounter) { + *MaxPingCounter = MaxPingSlidingWindow.Update(delayUs, now); + } + if (AvgPingCounter) { + auto res = AvgPingSlidingWindow.Update({1, delayUs}, now); + *AvgPingCounter = double(res.Sum) / double(res.Count + 1); + } + if (AvgPingCounterWithSmallWindow) { + auto res = AvgPingSmallSlidingWindow.Update({1, delayUs}, now); + *AvgPingCounterWithSmallWindow = double(res.Sum) / double(res.Count + 1); + } - ui64 d = MeasureTaskDurationNs(); - auto res = CalculationSlidingWindow.Update({1, d}, now); - *CalculationTimeCounter = double(res.Sum) / double(res.Count + 1); + if (CalculationTimeCounter) { + ui64 d = MeasureTaskDurationNs(); + auto res = CalculationSlidingWindow.Update({1, d}, now); + *CalculationTimeCounter = double(res.Sum) / double(res.Count + 1); + } SchedulePing(ctx, hpNow); } @@ -174,10 +197,12 @@ private: IActor* CreateSelfPingActor( TDuration sendInterval, - const NMonitoring::TDynamicCounters::TCounterPtr& counter, + const NMonitoring::TDynamicCounters::TCounterPtr& maxPingCounter, + const NMonitoring::TDynamicCounters::TCounterPtr& avgPingCounter, + const NMonitoring::TDynamicCounters::TCounterPtr& avgPingSmallWindowCounter, const NMonitoring::TDynamicCounters::TCounterPtr& calculationTimeCounter) { - return new TSelfPingActor(sendInterval, counter, calculationTimeCounter); + return new TSelfPingActor(sendInterval, maxPingCounter, avgPingCounter, avgPingSmallWindowCounter, calculationTimeCounter); } } // NActors 
diff --git a/library/cpp/actors/helpers/selfping_actor.h b/library/cpp/actors/helpers/selfping_actor.h index d7d07f9fa8..a976a4f425 100644 --- a/library/cpp/actors/helpers/selfping_actor.h +++ b/library/cpp/actors/helpers/selfping_actor.h @@ -7,7 +7,9 @@ namespace NActors { NActors::IActor* CreateSelfPingActor( TDuration sendInterval, - const NMonitoring::TDynamicCounters::TCounterPtr& counter, + const NMonitoring::TDynamicCounters::TCounterPtr& maxPingCounter, + const NMonitoring::TDynamicCounters::TCounterPtr& avgPingCounter, + const NMonitoring::TDynamicCounters::TCounterPtr& avgPingSmallWindowCounter, const NMonitoring::TDynamicCounters::TCounterPtr& calculationTimeCounter); } // NActors diff --git a/library/cpp/actors/helpers/selfping_actor_ut.cpp b/library/cpp/actors/helpers/selfping_actor_ut.cpp index 459635fa24..542f817755 100644 --- a/library/cpp/actors/helpers/selfping_actor_ut.cpp +++ b/library/cpp/actors/helpers/selfping_actor_ut.cpp @@ -22,13 +22,17 @@ Y_UNIT_TEST_SUITE(TSelfPingTest) { NMonitoring::TDynamicCounters::TCounterPtr counter(new NMonitoring::TCounterForPtr()); NMonitoring::TDynamicCounters::TCounterPtr counter2(new NMonitoring::TCounterForPtr()); + NMonitoring::TDynamicCounters::TCounterPtr counter3(new NMonitoring::TCounterForPtr()); + NMonitoring::TDynamicCounters::TCounterPtr counter4(new NMonitoring::TCounterForPtr()); auto actor = CreateSelfPingActor( TDuration::MilliSeconds(100), // sendInterval (unused in test) - counter, counter2); + counter, counter2, counter3, counter4); UNIT_ASSERT_VALUES_EQUAL(counter->Val(), 0); UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 0); + UNIT_ASSERT_VALUES_EQUAL(counter3->Val(), 0); + UNIT_ASSERT_VALUES_EQUAL(counter4->Val(), 0); const TActorId actorId = runtime->Register(actor); Y_UNUSED(actorId); diff --git a/library/cpp/actors/util/CMakeLists.txt b/library/cpp/actors/util/CMakeLists.txt index 40d958d75e..233e1fe0fc 100644 --- a/library/cpp/actors/util/CMakeLists.txt +++ 
b/library/cpp/actors/util/CMakeLists.txt @@ -12,6 +12,7 @@ target_link_libraries(cpp-actors-util PUBLIC contrib-libs-cxxsupp yutil cpp-deprecated-atomic + library-cpp-pop_count ) target_sources(cpp-actors-util PRIVATE ${CMAKE_SOURCE_DIR}/library/cpp/actors/util/affinity.cpp diff --git a/library/cpp/actors/util/cpu_load_log.h b/library/cpp/actors/util/cpu_load_log.h new file mode 100644 index 0000000000..e4ae612246 --- /dev/null +++ b/library/cpp/actors/util/cpu_load_log.h @@ -0,0 +1,227 @@ +#pragma once + +#include "defs.h" +#include <library/cpp/deprecated/atomic/atomic.h> +#include <library/cpp/pop_count/popcount.h> + +static constexpr ui64 BitDurationNs = 131'072; // A power of 2 + +template <ui64 DataSize> +struct TCpuLoadLog { + static constexpr ui64 BitsSize = DataSize * 64; + TAtomic LastTimeNs = 0; + ui64 Data[DataSize]; + + TCpuLoadLog() { + LastTimeNs = 0; + for (size_t i = 0; i < DataSize; ++i) { + Data[i] = 0; + } + } + + TCpuLoadLog(ui64 timeNs) { + LastTimeNs = timeNs; + for (size_t i = 0; i < DataSize; ++i) { + Data[i] = 0; + } + } + + void RegisterBusyPeriod(ui64 timeNs) { + RegisterBusyPeriod<true>(timeNs, AtomicGet(LastTimeNs)); + } + + template <bool ModifyLastTime> + void RegisterBusyPeriod(ui64 timeNs, ui64 lastTimeNs) { + timeNs |= 1ull; + if (timeNs < lastTimeNs) { + for (ui64 i = 0; i < DataSize; ++i) { + AtomicSet(Data[i], ~0ull); + } + if (ModifyLastTime) { + AtomicSet(LastTimeNs, timeNs); + } + return; + } + const ui64 lastIdx = timeNs / BitDurationNs; + const ui64 curIdx = lastTimeNs / BitDurationNs; + ui64 firstElementIdx = curIdx / 64; + const ui64 firstBitIdx = curIdx % 64; + const ui64 lastElementIdx = lastIdx / 64; + const ui64 lastBitIdx = lastIdx % 64; + if (firstElementIdx == lastElementIdx) { + ui64 prevValue = 0; + if (firstBitIdx != 0) { + prevValue = AtomicGet(Data[firstElementIdx % DataSize]); + } + const ui64 bits = (((~0ull) << (firstBitIdx + (63-lastBitIdx))) >> (63-lastBitIdx)); + const ui64 newValue = prevValue | bits; 
+ AtomicSet(Data[firstElementIdx % DataSize], newValue); + if (ModifyLastTime) { + AtomicSet(LastTimeNs, timeNs); + } + return; + } + // process the first element + ui64 prevValue = 0; + if (firstBitIdx != 0) { + prevValue = AtomicGet(Data[firstElementIdx % DataSize]); + } + const ui64 bits = ((~0ull) << firstBitIdx); + const ui64 newValue = (prevValue | bits); + AtomicSet(Data[firstElementIdx % DataSize], newValue); + ++firstElementIdx; + // process the fully filled elements + const ui64 firstLoop = firstElementIdx / DataSize; + const ui64 lastLoop = lastElementIdx / DataSize; + const ui64 lastOffset = lastElementIdx % DataSize; + if (firstLoop < lastLoop) { + for (ui64 i = firstElementIdx % DataSize; i < DataSize; ++i) { + AtomicSet(Data[i], ~0ull); + } + for (ui64 i = 0; i < lastOffset; ++i) { + AtomicSet(Data[i], ~0ull); + } + } else { + for (ui64 i = firstElementIdx % DataSize; i < lastOffset; ++i) { + AtomicSet(Data[i], ~0ull); + } + } + // process the last element + const ui64 newValue2 = ((~0ull) >> (63-lastBitIdx)); + AtomicSet(Data[lastOffset], newValue2); + if (ModifyLastTime) { + AtomicSet(LastTimeNs, timeNs); + } + } + + void RegisterIdlePeriod(ui64 timeNs) { + timeNs &= ~1ull; + ui64 lastTimeNs = AtomicGet(LastTimeNs); + if (timeNs < lastTimeNs) { + // Fast check first, slower chec later + if ((timeNs | 1ull) < lastTimeNs) { + // Time goes back, dont panic, just mark the whole array 'busy' + for (ui64 i = 0; i < DataSize; ++i) { + AtomicSet(Data[i], ~0ull); + } + AtomicSet(LastTimeNs, timeNs); + return; + } + } + const ui64 curIdx = lastTimeNs / BitDurationNs; + const ui64 lastIdx = timeNs / BitDurationNs; + ui64 firstElementIdx = curIdx / 64; + const ui64 lastElementIdx = lastIdx / 64; + if (firstElementIdx >= lastElementIdx) { + AtomicSet(LastTimeNs, timeNs); + return; + } + // process the first partially filled element + ++firstElementIdx; + // process all other elements + const ui64 firstLoop = firstElementIdx / DataSize; + const ui64 lastLoop = 
lastElementIdx / DataSize; + const ui64 lastOffset = lastElementIdx % DataSize; + if (firstLoop < lastLoop) { + for (ui64 i = firstElementIdx % DataSize; i < DataSize; ++i) { + AtomicSet(Data[i], 0); + } + for (ui64 i = 0; i <= lastOffset; ++i) { + AtomicSet(Data[i], 0); + } + } else { + for (ui64 i = firstElementIdx % DataSize; i <= lastOffset; ++i) { + AtomicSet(Data[i], 0); + } + } + AtomicSet(LastTimeNs, timeNs); + } +}; + +template <ui64 DataSize> +struct TMinusOneCpuEstimator { + static constexpr ui64 BitsSize = DataSize * 64; + ui64 BeginDelayIdx; + ui64 EndDelayIdx; + ui64 Idle; + ui64 Delay[BitsSize]; + + ui64 MaxLatencyIncreaseWithOneLessCpu(TCpuLoadLog<DataSize>** logs, i64 logCount, ui64 timeNs, ui64 periodNs) { + Y_VERIFY(logCount > 0); + ui64 endTimeNs = timeNs; + + ui64 lastTimeNs = timeNs; + for (i64 log_idx = 0; log_idx < logCount; ++log_idx) { + ui64 x = AtomicGet(logs[log_idx]->LastTimeNs); + if ((x & 1) == 1) { + lastTimeNs = Min(lastTimeNs, x); + } else { + logs[log_idx]->template RegisterBusyPeriod<false>(endTimeNs, x); + } + } + const ui64 beginTimeNs = periodNs < timeNs ? 
timeNs - periodNs : 0; + + ui64 beginIdx = beginTimeNs / BitDurationNs; + ui64 lastIdx = lastTimeNs / BitDurationNs; + ui64 beginElementIdx = beginIdx / 64; + ui64 lastElementIdx = lastIdx / 64; + + BeginDelayIdx = 0; + EndDelayIdx = 0; + Idle = 0; + ui64 maxDelay = 0; + ui64 bucket = 0; + for (ui64 idx = beginElementIdx; idx <= lastElementIdx; ++idx) { + ui64 i = idx % DataSize; + ui64 input = AtomicGet(logs[0]->Data[i]); + ui64 all_busy = ~0ull; + for (i64 log_idx = 1; log_idx < logCount; ++log_idx) { + ui64 x = AtomicGet(logs[log_idx]->Data[i]); + all_busy &= x; + } + if (!input) { + if (!bucket) { + Idle += 64 - PopCount(all_busy); + continue; + } + } + for (i64 bit_idx = 0; bit_idx < 64; ++bit_idx) { + ui64 x = (1ull << bit_idx); + if (all_busy & x) { + if (input & x) { + // Push into the queue + bucket++; + Delay[EndDelayIdx] = EndDelayIdx; + ++EndDelayIdx; + } else { + // All busy + } + } else { + if (input & x) { + // Move success + } else { + if (bucket) { + // Remove from the queue + bucket--; + ui64 stored = Delay[BeginDelayIdx]; + ++BeginDelayIdx; + ui64 delay = EndDelayIdx - stored; + maxDelay = Max(maxDelay, delay); + //Cerr << "bit_idx: " << bit_idx << " stored: " << stored << " delay: " << delay << Endl; + } else { + Idle++; + } + } + } + } + } + if (bucket) { + ui64 stored = Delay[BeginDelayIdx]; + ui64 delay = EndDelayIdx - stored; + maxDelay = Max(maxDelay, delay); + //Cerr << "last stored: " << stored << " delay: " << delay << Endl; + } + return maxDelay * BitDurationNs; + } +}; + diff --git a/library/cpp/actors/util/cpu_load_log_ut.cpp b/library/cpp/actors/util/cpu_load_log_ut.cpp new file mode 100644 index 0000000000..7109123c6e --- /dev/null +++ b/library/cpp/actors/util/cpu_load_log_ut.cpp @@ -0,0 +1,275 @@ +#include "cpu_load_log.h" + +#include <library/cpp/testing/unittest/registar.h> +#include <util/random/random.h> +#include <util/system/hp_timer.h> +#include <util/system/sanitizers.h> +#include <util/system/thread.h> + 
+Y_UNIT_TEST_SUITE(CpuLoadLog) { + + TString PrintBits(ui64 x) { + TStringStream str; + for (ui64 i = 0; i < 64; ++i) { + if (x & (1ull << i)) { + str << "1"; + } else { + str << "0"; + } + } + return str.Str(); + } + + Y_UNIT_TEST(FillAll) { + TCpuLoadLog<5> log(100*BitDurationNs); + log.RegisterBusyPeriod(101*BitDurationNs); + log.RegisterBusyPeriod(163*BitDurationNs); + log.RegisterBusyPeriod(164*BitDurationNs); + log.RegisterBusyPeriod(165*BitDurationNs); + log.RegisterBusyPeriod(331*BitDurationNs); + log.RegisterBusyPeriod(340*BitDurationNs); + log.RegisterBusyPeriod(420*BitDurationNs); + log.RegisterBusyPeriod(511*BitDurationNs); + //for (ui64 i = 0; i < 5; ++i) { + // Cerr << "i: " << i << " bits: " << PrintBits(log.Data[i]) << Endl; + //} + for (ui64 i = 0; i < 5; ++i) { + UNIT_ASSERT_C((ui64(log.Data[i]) == ~ui64(0)), "Unequal at " << i << "\n got: " << PrintBits(log.Data[i]) + << "\n expected: " << PrintBits(~ui64(0))); + } + } + + Y_UNIT_TEST(PartialFill) { + TCpuLoadLog<5> log(0*BitDurationNs); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[0]), PrintBits(0b0ull)); + log.RegisterBusyPeriod(0*BitDurationNs); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[0]), PrintBits(0b1ull)); + log.RegisterBusyPeriod(0*BitDurationNs); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[0]), PrintBits(0b1ull)); + log.RegisterBusyPeriod(1*BitDurationNs/2); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[0]), PrintBits(0b1ull)); + log.RegisterBusyPeriod(1*BitDurationNs); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[0]), PrintBits(0b11ull)); + log.RegisterIdlePeriod(3*BitDurationNs); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[0]), PrintBits(0b11ull)); + log.RegisterBusyPeriod(3*BitDurationNs); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[0]), PrintBits(0b1011ull)); + log.RegisterBusyPeriod(63*BitDurationNs); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[0]), PrintBits((~0ull)^0b0100ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[1]), PrintBits(0b0ull)); + 
log.RegisterBusyPeriod(128*BitDurationNs); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[0]), PrintBits((~0ull)^0b0100ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[1]), PrintBits(~0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[2]), PrintBits(0b1ull)); + log.RegisterBusyPeriod(1*BitDurationNs); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[0]), PrintBits(~0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[1]), PrintBits(~0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[2]), PrintBits(~0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[3]), PrintBits(~0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[4]), PrintBits(~0ull)); + log.RegisterBusyPeriod(2*BitDurationNs); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[0]), PrintBits(~0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[1]), PrintBits(~0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[2]), PrintBits(~0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[3]), PrintBits(~0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[4]), PrintBits(~0ull)); + log.RegisterBusyPeriod(64*BitDurationNs); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[0]), PrintBits(~0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[1]), PrintBits(0b1ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[2]), PrintBits(~0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[3]), PrintBits(~0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[4]), PrintBits(~0ull)); + log.RegisterIdlePeriod(128*BitDurationNs); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[0]), PrintBits(~0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[1]), PrintBits(0b1ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[2]), PrintBits(0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[3]), PrintBits(~0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[4]), PrintBits(~0ull)); + log.RegisterIdlePeriod(192*BitDurationNs); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[0]), PrintBits(~0ull)); + 
UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[1]), PrintBits(0b1ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[2]), PrintBits(0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[3]), PrintBits(0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[4]), PrintBits(~0ull)); + log.RegisterBusyPeriod(192*BitDurationNs); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[0]), PrintBits(~0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[1]), PrintBits(0b1ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[2]), PrintBits(0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[3]), PrintBits(0b1ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[4]), PrintBits(~0ull)); + log.RegisterIdlePeriod((192+5*64-1)*BitDurationNs); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[0]), PrintBits(0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[1]), PrintBits(0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[2]), PrintBits(0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[3]), PrintBits(0b1ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[4]), PrintBits(0ull)); + log.RegisterIdlePeriod((192+15*64)*BitDurationNs); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[0]), PrintBits(0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[1]), PrintBits(0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[2]), PrintBits(0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[3]), PrintBits(0ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log.Data[4]), PrintBits(0ull)); + } + + Y_UNIT_TEST(Estimator) { + TCpuLoadLog<5> *log[10]; + log[0] = new TCpuLoadLog<5>(0*BitDurationNs); + log[1] = new TCpuLoadLog<5>(0*BitDurationNs); + TMinusOneCpuEstimator<5> estimator; + + + for (ui64 i = 0; i < 5*64; i+=2) { + log[0]->RegisterIdlePeriod(i*BitDurationNs); + log[0]->RegisterBusyPeriod(i*BitDurationNs); + } + log[0]->RegisterIdlePeriod((5*64-2)*BitDurationNs); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log[0]->Data[0]), + PrintBits(0b0101010101010101010101010101010101010101010101010101010101010101ull)); 
+ UNIT_ASSERT_VALUES_EQUAL(PrintBits(log[0]->Data[4]), + PrintBits(0b0101010101010101010101010101010101010101010101010101010101010101ull)); + for (ui64 i = 0; i < 5*64-1; i+=2) { + log[1]->RegisterIdlePeriod((i+1)*BitDurationNs); + log[1]->RegisterBusyPeriod((i+1)*BitDurationNs); + } + log[1]->RegisterIdlePeriod((5*64-2+1)*BitDurationNs); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log[1]->Data[0]), + PrintBits(0b1010101010101010101010101010101010101010101010101010101010101010ull)); + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log[1]->Data[4]), + PrintBits(0b1010101010101010101010101010101010101010101010101010101010101010ull)); + + ui64 value = estimator.MaxLatencyIncreaseWithOneLessCpu(log, 2, (5*64)*BitDurationNs-1, 3*64*BitDurationNs); + UNIT_ASSERT_VALUES_EQUAL(value/BitDurationNs, 1); + + value = estimator.MaxLatencyIncreaseWithOneLessCpu(log, 2, (5*64+10)*BitDurationNs, 3*64*BitDurationNs); + UNIT_ASSERT_VALUES_EQUAL(value/BitDurationNs, 12); + + delete log[0]; + delete log[1]; + } + + Y_UNIT_TEST(Estimator2) { + TCpuLoadLog<5> *log[2]; + log[0] = new TCpuLoadLog<5>(0*BitDurationNs); + log[1] = new TCpuLoadLog<5>(0*BitDurationNs); + TMinusOneCpuEstimator<5> estimator; + + for (ui64 i = 0; i < 5*64; i+=2) { + log[0]->RegisterIdlePeriod(i*BitDurationNs); + log[0]->RegisterBusyPeriod(i*BitDurationNs); + } + for (ui64 i = 0; i < 5; ++i) { + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log[0]->Data[i]), + PrintBits(0b0101010101010101010101010101010101010101010101010101010101010101ull)); + } + for (ui64 i = 0; i < 5*64-1; i+=2) { + log[1]->RegisterIdlePeriod((i+1)*BitDurationNs); + log[1]->RegisterBusyPeriod((i+1)*BitDurationNs); + } + for (ui64 i = 0; i < 5; ++i) { + UNIT_ASSERT_VALUES_EQUAL(PrintBits(log[1]->Data[i]), + PrintBits(0b1010101010101010101010101010101010101010101010101010101010101010ull)); + } + + log[0]->Data[2] = ~0ull; + ui64 value = estimator.MaxLatencyIncreaseWithOneLessCpu(log, 2, (5*64-1)*BitDurationNs, 3*64*BitDurationNs); + 
UNIT_ASSERT_VALUES_EQUAL(value/BitDurationNs, 32); + + delete log[0]; + delete log[1]; + } + + Y_UNIT_TEST(Estimator3) { + TCpuLoadLog<5> *log[3]; + log[0] = new TCpuLoadLog<5>(0*BitDurationNs); + log[1] = new TCpuLoadLog<5>(0*BitDurationNs); + log[2] = new TCpuLoadLog<5>(0*BitDurationNs); + TMinusOneCpuEstimator<5> estimator; + + for (ui64 i = 0; i < 5*64; i+=8) { + log[0]->RegisterIdlePeriod(i*BitDurationNs); + log[0]->RegisterBusyPeriod((i+3)*BitDurationNs); + log[1]->RegisterIdlePeriod(i*BitDurationNs); + log[1]->RegisterBusyPeriod((i+3)*BitDurationNs); + log[2]->RegisterIdlePeriod(i*BitDurationNs); + log[2]->RegisterBusyPeriod((i+3)*BitDurationNs); + } + for (ui64 i = 0; i < 5; ++i) { + for (ui64 n = 0; n < 3; ++n) { + UNIT_ASSERT_VALUES_EQUAL_C(PrintBits(log[n]->Data[i]), + PrintBits(0b0000111100001111000011110000111100001111000011110000111100001111ull), + " i: " << i << " n: " << n); + } + } + + ui64 value = estimator.MaxLatencyIncreaseWithOneLessCpu(log, 3, (5*64-5)*BitDurationNs, 3*64*BitDurationNs); + UNIT_ASSERT_VALUES_EQUAL(value/BitDurationNs, 4); + + delete log[0]; + delete log[1]; + delete log[2]; + } + /* + class TWorkerThread : public ISimpleThread { + private: + std::function<void()> Func; + double Time = 0.0; + + public: + TWorkerThread(std::function<void()> func) + : Func(std::move(func)) + { } + + double GetTime() const { + return Time; + } + + static THolder<TWorkerThread> Spawn(std::function<void()> func) { + THolder<TWorkerThread> thread = MakeHolder<TWorkerThread>(std::move(func)); + thread->Start(); + return thread; + } + + private: + void* ThreadProc() noexcept override { + THPTimer timer; + Func(); + Time = timer.Passed(); + return nullptr; + } + }; + + void DoConcurrentPushPop(size_t threads, ui64 perThreadCount) { + // Concurrency factor 4 is up to 16 threads + + auto workerFunc = [&](size_t threadIndex) { + }; + + TVector<THolder<TWorkerThread>> workers(threads); + for (size_t i = 0; i < threads; ++i) { + workers[i] = 
TWorkerThread::Spawn([workerFunc, i]() { + workerFunc(i); + }); + } + + double maxTime = 0; + for (size_t i = 0; i < threads; ++i) { + workers[i]->Join(); + maxTime = Max(maxTime, workers[i]->GetTime()); + } + + UNIT_ASSERT_VALUES_EQUAL(popped, 0u); + + Cerr << "Concurrent with " << threads << " threads: " << maxTime << " seconds" << Endl; + } + + void DoConcurrentPushPop_3times(size_t threads, ui64 perThreadCount) { + for (size_t i = 0; i < 3; ++i) { + DoConcurrentPushPop(threads, perThreadCount); + } + } + + static constexpr ui64 PER_THREAD_COUNT = NSan::PlainOrUnderSanitizer(1000000, 100000); + + Y_UNIT_TEST(ConcurrentPushPop_1thread) { DoConcurrentPushPop_3times(1, PER_THREAD_COUNT); } + */ +} diff --git a/library/cpp/actors/util/thread_load_log.h b/library/cpp/actors/util/thread_load_log.h new file mode 100644 index 0000000000..b4b34d47bb --- /dev/null +++ b/library/cpp/actors/util/thread_load_log.h @@ -0,0 +1,363 @@ +#pragma once + +#include "defs.h" + +#include <util/system/types.h> + +#include <type_traits> +#include <algorithm> +#include <atomic> +#include <limits> +#include <queue> + +template <ui64 TIME_SLOT_COUNT, ui64 TIME_SLOT_LENGTH_NS = 131'072, typename Type = std::uint8_t> +class TThreadLoad { +public: + using TimeSlotType = Type; + +private: + static constexpr auto TIME_SLOT_MAX_VALUE = std::numeric_limits<TimeSlotType>::max(); + static constexpr ui64 TIME_SLOT_PART_COUNT = TIME_SLOT_MAX_VALUE + 1; + static constexpr auto TIME_SLOT_PART_LENGTH_NS = TIME_SLOT_LENGTH_NS / TIME_SLOT_PART_COUNT; + + template <typename T> + static void AtomicAddBound(std::atomic<T>& val, i64 inc) { + if (inc == 0) { + return; + } + + auto newVal = val.load(); + auto oldVal = newVal; + + do { + static constexpr auto MAX_VALUE = std::numeric_limits<T>::max(); + + if (oldVal >= MAX_VALUE) { + return; + } + newVal = std::min<i64>(MAX_VALUE, static_cast<i64>(oldVal) + inc); + } while (!val.compare_exchange_weak(oldVal, newVal)); + } + + template <typename T> + static void 
AtomicSubBound(std::atomic<T>& val, i64 sub) { + if (sub == 0) { + return; + } + + auto newVal = val.load(); + auto oldVal = newVal; + + do { + if (oldVal == 0) { + return; + } + newVal = std::max<i64>(0, static_cast<i64>(oldVal) - sub); + } while (!val.compare_exchange_weak(oldVal, newVal)); + } + + void UpdateCompleteTimeSlots(ui64 firstSlotNumber, ui64 lastSlotNumber, TimeSlotType timeSlotValue) { + ui32 firstSlotIndex = firstSlotNumber % TIME_SLOT_COUNT; + ui32 lastSlotIndex = lastSlotNumber % TIME_SLOT_COUNT; + + const ui64 firstTimeSlotsPass = firstSlotNumber / TIME_SLOT_COUNT; + const ui64 lastTimeSlotsPass = lastSlotNumber / TIME_SLOT_COUNT; + + if (firstTimeSlotsPass == lastTimeSlotsPass) { + // first and last time slots are in the same pass + for (auto slotNumber = firstSlotNumber + 1; slotNumber < lastSlotNumber; ++slotNumber) { + auto slotIndex = slotNumber % TIME_SLOT_COUNT; + TimeSlots[slotIndex] = timeSlotValue; + } + } else if (firstTimeSlotsPass + 1 == lastTimeSlotsPass) { + for (auto slotIndex = (firstSlotNumber + 1) % TIME_SLOT_COUNT; firstSlotIndex < slotIndex && slotIndex < TIME_SLOT_COUNT; ++slotIndex) { + TimeSlots[slotIndex] = timeSlotValue; + } + for (auto slotIndex = 0u; slotIndex < lastSlotIndex; ++slotIndex) { + TimeSlots[slotIndex] = timeSlotValue; + } + } else { + for (auto slotIndex = 0u; slotIndex < TIME_SLOT_COUNT; ++slotIndex) { + TimeSlots[slotIndex] = timeSlotValue; + } + } + } + +public: + std::atomic<ui64> LastTimeNs; + std::atomic<TimeSlotType> TimeSlots[TIME_SLOT_COUNT]; + std::atomic<bool> LastRegisteredPeriodIsBusy = false; + + explicit TThreadLoad(ui64 timeNs = 0) { + static_assert(std::is_unsigned<TimeSlotType>::value); + + LastTimeNs = timeNs; + for (size_t i = 0; i < TIME_SLOT_COUNT; ++i) { + TimeSlots[i] = 0; + } + } + + static constexpr auto GetTimeSlotCount() { + return TIME_SLOT_COUNT; + } + + static constexpr auto GetTimeSlotLengthNs() { + return TIME_SLOT_LENGTH_NS; + } + + static constexpr auto 
GetTimeSlotPartLengthNs() { + return TIME_SLOT_PART_LENGTH_NS; + } + + static constexpr auto GetTimeSlotPartCount() { + return TIME_SLOT_PART_COUNT; + } + + static constexpr auto GetTimeSlotMaxValue() { + return TIME_SLOT_MAX_VALUE; + } + + static constexpr auto GetTimeWindowLengthNs() { + return TIME_SLOT_COUNT * TIME_SLOT_LENGTH_NS; + } + + void RegisterBusyPeriod(ui64 timeNs) { + RegisterBusyPeriod<true>(timeNs, LastTimeNs.load()); + } + + template <bool ModifyLastTime> + void RegisterBusyPeriod(ui64 timeNs, ui64 lastTimeNs) { + LastRegisteredPeriodIsBusy = true; + + if (timeNs < lastTimeNs) { + // when time goes back, mark all time slots as 'free' + for (size_t i = 0u; i < TIME_SLOT_COUNT; ++i) { + TimeSlots[i] = 0; + } + + if (ModifyLastTime) { + LastTimeNs = timeNs; + } + + return; + } + + // lastTimeNs <= timeNs + ui64 firstSlotNumber = lastTimeNs / TIME_SLOT_LENGTH_NS; + ui32 firstSlotIndex = firstSlotNumber % TIME_SLOT_COUNT; + ui64 lastSlotNumber = timeNs / TIME_SLOT_LENGTH_NS; + ui32 lastSlotIndex = lastSlotNumber % TIME_SLOT_COUNT; + + if (firstSlotNumber == lastSlotNumber) { + ui32 slotLengthNs = timeNs - lastTimeNs; + ui32 slotPartsCount = (slotLengthNs + TIME_SLOT_PART_LENGTH_NS - 1) / TIME_SLOT_PART_LENGTH_NS; + AtomicAddBound(TimeSlots[firstSlotIndex], slotPartsCount); + + if (ModifyLastTime) { + LastTimeNs = timeNs; + } + return; + } + + ui32 firstSlotLengthNs = TIME_SLOT_LENGTH_NS - (lastTimeNs % TIME_SLOT_LENGTH_NS); + ui32 firstSlotPartsCount = (firstSlotLengthNs + TIME_SLOT_PART_LENGTH_NS - 1) / TIME_SLOT_PART_LENGTH_NS; + ui32 lastSlotLengthNs = timeNs % TIME_SLOT_LENGTH_NS; + ui32 lastSlotPartsCount = (lastSlotLengthNs + TIME_SLOT_PART_LENGTH_NS - 1) / TIME_SLOT_PART_LENGTH_NS; + + // process first time slot + AtomicAddBound(TimeSlots[firstSlotIndex], firstSlotPartsCount); + + // process complete time slots + UpdateCompleteTimeSlots(firstSlotNumber, lastSlotNumber, TIME_SLOT_MAX_VALUE); + + // process last time slot + 
AtomicAddBound(TimeSlots[lastSlotIndex], lastSlotPartsCount); + + if (ModifyLastTime) { + LastTimeNs = timeNs; + } + } + + void RegisterIdlePeriod(ui64 timeNs) { + LastRegisteredPeriodIsBusy = false; + + ui64 lastTimeNs = LastTimeNs.load(); + if (timeNs < lastTimeNs) { + // when time goes back, mark all time slots as 'busy' + for (size_t i = 0u; i < TIME_SLOT_COUNT; ++i) { + TimeSlots[i] = TIME_SLOT_MAX_VALUE; + } + LastTimeNs = timeNs; + return; + } + + // lastTimeNs <= timeNs + ui64 firstSlotNumber = lastTimeNs / TIME_SLOT_LENGTH_NS; + ui32 firstSlotIndex = firstSlotNumber % TIME_SLOT_COUNT; + ui64 lastSlotNumber = timeNs / TIME_SLOT_LENGTH_NS; + ui32 lastSlotIndex = lastSlotNumber % TIME_SLOT_COUNT; + + if (firstSlotNumber == lastSlotNumber) { + ui32 slotLengthNs = timeNs - lastTimeNs; + ui32 slotPartsCount = slotLengthNs / TIME_SLOT_PART_LENGTH_NS; + + AtomicSubBound(TimeSlots[firstSlotIndex], slotPartsCount); + + LastTimeNs = timeNs; + return; + } + + ui32 firstSlotLengthNs = TIME_SLOT_LENGTH_NS - (lastTimeNs % TIME_SLOT_LENGTH_NS); + ui32 firstSlotPartsCount = (firstSlotLengthNs + TIME_SLOT_PART_LENGTH_NS - 1) / TIME_SLOT_PART_LENGTH_NS; + ui32 lastSlotLengthNs = timeNs % TIME_SLOT_LENGTH_NS; + ui32 lastSlotPartsCount = (lastSlotLengthNs + TIME_SLOT_PART_LENGTH_NS - 1) / TIME_SLOT_PART_LENGTH_NS; + + // process first time slot + AtomicSubBound(TimeSlots[firstSlotIndex], firstSlotPartsCount); + + // process complete time slots + UpdateCompleteTimeSlots(firstSlotNumber, lastSlotNumber, 0); + + // process last time slot + AtomicSubBound(TimeSlots[lastSlotIndex], lastSlotPartsCount); + + LastTimeNs = timeNs; + } +}; + +class TMinusOneThreadEstimator { +private: + template <typename T, int MaxSize> + class TArrayQueue { + public: + bool empty() const { + return FrontIndex == -1; + } + + bool full() const { + return (RearIndex + 1) % MaxSize == FrontIndex; + } + + T& front() { + return Data[FrontIndex]; + } + + bool push(T &&t) { + if (full()) { + return false; + } 
+ + if (FrontIndex == -1) { + FrontIndex = 0; + } + + RearIndex = (RearIndex + 1) % MaxSize; + Data[RearIndex] = std::move(t); + return true; + } + + bool pop() { + if (empty()) { + return false; + } + + if (FrontIndex == RearIndex) { + FrontIndex = RearIndex = -1; + } else { + FrontIndex = (FrontIndex + 1) % MaxSize; + } + + return true; + } + + private: + int FrontIndex = -1; + int RearIndex = -1; + T Data[MaxSize]; + }; + +public: + template <typename T> + ui64 MaxLatencyIncreaseWithOneLessCpu(T **threadLoads, ui32 threadCount, ui64 timeNs, ui64 periodNs) { + Y_VERIFY(threadCount > 0); + + struct TTimeSlotData { + typename T::TimeSlotType Load; + ui64 Index; + }; + + ui64 lastTimeNs = timeNs; + for (auto threadIndex = 0u; threadIndex < threadCount; ++threadIndex) { + if (threadLoads[threadIndex]->LastRegisteredPeriodIsBusy.load()) { + lastTimeNs = std::min(lastTimeNs, threadLoads[threadIndex]->LastTimeNs.load()); + } else { + // make interval [lastTimeNs, timeNs] 'busy' + threadLoads[threadIndex]->template RegisterBusyPeriod<false>(timeNs, threadLoads[threadIndex]->LastTimeNs.load()); + } + } + + periodNs = std::min(T::GetTimeWindowLengthNs(), periodNs); + + ui64 beginTimeNs = periodNs < timeNs ? 
timeNs - periodNs : 0; + + ui64 firstSlotNumber = beginTimeNs / T::GetTimeSlotLengthNs(); + ui64 lastSlotNumber = (lastTimeNs + T::GetTimeSlotLengthNs() - 1) / T::GetTimeSlotLengthNs(); + + ui64 maxTimeSlotShiftCount = 0u; + TArrayQueue<TTimeSlotData, T::GetTimeSlotCount()> firstThreadLoadDataQueue; + + for (auto slotNumber = firstSlotNumber; slotNumber < lastSlotNumber; ++slotNumber) { + ui64 slotIndex = slotNumber % T::GetTimeSlotCount(); + + typename T::TimeSlotType firstThreadTimeSlotValue = threadLoads[0]->TimeSlots[slotIndex].load(); + + // distribute previous load of the first thread by other threads + auto foundIdleThread = false; + + for (auto threadIndex = 1u; threadIndex < threadCount; ++threadIndex) { + typename T::TimeSlotType thisThreadAvailableTimeSlotLoad = threadLoads[threadIndex]->GetTimeSlotMaxValue() - threadLoads[threadIndex]->TimeSlots[slotIndex].load(); + + while (!firstThreadLoadDataQueue.empty() && thisThreadAvailableTimeSlotLoad > 0) { + auto& firstThreadLoadData = firstThreadLoadDataQueue.front(); + + auto distributedLoad = std::min(thisThreadAvailableTimeSlotLoad, firstThreadLoadData.Load); + + thisThreadAvailableTimeSlotLoad -= distributedLoad; + firstThreadLoadData.Load -= distributedLoad; + + if (firstThreadLoadData.Load == 0) { + auto timeSlotShiftCount = slotIndex - firstThreadLoadData.Index; + maxTimeSlotShiftCount = std::max(maxTimeSlotShiftCount, timeSlotShiftCount); + auto res = firstThreadLoadDataQueue.pop(); + Y_VERIFY(res); + } + } + + if (thisThreadAvailableTimeSlotLoad == threadLoads[threadIndex]->GetTimeSlotMaxValue()) { + foundIdleThread = true; + } + } + + // distribute current load of the first thread by other threads + if (firstThreadTimeSlotValue > 0) { + if (foundIdleThread) { + // The current load of the first thead can be + // moved to the idle thread so there is nothing to do + } else { + // The current load of the first thread can be later + // processed by the following time slots of other threads + auto res = 
firstThreadLoadDataQueue.push({firstThreadTimeSlotValue, slotIndex}); + Y_VERIFY(res); + } + } + } + + if (!firstThreadLoadDataQueue.empty()) { + const auto& timeSlotData = firstThreadLoadDataQueue.front(); + auto timeSlotShiftCount = T::GetTimeSlotCount() - timeSlotData.Index; + maxTimeSlotShiftCount = std::max(maxTimeSlotShiftCount, timeSlotShiftCount); + } + + return maxTimeSlotShiftCount * T::GetTimeSlotLengthNs(); + } +}; diff --git a/library/cpp/actors/util/thread_load_log_ut.cpp b/library/cpp/actors/util/thread_load_log_ut.cpp new file mode 100644 index 0000000000..20e776cff6 --- /dev/null +++ b/library/cpp/actors/util/thread_load_log_ut.cpp @@ -0,0 +1,966 @@ +#include "thread_load_log.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/random/random.h> +#include <util/system/hp_timer.h> +#include <util/system/thread.h> +#include <util/system/types.h> +#include <util/system/sanitizers.h> + +#include <limits> + +Y_UNIT_TEST_SUITE(ThreadLoadLog) { + + Y_UNIT_TEST(TThreadLoad8BitSlotType) { + constexpr auto timeWindowLengthNs = 5368709120ull; // 5 * 2 ^ 30 ~5 sec + constexpr auto timeSlotLengthNs = 524288ull; // 2 ^ 19 ns ~ 512 usec + constexpr auto timeSlotCount = timeWindowLengthNs / timeSlotLengthNs; + + using TSlotType = std::uint8_t; + using T = TThreadLoad<timeSlotCount, timeSlotLengthNs, TSlotType>; + + UNIT_ASSERT_VALUES_EQUAL(T::GetTimeWindowLengthNs(), timeWindowLengthNs); + UNIT_ASSERT_VALUES_EQUAL(T::GetTimeSlotLengthNs(), timeSlotLengthNs); + UNIT_ASSERT_VALUES_EQUAL(T::GetTimeSlotCount(), timeSlotCount); + UNIT_ASSERT_VALUES_EQUAL(T::GetTimeSlotMaxValue(), std::numeric_limits<TSlotType>::max()); + UNIT_ASSERT_VALUES_EQUAL(T::GetTimeSlotPartCount(), (ui64)std::numeric_limits<TSlotType>::max() + 1); + UNIT_ASSERT_VALUES_EQUAL(T::GetTimeSlotPartLengthNs(), T::GetTimeSlotLengthNs() / T::GetTimeSlotPartCount()); + } + + Y_UNIT_TEST(TThreadLoad16BitSlotType) { + constexpr auto timeWindowLengthNs = 5368709120ull; // 5 * 2 ^ 30 ~5 
sec + constexpr auto timeSlotLengthNs = 524288ull; // 2 ^ 19 ns ~ 512 usec + constexpr auto timeSlotCount = timeWindowLengthNs / timeSlotLengthNs; + + using TSlotType = std::uint16_t; + using T = TThreadLoad<timeSlotCount, timeSlotLengthNs, TSlotType>; + + UNIT_ASSERT_VALUES_EQUAL(T::GetTimeWindowLengthNs(), timeWindowLengthNs); + UNIT_ASSERT_VALUES_EQUAL(T::GetTimeSlotLengthNs(), timeSlotLengthNs); + UNIT_ASSERT_VALUES_EQUAL(T::GetTimeSlotCount(), timeSlotCount); + UNIT_ASSERT_VALUES_EQUAL(T::GetTimeSlotMaxValue(), std::numeric_limits<TSlotType>::max()); + UNIT_ASSERT_VALUES_EQUAL(T::GetTimeSlotPartCount(), (ui64)std::numeric_limits<TSlotType>::max() + 1); + UNIT_ASSERT_VALUES_EQUAL(T::GetTimeSlotPartLengthNs(), T::GetTimeSlotLengthNs() / T::GetTimeSlotPartCount()); + } + + Y_UNIT_TEST(TThreadLoad8BitSlotTypeWindowBusy) { + constexpr auto timeWindowLengthNs = 5368709120ull; // 5 * 2 ^ 30 ~5 sec + constexpr auto timeSlotLengthNs = 524288ull; // 2 ^ 19 ns ~ 512 usec + constexpr auto timeSlotCount = timeWindowLengthNs / timeSlotLengthNs; + + using TSlotType = std::uint8_t; + using T = TThreadLoad<timeSlotCount, timeSlotLengthNs, TSlotType>; + + T threadLoad; + threadLoad.RegisterBusyPeriod(T::GetTimeWindowLengthNs()); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), T::GetTimeWindowLengthNs()); + for (auto slotIndex = 0u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), T::GetTimeSlotMaxValue()); + } + } + + Y_UNIT_TEST(TThreadLoad16BitSlotTypeWindowBusy) { + constexpr auto timeWindowLengthNs = 5368709120ull; // 5 * 2 ^ 30 ~5 sec + constexpr auto timeSlotLengthNs = 524288ull; // 2 ^ 19 ns ~ 512 usec + constexpr auto timeSlotCount = timeWindowLengthNs / timeSlotLengthNs; + + using TSlotType = std::uint16_t; + using T = TThreadLoad<timeSlotCount, timeSlotLengthNs, TSlotType>; + + T threadLoad; + threadLoad.RegisterBusyPeriod(T::GetTimeWindowLengthNs()); + + 
// A busy period ending one nanosecond before the end of slot 0 saturates slot 0 only.
Y_UNIT_TEST(TThreadLoadRegisterBusyPeriodFirstTimeSlot1) {
    TThreadLoad<38400> load;

    // checks that every slot starting from `firstIndex` is still free
    const auto expectFreeFrom = [&load](ui32 firstIndex) {
        for (auto index = firstIndex; index < load.GetTimeSlotCount(); ++index) {
            UNIT_ASSERT_VALUES_EQUAL(load.TimeSlots[index].load(), 0);
        }
    };

    expectFreeFrom(0);

    const ui64 busyUntilNs = load.GetTimeSlotLengthNs() - 1;
    load.RegisterBusyPeriod(busyUntilNs);

    UNIT_ASSERT_VALUES_EQUAL(load.TimeSlots[0].load(), load.GetTimeSlotMaxValue());
    expectFreeFrom(1);

    UNIT_ASSERT_VALUES_EQUAL(load.LastTimeNs.load(), busyUntilNs);
}
+ threadLoad.RegisterBusyPeriod(timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), threadLoad.GetTimeSlotMaxValue()); + + for (auto slotIndex = 1u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + } + + Y_UNIT_TEST(TThreadLoadRegisterBusyPeriodFirstTimeSlot4) { + using T = TThreadLoad<38400>; + + ui32 startNs = 2 * T::GetTimeSlotPartLengthNs(); + T threadLoad(startNs); + + for (auto slotIndex = 0u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + ui64 timeNs = 3 * T::GetTimeSlotPartLengthNs(); + threadLoad.RegisterBusyPeriod(timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), (timeNs - startNs) / T::GetTimeSlotPartLengthNs()); + + for (auto slotIndex = 1u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + } + + Y_UNIT_TEST(TThreadLoadRegisterBusyPeriodFirstTwoTimeSlots1) { + TThreadLoad<38400> threadLoad; + + for (auto slotIndex = 0u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + ui64 timeNs = 2 * threadLoad.GetTimeSlotLengthNs() - 1; + threadLoad.RegisterBusyPeriod(timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), threadLoad.GetTimeSlotMaxValue()); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[1].load(), threadLoad.GetTimeSlotMaxValue()); + + for (auto slotIndex = 2u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + } + + Y_UNIT_TEST(TThreadLoadRegisterBusyPeriodFirstTwoTimeSlots2) { 
+ TThreadLoad<38400> threadLoad; + + for (auto slotIndex = 0u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + ui64 timeNs = 2 * threadLoad.GetTimeSlotLengthNs(); + threadLoad.RegisterBusyPeriod(timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), threadLoad.GetTimeSlotMaxValue()); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), threadLoad.GetTimeSlotMaxValue()); + + for (auto slotIndex = 2u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + } + + Y_UNIT_TEST(TThreadLoadRegisterBusyPeriodFirstThreeTimeSlots1) { + TThreadLoad<38400> threadLoad; + + for (auto slotIndex = 0u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + ui64 timeNs = 3 * threadLoad.GetTimeSlotLengthNs() - 1; + threadLoad.RegisterBusyPeriod(timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), threadLoad.GetTimeSlotMaxValue()); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[1].load(), threadLoad.GetTimeSlotMaxValue()); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[2].load(), threadLoad.GetTimeSlotMaxValue()); + + for (auto slotIndex = 3u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + } + + Y_UNIT_TEST(TThreadLoadRegisterBusyPeriodFirstThreeTimeSlots2) { + TThreadLoad<38400> threadLoad; + + for (auto slotIndex = 0u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + ui64 timeNs = 3 * threadLoad.GetTimeSlotLengthNs(); + threadLoad.RegisterBusyPeriod(timeNs); + 
UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), threadLoad.GetTimeSlotMaxValue()); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), threadLoad.GetTimeSlotMaxValue()); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[2].load(), threadLoad.GetTimeSlotMaxValue()); + + for (auto slotIndex = 3u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + } + + Y_UNIT_TEST(TThreadLoadRegisterBusyPeriodFirstThreeTimeSlots3) { + using T = TThreadLoad<38400>; + + ui32 startNs = 3 * T::GetTimeSlotPartLengthNs(); + T threadLoad(startNs); + + for (auto slotIndex = 0u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + ui64 timeNs = 0; + threadLoad.RegisterBusyPeriod(timeNs); + for (auto slotIndex = 0u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + } + + Y_UNIT_TEST(TThreadLoadRegisterIdlePeriodFirstTimeSlot1) { + using T = TThreadLoad<38400>; + + ui64 timeNs = T::GetTimeSlotPartLengthNs(); + T threadLoad(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + for (auto slotIndex = 0u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + timeNs = 2 * T::GetTimeSlotPartLengthNs(); + threadLoad.RegisterBusyPeriod(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), 1); + for (auto slotIndex = 1u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + timeNs = 3 * T::GetTimeSlotPartLengthNs(); + 
threadLoad.RegisterIdlePeriod(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), 0); + for (auto slotIndex = 1u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + timeNs = 4 * T::GetTimeSlotPartLengthNs(); + threadLoad.RegisterBusyPeriod(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), 1); + for (auto slotIndex = 1u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + } + + Y_UNIT_TEST(TThreadLoadRegisterIdlePeriodFirstTimeSlot2) { + using T = TThreadLoad<38400>; + + ui64 timeNs = T::GetTimeSlotPartLengthNs(); + T threadLoad(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + for (auto slotIndex = 0u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + timeNs = 2 * T::GetTimeSlotPartLengthNs(); + threadLoad.RegisterBusyPeriod(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), 1); + for (auto slotIndex = 1u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + timeNs = 3 * T::GetTimeSlotPartLengthNs() - 1; + threadLoad.RegisterIdlePeriod(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), 1); + for (auto slotIndex = 1u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + timeNs = 4 * T::GetTimeSlotPartLengthNs(); + threadLoad.RegisterBusyPeriod(timeNs); + + 
UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), 3); + for (auto slotIndex = 1u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + } + + Y_UNIT_TEST(TThreadLoadRegisterIdlePeriodFirstTimeSlot3) { + using T = TThreadLoad<38400>; + + ui64 timeNs = T::GetTimeSlotPartLengthNs(); + T threadLoad(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + for (auto slotIndex = 0u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + timeNs = 2 * T::GetTimeSlotPartLengthNs(); + threadLoad.RegisterBusyPeriod(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), 1); + for (auto slotIndex = 1u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + timeNs = 3 * T::GetTimeSlotPartLengthNs() - 1; + threadLoad.RegisterIdlePeriod(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), 1); + for (auto slotIndex = 1u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + timeNs = 4 * T::GetTimeSlotPartLengthNs() - 2; + threadLoad.RegisterIdlePeriod(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), 1); + for (auto slotIndex = 1u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + timeNs = 5 * T::GetTimeSlotPartLengthNs(); + threadLoad.RegisterBusyPeriod(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + 
// Idle during the whole slot 0, then busy during the whole slot 1.
Y_UNIT_TEST(TThreadLoadRegisterIdlePeriodFirstTwoTimeSlots1) {
    using TLoad = TThreadLoad<38400>;

    TLoad load;

    // checks that every slot starting from `firstIndex` is still free
    const auto expectFreeFrom = [&load](ui32 firstIndex) {
        for (auto index = firstIndex; index < load.GetTimeSlotCount(); ++index) {
            UNIT_ASSERT_VALUES_EQUAL(load.TimeSlots[index].load(), 0);
        }
    };

    UNIT_ASSERT_VALUES_EQUAL(load.LastTimeNs.load(), 0);
    expectFreeFrom(0);

    const ui64 idleUntilNs = load.GetTimeSlotLengthNs();
    load.RegisterIdlePeriod(idleUntilNs);

    UNIT_ASSERT_VALUES_EQUAL(load.LastTimeNs.load(), idleUntilNs);
    expectFreeFrom(0);

    const ui64 busyUntilNs = 2 * load.GetTimeSlotLengthNs();
    load.RegisterBusyPeriod(busyUntilNs);

    UNIT_ASSERT_VALUES_EQUAL(load.TimeSlots[0].load(), 0);
    UNIT_ASSERT_VALUES_EQUAL(load.TimeSlots[1].load(), load.GetTimeSlotMaxValue());
    expectFreeFrom(2);
}
threadLoad.GetTimeSlotLengthNs(); + threadLoad.RegisterBusyPeriod(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), 1); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[1].load(), threadLoad.GetTimeSlotMaxValue()); + for (auto slotIndex = 2u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + } + + Y_UNIT_TEST(TThreadLoadRegisterIdlePeriodFirstTwoTimeSlots3) { + using T = TThreadLoad<38400>; + + T threadLoad; + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), 0); + for (auto slotIndex = 0u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + ui64 timeNs = threadLoad.GetTimeSlotLengthNs() - 1; + threadLoad.RegisterIdlePeriod(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + for (auto slotIndex = 0u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + timeNs = 2 * threadLoad.GetTimeSlotLengthNs() - 1; + threadLoad.RegisterBusyPeriod(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), 1); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[1].load(), threadLoad.GetTimeSlotMaxValue()); + for (auto slotIndex = 2u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + } + + Y_UNIT_TEST(TThreadLoadRegisterIdlePeriodFirstThreeTimeSlots1) { + using T = TThreadLoad<38400>; + + T threadLoad; + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), 0); + for (auto slotIndex = 0u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + ui64 timeNs = 
threadLoad.GetTimeSlotLengthNs(); + threadLoad.RegisterBusyPeriod(timeNs); + + timeNs = 2 * threadLoad.GetTimeSlotLengthNs(); + threadLoad.RegisterIdlePeriod(timeNs); + + timeNs = 3 * threadLoad.GetTimeSlotLengthNs(); + threadLoad.RegisterBusyPeriod(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), threadLoad.GetTimeSlotMaxValue()); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[1].load(), 0); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[2].load(), threadLoad.GetTimeSlotMaxValue()); + for (auto slotIndex = 3u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + } + + Y_UNIT_TEST(TThreadLoadRegisterIdlePeriodFirstThreeTimeSlots2) { + using T = TThreadLoad<38400>; + + T threadLoad; + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), 0); + for (auto slotIndex = 0u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + ui64 timeNs = threadLoad.GetTimeSlotLengthNs(); + threadLoad.RegisterBusyPeriod(timeNs); + + timeNs = 3 * threadLoad.GetTimeSlotLengthNs(); + threadLoad.RegisterIdlePeriod(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), threadLoad.GetTimeSlotMaxValue()); + for (auto slotIndex = 1u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + } + + Y_UNIT_TEST(TThreadLoadRegisterIdlePeriodFirstThreeTimeSlots3) { + using T = TThreadLoad<38400>; + + T threadLoad; + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), 0); + for (auto slotIndex = 0u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + ui64 timeNs = threadLoad.GetTimeSlotLengthNs(); + 
threadLoad.RegisterIdlePeriod(timeNs); + + timeNs = 3 * threadLoad.GetTimeSlotLengthNs(); + threadLoad.RegisterBusyPeriod(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), 0); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[1].load(), threadLoad.GetTimeSlotMaxValue()); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[2].load(), threadLoad.GetTimeSlotMaxValue()); + for (auto slotIndex = 3u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + } + + Y_UNIT_TEST(TThreadLoadRegisterIdlePeriodFirstThreeTimeSlots4) { + using T = TThreadLoad<38400>; + + T threadLoad; + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), 0); + for (auto slotIndex = 0u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + ui64 timeNs = threadLoad.GetTimeSlotLengthNs() + 2 * threadLoad.GetTimeSlotPartLengthNs(); + threadLoad.RegisterIdlePeriod(timeNs); + + timeNs = 3 * threadLoad.GetTimeSlotLengthNs(); + threadLoad.RegisterBusyPeriod(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), 0); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[1].load(), threadLoad.GetTimeSlotPartCount() - 2); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[2].load(), threadLoad.GetTimeSlotMaxValue()); + for (auto slotIndex = 3u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + } + + Y_UNIT_TEST(TThreadLoadRegisterIdlePeriodFirstThreeTimeSlots5) { + using T = TThreadLoad<38400>; + + T threadLoad; + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), 0); + for (auto slotIndex = 0u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + 
UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + ui64 timeNs = 2 * threadLoad.GetTimeSlotLengthNs(); + threadLoad.RegisterBusyPeriod(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), threadLoad.GetTimeSlotMaxValue()); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[1].load(), threadLoad.GetTimeSlotMaxValue()); + for (auto slotIndex = 2u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + timeNs = timeNs + threadLoad.GetTimeWindowLengthNs() + threadLoad.GetTimeSlotLengthNs(); + threadLoad.RegisterIdlePeriod(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + for (auto slotIndex = 0u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + } + + Y_UNIT_TEST(TThreadLoadRegisterIdlePeriodOverTimeWindow) { + constexpr auto timeWindowLengthNs = 5368709120ull; // 5 * 2 ^ 30 ~5 sec + constexpr auto timeSlotLengthNs = 524288ull; // 2 ^ 19 ns ~ 512 usec + constexpr auto timeSlotCount = timeWindowLengthNs / timeSlotLengthNs; + + using T = TThreadLoad<timeSlotCount, timeSlotLengthNs, std::uint8_t>; + + T threadLoad; + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), 0); + for (auto slotIndex = 0u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + ui64 timeNs = 5 * threadLoad.GetTimeSlotLengthNs(); + threadLoad.RegisterBusyPeriod(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), threadLoad.GetTimeSlotMaxValue()); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[1].load(), threadLoad.GetTimeSlotMaxValue()); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[2].load(), threadLoad.GetTimeSlotMaxValue()); + 
UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[3].load(), threadLoad.GetTimeSlotMaxValue()); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[4].load(), threadLoad.GetTimeSlotMaxValue()); + for (auto slotIndex = 5u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + + timeNs = timeNs + threadLoad.GetTimeWindowLengthNs() - 3 * threadLoad.GetTimeSlotLengthNs(); + threadLoad.RegisterIdlePeriod(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoad.LastTimeNs.load(), timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[0].load(), 0); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[1].load(), 0); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[2].load(), threadLoad.GetTimeSlotMaxValue()); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[3].load(), threadLoad.GetTimeSlotMaxValue()); + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[4].load(), threadLoad.GetTimeSlotMaxValue()); + for (auto slotIndex = 5u; slotIndex < threadLoad.GetTimeSlotCount(); ++slotIndex) { + UNIT_ASSERT_VALUES_EQUAL(threadLoad.TimeSlots[slotIndex].load(), 0); + } + } + + Y_UNIT_TEST(MinusOneThreadEstimatorTwoThreadLoadsZeroShiftNs) { + constexpr auto timeWindowLengthNs = 5368709120ull; // 5 * 2 ^ 30 ~5 sec + constexpr auto timeSlotLengthNs = 524288ull; // 2 ^ 19 ns ~ 512 usec + constexpr auto timeSlotCount = timeWindowLengthNs / timeSlotLengthNs; + + using T = TThreadLoad<timeSlotCount, timeSlotLengthNs, std::uint16_t>; + + UNIT_ASSERT_VALUES_EQUAL(T::GetTimeSlotPartCount(), (ui64)std::numeric_limits<std::uint16_t>::max() + 1); + + T *threadLoads[2]; + threadLoads[0] = new T; + threadLoads[1] = new T; + + for (ui64 i = 1; i < timeSlotCount; i += 2) { + threadLoads[0]->RegisterIdlePeriod(i * T::GetTimeSlotLengthNs()); + threadLoads[0]->RegisterBusyPeriod((i + 1) * T::GetTimeSlotLengthNs()); + } + + for (ui64 i = 1; i < timeSlotCount; i += 2) { + threadLoads[1]->RegisterBusyPeriod(i * T::GetTimeSlotLengthNs()); + 
threadLoads[1]->RegisterIdlePeriod((i + 1) * T::GetTimeSlotLengthNs()); + } + + TMinusOneThreadEstimator estimator; + ui64 value = estimator.MaxLatencyIncreaseWithOneLessCpu(threadLoads, 2, T::GetTimeWindowLengthNs(), T::GetTimeWindowLengthNs()); + UNIT_ASSERT_VALUES_EQUAL(value, 0); + + delete threadLoads[0]; + delete threadLoads[1]; + } + + Y_UNIT_TEST(MinusOneThreadEstimatorTwoThreadLoadsOneTimeSlotShift1) { + constexpr auto timeWindowLengthNs = 5368709120ull; // 5 * 2 ^ 30 ~5 sec + constexpr auto timeSlotLengthNs = 524288ull; // 2 ^ 19 ns ~ 512 usec + constexpr auto timeSlotCount = timeWindowLengthNs / timeSlotLengthNs; + constexpr auto threadCount = 2; + + using T = TThreadLoad<timeSlotCount, timeSlotLengthNs, std::uint16_t>; + + T *threadLoads[threadCount]; + + for (auto t = 0u; t < threadCount; ++t) { + threadLoads[t] = new T; + + for (ui64 i = 2; i < threadLoads[t]->GetTimeSlotCount(); i += 2) { + threadLoads[t]->RegisterIdlePeriod((i - 1) * T::GetTimeSlotLengthNs()); + threadLoads[t]->RegisterBusyPeriod(i * T::GetTimeSlotLengthNs()); + } + + threadLoads[t]->RegisterIdlePeriod((threadLoads[t]->GetTimeSlotCount() - 1) * T::GetTimeSlotLengthNs()); + threadLoads[t]->RegisterBusyPeriod(threadLoads[t]->GetTimeSlotCount() * T::GetTimeSlotLengthNs()); + + for (ui64 s = 0; s < threadLoads[t]->GetTimeSlotCount(); ++s) { + if (s % 2 == 1) { + UNIT_ASSERT_VALUES_EQUAL_C(threadLoads[t]->TimeSlots[s].load(), T::GetTimeSlotMaxValue(), ToString(s).c_str()); + } else { + UNIT_ASSERT_VALUES_EQUAL_C(threadLoads[t]->TimeSlots[s].load(), 0, ToString(s).c_str()); + } + } + } + + TMinusOneThreadEstimator estimator; + auto result = estimator.MaxLatencyIncreaseWithOneLessCpu(threadLoads, threadCount, T::GetTimeWindowLengthNs(), T::GetTimeWindowLengthNs()); + + for (ui64 t = 0; t < threadCount; ++t) { + for (ui64 s = 0; s < threadLoads[t]->GetTimeSlotCount(); ++s) { + if (s % 2 == 1) { + UNIT_ASSERT_VALUES_EQUAL_C(threadLoads[t]->TimeSlots[s].load(), T::GetTimeSlotMaxValue(), 
ToString(s).c_str()); + } else { + UNIT_ASSERT_VALUES_EQUAL_C(threadLoads[t]->TimeSlots[s].load(), 0, ToString(s).c_str()); + } + } + } + + UNIT_ASSERT_VALUES_EQUAL(result, T::GetTimeSlotLengthNs()); + + for (auto t = 0u; t < threadCount; ++t) { + delete threadLoads[t]; + } + } + + Y_UNIT_TEST(MinusOneThreadEstimatorTwoThreadLoadsOneTimeSlotShift2) { + constexpr auto timeWindowLengthNs = 5368709120ull; // 5 * 2 ^ 30 ~5 sec + constexpr auto timeSlotLengthNs = 524288ull; // 2 ^ 19 ns ~ 512 usec + constexpr auto timeSlotCount = timeWindowLengthNs / timeSlotLengthNs; + constexpr auto threadCount = 2; + + using T = TThreadLoad<timeSlotCount, timeSlotLengthNs, std::uint16_t>; + + T *threadLoads[threadCount]; + + for (auto t = 0u; t < threadCount; ++t) { + threadLoads[t] = new T; + + for (ui64 i = 2; i < threadLoads[t]->GetTimeSlotCount(); i += 2) { + threadLoads[t]->RegisterBusyPeriod((i - 1) * T::GetTimeSlotLengthNs()); + threadLoads[t]->RegisterIdlePeriod(i * T::GetTimeSlotLengthNs()); + } + + threadLoads[t]->RegisterBusyPeriod((threadLoads[t]->GetTimeSlotCount() - 1) * T::GetTimeSlotLengthNs()); + threadLoads[t]->RegisterIdlePeriod(threadLoads[t]->GetTimeSlotCount() * T::GetTimeSlotLengthNs()); + + for (ui64 s = 0; s < threadLoads[t]->GetTimeSlotCount(); ++s) { + if (s % 2 == 0) { + UNIT_ASSERT_VALUES_EQUAL_C(threadLoads[t]->TimeSlots[s].load(), T::GetTimeSlotMaxValue(), ToString(s).c_str()); + } else { + UNIT_ASSERT_VALUES_EQUAL_C(threadLoads[t]->TimeSlots[s].load(), 0, ToString(s).c_str()); + } + } + } + + TMinusOneThreadEstimator estimator; + auto result = estimator.MaxLatencyIncreaseWithOneLessCpu(threadLoads, threadCount, T::GetTimeWindowLengthNs(), T::GetTimeWindowLengthNs()); + + for (ui64 t = 0; t < threadCount; ++t) { + for (ui64 s = 0; s < threadLoads[t]->GetTimeSlotCount(); ++s) { + if (s % 2 == 0) { + UNIT_ASSERT_VALUES_EQUAL_C(threadLoads[t]->TimeSlots[s].load(), T::GetTimeSlotMaxValue(), ToString(s).c_str()); + } else { + 
UNIT_ASSERT_VALUES_EQUAL_C(threadLoads[t]->TimeSlots[s].load(), 0, ToString(s).c_str()); + } + } + } + + UNIT_ASSERT_VALUES_EQUAL(result, T::GetTimeSlotLengthNs()); + + for (auto t = 0u; t < threadCount; ++t) { + delete threadLoads[t]; + } + } + + Y_UNIT_TEST(MinusOneThreadEstimatorTwoThreadLoadsTwoTimeSlotsShift1) { + constexpr auto timeWindowLengthNs = 5368709120ull; // 5 * 2 ^ 30 ~5 sec + constexpr auto timeSlotLengthNs = 524288ull; // 2 ^ 19 ns ~ 512 usec + constexpr auto timeSlotCount = timeWindowLengthNs / timeSlotLengthNs; + constexpr auto threadCount = 2; + + using T = TThreadLoad<timeSlotCount, timeSlotLengthNs, std::uint16_t>; + + T *threadLoads[threadCount]; + + for (auto t = 0u; t < threadCount; ++t) { + threadLoads[t] = new T; + + for (ui64 i = 4; i < threadLoads[t]->GetTimeSlotCount(); i += 4) { + threadLoads[t]->RegisterIdlePeriod((i - 2) * T::GetTimeSlotLengthNs()); + threadLoads[t]->RegisterBusyPeriod(i * T::GetTimeSlotLengthNs()); + } + + threadLoads[t]->RegisterIdlePeriod((threadLoads[t]->GetTimeSlotCount() - 2) * T::GetTimeSlotLengthNs()); + threadLoads[t]->RegisterBusyPeriod(threadLoads[t]->GetTimeSlotCount() * T::GetTimeSlotLengthNs()); + + for (ui64 s = 0; s < threadLoads[t]->GetTimeSlotCount(); ++s) { + if (s % 4 == 2 || s % 4 == 3) { + UNIT_ASSERT_VALUES_EQUAL_C(threadLoads[t]->TimeSlots[s].load(), T::GetTimeSlotMaxValue(), ToString(s).c_str()); + } else { + UNIT_ASSERT_VALUES_EQUAL_C(threadLoads[t]->TimeSlots[s].load(), 0, ToString(s).c_str()); + } + } + } + + TMinusOneThreadEstimator estimator; + auto result = estimator.MaxLatencyIncreaseWithOneLessCpu(threadLoads, threadCount, T::GetTimeWindowLengthNs(), T::GetTimeWindowLengthNs()); + + for (ui64 t = 0; t < threadCount; ++t) { + for (ui64 s = 0; s < threadLoads[t]->GetTimeSlotCount(); ++s) { + if (s % 4 == 2 || s % 4 == 3) { + UNIT_ASSERT_VALUES_EQUAL_C(threadLoads[t]->TimeSlots[s].load(), T::GetTimeSlotMaxValue(), ToString(s).c_str()); + } else { + 
UNIT_ASSERT_VALUES_EQUAL(threadLoads[t]->TimeSlots[s].load(), 0); + } + } + } + + UNIT_ASSERT_VALUES_EQUAL(result, 2 * T::GetTimeSlotLengthNs()); + + for (auto t = 0u; t < threadCount; ++t) { + delete threadLoads[t]; + } + } + + Y_UNIT_TEST(MinusOneThreadEstimatorTwoThreadLoadsTwoTimeSlotsShift2) { + constexpr auto timeWindowLengthNs = 5368709120ull; // 5 * 2 ^ 30 ~5 sec + constexpr auto timeSlotLengthNs = 524288ull; // 2 ^ 19 ns ~ 512 usec + constexpr auto timeSlotCount = timeWindowLengthNs / timeSlotLengthNs; + constexpr auto threadCount = 2; + + using T = TThreadLoad<timeSlotCount, timeSlotLengthNs, std::uint16_t>; + + T *threadLoads[threadCount]; + + for (auto t = 0u; t < threadCount; ++t) { + threadLoads[t] = new T; + + for (ui64 i = 4; i < threadLoads[t]->GetTimeSlotCount(); i += 4) { + threadLoads[t]->RegisterBusyPeriod((i - 2) * T::GetTimeSlotLengthNs()); + threadLoads[t]->RegisterIdlePeriod(i * T::GetTimeSlotLengthNs()); + } + + threadLoads[t]->RegisterBusyPeriod((threadLoads[t]->GetTimeSlotCount() - 2) * T::GetTimeSlotLengthNs()); + threadLoads[t]->RegisterIdlePeriod(threadLoads[t]->GetTimeSlotCount() * T::GetTimeSlotLengthNs()); + + for (ui64 s = 0; s < threadLoads[t]->GetTimeSlotCount(); ++s) { + if (s % 4 == 0 || s % 4 == 1) { + UNIT_ASSERT_VALUES_EQUAL_C(threadLoads[t]->TimeSlots[s].load(), T::GetTimeSlotMaxValue(), ToString(s).c_str()); + } else { + UNIT_ASSERT_VALUES_EQUAL_C(threadLoads[t]->TimeSlots[s].load(), 0, ToString(s).c_str()); + } + } + } + + TMinusOneThreadEstimator estimator; + auto result = estimator.MaxLatencyIncreaseWithOneLessCpu(threadLoads, threadCount, T::GetTimeWindowLengthNs(), T::GetTimeWindowLengthNs()); + + for (ui64 t = 0; t < threadCount; ++t) { + for (ui64 s = 0; s < threadLoads[t]->GetTimeSlotCount(); ++s) { + if (s % 4 == 0 || s % 4 == 1) { + UNIT_ASSERT_VALUES_EQUAL_C(threadLoads[t]->TimeSlots[s].load(), T::GetTimeSlotMaxValue(), ToString(s).c_str()); + } else { + 
UNIT_ASSERT_VALUES_EQUAL_C(threadLoads[t]->TimeSlots[s].load(), 0, ToString(s).c_str()); + } + } + } + + UNIT_ASSERT_VALUES_EQUAL(result, 2 * T::GetTimeSlotLengthNs()); + + for (auto t = 0u; t < threadCount; ++t) { + delete threadLoads[t]; + } + } + + Y_UNIT_TEST(MinusOneThreadEstimatorTwoThreadLoadsTwoTimeSlotsShift3) { + constexpr auto timeWindowLengthNs = 5368709120ull; // 5 * 2 ^ 30 ~5 sec + constexpr auto timeSlotLengthNs = 524288ull; // 2 ^ 19 ns ~ 512 usec + constexpr auto timeSlotCount = timeWindowLengthNs / timeSlotLengthNs; + constexpr auto threadCount = 2; + + using T = TThreadLoad<timeSlotCount, timeSlotLengthNs, std::uint16_t>; + + T *threadLoads[threadCount]; + + for (auto t = 0u; t < threadCount; ++t) { + threadLoads[t] = new T; + + auto timeNs = T::GetTimeWindowLengthNs() - 1.5 * T::GetTimeSlotLengthNs(); + threadLoads[t]->RegisterIdlePeriod(timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoads[t]->LastTimeNs.load(), timeNs); + + timeNs = T::GetTimeWindowLengthNs(); + threadLoads[t]->RegisterBusyPeriod(timeNs); + UNIT_ASSERT_VALUES_EQUAL(threadLoads[t]->LastTimeNs.load(), timeNs); + + for (ui64 s = 0; s + 2 < threadLoads[t]->GetTimeSlotCount(); ++s) { + UNIT_ASSERT_VALUES_EQUAL_C(threadLoads[t]->TimeSlots[s].load(), 0, ToString(s).c_str()); + } + + UNIT_ASSERT_VALUES_EQUAL(threadLoads[t]->TimeSlots[timeSlotCount - 2].load(), T::GetTimeSlotPartCount() / 2); + UNIT_ASSERT_VALUES_EQUAL(threadLoads[t]->TimeSlots[timeSlotCount - 1].load(), T::GetTimeSlotMaxValue()); + } + + TMinusOneThreadEstimator estimator; + auto result = estimator.MaxLatencyIncreaseWithOneLessCpu(threadLoads, threadCount, T::GetTimeWindowLengthNs(), T::GetTimeWindowLengthNs()); + + for (auto t = 0u; t < threadCount; ++t) { + for (ui64 s = 0; s + 2 < threadLoads[t]->GetTimeSlotCount(); ++s) { + UNIT_ASSERT_VALUES_EQUAL_C(threadLoads[t]->TimeSlots[s].load(), 0, ToString(s).c_str()); + } + + UNIT_ASSERT_VALUES_EQUAL(threadLoads[t]->TimeSlots[timeSlotCount - 2].load(), 
T::GetTimeSlotPartCount() / 2); + UNIT_ASSERT_VALUES_EQUAL(threadLoads[t]->TimeSlots[timeSlotCount - 1].load(), T::GetTimeSlotMaxValue()); + } + + UNIT_ASSERT_VALUES_EQUAL(result, 2 * T::GetTimeSlotLengthNs()); + + for (auto t = 0u; t < threadCount; ++t) { + delete threadLoads[t]; + } + } + + Y_UNIT_TEST(MinusOneThreadEstimator16ThreadLoadsAllTimeSlots) { + constexpr auto timeWindowLengthNs = 5368709120ull; // 5 * 2 ^ 30 ~5 sec + constexpr auto timeSlotLengthNs = 524288ull; // 2 ^ 19 ns ~ 512 usec + constexpr auto timeSlotCount = timeWindowLengthNs / timeSlotLengthNs; + constexpr auto threadCount = 16; + constexpr auto estimatesCount = 16; + + using T = TThreadLoad<timeSlotCount, timeSlotLengthNs, std::uint16_t>; + + for (auto e = 0u; e < estimatesCount; ++e) { + T *threadLoads[threadCount]; + + for (auto t = 0u; t < threadCount; ++t) { + threadLoads[t] = new T; + auto timeNs = threadLoads[t]->GetTimeWindowLengthNs(); + threadLoads[t]->RegisterBusyPeriod(timeNs); + + UNIT_ASSERT_VALUES_EQUAL(threadLoads[t]->LastTimeNs.load(), timeNs); + for (ui64 s = 0; s < threadLoads[t]->GetTimeSlotCount(); ++s) { + UNIT_ASSERT_VALUES_EQUAL_C(threadLoads[t]->TimeSlots[s].load(), T::GetTimeSlotMaxValue(), ToString(s).c_str()); + } + } + + ui64 result = 0; + { + THPTimer timer; + TMinusOneThreadEstimator estimator; + result = estimator.MaxLatencyIncreaseWithOneLessCpu(threadLoads, threadCount, T::GetTimeWindowLengthNs(), T::GetTimeWindowLengthNs()); + // output in microseconds + auto passed = timer.Passed() * 1000000; + Y_UNUSED(passed); + // Cerr << "timer : " << passed << " " << __LINE__ << Endl; + } + + for (ui64 t = 0; t < threadCount; ++t) { + UNIT_ASSERT_VALUES_EQUAL(threadLoads[t]->LastTimeNs.load(), T::GetTimeWindowLengthNs()); + for (ui64 s = 0; s < threadLoads[t]->GetTimeSlotCount(); ++s) { + UNIT_ASSERT_VALUES_EQUAL_C(threadLoads[t]->TimeSlots[s].load(), T::GetTimeSlotMaxValue(), ToString(s).c_str()); + } + } + + UNIT_ASSERT_VALUES_EQUAL(result, 
T::GetTimeWindowLengthNs()); + + for (auto t = 0u; t < threadCount; ++t) { + delete threadLoads[t]; + } + } + } +} diff --git a/library/cpp/mime/types/mime.cpp b/library/cpp/mime/types/mime.cpp index e4cbcc86eb..74eeabea48 100644 --- a/library/cpp/mime/types/mime.cpp +++ b/library/cpp/mime/types/mime.cpp @@ -250,5 +250,6 @@ const char* MimeNames[MIME_MAX] = { "woff", // MIME_WOFF // 43 "woff2", // MIME_WOFF2 // 44 "ttf", // MIME_TTF // 45 - "webmanifest" // MIME_WEBMANIFEST // 46 + "webmanifest", // MIME_WEBMANIFEST // 46 + "cbor", // MIME_CBOR // 47 }; diff --git a/library/cpp/string_utils/CMakeLists.txt b/library/cpp/string_utils/CMakeLists.txt index d256782733..bbdcba85d9 100644 --- a/library/cpp/string_utils/CMakeLists.txt +++ b/library/cpp/string_utils/CMakeLists.txt @@ -7,6 +7,7 @@ add_subdirectory(base64) +add_subdirectory(csv) add_subdirectory(indent_text) add_subdirectory(levenshtein_diff) add_subdirectory(parse_size) diff --git a/library/cpp/string_utils/csv/CMakeLists.txt b/library/cpp/string_utils/csv/CMakeLists.txt new file mode 100644 index 0000000000..7dffad3566 --- /dev/null +++ b/library/cpp/string_utils/csv/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(cpp-string_utils-csv) +target_link_libraries(cpp-string_utils-csv PUBLIC + contrib-libs-cxxsupp + yutil +) +target_sources(cpp-string_utils-csv PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/string_utils/csv/csv.cpp +) diff --git a/library/cpp/string_utils/csv/csv.cpp b/library/cpp/string_utils/csv/csv.cpp new file mode 100644 index 0000000000..218473c62c --- /dev/null +++ b/library/cpp/string_utils/csv/csv.cpp @@ -0,0 +1,82 @@ +#include "csv.h" + +TStringBuf NCsvFormat::CsvSplitter::Consume() { + if (Begin == End) { + return nullptr; + } + TString::iterator TokenStart = Begin; + TString::iterator TokenEnd = Begin; + if (Quote == '\0') { + while (1) { + if (TokenEnd == End || *TokenEnd == Delimeter) { + Begin = TokenEnd; + return TStringBuf(TokenStart, TokenEnd); + } + ++TokenEnd; + } + } else { + bool Escape = false; + if (*Begin == Quote) { + Escape = true; + ++TokenStart; + ++TokenEnd; + Y_ENSURE(TokenStart != End, TStringBuf("RFC4180 violation: quotation mark must be followed by something")); + } + while (1) { + if (TokenEnd == End || (!Escape && *TokenEnd == Delimeter)) { + Begin = TokenEnd; + return TStringBuf(TokenStart, TokenEnd); + } else if (*TokenEnd == Quote) { + Y_ENSURE(Escape, TStringBuf("RFC4180 violation: quotation mark must be in the escaped string only")); + if (TokenEnd + 1 == End) { + Begin = TokenEnd + 1; + } else if (*(TokenEnd + 1) == Delimeter) { + Begin = TokenEnd + 1; + } else if (*(TokenEnd + 1) == Quote) { + CustomStringBufs.push_back(TStringBuf(TokenStart, (TokenEnd + 1))); + TokenEnd += 2; + TokenStart = TokenEnd; + continue; + } else { + Y_ENSURE(false, TStringBuf("RFC4180 violation: in escaped string quotation mark must be followed by a delimiter, EOL or another quotation mark")); + } + if (CustomStringBufs.size()) { + CustomString.clear(); + for (auto CustomStringBuf : CustomStringBufs) { + CustomString += TString{ CustomStringBuf }; + } + CustomString += TString{ TStringBuf(TokenStart, TokenEnd) }; + 
CustomStringBufs.clear(); + return TStringBuf(CustomString); + } else { + return TStringBuf(TokenStart, TokenEnd); + } + } + ++TokenEnd; + } + } +}; + +TString NCsvFormat::TLinesSplitter::ConsumeLine() { + bool Escape = false; + TString result; + TString line; + while (Input.ReadLine(line)) { + for (auto it = line.begin(); it != line.end(); ++it) { + if (*it == Quote) { + Escape = !Escape; + } + } + if (!result) { + result = line; + } else { + result += line; + } + if (!Escape) { + break; + } else { + result += "\n"; + } + } + return result; +}; diff --git a/library/cpp/string_utils/csv/csv.h b/library/cpp/string_utils/csv/csv.h new file mode 100644 index 0000000000..8cb96e6bb9 --- /dev/null +++ b/library/cpp/string_utils/csv/csv.h @@ -0,0 +1,64 @@ +#pragma once + +#include <util/generic/yexception.h> +#include <util/generic/strbuf.h> +#include <util/generic/vector.h> +#include <util/stream/input.h> + +/* + Split string by rfc4180 +*/ + +namespace NCsvFormat { + class TLinesSplitter { + private: + IInputStream& Input; + const char Quote; + public: + TLinesSplitter(IInputStream& input, const char quote = '"') + : Input(input) + , Quote(quote) { + } + TString ConsumeLine(); + }; + + class CsvSplitter { + public: + CsvSplitter(TString& data, const char delimeter = ',', const char quote = '"') + // quote = '\0' ignores quoting in values and words like simple split + : Delimeter(delimeter) + , Quote(quote) + , Begin(data.begin()) + , End(data.end()) + { + } + + bool Step() { + if (Begin == End) { + return false; + } + ++Begin; + return true; + } + + TStringBuf Consume(); + explicit operator TVector<TString>() { + TVector<TString> ret; + + do { + TStringBuf buf = Consume(); + ret.push_back(TString{buf}); + } while (Step()); + + return ret; + } + + private: + const char Delimeter; + const char Quote; + TString::iterator Begin; + const TString::const_iterator End; + TString CustomString; + TVector<TStringBuf> CustomStringBufs; + }; +} diff --git 
a/ydb/core/base/feature_flags.h b/ydb/core/base/feature_flags.h index 75b7158fa7..2909d33625 100644 --- a/ydb/core/base/feature_flags.h +++ b/ydb/core/base/feature_flags.h @@ -40,6 +40,10 @@ public: SetEnableOlapSchemaOperations(value); } + inline void SetEnableBorrowedSplitCompactionForTest(bool value) { + SetEnableBorrowedSplitCompaction(value); + } + inline void SetEnableMvccForTest(bool value) { SetEnableMvcc(value ? NKikimrConfig::TFeatureFlags::VALUE_TRUE diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h index 5990a2ee87..394f79ff15 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h @@ -220,10 +220,9 @@ struct TPDiskConfig : public TThrRefBase { } bool CheckSerial(const TString& deviceSerial) const { - if (ExpectedSerial || - SerialManagementStage == NKikimrBlobStorage::TSerialManagementStage::CHECK_SERIAL || + if (SerialManagementStage == NKikimrBlobStorage::TSerialManagementStage::CHECK_SERIAL || SerialManagementStage == NKikimrBlobStorage::TSerialManagementStage::ONLY_SERIAL) { - if (ExpectedSerial != deviceSerial) { + if (ExpectedSerial && ExpectedSerial != deviceSerial) { return false; } } diff --git a/ydb/core/blobstorage/vdisk/repl/query_donor.h b/ydb/core/blobstorage/vdisk/repl/query_donor.h index aa8ead2005..2c2fa170f1 100644 --- a/ydb/core/blobstorage/vdisk/repl/query_donor.h +++ b/ydb/core/blobstorage/vdisk/repl/query_donor.h @@ -11,6 +11,7 @@ namespace NKikimr { std::unique_ptr<TEvBlobStorage::TEvVGetResult> Result; TActorId ParentId; std::deque<std::pair<TVDiskID, TActorId>> Donors; + TDynBitMap UnresolvedItems; public: TDonorQueryActor(TEvBlobStorage::TEvEnrichNotYet& msg, std::deque<std::pair<TVDiskID, TActorId>> donors) @@ -27,6 +28,13 @@ namespace NKikimr { ParentId = parentId; Become(&TThis::StateFunc); LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::BS_VDISK_GET, SelfId() << " starting 
Donor-mode query"); + + const auto& result = Result->Record; + UnresolvedItems.Reserve(result.ResultSize()); + for (size_t i = 0; i < result.ResultSize(); ++i) { + UnresolvedItems[i] = result.GetResult(i).GetStatus() == NKikimrProto::NOT_YET; + } + Step(); } @@ -50,14 +58,11 @@ namespace NKikimr { auto query = fun(vdiskId, TInstant::Max(), NKikimrBlobStorage::EGetHandleClass::AsyncRead, flags, {}, {}, 0); bool action = false; - - const auto& result = Result->Record; - for (ui64 i = 0; i < result.ResultSize(); ++i) { - const auto& r = result.GetResult(i); - if (r.GetStatus() == NKikimrProto::NOT_YET) { - query->AddExtremeQuery(LogoBlobIDFromLogoBlobID(r.GetBlobID()), r.GetShift(), r.GetSize(), &i); - action = true; - } + Y_FOR_EACH_BIT(i, UnresolvedItems) { + const auto& r = Result->Record.GetResult(i); + const ui64 cookie = i; + query->AddExtremeQuery(LogoBlobIDFromLogoBlobID(r.GetBlobID()), r.GetShift(), r.GetSize(), &cookie); + action = true; } if (action) { @@ -74,8 +79,12 @@ namespace NKikimr { LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::BS_VDISK_GET, SelfId() << " received " << ev->Get()->ToString()); auto& result = Result->Record; for (const auto& item : ev->Get()->Record.GetResult()) { - auto *res = result.MutableResult(item.GetCookie()); - if (item.GetStatus() == NKikimrProto::OK || (item.GetStatus() == NKikimrProto::ERROR && res->GetStatus() == NKikimrProto::NOT_YET)) { + const ui64 index = item.GetCookie(); + Y_VERIFY_DEBUG(UnresolvedItems[index]); + + if (item.GetStatus() == NKikimrProto::OK /* || item.GetStatus() == NKikimrProto::ERROR */) { + auto *res = result.MutableResult(index); + std::optional<ui64> cookie = res->HasCookie() ? 
std::make_optional(res->GetCookie()) : std::nullopt; res->CopyFrom(item); if (cookie) { // retain original cookie @@ -83,6 +92,10 @@ namespace NKikimr { } else { res->ClearCookie(); } + + if (res->GetStatus() == NKikimrProto::OK) { + UnresolvedItems[index] = false; + } } } Step(); diff --git a/ydb/core/blobstorage/vdisk/skeleton/skeleton_vpatch_actor.cpp b/ydb/core/blobstorage/vdisk/skeleton/skeleton_vpatch_actor.cpp index 3d52439103..ff571e9536 100644 --- a/ydb/core/blobstorage/vdisk/skeleton/skeleton_vpatch_actor.cpp +++ b/ydb/core/blobstorage/vdisk/skeleton/skeleton_vpatch_actor.cpp @@ -58,16 +58,13 @@ namespace NKikimr::NPrivate { struct TXorDiffs { TVector<TDiff> Diffs; ui8 PartId; - std::unique_ptr<TEvBlobStorage::TEvVPatchXorDiffResult> ResultEvent; TActorId Sender; ui64 Cookie; - TXorDiffs(TVector<TDiff> &&diffs, ui8 partId, std::unique_ptr<TEvBlobStorage::TEvVPatchXorDiffResult> &&result, - const TActorId &sender, ui64 cookie) + TXorDiffs(TVector<TDiff> &&diffs, ui8 partId, const TActorId &sender, ui64 cookie) : Diffs(std::move(diffs)) , PartId(partId) - , ResultEvent(std::move(result)) , Sender(sender) , Cookie(cookie) { @@ -157,7 +154,7 @@ namespace NKikimr::NPrivate { void Bootstrap() { STLOG(PRI_INFO, BS_VDISK_PATCH, BSVSP03, - VDiskLogPrefix << " TEvVPatch: bootsrapped;", + VDiskLogPrefix << " TEvVPatch: bootstrapped;", (OriginalBlobId, OriginalBlobId), (Deadline, Deadline)); ui32 cookie = 0; @@ -167,9 +164,18 @@ namespace NKikimr::NPrivate { TLogoBlobID::MaxPartId, nullptr, false); Send(LeaderId, msg.release()); + TInstant now = TActivationContext::Now(); + if (Deadline != TInstant::Zero() && Deadline < now) { + ErrorReason = "DEADLINE"; + SendVPatchFoundParts(NKikimrProto::ERROR); + NotifySkeletonAboutDying(); + Become(&TThis::ErrorState); + return; + } + Become(&TThis::StartState); - TDuration liveDuration = Deadline - TActivationContext::Now(); + TDuration liveDuration = Deadline - now; if (!Deadline || liveDuration > CommonLiveTime) { 
liveDuration = CommonLiveTime; } @@ -183,6 +189,7 @@ namespace NKikimr::NPrivate { (OriginalBlobId, OriginalBlobId), (FoundParts, FormatList(FoundOriginalParts)), (Status, status)); + FoundPartsEvent->Record.SetErrorReason(ErrorReason); for (ui8 part : FoundOriginalParts) { FoundPartsEvent->AddPart(part); } @@ -218,14 +225,16 @@ namespace NKikimr::NPrivate { ErrorReason = TStringBuilder() << "Recieve not OK status from VGetRange," << " received status# " << NKikimrProto::EReplyStatus_Name(record.GetStatus()); SendVPatchFoundParts(NKikimrProto::ERROR); - NotifySkeletonAndDie(); + NotifySkeletonAboutDying(); + Become(&TThis::ErrorState); return; } if (record.ResultSize() != 1) { ErrorReason = TStringBuilder() << "Expected only one result, but given " << record.ResultSize() << " received status# " << NKikimrProto::EReplyStatus_Name(record.GetStatus()); SendVPatchFoundParts(NKikimrProto::ERROR); - NotifySkeletonAndDie(); + NotifySkeletonAboutDying(); + Become(&TThis::ErrorState); return; } @@ -241,7 +250,9 @@ namespace NKikimr::NPrivate { SendVPatchFoundParts(NKikimrProto::OK); if (FoundOriginalParts.empty()) { - NotifySkeletonAndDie(); + NotifySkeletonAboutDying(); + Become(&TThis::ErrorState); + return; } } @@ -304,9 +315,11 @@ namespace NKikimr::NPrivate { (PatchedBlobId, PatchedBlobId), (OriginalPartId, (ui32)OriginalPartId), (PatchedPartId, (ui32)PatchedPartId), + (DataParts, (ui32)GType.DataParts()), (ReceivedBlobId, blobId), (Status, record.GetStatus()), - (ResultSize, record.ResultSize())); + (ResultSize, record.ResultSize()), + (ParityPart, (blobId.PartId() <= GType.DataParts() ? 
"no" : "yes"))); ui8 *buffer = reinterpret_cast<ui8*>(const_cast<char*>(Buffer.data())); if (blobId.PartId() <= GType.DataParts()) { @@ -320,9 +333,8 @@ namespace NKikimr::NPrivate { ui32 dataSize = blobId.BlobSize(); for (ui32 idx = ReceivedXorDiffs.size(); idx != 0; --idx) { - auto &[diffs, partId, result, sender, cookie] = ReceivedXorDiffs.back(); + auto &[diffs, partId, sender, cookie] = ReceivedXorDiffs.back(); GType.ApplyXorDiff(TErasureType::CrcModeNone, dataSize, buffer, diffs, partId - 1, toPart - 1); - SendVDiskResponse(TActivationContext::AsActorContext(), sender, result.release(), cookie); ReceivedXorDiffs.pop_back(); } @@ -418,7 +430,9 @@ namespace NKikimr::NPrivate { (OriginalBlobId, OriginalBlobId), (PatchedBlobId, PatchedBlobId), (OriginalPartId, (ui32)OriginalPartId), - (PatchedPartId, (ui32)PatchedPartId)); + (PatchedPartId, (ui32)PatchedPartId), + (ReceivedXorDiffs, ReceivedXorDiffCount), + (ExpectedXorDiffs, WaitedXorDiffCount)); ui64 cookie = OriginalBlobId.Hash(); std::unique_ptr<IEventBase> put = std::make_unique<TEvBlobStorage::TEvVPut>(TLogoBlobID(PatchedBlobId, PatchedPartId), Buffer, VDiskId, false, &cookie, Deadline, NKikimrBlobStorage::AsyncBlob); @@ -438,6 +452,18 @@ namespace NKikimr::NPrivate { SendVPatchResult(NKikimrProto::ERROR); } + void HandleForceEnd(TEvBlobStorage::TEvVPatchDiff::TPtr &ev) { + bool forceEnd = ev->Get()->IsForceEnd(); + SendVPatchFoundParts(NKikimrProto::ERROR); + if (forceEnd) { + SendVPatchResult(NKikimrProto::OK); + } else { + SendVPatchResult(NKikimrProto::ERROR); + } + NotifySkeletonAboutDying(); + Become(&TThis::ErrorState); + } + void Handle(TEvBlobStorage::TEvVPatchDiff::TPtr &ev) { NKikimrBlobStorage::TEvVPatchDiff &record = ev->Get()->Record; Y_VERIFY(record.HasCookie()); @@ -465,6 +491,7 @@ namespace NKikimr::NPrivate { (OriginalPartId, (ui32)OriginalPartId), (PatchedPartId, (ui32)PatchedPartId), (XorReceiver, (isXorReceiver ? "yes" : "no")), + (ParityPart, (PatchedPartId <= GType.DataParts() ? 
"no" : "yes")), (ForceEnd, (forceEnd ? "yes" : "no"))); Y_VERIFY(!ResultEvent); @@ -479,7 +506,8 @@ namespace NKikimr::NPrivate { if (forceEnd) { SendVPatchResult(NKikimrProto::OK); - NotifySkeletonAndDie(); + NotifySkeletonAboutDying(); + Become(&TThis::ErrorState); return; } @@ -525,7 +553,8 @@ namespace NKikimr::NPrivate { ResultEvent->SetStatusFlagsAndFreeSpace(record.GetStatusFlags(), record.GetApproximateFreeSpaceShare()); SendVPatchResult(status); - NotifySkeletonAndDie(); + NotifySkeletonAboutDying(); + Become(&TThis::ErrorState); } void HandleError(TEvBlobStorage::TEvVPatchXorDiff::TPtr &ev) { @@ -557,17 +586,13 @@ namespace NKikimr::NPrivate { TInstant now = TActivationContext::Now(); std::unique_ptr<TEvBlobStorage::TEvVPatchXorDiffResult> resultEvent = std::make_unique<TEvBlobStorage::TEvVPatchXorDiffResult>( NKikimrProto::OK, now, &record, SkeletonFrontIDPtr, VPatchResMsgsPtr, nullptr, std::move(ev->TraceId)); + SendVDiskResponse(TActivationContext::AsActorContext(), ev->Sender, resultEvent.release(), ev->Cookie); if (!CheckDiff(xorDiffs, "XorDiff from datapart")) { - for (auto &[diffs, partId, result, sender, cookie] : ReceivedXorDiffs) { - SendVDiskResponse(TActivationContext::AsActorContext(), sender, result.release(), cookie); - } - SendVDiskResponse(TActivationContext::AsActorContext(), ev->Sender, resultEvent.release(), ev->Cookie); - if (ResultEvent) { SendVPatchResult(NKikimrProto::ERROR); - NotifySkeletonAboutDying(); } + NotifySkeletonAboutDying(); Become(&TThis::ErrorState); return; } @@ -575,6 +600,7 @@ namespace NKikimr::NPrivate { if (Buffer) { ui8 *buffer = reinterpret_cast<ui8*>(const_cast<char*>(Buffer.data())); ui32 dataSize = OriginalBlobId.BlobSize(); + GType.ApplyXorDiff(TErasureType::CrcModeNone, dataSize, buffer, xorDiffs, fromPart - 1, toPart - 1); if (ReceivedXorDiffCount == WaitedXorDiffCount) { @@ -582,19 +608,14 @@ namespace NKikimr::NPrivate { } xorDiffs.clear(); - SendVDiskResponse(TActivationContext::AsActorContext(), 
ev->Sender, resultEvent.release(), ev->Cookie); } else { - ReceivedXorDiffs.emplace_back(std::move(xorDiffs), fromPart, std::move(resultEvent), + ReceivedXorDiffs.emplace_back(std::move(xorDiffs), fromPart, ev->Sender, ev->Cookie); } } - void NotifySkeletonAndDie() { - NotifySkeletonAboutDying(); - PassAway(); - } - void NotifySkeletonAboutDying() { + STLOG(PRI_DEBUG, BS_VDISK_PATCH, BSVSP17, VDiskLogPrefix << " NotifySkeletonAboutDying;"); Send(LeaderId, new TEvVPatchDyingRequest(PatchedBlobId)); } @@ -602,7 +623,8 @@ namespace NKikimr::NPrivate { ErrorReason = "TEvVPatch: the vpatch actor died due to a deadline, before receiving diff"; STLOG(PRI_ERROR, BS_VDISK_PATCH, BSVSP11, VDiskLogPrefix << " " << ErrorReason << ";"); SendVPatchFoundParts(NKikimrProto::ERROR); - NotifySkeletonAndDie(); + NotifySkeletonAboutDying(); + Become(&TThis::ErrorState); } void HandleInWaitState(TKikimrEvents::TEvWakeup::TPtr &/*ev*/) { @@ -616,12 +638,13 @@ namespace NKikimr::NPrivate { ErrorReason = "TEvVPatch: the vpatch actor died due to a deadline, after receiving diff"; STLOG(PRI_ERROR, BS_VDISK_PATCH, BSVSP12, VDiskLogPrefix << " " << ErrorReason << ";"); SendVPatchResult(NKikimrProto::ERROR); - NotifySkeletonAndDie(); + NotifySkeletonAboutDying(); + Become(&TThis::ErrorState); } void HandleInParityStates(TKikimrEvents::TEvWakeup::TPtr &/*ev*/) { ErrorReason = "TEvVPatch: the vpatch actor died due to a deadline, after receiving diff"; - STLOG(PRI_ERROR, BS_VDISK_PATCH, BSVSP12, VDiskLogPrefix << " " << ErrorReason << ";"); + STLOG(PRI_ERROR, BS_VDISK_PATCH, BSVSP20, VDiskLogPrefix << " " << ErrorReason << ";"); SendVPatchResult(NKikimrProto::ERROR); NotifySkeletonAboutDying(); Become(&TThis::ErrorState); @@ -631,8 +654,9 @@ namespace NKikimr::NPrivate { switch (ev->GetTypeRewrite()) { hFunc(TEvBlobStorage::TEvVGetResult, HandleVGetRangeResult) hFunc(TEvBlobStorage::TEvVPatchXorDiff, Handle) + hFunc(TEvBlobStorage::TEvVPatchDiff, HandleForceEnd) hFunc(TKikimrEvents::TEvWakeup, 
HandleInStartState) - default: Y_FAIL_S(VDiskLogPrefix << " unexpected event " << ToString(ev->GetTypeRewrite())); + default: Y_FAIL_S(VDiskLogPrefix << " unexpected event " << TypeName(*ev->GetBase())); } } @@ -641,7 +665,7 @@ namespace NKikimr::NPrivate { hFunc(TEvBlobStorage::TEvVPatchDiff, Handle) hFunc(TEvBlobStorage::TEvVPatchXorDiff, Handle) hFunc(TKikimrEvents::TEvWakeup, HandleInWaitState) - default: Y_FAIL_S(VDiskLogPrefix << " unexpected event " << ToString(ev->GetTypeRewrite())); + default: Y_FAIL_S(VDiskLogPrefix << " unexpected event " << TypeName(*ev->GetBase())); } } @@ -649,9 +673,10 @@ namespace NKikimr::NPrivate { switch (ev->GetTypeRewrite()) { hFunc(TEvBlobStorage::TEvVPatchDiff, HandleError) hFunc(TEvBlobStorage::TEvVPatchXorDiff, HandleError) + IgnoreFunc(TEvBlobStorage::TEvVPatchXorDiffResult) hFunc(TKikimrEvents::TEvWakeup, HandleInWaitState) sFunc(TEvVPatchDyingConfirm, PassAway) - default: Y_FAIL_S(VDiskLogPrefix << " unexpected event " << ToString(ev->GetTypeRewrite())); + default: Y_FAIL_S(VDiskLogPrefix << " unexpected event " << TypeName(*ev->GetBase())); } } @@ -661,7 +686,7 @@ namespace NKikimr::NPrivate { hFunc(TEvBlobStorage::TEvVPutResult, Handle) IgnoreFunc(TEvBlobStorage::TEvVPatchXorDiffResult) hFunc(TKikimrEvents::TEvWakeup, HandleInDataStates) - default: Y_FAIL_S(VDiskLogPrefix << " unexpected event " << ToString(ev->GetTypeRewrite())); + default: Y_FAIL_S(VDiskLogPrefix << " unexpected event " << TypeName(*ev->GetBase())); } } @@ -671,7 +696,7 @@ namespace NKikimr::NPrivate { hFunc(TEvBlobStorage::TEvVPutResult, Handle) hFunc(TEvBlobStorage::TEvVPatchXorDiff, Handle) hFunc(TKikimrEvents::TEvWakeup, HandleInParityStates) - default: Y_FAIL_S(VDiskLogPrefix << " unexpected event " << ToString(ev->GetTypeRewrite())); + default: Y_FAIL_S(VDiskLogPrefix << " unexpected event " << TypeName(*ev->GetBase())); } } }; diff --git a/ydb/core/blobstorage/vdisk/skeleton/skeleton_vpatch_actor_ut.cpp 
b/ydb/core/blobstorage/vdisk/skeleton/skeleton_vpatch_actor_ut.cpp index effcedfec2..dce5579a46 100644 --- a/ydb/core/blobstorage/vdisk/skeleton/skeleton_vpatch_actor_ut.cpp +++ b/ydb/core/blobstorage/vdisk/skeleton/skeleton_vpatch_actor_ut.cpp @@ -57,6 +57,11 @@ namespace NKikimr { } bool DoBeforeSending(TAutoPtr<IEventHandle> &ev) override { + if (ev->GetBase()) { + Cerr << "Send " << TypeName(*ev->GetBase()) << Endl; + } else { + Cerr << "Send " << ev->Type << Endl; + } if (IsCheckingEvents) { UNIT_ASSERT_LT_C(SendingIdx, SequenceOfSendingEvents.size(), "SequenceOfSendingEvents overbounded"); UNIT_ASSERT_VALUES_EQUAL_C(SequenceOfSendingEvents[SendingIdx], ev->Type, "sending idx " << SendingIdx); @@ -71,6 +76,11 @@ namespace NKikimr { PassAway(); return false; } + if (ev->GetBase()) { + Cerr << "Recv " << TypeName(*ev->GetBase()) << Endl; + } else { + Cerr << "Recv " << ev->Type << Endl; + } InStateFunc = true; if (IsCheckingEvents) { @@ -309,9 +319,15 @@ namespace NKikimr { auto result = testData.Runtime.GrabEdgeEventRethrow<TEvBlobStorage::TEvVPatchResult>(handle); UNIT_ASSERT(result->Record.GetStatus() == NKikimrProto::OK); + auto diyngRequest = testData.Runtime.GrabEdgeEventRethrow<TEvVPatchDyingRequest>(handle); + UNIT_ASSERT(diyngRequest->PatchedBlobId == testData.PatchedBlobId); + handle = MakeHolder<IEventHandle>(vPatchActorId, edgeActor, new TEvVPatchDyingConfirm); + testData.Runtime.Send(handle.Release()); } else { auto diyngRequest = testData.Runtime.GrabEdgeEventRethrow<TEvVPatchDyingRequest>(handle); UNIT_ASSERT(diyngRequest->PatchedBlobId == testData.PatchedBlobId); + handle = MakeHolder<IEventHandle>(vPatchActorId, edgeActor, new TEvVPatchDyingConfirm); + testData.Runtime.Send(handle.Release()); } testData.WaitEndTest(); @@ -320,7 +336,8 @@ namespace NKikimr { Y_UNIT_TEST(FindingPartsWhenPartsAreDontExist) { TVector<ui64> receivingEvents { TEvents::TSystem::Bootstrap, - TEvBlobStorage::EvVGetResult}; + TEvBlobStorage::EvVGetResult, + 
TEvBlobStorage::EvVPatchDyingConfirm}; TVector<ui64> sendingEvents { TEvBlobStorage::EvVGet, TEvBlobStorage::EvVPatchFoundParts, @@ -332,7 +349,8 @@ namespace NKikimr { TVector<ui64> receivingEvents { TEvents::TSystem::Bootstrap, TEvBlobStorage::EvVGetResult, - TEvBlobStorage::EvVPatchDiff}; + TEvBlobStorage::EvVPatchDiff, + TEvBlobStorage::EvVPatchDyingConfirm}; TVector<ui64> sendingEvents { TEvBlobStorage::EvVGet, TEvBlobStorage::EvVPatchFoundParts, @@ -345,7 +363,8 @@ namespace NKikimr { TVector<ui64> receivingEvents { TEvents::TSystem::Bootstrap, TEvBlobStorage::EvVGetResult, - TEvBlobStorage::EvVPatchDiff}; + TEvBlobStorage::EvVPatchDiff, + TEvBlobStorage::EvVPatchDyingConfirm}; TVector<ui64> sendingEvents { TEvBlobStorage::EvVGet, TEvBlobStorage::EvVPatchFoundParts, @@ -357,7 +376,8 @@ namespace NKikimr { Y_UNIT_TEST(FindingPartsWhenError) { TVector<ui64> receivingEvents { TEvents::TSystem::Bootstrap, - TEvBlobStorage::EvVGetResult}; + TEvBlobStorage::EvVGetResult, + TEvBlobStorage::EvVPatchDyingConfirm}; TVector<ui64> sendingEvents { TEvBlobStorage::EvVGet, TEvBlobStorage::EvVPatchFoundParts, @@ -372,7 +392,10 @@ namespace NKikimr { TActorId edgeActor = testData.EdgeActors[0]; testData.IsCheckingEventsByDecorator = true; - testData.SequenceOfReceivingEvents = {TEvents::TSystem::Bootstrap, TKikimrEvents::TSystem::Wakeup}; + testData.SequenceOfReceivingEvents = { + TEvents::TSystem::Bootstrap, + TKikimrEvents::TSystem::Wakeup, + TEvBlobStorage::EvVPatchDyingConfirm}; testData.SequenceOfSendingEvents = { TEvBlobStorage::EvVGet, TEvBlobStorage::EvVPatchFoundParts, @@ -391,6 +414,8 @@ namespace NKikimr { auto dyingRequest = runtime.GrabEdgeEventRethrow<TEvVPatchDyingRequest>(handle); UNIT_ASSERT_VALUES_EQUAL(dyingRequest->PatchedBlobId, testData.PatchedBlobId); + handle = MakeHolder<IEventHandle>(actorId, edgeActor, new TEvVPatchDyingConfirm); + testData.Runtime.Send(handle.Release()); testData.WaitEndTest(); } @@ -497,6 +522,12 @@ namespace NKikimr { 
UNIT_ASSERT(result->Record.GetStatus() == expectedResultStatus); UNIT_ASSERT(result->Record.GetStatusFlags() == testData.StatusFlags); UNIT_ASSERT(result->Record.GetApproximateFreeSpaceShare() == testData.ApproximateFreeSpaceShare); + + + auto diyngRequest = testData.Runtime.GrabEdgeEventRethrow<TEvVPatchDyingRequest>(handle); + UNIT_ASSERT(diyngRequest->PatchedBlobId == testData.PatchedBlobId); + handle = MakeHolder<IEventHandle>(vPatchActorId, edgeActor, new TEvVPatchDyingConfirm); + testData.Runtime.Send(handle.Release()); testData.WaitEndTest(); } @@ -506,7 +537,8 @@ namespace NKikimr { TEvBlobStorage::EvVGetResult, TEvBlobStorage::EvVPatchDiff, TEvBlobStorage::EvVGetResult, - TEvBlobStorage::EvVPutResult}; + TEvBlobStorage::EvVPutResult, + TEvBlobStorage::EvVPatchDyingConfirm}; TVector<ui64> sendingEvents { TEvBlobStorage::EvVGet, TEvBlobStorage::EvVPatchFoundParts, @@ -545,7 +577,8 @@ namespace NKikimr { TEvBlobStorage::EvVGetResult, TEvBlobStorage::EvVPatchDiff, TEvBlobStorage::EvVGetResult, - TEvBlobStorage::EvVPutResult}; + TEvBlobStorage::EvVPutResult, + TEvBlobStorage::EvVPatchDyingConfirm}; TVector<ui64> sendingEvents { TEvBlobStorage::EvVGet, TEvBlobStorage::EvVPatchFoundParts, @@ -617,6 +650,12 @@ namespace NKikimr { ReceiveVPatchResult(testData, status); handle = MakeHolder<IEventHandle>(vPatchActorId, edgeActor, new TEvVPatchDyingConfirm); runtime.Send(handle.Release()); + + auto diyngRequest = testData.Runtime.GrabEdgeEventRethrow<TEvVPatchDyingRequest>(handle); + UNIT_ASSERT(diyngRequest->PatchedBlobId == testData.PatchedBlobId); + handle = MakeHolder<IEventHandle>(vPatchActorId, edgeActor, new TEvVPatchDyingConfirm); + testData.Runtime.Send(handle.Release()); + testData.WaitEndTest(); } else { testData.ForceEndTest(); @@ -631,14 +670,15 @@ namespace NKikimr { TEvBlobStorage::EvVPatchXorDiff, TEvBlobStorage::EvVPatchDiff, TEvBlobStorage::EvVGetResult, - TEvBlobStorage::EvVPutResult}; + TEvBlobStorage::EvVPutResult, + 
TEvBlobStorage::EvVPatchDyingConfirm}; TVector<ui64> sendingEvents { TEvBlobStorage::EvVGet, TEvBlobStorage::EvVPatchFoundParts, + TEvBlobStorage::EvVPatchXorDiffResult, TEvBlobStorage::EvVGet, TEvBlobStorage::EvVPut, TEvBlobStorage::EvVPatchResult, - TEvBlobStorage::EvVPatchResult, TEvBlobStorage::EvVPatchDyingRequest}; TVector<TDiff> diffs; @@ -658,8 +698,8 @@ namespace NKikimr { TEvBlobStorage::EvVGet, TEvBlobStorage::EvVPatchFoundParts, TEvBlobStorage::EvVPatchXorDiffResult, - TEvBlobStorage::EvVPatchResult, - TEvBlobStorage::EvVPatchDyingRequest}; + TEvBlobStorage::EvVPatchDyingRequest, + TEvBlobStorage::EvVPatchResult}; TVector<TDiff> diffs; diffs.emplace_back("", 100, true, false); @@ -678,8 +718,8 @@ namespace NKikimr { TEvBlobStorage::EvVGet, TEvBlobStorage::EvVPatchFoundParts, TEvBlobStorage::EvVPatchXorDiffResult, - TEvBlobStorage::EvVPatchResult, - TEvBlobStorage::EvVPatchDyingRequest}; + TEvBlobStorage::EvVPatchDyingRequest, + TEvBlobStorage::EvVPatchResult}; TVector<TDiff> diffs; diffs.emplace_back("aa", 3, true, false); @@ -720,7 +760,15 @@ namespace NKikimr { for (ui32 nodeIdx = 0; nodeIdx < nodeCount; ++nodeIdx) { ui8 partId = nodeIdx + 1; - PassFindingParts(testData, NKikimrProto::OK, {partId}, nodeIdx);; + if (PassFindingParts(testData, NKikimrProto::OK, {partId}, nodeIdx)) { + TActorId edgeActor = testData.EdgeActors[nodeIdx]; + TActorId vPatchActorId = testData.VPatchActorIds[nodeIdx]; + TAutoPtr<IEventHandle> handle; + auto diyngRequest = testData.Runtime.GrabEdgeEventRethrow<TEvVPatchDyingRequest>(handle); + UNIT_ASSERT(diyngRequest->PatchedBlobId == testData.PatchedBlobId); + handle = MakeHolder<IEventHandle>(vPatchActorId, edgeActor, new TEvVPatchDyingConfirm); + testData.Runtime.Send(handle.Release()); + } } ui32 dataPartCount = type.DataParts(); @@ -790,6 +838,7 @@ namespace NKikimr { auto handle2 = std::make_unique<IEventHandle>(patchActor, edgeActor, handle->Release().Release(), handle->Flags, handle->Cookie, nullptr, 
std::move(handle->TraceId)); testData.Runtime.Send(handle2.release()); + testData.Runtime.GrabEdgeEventRethrow<TEvBlobStorage::TEvVPatchXorDiffResult>({edgeActor}); } } @@ -815,14 +864,6 @@ namespace NKikimr { } } - // receive xor diff's results - for (ui32 partIdx = dataPartCount; partIdx < totalPartCount; ++partIdx) { - for (ui32 dataDiffIdx = 0; dataDiffIdx < dataDiffCount; ++dataDiffIdx) { - TActorId edgeActor = testData.EdgeActors[partIdx]; - testData.Runtime.GrabEdgeEventRethrow<TEvBlobStorage::TEvVPatchXorDiffResult>({edgeActor}); - } - } - for (ui32 partIdx = 0; partIdx < totalPartCount; ++partIdx) { ui32 partId = partIdx + 1; TBlob storingBlob(testData.PatchedBlobId, partId, resultPartSet.Parts[partIdx].OwnedString); @@ -834,6 +875,7 @@ namespace NKikimr { } Y_UNIT_TEST(FullPatchTest) { + return; ui32 dataSize = 2079; TString data = TString::Uninitialized(dataSize); Fill(data.begin(), data.vend(), 'a'); @@ -856,6 +898,7 @@ namespace NKikimr { } Y_UNIT_TEST(FullPatchTestXorDiffFasterVGetResult) { + return; ui32 dataSize = 2079; TString data = TString::Uninitialized(dataSize); Fill(data.begin(), data.vend(), 'a'); @@ -878,6 +921,7 @@ namespace NKikimr { } Y_UNIT_TEST(FullPatchTestSpecialCase1) { + return; ui32 dataSize = 100; TString data = TString::Uninitialized(dataSize); Fill(data.begin(), data.vend(), 'a'); diff --git a/ydb/core/cms/info_collector.cpp b/ydb/core/cms/info_collector.cpp index 23ce5e3702..35c44b6aa3 100644 --- a/ydb/core/cms/info_collector.cpp +++ b/ydb/core/cms/info_collector.cpp @@ -442,7 +442,6 @@ void TInfoCollector::Handle(TEvents::TEvUndelivered::TPtr& ev) { } if (msg.SourceType == TEvTenantPool::EvGetStatus && msg.Reason == TEvents::TEvUndelivered::ReasonActorUnknown) { - LOG_W("Node is alive, but TenantPool is not running (KIKIMR-8249)"); ResponseProcessed(nodeId, TEvTenantPool::EvTenantPoolStatus); } else { Info->ClearNode(nodeId); diff --git a/ydb/core/cms/sentinel.cpp b/ydb/core/cms/sentinel.cpp index e7ccde90a9..7b7492cbf2 
100644 --- a/ydb/core/cms/sentinel.cpp +++ b/ydb/core/cms/sentinel.cpp @@ -213,7 +213,7 @@ TGuardian::TGuardian(TSentinelState::TPtr state, ui32 dataCenterRatio, ui32 room } TClusterMap::TPDiskIDSet TGuardian::GetAllowedPDisks(const TClusterMap& all, TString& issues, - TPDiskIDSet& disallowed) const { + TPDiskIgnoredMap& disallowed) const { TPDiskIDSet result; TStringBuilder issuesBuilder; @@ -232,7 +232,9 @@ TClusterMap::TPDiskIDSet TGuardian::GetAllowedPDisks(const TClusterMap& all, TSt result.insert(kv.second.begin(), kv.second.end()); } else { LOG_IGNORED(DataCenter); - disallowed.insert(kv.second.begin(), kv.second.end()); + for (auto& pdisk : kv.second) { + disallowed.emplace(pdisk, NKikimrCms::TPDiskInfo::RATIO_BY_DATACENTER); + } } } @@ -241,7 +243,9 @@ TClusterMap::TPDiskIDSet TGuardian::GetAllowedPDisks(const TClusterMap& all, TSt if (kv.first && !CheckRatio(kv, all.ByRoom, RoomRatio)) { LOG_IGNORED(Room); - disallowed.insert(kv.second.begin(), kv.second.end()); + for (auto& pdisk : kv.second) { + disallowed.emplace(pdisk, NKikimrCms::TPDiskInfo::RATIO_BY_ROOM); + } EraseNodesIf(result, [&room = kv.second](const TPDiskID& id) { return room.contains(id); }); @@ -257,7 +261,9 @@ TClusterMap::TPDiskIDSet TGuardian::GetAllowedPDisks(const TClusterMap& all, TSt } if (kv.first && !CheckRatio(kv, all.ByRack, RackRatio)) { LOG_IGNORED(Rack); - disallowed.insert(kv.second.begin(), kv.second.end()); + for (auto& pdisk : kv.second) { + disallowed.emplace(pdisk, NKikimrCms::TPDiskInfo::RATIO_BY_RACK); + } EraseNodesIf(result, [&rack = kv.second](const TPDiskID& id) { return rack.contains(id); }); @@ -967,6 +973,7 @@ class TSentinel: public TActorBootstrapped<TSentinel> { if (!SentinelState->Nodes.contains(id.NodeId)) { LOG_E("Missing node info" << ": pdiskId# " << id); + info.IgnoreReason = NKikimrCms::TPDiskInfo::MISSING_NODE; continue; } @@ -983,7 +990,7 @@ class TSentinel: public TActorBootstrapped<TSentinel> { } TString issues; - THashSet<TPDiskID, TPDiskIDHash> 
disallowed; + TClusterMap::TPDiskIgnoredMap disallowed; TClusterMap::TPDiskIDSet allowed = changed.GetAllowedPDisks(all, issues, disallowed); std::move(alwaysAllowed.begin(), alwaysAllowed.end(), std::inserter(allowed, allowed.begin())); @@ -991,6 +998,8 @@ class TSentinel: public TActorBootstrapped<TSentinel> { Y_VERIFY(SentinelState->PDisks.contains(id)); TPDiskInfo::TPtr info = SentinelState->PDisks.at(id); + info->IgnoreReason = NKikimrCms::TPDiskInfo::NOT_IGNORED; + if (!info->IsChangingAllowed()) { info->AllowChanging(); continue; @@ -1019,9 +1028,11 @@ class TSentinel: public TActorBootstrapped<TSentinel> { } } - for (const auto& id : disallowed) { + for (const auto& [id, reason] : disallowed) { Y_VERIFY(SentinelState->PDisks.contains(id)); - SentinelState->PDisks.at(id)->DisallowChanging(); + auto& pdisk = SentinelState->PDisks.at(id); + pdisk->DisallowChanging(); + pdisk->IgnoreReason = reason; } if (issues) { @@ -1067,10 +1078,15 @@ class TSentinel: public TActorBootstrapped<TSentinel> { auto filterByStatus = [](const TPDiskInfo& info, NKikimrCms::TGetSentinelStateRequest::EShow filter) { switch(filter) { case NKikimrCms::TGetSentinelStateRequest::UNHEALTHY: - return info.GetState() != NKikimrBlobStorage::TPDiskState::Normal || info.GetStatus() != EPDiskStatus::ACTIVE; + return info.GetState() != NKikimrBlobStorage::TPDiskState::Normal + || info.ActualStatus != EPDiskStatus::ACTIVE + || info.GetStatus() != EPDiskStatus::ACTIVE + || info.StatusChangeFailed; case NKikimrCms::TGetSentinelStateRequest::SUSPICIOUS: return info.GetState() != NKikimrBlobStorage::TPDiskState::Normal + || info.ActualStatus != EPDiskStatus::ACTIVE || info.GetStatus() != EPDiskStatus::ACTIVE + || info.StatusChangeFailed || info.StatusChangerState || !info.IsTouched() || !info.IsChangingAllowed(); @@ -1115,18 +1131,20 @@ class TSentinel: public TActorBootstrapped<TSentinel> { entry.MutableInfo()->SetState(info->GetState()); entry.MutableInfo()->SetPrevState(info->GetPrevState()); 
entry.MutableInfo()->SetStateCounter(info->GetStateCounter()); - entry.MutableInfo()->SetStatus(info->GetStatus()); + entry.MutableInfo()->SetStatus(info->ActualStatus); + entry.MutableInfo()->SetDesiredStatus(info->GetStatus()); entry.MutableInfo()->SetChangingAllowed(info->IsChangingAllowed()); entry.MutableInfo()->SetTouched(info->IsTouched()); entry.MutableInfo()->SetLastStatusChange(info->LastStatusChange.ToString()); + entry.MutableInfo()->SetStatusChangeFailed(info->StatusChangeFailed); if (info->StatusChangerState) { - entry.MutableInfo()->SetDesiredStatus(info->StatusChangerState->Status); entry.MutableInfo()->SetStatusChangeAttempts(info->StatusChangerState->Attempt); } if (info->PrevStatusChangerState) { entry.MutableInfo()->SetPrevDesiredStatus(info->PrevStatusChangerState->Status); entry.MutableInfo()->SetPrevStatusChangeAttempts(info->PrevStatusChangerState->Attempt); } + entry.MutableInfo()->SetIgnoreReason(info->IgnoreReason); } } } @@ -1152,10 +1170,13 @@ class TSentinel: public TActorBootstrapped<TSentinel> { if (!success) { LOG_C("PDisk status has NOT been changed" << ": pdiskId# " << id); + it->second->StatusChangeFailed = true; (*Counters->PDisksNotChanged)++; } else { LOG_N("PDisk status has been changed" << ": pdiskId# " << id); + it->second->ActualStatus = it->second->GetStatus(); + it->second->StatusChangeFailed = false; (*Counters->PDisksChanged)++; } diff --git a/ydb/core/cms/sentinel_impl.h b/ydb/core/cms/sentinel_impl.h index 00029ed616..55ab90abab 100644 --- a/ydb/core/cms/sentinel_impl.h +++ b/ydb/core/cms/sentinel_impl.h @@ -88,10 +88,15 @@ struct TPDiskInfo { using TPtr = TIntrusivePtr<TPDiskInfo>; + using EIgnoreReason = NKikimrCms::TPDiskInfo::EIgnoreReason; + TActorId StatusChanger; TInstant LastStatusChange; + bool StatusChangeFailed = false; + EPDiskStatus ActualStatus = EPDiskStatus::ACTIVE; TStatusChangerState::TPtr StatusChangerState; TStatusChangerState::TPtr PrevStatusChangerState; + EIgnoreReason IgnoreReason = 
NKikimrCms::TPDiskInfo::NOT_IGNORED; explicit TPDiskInfo(EPDiskStatus initialStatus, const ui32& defaultStateLimit, const TLimitsMap& stateLimits); @@ -137,6 +142,7 @@ struct TSentinelState: public TSimpleRefCount<TSentinelState> { class TClusterMap { public: using TPDiskIDSet = THashSet<TPDiskID, TPDiskIDHash>; + using TPDiskIgnoredMap = THashMap<TPDiskID, TPDiskInfo::EIgnoreReason, TPDiskIDHash>; using TDistribution = THashMap<TString, TPDiskIDSet>; using TNodeIDSet = THashSet<ui32>; @@ -163,7 +169,7 @@ class TGuardian : public TClusterMap { public: explicit TGuardian(TSentinelState::TPtr state, ui32 dataCenterRatio = 100, ui32 roomRatio = 100, ui32 rackRatio = 100); - TPDiskIDSet GetAllowedPDisks(const TClusterMap& all, TString& issues, TPDiskIDSet& disallowed) const; + TPDiskIDSet GetAllowedPDisks(const TClusterMap& all, TString& issues, TPDiskIgnoredMap& disallowed) const; private: const ui32 DataCenterRatio; diff --git a/ydb/core/cms/sentinel_ut.cpp b/ydb/core/cms/sentinel_ut.cpp index 0e1f33ae94..1867ef3d03 100644 --- a/ydb/core/cms/sentinel_ut.cpp +++ b/ydb/core/cms/sentinel_ut.cpp @@ -174,6 +174,16 @@ Y_UNIT_TEST_SUITE(TSentinelBaseTests) { return {state, sentinelState}; } + THashSet<TPDiskID, TPDiskIDHash> MapKeys(TClusterMap::TPDiskIgnoredMap& map) { + THashSet<TPDiskID, TPDiskIDHash> result; + + for (auto& [k, _] : map) { + result.insert(k); + } + + return result; + }; + void GuardianDataCenterRatio(ui16 numDataCenter, const TVector<ui16>& nodesPerDataCenterVariants, bool anyDC = false) { UNIT_ASSERT(!anyDC || numDataCenter == 1); @@ -198,7 +208,7 @@ Y_UNIT_TEST_SUITE(TSentinelBaseTests) { } TString issues; - THashSet<TPDiskID, TPDiskIDHash> disallowed; + TClusterMap::TPDiskIgnoredMap disallowed; UNIT_ASSERT_VALUES_EQUAL(changed.GetAllowedPDisks(all, issues, disallowed), changedSet); UNIT_ASSERT(disallowed.empty()); @@ -218,7 +228,7 @@ Y_UNIT_TEST_SUITE(TSentinelBaseTests) { disallowed.clear(); if (!anyDC) { UNIT_ASSERT(changed.GetAllowedPDisks(all, 
issues, disallowed).empty()); - UNIT_ASSERT_VALUES_EQUAL(disallowed, changedSet); + UNIT_ASSERT_VALUES_EQUAL(MapKeys(disallowed), changedSet); UNIT_ASSERT_STRING_CONTAINS(issues, "due to DataCenterRatio"); } else { UNIT_ASSERT_VALUES_EQUAL(changed.GetAllowedPDisks(all, issues, disallowed), changedSet); @@ -259,7 +269,7 @@ Y_UNIT_TEST_SUITE(TSentinelBaseTests) { } TString issues; - THashSet<TPDiskID, TPDiskIDHash> disallowed; + TClusterMap::TPDiskIgnoredMap disallowed; UNIT_ASSERT_VALUES_EQUAL(changed.GetAllowedPDisks(all, issues, disallowed), changedSet); UNIT_ASSERT(disallowed.empty()); @@ -287,7 +297,7 @@ Y_UNIT_TEST_SUITE(TSentinelBaseTests) { UNIT_ASSERT(issues.empty()); } else { UNIT_ASSERT_VALUES_EQUAL(allowed, decltype(allowed){}); - UNIT_ASSERT_VALUES_EQUAL(disallowed, changedSet); + UNIT_ASSERT_VALUES_EQUAL(MapKeys(disallowed), changedSet); UNIT_ASSERT_STRING_CONTAINS(issues, "due to RackRatio"); } } diff --git a/ydb/core/cms/ui/sentinel_state.js b/ydb/core/cms/ui/sentinel_state.js index d151441b05..78942081f8 100644 --- a/ydb/core/cms/ui/sentinel_state.js +++ b/ydb/core/cms/ui/sentinel_state.js @@ -37,13 +37,15 @@ const PDiskHeaders = [ "PrevState", "StateCounter", "Status", + "DesiredStatus", "ChangingAllowed", + "LastStatusChange", + "StatusChangeFailed", "Touched", - "DesiredStatus", "StatusChangeAttempts", "PrevDesiredStatus", "PrevStatusChangeAttempts", - "LastStatusChange", + "IgnoreReason", ]; class CmsSentinelState { @@ -145,9 +147,17 @@ class CmsSentinelState { "PrevDesiredStatus": this.id.bind(this), "PrevStatusChangeAttempts": this.id.bind(this), "LastStatusChange": this.id.bind(this), + "IgnoreReason": this.id.bind(this), }; } + getHiddenPDiskInfo() { + return [ + "PrevDesiredStatus", + "PrevStatusChangeAttempts", + ]; + } + nameToSelector(name) { return (name.charAt(0).toLowerCase() + name.slice(1)).replace(/([A-Z])/g, "-$1").toLowerCase(); } @@ -366,7 +376,14 @@ class CmsSentinelState { } addCheckbox(elem, name) { - var cb = $('<input />', { 
type: 'checkbox', id: 'cb-' + name, value: name, checked: 'checked' }); + var params = { type: 'checkbox', id: 'cb-' + name, value: name }; + if (!this.getHiddenPDiskInfo().includes(name)) { + params.checked = 'checked'; + } else { + this.filtered[name] = true; + this.filteredSize++; + } + var cb = $('<input />', params); cb.change(function() { if(cb[0].checked) { diff --git a/ydb/core/cms/walle_api_handler.cpp b/ydb/core/cms/walle_api_handler.cpp index 7b62f3275a..e249816a3b 100644 --- a/ydb/core/cms/walle_api_handler.cpp +++ b/ydb/core/cms/walle_api_handler.cpp @@ -283,7 +283,7 @@ private: } if (status.GetCode() == TStatus::DISALLOW) { - out = "reject"; + out = "rejected"; return true; } diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp index a26dcec3fb..0f3e282a4a 100644 --- a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp +++ b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp @@ -222,20 +222,36 @@ static TCpuMask ParseAffinity(const TConfig& cfg) { return result; } +TDuration GetSelfPingInterval(const NKikimrConfig::TActorSystemConfig& systemConfig) { + return systemConfig.HasSelfPingInterval() + ? 
TDuration::MicroSeconds(systemConfig.GetSelfPingInterval()) + : TDuration::MilliSeconds(10); +} + void AddExecutorPool( TCpuManagerConfig& cpuManager, const NKikimrConfig::TActorSystemConfig::TExecutor& poolConfig, const NKikimrConfig::TActorSystemConfig& systemConfig, ui32 poolId, ui32 maxActivityType, - ui32& unitedThreads) + ui32& unitedThreads, + const NKikimr::TAppData* appData) { + const auto counters = GetServiceCounters(appData->Counters, "utils"); switch (poolConfig.GetType()) { case NKikimrConfig::TActorSystemConfig::TExecutor::BASIC: { TBasicExecutorPoolConfig basic; basic.PoolId = poolId; basic.PoolName = poolConfig.GetName(); - basic.Threads = poolConfig.GetThreads(); + if (poolConfig.HasMaxAvgPingDeviation()) { + auto poolGroup = counters->GetSubgroup("execpool", basic.PoolName); + auto &poolInfo = cpuManager.PingInfoByPool[poolId]; + poolInfo.AvgPingCounter = poolGroup->GetCounter("SelfPingAvgUs", false); + poolInfo.AvgPingCounterWithSmallWindow = poolGroup->GetCounter("SelfPingAvgUsIn1s", false); + TDuration maxAvgPing = GetSelfPingInterval(systemConfig) + TDuration::MicroSeconds(poolConfig.GetMaxAvgPingDeviation()); + poolInfo.MaxAvgPingUs = maxAvgPing.MicroSeconds(); + } + basic.Threads = Max(poolConfig.GetThreads(), poolConfig.GetMaxThreads()); basic.SpinThreshold = poolConfig.GetSpinThreshold(); basic.Affinity = ParseAffinity(poolConfig.GetAffinity()); basic.RealtimePriority = poolConfig.GetRealtimePriority(); @@ -251,6 +267,10 @@ void AddExecutorPool( basic.EventsPerMailbox = systemConfig.GetEventsPerMailbox(); } Y_VERIFY(basic.EventsPerMailbox != 0); + basic.MinThreadCount = poolConfig.GetMinThreads(); + basic.MaxThreadCount = poolConfig.GetMaxThreads(); + basic.DefaultThreadCount = poolConfig.GetThreads(); + basic.Priority = poolConfig.GetPriority(); cpuManager.Basic.emplace_back(std::move(basic)); break; } @@ -336,11 +356,14 @@ static TUnitedWorkersConfig CreateUnitedWorkersConfig(const NKikimrConfig::TActo return result; } -static 
TCpuManagerConfig CreateCpuManagerConfig(const NKikimrConfig::TActorSystemConfig& config, ui32 maxActivityType) { +static TCpuManagerConfig CreateCpuManagerConfig(const NKikimrConfig::TActorSystemConfig& config, ui32 maxActivityType, + const NKikimr::TAppData* appData) +{ TCpuManagerConfig cpuManager; ui32 unitedThreads = 0; + cpuManager.PingInfoByPool.resize(config.GetExecutor().size()); for (int poolId = 0; poolId < config.GetExecutor().size(); poolId++) { - AddExecutorPool(cpuManager, config.GetExecutor(poolId), config, poolId, maxActivityType, unitedThreads); + AddExecutorPool(cpuManager, config.GetExecutor(poolId), config, poolId, maxActivityType, unitedThreads, appData); } cpuManager.UnitedWorkers = CreateUnitedWorkersConfig(config.GetUnitedWorkers(), unitedThreads); return cpuManager; @@ -529,7 +552,7 @@ void TBasicServicesInitializer::InitializeServices(NActors::TActorSystemSetup* s setup->NodeId = NodeId; setup->MaxActivityType = GetActivityTypeCount(); - setup->CpuManager = CreateCpuManagerConfig(systemConfig, setup->MaxActivityType); + setup->CpuManager = CreateCpuManagerConfig(systemConfig, setup->MaxActivityType, appData); for (ui32 poolId = 0; poolId != setup->GetExecutorsCount(); ++poolId) { const auto &execConfig = systemConfig.GetExecutor(poolId); if (execConfig.HasInjectMadSquirrels()) { @@ -1737,9 +1760,11 @@ void TSelfPingInitializer::InitializeServices( for (size_t poolId = 0; poolId < setup->GetExecutorsCount(); ++poolId) { const auto& poolName = setup->GetPoolName(poolId); auto poolGroup = counters->GetSubgroup("execpool", poolName); - auto counter = poolGroup->GetCounter("SelfPingMaxUs", false); + auto maxPingCounter = poolGroup->GetCounter("SelfPingMaxUs", false); + auto avgPingCounter = poolGroup->GetCounter("SelfPingAvgUs", false); + auto avgPingCounterWithSmallWindow = poolGroup->GetCounter("SelfPingAvgUsIn1s", false); auto cpuTimeCounter = poolGroup->GetCounter("CpuMatBenchNs", false); - IActor* selfPingActor = 
CreateSelfPingActor(selfPingInterval, counter, cpuTimeCounter); + IActor* selfPingActor = CreateSelfPingActor(selfPingInterval, maxPingCounter, avgPingCounter, avgPingCounterWithSmallWindow, cpuTimeCounter); setup->LocalServices.push_back(std::make_pair(TActorId(), TActorSetupCmd(selfPingActor, TMailboxType::HTSwap, diff --git a/ydb/core/driver_lib/run/version.cpp b/ydb/core/driver_lib/run/version.cpp index c8258d4efe..8f58a08906 100644 --- a/ydb/core/driver_lib/run/version.cpp +++ b/ydb/core/driver_lib/run/version.cpp @@ -3,13 +3,13 @@ TMaybe<NActors::TInterconnectProxyCommon::TVersionInfo> VERSION = NActors::TInterconnectProxyCommon::TVersionInfo{ // version of this binary - "stable-22-4", + "stable-22-5", // compatible versions; must include all compatible old ones, including this one; version verification occurs on both // peers and connection is accepted if at least one of peers accepts the version of the other peer { "stable-22-4", - "22-2-border-2" + "stable-22-5" } }; diff --git a/ydb/core/formats/arrow_helpers.cpp b/ydb/core/formats/arrow_helpers.cpp index dfe036dadd..a4f44b8bc2 100644 --- a/ydb/core/formats/arrow_helpers.cpp +++ b/ydb/core/formats/arrow_helpers.cpp @@ -218,6 +218,20 @@ std::shared_ptr<arrow::DataType> GetArrowType(NScheme::TTypeId typeId) { return std::make_shared<arrow::NullType>(); } +std::shared_ptr<arrow::DataType> GetCSVArrowType(NScheme::TTypeId typeId) { + std::shared_ptr<arrow::DataType> result; + switch (typeId) { + case NScheme::NTypeIds::Datetime: + return std::make_shared<arrow::TimestampType>(arrow::TimeUnit::SECOND); + case NScheme::NTypeIds::Timestamp: + return std::make_shared<arrow::TimestampType>(arrow::TimeUnit::MICRO); + case NScheme::NTypeIds::Date: + return std::make_shared<arrow::TimestampType>(arrow::TimeUnit::SECOND); + default: + return GetArrowType(typeId); + } +} + std::vector<std::shared_ptr<arrow::Field>> MakeArrowFields(const TVector<std::pair<TString, NScheme::TTypeId>>& columns) { 
std::vector<std::shared_ptr<arrow::Field>> fields; fields.reserve(columns.size()); @@ -1117,6 +1131,7 @@ bool TArrowToYdbConverter::Process(const arrow::RecordBatch& batch, TString& err auto& curCell = cells[0][col]; if (column->IsNull(row)) { curCell = TCell(); + ++col; continue; } diff --git a/ydb/core/formats/arrow_helpers.h b/ydb/core/formats/arrow_helpers.h index 64411f45bf..2b1e71cf5c 100644 --- a/ydb/core/formats/arrow_helpers.h +++ b/ydb/core/formats/arrow_helpers.h @@ -24,6 +24,7 @@ public: }; std::shared_ptr<arrow::DataType> GetArrowType(NScheme::TTypeId typeId); +std::shared_ptr<arrow::DataType> GetCSVArrowType(NScheme::TTypeId typeId); template <typename T> inline bool ArrayEqualValue(const std::shared_ptr<arrow::Array>& x, const std::shared_ptr<arrow::Array>& y) { diff --git a/ydb/core/grpc_services/base/base.h b/ydb/core/grpc_services/base/base.h index bd782a3944..ee85354f29 100644 --- a/ydb/core/grpc_services/base/base.h +++ b/ydb/core/grpc_services/base/base.h @@ -133,6 +133,7 @@ struct TRpcServices { EvCreateTopic, EvAlterTopic, EvDescribeTopic, + EvDescribeConsumer, EvGetDiskSpaceUsage, EvStopServingDatabase, EvCoordinationSession, diff --git a/ydb/core/grpc_services/grpc_request_proxy.cpp b/ydb/core/grpc_services/grpc_request_proxy.cpp index 3439825d5d..83551cfd64 100644 --- a/ydb/core/grpc_services/grpc_request_proxy.cpp +++ b/ydb/core/grpc_services/grpc_request_proxy.cpp @@ -535,6 +535,7 @@ void LogRequest(const TEvent& event) { ss << ", sdkBuildInfo# " << event->Get()->GetSdkBuildInfo().GetOrElse("undef"); ss << ", state# " << event->Get()->GetAuthState().State; ss << ", database# " << event->Get()->GetDatabaseName().GetOrElse("undef"); + ss << ", peer# " << event->Get()->GetPeerName(); ss << ", grpcInfo# " << event->Get()->GetGrpcUserAgent().GetOrElse("undef"); if (event->Get()->GetDeadline() == TInstant::Max()) { ss << ", timeout# undef"; @@ -598,8 +599,8 @@ void TGRpcRequestProxyImpl::StateFunc(TAutoPtr<IEventHandle>& ev, const TActorCo 
HFunc(TEvCreateTopicRequest, PreHandle); HFunc(TEvAlterTopicRequest, PreHandle); HFunc(TEvDescribeTopicRequest, PreHandle); + HFunc(TEvDescribeConsumerRequest, PreHandle); HFunc(TEvNodeCheckRequest, PreHandle); - HFunc(TEvProxyRuntimeEvent, PreHandle); default: diff --git a/ydb/core/grpc_services/grpc_request_proxy.h b/ydb/core/grpc_services/grpc_request_proxy.h index 8c7ee34e39..b4f0f83c43 100644 --- a/ydb/core/grpc_services/grpc_request_proxy.h +++ b/ydb/core/grpc_services/grpc_request_proxy.h @@ -72,6 +72,7 @@ protected: void Handle(TEvCreateTopicRequest::TPtr& ev, const TActorContext& ctx); void Handle(TEvAlterTopicRequest::TPtr& ev, const TActorContext& ctx); void Handle(TEvDescribeTopicRequest::TPtr& ev, const TActorContext& ctx); + void Handle(TEvDescribeConsumerRequest::TPtr& ev, const TActorContext& ctx); TActorId DiscoveryCacheActorID; }; diff --git a/ydb/core/grpc_services/rpc_alter_table.cpp b/ydb/core/grpc_services/rpc_alter_table.cpp index 20206719c9..b9ffed23e4 100644 --- a/ydb/core/grpc_services/rpc_alter_table.cpp +++ b/ydb/core/grpc_services/rpc_alter_table.cpp @@ -474,10 +474,12 @@ private: for (const auto& add : req->add_changefeeds()) { auto op = modifyScheme->MutableCreateCdcStream(); op->SetTableName(name); + if (add.has_retention_period()) { + op->SetRetentionPeriodSeconds(add.retention_period().seconds()); + } StatusIds::StatusCode code; TString error; - if (!FillChangefeedDescription(*op->MutableStreamDescription(), add, code, error)) { NYql::TIssues issues; issues.AddIssue(NYql::TIssue(error)); diff --git a/ydb/core/grpc_services/rpc_calls.h b/ydb/core/grpc_services/rpc_calls.h index 5216aacb43..5b0e38dd37 100644 --- a/ydb/core/grpc_services/rpc_calls.h +++ b/ydb/core/grpc_services/rpc_calls.h @@ -70,6 +70,7 @@ using TEvDropTopicRequest = TGRpcRequestValidationWrapper<TRpcServices::EvDropTo using TEvCreateTopicRequest = TGRpcRequestValidationWrapper<TRpcServices::EvCreateTopic, Ydb::Topic::CreateTopicRequest, 
Ydb::Topic::CreateTopicResponse, true, TRateLimiterMode::Rps>; using TEvAlterTopicRequest = TGRpcRequestValidationWrapper<TRpcServices::EvAlterTopic, Ydb::Topic::AlterTopicRequest, Ydb::Topic::AlterTopicResponse, true, TRateLimiterMode::Rps>; using TEvDescribeTopicRequest = TGRpcRequestValidationWrapper<TRpcServices::EvDescribeTopic, Ydb::Topic::DescribeTopicRequest, Ydb::Topic::DescribeTopicResponse, true, TRateLimiterMode::Rps>; +using TEvDescribeConsumerRequest = TGRpcRequestValidationWrapper<TRpcServices::EvDescribeConsumer, Ydb::Topic::DescribeConsumerRequest, Ydb::Topic::DescribeConsumerResponse, true, TRateLimiterMode::Rps>; using TEvDiscoverPQClustersRequest = TGRpcRequestWrapper<TRpcServices::EvDiscoverPQClusters, Ydb::PersQueue::ClusterDiscovery::DiscoverClustersRequest, Ydb::PersQueue::ClusterDiscovery::DiscoverClustersResponse, true>; diff --git a/ydb/core/grpc_services/rpc_load_rows.cpp b/ydb/core/grpc_services/rpc_load_rows.cpp index be234b7eff..80b30f3b11 100644 --- a/ydb/core/grpc_services/rpc_load_rows.cpp +++ b/ydb/core/grpc_services/rpc_load_rows.cpp @@ -603,11 +603,6 @@ private: } case EUploadSource::CSV: { - if (SrcColumns.empty()) { - errorMessage = "Cannot upsert CSV: no src columns"; - return false; - } - auto& data = GetSourceData(); auto& cvsSettings = GetCsvSettings(); ui32 skipRows = cvsSettings.skip_rows(); @@ -615,11 +610,8 @@ private: auto& nullValue = cvsSettings.null_value(); bool withHeader = cvsSettings.header(); - ui32 blockSize = NFormats::TArrowCSV::DEFAULT_BLOCK_SIZE; - if (data.size() >= blockSize) { - blockSize *= data.size() / blockSize + 1; - } - NFormats::TArrowCSV reader(SrcColumns, skipRows, withHeader, blockSize); + NFormats::TArrowCSV reader(SrcColumns, withHeader); + reader.SetSkipRows(skipRows); if (!delimiter.empty()) { if (delimiter.size() != 1) { @@ -634,16 +626,19 @@ private: reader.SetNullValue(nullValue); } - Batch = reader.ReadNext(data, errorMessage); + if (data.size() > 
NFormats::TArrowCSV::DEFAULT_BLOCK_SIZE) { + ui32 blockSize = NFormats::TArrowCSV::DEFAULT_BLOCK_SIZE; + blockSize *= data.size() / blockSize + 1; + reader.SetBlockSize(blockSize); + } + + Batch = reader.ReadSingleBatch(data, errorMessage); if (!Batch) { - if (errorMessage.empty()) { - errorMessage = "Cannot read CSV data"; - } return false; } - if (reader.ReadNext(data, errorMessage)) { - errorMessage = "Too big CSV batch"; + if (!Batch->num_rows()) { + errorMessage = "No rows in CSV"; return false; } diff --git a/ydb/core/grpc_services/rpc_scheme_base.h b/ydb/core/grpc_services/rpc_scheme_base.h index 0be68e802e..6d18d77886 100644 --- a/ydb/core/grpc_services/rpc_scheme_base.h +++ b/ydb/core/grpc_services/rpc_scheme_base.h @@ -96,6 +96,9 @@ protected: case NKikimrScheme::EStatus::StatusMultipleModifications: { return this->ReplyWithResult(Ydb::StatusIds::OVERLOADED, issueMessage, ctx); } + case NKikimrScheme::EStatus::StatusInvalidParameter: { + return this->ReplyWithResult(Ydb::StatusIds::BAD_REQUEST, issueMessage, ctx); + } case NKikimrScheme::EStatus::StatusSchemeError: case NKikimrScheme::EStatus::StatusNameConflict: case NKikimrScheme::EStatus::StatusPathDoesNotExist: { @@ -105,7 +108,7 @@ protected: // FIXME: clients may start aggressive retries when receiving 'overloaded' return this->ReplyWithResult(Ydb::StatusIds::OVERLOADED, issueMessage, ctx); } - case NKikimrScheme::EStatus::StatusResourceExhausted: + case NKikimrScheme::EStatus::StatusResourceExhausted: case NKikimrScheme::EStatus::StatusPreconditionFailed: { return this->ReplyWithResult(Ydb::StatusIds::PRECONDITION_FAILED, issueMessage, ctx); } diff --git a/ydb/core/http_proxy/custom_metrics.h b/ydb/core/http_proxy/custom_metrics.h index 0b7bc5e3ee..7ae34b24ff 100644 --- a/ydb/core/http_proxy/custom_metrics.h +++ b/ydb/core/http_proxy/custom_metrics.h @@ -3,7 +3,6 @@ #include "events.h" #include "http_req.h" - namespace NKikimr::NHttpProxy { using namespace Ydb::DataStreams::V1; @@ -17,23 +16,35 @@ 
void FillOutputCustomMetrics(const TProtoResult& result, const THttpRequestConte Y_UNUSED(result, httpContext, ctx); } -TVector<std::pair<TString, TString>> BuildLabels(const TString& method, const THttpRequestContext& httpContext, const TString& name) { - if (method.empty()) { - return {{"cloud", httpContext.CloudId}, {"folder", httpContext.FolderId}, +TVector<std::pair<TString, TString>> BuildLabels(const TString& method, const THttpRequestContext& httpContext, const TString& name, bool setStreamPrefix = false) { + if (setStreamPrefix) { + if (method.empty()) { + return {{"cloud", httpContext.CloudId}, {"folder", httpContext.FolderId}, + {"database", httpContext.DatabaseId}, {"stream", httpContext.StreamName}, + {"name", name}}; + } + return {{"method", method}, {"cloud", httpContext.CloudId}, {"folder", httpContext.FolderId}, {"database", httpContext.DatabaseId}, {"stream", httpContext.StreamName}, {"name", name}}; + } + if (method.empty()) { + return {{"database", httpContext.DatabaseName}, {"cloud_id", httpContext.CloudId}, + {"folder_id", httpContext.FolderId}, {"database_id", httpContext.DatabaseId}, + {"topic", httpContext.StreamName}, {"name", name}}; } - return {{"method", method}, {"cloud", httpContext.CloudId}, {"folder", httpContext.FolderId}, - {"database", httpContext.DatabaseId}, {"stream", httpContext.StreamName}, - {"name", name}}; + return {{"database", httpContext.DatabaseName}, {"method", method}, {"cloud_id", httpContext.CloudId}, + {"folder_id", httpContext.FolderId}, {"database_id", httpContext.DatabaseId}, + {"topic", httpContext.StreamName}, {"name", name}}; } +static const bool setStreamPrefix{true}; + template <> void FillInputCustomMetrics<PutRecordsRequest>(const PutRecordsRequest& request, const THttpRequestContext& httpContext, const TActorContext& ctx) { - ctx.Send(MakeMetricsServiceID(), + /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{request.records_size(), true, true, - 
BuildLabels("", httpContext, "stream.incoming_records_per_second") + BuildLabels("", httpContext, "stream.incoming_records_per_second", setStreamPrefix) }); i64 bytes = 0; @@ -41,38 +52,50 @@ void FillInputCustomMetrics<PutRecordsRequest>(const PutRecordsRequest& request, bytes += rec.data().size() + rec.partition_key().size() + rec.explicit_hash_key().size(); } - ctx.Send(MakeMetricsServiceID(), + /* deprecated metric */ ctx.Send(MakeMetricsServiceID(), + new TEvServerlessProxy::TEvCounter{bytes, true, true, + BuildLabels("", httpContext, "stream.incoming_bytes_per_second", setStreamPrefix) + }); + + /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{bytes, true, true, - BuildLabels("", httpContext, "stream.incoming_bytes_per_second") + BuildLabels("", httpContext, "stream.put_records.bytes_per_second", setStreamPrefix) }); ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{bytes, true, true, - BuildLabels("", httpContext, "stream.put_records.bytes_per_second") + BuildLabels("PutRecords", httpContext, "api.http.data_streams.request.bytes") }); } template <> void FillInputCustomMetrics<PutRecordRequest>(const PutRecordRequest& request, const THttpRequestContext& httpContext, const TActorContext& ctx) { - ctx.Send(MakeMetricsServiceID(), + /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), + new TEvServerlessProxy::TEvCounter{1, true, true, + BuildLabels("", httpContext, "stream.incoming_records_per_second", setStreamPrefix) + }); + /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{1, true, true, - BuildLabels("", httpContext, "stream.incoming_records_per_second") + BuildLabels("", httpContext, "stream.put_record.records_per_second", setStreamPrefix) }); ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{1, true, true, - BuildLabels("", httpContext, "stream.put_record.records_per_second") + BuildLabels("", httpContext, 
"api.http.data_streams.put_record.messages") }); i64 bytes = request.data().size() + request.partition_key().size() + request.explicit_hash_key().size(); - ctx.Send(MakeMetricsServiceID(), + /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{bytes, true, true, - BuildLabels("", httpContext, "stream.incoming_bytes_per_second") + BuildLabels("", httpContext, "stream.incoming_bytes_per_second", setStreamPrefix) + }); + /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), + new TEvServerlessProxy::TEvCounter{bytes, true, true, + BuildLabels("", httpContext, "stream.put_record.bytes_per_second", setStreamPrefix) }); - ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{bytes, true, true, - BuildLabels("", httpContext, "stream.put_record.bytes_per_second") + BuildLabels("PutRecord", httpContext, "api.http.data_streams.request.bytes") }); } @@ -80,9 +103,9 @@ void FillInputCustomMetrics<PutRecordRequest>(const PutRecordRequest& request, c template <> void FillOutputCustomMetrics<PutRecordResult>(const PutRecordResult& result, const THttpRequestContext& httpContext, const TActorContext& ctx) { Y_UNUSED(result); - ctx.Send(MakeMetricsServiceID(), + /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{1, true, true, - BuildLabels("", httpContext, "stream.put_record.success_per_second") + BuildLabels("", httpContext, "stream.put_record.success_per_second", setStreamPrefix) }); } @@ -92,24 +115,36 @@ void FillOutputCustomMetrics<PutRecordsResult>(const PutRecordsResult& result, c i64 failed = result.failed_record_count(); i64 success = result.records_size() - failed; if (success > 0) { - ctx.Send(MakeMetricsServiceID(), + /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{1, true, true, - BuildLabels("", httpContext, "stream.put_records.success_per_second") + BuildLabels("", httpContext, "stream.put_records.success_per_second", 
setStreamPrefix) + }); + /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), + new TEvServerlessProxy::TEvCounter{success, true, true, + BuildLabels("", httpContext, "stream.put_records.successfull_records_per_second", setStreamPrefix) }); ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{success, true, true, - BuildLabels("", httpContext, "stream.put_records.successfull_records_per_second") + BuildLabels("", httpContext, "api.http.data_streams.put_records.successfull_messages") }); } + /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), + new TEvServerlessProxy::TEvCounter{result.records_size(), true, true, + BuildLabels("", httpContext, "stream.put_records.total_records_per_second", setStreamPrefix) + }); ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{result.records_size(), true, true, - BuildLabels("", httpContext, "stream.put_records.total_records_per_second") + BuildLabels("", httpContext, "api.http.data_streams.put_records.total_messages") }); if (failed > 0) { + /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), + new TEvServerlessProxy::TEvCounter{failed, true, true, + BuildLabels("", httpContext, "streams.put_records.failed_records_per_second", setStreamPrefix) + }); ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{failed, true, true, - BuildLabels("", httpContext, "stream.put_records.failed_records_per_second") + BuildLabels("", httpContext, "api.http.data_streams.put_records.failed_messages") }); } } @@ -122,30 +157,38 @@ void FillOutputCustomMetrics<GetRecordsResult>(const GetRecordsResult& result, c return sum + r.data().size() + r.partition_key().size() + r.sequence_number().size() + - sizeof(r.timestamp()) + - sizeof(r.encryption()) + sizeof(r.approximate_arrival_timestamp()) + + sizeof(r.encryption_type()) ; }); - ctx.Send(MakeMetricsServiceID(), + /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{1, true, true, - BuildLabels("", 
httpContext, "stream.get_records.success_per_second")} + BuildLabels("", httpContext, "stream.get_records.success_per_second", setStreamPrefix)} ); - ctx.Send(MakeMetricsServiceID(), + /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{records_n, true, true, - BuildLabels("", httpContext, "stream.get_records.records_per_second")} + BuildLabels("", httpContext, "stream.get_records.records_per_second", setStreamPrefix)} ); - ctx.Send(MakeMetricsServiceID(), + /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), + new TEvServerlessProxy::TEvCounter{bytes, true, true, + BuildLabels("", httpContext, "stream.get_records.bytes_per_second", setStreamPrefix)} + ); + /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), + new TEvServerlessProxy::TEvCounter{records_n, true, true, + BuildLabels("", httpContext, "stream.outgoing_records_per_second", setStreamPrefix)} + ); + /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{bytes, true, true, - BuildLabels("", httpContext, "stream.get_records.bytes_per_second")} + BuildLabels("", httpContext, "stream.outgoing_bytes_per_second", setStreamPrefix)} ); ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{records_n, true, true, - BuildLabels("", httpContext, "stream.outgoing_records_per_second")} + BuildLabels("", httpContext, "api.http.data_streams.get_records.messages")} ); ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{bytes, true, true, - BuildLabels("", httpContext, "stream.outgoing_bytes_per_second")} + BuildLabels("GetRecords", httpContext, "api.http.data_streams.response.bytes")} ); } diff --git a/ydb/core/http_proxy/events.h b/ydb/core/http_proxy/events.h index cd32e92c34..c859597ebe 100644 --- a/ydb/core/http_proxy/events.h +++ b/ydb/core/http_proxy/events.h @@ -130,7 +130,6 @@ namespace NKikimr::NHttpProxy { TEvClientReady() {} }; - struct TEvError : public TEventLocal<TEvError, EvError> { 
NYdb::EStatus Status; TString Response; @@ -140,9 +139,6 @@ namespace NKikimr::NHttpProxy { , Response(response) {} }; - - - }; diff --git a/ydb/core/http_proxy/http_req.cpp b/ydb/core/http_proxy/http_req.cpp index a4eb8a230d..bd7efc3065 100644 --- a/ydb/core/http_proxy/http_req.cpp +++ b/ydb/core/http_proxy/http_req.cpp @@ -6,6 +6,7 @@ #include <library/cpp/actors/http/http_proxy.h> #include <library/cpp/cgiparam/cgiparam.h> +#include <library/cpp/digest/old_crc/crc.h> #include <library/cpp/http/misc/parsed_request.h> #include <library/cpp/http/server/response.h> @@ -47,6 +48,8 @@ namespace NKikimr::NHttpProxy { TString StatusToErrorType(NYdb::EStatus status) { switch(status) { + case NYdb::EStatus::SUCCESS: + return "OK"; case NYdb::EStatus::BAD_REQUEST: return "InvalidParameterValueException"; //TODO: bring here issues and parse from them case NYdb::EStatus::CLIENT_UNAUTHENTICATED: @@ -85,6 +88,8 @@ namespace NKikimr::NHttpProxy { HttpCodes StatusToHttpCode(NYdb::EStatus status) { switch(status) { + case NYdb::EStatus::SUCCESS: + return HTTP_OK; case NYdb::EStatus::UNSUPPORTED: case NYdb::EStatus::BAD_REQUEST: return HTTP_BAD_REQUEST; @@ -174,10 +179,12 @@ namespace NKikimr::NHttpProxy { constexpr TStringBuf IAM_HEADER = "x-yacloud-subjecttoken"; constexpr TStringBuf AUTHORIZATION_HEADER = "authorization"; constexpr TStringBuf REQUEST_ID_HEADER = "x-request-id"; + constexpr TStringBuf REQUEST_ID_HEADER_EXT = "x-amzn-requestid"; constexpr TStringBuf REQUEST_DATE_HEADER = "x-amz-date"; constexpr TStringBuf REQUEST_FORWARDED_FOR = "x-forwarded-for"; constexpr TStringBuf REQUEST_TARGET_HEADER = "x-amz-target"; constexpr TStringBuf REQUEST_CONTENT_TYPE_HEADER = "content-type"; + constexpr TStringBuf CRC32_HEADER = "x-amz-crc32"; static const TString CREDENTIAL_PARAM = "credential"; template<class TProtoService, class TProtoRequest, class TProtoResponse, class TProtoResult, class TProtoCall, class TRpcEv> @@ -373,7 +380,7 @@ namespace NKikimr::NHttpProxy { } void 
ReplyWithError(const TActorContext& ctx, NYdb::EStatus status, const TString& errorText) { - ctx.Send(MakeMetricsServiceID(), + /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{ 1, true, true, {{"method", Method}, @@ -385,6 +392,19 @@ namespace NKikimr::NHttpProxy { {"name", "api.http.errors_per_second"}} }); + ctx.Send(MakeMetricsServiceID(), + new TEvServerlessProxy::TEvCounter{ + 1, true, true, + {{"database", HttpContext.DatabaseName}, + {"method", Method}, + {"cloud_id", HttpContext.CloudId}, + {"folder_id", HttpContext.FolderId}, + {"database_id", HttpContext.DatabaseId}, + {"topic", HttpContext.StreamName}, + {"code", TStringBuilder() << (int)StatusToHttpCode(status)}, + {"name", "api.http.data_streams.response.count"}} + }); + //TODO: add api.http.response.count HttpContext.ResponseData.Status = status; HttpContext.ResponseData.ErrorText = errorText; HttpContext.DoReply(ctx); @@ -412,10 +432,15 @@ namespace NKikimr::NHttpProxy { } FillInputCustomMetrics<TProtoRequest>(Request, HttpContext, ctx); + /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), + new TEvServerlessProxy::TEvCounter{1, true, true, + BuildLabels(Method, HttpContext, "api.http.requests_per_second", setStreamPrefix) + }); ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{1, true, true, - BuildLabels(Method, HttpContext, "api.http.requests_per_second") + BuildLabels(Method, HttpContext, "api.http.data_streams.request.count") }); + //TODO: add api.http.request.count CreateClient(ctx); return; } @@ -425,25 +450,42 @@ namespace NKikimr::NHttpProxy { void ReportLatencyCounters(const TActorContext& ctx) { TDuration dur = ctx.Now() - StartTime; + /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), + new TEvServerlessProxy::TEvHistCounter{static_cast<i64>(dur.MilliSeconds()), 1, + BuildLabels(Method, HttpContext, "api.http.requests_duration_milliseconds", setStreamPrefix) + }); ctx.Send(MakeMetricsServiceID(), new 
TEvServerlessProxy::TEvHistCounter{static_cast<i64>(dur.MilliSeconds()), 1, - BuildLabels(Method, HttpContext, "api.http.requests_duration_milliseconds") + BuildLabels(Method, HttpContext, "api.http.data_streams.response.duration_milliseconds") }); + //TODO: add api.http.response.duration_milliseconds } void HandleGrpcResponse(TEvServerlessProxy::TEvGrpcRequestResult::TPtr ev, const TActorContext& ctx) { if (ev->Get()->Status->IsSuccess()) { - ProtoToJson(*ev->Get()->Message, HttpContext.ResponseData.Body); + ProtoToJson(*ev->Get()->Message, HttpContext.ResponseData.Body, + HttpContext.ContentType == MIME_CBOR); FillOutputCustomMetrics<TProtoResult>( *(dynamic_cast<TProtoResult*>(ev->Get()->Message.Get())), HttpContext, ctx); ReportLatencyCounters(ctx); - ctx.Send(MakeMetricsServiceID(), + /* deprecated metric: */ ctx.Send(MakeMetricsServiceID(), new TEvServerlessProxy::TEvCounter{1, true, true, - BuildLabels(Method, HttpContext, "api.http.success_per_second") + BuildLabels(Method, HttpContext, "api.http.success_per_second", setStreamPrefix) }); - + ctx.Send(MakeMetricsServiceID(), + new TEvServerlessProxy::TEvCounter{ + 1, true, true, + {{"database", HttpContext.DatabaseName}, + {"method", Method}, + {"cloud_id", HttpContext.CloudId}, + {"folder_id", HttpContext.FolderId}, + {"database_id", HttpContext.DatabaseId}, + {"topic", HttpContext.StreamName}, + {"code", "200"}, + {"name", "api.http.data_streams.response.count"}} + }); HttpContext.DoReply(ctx); } else { auto retryClass = @@ -589,8 +631,9 @@ namespace NKikimr::NHttpProxy { proc->second->Execute(std::move(context), std::move(signature), ctx); return true; } - context.SendBadRequest(NYdb::EStatus::BAD_REQUEST, - TStringBuilder() << "Unknown method name " << name, ctx); + context.ResponseData.Status = NYdb::EStatus::BAD_REQUEST; + context.ResponseData.ErrorText = TStringBuilder() << "Unknown method name " << name; + context.DoReply(ctx); return false; } @@ -625,7 +668,7 @@ namespace NKikimr::NHttpProxy { if 
(DatabaseName == "/") { DatabaseName = ""; } - + //TODO: find out databaseId ParseHeaders(Request->Headers); } @@ -643,34 +686,69 @@ namespace NKikimr::NHttpProxy { return signature; } - void THttpRequestContext::SendBadRequest(NYdb::EStatus status, const TString& errorText, - const TActorContext& ctx) { - ResponseData.Body.SetType(NJson::JSON_MAP); - ResponseData.Body["message"] = errorText; - ResponseData.Body["__type"] = StatusToErrorType(status); - - LOG_SP_INFO_S(ctx, NKikimrServices::HTTP_PROXY, - "reply with status: " << status << " message: " << errorText); - auto res = Request->CreateResponse( - TStringBuilder() << (int)StatusToHttpCode(status), - StatusToErrorType(status), - strByMime(ContentType), - ResponseData.DumpBody(ContentType) - ); - ctx.Send(Sender, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(res)); - } - void THttpRequestContext::DoReply(const TActorContext& ctx) { + auto createResponse = [this](const auto& request, + TStringBuf status, + TStringBuf message, + TStringBuf contentType, + TStringBuf body) { + NHttp::THttpOutgoingResponsePtr response = + new NHttp::THttpOutgoingResponse(request, "HTTP", "1.1", status, message); + response->Set<&NHttp::THttpResponse::Connection>(request->GetConnection()); + response->Set(REQUEST_ID_HEADER_EXT, RequestId); + if (!contentType.empty() && !body.empty()) { + response->Set(CRC32_HEADER, ToString(crc32(body.data(), body.size()))); + response->Set<&NHttp::THttpResponse::ContentType>(contentType); + if (!request->Endpoint->CompressContentTypes.empty()) { + contentType = contentType.Before(';'); + NHttp::Trim(contentType, ' '); + if (Count(request->Endpoint->CompressContentTypes, contentType) != 0) { + response->EnableCompression(); + } + } + } + + if (response->IsNeedBody() || !body.empty()) { + if (request->Method == "HEAD") { + response->Set<&NHttp::THttpResponse::ContentLength>(ToString(body.size())); + } else { + response->SetBody(body); + } + } + return response; + }; + auto strByMimeAws = 
[](MimeTypes contentType) { + switch (contentType) { + case MIME_JSON: + return "application/x-amz-json-1.1"; + case MIME_CBOR: + return "application/x-amz-cbor-1.1"; + default: + return strByMime(contentType); + } + }; + if (ResponseData.Status == NYdb::EStatus::SUCCESS) { LOG_SP_INFO_S(ctx, NKikimrServices::HTTP_PROXY, "reply ok"); - auto res = Request->CreateResponseOK( - ResponseData.DumpBody(ContentType), - strByMime(ContentType) - ); - ctx.Send(Sender, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(res)); } else { - SendBadRequest(ResponseData.Status, ResponseData.ErrorText, ctx); + LOG_SP_INFO_S(ctx, NKikimrServices::HTTP_PROXY, + "reply with status: " << ResponseData.Status << + " message: " << ResponseData.ErrorText); + + ResponseData.Body.SetType(NJson::JSON_MAP); + ResponseData.Body["message"] = ResponseData.ErrorText; + ResponseData.Body["__type"] = StatusToErrorType(ResponseData.Status); } + + auto response = createResponse( + Request, + TStringBuilder() << (ui32)StatusToHttpCode(ResponseData.Status), + StatusToErrorType(ResponseData.Status), + strByMimeAws(ContentType), + ResponseData.DumpBody(ContentType) + ); + + ctx.Send(Sender, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(response)); } void THttpRequestContext::ParseHeaders(TStringBuf str) { @@ -701,10 +779,41 @@ namespace NKikimr::NHttpProxy { } TString THttpResponseData::DumpBody(MimeTypes contentType) { + // according to https://json.nlohmann.me/features/binary_formats/cbor/#serialization + auto cborBinaryTagBySize = [](size_t size) -> ui8 { + if (size <= 23) { + return 0x40 + static_cast<ui32>(size); + } else if (size <= 255) { + return 0x58; + } else if (size <= 65536) { + return 0x59; + } + + return 0x5A; + }; switch (contentType) { case MIME_CBOR: { + bool gotData = false; + std::function<bool(int, nlohmann::json::parse_event_t, nlohmann::basic_json<>&)> bz = + [&gotData, &cborBinaryTagBySize](int, nlohmann::json::parse_event_t event, nlohmann::json& parsed) { + if (event == 
nlohmann::json::parse_event_t::key and parsed == nlohmann::json("Data")) { + gotData = true; + return true; + } + if (event == nlohmann::json::parse_event_t::value and gotData) { + gotData = false; + std::string data = parsed.get<std::string>(); + parsed = nlohmann::json::binary({data.begin(), data.end()}, + cborBinaryTagBySize(data.size())); + return true; + } + return true; + }; + auto toCborStr = NJson::WriteJson(Body, false); - auto toCbor = nlohmann::json::to_cbor({toCborStr.begin(), toCborStr.end()}); + auto json = + nlohmann::json::parse(TStringBuf(toCborStr).begin(), TStringBuf(toCborStr).end(), bz, false); + auto toCbor = nlohmann::json::to_cbor(json); return {(char*)&toCbor[0], toCbor.size()}; } default: { @@ -715,8 +824,8 @@ namespace NKikimr::NHttpProxy { } void THttpRequestContext::RequestBodyToProto(NProtoBuf::Message* request) { - auto requestJsonStr = Request->Body; - if (requestJsonStr.empty()) { + TStringBuf requestStr = Request->Body; + if (requestStr.empty()) { throw NKikimr::NSQS::TSQSException(NKikimr::NSQS::NErrors::MALFORMED_QUERY_STRING) << "Empty body"; } @@ -726,28 +835,26 @@ namespace NKikimr::NHttpProxy { listStreamsRequest->set_recurse(true); } - std::string bufferStr; switch (ContentType) { case MIME_CBOR: { - // CborToProto(HttpContext.Request->Body, request); - auto fromCbor = nlohmann::json::from_cbor(Request->Body.begin(), - Request->Body.end(), true, false); + auto fromCbor = nlohmann::json::from_cbor(requestStr.begin(), requestStr.end(), + true, false, + nlohmann::json::cbor_tag_handler_t::ignore); if (fromCbor.is_discarded()) { throw NKikimr::NSQS::TSQSException(NKikimr::NSQS::NErrors::MALFORMED_QUERY_STRING) << "Can not parse request body from CBOR"; } else { - bufferStr = fromCbor.dump(); - requestJsonStr = TStringBuf(bufferStr.begin(), bufferStr.end()); + NlohmannJsonToProto(fromCbor, request); } + break; } case MIME_JSON: { - NJson::TJsonValue requestBody; - auto fromJson = NJson::ReadJsonTree(requestJsonStr, &requestBody); 
- if (fromJson) { - JsonToProto(requestBody, request); - } else { + auto fromJson = nlohmann::json::parse(requestStr, nullptr, false); + if (fromJson.is_discarded()) { throw NKikimr::NSQS::TSQSException(NKikimr::NSQS::NErrors::MALFORMED_QUERY_STRING) << "Can not parse request body from JSON"; + } else { + NlohmannJsonToProto(fromJson, request); } break; } diff --git a/ydb/core/http_proxy/http_req.h b/ydb/core/http_proxy/http_req.h index 153ecb507a..78cd5f77e2 100644 --- a/ydb/core/http_proxy/http_req.h +++ b/ydb/core/http_proxy/http_req.h @@ -49,7 +49,7 @@ private: struct THttpResponseData { NYdb::EStatus Status{NYdb::EStatus::SUCCESS}; NJson::TJsonValue Body; - TString ErrorText; + TString ErrorText{"OK"}; TString DumpBody(MimeTypes contentType); }; @@ -87,7 +87,6 @@ struct THttpRequestContext { } THolder<NKikimr::NSQS::TAwsRequestSignV4> GetSignature(); - void SendBadRequest(NYdb::EStatus status, const TString& errorText, const TActorContext& ctx); void DoReply(const TActorContext& ctx); void ParseHeaders(TStringBuf headers); void RequestBodyToProto(NProtoBuf::Message* request); diff --git a/ydb/core/http_proxy/http_service.cpp b/ydb/core/http_proxy/http_service.cpp index cf74c127ee..026c9c7324 100644 --- a/ydb/core/http_proxy/http_service.cpp +++ b/ydb/core/http_proxy/http_service.cpp @@ -101,7 +101,9 @@ namespace NKikimr::NHttpProxy { auto signature = context.GetSignature(); Processors->Execute(context.MethodName, std::move(context), std::move(signature), ctx); } catch (NKikimr::NSQS::TSQSException& e) { - context.SendBadRequest(NYdb::EStatus::BAD_REQUEST, e.what(), ctx); + context.ResponseData.Status = NYdb::EStatus::BAD_REQUEST; + context.ResponseData.ErrorText = e.what(); + context.DoReply(ctx); return; } } diff --git a/ydb/core/http_proxy/json_proto_conversion.h b/ydb/core/http_proxy/json_proto_conversion.h index 4399cfef9b..f2601459ec 100644 --- a/ydb/core/http_proxy/json_proto_conversion.h +++ b/ydb/core/http_proxy/json_proto_conversion.h @@ -9,6 +9,8 @@ 
#include <ydb/library/naming_conventions/naming_conventions.h> #include <ydb/public/sdk/cpp/client/ydb_datastreams/datastreams.h> +#include <nlohmann/json.hpp> + namespace NKikimr::NHttpProxy { @@ -19,9 +21,11 @@ TString ProxyFieldNameConverter(const google::protobuf::FieldDescriptor& descrip class TYdsProtoToJsonPrinter : public NProtobufJson::TProto2JsonPrinter { public: TYdsProtoToJsonPrinter(const google::protobuf::Reflection* reflection, - const NProtobufJson::TProto2JsonConfig& config) + const NProtobufJson::TProto2JsonConfig& config, + bool skipBase64Encode) : NProtobufJson::TProto2JsonPrinter(config) , ProtoReflection(reflection) + , SkipBase64Encode(skipBase64Encode) {} protected: @@ -46,14 +50,22 @@ protected: key = MakeKey(field); } + auto maybeBase64Encode = [skipBase64Encode = this->SkipBase64Encode, &key](const TString& str) { + if (key == "Data" && skipBase64Encode) { + return str; + } + + return Base64Encode(str); + }; + if (field.is_repeated()) { for (int i = 0, endI = ProtoReflection->FieldSize(proto, &field); i < endI; ++i) { PrintStringValue<false>(field, TStringBuf(), - Base64Encode(proto.GetReflection()->GetRepeatedString(proto, &field, i)), json); + maybeBase64Encode(proto.GetReflection()->GetRepeatedString(proto, &field, i)), json); } } else { PrintStringValue<true>(field, key, - Base64Encode(proto.GetReflection()->GetString(proto, &field)), json); + maybeBase64Encode(proto.GetReflection()->GetString(proto, &field)), json); } return; } @@ -112,19 +124,20 @@ protected: private: const google::protobuf::Reflection* ProtoReflection = nullptr; + bool SkipBase64Encode; }; -void ProtoToJson(const NProtoBuf::Message& resp, NJson::TJsonValue& value) { +inline void ProtoToJson(const NProtoBuf::Message& resp, NJson::TJsonValue& value, bool skipBase64Encode) { auto config = NProtobufJson::TProto2JsonConfig() .SetFormatOutput(false) .SetMissingSingleKeyMode(NProtobufJson::TProto2JsonConfig::MissingKeyDefault) .SetNameGenerator(ProxyFieldNameConverter) 
.SetEnumMode(NProtobufJson::TProto2JsonConfig::EnumName); - TYdsProtoToJsonPrinter printer(resp.GetReflection(), config); + TYdsProtoToJsonPrinter printer(resp.GetReflection(), config, skipBase64Encode); printer.Print(resp, *NProtobufJson::CreateJsonMapOutput(value)); } -void JsonToProto(const NJson::TJsonValue& jsonValue, NProtoBuf::Message* message, ui32 depth = 0) { +inline void JsonToProto(const NJson::TJsonValue& jsonValue, NProtoBuf::Message* message, ui32 depth = 0) { Y_ENSURE(depth < 101, "Json depth is > 100"); Y_ENSURE(jsonValue.IsMap(), "Top level of json value is not a map"); auto* desc = message->GetDescriptor(); @@ -278,4 +291,166 @@ void JsonToProto(const NJson::TJsonValue& jsonValue, NProtoBuf::Message* message } } +inline void NlohmannJsonToProto(const nlohmann::json& jsonValue, NProtoBuf::Message* message, ui32 depth = 0) { + Y_ENSURE(depth < 101, "Json depth is > 100"); + Y_ENSURE(jsonValue.is_object(), "Top level of json value is not a map"); + auto* desc = message->GetDescriptor(); + auto* reflection = message->GetReflection(); + for (const auto& [key, value] : jsonValue.get<std::unordered_map<std::string, nlohmann::json>>()) { + auto* fieldDescriptor = desc->FindFieldByName(NNaming::CamelToSnakeCase(key.c_str())); + Y_ENSURE(fieldDescriptor, "Unexpected json key: " + key); + auto transformer = Ydb::DataStreams::V1::TRANSFORM_NONE; + if (fieldDescriptor->options().HasExtension(Ydb::DataStreams::V1::FieldTransformer)) { + transformer = fieldDescriptor->options().GetExtension(Ydb::DataStreams::V1::FieldTransformer); + } + + if (value.is_array()) { + Y_ENSURE(fieldDescriptor->is_repeated()); + for (auto& elem : value) { + switch (transformer) { + case Ydb::DataStreams::V1::TRANSFORM_BASE64: { + Y_ENSURE(fieldDescriptor->cpp_type() == + google::protobuf::FieldDescriptor::CPPTYPE_STRING, + "Base64 transformer is only applicable to strings"); + if (elem.is_binary()) { + reflection->AddString(message, fieldDescriptor, 
std::string(elem.get_binary().begin(), elem.get_binary().end())); + } else { + reflection->AddString(message, fieldDescriptor, Base64Decode(elem.get<std::string>())); + } + break; + } + case Ydb::DataStreams::V1::TRANSFORM_DOUBLE_S_TO_INT_MS: { + reflection->AddInt64(message, fieldDescriptor, elem.get<double>() * 1000); + break; + } + case Ydb::DataStreams::V1::TRANSFORM_EMPTY_TO_NOTHING: + case Ydb::DataStreams::V1::TRANSFORM_NONE: { + switch (fieldDescriptor->cpp_type()) { + case google::protobuf::FieldDescriptor::CPPTYPE_INT32: + reflection->AddInt32(message, fieldDescriptor, elem.get<i32>()); + break; + case google::protobuf::FieldDescriptor::CPPTYPE_INT64: + reflection->AddInt64(message, fieldDescriptor, elem.get<i32>()); + break; + case google::protobuf::FieldDescriptor::CPPTYPE_UINT32: + reflection->AddUInt32(message, fieldDescriptor, elem.get<ui32>()); + break; + case google::protobuf::FieldDescriptor::CPPTYPE_UINT64: + reflection->AddUInt64(message, fieldDescriptor, elem.get<ui32>()); + break; + case google::protobuf::FieldDescriptor::CPPTYPE_DOUBLE: + reflection->AddDouble(message, fieldDescriptor, elem.get<double>()); + break; + case google::protobuf::FieldDescriptor::CPPTYPE_FLOAT: + reflection->AddFloat(message, fieldDescriptor, elem.get<double>()); + break; + case google::protobuf::FieldDescriptor::CPPTYPE_BOOL: + reflection->AddBool(message, fieldDescriptor, elem.get<bool>()); + break; + case google::protobuf::FieldDescriptor::CPPTYPE_ENUM: + { + const NProtoBuf::EnumValueDescriptor* enumValueDescriptor = + fieldDescriptor->enum_type()->FindValueByName(elem.get<std::string>()); + i32 number{0}; + if (enumValueDescriptor == nullptr && + TryFromString(elem.get<std::string>(), number)) { + enumValueDescriptor = + fieldDescriptor->enum_type()->FindValueByNumber(number); + } + if (enumValueDescriptor != nullptr) { + reflection->AddEnum(message, fieldDescriptor, enumValueDescriptor); + } + } + break; + case 
google::protobuf::FieldDescriptor::CPPTYPE_STRING: + reflection->AddString(message, fieldDescriptor, elem.get<std::string>()); + break; + case google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE: { + NProtoBuf::Message *msg = reflection->AddMessage(message, fieldDescriptor); + NlohmannJsonToProto(elem, msg, depth + 1); + break; + } + default: + Y_ENSURE(false, "Unexpected type"); + } + break; + } + default: + Y_ENSURE(false, "Unknown transformer type"); + } + } + } else { + switch (transformer) { + case Ydb::DataStreams::V1::TRANSFORM_BASE64: { + Y_ENSURE(fieldDescriptor->cpp_type() == + google::protobuf::FieldDescriptor::CPPTYPE_STRING, + "Base64 transformer is applicable only to strings"); + if (value.is_binary()) { + reflection->SetString(message, fieldDescriptor, std::string(value.get_binary().begin(), value.get_binary().end())); + } else { + reflection->SetString(message, fieldDescriptor, Base64Decode(value.get<std::string>())); + } + break; + } + case Ydb::DataStreams::V1::TRANSFORM_DOUBLE_S_TO_INT_MS: { + reflection->SetInt64(message, fieldDescriptor, value.get<double>() * 1000); + break; + } + case Ydb::DataStreams::V1::TRANSFORM_EMPTY_TO_NOTHING: + case Ydb::DataStreams::V1::TRANSFORM_NONE: { + switch (fieldDescriptor->cpp_type()) { + case google::protobuf::FieldDescriptor::CPPTYPE_INT32: + reflection->SetInt32(message, fieldDescriptor, value.get<i32>()); + break; + case google::protobuf::FieldDescriptor::CPPTYPE_INT64: + reflection->SetInt64(message, fieldDescriptor, value.get<i32>()); + break; + case google::protobuf::FieldDescriptor::CPPTYPE_UINT32: + reflection->SetUInt32(message, fieldDescriptor, value.get<ui32>()); + break; + case google::protobuf::FieldDescriptor::CPPTYPE_UINT64: + reflection->SetUInt64(message, fieldDescriptor, value.get<ui32>()); + break; + case google::protobuf::FieldDescriptor::CPPTYPE_DOUBLE: + reflection->SetDouble(message, fieldDescriptor, value.get<double>()); + break; + case google::protobuf::FieldDescriptor::CPPTYPE_FLOAT: 
+ reflection->SetFloat(message, fieldDescriptor, value.get<double>()); + break; + case google::protobuf::FieldDescriptor::CPPTYPE_BOOL: + reflection->SetBool(message, fieldDescriptor, value.get<bool>()); + break; + case google::protobuf::FieldDescriptor::CPPTYPE_ENUM: { + const NProtoBuf::EnumValueDescriptor* enumValueDescriptor = + fieldDescriptor->enum_type()->FindValueByName(value.get<std::string>()); + i32 number{0}; + if (enumValueDescriptor == nullptr && + TryFromString(value.get<std::string>(), number)) { + enumValueDescriptor = + fieldDescriptor->enum_type()->FindValueByNumber(number); + } + if (enumValueDescriptor != nullptr) { + reflection->SetEnum(message, fieldDescriptor, enumValueDescriptor); + } + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_STRING: + reflection->SetString(message, fieldDescriptor, value.get<std::string>()); + break; + case google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE: { + auto *msg = reflection->MutableMessage(message, fieldDescriptor); + NlohmannJsonToProto(value, msg, depth + 1); + break; + } + default: + Y_ENSURE(false, "Unexpected type"); + } + break; + } + default: Y_ENSURE(false, "Unexpected transformer"); + } + } + } +} + } // namespace NKikimr::NHttpProxy diff --git a/ydb/core/http_proxy/ut/CMakeLists.darwin.txt b/ydb/core/http_proxy/ut/CMakeLists.darwin.txt index 8167941b47..09f8502a4d 100644 --- a/ydb/core/http_proxy/ut/CMakeLists.darwin.txt +++ b/ydb/core/http_proxy/ut/CMakeLists.darwin.txt @@ -17,6 +17,7 @@ target_link_libraries(ydb-core-http_proxy-ut PUBLIC library-cpp-cpuid_check cpp-testing-unittest_main ydb-core-http_proxy + contrib-restricted-nlohmann_json library-cpp-resource cpp-client-ydb_types ) diff --git a/ydb/core/http_proxy/ut/CMakeLists.linux.txt b/ydb/core/http_proxy/ut/CMakeLists.linux.txt index 939fa4ae38..aa04e68566 100644 --- a/ydb/core/http_proxy/ut/CMakeLists.linux.txt +++ b/ydb/core/http_proxy/ut/CMakeLists.linux.txt @@ -19,6 +19,7 @@ target_link_libraries(ydb-core-http_proxy-ut 
PUBLIC library-cpp-cpuid_check cpp-testing-unittest_main ydb-core-http_proxy + contrib-restricted-nlohmann_json library-cpp-resource cpp-client-ydb_types ) diff --git a/ydb/core/http_proxy/ut/json_proto_conversion_ut.cpp b/ydb/core/http_proxy/ut/json_proto_conversion_ut.cpp index eaf335c574..fb986daf92 100644 --- a/ydb/core/http_proxy/ut/json_proto_conversion_ut.cpp +++ b/ydb/core/http_proxy/ut/json_proto_conversion_ut.cpp @@ -70,7 +70,98 @@ Y_UNIT_TEST(JsonToProtoArray) { UNIT_ASSERT_VALUES_EQUAL(message.shard_level_metrics(i), str); } } - } +Y_UNIT_TEST(NlohmannJsonToProtoArray) { + { + Ydb::DataStreams::V1::PutRecordsRequest message; + nlohmann::json jsonValue; + jsonValue["StreamName"] = "stream"; + auto& records = jsonValue["Records"]; + nlohmann::json record; + record["Data"] = nlohmann::json::binary({123,34,116,105,99,107,101,114, + 83,121,109,98,111,108,34,58, + 34,66,82,75,46,65,34,44, + 34,116,114,97,100,101,84,121, + 112,101,34,58,34,83,69,76, + 76,34,44,34,112,114,105,99, + 101,34,58,50,53,49,54,50, + 48,46,49,49,44,34,113,117, + 97,110,116,105,116,121,34,58, + 51,56,50,52,44,34,105,100, + 34,58,54,125}, 42); + record["ExplicitHashKey"] = "exp0"; + record["PartitionKey"] = "part0"; + records.push_back(record); + record["Data"] = nlohmann::json::binary({123,34,116,105,99,107,101,114, + 83,121,109,98,111,108,34,58, + 34,66,82,75,46,65,34,44, + 34,116,114,97,100,101,84,121, + 112,101,34,58,34,83,69,76, + 76,34,44,34,112,114,105,99, + 101,34,58,50,53,49,54,50, + 48,46,49,49,44,34,113,117, + 97,110,116,105,116,121,34,58, + 51,49,50,52,44,34,105,100, + 34,58,50,125}, 42); + record["ExplicitHashKey"] = "exp1"; + record["PartitionKey"] = "part1"; + records.push_back(record); + record["Data"] = nlohmann::json::binary({116,105,99,107,101,114,83,121, + 109,98,111,108,66,82,75,46, + 65,116,114,97,100,101,84,121, + 112,101,83,69,76,76,112,114, + 105,99,101,50,53,49,54,50, + 48,46,0,0,113,117,97,110, + 116,105,116,121,51}, 42); + record["ExplicitHashKey"] = "exp2"; + 
record["PartitionKey"] = "part2"; + records.push_back(record); + NKikimr::NHttpProxy::NlohmannJsonToProto(jsonValue, &message); + + UNIT_ASSERT_VALUES_EQUAL(message.stream_name(), "stream"); + + UNIT_ASSERT_VALUES_EQUAL(message.records(0).explicit_hash_key(), "exp0"); + UNIT_ASSERT_VALUES_EQUAL(message.records(0).partition_key(), "part0"); + UNIT_ASSERT_VALUES_EQUAL(message.records(0).data(), + "{\"tickerSymbol\":\"BRK.A\",\"tradeType\":\"SELL\",\"price\":251620.11,\"quantity\":3824,\"id\":6}"); + UNIT_ASSERT_VALUES_EQUAL(message.records(1).explicit_hash_key(), "exp1"); + UNIT_ASSERT_VALUES_EQUAL(message.records(1).partition_key(), "part1"); + UNIT_ASSERT_VALUES_EQUAL(message.records(1).data(), + "{\"tickerSymbol\":\"BRK.A\",\"tradeType\":\"SELL\",\"price\":251620.11,\"quantity\":3124,\"id\":2}"); + // This one last record is just an array of bytes with 0 bytes in it + UNIT_ASSERT_VALUES_EQUAL(message.records(2).explicit_hash_key(), "exp2"); + UNIT_ASSERT_VALUES_EQUAL(message.records(2).partition_key(), "part2"); + std::string binaryWithNull{'t','i','c','k','e','r','S','y','m','b','o','l', + 'B','R','K','.','A','t','r','a','d','e','T','y', + 'p','e','S','E','L','L','p','r','i','c','e','2', + '5','1','6','2','0','.','\0','\0','q','u','a','n', + 't','i','t','y','3'}; + UNIT_ASSERT_VALUES_EQUAL(message.records(2).data().size(), binaryWithNull.size()); + for (size_t i = 0; i < binaryWithNull.size(); ++i) { + UNIT_ASSERT_VALUES_EQUAL(binaryWithNull[i], message.records(2).data()[i]); + } + } + + { + Ydb::DataStreams::V1::PutRecordsRequest message; + nlohmann::json jsonValue; + jsonValue["StreamName"] = "stream"; + auto& records = jsonValue["Records"]; + nlohmann::json record; + record["Data"] = "MTIzCg=="; + record["ExplicitHashKey"] = "exp0"; + record["PartitionKey"] = "part0"; + records.push_back(record); + + NKikimr::NHttpProxy::NlohmannJsonToProto(jsonValue, &message); + + UNIT_ASSERT_VALUES_EQUAL(message.stream_name(), "stream"); + + 
UNIT_ASSERT_VALUES_EQUAL(message.records(0).data(), "123\n"); + UNIT_ASSERT_VALUES_EQUAL(message.records(0).explicit_hash_key(), "exp0"); + UNIT_ASSERT_VALUES_EQUAL(message.records(0).partition_key(), "part0"); + } + +} } // Y_UNIT_TEST_SUITE(JsonProtoConversion) diff --git a/ydb/core/io_formats/CMakeLists.txt b/ydb/core/io_formats/CMakeLists.txt index 45aa4994be..12ffede84b 100644 --- a/ydb/core/io_formats/CMakeLists.txt +++ b/ydb/core/io_formats/CMakeLists.txt @@ -6,9 +6,11 @@ # original buildsystem will not be accepted. +add_subdirectory(ut) add_library(ydb-core-io_formats) target_compile_options(ydb-core-io_formats PRIVATE + -Wno-unused-parameter -DUSE_CURRENT_UDF_ABI_VERSION ) target_link_libraries(ydb-core-io_formats PUBLIC diff --git a/ydb/core/io_formats/csv.h b/ydb/core/io_formats/csv.h index d3e75a487d..fe30b3a6c1 100644 --- a/ydb/core/io_formats/csv.h +++ b/ydb/core/io_formats/csv.h @@ -23,17 +23,27 @@ public: /// If header is true read column names from first line after skipRows. Parse columns as strings in this case. /// @note It's possible to skip header with skipRows and use typed columns instead. 
- TArrowCSV(const TVector<std::pair<TString, NScheme::TTypeId>>& columns, - ui32 skipRows = 0, bool header = false, ui32 blockSize = DEFAULT_BLOCK_SIZE); + TArrowCSV(const TVector<std::pair<TString, NScheme::TTypeId>>& columns, bool header = false); std::shared_ptr<arrow::RecordBatch> ReadNext(const TString& csv, TString& errString); + std::shared_ptr<arrow::RecordBatch> ReadSingleBatch(const TString& csv, TString& errString); void Reset() { Reader = {}; } - void SetDelimiter(char delimiter = ',') { - ParseOptions.delimiter = delimiter; + void SetSkipRows(ui32 skipRows) { + ReadOptions.skip_rows = skipRows; + } + + void SetBlockSize(ui32 blockSize = DEFAULT_BLOCK_SIZE) { + ReadOptions.block_size = blockSize; + } + + void SetDelimiter(std::optional<char> delimiter) { + if (delimiter) { + ParseOptions.delimiter = *delimiter; + } } void SetQuoting(bool quoting = true, char quoteChar = '"', bool doubleQuote = true) { @@ -47,17 +57,7 @@ public: ParseOptions.escape_char = escapeChar; } - void SetNullValue(const TString& null) { - if (!null.empty()) { - ConvertOptions.null_values = { std::string(null.data(), null.size()) }; - ConvertOptions.strings_can_be_null = true; - ConvertOptions.quoted_strings_can_be_null = true; - } else { - ConvertOptions.null_values.clear(); - ConvertOptions.strings_can_be_null = false; - ConvertOptions.quoted_strings_can_be_null = true; - } - } + void SetNullValue(const TString& null = ""); private: arrow::csv::ReadOptions ReadOptions; @@ -65,6 +65,13 @@ private: arrow::csv::ConvertOptions ConvertOptions; std::shared_ptr<arrow::csv::StreamingReader> Reader; std::vector<TString> ResultColumns; + std::unordered_map<std::string, std::shared_ptr<arrow::DataType>> OriginalColumnTypes; + + std::shared_ptr<arrow::RecordBatch> ConvertColumnTypes(std::shared_ptr<arrow::RecordBatch> parsedBatch) const; + + static TString ErrorPrefix() { + return "Cannot read CSV: "; + } }; } diff --git a/ydb/core/io_formats/csv_arrow.cpp 
b/ydb/core/io_formats/csv_arrow.cpp index 6a79825e56..90c55409c5 100644 --- a/ydb/core/io_formats/csv_arrow.cpp +++ b/ydb/core/io_formats/csv_arrow.cpp @@ -1,18 +1,56 @@ #include "csv.h" #include <ydb/core/formats/arrow_helpers.h> +#include <contrib/libs/apache/arrow/cpp/src/arrow/record_batch.h> +#include <contrib/libs/apache/arrow/cpp/src/arrow/util/value_parsing.h> namespace NKikimr::NFormats { -TArrowCSV::TArrowCSV(const TVector<std::pair<TString, NScheme::TTypeId>>& columns, ui32 skipRows, bool header, - ui32 blockSize) +namespace { +class TimestampIntParser: public arrow::TimestampParser { +public: + TimestampIntParser() {} + + bool operator()(const char* s, size_t length, arrow::TimeUnit::type out_unit, + int64_t* out) const override { + int64_t unitsCount; + if (!TryFromString(TString(s, length), unitsCount)) { + return false; + } + *out = unitsCount; + switch (out_unit) { + case arrow::TimeUnit::NANO: + *out *= 1000000000; + break; + case arrow::TimeUnit::MICRO: + *out *= 1000000; + break; + case arrow::TimeUnit::MILLI: + *out *= 1000; + break; + case arrow::TimeUnit::SECOND: + *out *= 1; + break; + } + return true; + } + + const char* kind() const override { return "ts_int"; } +}; + +} + +TArrowCSV::TArrowCSV(const TVector<std::pair<TString, NScheme::TTypeId>>& columns, bool header) : ReadOptions(arrow::csv::ReadOptions::Defaults()) , ParseOptions(arrow::csv::ParseOptions::Defaults()) , ConvertOptions(arrow::csv::ConvertOptions::Defaults()) { ConvertOptions.check_utf8 = false; - ReadOptions.block_size = blockSize; + ConvertOptions.timestamp_parsers.clear(); + ConvertOptions.timestamp_parsers.emplace_back(arrow::TimestampParser::MakeISO8601()); + ConvertOptions.timestamp_parsers.emplace_back(std::make_shared<TimestampIntParser>()); + + ReadOptions.block_size = DEFAULT_BLOCK_SIZE; ReadOptions.use_threads = false; - ReadOptions.skip_rows = skipRows; ReadOptions.autogenerate_column_names = false; if (header) { // !autogenerate + column_names.empty() => read 
from CSV @@ -21,7 +59,8 @@ TArrowCSV::TArrowCSV(const TVector<std::pair<TString, NScheme::TTypeId>>& column for (auto& [name, type] : columns) { ResultColumns.push_back(name); std::string columnName(name.data(), name.size()); - ConvertOptions.column_types[columnName] = NArrow::GetArrowType(type); + ConvertOptions.column_types[columnName] = NArrow::GetCSVArrowType(type); + OriginalColumnTypes[columnName] = NArrow::GetArrowType(type); } } else if (!columns.empty()) { // !autogenerate + !column_names.empty() => specified columns @@ -30,39 +69,143 @@ TArrowCSV::TArrowCSV(const TVector<std::pair<TString, NScheme::TTypeId>>& column for (auto& [name, type] : columns) { std::string columnName(name.data(), name.size()); ReadOptions.column_names.push_back(columnName); - ConvertOptions.column_types[columnName] = NArrow::GetArrowType(type); + ConvertOptions.column_types[columnName] = NArrow::GetCSVArrowType(type); + OriginalColumnTypes[columnName] = NArrow::GetArrowType(type); } +#if 0 } else { ReadOptions.autogenerate_column_names = true; +#endif } + + SetNullValue(); // set default null value +} + +std::shared_ptr<arrow::RecordBatch> TArrowCSV::ConvertColumnTypes(std::shared_ptr<arrow::RecordBatch> parsedBatch) const { + if (!parsedBatch) { + return parsedBatch; + } + std::shared_ptr<arrow::Schema> schema; + { + arrow::SchemaBuilder sBuilder; + for (auto&& f : parsedBatch->schema()->fields()) { + Y_VERIFY(sBuilder.AddField(std::make_shared<arrow::Field>(f->name(), f->type())).ok()); + + } + auto resultSchema = sBuilder.Finish(); + Y_VERIFY(resultSchema.ok()); + schema = *resultSchema; + } + + std::vector<std::shared_ptr<arrow::Array>> resultColumns; + std::set<std::string> columnsFilter(ResultColumns.begin(), ResultColumns.end()); + for (auto&& f : schema->fields()) { + auto fArr = parsedBatch->GetColumnByName(f->name()); + std::shared_ptr<arrow::DataType> originalType; + if (columnsFilter.contains(f->name()) || columnsFilter.empty()) { + auto it = 
OriginalColumnTypes.find(f->name()); + Y_VERIFY(it != OriginalColumnTypes.end()); + originalType = it->second; + } else { + originalType = f->type(); + } + if (fArr->type()->Equals(originalType)) { + resultColumns.emplace_back(fArr); + } else if (fArr->type()->id() == arrow::TimestampType::type_id) { + arrow::Result<std::shared_ptr<arrow::Array>> arrResult; + { + std::shared_ptr<arrow::TimestampArray> i64Arr = std::make_shared<arrow::TimestampArray>(fArr->data()); + if (originalType->id() == arrow::UInt16Type::type_id) { + arrow::UInt16Builder aBuilder; + Y_VERIFY(aBuilder.Reserve(parsedBatch->num_rows()).ok()); + for (long i = 0; i < parsedBatch->num_rows(); ++i) { + if (i64Arr->IsNull(i)) { + Y_VERIFY(aBuilder.AppendNull().ok()); + } else { + aBuilder.UnsafeAppend(i64Arr->Value(i) / 86400ull); + } + } + arrResult = aBuilder.Finish(); + } else if (originalType->id() == arrow::UInt32Type::type_id) { + arrow::UInt32Builder aBuilder; + Y_VERIFY(aBuilder.Reserve(parsedBatch->num_rows()).ok()); + for (long i = 0; i < parsedBatch->num_rows(); ++i) { + if (i64Arr->IsNull(i)) { + Y_VERIFY(aBuilder.AppendNull().ok()); + } else { + aBuilder.UnsafeAppend(i64Arr->Value(i)); + } + } + arrResult = aBuilder.Finish(); + } else { + Y_VERIFY(false); + } + } + Y_VERIFY(arrResult.ok()); + resultColumns.emplace_back(*arrResult); + } else { + Y_VERIFY(false); + } + } + + return arrow::RecordBatch::Make(schema, parsedBatch->num_rows(), resultColumns); } std::shared_ptr<arrow::RecordBatch> TArrowCSV::ReadNext(const TString& csv, TString& errString) { - if (!Reader && csv.Size()) { + if (!Reader) { + if (ConvertOptions.column_types.empty()) { + errString = ErrorPrefix() + "no columns specified"; + return {}; + } + auto buffer = std::make_shared<NArrow::TBufferOverString>(csv); auto input = std::make_shared<arrow::io::BufferReader>(buffer); auto res = arrow::csv::StreamingReader::Make(arrow::io::default_io_context(), input, ReadOptions, ParseOptions, ConvertOptions); if (!res.ok()) { - 
errString = TStringBuilder() << "Cannot read CSV: " << res.status().ToString(); + errString = ErrorPrefix() + res.status().ToString(); return {}; } Reader = *res; } if (!Reader) { - errString = "Cannot read CSV: no reader"; + errString = ErrorPrefix() + "cannot make reader"; return {}; } - std::shared_ptr<arrow::RecordBatch> batch; - Reader->ReadNext(&batch).ok(); + std::shared_ptr<arrow::RecordBatch> batchParsed; + Reader->ReadNext(&batchParsed).ok(); + + std::shared_ptr<arrow::RecordBatch> batch = ConvertColumnTypes(batchParsed); if (batch && !ResultColumns.empty()) { batch = NArrow::ExtractColumns(batch, ResultColumns); if (!batch) { - errString = "Cannot read CSV: not all result columns present"; + errString = ErrorPrefix() + "not all result columns present"; + } + } + return batch; +} + +void TArrowCSV::SetNullValue(const TString& null) { + ConvertOptions.null_values = { std::string(null.data(), null.size()) }; + ConvertOptions.strings_can_be_null = true; + ConvertOptions.quoted_strings_can_be_null = false; +} + +std::shared_ptr<arrow::RecordBatch> TArrowCSV::ReadSingleBatch(const TString& csv, TString& errString) { + auto batch = ReadNext(csv, errString); + if (!batch) { + if (errString.empty()) { + errString = ErrorPrefix(); } + return {}; + } + + if (ReadNext(csv, errString)) { + errString = ErrorPrefix() + "too big CSV data portion"; + return {}; } return batch; } diff --git a/ydb/core/io_formats/ut/CMakeLists.darwin.txt b/ydb/core/io_formats/ut/CMakeLists.darwin.txt new file mode 100644 index 0000000000..d7632c83b9 --- /dev/null +++ b/ydb/core/io_formats/ut/CMakeLists.darwin.txt @@ -0,0 +1,48 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. 
Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_executable(ydb-core-io_formats-ut) +target_compile_options(ydb-core-io_formats-ut PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_include_directories(ydb-core-io_formats-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/io_formats +) +target_link_libraries(ydb-core-io_formats-ut PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + cpp-testing-unittest_main + ydb-core-io_formats + udf-service-exception_policy + yql-sql-pg_dummy +) +target_link_options(ydb-core-io_formats-ut PRIVATE + -Wl,-no_deduplicate + -Wl,-sdk_version,10.15 + -fPIC + -fPIC + -framework + CoreFoundation +) +target_sources(ydb-core-io_formats-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/io_formats/ut_csv.cpp +) +add_test( + NAME + ydb-core-io_formats-ut + COMMAND + ydb-core-io_formats-ut + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +vcs_info(ydb-core-io_formats-ut) diff --git a/ydb/core/io_formats/ut/CMakeLists.linux.txt b/ydb/core/io_formats/ut/CMakeLists.linux.txt new file mode 100644 index 0000000000..3df69f44ed --- /dev/null +++ b/ydb/core/io_formats/ut/CMakeLists.linux.txt @@ -0,0 +1,52 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_executable(ydb-core-io_formats-ut) +target_compile_options(ydb-core-io_formats-ut PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_include_directories(ydb-core-io_formats-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/io_formats +) +target_link_libraries(ydb-core-io_formats-ut PUBLIC + contrib-libs-cxxsupp + yutil + cpp-malloc-tcmalloc + libs-tcmalloc-no_percpu_cache + library-cpp-cpuid_check + cpp-testing-unittest_main + ydb-core-io_formats + udf-service-exception_policy + yql-sql-pg_dummy +) +target_link_options(ydb-core-io_formats-ut PRIVATE + -ldl + -lrt + -Wl,--no-as-needed + -fPIC + -fPIC + -lpthread + -lrt + -ldl +) +target_sources(ydb-core-io_formats-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/core/io_formats/ut_csv.cpp +) +add_test( + NAME + ydb-core-io_formats-ut + COMMAND + ydb-core-io_formats-ut + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +vcs_info(ydb-core-io_formats-ut) diff --git a/ydb/core/io_formats/ut/CMakeLists.txt b/ydb/core/io_formats/ut/CMakeLists.txt new file mode 100644 index 0000000000..dbfe6fa2c4 --- /dev/null +++ b/ydb/core/io_formats/ut/CMakeLists.txt @@ -0,0 +1,13 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (APPLE) + include(CMakeLists.darwin.txt) +elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND UNIX AND NOT APPLE) + include(CMakeLists.linux.txt) +endif() diff --git a/ydb/core/io_formats/ut_csv.cpp b/ydb/core/io_formats/ut_csv.cpp new file mode 100644 index 0000000000..c04adff5e9 --- /dev/null +++ b/ydb/core/io_formats/ut_csv.cpp @@ -0,0 +1,349 @@ +#include "csv.h" + +#include <ydb/core/formats/arrow_helpers.h> +#include <library/cpp/testing/unittest/registar.h> + +namespace NKikimr::NFormats { + +namespace { + +TString MakeHeader(const TVector<std::pair<TString, NScheme::TTypeId>>& columns, char delimiter) { + TString header; + for (auto& [name, _] : columns) { + header += name + delimiter; + } + if (header.size()) { + header.resize(header.size() - 1); + } + return header; +} + +TString TestIntsData(const TVector<std::pair<TString, NScheme::TTypeId>>& columns, ui32 numRows, + char delimiter, TString endLine = "\n", bool addEmptyLine = false) { + TString data; + for (ui32 row = 0; row < numRows; ++row) { + if (data.size()) { + data.resize(data.size() - 1); + data += endLine; + } + for (size_t i = 0; i < columns.size(); ++i) { + data += ToString(row) + delimiter; + } + } + data.resize(data.size() - 1); + if (addEmptyLine) { + data += endLine; + } + return data; +} + +std::shared_ptr<arrow::RecordBatch> +TestReadSingleBatch(TArrowCSV& reader, + const TVector<std::pair<TString, NScheme::TTypeId>>& columns, const TString& data, ui32 numRows) { + TString errorMessage; + auto batch = reader.ReadSingleBatch(data, errorMessage); + if (!errorMessage.empty()) { + Cerr << errorMessage << "\n"; + } + UNIT_ASSERT(batch); + UNIT_ASSERT(errorMessage.empty()); + UNIT_ASSERT(batch->ValidateFull().ok()); + UNIT_ASSERT_EQUAL(batch->num_rows(), numRows); + UNIT_ASSERT_EQUAL((size_t)batch->num_columns(), columns.size()); + + for (size_t i = 0; i < columns.size(); ++i) { + UNIT_ASSERT_EQUAL(columns[i].first, batch->schema()->field(i)->name()); + 
UNIT_ASSERT(NArrow::GetArrowType(columns[i].second)->Equals(batch->schema()->field(i)->type())); + // TODO: check data + } + return batch; +} + +std::shared_ptr<arrow::RecordBatch> +TestReadSingleBatch(const TVector<std::pair<TString, NScheme::TTypeId>>& columns, const TString& data, + char delimiter, bool header, ui32 numRows, ui32 skipRows = 0, std::optional<char> escape = {}) { + TArrowCSV reader(columns, header); + reader.SetDelimiter(delimiter); + if (skipRows) { + reader.SetSkipRows(skipRows); + } + if (escape) { + reader.SetEscaping(true, *escape); + } + + return TestReadSingleBatch(reader, columns, data, numRows); +} + +} + +Y_UNIT_TEST_SUITE(FormatCSV) { + Y_UNIT_TEST(Instants) { + const TString dateTimeString = "2005-08-09T18:31:42"; + const TString data = "11,12,2013-07-15," + dateTimeString + "," + dateTimeString + ",,,"; + TVector<std::pair<TString, NScheme::TTypeId>> columns; + + { + columns = { + {"datetime_int", NScheme::NTypeIds::Datetime}, + {"timestamp_int", NScheme::NTypeIds::Timestamp}, + {"date", NScheme::NTypeIds::Date}, + {"datetime", NScheme::NTypeIds::Datetime}, + {"timestamp", NScheme::NTypeIds::Timestamp}, + {"date_null", NScheme::NTypeIds::Timestamp}, + {"datetime_null", NScheme::NTypeIds::Timestamp}, + {"timestamp_null", NScheme::NTypeIds::Timestamp}, + }; + TInstant dtInstant; + Y_VERIFY(TInstant::TryParseIso8601(dateTimeString, dtInstant)); + TArrowCSV reader(columns, false); + + TString errorMessage; + auto batch = reader.ReadNext(data, errorMessage); + Cerr << errorMessage << "\n"; + UNIT_ASSERT(!!batch); + UNIT_ASSERT(errorMessage.empty()); + + auto cDateNull = batch->GetColumnByName("date_null"); + auto cDatetimeNull = batch->GetColumnByName("datetime_null"); + auto cTimestampNull = batch->GetColumnByName("timestamp_null"); + + auto cDatetimeInt = batch->GetColumnByName("datetime_int"); + auto cTimestampInt = batch->GetColumnByName("timestamp_int"); + auto cDate = batch->GetColumnByName("date"); + auto cDatetime = 
batch->GetColumnByName("datetime"); + auto cTimestamp = batch->GetColumnByName("timestamp"); + + Y_VERIFY(cDate->type()->id() == arrow::UInt16Type::type_id); + Y_VERIFY(cDatetime->type()->id() == arrow::UInt32Type::type_id); + Y_VERIFY(cTimestamp->type()->id() == arrow::TimestampType::type_id); + Y_VERIFY(cDatetimeInt->type()->id() == arrow::UInt32Type::type_id); + Y_VERIFY(cTimestampInt->type()->id() == arrow::TimestampType::type_id); + Y_VERIFY(batch->num_rows() == 1); + + { + auto& ui16Column = static_cast<arrow::UInt32Array&>(*cDateNull); + Y_VERIFY(ui16Column.IsNull(0)); + } + { + auto& ui32Column = static_cast<arrow::UInt32Array&>(*cDatetimeNull); + Y_VERIFY(ui32Column.IsNull(0)); + } + { + auto& tsColumn = static_cast<arrow::TimestampArray&>(*cTimestampNull); + Y_VERIFY(tsColumn.IsNull(0)); + } + { + auto& ui32Column = static_cast<arrow::UInt32Array&>(*cDatetimeInt); + Y_VERIFY(ui32Column.Value(0) == 11, "%d", ui32Column.Value(0)); + } + { + auto& tsColumn = static_cast<arrow::TimestampArray&>(*cTimestampInt); + Cerr << tsColumn.Value(0) << Endl; + Y_VERIFY(tsColumn.Value(0) == 12 * 1000000); + } + auto& ui16Column = static_cast<arrow::UInt16Array&>(*cDate); + Y_VERIFY(ui16Column.Value(0) == 15901, "%d", ui16Column.Value(0)); + auto& ui32Column = static_cast<arrow::UInt32Array&>(*cDatetime); + Y_VERIFY(ui32Column.Value(0) == dtInstant.Seconds(), "%d", ui32Column.Value(0)); + auto& tsColumn = static_cast<arrow::TimestampArray&>(*cTimestamp); + Y_VERIFY(tsColumn.Value(0) == (i64)dtInstant.MicroSeconds()); + } + } + + Y_UNIT_TEST(EmptyData) { + TString data = ""; + TVector<std::pair<TString, NScheme::TTypeId>> columns; + + { + TArrowCSV reader(columns, false); + + TString errorMessage; + auto batch = reader.ReadNext(data, errorMessage); + Cerr << errorMessage << "\n"; + UNIT_ASSERT(!batch); + UNIT_ASSERT(!errorMessage.empty()); + } + + { + columns = { + {"u32", NScheme::NTypeIds::Uint32}, + {"i64", NScheme::NTypeIds::Int64} + }; + + TArrowCSV reader(columns, 
false); + + TString errorMessage; + auto batch = reader.ReadNext(data, errorMessage); + Cerr << errorMessage << "\n"; + UNIT_ASSERT(!batch); + UNIT_ASSERT(!errorMessage.empty()); + } + } + + Y_UNIT_TEST(Common) { + TVector<std::pair<TString, NScheme::TTypeId>> columns = { + {"u8", NScheme::NTypeIds::Uint8}, + {"u16", NScheme::NTypeIds::Uint16}, + {"u32", NScheme::NTypeIds::Uint32}, + {"u64", NScheme::NTypeIds::Uint64}, + {"i8", NScheme::NTypeIds::Int8}, + {"i16", NScheme::NTypeIds::Int16}, + {"i32", NScheme::NTypeIds::Int32}, + {"i64", NScheme::NTypeIds::Int64} + }; + + // half of columns + auto uColumns = columns; + uColumns.resize(columns.size() / 2); + + // another half of columns + TVector<std::pair<TString, NScheme::TTypeId>> sColumns( + columns.begin() + (columns.size() / 2), columns.end()); + + std::vector<char> delimiters = {',', ';', '\t'}; + std::vector<TString> endlines = {"\n", "\r\n", "\r"}; + bool addEmptyLine = false; + ui32 numRows = 10; + + for (auto& endLine : endlines) { + for (auto delim : delimiters) { + // no header + addEmptyLine = !addEmptyLine; + TString csv = TestIntsData(columns, numRows, delim, endLine, addEmptyLine); + TestReadSingleBatch(columns, csv, delim, false, numRows); + + // header, all columns + TString header = MakeHeader(columns, delim); + TestReadSingleBatch(columns, header + endLine + csv, delim, true, numRows); + + // header, skip rows, all columns + TestReadSingleBatch(columns, TString("line1") + endLine + "line2" + endLine + header + endLine + csv, + delim, true, numRows, 2); + + // header, some columns + TestReadSingleBatch(uColumns, header + endLine + csv, delim, true, numRows); + TestReadSingleBatch(sColumns, header + endLine + csv, delim, true, numRows); + + // header, skip rows, some columns + TestReadSingleBatch(uColumns, endLine + header + endLine + csv, delim, true, numRows, 1); + TestReadSingleBatch(sColumns, endLine + header + endLine + csv, delim, true, numRows, 1); + } + } + } + + Y_UNIT_TEST(Strings) { + 
TVector<std::pair<TString, NScheme::TTypeId>> columns = { + {"string", NScheme::NTypeIds::String}, + {"utf8", NScheme::NTypeIds::Utf8} + }; + + // TODO: SetQuoting + std::vector<TString> quotes = {"\"", "\'", ""}; + + char delimiter = ','; + TString endLine = "\n"; + + for (auto& q : quotes) { + TString csv; + csv += q + "aaa" + q + delimiter + q + "bbbbb" + q + endLine; + csv += q + "123" + q + delimiter + q + "456" + q + endLine; + csv += q + "+-/*=" + q + delimiter + q + "~!@#$%^&*()?" + q + endLine; + + TestReadSingleBatch(columns, csv, delimiter, false, 3); + } + + for (auto& q : quotes) { + TString csv; + csv += q + "d\\'Artagnan" + q + delimiter + q + "Jeanne d'Arc" + q + endLine; + csv += q + "\\\'\\\"\\\'" + q + delimiter + q + "\\\"\\\'\\\"" + q + endLine; + + auto batch = TestReadSingleBatch(columns, csv, delimiter, false, 2, 0, '\\'); + for (auto& col : batch->columns()) { + auto& typedColumn = static_cast<arrow::BinaryArray&>(*col); + for (int i = 0; i < typedColumn.length(); ++i) { + auto view = typedColumn.GetView(i); + std::string_view value(view.data(), view.size()); + Cerr << value << "\n"; + } + } + } + } + + Y_UNIT_TEST(Nulls) { + TVector<std::pair<TString, NScheme::TTypeId>> columns = { + {"u32", NScheme::NTypeIds::Uint32}, + {"string", NScheme::NTypeIds::String}, + {"utf8", NScheme::NTypeIds::Utf8} + }; + + std::vector<TString> nulls = {"", "", "\\N", "NULL"}; + bool defaultNull = true; + + char delimiter = ','; + TString endLine = "\n"; + TString q = "\""; + + std::string nullChar = "ᴺᵁᴸᴸ"; + + for (auto& null : nulls) { + TString csv; + csv += TString() + null + delimiter + q + q + delimiter + q + q + endLine; + csv += TString() + null + delimiter + q + null + q + delimiter + q + null + q + endLine; + csv += TString() + null + delimiter + null + delimiter + null + endLine; + + TArrowCSV reader(columns, false); + if (!nulls.empty() || !defaultNull) { + reader.SetNullValue(null); + } else { + defaultNull = false; + } + + auto batch = 
TestReadSingleBatch(reader, columns, csv, 3); + + Cerr << "src:\n" << csv; + + auto& ui32Column = static_cast<arrow::UInt32Array&>(*batch->columns()[0]); + auto& strColumn = static_cast<arrow::BinaryArray&>(*batch->columns()[1]); + auto& utf8Column = static_cast<arrow::StringArray&>(*batch->columns()[2]); + + Cerr << "parsed:\n"; + + for (int i = 0; i < batch->num_rows(); ++i) { + if (ui32Column.IsNull(i)) { + Cerr << nullChar << delimiter; + } else { + Cerr << ui32Column.Value(i) << delimiter; + } + + if (strColumn.IsNull(i)) { + Cerr << nullChar << delimiter; + } else { + auto view = strColumn.GetView(i); + std::string_view value(view.data(), view.size()); + Cerr << value << delimiter; + } + + if (utf8Column.IsNull(i)) { + Cerr << nullChar << "\n"; + } else { + auto view = utf8Column.GetView(i); + std::string_view value(view.data(), view.size()); + Cerr << value << "\n"; + } + + UNIT_ASSERT(ui32Column.IsNull(i)); + UNIT_ASSERT(i == 2 || !strColumn.IsNull(i)); + UNIT_ASSERT(i == 2 || !utf8Column.IsNull(i)); + UNIT_ASSERT(i != 2 || strColumn.IsNull(i)); + UNIT_ASSERT(i != 2 || utf8Column.IsNull(i)); + } + } + } +#if 0 + Y_UNIT_TEST(Dates) { + // TODO + } +#endif +} + +} diff --git a/ydb/core/keyvalue/keyvalue_collector.cpp b/ydb/core/keyvalue/keyvalue_collector.cpp index 25dc777d24..b4c72dbd14 100644 --- a/ydb/core/keyvalue/keyvalue_collector.cpp +++ b/ydb/core/keyvalue/keyvalue_collector.cpp @@ -15,7 +15,8 @@ namespace NKeyValue { struct TGroupCollector { TDeque<TLogoBlobID> Keep; TDeque<TLogoBlobID> DoNotKeep; - ui32 Step = 0; + ui32 CountOfSentFlags = 0; + ui32 NextCountOfSentFlags = 0; }; class TKeyValueCollector : public TActorBootstrapped<TKeyValueCollector> { @@ -27,6 +28,7 @@ class TKeyValueCollector : public TActorBootstrapped<TKeyValueCollector> { TBackoffTimer BackoffTimer; ui64 CollectorErrors; bool IsSpringCleanup; + bool IsRepeatedRequest = false; // [channel][groupId] TVector<TMap<ui32, TGroupCollector>> CollectorForGroupForChannel; @@ -143,7 +145,8 
@@ public: bool isLastRequestInCollector = false; { TGroupCollector &collector = currentCollectorIterator->second; - isLastRequestInCollector = (collector.Step == collector.Keep.size() + collector.DoNotKeep.size()); + collector.CountOfSentFlags = collector.NextCountOfSentFlags; + isLastRequestInCollector = (collector.CountOfSentFlags == collector.Keep.size() + collector.DoNotKeep.size()); } if (isLastRequestInCollector) { STLOG(NLog::PRI_DEBUG, NKikimrServices::KEYVALUE_GC, KVC08, "Last group was empty, it's erased", @@ -180,6 +183,7 @@ public: } // Rertry + IsRepeatedRequest = true; ui64 backoffMs = BackoffTimer.NextBackoffMs(); STLOG(NLog::PRI_DEBUG, NKikimrServices::KEYVALUE_GC, KVC02, "Collector got not OK status, retry", (TabletId, TabletInfo->TabletID), (GroupId, groupId), (Channel, channelId), @@ -230,10 +234,11 @@ public: THolder<TVector<TLogoBlobID>> doNotKeep; TGroupCollector &collector = currentCollectorIterator->second; + collector.NextCountOfSentFlags = collector.CountOfSentFlags; ui32 doNotKeepSize = collector.DoNotKeep.size(); - if (collector.Step < doNotKeepSize) { - doNotKeepSize -= collector.Step; + if (collector.NextCountOfSentFlags < doNotKeepSize) { + doNotKeepSize -= collector.NextCountOfSentFlags; } else { doNotKeepSize = 0; } @@ -241,17 +246,19 @@ public: if (doNotKeepSize) { doNotKeepSize = Min(doNotKeepSize, (ui32)MaxCollectGarbageFlagsPerMessage); doNotKeep.Reset(new TVector<TLogoBlobID>(doNotKeepSize)); - auto begin = collector.DoNotKeep.begin() + collector.Step; + auto begin = collector.DoNotKeep.begin() + collector.NextCountOfSentFlags; auto end = begin + doNotKeepSize; - collector.Step += doNotKeepSize; + collector.NextCountOfSentFlags += doNotKeepSize; Copy(begin, end, doNotKeep->begin()); - Copy(doNotKeep->cbegin(), doNotKeep->cend(), std::back_inserter(CollectedDoNotKeep)); + if (!IsRepeatedRequest) { + Copy(doNotKeep->cbegin(), doNotKeep->cend(), std::back_inserter(CollectedDoNotKeep)); + } } ui32 keepStartIdx = 0; - if 
(collector.Step >= collector.DoNotKeep.size()) { - keepStartIdx = collector.Step - collector.DoNotKeep.size(); + if (collector.NextCountOfSentFlags >= collector.DoNotKeep.size()) { + keepStartIdx = collector.NextCountOfSentFlags - collector.DoNotKeep.size(); } ui32 keepSize = Min(collector.Keep.size() - keepStartIdx, MaxCollectGarbageFlagsPerMessage - doNotKeepSize); if (keepSize) { @@ -269,10 +276,10 @@ public: } (*keep)[idx] = *it; } - collector.Step += idx; + collector.NextCountOfSentFlags += idx; } - bool isLast = (collector.Keep.size() + collector.DoNotKeep.size() == collector.Step); + bool isLast = (collector.Keep.size() + collector.DoNotKeep.size() == collector.NextCountOfSentFlags); ui32 collectGeneration = CollectOperation->Header.CollectGeneration; ui32 collectStep = CollectOperation->Header.CollectStep; @@ -288,6 +295,7 @@ public: new TEvBlobStorage::TEvCollectGarbage(TabletInfo->TabletID, RecordGeneration, PerGenerationCounter, channelIdx, isLast, collectGeneration, collectStep, keep.Release(), doNotKeep.Release(), TInstant::Max(), true), (ui64)TKeyValueState::ECollectCookie::Soft); + IsRepeatedRequest = false; } void HandleContinueGC(TEvKeyValue::TEvContinueGC::TPtr &ev) { diff --git a/ydb/core/keyvalue/keyvalue_collector_ut.cpp b/ydb/core/keyvalue/keyvalue_collector_ut.cpp index 835f1be056..94d8314b8e 100644 --- a/ydb/core/keyvalue/keyvalue_collector_ut.cpp +++ b/ydb/core/keyvalue/keyvalue_collector_ut.cpp @@ -100,6 +100,10 @@ public: TEvent* GrabEvent(TAutoPtr<IEventHandle>& handle) { return Runtime->GrabEdgeEventRethrow<TEvent>(handle); } + + void AllowSchedule(TActorId actorId) { + Runtime->EnableScheduleForActor(actorId); + } }; @@ -169,6 +173,54 @@ Y_UNIT_TEST(TestKeyValueCollectorSingle) { UNIT_ASSERT(eraseCollect); } +Y_UNIT_TEST(TestKeyValueCollectorSingleWithOneError) { + TContext context; + context.Setup(); + + TVector<TLogoBlobID> keep; + keep.emplace_back(0x10010000001000Bull, 5, 58949, NKeyValue::BLOB_CHANNEL, 1209816, 10); + 
TVector<TLogoBlobID> doNotKeep; + TIntrusivePtr<NKeyValue::TCollectOperation> operation(new NKeyValue::TCollectOperation(100, 100, std::move(keep), std::move(doNotKeep))); + context.SetActor(CreateKeyValueCollector( + context.GetTabletActorId(), operation, context.GetTabletInfo().Get(), 200, 200, true)); + + ui32 erased = 0; + for (ui32 idx = 0; idx < 6; ++idx) { + TAutoPtr<IEventHandle> handle; + auto collect = context.GrabEvent<TEvBlobStorage::TEvCollectGarbage>(handle); + UNIT_ASSERT(collect); + if (handle->Recipient == context.GetProxyActorId(NKeyValue::BLOB_CHANNEL, 5)) { + UNIT_ASSERT(collect->Keep.Get()); + UNIT_ASSERT(collect->Keep->size() == 1); + auto keep = *collect->Keep; + ui32 generation = (*collect->Keep)[0].Generation(); + UNIT_ASSERT(handle->Recipient == context.GetProxyActorId(collect->Channel, generation)); + context.AllowSchedule(handle->Sender); + + context.Send(new TEvBlobStorage::TEvCollectGarbageResult(NKikimrProto::ERROR, collect->TabletId, + collect->RecordGeneration, collect->PerGenerationCounter, collect->Channel)); + collect = context.GrabEvent<TEvBlobStorage::TEvCollectGarbage>(handle); + UNIT_ASSERT(collect->Keep.Get()); + UNIT_ASSERT(collect->Keep->size() == 1); + UNIT_ASSERT(keep == *collect->Keep); + generation = (*collect->Keep)[0].Generation(); + UNIT_ASSERT(handle->Recipient == context.GetProxyActorId(collect->Channel, generation)); + + ++erased; + } else { + UNIT_ASSERT(!collect->Keep.Get()); + } + + context.Send(new TEvBlobStorage::TEvCollectGarbageResult(NKikimrProto::OK, collect->TabletId, + collect->RecordGeneration, collect->PerGenerationCounter, collect->Channel)); + } + UNIT_ASSERT(erased == 1); + + TAutoPtr<IEventHandle> handle; + auto eraseCollect = context.GrabEvent<TEvKeyValue::TEvCompleteGC>(handle); + UNIT_ASSERT(eraseCollect); +} + Y_UNIT_TEST(TestKeyValueCollectorMultiple) { TContext context; context.Setup(); @@ -199,6 +251,12 @@ Y_UNIT_TEST(TestKeyValueCollectorMultiple) { TAutoPtr<IEventHandle> handle; auto 
collect = context.GrabEvent<TEvBlobStorage::TEvCollectGarbage>(handle); UNIT_ASSERT(collect); + if (collect->DoNotKeep && collect->DoNotKeep->size()) { + context.AllowSchedule(handle->Sender); + context.Send(new TEvBlobStorage::TEvCollectGarbageResult(NKikimrProto::ERROR, collect->TabletId, + collect->RecordGeneration, collect->PerGenerationCounter, collect->Channel)); + collect = context.GrabEvent<TEvBlobStorage::TEvCollectGarbage>(handle); + } bool isPresent = false; for (auto it = ids.begin(); it != ids.end(); ++it) { if (handle->Recipient == context.GetProxyActorId(it->Channel(), it->Generation())) { @@ -220,6 +278,9 @@ Y_UNIT_TEST(TestKeyValueCollectorMultiple) { if (collect && collect->DoNotKeep && collect->DoNotKeep->size()) { auto complete = context.GrabEvent<TEvKeyValue::TEvPartialCompleteGC>(handle); + THashSet<TLogoBlobID> uniqueBlobs; + uniqueBlobs.insert(complete->CollectedDoNotKeep.begin(), complete->CollectedDoNotKeep.end()); + UNIT_ASSERT_VALUES_EQUAL(uniqueBlobs.size(), complete->CollectedDoNotKeep.size()); complete->CollectedDoNotKeep.clear(); auto cont = std::make_unique<TEvKeyValue::TEvContinueGC>(std::move(complete->CollectedDoNotKeep)); context.Send(cont.release()); diff --git a/ydb/core/kqp/kqp_compile_actor.cpp b/ydb/core/kqp/kqp_compile_actor.cpp index 19c3449656..737ba733c3 100644 --- a/ydb/core/kqp/kqp_compile_actor.cpp +++ b/ydb/core/kqp/kqp_compile_actor.cpp @@ -270,6 +270,9 @@ private: if (status == Ydb::StatusIds::SUCCESS) { YQL_ENSURE(kqpResult.PreparingQuery); + if (Config->EnableLlvm.Get()) { + kqpResult.PreparingQuery->SetEnableLlvm(*Config->EnableLlvm.Get()); + } KqpCompileResult->PreparedQuery.reset(kqpResult.PreparingQuery.release()); KqpCompileResult->QueryTraits = kqpResult.QueryTraits; @@ -341,6 +344,9 @@ private: if (status == Ydb::StatusIds::SUCCESS && !FailForcedNewEngineCompilationStatus.load(std::memory_order_relaxed)) { YQL_ENSURE(kqpResult.PreparingQuery); + if (Config->EnableLlvm.Get()) { + 
kqpResult.PreparingQuery->SetEnableLlvm(*Config->EnableLlvm.Get()); + } KqpCompileResult->PreparedQueryNewEngine.reset(kqpResult.PreparingQuery.release()); auto duration = TInstant::Now() - RecompileStartTime; diff --git a/ydb/core/kqp/kqp_ic_gateway.cpp b/ydb/core/kqp/kqp_ic_gateway.cpp index 9b479ad065..d6c817f93c 100644 --- a/ydb/core/kqp/kqp_ic_gateway.cpp +++ b/ydb/core/kqp/kqp_ic_gateway.cpp @@ -2430,6 +2430,10 @@ private: toType->set_type_id(Ydb::Type::UINT32); toValue->set_uint32_value(FromString<ui32>(fromValue)); break; + case NYql::EDataSlot::Int64: + toType->set_type_id(Ydb::Type::INT64); + toValue->set_int64_value(FromString<i64>(fromValue)); + break; case NYql::EDataSlot::Uint64: toType->set_type_id(Ydb::Type::UINT64); toValue->set_uint64_value(FromString<ui64>(fromValue)); diff --git a/ydb/core/kqp/kqp_session_actor.cpp b/ydb/core/kqp/kqp_session_actor.cpp index 371536db99..a75513b9f3 100644 --- a/ydb/core/kqp/kqp_session_actor.cpp +++ b/ydb/core/kqp/kqp_session_actor.cpp @@ -597,6 +597,12 @@ public: QueryState->TxCtx->SnapshotHandle.ManagingActor = snapMgrActorId; } + void DiscardPersistentSnapshot(const IKqpGateway::TKqpSnapshotHandle& handle) { + if (handle.ManagingActor) { // persistent snapshot was acquired + Send(handle.ManagingActor, new TEvKqpSnapshot::TEvDiscardSnapshot(handle.Snapshot)); + } + } + void AcquireMvccSnapshot() { LOG_D("AcquireMvccSnapshot"); auto timeout = QueryState->QueryDeadlines.TimeoutAt - TAppData::TimeProvider->Now(); @@ -854,6 +860,8 @@ public: request.DisableLlvmForUdfStages = Config->DisableLlvmForUdfStages(); request.LlvmEnabled = Config->GetEnableLlvm() != EOptionalFlag::Disabled; YQL_ENSURE(queryState); + bool enableLlvm = queryState->PreparedQuery->HasEnableLlvm() ? 
queryState->PreparedQuery->GetEnableLlvm() : true; + request.LlvmEnabled = enableLlvm && QueryState->Request.GetType() == NKikimrKqp::QUERY_TYPE_SQL_SCAN; request.Snapshot = queryState->TxCtx->GetSnapshot(); return request; @@ -1190,8 +1198,6 @@ public: LOG_I(SelfId() << " " << requestInfo << " TEvTxResponse has non-success status, CurrentTx: " << QueryState->CurrentTx << " response->DebugString(): " << response->DebugString()); - QueryState->TxCtx->Invalidate(); - auto status = response->GetStatus(); TIssues issues; IssuesFromMessage(response->GetIssues(), issues); @@ -1819,8 +1825,12 @@ public: void Cleanup(bool isFinal = false) { isFinal = isFinal || !QueryState->KeepSession; - if (QueryState && QueryState->TxCtx && QueryState->TxCtx->IsInvalidated()) { - InvalidateExplicitTransaction(QueryState->TxCtx, QueryState->TxId); + if (QueryState && QueryState->TxCtx) { + auto& txCtx = QueryState->TxCtx; + if (txCtx->IsInvalidated()) { + InvalidateExplicitTransaction(QueryState->TxCtx, QueryState->TxId); + } + DiscardPersistentSnapshot(txCtx->SnapshotHandle); } if (isFinal) @@ -1943,6 +1953,7 @@ public: auto* response = QueryResponse->Record.GetRef().MutableResponse(); + Y_ENSURE(QueryState); if (QueryState->CompileResult) { AddQueryIssues(*response, QueryState->CompileResult->Issues); } @@ -1957,14 +1968,13 @@ public: IssueToMessage(TIssue{message}, response->AddQueryIssues()); } - if (QueryState) { - if (QueryState->TxCtx) { - QueryState->TxCtx->Invalidate(); - } - - FillTxInfo(response); + if (QueryState->TxCtx) { + QueryState->TxCtx->OnEndQuery(); + QueryState->TxCtx->Invalidate(); } + FillTxInfo(response); + Cleanup(IsFatalError(ydbStatus)); } diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp index 808815314e..7fec9ca57f 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp @@ -103,8 +103,9 @@ protected: return output; } - TMaybeNode<TExprBase> 
BuildReadTableRangesStage(TExprBase node, TExprContext& ctx) { - TExprBase output = KqpBuildReadTableRangesStage(node, ctx, KqpCtx); + TMaybeNode<TExprBase> BuildReadTableRangesStage(TExprBase node, TExprContext& ctx, const TGetParents& getParents) { + auto parents = getParents(); + TExprBase output = KqpBuildReadTableRangesStage(node, ctx, KqpCtx, *parents); DumpAppliedRule("BuildReadTableRangesStage", node.Ptr(), output.Ptr(), ctx); return output; } diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_build_stage.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_build_stage.cpp index 821ecbb2da..a97c65bedb 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_build_stage.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_build_stage.cpp @@ -180,7 +180,7 @@ TExprBase KqpBuildReadTableStage(TExprBase node, TExprContext& ctx, const TKqpOp } TExprBase KqpBuildReadTableRangesStage(TExprBase node, TExprContext& ctx, - const TKqpOptimizeContext& kqpCtx) + const TKqpOptimizeContext& kqpCtx, const TParentsMap& parents) { if (!node.Maybe<TKqlReadTableRanges>()) { return node; @@ -228,6 +228,10 @@ TExprBase KqpBuildReadTableRangesStage(TExprBase node, TExprContext& ctx, if (!input.Maybe<TDqCnUnionAll>()) { return node; } + + if (!IsSingleConsumerConnection(input, parents, false)) { + continue; + } inputs.push_back(input); stageInputs.push_back( diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_rules.h b/ydb/core/kqp/opt/physical/kqp_opt_phy_rules.h index 54eb24f227..9bbe1a746a 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_rules.h +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_rules.h @@ -15,7 +15,7 @@ NYql::NNodes::TExprBase KqpBuildReadTableStage(NYql::NNodes::TExprBase node, NYq const TKqpOptimizeContext& kqpCtx); NYql::NNodes::TExprBase KqpBuildReadTableRangesStage(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx, - const TKqpOptimizeContext& kqpCtx); + const TKqpOptimizeContext& kqpCtx, const NYql::TParentsMap& parents); NYql::NNodes::TExprBase 
KqpBuildLookupTableStage(NYql::NNodes::TExprBase node, NYql::TExprContext& ctx); diff --git a/ydb/core/kqp/provider/yql_kikimr_exec.cpp b/ydb/core/kqp/provider/yql_kikimr_exec.cpp index 5ff0685bb0..4ee806fbae 100644 --- a/ydb/core/kqp/provider/yql_kikimr_exec.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_exec.cpp @@ -864,6 +864,28 @@ public: TStringBuilder() << "Unknown changefeed format: " << format)); return SyncError(); } + } else if (name == "virtual_timestamps") { + auto vt = TString( + setting.Value().Cast<TCoDataCtor>().Literal().Cast<TCoAtom>().Value() + ); + + add_changefeed->set_virtual_timestamps(FromString<bool>(to_lower(vt))); + } else if (name == "retention_period") { + YQL_ENSURE(setting.Value().Maybe<TCoInterval>()); + const auto value = FromString<i64>( + setting.Value().Cast<TCoInterval>().Literal().Value() + ); + + if (value <= 0) { + ctx.AddError(TIssue(ctx.GetPosition(setting.Name().Pos()), + TStringBuilder() << name << " must be positive")); + return SyncError(); + } + + const auto duration = TDuration::FromValue(value); + auto& retention = *add_changefeed->mutable_retention_period(); + retention.set_seconds(duration.Seconds()); + retention.set_nanos(duration.NanoSecondsOfSecond()); } else if (name == "local") { // nop } else { diff --git a/ydb/core/kqp/ut/CMakeLists.darwin.txt b/ydb/core/kqp/ut/CMakeLists.darwin.txt index 6f8de5cd9d..e3065668c6 100644 --- a/ydb/core/kqp/ut/CMakeLists.darwin.txt +++ b/ydb/core/kqp/ut/CMakeLists.darwin.txt @@ -30,6 +30,8 @@ target_link_libraries(ydb-core-kqp-ut PUBLIC kqp-ut-common cpp-client-draft cpp-client-ydb_proto + cpp-client-ydb_scheme + cpp-client-ydb_topic re2_udf ) target_link_options(ydb-core-kqp-ut PRIVATE diff --git a/ydb/core/kqp/ut/CMakeLists.linux.txt b/ydb/core/kqp/ut/CMakeLists.linux.txt index c66efa1e2a..d4346266cf 100644 --- a/ydb/core/kqp/ut/CMakeLists.linux.txt +++ b/ydb/core/kqp/ut/CMakeLists.linux.txt @@ -32,6 +32,8 @@ target_link_libraries(ydb-core-kqp-ut PUBLIC kqp-ut-common 
cpp-client-draft cpp-client-ydb_proto + cpp-client-ydb_scheme + cpp-client-ydb_topic re2_udf ) target_link_options(ydb-core-kqp-ut PRIVATE diff --git a/ydb/core/kqp/ut/kqp_not_null_columns_ut.cpp b/ydb/core/kqp/ut/kqp_not_null_columns_ut.cpp index 372d06566a..bc24fba0c6 100644 --- a/ydb/core/kqp/ut/kqp_not_null_columns_ut.cpp +++ b/ydb/core/kqp/ut/kqp_not_null_columns_ut.cpp @@ -491,7 +491,7 @@ Y_UNIT_TEST_SUITE(KqpNotNullColumns) { { const auto query = Q_("ALTER TABLE `/Root/TestAddNotNullColumn` ADD COLUMN Value2 String NOT NULL"); auto result = session.ExecuteSchemeQuery(query).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::GENERIC_ERROR, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::BAD_REQUEST, result.GetIssues().ToString()); } } diff --git a/ydb/core/kqp/ut/kqp_scheme_ut.cpp b/ydb/core/kqp/ut/kqp_scheme_ut.cpp index 424397379f..4043013b3d 100644 --- a/ydb/core/kqp/ut/kqp_scheme_ut.cpp +++ b/ydb/core/kqp/ut/kqp_scheme_ut.cpp @@ -1,5 +1,7 @@ #include <ydb/core/kqp/ut/common/kqp_ut_common.h> #include <ydb/public/sdk/cpp/client/ydb_proto/accessor.h> +#include <ydb/public/sdk/cpp/client/ydb_scheme/scheme.h> +#include <ydb/public/sdk/cpp/client/ydb_topic/topic.h> #include <library/cpp/threading/local_executor/local_executor.h> @@ -1576,17 +1578,17 @@ Y_UNIT_TEST_SUITE(KqpScheme) { EStatus::GENERIC_ERROR, "Interval value cannot be negative"); AlterTableSetttings(session, tableName, {{"TTL", R"(Interval("P0D") ON CreatedAt)"}}, compat, - EStatus::GENERIC_ERROR, "Cannot enable TTL on unknown column"); + EStatus::BAD_REQUEST, "Cannot enable TTL on unknown column"); AlterTableSetttings(session, tableName, {{"TTL", R"(Interval("P0D") ON StringValue)"}}, compat, - EStatus::GENERIC_ERROR, "Unsupported column type"); + EStatus::BAD_REQUEST, "Unsupported column type"); AlterTableSetttings(session, tableName, {{"TTL", R"(Interval("P0D") ON Uint32Value)"}}, compat, - EStatus::GENERIC_ERROR, 
"'ValueSinceUnixEpochModeSettings' should be specified"); + EStatus::BAD_REQUEST, "'ValueSinceUnixEpochModeSettings' should be specified"); AlterTableSetttings(session, tableName, {{"TTL", R"(Interval("P0D") ON Uint64Value)"}}, compat, - EStatus::GENERIC_ERROR, "'ValueSinceUnixEpochModeSettings' should be specified"); + EStatus::BAD_REQUEST, "'ValueSinceUnixEpochModeSettings' should be specified"); AlterTableSetttings(session, tableName, {{"TTL", R"(Interval("P0D") ON DyNumberValue)"}}, compat, - EStatus::GENERIC_ERROR, "'ValueSinceUnixEpochModeSettings' should be specified"); + EStatus::BAD_REQUEST, "'ValueSinceUnixEpochModeSettings' should be specified"); AlterTableSetttings(session, tableName, {{"TTL", R"(Interval("P0D") ON Ts)"}}, compat); { @@ -1605,7 +1607,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { --!syntax_v1 ALTER TABLE `)" << tableName << R"(` DROP COLUMN Ts;)"; auto result = session.ExecuteSchemeQuery(query).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::GENERIC_ERROR, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::BAD_REQUEST, result.GetIssues().ToString()); UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Can't drop TTL column"); } @@ -2659,6 +2661,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { pqConfig.SetEnabled(true); pqConfig.SetEnableProtoSourceIdInfo(true); pqConfig.SetTopicsAreFirstClassCitizen(true); + pqConfig.SetRequireCredentialsInNewProtocol(false); pqConfig.AddClientServiceType()->SetName("data-streams"); return pqConfig; } @@ -2705,6 +2708,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { PRIMARY KEY (Key) ); )"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); } @@ -2771,6 +2775,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { PRIMARY KEY (Key) ); )"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, 
result.GetIssues().ToString()); } @@ -2814,6 +2819,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { PRIMARY KEY (Key) ); )"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); } @@ -2879,6 +2885,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { PRIMARY KEY (Key) ); )"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); } @@ -2893,6 +2900,141 @@ Y_UNIT_TEST_SUITE(KqpScheme) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SCHEME_ERROR, result.GetIssues().ToString()); } } + + Y_UNIT_TEST(ChangefeedRetentionPeriod) { + using namespace NTopic; + + TKikimrRunner kikimr(TKikimrSettings().SetPQConfig(DefaultPQConfig())); + auto pq = TTopicClient(kikimr.GetDriver(), TTopicClientSettings().Database("/Root")); + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + { + auto query = R"( + --!syntax_v1 + CREATE TABLE `/Root/table` ( + Key Uint64, + Value String, + PRIMARY KEY (Key) + ); + )"; + + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + + { // default (1d) + auto query = R"( + --!syntax_v1 + ALTER TABLE `/Root/table` ADD CHANGEFEED `feed_1` WITH ( + MODE = 'KEYS_ONLY', FORMAT = 'JSON' + ); + )"; + + const auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto desc = pq.DescribeTopic("/Root/table/feed_1").ExtractValueSync(); + UNIT_ASSERT_C(desc.IsSuccess(), desc.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(desc.GetTopicDescription().GetRetentionPeriod(), TDuration::Days(1)); + } + + { // custom (1h) + auto query = R"( + --!syntax_v1 + ALTER TABLE `/Root/table` ADD CHANGEFEED 
`feed_2` WITH ( + MODE = 'KEYS_ONLY', FORMAT = 'JSON', RETENTION_PERIOD = Interval("PT1H") + ); + )"; + + const auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto desc = pq.DescribeTopic("/Root/table/feed_2").ExtractValueSync(); + UNIT_ASSERT_C(desc.IsSuccess(), desc.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(desc.GetTopicDescription().GetRetentionPeriod(), TDuration::Hours(1)); + } + + { // non-positive (invalid) + auto query = R"( + --!syntax_v1 + ALTER TABLE `/Root/table` ADD CHANGEFEED `feed_3` WITH ( + MODE = 'KEYS_ONLY', FORMAT = 'JSON', RETENTION_PERIOD = Interval("PT0H") + ); + )"; + + const auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::GENERIC_ERROR, result.GetIssues().ToString()); + } + + { // too much (32d) + auto query = R"( + --!syntax_v1 + ALTER TABLE `/Root/table` ADD CHANGEFEED `feed_3` WITH ( + MODE = 'KEYS_ONLY', FORMAT = 'JSON', RETENTION_PERIOD = Interval("P32D") + ); + )"; + + const auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::BAD_REQUEST, result.GetIssues().ToString()); + } + } + + Y_UNIT_TEST(CreatedAt) { + TKikimrRunner kikimr(TKikimrSettings().SetPQConfig(DefaultPQConfig())); + auto scheme = NYdb::NScheme::TSchemeClient(kikimr.GetDriver(), TCommonClientSettings().Database("/Root")); + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + { + auto query = R"( + --!syntax_v1 + CREATE TABLE `/Root/dir/table` ( + Key Uint64, + Value String, + PRIMARY KEY (Key) + ); + )"; + + const auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + + NYdb::NScheme::TVirtualTimestamp createdAt; + + { // describe table + 
auto desc = session.DescribeTable("/Root/dir/table").ExtractValueSync(); + UNIT_ASSERT_C(desc.IsSuccess(), desc.GetIssues().ToString()); + + createdAt = desc.GetEntry().CreatedAt; + UNIT_ASSERT(createdAt.PlanStep > 0); + UNIT_ASSERT(createdAt.TxId != 0); + } + + { // describe dir + auto desc = scheme.DescribePath("/Root/dir").ExtractValueSync(); + UNIT_ASSERT_C(desc.IsSuccess(), desc.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(desc.GetEntry().CreatedAt, createdAt); + } + + { // list dir + auto desc = scheme.ListDirectory("/Root/dir").ExtractValueSync(); + UNIT_ASSERT_C(desc.IsSuccess(), desc.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(desc.GetEntry().CreatedAt, createdAt); + + UNIT_ASSERT_VALUES_EQUAL(desc.GetChildren().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(desc.GetChildren()[0].CreatedAt, createdAt); + } + + { // copy table + const auto result = session.CopyTable("/Root/dir/table", "/Root/dir/copy").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto desc = session.DescribeTable("/Root/dir/copy").ExtractValueSync(); + UNIT_ASSERT_C(desc.IsSuccess(), desc.GetIssues().ToString()); + UNIT_ASSERT(desc.GetEntry().CreatedAt > createdAt); + } + } } } // namespace NKqp diff --git a/ydb/core/mind/bscontroller/cmds_box.cpp b/ydb/core/mind/bscontroller/cmds_box.cpp index 2b38f3cfb9..6bf40fba75 100644 --- a/ydb/core/mind/bscontroller/cmds_box.cpp +++ b/ydb/core/mind/bscontroller/cmds_box.cpp @@ -23,6 +23,9 @@ namespace NKikimr::NBsController { TBoxInfo::THostInfo info; info.HostConfigId = host.GetHostConfigId(); + if (const ui32 nodeId = host.GetEnforcedNodeId()) { + info.EnforcedNodeId = nodeId; + } const auto &hostConfigs = HostConfigs.Get(); if (!hostConfigs.count(info.HostConfigId)) { diff --git a/ydb/core/mind/bscontroller/config_cmd.cpp b/ydb/core/mind/bscontroller/config_cmd.cpp index 37e07a3eb2..8fb06123ff 100644 --- a/ydb/core/mind/bscontroller/config_cmd.cpp +++ 
b/ydb/core/mind/bscontroller/config_cmd.cpp @@ -342,7 +342,7 @@ namespace NKikimr::NBsController { void Complete(const TActorContext&) override { if (auto state = std::exchange(State, std::nullopt)) { - STLOG(PRI_INFO, BS_CONTROLLER_AUDIT, BSCA09, "Transaction complete", (UniqueId, State->UniqueId)); + STLOG(PRI_INFO, BS_CONTROLLER_AUDIT, BSCA09, "Transaction complete", (UniqueId, state->UniqueId)); state->ApplyConfigUpdates(); } TActivationContext::Send(new IEventHandle(NotifyId, Self->SelfId(), Ev.Release(), 0, Cookie)); diff --git a/ydb/core/node_whiteboard/node_whiteboard.h b/ydb/core/node_whiteboard/node_whiteboard.h index 8a986dead8..b33e4d39ca 100644 --- a/ydb/core/node_whiteboard/node_whiteboard.h +++ b/ydb/core/node_whiteboard/node_whiteboard.h @@ -110,7 +110,48 @@ struct TEvWhiteboard{ struct TEvTabletStateRequest : public TEventPB<TEvTabletStateRequest, NKikimrWhiteboard::TEvTabletStateRequest, EvTabletStateRequest> {}; - struct TEvTabletStateResponse : public TEventPB<TEvTabletStateResponse, NKikimrWhiteboard::TEvTabletStateResponse, EvTabletStateResponse> {}; +#pragma pack(push, 1) + struct TEvTabletStateResponsePacked5 { + ui64 TabletId; + ui32 FollowerId; + ui32 Generation; + NKikimrTabletBase::TTabletTypes::EType Type; + NKikimrWhiteboard::TTabletStateInfo::ETabletState State; + + TEvTabletStateResponsePacked5() = default; + TEvTabletStateResponsePacked5(const NKikimrWhiteboard::TTabletStateInfo& elem) + : TabletId(elem.GetTabletId()) + , FollowerId(elem.GetFollowerId()) + , Generation(elem.GetGeneration()) + , Type(elem.GetType()) + , State(elem.GetState()) + {} + + operator NKikimrWhiteboard::TTabletStateInfo() const { + NKikimrWhiteboard::TTabletStateInfo result; + Fill(result); + return result; + } + + void Fill(NKikimrWhiteboard::TTabletStateInfo& result) const { + result.SetTabletId(TabletId); + result.SetFollowerId(FollowerId); + result.SetGeneration(Generation); + result.SetType(Type); + result.SetState(State); + } + } Y_PACKED; + + 
static_assert(sizeof(TEvTabletStateResponsePacked5) == 24); +#pragma pack(pop) + + struct TEvTabletStateResponse : public TEventPB<TEvTabletStateResponse, NKikimrWhiteboard::TEvTabletStateResponse, EvTabletStateResponse> { + TEvTabletStateResponsePacked5* AllocatePackedResponse(size_t count) { + auto& packed5 = *Record.MutablePacked5(); + packed5.resize(count * sizeof(TEvTabletStateResponsePacked5)); + return reinterpret_cast<TEvTabletStateResponsePacked5*>(packed5.Detach()); + } + }; struct TEvPDiskStateUpdate : TEventPB<TEvPDiskStateUpdate, NKikimrWhiteboard::TPDiskStateInfo, EvPDiskStateUpdate> { TEvPDiskStateUpdate() = default; @@ -264,6 +305,8 @@ struct TEvWhiteboard{ Record.SetErasureSpecies(groupInfo->Type.ErasureSpeciesName(groupInfo->Type.GetErasure())); for (ui32 i = 0; i < groupInfo->GetTotalVDisksNum(); ++i) { VDiskIDFromVDiskID(groupInfo->GetVDiskId(i), Record.AddVDiskIds()); + const TActorId& actorId = groupInfo->GetActorId(i); + Record.AddVDiskNodeIds(actorId.NodeId()); } Record.SetStoragePoolName(groupInfo->GetStoragePoolName()); if (groupInfo->GetEncryptionMode() != TBlobStorageGroupInfo::EEM_NONE) { diff --git a/ydb/core/persqueue/events/global.h b/ydb/core/persqueue/events/global.h index e1957e7a16..2a2539dbb9 100644 --- a/ydb/core/persqueue/events/global.h +++ b/ydb/core/persqueue/events/global.h @@ -79,7 +79,11 @@ struct TEvPersQueue { struct TEvGetReadSessionsInfo: public TEventPB<TEvGetReadSessionsInfo, NKikimrPQ::TGetReadSessionsInfo, EvGetReadSessionsInfo> { - TEvGetReadSessionsInfo() {} + TEvGetReadSessionsInfo(const TString& consumer = "") { + if (!consumer.empty()) { + Record.SetClientId(consumer); + } + } }; struct TEvReadSessionsInfoResponse: public TEventPB<TEvReadSessionsInfoResponse, @@ -123,9 +127,11 @@ struct TEvPersQueue { struct TEvStatus : public TEventPB<TEvStatus, NKikimrPQ::TStatus, EvStatus> { - explicit TEvStatus(const TString& consumer = "") { + explicit TEvStatus(const TString& consumer = "", bool getStatForAllConsumers 
= false) { if (!consumer.empty()) Record.SetClientId(consumer); + if (getStatForAllConsumers) + Record.SetGetStatForAllConsumers(true); } }; diff --git a/ydb/core/persqueue/events/internal.h b/ydb/core/persqueue/events/internal.h index afc8bf6651..6d60b347d5 100644 --- a/ydb/core/persqueue/events/internal.h +++ b/ydb/core/persqueue/events/internal.h @@ -310,13 +310,15 @@ struct TEvPQ { }; struct TEvPartitionStatus : public TEventLocal<TEvPartitionStatus, EvPartitionStatus> { - explicit TEvPartitionStatus(const TActorId& sender, const TString& clientId) + explicit TEvPartitionStatus(const TActorId& sender, const TString& clientId, bool getStatForAllConsumers) : Sender(sender) , ClientId(clientId) + , GetStatForAllConsumers(getStatForAllConsumers) {} TActorId Sender; TString ClientId; + bool GetStatForAllConsumers; }; struct TEvPartitionStatusResponse : public TEventLocal<TEvPartitionStatusResponse, EvPartitionStatusResponse> { diff --git a/ydb/core/persqueue/metering_sink.cpp b/ydb/core/persqueue/metering_sink.cpp index 603e6f7d65..8d982498cc 100644 --- a/ydb/core/persqueue/metering_sink.cpp +++ b/ydb/core/persqueue/metering_sink.cpp @@ -135,8 +135,7 @@ void TMeteringSink::Flush(TInstant now, bool force) { break; } ui64 duration = (now - LastFlush_[whichOne]).MilliSeconds(); - ui64 cus = CurrentUsedStorage_ * 1024 * 1024; // in bytes - ui64 avgUsage = cus * 1000 / duration; + ui64 avgUsage = CurrentUsedStorage_ * 1_MB * 1000 / duration; CurrentUsedStorage_ = 0; const THashMap<TString, ui64> tags = { {"ydb_size", avgUsage} diff --git a/ydb/core/persqueue/partition.cpp b/ydb/core/persqueue/partition.cpp index 8738861be4..a4d6294e0c 100644 --- a/ydb/core/persqueue/partition.cpp +++ b/ydb/core/persqueue/partition.cpp @@ -424,7 +424,7 @@ void TPartition::FillReadFromTimestamps(const NKikimrPQ::TPQTabletConfig& config } TPartition::TPartition(ui64 tabletId, ui32 partition, const TActorId& tablet, const TActorId& blobCache, - const NPersQueue::TTopicConverterPtr& 
topicConverter, bool isLocalDC, TString dcId, + const NPersQueue::TTopicConverterPtr& topicConverter, bool isLocalDC, TString dcId, bool isServerless, const NKikimrPQ::TPQTabletConfig& config, const TTabletCountersBase& counters, const TActorContext &ctx, bool newPartition) : TabletID(tabletId) @@ -446,10 +446,12 @@ TPartition::TPartition(ui64 tabletId, ui32 partition, const TActorId& tablet, co , GapSize(0) , CloudId(config.GetYcCloudId()) , DbId(config.GetYdbDatabaseId()) + , DbPath(config.GetYdbDatabasePath()) + , IsServerless(isServerless) , FolderId(config.GetYcFolderId()) , UsersInfoStorage( DCId, TabletID, TopicConverter, Partition, counters, Config, - CloudId, DbId, config.GetYdbDatabasePath(), FolderId + CloudId, DbId, config.GetYdbDatabasePath(), IsServerless, FolderId ) , ReadingTimestamp(false) , SetOffsetCookie(0) @@ -722,7 +724,7 @@ void TPartition::Bootstrap(const TActorContext& ctx) { userInfo.ReadQuota.UpdateConfig(readQuota.GetBurstSize(), readQuota.GetSpeedInBytesPerSecond()); } - LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "boostrapping " << Partition << " " << ctx.SelfID); + LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "bootstrapping " << Partition << " " << ctx.SelfID); if (NewPartition) { InitComplete(ctx); @@ -771,10 +773,10 @@ void TPartition::SetupTopicCounters(const TActorContext& ctx) { {10240_KB, "10240kb"}, {65536_KB, "65536kb"}, {999'999'999, "99999999kb"}}, true)); subGroup = GetServiceCounters(counters, "pqproxy|writeSession"); - BytesWritten = NKikimr::NPQ::TMultiCounter(subGroup, labels, {}, {"BytesWritten" + suffix}, true); + BytesWrittenTotal = NKikimr::NPQ::TMultiCounter(subGroup, labels, {}, {"BytesWritten" + suffix}, true); BytesWrittenUncompressed = NKikimr::NPQ::TMultiCounter(subGroup, labels, {}, {"UncompressedBytesWritten" + suffix}, true); BytesWrittenComp = NKikimr::NPQ::TMultiCounter(subGroup, labels, {}, {"CompactedBytesWritten" + suffix}, true); - MsgsWritten = NKikimr::NPQ::TMultiCounter(subGroup, labels, {}, 
{"MessagesWritten" + suffix}, true); + MsgsWrittenTotal = NKikimr::NPQ::TMultiCounter(subGroup, labels, {}, {"MessagesWritten" + suffix}, true); TVector<NPersQueue::TPQLabelsInfo> aggr = {{{{"Account", TopicConverter->GetAccount()}}, {"total"}}}; ui32 border = AppData(ctx)->PQConfig.GetWriteLatencyBigMs(); @@ -809,30 +811,36 @@ void TPartition::SetupTopicCounters(const TActorContext& ctx) { void TPartition::SetupStreamCounters(const TActorContext& ctx) { const auto topicName = TopicConverter->GetModernName(); auto counters = AppData(ctx)->Counters; - auto labels = NPersQueue::GetLabelsForStream(TopicConverter, CloudId, DbId, FolderId); - + auto subgroups = NPersQueue::GetSubgroupsForTopic(TopicConverter, CloudId, DbId, DbPath, FolderId); +/* WriteBufferIsFullCounter.SetCounter( - NPersQueue::GetCountersForStream(counters), - {{"cloud", CloudId}, - {"folder", FolderId}, - {"database", DbId}, - {"stream", TopicConverter->GetFederationPath()}, + NPersQueue::GetCountersForTopic(counters, IsServerless), + { + {"database", DbPath}, + {"cloud_id", CloudId}, + {"folder_id", FolderId}, + {"database_id", DbId}, + {"topic", TopicConverter->GetFederationPath()}, {"host", DCId}, - {"shard", ToString<ui32>(Partition)}}, - {"name", "stream.internal_write.buffer_brimmed_duration_ms", true}); + {"partition", ToString<ui32>(Partition)}}, + {"name", "api.grpc.topic.stream_write.buffer_brimmed_milliseconds", true}); +*/ + + subgroups.push_back({"name", "topic.write.lag_milliseconds"}); InputTimeLag = THolder<NKikimr::NPQ::TPercentileCounter>(new NKikimr::NPQ::TPercentileCounter( - NPersQueue::GetCountersForStream(counters), labels, - {{"name", "stream.internal_write.time_lags_milliseconds"}}, "bin", + NPersQueue::GetCountersForTopic(counters, IsServerless), {}, + subgroups, "bin", TVector<std::pair<ui64, TString>>{ {100, "100"}, {200, "200"}, {500, "500"}, {1000, "1000"}, {2000, "2000"}, {5000, "5000"}, {10'000, "10000"}, {30'000, "30000"}, {60'000, "60000"}, {180'000,"180000"}, 
{9'999'999, "999999"}}, true)); + subgroups.back().second = "topic.write.message_size_bytes"; MessageSize = THolder<NKikimr::NPQ::TPercentileCounter>(new NKikimr::NPQ::TPercentileCounter( - NPersQueue::GetCountersForStream(counters), labels, - {{"name", "stream.internal_write.record_size_bytes"}}, "bin", + NPersQueue::GetCountersForTopic(counters, IsServerless), {}, + subgroups, "bin", TVector<std::pair<ui64, TString>>{ {1024, "1024"}, {5120, "5120"}, {10'240, "10240"}, {20'480, "20480"}, {51'200, "51200"}, {102'400, "102400"}, @@ -840,23 +848,26 @@ void TPartition::SetupStreamCounters(const TActorContext& ctx) { {2'097'152,"2097152"}, {5'242'880, "5242880"}, {10'485'760, "10485760"}, {67'108'864, "67108864"}, {999'999'999, "99999999"}}, true)); - BytesWritten = NKikimr::NPQ::TMultiCounter( + subgroups.pop_back(); + BytesWrittenGrpc = NKikimr::NPQ::TMultiCounter( + NPersQueue::GetCountersForTopic(counters, IsServerless), {}, subgroups, + {"api.grpc.topic.stream_write.bytes"} , true, "name"); + BytesWrittenTotal = NKikimr::NPQ::TMultiCounter( + NPersQueue::GetCountersForTopic(counters, IsServerless), {}, subgroups, + {"topic.write.bytes"} , true, "name"); + + MsgsWrittenGrpc = NKikimr::NPQ::TMultiCounter( + NPersQueue::GetCountersForTopic(counters, IsServerless), {}, subgroups, + {"api.grpc.topic.stream_write.messages"}, true, "name"); + MsgsWrittenTotal = NKikimr::NPQ::TMultiCounter( + NPersQueue::GetCountersForTopic(counters, IsServerless), {}, subgroups, + {"topic.write.messages"}, true, "name"); - NPersQueue::GetCountersForStream(counters), labels, {}, - {"stream.internal_write.bytes_per_second", - "stream.incoming_bytes_per_second"} , true, "name"); - MsgsWritten = NKikimr::NPQ::TMultiCounter( - NPersQueue::GetCountersForStream(counters), labels, {}, - {"stream.internal_write.records_per_second", - "stream.incoming_records_per_second"}, true, "name"); BytesWrittenUncompressed = NKikimr::NPQ::TMultiCounter( - NPersQueue::GetCountersForStream(counters), labels, 
{}, - {"stream.internal_write.uncompressed_bytes_per_second"}, true, "name"); - BytesWrittenComp = NKikimr::NPQ::TMultiCounter( - NPersQueue::GetCountersForStream(counters), labels, {}, - {"stream.internal_write.compacted_bytes_per_second"}, true, "name"); + NPersQueue::GetCountersForTopic(counters, IsServerless), {}, subgroups, + {"topic.write.uncompressed_bytes"}, true, "name"); TVector<NPersQueue::TPQLabelsInfo> aggr = {{{{"Account", TopicConverter->GetAccount()}}, {"total"}}}; ui32 border = AppData(ctx)->PQConfig.GetWriteLatencyBigMs(); @@ -867,21 +878,23 @@ void TPartition::SetupStreamCounters(const TActorContext& ctx) { SLIBigLatency = NKikimr::NPQ::TMultiCounter(subGroup, aggr, {}, {"WriteBigLatency"}, true, "name", false); WritesTotal = NKikimr::NPQ::TMultiCounter(subGroup, aggr, {}, {"WritesTotal"}, true, "name", false); if (IsQuotingEnabled() && !TopicWriteQuotaResourcePath.empty()) { + subgroups.push_back({"name", "api.grpc.topic.stream_write.topic_throttled_milliseconds"}); TopicWriteQuotaWaitCounter = THolder<NKikimr::NPQ::TPercentileCounter>( new NKikimr::NPQ::TPercentileCounter( - NPersQueue::GetCountersForStream(counters), labels, - {{"name", "stream.internal_write.topic_write_quota_wait_milliseconds"}}, "bin", + NPersQueue::GetCountersForTopic(counters, IsServerless), {}, + subgroups, "bin", TVector<std::pair<ui64, TString>>{ {0, "0"}, {1, "1"}, {5, "5"}, {10, "10"}, {20, "20"}, {50, "50"}, {100, "100"}, {500, "500"}, {1000, "1000"}, {2500, "2500"}, {5000, "5000"}, {10'000, "10000"}, {9'999'999, "999999"}}, true)); + subgroups.pop_back(); } + subgroups.push_back({"name", "api.grpc.topic.stream_write.partition_throttled_milliseconds"}); PartitionWriteQuotaWaitCounter = THolder<NKikimr::NPQ::TPercentileCounter>( new NKikimr::NPQ::TPercentileCounter( - NPersQueue::GetCountersForStream(counters), labels, - {{"name", "stream.internal_write.partition_write_quota_wait_milliseconds"}}, "bin", + NPersQueue::GetCountersForTopic(counters, IsServerless), {}, 
subgroups, "bin", TVector<std::pair<ui64, TString>>{ {0, "0"}, {1, "1"}, {5, "5"}, {10, "10"}, {20, "20"}, {50, "50"}, {100, "100"}, {500, "500"}, @@ -1004,7 +1017,6 @@ void TPartition::HandleWakeup(const TActorContext& ctx) { avg.Update(now); } } - WriteBufferIsFullCounter.UpdateWorkingTime(now); WriteLagMs.Update(0, now); @@ -1725,6 +1737,7 @@ void TPartition::InitComplete(const TActorContext& ctx) { PartitionCountersLabeled->GetCounters()[METRIC_INIT_TIME] = InitDuration.MilliSeconds(); PartitionCountersLabeled->GetCounters()[METRIC_LIFE_TIME] = CreationTime.MilliSeconds(); PartitionCountersLabeled->GetCounters()[METRIC_PARTITIONS] = 1; + PartitionCountersLabeled->GetCounters()[METRIC_PARTITIONS_TOTAL] = Config.PartitionIdsSize(); ctx.Send(Tablet, new TEvPQ::TEvPartitionLabeledCounters(Partition, *PartitionCountersLabeled)); } UpdateUserInfoEndOffset(ctx.Now()); @@ -2056,6 +2069,22 @@ void TPartition::Handle(TEvPQ::TEvPartitionStatus::TPtr& ev, const TActorContext ui64 totalLag = clientInfo->GetReadLagMs() + userInfo.GetWriteLagMs() + (ctx.Now() - userInfo.GetReadTimestamp()).MilliSeconds(); clientInfo->SetTotalLagMs(totalLag); } + + if (ev->Get()->GetStatForAllConsumers) { //fill lags + auto* clientInfo = result.AddConsumerResult(); + clientInfo->SetConsumer(userInfo.User); + auto readTimestamp = (userInfo.GetReadWriteTimestamp() ? userInfo.GetReadWriteTimestamp() : GetWriteTimeEstimate(userInfo.GetReadOffset())).MilliSeconds(); + clientInfo->SetReadLagMs(userInfo.GetReadOffset() < (i64)EndOffset + ? 
(userInfo.GetReadTimestamp() - TInstant::MilliSeconds(readTimestamp)).MilliSeconds() + : 0); + clientInfo->SetLastReadTimestampMs(userInfo.GetReadTimestamp().MilliSeconds()); + clientInfo->SetWriteLagMs(userInfo.GetWriteLagMs()); + + clientInfo->SetAvgReadSpeedPerMin(userInfo.AvgReadBytes[1].GetValue()); + clientInfo->SetAvgReadSpeedPerHour(userInfo.AvgReadBytes[2].GetValue()); + clientInfo->SetAvgReadSpeedPerDay(userInfo.AvgReadBytes[3].GetValue()); + } + } result.SetAvgReadSpeedPerSec(resSpeed[0]); result.SetAvgReadSpeedPerMin(resSpeed[1]); @@ -3636,14 +3665,19 @@ void TPartition::HandleWriteResponse(const TActorContext& ctx) { TabletCounters.Cumulative()[COUNTER_PQ_WRITE_BYTES_OK].Increment(WriteNewSize); TabletCounters.Percentile()[COUNTER_PQ_WRITE_CYCLE_BYTES].IncrementFor(WriteCycleSize); TabletCounters.Percentile()[COUNTER_PQ_WRITE_NEW_BYTES].IncrementFor(WriteNewSize); - if (BytesWritten) - BytesWritten.Inc(WriteNewSizeInternal); + if (BytesWrittenGrpc) + BytesWrittenGrpc.Inc(WriteNewSizeInternal); + if (BytesWrittenTotal) + BytesWrittenTotal.Inc(WriteNewSize); + if (BytesWrittenUncompressed) BytesWrittenUncompressed.Inc(WriteNewSizeUncompressed); if (BytesWrittenComp) BytesWrittenComp.Inc(WriteCycleSize); - if (MsgsWritten) - MsgsWritten.Inc(WriteNewMessagesInternal); + if (MsgsWrittenGrpc) + MsgsWrittenGrpc.Inc(WriteNewMessagesInternal); + if (MsgsWrittenTotal) + MsgsWrittenTotal.Inc(WriteNewMessages); //All ok auto now = ctx.Now(); @@ -4286,7 +4320,7 @@ bool TPartition::AppendHeadWithNewWrites(TEvKeyValue::TEvRequest* request, const } WriteNewSize += p.Msg.SourceId.size() + p.Msg.Data.size(); - WriteNewSizeInternal = p.Msg.External ? 0 : WriteNewSize; + WriteNewSizeInternal += p.Msg.External ? 
0 : (p.Msg.SourceId.size() + p.Msg.Data.size()); WriteNewSizeUncompressed += p.Msg.UncompressedSize + p.Msg.SourceId.size(); if (p.Msg.PartNo == 0) { ++WriteNewMessages; diff --git a/ydb/core/persqueue/partition.h b/ydb/core/persqueue/partition.h index c387af7af9..bda3971572 100644 --- a/ydb/core/persqueue/partition.h +++ b/ydb/core/persqueue/partition.h @@ -174,7 +174,7 @@ public: } TPartition(ui64 tabletId, ui32 partition, const TActorId& tablet, const TActorId& blobCache, - const NPersQueue::TTopicConverterPtr& topicConverter, bool isLocalDC, TString dcId, + const NPersQueue::TTopicConverterPtr& topicConverter, bool isLocalDC, TString dcId, bool isServerless, const NKikimrPQ::TPQTabletConfig& config, const TTabletCountersBase& counters, const TActorContext& ctx, bool newPartition = false); @@ -382,6 +382,8 @@ private: TString CloudId; TString DbId; + TString DbPath; + bool IsServerless; TString FolderId; TUsersInfoStorage UsersInfoStorage; @@ -455,10 +457,12 @@ private: NKikimr::NPQ::TMultiCounter SLIBigLatency; NKikimr::NPQ::TMultiCounter WritesTotal; - NKikimr::NPQ::TMultiCounter BytesWritten; + NKikimr::NPQ::TMultiCounter BytesWrittenTotal; + NKikimr::NPQ::TMultiCounter BytesWrittenGrpc; NKikimr::NPQ::TMultiCounter BytesWrittenUncompressed; NKikimr::NPQ::TMultiCounter BytesWrittenComp; - NKikimr::NPQ::TMultiCounter MsgsWritten; + NKikimr::NPQ::TMultiCounter MsgsWrittenTotal; + NKikimr::NPQ::TMultiCounter MsgsWrittenGrpc;; // Writing blob with topic quota variables ui64 TopicQuotaRequestCookie = 0; diff --git a/ydb/core/persqueue/pq_impl.cpp b/ydb/core/persqueue/pq_impl.cpp index 3a5cdf3759..1949038e55 100644 --- a/ydb/core/persqueue/pq_impl.cpp +++ b/ydb/core/persqueue/pq_impl.cpp @@ -655,7 +655,7 @@ void TPersQueue::ApplyNewConfigAndReply(const TActorContext& ctx) if (Partitions.find(partitionId) == Partitions.end()) { Partitions.emplace(partitionId, TPartitionInfo( ctx.Register(new TPartition(TabletID(), partitionId, ctx.SelfID, CacheActor, TopicConverter, 
- IsLocalDC, DCId, Config, *Counters, ctx, true)), + IsLocalDC, DCId, IsServerless, Config, *Counters, ctx, true)), GetPartitionKeyRange(partition), true, *Counters @@ -783,7 +783,7 @@ void TPersQueue::ReadConfig(const NKikimrClient::TKeyValueResponse::TReadResult& const auto partitionId = partition.GetPartitionId(); Partitions.emplace(partitionId, TPartitionInfo( ctx.Register(new TPartition(TabletID(), partitionId, ctx.SelfID, CacheActor, TopicConverter, - IsLocalDC, DCId, Config, *Counters, ctx, false)), + IsLocalDC, DCId, IsServerless, Config, *Counters, ctx, false)), GetPartitionKeyRange(partition), false, *Counters @@ -1415,7 +1415,8 @@ void TPersQueue::Handle(TEvPersQueue::TEvStatus::TPtr& ev, const TActorContext& for (auto& p : Partitions) { if (!p.second.InitDone) continue; - THolder<TEvPQ::TEvPartitionStatus> event = MakeHolder<TEvPQ::TEvPartitionStatus>(ans, ev->Get()->Record.HasClientId() ? ev->Get()->Record.GetClientId() : ""); + THolder<TEvPQ::TEvPartitionStatus> event = MakeHolder<TEvPQ::TEvPartitionStatus>(ans, ev->Get()->Record.HasClientId() ? ev->Get()->Record.GetClientId() : "", + ev->Get()->Record.HasGetStatForAllConsumers() ? 
ev->Get()->Record.GetGetStatForAllConsumers() : false); ctx.Send(p.second.Actor, event.Release()); } } @@ -2137,6 +2138,9 @@ TPersQueue::TPersQueue(const TActorId& tablet, TTabletStorageInfo *info) void TPersQueue::CreatedHook(const TActorContext& ctx) { + + IsServerless = AppData(ctx)->FeatureFlags.GetEnableDbCounters(); //TODO: find out it via describe + ctx.Send(GetNameserviceActorId(), new TEvInterconnect::TEvGetNode(ctx.SelfID.NodeId())); } diff --git a/ydb/core/persqueue/pq_impl.h b/ydb/core/persqueue/pq_impl.h index 07ba60620c..fa858e2506 100644 --- a/ydb/core/persqueue/pq_impl.h +++ b/ydb/core/persqueue/pq_impl.h @@ -148,6 +148,7 @@ private: NPersQueue::TTopicConverterPtr TopicConverter; bool IsLocalDC; TString DCId; + bool IsServerless = false; TVector<NScheme::TTypeId> KeySchema; NKikimrPQ::TPQTabletConfig Config; diff --git a/ydb/core/persqueue/read_balancer.cpp b/ydb/core/persqueue/read_balancer.cpp index 7b183f066b..89676eef2e 100644 --- a/ydb/core/persqueue/read_balancer.cpp +++ b/ydb/core/persqueue/read_balancer.cpp @@ -904,11 +904,13 @@ void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvGetReadSessionsInfo::TPtr& pi->SetProxyNodeId(jt->second.ProxyNodeId); pi->SetSession(jt->second.Session); pi->SetTimestamp(jt->second.Timestamp.Seconds()); + pi->SetTimestampMs(jt->second.Timestamp.MilliSeconds()); } else { pi->SetClientNode(""); pi->SetProxyNodeId(0); pi->SetSession(""); pi->SetTimestamp(0); + pi->SetTimestampMs(0); } } for (auto& s : c.second.SessionsInfo) { diff --git a/ydb/core/persqueue/user_info.cpp b/ydb/core/persqueue/user_info.cpp index c0aa732db3..94e97aea5c 100644 --- a/ydb/core/persqueue/user_info.cpp +++ b/ydb/core/persqueue/user_info.cpp @@ -38,6 +38,7 @@ TUsersInfoStorage::TUsersInfoStorage( const TString& cloudId, const TString& dbId, const TString& dbPath, + const bool isServerless, const TString& folderId ) : DCId(std::move(dcId)) @@ -48,6 +49,7 @@ TUsersInfoStorage::TUsersInfoStorage( , CloudId(cloudId) , DbId(dbId) , 
DbPath(dbPath) + , IsServerless(isServerless) , FolderId(folderId) , CurReadRuleGeneration(0) { @@ -174,18 +176,15 @@ TUserInfo& TUsersInfoStorage::Create( bool meterRead = userServiceType.empty() || userServiceType == defaultServiceType; - TMaybe<TString> dbPath = AppData()->PQConfig.GetTopicsAreFirstClassCitizen() ? TMaybe<TString>(DbPath) : Nothing(); - auto result = UsersInfo.emplace( std::piecewise_construct, std::forward_as_tuple(user), std::forward_as_tuple( ctx, CreateReadSpeedLimiter(user), user, readRuleGeneration, important, TopicConverter, Partition, - session, gen, step, offset, readOffsetRewindSum, DCId, readFromTimestamp, CloudId, DbId, dbPath, FolderId, - meterRead, burst, speed + session, gen, step, offset, readOffsetRewindSum, DCId, readFromTimestamp, CloudId, DbId, + DbPath, IsServerless, FolderId, meterRead, burst, speed ) - ); - Y_VERIFY(result.second); + ); Y_VERIFY(result.second); return result.first->second; } diff --git a/ydb/core/persqueue/user_info.h b/ydb/core/persqueue/user_info.h index 2ec5fffc83..542400ff95 100644 --- a/ydb/core/persqueue/user_info.h +++ b/ydb/core/persqueue/user_info.h @@ -270,7 +270,7 @@ struct TUserInfo { const ui64 readRuleGeneration, const bool important, const NPersQueue::TTopicConverterPtr& topicConverter, const ui32 partition, const TString &session, ui32 gen, ui32 step, i64 offset, const ui64 readOffsetRewindSum, const TString& dcId, TInstant readFromTimestamp, - const TString& cloudId, const TString& dbId, const TMaybe<TString>& dbPath, const TString& folderId, + const TString& cloudId, const TString& dbId, const TString& dbPath, const bool isServerless, const TString& folderId, bool meterRead, ui64 burst = 1'000'000'000, ui64 speed = 1'000'000'000 ) : ReadSpeedLimiter(std::move(readSpeedLimiter)) @@ -307,11 +307,9 @@ struct TUserInfo { if (AppData(ctx)->Counters) { if (AppData()->PQConfig.GetTopicsAreFirstClassCitizen()) { LabeledCounters.Reset(new TUserLabeledCounters( - user + "|$x|" + 
topicConverter->GetClientsideName(), partition, *dbPath)); + user + "||" + topicConverter->GetClientsideName(), partition, dbPath)); - if (DoInternalRead) { - SetupStreamCounters(ctx, dcId, ToString<ui32>(partition), cloudId, dbId, folderId); - } + SetupStreamCounters(ctx, dcId, ToString<ui32>(partition), cloudId, dbId, dbPath, isServerless, folderId); } else { LabeledCounters.Reset(new TUserLabeledCounters( user + "/" + (important ? "1" : "0") + "/" + topicConverter->GetClientsideName(), @@ -324,30 +322,39 @@ struct TUserInfo { void SetupStreamCounters( const TActorContext& ctx, const TString& dcId, const TString& partition, - const TString& cloudId, const TString& dbId, const TString& folderId + const TString& cloudId, const TString& dbId, const TString& dbPath, const bool isServerless, const TString& folderId ) { - auto subgroup = NPersQueue::GetCountersForStream(AppData(ctx)->Counters); - auto aggregates = - NPersQueue::GetLabelsForStream(TopicConverter, cloudId, dbId, folderId); - - BytesRead = TMultiCounter(subgroup, - aggregates, {{"consumer", User}}, - {"stream.internal_read.bytes_per_second", - "stream.outgoing_bytes_per_second"}, true, "name"); - MsgsRead = TMultiCounter(subgroup, - aggregates, {{"consumer", User}}, - {"stream.internal_read.records_per_second", - "stream.outgoing_records_per_second"}, true, "name"); - - Counter.SetCounter(subgroup, - {{"cloud", cloudId}, {"folder", folderId}, {"database", dbId}, - {"stream", TopicConverter->GetFederationPath()}, - {"consumer", User}, {"host", dcId}, {"shard", partition}}, - {"name", "stream.await_operating_milliseconds", true}); - + auto subgroup = NPersQueue::GetCountersForTopic(AppData(ctx)->Counters, isServerless); + auto subgroups = + NPersQueue::GetSubgroupsForTopic(TopicConverter, cloudId, dbId, dbPath, folderId); + if (DoInternalRead) { + subgroups.push_back({"consumer", User}); + + BytesRead = TMultiCounter(subgroup, {}, subgroups, + {"api.grpc.topic.stream_read.bytes", + "topic.read.bytes"}, 
true, "name"); + MsgsRead = TMultiCounter(subgroup, {}, subgroups, + {"api.grpc.topic.stream_read.messages", + "topic.read.messages"}, true, "name"); + } else { + BytesRead = TMultiCounter(subgroup, {}, subgroups, + {"topic.read.bytes"}, true, "name"); + MsgsRead = TMultiCounter(subgroup, {}, subgroups, + {"topic.read.messages"}, true, "name"); + } + Y_UNUSED(dcId); + Y_UNUSED(partition); + /* + Counter.SetCounter(subgroup, + {{"database", dbPath}, {"cloud_id", cloudId}, {"folder_id", folderId}, {"database_id", dbId}, + {"topic", TopicConverter->GetFederationPath()}, + {"consumer", User}, {"host", dcId}, {"partition", partition}}, + {"name", "topic.read.awaiting_consume_milliseconds", true}); + */ + + subgroups.push_back({"name", "topic.read.lag_milliseconds"}); ReadTimeLag.reset(new TPercentileCounter( - NPersQueue::GetCountersForStream(AppData(ctx)->Counters), aggregates, - {{"consumer", User}, {"name", "stream.internal_read.time_lags_milliseconds"}}, "bin", + NPersQueue::GetCountersForTopic(AppData(ctx)->Counters, isServerless), {}, subgroups, "bin", TVector<std::pair<ui64, TString>>{{100, "100"}, {200, "200"}, {500, "500"}, {1000, "1000"}, {2000, "2000"}, {5000, "5000"}, {10'000, "10000"}, @@ -448,7 +455,7 @@ struct TUserInfo { void SetImportant(bool important) { Important = important; - if (LabeledCounters) { + if (LabeledCounters && !AppData()->PQConfig.GetTopicsAreFirstClassCitizen()) { LabeledCounters->SetGroup(User + "/" + (important ? 
"1" : "0") + "/" + TopicConverter->GetClientsideName()); } } @@ -491,7 +498,7 @@ class TUsersInfoStorage { public: TUsersInfoStorage(TString dcId, ui64 tabletId, const NPersQueue::TTopicConverterPtr& topicConverter, ui32 partition, const TTabletCountersBase& counters, const NKikimrPQ::TPQTabletConfig& config, - const TString& CloudId, const TString& DbId, const TString& DbPath, const TString& FolderId); + const TString& CloudId, const TString& DbId, const TString& DbPath, const bool isServerless, const TString& FolderId); void Init(TActorId tabletActor, TActorId partitionActor); @@ -535,6 +542,7 @@ private: TString CloudId; TString DbId; TString DbPath; + bool IsServerless; TString FolderId; ui64 CurReadRuleGeneration; }; diff --git a/ydb/core/persqueue/ut/common/pq_ut_common.cpp b/ydb/core/persqueue/ut/common/pq_ut_common.cpp index 22d61d6a1d..11fc2b18ef 100644 --- a/ydb/core/persqueue/ut/common/pq_ut_common.cpp +++ b/ydb/core/persqueue/ut/common/pq_ut_common.cpp @@ -14,10 +14,24 @@ #include <library/cpp/testing/unittest/registar.h> -namespace NKikimr { +namespace NKikimr::NPQ { + +void FillPQConfig(NKikimrPQ::TPQConfig& pqConfig, const TString& dbRoot, bool isFirstClass) { + pqConfig.SetEnabled(true); + // NOTE(shmel1k@): KIKIMR-14221 + pqConfig.SetTopicsAreFirstClassCitizen(isFirstClass); + pqConfig.SetRequireCredentialsInNewProtocol(false); + pqConfig.SetRoot(dbRoot); + pqConfig.SetClusterTablePath(TStringBuilder() << dbRoot << "/Config/V2/Cluster"); + pqConfig.SetVersionTablePath(TStringBuilder() << dbRoot << "/Config/V2/Versions"); + pqConfig.MutableQuotingConfig()->SetEnableQuoting(false); +} void PQTabletPrepare(const TTabletPreparationParameters& parameters, - const TVector<std::pair<TString, bool>>& users, TTestContext& tc) { + const TVector<std::pair<TString, bool>>& users, + TTestActorRuntime& runtime, + ui64 tabletId, + TActorId edge) { TAutoPtr<IEventHandle> handle; static int version = 0; if (parameters.specVersion) { @@ -27,7 +41,7 @@ void 
PQTabletPrepare(const TTabletPreparationParameters& parameters, } for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { try { - tc.Runtime->ResetScheduledCount(); + runtime.ResetScheduledCount(); THolder<TEvPersQueue::TEvUpdateConfig> request(new TEvPersQueue::TEvUpdateConfig()); for (ui32 i = 0; i < parameters.partitions; ++i) { @@ -36,9 +50,9 @@ void PQTabletPrepare(const TTabletPreparationParameters& parameters, request->Record.MutableTabletConfig()->SetCacheSize(10_MB); request->Record.SetTxId(12345); auto tabletConfig = request->Record.MutableTabletConfig(); - if (tc.Runtime->GetAppData().PQConfig.GetTopicsAreFirstClassCitizen()) { + if (runtime.GetAppData().PQConfig.GetTopicsAreFirstClassCitizen()) { tabletConfig->SetTopicName("topic"); - tabletConfig->SetTopicPath(tc.Runtime->GetAppData().PQConfig.GetDatabase() + "/topic"); + tabletConfig->SetTopicPath(runtime.GetAppData().PQConfig.GetDatabase() + "/topic"); tabletConfig->SetYcCloudId(parameters.cloudId); tabletConfig->SetYcFolderId(parameters.folderId); tabletConfig->SetYdbDatabaseId(parameters.databaseId); @@ -73,15 +87,15 @@ void PQTabletPrepare(const TTabletPreparationParameters& parameters, if (u.first != "user") tabletConfig->AddReadRules(u.first); } - tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); + runtime.SendToPipe(tabletId, edge, request.Release(), 0, GetPipeConfigWithRetries()); TEvPersQueue::TEvUpdateConfigResponse* result = - tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvUpdateConfigResponse>(handle); + runtime.GrabEdgeEvent<TEvPersQueue::TEvUpdateConfigResponse>(handle); UNIT_ASSERT(result); auto& rec = result->Record; UNIT_ASSERT(rec.HasStatus() && rec.GetStatus() == NKikimrPQ::OK); UNIT_ASSERT(rec.HasTxId() && rec.GetTxId() == 12345); - UNIT_ASSERT(rec.HasOrigin() && result->GetOrigin() == 1); + UNIT_ASSERT(rec.HasOrigin() && result->GetOrigin() == tabletId); retriesLeft = 0; } catch (NActors::TSchedulingLimitReachedException) { 
UNIT_ASSERT(retriesLeft >= 1); @@ -96,8 +110,8 @@ void PQTabletPrepare(const TTabletPreparationParameters& parameters, auto read = request->Record.AddCmdRead(); read->SetKey("_config"); - tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); - result = tc.Runtime->GrabEdgeEvent<TEvKeyValue::TEvResponse>(handle); + runtime.SendToPipe(tabletId, edge, request.Release(), 0, GetPipeConfigWithRetries()); + result = runtime.GrabEdgeEvent<TEvKeyValue::TEvResponse>(handle); UNIT_ASSERT(result); UNIT_ASSERT(result->Record.HasStatus()); @@ -109,6 +123,13 @@ void PQTabletPrepare(const TTabletPreparationParameters& parameters, } } +void PQTabletPrepare(const TTabletPreparationParameters& parameters, + const TVector<std::pair<TString, bool>>& users, + TTestContext& context) { + PQTabletPrepare(parameters, users, *context.Runtime, context.TabletId, context.Edge); +} + + void CmdGetOffset(const ui32 partition, const TString& user, i64 offset, TTestContext& tc, i64 ctime, ui64 writeTime) { TAutoPtr<IEventHandle> handle; @@ -141,7 +162,7 @@ void CmdGetOffset(const ui32 partition, const TString& user, i64 offset, TTestCo UNIT_ASSERT_EQUAL(resp.HasCreateTimestampMS(), ctime > 0); if (ctime > 0) { if (ctime == Max<i64>()) { - UNIT_ASSERT(resp.GetCreateTimestampMS() + 86000000 < TAppData::TimeProvider->Now().MilliSeconds()); + UNIT_ASSERT(resp.GetCreateTimestampMS() + 86'000'000 < TAppData::TimeProvider->Now().MilliSeconds()); } else { UNIT_ASSERT_EQUAL((i64)resp.GetCreateTimestampMS(), ctime); } @@ -160,14 +181,20 @@ void CmdGetOffset(const ui32 partition, const TString& user, i64 offset, TTestCo } } -void BalancerPrepare(const TString topic, const TVector<std::pair<ui32, std::pair<ui64, ui32>>>& map, const ui64 ssId, TTestContext& tc, const bool requireAuth) { +void PQBalancerPrepare(const TString topic, const TVector<std::pair<ui32, std::pair<ui64, ui32>>>& map, const ui64 ssId, + TTestContext& context, const bool requireAuth) { + 
PQBalancerPrepare(topic, map, ssId, *context.Runtime, context.BalancerTabletId, context.Edge, requireAuth); +} + +void PQBalancerPrepare(const TString topic, const TVector<std::pair<ui32, std::pair<ui64, ui32>>>& map, const ui64 ssId, + TTestActorRuntime& runtime, ui64 balancerTabletId, TActorId edge, const bool requireAuth) { TAutoPtr<IEventHandle> handle; static int version = 0; ++version; for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { try { - tc.Runtime->ResetScheduledCount(); + runtime.ResetScheduledCount(); THolder<TEvPersQueue::TEvUpdateBalancerConfig> request(new TEvPersQueue::TEvUpdateBalancerConfig()); for (const auto& p : map) { @@ -191,23 +218,21 @@ void BalancerPrepare(const TString topic, const TVector<std::pair<ui32, std::pai request->Record.MutableTabletConfig()->SetRequireAuthWrite(requireAuth); request->Record.MutableTabletConfig()->SetRequireAuthRead(requireAuth); - tc.Runtime->SendToPipe(tc.BalancerTabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); - TEvPersQueue::TEvUpdateConfigResponse* result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvUpdateConfigResponse>(handle); + runtime.SendToPipe(balancerTabletId, edge, request.Release(), 0, GetPipeConfigWithRetries()); + TEvPersQueue::TEvUpdateConfigResponse* result = runtime.GrabEdgeEvent<TEvPersQueue::TEvUpdateConfigResponse>(handle); UNIT_ASSERT(result); auto& rec = result->Record; UNIT_ASSERT(rec.HasStatus() && rec.GetStatus() == NKikimrPQ::OK); UNIT_ASSERT(rec.HasTxId() && rec.GetTxId() == 12345); - UNIT_ASSERT(rec.HasOrigin() && result->GetOrigin() == tc.BalancerTabletId); + UNIT_ASSERT(rec.HasOrigin() && result->GetOrigin() == balancerTabletId); retriesLeft = 0; } catch (NActors::TSchedulingLimitReachedException) { UNIT_ASSERT(retriesLeft >= 1); } } //TODO: check state - TTestActorRuntime& runtime = *tc.Runtime; - - ForwardToTablet(runtime, tc.BalancerTabletId, tc.Edge, new TEvents::TEvPoisonPill()); + ForwardToTablet(runtime, balancerTabletId, edge, new 
TEvents::TEvPoisonPill()); TDispatchOptions rebootOptions; rebootOptions.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvRestored, 2)); runtime.DispatchEvents(rebootOptions); @@ -246,9 +271,11 @@ void PQGetPartInfo(ui64 startOffset, ui64 endOffset, TTestContext& tc) { } void PQTabletRestart(TTestContext& tc) { - TTestActorRuntime& runtime = *tc.Runtime; + PQTabletRestart(*tc.Runtime, tc.TabletId, tc.Edge); +} - ForwardToTablet(runtime, tc.TabletId, tc.Edge, new TEvents::TEvPoisonPill()); +void PQTabletRestart(TTestActorRuntime& runtime, ui64 tabletId, TActorId edge) { + ForwardToTablet(runtime, tabletId, edge, new TEvents::TEvPoisonPill()); TDispatchOptions rebootOptions; rebootOptions.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvRestored, 2)); runtime.DispatchEvents(rebootOptions); @@ -483,10 +510,13 @@ void CmdWrite(const ui32 partition, const TString& sourceId, const TVector<std:: for (i32 retriesLeft = 2; retriesLeft > 0; --retriesLeft) { try { WriteData(partition, sourceId, data, tc, cookie, msgSeqNo, offset, disableDeduplication); - result = tc.Runtime->GrabEdgeEventIf<TEvPersQueue::TEvResponse>(handle, [](const TEvPersQueue::TEvResponse& ev){ - if (ev.Record.HasPartitionResponse() && ev.Record.GetPartitionResponse().CmdWriteResultSize() > 0 || ev.Record.GetErrorCode() != NPersQueue::NErrorCode::OK) - return true; - return false; + result = tc.Runtime->GrabEdgeEventIf<TEvPersQueue::TEvResponse>(handle, + [](const TEvPersQueue::TEvResponse& ev){ + if (ev.Record.HasPartitionResponse() && + ev.Record.GetPartitionResponse().CmdWriteResultSize() > 0 || + ev.Record.GetErrorCode() != NPersQueue::NErrorCode::OK) + return true; + return false; }); //there could be outgoing reads in TestReadSubscription test UNIT_ASSERT(result); @@ -497,14 +527,16 @@ void CmdWrite(const ui32 partition, const TString& sourceId, const TVector<std:: continue; } - if (!treatWrongCookieAsError && result->Record.GetErrorCode() == 
NPersQueue::NErrorCode::WRONG_COOKIE) { + if (!treatWrongCookieAsError && + result->Record.GetErrorCode() == NPersQueue::NErrorCode::WRONG_COOKIE) { cookie = CmdSetOwner(partition, tc).first; msgSeqNo = 0; retriesLeft = 3; continue; } - if (!treatBadOffsetAsError && result->Record.GetErrorCode() == NPersQueue::NErrorCode::WRITE_ERROR_BAD_OFFSET) { + if (!treatBadOffsetAsError && + result->Record.GetErrorCode() == NPersQueue::NErrorCode::WRITE_ERROR_BAD_OFFSET) { return; } @@ -910,4 +942,4 @@ void FillDeprecatedUserInfo(NKikimrClient::TKeyValueRequest_TCmdWrite* write, co write->SetValue(idataDeprecated.Data(), idataDeprecated.Size()); } -} // namespace NKikimr +} // namespace NKikimr::NPQ diff --git a/ydb/core/persqueue/ut/common/pq_ut_common.h b/ydb/core/persqueue/ut/common/pq_ut_common.h index 9cde0e1943..fa6123ff79 100644 --- a/ydb/core/persqueue/ut/common/pq_ut_common.h +++ b/ydb/core/persqueue/ut/common/pq_ut_common.h @@ -10,8 +10,7 @@ const bool ENABLE_DETAILED_PQ_LOG = false; const bool ENABLE_DETAILED_KV_LOG = false; -namespace NKikimr { -namespace { +namespace NKikimr::NPQ { template <typename T> inline constexpr static T PlainOrSoSlow(T plain, T slow) noexcept { @@ -23,29 +22,7 @@ inline constexpr static T PlainOrSoSlow(T plain, T slow) noexcept { constexpr ui32 NUM_WRITES = PlainOrSoSlow(100, 1); -void SetupLogging(TTestActorRuntime& runtime) { - NActors::NLog::EPriority pqPriority = ENABLE_DETAILED_PQ_LOG ? NLog::PRI_TRACE : NLog::PRI_ERROR; - NActors::NLog::EPriority priority = ENABLE_DETAILED_KV_LOG ? 
NLog::PRI_DEBUG : NLog::PRI_ERROR; - NActors::NLog::EPriority otherPriority = NLog::PRI_INFO; - - runtime.SetLogPriority(NKikimrServices::PERSQUEUE, pqPriority); - runtime.SetLogPriority(NKikimrServices::SYSTEM_VIEWS, pqPriority); - runtime.SetLogPriority(NKikimrServices::KEYVALUE, priority); - runtime.SetLogPriority(NKikimrServices::BOOTSTRAPPER, priority); - runtime.SetLogPriority(NKikimrServices::TABLET_MAIN, priority); - runtime.SetLogPriority(NKikimrServices::TABLET_EXECUTOR, priority); - runtime.SetLogPriority(NKikimrServices::BS_PROXY, priority); - - runtime.SetLogPriority(NKikimrServices::HIVE, otherPriority); - runtime.SetLogPriority(NKikimrServices::LOCAL, otherPriority); - runtime.SetLogPriority(NKikimrServices::BS_NODE, otherPriority); - runtime.SetLogPriority(NKikimrServices::BS_CONTROLLER, otherPriority); - runtime.SetLogPriority(NKikimrServices::TABLET_RESOLVER, otherPriority); - - runtime.SetLogPriority(NKikimrServices::PIPE_CLIENT, otherPriority); - runtime.SetLogPriority(NKikimrServices::PIPE_SERVER, otherPriority); - -} +void FillPQConfig(NKikimrPQ::TPQConfig& pqConfig, const TString& dbRoot, bool isFirstClass); class TInitialEventsFilter : TNonCopyable { bool IsDone; @@ -68,11 +45,9 @@ public: } }; -} // anonymous namespace - - struct TTestContext { - TTabletTypes::EType TabletType; + const TTabletTypes::EType PQTabletType = TTabletTypes::PersQueue; + const TTabletTypes::EType BalancerTabletType = TTabletTypes::PersQueueReadBalancer; ui64 TabletId; ui64 BalancerTabletId; TInitialEventsFilter InitialEventsFilter; @@ -83,7 +58,6 @@ struct TTestContext { TTestContext() { - TabletType = TTabletTypes::PersQueue; TabletId = MakeTabletID(0, 0, 1); TabletIds.push_back(TabletId); @@ -91,6 +65,31 @@ struct TTestContext { TabletIds.push_back(BalancerTabletId); } + static void SetupLogging(TTestActorRuntime& runtime) { + NActors::NLog::EPriority pqPriority = ENABLE_DETAILED_PQ_LOG ? 
NLog::PRI_TRACE : NLog::PRI_DEBUG; + NActors::NLog::EPriority priority = ENABLE_DETAILED_KV_LOG ? NLog::PRI_DEBUG : NLog::PRI_ERROR; + NActors::NLog::EPriority otherPriority = NLog::PRI_INFO; + + runtime.SetLogPriority(NKikimrServices::PERSQUEUE, pqPriority); + runtime.SetLogPriority(NKikimrServices::SYSTEM_VIEWS, pqPriority); + runtime.SetLogPriority(NKikimrServices::KEYVALUE, priority); + runtime.SetLogPriority(NKikimrServices::BOOTSTRAPPER, priority); + runtime.SetLogPriority(NKikimrServices::TABLET_MAIN, priority); + runtime.SetLogPriority(NKikimrServices::TABLET_EXECUTOR, priority); + runtime.SetLogPriority(NKikimrServices::BS_PROXY, priority); + + runtime.SetLogPriority(NKikimrServices::HIVE, otherPriority); + runtime.SetLogPriority(NKikimrServices::LOCAL, otherPriority); + runtime.SetLogPriority(NKikimrServices::BS_NODE, otherPriority); + runtime.SetLogPriority(NKikimrServices::BS_CONTROLLER, otherPriority); + runtime.SetLogPriority(NKikimrServices::TABLET_RESOLVER, otherPriority); + + runtime.SetLogPriority(NKikimrServices::PIPE_CLIENT, otherPriority); + runtime.SetLogPriority(NKikimrServices::PIPE_SERVER, otherPriority); + + runtime.SetLogPriority(NKikimrServices::SYSTEM_VIEWS, otherPriority); + } + static bool RequestTimeoutFilter(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event, TDuration duration, TInstant& deadline) { if (event->GetTypeRewrite() == TEvents::TSystem::Wakeup) { TActorId actorId = event->GetRecipientRewrite(); @@ -116,7 +115,8 @@ struct TTestContext { return RequestTimeoutFilter(runtime, event, duration, deadline); } - void Prepare(const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& outActiveZone, bool isFirstClass = false, bool enableMonitoring = false, bool enableDbCounters = false) { + void Prepare(const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& outActiveZone, bool isFirstClass = false, + bool enableMonitoring = false, bool enableDbCounters = false) { 
Y_UNUSED(dispatchName); outActiveZone = false; TTestBasicRuntime* runtime = new TTestBasicRuntime; @@ -127,29 +127,23 @@ struct TTestContext { Runtime->SetScheduledLimit(200); TAppPrepare appData; + appData.SetEnablePersistentQueryStats(enableDbCounters); appData.SetEnableDbCounters(enableDbCounters); SetupLogging(*Runtime); SetupTabletServices(*Runtime, &appData); setup(*Runtime); CreateTestBootstrapper(*Runtime, - CreateTestTabletInfo(TabletId, TabletType, TErasureType::ErasureNone), + CreateTestTabletInfo(TabletId, PQTabletType, TErasureType::ErasureNone), &CreatePersQueue); - Runtime->GetAppData(0).PQConfig.SetEnabled(true); - // NOTE(shmel1k@): KIKIMR-14221 - Runtime->GetAppData(0).PQConfig.SetTopicsAreFirstClassCitizen(isFirstClass); - Runtime->GetAppData(0).PQConfig.SetRequireCredentialsInNewProtocol(false); - Runtime->GetAppData(0).PQConfig.SetClusterTablePath("/Root/PQ/Config/V2/Cluster"); - Runtime->GetAppData(0).PQConfig.SetVersionTablePath("/Root/PQ/Config/V2/Versions"); - Runtime->GetAppData(0).PQConfig.SetRoot("/Root/PQ"); - Runtime->GetAppData(0).PQConfig.MutableQuotingConfig()->SetEnableQuoting(false); + FillPQConfig(Runtime->GetAppData(0).PQConfig, "/Root/PQ", isFirstClass); TDispatchOptions options; options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot)); Runtime->DispatchEvents(options); CreateTestBootstrapper(*Runtime, - CreateTestTabletInfo(BalancerTabletId, TTabletTypes::PersQueueReadBalancer, TErasureType::ErasureNone), + CreateTestTabletInfo(BalancerTabletId, BalancerTabletType, TErasureType::ErasureNone), &CreatePersQueueReadBalancer); options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot)); @@ -168,7 +162,7 @@ struct TTestContext { SetupLogging(*Runtime); SetupTabletServices(*Runtime); CreateTestBootstrapper(*Runtime, - CreateTestTabletInfo(TabletId, TabletType, TErasureType::ErasureNone), + CreateTestTabletInfo(TabletId, PQTabletType, TErasureType::ErasureNone), 
&CreatePersQueue); Runtime->GetAppData(0).PQConfig.SetEnabled(true); @@ -178,7 +172,7 @@ struct TTestContext { Runtime->DispatchEvents(options); CreateTestBootstrapper(*Runtime, - CreateTestTabletInfo(BalancerTabletId, TTabletTypes::PersQueueReadBalancer, TErasureType::ErasureNone), + CreateTestTabletInfo(BalancerTabletId, BalancerTabletType, TErasureType::ErasureNone), &CreatePersQueueReadBalancer); options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot)); @@ -207,16 +201,9 @@ struct TFinalizer { } }; -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// SINGLE COMMAND TEST FUNCTIONS -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -void BalancerPrepare( - const TString topic, - const TVector<std::pair<ui32, std::pair<ui64, ui32>>>& map, - const ui64 ssId, - TTestContext& tc, - const bool requireAuth = false); +/* +** SINGLE COMMAND TEST FUNCTIONS +*/ struct TTabletPreparationParameters { ui32 maxCountInPartition{20'000'000}; @@ -238,8 +225,42 @@ struct TTabletPreparationParameters { void PQTabletPrepare( const TTabletPreparationParameters& parameters, const TVector<std::pair<TString, bool>>& users, - TTestContext& tc); -void PQTabletRestart(TTestContext& tc); + TTestActorRuntime& runtime, + ui64 tabletId, + TActorId edge); + +void PQBalancerPrepare( + const TString topic, + const TVector<std::pair<ui32, std::pair<ui64, ui32>>>& map, + const ui64 ssId, + TTestActorRuntime& runtime, + ui64 tabletId, + TActorId edge, + const bool requireAuth = false); + +void PQTabletRestart( + TTestActorRuntime& runtime, + ui64 tabletId, + TActorId edge); + + +/* +** TTestContext requiring functions +*/ + +void PQTabletPrepare( + const TTabletPreparationParameters& parameters, + const TVector<std::pair<TString, bool>>& users, + TTestContext& context); + +void PQBalancerPrepare( + const TString topic, + 
const TVector<std::pair<ui32, std::pair<ui64, ui32>>>& map, + const ui64 ssId, + TTestContext& context, + const bool requireAuth = false); + +void PQTabletRestart(TTestContext& context); TActorId RegisterReadSession( const TString& session, @@ -270,15 +291,6 @@ void PQGetPartInfo( ui64 endOffset, TTestContext& tc); -void ReserveBytes( - const ui32 partition, - TTestContext& tc, - const TString& cookie, - i32 msgSeqNo, - i64 size, - const TActorId& pipeClient, - bool lastRequest); - void WaitPartition( const TString &session, TTestContext& tc, @@ -408,4 +420,4 @@ void CmdWrite( bool treatBadOffsetAsError = true, bool disableDeduplication = false); -} // namespace NKikimr +} // namespace NKikimr::NPQ diff --git a/ydb/core/persqueue/ut/counters_ut.cpp b/ydb/core/persqueue/ut/counters_ut.cpp index ae32964879..8c1bce2c57 100644 --- a/ydb/core/persqueue/ut/counters_ut.cpp +++ b/ydb/core/persqueue/ut/counters_ut.cpp @@ -6,14 +6,15 @@ #include <ydb/core/persqueue/ut/common/pq_ut_common.h> #include <ydb/core/sys_view/service/sysview_service.h> -namespace NKikimr { +namespace NKikimr::NPQ { namespace { TVector<std::pair<ui64, TString>> TestData() { TVector<std::pair<ui64, TString>> data; TString s{32, 'c'}; - ui32 pp = 8 + 4 + 2 + 9; + // FIXME: replace magic numbers and add VERIFY on sizes + const ui32 pp = 8 + 4 + 2 + 9; for (ui32 i = 0; i < 10; ++i) { data.push_back({i + 1, s.substr(pp)}); } @@ -81,6 +82,7 @@ Y_UNIT_TEST(Partition) { CmdWrite(0, "sourceid0", TestData(), tc, false, {}, true); CmdWrite(0, "sourceid1", TestData(), tc, false); CmdWrite(0, "sourceid2", TestData(), tc, false); + PQGetPartInfo(0, 30, tc); { auto counters = tc.Runtime->GetAppData(0).Counters; @@ -111,6 +113,7 @@ Y_UNIT_TEST(PartitionFirstClass) { CmdWrite(0, "sourceid0", TestData(), tc, false, {}, true); CmdWrite(0, "sourceid1", TestData(), tc, false); CmdWrite(0, "sourceid2", TestData(), tc, false); + PQGetPartInfo(0, 30, tc); { auto counters = tc.Runtime->GetAppData(0).Counters; @@ -127,7 
+130,7 @@ Y_UNIT_TEST(PartitionFirstClass) { TStringStream countersStr; dbGroup->OutputHtml(countersStr); const TString referenceCounters = NResource::Find(TStringBuf("counters_datastreams.html")); - UNIT_ASSERT_EQUAL(countersStr.Str() + "\n", referenceCounters); + UNIT_ASSERT_VALUES_EQUAL(countersStr.Str() + "\n", referenceCounters); } } @@ -221,9 +224,18 @@ Y_UNIT_TEST(PartitionFirstClass) { }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& activeZone) { TFinalizer finalizer(tc); activeZone = false; + bool dbRegistered{false}; tc.Prepare(dispatchName, setup, activeZone, true, true, true); tc.Runtime->SetScheduledLimit(1000); + tc.Runtime->SetObserverFunc([&](TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event) { + if (event->GetTypeRewrite() == NSysView::TEvSysView::EvRegisterDbCounters) { + auto database = event.Get()->Get<NSysView::TEvSysView::TEvRegisterDbCounters>()->Database; + UNIT_ASSERT_VALUES_EQUAL(database, "/Root/PQ"); + dbRegistered = true; + } + return TTestActorRuntime::DefaultObserverFunc(runtime, event); + }); PQTabletPrepare({}, {}, tc); @@ -241,15 +253,7 @@ Y_UNIT_TEST(PartitionFirstClass) { options.FinalEvents.emplace_back(TEvTabletCounters::EvTabletAddLabeledCounters); tc.Runtime->DispatchEvents(options); } - - IActor* actorX = CreateClusterLabeledCountersAggregatorActor(tc.Edge, TTabletTypes::PersQueue); - tc.Runtime->Register(actorX); - - TAutoPtr<IEventHandle> handle; - TEvTabletCounters::TEvTabletLabeledCountersResponse *result; - result = tc.Runtime->GrabEdgeEvent<TEvTabletCounters::TEvTabletLabeledCountersResponse>(handle); - UNIT_ASSERT(result); - UNIT_ASSERT_VALUES_EQUAL(result->Record.LabeledCountersByGroupSize(), 0); + UNIT_ASSERT(dbRegistered); }); } @@ -369,4 +373,4 @@ Y_UNIT_TEST(ImportantFlagSwitching) { } } // Y_UNIT_TEST_SUITE(PQCountersLabeled) -} // namespace NKikimr +} // namespace NKikimr::NPQ diff --git a/ydb/core/persqueue/ut/internals_ut.cpp 
b/ydb/core/persqueue/ut/internals_ut.cpp index b981f6ac15..da59be4d04 100644 --- a/ydb/core/persqueue/ut/internals_ut.cpp +++ b/ydb/core/persqueue/ut/internals_ut.cpp @@ -2,8 +2,7 @@ #include <library/cpp/testing/unittest/registar.h> #include <util/generic/size_literals.h> -namespace NKikimr { -namespace NPQ { +namespace NKikimr::NPQ { namespace { Y_UNIT_TEST_SUITE(TPQTestInternal) { @@ -205,5 +204,4 @@ Y_UNIT_TEST(TestBatchPacking) { } // TInternalsTest -} // NPQ -} // NKikimr +} // namespace NKikimr::NPQ diff --git a/ydb/core/persqueue/ut/pq_ut.cpp b/ydb/core/persqueue/ut/pq_ut.cpp index dd0de2f913..51c2dc6509 100644 --- a/ydb/core/persqueue/ut/pq_ut.cpp +++ b/ydb/core/persqueue/ut/pq_ut.cpp @@ -14,7 +14,7 @@ #include <util/system/valgrind.h> -namespace NKikimr { +namespace NKikimr::NPQ { const static TString TOPIC_NAME = "rt3.dc1--topic"; @@ -32,7 +32,7 @@ Y_UNIT_TEST(TestGroupsBalancer) { ui64 ssId = 325; BootFakeSchemeShard(*tc.Runtime, ssId, state); - BalancerPrepare(TOPIC_NAME, {{0,{1, 1}}, {11,{1, 1}}, {1,{1, 2}}, {2,{1, 2}}}, ssId, tc); + PQBalancerPrepare(TOPIC_NAME, {{0,{1, 1}}, {11,{1, 1}}, {1,{1, 2}}, {2,{1, 2}}}, ssId, tc); TActorId pipe = RegisterReadSession("session1", tc); Y_UNUSED(pipe); @@ -80,7 +80,7 @@ Y_UNIT_TEST(TestGroupsBalancer2) { ui64 ssId = 325; BootFakeSchemeShard(*tc.Runtime, ssId, state); - BalancerPrepare(TOPIC_NAME, {{0, {1, 1}}, {1, {1, 2}}, {2, {1, 3}}, {3, {1, 4}}}, ssId, tc); + PQBalancerPrepare(TOPIC_NAME, {{0, {1, 1}}, {1, {1, 2}}, {2, {1, 3}}, {3, {1, 4}}}, ssId, tc); TActorId pipe = RegisterReadSession("session1", tc, {1,2}); Y_UNUSED(pipe); @@ -108,7 +108,7 @@ Y_UNIT_TEST(TestGroupsBalancer3) { ui64 ssId = 325; BootFakeSchemeShard(*tc.Runtime, ssId, state); - BalancerPrepare(TOPIC_NAME, {{0, {1, 1}}, {1, {1, 2}} }, ssId, tc); + PQBalancerPrepare(TOPIC_NAME, {{0, {1, 1}}, {1, {1, 2}} }, ssId, tc); TActorId pipe = RegisterReadSession("session", tc, {2}); @@ -288,18 +288,18 @@ Y_UNIT_TEST(TestCreateBalancer) { ui64 ssId = 
325; BootFakeSchemeShard(*tc.Runtime, ssId, state); - BalancerPrepare(TOPIC_NAME, {{1,{1,2}}}, ssId, tc); + PQBalancerPrepare(TOPIC_NAME, {{1,{1,2}}}, ssId, tc); TActorId pipe1 = RegisterReadSession("session0", tc, {1}); - BalancerPrepare(TOPIC_NAME, {{1,{1,2}}, {2,{1,3}}}, ssId, tc); + PQBalancerPrepare(TOPIC_NAME, {{1,{1,2}}, {2,{1,3}}}, ssId, tc); tc.Runtime->Send(new IEventHandle(pipe1, tc.Edge, new TEvents::TEvPoisonPill()), 0, true); //will cause dying of pipe and first session -// BalancerPrepare(TOPIC_NAME, {{2,1}}, tc); //TODO: not supported yet -// BalancerPrepare(TOPIC_NAME, {{1,1}}, tc); // TODO: not supported yet - BalancerPrepare(TOPIC_NAME, {{1,{1, 2}}, {2,{1, 3}}, {3,{1, 4}}}, ssId, tc); +// PQBalancerPrepare(TOPIC_NAME, {{2,1}}, tc); //TODO: not supported yet +// PQBalancerPrepare(TOPIC_NAME, {{1,1}}, tc); // TODO: not supported yet + PQBalancerPrepare(TOPIC_NAME, {{1,{1, 2}}, {2,{1, 3}}, {3,{1, 4}}}, ssId, tc); activeZone = false; TActorId pipe = RegisterReadSession("session1", tc); @@ -336,7 +336,7 @@ Y_UNIT_TEST(TestDescribeBalancer) { tc.Runtime->SetScheduledLimit(50); tc.Runtime->SetDispatchTimeout(TDuration::MilliSeconds(100)); - BalancerPrepare(TOPIC_NAME, {{1,{1, 2}}}, ssId, tc); + PQBalancerPrepare(TOPIC_NAME, {{1,{1, 2}}}, ssId, tc); TAutoPtr<IEventHandle> handle; tc.Runtime->SendToPipe(tc.BalancerTabletId, tc.Edge, new TEvPersQueue::TEvDescribe(), 0, GetPipeConfigWithRetries()); TEvPersQueue::TEvDescribeResponse* result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvDescribeResponse>(handle); @@ -377,7 +377,7 @@ Y_UNIT_TEST(TestCheckACL) { tc.Runtime->SetScheduledLimit(600); tc.Runtime->SetDispatchTimeout(TDuration::MilliSeconds(100)); - BalancerPrepare(TOPIC_NAME, {{1,{1, 2}}}, ssId, tc); + PQBalancerPrepare(TOPIC_NAME, {{1,{1, 2}}}, ssId, tc); { TDispatchOptions options; @@ -459,7 +459,7 @@ Y_UNIT_TEST(TestCheckACL) { request->Record.SetOperation(NKikimrPQ::EOperation::READ_OP); request->Record.SetToken(""); - BalancerPrepare(TOPIC_NAME, 
{{1,{1, 2}}}, ssId, tc, true); + PQBalancerPrepare(TOPIC_NAME, {{1,{1, 2}}}, ssId, tc, true); tc.Runtime->SendToPipe(tc.BalancerTabletId, tc.Edge, request.Release(), 0, GetPipeConfigWithRetries()); result = tc.Runtime->GrabEdgeEvent<TEvPersQueue::TEvCheckACLResponse>(handle); auto& rec7 = result->Record; @@ -2087,4 +2087,4 @@ Y_UNIT_TEST(TestTabletRestoreEventsOrder) { } } // Y_UNIT_TEST_SUITE(TPQTest) -} // NKikimr +} // namespace NKikimr::NPQ diff --git a/ydb/core/persqueue/ut/resources/counters_datastreams.html b/ydb/core/persqueue/ut/resources/counters_datastreams.html index dfe910e132..bdf68ef1ac 100644 --- a/ydb/core/persqueue/ut/resources/counters_datastreams.html +++ b/ydb/core/persqueue/ut/resources/counters_datastreams.html @@ -1,94 +1,71 @@ <pre> -cloud=somecloud: +topic=topic: + name=api.grpc.topic.stream_write.bytes: 540 + name=api.grpc.topic.stream_write.messages: 30 + name=topic.write.bytes: 540 + name=topic.write.messages: 30 + name=topic.write.uncompressed_bytes: 270 - folder=somefolder: + consumer=user: + name=api.grpc.topic.stream_read.bytes: 0 + name=api.grpc.topic.stream_read.messages: 0 + name=topic.read.bytes: 0 + name=topic.read.messages: 0 - database=PQ: + name=topic.read.lag_milliseconds: + bin=100: 0 + bin=1000: 0 + bin=10000: 0 + bin=180000: 0 + bin=200: 0 + bin=2000: 0 + bin=30000: 0 + bin=500: 0 + bin=5000: 0 + bin=60000: 0 + bin=999999: 0 - stream=topic: - name=stream.incoming_bytes_per_second: 2700 - name=stream.incoming_records_per_second: 150 - name=stream.internal_write.bytes_per_second: 2700 - name=stream.internal_write.compacted_bytes_per_second: 3720 - name=stream.internal_write.records_per_second: 150 - name=stream.internal_write.uncompressed_bytes_per_second: 1350 + name=api.grpc.topic.stream_write.partition_throttled_milliseconds: + bin=0: 30 + bin=1: 0 + bin=10: 0 + bin=100: 0 + bin=1000: 0 + bin=10000: 0 + bin=20: 0 + bin=2500: 0 + bin=5: 0 + bin=50: 0 + bin=500: 0 + bin=5000: 0 + bin=999999: 0 - consumer=user: - 
name=stream.internal_read.bytes_per_second: 0 - name=stream.internal_read.records_per_second: 0 - name=stream.outgoing_bytes_per_second: 0 - name=stream.outgoing_records_per_second: 0 + name=topic.write.lag_milliseconds: + bin=100: 30 + bin=1000: 0 + bin=10000: 0 + bin=180000: 0 + bin=200: 0 + bin=2000: 0 + bin=30000: 0 + bin=500: 0 + bin=5000: 0 + bin=60000: 0 + bin=999999: 0 - host=1: - - shard=0: - name=stream.await_operating_milliseconds: 0 - - shard=1: - name=stream.await_operating_milliseconds: 0 - - name=stream.internal_read.time_lags_milliseconds: - bin=100: 0 - bin=1000: 0 - bin=10000: 0 - bin=180000: 0 - bin=200: 0 - bin=2000: 0 - bin=30000: 0 - bin=500: 0 - bin=5000: 0 - bin=60000: 0 - bin=999999: 0 - - host=1: - - shard=0: - name=stream.internal_write.buffer_brimmed_duration_ms: 0 - - shard=1: - name=stream.internal_write.buffer_brimmed_duration_ms: 0 - - name=stream.internal_write.partition_write_quota_wait_milliseconds: - bin=0: 150 - bin=1: 0 - bin=10: 0 - bin=100: 0 - bin=1000: 0 - bin=10000: 0 - bin=20: 0 - bin=2500: 0 - bin=5: 0 - bin=50: 0 - bin=500: 0 - bin=5000: 0 - bin=999999: 0 - - name=stream.internal_write.record_size_bytes: - bin=1024: 150 - bin=10240: 0 - bin=102400: 0 - bin=1048576: 0 - bin=10485760: 0 - bin=20480: 0 - bin=204800: 0 - bin=2097152: 0 - bin=5120: 0 - bin=51200: 0 - bin=524288: 0 - bin=5242880: 0 - bin=67108864: 0 - bin=99999999: 0 - - name=stream.internal_write.time_lags_milliseconds: - bin=100: 150 - bin=1000: 0 - bin=10000: 0 - bin=180000: 0 - bin=200: 0 - bin=2000: 0 - bin=30000: 0 - bin=500: 0 - bin=5000: 0 - bin=60000: 0 - bin=999999: 0 + name=topic.write.message_size_bytes: + bin=1024: 30 + bin=10240: 0 + bin=102400: 0 + bin=1048576: 0 + bin=10485760: 0 + bin=20480: 0 + bin=204800: 0 + bin=2097152: 0 + bin=5120: 0 + bin=51200: 0 + bin=524288: 0 + bin=5242880: 0 + bin=67108864: 0 + bin=99999999: 0 </pre> diff --git a/ydb/core/persqueue/ut/resources/counters_labeled.json 
b/ydb/core/persqueue/ut/resources/counters_labeled.json index f326beb2cd..e4bc14b3dd 100644 --- a/ydb/core/persqueue/ut/resources/counters_labeled.json +++ b/ydb/core/persqueue/ut/resources/counters_labeled.json @@ -1 +1 @@ -{"sensors":[{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/MessageLagByCommitted"},"value":30},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/MessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/PartitionMaxReadQuotaUsage"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesAvailAvgMin"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesAvailAvgSec"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerMin"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","import
ant":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerMin"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesQuota"},"value":1000000000},{"kind":"RATE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadOffsetRewindSum"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadTimeLagMs"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/SizeLagByCommitted"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/SizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TimeSinceLastReadMs"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TotalMessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TotalSizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TotalTimeLagMsByLastRead"},"value":4929},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","impo
rtant":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/UserPartitionsAnswered"},"value":2},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteTimeLagMsByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteTimeLagMsByLastReadOld"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/MessageLagByCommitted"},"value":30},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/MessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/PartitionMaxReadQuotaUsage"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesAvailAvgMin"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesAvailAvgSec"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerMin"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerSec"},"value":0}
,{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerMin"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesQuota"},"value":1000000000},{"kind":"RATE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadOffsetRewindSum"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadTimeLagMs"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/SizeLagByCommitted"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/SizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TimeSinceLastReadMs"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TotalMessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ
/TotalSizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TotalTimeLagMsByLastRead"},"value":4929},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/UserPartitionsAnswered"},"value":2},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteTimeLagMsByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteTimeLagMsByLastReadOld"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/MessageLagByCommitted"},"value":30},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/MessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/PartitionMaxReadQuotaUsage"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesAvailAvgMin"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesAvailAvgSec"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesMaxPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesMaxPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesMaxPerMin"},"value":0}
,{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesMaxPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesPerMin"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesQuota"},"value":1000000000},{"kind":"RATE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadOffsetRewindSum"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadTimeLagMs"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/SizeLagByCommitted"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/SizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/TimeSinceLastReadMs"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/TotalMessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/TotalSizeLagByLastRead"},"value":744},{"kind":"GAUG
E","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/TotalTimeLagMsByLastRead"},"value":4929},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/UserPartitionsAnswered"},"value":2},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/WriteTimeLagMsByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/WriteTimeLagMsByLastReadOld"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/MessageLagByCommitted"},"value":30},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/MessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/PartitionMaxReadQuotaUsage"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesAvailAvgMin"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesAvailAvgSec"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerMin"},"value":0},{"kind":"GAUGE","labels
":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerMin"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesQuota"},"value":1000000000},{"kind":"RATE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadOffsetRewindSum"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadTimeLagMs"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/SizeLagByCommitted"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/SizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TimeSinceLastReadMs"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TotalMessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueu
e","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TotalSizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TotalTimeLagMsByLastRead"},"value":4929},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/UserPartitionsAnswered"},"value":2},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteTimeLagMsByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteTimeLagMsByLastReadOld"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/MessageLagByCommitted"},"value":30},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/MessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/PartitionMaxReadQuotaUsage"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesAvailAvgMin"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesAvailAvgSec"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesMaxPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesMaxPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesMa
xPerMin"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesMaxPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesPerMin"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesQuota"},"value":1000000000},{"kind":"RATE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadOffsetRewindSum"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadTimeLagMs"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/SizeLagByCommitted"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/SizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/TimeSinceLastReadMs"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/TotalMessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/TotalSizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","cli
ent":"user","important":"0","topic":"total","sensor":"PQ/TotalTimeLagMsByLastRead"},"value":4929},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/UserPartitionsAnswered"},"value":2},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/WriteTimeLagMsByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/WriteTimeLagMsByLastReadOld"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/MessageLagByCommitted"},"value":30},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/MessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/PartitionMaxReadQuotaUsage"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadBytesAvailAvgMin"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadBytesAvailAvgSec"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadBytesMaxPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadBytesMaxPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadBytesMaxPerMin"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadBytesMaxPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters"
:"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadBytesPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadBytesPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadBytesPerMin"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadBytesPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadBytesQuota"},"value":1000000000},{"kind":"RATE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadOffsetRewindSum"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadTimeLagMs"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/SizeLagByCommitted"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/SizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/TimeSinceLastReadMs"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/TotalMessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/TotalSizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/TotalTimeLagMsByLastRead"},"value":4929},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","clie
nt":"user","important":"total","topic":"total","sensor":"PQ/UserPartitionsAnswered"},"value":2},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/WriteTimeLagMsByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/WriteTimeLagMsByLastReadOld"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/GapsCount"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/GapsMaxCount"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/GapsMaxSize"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/GapsSize"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/MaxPartSize"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/PartitionInitTimeMs"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/PartitionLifeTimeMs"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/PartitionMaxWriteQuotaUsage"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/PartitionsAnswered"},"value":2},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/QuotaBytesMaxPerDay"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/QuotaBytesMaxPerHour"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/QuotaBytesMaxPerMin"},"value":540},{"kind":"GAU
GE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/QuotaBytesMaxPerSec"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/QuotaBytesPerDay"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/QuotaBytesPerHour"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/QuotaBytesPerMin"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/QuotaBytesPerSec"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/SourceIdCount"},"value":3},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/SourceIdMaxCount"},"value":3},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/SourceIdMinLifetimeMs"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TotalPartSize"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesAvailAvgMin"},"value":49999998},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesAvailAvgSec"},"value":50000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesMaxPerDay"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesMaxPerHour"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesMaxPerMin"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesMaxPerSec"},"value":540},{"
kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesPerDay"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesPerHour"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesPerMin"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesPerSec"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesQuota"},"value":50000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteTimeLagMsByLastWrite"},"value":32},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/GapsCount"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/GapsMaxCount"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/GapsMaxSize"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/GapsSize"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/MaxPartSize"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/PartitionInitTimeMs"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/PartitionLifeTimeMs"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/PartitionMaxWriteQuotaUsage"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/PartitionsAnswered"},"value":2},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/QuotaBytesMaxPerDay"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"tota
l","sensor":"PQ/QuotaBytesMaxPerHour"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/QuotaBytesMaxPerMin"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/QuotaBytesMaxPerSec"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/QuotaBytesPerDay"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/QuotaBytesPerHour"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/QuotaBytesPerMin"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/QuotaBytesPerSec"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/SourceIdCount"},"value":3},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/SourceIdMaxCount"},"value":3},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/SourceIdMinLifetimeMs"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/TotalPartSize"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteBytesAvailAvgMin"},"value":49999998},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteBytesAvailAvgSec"},"value":50000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteBytesMaxPerDay"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteBytesMaxPerHour"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteBytesMaxPerMin"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteBytesMaxPerSec"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sens
or":"PQ/WriteBytesPerDay"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteBytesPerHour"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteBytesPerMin"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteBytesPerSec"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteBytesQuota"},"value":50000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteTimeLagMsByLastWrite"},"value":32}]} +{"sensors":[{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/MessageLagByCommitted"},"value":30},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/MessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/PartitionMaxReadQuotaUsage"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesAvailAvgMin"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesAvailAvgSec"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerMin"},"value":0}
,{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerMin"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesQuota"},"value":1000000000},{"kind":"RATE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadOffsetRewindSum"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadTimeLagMs"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/SizeLagByCommitted"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/SizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TimeSinceLastReadMs"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TotalMessageLagByLastRead"},"value":29},{"kind":"GAUGE"
,"labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TotalSizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TotalTimeLagMsByLastRead"},"value":4929},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/UserPartitionsAnswered"},"value":2},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteTimeLagMsByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteTimeLagMsByLastReadOld"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/MessageLagByCommitted"},"value":30},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/MessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/PartitionMaxReadQuotaUsage"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesAvailAvgMin"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesAvailAvgSec"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"r
t3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerMin"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerMin"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesQuota"},"value":1000000000},{"kind":"RATE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadOffsetRewindSum"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadTimeLagMs"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/SizeLagByCommitted"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/SizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","importan
t":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TimeSinceLastReadMs"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TotalMessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TotalSizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TotalTimeLagMsByLastRead"},"value":4929},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/UserPartitionsAnswered"},"value":2},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteTimeLagMsByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteTimeLagMsByLastReadOld"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/MessageLagByCommitted"},"value":30},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/MessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/PartitionMaxReadQuotaUsage"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesAvailAvgMin"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesAvailAvgSec"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQue
ue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesMaxPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesMaxPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesMaxPerMin"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesMaxPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesPerMin"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadBytesQuota"},"value":1000000000},{"kind":"RATE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadOffsetRewindSum"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/ReadTimeLagMs"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/SizeLagByCommitted"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/SizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important
":"total","topic":"total","sensor":"PQ/TimeSinceLastReadMs"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/TotalMessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/TotalSizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/TotalTimeLagMsByLastRead"},"value":4929},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/UserPartitionsAnswered"},"value":2},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/WriteTimeLagMsByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"total","important":"total","topic":"total","sensor":"PQ/WriteTimeLagMsByLastReadOld"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/MessageLagByCommitted"},"value":30},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/MessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/PartitionMaxReadQuotaUsage"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesAvailAvgMin"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesAvailAvgSec"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs
--topic","sensor":"PQ/ReadBytesMaxPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerMin"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesMaxPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerMin"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadBytesQuota"},"value":1000000000},{"kind":"RATE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadOffsetRewindSum"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/ReadTimeLagMs"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/SizeLagByCommitted"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/SizeLagByLastRead"
},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TimeSinceLastReadMs"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TotalMessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TotalSizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TotalTimeLagMsByLastRead"},"value":4929},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/UserPartitionsAnswered"},"value":2},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteTimeLagMsByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteTimeLagMsByLastReadOld"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/MessageLagByCommitted"},"value":30},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/MessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/PartitionMaxReadQuotaUsage"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesAvailAvgMin"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesAvailAvgSec"},"value":1000000000},{"kind":"GAUGE"
,"labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesMaxPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesMaxPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesMaxPerMin"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesMaxPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesPerMin"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadBytesQuota"},"value":1000000000},{"kind":"RATE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadOffsetRewindSum"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/ReadTimeLagMs"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/SizeLagByCommitted"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/SizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":
"PQ/TimeSinceLastReadMs"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/TotalMessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/TotalSizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/TotalTimeLagMsByLastRead"},"value":4929},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/UserPartitionsAnswered"},"value":2},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/WriteTimeLagMsByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"0","topic":"total","sensor":"PQ/WriteTimeLagMsByLastReadOld"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/MessageLagByCommitted"},"value":30},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/MessageLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/PartitionMaxReadQuotaUsage"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadBytesAvailAvgMin"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadBytesAvailAvgSec"},"value":1000000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadBytesMaxPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total"
,"topic":"total","sensor":"PQ/ReadBytesMaxPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadBytesMaxPerMin"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadBytesMaxPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadBytesPerDay"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadBytesPerHour"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadBytesPerMin"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadBytesPerSec"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadBytesQuota"},"value":1000000000},{"kind":"RATE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadOffsetRewindSum"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/ReadTimeLagMs"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/SizeLagByCommitted"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/SizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/TimeSinceLastReadMs"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/TotalMessa
geLagByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/TotalSizeLagByLastRead"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/TotalTimeLagMsByLastRead"},"value":4929},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/UserPartitionsAnswered"},"value":2},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/WriteTimeLagMsByLastRead"},"value":29},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","client":"user","important":"total","topic":"total","sensor":"PQ/WriteTimeLagMsByLastReadOld"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/GapsCount"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/GapsMaxCount"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/GapsMaxSize"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/GapsSize"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/MaxPartSize"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/PartitionInitTimeMs"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/PartitionLifeTimeMs"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/PartitionMaxWriteQuotaUsage"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/PartitionsAnswered"},"value":2},{"kind":"GAU
GE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/PartitionsTotal"},"value":2},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/QuotaBytesMaxPerDay"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/QuotaBytesMaxPerHour"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/QuotaBytesMaxPerMin"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/QuotaBytesMaxPerSec"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/QuotaBytesPerDay"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/QuotaBytesPerHour"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/QuotaBytesPerMin"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/QuotaBytesPerSec"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/SourceIdCount"},"value":3},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/SourceIdMaxCount"},"value":3},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/SourceIdMinLifetimeMs"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/TotalPartSize"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesAvailAvgMin"},"value":49999998},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesAvailAvgSec"},"value":50000000},{"kind":
"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesMaxPerDay"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesMaxPerHour"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesMaxPerMin"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesMaxPerSec"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesPerDay"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesPerHour"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesPerMin"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesPerSec"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteBytesQuota"},"value":50000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"rt3.dc1--asdfgs--topic","sensor":"PQ/WriteTimeLagMsByLastWrite"},"value":32},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/GapsCount"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/GapsMaxCount"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/GapsMaxSize"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/GapsSize"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/MaxPartSize"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/PartitionInitTimeMs"},"value":0},{"kind":
"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/PartitionLifeTimeMs"},"value":5000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/PartitionMaxWriteQuotaUsage"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/PartitionsAnswered"},"value":2},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/PartitionsTotal"},"value":2},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/QuotaBytesMaxPerDay"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/QuotaBytesMaxPerHour"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/QuotaBytesMaxPerMin"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/QuotaBytesMaxPerSec"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/QuotaBytesPerDay"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/QuotaBytesPerHour"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/QuotaBytesPerMin"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/QuotaBytesPerSec"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/SourceIdCount"},"value":3},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/SourceIdMaxCount"},"value":3},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/SourceIdMinLifetimeMs"},"value":0},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/TotalPartSize"},"value":744},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteBytesAvailAvgMin"},"value":49999998},{"kind":"GAUGE","labels":{
"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteBytesAvailAvgSec"},"value":50000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteBytesMaxPerDay"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteBytesMaxPerHour"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteBytesMaxPerMin"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteBytesMaxPerSec"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteBytesPerDay"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteBytesPerHour"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteBytesPerMin"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteBytesPerSec"},"value":540},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteBytesQuota"},"value":50000000},{"kind":"GAUGE","labels":{"user_counters":"PersQueue","topic":"total","sensor":"PQ/WriteTimeLagMsByLastWrite"},"value":32}]} diff --git a/ydb/core/persqueue/ut/slow/CMakeLists.darwin.txt b/ydb/core/persqueue/ut/slow/CMakeLists.darwin.txt index 63691ad6c5..9eee505416 100644 --- a/ydb/core/persqueue/ut/slow/CMakeLists.darwin.txt +++ b/ydb/core/persqueue/ut/slow/CMakeLists.darwin.txt @@ -23,8 +23,8 @@ target_link_libraries(ydb-core-persqueue-ut-slow PUBLIC library-cpp-getopt cpp-regex-pcre library-cpp-svnversion - ydb-core-testlib persqueue-ut-common + ydb-core-testlib ) target_link_options(ydb-core-persqueue-ut-slow PRIVATE -Wl,-no_deduplicate diff --git a/ydb/core/persqueue/ut/slow/CMakeLists.linux.txt b/ydb/core/persqueue/ut/slow/CMakeLists.linux.txt index 40a526635c..b830953927 100644 --- 
a/ydb/core/persqueue/ut/slow/CMakeLists.linux.txt +++ b/ydb/core/persqueue/ut/slow/CMakeLists.linux.txt @@ -25,8 +25,8 @@ target_link_libraries(ydb-core-persqueue-ut-slow PUBLIC library-cpp-getopt cpp-regex-pcre library-cpp-svnversion - ydb-core-testlib persqueue-ut-common + ydb-core-testlib ) target_link_options(ydb-core-persqueue-ut-slow PRIVATE -ldl diff --git a/ydb/core/persqueue/ut/slow/pq_ut.cpp b/ydb/core/persqueue/ut/slow/pq_ut.cpp index e50533be51..6227c8bc6a 100644 --- a/ydb/core/persqueue/ut/slow/pq_ut.cpp +++ b/ydb/core/persqueue/ut/slow/pq_ut.cpp @@ -19,7 +19,7 @@ #include <ydb/public/lib/base/msgbus.h> -namespace NKikimr { +namespace NKikimr::NPQ { Y_UNIT_TEST_SUITE(TPQTestSlow) { @@ -148,5 +148,5 @@ Y_UNIT_TEST(TestOnDiskStoredSourceIds) { }); } -} // TKeyValueTest -} // namespace NKikimr +} // Y_UNIT_TEST_SUITE(TPQTestSlow) +} // namespace NKikimr::NPQ diff --git a/ydb/core/persqueue/ut/sourceid_ut.cpp b/ydb/core/persqueue/ut/sourceid_ut.cpp index 46e03b2d3f..07a0a3944b 100644 --- a/ydb/core/persqueue/ut/sourceid_ut.cpp +++ b/ydb/core/persqueue/ut/sourceid_ut.cpp @@ -5,8 +5,7 @@ #include <library/cpp/testing/unittest/registar.h> -namespace NKikimr { -namespace NPQ { +namespace NKikimr::NPQ { Y_UNIT_TEST_SUITE(TSourceIdTests) { inline static TString TestSourceId(ui64 idx = 0) { @@ -326,5 +325,4 @@ Y_UNIT_TEST_SUITE(TSourceIdTests) { } // TSourceIdTests -} // NPQ -} // NKikimr +} // namespace NKikimr::NPQ diff --git a/ydb/core/persqueue/ut/type_codecs_ut.cpp b/ydb/core/persqueue/ut/type_codecs_ut.cpp index d9f0b64755..af0e1e1f53 100644 --- a/ydb/core/persqueue/ut/type_codecs_ut.cpp +++ b/ydb/core/persqueue/ut/type_codecs_ut.cpp @@ -8,7 +8,7 @@ #include <util/random/fast.h> #include <util/datetime/base.h> -namespace NKikimr { +namespace NKikimr::NPQ { using ICodec = NScheme::ICodec; using TTypeCodecs = NScheme::TTypeCodecs; @@ -256,5 +256,4 @@ Y_UNIT_TEST_SUITE(TTypeCodecsTest) { } -} // namespace NKikimr - +} // namespace NKikimr::NPQ diff --git 
a/ydb/core/persqueue/ut/user_info_ut.cpp b/ydb/core/persqueue/ut/user_info_ut.cpp index 8f8ec0aa58..1285253448 100644 --- a/ydb/core/persqueue/ut/user_info_ut.cpp +++ b/ydb/core/persqueue/ut/user_info_ut.cpp @@ -32,4 +32,4 @@ namespace NKikimr::NPQ { } } } -} +} // namespace NKikimr::NPQ diff --git a/ydb/core/protos/cms.proto b/ydb/core/protos/cms.proto index 85168eab00..3d9a359df7 100644 --- a/ydb/core/protos/cms.proto +++ b/ydb/core/protos/cms.proto @@ -601,11 +601,20 @@ message TGetSentinelStateRequest { SUSPICIOUS = 2; ALL = 3; } + optional EShow Show = 1; repeated TFilterRange Ranges = 2; } message TPDiskInfo { + enum EIgnoreReason { + NOT_IGNORED = 1; + MISSING_NODE = 2; + RATIO_BY_DATACENTER = 3; + RATIO_BY_ROOM = 4; + RATIO_BY_RACK = 5; + } + optional uint32 State = 1; // EPDiskState optional uint32 PrevState = 2; // EPDiskState optional uint64 StateCounter = 3; @@ -617,6 +626,8 @@ message TPDiskInfo { optional uint32 PrevDesiredStatus = 9; optional uint32 PrevStatusChangeAttempts = 10; optional string LastStatusChange = 11; + optional EIgnoreReason IgnoreReason = 12; + optional bool StatusChangeFailed = 13; } message TPDisk { diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto index b4e86c332c..8ee2919f09 100644 --- a/ydb/core/protos/config.proto +++ b/ydb/core/protos/config.proto @@ -65,6 +65,10 @@ message TActorSystemConfig { optional uint32 MaxThreads = 13; // Higher balancing bound, should be not lower than `Threads` optional uint32 BalancingPriority = 14; // Priority of pool to obtain cpu due to balancing (higher is better) optional uint64 ToleratedLatencyUs = 15; // p100-latency threshold indicating that more cpus are required by pool + + // Actorsystem 1.4 + optional int32 Priority = 16; + optional int32 MaxAvgPingDeviation = 17; } message TScheduler { @@ -680,7 +684,7 @@ message TFeatureFlags { optional bool AllowStreamExecuteYqlScript = 42 [default = true]; optional bool EnableKqpScanOverPersistentSnapshot = 43 [default = 
true]; // deprecated: always true optional bool EnableOlapSchemaOperations = 44 [default = false]; - optional bool EnableVPatch = 45 [default = true]; + optional bool EnableVPatch = 45 [default = false]; optional bool EnableMvccSnapshotReads = 46 [default = true]; optional Tribool EnableMvcc = 47 [default = VALUE_TRUE]; optional bool EnableSchemeTransactionsAtSchemeShard = 48 [default = true]; @@ -708,6 +712,8 @@ message TFeatureFlags { optional bool EnableMoveIndex = 70 [default = false]; optional bool EnableNotNullDataColumns = 73 [default = false]; optional bool EnableGrpcAudit = 74 [default = false]; + optional bool EnableKqpDataQueryStreamLookup = 75 [default = false]; + optional bool EnableBorrowedSplitCompaction = 76 [default = true]; } @@ -1149,7 +1155,7 @@ message TTableServiceConfig { optional uint32 ForceNewEngineLevel = 22 [default = 0]; optional uint32 CompileQueryCacheTTLSec = 20 [default = 0]; optional TQueryReplayConfig QueryReplayConfig = 21; - optional bool EnableKqpSessionActor = 23 [default = false]; + optional bool EnableKqpSessionActor = 23 [default = true]; }; // Config describes immediate controls and allows diff --git a/ydb/core/protos/counters.proto b/ydb/core/protos/counters.proto index 1c2552ed42..f0e2d4ffc5 100644 --- a/ydb/core/protos/counters.proto +++ b/ydb/core/protos/counters.proto @@ -16,7 +16,7 @@ message TCounterOptions { optional string Name = 1; repeated TRange Ranges = 2; optional bool Integral = 3; - optional string ServerlessName = 4; + optional string SVName = 4; } message TTxTypeOptions { @@ -39,7 +39,7 @@ message TLabeledCounterOptions { optional string Name = 1; optional EAggregateFunc AggrFunc = 2; optional ECounterType Type = 3 [default = CT_SIMPLE]; - optional string ServerlessName = 4 [default = ""]; + optional string SVName = 4 [default = ""]; } message TLabeledCounterGroupNamesOptions { diff --git a/ydb/core/protos/counters_datashard.proto b/ydb/core/protos/counters_datashard.proto index bcc87adcb1..8ebee53343 
100644 --- a/ydb/core/protos/counters_datashard.proto +++ b/ydb/core/protos/counters_datashard.proto @@ -20,6 +20,9 @@ enum ESimpleCounters { COUNTER_MVCC_STATE_CHANGE_WAIT_TX_IN_FLY = 10 [(CounterOpts) = {Name: "MvccStateChangeWaitTxInFly"}]; COUNTER_MVCC_STATE_CHANGE_WAIT_IMMEDIATE_TX_IN_FLY = 11 [(CounterOpts) = {Name: "MvccStateChangeWaitImmediateTxInFly"}]; COUNTER_MVCC_ENABLED = 12 [(CounterOpts) = {Name: "MvccEnabled"}]; + COUNTER_CHANGE_QUEUE_SIZE = 13 [(CounterOpts) = {Name: "ChangeQueueSize"}]; + COUNTER_READ_ITERATORS_WAITING = 14 [(CounterOpts) = {Name: "ReadIteratorsWaiting"}]; + COUNTER_READ_ITERATORS_COUNT = 15 [(CounterOpts) = {Name: "ReadIteratorsCount"}]; } enum ECumulativeCounters { @@ -111,6 +114,14 @@ enum ECumulativeCounters { COUNTER_FULL_COMPACTION_DONE = 85 [(CounterOpts) = {Name: "FullCompactionCount"}]; COUNTER_TX_BACKGROUND_COMPACTION_FAILED_LOANED = 86 [(CounterOpts) = {Name: "TxCompactTableFailedLoaned"}]; COUNTER_TX_COMPACT_BORROWED = 87 [(CounterOpts) = {Name: "TxCompactBorrowed"}]; + COUNTER_CHANGE_RECORDS_ENQUEUED = 88 [(CounterOpts) = {Name: "ChangeRecordsEnqueued"}]; + COUNTER_CHANGE_RECORDS_REMOVED = 89 [(CounterOpts) = {Name: "ChangeRecordsRemoved"}]; + COUNTER_READ_ITERATOR_NO_QUOTA = 90 [(CounterOpts) = {Name: "ReadIteratorNoQuota"}]; + COUNTER_READ_ITERATOR_MAX_ROWS_REACHED = 91 [(CounterOpts) = {Name: "ReadIteratorMaxRowsReached"}]; + COUNTER_READ_ITERATOR_MAX_TIME_REACHED = 92 [(CounterOpts) = {Name: "ReadIteratorMaxTimeReached"}]; + COUNTER_READ_ITERATOR_ROWS_READ = 93 [(CounterOpts) = {Name: "ReadIteratorRowsRead"}]; + COUNTER_READ_ITERATOR_BYTES_READ = 94 [(CounterOpts) = {Name: "ReadIteratorBytesRead"}]; + COUNTER_READ_ITERATOR_CANCEL = 95 [(CounterOpts) = {Name: "ReadIteratorCancel"}]; } enum EPercentileCounters { @@ -336,6 +347,30 @@ enum EPercentileCounters { Ranges: { Value: 15000 Name: "15000"}, Ranges: { Value: 30000 Name: "30000"} }]; + + COUNTER_READ_ITERATOR_LIFETIME_MS = 18 [(CounterOpts) = { + Name: 
"ReadIteratorLifetimeMs", + Ranges: { Value: 0 Name: "0"}, + Ranges: { Value: 1 Name: "1"}, + Ranges: { Value: 2 Name: "2"}, + Ranges: { Value: 5 Name: "5"}, + Ranges: { Value: 10 Name: "10"}, + Ranges: { Value: 25 Name: "25"}, + Ranges: { Value: 50 Name: "50"}, + Ranges: { Value: 125 Name: "125"}, + Ranges: { Value: 250 Name: "250"}, + Ranges: { Value: 500 Name: "500"}, + Ranges: { Value: 1000 Name: "1000"}, + }]; + + COUNTER_READ_ITERATOR_ITERATION_LATENCY_MS = 19 [(CounterOpts) = { + Name: "ReadIteratorIterationLatencyMs", + Ranges: { Value: 0 Name: "0"}, + Ranges: { Value: 1 Name: "1"}, + Ranges: { Value: 2 Name: "2"}, + Ranges: { Value: 5 Name: "5"}, + Ranges: { Value: 10 Name: "10"}, + }]; } enum ETxTypes { diff --git a/ydb/core/protos/counters_pq.proto b/ydb/core/protos/counters_pq.proto index ca19aef8de..981e4820bd 100644 --- a/ydb/core/protos/counters_pq.proto +++ b/ydb/core/protos/counters_pq.proto @@ -147,38 +147,38 @@ enum EClientLabeledCounters { Names: "topic" }; - METRIC_COMMIT_WRITE_TIME = 0 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByCommitted" AggrFunc : EAF_MIN Type : CT_TIMELAG ServerlessName: "topic.read.max_time_lags_by_committed_milliseconds"}]; - METRIC_COMMIT_CREATE_TIME = 1 [(LabeledCounterOpts) = {Name: "CreateTimeLagMsByCommitted" AggrFunc : EAF_MIN Type : CT_TIMELAG ServerlessName: "topic.read.max_total_time_lag_by_committed_milliseconds"}]; - METRIC_COMMIT_MESSAGE_LAG = 2 [(LabeledCounterOpts) = {Name: "MessageLagByCommitted" AggrFunc : EAF_MAX ServerlessName: ""}]; - METRIC_COMMIT_SIZE_LAG = 3 [(LabeledCounterOpts) = {Name: "SizeLagByCommitted" AggrFunc : EAF_MAX ServerlessName: ""}]; - METRIC_READ_WRITE_TIME = 4 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByLastReadOld" AggrFunc : EAF_MIN Type : CT_TIMELAG ServerlessName: ""}]; - METRIC_READ_TOTAL_TIME = 5 [(LabeledCounterOpts) = {Name: "TotalTimeLagMsByLastRead" AggrFunc : EAF_MAX ServerlessName: "topic.read.max_time_lags_milliseconds"}]; - METRIC_READ_MESSAGE_LAG = 6 
[(LabeledCounterOpts) = {Name: "MessageLagByLastRead" AggrFunc : EAF_MAX ServerlessName: ""}]; - METRIC_READ_SIZE_LAG = 7 [(LabeledCounterOpts) = {Name: "SizeLagByLastRead" AggrFunc : EAF_MAX ServerlessName: ""}]; - METRIC_USER_PARTITIONS = 8 [(LabeledCounterOpts) = {Name: "UserPartitionsAnswered" AggrFunc : EAF_SUM ServerlessName: ""}]; - METRIC_READ_TOTAL_MESSAGE_LAG = 9 [(LabeledCounterOpts) = {Name: "TotalMessageLagByLastRead" AggrFunc : EAF_SUM ServerlessName: ""}]; - METRIC_READ_TOTAL_SIZE_LAG = 10 [(LabeledCounterOpts) = {Name: "TotalSizeLagByLastRead" AggrFunc : EAF_SUM ServerlessName: ""}]; - METRIC_MIN_READ_QUOTA_BYTES_AVAIL_SEC = 11 [(LabeledCounterOpts) = {Name: "ReadBytesAvailAvgSec" AggrFunc : EAF_MIN ServerlessName: ""}]; - METRIC_MIN_READ_QUOTA_BYTES_AVAIL_MIN = 12 [(LabeledCounterOpts) = {Name: "ReadBytesAvailAvgMin" AggrFunc : EAF_MIN ServerlessName: ""}]; - - METRIC_READ_OFFSET_REWIND_SUM = 13 [(LabeledCounterOpts) = {Name: "ReadOffsetRewindSum" AggrFunc : EAF_SUM Type : CT_DERIV ServerlessName: ""}]; - - METRIC_TOTAL_READ_SPEED_1 = 14 [(LabeledCounterOpts) = {Name: "ReadBytesPerSec" AggrFunc : EAF_SUM ServerlessName: ""}]; - METRIC_MAX_READ_SPEED_1 = 15 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerSec" AggrFunc : EAF_MAX ServerlessName: ""}]; - METRIC_TOTAL_READ_SPEED_2 = 16 [(LabeledCounterOpts) = {Name: "ReadBytesPerMin" AggrFunc : EAF_SUM ServerlessName: ""}]; - METRIC_MAX_READ_SPEED_2 = 17 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerMin" AggrFunc : EAF_MAX ServerlessName: ""}]; - METRIC_TOTAL_READ_SPEED_3 = 18 [(LabeledCounterOpts) = {Name: "ReadBytesPerHour" AggrFunc : EAF_SUM ServerlessName: ""}]; - METRIC_MAX_READ_SPEED_3 = 19 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerHour" AggrFunc : EAF_MAX ServerlessName: ""}]; - METRIC_TOTAL_READ_SPEED_4 = 20 [(LabeledCounterOpts) = {Name: "ReadBytesPerDay" AggrFunc : EAF_SUM ServerlessName: ""}]; - METRIC_MAX_READ_SPEED_4 = 21 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerDay" AggrFunc : 
EAF_MAX ServerlessName: ""}]; - - METRIC_READ_QUOTA_BYTES = 22 [(LabeledCounterOpts) = {Name: "ReadBytesQuota" AggrFunc : EAF_MIN ServerlessName: ""}]; - - METRIC_READ_TIME_LAG = 23 [(LabeledCounterOpts) = {Name: "ReadTimeLagMs" AggrFunc : EAF_MAX ServerlessName: ""}]; - METRIC_WRITE_TIME_LAG = 24 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByLastRead" AggrFunc : EAF_MAX ServerlessName: ""}]; - METRIC_LAST_READ_TIME = 25 [(LabeledCounterOpts) = {Name: "TimeSinceLastReadMs" AggrFunc : EAF_MIN Type : CT_TIMELAG ServerlessName: ""}]; - - METRIC_READ_QUOTA_USAGE = 26 [(LabeledCounterOpts) = {Name: "PartitionMaxReadQuotaUsage" AggrFunc : EAF_MAX ServerlessName: ""}]; + METRIC_COMMIT_WRITE_TIME = 0 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByCommitted" AggrFunc : EAF_MIN Type : CT_TIMELAG SVName: "topic.partition.committed_read_lag_milliseconds_max"}]; + METRIC_COMMIT_CREATE_TIME = 1 [(LabeledCounterOpts) = {Name: "CreateTimeLagMsByCommitted" AggrFunc : EAF_MIN Type : CT_TIMELAG SVName: "topic.partition.committed_end_to_end_lag_milliseconds_max"}]; + METRIC_COMMIT_MESSAGE_LAG = 2 [(LabeledCounterOpts) = {Name: "MessageLagByCommitted" AggrFunc : EAF_MAX SVName: "topic.partition.committed_lag_messages_max"}]; + METRIC_COMMIT_SIZE_LAG = 3 [(LabeledCounterOpts) = {Name: "SizeLagByCommitted" AggrFunc : EAF_MAX SVName: ""}]; + METRIC_READ_WRITE_TIME = 4 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByLastReadOld" AggrFunc : EAF_MIN Type : CT_TIMELAG SVName: ""}]; + METRIC_READ_TOTAL_TIME = 5 [(LabeledCounterOpts) = {Name: "TotalTimeLagMsByLastRead" AggrFunc : EAF_MAX SVName: "topic.partition.end_to_end_lag_milliseconds_max"}]; + METRIC_READ_MESSAGE_LAG = 6 [(LabeledCounterOpts) = {Name: "MessageLagByLastRead" AggrFunc : EAF_MAX SVName: "topic.partition.read.lag_messages_max"}]; + METRIC_READ_SIZE_LAG = 7 [(LabeledCounterOpts) = {Name: "SizeLagByLastRead" AggrFunc : EAF_MAX SVName: ""}]; + METRIC_USER_PARTITIONS = 8 [(LabeledCounterOpts) = {Name: "UserPartitionsAnswered" 
AggrFunc : EAF_SUM SVName: "topic.partition.alive_count"}]; + METRIC_READ_TOTAL_MESSAGE_LAG = 9 [(LabeledCounterOpts) = {Name: "TotalMessageLagByLastRead" AggrFunc : EAF_SUM SVName: "topic.read.lag_messages"}]; + METRIC_READ_TOTAL_SIZE_LAG = 10 [(LabeledCounterOpts) = {Name: "TotalSizeLagByLastRead" AggrFunc : EAF_SUM SVName: ""}]; + METRIC_MIN_READ_QUOTA_BYTES_AVAIL_SEC = 11 [(LabeledCounterOpts) = {Name: "ReadBytesAvailAvgSec" AggrFunc : EAF_MIN SVName: ""}]; + METRIC_MIN_READ_QUOTA_BYTES_AVAIL_MIN = 12 [(LabeledCounterOpts) = {Name: "ReadBytesAvailAvgMin" AggrFunc : EAF_MIN SVName: ""}]; + + METRIC_READ_OFFSET_REWIND_SUM = 13 [(LabeledCounterOpts) = {Name: "ReadOffsetRewindSum" AggrFunc : EAF_SUM Type : CT_DERIV SVName: ""}]; + + METRIC_TOTAL_READ_SPEED_1 = 14 [(LabeledCounterOpts) = {Name: "ReadBytesPerSec" AggrFunc : EAF_SUM SVName: ""}]; + METRIC_MAX_READ_SPEED_1 = 15 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerSec" AggrFunc : EAF_MAX SVName: ""}]; + METRIC_TOTAL_READ_SPEED_2 = 16 [(LabeledCounterOpts) = {Name: "ReadBytesPerMin" AggrFunc : EAF_SUM SVName: ""}]; + METRIC_MAX_READ_SPEED_2 = 17 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerMin" AggrFunc : EAF_MAX SVName: ""}]; + METRIC_TOTAL_READ_SPEED_3 = 18 [(LabeledCounterOpts) = {Name: "ReadBytesPerHour" AggrFunc : EAF_SUM SVName: ""}]; + METRIC_MAX_READ_SPEED_3 = 19 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerHour" AggrFunc : EAF_MAX SVName: ""}]; + METRIC_TOTAL_READ_SPEED_4 = 20 [(LabeledCounterOpts) = {Name: "ReadBytesPerDay" AggrFunc : EAF_SUM SVName: ""}]; + METRIC_MAX_READ_SPEED_4 = 21 [(LabeledCounterOpts) = {Name: "ReadBytesMaxPerDay" AggrFunc : EAF_MAX SVName: ""}]; + + METRIC_READ_QUOTA_BYTES = 22 [(LabeledCounterOpts) = {Name: "ReadBytesQuota" AggrFunc : EAF_MIN SVName: ""}]; + + METRIC_READ_TIME_LAG = 23 [(LabeledCounterOpts) = {Name: "ReadTimeLagMs" AggrFunc : EAF_MAX SVName: "topic.partition.read.lag_milliseconds_max"}]; + METRIC_WRITE_TIME_LAG = 24 [(LabeledCounterOpts) = {Name: 
"WriteTimeLagMsByLastRead" AggrFunc : EAF_MAX SVName: "topic.partition.write.lag_milliseconds_max"}]; + METRIC_LAST_READ_TIME = 25 [(LabeledCounterOpts) = {Name: "TimeSinceLastReadMs" AggrFunc : EAF_MIN Type : CT_TIMELAG SVName: "topic.partition.read.idle_milliseconds_max"}]; + + METRIC_READ_QUOTA_USAGE = 26 [(LabeledCounterOpts) = {Name: "PartitionMaxReadQuotaUsage" AggrFunc : EAF_MAX SVName: ""}]; } @@ -188,44 +188,44 @@ enum EPartitionLabeledCounters { Names: "topic" }; - METRIC_LIFE_TIME = 0 [(LabeledCounterOpts) = {Name: "PartitionLifeTimeMs" AggrFunc : EAF_MAX Type : CT_TIMELAG ServerlessName: "topic.min_partition_uptime_milliseconds"}]; - METRIC_INIT_TIME = 1 [(LabeledCounterOpts) = {Name: "PartitionInitTimeMs" AggrFunc : EAF_MAX ServerlessName: "topic.partition.max_init_duration_milliseconds"}]; - METRIC_PARTITIONS = 2 [(LabeledCounterOpts) = {Name: "PartitionsAnswered" AggrFunc : EAF_SUM ServerlessName: "topic.partitions_alive"}]; - METRIC_NUM_SIDS = 3 [(LabeledCounterOpts) = {Name: "SourceIdCount" AggrFunc : EAF_SUM ServerlessName: "topic.producers"}]; - METRIC_MAX_NUM_SIDS = 4 [(LabeledCounterOpts) = {Name: "SourceIdMaxCount" AggrFunc : EAF_MAX ServerlessName: "topic.partition.max_producers"}]; - METRIC_GAPS_COUNT = 5 [(LabeledCounterOpts) = {Name: "GapsCount" AggrFunc : EAF_SUM ServerlessName: ""}]; - METRIC_MAX_GAPS_COUNT = 6 [(LabeledCounterOpts) = {Name: "GapsMaxCount" AggrFunc : EAF_MAX ServerlessName: ""}]; - METRIC_GAPS_SIZE = 7 [(LabeledCounterOpts) = {Name: "GapsSize" AggrFunc : EAF_SUM ServerlessName: ""}]; - METRIC_MAX_GAPS_SIZE = 8 [(LabeledCounterOpts) = {Name: "GapsMaxSize" AggrFunc : EAF_MAX ServerlessName: ""}]; - METRIC_MIN_WRITE_QUOTA_BYTES_AVAIL_SEC = 9 [(LabeledCounterOpts) = {Name: "WriteBytesAvailAvgSec" AggrFunc : EAF_MIN ServerlessName: ""}]; - METRIC_MIN_WRITE_QUOTA_BYTES_AVAIL_MIN = 10 [(LabeledCounterOpts) = {Name: "WriteBytesAvailAvgMin" AggrFunc : EAF_MIN ServerlessName: ""}]; - METRIC_TOTAL_WRITE_SPEED_1 = 11 
[(LabeledCounterOpts) = {Name: "WriteBytesPerSec" AggrFunc : EAF_SUM ServerlessName: ""}]; - METRIC_MAX_WRITE_SPEED_1 = 12 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerSec" AggrFunc : EAF_MAX ServerlessName: "topic.partition.max_incoming_per_second"}]; - METRIC_TOTAL_WRITE_SPEED_2 = 13 [(LabeledCounterOpts) = {Name: "WriteBytesPerMin" AggrFunc : EAF_SUM ServerlessName: ""}]; - METRIC_MAX_WRITE_SPEED_2 = 14 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerMin" AggrFunc : EAF_MAX ServerlessName: "topic.partition.max_incoming_bytes_per_minute"}]; - METRIC_TOTAL_WRITE_SPEED_3 = 15 [(LabeledCounterOpts) = {Name: "WriteBytesPerHour" AggrFunc : EAF_SUM ServerlessName: ""}]; - METRIC_MAX_WRITE_SPEED_3 = 16 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerHour" AggrFunc : EAF_MAX ServerlessName: "topic.partition.max_incoming_per_hour"}]; - METRIC_TOTAL_WRITE_SPEED_4 = 17 [(LabeledCounterOpts) = {Name: "WriteBytesPerDay" AggrFunc : EAF_SUM ServerlessName: ""}]; - METRIC_MAX_WRITE_SPEED_4 = 18 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerDay" AggrFunc : EAF_MAX ServerlessName: ""}]; - - METRIC_TOTAL_PART_SIZE = 19 [(LabeledCounterOpts) = {Name: "TotalPartSize" AggrFunc : EAF_SUM ServerlessName: "topic.storage_bytes"}]; - METRIC_MAX_PART_SIZE = 20 [(LabeledCounterOpts) = {Name: "MaxPartSize" AggrFunc : EAF_MAX ServerlessName: "topic.partition.max_storage_bytes"}]; - - METRIC_WRITE_QUOTA_BYTES = 21 [(LabeledCounterOpts) = {Name: "WriteBytesQuota" AggrFunc : EAF_MIN ServerlessName: "topic.partition.max_write_quota_usage"}]; - - METRIC_WRITE_TIME_LAG_MS = 22 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByLastWrite" AggrFunc : EAF_MAX ServerlessName: "topic.partition.max_write_time_lag_milliseconds"}]; - METRIC_LAST_WRITE_TIME = 23 [(LabeledCounterOpts) = {Name: "TimeSinceLastWriteMs" AggrFunc : EAF_MIN Type : CT_TIMELAG ServerlessName: "topic.write.max_time_since_last_write_milliseconds"}]; - - METRIC_TOTAL_QUOTA_SPEED_1 = 24 [(LabeledCounterOpts) = {Name: "QuotaBytesPerSec" 
AggrFunc : EAF_SUM ServerlessName: ""}]; - METRIC_MAX_QUOTA_SPEED_1 = 25 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerSec" AggrFunc : EAF_MAX ServerlessName: ""}]; - METRIC_TOTAL_QUOTA_SPEED_2 = 26 [(LabeledCounterOpts) = {Name: "QuotaBytesPerMin" AggrFunc : EAF_SUM ServerlessName: ""}]; - METRIC_MAX_QUOTA_SPEED_2 = 27 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerMin" AggrFunc : EAF_MAX ServerlessName: ""}]; - METRIC_TOTAL_QUOTA_SPEED_3 = 28 [(LabeledCounterOpts) = {Name: "QuotaBytesPerHour" AggrFunc : EAF_SUM ServerlessName: ""}]; - METRIC_MAX_QUOTA_SPEED_3 = 29 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerHour" AggrFunc : EAF_MAX ServerlessName: ""}]; - METRIC_TOTAL_QUOTA_SPEED_4 = 30 [(LabeledCounterOpts) = {Name: "QuotaBytesPerDay" AggrFunc : EAF_SUM ServerlessName: ""}]; - METRIC_MAX_QUOTA_SPEED_4 = 31 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerDay" AggrFunc : EAF_MAX ServerlessName: ""}]; - METRIC_WRITE_QUOTA_USAGE = 32 [(LabeledCounterOpts) = {Name: "PartitionMaxWriteQuotaUsage" AggrFunc : EAF_MAX ServerlessName: ""}]; - METRIC_MIN_SID_LIFETIME = 33 [(LabeledCounterOpts) = {Name: "SourceIdMinLifetimeMs" AggrFunc : EAF_MIN ServerlessName: ""}]; - + METRIC_LIFE_TIME = 0 [(LabeledCounterOpts) = {Name: "PartitionLifeTimeMs" AggrFunc : EAF_MAX Type : CT_TIMELAG SVName: "topic.partition.uptime_milliseconds_min"}]; + METRIC_INIT_TIME = 1 [(LabeledCounterOpts) = {Name: "PartitionInitTimeMs" AggrFunc : EAF_MAX SVName: "topic.partition.init_duration_milliseconds_max"}]; + METRIC_PARTITIONS = 2 [(LabeledCounterOpts) = {Name: "PartitionsAnswered" AggrFunc : EAF_SUM SVName: "topic.partition.alive_count"}]; + METRIC_NUM_SIDS = 3 [(LabeledCounterOpts) = {Name: "SourceIdCount" AggrFunc : EAF_SUM SVName: "topic.producers_count"}]; + METRIC_MAX_NUM_SIDS = 4 [(LabeledCounterOpts) = {Name: "SourceIdMaxCount" AggrFunc : EAF_MAX SVName: "topic.partition.producers_count_max"}]; + METRIC_GAPS_COUNT = 5 [(LabeledCounterOpts) = {Name: "GapsCount" AggrFunc : EAF_SUM 
SVName: ""}]; + METRIC_MAX_GAPS_COUNT = 6 [(LabeledCounterOpts) = {Name: "GapsMaxCount" AggrFunc : EAF_MAX SVName: ""}]; + METRIC_GAPS_SIZE = 7 [(LabeledCounterOpts) = {Name: "GapsSize" AggrFunc : EAF_SUM SVName: ""}]; + METRIC_MAX_GAPS_SIZE = 8 [(LabeledCounterOpts) = {Name: "GapsMaxSize" AggrFunc : EAF_MAX SVName: ""}]; + METRIC_MIN_WRITE_QUOTA_BYTES_AVAIL_SEC = 9 [(LabeledCounterOpts) = {Name: "WriteBytesAvailAvgSec" AggrFunc : EAF_MIN SVName: ""}]; + METRIC_MIN_WRITE_QUOTA_BYTES_AVAIL_MIN = 10 [(LabeledCounterOpts) = {Name: "WriteBytesAvailAvgMin" AggrFunc : EAF_MIN SVName: ""}]; + METRIC_TOTAL_WRITE_SPEED_1 = 11 [(LabeledCounterOpts) = {Name: "WriteBytesPerSec" AggrFunc : EAF_SUM SVName: ""}]; + METRIC_MAX_WRITE_SPEED_1 = 12 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerSec" AggrFunc : EAF_MAX SVName: ""}]; + METRIC_TOTAL_WRITE_SPEED_2 = 13 [(LabeledCounterOpts) = {Name: "WriteBytesPerMin" AggrFunc : EAF_SUM SVName: ""}]; + METRIC_MAX_WRITE_SPEED_2 = 14 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerMin" AggrFunc : EAF_MAX SVName: ""}]; + METRIC_TOTAL_WRITE_SPEED_3 = 15 [(LabeledCounterOpts) = {Name: "WriteBytesPerHour" AggrFunc : EAF_SUM SVName: ""}]; + METRIC_MAX_WRITE_SPEED_3 = 16 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerHour" AggrFunc : EAF_MAX SVName: ""}]; + METRIC_TOTAL_WRITE_SPEED_4 = 17 [(LabeledCounterOpts) = {Name: "WriteBytesPerDay" AggrFunc : EAF_SUM SVName: ""}]; + METRIC_MAX_WRITE_SPEED_4 = 18 [(LabeledCounterOpts) = {Name: "WriteBytesMaxPerDay" AggrFunc : EAF_MAX SVName: ""}]; + + METRIC_TOTAL_PART_SIZE = 19 [(LabeledCounterOpts) = {Name: "TotalPartSize" AggrFunc : EAF_SUM SVName: "topic.storage_bytes"}]; + METRIC_MAX_PART_SIZE = 20 [(LabeledCounterOpts) = {Name: "MaxPartSize" AggrFunc : EAF_MAX SVName: "topic.partition.storage_bytes_max"}]; + + METRIC_WRITE_QUOTA_BYTES = 21 [(LabeledCounterOpts) = {Name: "WriteBytesQuota" AggrFunc : EAF_MIN SVName: "topic.partition.write.speed_limit_bytes_per_second"}]; + + METRIC_WRITE_TIME_LAG_MS = 
22 [(LabeledCounterOpts) = {Name: "WriteTimeLagMsByLastWrite" AggrFunc : EAF_MAX SVName: "topic.partition.write.lag_milliseconds_max"}]; + METRIC_LAST_WRITE_TIME = 23 [(LabeledCounterOpts) = {Name: "TimeSinceLastWriteMs" AggrFunc : EAF_MIN Type : CT_TIMELAG SVName: "topic.partition.write.idle_milliseconds_max"}]; + + METRIC_TOTAL_QUOTA_SPEED_1 = 24 [(LabeledCounterOpts) = {Name: "QuotaBytesPerSec" AggrFunc : EAF_SUM SVName: ""}]; + METRIC_MAX_QUOTA_SPEED_1 = 25 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerSec" AggrFunc : EAF_MAX SVName: ""}]; + METRIC_TOTAL_QUOTA_SPEED_2 = 26 [(LabeledCounterOpts) = {Name: "QuotaBytesPerMin" AggrFunc : EAF_SUM SVName: ""}]; + METRIC_MAX_QUOTA_SPEED_2 = 27 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerMin" AggrFunc : EAF_MAX SVName: "topic.partition.write.bytes_per_minute_max"}]; + METRIC_TOTAL_QUOTA_SPEED_3 = 28 [(LabeledCounterOpts) = {Name: "QuotaBytesPerHour" AggrFunc : EAF_SUM SVName: ""}]; + METRIC_MAX_QUOTA_SPEED_3 = 29 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerHour" AggrFunc : EAF_MAX SVName: "topic.partition.write.bytes_per_hour_max"}]; + METRIC_TOTAL_QUOTA_SPEED_4 = 30 [(LabeledCounterOpts) = {Name: "QuotaBytesPerDay" AggrFunc : EAF_SUM SVName: ""}]; + METRIC_MAX_QUOTA_SPEED_4 = 31 [(LabeledCounterOpts) = {Name: "QuotaBytesMaxPerDay" AggrFunc : EAF_MAX SVName: "topic.partition.write.bytes_per_day_max"}]; + METRIC_WRITE_QUOTA_USAGE = 32 [(LabeledCounterOpts) = {Name: "PartitionMaxWriteQuotaUsage" AggrFunc : EAF_MAX SVName: "topic.partition.write.throttled_nanoseconds_max"}]; + METRIC_MIN_SID_LIFETIME = 33 [(LabeledCounterOpts) = {Name: "SourceIdMinLifetimeMs" AggrFunc : EAF_MIN SVName: ""}]; + + METRIC_PARTITIONS_TOTAL = 34 [(LabeledCounterOpts) = {Name: "PartitionsTotal" AggrFunc : EAF_MAX SVName: "topic.partition.total_count"}]; } - diff --git a/ydb/core/protos/flat_scheme_op.proto b/ydb/core/protos/flat_scheme_op.proto index 15d1596a66..7a72def34d 100644 --- a/ydb/core/protos/flat_scheme_op.proto +++ 
b/ydb/core/protos/flat_scheme_op.proto @@ -725,6 +725,7 @@ message TCdcStreamDescription { optional string Name = 1; optional ECdcStreamMode Mode = 2; optional ECdcStreamFormat Format = 6; + optional bool VirtualTimestamps = 7; optional NKikimrProto.TPathID PathId = 3; optional ECdcStreamState State = 4; optional uint64 SchemaVersion = 5; diff --git a/ydb/core/protos/kqp.proto b/ydb/core/protos/kqp.proto index 59de1116b4..0c8d37a85b 100644 --- a/ydb/core/protos/kqp.proto +++ b/ydb/core/protos/kqp.proto @@ -243,6 +243,7 @@ message TPreparedQuery { repeated TParameterDescription Parameters = 4; optional string Text = 5; optional NKqpProto.TKqpPhyQuery PhysicalQuery = 6; + optional bool EnableLlvm = 7; }; message TQueryResponse { diff --git a/ydb/core/protos/node_whiteboard.proto b/ydb/core/protos/node_whiteboard.proto index 0793bcf936..19dbcf8bc1 100644 --- a/ydb/core/protos/node_whiteboard.proto +++ b/ydb/core/protos/node_whiteboard.proto @@ -68,7 +68,9 @@ message TTabletStateInfo { message TEvTabletStateRequest { optional uint64 ChangedSince = 1; + optional string Format = 5; // it could be "packed5" optional string GroupBy = 20; // it's either empty or "Type,State" for now + repeated fixed64 FilterTabletId = 22; } message TEvTabletStateResponse { @@ -76,6 +78,7 @@ message TEvTabletStateResponse { optional uint64 ResponseTime = 2; // ms, filled during processing and merging optional uint64 ResponseDuration = 3; // us, filled during collect optional uint64 ProcessDuration = 4; // us, filled during processing + optional bytes Packed5 = 5; } message TNodeStateInfo { @@ -229,6 +232,7 @@ message TBSGroupStateInfo { optional uint32 Count = 13; // filled during group count optional string StoragePoolName = 14; // from BS_CONTROLLER optional bool Encryption = 19; + repeated uint32 VDiskNodeIds = 20; } message TEvBSGroupStateRequest { diff --git a/ydb/core/protos/pqconfig.proto b/ydb/core/protos/pqconfig.proto index ff7e1697d6..66d6fb513f 100644 --- 
a/ydb/core/protos/pqconfig.proto +++ b/ydb/core/protos/pqconfig.proto @@ -468,7 +468,6 @@ message TReadSessionStatusResponse { optional string ClientNode = 6; optional uint32 ProxyNodeId = 7; - } @@ -479,6 +478,7 @@ message TReadSessionsInfoResponse { optional uint32 ProxyNodeId = 3; optional string Session = 4; optional uint64 Timestamp = 5; + optional uint64 TimestampMs = 6; } repeated TPartitionInfo PartitionInfo = 1; optional uint64 TabletId = 2; @@ -585,6 +585,7 @@ message TOffsetsResponse { message TStatus { optional string ClientId = 1; + optional bool GetStatForAllConsumers = 2; } message TClientPosition { @@ -660,6 +661,20 @@ message TStatusResponse { optional int64 SourceIdRetentionPeriodSec = 28; repeated TErrorMessage Errors = 29; + + repeated TConsumerResult ConsumerResult = 30; + } + + message TConsumerResult { + optional string Consumer = 1; + + optional int64 AvgReadSpeedPerMin = 2; + optional int64 AvgReadSpeedPerHour = 3; + optional int64 AvgReadSpeedPerDay = 4; + + optional uint64 WriteLagMs = 5; + optional uint64 ReadLagMs = 6; + optional uint64 LastReadTimestampMs = 7; } optional uint64 TabletId = 1; @@ -743,7 +758,7 @@ message TYdsShardIterator { required string StreamArn = 2; required uint32 ShardId = 3; required uint64 ReadTimestampMs = 4; - required uint32 SequenceNumber = 5; + required uint64 SequenceNumber = 5; required uint64 CreationTimestampMs = 6; optional ETopicKind Kind = 7; } diff --git a/ydb/core/protos/services.proto b/ydb/core/protos/services.proto index 2f90652549..e250d18eb8 100644 --- a/ydb/core/protos/services.proto +++ b/ydb/core/protos/services.proto @@ -307,6 +307,8 @@ enum EServiceKikimr { FQ_INTERNAL_SERVICE = 1153; FQ_QUOTA_SERVICE = 1154; + VIEWER = 1166; + // 1024 - 1099 is reserved for nbs // Change exchange (async indexes & CDC) diff --git a/ydb/core/protos/sys_view.proto b/ydb/core/protos/sys_view.proto index a3ff84b707..234646e124 100644 --- a/ydb/core/protos/sys_view.proto +++ b/ydb/core/protos/sys_view.proto @@ 
-2,6 +2,7 @@ package NKikimrSysView; option java_package = "ru.yandex.kikimr.proto"; +import "ydb/core/protos/labeled_counters.proto"; import "ydb/core/protos/tablet.proto"; message TPartitionStatsKey { @@ -519,11 +520,16 @@ message TDbGRpcProxyCounters { optional TDbCounters RequestCounters = 1; } +message TDbLabeledCounters { + optional NKikimrLabeledCounters.TTabletLabeledCounters AggregatedPerTablets = 1; +} + message TDbServiceCounters { optional TDbCounters Main = 1; repeated TDbTabletCounters TabletCounters = 2; repeated TDbGRpcCounters GRpcCounters = 3; optional TDbGRpcProxyCounters GRpcProxyCounters = 4; + repeated TDbLabeledCounters LabeledCounters = 5; } enum EDbCountersService { @@ -531,8 +537,9 @@ enum EDbCountersService { TABLETS = 2; GRPC = 3; GRPC_PROXY = 4; - RESERVED_2 = 5; + LABELED = 5; RESERVED_3 = 6; + RESERVED_4 = 7; } // node -> sysview processor tablet @@ -552,6 +559,22 @@ message TEvSendDbCountersResponse { optional uint64 Generation = 2; // confirmed generation } +message TEvSendDbLabeledCountersRequest { + message TServiceCounters { + optional EDbCountersService Service = 1; + optional TDbServiceCounters Counters = 2; + } + repeated TServiceCounters ServiceCounters = 1; + optional uint64 NodeId = 2; + optional uint64 Generation = 3; +} + +message TEvSendDbLabeledCountersResponse { + optional string Database = 1; + optional uint64 Generation = 2; // confirmed generation +} + + // ---- Top partitions message TTopPartitionsKey { diff --git a/ydb/core/protos/table_stats.proto b/ydb/core/protos/table_stats.proto index a2a22e8ff5..4a44614a6c 100644 --- a/ydb/core/protos/table_stats.proto +++ b/ydb/core/protos/table_stats.proto @@ -11,8 +11,9 @@ message THistogram { } message TTableStats { - optional uint64 DataSize = 1; - optional uint64 RowCount = 2; + optional uint64 DataSize = 1; // both inMem and ondisk + optional uint64 RowCount = 2; // both inMem and ondisk + optional uint64 IndexSize = 3; optional uint64 InMemSize = 4; diff --git 
a/ydb/core/protos/tx_datashard.proto b/ydb/core/protos/tx_datashard.proto index af2b61ff3d..e2ac1d37cc 100644 --- a/ydb/core/protos/tx_datashard.proto +++ b/ydb/core/protos/tx_datashard.proto @@ -784,6 +784,8 @@ message TEvPeriodicTableStats { optional uint64 StartTime = 11; // milliseconds since epoch optional uint64 TableOwnerId = 12; + + optional bool IsDstSplit = 13; } message TEvS3ListingRequest { diff --git a/ydb/core/quoter/quoter_service.cpp b/ydb/core/quoter/quoter_service.cpp index b6cb58d2f5..24897018b7 100644 --- a/ydb/core/quoter/quoter_service.cpp +++ b/ydb/core/quoter/quoter_service.cpp @@ -254,7 +254,9 @@ void TResource::ChargeUsedAmount(double amount, TInstant now) { FreeBalance -= amount; Balance -= amount; AmountConsumed += amount; - History.Add(now, amount); + if (StatUpdatePolicy != EStatUpdatePolicy::Never) { + History.Add(now, amount); + } Counters.Consumed->Add(static_cast<i64>(amount)); if (Balance >= 0.0) { StopStarvation(now); @@ -291,7 +293,9 @@ TDuration TResource::Charge(double amount, TInstant now) { LastAllocated = Max(now - QuoterServiceConfig.ScheduleTickSize * 2, timeToFullfill); Balance -= amount; AmountConsumed += amount; - History.Add(now, amount); + if (StatUpdatePolicy != EStatUpdatePolicy::Never) { + History.Add(now, amount); + } if (FreeBalance > Balance) FreeBalance = Balance; @@ -306,7 +310,9 @@ TDuration TResource::Charge(double amount, TInstant now) { FreeBalance -= amount; Balance -= amount; AmountConsumed += amount; - History.Add(now, amount); + if (StatUpdatePolicy != EStatUpdatePolicy::Never) { + History.Add(now, amount); + } Counters.Consumed->Add(static_cast<i64>(amount)); StopStarvation(now); diff --git a/ydb/core/sys_view/common/db_counters.h b/ydb/core/sys_view/common/db_counters.h index 0a1850e78a..afd0e4386c 100644 --- a/ydb/core/sys_view/common/db_counters.h +++ b/ydb/core/sys_view/common/db_counters.h @@ -14,12 +14,22 @@ class TDbServiceCounters { using TGRpcRequestDesc = std::pair<TString, TString>; 
THashMap<TGRpcRequestDesc, NKikimrSysView::TDbGRpcCounters*> ByGRpcRequest; + THashMap<TString, NKikimrSysView::TDbLabeledCounters*> ByGroupName; public: + void Clear() + { + ProtoCounters.Clear(); + ByTabletType.clear(); + ByGRpcRequest.clear(); + ByGroupName.clear(); + } + void Swap(TDbServiceCounters& other) { ProtoCounters.Swap(&other.ProtoCounters); ByTabletType.swap(other.ByTabletType); ByGRpcRequest.swap(other.ByGRpcRequest); + ByGroupName.swap(other.ByGroupName); } NKikimrSysView::TDbServiceCounters& Proto() { return ProtoCounters; } @@ -73,6 +83,29 @@ public: return counters; } + + NKikimrSysView::TDbLabeledCounters* FindLabeledCounters(const TString& groupName) const + { + if (auto it = ByGroupName.find(groupName); it != ByGroupName.end()) { + return it->second; + } + return {}; + } + + NKikimrSysView::TDbLabeledCounters* FindOrAddLabeledCounters(const TString& groupName) + { + if (auto it = ByGroupName.find(groupName); it != ByGroupName.end()) { + return it->second; + } + + auto* counters = ProtoCounters.AddLabeledCounters(); + auto lCounters = counters->MutableAggregatedPerTablets(); + lCounters->SetGroup(groupName); + lCounters->SetDelimiter("|"); + ByGroupName[groupName] = counters; + + return counters; + } }; } // NSysView diff --git a/ydb/core/sys_view/common/events.h b/ydb/core/sys_view/common/events.h index fb4347db61..b170395a54 100644 --- a/ydb/core/sys_view/common/events.h +++ b/ydb/core/sys_view/common/events.h @@ -59,6 +59,8 @@ struct TEvSysView { EvRegisterDbCounters, EvSendDbCountersRequest, EvSendDbCountersResponse, + EvSendDbLabeledCountersRequest, + EvSendDbLabeledCountersResponse, EvWatchDatabase, EvUpdateTtlStats, @@ -330,6 +332,18 @@ struct TEvSysView { EvSendDbCountersResponse> {}; + struct TEvSendDbLabeledCountersRequest : public TEventPB< + TEvSendDbLabeledCountersRequest, + NKikimrSysView::TEvSendDbLabeledCountersRequest, + EvSendDbLabeledCountersRequest> + {}; + + struct TEvSendDbLabeledCountersResponse : public TEventPB< + 
TEvSendDbLabeledCountersResponse, + NKikimrSysView::TEvSendDbLabeledCountersResponse, + EvSendDbLabeledCountersResponse> + {}; + struct TEvWatchDatabase : public TEventLocal< TEvWatchDatabase, EvWatchDatabase> diff --git a/ydb/core/sys_view/processor/db_counters.cpp b/ydb/core/sys_view/processor/db_counters.cpp index e2f359e4d6..9cb9a63232 100644 --- a/ydb/core/sys_view/processor/db_counters.cpp +++ b/ydb/core/sys_view/processor/db_counters.cpp @@ -5,7 +5,8 @@ #include <ydb/core/grpc_services/counters/counters.h> #include <ydb/core/grpc_services/counters/proxy_counters.h> #include <ydb/core/kqp/counters/kqp_counters.h> -#include <ydb/core/tablet/tablet_counters_aggregator.h> +#include <ydb/core/tablet/labeled_db_counters.h> +#include <ydb/core/tablet/labeled_counters_merger.h> #include <ydb/core/tablet_flat/flat_executor_counters.h> namespace NKikimr { @@ -120,6 +121,10 @@ static void SwapMaxCounters(NKikimrSysView::TDbCounters* dst, NKikimrSysView::TD dst->SetCumulativeCount(src.GetCumulativeCount()); }; +static void SwapLabeledCounters(NKikimrLabeledCounters::TTabletLabeledCounters* dst, NKikimrLabeledCounters::TTabletLabeledCounters& src) { + dst->MutableLabeledCounter()->Swap(src.MutableLabeledCounter()); +}; + static void ResetSimpleCounters(NKikimrSysView::TDbCounters* dst) { auto simpleSize = dst->SimpleSize(); auto* to = dst->MutableSimple(); @@ -137,6 +142,38 @@ static void ResetMaxCounters(NKikimrSysView::TDbCounters* dst) { } } +static void ResetLabeledCounters(NKikimrLabeledCounters::TTabletLabeledCounters* dst) { + auto labeledSize = dst->LabeledCounterSize(); + auto* to = dst->MutableLabeledCounter(); + for (size_t i = 0; i < labeledSize; ++i) { + auto& counter = (*to)[i]; + TLabeledCounterOptions::ECounterType type(counter.GetType()); + TLabeledCounterOptions::EAggregateFunc aggrFunc(counter.GetAggregateFunc()); + const bool switchResetValue = (type == TLabeledCounterOptions::CT_TIMELAG); + switch (aggrFunc) { + case TLabeledCounterOptions::EAF_MIN: 
+ if (switchResetValue) { + counter.SetValue(0); + } else { + counter.SetValue(std::numeric_limits<ui64>::max()); + } + break; + case TLabeledCounterOptions::EAF_MAX: + if (switchResetValue) { + counter.SetValue(std::numeric_limits<ui64>::max()); + } else { + counter.SetValue(0); + } + break; + case TLabeledCounterOptions::EAF_SUM: + counter.SetValue(0); + break; + default: + Y_FAIL("bad aggrFunc value"); + } + } +} + template <typename TAggrSum, typename TAggrMax> static void AggregateCounters(NKikimr::NSysView::TDbServiceCounters* dst, const NKikimrSysView::TDbServiceCounters& src) @@ -162,6 +199,24 @@ static void AggregateCounters(NKikimr::NSysView::TDbServiceCounters* dst, TAggrSum::Apply(dst->Proto().MutableGRpcProxyCounters()->MutableRequestCounters(), src.GetGRpcProxyCounters().GetRequestCounters()); } + + for (const auto& srcReq : src.GetLabeledCounters()) { + auto* dstReq = dst->FindOrAddLabeledCounters(srcReq.GetAggregatedPerTablets().GetGroup()); + if (dstReq->GetAggregatedPerTablets().GetLabeledCounter().size() < + srcReq.GetAggregatedPerTablets().GetLabeledCounter().size()) { + const ui32 n = srcReq.GetAggregatedPerTablets().GetLabeledCounter().size() - + dstReq->GetAggregatedPerTablets().GetLabeledCounter().size(); + for (ui32 i = 0; i < n; ++i) { + dstReq->MutableAggregatedPerTablets()->AddLabeledCounter(); + } + } + + for (int i = 0; i < srcReq.GetAggregatedPerTablets().GetLabeledCounter().size(); ++i) { + const auto& srcCounter = srcReq.GetAggregatedPerTablets().GetLabeledCounter(i); + auto* trgCounter = dstReq->MutableAggregatedPerTablets()->MutableLabeledCounter(i); + NKikimr::TMerger::MergeOne(srcCounter, *trgCounter); + } + } } static void AggregateIncrementalCounters(NKikimr::NSysView::TDbServiceCounters* dst, @@ -195,6 +250,11 @@ static void SwapStatefulCounters(NKikimr::NSysView::TDbServiceCounters* dst, auto* dstReq = dst->FindOrAddGRpcCounters(srcReq.GetGRpcService(), srcReq.GetGRpcRequest()); 
SwapSimpleCounters(dstReq->MutableRequestCounters(), *srcReq.MutableRequestCounters()); } + + for (auto& srcReq : *src.MutableLabeledCounters()) { + auto* dstReq = dst->FindOrAddLabeledCounters(srcReq.GetAggregatedPerTablets().GetGroup()); + SwapLabeledCounters(dstReq->MutableAggregatedPerTablets(), *srcReq.MutableAggregatedPerTablets()); + } } static void ResetStatefulCounters(NKikimrSysView::TDbServiceCounters* dst) { @@ -210,6 +270,9 @@ static void ResetStatefulCounters(NKikimrSysView::TDbServiceCounters* dst) { for (auto& dstReq : *dst->MutableGRpcCounters()) { ResetSimpleCounters(dstReq.MutableRequestCounters()); } + for (auto& dstReq : *dst->MutableLabeledCounters()) { + ResetLabeledCounters(dstReq.MutableAggregatedPerTablets()); + } } void TSysViewProcessor::SendNavigate() { @@ -261,6 +324,10 @@ TIntrusivePtr<IDbCounters> TSysViewProcessor::CreateCountersForService( result = NGRpcService::CreateGRpcProxyDbCounters(ExternalGroup, group); break; } + case NKikimrSysView::LABELED: { + result = NKikimr::CreateLabeledDbCounters(LabeledGroup); + break; + } default: break; } @@ -282,6 +349,13 @@ void TSysViewProcessor::AttachExternalCounters() { ->GetSubgroup("folder_id", FolderId) ->GetSubgroup("database_id", DatabaseId) ->RegisterSubgroup("host", "", ExternalGroup); + + GetServiceCounters(AppData()->Counters, "labeled_serverless", false) + ->GetSubgroup("database", Database) + ->GetSubgroup("cloud_id", CloudId) + ->GetSubgroup("folder_id", FolderId) + ->GetSubgroup("database_id", DatabaseId) + ->RegisterSubgroup("host", "", LabeledGroup); } void TSysViewProcessor::AttachInternalCounters() { @@ -303,6 +377,9 @@ void TSysViewProcessor::DetachExternalCounters() { GetServiceCounters(AppData()->Counters, "ydb_serverless", false) ->RemoveSubgroup("database", Database); + + GetServiceCounters(AppData()->Counters, "labeled_serverless", false) + ->RemoveSubgroup("database", Database); } void TSysViewProcessor::DetachInternalCounters() { @@ -348,6 +425,7 @@ void 
TSysViewProcessor::Handle(TEvSysView::TEvSendDbCountersRequest::TPtr& ev) { incomingServicesSet.insert(service); auto& simpleState = state.Simple[service]; + simpleState.Clear(); SwapStatefulCounters(&simpleState, *serviceCounters.MutableCounters()); auto& aggrState = AggregatedCountersState[service]; @@ -374,6 +452,61 @@ void TSysViewProcessor::Handle(TEvSysView::TEvSendDbCountersRequest::TPtr& ev) { Send(ev->Sender, std::move(response)); } +void TSysViewProcessor::Handle(TEvSysView::TEvSendDbLabeledCountersRequest::TPtr& ev) { + if (!AppData()->FeatureFlags.GetEnableDbCounters()) { + return; + } + + auto& record = ev->Get()->Record; + auto nodeId = record.GetNodeId(); + + auto& state = NodeLabeledCountersStates[nodeId]; + state.FreshCount = 0; + + if (state.Generation == record.GetGeneration()) { + SVLOG_D("[" << TabletID() << "] TEvSendDbLabeledCountersRequest, known generation: " + << "node id# " << nodeId + << ", generation# " << record.GetGeneration()); + + auto response = MakeHolder<TEvSysView::TEvSendDbLabeledCountersResponse>(); + response->Record.SetDatabase(Database); + response->Record.SetGeneration(state.Generation); + Send(ev->Sender, std::move(response)); + return; + } + + state.Generation = record.GetGeneration(); + + std::unordered_set<NKikimrSysView::EDbCountersService> incomingServicesSet; + + for (auto& serviceCounters : *record.MutableServiceCounters()) { + const auto service = serviceCounters.GetService(); + incomingServicesSet.insert(service); + + auto& simpleState = state.Simple[service]; + simpleState.Clear(); + SwapStatefulCounters(&simpleState, *serviceCounters.MutableCounters()); + } + + for (auto it = state.Simple.begin(); it != state.Simple.end(); ) { + if (incomingServicesSet.find(it->first) == incomingServicesSet.end()) { + it = state.Simple.erase(it); + } else { + ++it; + } + } + + SVLOG_D("[" << TabletID() << "] TEvSendDbLabeledCountersRequest: " + << "node id# " << nodeId + << ", generation# " << state.Generation + << ", request 
size# " << record.ByteSize()); + + auto response = MakeHolder<TEvSysView::TEvSendDbLabeledCountersResponse>(); + response->Record.SetDatabase(Database); + response->Record.SetGeneration(state.Generation); + Send(ev->Sender, std::move(response)); +} + void TSysViewProcessor::Handle(TEvPrivate::TEvApplyCounters::TPtr&) { for (auto& [_, counters] : AggregatedCountersState) { ResetStatefulCounters(&counters.Proto()); @@ -391,7 +524,6 @@ void TSysViewProcessor::Handle(TEvPrivate::TEvApplyCounters::TPtr&) { } ++it; } - for (auto& [service, aggrCounters] : AggregatedCountersState) { TIntrusivePtr<IDbCounters> counters; if (auto it = Counters.find(service); it != Counters.end()) { @@ -411,6 +543,43 @@ void TSysViewProcessor::Handle(TEvPrivate::TEvApplyCounters::TPtr&) { ScheduleApplyCounters(); } +void TSysViewProcessor::Handle(TEvPrivate::TEvApplyLabeledCounters::TPtr&) { + for (auto& [_, counters] : AggregatedLabeledState) { + ResetStatefulCounters(&counters.Proto()); + } + + for (auto it = NodeLabeledCountersStates.begin(); it != NodeLabeledCountersStates.end(); ) { + auto& state = it->second; + if (state.FreshCount > 1) { + it = NodeLabeledCountersStates.erase(it); + continue; + } + ++state.FreshCount; + for (const auto& [service, counters] : state.Simple) { + AggregateStatefulCounters(&AggregatedLabeledState[service], counters.Proto()); + } + ++it; + } + + for (auto& [service, aggrCounters] : AggregatedLabeledState) { + TIntrusivePtr<IDbCounters> counters; + if (auto it = Counters.find(service); it != Counters.end()) { + counters = it->second; + } else { + counters = CreateCountersForService(service); + } + if (!counters) { + continue; + } + counters->FromProto(aggrCounters); + } + + SVLOG_D("[" << TabletID() << "] TEvApplyLabeledCounters: " + << "services count# " << AggregatedLabeledState.size()); + + ScheduleApplyLabeledCounters(); +} + void TSysViewProcessor::Handle(TEvPrivate::TEvSendNavigate::TPtr&) { SendNavigate(); } diff --git 
a/ydb/core/sys_view/processor/processor_impl.cpp b/ydb/core/sys_view/processor/processor_impl.cpp index f3c5437984..1bd8bb5b05 100644 --- a/ydb/core/sys_view/processor/processor_impl.cpp +++ b/ydb/core/sys_view/processor/processor_impl.cpp @@ -14,6 +14,7 @@ TSysViewProcessor::TSysViewProcessor(const TActorId& tablet, TTabletStorageInfo* , TotalInterval(TDuration::Seconds(processorMode == EProcessorMode::FAST ? 1 : 60)) , CollectInterval(TotalInterval / 2) , ExternalGroup(new ::NMonitoring::TDynamicCounters) + , LabeledGroup(new ::NMonitoring::TDynamicCounters) { InternalGroups["kqp_serverless"] = new ::NMonitoring::TDynamicCounters; InternalGroups["tablets_serverless"] = new ::NMonitoring::TDynamicCounters; @@ -214,6 +215,10 @@ void TSysViewProcessor::ScheduleApplyCounters() { Schedule(ProcessCountersInterval, new TEvPrivate::TEvApplyCounters); } +void TSysViewProcessor::ScheduleApplyLabeledCounters() { + Schedule(ProcessLabeledCountersInterval, new TEvPrivate::TEvApplyLabeledCounters); +} + void TSysViewProcessor::ScheduleSendNavigate() { Schedule(SendNavigateInterval, new TEvPrivate::TEvSendNavigate); } diff --git a/ydb/core/sys_view/processor/processor_impl.h b/ydb/core/sys_view/processor/processor_impl.h index 3867fd457a..cf31af7232 100644 --- a/ydb/core/sys_view/processor/processor_impl.h +++ b/ydb/core/sys_view/processor/processor_impl.h @@ -50,6 +50,7 @@ private: EvSendRequests, EvProcess, EvApplyCounters, + EvApplyLabeledCounters, EvSendNavigate, EvEnd }; @@ -64,6 +65,8 @@ private: struct TEvApplyCounters : public TEventLocal<TEvApplyCounters, EvApplyCounters> {}; + struct TEvApplyLabeledCounters : public TEventLocal<TEvApplyLabeledCounters, EvApplyLabeledCounters> {}; + struct TEvSendNavigate : public TEventLocal<TEvSendNavigate, EvSendNavigate> {}; }; @@ -119,7 +122,9 @@ private: void Handle(TEvSysView::TEvGetTopPartitionsRequest::TPtr& ev); void Handle(TEvSysView::TEvSendDbCountersRequest::TPtr& ev); + void 
Handle(TEvSysView::TEvSendDbLabeledCountersRequest::TPtr& ev); void Handle(TEvPrivate::TEvApplyCounters::TPtr& ev); + void Handle(TEvPrivate::TEvApplyLabeledCounters::TPtr& ev); void Handle(TEvPrivate::TEvSendNavigate::TPtr& ev); void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev); void Handle(TEvTxProxySchemeCache::TEvWatchNotifyUpdated::TPtr& ev); @@ -148,6 +153,7 @@ private: void ScheduleCollect(); void ScheduleSendRequests(); void ScheduleApplyCounters(); + void ScheduleApplyLabeledCounters(); void ScheduleSendNavigate(); template <typename TSchema, typename TMap> @@ -229,7 +235,9 @@ private: hFunc(TEvSysView::TEvSendTopPartitions, Handle); hFunc(TEvSysView::TEvGetTopPartitionsRequest, Handle); hFunc(TEvSysView::TEvSendDbCountersRequest, Handle); + hFunc(TEvSysView::TEvSendDbLabeledCountersRequest, Handle); hFunc(TEvPrivate::TEvApplyCounters, Handle); + hFunc(TEvPrivate::TEvApplyLabeledCounters, Handle); hFunc(TEvPrivate::TEvSendNavigate, Handle); hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); hFunc(TEvTxProxySchemeCache::TEvWatchNotifyUpdated, Handle); @@ -263,6 +271,8 @@ private: static constexpr size_t BatchSizeLimit = 4 << 20; // interval of db counters processing static constexpr TDuration ProcessCountersInterval = TDuration::Seconds(5); + // interval of db labeled counters processing + static constexpr TDuration ProcessLabeledCountersInterval = TDuration::Seconds(60); // interval of sending next navigate request static constexpr TDuration SendNavigateInterval = TDuration::Seconds(5); @@ -355,6 +365,7 @@ private: TString DatabaseId; ::NMonitoring::TDynamicCounterPtr ExternalGroup; + ::NMonitoring::TDynamicCounterPtr LabeledGroup; std::unordered_map<TString, ::NMonitoring::TDynamicCounterPtr> InternalGroups; using TDbCountersServiceMap = std::unordered_map<NKikimrSysView::EDbCountersService, @@ -366,7 +377,9 @@ private: size_t FreshCount = 0; }; std::unordered_map<TNodeId, TNodeCountersState> NodeCountersStates; + 
std::unordered_map<TNodeId, TNodeCountersState> NodeLabeledCountersStates; TDbCountersServiceMap AggregatedCountersState; + TDbCountersServiceMap AggregatedLabeledState; std::unordered_map<NKikimrSysView::EDbCountersService, TIntrusivePtr<IDbCounters>> Counters; }; diff --git a/ydb/core/sys_view/processor/tx_init.cpp b/ydb/core/sys_view/processor/tx_init.cpp index 71a5a13581..11ddac0755 100644 --- a/ydb/core/sys_view/processor/tx_init.cpp +++ b/ydb/core/sys_view/processor/tx_init.cpp @@ -461,6 +461,7 @@ struct TSysViewProcessor::TTxInit : public TTxBase { if (AppData()->FeatureFlags.GetEnableDbCounters()) { Self->ScheduleApplyCounters(); + Self->ScheduleApplyLabeledCounters(); Self->SendNavigate(); } diff --git a/ydb/core/sys_view/processor/tx_init_schema.cpp b/ydb/core/sys_view/processor/tx_init_schema.cpp index 4e1ed9f6cc..8a1df28d68 100644 --- a/ydb/core/sys_view/processor/tx_init_schema.cpp +++ b/ydb/core/sys_view/processor/tx_init_schema.cpp @@ -40,7 +40,7 @@ struct TSysViewProcessor::TTxInitSchema : public TTxBase { void Complete(const TActorContext& ctx) override { SVLOG_D("[" << Self->TabletID() << "] TTxInitSchema::Complete"); - if (!AppData()->FeatureFlags.GetEnableSystemViews()) { + if (!AppData()->FeatureFlags.GetEnablePersistentQueryStats()) { SVLOG_D("[" << Self->TabletID() << "] tablet is offline"); Self->SignalTabletActive(ctx); Self->Become(&TThis::StateOffline); diff --git a/ydb/core/sys_view/service/sysview_service.cpp b/ydb/core/sys_view/service/sysview_service.cpp index 7556deefd8..be948ee6a4 100644 --- a/ydb/core/sys_view/service/sysview_service.cpp +++ b/ydb/core/sys_view/service/sysview_service.cpp @@ -7,6 +7,7 @@ #include <ydb/core/sys_view/common/events.h> #include <ydb/core/base/appdata.h> #include <ydb/core/base/tablet_pipecache.h> +#include <ydb/core/tablet/tablet_counters_aggregator.h> #include <ydb/core/tx/scheme_cache/scheme_cache.h> #include <ydb/core/mind/tenant_pool.h> @@ -321,6 +322,7 @@ public: , 
TotalInterval(TDuration::Seconds(processorMode == EProcessorMode::FAST ? 6 : 60)) , CollectInterval(TDuration::Seconds(processorMode == EProcessorMode::FAST ? 3 : 30)) , SendInterval(TDuration::Seconds(processorMode == EProcessorMode::FAST ? 2 : 20)) + , ProcessLabeledCountersInterval(TDuration::Seconds(processorMode == EProcessorMode::FAST ? 5 : 60)) {} void Bootstrap(const TActorContext &ctx) { @@ -350,14 +352,25 @@ public: ScanLimiter = MakeIntrusive<TScanLimiter>(ConcurrentScansLimit); - IntervalEnd = GetNextIntervalEnd(); - Schedule(IntervalEnd, new TEvPrivate::TEvProcessInterval(IntervalEnd)); + if (AppData()->FeatureFlags.GetEnablePersistentQueryStats()) { + IntervalEnd = GetNextIntervalEnd(); + Schedule(IntervalEnd, new TEvPrivate::TEvProcessInterval(IntervalEnd)); + } if (AppData()->FeatureFlags.GetEnableDbCounters()) { - auto intervalSize = ProcessCountersInterval.MicroSeconds(); - auto deadline = (Now().MicroSeconds() / intervalSize + 1) * intervalSize; - deadline += RandomNumber<ui64>(intervalSize / 5); - Schedule(TInstant::MicroSeconds(deadline), new TEvPrivate::TEvProcessCounters()); + { + auto intervalSize = ProcessCountersInterval.MicroSeconds(); + auto deadline = (TInstant::Now().MicroSeconds() / intervalSize + 1) * intervalSize; + deadline += RandomNumber<ui64>(intervalSize / 5); + Schedule(TInstant::MicroSeconds(deadline), new TEvPrivate::TEvProcessCounters()); + } + + { + auto intervalSize = ProcessLabeledCountersInterval.MicroSeconds(); + auto deadline = (TInstant::Now().MicroSeconds() / intervalSize + 1) * intervalSize; + deadline += RandomNumber<ui64>(intervalSize / 5); + Schedule(TInstant::MicroSeconds(deadline), new TEvPrivate::TEvProcessLabeledCounters()); + } auto callback = MakeIntrusive<TServiceDbWatcherCallback>(ctx.ActorSystem()); DbWatcherActorId = ctx.Register(CreateDbWatcherActor(callback)); @@ -380,9 +393,11 @@ public: hFunc(TEvPrivate::TEvProcessInterval, Handle); hFunc(TEvPrivate::TEvSendSummary, Handle); 
hFunc(TEvPrivate::TEvProcessCounters, Handle); + hFunc(TEvPrivate::TEvProcessLabeledCounters, Handle); hFunc(TEvPrivate::TEvRemoveDatabase, Handle); hFunc(TEvSysView::TEvRegisterDbCounters, Handle); hFunc(TEvSysView::TEvSendDbCountersResponse, Handle); + hFunc(TEvSysView::TEvSendDbLabeledCountersResponse, Handle); hFunc(TEvSysView::TEvGetIntervalMetricsRequest, Handle); hFunc(TEvPipeCache::TEvDeliveryProblem, Handle); hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); @@ -399,6 +414,7 @@ private: EvProcessInterval = EventSpaceBegin(TEvents::ES_PRIVATE), EvSendSummary, EvProcessCounters, + EvProcessLabeledCounters, EvRemoveDatabase, EvEnd }; @@ -424,6 +440,9 @@ private: struct TEvProcessCounters : public TEventLocal<TEvProcessCounters, EvProcessCounters> { }; + struct TEvProcessLabeledCounters : public TEventLocal<TEvProcessLabeledCounters, EvProcessLabeledCounters> { + }; + struct TEvRemoveDatabase : public TEventLocal<TEvRemoveDatabase, EvRemoveDatabase> { TString Database; TPathId PathId; @@ -566,15 +585,18 @@ private: Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request.Release())); } + template <typename T> + requires std::is_same_v<T, TEvSysView::TEvSendDbCountersRequest> || + std::is_same_v<T, TEvSysView::TEvSendDbLabeledCountersRequest> void SendCounters(const TString& database) { auto processorId = GetProcessorId(database); if (!processorId) { return; } - auto& dbCounters = DatabaseCounters[database]; - - auto sendEv = MakeHolder<TEvSysView::TEvSendDbCountersRequest>(); + constexpr bool isLabeled = std::is_same<T, TEvSysView::TEvSendDbLabeledCountersRequest>::value; + auto& dbCounters = isLabeled ? 
DatabaseLabeledCounters[database] : DatabaseCounters[database]; + auto sendEv = MakeHolder<T>(); auto& record = sendEv->Record; if (dbCounters.IsConfirmed) { @@ -596,7 +618,11 @@ private: serviceCounters->SetService(service); auto* diff = serviceCounters->MutableCounters(); - CalculateCountersDiff(diff, state.Current, state.Confirmed); + if (isLabeled) { + diff->CopyFrom(state.Current.Proto()); + } else { + CalculateCountersDiff(diff, state.Current, state.Confirmed); + } } SVLOG_D("Send counters: " @@ -605,7 +631,8 @@ private: << ", database# " << database << ", generation# " << record.GetGeneration() << ", node id# " << record.GetNodeId() - << ", is retrying# " << dbCounters.IsRetrying); + << ", is retrying# " << dbCounters.IsRetrying + << ", is labeled# " << isLabeled); Send(MakePipePeNodeCacheID(false), new TEvPipeCache::TEvForward(sendEv.Release(), processorId, true), @@ -643,6 +670,23 @@ private: it->second.States[service].Counters = counters; } + void RegisterDbLabeledCounters(const TString& database, NKikimrSysView::EDbCountersService service, + TIntrusivePtr<IDbCounters> counters) + { + auto [it, inserted] = DatabaseLabeledCounters.try_emplace(database, TDbCounters()); + if (inserted) { + if (ProcessorIds.find(database) == ProcessorIds.end()) { + RequestProcessorId(database); + } + + if (DbWatcherActorId) { + auto evWatch = MakeHolder<NSysView::TEvSysView::TEvWatchDatabase>(database); + Send(DbWatcherActorId, evWatch.Release()); + } + } + it->second.States[service].Counters = counters; + } + void Handle(TEvPrivate::TEvSendSummary::TPtr& ev) { auto prevIntervalEnd = IntervalEnd - TotalInterval; auto intervalEnd = ev->Get()->IntervalEnd; @@ -669,6 +713,11 @@ private: void Handle(TEvSysView::TEvGetIntervalMetricsRequest::TPtr& ev) { auto response = MakeHolder<TEvSysView::TEvGetIntervalMetricsResponse>(); + if (!AppData()->FeatureFlags.GetEnablePersistentQueryStats()) { + Send(ev->Sender, std::move(response), 0, ev->Cookie); + return; + } + const auto& record = 
ev->Get()->Record; response->Record.SetIntervalEndUs(record.GetIntervalEndUs()); const auto& database = record.GetDatabase(); @@ -722,12 +771,23 @@ private: << "service id# " << SelfId()); for (auto& [database, dbCounters] : DatabaseCounters) { - SendCounters(database); + SendCounters<TEvSysView::TEvSendDbCountersRequest>(database); } Schedule(ProcessCountersInterval, new TEvPrivate::TEvProcessCounters()); } + void Handle(TEvPrivate::TEvProcessLabeledCounters::TPtr&) { + SVLOG_D("Handle TEvPrivate::TEvProcessLabeledCounters: " + << "service id# " << SelfId()); + + for (auto& [database, dbCounters] : DatabaseLabeledCounters) { + SendCounters<TEvSysView::TEvSendDbLabeledCountersRequest>(database); + } + + Schedule(ProcessLabeledCountersInterval, new TEvPrivate::TEvProcessLabeledCounters()); + } + void Handle(TEvPrivate::TEvRemoveDatabase::TPtr& ev) { auto database = ev->Get()->Database; auto pathId = ev->Get()->PathId; @@ -745,6 +805,7 @@ private: ProcessorIds.erase(database); Attempts.erase(database); DatabaseCounters.erase(database); + DatabaseLabeledCounters.erase(database); UnresolvedTabletCounters.erase(pathId); } @@ -777,7 +838,7 @@ private: } if (dbCounters.IsRetrying) { - SendCounters(database); + SendCounters<TEvSysView::TEvSendDbCountersRequest>(database); } SVLOG_D("Handle TEvSysView::TEvSendDbCountersResponse: " @@ -786,6 +847,36 @@ private: << ", generation# " << generation); } + void Handle(TEvSysView::TEvSendDbLabeledCountersResponse::TPtr& ev) { + const auto& record = ev->Get()->Record; + const auto& database = record.GetDatabase(); + const auto generation = record.GetGeneration(); + + auto it = DatabaseLabeledCounters.find(database); + if (it == DatabaseLabeledCounters.end()) { + return; + } + + auto& dbCounters = it->second; + if (generation != dbCounters.Generation) { + return; + } + + dbCounters.IsConfirmed = true; + for (auto& [_, state] : dbCounters.States) { + state.Confirmed.Swap(state.Current); + } + + if (dbCounters.IsRetrying) { + 
SendCounters<TEvSysView::TEvSendDbLabeledCountersRequest>(database); + } + + SVLOG_D("Handle TEvSysView::TEvSendDbLabeledCountersResponse: " + << "service id# " << SelfId() + << ", database# " << database + << ", generation# " << generation); + } + void Handle(TEvSysView::TEvRegisterDbCounters::TPtr& ev) { const auto service = ev->Get()->Service; @@ -799,6 +890,15 @@ private: << ", path id# " << pathId << ", service# " << (int)service); + } else if (service == NKikimrSysView::LABELED) { + const auto& database = ev->Get()->Database; + RegisterDbLabeledCounters(database, service, ev->Get()->Counters); + + SVLOG_D("Handle TEvSysView::TEvRegisterDbLabeledCounters: " + << "service id# " << SelfId() + << ", database# " << database + << ", service# " << (int)service); + } else { // register by database name const auto& database = ev->Get()->Database; RegisterDbCounters(database, service, ev->Get()->Counters); @@ -932,7 +1032,7 @@ private: << ", query hash# " << stats->GetQueryTextHash() << ", cpu time# " << stats->GetTotalCpuTimeUs()); - if (!database.empty()) { + if (AppData()->FeatureFlags.GetEnablePersistentQueryStats() && !database.empty()) { auto queryEnd = TInstant::MilliSeconds(stats->GetEndTimeMs()); if (queryEnd < IntervalEnd - TotalInterval) { return; @@ -1035,6 +1135,7 @@ private: const TDuration TotalInterval; const TDuration CollectInterval; const TDuration SendInterval; + const TDuration ProcessLabeledCountersInterval; template <typename TInterval> struct TDbWindow { @@ -1125,6 +1226,7 @@ private: }; std::unordered_map<TString, TDbCounters> DatabaseCounters; + std::unordered_map<TString, TDbCounters> DatabaseLabeledCounters; THashMap<TPathId, TIntrusivePtr<IDbCounters>> UnresolvedTabletCounters; TActorId DbWatcherActorId; diff --git a/ydb/core/sys_view/ut_common.cpp b/ydb/core/sys_view/ut_common.cpp index 85a5702127..754f5386e2 100644 --- a/ydb/core/sys_view/ut_common.cpp +++ b/ydb/core/sys_view/ut_common.cpp @@ -1,4 +1,5 @@ #include "ut_common.h" +#include 
<ydb/core/persqueue/ut/common/pq_ut_common.h> namespace NKikimr { namespace NSysView { @@ -25,7 +26,7 @@ NKikimrSubDomains::TSubDomainSettings GetSubDomainDefaultSettings(const TString return subdomain; } -TTestEnv::TTestEnv(ui32 staticNodes, ui32 dynamicNodes, ui32 storagePools, bool enableSVP) { +TTestEnv::TTestEnv(ui32 staticNodes, ui32 dynamicNodes, ui32 storagePools, ui32 pqTabletsN, bool enableSVP) { auto mbusPort = PortManager.GetPort(); auto grpcPort = PortManager.GetPort(); @@ -68,6 +69,11 @@ TTestEnv::TTestEnv(ui32 staticNodes, ui32 dynamicNodes, ui32 storagePools, bool Client->InitRootScheme("Root"); + if (pqTabletsN) { + NKikimr::NPQ::FillPQConfig(Settings->PQConfig, "/Root/PQ", true); + PqTabletIds = Server->StartPQTablets(pqTabletsN); + } + Endpoint = "localhost:" + ToString(grpcPort); DriverConfig = NYdb::TDriverConfig().SetEndpoint(Endpoint); Driver = MakeHolder<NYdb::TDriver>(DriverConfig); diff --git a/ydb/core/sys_view/ut_common.h b/ydb/core/sys_view/ut_common.h index e142b96909..0c25bfab58 100644 --- a/ydb/core/sys_view/ut_common.h +++ b/ydb/core/sys_view/ut_common.h @@ -19,7 +19,7 @@ NKikimrSubDomains::TSubDomainSettings GetSubDomainDefaultSettings( class TTestEnv { public: TTestEnv(ui32 staticNodes = 1, ui32 dynamicNodes = 4, ui32 storagePools = 0, - bool enableSVP = false); + ui32 pqTabletsN = 0, bool enableSVP = false); ~TTestEnv(); Tests::TServer& GetServer() const { @@ -42,6 +42,14 @@ public: return Endpoint; } + const Tests::TServerSettings::TPtr GetSettings() const { + return Settings; + } + + const TVector<ui64>& GetPqTabletIds() const { + return PqTabletIds; + } + TStoragePools GetPools() const; TStoragePools CreatePoolsForTenant(const TString& tenant); @@ -57,6 +65,7 @@ private: TString Endpoint; NYdb::TDriverConfig DriverConfig; THolder<NYdb::TDriver> Driver; + TVector<ui64> PqTabletIds; }; } // NSysView diff --git a/ydb/core/sys_view/ut_counters.cpp b/ydb/core/sys_view/ut_counters.cpp index 51bc0c4965..6662308465 100644 --- 
a/ydb/core/sys_view/ut_counters.cpp +++ b/ydb/core/sys_view/ut_counters.cpp @@ -75,7 +75,7 @@ void CreateDatabasesAndTables(TTestEnv& env) { Y_UNIT_TEST_SUITE(DbCounters) { Y_UNIT_TEST(TabletsSimple) { - TTestEnv env(1, 2, 0, true); + TTestEnv env(1, 2, 0, 0, true); CreateDatabasesAndTables(env); for (size_t iter = 0; iter < 30; ++iter) { diff --git a/ydb/core/sys_view/ut_kqp.cpp b/ydb/core/sys_view/ut_kqp.cpp index bdc326c543..a28c40f6ed 100644 --- a/ydb/core/sys_view/ut_kqp.cpp +++ b/ydb/core/sys_view/ut_kqp.cpp @@ -1177,7 +1177,7 @@ Y_UNIT_TEST_SUITE(SystemView) { auto nowUs = TInstant::Now().MicroSeconds(); - TTestEnv env(1, 4, 0, true); + TTestEnv env(1, 4, 0, 0, true); CreateTenantsAndTables(env); TTableClient client(env.GetDriver()); @@ -1229,7 +1229,7 @@ Y_UNIT_TEST_SUITE(SystemView) { constexpr ui64 partitionCount = 5; - TTestEnv env(1, 4, 0, true); + TTestEnv env(1, 4, 0, 0, true); CreateTenantsAndTables(env, true, partitionCount); TTableClient client(env.GetDriver()); @@ -1259,7 +1259,7 @@ Y_UNIT_TEST_SUITE(SystemView) { constexpr ui64 partitionCount = 5; - TTestEnv env(1, 4, 0, true); + TTestEnv env(1, 4, 0, 0, true); CreateTenantsAndTables(env, true, partitionCount); TTableClient client(env.GetDriver()); diff --git a/ydb/core/sys_view/ut_kqp/CMakeLists.darwin.txt b/ydb/core/sys_view/ut_kqp/CMakeLists.darwin.txt index f4db6ac068..6d741903d2 100644 --- a/ydb/core/sys_view/ut_kqp/CMakeLists.darwin.txt +++ b/ydb/core/sys_view/ut_kqp/CMakeLists.darwin.txt @@ -23,6 +23,7 @@ target_link_libraries(ydb-core-sys_view-ut_kqp PUBLIC cpp-testing-unittest cpp-yson-node kqp-ut-common + persqueue-ut-common ydb-core-testlib cpp-client-draft ) @@ -38,6 +39,7 @@ target_sources(ydb-core-sys_view-ut_kqp PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/sys_view/ut_kqp.cpp ${CMAKE_SOURCE_DIR}/ydb/core/sys_view/ut_common.cpp ${CMAKE_SOURCE_DIR}/ydb/core/sys_view/ut_counters.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/sys_view/ut_labeled.cpp ) add_test( NAME diff --git 
a/ydb/core/sys_view/ut_kqp/CMakeLists.linux.txt b/ydb/core/sys_view/ut_kqp/CMakeLists.linux.txt index 60cbd08188..687a68a060 100644 --- a/ydb/core/sys_view/ut_kqp/CMakeLists.linux.txt +++ b/ydb/core/sys_view/ut_kqp/CMakeLists.linux.txt @@ -25,6 +25,7 @@ target_link_libraries(ydb-core-sys_view-ut_kqp PUBLIC cpp-testing-unittest cpp-yson-node kqp-ut-common + persqueue-ut-common ydb-core-testlib cpp-client-draft ) @@ -42,6 +43,7 @@ target_sources(ydb-core-sys_view-ut_kqp PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/sys_view/ut_kqp.cpp ${CMAKE_SOURCE_DIR}/ydb/core/sys_view/ut_common.cpp ${CMAKE_SOURCE_DIR}/ydb/core/sys_view/ut_counters.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/sys_view/ut_labeled.cpp ) add_test( NAME diff --git a/ydb/core/sys_view/ut_labeled.cpp b/ydb/core/sys_view/ut_labeled.cpp new file mode 100644 index 0000000000..02acea4916 --- /dev/null +++ b/ydb/core/sys_view/ut_labeled.cpp @@ -0,0 +1,286 @@ +#include "ut_common.h" + +#include "ut_common.h" + +#include <ydb/core/base/counters.h> +#include <ydb/core/kqp/ut/common/kqp_ut_common.h> +#include <ydb/core/persqueue/ut/common/pq_ut_common.h> + +namespace NKikimr { +namespace NSysView { + +using namespace NYdb; +using namespace NYdb::NTable; +using namespace NYdb::NScheme; + +const ui32 partitionsN = 32; +const TString topicName = "topic"; + + +namespace { + +void CreateDatabase(TTestEnv& env, const TString& databaseName) { + auto subdomain = GetSubDomainDeclareSettings(databaseName); + UNIT_ASSERT_VALUES_EQUAL(NMsgBusProxy::MSTATUS_OK, + env.GetClient().CreateExtSubdomain("/Root", subdomain)); + + env.GetTenants().Run("/Root/" + databaseName, 1); + + auto subdomainSettings = GetSubDomainDefaultSettings(databaseName, env.GetPools()); + subdomainSettings.SetExternalSysViewProcessor(true); + subdomainSettings.SetExternalSchemeShard(true); + UNIT_ASSERT_VALUES_EQUAL(NMsgBusProxy::MSTATUS_OK, + env.GetClient().AlterExtSubdomain("/Root", subdomainSettings)); +} + +bool CheckCounter(::NMonitoring::TDynamicCounterPtr group, 
const char* sensorName, ui32 refValue, + bool isDerivative) { + auto value = group->GetNamedCounter("name", sensorName, isDerivative)->Val(); + Cerr << "CHECK COUNTER " << sensorName << " wait " << refValue << " got " << value << "\n"; + return (value == refValue); +} + +bool CheckLtCounter(::NMonitoring::TDynamicCounterPtr group, const char* sensorName, ui32 refValue, + bool isDerivative) { + auto value = group->GetNamedCounter("name", sensorName, isDerivative)->Val(); + Cerr << "CHECK COUNTER " << sensorName << " wait less than " << refValue << " got " << value << "\n"; + return (value <= refValue); +} + +bool CheckLabeledCounters(::NMonitoring::TDynamicCounterPtr databaseGroup, const TString& dbId, + std::function<bool(::NMonitoring::TDynamicCounterPtr)> particularCountersCheck) { + bool isGood{true}; + Y_UNUSED(dbId); + auto topicGroup = databaseGroup + ->GetSubgroup("cloud_id", "") + ->GetSubgroup("folder_id", "") + ->GetSubgroup("database_id", "") + ->GetSubgroup("host", "") + ->GetSubgroup("topic", topicName); + { + { + TStringStream ss; + topicGroup->OutputHtml(ss); + Cerr << ss.Str() << Endl; + } + + isGood &= particularCountersCheck(topicGroup); + } + + return isGood; +} + +void GetCounters(TTestEnv& env, const TString& databaseName, const TString& databasePath, + std::function<bool(::NMonitoring::TDynamicCounterPtr)> particularCountersCheck) { + for (size_t iter = 0; iter < 30; ++iter) { + Cerr << "iteration " << iter << Endl; + + bool checkDb = false; + + for (ui32 nodeId = 0; nodeId < env.GetServer().GetRuntime()->GetNodeCount(); ++nodeId) { + auto counters = env.GetServer().GetRuntime()->GetAppData(nodeId).Counters; + auto labeledGroup = GetServiceCounters(counters, "labeled_serverless", false); + Y_VERIFY(labeledGroup); + + auto databaseGroup = labeledGroup->FindSubgroup("database", databasePath); + if (databaseGroup) { + checkDb = CheckLabeledCounters(databaseGroup, databaseName, particularCountersCheck); + } + } + + if (checkDb) { + return; + } + + 
Sleep(TDuration::Seconds(5)); + } + UNIT_ASSERT_C(false, "out of iterations"); +} + +} // namespace + +Y_UNIT_TEST_SUITE(LabeledDbCounters) { + + Y_UNIT_TEST(OneTablet) { + TTestEnv env(1, 2, 0, 1, true); + const TString databaseName = NPQ::TTabletPreparationParameters().databaseId; + const TString databasePath = NPQ::TTabletPreparationParameters().databasePath; + auto edge = env.GetServer().GetRuntime()->AllocateEdgeActor(); + auto check = [](::NMonitoring::TDynamicCounterPtr topicGroup) { + bool isGood{true}; + + isGood &= CheckCounter(topicGroup, "topic.partition.alive_count", partitionsN, false); + isGood &= CheckCounter(topicGroup, "topic.partition.write.speed_limit_bytes_per_second", 50'000'000, false); + isGood &= CheckCounter(topicGroup, "topic.producers_count", 0, false); + + return isGood; + }; + + CreateDatabase(env, databaseName); + NPQ::PQTabletPrepare({.partitions=partitionsN}, {}, *env.GetServer().GetRuntime(), + env.GetPqTabletIds()[0], edge); + GetCounters(env, databaseName, databasePath, check); + } + + Y_UNIT_TEST(OneTabletRestart) { + TTestEnv env(1, 2, 0, 1, true); + const TString databaseName = NPQ::TTabletPreparationParameters().databaseId; + const TString databasePath = NPQ::TTabletPreparationParameters().databasePath; + auto edge = env.GetServer().GetRuntime()->AllocateEdgeActor(); + + CreateDatabase(env, databaseName); + NPQ::PQTabletPrepare({.partitions=partitionsN}, {}, *env.GetServer().GetRuntime(), + env.GetPqTabletIds()[0], edge); + + { + auto check = [](::NMonitoring::TDynamicCounterPtr topicGroup) { + bool isGood{true}; + + { + TStringStream ss; + topicGroup->OutputHtml(ss); + Cerr << ss.Str() << Endl; + } + + isGood &= CheckCounter(topicGroup, "topic.partition.alive_count", partitionsN, false); + isGood &= CheckCounter(topicGroup, "topic.partition.write.speed_limit_bytes_per_second", 50'000'000, false); + isGood &= CheckCounter(topicGroup, "topic.producers_count", 0, false); + + return isGood; + }; + GetCounters(env, databaseName, 
databasePath, check); + } + + Sleep(TDuration::Seconds(60)); + env.GetServer().GetRuntime()->Register(CreateTabletKiller(env.GetPqTabletIds()[0])); + + { + auto check = [](::NMonitoring::TDynamicCounterPtr topicGroup) { + bool isGood{true}; + + isGood &= CheckLtCounter(topicGroup, "topic.partition.uptime_milliseconds_min", + TDuration::Seconds(60).MilliSeconds() + 200, false); + isGood &= CheckCounter(topicGroup, "topic.partition.alive_count", partitionsN, false); + return isGood; + }; + GetCounters(env, databaseName, databasePath, check); + } + } + + Y_UNIT_TEST(TwoTablets) { + TTestEnv env(1, 2, 0, 2, true); + const TString databaseName = NPQ::TTabletPreparationParameters().databaseId; + const TString databasePath = NPQ::TTabletPreparationParameters().databasePath; + auto check = [](::NMonitoring::TDynamicCounterPtr topicGroup) { + bool isGood{true}; + + isGood &= CheckCounter(topicGroup, "topic.partition.alive_count", partitionsN*2, false); + isGood &= CheckCounter(topicGroup, "topic.partition.write.speed_limit_bytes_per_second", 50'000'000, false); + isGood &= CheckCounter(topicGroup, "topic.producers_count", 0, false); + + return isGood; + }; + + CreateDatabase(env, databaseName); + for (auto& tbId : env.GetPqTabletIds()) { + NPQ::PQTabletPrepare({.partitions=partitionsN}, {}, *env.GetServer().GetRuntime(), + tbId, env.GetServer().GetRuntime()->AllocateEdgeActor()); + } + + GetCounters(env, databaseName, databasePath, check); + } + + Y_UNIT_TEST(TwoTabletsKillOneTablet) { + TTestEnv env(1, 2, 0, 2, true); + const TString databaseName = NPQ::TTabletPreparationParameters().databaseId; + const TString databasePath = NPQ::TTabletPreparationParameters().databasePath; + auto edge = env.GetServer().GetRuntime()->AllocateEdgeActor(); + CreateDatabase(env, databaseName); + for (auto& tbId : env.GetPqTabletIds()) { + NPQ::PQTabletPrepare({.partitions=partitionsN}, {}, *env.GetServer().GetRuntime(), + tbId, edge); + } + + { + auto check = 
[](::NMonitoring::TDynamicCounterPtr topicGroup) { + bool isGood{true}; + + isGood &= CheckCounter(topicGroup, "topic.partition.alive_count", partitionsN*2, false); + isGood &= CheckCounter(topicGroup, "topic.partition.write.speed_limit_bytes_per_second", 50'000'000, false); + isGood &= CheckCounter(topicGroup, "topic.producers_count", 0, false); + + return isGood; + }; + + GetCounters(env, databaseName, databasePath, check); + } + + for (ui32 i = 0; i < env.GetServer().StaticNodes() + env.GetServer().DynamicNodes(); i++) { + env.GetClient().MarkNodeInHive(env.GetServer().GetRuntime(), i, false); + } + env.GetServer().GetRuntime()->Register(CreateTabletKiller(env.GetPqTabletIds()[0])); + + { + auto check = [](::NMonitoring::TDynamicCounterPtr topicGroup) { + bool isGood{true}; + + isGood &= CheckCounter(topicGroup, "topic.partition.alive_count", partitionsN, false); + + return isGood; + }; + + GetCounters(env, databaseName, databasePath, check); + } + } + + Y_UNIT_TEST(TwoTabletsDisconnectOneNode) { + TTestEnv env(1, 2, 0, 2, true); + const TString databaseName = NPQ::TTabletPreparationParameters().databaseId; + const TString databasePath = NPQ::TTabletPreparationParameters().databasePath; + auto edge = env.GetServer().GetRuntime()->AllocateEdgeActor(); + CreateDatabase(env, databaseName); + for (auto& tbId : env.GetPqTabletIds()) { + NPQ::PQTabletPrepare({.partitions=partitionsN}, {}, *env.GetServer().GetRuntime(), + tbId, edge); + } + + { + auto check = [](::NMonitoring::TDynamicCounterPtr topicGroup) { + bool isGood{true}; + + isGood &= CheckCounter(topicGroup, "topic.partition.alive_count", partitionsN*2, false); + isGood &= CheckCounter(topicGroup, "topic.partition.write.speed_limit_bytes_per_second", 50'000'000, false); + isGood &= CheckCounter(topicGroup, "topic.producers_count", 0, false); + + return isGood; + }; + + GetCounters(env, databaseName, databasePath, check); + } + + for (ui32 i = 0; i < env.GetServer().StaticNodes() + 
env.GetServer().DynamicNodes(); i++) { + env.GetClient().MarkNodeInHive(env.GetServer().GetRuntime(), i, false); + } + env.GetServer().GetRuntime()->DisconnectNodes(0, 1, false); + env.GetServer().GetRuntime()->DisconnectNodes(1, 0, false); + env.GetServer().GetRuntime()->DisconnectNodes(0, 2, false); + env.GetServer().GetRuntime()->DisconnectNodes(2, 0, false); + + { + auto check = [](::NMonitoring::TDynamicCounterPtr topicGroup) { + bool isGood{true}; + + isGood &= CheckCounter(topicGroup, "topic.partition.alive_count", partitionsN, false); + isGood &= CheckCounter(topicGroup, "topic.partition.total_count", partitionsN, false); + return isGood; + }; + + GetCounters(env, databaseName, databasePath, check); + } + + env.GetServer().GetRuntime()->Register(CreateTabletKiller(env.GetPqTabletIds()[0])); + } +} + +} // NSysView +} // NKikimr diff --git a/ydb/core/tablet/CMakeLists.txt b/ydb/core/tablet/CMakeLists.txt index 80834d9cfc..ec3f900097 100644 --- a/ydb/core/tablet/CMakeLists.txt +++ b/ydb/core/tablet/CMakeLists.txt @@ -33,6 +33,7 @@ target_link_libraries(ydb-core-tablet PUBLIC target_sources(ydb-core-tablet PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tablet/bootstrapper.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet/labeled_counters_merger.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tablet/labeled_db_counters.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet/node_tablet_monitor.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet/node_whiteboard.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet/pipe_tracker.cpp @@ -58,4 +59,5 @@ target_sources(ydb-core-tablet PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/tablet/tablet_sys.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet/tablet_tracing_signals.cpp ${CMAKE_SOURCE_DIR}/ydb/core/tablet/private/aggregated_counters.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/tablet/private/labeled_db_counters.cpp ) diff --git a/ydb/core/tablet/labeled_counters_merger.h b/ydb/core/tablet/labeled_counters_merger.h index c3c1a9653a..6e92a4bc79 100644 --- a/ydb/core/tablet/labeled_counters_merger.h +++ 
b/ydb/core/tablet/labeled_counters_merger.h @@ -14,17 +14,17 @@ public: NKikimrLabeledCounters::TTabletLabeledCounter& target) { ui64 value(source.GetValue()); TLabeledCounterOptions::ECounterType type(source.GetType()); - NKikimr::TLabeledCounterOptions::EAggregateFunc func(source.GetAggregateFunc()); + TLabeledCounterOptions::EAggregateFunc func(source.GetAggregateFunc()); if (type == TLabeledCounterOptions::CT_TIMELAG) { type = TLabeledCounterOptions::CT_SIMPLE; auto now = TInstant::Now().MilliSeconds(); value = now > value ? now - value : 0; switch (func) { - case NKikimr::TLabeledCounterOptions::EAF_MIN: - func = NKikimr::TLabeledCounterOptions::EAF_MAX; + case TLabeledCounterOptions::EAF_MIN: + func = TLabeledCounterOptions::EAF_MAX; break; - case NKikimr::TLabeledCounterOptions::EAF_MAX: - func = NKikimr::TLabeledCounterOptions::EAF_MIN; + case TLabeledCounterOptions::EAF_MAX: + func = TLabeledCounterOptions::EAF_MIN; break; default: break; @@ -32,13 +32,13 @@ public: } if (target.HasValue()) { switch (func) { - case NKikimr::TLabeledCounterOptions::EAF_MIN: + case TLabeledCounterOptions::EAF_MIN: target.SetValue(std::min(target.GetValue(), value)); break; - case NKikimr::TLabeledCounterOptions::EAF_MAX: + case TLabeledCounterOptions::EAF_MAX: target.SetValue(std::max(target.GetValue(), value)); break; - case NKikimr::TLabeledCounterOptions::EAF_SUM: + case TLabeledCounterOptions::EAF_SUM: target.SetValue(target.GetValue() + value); break; } @@ -46,6 +46,7 @@ public: target.SetValue(value); target.SetType(type); target.SetAggregateFunc(func); + target.SetNameId(source.GetNameId()); } } diff --git a/ydb/core/tablet/labeled_db_counters.cpp b/ydb/core/tablet/labeled_db_counters.cpp new file mode 100644 index 0000000000..1b4e1b8616 --- /dev/null +++ b/ydb/core/tablet/labeled_db_counters.cpp @@ -0,0 +1,12 @@ +#include "labeled_db_counters.h" +#include "private/labeled_db_counters.h" + + +namespace NKikimr { + +TIntrusivePtr<NSysView::IDbCounters> 
CreateLabeledDbCounters( + ::NMonitoring::TDynamicCounterPtr externalGroup) { + return MakeIntrusive<NPrivate::TDbLabeledCounters>(externalGroup); +} + +} // namespace NKikimr diff --git a/ydb/core/tablet/labeled_db_counters.h b/ydb/core/tablet/labeled_db_counters.h new file mode 100644 index 0000000000..123f1b9ad7 --- /dev/null +++ b/ydb/core/tablet/labeled_db_counters.h @@ -0,0 +1,20 @@ +#pragma once + +#include <util/generic/ptr.h> +#include "tablet_counters.h" + + +namespace NKikimr { + +class ILabeledCounters : public virtual TThrRefBase { +public: + using TPtr = TIntrusivePtr<ILabeledCounters>; + + virtual void Apply(ui64 TabletID, const NKikimr::TTabletLabeledCountersBase* labeledCounters) = 0; + virtual void ForgetTablet(ui64 tabletID) = 0; + virtual void UseDatabase(const TString& database) { Y_UNUSED(database); } +}; + +TIntrusivePtr<NSysView::IDbCounters> CreateLabeledDbCounters(::NMonitoring::TDynamicCounterPtr externalGroup); + +} // namespace NKikimr diff --git a/ydb/core/tablet/node_whiteboard.cpp b/ydb/core/tablet/node_whiteboard.cpp index 0677480cf3..289e67c788 100644 --- a/ydb/core/tablet/node_whiteboard.cpp +++ b/ydb/core/tablet/node_whiteboard.cpp @@ -49,13 +49,13 @@ public: auto versionCounter = GetServiceCounters(AppData(ctx)->Counters, "utils")->GetSubgroup("revision", version); *versionCounter->GetCounter("version", false) = 1; } - + // TODO(t1mursadykov): Add role for static nodes with sys tablets only if (AppData(ctx)->DynamicNameserviceConfig) { if (SelfId().NodeId() <= AppData(ctx)->DynamicNameserviceConfig->MaxStaticNodeId) ctx.Send(ctx.SelfID, new TEvWhiteboard::TEvSystemStateAddRole("Storage")); } - + SystemStateInfo.SetStartTime(ctx.Now().MilliSeconds()); ProcessStats.Fill(getpid()); if (ProcessStats.CGroupMemLim != 0) { @@ -655,34 +655,66 @@ protected: } } + static void CopyTabletStateInfo( + NKikimrWhiteboard::TTabletStateInfo& dst, + const NKikimrWhiteboard::TTabletStateInfo& src, + const NKikimrWhiteboard::TEvTabletStateRequest&) 
+ { + dst = src; + } + void Handle(TEvWhiteboard::TEvTabletStateRequest::TPtr &ev, const TActorContext &ctx) { auto now = TMonotonic::Now(); const auto& request = ev->Get()->Record; std::unique_ptr<TEvWhiteboard::TEvTabletStateResponse> response = std::make_unique<TEvWhiteboard::TEvTabletStateResponse>(); auto& record = response->Record; - if (request.groupby().empty()) { - ui64 changedSince = request.has_changedsince() ? request.changedsince() : 0; - for (const auto& pr : TabletStateInfo) { - if (pr.second.changetime() >= changedSince) { - NKikimrWhiteboard::TTabletStateInfo& tabletStateInfo = *record.add_tabletstateinfo(); - tabletStateInfo = pr.second; - } + if (request.format() == "packed5") { + TEvWhiteboard::TEvTabletStateResponsePacked5* ptr = response->AllocatePackedResponse(TabletStateInfo.size()); + for (const auto& [tabletId, tabletInfo] : TabletStateInfo) { + ptr->TabletId = tabletInfo.tabletid(); + ptr->FollowerId = tabletInfo.followerid(); + ptr->Generation = tabletInfo.generation(); + ptr->Type = tabletInfo.type(); + ptr->State = tabletInfo.state(); + ++ptr; } - } else if (request.groupby() == "Type,State") { // the only supported group-by for now - std::unordered_map<std::pair<NKikimrTabletBase::TTabletTypes::EType, - NKikimrWhiteboard::TTabletStateInfo::ETabletState>, NKikimrWhiteboard::TTabletStateInfo> stateGroupBy; - for (const auto& [id, stateInfo] : TabletStateInfo) { - NKikimrWhiteboard::TTabletStateInfo& state = stateGroupBy[{stateInfo.type(), stateInfo.state()}]; - auto count = state.count(); - if (count == 0) { - state.set_type(stateInfo.type()); - state.set_state(stateInfo.state()); + } else { + if (request.groupby().empty()) { + ui64 changedSince = request.has_changedsince() ? 
request.changedsince() : 0; + if (request.filtertabletid_size() == 0) { + for (const auto& pr : TabletStateInfo) { + if (pr.second.changetime() >= changedSince) { + NKikimrWhiteboard::TTabletStateInfo& tabletStateInfo = *record.add_tabletstateinfo(); + CopyTabletStateInfo(tabletStateInfo, pr.second, request); + } + } + } else { + for (auto tabletId : request.filtertabletid()) { + auto it = TabletStateInfo.find({tabletId, 0}); + if (it != TabletStateInfo.end()) { + if (it->second.changetime() >= changedSince) { + NKikimrWhiteboard::TTabletStateInfo& tabletStateInfo = *record.add_tabletstateinfo(); + CopyTabletStateInfo(tabletStateInfo, it->second, request); + } + } + } + } + } else if (request.groupby() == "Type,State") { // the only supported group-by for now + std::unordered_map<std::pair<NKikimrTabletBase::TTabletTypes::EType, + NKikimrWhiteboard::TTabletStateInfo::ETabletState>, NKikimrWhiteboard::TTabletStateInfo> stateGroupBy; + for (const auto& [id, stateInfo] : TabletStateInfo) { + NKikimrWhiteboard::TTabletStateInfo& state = stateGroupBy[{stateInfo.type(), stateInfo.state()}]; + auto count = state.count(); + if (count == 0) { + state.set_type(stateInfo.type()); + state.set_state(stateInfo.state()); + } + state.set_count(count + 1); + } + for (auto& pr : stateGroupBy) { + NKikimrWhiteboard::TTabletStateInfo& tabletStateInfo = *record.add_tabletstateinfo(); + tabletStateInfo = std::move(pr.second); } - state.set_count(count + 1); - } - for (auto& pr : stateGroupBy) { - NKikimrWhiteboard::TTabletStateInfo& tabletStateInfo = *record.add_tabletstateinfo(); - tabletStateInfo = std::move(pr.second); } } response->Record.set_responsetime(ctx.Now().MilliSeconds()); diff --git a/ydb/core/tablet/private/aggregated_counters.cpp b/ydb/core/tablet/private/aggregated_counters.cpp index be31a8da04..8c03a6855e 100644 --- a/ydb/core/tablet/private/aggregated_counters.cpp +++ b/ydb/core/tablet/private/aggregated_counters.cpp @@ -530,6 +530,43 @@ void 
TAggregatedLabeledCounters::FillGetRequestV2( } } +void TAggregatedLabeledCounters::ToProto(NKikimrLabeledCounters::TTabletLabeledCounters& labeledCounters) const { + if (Changed) { + for (ui32 idx : xrange(CountersByTabletID.size())) { + Recalc(idx); + } + Changed = false; + } + ui32 updatedCount{0}; + for (ui32 i = 0; i < Size(); ++i) { + if (strlen(Names[i]) != 0) { + if (labeledCounters.LabeledCounterSize() <= updatedCount) { + labeledCounters.AddLabeledCounter(); + } + auto& labeledCounter = *labeledCounters.MutableLabeledCounter(updatedCount); + labeledCounter.SetValue(GetValue(i)); + labeledCounter.SetNameId(i); + labeledCounter.SetAggregateFunc(NKikimr::TLabeledCounterOptions::EAggregateFunc(AggrFunc[i])); + labeledCounter.SetType(NKikimr::TLabeledCounterOptions::ECounterType(Types[i])); + ++updatedCount; + } + } +} + +void TAggregatedLabeledCounters::FromProto( + NMonitoring::TDynamicCounterPtr group, + const NKikimrLabeledCounters::TTabletLabeledCounters& labeledCounters) const { + for (const auto& counter : labeledCounters.GetLabeledCounter()) { + const ui32 nameId{counter.GetNameId()}; + if (strlen(Names[nameId]) != 0) { + // TODO: ASDFGS if CT_TIMELAG -> ctx.Now() - counters.GetValue + const bool derived = counter.GetType() == TLabeledCounterOptions::CT_DERIV; + auto namedCounter = group->GetNamedCounter("name", Names[nameId], derived); + *namedCounter = counter.GetValue(); + } + } +} + void TAggregatedLabeledCounters::Recalc(ui32 idx) const { Y_VERIFY(idx < Ids.size()); auto &counters = CountersByTabletID[idx]; @@ -537,7 +574,11 @@ void TAggregatedLabeledCounters::Recalc(ui32 idx) const { std::pair<ui64, ui64> aggrVal{0,0}; ui64 cntCount = counters.size(); - Y_VERIFY(cntCount > 0); + // Y_VERIFY(cntCount > 0); + if (cntCount == 0) { + return; + } + if (aggrFunc == TTabletLabeledCountersBase::EAggregateFunc::EAF_MIN) aggrVal = counters.begin()->second; @@ -557,6 +598,7 @@ void TAggregatedLabeledCounters::Recalc(ui32 idx) const { Y_FAIL("bad aggrFunc 
value"); }; } + AggrCounters[idx] = aggrVal.first; Ids[idx] = aggrVal.second; } diff --git a/ydb/core/tablet/private/aggregated_counters.h b/ydb/core/tablet/private/aggregated_counters.h index 44c92cd46f..c7f4d3f373 100644 --- a/ydb/core/tablet/private/aggregated_counters.h +++ b/ydb/core/tablet/private/aggregated_counters.h @@ -163,6 +163,10 @@ public: void FillGetRequestV2(NKikimr::TTabletLabeledCountersResponseContext* context, const TString& group) const; + void ToProto(NKikimrLabeledCounters::TTabletLabeledCounters& labeledCounters) const; + void FromProto(NMonitoring::TDynamicCounterPtr group, + const NKikimrLabeledCounters::TTabletLabeledCounters& labeledCounters) const; + private: // ::NMonitoring::TDynamicCounterPtr CounterGroup; diff --git a/ydb/core/tablet/private/labeled_db_counters.cpp b/ydb/core/tablet/private/labeled_db_counters.cpp new file mode 100644 index 0000000000..feb0d41a1f --- /dev/null +++ b/ydb/core/tablet/private/labeled_db_counters.cpp @@ -0,0 +1,117 @@ +#include "labeled_db_counters.h" + +#include <library/cpp/actors/core/actorsystem.h> +#include <util/string/split.h> +#include <ydb/core/sys_view/service/sysview_service.h> + +namespace NKikimr::NPrivate { + +/* +** class TPQCounters + */ + +THashMap<TString, TAutoPtr<TAggregatedLabeledCounters>> TPQCounters::LabeledCountersByGroupReference = {}; + +TPQCounters::TPQCounters(NMonitoring::TDynamicCounterPtr counters) { + Group = counters; +} + +void TPQCounters::Apply(ui64 tabletId, const NKikimr::TTabletLabeledCountersBase* labeledCounters) { + const TString group = labeledCounters->GetGroup(); + TString groupNames; + + if (!LabeledCountersByGroup.Has(group)) { + TVector<TString> rr; + StringSplitter(group).Split('|').Collect(&rr); + for (ui32 i = 0; i < rr.size(); ++i) { + if (i > 0) + groupNames += '|'; + groupNames += labeledCounters->GetGroupName(i); + } + + if (!LabeledCountersByGroupReference.contains(groupNames)) { + LabeledCountersByGroupReference.emplace(groupNames, new 
TAggregatedLabeledCounters( + labeledCounters->GetCounters().Size(), labeledCounters->GetAggrFuncs(), + labeledCounters->GetNames(), labeledCounters->GetTypes(), groupNames)); + } + } + + auto& el = LabeledCountersByGroup.InsertIfAbsent(group, new TAggregatedLabeledCounters( + labeledCounters->GetCounters().Size(), labeledCounters->GetAggrFuncs(), + labeledCounters->GetNames(), labeledCounters->GetTypes(), groupNames)); + + for (ui32 i = 0, N = labeledCounters->GetCounters().Size(); i < N; ++i) { + const ui64& value = labeledCounters->GetCounters()[i].Get(); + // FIXME (?): + // const ui64& id = labeledCounters->GetIds()[i].Get(); + const ui64 id = i; + el->SetValue(tabletId, i, value, id); + } +} + +void TPQCounters::ForgetTablet(ui64 tabletId) { + for (auto& bucket : LabeledCountersByGroup.Buckets) { + TWriteGuard guard(bucket.GetLock()); + auto& map = bucket.GetMap(); + for (auto iterator = map.begin(); iterator != map.end();) { + bool empty = iterator->second->ForgetTablet(tabletId); + if (empty) { + auto eraseIterator = iterator; + ++iterator; + map.erase(eraseIterator); + } else { + ++iterator; + } + } + } +} + +/* +** class TDbLabeledCounters + */ + +TDbLabeledCounters::TDbLabeledCounters() +: TPQCounters(MakeIntrusive<::NMonitoring::TDynamicCounters>()) +{} + +TDbLabeledCounters::TDbLabeledCounters(::NMonitoring::TDynamicCounterPtr counters) +: TPQCounters(counters) +{} + +void TDbLabeledCounters::ToProto(NKikimr::NSysView::TDbServiceCounters& counters) { + counters.Clear(); + for (auto& bucket : LabeledCountersByGroup.Buckets) { + TWriteGuard guard(bucket.GetLock()); + for (auto& [group, labeledCounters] : bucket.GetMap()) { + auto* proto = counters.FindOrAddLabeledCounters(group); + auto* labeled = proto->MutableAggregatedPerTablets(); + labeledCounters->ToProto(*labeled); + } + } +} + +void TDbLabeledCounters::FromProto(NKikimr::NSysView::TDbServiceCounters& counters) { + for (auto& proto : *counters.Proto().MutableLabeledCounters()) { + TVector<TString> 
groups; + TVector<TString> groupNames = {"topic", "important", "consumer"}; + Y_VERIFY(proto.GetAggregatedPerTablets().GetDelimiter() == "|"); + StringSplitter(proto.GetAggregatedPerTablets().GetGroup()).Split('|').Collect(&groups); + auto countersGroup = Group; + // FIXME: a little hack here: we have consumer - important - topic group names in proto + // that's why we iterate the group in reverse order + // this comes from: ydb/core/persqueue/user_info.h:310 (TUserInfo::TUserInfo) + std::reverse(groups.begin(), groups.end()); + + for (size_t i = 0; i < groups.size(); ++i) { + if (i != 1) { + countersGroup = countersGroup->GetSubgroup(groupNames[i], groups[i]); + } + } + const TString groupNamesStr = (groups.size() == 3) ? "client|important|topic" : "topic"; + + LabeledCountersByGroupReference[groupNamesStr]->FromProto(countersGroup, + proto.GetAggregatedPerTablets()); + } +} + +} // namespace NKikimr::NPrivate diff --git a/ydb/core/tablet/private/labeled_db_counters.h b/ydb/core/tablet/private/labeled_db_counters.h new file mode 100644 index 0000000000..8359381c73 --- /dev/null +++ b/ydb/core/tablet/private/labeled_db_counters.h @@ -0,0 +1,40 @@ +#pragma once + +#include <ydb/core/sys_view/service/db_counters.h> +#include <ydb/core/tablet/labeled_db_counters.h> +#include <ydb/core/util/concurrent_rw_hash.h> + +#include "aggregated_counters.h" + + +namespace NKikimr::NPrivate { + + +class TPQCounters : public ILabeledCounters { +protected: + TConcurrentRWHashMap<TString, TAutoPtr<TAggregatedLabeledCounters>, 256> LabeledCountersByGroup; + NMonitoring::TDynamicCounterPtr Group; + +public: + using TPtr = TIntrusivePtr<TPQCounters>; + + explicit TPQCounters(NMonitoring::TDynamicCounterPtr counters); + + void Apply(ui64 tabletID, const NKikimr::TTabletLabeledCountersBase* labeledCounters) override; + void ForgetTablet(ui64 tabletID) override; + + static THashMap<TString, TAutoPtr<TAggregatedLabeledCounters>> LabeledCountersByGroupReference; +}; + +class 
TDbLabeledCounters : public TPQCounters, public NSysView::IDbCounters { +public: + using TPtr = TIntrusivePtr<TDbLabeledCounters>; + + TDbLabeledCounters(); + explicit TDbLabeledCounters(::NMonitoring::TDynamicCounterPtr counters); + + void ToProto(NKikimr::NSysView::TDbServiceCounters& counters) override; + void FromProto(NKikimr::NSysView::TDbServiceCounters& counters) override; +}; + +} // namespace NKikimr::NPrivate diff --git a/ydb/core/tablet/tablet_counters_aggregator.cpp b/ydb/core/tablet/tablet_counters_aggregator.cpp index 78a106277f..80dd5e0c86 100644 --- a/ydb/core/tablet/tablet_counters_aggregator.cpp +++ b/ydb/core/tablet/tablet_counters_aggregator.cpp @@ -1,7 +1,9 @@ #include "tablet_counters_aggregator.h" #include "tablet_counters_app.h" #include "labeled_counters_merger.h" +#include "labeled_db_counters.h" #include "private/aggregated_counters.h" +#include "private/labeled_db_counters.h" #include <library/cpp/actors/core/log.h> #include <ydb/core/mon/mon.h> @@ -161,21 +163,33 @@ public: } } - void ForgetTablet(ui64 tabletID, TTabletTypes::EType tabletType, TPathId tenantPathId) { - AllTypes->Forget(tabletID); + void ApplyLabeledDbCounters(const TString& dbName, ui64 tabletId, + const TTabletLabeledCountersBase* labeledCounters, const TActorContext& ctx) { + auto iterDbLabeled = GetLabeledDbCounters(dbName, ctx); + iterDbLabeled->Apply(tabletId, labeledCounters); + } + + void ForgetTablet(ui64 tabletId, TTabletTypes::EType tabletType, TPathId tenantPathId) { + AllTypes->Forget(tabletId); // and now erase from every other path auto iterTabletType = CountersByTabletType.find(tabletType); if (iterTabletType != CountersByTabletType.end()) { - iterTabletType->second->Forget(tabletID); + iterTabletType->second->Forget(tabletId); } // from db counters if (auto itPath = CountersByPathId.find(tenantPathId); itPath != CountersByPathId.end()) { - itPath->second->Forget(tabletID, tabletType); + itPath->second->Forget(tabletId, tabletType); + } + + for (auto 
iter = LabeledDbCounters.begin(); iter != LabeledDbCounters.end(); ++iter) { + iter->second->ForgetTablet(tabletId); } - //and from all labeledCounters that could have this tablet - auto iterTabletTypeAndGroup = LabeledCountersByTabletTypeAndGroup.lower_bound(std::make_pair(tabletType, TString())); - for (; iterTabletTypeAndGroup != LabeledCountersByTabletTypeAndGroup.end() && iterTabletTypeAndGroup->first.first == tabletType; ) { - bool empty = iterTabletTypeAndGroup->second->ForgetTablet(tabletID); + // and from all labeledCounters that could have this tablet + auto iterTabletTypeAndGroup = + LabeledCountersByTabletTypeAndGroup.lower_bound(std::make_pair(tabletType, TString())); + for (; iterTabletTypeAndGroup != LabeledCountersByTabletTypeAndGroup.end() && + iterTabletTypeAndGroup->first.first == tabletType; ) { + bool empty = iterTabletTypeAndGroup->second->ForgetTablet(tabletId); if (empty) { iterTabletTypeAndGroup = LabeledCountersByTabletTypeAndGroup.erase(iterTabletTypeAndGroup); } else { @@ -183,9 +197,9 @@ public: } } - QuietTabletCounters.erase(tabletID); + QuietTabletCounters.erase(tabletId); - TString tabletIdStr = Sprintf("%" PRIu64, tabletID); + TString tabletIdStr = Sprintf("%" PRIu64, tabletId); Counters->RemoveSubgroup("tabletid", tabletIdStr.data()); } @@ -310,6 +324,10 @@ public: CountersByPathId.erase(pathId); } + void RemoveTabletsByDbPath(const TString& dbPath) { + LabeledDbCounters.erase(dbPath); + } + private: // subgroups class TTabletCountersForTabletType : public TThrRefBase { @@ -1043,8 +1061,8 @@ public: : ActorSystem(actorSystem) {} - void OnDatabaseRemoved(const TString&, TPathId pathId) override { - auto evRemove = MakeHolder<TEvTabletCounters::TEvRemoveDatabase>(pathId); + void OnDatabaseRemoved(const TString& dbPath, TPathId pathId) override { + auto evRemove = MakeHolder<TEvTabletCounters::TEvRemoveDatabase>(dbPath, pathId); auto aggregator = MakeTabletCountersAggregatorID(ActorSystem->NodeId, false); 
ActorSystem->Send(aggregator, evRemove.Release()); } @@ -1072,6 +1090,27 @@ private: return dbCounters; } + NPrivate::TDbLabeledCounters::TPtr GetLabeledDbCounters(const TString& dbName, const TActorContext& ctx) { + auto it = LabeledDbCounters.find(dbName); + if (it != LabeledDbCounters.end()) { + return it->second; + } + + auto dbCounters = MakeIntrusive<NPrivate::TDbLabeledCounters>(); + LabeledDbCounters[dbName] = dbCounters; + + auto evRegister = MakeHolder<NSysView::TEvSysView::TEvRegisterDbCounters>( + NKikimrSysView::LABELED, dbName, dbCounters); + ctx.Send(NSysView::MakeSysViewServiceID(ctx.SelfID.NodeId()), evRegister.Release()); + + if (DbWatcherActorId) { + auto evWatch = MakeHolder<NSysView::TEvSysView::TEvWatchDatabase>(dbName); + ctx.Send(DbWatcherActorId, evWatch.Release()); + } + + return dbCounters; + } + private: ::NMonitoring::TDynamicCounterPtr Counters; TTabletCountersForTabletTypePtr AllTypes; @@ -1079,6 +1118,7 @@ private: typedef THashMap<TPathId, TIntrusivePtr<TTabletCountersForDb>> TCountersByPathId; typedef TMap<TTabletTypes::EType, THolder<TTabletCountersBase>> TAppCountersByTabletType; + typedef THashMap<TString, TIntrusivePtr<NPrivate::TDbLabeledCounters>> TLabeledCountersByDbPath; typedef TMap<std::pair<TTabletTypes::EType, TString>, TAutoPtr<NPrivate::TAggregatedLabeledCounters>> TLabeledCountersByTabletTypeAndGroup; typedef THashMap<ui64, std::pair<TAutoPtr<TTabletCountersBase>, TAutoPtr<TTabletCountersBase>>> TQuietTabletCounters; @@ -1087,6 +1127,7 @@ private: TActorId DbWatcherActorId; TAppCountersByTabletType LimitedAppCounters; // without txs TYdbTabletCountersPtr YdbCounters; + TLabeledCountersByDbPath LabeledDbCounters; TLabeledCountersByTabletTypeAndGroup LabeledCountersByTabletTypeAndGroup; TQuietTabletCounters QuietTabletCounters; }; @@ -1196,8 +1237,14 @@ TTabletCountersAggregatorActor::HandleWork(TEvTabletCounters::TEvTabletAddLabele if (msg->LabeledCounters.Get()->GetDatabasePath()) { if (msg->TabletType == 
TTabletTypes::PersQueue) { LOG_DEBUG_S(ctx, NKikimrServices::TABLET_AGGREGATOR, - "got labeledCounters from db" << msg->LabeledCounters.Get()->GetDatabasePath()); + "got labeledCounters from db: " << msg->LabeledCounters.Get()->GetDatabasePath() << + "; tablet: " << msg->TabletID); + TabletMon->ApplyLabeledDbCounters(msg->LabeledCounters.Get()->GetDatabasePath().GetRef(), msg->TabletID, msg->LabeledCounters.Get(), ctx); } else { + LOG_ERROR_S(ctx, NKikimrServices::TABLET_AGGREGATOR, + "got labeledCounters from unknown Tablet Type: " << msg->TabletType << + "; db: " << msg->LabeledCounters.Get()->GetDatabasePath() << + "; tablet: " << msg->TabletID); return; } } else { @@ -1347,8 +1394,8 @@ TTabletCountersAggregatorActor::HandleWork(TEvTabletCounters::TEvTabletLabeledCo //////////////////////////////////////////// void TTabletCountersAggregatorActor::HandleWork(TEvTabletCounters::TEvRemoveDatabase::TPtr& ev) { - TabletMon->RemoveTabletsByPathId(ev->Get()->PathId); + TabletMon->RemoveTabletsByDbPath(ev->Get()->DbPath); } //////////////////////////////////////////// diff --git a/ydb/core/tablet/tablet_counters_aggregator.h b/ydb/core/tablet/tablet_counters_aggregator.h index 1c39b3295e..be9e336d85 100644 --- a/ydb/core/tablet/tablet_counters_aggregator.h +++ b/ydb/core/tablet/tablet_counters_aggregator.h @@ -105,10 +105,12 @@ struct TEvTabletCounters { }; struct TEvRemoveDatabase : public TEventLocal<TEvRemoveDatabase, EvRemoveDatabase> { + const TString DbPath; const TPathId PathId; - explicit TEvRemoveDatabase(TPathId pathId) - : PathId(pathId) + TEvRemoveDatabase(const TString& dbPath, TPathId pathId) + : DbPath(dbPath) + , PathId(pathId) {} }; diff --git a/ydb/core/tablet/tablet_counters_aggregator_ut.cpp b/ydb/core/tablet/tablet_counters_aggregator_ut.cpp index 013d292c24..f94a11f898 100644 --- a/ydb/core/tablet/tablet_counters_aggregator_ut.cpp +++ b/ydb/core/tablet/tablet_counters_aggregator_ut.cpp @@ -1,5 +1,7 @@ #include "tablet_counters_aggregator.h" 
+#include "private/labeled_db_counters.h" +#include <ydb/core/base/counters.h> #include <ydb/core/testlib/basics/runtime.h> #include <ydb/core/testlib/basics/appdata.h> @@ -824,6 +826,101 @@ Y_UNIT_TEST_SUITE(TTabletLabeledCountersAggregator) { UNIT_ASSERT_VALUES_EQUAL(res[1], "cons/aaa|1|aba/caba/daba|man"); } + Y_UNIT_TEST(DbAggregation) { + TVector<TActorId> cc; + TActorId aggregatorId; + + TTestBasicRuntime runtime(1); + + runtime.Initialize(TAppPrepare().Unwrap()); + runtime.GetAppData().PQConfig.SetTopicsAreFirstClassCitizen(true); + + TActorId edge = runtime.AllocateEdgeActor(); + + runtime.SetRegistrationObserverFunc([&cc, &aggregatorId] + (TTestActorRuntimeBase& runtime, const TActorId& parentId, const TActorId& actorId) { + TTestActorRuntime::DefaultRegistrationObserver(runtime, parentId, actorId); + if (parentId == aggregatorId) { + cc.push_back(actorId); + } + }); + + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, 1); + runtime.DispatchEvents(options); + for (const auto& a : cc) { + THolder<TEvInterconnect::TEvNodesInfo> nodesInfo = MakeHolder<TEvInterconnect::TEvNodesInfo>(); + nodesInfo->Nodes.emplace_back(TEvInterconnect::TNodeInfo(1, "::", "localhost", "localhost", 1234, TNodeLocation())); + nodesInfo->Nodes.emplace_back(TEvInterconnect::TNodeInfo(2, "::", "localhost", "localhost", 1234, TNodeLocation())); + nodesInfo->Nodes.emplace_back(TEvInterconnect::TNodeInfo(3, "::", "localhost", "localhost", 1234, TNodeLocation())); + runtime.Send(new NActors::IEventHandle(a, edge, nodesInfo.Release()), 0, true); + } + + NPrivate::TDbLabeledCounters PQCounters; + + const size_t namesN{5}; + std::array<const char *, namesN> names; + names.fill(""); + names[0] = "whatever"; + names[1] = "whenever"; + std::array<const char *, namesN> groupNames; + groupNames.fill("topic"); + groupNames[1] = "user||topic"; + std::array<ui8, namesN> types; + types.fill(static_cast<ui8>(TLabeledCounterOptions::CT_SIMPLE)); + + 
std::array<ui8, namesN> functions; + functions.fill(static_cast<ui8>(TLabeledCounterOptions::EAF_SUM)); + functions[1] = static_cast<ui8>(TLabeledCounterOptions::EAF_MAX); + + { + NKikimr::TTabletLabeledCountersBase labeledCounters(namesN, &names[0], &types[0], &functions[0], + "some_stream", &groupNames[0], 1, "/Root/PQ1"); + labeledCounters.GetCounters()[0].Set(10); + labeledCounters.GetCounters()[1].Set(10); + PQCounters.Apply(0, &labeledCounters); + labeledCounters.GetCounters()[0].Set(11); + labeledCounters.GetCounters()[1].Set(100); + PQCounters.Apply(1, &labeledCounters); + labeledCounters.GetCounters()[0].Set(12); + labeledCounters.GetCounters()[1].Set(10); + PQCounters.Apply(2, &labeledCounters); + // SUM 33 + // MAX 100 + } + + { + NKikimr::TTabletLabeledCountersBase labeledCounters(namesN, &names[0], &types[0], &functions[0], + "some_stream", &groupNames[0], 1, "/Root/PQ2"); + labeledCounters.GetCounters()[0].Set(20); + labeledCounters.GetCounters()[1].Set(1); + PQCounters.Apply(0, &labeledCounters); + labeledCounters.GetCounters()[0].Set(21); + labeledCounters.GetCounters()[1].Set(11); + PQCounters.Apply(1, &labeledCounters); + labeledCounters.GetCounters()[0].Set(22); + labeledCounters.GetCounters()[1].Set(10); + PQCounters.Apply(2, &labeledCounters); + // SUM 63 + // MAX 11 + } + + NKikimr::NSysView::TDbServiceCounters counters; + + // Here we check that consequent calls do not interfere + for (int i = 10; i >= 0; --i) { + PQCounters.ToProto(counters); + + auto pqCounters = counters.FindOrAddLabeledCounters("some_stream"); + UNIT_ASSERT_VALUES_EQUAL(pqCounters->GetAggregatedPerTablets().group(), "some_stream"); + UNIT_ASSERT_VALUES_EQUAL(pqCounters->GetAggregatedPerTablets().delimiter(), "|"); + UNIT_ASSERT_VALUES_EQUAL(pqCounters->GetAggregatedPerTablets().GetLabeledCounter().size(), 2); + UNIT_ASSERT_VALUES_EQUAL(pqCounters->GetAggregatedPerTablets().GetLabeledCounter(0).value(), 63); + 
UNIT_ASSERT_VALUES_EQUAL(pqCounters->GetAggregatedPerTablets().GetLabeledCounter(1).value(), 11); + + PQCounters.FromProto(counters); + } + } } } diff --git a/ydb/core/tablet/tablet_counters_protobuf.h b/ydb/core/tablet/tablet_counters_protobuf.h index 5f56071b94..ea707420e2 100644 --- a/ydb/core/tablet/tablet_counters_protobuf.h +++ b/ydb/core/tablet/tablet_counters_protobuf.h @@ -269,8 +269,8 @@ public: protected: TVector<TString> NamesStrings; TVector<const char*> Names; - TVector<TString> ServerlessNamesStrings; - TVector<const char*> ServerlessNames; + TVector<TString> SVNamesStrings; + TVector<const char*> SVNames; TVector<ui8> AggregateFuncs; TVector<ui8> Types; TVector<TString> GroupNamesStrings; @@ -282,8 +282,8 @@ public: const NProtoBuf::EnumDescriptor* labeledCounterDesc = LabeledCountersDesc(); NamesStrings.reserve(Size); Names.reserve(Size); - ServerlessNamesStrings.reserve(Size); - ServerlessNames.reserve(Size); + SVNamesStrings.reserve(Size); + SVNames.reserve(Size); AggregateFuncs.reserve(Size); Types.reserve(Size); @@ -295,17 +295,17 @@ public: const TLabeledCounterOptions& co = vdesc->options().GetExtension(LabeledCounterOpts); NamesStrings.push_back(GetFilePrefix(labeledCounterDesc->file()) + co.GetName()); - ServerlessNamesStrings.push_back(co.GetServerlessName()); + SVNamesStrings.push_back(co.GetSVName()); AggregateFuncs.push_back(co.GetAggrFunc()); Types.push_back(co.GetType()); } // Make plain strings out of Strokas to fullfil interface of TTabletCountersBase std::transform(NamesStrings.begin(), NamesStrings.end(), - std::back_inserter(Names), [](auto string) { return string.data(); } ); + std::back_inserter(Names), [](auto& string) { return string.data(); } ); - std::transform(ServerlessNamesStrings.begin(), ServerlessNamesStrings.end(), - std::back_inserter(ServerlessNames), [](auto string) { return string.data(); } ); + std::transform(SVNamesStrings.begin(), SVNamesStrings.end(), + std::back_inserter(SVNames), [](auto& string) { return 
string.data(); } ); //parse types for counter groups; const TLabeledCounterGroupNamesOptions& gn = labeledCounterDesc->options().GetExtension(GlobalGroupNamesOpts); @@ -317,7 +317,7 @@ public: } std::transform(GroupNamesStrings.begin(), GroupNamesStrings.end(), - std::back_inserter(GroupNames), [](auto string) { return string.data(); } ); + std::back_inserter(GroupNames), [](auto& string) { return string.data(); } ); } virtual ~TLabeledCounterParsedOpts() @@ -328,9 +328,9 @@ public: return Names.begin(); } - const char* const * GetServerlessNames() const + const char* const * GetSVNames() const { - return ServerlessNames.begin(); + return SVNames.begin(); } const ui8* GetCounterTypes() const @@ -649,11 +649,11 @@ public: TProtobufTabletLabeledCounters(const TString& group, const ui64 id, const TString& databasePath) : TTabletLabeledCountersBase( - SimpleOpts()->Size, SimpleOpts()->GetServerlessNames(), SimpleOpts()->GetCounterTypes(), + SimpleOpts()->Size, SimpleOpts()->GetSVNames(), SimpleOpts()->GetCounterTypes(), SimpleOpts()->GetAggregateFuncs(), group, SimpleOpts()->GetGroupNames(), id, databasePath) { TVector<TString> groups; - StringSplitter(group).Split('|').SkipEmpty().Collect(&groups); + StringSplitter(group).Split('|').Collect(&groups); Y_VERIFY(SimpleOpts()->GetGroupNamesSize() == groups.size()); } diff --git a/ydb/core/testlib/basics/appdata.cpp b/ydb/core/testlib/basics/appdata.cpp index 1786eaeceb..a940ed2083 100644 --- a/ydb/core/testlib/basics/appdata.cpp +++ b/ydb/core/testlib/basics/appdata.cpp @@ -197,5 +197,4 @@ namespace NKikimr { { FeatureFlags.SetEnableDbCounters(value); } - } diff --git a/ydb/core/testlib/basics/feature_flags.h b/ydb/core/testlib/basics/feature_flags.h index 95f2d8c0e9..7354b03bfa 100644 --- a/ydb/core/testlib/basics/feature_flags.h +++ b/ydb/core/testlib/basics/feature_flags.h @@ -30,6 +30,7 @@ public: FEATURE_FLAG_SETTER(EnableMvccSnapshotReads) FEATURE_FLAG_SETTER(EnableBackgroundCompaction) 
FEATURE_FLAG_SETTER(EnableBackgroundCompactionServerless) + FEATURE_FLAG_SETTER(EnableBorrowedSplitCompaction) FEATURE_FLAG_SETTER(EnableNotNullColumns) FEATURE_FLAG_SETTER(EnableTtlOnAsyncIndexedTables) FEATURE_FLAG_SETTER(EnableBulkUpsertToAsyncIndexedTables) diff --git a/ydb/core/testlib/tablet_helpers.cpp b/ydb/core/testlib/tablet_helpers.cpp index 6b0c84baa7..f2827d7dd5 100644 --- a/ydb/core/testlib/tablet_helpers.cpp +++ b/ydb/core/testlib/tablet_helpers.cpp @@ -1189,6 +1189,8 @@ namespace NKikimr { bootstrapperActorId = Boot(ctx, type, &NSequenceShard::CreateSequenceShard, DataGroupErasure); } else if (type == TTabletTypes::ReplicationController) { bootstrapperActorId = Boot(ctx, type, &NReplication::CreateController, DataGroupErasure); + } else if (type == TTabletTypes::PersQueue) { + bootstrapperActorId = Boot(ctx, type, &CreatePersQueue, DataGroupErasure); } else { status = NKikimrProto::ERROR; } diff --git a/ydb/core/testlib/tenant_runtime.cpp b/ydb/core/testlib/tenant_runtime.cpp index 9e94fb853d..d0d1fea843 100644 --- a/ydb/core/testlib/tenant_runtime.cpp +++ b/ydb/core/testlib/tenant_runtime.cpp @@ -23,6 +23,7 @@ #include <ydb/core/tx/tx_allocator/txallocator.h> #include <ydb/core/tx/tx_proxy/proxy.h> #include <ydb/core/sys_view/processor/processor.h> +#include <ydb/core/persqueue/pq.h> #include <library/cpp/actors/core/interconnect.h> #include <library/cpp/actors/interconnect/interconnect.h> @@ -436,6 +437,8 @@ class TFakeHive : public TActor<TFakeHive>, public TTabletExecutedFlat { bootstrapperActorId = Boot(ctx, type, &NSequenceShard::CreateSequenceShard, DataGroupErasure); } else if (type == TTabletTypes::ReplicationController) { bootstrapperActorId = Boot(ctx, type, &NReplication::CreateController, DataGroupErasure); + } else if (type == TTabletTypes::PersQueue) { + bootstrapperActorId = Boot(ctx, type, &NKikimr::CreatePersQueue, DataGroupErasure); } else { status = NKikimrProto::ERROR; } diff --git a/ydb/core/testlib/test_client.cpp 
b/ydb/core/testlib/test_client.cpp index 0d05bc47d6..ab951d025d 100644 --- a/ydb/core/testlib/test_client.cpp +++ b/ydb/core/testlib/test_client.cpp @@ -103,8 +103,6 @@ namespace NKikimr { namespace Tests { - - TServerSettings& TServerSettings::SetDomainName(const TString& value) { StoragePoolTypes.erase("test"); DomainName = value; @@ -366,6 +364,56 @@ namespace Tests { app.AddDomain(domain.Release()); } + TVector<ui64> TServer::StartPQTablets(ui32 pqTabletsN) { + auto getChannelBind = [](const TString& storagePool) { + TChannelBind bind; + bind.SetStoragePoolName(storagePool); + return bind; + }; + TVector<ui64> ids; + ids.reserve(pqTabletsN); + for (ui32 i = 0; i < pqTabletsN; ++i) { + auto tabletId = Tests::ChangeStateStorage(Tests::DummyTablet2 + i + 1, Settings->Domain); + TIntrusivePtr<TTabletStorageInfo> tabletInfo = + CreateTestTabletInfo(tabletId, TTabletTypes::PersQueue); + TIntrusivePtr<TTabletSetupInfo> setupInfo = + new TTabletSetupInfo(&CreatePersQueue, TMailboxType::Simple, 0, TMailboxType::Simple, 0); + + static TString STORAGE_POOL = "/Root:test"; + static TChannelsBindings BINDED_CHANNELS = + {getChannelBind(STORAGE_POOL), getChannelBind(STORAGE_POOL), getChannelBind(STORAGE_POOL)}; + + ui32 nodeIndex = 0; + auto ev = + MakeHolder<TEvHive::TEvCreateTablet>(tabletId, 0, TTabletTypes::PersQueue, BINDED_CHANNELS); + + TActorId senderB = Runtime->AllocateEdgeActor(nodeIndex); + ui64 hive = ChangeStateStorage(Tests::Hive, Settings->Domain); + Runtime->SendToPipe(hive, senderB, ev.Release(), 0, GetPipeConfigWithRetries()); + TAutoPtr<IEventHandle> handle; + auto createTabletReply = Runtime->GrabEdgeEventRethrow<TEvHive::TEvCreateTabletReply>(handle); + UNIT_ASSERT(createTabletReply); + auto expectedStatus = NKikimrProto::OK; + UNIT_ASSERT_EQUAL_C(createTabletReply->Record.GetStatus(), expectedStatus, + (ui32)createTabletReply->Record.GetStatus() << " != " << (ui32)expectedStatus); + UNIT_ASSERT_EQUAL_C(createTabletReply->Record.GetOwner(), tabletId, + 
createTabletReply->Record.GetOwner() << " != " << tabletId); + ui64 id = createTabletReply->Record.GetTabletID(); + while (true) { + auto tabletCreationResult = + Runtime->GrabEdgeEventRethrow<TEvHive::TEvTabletCreationResult>(handle); + UNIT_ASSERT(tabletCreationResult); + if (id == tabletCreationResult->Record.GetTabletID()) { + UNIT_ASSERT_EQUAL_C(tabletCreationResult->Record.GetStatus(), NKikimrProto::OK, + (ui32)tabletCreationResult->Record.GetStatus() << " != " << (ui32)NKikimrProto::OK); + break; + } + } + ids.push_back(id); + } + return ids; + } + void TServer::CreateBootstrapTablets() { const ui32 domainId = Settings->Domain; Y_VERIFY(TDomainsInfo::MakeTxAllocatorIDFixed(domainId, 1) == ChangeStateStorage(TxAllocator, domainId)); diff --git a/ydb/core/testlib/test_client.h b/ydb/core/testlib/test_client.h index 17ef8f61cb..584963c806 100644 --- a/ydb/core/testlib/test_client.h +++ b/ydb/core/testlib/test_client.h @@ -165,6 +165,7 @@ namespace Tests { TServerSettings& SetFeatureFlags(const NKikimrConfig::TFeatureFlags& value) { FeatureFlags = value; return *this; } TServerSettings& SetCompactionConfig(const NKikimrConfig::TCompactionConfig& value) { CompactionConfig = value; return *this; } TServerSettings& SetEnableDbCounters(bool value) { FeatureFlags.SetEnableDbCounters(value); return *this; } + TServerSettings& SetEnablePersistentQueryStats(bool value) { FeatureFlags.SetEnablePersistentQueryStats(value); return *this; } TServerSettings& SetEnableYq(bool value) { EnableYq = value; return *this; } TServerSettings& SetKeepSnapshotTimeout(TDuration value) { KeepSnapshotTimeout = value; return *this; } TServerSettings& SetChangesQueueItemsLimit(ui64 value) { ChangesQueueItemsLimit = value; return *this; } @@ -251,6 +252,7 @@ namespace Tests { } } void StartDummyTablets(); + TVector<ui64> StartPQTablets(ui32 pqTabletsN); TTestActorRuntime* GetRuntime() const; const TServerSettings& GetSettings() const; const NScheme::TTypeRegistry* GetTypeRegistry(); diff 
--git a/ydb/core/tx/columnshard/columnshard__write_index.cpp b/ydb/core/tx/columnshard/columnshard__write_index.cpp index 564dc32578..313f374dad 100644 --- a/ydb/core/tx/columnshard/columnshard__write_index.cpp +++ b/ydb/core/tx/columnshard/columnshard__write_index.cpp @@ -295,10 +295,6 @@ void TTxWriteIndex::Complete(const TActorContext& ctx) { void TColumnShard::Handle(TEvPrivate::TEvWriteIndex::TPtr& ev, const TActorContext& ctx) { auto& blobs = ev->Get()->Blobs; - bool isCompaction = ev->Get()->GranuleCompaction; - if (isCompaction && blobs.empty()) { - ev->Get()->PutStatus = NKikimrProto::OK; - } if (ev->Get()->PutStatus == NKikimrProto::UNKNOWN) { if (IsAnyChannelYellowStop()) { diff --git a/ydb/core/tx/columnshard/compaction_actor.cpp b/ydb/core/tx/columnshard/compaction_actor.cpp index 3ef17bac05..e5b6f048f6 100644 --- a/ydb/core/tx/columnshard/compaction_actor.cpp +++ b/ydb/core/tx/columnshard/compaction_actor.cpp @@ -125,6 +125,9 @@ private: TxEvent->IndexChanges->SetBlobs(std::move(Blobs)); TxEvent->Blobs = NOlap::TColumnEngineForLogs::CompactBlobs(TxEvent->IndexInfo, TxEvent->IndexChanges); + if (TxEvent->Blobs.empty()) { + TxEvent->PutStatus = NKikimrProto::OK; // nothing to write, commit + } } TxEvent->Duration = TAppData::TimeProvider->Now() - LastActivationTime; ui32 blobsSize = TxEvent->Blobs.size(); diff --git a/ydb/core/tx/datashard/change_collector_async_index.cpp b/ydb/core/tx/datashard/change_collector_async_index.cpp index d012f96189..9f55f049c5 100644 --- a/ydb/core/tx/datashard/change_collector_async_index.cpp +++ b/ydb/core/tx/datashard/change_collector_async_index.cpp @@ -35,7 +35,9 @@ public: } for (const auto tag : DataTags) { - tags.push_back(tag); + if (!IndexTags.contains(tag)) { + tags.push_back(tag); + } } Y_VERIFY(!tags.empty()); diff --git a/ydb/core/tx/datashard/change_record.cpp b/ydb/core/tx/datashard/change_record.cpp index 1ec9cb145e..165fbdeb05 100644 --- a/ydb/core/tx/datashard/change_record.cpp +++ 
b/ydb/core/tx/datashard/change_record.cpp @@ -143,7 +143,7 @@ static void SerializeJsonValue(TUserTable::TCPtr schema, NJson::TJsonValue& valu } } -void TChangeRecord::SerializeTo(NJson::TJsonValue& json) const { +void TChangeRecord::SerializeTo(NJson::TJsonValue& json, bool virtualTimestamps) const { switch (Kind) { case EKind::CdcDataChange: { Y_VERIFY(Schema); @@ -182,6 +182,12 @@ void TChangeRecord::SerializeTo(NJson::TJsonValue& json) const { Y_FAIL_S("Unexpected row operation: " << static_cast<int>(body.GetRowOperationCase())); } + if (virtualTimestamps) { + for (auto v : {Step, TxId}) { + json["ts"].AppendValue(v); + } + } + break; } diff --git a/ydb/core/tx/datashard/change_record.h b/ydb/core/tx/datashard/change_record.h index db7d920ddb..7f5ba3cf26 100644 --- a/ydb/core/tx/datashard/change_record.h +++ b/ydb/core/tx/datashard/change_record.h @@ -41,7 +41,7 @@ public: ui64 GetSchemaVersion() const { return SchemaVersion; } void SerializeTo(NKikimrChangeExchange::TChangeRecord& record) const; - void SerializeTo(NJson::TJsonValue& json) const; + void SerializeTo(NJson::TJsonValue& json, bool virtualTimestamps) const; TConstArrayRef<TCell> GetKey() const; i64 GetSeqNo() const; diff --git a/ydb/core/tx/datashard/change_sender_cdc_stream.cpp b/ydb/core/tx/datashard/change_sender_cdc_stream.cpp index 9dc68a32af..288d8fafbe 100644 --- a/ydb/core/tx/datashard/change_sender_cdc_stream.cpp +++ b/ydb/core/tx/datashard/change_sender_cdc_stream.cpp @@ -1,6 +1,7 @@ #include "change_exchange.h" #include "change_exchange_impl.h" #include "change_sender_common_ops.h" +#include "datashard_user_table.h" #include <library/cpp/actors/core/actor_bootstrapped.h> #include <library/cpp/actors/core/hfunc.h> @@ -98,7 +99,7 @@ class TCdcChangeSenderPartition: public TActorBootstrapped<TCdcChangeSenderParti NKikimrPQClient::TDataChunk data; data.SetCodec(0 /* CODEC_RAW */); - switch (Format) { + switch (Stream.Format) { case NKikimrSchemeOp::ECdcStreamFormatProto: { 
NKikimrChangeExchange::TChangeRecord protoRecord; record.SerializeTo(protoRecord); @@ -108,7 +109,7 @@ class TCdcChangeSenderPartition: public TActorBootstrapped<TCdcChangeSenderParti case NKikimrSchemeOp::ECdcStreamFormatJson: { NJson::TJsonValue json; - record.SerializeTo(json); + record.SerializeTo(json, Stream.VirtualTimestamps); TStringStream str; NJson::TJsonWriterConfig jsonConfig; @@ -123,7 +124,7 @@ class TCdcChangeSenderPartition: public TActorBootstrapped<TCdcChangeSenderParti default: { LOG_E("Unknown format" - << ": format# " << static_cast<int>(Format)); + << ": format# " << static_cast<int>(Stream.Format)); return Leave(); } } @@ -225,12 +226,12 @@ public: const TDataShardId& dataShard, ui32 partitionId, ui64 shardId, - NKikimrSchemeOp::ECdcStreamFormat format) + const TUserTable::TCdcStream& stream) : Parent(parent) , DataShard(dataShard) , PartitionId(partitionId) , ShardId(shardId) - , Format(format) + , Stream(stream) , SourceId(ToString(DataShard.TabletId)) { } @@ -255,7 +256,7 @@ private: const TDataShardId DataShard; const ui32 PartitionId; const ui64 ShardId; - const NKikimrSchemeOp::ECdcStreamFormat Format; + const TUserTable::TCdcStream Stream; const TString SourceId; mutable TMaybe<TString> LogPrefix; @@ -487,7 +488,7 @@ class TCdcChangeSenderMain: public TActorBootstrapped<TCdcChangeSenderMain> return; } - Format = entry.CdcStreamInfo->Description.GetFormat(); + Stream = TUserTable::TCdcStream(entry.CdcStreamInfo->Description); Y_VERIFY(entry.ListNodeEntry->Children.size() == 1); const auto& topic = entry.ListNodeEntry->Children.at(0); @@ -623,7 +624,7 @@ class TCdcChangeSenderMain: public TActorBootstrapped<TCdcChangeSenderMain> Y_VERIFY(KeyDesc); Y_VERIFY(KeyDesc->Partitions); - switch (Format) { + switch (Stream.Format) { case NKikimrSchemeOp::ECdcStreamFormatProto: { const auto range = TTableRange(record.GetKey()); Y_VERIFY(range.Point); @@ -653,7 +654,7 @@ class TCdcChangeSenderMain: public TActorBootstrapped<TCdcChangeSenderMain> 
default: { Y_FAIL_S("Unknown format" - << ": format# " << static_cast<int>(Format)); + << ": format# " << static_cast<int>(Stream.Format)); } } } @@ -661,7 +662,7 @@ class TCdcChangeSenderMain: public TActorBootstrapped<TCdcChangeSenderMain> IActor* CreateSender(ui64 partitionId) override { Y_VERIFY(PartitionToShard.contains(partitionId)); const auto shardId = PartitionToShard.at(partitionId); - return new TCdcChangeSenderPartition(SelfId(), DataShard, partitionId, shardId, Format); + return new TCdcChangeSenderPartition(SelfId(), DataShard, partitionId, shardId, Stream); } void Handle(TEvChangeExchange::TEvEnqueueRecords::TPtr& ev) { @@ -732,7 +733,7 @@ public: private: mutable TMaybe<TString> LogPrefix; - NKikimrSchemeOp::ECdcStreamFormat Format; + TUserTable::TCdcStream Stream; TPathId TopicPathId; THolder<TKeyDesc> KeyDesc; THashMap<ui32, ui64> PartitionToShard; diff --git a/ydb/core/tx/datashard/datashard.cpp b/ydb/core/tx/datashard/datashard.cpp index 532a73d492..09a4724354 100644 --- a/ydb/core/tx/datashard/datashard.cpp +++ b/ydb/core/tx/datashard/datashard.cpp @@ -1055,6 +1055,7 @@ TUserTable::TPtr TDataShard::MoveUserTable(TOperation::TPtr op, const NKikimrTxD } } + SnapshotManager.RenameSnapshots(txc.DB, prevId, newId); SchemaSnapshotManager.RenameSnapshots(txc.DB, prevId, newId); if (newTableInfo->NeedSchemaSnapshots()) { AddSchemaSnapshot(newId, version, op->GetStep(), op->GetTxId(), txc, ctx); @@ -2713,24 +2714,34 @@ void TDataShard::ResolveTablePath(const TActorContext &ctx) if (State != TShardState::Ready) return; - for (auto &pr : TableInfos) { - ui64 pathId = pr.first; - const TUserTable &info = *pr.second; + for (auto& [pathId, info] : TableInfos) { + TString reason = "empty path"; - if (!info.Path) { - if (!TableResolvePipe) { - NTabletPipe::TClientConfig clientConfig; - clientConfig.RetryPolicy = SchemeShardPipeRetryPolicy; - TableResolvePipe = ctx.Register(NTabletPipe::CreateClient(ctx.SelfID, CurrentSchemeShardId, clientConfig)); + if 
(info->Path) { + NKikimrSchemeOp::TTableDescription desc; + info->GetSchema(desc); + + if (desc.GetName() == ExtractBase(desc.GetPath())) { + continue; } - auto *event = new TEvSchemeShard::TEvDescribeScheme(PathOwnerId, - pathId); - event->Record.MutableOptions()->SetReturnPartitioningInfo(false); - event->Record.MutableOptions()->SetReturnPartitionConfig(false); - event->Record.MutableOptions()->SetReturnChildren(false); - NTabletPipe::SendData(ctx, TableResolvePipe, event); + reason = "buggy path"; + } + + LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, "Resolve path at " << TabletID() + << ": reason# " << reason); + + if (!TableResolvePipe) { + NTabletPipe::TClientConfig clientConfig; + clientConfig.RetryPolicy = SchemeShardPipeRetryPolicy; + TableResolvePipe = ctx.Register(NTabletPipe::CreateClient(ctx.SelfID, CurrentSchemeShardId, clientConfig)); } + + auto event = MakeHolder<TEvSchemeShard::TEvDescribeScheme>(PathOwnerId, pathId); + event->Record.MutableOptions()->SetReturnPartitioningInfo(false); + event->Record.MutableOptions()->SetReturnPartitionConfig(false); + event->Record.MutableOptions()->SetReturnChildren(false); + NTabletPipe::SendData(ctx, TableResolvePipe, event.Release()); } } @@ -2778,9 +2789,7 @@ void TDataShard::SerializeKeySample(const TUserTable &tinfo, } -void TDataShard::Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr ev, - const TActorContext &ctx) -{ +void TDataShard::Handle(TEvSchemeShard::TEvDescribeSchemeResult::TPtr ev, const TActorContext &ctx) { const auto &rec = ev->Get()->GetRecord(); LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, diff --git a/ydb/core/tx/datashard/datashard__read_iterator.cpp b/ydb/core/tx/datashard/datashard__read_iterator.cpp index 40bd2c8920..8515508d50 100644 --- a/ydb/core/tx/datashard/datashard__read_iterator.cpp +++ b/ydb/core/tx/datashard/datashard__read_iterator.cpp @@ -222,6 +222,7 @@ class TReader { const TReadIteratorState& State; IBlockBuilder& BlockBuilder; const 
TShortTableInfo& TableInfo; + const TMonotonic StartTs; std::vector<NKikimr::NScheme::TTypeId> ColumnTypes; @@ -233,6 +234,8 @@ class TReader { ui64 BytesInResult = 0; + ui64 InvisibleRowSkips = 0; + NHPTimer::STime StartTime; NHPTimer::STime EndTime; @@ -247,10 +250,12 @@ class TReader { public: TReader(TReadIteratorState& state, IBlockBuilder& blockBuilder, - const TShortTableInfo& tableInfo) + const TShortTableInfo& tableInfo, + TMonotonic ts) : State(state) , BlockBuilder(blockBuilder) , TableInfo(tableInfo) + , StartTs(ts) , FirstUnprocessedQuery(State.FirstUnprocessedQuery) { GetTimeFast(&StartTime); @@ -335,6 +340,7 @@ public: NTable::TSelectStats stats; auto ready = txc.DB.Select(TableInfo.LocalTid, key, State.Columns, rowState, stats, 0, State.ReadVersion); RowsSinceLastCheck += 1 + stats.InvisibleRowSkips; + InvisibleRowSkips += stats.InvisibleRowSkips; if (ready == NTable::EReady::Page) { return EReadStatus::NeedData; } @@ -438,21 +444,58 @@ public: return false; } - void FillResult(TEvDataShard::TEvReadResult& result) { + void FillResult(TEvDataShard::TEvReadResult& result, TDataShard& datashard, TReadIteratorState& state) { auto& record = result.Record; record.MutableStatus()->SetCode(Ydb::StatusIds::SUCCESS); + auto now = AppData()->MonotonicTimeProvider->Now(); + auto delta = now - StartTs; + datashard.IncCounter(COUNTER_READ_ITERATOR_ITERATION_LATENCY_MS, delta.MilliSeconds()); + + // note that in all metrics below we treat key prefix read as key read + // and not as range read + const bool isKeysRequest = !State.Request->Keys.empty(); + if (HasUnreadQueries()) { if (OutOfQuota()) { + datashard.IncCounter(COUNTER_READ_ITERATOR_NO_QUOTA); record.SetLimitReached(true); + } else if (HasMaxRowsInResult()) { + datashard.IncCounter(COUNTER_READ_ITERATOR_MAX_ROWS_REACHED); + } else { + datashard.IncCounter(COUNTER_READ_ITERATOR_MAX_TIME_REACHED); } } else { + state.IsFinished = true; record.SetFinished(true); + auto fullDelta = now - State.StartTs; + 
datashard.IncCounter(COUNTER_READ_ITERATOR_LIFETIME_MS, fullDelta.MilliSeconds()); + + if (isKeysRequest) { + datashard.IncCounter(COUNTER_ENGINE_HOST_SELECT_ROW, State.Request->Keys.size()); + datashard.IncCounter(COUNTER_SELECT_ROWS_PER_REQUEST, State.Request->Keys.size()); + } else { + datashard.IncCounter(COUNTER_ENGINE_HOST_SELECT_RANGE, State.Request->Ranges.size()); + } } + if (!isKeysRequest) + datashard.IncCounter(COUNTER_ENGINE_HOST_SELECT_RANGE_ROW_SKIPS, InvisibleRowSkips); + BytesInResult = BlockBuilder.Bytes(); + if (BytesInResult) { + datashard.IncCounter(COUNTER_READ_ITERATOR_ROWS_READ, RowsRead); + datashard.IncCounter(COUNTER_READ_ITERATOR_BYTES_READ, BytesInResult); + if (isKeysRequest) { + // backward compatibility + datashard.IncCounter(COUNTER_ENGINE_HOST_SELECT_ROW_BYTES, BytesInResult); + } else { + // backward compatibility + datashard.IncCounter(COUNTER_ENGINE_HOST_SELECT_RANGE_ROWS, RowsRead); + datashard.IncCounter(COUNTER_RANGE_READ_ROWS_PER_REQUEST, RowsRead); + datashard.IncCounter(COUNTER_ENGINE_HOST_SELECT_RANGE_BYTES, BytesInResult); + } - if (BytesInResult = BlockBuilder.Bytes()) { switch (State.Format) { case NKikimrTxDataShard::ARROW: { auto& arrowBuilder = static_cast<NArrow::TArrowBatchBuilder&>(BlockBuilder); @@ -549,6 +592,7 @@ private: BlockBuilder.AddRow(TDbTupleRef(), rowValues); ++RowsRead; + InvisibleRowSkips += iter->Stats.InvisibleRowSkips; RowsSinceLastCheck += 1 + ResetRowStats(iter->Stats); if (ShouldStop()) { return EReadStatus::StoppedByLimit; @@ -557,6 +601,7 @@ private: // last iteration to Page or Gone also might have deleted or invisible rows RowsSinceLastCheck += ResetRowStats(iter->Stats); + InvisibleRowSkips += iter->Stats.InvisibleRowSkips; // TODO: consider restart when Page and too few data read // (how much is too few, less than user's limit?) 
@@ -714,7 +759,7 @@ public: Y_ASSERT(Reader); Y_ASSERT(BlockBuilder); - Reader->FillResult(*Result); + Reader->FillResult(*Result, *Self, state); SendViaSession(state.SessionId, Sender, Self->SelfId(), Result.release()); // note that we save the state only when there're unread queries @@ -1035,7 +1080,7 @@ private: Y_ASSERT(Result); state.State = TReadIteratorState::EState::Executing; - Reader.reset(new TReader(state, *BlockBuilder, TableInfo)); + Reader.reset(new TReader(state, *BlockBuilder, TableInfo, AppData()->MonotonicTimeProvider->Now())); finished = false; } @@ -1128,7 +1173,12 @@ public: LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, Self->TabletID() << " ReadContinue: " << Ev->Get()->Reader << "," << Ev->Get()->ReadId); - Reader.reset(new TReader(state, *BlockBuilder, TableInfo)); + Reader.reset(new TReader( + state, + *BlockBuilder, + TableInfo, + AppData()->MonotonicTimeProvider->Now())); + return Reader->Read(txc, ctx); } @@ -1175,7 +1225,7 @@ public: Y_ASSERT(Reader); Y_ASSERT(BlockBuilder); - Reader->FillResult(*Result); + Reader->FillResult(*Result, *Self, state); SendViaSession(state.SessionId, request->Reader, Self->SelfId(), Result.release()); if (Reader->HasUnreadQueries()) { @@ -1237,7 +1287,7 @@ void TDataShard::Handle(TEvDataShard::TEvRead::TPtr& ev, const TActorContext& ct sessionId = ev->InterconnectSession; } - ReadIterators.emplace(readId, new TReadIteratorState(sessionId)); + ReadIterators.emplace(readId, new TReadIteratorState(sessionId, AppData()->MonotonicTimeProvider->Now())); Executor()->Execute(new TTxRead(this, ev), ctx); } @@ -1338,15 +1388,33 @@ void TDataShard::Handle(TEvDataShard::TEvReadCancel::TPtr& ev, const TActorConte LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, TabletID() << " ReadCancel: " << record); TReadIteratorId readId(ev->Sender, record.GetReadId()); - DeleteReadIterator(readId); + auto it = ReadIterators.find(readId); + if (it == ReadIterators.end()) + return; + + const auto& state = it->second; + if 
(!state->IsFinished) { + auto now = AppData()->MonotonicTimeProvider->Now(); + auto delta = now - state->StartTs; + IncCounter(COUNTER_READ_ITERATOR_LIFETIME_MS, delta.MilliSeconds()); + IncCounter(COUNTER_READ_ITERATOR_CANCEL); + } + + DeleteReadIterator(it); } void TDataShard::CancelReadIterators(Ydb::StatusIds::StatusCode code, const TString& issue, const TActorContext& ctx) { - LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, TabletID() << " CancelReadIterators #" << ReadIterators.size()); + LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, TabletID() << " CancelReadIterators#" << ReadIterators.size()); + auto now = AppData()->MonotonicTimeProvider->Now(); for (const auto& iterator: ReadIterators) { const auto& readIteratorId = iterator.first; + const auto& state = iterator.second; + if (!state->IsFinished) { + auto delta = now - state->StartTs; + IncCounter(COUNTER_READ_ITERATOR_LIFETIME_MS, delta.MilliSeconds()); + } std::unique_ptr<TEvDataShard::TEvReadResult> result(new TEvDataShard::TEvReadResult()); SetStatusError(result->Record, code, issue); @@ -1358,12 +1426,8 @@ void TDataShard::CancelReadIterators(Ydb::StatusIds::StatusCode code, const TStr ReadIterators.clear(); ReadIteratorSessions.clear(); -} -void TDataShard::DeleteReadIterator(const TReadIteratorId& readId) { - auto it = ReadIterators.find(readId); - if (it != ReadIterators.end()) - DeleteReadIterator(it); + SetCounter(COUNTER_READ_ITERATORS_COUNT, 0); } void TDataShard::DeleteReadIterator(TReadIteratorsMap::iterator it) { @@ -1376,6 +1440,7 @@ void TDataShard::DeleteReadIterator(TReadIteratorsMap::iterator it) { } } ReadIterators.erase(it); + SetCounter(COUNTER_READ_ITERATORS_COUNT, ReadIterators.size()); } void TDataShard::ReadIteratorsOnNodeDisconnected(const TActorId& sessionId, const TActorContext &ctx) { @@ -1387,13 +1452,25 @@ void TDataShard::ReadIteratorsOnNodeDisconnected(const TActorId& sessionId, cons LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, TabletID() << " closed session# " << 
sessionId << ", iterators# " << session.Iterators.size()); + auto now = AppData()->MonotonicTimeProvider->Now(); for (const auto& readId: session.Iterators) { // we don't send anything to client, because it's up // to client to detect disconnect - ReadIterators.erase(readId); + auto it = ReadIterators.find(readId); + if (it == ReadIterators.end()) + continue; + + const auto& state = it->second; + if (!state->IsFinished) { + auto delta = now - state->StartTs; + IncCounter(COUNTER_READ_ITERATOR_LIFETIME_MS, delta.MilliSeconds()); + } + + ReadIterators.erase(it); } ReadIteratorSessions.erase(itSession); + SetCounter(COUNTER_READ_ITERATORS_COUNT, ReadIterators.size()); } } // NKikimr::NDataShard diff --git a/ydb/core/tx/datashard/datashard__stats.cpp b/ydb/core/tx/datashard/datashard__stats.cpp index 005823dae4..de49cdf1ef 100644 --- a/ydb/core/tx/datashard/datashard__stats.cpp +++ b/ydb/core/tx/datashard/datashard__stats.cpp @@ -140,10 +140,12 @@ public: auto indexSize = txc.DB.GetTableIndexSize(tableInfo.LocalTid); auto memSize = txc.DB.GetTableMemSize(tableInfo.LocalTid); + auto memRowCount = txc.DB.GetTableMemRowCount(tableInfo.LocalTid); if (tableInfo.ShadowTid) { indexSize += txc.DB.GetTableIndexSize(tableInfo.ShadowTid); memSize += txc.DB.GetTableMemSize(tableInfo.ShadowTid); + memRowCount += txc.DB.GetTableMemRowCount(tableInfo.ShadowTid); } Result->Record.MutableTableStats()->SetIndexSize(indexSize); @@ -161,8 +163,8 @@ public: return true; const NTable::TStats& stats = tableInfo.Stats.DataStats; - Result->Record.MutableTableStats()->SetDataSize(stats.DataSize); - Result->Record.MutableTableStats()->SetRowCount(stats.RowCount); + Result->Record.MutableTableStats()->SetDataSize(stats.DataSize + memSize); + Result->Record.MutableTableStats()->SetRowCount(stats.RowCount + memRowCount); FillHistogram(stats.DataSizeHistogram, *Result->Record.MutableTableStats()->MutableDataSizeHistogram()); FillHistogram(stats.RowCountHistogram, 
*Result->Record.MutableTableStats()->MutableRowCountHistogram()); // Fill key access sample if it was collected not too long ago diff --git a/ydb/core/tx/datashard/datashard_impl.h b/ydb/core/tx/datashard/datashard_impl.h index 49b5dffbf5..4e28082c8a 100644 --- a/ydb/core/tx/datashard/datashard_impl.h +++ b/ydb/core/tx/datashard/datashard_impl.h @@ -1537,7 +1537,6 @@ public: bool CheckChangesQueueOverflow() const; - void DeleteReadIterator(const TReadIteratorId& readId); void DeleteReadIterator(TReadIteratorsMap::iterator it); void CancelReadIterators(Ydb::StatusIds::StatusCode code, const TString& issue, const TActorContext& ctx); void ReadIteratorsOnNodeDisconnected(const TActorId& sessionId, const TActorContext &ctx); @@ -2582,6 +2581,9 @@ protected: ev->Record.SetNodeId(ctx.ExecutorThread.ActorSystem->NodeId); ev->Record.SetStartTime(StartTime().MilliSeconds()); + if (DstSplitDescription) + ev->Record.SetIsDstSplit(true); + NTabletPipe::SendData(ctx, DbStatsReportPipe, ev.Release()); } diff --git a/ydb/core/tx/datashard/datashard_snapshots.cpp b/ydb/core/tx/datashard/datashard_snapshots.cpp index f0a3680435..b40a9c8057 100644 --- a/ydb/core/tx/datashard/datashard_snapshots.cpp +++ b/ydb/core/tx/datashard/datashard_snapshots.cpp @@ -817,6 +817,32 @@ void TSnapshotManager::EnsureRemovedRowVersions(NTable::TDatabase& db, const TRo } } +void TSnapshotManager::RenameSnapshots(NTable::TDatabase& db, const TPathId& prevTableId, const TPathId& newTableId) { + TSnapshotTableKey prevTableKey(prevTableId.OwnerId, prevTableId.LocalPathId); + TSnapshotTableKey newTableKey(newTableId.OwnerId, newTableId.LocalPathId); + + NIceDb::TNiceDb nicedb(db); + + auto it = Snapshots.lower_bound(prevTableKey); + while (it != Snapshots.end() && it->first == prevTableKey) { + TSnapshotKey oldKey = it->first; + TSnapshotKey newKey(newTableKey.OwnerId, newTableKey.PathId, oldKey.Step, oldKey.TxId); + + Y_VERIFY_DEBUG(!References.contains(oldKey), "Unexpected reference to snapshot during 
rename"); + + PersistAddSnapshot(nicedb, newKey, it->second.Name, it->second.Flags, it->second.Timeout); + + if (ExpireQueue.Has(&it->second)) { + auto& newSnapshot = Snapshots.at(newKey); + newSnapshot.ExpireTime = it->second.ExpireTime; + ExpireQueue.Add(&newSnapshot); + } + + ++it; + PersistRemoveSnapshot(nicedb, oldKey); + } +} + } // namespace NDataShard } // namespace NKikimr diff --git a/ydb/core/tx/datashard/datashard_snapshots.h b/ydb/core/tx/datashard/datashard_snapshots.h index dd147b4a45..eb868fa4db 100644 --- a/ydb/core/tx/datashard/datashard_snapshots.h +++ b/ydb/core/tx/datashard/datashard_snapshots.h @@ -207,6 +207,8 @@ public: void Fix_KIKIMR_14259(NTable::TDatabase& db); void EnsureRemovedRowVersions(NTable::TDatabase& db, const TRowVersion& from, const TRowVersion& to); + void RenameSnapshots(NTable::TDatabase& db, const TPathId& prevTableId, const TPathId& newTableId); + private: void DoRemoveSnapshot(NTable::TDatabase& db, const TSnapshotKey& key); diff --git a/ydb/core/tx/datashard/datashard_user_table.h b/ydb/core/tx/datashard/datashard_user_table.h index f3b6f8112c..b2c9862fc2 100644 --- a/ydb/core/tx/datashard/datashard_user_table.h +++ b/ydb/core/tx/datashard/datashard_user_table.h @@ -292,6 +292,7 @@ struct TUserTable : public TThrRefBase { EMode Mode; EFormat Format; EState State; + bool VirtualTimestamps = false; TCdcStream() = default; @@ -300,6 +301,7 @@ struct TUserTable : public TThrRefBase { , Mode(streamDesc.GetMode()) , Format(streamDesc.GetFormat()) , State(streamDesc.GetState()) + , VirtualTimestamps(streamDesc.GetVirtualTimestamps()) { } }; diff --git a/ydb/core/tx/datashard/datashard_ut_change_collector.cpp b/ydb/core/tx/datashard/datashard_ut_change_collector.cpp index d087a4a368..e0a2dda2a0 100644 --- a/ydb/core/tx/datashard/datashard_ut_change_collector.cpp +++ b/ydb/core/tx/datashard/datashard_ut_change_collector.cpp @@ -555,6 +555,31 @@ Y_UNIT_TEST_SUITE(AsyncIndexChangeCollector) { }); } + 
Y_UNIT_TEST(CoverIndexedColumn) { + const auto schema = TShardedTableOptions() + .Columns({ + {"a", "Uint32", true, false}, + {"b", "Uint32", false, false}, + {"c", "Uint32", false, false}, + {"d", "Uint32", false, false}, + }) + .Indexes({ + {"by_bc", {"b", "c"}, {}, NKikimrSchemeOp::EIndexTypeGlobalAsync}, + {"by_d", {"d"}, {"c"}, NKikimrSchemeOp::EIndexTypeGlobalAsync}, + }); + + Run("/Root/path", schema, TVector<TString>{ + "UPSERT INTO `/Root/path` (a, b, c, d) VALUES (1, 10, 100, 1000);", + }, { + {"by_bc", { + TStructRecord(NTable::ERowOp::Upsert, {{"b", 10}, {"c", 100}, {"a", 1}}), + }}, + {"by_d", { + TStructRecord(NTable::ERowOp::Upsert, {{"d", 1000}, {"a", 1}}, {{"c", 100}}), + }}, + }); + } + } // AsyncIndexChangeCollector Y_UNIT_TEST_SUITE(CdcStreamChangeCollector) { diff --git a/ydb/core/tx/datashard/datashard_ut_change_exchange.cpp b/ydb/core/tx/datashard/datashard_ut_change_exchange.cpp index f157ee8d34..f42998c5a9 100644 --- a/ydb/core/tx/datashard/datashard_ut_change_exchange.cpp +++ b/ydb/core/tx/datashard/datashard_ut_change_exchange.cpp @@ -802,27 +802,30 @@ Y_UNIT_TEST_SUITE(Cdc) { }); } - TCdcStream KeysOnly(NKikimrSchemeOp::ECdcStreamFormat format, const TString& name = "Stream") { + TCdcStream KeysOnly(NKikimrSchemeOp::ECdcStreamFormat format, const TString& name = "Stream", bool vt = false) { return TCdcStream{ .Name = name, .Mode = NKikimrSchemeOp::ECdcStreamModeKeysOnly, .Format = format, + .VirtualTimestamps = vt, }; } - TCdcStream Updates(NKikimrSchemeOp::ECdcStreamFormat format, const TString& name = "Stream") { + TCdcStream Updates(NKikimrSchemeOp::ECdcStreamFormat format, const TString& name = "Stream", bool vt = false) { return TCdcStream{ .Name = name, .Mode = NKikimrSchemeOp::ECdcStreamModeUpdate, .Format = format, + .VirtualTimestamps = vt, }; } - TCdcStream NewAndOldImages(NKikimrSchemeOp::ECdcStreamFormat format, const TString& name = "Stream") { + TCdcStream NewAndOldImages(NKikimrSchemeOp::ECdcStreamFormat format, const 
TString& name = "Stream", bool vt = false) { return TCdcStream{ .Name = name, .Mode = NKikimrSchemeOp::ECdcStreamModeNewAndOldImages, .Format = format, + .VirtualTimestamps = vt, }; } @@ -839,7 +842,7 @@ Y_UNIT_TEST_SUITE(Cdc) { struct PqRunner { static void Read(const TShardedTableOptions& tableDesc, const TCdcStream& streamDesc, - const TVector<TString>& queries, const TVector<TString>& records) + const TVector<TString>& queries, const TVector<TString>& records, bool strict = true) { TTestPqEnv env(tableDesc, streamDesc); @@ -873,8 +876,12 @@ Y_UNIT_TEST_SUITE(Cdc) { pStream = data->GetPartitionStream(); for (const auto& item : data->GetMessages()) { const auto& record = records.at(reads++); - UNIT_ASSERT_VALUES_EQUAL(record, item.GetData()); - UNIT_ASSERT_VALUES_EQUAL(CalcPartitionKey(record), item.GetPartitionKey()); + if (strict) { + UNIT_ASSERT_VALUES_EQUAL(item.GetData(), record); + UNIT_ASSERT_VALUES_EQUAL(item.GetPartitionKey(), CalcPartitionKey(record)); + } else { + UNIT_ASSERT_STRING_CONTAINS(item.GetData(), record); + } } } else if (auto* create = std::get_if<TReadSessionEvent::TCreatePartitionStreamEvent>(&*ev)) { pStream = create->GetPartitionStream(); @@ -924,7 +931,7 @@ Y_UNIT_TEST_SUITE(Cdc) { struct YdsRunner { static void Read(const TShardedTableOptions& tableDesc, const TCdcStream& streamDesc, - const TVector<TString>& queries, const TVector<TString>& records) + const TVector<TString>& queries, const TVector<TString>& records, bool strict = true) { TTestYdsEnv env(tableDesc, streamDesc); @@ -975,8 +982,12 @@ Y_UNIT_TEST_SUITE(Cdc) { for (ui32 i = 0; i < records.size(); ++i) { const auto& actual = res.GetResult().records().at(i); const auto& expected = records.at(i); - UNIT_ASSERT_VALUES_EQUAL(actual.data(), expected); - UNIT_ASSERT_VALUES_EQUAL(actual.partition_key(), CalcPartitionKey(expected)); + if (strict) { + UNIT_ASSERT_VALUES_EQUAL(actual.data(), expected); + UNIT_ASSERT_VALUES_EQUAL(actual.partition_key(), CalcPartitionKey(expected)); + 
} else { + UNIT_ASSERT_STRING_CONTAINS(actual.data(), expected); + } } } @@ -1004,7 +1015,7 @@ Y_UNIT_TEST_SUITE(Cdc) { struct TopicRunner { static void Read(const TShardedTableOptions& tableDesc, const TCdcStream& streamDesc, - const TVector<TString>& queries, const TVector<TString>& records) + const TVector<TString>& queries, const TVector<TString>& records, bool strict = true) { TTestTopicEnv env(tableDesc, streamDesc); @@ -1037,9 +1048,12 @@ Y_UNIT_TEST_SUITE(Cdc) { pStream = data->GetPartitionSession(); for (const auto& item : data->GetMessages()) { const auto& record = records.at(reads++); - UNIT_ASSERT_VALUES_EQUAL(record, item.GetData()); - //TODO: check here partition key -// UNIT_ASSERT_VALUES_EQUAL(CalcPartitionKey(record), item.GetPartitionKey()); + if (strict) { + UNIT_ASSERT_VALUES_EQUAL(item.GetData(), record); + // TODO: check here partition key + } else { + UNIT_ASSERT_STRING_CONTAINS(item.GetData(), record); + } } } else if (auto* create = std::get_if<NYdb::NTopic::TReadSessionEvent::TStartPartitionSessionEvent>(&*ev)) { pStream = create->GetPartitionSession(); @@ -1156,6 +1170,19 @@ Y_UNIT_TEST_SUITE(Cdc) { }); } + Y_UNIT_TEST_TRIPLET(VirtualTimestamps, PqRunner, YdsRunner, TopicRunner) { + TRunner::Read(SimpleTable(), KeysOnly(NKikimrSchemeOp::ECdcStreamFormatJson, "Stream", true), {R"( + UPSERT INTO `/Root/Table` (key, value) VALUES + (1, 10), + (2, 20), + (3, 30); + )"}, { + R"({"update":{},"key":[1],"ts":[)", + R"({"update":{},"key":[2],"ts":[)", + R"({"update":{},"key":[3],"ts":[)", + }, false /* non-strict because of variadic timestamps */); + } + Y_UNIT_TEST_TRIPLET(NaN, PqRunner, YdsRunner, TopicRunner) { const auto variants = std::vector<std::pair<const char*, const char*>>{ {"Double", ""}, @@ -1182,18 +1209,15 @@ Y_UNIT_TEST_SUITE(Cdc) { } } - TShardedTableOptions Utf8Table() { - return TShardedTableOptions() + Y_UNIT_TEST_TRIPLET(HugeKey, PqRunner, YdsRunner, TopicRunner) { + const auto key = TString(512_KB, 'A'); + const auto table = 
TShardedTableOptions() .Columns({ {"key", "Utf8", true, false}, {"value", "Uint32", false, false}, }); - } - Y_UNIT_TEST_TRIPLET(HugeKey, PqRunner, YdsRunner, TopicRunner) { - const auto key = TString(512_KB, 'A'); - - TRunner::Read(Utf8Table(), KeysOnly(NKikimrSchemeOp::ECdcStreamFormatJson), {Sprintf(R"( + TRunner::Read(table, KeysOnly(NKikimrSchemeOp::ECdcStreamFormatJson), {Sprintf(R"( UPSERT INTO `/Root/Table` (key, value) VALUES ("%s", 1); )", key.c_str())}, { @@ -1259,7 +1283,6 @@ Y_UNIT_TEST_SUITE(Cdc) { .BeginAlterAttributes().Add("key", "value").EndAlterAttributes()).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL(res.GetStatus(), NYdb::EStatus::BAD_REQUEST); } - } // Pq specific diff --git a/ydb/core/tx/datashard/datashard_ut_common.cpp b/ydb/core/tx/datashard/datashard_ut_common.cpp index 8954f967bc..4f3c6a7c62 100644 --- a/ydb/core/tx/datashard/datashard_ut_common.cpp +++ b/ydb/core/tx/datashard/datashard_ut_common.cpp @@ -1674,6 +1674,7 @@ ui64 AsyncAlterAddStream( desc.MutableStreamDescription()->SetName(streamDesc.Name); desc.MutableStreamDescription()->SetMode(streamDesc.Mode); desc.MutableStreamDescription()->SetFormat(streamDesc.Format); + desc.MutableStreamDescription()->SetVirtualTimestamps(streamDesc.VirtualTimestamps); return RunSchemeTx(*server->GetRuntime(), std::move(request)); } diff --git a/ydb/core/tx/datashard/datashard_ut_common.h b/ydb/core/tx/datashard/datashard_ut_common.h index e9d13eb5aa..2ebc235f1b 100644 --- a/ydb/core/tx/datashard/datashard_ut_common.h +++ b/ydb/core/tx/datashard/datashard_ut_common.h @@ -400,6 +400,7 @@ struct TShardedTableOptions { TString Name; EMode Mode; EFormat Format; + bool VirtualTimestamps = false; }; #define TABLE_OPTION_IMPL(type, name, defaultValue) \ diff --git a/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp b/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp index 29f6642104..f81ece9a6d 100644 --- a/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp +++ 
b/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp @@ -1632,7 +1632,7 @@ Y_UNIT_TEST_SUITE(DataShardReadIteratorSysTables) { Y_UNIT_TEST_SUITE(DataShardReadIteratorState) { Y_UNIT_TEST(ShouldCalculateQuota) { - NDataShard::TReadIteratorState state({}); + NDataShard::TReadIteratorState state({}, {}); state.Quota.Rows = 100; state.Quota.Bytes = 1000; state.ConsumeSeqNo(10, 100); // seqno1 diff --git a/ydb/core/tx/datashard/datashard_ut_snapshot.cpp b/ydb/core/tx/datashard/datashard_ut_snapshot.cpp index 1da604332a..0bd77e25ca 100644 --- a/ydb/core/tx/datashard/datashard_ut_snapshot.cpp +++ b/ydb/core/tx/datashard/datashard_ut_snapshot.cpp @@ -1705,6 +1705,57 @@ Y_UNIT_TEST_SUITE(DataShardSnapshots) { } } + Y_UNIT_TEST_WITH_MVCC(VolatileSnapshotRenameTimeout) { + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root") + .SetEnableMvcc(WithMvcc) + .SetUseRealThreads(false) + .SetDomainPlanResolution(1000); + + Tests::TServer::TPtr server = new TServer(serverSettings); + auto &runtime = *server->GetRuntime(); + auto sender = runtime.AllocateEdgeActor(); + + runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_TRACE); + runtime.SetLogPriority(NKikimrServices::TX_PROXY, NLog::PRI_DEBUG); + runtime.GetAppData().AllowReadTableImmediate = true; + + InitRoot(server, sender); + + CreateShardedTable(server, sender, "/Root", "table-1", 2); + CreateShardedTable(server, sender, "/Root", "table-2", 2); + + ExecSQL(server, sender, "UPSERT INTO `/Root/table-1` (key, value) VALUES (1, 1), (2, 2), (3, 3);"); + ExecSQL(server, sender, "UPSERT INTO `/Root/table-2` (key, value) VALUES (10, 10), (20, 20), (30, 30);"); + + auto snapshot = CreateVolatileSnapshot(server, { "/Root/table-1", "/Root/table-2" }, TDuration::MilliSeconds(10000)); + + ExecSQL(server, sender, "UPSERT INTO `/Root/table-1` (key, value) VALUES (1, 11), (2, 22), (3, 33), (4, 44);"); + ExecSQL(server, sender, "UPSERT INTO `/Root/table-2` (key, value) 
VALUES (10, 11), (20, 22), (30, 33), (40, 44);"); + + auto table1snapshot1 = ReadShardedTable(server, "/Root/table-1", snapshot); + UNIT_ASSERT_VALUES_EQUAL(table1snapshot1, + "key = 1, value = 1\n" + "key = 2, value = 2\n" + "key = 3, value = 3\n"); + + WaitTxNotification(server, sender, AsyncMoveTable(server, "/Root/table-1", "/Root/table-1-moved")); + + auto table1snapshot2 = ReadShardedTable(server, "/Root/table-1-moved", snapshot); + UNIT_ASSERT_VALUES_EQUAL(table1snapshot2, + "key = 1, value = 1\n" + "key = 2, value = 2\n" + "key = 3, value = 3\n"); + + Cerr << "---- Sleeping ----" << Endl; + SimulateSleep(server, TDuration::Seconds(60)); + + auto table1snapshot3 = ReadShardedTable(server, "/Root/table-1-moved", snapshot); + UNIT_ASSERT_VALUES_EQUAL(table1snapshot3, + "ERROR: WrongRequest\n"); + } + } } // namespace NKikimr diff --git a/ydb/core/tx/datashard/read_iterator.h b/ydb/core/tx/datashard/read_iterator.h index b89a01ac03..90d1cec1e0 100644 --- a/ydb/core/tx/datashard/read_iterator.h +++ b/ydb/core/tx/datashard/read_iterator.h @@ -63,8 +63,9 @@ struct TReadIteratorState { }; public: - explicit TReadIteratorState(const TActorId& sessionId) + TReadIteratorState(const TActorId& sessionId, TMonotonic ts) : SessionId(sessionId) + , StartTs(ts) {} bool IsExhausted() const { return State == EState::Exhausted; } @@ -184,6 +185,8 @@ public: TQuota AckedReads; TActorId SessionId; + TMonotonic StartTs; + bool IsFinished = false; // note that we send SeqNo's starting from 1 ui64 SeqNo = 0; diff --git a/ydb/core/tx/replication/ydb_proxy/ydb_proxy_ut.cpp b/ydb/core/tx/replication/ydb_proxy/ydb_proxy_ut.cpp index d536a6db19..95ab198fc8 100644 --- a/ydb/core/tx/replication/ydb_proxy/ydb_proxy_ut.cpp +++ b/ydb/core/tx/replication/ydb_proxy/ydb_proxy_ut.cpp @@ -365,7 +365,7 @@ Y_UNIT_TEST_SUITE(YdbProxyTests) { new TEvYdbProxy::TEvAlterTableRequest("/Root/table", settings)); UNIT_ASSERT(ev); UNIT_ASSERT(!ev->Get()->Result.IsSuccess()); - 
UNIT_ASSERT_VALUES_EQUAL(ev->Get()->Result.GetStatus(), NYdb::EStatus::GENERIC_ERROR); + UNIT_ASSERT_VALUES_EQUAL(ev->Get()->Result.GetStatus(), NYdb::EStatus::BAD_REQUEST); } } diff --git a/ydb/core/tx/schemeshard/schemeshard__init.cpp b/ydb/core/tx/schemeshard/schemeshard__init.cpp index 2f478464b3..590037874e 100644 --- a/ydb/core/tx/schemeshard/schemeshard__init.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__init.cpp @@ -2770,6 +2770,7 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> { auto alterVersion = rowset.GetValue<Schema::CdcStream::AlterVersion>(); auto mode = rowset.GetValue<Schema::CdcStream::Mode>(); auto format = rowset.GetValue<Schema::CdcStream::Format>(); + auto vt = rowset.GetValueOrDefault<Schema::CdcStream::VirtualTimestamps>(false); auto state = rowset.GetValue<Schema::CdcStream::State>(); Y_VERIFY_S(Self->PathsById.contains(pathId), "Path doesn't exist, pathId: " << pathId); @@ -2780,7 +2781,7 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> { << ", path type: " << NKikimrSchemeOp::EPathType_Name(path->PathType)); Y_VERIFY(!Self->CdcStreams.contains(pathId)); - Self->CdcStreams[pathId] = new TCdcStreamInfo(alterVersion, mode, format, state); + Self->CdcStreams[pathId] = new TCdcStreamInfo(alterVersion, mode, format, vt, state); Self->IncrementPathDbRefCount(pathId); if (!rowset.Next()) { @@ -2805,6 +2806,7 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> { auto alterVersion = rowset.GetValue<Schema::CdcStreamAlterData::AlterVersion>(); auto mode = rowset.GetValue<Schema::CdcStreamAlterData::Mode>(); auto format = rowset.GetValue<Schema::CdcStreamAlterData::Format>(); + auto vt = rowset.GetValueOrDefault<Schema::CdcStreamAlterData::VirtualTimestamps>(false); auto state = rowset.GetValue<Schema::CdcStreamAlterData::State>(); Y_VERIFY_S(Self->PathsById.contains(pathId), "Path doesn't exist, pathId: " << pathId); @@ -2816,14 +2818,14 @@ struct TSchemeShard::TTxInit : public 
TTransactionBase<TSchemeShard> { if (!Self->CdcStreams.contains(pathId)) { Y_VERIFY(alterVersion == 1); - Self->CdcStreams[pathId] = TCdcStreamInfo::New(mode, format); + Self->CdcStreams[pathId] = TCdcStreamInfo::New(mode, format, vt); Self->IncrementPathDbRefCount(pathId); } auto stream = Self->CdcStreams.at(pathId); Y_VERIFY(stream->AlterData == nullptr); Y_VERIFY(stream->AlterVersion < alterVersion); - stream->AlterData = new TCdcStreamInfo(alterVersion, mode, format, state); + stream->AlterData = new TCdcStreamInfo(alterVersion, mode, format, vt, state); Y_VERIFY_S(Self->PathsById.contains(path->ParentPathId), "Parent path is not found" << ", cdc stream pathId: " << pathId diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_allocate_pq.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_allocate_pq.cpp index b5474675c8..5d532f2a0e 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_allocate_pq.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_allocate_pq.cpp @@ -196,7 +196,7 @@ public: return result; } - pqGroupInfo->AlterVersion = allocateDesc.GetAlterVersion(); + pqGroupInfo->AlterVersion = allocateDesc.GetAlterVersion() + 1; if (!allocateDesc.HasPQTabletConfig()) { auto errStr = TStringBuilder() << "No PQTabletConfig specified"; @@ -445,6 +445,8 @@ public: context.SS->PersQueueGroups[pathId] = emptyGroup; context.SS->PersQueueGroups[pathId]->AlterData = pqGroupInfo; + context.SS->PersQueueGroups[pathId]->AlterVersion = pqGroupInfo->AlterVersion; + context.SS->IncrementPathDbRefCount(pathId); context.SS->PersistPersQueueGroup(db, pathId, emptyGroup); @@ -491,7 +493,7 @@ public: context.SS->ClearDescribePathCaches(parentPath.Base()); context.OnComplete.PublishToSchemeBoard(OperationId, parentPath.Base()->PathId); - context.SS->ClearDescribePathCaches(dstPath.Base()); + context.SS->ClearDescribePathCaches(dstPath.Base(), true); context.OnComplete.PublishToSchemeBoard(OperationId, dstPath.Base()->PathId); 
dstPath.DomainInfo()->IncPathsInside(); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_pq.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_pq.cpp index 92a172e5a2..ed75af8b0c 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_pq.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_pq.cpp @@ -113,6 +113,19 @@ public: return nullptr; } + if (alterConfig.GetPartitionConfig().HasLifetimeSeconds()) { + const auto lifetimeSeconds = alterConfig.GetPartitionConfig().GetLifetimeSeconds(); + if (lifetimeSeconds <= 0 || (ui32)lifetimeSeconds > TSchemeShard::MaxPQLifetimeSeconds) { + errStr = TStringBuilder() << "Invalid retention period" + << ": specified: " << lifetimeSeconds << "s" + << ", min: " << 1 << "s" + << ", max: " << TSchemeShard::MaxPQLifetimeSeconds << "s"; + return nullptr; + } + } else { + alterConfig.MutablePartitionConfig()->SetLifetimeSeconds(tabletConfig->GetPartitionConfig().GetLifetimeSeconds()); + } + if (alterConfig.GetPartitionConfig().ExplicitChannelProfilesSize() > 0) { // Validate explicit channel profiles alter attempt const auto& ecps = alterConfig.GetPartitionConfig().GetExplicitChannelProfiles(); @@ -524,19 +537,13 @@ public: result->SetError(NKikimrScheme::StatusInvalidParameter, errStr); return result; } - if ((ui32)newTabletConfig.GetPartitionConfig().GetWriteSpeedInBytesPerSecond() > TSchemeShard::MaxPQWriteSpeedPerPartition) { - errStr = TStringBuilder() - << "Invalid write speed per second in partition specified: " << newTabletConfig.GetPartitionConfig().GetWriteSpeedInBytesPerSecond() - << " vs " << TSchemeShard::MaxPQWriteSpeedPerPartition; - result->SetError(NKikimrScheme::StatusInvalidParameter, errStr); - return result; - } - if ((ui32)newTabletConfig.GetPartitionConfig().GetLifetimeSeconds() > TSchemeShard::MaxPQLifetimeSeconds) { - errStr = TStringBuilder() - << "Invalid retention period specified: " << newTabletConfig.GetPartitionConfig().GetLifetimeSeconds() - << " vs " << 
TSchemeShard::MaxPQLifetimeSeconds; - result->SetError(NKikimrScheme::StatusInvalidParameter, errStr); + const auto& partConfig = newTabletConfig.GetPartitionConfig(); + + if ((ui32)partConfig.GetWriteSpeedInBytesPerSecond() > TSchemeShard::MaxPQWriteSpeedPerPartition) { + result->SetError(NKikimrScheme::StatusInvalidParameter, TStringBuilder() << "Invalid write speed" + << ": specified: " << partConfig.GetWriteSpeedInBytesPerSecond() << "bps" + << ", max: " << TSchemeShard::MaxPQWriteSpeedPerPartition << "bps"); return result; } @@ -593,7 +600,6 @@ public: // This channel bindings are for PersQueue shards. They either use // explicit channel profiles, or reuse channel profile above. - const auto& partConfig = newTabletConfig.GetPartitionConfig(); TChannelsBindings pqChannelsBinding; if (partConfig.ExplicitChannelProfilesSize() > 0) { // N.B. no validation necessary at this step diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_pq.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_pq.cpp index f5976ca648..f6213c0d40 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_pq.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_pq.cpp @@ -53,17 +53,19 @@ TPersQueueGroupInfo::TPtr CreatePersQueueGroup(TOperationContext& context, if ((ui32)op.GetPQTabletConfig().GetPartitionConfig().GetWriteSpeedInBytesPerSecond() > TSchemeShard::MaxPQWriteSpeedPerPartition) { status = NKikimrScheme::StatusInvalidParameter; - errStr = TStringBuilder() - << "Invalid write speed per second in partition specified: " << op.GetPQTabletConfig().GetPartitionConfig().GetWriteSpeedInBytesPerSecond() - << " vs " << TSchemeShard::MaxPQWriteSpeedPerPartition; + errStr = TStringBuilder() << "Invalid write speed" + << ": specified: " << op.GetPQTabletConfig().GetPartitionConfig().GetWriteSpeedInBytesPerSecond() << "bps" + << ", max: " << TSchemeShard::MaxPQWriteSpeedPerPartition << "bps"; return nullptr; } - if 
((ui32)op.GetPQTabletConfig().GetPartitionConfig().GetLifetimeSeconds() > TSchemeShard::MaxPQLifetimeSeconds) { + const auto lifetimeSeconds = op.GetPQTabletConfig().GetPartitionConfig().GetLifetimeSeconds(); + if (lifetimeSeconds <= 0 || (ui32)lifetimeSeconds > TSchemeShard::MaxPQLifetimeSeconds) { status = NKikimrScheme::StatusInvalidParameter; - errStr = TStringBuilder() - << "Invalid retention period specified: " << op.GetPQTabletConfig().GetPartitionConfig().GetLifetimeSeconds() - << " vs " << TSchemeShard::MaxPQLifetimeSeconds; + errStr = TStringBuilder() << "Invalid retention period" + << ": specified: " << lifetimeSeconds << "s" + << ", min: " << 1 << "s" + << ", max: " << TSchemeShard::MaxPQLifetimeSeconds << "s"; return nullptr; } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_move_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_move_table.cpp index e6a01e494c..d7a52751b9 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_move_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_move_table.cpp @@ -228,7 +228,7 @@ public: Y_VERIFY(!context.SS->Tables.contains(dstPath.Base()->PathId)); Y_VERIFY(context.SS->Tables.contains(srcPath.Base()->PathId)); - TTableInfo::TPtr tableInfo = new TTableInfo(*context.SS->Tables.at(srcPath.Base()->PathId)); + TTableInfo::TPtr tableInfo = TTableInfo::DeepCopy(*context.SS->Tables.at(srcPath.Base()->PathId)); tableInfo->ResetDescriptionCache(); tableInfo->AlterVersion += 1; @@ -401,15 +401,33 @@ public: TTxState* txState = context.SS->FindTx(OperationId); Y_VERIFY(txState); - TPath srcPath = TPath::Init(txState->SourcePathId, context.SS); + auto srcPath = TPath::Init(txState->SourcePathId, context.SS); + auto dstPath = TPath::Init(txState->TargetPathId, context.SS); Y_VERIFY(txState->PlanStep); MarkSrcDropped(db, context, OperationId, *txState, srcPath); + Y_VERIFY(context.SS->Tables.contains(dstPath.Base()->PathId)); + auto tableInfo = context.SS->Tables.at(dstPath.Base()->PathId); + 
+ if (tableInfo->IsTTLEnabled() && !context.SS->TTLEnabledTables.contains(dstPath.Base()->PathId)) { + context.SS->TTLEnabledTables[dstPath.Base()->PathId] = tableInfo; + // MarkSrcDropped() removes srcPath from TTLEnabledTables & decrements the counters + context.SS->TabletCounters->Simple()[COUNTER_TTL_ENABLED_TABLE_COUNT].Add(1); + + const auto now = context.Ctx.Now(); + for (auto& shard : tableInfo->GetPartitions()) { + auto& lag = shard.LastCondEraseLag; + lag = now - shard.LastCondErase; + context.SS->TabletCounters->Percentile()[COUNTER_NUM_SHARDS_BY_TTL_LAG].IncrementFor(lag->Seconds()); + } + } + context.SS->ChangeTxState(db, OperationId, TTxState::ProposedWaitParts); return true; } + bool ProgressState(TOperationContext& context) override { TTabletId ssId = context.SS->SelfTabletId(); context.OnComplete.RouteByTabletsFromOperation(OperationId); diff --git a/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp b/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp index 7f1449f0e3..bc7ccb95de 100644 --- a/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp @@ -260,6 +260,10 @@ bool TTxStorePartitionStats::PersistSingleStats(TTransactionContext& txc, const if (!newStats.HasBorrowedData) { Self->RemoveBorrowedCompaction(shardIdx); + } else if (Self->EnableBorrowedSplitCompaction && rec.GetIsDstSplit()) { + // note that we want to compact only shards originating + // from split/merge and not shards created via copytable + Self->EnqueueBorrowedCompaction(shardIdx); } if (!table->IsBackup && !table->IsShardsStatsDetached()) { @@ -336,9 +340,11 @@ bool TTxStorePartitionStats::PersistSingleStats(TTransactionContext& txc, const Self->TabletCounters->Percentile()[COUNTER_NUM_SHARDS_BY_TTL_LAG].IncrementFor(lag->Seconds()); } + const TTableInfo* mainTableForIndex = Self->GetMainTableForIndex(pathId); + const auto forceShardSplitSettings = Self->SplitSettings.GetForceShardSplitSettings(); TVector<TShardIdx> 
shardsToMerge; - if (table->CheckCanMergePartitions(Self->SplitSettings, forceShardSplitSettings, shardIdx, shardsToMerge)) { + if (table->CheckCanMergePartitions(Self->SplitSettings, forceShardSplitSettings, shardIdx, shardsToMerge, mainTableForIndex)) { TTxId txId = Self->GetCachedTxId(ctx); if (!txId) { @@ -376,7 +382,7 @@ bool TTxStorePartitionStats::PersistSingleStats(TTransactionContext& txc, const } else if (table->GetPartitions().size() >= table->GetMaxPartitionsCount()) { // We cannot split as there are max partitions already return true; - } else if (table->CheckSplitByLoad(Self->SplitSettings, shardIdx, dataSize, rowCount)) { + } else if (table->CheckSplitByLoad(Self->SplitSettings, shardIdx, dataSize, rowCount, mainTableForIndex)) { collectKeySample = true; } else { return true; diff --git a/ydb/core/tx/schemeshard/schemeshard__table_stats_histogram.cpp b/ydb/core/tx/schemeshard/schemeshard__table_stats_histogram.cpp index 1001f22c76..78814603b6 100644 --- a/ydb/core/tx/schemeshard/schemeshard__table_stats_histogram.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__table_stats_histogram.cpp @@ -310,12 +310,14 @@ bool TTxPartitionHistogram::Execute(TTransactionContext& txc, const TActorContex auto shardIdx = Self->TabletIdToShardIdx[datashardId]; const auto forceShardSplitSettings = Self->SplitSettings.GetForceShardSplitSettings(); + const TTableInfo* mainTableForIndex = Self->GetMainTableForIndex(tableId); + ESplitReason splitReason = ESplitReason::NO_SPLIT; if (table->ShouldSplitBySize(dataSize, forceShardSplitSettings)) { splitReason = ESplitReason::SPLIT_BY_SIZE; } - if (splitReason == ESplitReason::NO_SPLIT && table->CheckSplitByLoad(Self->SplitSettings, shardIdx, dataSize, rowCount)) { + if (splitReason == ESplitReason::NO_SPLIT && table->CheckSplitByLoad(Self->SplitSettings, shardIdx, dataSize, rowCount, mainTableForIndex)) { splitReason = ESplitReason::SPLIT_BY_LOAD; } diff --git a/ydb/core/tx/schemeshard/schemeshard_audit_log_fragment.cpp 
b/ydb/core/tx/schemeshard/schemeshard_audit_log_fragment.cpp index 9e730234d8..5321ef964c 100644 --- a/ydb/core/tx/schemeshard/schemeshard_audit_log_fragment.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_audit_log_fragment.cpp @@ -181,7 +181,6 @@ TString DefineUserOperationName(NKikimrSchemeOp::EOperationType type) { TAuditLogFragment::TAuditLogFragment(const NKikimrSchemeOp::TModifyScheme& tx) : Operation(DefineUserOperationName(tx.GetOperationType())) - , ProtoRequest(tx.ShortDebugString()) { FillPathes(tx); FillACL(tx); @@ -512,10 +511,6 @@ TString TAuditLogFragment::ToString() const { result << ", remove access: " << acl; } - if (ProtoRequest) { - result << ", protobuf request: " << ProtoRequest; - } - return result; } diff --git a/ydb/core/tx/schemeshard/schemeshard_audit_log_fragment.h b/ydb/core/tx/schemeshard/schemeshard_audit_log_fragment.h index 4829f0a795..33aa2a04cd 100644 --- a/ydb/core/tx/schemeshard/schemeshard_audit_log_fragment.h +++ b/ydb/core/tx/schemeshard/schemeshard_audit_log_fragment.h @@ -18,7 +18,6 @@ struct TAuditLogFragment { TVector<TString> AddACL; TVector<TString> RmACL; TMaybe<TString> NewOwner; - TMaybe<TString> ProtoRequest; TAuditLogFragment(const NKikimrSchemeOp::TModifyScheme& tx); diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.cpp b/ydb/core/tx/schemeshard/schemeshard_impl.cpp index a74b2517f0..b6e77b93a2 100644 --- a/ydb/core/tx/schemeshard/schemeshard_impl.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_impl.cpp @@ -490,10 +490,10 @@ bool TSchemeShard::ApplyStorageConfig( return true; } -void TSchemeShard::ClearDescribePathCaches(const TPathElement::TPtr node) { +void TSchemeShard::ClearDescribePathCaches(const TPathElement::TPtr node, bool force) { Y_VERIFY(node); - if (node->Dropped() || !node->IsCreateFinished()) { + if ((node->Dropped() || !node->IsCreateFinished()) && !force) { return; } @@ -1503,6 +1503,7 @@ void TSchemeShard::PersistCdcStream(NIceDb::TNiceDb& db, const TPathId& pathId) 
NIceDb::TUpdate<Schema::CdcStream::AlterVersion>(alterData->AlterVersion), NIceDb::TUpdate<Schema::CdcStream::Mode>(alterData->Mode), NIceDb::TUpdate<Schema::CdcStream::Format>(alterData->Format), + NIceDb::TUpdate<Schema::CdcStream::VirtualTimestamps>(alterData->VirtualTimestamps), NIceDb::TUpdate<Schema::CdcStream::State>(alterData->State) ); @@ -1526,6 +1527,7 @@ void TSchemeShard::PersistCdcStreamAlterData(NIceDb::TNiceDb& db, const TPathId& NIceDb::TUpdate<Schema::CdcStreamAlterData::AlterVersion>(alterData->AlterVersion), NIceDb::TUpdate<Schema::CdcStreamAlterData::Mode>(alterData->Mode), NIceDb::TUpdate<Schema::CdcStreamAlterData::Format>(alterData->Format), + NIceDb::TUpdate<Schema::CdcStreamAlterData::VirtualTimestamps>(alterData->VirtualTimestamps), NIceDb::TUpdate<Schema::CdcStreamAlterData::State>(alterData->State) ); } @@ -3866,6 +3868,7 @@ void TSchemeShard::Die(const TActorContext &ctx) { ctx.Send(SVPMigrator, new TEvents::TEvPoisonPill()); } + IndexBuildPipes.Shutdown(ctx); ShardDeleter.Shutdown(ctx); ParentDomainLink.Shutdown(ctx); @@ -3912,6 +3915,7 @@ void TSchemeShard::OnActivateExecutor(const TActorContext &ctx) { EnableBackgroundCompaction = appData->FeatureFlags.GetEnableBackgroundCompaction(); EnableBackgroundCompactionServerless = appData->FeatureFlags.GetEnableBackgroundCompactionServerless(); + EnableBorrowedSplitCompaction = appData->FeatureFlags.GetEnableBorrowedSplitCompaction(); EnableMoveIndex = appData->FeatureFlags.GetEnableMoveIndex(); ConfigureCompactionQueues(appData->CompactionConfig, ctx); @@ -3977,6 +3981,8 @@ void TSchemeShard::StateInit(STFUNC_SIG) { //console configs HFuncTraced(NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionResponse, Handle); HFunc(NConsole::TEvConsole::TEvConfigNotificationRequest, Handle); + HFunc(TEvPrivate::TEvConsoleConfigsTimeout, Handle); + default: StateInitImpl(ev, ctx); } @@ -4013,6 +4019,8 @@ void TSchemeShard::StateConfigure(STFUNC_SIG) { //console configs 
HFuncTraced(NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionResponse, Handle); HFunc(NConsole::TEvConsole::TEvConfigNotificationRequest, Handle); + HFunc(TEvPrivate::TEvConsoleConfigsTimeout, Handle); + default: if (!HandleDefaultEvents(ev, ctx)) { LOG_WARN_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, @@ -4157,6 +4165,7 @@ void TSchemeShard::StateWork(STFUNC_SIG) { //console configs HFuncTraced(NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionResponse, Handle); HFunc(NConsole::TEvConsole::TEvConfigNotificationRequest, Handle); + HFunc(TEvPrivate::TEvConsoleConfigsTimeout, Handle); HFuncTraced(TEvSchemeShard::TEvFindTabletSubDomainPathId, Handle); @@ -6133,6 +6142,15 @@ ui64 TSchemeShard::TDedicatedPipePool::CloseAll(TIndexBuildId ownerTxId, const T return tables.size(); } +void TSchemeShard::TDedicatedPipePool::Shutdown(const TActorContext& ctx) { + for (const auto& [clientId, _] : Owners) { + NTabletPipe::CloseClient(ctx, clientId); + } + + Pipes.clear(); + Owners.clear(); +} + TIndexBuildId TSchemeShard::TDedicatedPipePool::GetOwnerId(TActorId actorId) const { if (!Has(actorId)) { return InvalidIndexBuildId; @@ -6164,16 +6182,17 @@ void TSchemeShard::SubscribeConsoleConfigs(const TActorContext &ctx) { }), IEventHandle::FlagTrackDelivery ); + ctx.Schedule(TDuration::Seconds(15), new TEvPrivate::TEvConsoleConfigsTimeout); +} + +void TSchemeShard::Handle(TEvPrivate::TEvConsoleConfigsTimeout::TPtr&, const TActorContext& ctx) { + LOG_WARN_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, "Cannot get console configs"); + LoadTableProfiles(nullptr, ctx); } void TSchemeShard::Handle(TEvents::TEvUndelivered::TPtr&, const TActorContext& ctx) { LOG_WARN_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, "Cannot subscribe to console configs"); - TableProfilesLoaded = true; - - auto waiters = std::move(TableProfilesWaiters); - for (const auto& [importId, itemIdx] : waiters) { - Execute(CreateTxProgressImport(importId, itemIdx), ctx); - } + LoadTableProfiles(nullptr, ctx); 
} void TSchemeShard::ApplyConsoleConfigs(const NKikimrConfig::TAppConfig& appConfig, const TActorContext& ctx) { @@ -6191,13 +6210,9 @@ void TSchemeShard::ApplyConsoleConfigs(const NKikimrConfig::TAppConfig& appConfi } if (appConfig.HasTableProfilesConfig()) { - TableProfiles.Load(appConfig.GetTableProfilesConfig()); - TableProfilesLoaded = true; - - auto waiters = std::move(TableProfilesWaiters); - for (const auto& [importId, itemIdx] : waiters) { - Execute(CreateTxProgressImport(importId, itemIdx), ctx); - } + LoadTableProfiles(&appConfig.GetTableProfilesConfig(), ctx); + } else { + LoadTableProfiles(nullptr, ctx); } if (IsShemeShardConfigured()) { @@ -6217,6 +6232,7 @@ void TSchemeShard::ApplyConsoleConfigs(const NKikimrConfig::TFeatureFlags& featu EnableBackgroundCompaction = featureFlags.GetEnableBackgroundCompaction(); EnableBackgroundCompactionServerless = featureFlags.GetEnableBackgroundCompactionServerless(); + EnableBorrowedSplitCompaction = featureFlags.GetEnableBorrowedSplitCompaction(); EnableMoveIndex = featureFlags.GetEnableMoveIndex(); } diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.h b/ydb/core/tx/schemeshard/schemeshard_impl.h index 19306d11a3..aaf00c8b77 100644 --- a/ydb/core/tx/schemeshard/schemeshard_impl.h +++ b/ydb/core/tx/schemeshard/schemeshard_impl.h @@ -250,6 +250,7 @@ public: THashSet<TShardIdx> ShardsWithLoaned; // shards have parts loaned to another shards bool EnableBackgroundCompaction = false; bool EnableBackgroundCompactionServerless = false; + bool EnableBorrowedSplitCompaction = false; bool EnableMoveIndex = false; TShardDeleter ShardDeleter; @@ -386,6 +387,7 @@ public: void SubscribeConsoleConfigs(const TActorContext& ctx); void ApplyConsoleConfigs(const NKikimrConfig::TAppConfig& appConfig, const TActorContext& ctx); void ApplyConsoleConfigs(const NKikimrConfig::TFeatureFlags& featureFlags, const TActorContext& ctx); + void Handle(TEvPrivate::TEvConsoleConfigsTimeout::TPtr& ev, const TActorContext& ctx); void 
ConfigureStatsBatching( const NKikimrConfig::TSchemeShardConfig& config, @@ -406,6 +408,7 @@ public: void StartStopCompactionQueues(); void WaitForTableProfiles(ui64 importId, ui32 itemIdx); + void LoadTableProfiles(const NKikimrConfig::TTableProfilesConfig* config, const TActorContext& ctx); bool ApplyStorageConfig(const TStoragePools& storagePools, const NKikimrSchemeOp::TStorageConfig& storageConfig, @@ -472,7 +475,7 @@ public: static bool ResolveChannelsDetailsAsIs(ui32 /*profileId*/, const TChannelProfiles::TProfile& profile, const TStoragePools& storagePools, TChannelsBindings& channelsBinding); static bool TabletResolveChannelsDetails(ui32 profileId, const TChannelProfiles::TProfile& profile, const TStoragePools& storagePools, TChannelsBindings& channelsBinding); - void ClearDescribePathCaches(const TPathElement::TPtr node); + void ClearDescribePathCaches(const TPathElement::TPtr node, bool force = false); TString PathToString(TPathElement::TPtr item); NKikimrSchemeOp::TPathVersion GetPathVersion(const TPath& pathEl) const; ui64 GetAliveChildren(TPathElement::TPtr pathEl, const std::optional<TPathElement::EPathType>& type = std::nullopt) const; @@ -1119,6 +1122,7 @@ public: void Create(TIndexBuildId ownerTxId, TTabletId dst, THolder<IEventBase> message, const TActorContext& ctx); void Close(TIndexBuildId ownerTxId, TTabletId dst, const TActorContext& ctx); ui64 CloseAll(TIndexBuildId ownerTxId, const TActorContext& ctx); + void Shutdown(const TActorContext& ctx); bool Has(TActorId actorId) const; TTabletId GetTabletId(TActorId actorId) const; diff --git a/ydb/core/tx/schemeshard/schemeshard_import.cpp b/ydb/core/tx/schemeshard/schemeshard_import.cpp index 42a182223f..5127f60f41 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_import.cpp @@ -203,5 +203,20 @@ void TSchemeShard::WaitForTableProfiles(ui64 importId, ui32 itemIdx) { TableProfilesWaiters.insert(std::make_pair(importId, itemIdx)); } +void 
TSchemeShard::LoadTableProfiles(const NKikimrConfig::TTableProfilesConfig* config, const TActorContext& ctx) { + if (config) { + LOG_N("Load table profiles"); + TableProfiles.Load(*config); + } else { + LOG_W("Table profiles were not loaded"); + } + + TableProfilesLoaded = true; + auto waiters = std::move(TableProfilesWaiters); + for (const auto& [importId, itemIdx] : waiters) { + Execute(CreateTxProgressImport(importId, itemIdx), ctx); + } +} + } // NSchemeShard } // NKikimr diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp index 5fcd1204e8..6facac512b 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp @@ -279,6 +279,7 @@ TTableInfo::TAlterDataPtr TTableInfo::CreateAlterData( void TTableInfo::ResetDescriptionCache() { TableDescription.ClearId_Deprecated(); TableDescription.ClearPathId(); + TableDescription.ClearPath(); TableDescription.ClearName(); TableDescription.ClearColumns(); TableDescription.ClearKeyColumnIds(); @@ -1426,7 +1427,8 @@ void TTableInfo::FinishSplitMergeOp(TOperationId opId) { bool TTableInfo::TryAddShardToMerge(const TSplitSettings& splitSettings, const TForceShardSplitSettings& forceShardSplitSettings, TShardIdx shardIdx, TVector<TShardIdx>& shardsToMerge, - THashSet<TTabletId>& partOwners, ui64& totalSize, float& totalLoad) const + THashSet<TTabletId>& partOwners, ui64& totalSize, float& totalLoad, + const TTableInfo* mainTableForIndex) const { if (ExpectedPartitionCount + 1 - shardsToMerge.size() <= GetMinPartitionsCount()) { return false; @@ -1464,7 +1466,7 @@ bool TTableInfo::TryAddShardToMerge(const TSplitSettings& splitSettings, // Check if we can try merging by load TInstant now = AppData()->TimeProvider->Now(); TDuration minUptime = TDuration::Seconds(splitSettings.MergeByLoadMinUptimeSec); - if (!canMerge && IsMergeByLoadEnabled() && stats->StartTime && stats->StartTime + minUptime < now) { + if 
(!canMerge && IsMergeByLoadEnabled(mainTableForIndex) && stats->StartTime && stats->StartTime + minUptime < now) { canMerge = true; } @@ -1478,8 +1480,8 @@ bool TTableInfo::TryAddShardToMerge(const TSplitSettings& splitSettings, // Check that total load doesn't exceed the limits float shardLoad = stats->GetCurrentRawCpuUsage() * 0.000001; - if (IsMergeByLoadEnabled()) { - const auto& settings = PartitionConfig().GetPartitioningPolicy().GetSplitByLoadSettings(); + if (IsMergeByLoadEnabled(mainTableForIndex)) { + const auto settings = GetEffectiveSplitByLoadSettings(mainTableForIndex); i64 cpuPercentage = settings.GetCpuPercentageThreshold(); float cpuUsageThreshold = 0.01 * (cpuPercentage ? cpuPercentage : (i64)splitSettings.FastSplitCpuPercentageThreshold); @@ -1508,7 +1510,8 @@ bool TTableInfo::TryAddShardToMerge(const TSplitSettings& splitSettings, bool TTableInfo::CheckCanMergePartitions(const TSplitSettings& splitSettings, const TForceShardSplitSettings& forceShardSplitSettings, - TShardIdx shardIdx, TVector<TShardIdx>& shardsToMerge) const + TShardIdx shardIdx, TVector<TShardIdx>& shardsToMerge, + const TTableInfo* mainTableForIndex) const { // Don't split/merge backup tables if (IsBackup) { @@ -1536,12 +1539,12 @@ bool TTableInfo::CheckCanMergePartitions(const TSplitSettings& splitSettings, THashSet<TTabletId> partOwners; // Make sure we can actually merge current shard first - if (!TryAddShardToMerge(splitSettings, forceShardSplitSettings, shardIdx, shardsToMerge, partOwners, totalSize, totalLoad)) { + if (!TryAddShardToMerge(splitSettings, forceShardSplitSettings, shardIdx, shardsToMerge, partOwners, totalSize, totalLoad, mainTableForIndex)) { return false; } for (i64 pi = partitionIdx - 1; pi >= 0; --pi) { - if (!TryAddShardToMerge(splitSettings, forceShardSplitSettings, GetPartitions()[pi].ShardIdx, shardsToMerge, partOwners, totalSize, totalLoad)) { + if (!TryAddShardToMerge(splitSettings, forceShardSplitSettings, GetPartitions()[pi].ShardIdx, 
shardsToMerge, partOwners, totalSize, totalLoad, mainTableForIndex)) { break; } } @@ -1549,7 +1552,7 @@ bool TTableInfo::CheckCanMergePartitions(const TSplitSettings& splitSettings, Reverse(shardsToMerge.begin(), shardsToMerge.end()); for (ui64 pi = partitionIdx + 1; pi < GetPartitions().size(); ++pi) { - if (!TryAddShardToMerge(splitSettings, forceShardSplitSettings, GetPartitions()[pi].ShardIdx, shardsToMerge, partOwners, totalSize, totalLoad)) { + if (!TryAddShardToMerge(splitSettings, forceShardSplitSettings, GetPartitions()[pi].ShardIdx, shardsToMerge, partOwners, totalSize, totalLoad, mainTableForIndex)) { break; } } @@ -1557,7 +1560,11 @@ bool TTableInfo::CheckCanMergePartitions(const TSplitSettings& splitSettings, return shardsToMerge.size() > 1; } -bool TTableInfo::CheckSplitByLoad(const TSplitSettings& splitSettings, TShardIdx shardIdx, ui64 dataSize, ui64 rowCount) const { +bool TTableInfo::CheckSplitByLoad( + const TSplitSettings& splitSettings, TShardIdx shardIdx, + ui64 dataSize, ui64 rowCount, + const TTableInfo* mainTableForIndex) const +{ // Don't split/merge backup tables if (IsBackup) return false; @@ -1585,11 +1592,11 @@ bool TTableInfo::CheckSplitByLoad(const TSplitSettings& splitSettings, TShardIdx maxShards = splitSettings.SplitByLoadMaxShardsDefault; } - if (!policy.HasSplitByLoadSettings() || !policy.GetSplitByLoadSettings().GetEnabled()) { + if (!IsSplitByLoadEnabled(mainTableForIndex)) { return false; } - const auto& settings = policy.GetSplitByLoadSettings(); + const auto settings = GetEffectiveSplitByLoadSettings(mainTableForIndex); i64 cpuPercentage = settings.GetCpuPercentageThreshold(); float cpuUsageThreshold = 0.01 * (cpuPercentage ? 
cpuPercentage : (i64)splitSettings.FastSplitCpuPercentageThreshold); diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index 2fc7710213..986c7e9a9b 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -491,6 +491,17 @@ public: TableDescription.Swap(alterData.TableDescriptionFull.Get()); } + static TTableInfo::TPtr DeepCopy(const TTableInfo& other) { + TTableInfo::TPtr copy(new TTableInfo(other)); + // rebuild conditional erase schedule since it uses iterators + copy->CondEraseSchedule.clear(); + for (ui32 i = 0; i < copy->Partitions.size(); ++i) { + copy->CondEraseSchedule.push(copy->Partitions.begin() + i); + } + + return copy; + } + static TAlterDataPtr CreateAlterData( TPtr source, NKikimrSchemeOp::TTableDescription& descr, @@ -586,13 +597,18 @@ public: bool TryAddShardToMerge(const TSplitSettings& splitSettings, const TForceShardSplitSettings& forceShardSplitSettings, TShardIdx shardIdx, TVector<TShardIdx>& shardsToMerge, - THashSet<TTabletId>& partOwners, ui64& totalSize, float& totalLoad) const; + THashSet<TTabletId>& partOwners, ui64& totalSize, float& totalLoad, + const TTableInfo* mainTableForIndex) const; bool CheckCanMergePartitions(const TSplitSettings& splitSettings, const TForceShardSplitSettings& forceShardSplitSettings, - TShardIdx shardIdx, TVector<TShardIdx>& shardsToMerge) const; + TShardIdx shardIdx, TVector<TShardIdx>& shardsToMerge, + const TTableInfo* mainTableForIndex) const; - bool CheckSplitByLoad(const TSplitSettings& splitSettings, TShardIdx shardIdx, ui64 dataSize, ui64 rowCount) const; + bool CheckSplitByLoad( + const TSplitSettings& splitSettings, TShardIdx shardIdx, + ui64 dataSize, ui64 rowCount, + const TTableInfo* mainTableForIndex) const; bool IsSplitBySizeEnabled(const TForceShardSplitSettings& params) const { // Respect unspecified SizeToSplit when force shard splits are disabled @@ -621,12 +637,54 
@@ public: return Partitions.size() > GetMaxPartitionsCount() && !params.DisableForceShardSplit; } - bool IsSplitByLoadEnabled() const { - return PartitionConfig().GetPartitioningPolicy().GetSplitByLoadSettings().GetEnabled(); + NKikimrSchemeOp::TSplitByLoadSettings GetEffectiveSplitByLoadSettings( + const TTableInfo* mainTableForIndex) const + { + NKikimrSchemeOp::TSplitByLoadSettings settings; + + if (mainTableForIndex) { + // Merge main table settings first + // Index settings will override these + settings.MergeFrom( + mainTableForIndex->PartitionConfig() + .GetPartitioningPolicy() + .GetSplitByLoadSettings()); + } + + // Merge local table settings last, they take precedence + settings.MergeFrom( + PartitionConfig() + .GetPartitioningPolicy() + .GetSplitByLoadSettings()); + + return settings; + } + + bool IsSplitByLoadEnabled(const TTableInfo* mainTableForIndex) const { + // We cannot split when external blobs are enabled + if (PartitionConfigHasExternalBlobsEnabled(PartitionConfig())) { + return false; + } + + const auto& policy = PartitionConfig().GetPartitioningPolicy(); + if (policy.HasSplitByLoadSettings() && policy.GetSplitByLoadSettings().HasEnabled()) { + // Always prefer any explicit setting + return policy.GetSplitByLoadSettings().GetEnabled(); + } + + if (mainTableForIndex) { + // Enable by default for indexes, when enabled for the main table + // TODO: consider always enabling by default + const auto& mainPolicy = mainTableForIndex->PartitionConfig().GetPartitioningPolicy(); + return mainPolicy.GetSplitByLoadSettings().GetEnabled(); + } + + // Disable by default for normal tables + return false; } - bool IsMergeByLoadEnabled() const { - return IsSplitByLoadEnabled(); + bool IsMergeByLoadEnabled(const TTableInfo* mainTableForIndex) const { + return IsSplitByLoadEnabled(mainTableForIndex); } ui64 GetShardSizeToSplit(const TForceShardSplitSettings& params) const { @@ -2218,10 +2276,11 @@ struct TCdcStreamInfo : public TSimpleRefCount<TCdcStreamInfo> { 
using EFormat = NKikimrSchemeOp::ECdcStreamFormat; using EState = NKikimrSchemeOp::ECdcStreamState; - TCdcStreamInfo(ui64 version, EMode mode, EFormat format, EState state) + TCdcStreamInfo(ui64 version, EMode mode, EFormat format, bool vt, EState state) : AlterVersion(version) , Mode(mode) , Format(format) + , VirtualTimestamps(vt) , State(state) {} @@ -2235,12 +2294,12 @@ struct TCdcStreamInfo : public TSimpleRefCount<TCdcStreamInfo> { return result; } - static TPtr New(EMode mode, EFormat format) { - return new TCdcStreamInfo(0, mode, format, EState::ECdcStreamStateInvalid); + static TPtr New(EMode mode, EFormat format, bool vt) { + return new TCdcStreamInfo(0, mode, format, vt, EState::ECdcStreamStateInvalid); } static TPtr Create(const NKikimrSchemeOp::TCdcStreamDescription& desc) { - TPtr result = New(desc.GetMode(), desc.GetFormat()); + TPtr result = New(desc.GetMode(), desc.GetFormat(), desc.GetVirtualTimestamps()); TPtr alterData = result->CreateNextVersion(); alterData->State = EState::ECdcStreamStateReady; @@ -2250,6 +2309,7 @@ struct TCdcStreamInfo : public TSimpleRefCount<TCdcStreamInfo> { ui64 AlterVersion = 1; EMode Mode; EFormat Format; + bool VirtualTimestamps; EState State; TCdcStreamInfo::TPtr AlterData = nullptr; diff --git a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp index 78b151e30c..382115d516 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp @@ -1055,6 +1055,7 @@ void TSchemeShard::DescribeCdcStream(const TPathId& pathId, const TString& name, desc.SetName(name); desc.SetMode(info->Mode); desc.SetFormat(info->Format); + desc.SetVirtualTimestamps(info->VirtualTimestamps); PathIdFromPathId(pathId, desc.MutablePathId()); desc.SetState(info->State); desc.SetSchemaVersion(info->AlterVersion); diff --git a/ydb/core/tx/schemeshard/schemeshard_private.h b/ydb/core/tx/schemeshard/schemeshard_private.h 
index e6e1a5f2a9..feace05175 100644 --- a/ydb/core/tx/schemeshard/schemeshard_private.h +++ b/ydb/core/tx/schemeshard/schemeshard_private.h @@ -25,6 +25,7 @@ struct TEvPrivate { EvCompletePublication, EvCompleteBarrier, EvPersistStats, + EvConsoleConfigsTimeout, EvEnd }; @@ -161,6 +162,9 @@ struct TEvPrivate { TEvPersistStats() = default; }; + struct TEvConsoleConfigsTimeout: public TEventLocal<TEvConsoleConfigsTimeout, EvConsoleConfigsTimeout> { + }; + }; // TEvPrivate } // NSchemeShard diff --git a/ydb/core/tx/schemeshard/schemeshard_schema.h b/ydb/core/tx/schemeshard/schemeshard_schema.h index fce0f91dc3..ff2f5f99f6 100644 --- a/ydb/core/tx/schemeshard/schemeshard_schema.h +++ b/ydb/core/tx/schemeshard/schemeshard_schema.h @@ -1542,9 +1542,10 @@ struct Schema : NIceDb::Schema { struct State : Column<4, NScheme::NTypeIds::Uint32> { using Type = NKikimrSchemeOp::ECdcStreamState; static constexpr Type Default = NKikimrSchemeOp::ECdcStreamStateInvalid; }; struct Mode : Column<5, NScheme::NTypeIds::Uint32> { using Type = NKikimrSchemeOp::ECdcStreamMode; static constexpr Type Default = NKikimrSchemeOp::ECdcStreamModeInvalid; }; struct Format : Column<6, NScheme::NTypeIds::Uint32> { using Type = NKikimrSchemeOp::ECdcStreamFormat; static constexpr Type Default = NKikimrSchemeOp::ECdcStreamFormatInvalid; }; + struct VirtualTimestamps : Column<7, NScheme::NTypeIds::Bool> {}; using TKey = TableKey<OwnerPathId, LocalPathId>; - using TColumns = TableColumns<OwnerPathId, LocalPathId, AlterVersion, State, Mode, Format>; + using TColumns = TableColumns<OwnerPathId, LocalPathId, AlterVersion, State, Mode, Format, VirtualTimestamps>; }; struct CdcStreamAlterData : Table<96> { @@ -1554,9 +1555,10 @@ struct Schema : NIceDb::Schema { struct State : Column<4, NScheme::NTypeIds::Uint32> { using Type = NKikimrSchemeOp::ECdcStreamState; static constexpr Type Default = NKikimrSchemeOp::ECdcStreamStateInvalid; }; struct Mode : Column<5, NScheme::NTypeIds::Uint32> { using Type = 
NKikimrSchemeOp::ECdcStreamMode; static constexpr Type Default = NKikimrSchemeOp::ECdcStreamModeInvalid; }; struct Format : Column<6, NScheme::NTypeIds::Uint32> { using Type = NKikimrSchemeOp::ECdcStreamFormat; static constexpr Type Default = NKikimrSchemeOp::ECdcStreamFormatInvalid; }; + struct VirtualTimestamps : Column<7, NScheme::NTypeIds::Bool> {}; using TKey = TableKey<OwnerPathId, LocalPathId>; - using TColumns = TableColumns<OwnerPathId, LocalPathId, AlterVersion, State, Mode, Format>; + using TColumns = TableColumns<OwnerPathId, LocalPathId, AlterVersion, State, Mode, Format, VirtualTimestamps>; }; struct Sequences : Table<97> { diff --git a/ydb/core/tx/schemeshard/ut_allocate_pq.cpp b/ydb/core/tx/schemeshard/ut_allocate_pq.cpp index 0934cee0a5..ea9ec7d334 100644 --- a/ydb/core/tx/schemeshard/ut_allocate_pq.cpp +++ b/ydb/core/tx/schemeshard/ut_allocate_pq.cpp @@ -24,21 +24,33 @@ Y_UNIT_TEST_SUITE(TSchemeShardAllocatePQTest) { TestMkDir(runtime, ++txId, "/MyRoot", "DirA"); TestCreatePQGroup(runtime, ++txId, "/MyRoot/DirA", "Name: \"PQGroup\"" - "TotalGroupCount: 10 " + "TotalGroupCount: 9 " "PartitionPerTablet: 4 " "PQTabletConfig: {PartitionConfig { LifetimeSeconds : 10}}"); env.TestWaitNotification(runtime, {txId-1, txId}); + TestAlterPQGroup(runtime, ++txId, "/MyRoot/DirA", + "Name: \"PQGroup\"" + "TotalGroupCount: 10 " + "PartitionPerTablet: 4 " + "PQTabletConfig: {PartitionConfig { LifetimeSeconds : 10}}"); + + env.TestWaitNotification(runtime, {txId-1, txId}); + TestDescribeResult(DescribePath(runtime, "/MyRoot/DirA"), {NLs::Finished, NLs::PathsInsideDomain(2), NLs::ShardsInsideDomain(4)}); - TestDescribeResult(DescribePath(runtime, "/MyRoot/DirA/PQGroup", true), - {NLs::CheckPartCount("PQGroup", 10, 4, 3, 10)}); + + TestDescribeResult( + DescribePath(runtime, "/MyRoot/DirA/PQGroup", true), + {NLs::CheckPartCount("PQGroup", 10, 4, 3, 10, NKikimrSchemeOp::EPathState::EPathStateNoChanges), + NLs::CheckPQAlterVersion("PQGroup", 2)} + ); { auto 
balancerDescr = GetDescibeFromPQBalancer(runtime, 9437197); - TString expected = R"(TopicName: "PQGroup" Version: 0 Config { PartitionConfig { LifetimeSeconds: 10 } YdbDatabasePath: "/MyRoot" } PartitionPerTablet: 4 Partitions { Partition: 0 TabletId: 9437194 } Partitions { Partition: 1 TabletId: 9437194 } Partitions { Partition: 2 TabletId: 9437195 } Partitions { Partition: 3 TabletId: 9437194 } Partitions { Partition: 4 TabletId: 9437196 } Partitions { Partition: 5 TabletId: 9437195 } Partitions { Partition: 6 TabletId: 9437194 } Partitions { Partition: 7 TabletId: 9437196 } Partitions { Partition: 8 TabletId: 9437195 } Partitions { Partition: 9 TabletId: 9437195 } SchemeShardId: 8751008 BalancerTabletId: 9437197 SecurityObject: "\022\000")"; + TString expected = R"(TopicName: "PQGroup" Version: 2 Config { PartitionConfig { LifetimeSeconds: 10 } YdbDatabasePath: "/MyRoot" } PartitionPerTablet: 4 Partitions { Partition: 0 TabletId: 9437194 } Partitions { Partition: 1 TabletId: 9437194 } Partitions { Partition: 2 TabletId: 9437195 } Partitions { Partition: 3 TabletId: 9437194 } Partitions { Partition: 4 TabletId: 9437196 } Partitions { Partition: 5 TabletId: 9437195 } Partitions { Partition: 6 TabletId: 9437194 } Partitions { Partition: 7 TabletId: 9437195 } Partitions { Partition: 8 TabletId: 9437195 } Partitions { Partition: 9 TabletId: 9437196 } SchemeShardId: 8751008 BalancerTabletId: 9437197 SecurityObject: "\022\000")"; UNIT_ASSERT_NO_DIFF(expected, balancerDescr.ShortUtf8DebugString()); } @@ -99,7 +111,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardAllocatePQTest) { { auto balancerDescr = GetDescibeFromPQBalancer(runtime, 9437197); - TString expected = R"(TopicName: "PQGroup" Version: 1 Config { PartitionConfig { LifetimeSeconds: 10 } YdbDatabasePath: "/MyRoot/Database" } PartitionPerTablet: 4 Partitions { Partition: 0 TabletId: 9437194 } Partitions { Partition: 1 TabletId: 9437194 } Partitions { Partition: 2 TabletId: 9437195 } Partitions { Partition: 3 TabletId: 
9437194 } Partitions { Partition: 4 TabletId: 9437196 } Partitions { Partition: 5 TabletId: 9437195 } Partitions { Partition: 6 TabletId: 9437194 } Partitions { Partition: 7 TabletId: 9437196 } Partitions { Partition: 8 TabletId: 9437195 } Partitions { Partition: 9 TabletId: 9437195 } SchemeShardId: 9437200 BalancerTabletId: 9437197 SecurityObject: "\022\000")"; + TString expected = R"(TopicName: "PQGroup" Version: 3 Config { PartitionConfig { LifetimeSeconds: 10 } YdbDatabasePath: "/MyRoot/Database" } PartitionPerTablet: 4 Partitions { Partition: 0 TabletId: 9437194 } Partitions { Partition: 1 TabletId: 9437194 } Partitions { Partition: 2 TabletId: 9437195 } Partitions { Partition: 3 TabletId: 9437194 } Partitions { Partition: 4 TabletId: 9437196 } Partitions { Partition: 5 TabletId: 9437195 } Partitions { Partition: 6 TabletId: 9437194 } Partitions { Partition: 7 TabletId: 9437195 } Partitions { Partition: 8 TabletId: 9437195 } Partitions { Partition: 9 TabletId: 9437196 } SchemeShardId: 9437200 BalancerTabletId: 9437197 SecurityObject: "\022\000")"; UNIT_ASSERT_NO_DIFF(expected, balancerDescr.ShortUtf8DebugString()); } diff --git a/ydb/core/tx/schemeshard/ut_cdc_stream.cpp b/ydb/core/tx/schemeshard/ut_cdc_stream.cpp index 8725a18e59..625bdb9be6 100644 --- a/ydb/core/tx/schemeshard/ut_cdc_stream.cpp +++ b/ydb/core/tx/schemeshard/ut_cdc_stream.cpp @@ -38,6 +38,7 @@ Y_UNIT_TEST_SUITE(TCdcStreamTests) { NLs::StreamMode(NKikimrSchemeOp::ECdcStreamModeKeysOnly), NLs::StreamFormat(NKikimrSchemeOp::ECdcStreamFormatProto), NLs::StreamState(NKikimrSchemeOp::ECdcStreamStateReady), + NLs::StreamVirtualTimestamps(false), }); TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Stream/streamImpl"), {NLs::PathExist}); @@ -65,6 +66,39 @@ Y_UNIT_TEST_SUITE(TCdcStreamTests) { TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Stream/streamImpl"), {NLs::PathNotExist}); } + Y_UNIT_TEST(VirtualTimestamps) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, 
TTestEnvOptions().EnableProtoSourceIdInfo(true)); + ui64 txId = 100; + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "value" Type: "Uint64" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + TestCreateCdcStream(runtime, ++txId, "/MyRoot", R"( + TableName: "Table" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + VirtualTimestamps: true + } + )"); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Stream"), { + NLs::PathExist, + NLs::StreamMode(NKikimrSchemeOp::ECdcStreamModeKeysOnly), + NLs::StreamFormat(NKikimrSchemeOp::ECdcStreamFormatProto), + NLs::StreamState(NKikimrSchemeOp::ECdcStreamStateReady), + NLs::StreamVirtualTimestamps(true), + }); + } + Y_UNIT_TEST(RetentionPeriod) { TTestBasicRuntime runtime; TTestEnv env(runtime, TTestEnvOptions().EnableProtoSourceIdInfo(true)); diff --git a/ydb/core/tx/schemeshard/ut_cdc_stream_reboots.cpp b/ydb/core/tx/schemeshard/ut_cdc_stream_reboots.cpp index c512c74f30..845bae278c 100644 --- a/ydb/core/tx/schemeshard/ut_cdc_stream_reboots.cpp +++ b/ydb/core/tx/schemeshard/ut_cdc_stream_reboots.cpp @@ -3,7 +3,7 @@ using namespace NSchemeShardUT_Private; Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { - Y_UNIT_TEST(CreateStream) { + void CreateStream(const TMaybe<NKikimrSchemeOp::ECdcStreamState>& state = Nothing(), bool vt = false) { TTestWithReboots t; t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { { @@ -17,20 +17,46 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { t.TestEnv->TestWaitNotification(runtime, t.TxId); } - TestCreateCdcStream(runtime, ++t.TxId, "/MyRoot", R"( + NKikimrSchemeOp::TCdcStreamDescription streamDesc; + streamDesc.SetName("Stream"); + streamDesc.SetMode(NKikimrSchemeOp::ECdcStreamModeKeysOnly); + streamDesc.SetFormat(NKikimrSchemeOp::ECdcStreamFormatProto); + 
streamDesc.SetVirtualTimestamps(vt); + + if (state) { + streamDesc.SetState(*state); + } + + TString strDesc; + const bool ok = google::protobuf::TextFormat::PrintToString(streamDesc, &strDesc); + UNIT_ASSERT_C(ok, "protobuf serialization failed"); + + TestCreateCdcStream(runtime, ++t.TxId, "/MyRoot", Sprintf(R"( TableName: "Table" - StreamDescription { - Name: "Stream" - Mode: ECdcStreamModeKeysOnly - Format: ECdcStreamFormatProto - } - )"); + StreamDescription { %s } + )", strDesc.c_str())); t.TestEnv->TestWaitNotification(runtime, t.TxId); - TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Stream"), {NLs::PathExist}); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Stream"), { + NLs::PathExist, + NLs::StreamState(state.GetOrElse(NKikimrSchemeOp::ECdcStreamStateReady)), + NLs::StreamVirtualTimestamps(vt), + }); }); } + Y_UNIT_TEST(CreateStream) { + CreateStream(); + } + + Y_UNIT_TEST(CreateStreamExplicitReady) { + CreateStream(NKikimrSchemeOp::ECdcStreamStateReady); + } + + Y_UNIT_TEST(CreateStreamWithVirtualTimestamps) { + CreateStream({}, true); + } + Y_UNIT_TEST(AlterStream) { TTestWithReboots t; t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { diff --git a/ydb/core/tx/schemeshard/ut_compaction.cpp b/ydb/core/tx/schemeshard/ut_compaction.cpp index f80f73621e..b18b6e8d0c 100644 --- a/ydb/core/tx/schemeshard/ut_compaction.cpp +++ b/ydb/core/tx/schemeshard/ut_compaction.cpp @@ -84,7 +84,7 @@ void WriteData( TString writeQuery = Sprintf(R"( ( (let key '( '('key (Uint64 '%lu)) ) ) - (let value '('('value (Utf8 'MostMeaninglessValueInTheWorld)) ) ) + (let value '('('value (Utf8 'MostMeaninglessValueInTheWorldButMaybeItIsSizeMeaningFullThusItIsMostMeaningFullValueInTheWorldOfMeaninglessFullness)) ) ) (return (AsList (UpdateRow '__user__%s key value) )) ) )", key, tableName); @@ -100,6 +100,32 @@ void WriteData( } } +void WriteDataSpreadKeys( + TTestActorRuntime &runtime, + const char* name, + ui64 rowCount, + ui64 tabletId = 
TTestTxConfig::FakeHiveTablets) +{ + auto fnWriteRow = [&] (ui64 tabletId, ui64 key, const char* tableName) { + TString writeQuery = Sprintf(R"( + ( + (let key '( '('key (Uint64 '%lu)) ) ) + (let value '('('value (Utf8 'MostMeaninglessValueInTheWorldButMaybeItIsSizeMeaningFullThusItIsMostMeaningFullValueInTheWorldOfMeaninglessFullness)) ) ) + (return (AsList (UpdateRow '__user__%s key value) )) + ) + )", key, tableName); + NKikimrMiniKQL::TResult result; + TString err; + NKikimrProto::EReplyStatus status = LocalMiniKQL(runtime, tabletId, writeQuery, result, err); + UNIT_ASSERT_VALUES_EQUAL(err, ""); + UNIT_ASSERT_VALUES_EQUAL(status, NKikimrProto::EReplyStatus::OK);; + }; + + for (ui64 key = 0; key < rowCount; ++key) { + fnWriteRow(tabletId, key * 1'000'000, name); + } +} + void CreateTableWithData( TTestActorRuntime &runtime, TTestEnv& env, @@ -172,6 +198,12 @@ void SetBackgroundCompaction(TTestActorRuntime &runtime, TTestEnv& env, ui64 sch SetFeatures(runtime, env, schemeShard, features); } +void SetEnableBorrowedSplitCompaction(TTestActorRuntime &runtime, TTestEnv& env, ui64 schemeShard, bool value) { + NKikimrConfig::TFeatureFlags features; + features.SetEnableBorrowedSplitCompaction(value); + SetFeatures(runtime, env, schemeShard, features); +} + void DisableBackgroundCompactionViaRestart( TTestActorRuntime& runtime, TTestEnv&, @@ -731,15 +763,24 @@ Y_UNIT_TEST_SUITE(TSchemeshardBackgroundCompactionTest) { }; Y_UNIT_TEST_SUITE(TSchemeshardBorrowedCompactionTest) { - Y_UNIT_TEST(SchemeshardShouldCompactBorrowed) { + Y_UNIT_TEST(SchemeshardShouldCompactBorrowedBeforeSplit) { + // In this test we check that + // 1. Copy table is not compacted until we want to split it + // 2. 
After borrow compaction both src and dst tables are background compacted + + NDataShard::gDbStatsReportInterval = TDuration::Seconds(1); + NDataShard::gDbStatsDataSizeResolution = 10; + NDataShard::gDbStatsRowCountResolution = 10; + TTestBasicRuntime runtime; TTestEnv env(runtime); - runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_TRACE); + runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_DEBUG); runtime.SetLogPriority(NKikimrServices::FLAT_TX_SCHEMESHARD, NActors::NLog::PRI_TRACE); // in case it is not enabled by default SetBackgroundCompaction(runtime, env, TTestTxConfig::SchemeShard, true); + SetEnableBorrowedSplitCompaction(runtime, env, TTestTxConfig::SchemeShard, true); auto configRequest = GetTestCompactionConfig(); auto* compactionConfig = configRequest->Record.MutableConfig()->MutableCompactionConfig(); @@ -755,11 +796,14 @@ Y_UNIT_TEST_SUITE(TSchemeshardBorrowedCompactionTest) { // write to all shards in hacky way auto simpleInfo = GetPathInfo(runtime, "/MyRoot/Simple"); for (auto shard: simpleInfo.Shards) { - WriteData(runtime, "Simple", 0, 100, shard); + WriteDataSpreadKeys(runtime, "Simple", 100, shard); } } env.SimulateSleep(runtime, TDuration::Seconds(1)); + auto simpleInfo = GetPathInfo(runtime, "/MyRoot/Simple"); + UNIT_ASSERT_VALUES_EQUAL(simpleInfo.Shards.size(), 5UL); + // copy table TestCreateTable(runtime, ++txId, "/MyRoot", R"( Name: "CopyTable" @@ -769,8 +813,11 @@ Y_UNIT_TEST_SUITE(TSchemeshardBorrowedCompactionTest) { env.SimulateSleep(runtime, TDuration::Seconds(30)); - auto simpleInfo = GetPathInfo(runtime, "/MyRoot/Simple"); + simpleInfo = GetPathInfo(runtime, "/MyRoot/Simple"); + UNIT_ASSERT_VALUES_EQUAL(simpleInfo.Shards.size(), 5UL); + auto copyInfo = GetPathInfo(runtime, "/MyRoot/CopyTable"); + UNIT_ASSERT_VALUES_EQUAL(copyInfo.Shards.size(), 5UL); // borrow compaction only runs when we split, so nothing should be borrow compacted yet @@ -798,10 +845,15 @@ 
Y_UNIT_TEST_SUITE(TSchemeshardBorrowedCompactionTest) { } })"); env.TestWaitNotification(runtime, txId); - - // schemeshard should get stats from DS to start borrower compactions env.SimulateSleep(runtime, TDuration::Seconds(30)); + simpleInfo = GetPathInfo(runtime, "/MyRoot/Simple"); + UNIT_ASSERT_VALUES_EQUAL(simpleInfo.Shards.size(), 5UL); + + copyInfo = GetPathInfo(runtime, "/MyRoot/CopyTable"); + + UNIT_ASSERT(copyInfo.Shards.size() > 5); + // should compact all borrowed data (note that background will not compact until then) { @@ -842,6 +894,158 @@ Y_UNIT_TEST_SUITE(TSchemeshardBorrowedCompactionTest) { } } + Y_UNIT_TEST(SchemeshardShouldCompactBorrowedAfterSplitMerge) { + // KIKIMR-15632: we want to compact shard right after split, merge. + // I.e. we compact borrowed data ASAP except copy table case, when + // we don't want to compact at all. + + NDataShard::gDbStatsReportInterval = TDuration::Seconds(1); + NDataShard::gDbStatsDataSizeResolution = 10; + NDataShard::gDbStatsRowCountResolution = 10; + + TTestBasicRuntime runtime; + TTestEnv env(runtime); + + runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_DEBUG); + runtime.SetLogPriority(NKikimrServices::FLAT_TX_SCHEMESHARD, NActors::NLog::PRI_TRACE); + + // in case it is not enabled by default + SetBackgroundCompaction(runtime, env, TTestTxConfig::SchemeShard, true); + SetEnableBorrowedSplitCompaction(runtime, env, TTestTxConfig::SchemeShard, true); + + auto configRequest = GetTestCompactionConfig(); + auto* compactionConfig = configRequest->Record.MutableConfig()->MutableCompactionConfig(); + compactionConfig->MutableBorrowedCompactionConfig()->SetInflightLimit(1); + + SetConfig(runtime, TTestTxConfig::SchemeShard, std::move(configRequest)); + + ui64 txId = 1000; + + CreateTableWithData(runtime, env, "/MyRoot", "Simple", 1, txId); + + WriteDataSpreadKeys(runtime, "Simple", 1000); + env.SimulateSleep(runtime, TDuration::Seconds(2)); + + auto simpleInfo = GetPathInfo(runtime, 
"/MyRoot/Simple"); + UNIT_ASSERT_VALUES_EQUAL(simpleInfo.Shards.size(), 1UL); + + // borrow compaction only runs when we split, so nothing should be borrow compacted yet + + { + for (auto shard: simpleInfo.Shards) { + CheckShardNotBorrowedCompacted(runtime, simpleInfo.UserTable, shard, simpleInfo.OwnerId); + } + } + + // now force split + + TestAlterTable(runtime, ++txId, "/MyRoot", R"( + Name: "Simple" + PartitionConfig { + PartitioningPolicy { + MinPartitionsCount: 2 + MaxPartitionsCount: 2 + SizeToSplit: 1 + FastSplitSettings { + SizeThreshold: 10 + RowCountThreshold: 10 + } + } + })"); + env.TestWaitNotification(runtime, txId); + env.SimulateSleep(runtime, TDuration::Seconds(30)); + + while (simpleInfo.Shards.size() < 2) { + // schemeshard should get stats from DS to start borrower compactions + env.SimulateSleep(runtime, TDuration::Seconds(1)); + + simpleInfo = GetPathInfo(runtime, "/MyRoot/Simple"); + } + + // should compact all borrowed data (note that background will not compact until then) + + { + for (auto shard: simpleInfo.Shards) { + CheckShardBorrowedCompacted(runtime, simpleInfo.UserTable, shard, simpleInfo.OwnerId); + } + } + } + + Y_UNIT_TEST(SchemeshardShouldNotCompactBorrowedAfterSplitMergeWhenDisabled) { + + NDataShard::gDbStatsReportInterval = TDuration::Seconds(1); + NDataShard::gDbStatsDataSizeResolution = 10; + NDataShard::gDbStatsRowCountResolution = 10; + + TTestBasicRuntime runtime; + TTestEnv env(runtime); + + runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_DEBUG); + runtime.SetLogPriority(NKikimrServices::FLAT_TX_SCHEMESHARD, NActors::NLog::PRI_TRACE); + + // in case it is not enabled by default + SetBackgroundCompaction(runtime, env, TTestTxConfig::SchemeShard, true); + SetEnableBorrowedSplitCompaction(runtime, env, TTestTxConfig::SchemeShard, false); + + auto configRequest = GetTestCompactionConfig(); + auto* compactionConfig = configRequest->Record.MutableConfig()->MutableCompactionConfig(); + 
compactionConfig->MutableBorrowedCompactionConfig()->SetInflightLimit(1); + + SetConfig(runtime, TTestTxConfig::SchemeShard, std::move(configRequest)); + + ui64 txId = 1000; + + CreateTableWithData(runtime, env, "/MyRoot", "Simple", 1, txId); + + WriteDataSpreadKeys(runtime, "Simple", 1000); + env.SimulateSleep(runtime, TDuration::Seconds(2)); + + auto simpleInfo = GetPathInfo(runtime, "/MyRoot/Simple"); + UNIT_ASSERT_VALUES_EQUAL(simpleInfo.Shards.size(), 1UL); + + // borrow compaction only runs when we split, so nothing should be borrow compacted yet + + { + for (auto shard: simpleInfo.Shards) { + CheckShardNotBorrowedCompacted(runtime, simpleInfo.UserTable, shard, simpleInfo.OwnerId); + } + } + + // now force split + + TestAlterTable(runtime, ++txId, "/MyRoot", R"( + Name: "Simple" + PartitionConfig { + PartitioningPolicy { + MinPartitionsCount: 2 + MaxPartitionsCount: 2 + SizeToSplit: 1 + FastSplitSettings { + SizeThreshold: 10 + RowCountThreshold: 10 + } + } + })"); + env.TestWaitNotification(runtime, txId); + env.SimulateSleep(runtime, TDuration::Seconds(30)); + + while (simpleInfo.Shards.size() < 2) { + // schemeshard should get stats from DS to start borrower compactions + env.SimulateSleep(runtime, TDuration::Seconds(1)); + + simpleInfo = GetPathInfo(runtime, "/MyRoot/Simple"); + } + + // should not compact borrowed data + + { + for (auto shard: simpleInfo.Shards) { + CheckShardNotBorrowedCompacted(runtime, simpleInfo.UserTable, shard, simpleInfo.OwnerId); + CheckShardNotBackgroundCompacted(runtime, simpleInfo.UserTable, shard, simpleInfo.OwnerId); + } + } + } + Y_UNIT_TEST(SchemeshardShouldHandleBorrowCompactionTimeouts) { TTestBasicRuntime runtime; TTestEnv env(runtime); diff --git a/ydb/core/tx/schemeshard/ut_helpers/CMakeLists.txt b/ydb/core/tx/schemeshard/ut_helpers/CMakeLists.txt index 29055a0b43..70e1cd2961 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/CMakeLists.txt +++ b/ydb/core/tx/schemeshard/ut_helpers/CMakeLists.txt @@ -17,6 +17,7 @@ 
target_link_libraries(tx-schemeshard-ut_helpers PUBLIC cpp-testing-unittest ydb-core-base core-blockstore-core + core-cms-console ydb-core-engine core-engine-minikql core-filestore-core diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp index 8a5bb30c13..3cccdfb6c6 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp @@ -473,6 +473,20 @@ TCheckFunc CheckPartCount(const TString& name, ui32 partCount, ui32 maxParts, ui }; } +TCheckFunc CheckPQAlterVersion (const TString& name, ui64 alterVersion) { + return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) { + UNIT_ASSERT(record.HasPathDescription()); + NKikimrSchemeOp::TPathDescription descr = record.GetPathDescription(); + + UNIT_ASSERT(descr.HasSelf()); + auto self = descr.GetSelf(); + UNIT_ASSERT(self.HasCreateFinished()); + UNIT_ASSERT_STRINGS_EQUAL(self.GetName(), name); + UNIT_ASSERT(descr.HasPersQueueGroup()); + UNIT_ASSERT_VALUES_EQUAL(descr.GetPersQueueGroup().GetAlterVersion(), alterVersion); + }; +} + TCheckFunc PathVersionEqual(ui64 version) { return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) { UNIT_ASSERT_VALUES_EQUAL(record.GetStatus(), NKikimrScheme::StatusSuccess); @@ -699,6 +713,12 @@ TCheckFunc StreamState(NKikimrSchemeOp::ECdcStreamState state) { }; } +TCheckFunc StreamVirtualTimestamps(bool value) { + return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) { + UNIT_ASSERT_VALUES_EQUAL(record.GetPathDescription().GetCdcStreamDescription().GetVirtualTimestamps(), value); + }; +} + TCheckFunc RetentionPeriod(const TDuration& value) { return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) { UNIT_ASSERT_VALUES_EQUAL(value.Seconds(), record.GetPathDescription().GetPersQueueGroup() diff --git a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h index 3498591f68..c1030e3f5e 100644 --- 
a/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h +++ b/ydb/core/tx/schemeshard/ut_helpers/ls_checks.h @@ -110,6 +110,7 @@ namespace NLs { TCheckFunc CheckPartCount(const TString& name, ui32 partCount, ui32 maxParts, ui32 tabletCount, ui32 groupCount, NKikimrSchemeOp::EPathState pathState = NKikimrSchemeOp::EPathState::EPathStateNoChanges); + TCheckFunc CheckPQAlterVersion (const TString& name, ui64 alterVersion); TCheckFunc IndexesCount(ui32 count); TCheckFunc IndexType(NKikimrSchemeOp::EIndexType type); @@ -120,6 +121,7 @@ namespace NLs { TCheckFunc StreamMode(NKikimrSchemeOp::ECdcStreamMode mode); TCheckFunc StreamFormat(NKikimrSchemeOp::ECdcStreamFormat format); TCheckFunc StreamState(NKikimrSchemeOp::ECdcStreamState state); + TCheckFunc StreamVirtualTimestamps(bool value); TCheckFunc RetentionPeriod(const TDuration& value); TCheckFunc HasBackupInFly(ui64 txId); diff --git a/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp b/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp index 146bb9f10d..c4f563ee27 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp @@ -3,6 +3,7 @@ #include <ydb/core/blockstore/core/blockstore.h> #include <ydb/core/base/tablet_resolver.h> +#include <ydb/core/cms/console/configs_dispatcher.h> #include <ydb/core/metering/metering.h> #include <ydb/core/tablet_flat/tablet_flat_executed.h> #include <ydb/core/tx/datashard/datashard.h> @@ -165,6 +166,25 @@ private: TDeque<TAutoPtr<IEventHandle>> InitialEventsQueue; }; +class TFakeConfigDispatcher : public TActor<TFakeConfigDispatcher> { +public: + TFakeConfigDispatcher() + : TActor<TFakeConfigDispatcher>(&TFakeConfigDispatcher::StateWork) + { + } + + STFUNC(StateWork) { + switch (ev->GetTypeRewrite()) { + HFunc(NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionRequest, Handle); + } + } + + void Handle(NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionRequest::TPtr& ev, const TActorContext& ctx) { + Y_UNUSED(ev); + 
ctx.Send(ev->Sender, new NConsole::TEvConsole::TEvConfigNotificationRequest(), 0, ev->Cookie); + } +}; + // Automatically resend notification requests to Schemeshard if it gets restarted class TTxNotificationSubscriber : public TActor<TTxNotificationSubscriber> { public: @@ -496,6 +516,7 @@ NSchemeShardUT_Private::TTestEnv::TTestEnv(TTestActorRuntime& runtime, const TTe app.SetEnableDataColumnForIndexTable(true); app.SetEnableSystemViews(opts.EnableSystemViews_); + app.SetEnablePersistentQueryStats(opts.EnablePersistentQueryStats_); app.SetEnablePersistentPartitionStats(opts.EnablePersistentPartitionStats_); app.SetEnableTtlOnAsyncIndexedTables(opts.EnableTtlOnAsyncIndexedTables_); app.SetAllowUpdateChannelsBindingOfSolomonPartitions(opts.AllowUpdateChannelsBindingOfSolomonPartitions_); @@ -505,6 +526,7 @@ NSchemeShardUT_Private::TTestEnv::TTestEnv(TTestActorRuntime& runtime, const TTe app.SetEnableProtoSourceIdInfo(opts.EnableProtoSourceIdInfo_); app.SetEnablePqBilling(opts.EnablePqBilling_); app.SetEnableBackgroundCompaction(opts.EnableBackgroundCompaction_); + app.SetEnableBorrowedSplitCompaction(opts.EnableBorrowedSplitCompaction_); app.FeatureFlags.SetEnablePublicApiExternalBlobs(true); app.SetEnableMoveIndex(opts.EnableMoveIndex_); @@ -531,6 +553,13 @@ NSchemeShardUT_Private::TTestEnv::TTestEnv(TTestActorRuntime& runtime, const TTe EnableSchemeshardPipeRetriesGuard = EnableSchemeshardPipeRetries(runtime); } + if (opts.RunFakeConfigDispatcher_) { + for (ui32 node = 0; node < runtime.GetNodeCount(); ++node) { + runtime.RegisterService(NConsole::MakeConfigsDispatcherID(runtime.GetNodeId(node)), + runtime.Register(new TFakeConfigDispatcher(), node), node); + } + } + TActorId sender = runtime.AllocateEdgeActor(); //CreateTestBootstrapper(runtime, CreateTestTabletInfo(MakeBSControllerID(TTestTxConfig::DomainUid), TTabletTypes::BSController), &CreateFlatBsController); BootSchemeShard(runtime, schemeRoot); @@ -839,7 +868,6 @@ void 
NSchemeShardUT_Private::TTestEnv::InitRootStoragePools(NActors::TTestActorR } } - void NSchemeShardUT_Private::TTestEnv::BootSchemeShard(NActors::TTestActorRuntime &runtime, ui64 schemeRoot) { CreateTestBootstrapper(runtime, CreateTestTabletInfo(schemeRoot, TTabletTypes::SchemeShard), SchemeShardFactory); } diff --git a/ydb/core/tx/schemeshard/ut_helpers/test_env.h b/ydb/core/tx/schemeshard/ut_helpers/test_env.h index 32f52930a8..ed1f3b8d9b 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/test_env.h +++ b/ydb/core/tx/schemeshard/ut_helpers/test_env.h @@ -32,7 +32,9 @@ namespace NSchemeShardUT_Private { OPTION(ui32, NChannels, 4); OPTION(bool, EnablePipeRetries, true); + OPTION(bool, RunFakeConfigDispatcher, false); OPTION(std::optional<bool>, EnableSystemViews, std::nullopt); + OPTION(std::optional<bool>, EnablePersistentQueryStats, std::nullopt); OPTION(std::optional<bool>, EnablePersistentPartitionStats, std::nullopt); OPTION(std::optional<bool>, EnableTtlOnAsyncIndexedTables, std::nullopt); OPTION(std::optional<bool>, AllowUpdateChannelsBindingOfSolomonPartitions, std::nullopt); @@ -42,6 +44,7 @@ namespace NSchemeShardUT_Private { OPTION(std::optional<bool>, EnableProtoSourceIdInfo, std::nullopt); OPTION(std::optional<bool>, EnablePqBilling, std::nullopt); OPTION(std::optional<bool>, EnableBackgroundCompaction, std::nullopt); + OPTION(std::optional<bool>, EnableBorrowedSplitCompaction, std::nullopt); OPTION(std::optional<bool>, DisableStatsBatching, std::nullopt); OPTION(THashSet<TString>, SystemBackupSIDs, {}); OPTION(std::optional<bool>, EnableMoveIndex, std::nullopt); diff --git a/ydb/core/tx/schemeshard/ut_move.cpp b/ydb/core/tx/schemeshard/ut_move.cpp index 6568c096cc..1ebe690f31 100644 --- a/ydb/core/tx/schemeshard/ut_move.cpp +++ b/ydb/core/tx/schemeshard/ut_move.cpp @@ -670,6 +670,47 @@ Y_UNIT_TEST_SUITE(TSchemeShardMoveTest) { NLs::ShardsInsideDomain(0)}); } + Y_UNIT_TEST(ResetCachedPath) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 
txId = 100; + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Uint32" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + // split table to cache current path + TestSplitTable(runtime, ++txId, "/MyRoot/Table", Sprintf(R"( + SourceTabletId: %lu + SplitBoundary { + KeyPrefix { + Tuple { Optional { Uint32: 2 } } + } + } + )", TTestTxConfig::FakeHiveTablets)); + env.TestWaitNotification(runtime, txId); + + TestMoveTable(runtime, ++txId, "/MyRoot/Table", "/MyRoot/TableMove"); + env.TestWaitNotification(runtime, txId); + + // another split to override path with a previously cached value + TestSplitTable(runtime, ++txId, "/MyRoot/TableMove", Sprintf(R"( + SourceTabletId: %lu + SourceTabletId: %lu + )", TTestTxConfig::FakeHiveTablets + 1, TTestTxConfig::FakeHiveTablets + 2)); + env.TestWaitNotification(runtime, txId); + + TestAlterTable(runtime, ++txId, "/MyRoot", R"( + Name: "TableMove" + Columns { Name: "add" Type: "Utf8" } + )"); + env.TestWaitNotification(runtime, txId); + } + Y_UNIT_TEST(Index) { TTestBasicRuntime runtime; TTestEnv env(runtime, diff --git a/ydb/core/tx/schemeshard/ut_restore.cpp b/ydb/core/tx/schemeshard/ut_restore.cpp index e454425f54..f32463537d 100644 --- a/ydb/core/tx/schemeshard/ut_restore.cpp +++ b/ydb/core/tx/schemeshard/ut_restore.cpp @@ -1757,6 +1757,38 @@ Y_UNIT_TEST_SUITE(TImportTests) { } } + Y_UNIT_TEST(ShouldSucceedWithoutTableProfiles) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions() + .RunFakeConfigDispatcher(true)); + + const auto data = GenerateTestData(R"( + columns { + name: "key" + type { optional_type { item { type_id: UTF8 } } } + } + columns { + name: "value" + type { optional_type { item { type_id: UTF8 } } } + } + primary_key: "key" + )", {{"a", 1}}); + + Run(runtime, env, ConvertTestData(data), R"( + ImportFromS3Settings { + endpoint: "localhost:%d" + scheme: HTTP + items { + 
source_prefix: "" + destination_path: "/MyRoot/Table" + } + } + )"); + + auto content = ReadTable(runtime, TTestTxConfig::FakeHiveTablets); + NKqp::CompareYson(data.Data[0].YsonStr, content); + } + Y_UNIT_TEST(ShouldWriteBillRecordOnServerlessDb) { TTestBasicRuntime runtime; TTestEnv env(runtime); diff --git a/ydb/core/tx/schemeshard/ut_ttl.cpp b/ydb/core/tx/schemeshard/ut_ttl.cpp index 4b54114f3a..cab1ca41f6 100644 --- a/ydb/core/tx/schemeshard/ut_ttl.cpp +++ b/ydb/core/tx/schemeshard/ut_ttl.cpp @@ -931,10 +931,9 @@ Y_UNIT_TEST_SUITE(TSchemeShardTTLTests) { Y_UNIT_TEST(CheckCounters) { TTestBasicRuntime runtime; - TTestEnvOptions opts; - opts.DisableStatsBatching(true); - - TTestEnv env(runtime, opts); + TTestEnv env(runtime, TTestEnvOptions() + .EnablePersistentQueryStats(false) + .DisableStatsBatching(true)); ui64 txId = 100; runtime.UpdateCurrentTime(TInstant::Now()); @@ -1041,6 +1040,30 @@ Y_UNIT_TEST_SUITE(TSchemeShardTTLTests) { WaitForCondErase(runtime); WaitForStats(runtime, 2); CheckPercentileCounter(runtime, "SchemeShard/NumShardsByTtlLag", {{"900", 2}, {"1800", 0}, {"inf", 0}}); + + // move table + TestMoveTable(runtime, ++txId, "/MyRoot/TTLEnabledTable", "/MyRoot/TTLEnabledTableMoved"); + env.TestWaitNotification(runtime, txId); + + // just after move + CheckSimpleCounter(runtime, "SchemeShard/TTLEnabledTables", 1); + CheckPercentileCounter(runtime, "SchemeShard/NumShardsByTtlLag", {{"900", 2}, {"1800", 0}, {"inf", 0}}); + + // after a while + runtime.AdvanceCurrentTime(TDuration::Minutes(20)); + WaitForStats(runtime, 2); + CheckPercentileCounter(runtime, "SchemeShard/NumShardsByTtlLag", {{"900", 0}, {"1800", 2}, {"inf", 0}}); + + // after erase + runtime.AdvanceCurrentTime(TDuration::Minutes(40)); + WaitForCondErase(runtime); + WaitForStats(runtime, 2); + CheckPercentileCounter(runtime, "SchemeShard/NumShardsByTtlLag", {{"0", 2}, {"900", 0}, {"1800", 0}, {"inf", 0}}); + + // after a while + runtime.AdvanceCurrentTime(TDuration::Minutes(10)); + 
WaitForStats(runtime, 2); + CheckPercentileCounter(runtime, "SchemeShard/NumShardsByTtlLag", {{"900", 2}, {"1800", 0}, {"inf", 0}}); } } diff --git a/ydb/core/tx/tx_proxy/upload_rows_common_impl.h b/ydb/core/tx/tx_proxy/upload_rows_common_impl.h index 9fa2810447..578fd088a3 100644 --- a/ydb/core/tx/tx_proxy/upload_rows_common_impl.h +++ b/ydb/core/tx/tx_proxy/upload_rows_common_impl.h @@ -808,7 +808,12 @@ private: } void ResolveShards(const NActors::TActorContext& ctx) { - Y_VERIFY(!GetRows().empty()); + if (GetRows().empty()) { + // We have already resolved the table and know it exists + // No reason to resolve table range as well + return ReplyIfDone(ctx); + } + Y_VERIFY(ResolveNamesResult); auto& entry = ResolveNamesResult->ResultSet.front(); @@ -939,6 +944,9 @@ private: } TBase::Become(&TThis::StateWaitResults); + + // Sanity check: don't break when we don't have any shards for some reason + ReplyIfDone(ctx); } void Handle(TEvents::TEvUndelivered::TPtr &ev, const TActorContext &ctx) { diff --git a/ydb/core/viewer/CMakeLists.txt b/ydb/core/viewer/CMakeLists.txt index 0a755eab96..0f999f1ae7 100644 --- a/ydb/core/viewer/CMakeLists.txt +++ b/ydb/core/viewer/CMakeLists.txt @@ -21,9 +21,12 @@ target_link_libraries(ydb-core-viewer PUBLIC cpp-actors-core library-cpp-archive cpp-mime-types + cpp-protobuf-json ydb-core-base core-blobstorage-base core-client-server + ydb-core-grpc_services + core-grpc_services-local_rpc ydb-core-health_check ydb-core-node_whiteboard ydb-core-protos @@ -36,6 +39,9 @@ target_link_libraries(ydb-core-viewer PUBLIC library-persqueue-topic_parser api-protos lib-deprecated-kicli + public-lib-json_value + api-grpc + cpp-client-ydb_types ) target_sources(ydb-core-viewer PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/viewer/viewer.cpp @@ -55,9 +61,12 @@ target_link_libraries(ydb-core-viewer.global PUBLIC cpp-actors-core library-cpp-archive cpp-mime-types + cpp-protobuf-json ydb-core-base core-blobstorage-base core-client-server + ydb-core-grpc_services + 
core-grpc_services-local_rpc ydb-core-health_check ydb-core-node_whiteboard ydb-core-protos @@ -70,6 +79,9 @@ target_link_libraries(ydb-core-viewer.global PUBLIC library-persqueue-topic_parser api-protos lib-deprecated-kicli + public-lib-json_value + api-grpc + cpp-client-ydb_types ) target_sources(ydb-core-viewer.global PRIVATE ${CMAKE_BINARY_DIR}/ydb/core/viewer/1cdc663173c623f6a008fb99b02498f1.cpp diff --git a/ydb/core/viewer/json_bsgroupinfo.h b/ydb/core/viewer/json_bsgroupinfo.h index 7a95f443b7..119207289f 100644 --- a/ydb/core/viewer/json_bsgroupinfo.h +++ b/ydb/core/viewer/json_bsgroupinfo.h @@ -18,8 +18,8 @@ struct TWhiteboardInfo<TEvWhiteboard::TEvBSGroupStateResponse> { static constexpr bool StaticNodesOnly = true; - static ::google::protobuf::RepeatedPtrField<TElementType>* GetElementsField(TResponseType* response) { - return response->Record.MutableBSGroupStateInfo(); + static ::google::protobuf::RepeatedPtrField<TElementType>& GetElementsField(TResponseType* response) { + return *response->Record.MutableBSGroupStateInfo(); } static ui32 GetElementKey(const TElementType& type) { diff --git a/ydb/core/viewer/json_cluster.h b/ydb/core/viewer/json_cluster.h index c7d15c25de..b4ad8b64ce 100644 --- a/ydb/core/viewer/json_cluster.h +++ b/ydb/core/viewer/json_cluster.h @@ -302,7 +302,7 @@ public: ui64 totalStorageSize = 0; ui64 availableStorageSize = 0; - for (auto& element : *TWhiteboardInfo<TEvWhiteboard::TEvPDiskStateResponse>::GetElementsField(MergedPDiskInfo.Get())) { + for (auto& element : TWhiteboardInfo<TEvWhiteboard::TEvPDiskStateResponse>::GetElementsField(MergedPDiskInfo.Get())) { if (element.HasTotalSize() && element.HasAvailableSize()) { totalStorageSize += element.GetTotalSize(); availableStorageSize += element.GetAvailableSize(); @@ -311,12 +311,12 @@ public: element.SetOverall(GetWhiteboardFlag(GetPDiskOverallFlag(element))); PDisksIndex.emplace(TWhiteboardInfo<TEvWhiteboard::TEvPDiskStateResponse>::GetElementKey(element), element); } - for 
(auto& element : *TWhiteboardInfo<TEvWhiteboard::TEvVDiskStateResponse>::GetElementsField(MergedVDiskInfo.Get())) { + for (auto& element : TWhiteboardInfo<TEvWhiteboard::TEvVDiskStateResponse>::GetElementsField(MergedVDiskInfo.Get())) { element.SetOverall(GetWhiteboardFlag(GetVDiskOverallFlag(element))); VDisksIndex.emplace(TWhiteboardInfo<TEvWhiteboard::TEvVDiskStateResponse>::GetElementKey(element), element); } NKikimrViewer::EFlag flag = NKikimrViewer::Grey; - for (const auto& element : *TWhiteboardInfo<TEvWhiteboard::TEvBSGroupStateResponse>::GetElementsField(MergedBSGroupInfo.Get())) { + for (const auto& element : TWhiteboardInfo<TEvWhiteboard::TEvBSGroupStateResponse>::GetElementsField(MergedBSGroupInfo.Get())) { flag = Max(flag, GetBSGroupOverallFlag(element, VDisksIndex, PDisksIndex)); } ui32 numberOfCpus = 0; diff --git a/ydb/core/viewer/json_counters.h b/ydb/core/viewer/json_counters.h index c56719c77d..43a30b54a0 100644 --- a/ydb/core/viewer/json_counters.h +++ b/ydb/core/viewer/json_counters.h @@ -178,13 +178,13 @@ public: const TVector<const FieldDescriptor*>& groupFields) { THolder<ResponseType> groupedResponse = TWhiteboardGrouper<ResponseType>::GroupResponse(response, groupFields, true); - auto* stateInfo = TWhiteboardInfo<ResponseType>::GetElementsField(groupedResponse.Get()); + auto& stateInfo = TWhiteboardInfo<ResponseType>::GetElementsField(groupedResponse.Get()); TStringBuf host(nodeInfo.Host); size_t pos = host.find('.'); if (pos != TString::npos) { host = host.substr(0, pos); } - for (typename TWhiteboardInfo<ResponseType>::TElementType& info : *stateInfo) { + for (typename TWhiteboardInfo<ResponseType>::TElementType& info : stateInfo) { const Reflection& reflectionFrom = *info.GetReflection(); json << ",{\"labels\":{"; if (nodeInfo.NodeId != 0) { @@ -289,8 +289,8 @@ public: ++itPDiskInfo; if (itPDiskInfo != PDiskInfo.end() && itPDiskInfo->first == nodeInfo.NodeId && itPDiskInfo->second) { RenderStats(json, itPDiskInfo->second, nodeInfo); - 
auto* stateInfo = TWhiteboardInfo<TEvWhiteboard::TEvPDiskStateResponse>::GetElementsField(itPDiskInfo->second.Get()); - for (const typename TWhiteboardInfo<TEvWhiteboard::TEvPDiskStateResponse>::TElementType& info : *stateInfo) { + auto& stateInfo = TWhiteboardInfo<TEvWhiteboard::TEvPDiskStateResponse>::GetElementsField(itPDiskInfo->second.Get()); + for (const typename TWhiteboardInfo<TEvWhiteboard::TEvPDiskStateResponse>::TElementType& info : stateInfo) { if (info.GetTotalSize() > 0 && info.GetAvailableSize() > 0) { ++pDiskUserSpaceHistogram[std::min((info.GetTotalSize() - info.GetAvailableSize()) * pDiskUserSpaceHistogram.size() / info.GetTotalSize(), pDiskUserSpaceHistogram.size() - 1)]; } @@ -330,14 +330,14 @@ public: std::unordered_map<ui64, int> bsGroupGreenVDisks; std::unordered_map<ui64, int> bsGroupNotGreenVDisks; { - auto* stateInfo = TWhiteboardInfo<TEvWhiteboard::TEvBSGroupStateResponse>::GetElementsField(mergedBSGroupInfo.Get()); - for (const typename TWhiteboardInfo<TEvWhiteboard::TEvBSGroupStateResponse>::TElementType& info : *stateInfo) { + auto& stateInfo = TWhiteboardInfo<TEvWhiteboard::TEvBSGroupStateResponse>::GetElementsField(mergedBSGroupInfo.Get()); + for (const typename TWhiteboardInfo<TEvWhiteboard::TEvBSGroupStateResponse>::TElementType& info : stateInfo) { bsGroupVDisks[info.GetGroupID()] = info.VDiskIdsSize(); } } { - auto* stateInfo = TWhiteboardInfo<TEvWhiteboard::TEvVDiskStateResponse>::GetElementsField(mergedVDiskInfo.Get()); - for (const typename TWhiteboardInfo<TEvWhiteboard::TEvVDiskStateResponse>::TElementType& info : *stateInfo) { + auto& stateInfo = TWhiteboardInfo<TEvWhiteboard::TEvVDiskStateResponse>::GetElementsField(mergedVDiskInfo.Get()); + for (const typename TWhiteboardInfo<TEvWhiteboard::TEvVDiskStateResponse>::TElementType& info : stateInfo) { auto groupId = info.GetVDiskId().GetGroupID(); bsGroupVDisks[groupId]--; auto flag = GetVDiskOverallFlag(info); diff --git a/ydb/core/viewer/json_local_rpc.h 
b/ydb/core/viewer/json_local_rpc.h new file mode 100644 index 0000000000..bab530b264 --- /dev/null +++ b/ydb/core/viewer/json_local_rpc.h @@ -0,0 +1,288 @@ +#pragma once +#include <library/cpp/actors/core/actor_bootstrapped.h> +#include <library/cpp/actors/core/mon.h> +#include <library/cpp/protobuf/json/json2proto.h> +#include <ydb/core/base/tablet_pipe.h> +#include <ydb/core/protos/services.pb.h> +#include <ydb/core/tx/schemeshard/schemeshard.h> +#include <ydb/core/tx/tx_proxy/proxy.h> +#include "viewer.h" +#include "json_pipe_req.h" + +#include <ydb/public/api/grpc/ydb_topic_v1.grpc.pb.h> +#include <ydb/core/grpc_services/rpc_calls.h> +#include <ydb/core/grpc_services/local_rpc/local_rpc.h> +#include <ydb/public/sdk/cpp/client/ydb_types/status/status.h> + +namespace NKikimr { +namespace NViewer { + +struct TEvLocalRpcPrivate { + enum EEv { + EvGrpcRequestResult = EventSpaceBegin(NActors::TEvents::ES_PRIVATE) + 100, + EvEnd + }; + + static_assert(EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(TEvents::ES_PRIVATE)"); + + template<class TProtoResult> + struct TEvGrpcRequestResult : NActors::TEventLocal<TEvGrpcRequestResult<TProtoResult>, EvGrpcRequestResult> { + THolder<TProtoResult> Message; + THolder<NYdb::TStatus> Status; + + TEvGrpcRequestResult() + {} + }; +}; + +using namespace NActors; +using NSchemeShard::TEvSchemeShard; + +template <class TProtoRequest, class TProtoResponse, class TProtoResult, class TProtoService, class TRpcEv> +class TJsonLocalRpc : public TActorBootstrapped<TJsonLocalRpc<TProtoRequest, TProtoResponse, TProtoResult, TProtoService, TRpcEv>> { + using TThis = TJsonLocalRpc<TProtoRequest, TProtoResponse, TProtoResult, TProtoService, TRpcEv>; + using TBase = TActorBootstrapped<TJsonLocalRpc<TProtoRequest, TProtoResponse, TProtoResult, TProtoService, TRpcEv>>; + + using TBase::Send; + using TBase::PassAway; + using TBase::Become; + + IViewer* Viewer; + NMon::TEvHttpInfo::TPtr Event; + 
TAutoPtr<TEvLocalRpcPrivate::TEvGrpcRequestResult<TProtoResult>> DescribeResult; + + TJsonSettings JsonSettings; + ui32 Timeout = 0; + + TString Database; + + NThreading::TFuture<TProtoResponse> RpcFuture; + +public: + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { + return NKikimrServices::TActivity::VIEWER_HANDLER; + } + + TJsonLocalRpc(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) + : Viewer(viewer) + , Event(ev) + {} + + TProtoRequest Params2Proto(const TCgiParameters& params) { + TProtoRequest request; + using google::protobuf::Descriptor; + using google::protobuf::Reflection; + using google::protobuf::FieldDescriptor; + using google::protobuf::EnumDescriptor; + using google::protobuf::EnumValueDescriptor; + const Descriptor& descriptor = *request.GetDescriptor(); + const Reflection& reflection = *request.GetReflection(); + for (int idx = 0; idx < descriptor.field_count(); ++idx) { + const FieldDescriptor* field = descriptor.field(idx); + TString name; + name = field->name(); + TString value = params.Get(name); + if (!value.empty()) { + FieldDescriptor::CppType type = field->cpp_type(); + switch (type) { +#define CASE(BT, ST, CT) case FieldDescriptor::CPPTYPE_##BT: {\ + ST res = {};\ + if (TryFromString(value, res)) {\ + reflection.Set##CT(&request, field, res);\ + }\ + break;\ + } + + CASE(INT32, i32, Int32); + CASE(INT64, i64, Int64); + CASE(UINT32, ui32, UInt32); + CASE(UINT64, ui64, UInt64); + CASE(FLOAT, float, Float); + CASE(DOUBLE, double, Double); + CASE(BOOL, bool, Bool); + CASE(STRING, string, String); +#undef CASE + case FieldDescriptor::CPPTYPE_ENUM: { + const EnumDescriptor* enumDescriptor = field->enum_type(); + const EnumValueDescriptor* enumValueDescriptor = enumDescriptor->FindValueByName(value); + int number = 0; + if (enumValueDescriptor == nullptr && TryFromString(value, number)) { + enumValueDescriptor = enumDescriptor->FindValueByNumber(number); + } + if (enumValueDescriptor != nullptr) { + 
reflection.SetEnum(&request, field, enumValueDescriptor); + } + break; + } + case FieldDescriptor::CPPTYPE_MESSAGE: + break; + } + } + } + return request; + } + + TProtoRequest Params2Proto() { + TProtoRequest request; + NProtobufJson::TJson2ProtoConfig json2ProtoConfig; + auto postData = Event->Get()->Request.GetPostContent(); + if (!postData.empty()) { + try { + NProtobufJson::Json2Proto(postData, request, json2ProtoConfig); + } + catch (const yexception& e) { + Send(Event->Sender, new NMon::TEvHttpInfoRes(HTTPBADREQUEST, 0, NMon::IEvHttpInfoRes::EContentType::Custom)); + PassAway(); + } + } else { + const auto& params(Event->Get()->Request.GetParams()); + return Params2Proto(params); + } + return request; + } + + void SendGrpcRequest() { + TProtoRequest request = Params2Proto(); + + RpcFuture = NRpcService::DoLocalRpc<TRpcEv>(std::move(request), Database, + Event->Get()->UserToken, TlsActivationContext->ActorSystem()); + RpcFuture.Subscribe([actorId = TBase::SelfId(), actorSystem = TlsActivationContext->ActorSystem()] + (const NThreading::TFuture<TProtoResponse>& future) { + auto& response = future.GetValueSync(); + auto result = MakeHolder<TEvLocalRpcPrivate::TEvGrpcRequestResult<TProtoResult>>(); + Y_VERIFY(response.operation().ready()); + if (response.operation().status() == Ydb::StatusIds::SUCCESS) { + TProtoResult rs; + response.operation().result().UnpackTo(&rs); + result->Message = MakeHolder<TProtoResult>(rs); + } + NYql::TIssues issues; + NYql::IssuesFromMessage(response.operation().issues(), issues); + result->Status = MakeHolder<NYdb::TStatus>(NYdb::EStatus(response.operation().status()), + std::move(issues)); + + actorSystem->Send(actorId, result.Release()); + }); + } + + + void Bootstrap() { + const auto& params(Event->Get()->Request.GetParams()); + JsonSettings.EnumAsNumbers = !FromStringWithDefault<bool>(params.Get("enums"), false); + JsonSettings.UI64AsString = !FromStringWithDefault<bool>(params.Get("ui64"), false); + Timeout = 
FromStringWithDefault<ui32>(params.Get("timeout"), 10000); + Database = params.Get("database_path"); + + SendGrpcRequest(); + + Become(&TThis::StateRequestedDescribe, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); + } + + void Handle(typename TEvLocalRpcPrivate::TEvGrpcRequestResult<TProtoResult>::TPtr& ev) { + DescribeResult = ev->Release(); + ReplyAndPassAway(); + } + + STATEFN(StateRequestedDescribe) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvLocalRpcPrivate::TEvGrpcRequestResult<TProtoResult>, Handle); + cFunc(TEvents::TSystem::Wakeup, HandleTimeout); + } + } + + void ReplyAndPassAway() { + TStringStream json; + TString headers = Viewer->GetHTTPOKJSON(Event->Get()); + if (DescribeResult) { + if (!DescribeResult->Status->IsSuccess()) { + headers = HTTPBADREQUEST; + if (DescribeResult->Status->GetStatus() == NYdb::EStatus::UNAUTHORIZED) { + headers = HTTPFORBIDDENJSON; + } + } else { + TProtoToJson::ProtoToJson(json, *(DescribeResult->Message), JsonSettings); + } + } else { + json << "null"; + } + + Send(Event->Sender, new NMon::TEvHttpInfoRes(headers + json.Str(), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); + PassAway(); + } + + void HandleTimeout() { + Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); + PassAway(); + } +}; + + +using TJsonDescribeTopic = TJsonLocalRpc<Ydb::Topic::DescribeTopicRequest, + Ydb::Topic::DescribeTopicResponse, + Ydb::Topic::DescribeTopicResult, + Ydb::Topic::V1::TopicService, + NKikimr::NGRpcService::TEvDescribeTopicRequest>; + +using TJsonDescribeConsumer = TJsonLocalRpc<Ydb::Topic::DescribeConsumerRequest, + Ydb::Topic::DescribeConsumerResponse, + Ydb::Topic::DescribeConsumerResult, + Ydb::Topic::V1::TopicService, + NKikimr::NGRpcService::TEvDescribeConsumerRequest>; + +template <> +struct TJsonRequestParameters<TJsonDescribeTopic> { + static TString GetParameters() { + return R"___([{"name":"path","in":"query","description":"schema 
path","required":false,"type":"string"}, + {"name":"enums","in":"query","description":"convert enums to strings","required":false,"type":"boolean"}, + {"name":"ui64","in":"query","description":"return ui64 as number","required":false,"type":"boolean"}, + {"name":"timeout","in":"query","description":"timeout in ms","required":false,"type":"integer"}, + {"name":"database_path","in":"query","description":"database path","required":false,"type":"string"}, + {"name":"include_stats","in":"query","description":"include stat flag","required":false,"type":"bool"}])___"; + } +}; + +template <> +struct TJsonRequestSummary<TJsonDescribeTopic> { + static TString GetSummary() { + return "\"Topic schema detailed information\""; + } +}; + +template <> +struct TJsonRequestDescription<TJsonDescribeTopic> { + static TString GetDescription() { + return "\"Returns detailed information about topic\""; + } +}; + + +template <> +struct TJsonRequestParameters<TJsonDescribeConsumer> { + static TString GetParameters() { + return R"___([{"name":"path","in":"query","description":"schema path","required":false,"type":"string"}, + {"name":"enums","in":"query","description":"convert enums to strings","required":false,"type":"boolean"}, + {"name":"ui64","in":"query","description":"return ui64 as number","required":false,"type":"boolean"}, + {"name":"timeout","in":"query","description":"timeout in ms","required":false,"type":"integer"}, + {"name":"database_path","in":"query","description":"database path","required":false,"type":"string"}, + {"name":"consumer","in":"query","description":"consumer name","required":false,"type":"string"}, + {"name":"include_stats","in":"query","description":"include stat flag","required":false,"type":"bool"}])___"; + } +}; + +template <> +struct TJsonRequestSummary<TJsonDescribeConsumer> { + static TString GetSummary() { + return "\"Topic's consumer detailed information\""; + } +}; + +template <> +struct TJsonRequestDescription<TJsonDescribeConsumer> { + static 
TString GetDescription() { + return "\"Returns detailed information about topic's consumer\""; + } +}; + + +} +} diff --git a/ydb/core/viewer/json_nodeinfo.h b/ydb/core/viewer/json_nodeinfo.h index 60245326fa..acd2e64ecf 100644 --- a/ydb/core/viewer/json_nodeinfo.h +++ b/ydb/core/viewer/json_nodeinfo.h @@ -18,8 +18,8 @@ struct TWhiteboardInfo<TEvWhiteboard::TEvNodeStateResponse> { static constexpr bool StaticNodesOnly = false; - static ::google::protobuf::RepeatedPtrField<TElementType>* GetElementsField(TResponseType* response) { - return response->Record.MutableNodeStateInfo(); + static ::google::protobuf::RepeatedPtrField<TElementType>& GetElementsField(TResponseType* response) { + return *response->Record.MutableNodeStateInfo(); } static const TString& GetElementKey(const TElementType& type) { diff --git a/ydb/core/viewer/json_pdiskinfo.h b/ydb/core/viewer/json_pdiskinfo.h index 881c108e51..f106212843 100644 --- a/ydb/core/viewer/json_pdiskinfo.h +++ b/ydb/core/viewer/json_pdiskinfo.h @@ -17,8 +17,8 @@ struct TWhiteboardInfo<TEvWhiteboard::TEvPDiskStateResponse> { static constexpr bool StaticNodesOnly = true; - static ::google::protobuf::RepeatedPtrField<TElementType>* GetElementsField(TResponseType* response) { - return response->Record.MutablePDiskStateInfo(); + static ::google::protobuf::RepeatedPtrField<TElementType>& GetElementsField(TResponseType* response) { + return *response->Record.MutablePDiskStateInfo(); } static std::pair<ui32, ui32> GetElementKey(const TElementType& type) { diff --git a/ydb/core/viewer/json_storage.h b/ydb/core/viewer/json_storage.h index fd6dec6e0f..7df3bd871a 100644 --- a/ydb/core/viewer/json_storage.h +++ b/ydb/core/viewer/json_storage.h @@ -53,6 +53,10 @@ class TJsonStorage : public TViewerPipeClient<TJsonStorage> { THashMap<TTabletId, THolder<TEvHive::TEvResponseHiveStorageStats>> HiveStorageStats; THolder<TEvBlobStorage::TEvControllerConfigResponse> BaseConfig; + // indexes + THashMap<TVDiskID, 
NKikimrWhiteboard::TVDiskStateInfo*> VDiskId2vDiskStateInfo; + THashMap<ui32, std::vector<TNodeId>> Group2NodeId; + struct TStoragePoolInfo { TString Kind; TSet<TString> Groups; @@ -77,6 +81,11 @@ class TJsonStorage : public TViewerPipeClient<TJsonStorage> { SpaceProblems, }; + enum ETimeoutTag { + TimeoutBSC, + TimeoutFinal, + }; + EWith With = EWith::Everything; public: @@ -146,7 +155,9 @@ public: RequestBSControllerConfig(); - TBase::Become(&TThis::StateWork, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); + TBase::Become(&TThis::StateWork); + Schedule(TDuration::MilliSeconds(Timeout / 100 * 70), new TEvents::TEvWakeup(TimeoutBSC)); // 70% timeout (for bsc) + Schedule(TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup(TimeoutFinal)); // timeout for the rest } void PassAway() override { @@ -198,7 +209,6 @@ public: } } for (TNodeId nodeId : additionalNodeIds) { - FilterNodeIds.insert(nodeId); SendNodeRequests(nodeId); } } @@ -300,7 +310,13 @@ public: void Handle(TEvWhiteboard::TEvVDiskStateResponse::TPtr& ev) { ui64 nodeId = ev.Get()->Cookie; - VDiskInfo[nodeId] = ev->Release(); + auto& vDiskInfo = VDiskInfo[nodeId] = ev->Release(); + if (vDiskInfo != nullptr) { + for (auto& vDiskStateInfo : *(vDiskInfo->Record.MutableVDiskStateInfo())) { + vDiskStateInfo.SetNodeId(nodeId); + VDiskId2vDiskStateInfo[VDiskIDFromVDiskID(vDiskStateInfo.GetVDiskId())] = &vDiskStateInfo; + } + } RequestDone(); } @@ -317,6 +333,9 @@ public: continue; } StoragePoolInfo[storagePoolName].Groups.emplace(ToString(info.GetGroupID())); + for (const auto& vDiskNodeId : info.GetVDiskNodeIds()) { + Group2NodeId[info.GetGroupID()].push_back(vDiskNodeId); + } } ui64 nodeId = ev.Get()->Cookie; BSGroupInfo[nodeId] = ev->Release(); @@ -337,7 +356,7 @@ public: hFunc(TEvents::TEvUndelivered, Undelivered); hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); hFunc(TEvTabletPipe::TEvClientConnected, TBase::Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); + 
hFunc(TEvents::TEvWakeup, HandleTimeout); } } @@ -459,11 +478,39 @@ public: } void ReplyAndPassAway() { + if (!FilterNodeIds.empty()) { + for (const auto& [nodeId, vDiskInfo] : VDiskInfo) { + if (FilterNodeIds.count(nodeId) == 0) { + continue; + } + if (vDiskInfo != nullptr) { + THashSet<ui32> additionalNodes; + for (const auto& vDiskStateInfo : vDiskInfo->Record.GetVDiskStateInfo()) { + ui32 groupId = vDiskStateInfo.GetVDiskId().GetGroupID(); + auto itNodes = Group2NodeId.find(groupId); + if (itNodes != Group2NodeId.end()) { + for (TNodeId groupNodeId : itNodes->second) { + if (groupNodeId != nodeId && additionalNodes.insert(groupNodeId).second) { + SendNodeRequests(groupNodeId); + } + } + } + } + } + } + + FilterNodeIds.clear(); // we don't need it anymore + + if (Requests != 0) { + return; // retry requests for neighbours of our groups (when BSC wasn't available) + } + } + TStringStream json; MergedBSGroupInfo = MergeWhiteboardResponses(BSGroupInfo, TWhiteboardInfo<TEvWhiteboard::TEvBSGroupStateResponse>::GetDefaultMergeField()); MergedVDiskInfo = MergeWhiteboardResponses(VDiskInfo, TWhiteboardInfo<TEvWhiteboard::TEvVDiskStateResponse>::GetDefaultMergeField()); MergedPDiskInfo = MergeWhiteboardResponses(PDiskInfo, TWhiteboardInfo<TEvWhiteboard::TEvPDiskStateResponse>::GetDefaultMergeField()); - for (auto& element : *TWhiteboardInfo<TEvWhiteboard::TEvPDiskStateResponse>::GetElementsField(MergedPDiskInfo.Get())) { + for (auto& element : TWhiteboardInfo<TEvWhiteboard::TEvPDiskStateResponse>::GetElementsField(MergedPDiskInfo.Get())) { element.SetStateFlag(GetWhiteboardFlag(GetPDiskStateFlag(element))); auto overall = NKikimrViewer::EFlag_Name(GetPDiskOverallFlag(element)); auto key = TWhiteboardInfo<TEvWhiteboard::TEvPDiskStateResponse>::GetElementKey(element); @@ -471,7 +518,7 @@ public: PDisksOverall.emplace(key, overall); PDisksIndex.emplace(key, element); } - for (auto& element : 
*TWhiteboardInfo<TEvWhiteboard::TEvVDiskStateResponse>::GetElementsField(MergedVDiskInfo.Get())) { + for (auto& element : TWhiteboardInfo<TEvWhiteboard::TEvVDiskStateResponse>::GetElementsField(MergedVDiskInfo.Get())) { auto overall = NKikimrViewer::EFlag_Name(GetVDiskOverallFlag(element)); auto key = TWhiteboardInfo<TEvWhiteboard::TEvVDiskStateResponse>::GetElementKey(element); element.ClearOverall(); @@ -486,7 +533,7 @@ public: VSlotsIndex.emplace(std::move(slotId), element); } } - for (auto& element : *TWhiteboardInfo<TEvWhiteboard::TEvBSGroupStateResponse>::GetElementsField(MergedBSGroupInfo.Get())) { + for (auto& element : TWhiteboardInfo<TEvWhiteboard::TEvBSGroupStateResponse>::GetElementsField(MergedBSGroupInfo.Get())) { auto state = GetBSGroupOverallState(element, VDisksIndex, PDisksIndex); auto key = ToString(TWhiteboardInfo<TEvWhiteboard::TEvBSGroupStateResponse>::GetElementKey(element)); if (state.MissingDisks > 0) { @@ -654,7 +701,14 @@ public: PassAway(); } - void HandleTimeout() { + void HandleTimeout(TEvents::TEvWakeup::TPtr& ev) { + switch (ev->Get()->Tag) { + case TimeoutBSC: + break; + case TimeoutFinal: + FilterNodeIds.clear(); + break; + } ReplyAndPassAway(); } }; diff --git a/ydb/core/viewer/json_sysinfo.h b/ydb/core/viewer/json_sysinfo.h index 05a477c2df..a279c48535 100644 --- a/ydb/core/viewer/json_sysinfo.h +++ b/ydb/core/viewer/json_sysinfo.h @@ -39,8 +39,8 @@ struct TWhiteboardInfo<TEvWhiteboard::TEvSystemStateResponse> { static constexpr bool StaticNodesOnly = false; - static ::google::protobuf::RepeatedPtrField<TElementType>* GetElementsField(TResponseType* response) { - return response->Record.MutableSystemStateInfo(); + static ::google::protobuf::RepeatedPtrField<TElementType>& GetElementsField(TResponseType* response) { + return *response->Record.MutableSystemStateInfo(); } static TString GetDefaultMergeField() { diff --git a/ydb/core/viewer/json_tabletinfo.h b/ydb/core/viewer/json_tabletinfo.h index 3f273e8e95..e972518dfd 100644 --- 
a/ydb/core/viewer/json_tabletinfo.h +++ b/ydb/core/viewer/json_tabletinfo.h @@ -9,24 +9,39 @@ #include <ydb/core/base/tablet_pipe.h> #include "json_pipe_req.h" #include "json_wb_req.h" +#include <span> namespace NKikimr { namespace NViewer { -template <> +template<> struct TWhiteboardInfo<TEvWhiteboard::TEvTabletStateResponse> { using TResponseType = TEvWhiteboard::TEvTabletStateResponse; using TElementType = NKikimrWhiteboard::TTabletStateInfo; + using TElementTypePacked5 = NNodeWhiteboard::TEvWhiteboard::TEvTabletStateResponsePacked5; using TElementKeyType = std::pair<ui64, ui32>; static constexpr bool StaticNodesOnly = false; - static ::google::protobuf::RepeatedPtrField<TElementType>* GetElementsField(TResponseType* response) { - return response->Record.MutableTabletStateInfo(); + static ::google::protobuf::RepeatedPtrField<TElementType>& GetElementsField(TResponseType* response) { + return *response->Record.MutableTabletStateInfo(); + } + + static std::span<const TElementTypePacked5> GetElementsFieldPacked5(TResponseType* response) { + const auto& packed5 = response->Record.GetPacked5(); + return std::span{reinterpret_cast<const TElementTypePacked5*>(packed5.data()), packed5.size() / sizeof(TElementTypePacked5)}; + } + + static size_t GetElementsCount(TResponseType* response) { + return response->Record.GetTabletStateInfo().size() + response->Record.GetPacked5().size() / sizeof(TElementTypePacked5); + } + + static TElementKeyType GetElementKey(const TElementType& type) { + return TElementKeyType(type.GetTabletId(), type.GetFollowerId()); } - static std::pair<ui64, ui32> GetElementKey(const TElementType& type) { - return std::pair<ui64, ui32>(type.GetTabletId(), type.GetFollowerId()); + static TElementKeyType GetElementKey(const TElementTypePacked5& type) { + return TElementKeyType(type.TabletId, type.FollowerId); } static TString GetDefaultMergeField() { @@ -35,9 +50,11 @@ struct TWhiteboardInfo<TEvWhiteboard::TEvTabletStateResponse> { static 
THolder<TResponseType> MergeResponses(TMap<ui32, THolder<TResponseType>>& responses, const TString& fields = GetDefaultMergeField()) { if (fields == GetDefaultMergeField()) { - return TWhiteboardMerger<TResponseType>::MergeResponsesElementKey(responses); + TStaticMergeKey<TResponseType> mergeKey; + return TWhiteboardMerger<TResponseType>::MergeResponsesBaseHybrid(responses, mergeKey); } else { - return TWhiteboardMerger<TResponseType>::MergeResponses(responses, fields); + TWhiteboardMerger<TResponseType>::TDynamicMergeKey mergeKey(fields); + return TWhiteboardMerger<TResponseType>::MergeResponsesBase(responses, mergeKey); } } }; @@ -49,6 +66,13 @@ struct TWhiteboardMergerComparator<NKikimrWhiteboard::TTabletStateInfo> { } }; +template <> +struct TWhiteboardMergerComparator<NNodeWhiteboard::TEvWhiteboard::TEvTabletStateResponsePacked5> { + bool operator ()(const NNodeWhiteboard::TEvWhiteboard::TEvTabletStateResponsePacked5& a, const NNodeWhiteboard::TEvWhiteboard::TEvTabletStateResponsePacked5& b) const { + return a.Generation < b.Generation; + } +}; + class TJsonTabletInfo : public TJsonWhiteboardRequest<TEvWhiteboard::TEvTabletStateRequest, TEvWhiteboard::TEvTabletStateResponse> { static const bool WithRetry = false; using TBase = TJsonWhiteboardRequest<TEvWhiteboard::TEvTabletStateRequest, TEvWhiteboard::TEvTabletStateResponse>; @@ -57,7 +81,10 @@ class TJsonTabletInfo : public TJsonWhiteboardRequest<TEvWhiteboard::TEvTabletSt public: TJsonTabletInfo(IViewer *viewer, NMon::TEvHttpInfo::TPtr &ev) : TJsonWhiteboardRequest(viewer, ev) - {} + { + static TString prefix = "json/tabletinfo "; + LogPrefix = prefix; + } static NTabletPipe::TClientConfig InitPipeClientConfig() { NTabletPipe::TClientConfig clientConfig; @@ -73,6 +100,7 @@ public: } void Bootstrap() override { + BLOG_TRACE("Bootstrap()"); const auto& params(Event->Get()->Request.GetParams()); Timeout = FromStringWithDefault<ui32>(params.Get("timeout"), 10000); if (params.Has("path")) { diff --git 
a/ydb/core/viewer/json_tenantinfo.h b/ydb/core/viewer/json_tenantinfo.h index 9a27f8e968..9c506e78db 100644 --- a/ydb/core/viewer/json_tenantinfo.h +++ b/ydb/core/viewer/json_tenantinfo.h @@ -15,6 +15,7 @@ #include "json_pipe_req.h" #include "wb_aggregate.h" #include "wb_merge.h" +#include "log.h" namespace NKikimr { namespace NViewer { @@ -31,6 +32,7 @@ class TJsonTenantInfo : public TViewerPipeClient<TJsonTenantInfo> { THashMap<TTabletId, THolder<TEvHive::TEvResponseHiveStorageStats>> HiveStorageStats; NMon::TEvHttpInfo::TPtr Event; THashSet<TNodeId> NodeIds; + THashSet<TNodeId> NodeIdsForTablets; TMap<TNodeId, THolder<TEvWhiteboard::TEvSystemStateResponse>> NodeSysInfo; TMap<TNodeId, THolder<TEvWhiteboard::TEvTabletStateResponse>> NodeTabletInfo; TJsonSettings JsonSettings; @@ -54,11 +56,17 @@ public: , Event(ev) {} + TString GetLogPrefix() { + static TString prefix = "json/tenantinfo "; + return prefix; + } + TString GetDomainId(TPathId pathId) { return TStringBuilder() << pathId.OwnerId << '-' << pathId.LocalPathId; } void Bootstrap() { + BLOG_TRACE("Bootstrap()"); const auto& params(Event->Get()->Request.GetParams()); JsonSettings.EnumAsNumbers = !FromStringWithDefault<bool>(params.Get("enums"), true); JsonSettings.UI64AsString = !FromStringWithDefault<bool>(params.Get("ui64"), false); @@ -103,8 +111,12 @@ public: void PassAway() override { for (const TNodeId nodeId : NodeIds) { Send(TActivationContext::InterconnectProxy(nodeId), new TEvents::TEvUnsubscribe()); - }; + } + for (const TNodeId nodeId : NodeIdsForTablets) { + Send(TActivationContext::InterconnectProxy(nodeId), new TEvents::TEvUnsubscribe()); + } TBase::PassAway(); + BLOG_TRACE("PassAway()"); } STATEFN(StateRequested) { @@ -124,6 +136,7 @@ public: } void Handle(NConsole::TEvConsole::TEvListTenantsResponse::TPtr& ev) { + BLOG_TRACE("Received ListTenantsResponse"); Ydb::Cms::ListDatabasesResult listTenantsResult; ev->Get()->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult); for 
(const TString& path : listTenantsResult.paths()) { @@ -137,6 +150,7 @@ public: } void Handle(NConsole::TEvConsole::TEvGetTenantStatusResponse::TPtr& ev) { + BLOG_TRACE("Received GetTenantStatusResponse"); Ydb::Cms::GetDatabaseStatusResult getTenantStatusResult; ev->Get()->Record.GetResponse().operation().result().UnpackTo(&getTenantStatusResult); TString path = getTenantStatusResult.path(); @@ -192,15 +206,20 @@ public: tenant.SetAliveNodes(hiveStat.GetAliveNodes()); } } + + BLOG_TRACE("Received HiveDomainStats for " << tenant.GetId() << " from " << ev->Cookie); + for (TNodeId nodeId : hiveStat.GetNodeIds()) { + TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(nodeId); if (NodeIds.insert(nodeId).second) { - TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(nodeId); THolder<NNodeWhiteboard::TEvWhiteboard::TEvSystemStateRequest> request = MakeHolder<NNodeWhiteboard::TEvWhiteboard::TEvSystemStateRequest>(); SendRequest(whiteboardServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - if (Tablets) { - THolder<NNodeWhiteboard::TEvWhiteboard::TEvTabletStateRequest> request = MakeHolder<NNodeWhiteboard::TEvWhiteboard::TEvTabletStateRequest>(); - SendRequest(whiteboardServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - } + } + if (Tablets && NodeIdsForTablets.insert(nodeId).second) { + THolder<NNodeWhiteboard::TEvWhiteboard::TEvTabletStateRequest> request = MakeHolder<NNodeWhiteboard::TEvWhiteboard::TEvTabletStateRequest>(); + request->Record.SetFormat("packed5"); + BLOG_TRACE("Tenant " << tenant.GetId() << " send to " << nodeId << " TEvTabletStateRequest: " << request->Record.ShortDebugString()); + SendRequest(whiteboardServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); } } } @@ -209,6 +228,7 @@ public: } void Handle(TEvHive::TEvResponseHiveStorageStats::TPtr& ev) { + 
BLOG_TRACE("Received HiveStorageStats from " << ev->Cookie); HiveStorageStats[ev->Cookie] = std::move(ev->Release()); RequestDone(); } @@ -235,6 +255,7 @@ public: } TString id = GetDomainId(domainInfo->DomainKey); TString path = CanonizePath(ev->Get()->Request->ResultSet.begin()->Path); + BLOG_TRACE("Received Navigate for " << id << " " << path); tenant.SetId(id); tenant.SetName(path); if (tenant.GetType() == NKikimrViewer::UnknownTenantType) { @@ -247,18 +268,21 @@ public: void Handle(NNodeWhiteboard::TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) { ui32 nodeId = ev.Get()->Cookie; + BLOG_TRACE("Received TEvSystemStateResponse from " << nodeId); NodeSysInfo[nodeId] = ev->Release(); RequestDone(); } void Handle(NNodeWhiteboard::TEvWhiteboard::TEvTabletStateResponse::TPtr& ev) { ui32 nodeId = ev.Get()->Cookie; + BLOG_TRACE("Received TEvTabletStateResponse from " << nodeId << " with " << ev->Get()->Record.TabletStateInfoSize() << " tablets"); NodeTabletInfo[nodeId] = ev->Release(); RequestDone(); } void Undelivered(TEvents::TEvUndelivered::TPtr &ev) { ui32 nodeId = ev.Get()->Cookie; + BLOG_TRACE("Undelivered for node " << nodeId << " event " << ev->Get()->SourceType); if (ev->Get()->SourceType == NNodeWhiteboard::TEvWhiteboard::EvSystemStateRequest) { if (NodeSysInfo.emplace(nodeId, nullptr).second) { RequestDone(); @@ -273,6 +297,7 @@ public: void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &ev) { ui32 nodeId = ev->Get()->NodeId; + BLOG_TRACE("NodeDisconnected for node " << nodeId); if (NodeSysInfo.emplace(nodeId, nullptr).second) { RequestDone(); } @@ -282,13 +307,14 @@ public: } void ReplyAndPassAway() { + BLOG_TRACE("ReplyAndPassAway() started"); TIntrusivePtr<TDomainsInfo> domains = AppData()->DomainsInfo; TIntrusivePtr<TDomainsInfo::TDomain> domain = domains->Domains.begin()->second; THolder<TEvWhiteboard::TEvTabletStateResponse> tabletInfo; THashMap<TTabletId, const NKikimrWhiteboard::TTabletStateInfo*> tabletInfoIndex; if (Tablets) { - tabletInfo 
= MergeWhiteboardResponses(NodeTabletInfo); - for (const auto& info : tabletInfo->Record.GetTabletStateInfo()) { + tabletInfo = TWhiteboardInfo<NNodeWhiteboard::TEvWhiteboard::TEvTabletStateResponse>::MergeResponses(NodeTabletInfo); + for (const auto& info : TWhiteboardInfo<NNodeWhiteboard::TEvWhiteboard::TEvTabletStateResponse>::GetElementsField(tabletInfo.Get())) { tabletInfoIndex[info.GetTabletId()] = &info; } } @@ -380,10 +406,12 @@ public: uint64 storageMinAvailableSize = std::numeric_limits<ui64>::max(); uint64 storageGroups = 0; for (const NKikimrHive::THiveStoragePoolStats& poolStat : record.GetPools()) { - for (const NKikimrHive::THiveStorageGroupStats& groupStat : poolStat.GetGroups()) { - storageAllocatedSize += groupStat.GetAllocatedSize(); - storageMinAvailableSize = std::min(storageMinAvailableSize, groupStat.GetAvailableSize()); - ++storageGroups; + if (poolStat.GetName().StartsWith(tenantBySubDomainKey.GetName())) { + for (const NKikimrHive::THiveStorageGroupStats& groupStat : poolStat.GetGroups()) { + storageAllocatedSize += groupStat.GetAllocatedSize(); + storageMinAvailableSize = std::min(storageMinAvailableSize, groupStat.GetAvailableSize()); + ++storageGroups; + } } } tenant.SetStorageAllocatedSize(storageAllocatedSize); @@ -504,6 +532,7 @@ public: } void HandleTimeout() { + BLOG_TRACE("Timeout occurred"); Result.AddErrors("Timeout occurred"); ReplyAndPassAway(); } diff --git a/ydb/core/viewer/json_vdiskinfo.h b/ydb/core/viewer/json_vdiskinfo.h index db681f044b..5c015a5258 100644 --- a/ydb/core/viewer/json_vdiskinfo.h +++ b/ydb/core/viewer/json_vdiskinfo.h @@ -54,8 +54,8 @@ struct TWhiteboardInfo<TEvWhiteboard::TEvVDiskStateResponse> { static constexpr bool StaticNodesOnly = true; - static ::google::protobuf::RepeatedPtrField<TElementType>* GetElementsField(TResponseType* response) { - return response->Record.MutableVDiskStateInfo(); + static ::google::protobuf::RepeatedPtrField<TElementType>& GetElementsField(TResponseType* response) { + 
return *response->Record.MutableVDiskStateInfo(); } static const NKikimrBlobStorage::TVDiskID& GetElementKey(const TElementType& type) { diff --git a/ydb/core/viewer/json_wb_req.h b/ydb/core/viewer/json_wb_req.h index 17dfc6f65e..99b33c2190 100644 --- a/ydb/core/viewer/json_wb_req.h +++ b/ydb/core/viewer/json_wb_req.h @@ -10,6 +10,7 @@ #include "wb_merge.h" #include "wb_group.h" #include "wb_filter.h" +#include "log.h" namespace NKikimr { namespace NViewer { @@ -59,12 +60,18 @@ protected: std::unordered_map<TNodeId, ui32> NodeRetries; bool StaticNodesOnly = TWhiteboardInfo<ResponseType>::StaticNodesOnly; TDuration RetryPeriod = TDuration::MilliSeconds(500); + TString LogPrefix; + TString Format; public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::VIEWER_HANDLER; } + TString GetLogPrefix() { + return LogPrefix; + } + TJsonWhiteboardRequest(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) : Viewer(viewer) , Initiator(ev->Sender) @@ -74,6 +81,10 @@ public: THolder<RequestType> BuildRequest(TNodeId nodeId) { Y_UNUSED(nodeId); THolder<RequestType> request = MakeHolder<RequestType>(); + constexpr bool hasFormat = requires(const RequestType* r) {r->Record.GetFormat();}; + if constexpr (hasFormat) { + request->Record.SetFormat(Format); + } if (ChangedSince != 0) { request->Record.SetChangedSince(ChangedSince); } @@ -127,6 +138,7 @@ public: Retries = FromStringWithDefault<ui32>(params.Get("retries"), 0); RetryPeriod = TDuration::MilliSeconds(FromStringWithDefault<ui32>(params.Get("retry_period"), RetryPeriod.MilliSeconds())); StaticNodesOnly = FromStringWithDefault<bool>(params.Get("static"), StaticNodesOnly); + Format = params.Get("format"); if (FilterNodeIds.empty()) { if (AliveOnly) { static const TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(TBase::SelfId().NodeId()); @@ -263,14 +275,35 @@ public: } } - template <typename ResponseRecordType> - void UpdateDuration(ResponseRecordType& record) { + 
template<typename ResponseRecordType> + TString GetResponseDuration(ResponseRecordType& record) { + constexpr bool hasResponseDuration = requires(const ResponseRecordType& r) {r.GetResponseDuration();}; + if constexpr (hasResponseDuration) { + return TStringBuilder() << " ResponseDuration: " << record.GetResponseDuration() << "us"; + } else { + return {}; + } + } + + template<typename ResponseRecordType> + TString GetProcessDuration(ResponseRecordType& record) { + constexpr bool hasProcessDuration = requires(const ResponseRecordType& r) {r.GetProcessDuration();}; + if constexpr (hasProcessDuration) { + return TStringBuilder() << " ProcessDuration: " << record.GetProcessDuration() << "us"; + } else { + return {}; + } + } + + template<typename ResponseRecordType> + void OnRecordReceived(ResponseRecordType& record, TNodeId nodeId) { record.SetResponseDuration((AppData()->TimeProvider->Now() - NodesRequestedTime).MicroSeconds()); + BLOG_TRACE("Received " << typeid(ResponseType).name() << " from " << nodeId << GetResponseDuration(record) << GetProcessDuration(record)); } void HandleNodeInfo(typename ResponseType::TPtr& ev) { - UpdateDuration(ev->Get()->Record); ui64 nodeId = ev.Get()->Cookie; + OnRecordReceived(ev->Get()->Record, nodeId); PerNodeStateInfo[nodeId] = ev->Release(); NodeErrors.erase(nodeId); TBase::RequestDone(); diff --git a/ydb/core/viewer/log.h b/ydb/core/viewer/log.h new file mode 100644 index 0000000000..8dce2ce572 --- /dev/null +++ b/ydb/core/viewer/log.h @@ -0,0 +1,23 @@ +#pragma once + +#include <ydb/core/protos/services.pb.h> +#include <library/cpp/actors/core/log.h> + +namespace NKikimr { +namespace NViewer { + +inline TString GetLogPrefix() { + return {}; +} + +} +} + +#define BLOG_D(stream) LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::VIEWER, GetLogPrefix() << stream) +#define BLOG_I(stream) LOG_INFO_S(*TlsActivationContext, NKikimrServices::VIEWER, GetLogPrefix() << stream) +#define BLOG_W(stream) LOG_WARN_S(*TlsActivationContext, 
NKikimrServices::VIEWER, GetLogPrefix() << stream) +#define BLOG_NOTICE(stream) LOG_NOTICE_S(*TlsActivationContext, NKikimrServices::VIEWER, GetLogPrefix() << stream) +#define BLOG_ERROR(stream) LOG_ERROR_S(*TlsActivationContext, NKikimrServices::VIEWER, GetLogPrefix() << stream) +#define BLOG_CRIT(stream) LOG_CRIT_S(*TlsActivationContext, NKikimrServices::VIEWER, GetLogPrefix() << stream) +#define BLOG_TRACE(stream) LOG_TRACE_S(*TlsActivationContext, NKikimrServices::VIEWER, GetLogPrefix() << stream) +#define Y_ENSURE_LOG(cond, stream) if (!(cond)) { BLOG_ERROR("Failed condition \"" << #cond << "\" " << stream); } diff --git a/ydb/core/viewer/viewer.cpp b/ydb/core/viewer/viewer.cpp index 12e2c9bb97..c182e1af29 100644 --- a/ydb/core/viewer/viewer.cpp +++ b/ydb/core/viewer/viewer.cpp @@ -49,6 +49,7 @@ #include "json_healthcheck.h" #include "json_nodes.h" #include "json_acl.h" +#include "json_local_rpc.h" namespace NKikimr { namespace NViewer { @@ -179,6 +180,8 @@ public: TWhiteboardInfo<TEvWhiteboard::TEvNodeStateResponse>::InitMerger(); TWhiteboardInfo<TEvWhiteboard::TEvBSGroupStateResponse>::InitMerger(); + JsonHandlers["/json/describe_topic"] = new TJsonHandler<TJsonDescribeTopic>; + JsonHandlers["/json/describe_consumer"] = new TJsonHandler<TJsonDescribeConsumer>; JsonHandlers["/json/nodelist"] = new TJsonHandler<TJsonNodeList>; JsonHandlers["/json/nodeinfo"] = new TJsonHandler<TJsonNodeInfo>; JsonHandlers["/json/vdiskinfo"] = new TJsonHandler<TJsonVDiskInfo>; diff --git a/ydb/core/viewer/viewer_ut.cpp b/ydb/core/viewer/viewer_ut.cpp index 020239e8a5..a9d049b828 100644 --- a/ydb/core/viewer/viewer_ut.cpp +++ b/ydb/core/viewer/viewer_ut.cpp @@ -19,25 +19,77 @@ using namespace NKikimrWhiteboard; Y_UNIT_TEST_SUITE(Viewer) { Y_UNIT_TEST(TabletMerging) { - TMap<ui32, THolder<TEvWhiteboard::TEvTabletStateResponse>> nodesData; - for (ui32 nodeId = 1; nodeId <= 1000; ++nodeId) { - THolder<TEvWhiteboard::TEvTabletStateResponse>& nodeData = nodesData[nodeId] = 
MakeHolder<TEvWhiteboard::TEvTabletStateResponse>(); - nodeData->Record.MutableTabletStateInfo()->Reserve(10000); - for (ui32 tabletId = 1; tabletId <= 10000; ++tabletId) { - NKikimrWhiteboard::TTabletStateInfo* tabletData = nodeData->Record.AddTabletStateInfo(); - tabletData->SetTabletId(tabletId); - tabletData->SetLeader(true); - tabletData->SetGeneration(13); - tabletData->SetChangeTime(TInstant::Now().MilliSeconds()); + THPTimer timer; + { + TMap<ui32, TString> nodesBlob; + timer.Reset(); + for (ui32 nodeId = 1; nodeId <= 10000; ++nodeId) { + THolder<TEvWhiteboard::TEvTabletStateResponse> nodeData = MakeHolder<TEvWhiteboard::TEvTabletStateResponse>(); + nodeData->Record.MutableTabletStateInfo()->Reserve(10000); + for (ui32 tabletId = 1; tabletId <= 10000; ++tabletId) { + NKikimrWhiteboard::TTabletStateInfo* tabletData = nodeData->Record.AddTabletStateInfo(); + tabletData->SetTabletId(tabletId); + tabletData->SetLeader(true); + tabletData->SetGeneration(13); + tabletData->SetChangeTime(TInstant::Now().MilliSeconds()); + tabletData->MutableTenantId()->SetSchemeShard(8); + tabletData->MutableTenantId()->SetPathId(14); + tabletData->MutableChannelGroupIDs()->Add(9); + tabletData->MutableChannelGroupIDs()->Add(10); + tabletData->MutableChannelGroupIDs()->Add(11); + } + nodesBlob[nodeId] = nodeData->Record.SerializeAsString(); + } + Ctest << "Build = " << timer.Passed() << Endl; + timer.Reset(); + TMap<ui32, THolder<TEvWhiteboard::TEvTabletStateResponse>> nodesData; + for (const auto& [nodeId, nodeBlob] : nodesBlob) { + THolder<TEvWhiteboard::TEvTabletStateResponse> nodeData = MakeHolder<TEvWhiteboard::TEvTabletStateResponse>(); + bool res = nodeData->Record.ParseFromString(nodesBlob[nodeId]); + Y_UNUSED(res); + nodesData[nodeId] = std::move(nodeData); } + THolder<TEvWhiteboard::TEvTabletStateResponse> result = MergeWhiteboardResponses(nodesData); + Ctest << "Merge = " << timer.Passed() << Endl; + UNIT_ASSERT_LT(timer.Passed(), 30); + 
UNIT_ASSERT_VALUES_EQUAL(result->Record.TabletStateInfoSize(), 10000); + timer.Reset(); } - Ctest << "Data has built" << Endl; + Ctest << "Destroy = " << timer.Passed() << Endl; + } + + Y_UNIT_TEST(TabletMergingPacked) { THPTimer timer; - THolder<TEvWhiteboard::TEvTabletStateResponse> result = MergeWhiteboardResponses(nodesData); - Ctest << "Merge = " << timer.Passed() << Endl; - UNIT_ASSERT_LT(timer.Passed(), 10); - UNIT_ASSERT_VALUES_EQUAL(result->Record.TabletStateInfoSize(), 10000); - Ctest << "Data has merged" << Endl; + { + TMap<ui32, TString> nodesBlob; + timer.Reset(); + for (ui32 nodeId = 1; nodeId <= 10000; ++nodeId) { + THolder<TEvWhiteboard::TEvTabletStateResponse> nodeData = MakeHolder<TEvWhiteboard::TEvTabletStateResponse>(); + auto* tabletData = nodeData->AllocatePackedResponse(10000); + for (ui32 tabletId = 1; tabletId <= 10000; ++tabletId) { + tabletData->TabletId = tabletId; + tabletData->FollowerId = 0; + tabletData->Generation = 13; + //tabletData->SetChangeTime(TInstant::Now().MilliSeconds()); + ++tabletData; + } + nodesBlob[nodeId] = nodeData->Record.SerializeAsString(); + } + Ctest << "Build = " << timer.Passed() << Endl; + TMap<ui32, THolder<TEvWhiteboard::TEvTabletStateResponse>> nodesData; + for (const auto& [nodeId, nodeBlob] : nodesBlob) { + THolder<TEvWhiteboard::TEvTabletStateResponse> nodeData = MakeHolder<TEvWhiteboard::TEvTabletStateResponse>(); + bool res = nodeData->Record.ParseFromString(nodesBlob[nodeId]); + Y_UNUSED(res); + nodesData[nodeId] = std::move(nodeData); + } + THolder<TEvWhiteboard::TEvTabletStateResponse> result = MergeWhiteboardResponses(nodesData); + Ctest << "Merge = " << timer.Passed() << Endl; + UNIT_ASSERT_LT(timer.Passed(), 10); + UNIT_ASSERT_VALUES_EQUAL(result->Record.TabletStateInfoSize(), 10000); + timer.Reset(); + } + Ctest << "Destroy = " << timer.Passed() << Endl; } Y_UNIT_TEST(VDiskMerging) { diff --git a/ydb/core/viewer/wb_filter.h b/ydb/core/viewer/wb_filter.h index 99c279b4fa..a09ec3571f 100644 --- 
a/ydb/core/viewer/wb_filter.h +++ b/ydb/core/viewer/wb_filter.h @@ -230,10 +230,10 @@ public: static THolder<ResponseType> FilterResponse(THolder<TResponseType>& source, const TVector<THolder<IFieldProtoFilter>>& filters) { THolder<TResponseType> result = MakeHolder<TResponseType>(); - auto* field = TWhiteboardInfo<ResponseType>::GetElementsField(result.Get()); - auto* sourceField = TWhiteboardInfo<ResponseType>::GetElementsField(source.Get()); - field->Reserve(sourceField->size()); - for (TElementType& info : *sourceField) { + auto& field = TWhiteboardInfo<ResponseType>::GetElementsField(result.Get()); + auto& sourceField = TWhiteboardInfo<ResponseType>::GetElementsField(source.Get()); + field.Reserve(sourceField.size()); + for (TElementType& info : sourceField) { size_t cnt = 0; for (const THolder<IFieldProtoFilter>& filter : filters) { if (!filter->CheckFilter(info)) @@ -242,7 +242,7 @@ public: } if (cnt == filters.size()) { // TODO: swap already allocated element of repeatedptr field - auto* element = field->Add(); + auto* element = field.Add(); element->Swap(&info); } } diff --git a/ydb/core/viewer/wb_group.h b/ydb/core/viewer/wb_group.h index 9735d8f29e..32357ed99f 100644 --- a/ydb/core/viewer/wb_group.h +++ b/ydb/core/viewer/wb_group.h @@ -244,22 +244,22 @@ public: static THolder<ResponseType> GroupResponse(THolder<TResponseType>& source, const TVector<const FieldDescriptor*>& groupFields, bool allEnums = false) { THolder<TResponseType> result = MakeHolder<TResponseType>(); - TElementsFieldType* field = TWhiteboardInfo<ResponseType>::GetElementsField(result.Get()); + TElementsFieldType& field = TWhiteboardInfo<ResponseType>::GetElementsField(result.Get()); bool allKeys = allEnums && IsEnum(groupFields); TMap<TPartProtoKey, ui32> counters; TMap<TPartProtoKey, TElementType*> elements; if (allKeys) { TPartProtoKeyEnum keyEnum(groupFields); do { - auto* element = field->Add(); + auto* element = field.Add(); TPartProtoKey key(*element, groupFields); key = 
keyEnum; element->SetCount(0); elements.emplace(key, element); } while (++keyEnum); } - auto* sourceField = TWhiteboardInfo<ResponseType>::GetElementsField(source.Get()); - for (TElementType& info : *sourceField) { + auto& sourceField = TWhiteboardInfo<ResponseType>::GetElementsField(source.Get()); + for (TElementType& info : sourceField) { TPartProtoKey key(info, groupFields); if (key.Exists()) { counters[key]++; @@ -270,7 +270,7 @@ public: if (allKeys) { elements[pr.first]->SetCount(pr.second); } else { - auto* element = field->Add(); + auto* element = field.Add(); TPartProtoKey(*element, groupFields) = pr.first; element->SetCount(pr.second); } diff --git a/ydb/core/viewer/wb_merge.h b/ydb/core/viewer/wb_merge.h index ca8e269e7c..40a5b247c7 100644 --- a/ydb/core/viewer/wb_merge.h +++ b/ydb/core/viewer/wb_merge.h @@ -74,6 +74,16 @@ public: static void ProtoMerge(::google::protobuf::Message& protoTo, const ::google::protobuf::Message& protoFrom); }; +template<typename ResponseType> +struct TStaticMergeKey { + using KeyType = typename TWhiteboardInfo<ResponseType>::TElementKeyType; + + template<typename ElementType> + KeyType GetKey(const ElementType& info) const { + return TWhiteboardInfo<ResponseType>::GetElementKey(info); + } +}; + template <typename ResponseType> class TWhiteboardMerger : public TWhiteboardMergerBase { public: @@ -138,16 +148,106 @@ public: } }; - template <typename ElementKeyType> - struct TStaticMergeKey { - using KeyType = ElementKeyType; + template<typename MergeKey> + static THolder<TResponseType> MergeResponsesBaseHybrid(TMap<ui32, THolder<TResponseType>>& responses, const MergeKey& mergeKey) { + using TElementType = typename TWhiteboardInfo<ResponseType>::TElementType; + using TElementTypePacked5 = typename TWhiteboardInfo<ResponseType>::TElementTypePacked5; + + std::unordered_map<typename MergeKey::KeyType, TElementType*> mergedData; + + struct TPackedDataCtx { + const TElementTypePacked5* Element; + ui32 NodeId; + }; + + 
std::unordered_map<typename MergeKey::KeyType, TPackedDataCtx> mergedDataPacked5; - ElementKeyType GetKey(TElementType& info) const { - return TWhiteboardInfo<ResponseType>::GetElementKey(info); + size_t projectedSize = 0; + for (auto it = responses.begin(); it != responses.end(); ++it) { + if (it->second != nullptr) { + projectedSize += TWhiteboardInfo<ResponseType>::GetElementsCount(it->second.Get()); + } } - }; + mergedData.reserve(projectedSize); + mergedDataPacked5.reserve(projectedSize); + + ui64 minResponseTime = 0; + ui64 maxResponseDuration = 0; + ui64 sumProcessDuration = 0; + + for (auto it = responses.begin(); it != responses.end(); ++it) { + if (it->second != nullptr) { + { + TWhiteboardMergerComparator<TElementType> comparator; + auto& stateInfo = TWhiteboardInfo<ResponseType>::GetElementsField(it->second.Get()); + for (TElementType& info : stateInfo) { + if (!info.HasNodeId()) { + info.SetNodeId(it->first); + } + auto key = mergeKey.GetKey(info); + auto inserted = mergedData.emplace(key, &info); + if (!inserted.second) { + if (comparator(*inserted.first->second, info)) { + inserted.first->second = &info; + } + } + } + } + { + TWhiteboardMergerComparator<TElementTypePacked5> comparator; + auto stateInfo = TWhiteboardInfo<ResponseType>::GetElementsFieldPacked5(it->second.Get()); + for (auto& info : stateInfo) { + auto key = mergeKey.GetKey(info); + auto inserted = mergedDataPacked5.emplace(key, TPackedDataCtx{ + .Element = &info, + .NodeId = it->first + }); + if (!inserted.second) { + if (comparator(*inserted.first->second.Element, info)) { + inserted.first->second = { + .Element = &info, + .NodeId = it->first + }; + } + } + } + } + if (minResponseTime == 0 || it->second->Record.GetResponseTime() < minResponseTime) { + minResponseTime = it->second->Record.GetResponseTime(); + } + if (maxResponseDuration == 0 || it->second->Record.GetResponseDuration() > maxResponseDuration) { + maxResponseDuration = it->second->Record.GetResponseDuration(); + } + 
sumProcessDuration += it->second->Record.GetProcessDuration(); + } + } + + THolder<TResponseType> result = MakeHolder<TResponseType>(); + auto& field = TWhiteboardInfo<ResponseType>::GetElementsField(result.Get()); + field.Reserve(mergedData.size() + mergedDataPacked5.size()); + for (auto it = mergedDataPacked5.begin(); it != mergedDataPacked5.end(); ++it) { + auto* element = field.Add(); + it->second.Element->Fill(*element); + element->SetNodeId(it->second.NodeId); + mergedData.erase(it->first); + } + for (auto it = mergedData.begin(); it != mergedData.end(); ++it) { + auto* element = field.Add(); + element->Swap(it->second); + } + if (minResponseTime) { + result->Record.SetResponseTime(minResponseTime); + } + if (maxResponseDuration) { + result->Record.SetResponseDuration(maxResponseDuration); + } + if (sumProcessDuration) { + result->Record.SetProcessDuration(sumProcessDuration); + } + return result; + } - template <typename MergeKey> + template<typename MergeKey> static THolder<TResponseType> MergeResponsesBase(TMap<ui32, THolder<TResponseType>>& responses, const MergeKey& mergeKey) { std::unordered_map<typename MergeKey::KeyType, TElementType*> mergedData; ui64 minResponseTime = 0; @@ -156,8 +256,8 @@ public: TWhiteboardMergerComparator<TElementType> comparator; for (auto it = responses.begin(); it != responses.end(); ++it) { if (it->second != nullptr) { - auto* stateInfo = TWhiteboardInfo<ResponseType>::GetElementsField(it->second.Get()); - for (TElementType& info : *stateInfo) { + auto& stateInfo = TWhiteboardInfo<ResponseType>::GetElementsField(it->second.Get()); + for (TElementType& info : stateInfo) { if (!info.HasNodeId()) { info.SetNodeId(it->first); } @@ -180,10 +280,10 @@ public: } THolder<TResponseType> result = MakeHolder<TResponseType>(); - auto* field = TWhiteboardInfo<ResponseType>::GetElementsField(result.Get()); - field->Reserve(mergedData.size()); + auto& field = TWhiteboardInfo<ResponseType>::GetElementsField(result.Get()); + 
field.Reserve(mergedData.size()); for (auto it = mergedData.begin(); it != mergedData.end(); ++it) { - auto* element = field->Add(); + auto* element = field.Add(); element->Swap(it->second); } if (minResponseTime) { @@ -199,7 +299,7 @@ public: } static THolder<TResponseType> MergeResponsesElementKey(TMap<ui32, THolder<TResponseType>>& responses) { - TStaticMergeKey<typename TWhiteboardInfo<ResponseType>::TElementKeyType> mergeKey; + TStaticMergeKey<ResponseType> mergeKey; return MergeResponsesBase(responses, mergeKey); } diff --git a/ydb/core/ydb_convert/table_description.cpp b/ydb/core/ydb_convert/table_description.cpp index 16ce535573..5b6eea3bd7 100644 --- a/ydb/core/ydb_convert/table_description.cpp +++ b/ydb/core/ydb_convert/table_description.cpp @@ -371,6 +371,7 @@ void FillChangefeedDescription(Ydb::Table::DescribeTableResult& out, changefeed->set_name(stream.GetName()); changefeed->set_state(Ydb::Table::ChangefeedDescription::STATE_ENABLED); + changefeed->set_virtual_timestamps(stream.GetVirtualTimestamps()); switch (stream.GetMode()) { case NKikimrSchemeOp::ECdcStreamMode::ECdcStreamModeKeysOnly: @@ -398,6 +399,7 @@ bool FillChangefeedDescription(NKikimrSchemeOp::TCdcStreamDescription& out, const Ydb::Table::Changefeed& in, Ydb::StatusIds::StatusCode& status, TString& error) { out.SetName(in.name()); + out.SetVirtualTimestamps(in.virtual_timestamps()); switch (in.mode()) { case Ydb::Table::ChangefeedMode::MODE_KEYS_ONLY: diff --git a/ydb/core/ydb_convert/ydb_convert.cpp b/ydb/core/ydb_convert/ydb_convert.cpp index ccd6828e9c..4b35c7b47e 100644 --- a/ydb/core/ydb_convert/ydb_convert.cpp +++ b/ydb/core/ydb_convert/ydb_convert.cpp @@ -795,6 +795,10 @@ void ConvertDirectoryEntry(const NKikimrSchemeOp::TDirEntry& from, Ydb::Scheme:: to->set_type(static_cast<Ydb::Scheme::Entry::Type>(from.GetPathType())); } + auto& timestamp = *to->mutable_created_at(); + timestamp.set_plan_step(from.GetCreateStep()); + timestamp.set_tx_id(from.GetCreateTxId()); + if 
(processAcl) { const bool isDir = from.GetPathType() == NKikimrSchemeOp::EPathTypeDir; ConvertAclToYdb(from.GetOwner(), from.GetEffectiveACL(), isDir, to->mutable_effective_permissions()); diff --git a/ydb/core/ymq/actor/queue_leader.cpp b/ydb/core/ymq/actor/queue_leader.cpp index e042d2ea6c..0b8d330405 100644 --- a/ydb/core/ymq/actor/queue_leader.cpp +++ b/ydb/core/ymq/actor/queue_leader.cpp @@ -636,62 +636,73 @@ void TQueueLeader::ReadFifoMessages(TReceiveMessageBatchRequestProcessing& reqIn builder.Start(); } +void TQueueLeader::OnFifoMessagesReadSuccess(const NKikimr::NClient::TValue& value, TReceiveMessageBatchRequestProcessing& reqInfo) { + const NKikimr::NClient::TValue list(value["result"]); + + if (const ui64 movedMessagesCount = value["movedMessagesCount"]) { + ADD_COUNTER(Counters_, MessagesMovedToDLQ, movedMessagesCount); + + const i64 newMessagesCount = value["newMessagesCount"]; + Y_VERIFY(newMessagesCount >= 0); + auto& shardInfo = Shards_[0]; + shardInfo.MessagesCount = static_cast<ui64>(newMessagesCount); + } + + reqInfo.Answer->Messages.resize(list.Size()); + for (size_t i = 0; i < list.Size(); ++i) { + const NKikimr::NClient::TValue& data = list[i]["SourceDataFieldsRead"]; + const NKikimr::NClient::TValue& msg = list[i]["SourceMessageFieldsRead"]; + const ui64 receiveTimestamp = msg["FirstReceiveTimestamp"]; + auto& msgAnswer = reqInfo.Answer->Messages[i]; + + msgAnswer.FirstReceiveTimestamp = (receiveTimestamp ? 
TInstant::MilliSeconds(receiveTimestamp) : reqInfo.LockSendTs); + msgAnswer.ReceiveCount = ui32(msg["ReceiveCount"]) + 1; // since the query returns old receive count value + msgAnswer.MessageId = data["MessageId"]; + msgAnswer.MessageDeduplicationId = data["DedupId"]; + msgAnswer.MessageGroupId = msg["GroupId"]; + msgAnswer.Data = data["Data"]; + msgAnswer.SentTimestamp = TInstant::MilliSeconds(ui64(msg["SentTimestamp"])); + msgAnswer.SequenceNumber = msg["Offset"]; + + msgAnswer.ReceiptHandle.SetMessageGroupId(TString(msg["GroupId"])); + msgAnswer.ReceiptHandle.SetOffset(msgAnswer.SequenceNumber); + msgAnswer.ReceiptHandle.SetReceiveRequestAttemptId(reqInfo.Event->Get()->ReceiveAttemptId); + msgAnswer.ReceiptHandle.SetLockTimestamp(reqInfo.LockSendTs.MilliSeconds()); + msgAnswer.ReceiptHandle.SetShard(0); + + const NKikimr::NClient::TValue senderIdValue = data["SenderId"]; + if (senderIdValue.HaveValue()) { + if (const TString senderId = TString(senderIdValue)) { + msgAnswer.SenderId = senderId; + } + } + + const NKikimr::NClient::TValue attributesValue = data["Attributes"]; + if (attributesValue.HaveValue()) { + msgAnswer.MessageAttributes = attributesValue; + } + } +} + void TQueueLeader::OnFifoMessagesRead(const TString& requestId, const TSqsEvents::TEvExecuted::TRecord& ev, const bool usedDLQ) { auto reqInfoIt = ReceiveMessageRequests_.find(requestId); Y_VERIFY(reqInfoIt != ReceiveMessageRequests_.end()); auto& reqInfo = reqInfoIt->second; + bool dlqExists = true; + bool success = false; if (ev.GetStatus() == TEvTxUserProxy::TEvProposeTransactionStatus::EStatus::ExecComplete) { - using NKikimr::NClient::TValue; - const TValue value(TValue::Create(ev.GetExecutionEngineEvaluatedResponse())); - const TValue list(value["result"]); - - if (const ui64 movedMessagesCount = value["movedMessagesCount"]) { - ADD_COUNTER(Counters_, MessagesMovedToDLQ, movedMessagesCount); - - const i64 newMessagesCount = value["newMessagesCount"]; - Y_VERIFY(newMessagesCount >= 0); - 
auto& shardInfo = Shards_[0]; - shardInfo.MessagesCount = static_cast<ui64>(newMessagesCount); + const NKikimr::NClient::TValue value = NKikimr::NClient::TValue::Create(ev.GetExecutionEngineEvaluatedResponse()); + dlqExists = value["dlqExists"]; + if (dlqExists) { + success = true; + OnFifoMessagesReadSuccess(value, reqInfo); } + } - reqInfo.Answer->Messages.resize(list.Size()); - for (size_t i = 0; i < list.Size(); ++i) { - const TValue& data = list[i]["SourceDataFieldsRead"]; - const TValue& msg = list[i]["SourceMessageFieldsRead"]; - const ui64 receiveTimestamp = msg["FirstReceiveTimestamp"]; - auto& msgAnswer = reqInfo.Answer->Messages[i]; - - msgAnswer.FirstReceiveTimestamp = (receiveTimestamp ? TInstant::MilliSeconds(receiveTimestamp) : reqInfo.LockSendTs); - msgAnswer.ReceiveCount = ui32(msg["ReceiveCount"]) + 1; // since the query returns old receive count value - msgAnswer.MessageId = data["MessageId"]; - msgAnswer.MessageDeduplicationId = data["DedupId"]; - msgAnswer.MessageGroupId = msg["GroupId"]; - msgAnswer.Data = data["Data"]; - msgAnswer.SentTimestamp = TInstant::MilliSeconds(ui64(msg["SentTimestamp"])); - msgAnswer.SequenceNumber = msg["Offset"]; - - msgAnswer.ReceiptHandle.SetMessageGroupId(TString(msg["GroupId"])); - msgAnswer.ReceiptHandle.SetOffset(msgAnswer.SequenceNumber); - msgAnswer.ReceiptHandle.SetReceiveRequestAttemptId(reqInfo.Event->Get()->ReceiveAttemptId); - msgAnswer.ReceiptHandle.SetLockTimestamp(reqInfo.LockSendTs.MilliSeconds()); - msgAnswer.ReceiptHandle.SetShard(0); - - const TValue senderIdValue = data["SenderId"]; - if (senderIdValue.HaveValue()) { - if (const TString senderId = TString(senderIdValue)) { - msgAnswer.SenderId = senderId; - } - } - - const TValue attributesValue = data["Attributes"]; - if (attributesValue.HaveValue()) { - msgAnswer.MessageAttributes = attributesValue; - } - } - } else { + if (!success) { const auto errStatus = NKikimr::NTxProxy::TResultStatus::EStatus(ev.GetStatus()); - if (usedDLQ && 
!NTxProxy::TResultStatus::IsSoftErrorWithoutSideEffects(errStatus)) { + if (usedDLQ && (!dlqExists || !NTxProxy::TResultStatus::IsSoftErrorWithoutSideEffects(errStatus))) { // it's possible that DLQ was removed, hence it'd be wise to refresh corresponding info DlqInfo_.Clear(); reqInfo.Answer->Failed = false; @@ -735,13 +746,13 @@ void TQueueLeader::LoadStdMessages(TReceiveMessageBatchRequestProcessing& reqInf } } -void TQueueLeader::OnLoadStdMessageResult(const TString& requestId, const ui64 offset, const TSqsEvents::TEvExecuted::TRecord& ev, const NKikimr::NClient::TValue* messageRecord, const bool ignoreMessageLoadingErrors) { +void TQueueLeader::OnLoadStdMessageResult(const TString& requestId, const ui64 offset, bool success, const NKikimr::NClient::TValue* messageRecord, const bool ignoreMessageLoadingErrors) { auto reqInfoIt = ReceiveMessageRequests_.find(requestId); Y_VERIFY(reqInfoIt != ReceiveMessageRequests_.end()); auto& reqInfo = reqInfoIt->second; --reqInfo.LoadAnswersLeft; - if (ev.GetStatus() == TEvTxUserProxy::TEvProposeTransactionStatus::EStatus::ExecComplete) { + if (success) { bool deleted = true; bool deadlineChanged = true; const bool exists = (*messageRecord)["Exists"]; @@ -820,6 +831,33 @@ void TQueueLeader::OnLoadStdMessageResult(const TString& requestId, const ui64 o } } +void TQueueLeader::OnLoadStdMessagesBatchSuccess(const NKikimr::NClient::TValue& value, TShardInfo& shardInfo, TIntrusivePtr<TLoadBatch> batch) { + const NKikimr::NClient::TValue list(value["result"]); + Y_VERIFY(list.Size() == batch->Size()); + + if (const ui64 movedMessagesCount = value["movedMessagesCount"]) { + ADD_COUNTER(Counters_, MessagesMovedToDLQ, movedMessagesCount); + + const i64 newMessagesCount = value["newMessagesCount"]; + Y_VERIFY(newMessagesCount >= 0); + shardInfo.MessagesCount = static_cast<ui64>(newMessagesCount); + } + + THashMap<ui64, const TLoadBatchEntry*> offset2entry; + offset2entry.reserve(batch->Entries.size()); + for (const TLoadBatchEntry& 
entry : batch->Entries) { + offset2entry.emplace(entry.Offset, &entry); + } + + for (size_t i = 0; i < list.Size(); ++i) { + auto msg = list[i]; + const ui64 offset = msg["Offset"]; + const auto entry = offset2entry.find(offset); + Y_VERIFY(entry != offset2entry.end()); + OnLoadStdMessageResult(entry->second->RequestId, offset, true, &msg, false); + } +} + void TQueueLeader::OnLoadStdMessagesBatchExecuted(ui64 shard, ui64 batchId, const bool usedDLQ, const TSqsEvents::TEvExecuted::TRecord& reply) { auto& shardInfo = Shards_[shard]; auto& batchingState = shardInfo.LoadBatchingState; @@ -827,37 +865,22 @@ void TQueueLeader::OnLoadStdMessagesBatchExecuted(ui64 shard, ui64 batchId, cons Y_VERIFY(batchIt != batchingState.BatchesExecuting.end()); auto batch = batchIt->second; auto status = TEvTxUserProxy::TEvProposeTransactionStatus::EStatus(reply.GetStatus()); - bool ignoreMessageLoadingErrors = false; - if (status == TEvTxUserProxy::TEvProposeTransactionStatus::EStatus::ExecComplete) { - using NKikimr::NClient::TValue; - const TValue value(TValue::Create(reply.GetExecutionEngineEvaluatedResponse())); - const TValue list(value["result"]); - Y_VERIFY(list.Size() == batch->Size()); - - if (const ui64 movedMessagesCount = value["movedMessagesCount"]) { - ADD_COUNTER(Counters_, MessagesMovedToDLQ, movedMessagesCount); - const i64 newMessagesCount = value["newMessagesCount"]; - Y_VERIFY(newMessagesCount >= 0); - shardInfo.MessagesCount = static_cast<ui64>(newMessagesCount); - } - - THashMap<ui64, const TLoadBatchEntry*> offset2entry; - offset2entry.reserve(batch->Entries.size()); - for (const TLoadBatchEntry& entry : batch->Entries) { - offset2entry.emplace(entry.Offset, &entry); + bool dlqExists = true; + bool success = false; + if (status == TEvTxUserProxy::TEvProposeTransactionStatus::EStatus::ExecComplete) { + const NKikimr::NClient::TValue value = NKikimr::NClient::TValue::Create(reply.GetExecutionEngineEvaluatedResponse()); + dlqExists = value["dlqExists"]; + if 
(dlqExists) { + success = true; + OnLoadStdMessagesBatchSuccess(value, shardInfo, batch); } + } - for (size_t i = 0; i < list.Size(); ++i) { - auto msg = list[i]; - const ui64 offset = msg["Offset"]; - const auto entry = offset2entry.find(offset); - Y_VERIFY(entry != offset2entry.end()); - OnLoadStdMessageResult(entry->second->RequestId, offset, reply, &msg, ignoreMessageLoadingErrors); - } - } else { + if (!success) { const auto errStatus = NKikimr::NTxProxy::TResultStatus::EStatus(reply.GetStatus()); - if (usedDLQ && !NTxProxy::TResultStatus::IsSoftErrorWithoutSideEffects(errStatus)) { + bool ignoreMessageLoadingErrors = false; + if (usedDLQ && (!dlqExists || !NTxProxy::TResultStatus::IsSoftErrorWithoutSideEffects(errStatus))) { // it's possible that DLQ was removed, hence it'd be wise to refresh corresponding info DlqInfo_.Clear(); ignoreMessageLoadingErrors = true; @@ -868,9 +891,11 @@ void TQueueLeader::OnLoadStdMessagesBatchExecuted(ui64 shard, ui64 batchId, cons const TLoadBatchEntry& entry = batch->Entries[i]; if (!prevRequestId || *prevRequestId != entry.RequestId) { prevRequestId = &entry.RequestId; - RLOG_SQS_REQ_ERROR(entry.RequestId, "Batch transaction failed: " << reply << ". BatchId: " << batch->BatchId); + RLOG_SQS_REQ_ERROR(entry.RequestId, + "Batch transaction failed: " << reply << ". DlqExists=" << dlqExists << ". 
BatchId: " << batch->BatchId + ); } - OnLoadStdMessageResult(entry.RequestId, entry.Offset, reply, nullptr, ignoreMessageLoadingErrors); + OnLoadStdMessageResult(entry.RequestId, entry.Offset, success, nullptr, ignoreMessageLoadingErrors); } } batchingState.BatchesExecuting.erase(batchId); diff --git a/ydb/core/ymq/actor/queue_leader.h b/ydb/core/ymq/actor/queue_leader.h index 323ba22341..2af303776b 100644 --- a/ydb/core/ymq/actor/queue_leader.h +++ b/ydb/core/ymq/actor/queue_leader.h @@ -28,6 +28,8 @@ class TQueueLeader : public TActorBootstrapped<TQueueLeader> { struct TDeleteMessageBatchRequestProcessing; struct TChangeMessageVisibilityBatchRequestProcessing; struct TGetRuntimeQueueAttributesRequestProcessing; + struct TShardInfo; + struct TLoadBatch; public: TQueueLeader(TString userName, TString queueName, TString folderId, TString rootUrl, TIntrusivePtr<TQueueCounters> counters, TIntrusivePtr<TUserCounters> userCounters, const TActorId& schemeCache, const TIntrusivePtr<TSqsEvents::TQuoterResourcesForActions>& quoterResourcesForUser); @@ -115,15 +117,17 @@ private: void LockFifoGroup(TReceiveMessageBatchRequestProcessing& reqInfo); void OnFifoGroupLocked(const TString& requestId, const TSqsEvents::TEvExecuted::TRecord& ev); void ReadFifoMessages(TReceiveMessageBatchRequestProcessing& reqInfo); + void OnFifoMessagesReadSuccess(const NKikimr::NClient::TValue& value, TReceiveMessageBatchRequestProcessing& reqInfo); void OnFifoMessagesRead(const TString& requestId, const TSqsEvents::TEvExecuted::TRecord& ev, bool usedDLQ); void GetMessagesFromInfly(TReceiveMessageBatchRequestProcessing& reqInfo); void LoadStdMessages(TReceiveMessageBatchRequestProcessing& reqInfo); - void OnLoadStdMessageResult(const TString& requestId, ui64 offset, const TSqsEvents::TEvExecuted::TRecord& ev, const NKikimr::NClient::TValue* messageRecord, bool ignoreMessageLoadingErrors); + void OnLoadStdMessageResult(const TString& requestId, ui64 offset, bool success, const 
NKikimr::NClient::TValue* messageRecord, bool ignoreMessageLoadingErrors); void TryReceiveAnotherShard(TReceiveMessageBatchRequestProcessing& reqInfo); void WaitAddMessagesToInflyOrTryAnotherShard(TReceiveMessageBatchRequestProcessing& reqInfo); void Reply(TReceiveMessageBatchRequestProcessing& reqInfo); // batching + void OnLoadStdMessagesBatchSuccess(const NKikimr::NClient::TValue& value, TShardInfo& shardInfo, TIntrusivePtr<TLoadBatch> batch); void OnLoadStdMessagesBatchExecuted(ui64 shard, ui64 batchId, const bool usedDLQ, const TSqsEvents::TEvExecuted::TRecord& reply); // delete diff --git a/ydb/core/ymq/actor/queue_schema.cpp b/ydb/core/ymq/actor/queue_schema.cpp index b25b7bde05..b7b81610ae 100644 --- a/ydb/core/ymq/actor/queue_schema.cpp +++ b/ydb/core/ymq/actor/queue_schema.cpp @@ -450,7 +450,7 @@ void TCreateQueueSchemaActorV2::RequestTablesFormatSettings(const TString& accou auto* trans = ev->Record.MutableTransaction()->MutableMiniKQLTransaction(); TParameters(trans->MutableParams()->MutableProto()) .Utf8("USER_NAME", accountName) - .Uint32("DEFAULT_TABLES_FORMAT", 0); + .Uint32("DEFAULT_TABLES_FORMAT", 1); Register(new TMiniKqlExecutionActor(SelfId(), RequestId_, std::move(ev), false, QueuePath_, GetTransactionCounters(UserCounters_))); } diff --git a/ydb/core/ymq/queues/common/db_queries_maker.cpp b/ydb/core/ymq/queues/common/db_queries_maker.cpp index abaedf5039..87964e7ce3 100644 --- a/ydb/core/ymq/queues/common/db_queries_maker.cpp +++ b/ydb/core/ymq/queues/common/db_queries_maker.cpp @@ -27,7 +27,7 @@ namespace NKikimr::NSQS { }
const char* TDbQueriesMaker::GetDlqStateKeys() const {
- if (TablesFormat_ == 0) {
+ if (DlqTablesFormat_ == 0) {
return IsFifo_ ? "'('State (Uint64 '0))" : "'('State dlqShard)";
}
return IsFifo_ ? GetDlqIdKeys() : DLQ_STD_STATE_KEYS;
diff --git a/ydb/core/ymq/queues/common/db_queries_maker.h b/ydb/core/ymq/queues/common/db_queries_maker.h index 8597ad058d..06dc79604b 100644 --- a/ydb/core/ymq/queues/common/db_queries_maker.h +++ b/ydb/core/ymq/queues/common/db_queries_maker.h @@ -104,7 +104,7 @@ private: return TablesFormat_ == 1 ? QUEUE_ID_AND_SHARD_KEYS : "";
}
const char* GetDlqIdAndShardKeys() const {
- return TablesFormat_ == 1 ? DLQ_ID_AND_SHARD_KEYS : "";
+ return DlqTablesFormat_ == 1 ? DLQ_ID_AND_SHARD_KEYS : "";
}
const char* GetShardColumnName() const {
return TablesFormat_ == 1 ? "Shard" : "State";
diff --git a/ydb/core/ymq/queues/fifo/queries.cpp b/ydb/core/ymq/queues/fifo/queries.cpp index 0d5c5239e2..ecc24b93eb 100644 --- a/ydb/core/ymq/queues/fifo/queries.cpp +++ b/ydb/core/ymq/queues/fifo/queries.cpp @@ -693,6 +693,7 @@ const char* const ReadMessageQuery = R"__( ))))) (return (Extend + (AsList (SetResult 'dlqExists (Bool 'true))) (AsList (SetResult 'result result)) (AsList (SetResult 'movedMessagesCount (Uint64 '0))) @@ -826,6 +827,7 @@ const char* const ReadOrRedriveMessageQuery = R"__( 'WriteOffset 'LastModifiedTimestamp)) (let dlqStateRead (SelectRow dlqStateTable dlqStateRow dlqStateSelect)) + (let dlqExists (Exists dlqStateRead)) (let dlqSentTimestamp (Max now (Member dlqStateRead 'LastModifiedTimestamp))) (let dlqStartOffset (Add (Member dlqStateRead 'WriteOffset) (Uint64 '1))) @@ -860,14 +862,15 @@ const char* const ReadOrRedriveMessageQuery = R"__( '('MessageCount newSourceMsgCount))) (return (Extend + (AsList (SetResult 'dlqExists dlqExists)) (AsList (SetResult 'result messagesToReturnAsStruct)) (AsList (SetResult 'movedMessagesCount (Length messagesToMoveAsStruct))) (AsList (SetResult 'newMessagesCount newSourceMsgCount)) - (ListIf (HasItems messagesToMoveAsStruct) (UpdateRow dlqStateTable dlqStateRow dlqStateUpdate)) - (ListIf (HasItems messagesToMoveAsStruct) (UpdateRow sourceStateTable sourceStateRow sourceStateUpdate)) + (ListIf (And (HasItems messagesToMoveAsStruct) dlqExists) (UpdateRow dlqStateTable dlqStateRow dlqStateUpdate)) + (ListIf (And (HasItems messagesToMoveAsStruct) dlqExists) (UpdateRow sourceStateTable sourceStateRow sourceStateUpdate)) # copy messages to dlq - (Map dlqMessagesInfoWithProperIndexesSorted (lambda '(item) (block '( + (If dlqExists (Map dlqMessagesInfoWithProperIndexesSorted (lambda '(item) (block '( (let dlqDataRow '( )__" DLQ_ID_KEYS_PARAM R"__( '('RandomId randomId) @@ -879,8 +882,9 @@ const char* const ReadOrRedriveMessageQuery = R"__( '('SenderId (Member (Nth item '1) 'SenderId)) '('MessageId (Member (Nth 
item '1) 'MessageId)))) (return (UpdateRow dlqDataTable dlqDataRow dlqDataUpdate)))))) + (AsList (Void))) - (Map dlqMessagesInfoWithProperIndexesSorted (lambda '(item) (block '( + (If dlqExists (Map dlqMessagesInfoWithProperIndexesSorted (lambda '(item) (block '( (let dlqMsgRow '( )__" DLQ_ID_KEYS_PARAM R"__( '('Offset (Nth item '0)))) @@ -893,8 +897,9 @@ const char* const ReadOrRedriveMessageQuery = R"__( '('FirstReceiveTimestamp (Uint64 '0)) '('SentTimestamp dlqSentTimestamp))) (return (UpdateRow dlqMsgTable dlqMsgRow dlqMessageUpdate)))))) + (AsList (Void))) - (Map dlqMessagesInfoWithProperIndexesSorted (lambda '(item) (block '( + (If dlqExists (Map dlqMessagesInfoWithProperIndexesSorted (lambda '(item) (block '( (let dlqSentTsRow '( )__" DLQ_ID_KEYS_PARAM R"__( '('SentTimestamp dlqSentTimestamp) @@ -906,8 +911,9 @@ const char* const ReadOrRedriveMessageQuery = R"__( '('DelayDeadline delayDeadline) '('GroupId (Member (Nth item '1) 'GroupId)))) (return (UpdateRow dlqSentTsIdx dlqSentTsRow dlqSentTsUpdate)))))) + (AsList (Void))) - (Map dlqMessagesInfoWithProperIndexesSorted (lambda '(item) (block '( + (If dlqExists (Map dlqMessagesInfoWithProperIndexesSorted (lambda '(item) (block '( (let dlqGroupRow '( )__" DLQ_ID_KEYS_PARAM R"__( '('GroupId (Member (Nth item '1) 'GroupId)))) @@ -930,8 +936,9 @@ const char* const ReadOrRedriveMessageQuery = R"__( (UpdateRow dlqGroupTable dlqGroupRow dlqGroupUpdate) ) ))))) + (AsList (Void))) - (Map dlqMessagesInfoWithProperIndexesSorted (lambda '(item) (block '( + (If dlqExists (Map dlqMessagesInfoWithProperIndexesSorted (lambda '(item) (block '( (let dlqTail (Member (Nth item '1) 'DlqTail)) (let dlqPrevMessageRow '( )__" DLQ_ID_KEYS_PARAM R"__( @@ -944,29 +951,33 @@ const char* const ReadOrRedriveMessageQuery = R"__( (UpdateRow dlqMsgTable dlqPrevMessageRow dlqPrevMessageUpdate) (Void)) ))))) + (AsList (Void))) # remove dead letters' content from source queue - (Map dlqMessagesInfoWithProperIndexesSorted (lambda '(item) (block 
'( + (If dlqExists (Map dlqMessagesInfoWithProperIndexesSorted (lambda '(item) (block '( (let row '( )__" QUEUE_ID_KEYS_PARAM R"__( '('RandomId (Member (Nth item '1) 'SourceRandomId)) '('Offset (Member (Nth item '1) 'SourceOffset)))) (return (EraseRow sourceDataTable row)))))) + (AsList (Void))) - (Map dlqMessagesInfoWithProperIndexesSorted (lambda '(item) (block '( + (If dlqExists (Map dlqMessagesInfoWithProperIndexesSorted (lambda '(item) (block '( (let row '( )__" QUEUE_ID_KEYS_PARAM R"__( '('Offset (Member (Nth item '1) 'SourceOffset)))) (return (EraseRow sourceMsgTable row)))))) + (AsList (Void))) - (Map dlqMessagesInfoWithProperIndexesSorted (lambda '(item) (block '( + (If dlqExists (Map dlqMessagesInfoWithProperIndexesSorted (lambda '(item) (block '( (let row '( )__" QUEUE_ID_KEYS_PARAM R"__( '('SentTimestamp (Member (Nth item '1) 'SourceSentTimestamp)) '('Offset (Member (Nth item '1) 'SourceOffset)))) (return (EraseRow sourceSentTsIdx row)))))) + (AsList (Void))) - (Map dlqMessagesInfoWithProperIndexesSorted (lambda '(item) (block '( + (If dlqExists (Map dlqMessagesInfoWithProperIndexesSorted (lambda '(item) (block '( (let row '( )__" QUEUE_ID_KEYS_PARAM R"__( '('GroupId (Member (Nth item '1) 'GroupId)))) @@ -975,14 +986,14 @@ const char* const ReadOrRedriveMessageQuery = R"__( '('Head (Member (Nth item '1) 'SourceNextOffset)) '('LockTimestamp (Uint64 '0)) '('VisibilityDeadline (Uint64 '0)))) - (return (If (Coalesce (Equal (Member (Nth item '1) 'SourceNextOffset) (Uint64 '0)) (Bool 'false)) (EraseRow sourceGroupTable row) (UpdateRow sourceGroupTable row update))))))) + (AsList (Void))) # just return ordinary messages - (Map messagesToReturnAsStruct (lambda '(item) (block '( + (If dlqExists (Map messagesToReturnAsStruct (lambda '(item) (block '( (let message (Member item 'SourceMessageFieldsRead)) (let row '( )__" QUEUE_ID_KEYS_PARAM R"__( @@ -992,7 +1003,9 @@ const char* const ReadOrRedriveMessageQuery = R"__( (let update '( '('FirstReceiveTimestamp 
receiveTimestamp) '('ReceiveCount (Add (Member message 'ReceiveCount) (Uint32 '1))))) - (return (UpdateRow sourceMsgTable row update)))))))) + (return (UpdateRow sourceMsgTable row update)))))) + (AsList (Void))) + )) ) )__"; diff --git a/ydb/core/ymq/queues/std/queries.cpp b/ydb/core/ymq/queues/std/queries.cpp index 1df5c5a87e..9d7339fa3b 100644 --- a/ydb/core/ymq/queues/std/queries.cpp +++ b/ydb/core/ymq/queues/std/queries.cpp @@ -686,6 +686,7 @@ const char* const LoadMessageQuery = R"__( (return (Coalesce (Member item 'Valid) (Bool 'false)))))))) (return (Extend + (AsList (SetResult 'dlqExists (Bool 'true))) (AsList (SetResult 'result records)) (AsList (SetResult 'movedMessagesCount (Uint64 '0))) @@ -715,12 +716,12 @@ const char* const LoadOrRedriveMessageQuery = R"__( '('VisibilityDeadline (DataType 'Uint64)))))) (let queueIdNumber (Parameter 'QUEUE_ID_NUMBER (DataType 'Uint64))) - (let queueIdNumberAndShardHash (Parameter 'QUEUE_ID_NUMBER_HASH (DataType 'Uint64))) + (let queueIdNumberHash (Parameter 'QUEUE_ID_NUMBER_HASH (DataType 'Uint64))) (let shard (Parameter 'SHARD (DataType ')__" SHARD_TYPE_PARAM R"__())) (let queueIdNumberAndShardHash (Parameter 'QUEUE_ID_NUMBER_AND_SHARD_HASH (DataType 'Uint64))) (let dlqIdNumber (Parameter 'DLQ_ID_NUMBER (DataType 'Uint64))) - (let dlqIdNumberAndShardHash (Parameter 'DLQ_ID_NUMBER_HASH (DataType 'Uint64))) + (let dlqIdNumberHash (Parameter 'DLQ_ID_NUMBER_HASH (DataType 'Uint64))) (let dlqShard (Parameter 'DLQ_SHARD (DataType ')__" DLQ_SHARD_TYPE_PARAM R"__())) (let dlqIdNumberAndShardHash (Parameter 'DLQ_ID_NUMBER_AND_SHARD_HASH (DataType 'Uint64))) @@ -825,6 +826,7 @@ const char* const LoadOrRedriveMessageQuery = R"__( 'WriteOffset 'LastModifiedTimestamp)) (let deadLetterStateRead (SelectRow deadLetterStateTable deadLetterStateRow deadLetterStateSelect)) + (let dlqExists (Exists deadLetterStateRead)) (let newDlqMessagesCount (Add (Member deadLetterStateRead 'MessageCount) (Length messagesToMove))) (let 
newDlqWriteOffset (Add (Member deadLetterStateRead 'WriteOffset) deadLettersCount)) @@ -844,13 +846,14 @@ const char* const LoadOrRedriveMessageQuery = R"__( '('InflyCount (Sub (Member sourceStateRead 'InflyCount) (Length messagesToMove))))) (return (Extend + (AsList (SetResult 'dlqExists dlqExists)) (AsList (SetResult 'result records)) (AsList (SetResult 'movedMessagesCount (Length messagesToMove))) (AsList (SetResult 'newMessagesCount newSourceMessagesCount)) - (AsList (UpdateRow deadLetterStateTable deadLetterStateRow deadLetterStateUpdate)) - (AsList (UpdateRow sourceStateTable sourceStateRow sourceStateUpdate)) + (ListIf dlqExists (UpdateRow deadLetterStateTable deadLetterStateRow deadLetterStateUpdate)) + (ListIf dlqExists (UpdateRow sourceStateTable sourceStateRow sourceStateUpdate)) - (Map messagesToUpdate (lambda '(item) (block '( + (If dlqExists (Map messagesToUpdate (lambda '(item) (block '( (let row '( )__" QUEUE_ID_AND_SHARD_KEYS_PARAM R"__( '('Offset (Member item 'Offset)))) @@ -861,8 +864,9 @@ const char* const LoadOrRedriveMessageQuery = R"__( '('ReceiveCount (Member item 'ReceiveCount)) '('VisibilityDeadline (Member item 'VisibilityDeadline)))) (return (UpdateRow sourceInflyTable row update)))))) + (AsList (Void))) - (Map messagesToMove (lambda '(item) (block '( + (If dlqExists (Map messagesToMove (lambda '(item) (block '( (let msgRow '( )__" DLQ_ID_AND_SHARD_KEYS_PARAM R"__( '('Offset (Add dlqStartOffset (Member item 'DlqIndex))))) @@ -872,8 +876,9 @@ const char* const LoadOrRedriveMessageQuery = R"__( '('SentTimestamp dlqMostRecentTimestamp) '('DelayDeadline delayDeadline))) (return (UpdateRow deadLetterMessagesTable msgRow messageUpdate)))))) + (AsList (Void))) - (Map messagesToMove (lambda '(item) (block '( + (If dlqExists (Map messagesToMove (lambda '(item) (block '( (let sentTsRow '( )__" DLQ_ID_AND_SHARD_KEYS_PARAM R"__( '('SentTimestamp dlqMostRecentTimestamp) @@ -883,8 +888,9 @@ const char* const LoadOrRedriveMessageQuery = R"__( 
'('RandomId readId) '('DelayDeadline delayDeadline))) (return (UpdateRow deadLetterSentTsIdxTable sentTsRow sentTsUpdate)))))) + (AsList (Void))) - (Map messagesToMove (lambda '(item) (block '( + (If dlqExists (Map messagesToMove (lambda '(item) (block '( (let dataRow '( )__" DLQ_ID_AND_SHARD_KEYS_PARAM R"__( '('RandomId readId) @@ -896,27 +902,31 @@ const char* const LoadOrRedriveMessageQuery = R"__( '('SenderId (Member item 'SenderId)) '('MessageId (Member item 'MessageId)))) (return (UpdateRow deadLetterMessageDataTable dataRow dataUpdate)))))) + (AsList (Void))) - (Map messagesToMove (lambda '(item) (block '( + (If dlqExists (Map messagesToMove (lambda '(item) (block '( (let inflyRow '( )__" QUEUE_ID_AND_SHARD_KEYS_PARAM R"__( '('Offset (Member item 'Offset)))) (return (EraseRow sourceInflyTable inflyRow)))))) + (AsList (Void))) - (Map messagesToMove (lambda '(item) (block '( + (If dlqExists (Map messagesToMove (lambda '(item) (block '( (let dataRow '( )__" QUEUE_ID_AND_SHARD_KEYS_PARAM R"__( '('RandomId (Member item 'RandomId)) '('Offset (Member item 'Offset)))) (return (EraseRow sourceMessageDataTable dataRow)))))) + (AsList (Void))) - (Map messagesToMove (lambda '(item) (block '( + (If dlqExists (Map messagesToMove (lambda '(item) (block '( (let sentTsRow '( )__" QUEUE_ID_AND_SHARD_KEYS_PARAM R"__( '('SentTimestamp (Member item 'SentTimestamp)) '('Offset (Member item 'Offset)))) (return (EraseRow sourceSentTsIdxTable sentTsRow)))))) + (AsList (Void))) )) ) )__"; diff --git a/ydb/library/persqueue/tests/counters.cpp b/ydb/library/persqueue/tests/counters.cpp index b48ed45e48..ac600b0219 100644 --- a/ydb/library/persqueue/tests/counters.cpp +++ b/ydb/library/persqueue/tests/counters.cpp @@ -20,15 +20,18 @@ NJson::TJsonValue SendQuery(ui16 port, const TString& query, bool mayFail) { TString firstLine = input.FirstLine(); const auto httpCode = ParseHttpRetCode(firstLine); + NJson::TJsonValue value; + bool res = NJson::ReadJsonTree(&input, &value); + + Cerr << 
"counters: " << value.GetStringRobust() << "\n"; + + UNIT_ASSERT(res); if (mayFail && httpCode != 200u) { return {}; } else { UNIT_ASSERT_VALUES_EQUAL(httpCode, 200u); } - NJson::TJsonValue value; - UNIT_ASSERT(NJson::ReadJsonTree(&input, &value)); - Cerr << "counters: " << value.GetStringRobust() << "\n"; return value; } @@ -84,19 +87,21 @@ NJson::TJsonValue GetClientCountersLegacy(ui16 port, const TString& counters, co } NJson::TJsonValue GetCounters1stClass(ui16 port, const TString& counters, + const TString& databasePath, const TString& cloudId, const TString& databaseId, - const TString& folderId, const TString& streamName, + const TString& folderId, const TString& topicName, const TString& consumer, const TString& host, - const TString& shard) { + const TString& partition) { bool mayFail = false; - TVector<TString> pathItems = SplitString(streamName, "/"); + TVector<TString> pathItems = SplitString(topicName, "/"); TStringBuilder queryBuilder; queryBuilder << "/counters/counters=" << counters << - "/cloud=" << cloudId << - "/folder=" << folderId << - "/database=" << databaseId << - "/stream=" << JoinRange("%2F", pathItems.begin(), pathItems.end()); + "/database=" << databasePath << + "/cloud_id=" << cloudId << + "/folder_id=" << folderId << + "/database_id=" << databaseId << + "/topic=" << JoinRange("%2F", pathItems.begin(), pathItems.end()); if (consumer) { queryBuilder << @@ -108,9 +113,9 @@ NJson::TJsonValue GetCounters1stClass(ui16 port, const TString& counters, "/host=" << host; } - if (shard) { + if (partition) { queryBuilder << - "/shard=" << shard; + "/partition=" << partition; mayFail = true; } diff --git a/ydb/library/persqueue/tests/counters.h b/ydb/library/persqueue/tests/counters.h index 9b6958623e..35f211491a 100644 --- a/ydb/library/persqueue/tests/counters.h +++ b/ydb/library/persqueue/tests/counters.h @@ -16,9 +16,10 @@ NJson::TJsonValue GetClientCountersLegacy(ui16 port, const TString& counters, co const TString& client, const TString& 
consumerPath); NJson::TJsonValue GetCounters1stClass(ui16 port, const TString& counters, + const TString& databasePath, const TString& cloudId, const TString& databaseId, - const TString& folderId, const TString& streamName, + const TString& folderId, const TString& topicName, const TString& consumer, const TString& host, - const TString& shard); + const TString& partition); } // NKikimr::NPersQueueTests diff --git a/ydb/library/persqueue/topic_parser/counters.cpp b/ydb/library/persqueue/topic_parser/counters.cpp index baf401d165..479812b04f 100644 --- a/ydb/library/persqueue/topic_parser/counters.cpp +++ b/ydb/library/persqueue/topic_parser/counters.cpp @@ -15,9 +15,9 @@ namespace NPersQueue { ->GetSubgroup("Topic", topic->GetShortClientsideName()); } -::NMonitoring::TDynamicCounterPtr GetCountersForStream(::NMonitoring::TDynamicCounterPtr counters) +::NMonitoring::TDynamicCounterPtr GetCountersForTopic(::NMonitoring::TDynamicCounterPtr counters, bool isServerless) { - return counters->GetSubgroup("counters", "datastreams"); + return counters->GetSubgroup("counters", isServerless ? 
"datastreams_serverless" : "datastreams"); } TVector<TPQLabelsInfo> GetLabelsForCustomCluster(const TTopicConverterPtr& topic, TString cluster) @@ -37,13 +37,14 @@ TVector<TPQLabelsInfo> GetLabels(const TTopicConverterPtr& topic) return GetLabelsForCustomCluster(topic, topic->GetCluster()); } -TVector<TPQLabelsInfo> GetLabelsForStream(const TTopicConverterPtr& topic, const TString& cloudId, - const TString& dbId, const TString& folderId) { - TVector<TPQLabelsInfo> res = { - {{{"cloud", cloudId}}, {cloudId}}, - {{{"folder", folderId}}, {folderId}}, - {{{"database", dbId}}, {dbId}}, - {{{"stream", topic->GetClientsideName()}}, {topic->GetClientsideName()}}}; +TVector<std::pair<TString, TString>> GetSubgroupsForTopic(const TTopicConverterPtr& topic, const TString& cloudId, + const TString& dbId, const TString& dbPath, const TString& folderId) { + TVector<std::pair<TString, TString>> res = { + {"database", dbPath}, + {"cloud_id", cloudId}, + {"folder_id", folderId}, + {"database_id", dbId}, + {"topic", topic->GetClientsideName()}}; return res; } diff --git a/ydb/library/persqueue/topic_parser/counters.h b/ydb/library/persqueue/topic_parser/counters.h index 32af523141..8367e10f82 100644 --- a/ydb/library/persqueue/topic_parser/counters.h +++ b/ydb/library/persqueue/topic_parser/counters.h @@ -9,11 +9,12 @@ namespace NPersQueue { TVector<NPersQueue::TPQLabelsInfo> GetLabels(const TTopicConverterPtr& topic); //TVector<NPersQueue::TPQLabelsInfo> GetLabelsForLegacyName(const TString& topic); TVector<NPersQueue::TPQLabelsInfo> GetLabelsForCustomCluster(const TTopicConverterPtr& topic, TString cluster); -TVector<NPersQueue::TPQLabelsInfo> GetLabelsForStream(const TTopicConverterPtr& topic, const TString& cloudId, - const TString& dbId, const TString& folderId); +TVector<std::pair<TString, TString>> GetSubgroupsForTopic(const TTopicConverterPtr& topic, const TString& cloudId, + const TString& dbId, const TString& dbPath, + const TString& folderId); 
::NMonitoring::TDynamicCounterPtr GetCounters(::NMonitoring::TDynamicCounterPtr counters, const TString& subsystem, const TTopicConverterPtr& topic); -::NMonitoring::TDynamicCounterPtr GetCountersForStream(::NMonitoring::TDynamicCounterPtr counters); +::NMonitoring::TDynamicCounterPtr GetCountersForTopic(::NMonitoring::TDynamicCounterPtr counters, bool isServerless); } // namespace NPersQueue diff --git a/ydb/library/yaml_config/yaml_config_parser.cpp b/ydb/library/yaml_config/yaml_config_parser.cpp index fa6f573910..179ecf2a9d 100644 --- a/ydb/library/yaml_config/yaml_config_parser.cpp +++ b/ydb/library/yaml_config/yaml_config_parser.cpp @@ -801,18 +801,18 @@ namespace NKikimr::NYaml { NKikimrBlobStorage::TConfigRequest result; + const auto itemConfigGeneration = json.Has("storage_config_generation") ? + GetUnsignedIntegerSafe(json, "storage_config_generation") : 0; + for(auto hostConfig: json["host_configs"].GetArraySafe()) { auto *hostConfigProto = result.AddCommand()->MutableDefineHostConfig(); NProtobufJson::MergeJson2Proto(hostConfig, *hostConfigProto, GetJsonToProtoConfig()); + hostConfigProto->SetItemConfigGeneration(itemConfigGeneration); } auto *defineBox = result.AddCommand()->MutableDefineBox(); defineBox->SetBoxId(1); - if (json.Has("storage_config_generation")) { - defineBox->SetItemConfigGeneration(GetUnsignedIntegerSafe(json, "storage_config_generation")); - } else { - defineBox->SetItemConfigGeneration(0); - } + defineBox->SetItemConfigGeneration(itemConfigGeneration); for(auto jsonHost: json["hosts"].GetArraySafe()) { auto* host = defineBox->AddHost(); diff --git a/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp b/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp index ffa37ea263..01d5bf4daf 100644 --- a/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp +++ b/ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp @@ -1063,6 +1063,11 @@ TIndexRange ExtractIndexRangeFromKeys(const 
TVector<TString>& keys, const THashM return result; } +TExprNode::TPtr MakeRangeAnd(TPositionHandle pos, TExprNodeList&& children, TExprContext& ctx) { + YQL_ENSURE(!children.empty()); + return children.size() == 1 ? children.front() : ctx.NewCallable(pos, "RangeAnd", std::move(children)); +} + TExprNode::TPtr DoRebuildRangeForIndexKeys(const TStructExprType& rowType, const TExprNode::TPtr& range, const THashMap<TString, size_t>& indexKeysOrder, TIndexRange& resultIndexRange, TExprContext& ctx) { @@ -1114,70 +1119,73 @@ TExprNode::TPtr DoRebuildRangeForIndexKeys(const TStructExprType& rowType, const struct TNodeAndIndexRange { TExprNode::TPtr Node; TIndexRange IndexRange; + size_t OriginalPosition = 0; }; TVector<TNodeAndIndexRange> toRebuild; + size_t pos = 0; for (const auto& child : range->ChildrenList()) { toRebuild.emplace_back(); TNodeAndIndexRange& curr = toRebuild.back(); curr.Node = DoRebuildRangeForIndexKeys(rowType, child, indexKeysOrder, curr.IndexRange, ctx); + curr.OriginalPosition = pos++; } - std::stable_sort(toRebuild.begin(), toRebuild.end(), [&](const TNodeAndIndexRange& a, const TNodeAndIndexRange& b) { - // sort children by key order - // move RangeRest/RangeConst to the end while preserving their relative order - return a.IndexRange < b.IndexRange; - }); - - - TExprNodeList rests; - TVector<TExprNodeList> childrenChains; - THashMap<size_t, TSet<size_t>> chainIdxByEndIdx; - + TVector<TNodeAndIndexRange> rests; + TMap<TIndexRange, TVector<TNodeAndIndexRange>> children; for (auto& current : toRebuild) { if (current.IndexRange.IsEmpty()) { YQL_ENSURE(current.Node->IsCallable("RangeRest")); - rests.emplace_back(std::move(current.Node)); - continue; - } - const size_t beginIdx = current.IndexRange.Begin; - const size_t endIdx = current.IndexRange.End; - if (!commonIndexRange || beginIdx == commonIndexRange->Begin) { - if (!commonIndexRange) { - commonIndexRange = current.IndexRange; - } else { - commonIndexRange->End = 
std::max(commonIndexRange->End, endIdx); - } - chainIdxByEndIdx[endIdx].insert(childrenChains.size()); - childrenChains.emplace_back(); - childrenChains.back().push_back(current.Node); + rests.emplace_back(std::move(current)); } else { - auto it = chainIdxByEndIdx.find(beginIdx); - if (it == chainIdxByEndIdx.end()) { - rests.emplace_back(RebuildAsRangeRest(rowType, *current.Node, ctx)); - continue; - } + children[current.IndexRange].push_back(std::move(current)); + } + } - YQL_ENSURE(!it->second.empty()); - const size_t tgtChainIdx = *it->second.begin(); - it->second.erase(tgtChainIdx); - if (it->second.empty()) { - chainIdxByEndIdx.erase(it); + TVector<TVector<TNodeAndIndexRange>> childrenChains; + for (auto it = children.begin(); it != children.end(); ++it) { + if (!commonIndexRange) { + commonIndexRange = it->first; + childrenChains.emplace_back(std::move(it->second)); + continue; + } + if (commonIndexRange->Begin == it->first.Begin) { + YQL_ENSURE(it->first.End > commonIndexRange->End); + for (auto& asRest : childrenChains) { + rests.insert(rests.end(), asRest.begin(), asRest.end()); } + childrenChains.clear(); + childrenChains.push_back(std::move(it->second)); + commonIndexRange = it->first; + continue; + } - childrenChains[tgtChainIdx].push_back(current.Node); - chainIdxByEndIdx[endIdx].insert(tgtChainIdx); - commonIndexRange->End = std::max(commonIndexRange->End, endIdx); + if (commonIndexRange->End == it->first.Begin) { + commonIndexRange->End = it->first.End; + childrenChains.push_back(std::move(it->second)); + } else { + rests.insert(rests.end(), it->second.begin(), it->second.end()); } } for (auto& chain : childrenChains) { - YQL_ENSURE(!chain.empty()); - rebuilt.push_back(ctx.NewCallable(range->Pos(), "RangeAnd", std::move(chain))); + TExprNodeList chainNodes; + for (auto& entry : chain) { + chainNodes.push_back(entry.Node); + } + rebuilt.push_back(MakeRangeAnd(range->Pos(), std::move(chainNodes), ctx)); } if (!rests.empty()) { - 
rebuilt.push_back(RebuildAsRangeRest(rowType, *ctx.NewCallable(range->Pos(), "RangeAnd", std::move(rests)), ctx)); + // restore original order in rests + std::sort(rests.begin(), rests.end(), [&](const TNodeAndIndexRange& a, const TNodeAndIndexRange& b) { + return a.OriginalPosition < b.OriginalPosition; + }); + TExprNodeList restsNodes; + for (auto& item : rests) { + restsNodes.push_back(RebuildAsRangeRest(rowType, *item.Node, ctx)); + } + rebuilt.push_back(RebuildAsRangeRest(rowType, *MakeRangeAnd(range->Pos(), std::move(restsNodes), ctx), ctx)); } } diff --git a/ydb/library/yql/core/type_ann/type_ann_core.cpp b/ydb/library/yql/core/type_ann/type_ann_core.cpp index cb48db3519..d8286a3226 100644 --- a/ydb/library/yql/core/type_ann/type_ann_core.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_core.cpp @@ -10588,7 +10588,9 @@ template <NKikimr::NUdf::EDataSlot DataSlot> if (op != "Exists" && op != "NotExists") { valueBaseType = RemoveAllOptionals(valueType); YQL_ENSURE(valueBaseType); - if (valueBaseType->GetKind() != ETypeAnnotationKind::Data) { + if (valueBaseType->GetKind() != ETypeAnnotationKind::Data && + valueBaseType->GetKind() != ETypeAnnotationKind::Null) + { ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Child(1)->Pos()), TStringBuilder() << "Expecting (optional) Data as second argument, but got: " << *valueType)); return IGraphTransformer::TStatus::Error; @@ -10610,7 +10612,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> return IGraphTransformer::TStatus::Error; } - if (valueBaseType && CanCompare<false>(RemoveAllOptionals(keyType), valueBaseType) != ECompareOptions::Comparable) { + if (valueBaseType && CanCompare<false>(RemoveAllOptionals(keyType), valueBaseType) == ECompareOptions::Uncomparable) { ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), TStringBuilder() << "Uncompatible key and value types: " << *keyType << " and " << *valueType)); return IGraphTransformer::TStatus::Error; diff --git a/ydb/library/yql/sql/v1/SQLv1.g.in 
b/ydb/library/yql/sql/v1/SQLv1.g.in index 7385be2253..c8493348da 100644 --- a/ydb/library/yql/sql/v1/SQLv1.g.in +++ b/ydb/library/yql/sql/v1/SQLv1.g.in @@ -524,8 +524,8 @@ local_index: LOCAL; changefeed: CHANGEFEED an_id WITH LPAREN changefeed_settings RPAREN; changefeed_settings: changefeed_settings_entry (COMMA changefeed_settings_entry)*; changefeed_settings_entry: an_id EQUALS changefeed_setting_value; -changefeed_setting_value: STRING_VALUE; -changefeed_alter_settings: +changefeed_setting_value: expr; +changefeed_alter_settings: DISABLE | SET LPAREN changefeed_settings RPAREN ; diff --git a/ydb/library/yql/sql/v1/node.h b/ydb/library/yql/sql/v1/node.h index f1f0787601..664f6cae94 100644 --- a/ydb/library/yql/sql/v1/node.h +++ b/ydb/library/yql/sql/v1/node.h @@ -1114,6 +1114,8 @@ namespace NSQLTranslationV1 { TNodePtr Mode; TNodePtr Format; + TNodePtr VirtualTimestamps; + TNodePtr RetentionPeriod; std::optional<std::variant<TLocalSinkSettings>> SinkSettings; }; diff --git a/ydb/library/yql/sql/v1/query.cpp b/ydb/library/yql/sql/v1/query.cpp index 9d7b22ec9f..d433ab05e2 100644 --- a/ydb/library/yql/sql/v1/query.cpp +++ b/ydb/library/yql/sql/v1/query.cpp @@ -123,6 +123,12 @@ static INode::TPtr CreateChangefeedDesc(const TChangefeedDescription& desc, cons if (desc.Settings.Format) { settings = node.L(settings, node.Q(node.Y(node.Q("format"), desc.Settings.Format))); } + if (desc.Settings.VirtualTimestamps) { + settings = node.L(settings, node.Q(node.Y(node.Q("virtual_timestamps"), desc.Settings.VirtualTimestamps))); + } + if (desc.Settings.RetentionPeriod) { + settings = node.L(settings, node.Q(node.Y(node.Q("retention_period"), desc.Settings.RetentionPeriod))); + } if (const auto& sink = desc.Settings.SinkSettings) { switch (sink->index()) { case 0: // local diff --git a/ydb/library/yql/sql/v1/sql.cpp b/ydb/library/yql/sql/v1/sql.cpp index 25df30ded6..969b2774bf 100644 --- a/ydb/library/yql/sql/v1/sql.cpp +++ b/ydb/library/yql/sql/v1/sql.cpp @@ -641,61 +641,6 @@ 
static bool CreateTableIndex(const TRule_table_index& node, TTranslation& ctx, T return true; } -static bool ChangefeedSettingsEntry(const TRule_changefeed_settings_entry& node, TTranslation& ctx, TChangefeedSettings& settings, bool alter) { - const auto id = IdEx(node.GetRule_an_id1(), ctx); - const TString value(ctx.Token(node.GetRule_changefeed_setting_value3().GetToken1())); - - if (alter) { - // currently we don't support alter settings - ctx.Error() << to_upper(id.Name) << " alter is not supported"; - return false; - } - - if (to_lower(id.Name) == "sink_type") { - auto parsed = StringContent(ctx.Context(), ctx.Context().Pos(), value); - YQL_ENSURE(parsed.Defined()); - if (to_lower(parsed->Content) == "local") { - settings.SinkSettings = TChangefeedSettings::TLocalSinkSettings(); - } else { - ctx.Error() << "Unknown changefeed sink type: " << to_upper(parsed->Content); - return false; - } - } else if (to_lower(id.Name) == "mode") { - settings.Mode = BuildLiteralSmartString(ctx.Context(), value); - } else if (to_lower(id.Name) == "format") { - settings.Format = BuildLiteralSmartString(ctx.Context(), value); - } else { - ctx.Error() << "Unknown changefeed setting: " << id.Name; - return false; - } - - return true; -} - -static bool ChangefeedSettings(const TRule_changefeed_settings& node, TTranslation& ctx, TChangefeedSettings& settings, bool alter) { - if (!ChangefeedSettingsEntry(node.GetRule_changefeed_settings_entry1(), ctx, settings, alter)) { - return false; - } - - for (auto& block : node.GetBlock2()) { - if (!ChangefeedSettingsEntry(block.GetRule_changefeed_settings_entry2(), ctx, settings, alter)) { - return false; - } - } - - return true; -} - -static bool CreateChangefeed(const TRule_changefeed& node, TTranslation& ctx, TVector<TChangefeedDescription>& changefeeds) { - changefeeds.emplace_back(IdEx(node.GetRule_an_id2(), ctx)); - - if (!ChangefeedSettings(node.GetRule_changefeed_settings5(), ctx, changefeeds.back().Settings, false)) { - return false; 
- } - - return true; -} - static std::pair<TString, TString> TableKeyImpl(const std::pair<bool, TString>& nameWithAt, TString view, TTranslation& ctx) { if (nameWithAt.first) { view = "@"; @@ -1895,6 +1840,91 @@ bool TSqlTranslation::FillFamilySettings(const TRule_family_settings& settingsNo return true; } +static bool ChangefeedSettingsEntry(const TRule_changefeed_settings_entry& node, TSqlExpression& ctx, TChangefeedSettings& settings, bool alter) { + const auto id = IdEx(node.GetRule_an_id1(), ctx); + if (alter) { + // currently we don't support alter settings + ctx.Error() << to_upper(id.Name) << " alter is not supported"; + return false; + } + + const auto& setting = node.GetRule_changefeed_setting_value3(); + auto exprNode = ctx.Build(setting.GetRule_expr1()); + + if (!exprNode) { + ctx.Context().Error(id.Pos) << "Invalid changefeed setting: " << id.Name; + return false; + } + + if (to_lower(id.Name) == "sink_type") { + if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != "String") { + ctx.Context().Error() << "Literal of String type is expected for " << id.Name; + return false; + } + + const auto value = exprNode->GetLiteralValue(); + if (to_lower(value) == "local") { + settings.SinkSettings = TChangefeedSettings::TLocalSinkSettings(); + } else { + ctx.Context().Error() << "Unknown changefeed sink type: " << value; + return false; + } + } else if (to_lower(id.Name) == "mode") { + if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != "String") { + ctx.Context().Error() << "Literal of String type is expected for " << id.Name; + return false; + } + settings.Mode = exprNode; + } else if (to_lower(id.Name) == "format") { + if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != "String") { + ctx.Context().Error() << "Literal of String type is expected for " << id.Name; + return false; + } + settings.Format = exprNode; + } else if (to_lower(id.Name) == "virtual_timestamps") { + if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != "Bool") { 
+ ctx.Context().Error() << "Literal of Bool type is expected for " << id.Name; + return false; + } + settings.VirtualTimestamps = exprNode; + } else if (to_lower(id.Name) == "retention_period") { + if (exprNode->GetOpName() != "Interval") { + ctx.Context().Error() << "Literal of Interval type is expected for " << id.Name; + return false; + } + settings.RetentionPeriod = exprNode; + } else { + ctx.Context().Error(id.Pos) << "Unknown changefeed setting: " << id.Name; + return false; + } + + return true; +} + +static bool ChangefeedSettings(const TRule_changefeed_settings& node, TSqlExpression& ctx, TChangefeedSettings& settings, bool alter) { + if (!ChangefeedSettingsEntry(node.GetRule_changefeed_settings_entry1(), ctx, settings, alter)) { + return false; + } + + for (auto& block : node.GetBlock2()) { + if (!ChangefeedSettingsEntry(block.GetRule_changefeed_settings_entry2(), ctx, settings, alter)) { + return false; + } + } + + return true; +} + +static bool CreateChangefeed(const TRule_changefeed& node, TSqlExpression& ctx, TVector<TChangefeedDescription>& changefeeds) { + changefeeds.emplace_back(IdEx(node.GetRule_an_id2(), ctx)); + + if (!ChangefeedSettings(node.GetRule_changefeed_settings5(), ctx, changefeeds.back().Settings, false)) { + return false; + } + + return true; +} + bool TSqlTranslation::CreateTableEntry(const TRule_create_table_entry& node, TCreateTableParameters& params) { switch (node.Alt_case()) { @@ -2012,7 +2042,8 @@ bool TSqlTranslation::CreateTableEntry(const TRule_create_table_entry& node, TCr { // changefeed auto& changefeed = node.GetAlt_create_table_entry5().GetRule_changefeed1(); - if (!CreateChangefeed(changefeed, *this, params.Changefeeds)) { + TSqlExpression expr(Ctx, Mode); + if (!CreateChangefeed(changefeed, expr, params.Changefeeds)) { return false; } break; @@ -9343,7 +9374,8 @@ void TSqlQuery::AlterTableRenameIndexTo(const TRule_alter_table_rename_index_to& } bool TSqlQuery::AlterTableAddChangefeed(const 
TRule_alter_table_add_changefeed& node, TAlterTableParameters& params) { - return CreateChangefeed(node.GetRule_changefeed2(), *this, params.AddChangefeeds); + TSqlExpression expr(Ctx, Mode); + return CreateChangefeed(node.GetRule_changefeed2(), expr, params.AddChangefeeds); } bool TSqlQuery::AlterTableAlterChangefeed(const TRule_alter_table_alter_changefeed& node, TAlterTableParameters& params) { @@ -9359,7 +9391,8 @@ bool TSqlQuery::AlterTableAlterChangefeed(const TRule_alter_table_alter_changefe case TRule_changefeed_alter_settings::kAltChangefeedAlterSettings2: { // SET const auto& rule = alter.GetAlt_changefeed_alter_settings2().GetRule_changefeed_settings3(); - if (!ChangefeedSettings(rule, *this, params.AlterChangefeeds.back().Settings, true)) { + TSqlExpression expr(Ctx, Mode); + if (!ChangefeedSettings(rule, expr, params.AlterChangefeeds.back().Settings, true)) { return false; } break; diff --git a/ydb/library/yql/sql/v1/sql_ut.cpp b/ydb/library/yql/sql/v1/sql_ut.cpp index 6de785df3e..f65e86e539 100644 --- a/ydb/library/yql/sql/v1/sql_ut.cpp +++ b/ydb/library/yql/sql/v1/sql_ut.cpp @@ -1471,7 +1471,12 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) { auto res = SqlToYql(R"( USE plato; CREATE TABLE tableName ( Key Uint32, PRIMARY KEY (Key), - CHANGEFEED feedName WITH (MODE = 'KEYS_ONLY', FORMAT = 'json') + CHANGEFEED feedName WITH ( + MODE = 'KEYS_ONLY', + FORMAT = 'json', + VIRTUAL_TIMESTAMPS = FALSE, + RETENTION_PERIOD = Interval("P1D") + ) ); )"); UNIT_ASSERT_C(res.Root, Err2Str(res)); @@ -1483,6 +1488,9 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) { UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("KEYS_ONLY")); UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("format")); UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("json")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("virtual_timestamps")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("false")); + UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("retention_period")); } }; @@ -1630,7 
+1638,7 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) { Y_UNIT_TEST(AlterTableAlterChangefeedIsCorrect) { UNIT_ASSERT(SqlToYql("USE plato; ALTER TABLE table ALTER CHANGEFEED feed DISABLE").IsOk()); ExpectFailWithError("USE plato; ALTER TABLE table ALTER CHANGEFEED feed SET (FORMAT = 'proto');", - "<main>:1:66: Error: FORMAT alter is not supported\n"); + "<main>:1:57: Error: FORMAT alter is not supported\n"); } Y_UNIT_TEST(AlterTableDropChangefeedIsCorrect) { @@ -3048,7 +3056,34 @@ select FormatType($f()); )"; auto res = SqlToYql(req); UNIT_ASSERT(!res.Root); - UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:70: Error: Unknown changefeed setting: FOO\n"); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:64: Error: Unknown changefeed setting: FOO\n"); + } + + + Y_UNIT_TEST(InvalidChangefeedVirtualTimestamps) { + auto req = R"( + USE plato; + CREATE TABLE tableName ( + Key Uint32, PRIMARY KEY (Key), + CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", VIRTUAL_TIMESTAMPS = "foo") + ); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:101: Error: Literal of Bool type is expected for VIRTUAL_TIMESTAMPS\n"); + } + + Y_UNIT_TEST(InvalidChangefeedRetentionPeriod) { + auto req = R"( + USE plato; + CREATE TABLE tableName ( + Key Uint32, PRIMARY KEY (Key), + CHANGEFEED feedName WITH (MODE = "KEYS_ONLY", FORMAT = "json", RETENTION_PERIOD = "foo") + ); + )"; + auto res = SqlToYql(req); + UNIT_ASSERT(!res.Root); + UNIT_ASSERT_NO_DIFF(Err2Str(res), "<main>:5:99: Error: Literal of Interval type is expected for RETENTION_PERIOD\n"); } Y_UNIT_TEST(ErrJoinWithGroupingSetsWithoutCorrelationName) { diff --git a/ydb/public/api/grpc/ydb_topic_v1.proto b/ydb/public/api/grpc/ydb_topic_v1.proto index d119b3b459..a65012e414 100644 --- a/ydb/public/api/grpc/ydb_topic_v1.proto +++ b/ydb/public/api/grpc/ydb_topic_v1.proto @@ -69,15 +69,15 @@ service TopicService { // Create topic command. 
rpc CreateTopic(CreateTopicRequest) returns (CreateTopicResponse); - // Describe topic command. rpc DescribeTopic(DescribeTopicRequest) returns (DescribeTopicResponse); + // Describe topic's consumer command. + rpc DescribeConsumer(DescribeConsumerRequest) returns (DescribeConsumerResponse); // Alter topic command. rpc AlterTopic(AlterTopicRequest) returns (AlterTopicResponse); - // Drop topic command. rpc DropTopic(DropTopicRequest) returns (DropTopicResponse); } diff --git a/ydb/public/api/protos/draft/datastreams.proto b/ydb/public/api/protos/draft/datastreams.proto index ef7f748994..06b9bf23ea 100644 --- a/ydb/public/api/protos/draft/datastreams.proto +++ b/ydb/public/api/protos/draft/datastreams.proto @@ -66,15 +66,17 @@ message HashKeyRange { message Record { // Timestamp that the record was inserted into the stream - int64 timestamp = 1 [(FieldTransformer) = TRANSFORM_DOUBLE_S_TO_INT_MS]; + int64 approximate_arrival_timestamp = 1 [(FieldTransformer) = TRANSFORM_DOUBLE_S_TO_INT_MS]; // Data blob bytes data = 2 [(FieldTransformer) = TRANSFORM_BASE64]; // Encryption type used on record - EncryptionType encryption = 3; + EncryptionType encryption_type = 3; // Identifies shard in the stream the record is assigned to string partition_key = 4; // Unique id of the record within shard string sequence_number = 5; + // Codec id from ydb_topic.proto if data is coded + int32 codec = 6; } // Represents the stream description diff --git a/ydb/public/api/protos/ydb_common.proto b/ydb/public/api/protos/ydb_common.proto index 2ecb33fb34..9e865646b4 100644 --- a/ydb/public/api/protos/ydb_common.proto +++ b/ydb/public/api/protos/ydb_common.proto @@ -21,3 +21,9 @@ message CostInfo { message QuotaExceeded { bool disk = 1; } + +// Specifies a point in database time +message VirtualTimestamp { + uint64 plan_step = 1; + uint64 tx_id = 2; +} diff --git a/ydb/public/api/protos/ydb_scheme.proto b/ydb/public/api/protos/ydb_scheme.proto index 7c61de6e7f..cad0b7651f 100644 --- 
a/ydb/public/api/protos/ydb_scheme.proto +++ b/ydb/public/api/protos/ydb_scheme.proto @@ -5,8 +5,11 @@ package Ydb.Scheme; option java_package = "com.yandex.ydb.scheme"; option java_outer_classname = "SchemeOperationProtos"; +import "ydb/public/api/protos/ydb_common.proto"; import "ydb/public/api/protos/ydb_operation.proto"; +import "google/protobuf/timestamp.proto"; + // Create directory. // All intermediate directories must be created message MakeDirectoryRequest { @@ -74,6 +77,9 @@ message Entry { // - DATABASE. // Empty (zero) in other cases. uint64 size_bytes = 8; + + // Virtual timestamp when the object was created + VirtualTimestamp created_at = 9; } message ListDirectoryResult { diff --git a/ydb/public/api/protos/ydb_table.proto b/ydb/public/api/protos/ydb_table.proto index 1bce7e444f..926bc649c6 100644 --- a/ydb/public/api/protos/ydb_table.proto +++ b/ydb/public/api/protos/ydb_table.proto @@ -12,6 +12,7 @@ import "ydb/public/api/protos/ydb_status_codes.proto"; import "ydb/public/api/protos/ydb_formats.proto"; import "google/protobuf/empty.proto"; +import "google/protobuf/duration.proto"; import "google/protobuf/timestamp.proto"; package Ydb.Table; @@ -147,6 +148,10 @@ message Changefeed { ChangefeedMode.Mode mode = 2; // Format of the data ChangefeedFormat.Format format = 3; + // How long data in changefeed's underlying topic should be stored + google.protobuf.Duration retention_period = 4; + // Emit virtual timestamps of changes along with data or not + bool virtual_timestamps = 5; } message ChangefeedDescription { @@ -164,6 +169,8 @@ message ChangefeedDescription { ChangefeedFormat.Format format = 3; // State of the feed State state = 4; + // State of emitting of virtual timestamps along with data + bool virtual_timestamps = 5; } message StoragePool { diff --git a/ydb/public/api/protos/ydb_topic.proto b/ydb/public/api/protos/ydb_topic.proto index 9b13a292e6..6fc5dd74ca 100644 --- a/ydb/public/api/protos/ydb_topic.proto +++ 
b/ydb/public/api/protos/ydb_topic.proto @@ -289,6 +289,8 @@ message StreamReadMessage { repeated TopicReadSettings topics_read_settings = 1; // Path of consumer that is used for reading by this session. string consumer = 2; + // Optional name. Will be shown in debug stat. + string reader_name = 3; message TopicReadSettings { // Topic path. @@ -499,6 +501,12 @@ message StreamReadMessage { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Control messages +// message representing statistics by seleveral windows +message MultipleWindowsStat { + int64 per_minute = 1; + int64 per_hour = 2; + int64 per_day = 3; +} // Consumer description. message Consumer { @@ -517,6 +525,20 @@ message Consumer { // Attributes of consumer map<string, string> attributes = 6; + + // Filled only when requested statistics in Describe*Request. + ConsumerStats consumer_stats = 7; + + message ConsumerStats { + // Minimal timestamp of last read from partitions. + google.protobuf.Timestamp min_partitions_last_read_time = 1; + // Maximum of differences between timestamp of read and write timestamp for all messages, read during last minute. + google.protobuf.Duration max_read_time_lag = 2; + // Maximum of differences between write timestamp and create timestamp for all messages, read during last minute. + google.protobuf.Duration max_write_time_lag = 3; + // Bytes read stastics. + MultipleWindowsStat bytes_read = 4; + } } // Consumer alter description. @@ -628,6 +650,9 @@ message DescribeTopicRequest { // Topic path. string path = 2; + + // Include topic statistics. + bool include_stats = 3; } // Describe topic response sent from server to client. @@ -678,6 +703,9 @@ message DescribeTopicResult { // Metering settings. MeteringMode metering_mode = 12; + // Statistics of topic. + TopicStats topic_stats = 13; + message PartitionInfo { // Partition identifier. 
int64 partition_id = 1; @@ -687,7 +715,111 @@ message DescribeTopicResult { repeated int64 child_partition_ids = 3; // Ids of partitions from which this partition was formed by split or merge. repeated int64 parent_partition_ids = 4; + + // Stats for partition, filled only when include_stats in request is true. + PartitionStats partition_stats = 5; } + + message TopicStats { + // Approximate size of topic. + int64 store_size_bytes = 1; + + // Minimum of timestamps of last write among all partitions. + google.protobuf.Timestamp min_last_write_time = 2; + // Maximum of differences between write timestamp and create timestamp for all messages, written during last minute. + google.protobuf.Duration max_write_time_lag = 3; + // How much bytes were written statistics. + MultipleWindowsStat bytes_written = 4; + } +} + + +// Describe topic's consumer request sent from client to server. +message DescribeConsumerRequest { + Ydb.Operations.OperationParams operation_params = 1; + + // Topic path. + string path = 2; + // Consumer name; + string consumer = 3; + // Include consumer statistics. + bool include_stats = 4; +} + +// Describe topic's consumer response sent from server to client. +// If topic is not existed then response status will be "SCHEME_ERROR". +message DescribeConsumerResponse { + // Result of request will be inside operation. + Ydb.Operations.Operation operation = 1; +} + +// Describe topic's consumer result message that will be inside DescribeConsumerResponse.operation. +message DescribeConsumerResult { + // Description of scheme object. + Ydb.Scheme.Entry self = 1; + + Consumer consumer = 2; + + repeated PartitionInfo partitions = 3; + + message PartitionInfo { + // Partition identifier. + int64 partition_id = 1; + // Is partition open for write. + bool active = 2; + // Ids of partitions which was formed when this partition was split or merged. 
+ repeated int64 child_partition_ids = 3; + // Ids of partitions from which this partition was formed by split or merge. + repeated int64 parent_partition_ids = 4; + + // Stats for partition, filled only when include_stats in request is true. + PartitionStats partition_stats = 5; + // Stats for consumer of this partition, filled only when include_stats in request is true. + PartitionConsumerStats partition_consumer_stats = 6; + } + + message PartitionConsumerStats { + // Last read offset from this partition. + int64 last_read_offset = 1; + // Committed offset for this partition. + int64 committed_offset = 2; + // Reading this partition read session identifier. + string read_session_id = 3; + + // Timestamp of providing this partition to this session by server. + google.protobuf.Timestamp partition_read_session_create_time = 4; + + // Timestamp of last read from this partition. + google.protobuf.Timestamp last_read_time = 5; + // Maximum of differences between timestamp of read and write timestamp for all messages, read during last minute. + google.protobuf.Duration max_read_time_lag = 6; + // Maximum of differences between write timestamp and create timestamp for all messages, read during last minute. + google.protobuf.Duration max_write_time_lag = 7; + + // How much bytes were read during several windows statistics from this partiton. + MultipleWindowsStat bytes_read = 8; + + // Read session name, provided by client. + string reader_name = 11; + // Host where read session connected. + int32 connection_node_id = 12; + } +} + +message PartitionStats { + // Partition contains messages with offsets in range [start, end). + OffsetsRange partition_offsets = 1; + // Approximate size of partition. + int64 store_size_bytes = 2; + // Timestamp of last write. + google.protobuf.Timestamp last_write_time = 3; + // Maximum of differences between write timestamp and create timestamp for all messages, written during last minute. 
+ google.protobuf.Duration max_write_time_lag = 4; + // How much bytes were written during several windows in this partition. + MultipleWindowsStat bytes_written = 5; + + // Host where tablet for this partition works. Useful for debugging purposes. + int32 partition_node_id = 8; } diff --git a/ydb/public/lib/ydb_cli/import/CMakeLists.txt b/ydb/public/lib/ydb_cli/import/CMakeLists.txt index d63a8b60fc..09d5b14d2c 100644 --- a/ydb/public/lib/ydb_cli/import/CMakeLists.txt +++ b/ydb/public/lib/ydb_cli/import/CMakeLists.txt @@ -14,6 +14,7 @@ target_link_libraries(lib-ydb_cli-import PUBLIC api-protos common cpp-client-ydb_proto + cpp-string_utils-csv ) target_sources(lib-ydb_cli-import PRIVATE ${CMAKE_SOURCE_DIR}/ydb/public/lib/ydb_cli/import/import.cpp diff --git a/ydb/public/lib/ydb_cli/import/import.cpp b/ydb/public/lib/ydb_cli/import/import.cpp index e9960a21f8..76b50c028b 100644 --- a/ydb/public/lib/ydb_cli/import/import.cpp +++ b/ydb/public/lib/ydb_cli/import/import.cpp @@ -11,6 +11,8 @@ #include <ydb/public/lib/ydb_cli/common/recursive_list.h> #include <ydb/public/lib/ydb_cli/dump/util/util.h> +#include <library/cpp/string_utils/csv/csv.h> + #include <util/generic/vector.h> #include <util/stream/file.h> #include <util/string/builder.h> @@ -111,20 +113,21 @@ TStatus TImportFileClient::UpsertCsv(const TString& dataFile, const TString& dbP special = true; } - // Do not use csvSettings.skip_rows. - for (ui32 i = 0; i < settings.SkipRows_; ++i) { - input.ReadLine(line); - } - + NCsvFormat::TLinesSplitter splitter(input); TString headerRow; if (settings.Header_) { - input.ReadLine(headerRow); + headerRow = splitter.ConsumeLine(); headerRow += '\n'; buffer = headerRow; csvSettings.set_header(true); special = true; } + // Do not use csvSettings.skip_rows. 
+ for (ui32 i = 0; i < settings.SkipRows_; ++i) { + splitter.ConsumeLine(); + } + if (special) { TString formatSettings; Y_PROTOBUF_SUPPRESS_NODISCARD csvSettings.SerializeToString(&formatSettings); @@ -133,14 +136,11 @@ TStatus TImportFileClient::UpsertCsv(const TString& dataFile, const TString& dbP std::deque<TAsyncStatus> inFlightRequests; - // TODO: better read - // * read serveral lines a time - // * support endlines inside quotes - // ReadLine() should count quotes for it and stop the line then counter is odd. - while (size_t sz = input.ReadLine(line)) { + ui64 readSize = 0; + while (TString line = splitter.ConsumeLine()) { buffer += line; - buffer += '\n'; // TODO: keep original endline? - + buffer += '\n'; + readSize += line.size(); if (buffer.Size() >= settings.BytesPerRequest_) { auto status = WaitForQueue(inFlightRequests, settings.MaxInFlightRequests_); if (!status.IsSuccess()) { diff --git a/ydb/public/sdk/cpp/client/ydb_proto/accessor.h b/ydb/public/sdk/cpp/client/ydb_proto/accessor.h index 1b4214208e..0e2f5bc575 100644 --- a/ydb/public/sdk/cpp/client/ydb_proto/accessor.h +++ b/ydb/public/sdk/cpp/client/ydb_proto/accessor.h @@ -39,6 +39,7 @@ public: static const Ydb::Table::DescribeTableResult& GetProto(const NTable::TTableDescription& tableDescription); static const Ydb::PersQueue::V1::DescribeTopicResult& GetProto(const NYdb::NPersQueue::TDescribeTopicResult& topicDescription); static const Ydb::Topic::DescribeTopicResult& GetProto(const NYdb::NTopic::TTopicDescription& topicDescription); + static const Ydb::Topic::DescribeConsumerResult& GetProto(const NYdb::NTopic::TConsumerDescription& consumerDescription); static const Ydb::Monitoring::SelfCheckResult& GetProto(const NYdb::NMonitoring::TSelfCheckResult& selfCheckResult); static NTable::TQueryStats FromProto(const Ydb::TableStats::QueryStats& queryStats); diff --git a/ydb/public/sdk/cpp/client/ydb_scheme/scheme.cpp b/ydb/public/sdk/cpp/client/ydb_scheme/scheme.cpp index 305a7e92d7..181d6a4fa7 
100644 --- a/ydb/public/sdk/cpp/client/ydb_scheme/scheme.cpp +++ b/ydb/public/sdk/cpp/client/ydb_scheme/scheme.cpp @@ -15,6 +15,52 @@ namespace NScheme { using namespace NThreading; using namespace Ydb::Scheme; +TVirtualTimestamp::TVirtualTimestamp(ui64 planStep, ui64 txId) + : PlanStep(planStep) + , TxId(txId) +{} + +TVirtualTimestamp::TVirtualTimestamp(const ::Ydb::VirtualTimestamp& proto) + : TVirtualTimestamp(proto.plan_step(), proto.tx_id()) +{} + +TString TVirtualTimestamp::ToString() const { + TString result; + TStringOutput out(result); + Out(out); + return result; +} + +void TVirtualTimestamp::Out(IOutputStream& o) const { + o << "{ plan_step: " << PlanStep + << ", tx_id: " << TxId + << " }"; +} + +bool TVirtualTimestamp::operator<(const TVirtualTimestamp& rhs) const { + return PlanStep < rhs.PlanStep && TxId < rhs.TxId; +} + +bool TVirtualTimestamp::operator<=(const TVirtualTimestamp& rhs) const { + return PlanStep <= rhs.PlanStep && TxId <= rhs.TxId; +} + +bool TVirtualTimestamp::operator>(const TVirtualTimestamp& rhs) const { + return PlanStep > rhs.PlanStep && TxId > rhs.TxId; +} + +bool TVirtualTimestamp::operator>=(const TVirtualTimestamp& rhs) const { + return PlanStep >= rhs.PlanStep && TxId >= rhs.TxId; +} + +bool TVirtualTimestamp::operator==(const TVirtualTimestamp& rhs) const { + return PlanStep == rhs.PlanStep && TxId == rhs.TxId; +} + +bool TVirtualTimestamp::operator!=(const TVirtualTimestamp& rhs) const { + return !(*this == rhs); +} + static ESchemeEntryType ConvertProtoEntryType(::Ydb::Scheme::Entry::Type entry) { switch (entry) { case ::Ydb::Scheme::Entry::DIRECTORY: @@ -44,6 +90,17 @@ static ESchemeEntryType ConvertProtoEntryType(::Ydb::Scheme::Entry::Type entry) } } +TSchemeEntry::TSchemeEntry(const ::Ydb::Scheme::Entry& proto) + : Name(proto.name()) + , Owner(proto.owner()) + , Type(ConvertProtoEntryType(proto.type())) + , SizeBytes(proto.size_bytes()) + , CreatedAt(proto.created_at()) +{ + 
PermissionToSchemeEntry(proto.effective_permissions(), &EffectivePermissions); + PermissionToSchemeEntry(proto.permissions(), &Permissions); +} + class TSchemeClient::TImpl : public TClientImplCommon<TSchemeClient::TImpl> { public: TImpl(std::shared_ptr<TGRpcConnectionsImpl>&& connections, const TCommonClientSettings& settings) @@ -79,21 +136,12 @@ public: auto extractor = [promise] (google::protobuf::Any* any, TPlainStatus status) mutable { - TSchemeEntry entry; + DescribePathResult result; if (any) { - DescribePathResult result; any->UnpackTo(&result); - entry.Name = result.self().name(); - entry.Owner = result.self().owner(); - entry.Type = ConvertProtoEntryType(result.self().type()); - entry.SizeBytes = result.self().size_bytes(); - PermissionToSchemeEntry(result.self().effective_permissions(), &entry.EffectivePermissions); - PermissionToSchemeEntry(result.self().permissions(), &entry.Permissions); } - TDescribePathResult val(std::move(entry), - TStatus(std::move(status))); - promise.SetValue(std::move(val)); + promise.SetValue(TDescribePathResult(TStatus(std::move(status)), result.self())); }; Connections_->RunDeferred<Ydb::Scheme::V1::SchemeService, DescribePathRequest, DescribePathResponse>( @@ -116,27 +164,17 @@ public: auto extractor = [promise] (google::protobuf::Any* any, TPlainStatus status) mutable { - TSchemeEntry entry; - TVector<TSchemeEntry> children; + ListDirectoryResult result; if (any) { - ListDirectoryResult result; any->UnpackTo(&result); - entry.Name = result.self().name(); - entry.Owner = result.self().owner(); - entry.Type = ConvertProtoEntryType(result.self().type()); - - for (const auto& child : result.children()) { - TSchemeEntry tmp; - tmp.Name = child.name(); - tmp.Owner = child.owner(); - tmp.Type = ConvertProtoEntryType(child.type()); - children.push_back(tmp); - } } - TListDirectoryResult val(std::move(children), std::move(entry), - TStatus(std::move(status))); - promise.SetValue(std::move(val)); + TVector<TSchemeEntry> 
children(Reserve(result.children().size())); + for (const auto& child : result.children()) { + children.emplace_back(child); + } + + promise.SetValue(TListDirectoryResult(TStatus(std::move(status)), result.self(), std::move(children))); }; Connections_->RunDeferred<Ydb::Scheme::V1::SchemeService, ListDirectoryRequest, ListDirectoryResponse>( @@ -199,23 +237,24 @@ public: //////////////////////////////////////////////////////////////////////////////// -TDescribePathResult::TDescribePathResult(TSchemeEntry&& entry, TStatus&& status) +TDescribePathResult::TDescribePathResult(TStatus&& status, const TSchemeEntry& entry) : TStatus(std::move(status)) - , Entry_(std::move(entry)) {} + , Entry_(entry) +{} -TSchemeEntry TDescribePathResult::GetEntry() const { +const TSchemeEntry& TDescribePathResult::GetEntry() const { CheckStatusOk("TDescribePathResult::GetEntry"); return Entry_; } //////////////////////////////////////////////////////////////////////////////// -TListDirectoryResult::TListDirectoryResult(TVector<TSchemeEntry>&& children, TSchemeEntry&& self, - TStatus&& status) - : TDescribePathResult(std::move(self), std::move(status)) - , Children_(std::move(children)) {} +TListDirectoryResult::TListDirectoryResult(TStatus&& status, const TSchemeEntry& self, TVector<TSchemeEntry>&& children) + : TDescribePathResult(std::move(status), self) + , Children_(std::move(children)) +{} -TVector<TSchemeEntry> TListDirectoryResult::GetChildren() const { +const TVector<TSchemeEntry>& TListDirectoryResult::GetChildren() const { CheckStatusOk("TListDirectoryResult::GetChildren"); return Children_; } @@ -252,3 +291,7 @@ TAsyncStatus TSchemeClient::ModifyPermissions(const TString& path, } // namespace NScheme } // namespace NYdb + +Y_DECLARE_OUT_SPEC(, NYdb::NScheme::TVirtualTimestamp, o, x) { + return x.Out(o); +} diff --git a/ydb/public/sdk/cpp/client/ydb_scheme/scheme.h b/ydb/public/sdk/cpp/client/ydb_scheme/scheme.h index cde748b88a..be67a8a018 100644 --- 
a/ydb/public/sdk/cpp/client/ydb_scheme/scheme.h +++ b/ydb/public/sdk/cpp/client/ydb_scheme/scheme.h @@ -2,6 +2,13 @@ #include <ydb/public/sdk/cpp/client/ydb_driver/driver.h> +namespace Ydb { + class VirtualTimestamp; + namespace Scheme { + class Entry; + } +} + namespace NYdb { namespace NScheme { @@ -34,6 +41,25 @@ enum class ESchemeEntryType : i32 { Topic = 17 }; +struct TVirtualTimestamp { + ui64 PlanStep = 0; + ui64 TxId = 0; + + TVirtualTimestamp() = default; + TVirtualTimestamp(ui64 planStep, ui64 txId); + TVirtualTimestamp(const ::Ydb::VirtualTimestamp& proto); + + TString ToString() const; + void Out(IOutputStream& o) const; + + bool operator<(const TVirtualTimestamp& rhs) const; + bool operator<=(const TVirtualTimestamp& rhs) const; + bool operator>(const TVirtualTimestamp& rhs) const; + bool operator>=(const TVirtualTimestamp& rhs) const; + bool operator==(const TVirtualTimestamp& rhs) const; + bool operator!=(const TVirtualTimestamp& rhs) const; +}; + struct TSchemeEntry { TString Name; TString Owner; @@ -41,6 +67,10 @@ struct TSchemeEntry { TVector<TPermissions> EffectivePermissions; TVector<TPermissions> Permissions; ui64 SizeBytes = 0; + TVirtualTimestamp CreatedAt; + + TSchemeEntry() = default; + TSchemeEntry(const ::Ydb::Scheme::Entry& proto); }; //////////////////////////////////////////////////////////////////////////////// @@ -126,8 +156,8 @@ private: class TDescribePathResult : public TStatus { public: - TDescribePathResult(TSchemeEntry&& entry, TStatus&& status); - TSchemeEntry GetEntry() const; + TDescribePathResult(TStatus&& status, const TSchemeEntry& entry); + const TSchemeEntry& GetEntry() const; private: TSchemeEntry Entry_; @@ -135,8 +165,8 @@ private: class TListDirectoryResult : public TDescribePathResult { public: - TListDirectoryResult(TVector<TSchemeEntry>&& children, TSchemeEntry&& self, TStatus&& status); - TVector<TSchemeEntry> GetChildren() const; + TListDirectoryResult(TStatus&& status, const TSchemeEntry& self, 
TVector<TSchemeEntry>&& children); + const TVector<TSchemeEntry>& GetChildren() const; private: TVector<TSchemeEntry> Children_; diff --git a/ydb/public/sdk/cpp/client/ydb_table/CMakeLists.txt b/ydb/public/sdk/cpp/client/ydb_table/CMakeLists.txt index 29702d2e8b..d333a8dcfc 100644 --- a/ydb/public/sdk/cpp/client/ydb_table/CMakeLists.txt +++ b/ydb/public/sdk/cpp/client/ydb_table/CMakeLists.txt @@ -20,6 +20,7 @@ target_link_libraries(cpp-client-ydb_table PUBLIC cpp-client-ydb_params cpp-client-ydb_proto cpp-client-ydb_result + cpp-client-ydb_scheme client-ydb_table-impl client-ydb_table-query_stats client-ydb_types-operation diff --git a/ydb/public/sdk/cpp/client/ydb_table/table.cpp b/ydb/public/sdk/cpp/client/ydb_table/table.cpp index a79995ba17..3366b29484 100644 --- a/ydb/public/sdk/cpp/client/ydb_table/table.cpp +++ b/ydb/public/sdk/cpp/client/ydb_table/table.cpp @@ -4067,7 +4067,8 @@ bool TPrepareQueryResult::IsQueryFromCache() const { TExplainQueryResult::TExplainQueryResult(TStatus&& status, TString&& plan, TString&& ast) : TStatus(std::move(status)) , Plan_(plan) - , Ast_(ast) {} + , Ast_(ast) +{} const TString& TExplainQueryResult::GetPlan() const { CheckStatusOk("TExplainQueryResult::GetPlan"); @@ -4083,8 +4084,9 @@ const TString& TExplainQueryResult::GetAst() const { TDescribeTableResult::TDescribeTableResult(TStatus&& status, Ydb::Table::DescribeTableResult&& desc, const TDescribeTableSettings& describeSettings) - : TStatus(std::move(status)) - , TableDescription_(std::move(desc), describeSettings) {} + : NScheme::TDescribePathResult(std::move(status), desc.self()) + , TableDescription_(std::move(desc), describeSettings) +{} TTableDescription TDescribeTableResult::GetTableDescription() const { CheckStatusOk("TDescribeTableResult::GetTableDescription"); diff --git a/ydb/public/sdk/cpp/client/ydb_table/table.h b/ydb/public/sdk/cpp/client/ydb_table/table.h index 54ece32d4c..250ae953f1 100644 --- a/ydb/public/sdk/cpp/client/ydb_table/table.h +++ 
b/ydb/public/sdk/cpp/client/ydb_table/table.h @@ -3,9 +3,10 @@ #include "table_enum.h" #include <ydb/public/sdk/cpp/client/ydb_driver/driver.h> +#include <ydb/public/sdk/cpp/client/ydb_params/params.h> #include <ydb/public/sdk/cpp/client/ydb_result/result.h> +#include <ydb/public/sdk/cpp/client/ydb_scheme/scheme.h> #include <ydb/public/sdk/cpp/client/ydb_table/query_stats/stats.h> -#include <ydb/public/sdk/cpp/client/ydb_params/params.h> #include <ydb/public/sdk/cpp/client/ydb_types/operation/operation.h> #include <util/generic/hash.h> @@ -435,9 +436,11 @@ public: TVector<TChangefeedDescription> GetChangefeedDescriptions() const; TMaybe<TTtlSettings> GetTtlSettings() const; + // Deprecated. Use GetEntry() of TDescribeTableResult instead const TString& GetOwner() const; const TVector<NScheme::TPermissions>& GetPermissions() const; const TVector<NScheme::TPermissions>& GetEffectivePermissions() const; + const TVector<TKeyRange>& GetKeyRanges() const; // Folow options related to table statistics @@ -1745,7 +1748,7 @@ private: }; //! 
Represents result of DescribeTable call -class TDescribeTableResult : public TStatus { +class TDescribeTableResult : public NScheme::TDescribePathResult { public: TDescribeTableResult(TStatus&& status, Ydb::Table::DescribeTableResult&& desc, const TDescribeTableSettings& describeSettings); @@ -1754,7 +1757,6 @@ public: private: TTableDescription TableDescription_; - }; class TDataQueryResult : public TStatus { diff --git a/ydb/public/sdk/cpp/client/ydb_topic/impl/topic.cpp b/ydb/public/sdk/cpp/client/ydb_topic/impl/topic.cpp index 028271dadb..fb6204ac23 100644 --- a/ydb/public/sdk/cpp/client/ydb_topic/impl/topic.cpp +++ b/ydb/public/sdk/cpp/client/ydb_topic/impl/topic.cpp @@ -27,6 +27,16 @@ const TTopicDescription& TDescribeTopicResult::GetTopicDescription() const { return TopicDescription_; } +TDescribeConsumerResult::TDescribeConsumerResult(TStatus&& status, Ydb::Topic::DescribeConsumerResult&& result) + : TStatus(std::move(status)) + , ConsumerDescription_(std::move(result)) +{ +} + +const TConsumerDescription& TDescribeConsumerResult::GetConsumerDescription() const { + return ConsumerDescription_; +} + TTopicDescription::TTopicDescription(Ydb::Topic::DescribeTopicResult&& result) : Proto_(std::move(result)) , PartitioningSettings_(Proto_.partitioning_settings()) @@ -54,6 +64,16 @@ TTopicDescription::TTopicDescription(Ydb::Topic::DescribeTopicResult&& result) } } +TConsumerDescription::TConsumerDescription(Ydb::Topic::DescribeConsumerResult&& result) + : Proto_(std::move(result)) + , Consumer_(result.consumer()) +{ + for (const auto& part : Proto_.partitions()) { + Partitions_.emplace_back(part); + } +} + + TConsumer::TConsumer(const Ydb::Topic::Consumer& consumer) : ConsumerName_(consumer.name()) , Important_(consumer.important()) @@ -95,7 +115,11 @@ ui32 TTopicDescription::GetTotalPartitionsCount() const { return Partitions_.size(); } -const TVector<TPartitionInfo>& TTopicDescription::GetPartitions() const { +const TVector<TPartitionInfo>& 
TTopicDescription::GetPartitions() const { + return Partitions_; +} + +const TVector<TPartitionInfo>& TConsumerDescription::GetPartitions() const { return Partitions_; } @@ -140,6 +164,10 @@ const Ydb::Topic::DescribeTopicResult& TTopicDescription::GetProto() const { return Proto_; } +const Ydb::Topic::DescribeConsumerResult& TConsumerDescription::GetProto() const { + return Proto_; +} + const TString& TTopicDescription::GetOwner() const { return Owner_; } @@ -165,9 +193,50 @@ ui64 TPartitioningSettings::GetPartitionCountLimit() const { return PartitionCountLimit_; } +TPartitionStats::TPartitionStats(const Ydb::Topic::PartitionStats& partitionStats) + : StartOffset_(partitionStats.partition_offsets().start()) + , EndOffset_(partitionStats.partition_offsets().end()) +{} + +ui64 TPartitionStats::GetStartOffset() const { + return StartOffset_; +} + +ui64 TPartitionStats::GetEndOffset() const { + return EndOffset_; +} + +TPartitionConsumerStats::TPartitionConsumerStats(const Ydb::Topic::DescribeConsumerResult::PartitionConsumerStats& partitionStats) + : CommittedOffset_(partitionStats.committed_offset()) +{} + +ui64 TPartitionConsumerStats::GetCommittedOffset() const { + return CommittedOffset_; +} + + + TPartitionInfo::TPartitionInfo(const Ydb::Topic::DescribeTopicResult::PartitionInfo& partitionInfo) : PartitionId_(partitionInfo.partition_id()) , Active_(partitionInfo.active()) + , PartitionStats_() +{ + for (const auto& partId : partitionInfo.child_partition_ids()) { + ChildPartitionIds_.push_back(partId); + } + + for (const auto& partId : partitionInfo.parent_partition_ids()) { + ParentPartitionIds_.push_back(partId); + } + if (partitionInfo.has_partition_stats()) { + PartitionStats_ = TPartitionStats{partitionInfo.partition_stats()}; + } +} + +TPartitionInfo::TPartitionInfo(const Ydb::Topic::DescribeConsumerResult::PartitionInfo& partitionInfo) + : PartitionId_(partitionInfo.partition_id()) + , Active_(partitionInfo.active()) + , PartitionStats_() { for (const 
auto& partId : partitionInfo.child_partition_ids()) { ChildPartitionIds_.push_back(partId); @@ -176,6 +245,18 @@ TPartitionInfo::TPartitionInfo(const Ydb::Topic::DescribeTopicResult::PartitionI for (const auto& partId : partitionInfo.parent_partition_ids()) { ParentPartitionIds_.push_back(partId); } + if (partitionInfo.has_partition_stats()) { + PartitionStats_ = TPartitionStats{partitionInfo.partition_stats()}; + PartitionConsumerStats_ = TPartitionConsumerStats{partitionInfo.partition_consumer_stats()}; + } +} + +const TMaybe<TPartitionStats>& TPartitionInfo::GetPartitionStats() const { + return PartitionStats_; +} + +const TMaybe<TPartitionConsumerStats>& TPartitionInfo::GetPartitionConsumerStats() const { + return PartitionConsumerStats_; } @@ -196,6 +277,10 @@ TAsyncDescribeTopicResult TTopicClient::DescribeTopic(const TString& path, const return Impl_->DescribeTopic(path, settings); } +TAsyncDescribeConsumerResult TTopicClient::DescribeConsumer(const TString& path, const TString& consumer, const TDescribeConsumerSettings& settings) { + return Impl_->DescribeConsumer(path, consumer, settings); +} + IRetryPolicy::TPtr IRetryPolicy::GetDefaultPolicy() { static IRetryPolicy::TPtr policy = GetExponentialBackoffPolicy(); return policy; diff --git a/ydb/public/sdk/cpp/client/ydb_topic/impl/topic_impl.h b/ydb/public/sdk/cpp/client/ydb_topic/impl/topic_impl.h index 15589ab160..61d059c298 100644 --- a/ydb/public/sdk/cpp/client/ydb_topic/impl/topic_impl.h +++ b/ydb/public/sdk/cpp/client/ydb_topic/impl/topic_impl.h @@ -174,6 +174,10 @@ public: auto request = MakeOperationRequest<Ydb::Topic::DescribeTopicRequest>(settings); request.set_path(path); + if (settings.IncludeStats_) { + request.set_include_stats(true); + } + auto promise = NThreading::NewPromise<TDescribeTopicResult>(); auto extractor = [promise] @@ -199,6 +203,40 @@ public: return promise.GetFuture(); } + TAsyncDescribeConsumerResult DescribeConsumer(const TString& path, const TString& consumer, const 
TDescribeConsumerSettings& settings) { + auto request = MakeOperationRequest<Ydb::Topic::DescribeConsumerRequest>(settings); + request.set_path(path); + request.set_consumer(consumer); + + if (settings.IncludeStats_) { + request.set_include_stats(true); + } + + auto promise = NThreading::NewPromise<TDescribeConsumerResult>(); + + auto extractor = [promise] + (google::protobuf::Any* any, TPlainStatus status) mutable { + Ydb::Topic::DescribeConsumerResult result; + if (any) { + any->UnpackTo(&result); + } + + TDescribeConsumerResult val(TStatus(std::move(status)), std::move(result)); + promise.SetValue(std::move(val)); + }; + + Connections_->RunDeferred<Ydb::Topic::V1::TopicService, Ydb::Topic::DescribeConsumerRequest, Ydb::Topic::DescribeConsumerResponse>( + std::move(request), + extractor, + &Ydb::Topic::V1::TopicService::Stub::AsyncDescribeConsumer, + DbDriverState_, + INITIAL_DEFERRED_CALL_DELAY, + TRpcRequestSettings::Make(settings), + settings.ClientTimeout_); + + return promise.GetFuture(); + } + // Runtime API. 
std::shared_ptr<IReadSession> CreateReadSession(const TReadSessionSettings& settings); diff --git a/ydb/public/sdk/cpp/client/ydb_topic/proto_accessor.cpp b/ydb/public/sdk/cpp/client/ydb_topic/proto_accessor.cpp index b0e7353d25..9ae37fc3d0 100644 --- a/ydb/public/sdk/cpp/client/ydb_topic/proto_accessor.cpp +++ b/ydb/public/sdk/cpp/client/ydb_topic/proto_accessor.cpp @@ -5,6 +5,10 @@ namespace NYdb { return topicDescription.GetProto(); } + const Ydb::Topic::DescribeConsumerResult& TProtoAccessor::GetProto(const NTopic::TConsumerDescription& consumerDescription) { + return consumerDescription.GetProto(); + } + Ydb::Topic::MeteringMode TProtoAccessor::GetProto(NTopic::EMeteringMode mode) { switch (mode) { case NTopic::EMeteringMode::Unspecified: diff --git a/ydb/public/sdk/cpp/client/ydb_topic/topic.h b/ydb/public/sdk/cpp/client/ydb_topic/topic.h index 8daa9ca262..96e8af8ee8 100644 --- a/ydb/public/sdk/cpp/client/ydb_topic/topic.h +++ b/ydb/public/sdk/cpp/client/ydb_topic/topic.h @@ -62,19 +62,46 @@ private: TVector<ECodec> SupportedCodecs_; }; +class TPartitionStats { +public: + TPartitionStats(const Ydb::Topic::PartitionStats& partitionStats); + + ui64 GetStartOffset() const; + ui64 GetEndOffset() const; +private: + ui64 StartOffset_; + ui64 EndOffset_; +}; + +class TPartitionConsumerStats { +public: + TPartitionConsumerStats(const Ydb::Topic::DescribeConsumerResult::PartitionConsumerStats& partitionStats); + ui64 GetCommittedOffset() const; + +private: + ui64 CommittedOffset_; +}; + class TPartitionInfo { public: TPartitionInfo(const Ydb::Topic::DescribeTopicResult::PartitionInfo& partitionInfo); + TPartitionInfo(const Ydb::Topic::DescribeConsumerResult::PartitionInfo& partitionInfo); + ui64 GetPartitionId() const; bool GetActive() const; const TVector<ui64> GetChildPartitionIds() const; const TVector<ui64> GetParentPartitionIds() const; + const TMaybe<TPartitionStats>& GetPartitionStats() const; + const TMaybe<TPartitionConsumerStats>& GetPartitionConsumerStats() 
const; + private: ui64 PartitionId_; bool Active_; TVector<ui64> ChildPartitionIds_; TVector<ui64> ParentPartitionIds_; + TMaybe<TPartitionStats> PartitionStats_; + TMaybe<TPartitionConsumerStats> PartitionConsumerStats_; }; @@ -149,6 +176,27 @@ private: }; +class TConsumerDescription { + friend class NYdb::TProtoAccessor; + +public: + TConsumerDescription(Ydb::Topic::DescribeConsumerResult&& desc); + + const TVector<TPartitionInfo>& GetPartitions() const; + + const TConsumer& GetConsumer() const; + +private: + + const Ydb::Topic::DescribeConsumerResult& GetProto() const; + + + const Ydb::Topic::DescribeConsumerResult Proto_; + TVector<TPartitionInfo> Partitions_; + TConsumer Consumer_; +}; + + // Result for describe resource request. struct TDescribeTopicResult : public TStatus { friend class NYdb::TProtoAccessor; @@ -162,7 +210,22 @@ private: TTopicDescription TopicDescription_; }; +// Result for describe resource request. +struct TDescribeConsumerResult : public TStatus { + friend class NYdb::TProtoAccessor; + + + TDescribeConsumerResult(TStatus&& status, Ydb::Topic::DescribeConsumerResult&& result); + + const TConsumerDescription& GetConsumerDescription() const; + +private: + TConsumerDescription ConsumerDescription_; +}; + + using TAsyncDescribeTopicResult = NThreading::TFuture<TDescribeTopicResult>; +using TAsyncDescribeConsumerResult = NThreading::TFuture<TDescribeConsumerResult>; template <class TSettings> class TAlterAttributesBuilderImpl { @@ -418,7 +481,14 @@ struct TAlterTopicSettings : public TOperationRequestSettings<TAlterTopicSetting struct TDropTopicSettings : public TOperationRequestSettings<TDropTopicSettings> {}; // Settings for describe resource request. -struct TDescribeTopicSettings : public TOperationRequestSettings<TDescribeTopicSettings> {}; +struct TDescribeTopicSettings : public TOperationRequestSettings<TDescribeTopicSettings> { + FLUENT_SETTING_DEFAULT(bool, IncludeStats, false); +}; + +// Settings for describe resource request. 
+struct TDescribeConsumerSettings : public TOperationRequestSettings<TDescribeConsumerSettings> { + FLUENT_SETTING_DEFAULT(bool, IncludeStats, false); +}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1191,6 +1261,9 @@ public: // Describe settings of topic. TAsyncDescribeTopicResult DescribeTopic(const TString& path, const TDescribeTopicSettings& = {}); + // Describe settings of topic's consumer. + TAsyncDescribeConsumerResult DescribeConsumer(const TString& path, const TString& consumer, const TDescribeConsumerSettings& = {}); + //! Create read session. std::shared_ptr<IReadSession> CreateReadSession(const TReadSessionSettings& settings); diff --git a/ydb/services/datastreams/datastreams_proxy.cpp b/ydb/services/datastreams/datastreams_proxy.cpp index 95e1edadd0..b6af95629e 100644 --- a/ydb/services/datastreams/datastreams_proxy.cpp +++ b/ydb/services/datastreams/datastreams_proxy.cpp @@ -1161,12 +1161,12 @@ namespace NKikimr::NDataStreams::V1 { using TIteratorType = Ydb::DataStreams::V1::ShardIteratorType; void SendResponse(const TActorContext& ctx, const TShardIterator& shardIt); - std::optional<ui32> SequenceNumberToInt(const TString& sequenceNumberStr); + std::optional<ui64> SequenceNumberToInt(const TString& sequenceNumberStr); TString StreamName; TString ShardId; TIteratorType IteratorType; - ui32 SequenceNumber; + ui64 SequenceNumber; ui64 ReadTimestampMs; }; @@ -1273,9 +1273,9 @@ namespace NKikimr::NDataStreams::V1 { Die(ctx); } - std::optional<ui32> TGetShardIteratorActor::SequenceNumberToInt(const TString& sequenceNumberStr) { + std::optional<ui64> TGetShardIteratorActor::SequenceNumberToInt(const TString& sequenceNumberStr) { try { - return std::stoi(sequenceNumberStr.c_str()); + return std::stoull(sequenceNumberStr.c_str()); } catch(...) 
{ return std::nullopt; } @@ -1309,7 +1309,6 @@ namespace NKikimr::NDataStreams::V1 { private: void SendReadRequest(const TActorContext& ctx); - void PrepareResponse(const std::vector<Ydb::DataStreams::V1::Record>& records, ui64 millisBehindLatestMs); void SendResponse(const TActorContext& ctx); ui64 GetPayloadSize() const; @@ -1435,7 +1434,9 @@ namespace NKikimr::NDataStreams::V1 { switch (record.GetErrorCode()) { case NPersQueue::NErrorCode::READ_ERROR_TOO_SMALL_OFFSET: case NPersQueue::NErrorCode::READ_ERROR_TOO_BIG_OFFSET: - PrepareResponse({}, 0); + Result.set_next_shard_iterator(TShardIterator(ShardIterator).Serialize()); + Result.set_millis_behind_latest(0); + if (IsQuotaRequired()) { Y_VERIFY(MaybeRequestQuota(1, EWakeupTag::RlAllowed, ctx)); } else { @@ -1451,28 +1452,34 @@ namespace NKikimr::NDataStreams::V1 { default: {} } - ui64 millisBehindLatestMs = 0; - std::vector<Ydb::DataStreams::V1::Record> records; + TShardIterator shardIterator(ShardIterator); const auto& response = record.GetPartitionResponse(); if (response.HasCmdReadResult()) { const auto& results = response.GetCmdReadResult().GetResult(); - records.reserve(results.size()); for (auto& r : results) { auto proto(NKikimr::GetDeserializedData(r.GetData())); - Ydb::DataStreams::V1::Record record; - record.set_data(proto.GetData()); - record.set_timestamp(r.GetCreateTimestampMS()); - record.set_encryption(Ydb::DataStreams::V1::EncryptionType::NONE); - record.set_partition_key(r.GetPartitionKey()); - record.set_sequence_number(std::to_string(r.GetOffset()).c_str()); - records.push_back(record); + auto record = Result.add_records(); + record->set_data(proto.GetData()); + record->set_approximate_arrival_timestamp(r.GetCreateTimestampMS()); + record->set_encryption_type(Ydb::DataStreams::V1::EncryptionType::NONE); + record->set_partition_key(r.GetPartitionKey()); + record->set_sequence_number(std::to_string(r.GetOffset()).c_str()); + if (proto.GetCodec() > 0) { + record->set_codec(proto.GetCodec() + 
1); + } + } + if (!results.empty()) { + auto last = results.rbegin(); + shardIterator.SetReadTimestamp(last->GetCreateTimestampMS() + 1); + shardIterator.SetSequenceNumber(last->GetOffset() + 1); + Result.set_millis_behind_latest(TInstant::Now().MilliSeconds() - last->GetWriteTimestampMS()); + } else { // remove else? + Result.set_millis_behind_latest(0); } - millisBehindLatestMs = records.size() > 0 - ? TInstant::Now().MilliSeconds() - results.rbegin()->GetWriteTimestampMS() - : 0; } - PrepareResponse(records, millisBehindLatestMs); + Result.set_next_shard_iterator(shardIterator.Serialize()); + if (IsQuotaRequired()) { const auto ru = 1 + CalcRuConsumption(GetPayloadSize()); Y_VERIFY(MaybeRequestQuota(ru, EWakeupTag::RlAllowed, ctx)); @@ -1504,24 +1511,6 @@ namespace NKikimr::NDataStreams::V1 { } } - void TGetRecordsActor::PrepareResponse(const std::vector<Ydb::DataStreams::V1::Record>& records, ui64 millisBehindLatestMs) { - for (auto& r : records) { - auto record = Result.add_records(); - *record = r; - } - - auto timestamp = records.size() > 0 ? records.back().Gettimestamp() + 1 - : ShardIterator.GetReadTimestamp(); - auto seqNo = records.size() > 0 ? 
std::stoi(records.back().Getsequence_number()) + 1 - : ShardIterator.GetSequenceNumber(); - TShardIterator shardIterator(ShardIterator.GetStreamName(), - ShardIterator.GetStreamArn(), - ShardIterator.GetShardId(), - timestamp, seqNo, ShardIterator.GetKind()); - Result.set_next_shard_iterator(shardIterator.Serialize()); - Result.set_millis_behind_latest(millisBehindLatestMs); - } - void TGetRecordsActor::SendResponse(const TActorContext& ctx) { Request_->SendResult(Result, Ydb::StatusIds::SUCCESS); Die(ctx); diff --git a/ydb/services/datastreams/datastreams_ut.cpp b/ydb/services/datastreams/datastreams_ut.cpp index 455b296a8d..5e6e8933ab 100644 --- a/ydb/services/datastreams/datastreams_ut.cpp +++ b/ydb/services/datastreams/datastreams_ut.cpp @@ -2569,4 +2569,50 @@ Y_UNIT_TEST_SUITE(DataStreams) { UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); } } + + Y_UNIT_TEST(TestGetRecordsWithBigSeqno) { + TInsecureDatastreamsTestServer testServer; + const TString streamName = TStringBuilder() << "stream_" << Y_UNIT_TEST_NAME; + { + auto result = testServer.DataStreamsClient->CreateStream(streamName, + NYDS_V1::TCreateStreamSettings().ShardCount(1)).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + std::vector<NYDS_V1::TDataRecord> records; + records.push_back({ + .Data = "overflow", + .PartitionKey = "overflow", + .ExplicitHashDecimal = "", + }); + + { + auto result = testServer.DataStreamsClient->PutRecords(streamName, records).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); + if (result.GetStatus() != EStatus::SUCCESS) { + result.GetIssues().PrintTo(Cerr); + } + } + + TString shardIterator; + { + auto seqNo = std::to_string(static_cast<ui64>(std::numeric_limits<ui32>::max()) + 5); + auto result = testServer.DataStreamsClient->GetShardIterator( + streamName, + "shard-000000", + 
YDS_V1::ShardIteratorType::LATEST, + NYDS_V1::TGetShardIteratorSettings().StartingSequenceNumber(seqNo.c_str()) + ).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + shardIterator = result.GetResult().shard_iterator(); + } + + { + auto result = testServer.DataStreamsClient->GetRecords(shardIterator).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResult().records().size(), 0); + } + } } diff --git a/ydb/services/datastreams/next_token.h b/ydb/services/datastreams/next_token.h index 1fe61997bc..46ee2797ec 100644 --- a/ydb/services/datastreams/next_token.h +++ b/ydb/services/datastreams/next_token.h @@ -6,77 +6,76 @@ namespace NKikimr::NDataStreams::V1 { - class TNextToken { - public: - static constexpr ui32 LIFETIME_MS = 300*1000; - - TNextToken(const TString& nextToken) - : Valid{true} - { - try { - TString decoded; - Base64Decode(nextToken, decoded); - auto ok = Proto.ParseFromString(decoded); - if (ok) { - Valid = IsAlive(TInstant::Now().MilliSeconds()); - } else { - Valid = false; - } - } catch (std::exception&) { - Valid = false; - } - } - - TNextToken(const TString& streamArn, ui32 alreadyRead, ui32 maxResults, ui64 creationTimestamp) - : Valid{true} - { - Proto.SetStreamArn(streamArn); - Proto.SetAlreadyRead(alreadyRead); - Proto.SetMaxResults(maxResults); - Proto.SetCreationTimestamp(creationTimestamp); - } - - TString Serialize() const { - TString data; - bool result = Proto.SerializeToString(&data); - Y_VERIFY(result); - TString encoded; - Base64Encode(data, encoded); - return encoded; - } - - ui32 GetAlreadyRead() const { - return Proto.GetAlreadyRead(); - } - - TString GetStreamArn() const { - return Proto.GetStreamArn(); - } - - TString GetStreamName() 
const { - return Proto.GetStreamArn(); - } - - ui32 GetMaxResults() const { - return Proto.GetMaxResults(); - } - - ui32 GetCreationTimestamp() const { - return Proto.GetCreationTimestamp(); - } - - bool IsAlive(ui64 now) const { - return now >= Proto.GetCreationTimestamp() && - (now - Proto.GetCreationTimestamp()) < LIFETIME_MS; - } - - bool IsValid() const { - return Valid && Proto.GetStreamArn().size() > 0; - } - - private: - bool Valid; - NKikimrPQ::TYdsNextToken Proto; - }; +class TNextToken { +public: +static constexpr ui64 LIFETIME_MS = TDuration::Minutes(5).MilliSeconds(); + +TNextToken(const TString& nextToken): Expired{false}, Valid{true} +{ + try { + TString decoded; + Base64StrictDecode(nextToken, decoded); + Valid = Proto.ParseFromString(decoded) && IsAlive(TInstant::Now().MilliSeconds()); + Expired = !IsAlive(TInstant::Now().MilliSeconds()); + } catch (std::exception&) { + Valid = false; + } +} + +TNextToken(const TString& streamArn, ui32 alreadyRead, ui32 maxResults, ui64 creationTimestamp) +: Expired{false}, Valid{true} { + Proto.SetStreamArn(streamArn); + Proto.SetAlreadyRead(alreadyRead); + Proto.SetMaxResults(maxResults); + Proto.SetCreationTimestamp(creationTimestamp); +} + +TString Serialize() const { + TString data; + bool result = Proto.SerializeToString(&data); + Y_VERIFY(result); + TString encoded; + Base64Encode(data, encoded); + return encoded; +} + +ui32 GetAlreadyRead() const { + return Proto.GetAlreadyRead(); +} + +TString GetStreamArn() const { + return Proto.GetStreamArn(); +} + +TString GetStreamName() const { + return Proto.GetStreamArn(); +} + +ui32 GetMaxResults() const { + return Proto.GetMaxResults(); +} + +ui64 GetCreationTimestamp() const { + return Proto.GetCreationTimestamp(); +} + +bool IsAlive(ui64 now) const { + return now >= GetCreationTimestamp() && + (now - GetCreationTimestamp()) < LIFETIME_MS; +} + +bool IsExpired() const { + return Expired; +} + +bool IsValid() const { + return Valid && GetStreamArn().size() > 0; 
+} + +private: +bool Expired; +bool Valid; +NKikimrPQ::TYdsNextToken Proto; +}; } // namespace NKikimr::NDataStreams::V1 diff --git a/ydb/services/datastreams/shard_iterator.h b/ydb/services/datastreams/shard_iterator.h index 626f5110b7..e426bba750 100644 --- a/ydb/services/datastreams/shard_iterator.h +++ b/ydb/services/datastreams/shard_iterator.h @@ -1,98 +1,141 @@ #pragma once #include <ydb/core/protos/pqconfig.pb.h> +#include <ydb/core/protos/msgbus_pq.pb.h> #include <library/cpp/string_utils/base64/base64.h> #include <util/datetime/base.h> namespace NKikimr::NDataStreams::V1 { - class TShardIterator { - public: - static constexpr ui64 LIFETIME_MS = 5*60*1000; - - TShardIterator(const TString& iteratorStr) { - try { - TString decoded; - Base64Decode(iteratorStr, decoded); - Valid = Proto.ParseFromString(decoded) && IsAlive(TInstant::Now().MilliSeconds()); - } catch (std::exception&) { - Valid = false; - } - } - - TShardIterator(const TString& streamName, const TString& streamArn, - ui32 shardId, ui64 readTimestamp, ui32 sequenceNumber, - NKikimrPQ::TYdsShardIterator::ETopicKind kind = NKikimrPQ::TYdsShardIterator::KIND_COMMON) { - Proto.SetStreamName(streamName); - Proto.SetStreamArn(streamArn); - Proto.SetShardId(shardId); - Proto.SetReadTimestampMs(readTimestamp); - Proto.SetSequenceNumber(sequenceNumber); - Proto.SetCreationTimestampMs(TInstant::Now().MilliSeconds()); - Proto.SetKind(kind); - Valid = true; - } - - static TShardIterator Common(const TString& streamName, const TString& streamArn, - ui32 shardId, ui64 readTimestamp, ui32 sequenceNumber) { - return TShardIterator(streamName, streamArn, shardId, readTimestamp, sequenceNumber); - } - - static TShardIterator Cdc(const TString& streamName, const TString& streamArn, - ui32 shardId, ui64 readTimestamp, ui32 sequenceNumber) { - return TShardIterator(streamName, streamArn, shardId, readTimestamp, sequenceNumber, - NKikimrPQ::TYdsShardIterator::KIND_CDC); - } - - TString Serialize() const { - TString 
data; - bool result = Proto.SerializeToString(&data); - Y_VERIFY(result); - TString encoded; - Base64Encode(data, encoded); - return encoded; - } - - TString GetStreamName() const { - return Proto.GetStreamName(); - } - - TString GetStreamArn() const { - return Proto.GetStreamArn(); - } - - ui32 GetShardId() const { - return Proto.GetShardId(); - } - - ui64 GetReadTimestamp() const { - return Proto.GetReadTimestampMs(); - } - - ui32 GetSequenceNumber() const { - return Proto.GetSequenceNumber(); - } - - bool IsAlive(ui64 now) const { - return now >= Proto.GetCreationTimestampMs() && now - - Proto.GetCreationTimestampMs() < LIFETIME_MS; - } - - NKikimrPQ::TYdsShardIterator::ETopicKind GetKind() const { - return Proto.GetKind(); - } - - bool IsCdcTopic() const { - return Proto.GetKind() == NKikimrPQ::TYdsShardIterator::KIND_CDC; - } - - bool IsValid() const { - return Valid; - } - - private: - bool Valid; - NKikimrPQ::TYdsShardIterator Proto; - }; +class TShardIterator { +using TPartitionOffset = + std::invoke_result_t<decltype(&NKikimrClient::TCmdReadResult_TResult::GetOffset), + NKikimrClient::TCmdReadResult_TResult>; +using TYdsSeqNo = + std::invoke_result_t<decltype(&NKikimrPQ::TYdsShardIterator::GetSequenceNumber), + NKikimrPQ::TYdsShardIterator>; +static_assert(std::is_same<TPartitionOffset, TYdsSeqNo>::value, + "Types of partition message offset and yds record sequence number should match"); + +using TCreationTimestamp = + std::invoke_result_t<decltype(&NKikimrClient::TCmdReadResult_TResult::GetCreateTimestampMS), + NKikimrClient::TCmdReadResult_TResult>; +using TYdsTimestamp = + std::invoke_result_t<decltype(&NKikimrPQ::TYdsShardIterator::GetReadTimestampMs), + NKikimrPQ::TYdsShardIterator>; +static_assert(std::is_same<TCreationTimestamp, TYdsTimestamp>::value, + "Types of partition message creation timestamp and yds record timestamp should match"); + +public: +static constexpr ui64 LIFETIME_MS = TDuration::Minutes(5).MilliSeconds(); + +TShardIterator(const 
TString& iteratorStr) : Expired{false}, Valid{true} { + try { + TString decoded; + Base64StrictDecode(iteratorStr, decoded); + Valid = Proto.ParseFromString(decoded) && IsAlive(TInstant::Now().MilliSeconds()); + Expired = !IsAlive(TInstant::Now().MilliSeconds()); + } catch (std::exception&) { + Valid = false; + } +} + +TShardIterator(const TString& streamName, const TString& streamArn, + ui32 shardId, ui64 readTimestamp, ui64 sequenceNumber, + NKikimrPQ::TYdsShardIterator::ETopicKind kind = NKikimrPQ::TYdsShardIterator::KIND_COMMON) + : Expired{false}, Valid{true} { + Proto.SetStreamName(streamName); + Proto.SetStreamArn(streamArn); + Proto.SetShardId(shardId); + Proto.SetReadTimestampMs(readTimestamp); + Proto.SetSequenceNumber(sequenceNumber); + Proto.SetCreationTimestampMs(TInstant::Now().MilliSeconds()); + Proto.SetKind(kind); +} + +TShardIterator(const TShardIterator& other) : TShardIterator( + other.GetStreamName(), + other.GetStreamArn(), + other.GetShardId(), + other.GetReadTimestamp(), + other.GetSequenceNumber(), + other.GetKind() +) {} + + +static TShardIterator Common(const TString& streamName, const TString& streamArn, + ui32 shardId, ui64 readTimestamp, ui64 sequenceNumber) { + return TShardIterator(streamName, streamArn, shardId, readTimestamp, sequenceNumber); +} + +static TShardIterator Cdc(const TString& streamName, const TString& streamArn, + ui32 shardId, ui64 readTimestamp, ui64 sequenceNumber) { + return TShardIterator(streamName, streamArn, shardId, readTimestamp, sequenceNumber, + NKikimrPQ::TYdsShardIterator::KIND_CDC); +} + +TString Serialize() const { + TString data; + bool result = Proto.SerializeToString(&data); + Y_VERIFY(result); + TString encoded; + Base64Encode(data, encoded); + return encoded; +} + +TString GetStreamName() const { + return Proto.GetStreamName(); +} + +TString GetStreamArn() const { + return Proto.GetStreamArn(); +} + +ui32 GetShardId() const { + return Proto.GetShardId(); +} + +ui64 GetReadTimestamp() const { + 
return Proto.GetReadTimestampMs(); +} + +void SetReadTimestamp(ui64 ts) { + Proto.SetReadTimestampMs(ts); +} + +ui64 GetSequenceNumber() const { + return Proto.GetSequenceNumber(); +} + +void SetSequenceNumber(ui64 seqno) { + Proto.SetSequenceNumber(seqno); +} + +bool IsAlive(ui64 now) const { + return now >= Proto.GetCreationTimestampMs() && now - + Proto.GetCreationTimestampMs() < LIFETIME_MS; +} + +NKikimrPQ::TYdsShardIterator::ETopicKind GetKind() const { + return Proto.GetKind(); +} + +bool IsCdcTopic() const { + return Proto.GetKind() == NKikimrPQ::TYdsShardIterator::KIND_CDC; +} + +bool IsValid() const { + return Valid; +} + +bool IsExpired() const { + return Expired; +} + +private: +bool Expired; +bool Valid; +NKikimrPQ::TYdsShardIterator Proto; +}; } // namespace NKikimr::NDataStreams::V1 diff --git a/ydb/services/lib/actors/pq_schema_actor.cpp b/ydb/services/lib/actors/pq_schema_actor.cpp index b201f9d66f..5c24df2747 100644 --- a/ydb/services/lib/actors/pq_schema_actor.cpp +++ b/ydb/services/lib/actors/pq_schema_actor.cpp @@ -638,11 +638,6 @@ namespace NKikimr::NGRpcProxy::V1 { switch (settings.retention_case()) { case Ydb::PersQueue::V1::TopicSettings::kRetentionPeriodMs: { - if (settings.retention_period_ms() <= 0) { - error = TStringBuilder() << "retention_period_ms must be positive, provided " << - settings.retention_period_ms(); - return Ydb::StatusIds::BAD_REQUEST; - } partConfig->SetLifetimeSeconds(Max(settings.retention_period_ms() / 1000ll, 1ll)); } break; @@ -956,11 +951,6 @@ namespace NKikimr::NGRpcProxy::V1 { partConfig->MutableExplicitChannelProfiles()->CopyFrom(channelProfiles); } if (request.has_retention_period()) { - if (request.retention_period().seconds() <= 0) { - error = TStringBuilder() << "retention_period must be not negative, provided " << - request.retention_period().DebugString(); - return Ydb::StatusIds::BAD_REQUEST; - } partConfig->SetLifetimeSeconds(request.retention_period().seconds()); } else { 
partConfig->SetLifetimeSeconds(TDuration::Days(1).Seconds()); @@ -1060,16 +1050,7 @@ namespace NKikimr::NGRpcProxy::V1 { if (request.has_set_retention_period()) { CHECK_CDC; - if (request.set_retention_period().seconds() < 0) { - error = TStringBuilder() << "retention_period must be not negative, provided " << - request.set_retention_period().DebugString(); - return Ydb::StatusIds::BAD_REQUEST; - } - if (request.set_retention_period().seconds() > 0) { - partConfig->SetLifetimeSeconds(request.set_retention_period().seconds()); - } else { - partConfig->SetLifetimeSeconds(TDuration::Days(1).Seconds()); - } + partConfig->SetLifetimeSeconds(request.set_retention_period().seconds()); } diff --git a/ydb/services/lib/actors/pq_schema_actor.h b/ydb/services/lib/actors/pq_schema_actor.h index b3da17970f..08fe0a71db 100644 --- a/ydb/services/lib/actors/pq_schema_actor.h +++ b/ydb/services/lib/actors/pq_schema_actor.h @@ -185,7 +185,7 @@ namespace NKikimr::NGRpcProxy::V1 { NSchemeCache::TSchemeCacheNavigate::KindTopic) { this->Request_->RaiseIssue( FillIssue( - TStringBuilder() << "path '" << path << "' is not a stream", + TStringBuilder() << "path '" << path << "' is not a topic", Ydb::PersQueue::ErrorCode::ERROR ) ); diff --git a/ydb/services/lib/actors/type_definitions.h b/ydb/services/lib/actors/type_definitions.h index 89e0168a71..9db7e6e686 100644 --- a/ydb/services/lib/actors/type_definitions.h +++ b/ydb/services/lib/actors/type_definitions.h @@ -1,43 +1,59 @@ #pragma once #include <ydb/library/persqueue/topic_parser/topic_parser.h> + #include <library/cpp/actors/core/actor.h> -#include <library/cpp/actors/core/event_local.h> +#include <util/generic/hash.h> +#include <util/generic/map.h> +#include <util/generic/maybe.h> +#include <util/generic/vector.h> namespace NKikimr::NGRpcProxy { - struct TTopicHolder { - ui64 TabletID; - TActorId PipeClient; - bool ACLRequestInfly; - TString CloudId; - TString DbId; - TString FolderId; - NKikimrPQ::TPQTabletConfig::EMeteringMode 
MeteringMode; - NPersQueue::TDiscoveryConverterPtr DiscoveryConverter; - NPersQueue::TTopicConverterPtr FullConverter; - TMaybe<TString> CdcStreamPath; - - TVector<ui32> Groups; - TMap<ui64, ui64> Partitions; - - TTopicHolder() - : TabletID(0) - , PipeClient() - , ACLRequestInfly(false) - {} - }; - - struct TTopicInitInfo { - NPersQueue::TTopicConverterPtr TopicNameConverter; - ui64 TabletID; - TString CloudId; - TString DbId; - TString FolderId; - NKikimrPQ::TPQTabletConfig::EMeteringMode MeteringMode; - }; - - using TTopicInitInfoMap = THashMap<TString, TTopicInitInfo>; - -} // namespace NKikimr::NGRpcProxy +struct TTopicInitInfo { + NPersQueue::TTopicConverterPtr TopicNameConverter; + ui64 TabletID; + TString CloudId; + TString DbId; + TString DbPath; + bool IsServerless = false; + TString FolderId; + NKikimrPQ::TPQTabletConfig::EMeteringMode MeteringMode; +}; + +using TTopicInitInfoMap = THashMap<TString, TTopicInitInfo>; + +struct TTopicHolder { + ui64 TabletID = 0; + TActorId PipeClient; + bool ACLRequestInfly = false; + TString CloudId; + TString DbId; + TString DbPath; + bool IsServerless; + TString FolderId; + NKikimrPQ::TPQTabletConfig::EMeteringMode MeteringMode; + NPersQueue::TDiscoveryConverterPtr DiscoveryConverter; + NPersQueue::TTopicConverterPtr FullConverter; + TMaybe<TString> CdcStreamPath; + + TVector<ui32> Groups; + TMap<ui64, ui64> Partitions; + + inline static TTopicHolder FromTopicInfo(const TTopicInitInfo& info) { + return TTopicHolder{ + .TabletID = info.TabletID, + .ACLRequestInfly = false, + .CloudId = info.CloudId, + .DbId = info.DbId, + .DbPath = info.DbPath, + .IsServerless = info.IsServerless, + .FolderId = info.FolderId, + .MeteringMode = info.MeteringMode, + .FullConverter = info.TopicNameConverter, + }; + } +}; + +} // namespace NKikimr::NGRpcProxy diff --git a/ydb/services/persqueue_v1/actors/CMakeLists.txt b/ydb/services/persqueue_v1/actors/CMakeLists.txt index 39e69eccf9..213b3a8c42 100644 --- 
a/ydb/services/persqueue_v1/actors/CMakeLists.txt +++ b/ydb/services/persqueue_v1/actors/CMakeLists.txt @@ -16,6 +16,7 @@ target_link_libraries(services-persqueue_v1-actors PUBLIC ydb-core-base ydb-core-grpc_services ydb-core-persqueue + core-persqueue-events ydb-core-protos ydb-core-scheme core-tx-scheme_cache diff --git a/ydb/services/persqueue_v1/actors/events.h b/ydb/services/persqueue_v1/actors/events.h index f3d685fd43..536714d08c 100644 --- a/ydb/services/persqueue_v1/actors/events.h +++ b/ydb/services/persqueue_v1/actors/events.h @@ -62,6 +62,7 @@ struct TEvPQProxy { EvUpdateToken, EvTopicUpdateToken, EvCommitRange, + EvRequestTablet, EvEnd }; @@ -435,6 +436,13 @@ struct TEvPQProxy { ui64 WriteTimestampEstimateMs; bool Init; }; + struct TEvRequestTablet : public NActors::TEventLocal<TEvRequestTablet, EvRequestTablet> { + TEvRequestTablet(const ui64 tabletId) + : TabletId(tabletId) + { } + + ui64 TabletId; + }; }; } diff --git a/ydb/services/persqueue_v1/actors/partition_actor.cpp b/ydb/services/persqueue_v1/actors/partition_actor.cpp index 7267c41839..d1b18bf5a2 100644 --- a/ydb/services/persqueue_v1/actors/partition_actor.cpp +++ b/ydb/services/persqueue_v1/actors/partition_actor.cpp @@ -867,7 +867,9 @@ void TPartitionActor::Handle(TEvPersQueue::TEvHasDataInfoResponse::TPtr& ev, con if (!WaitForData) return; - Counters.WaitsForData.Inc(); + if (Counters.WaitsForData) { + Counters.WaitsForData.Inc(); + } Y_VERIFY(record.HasEndOffset()); Y_VERIFY(EndOffset <= record.GetEndOffset()); //end offset could not be changed if no data arrived, but signal will be sended anyway after timeout diff --git a/ydb/services/persqueue_v1/actors/read_init_auth_actor.cpp b/ydb/services/persqueue_v1/actors/read_init_auth_actor.cpp index 34e6522250..587dcf9042 100644 --- a/ydb/services/persqueue_v1/actors/read_init_auth_actor.cpp +++ b/ydb/services/persqueue_v1/actors/read_init_auth_actor.cpp @@ -98,6 +98,9 @@ bool TReadInitAndAuthActor::ProcessTopicSchemeCacheResponse( 
topicsIter->second.DbId = pqDescr.GetPQTabletConfig().GetYdbDatabaseId(); topicsIter->second.FolderId = pqDescr.GetPQTabletConfig().GetYcFolderId(); topicsIter->second.MeteringMode = pqDescr.GetPQTabletConfig().GetMeteringMode(); + topicsIter->second.DbPath = pqDescr.GetPQTabletConfig().GetYdbDatabasePath(); + topicsIter->second.IsServerless = entry.DomainInfo->IsServerless(); + if (!topicsIter->second.DiscoveryConverter->IsValid()) { TString errorReason = Sprintf("Internal server error with topic '%s', Marker# PQ503", topicsIter->second.DiscoveryConverter->GetPrintableString().c_str()); @@ -262,7 +265,7 @@ void TReadInitAndAuthActor::FinishInitialization(const TActorContext& ctx) { TTopicInitInfoMap res; for (auto& [name, holder] : Topics) { res.insert(std::make_pair(name, TTopicInitInfo{ - holder.FullConverter, holder.TabletID, holder.CloudId, holder.DbId, holder.FolderId, holder.MeteringMode + holder.FullConverter, holder.TabletID, holder.CloudId, holder.DbId, holder.DbPath, holder.IsServerless, holder.FolderId, holder.MeteringMode })); } ctx.Send(ParentId, new TEvPQProxy::TEvAuthResultOk(std::move(res))); diff --git a/ydb/services/persqueue_v1/actors/read_session_actor.h b/ydb/services/persqueue_v1/actors/read_session_actor.h index d9e048394d..8dde6b616f 100644 --- a/ydb/services/persqueue_v1/actors/read_session_actor.h +++ b/ydb/services/persqueue_v1/actors/read_session_actor.h @@ -4,20 +4,19 @@ #include "partition_actor.h" #include "persqueue_utils.h" -#include <library/cpp/actors/core/actor_bootstrapped.h> -#include <library/cpp/containers/disjoint_interval_tree/disjoint_interval_tree.h> - #include <ydb/core/base/tablet_pipe.h> #include <ydb/core/grpc_services/grpc_request_proxy.h> #include <ydb/core/persqueue/events/global.h> #include <ydb/services/lib/actors/pq_rl_helpers.h> +#include <library/cpp/actors/core/actor_bootstrapped.h> +#include <library/cpp/containers/disjoint_interval_tree/disjoint_interval_tree.h> + #include <util/generic/guid.h> #include 
<util/system/compiler.h> #include <type_traits> - namespace NKikimr::NGRpcProxy::V1 { inline TActorId GetPQReadServiceActorID() { @@ -25,8 +24,9 @@ inline TActorId GetPQReadServiceActorID() { } struct TPartitionActorInfo { - TActorId Actor; + const TActorId Actor; const TPartitionId Partition; + NPersQueue::TTopicConverterPtr Topic; std::deque<ui64> Commits; bool Reading; bool Releasing; @@ -38,17 +38,18 @@ struct TPartitionActorInfo { ui64 ReadIdCommitted; TSet<ui64> NextCommits; TDisjointIntervalTree<ui64> NextRanges; - ui64 Offset; TInstant AssignTimestamp; - NPersQueue::TTopicConverterPtr Topic; - - TPartitionActorInfo(const TActorId& actor, const TPartitionId& partition, - const NPersQueue::TTopicConverterPtr& topic, const TActorContext& ctx) + explicit TPartitionActorInfo( + const TActorId& actor, + const TPartitionId& partition, + const NPersQueue::TTopicConverterPtr& topic, + const TInstant& timestamp) : Actor(actor) , Partition(partition) + , Topic(topic) , Reading(false) , Releasing(false) , Released(false) @@ -57,11 +58,9 @@ struct TPartitionActorInfo { , ReadIdToResponse(1) , ReadIdCommitted(0) , Offset(0) - , AssignTimestamp(ctx.Now()) - , Topic(topic) - { } - - void MakeCommit(const TActorContext& ctx); + , AssignTimestamp(timestamp) + { + } }; struct TPartitionInfo { @@ -69,6 +68,15 @@ struct TPartitionInfo { ui64 WTime; ui64 SizeLag; ui64 MsgLag; + + explicit TPartitionInfo(ui64 assignId, ui64 wTime, ui64 sizeLag, ui64 msgLag) + : AssignId(assignId) + , WTime(wTime) + , SizeLag(sizeLag) + , MsgLag(msgLag) + { + } + bool operator < (const TPartitionInfo& rhs) const { return std::tie(WTime, AssignId) < std::tie(rhs.WTime, rhs.AssignId); } @@ -94,15 +102,15 @@ struct TFormedReadResponse: public TSimpleRefCount<TFormedReadResponse<TServerMe i64 ByteSizeBeforeFiltering = 0; ui64 RequiredQuota = 0; - //returns byteSize diff + // returns byteSize diff i64 ApplyResponse(TServerMessage&& resp); THashSet<TActorId> PartitionsTookPartInRead; TSet<TPartitionId> 
PartitionsTookPartInControlMessages; - TSet<TPartitionInfo> PartitionsBecameAvailable; // Partitions that became available during this read request execution. - - // These partitions are bringed back to AvailablePartitions after reply to this read request. + // Partitions that became available during this read request execution. + // These partitions are bringed back to AvailablePartitions after reply to this read request. + TSet<TPartitionInfo> PartitionsBecameAvailable; const TString Guid; TInstant Start; @@ -110,23 +118,37 @@ struct TFormedReadResponse: public TSimpleRefCount<TFormedReadResponse<TServerMe TDuration WaitQuotaTime; }; - -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> // Migration protocol is "pqv1" class TReadSessionActor : public TActorBootstrapped<TReadSessionActor<UseMigrationProtocol>> , private TRlHelpers { - using TClientMessage = typename std::conditional_t<UseMigrationProtocol, PersQueue::V1::MigrationStreamingReadClientMessage, Topic::StreamReadMessage::FromClient>; - using TServerMessage = typename std::conditional_t<UseMigrationProtocol, PersQueue::V1::MigrationStreamingReadServerMessage, Topic::StreamReadMessage::FromServer>; + using TClientMessage = typename std::conditional_t<UseMigrationProtocol, + PersQueue::V1::MigrationStreamingReadClientMessage, + Topic::StreamReadMessage::FromClient>; + + using TServerMessage = typename std::conditional_t<UseMigrationProtocol, + PersQueue::V1::MigrationStreamingReadServerMessage, + Topic::StreamReadMessage::FromServer>; + + using TEvReadInit = typename std::conditional_t<UseMigrationProtocol, + TEvPQProxy::TEvMigrationReadInit, + TEvPQProxy::TEvReadInit>; + + using TEvReadResponse = typename std::conditional_t<UseMigrationProtocol, + TEvPQProxy::TEvMigrationReadResponse, + TEvPQProxy::TEvReadResponse>; + + using TEvStreamReadRequest = typename std::conditional_t<UseMigrationProtocol, + NGRpcService::TEvStreamPQMigrationReadRequest, + 
NGRpcService::TEvStreamTopicReadRequest>; using IContext = NGRpcServer::IGRpcStreamingContext<TClientMessage, TServerMessage>; - using TEvReadInit = typename std::conditional_t<UseMigrationProtocol, TEvPQProxy::TEvMigrationReadInit, TEvPQProxy::TEvReadInit>; - using TEvReadResponse = typename std::conditional_t<UseMigrationProtocol, TEvPQProxy::TEvMigrationReadResponse, TEvPQProxy::TEvReadResponse>; - using TEvStreamPQReadRequest = typename std::conditional_t<UseMigrationProtocol, NKikimr::NGRpcService::TEvStreamPQMigrationReadRequest, NKikimr::NGRpcService::TEvStreamTopicReadRequest>; + using TPartitionsMap = THashMap<ui64, TPartitionActorInfo>; private: - //11 tries = 10,23 seconds, then each try for 5 seconds , so 21 retries will take near 1 min + // 11 tries = 10,23 seconds, then each try for 5 seconds , so 21 retries will take near 1 min static constexpr NTabletPipe::TClientRetryPolicy RetryPolicyForPipes = { .RetryLimitCount = 21, .MinRetryTime = TDuration::MilliSeconds(10), @@ -138,145 +160,143 @@ private: static constexpr ui64 MAX_INFLY_BYTES = 25_MB; static constexpr ui32 MAX_INFLY_READS = 10; - static constexpr ui64 MAX_READ_SIZE = 100 << 20; //100mb; + static constexpr ui64 MAX_READ_SIZE = 100_MB; static constexpr ui64 READ_BLOCK_SIZE = 8_KB; // metering - static constexpr double LAG_GROW_MULTIPLIER = 1.2; //assume that 20% more data arrived to partitions + static constexpr double LAG_GROW_MULTIPLIER = 1.2; // assume that 20% more data arrived to partitions public: - TReadSessionActor(TEvStreamPQReadRequest* request, const ui64 cookie, - const NActors::TActorId& schemeCache, const NActors::TActorId& newSchemeCache, - TIntrusivePtr<::NMonitoring::TDynamicCounters> counters, const TMaybe<TString> clientDC, - const NPersQueue::TTopicsListController& topicsHandler); - ~TReadSessionActor(); - - void Bootstrap(const NActors::TActorContext& ctx); + TReadSessionActor(TEvStreamReadRequest* request, const ui64 cookie, + const TActorId& schemeCache, const TActorId& 
newSchemeCache, + TIntrusivePtr<::NMonitoring::TDynamicCounters> counters, + const TMaybe<TString> clientDC, + const NPersQueue::TTopicsListController& topicsHandler); - void Die(const NActors::TActorContext& ctx) override; + void Bootstrap(const TActorContext& ctx); - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::FRONT_PQ_READ; } + void Die(const TActorContext& ctx) override; + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { + return NKikimrServices::TActivity::FRONT_PQ_READ; + } private: STFUNC(StateFunc) { switch (ev->GetTypeRewrite()) { - HFunc(TEvents::TEvWakeup, Handle); - + // grpc events HFunc(IContext::TEvReadFinished, Handle); HFunc(IContext::TEvWriteFinished, Handle); - CFunc(IContext::TEvNotifiedWhenDone::EventType, HandleDone); + HFunc(IContext::TEvNotifiedWhenDone, Handle) HFunc(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse, Handle); + // proxy events HFunc(TEvPQProxy::TEvAuthResultOk, Handle); // form auth actor - - HFunc(TEvPQProxy::TEvDieCommand, HandlePoison) - - HFunc(/* type alias */ TEvReadInit, Handle) //from gRPC - HFunc(TEvPQProxy::TEvReadSessionStatus, Handle) // from read sessions info builder proxy - HFunc(TEvPQProxy::TEvRead, Handle) //from gRPC - HFunc(TEvPQProxy::TEvDone, Handle) //from gRPC - HFunc(TEvPQProxy::TEvCloseSession, Handle) //from partitionActor - HFunc(TEvPQProxy::TEvPartitionReady, Handle) //from partitionActor - HFunc(TEvPQProxy::TEvPartitionReleased, Handle) //from partitionActor - - HFunc(/* type alias */ TEvReadResponse, Handle) //from partitionActor - HFunc(TEvPQProxy::TEvCommitCookie, Handle) //from gRPC - HFunc(TEvPQProxy::TEvCommitRange, Handle) //from gRPC - HFunc(TEvPQProxy::TEvStartRead, Handle) //from gRPC - HFunc(TEvPQProxy::TEvReleased, Handle) //from gRPC - HFunc(TEvPQProxy::TEvGetStatus, Handle) //from gRPC - HFunc(TEvPQProxy::TEvAuth, Handle) //from gRPC - - HFunc(TEvPQProxy::TEvCommitDone, Handle) //from 
PartitionActor - HFunc(TEvPQProxy::TEvPartitionStatus, Handle) //from partitionActor - - HFunc(TEvPersQueue::TEvLockPartition, Handle) //from Balancer - HFunc(TEvPersQueue::TEvReleasePartition, Handle) //from Balancer - HFunc(TEvPersQueue::TEvError, Handle) //from Balancer - - HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); + HFunc(/* type alias */ TEvReadInit, Handle); // from gRPC + HFunc(TEvPQProxy::TEvReadSessionStatus, Handle); // from read sessions info builder proxy + HFunc(TEvPQProxy::TEvRead, Handle); // from gRPC + HFunc(/* type alias */ TEvReadResponse, Handle); // from partitionActor + HFunc(TEvPQProxy::TEvDone, Handle); // from gRPC + HFunc(TEvPQProxy::TEvCloseSession, Handle); // from partitionActor + HFunc(TEvPQProxy::TEvDieCommand, Handle); + HFunc(TEvPQProxy::TEvPartitionReady, Handle); // from partitionActor + HFunc(TEvPQProxy::TEvPartitionReleased, Handle); // from partitionActor + HFunc(TEvPQProxy::TEvCommitCookie, Handle); // from gRPC + HFunc(TEvPQProxy::TEvCommitRange, Handle); // from gRPC + HFunc(TEvPQProxy::TEvStartRead, Handle); // from gRPC + HFunc(TEvPQProxy::TEvReleased, Handle); // from gRPC + HFunc(TEvPQProxy::TEvGetStatus, Handle); // from gRPC + HFunc(TEvPQProxy::TEvAuth, Handle); // from gRPC + HFunc(TEvPQProxy::TEvCommitDone, Handle); // from PartitionActor + HFunc(TEvPQProxy::TEvPartitionStatus, Handle); // from partitionActor + + // Balancer events + HFunc(TEvPersQueue::TEvLockPartition, Handle); + HFunc(TEvPersQueue::TEvReleasePartition, Handle); + HFunc(TEvPersQueue::TEvError, Handle); + + // pipe events HFunc(TEvTabletPipe::TEvClientConnected, Handle); + HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); + + // system events + HFunc(TEvents::TEvWakeup, Handle); default: break; - }; + } } - ui64 PrepareResponse(typename TFormedReadResponse<TServerMessage>::TPtr formedResponse); // returns estimated response's size - bool WriteResponse(TServerMessage&& response, bool finish = false); + bool ReadFromStreamOrDie(const 
TActorContext& ctx); + bool WriteToStreamOrDie(const TActorContext& ctx, TServerMessage&& response, bool finish = false); + bool SendControlMessage(TPartitionId id, TServerMessage&& message, const TActorContext& ctx); + // grpc events void Handle(typename IContext::TEvReadFinished::TPtr& ev, const TActorContext &ctx); void Handle(typename IContext::TEvWriteFinished::TPtr& ev, const TActorContext &ctx); - void HandleDone(const TActorContext &ctx); - + void Handle(typename IContext::TEvNotifiedWhenDone::TPtr& ev, const TActorContext &ctx); void Handle(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse::TPtr& ev, const TActorContext &ctx); - - void Handle(typename TEvReadInit::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvReadSessionStatus::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvRead::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(typename TEvReadResponse::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvDone::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvCloseSession::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvPartitionReady::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvPartitionReleased::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvCommitCookie::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvCommitRange::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvStartRead::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvReleased::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvGetStatus::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPQProxy::TEvAuth::TPtr& ev, const NActors::TActorContext& ctx); - void ProcessAuth(const TString& auth, const TActorContext& ctx); - void Handle(TEvPQProxy::TEvCommitDone::TPtr& ev, const 
NActors::TActorContext& ctx); - - void Handle(TEvPQProxy::TEvPartitionStatus::TPtr& ev, const NActors::TActorContext& ctx); - - void Handle(TEvPersQueue::TEvLockPartition::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPersQueue::TEvReleasePartition::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvPersQueue::TEvError::TPtr& ev, const NActors::TActorContext& ctx); - - void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const NActors::TActorContext& ctx); - void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const NActors::TActorContext& ctx); - [[nodiscard]] bool ProcessBalancerDead(const ui64 tabletId, const NActors::TActorContext& ctx); // returns false if actor died - - void HandlePoison(TEvPQProxy::TEvDieCommand::TPtr& ev, const NActors::TActorContext& ctx); + // proxy events + void Handle(TEvPQProxy::TEvAuthResultOk::TPtr& ev, const TActorContext& ctx); + void Handle(typename TEvReadInit::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvReadSessionStatus::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvRead::TPtr& ev, const TActorContext& ctx); + void Handle(typename TEvReadResponse::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvDone::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvCloseSession::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvDieCommand::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvPartitionReady::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvPartitionReleased::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvCommitCookie::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvCommitRange::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvStartRead::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvReleased::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvGetStatus::TPtr& ev, const TActorContext& 
ctx); + void Handle(TEvPQProxy::TEvAuth::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvCommitDone::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPQProxy::TEvPartitionStatus::TPtr& ev, const TActorContext& ctx); + + // Balancer events + void Handle(TEvPersQueue::TEvLockPartition::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPersQueue::TEvReleasePartition::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPersQueue::TEvError::TPtr& ev, const TActorContext& ctx); + + // pipe events + void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx); + void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx); + + // system events void Handle(TEvents::TEvWakeup::TPtr& ev, const TActorContext& ctx); - void Handle(TEvPQProxy::TEvAuthResultOk::TPtr& ev, const NActors::TActorContext& ctx); - void RecheckACL(const TActorContext& ctx); + TActorId CreatePipeClient(ui64 tabletId, const TActorContext& ctx); + void ProcessBalancerDead(ui64 tabletId, const TActorContext& ctx); + + void RunAuthActor(const TActorContext& ctx); + void RecheckACL(const TActorContext& ctx); void InitSession(const TActorContext& ctx); - void CloseSession(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode errorCode, - const NActors::TActorContext& ctx); + void RegisterSession(const TString& topic, const TActorId& pipe, const TVector<ui32>& groups, const TActorContext& ctx); + void CloseSession(PersQueue::ErrorCode::ErrorCode code, const TString& reason, const TActorContext& ctx); void SetupCounters(); void SetupTopicCounters(const NPersQueue::TTopicConverterPtr& topic); - void SetupTopicCounters(const NPersQueue::TTopicConverterPtr& topic, const TString& cloudId, const TString& dbId, - const TString& folderId); - - void ProcessReads(const NActors::TActorContext& ctx); // returns false if actor died - void ProcessAnswer(const NActors::TActorContext& ctx, typename TFormedReadResponse<TServerMessage>::TPtr 
formedResponse); // returns false if actor died - - void RegisterSessions(const NActors::TActorContext& ctx); - void RegisterSession(const TActorId& pipe, const TString& topic, const TVector<ui32>& groups, const TActorContext& ctx); - - void DropPartition(typename THashMap<ui64, TPartitionActorInfo>::iterator it, const TActorContext& ctx); + void SetupTopicCounters(const NPersQueue::TTopicConverterPtr& topic, + const TString& cloudId, const TString& dbId, const TString& dbPath, const bool isServerless, const TString& folderId); - bool ActualPartitionActor(const TActorId& part); - void ReleasePartition(const typename THashMap<ui64, TPartitionActorInfo>::iterator& it, - bool couldBeReads, const TActorContext& ctx); // returns false if actor died + void ProcessReads(const TActorContext& ctx); + ui64 PrepareResponse(typename TFormedReadResponse<TServerMessage>::TPtr formedResponse); + void ProcessAnswer(typename TFormedReadResponse<TServerMessage>::TPtr formedResponse, const TActorContext& ctx); - void SendReleaseSignalToClient(const typename THashMap<ui64, TPartitionActorInfo>::iterator& it, bool kill, const TActorContext& ctx); - - void InformBalancerAboutRelease(const typename THashMap<ui64, TPartitionActorInfo>::iterator& it, const TActorContext& ctx); + void DropPartition(typename TPartitionsMap::iterator it, const TActorContext& ctx); + void ReleasePartition(typename TPartitionsMap::iterator it, bool couldBeReads, const TActorContext& ctx); + void SendReleaseSignal(typename TPartitionsMap::iterator it, bool kill, const TActorContext& ctx); + void InformBalancerAboutRelease(typename TPartitionsMap::iterator it, const TActorContext& ctx); static ui32 NormalizeMaxReadMessagesCount(ui32 sourceValue); static ui32 NormalizeMaxReadSize(ui32 sourceValue); private: - std::unique_ptr</* type alias */ TEvStreamPQReadRequest> Request; - + std::unique_ptr</* type alias */ TEvStreamReadRequest> Request; const TString ClientDC; - const TInstant StartTimestamp; TActorId 
SchemeCache; @@ -293,7 +313,7 @@ private: bool CommitsDisabled; bool InitDone; - bool RangesMode = false; + bool RangesMode; ui32 MaxReadMessagesCount; ui32 MaxReadSize; @@ -310,7 +330,7 @@ private: THashSet<TActorId> ActualPartitionActors; THashMap<ui64, std::pair<ui32, ui64>> BalancerGeneration; ui64 NextAssignId; - THashMap<ui64, TPartitionActorInfo> Partitions; //assignId -> info + TPartitionsMap Partitions; // assignId -> info THashMap<TString, TTopicHolder> Topics; // topic -> info THashMap<TString, NPersQueue::TTopicConverterPtr> FullPathToConverter; // PrimaryFullPath -> Converter, for balancer replies matching @@ -324,10 +344,15 @@ private: TSet<TPartitionInfo> AvailablePartitions; - THashMap<TActorId, typename TFormedReadResponse<TServerMessage>::TPtr> PartitionToReadResponse; // Partition actor -> TFormedReadResponse answer that has this partition. - // PartitionsTookPartInRead in formed read response contain this actor id. - typename TFormedReadResponse<TServerMessage>::TPtr PendingQuota; // response that currenly pending quota - std::deque<typename TFormedReadResponse<TServerMessage>::TPtr> WaitingQuota; // responses that will be quoted next + // Partition actor -> TFormedReadResponse answer that has this partition. + // PartitionsTookPartInRead in formed read response contain this actor id. 
+ THashMap<TActorId, typename TFormedReadResponse<TServerMessage>::TPtr> PartitionToReadResponse; + + // Response that currenly pending quota + typename TFormedReadResponse<TServerMessage>::TPtr PendingQuota; + + // Responses that will be quoted next + std::deque<typename TFormedReadResponse<TServerMessage>::TPtr> WaitingQuota; struct TControlMessages { TVector<TServerMessage> ControlMessages; @@ -336,7 +361,6 @@ private: TMap<TPartitionId, TControlMessages> PartitionToControlMessages; - std::deque<THolder<TEvPQProxy::TEvRead>> Reads; ui64 Cookie; @@ -346,7 +370,7 @@ private: ui32 Partitions; }; - TMap<ui64, TCommitInfo> Commits; //readid->TCommitInfo + TMap<ui64, TCommitInfo> Commits; // readid -> TCommitInfo TIntrusivePtr<::NMonitoring::TDynamicCounters> Counters; @@ -361,22 +385,22 @@ private: ui32 ReadsInfly; std::queue<ui64> ActiveWrites; - NKikimr::NPQ::TPercentileCounter PartsPerSession; + NPQ::TPercentileCounter PartsPerSession; THashMap<TString, TTopicCounters> TopicCounters; THashMap<TString, ui32> NumPartitionsFromTopic; TVector<NPersQueue::TPQLabelsInfo> Aggr; - NKikimr::NPQ::TMultiCounter SLITotal; - NKikimr::NPQ::TMultiCounter SLIErrors; + NPQ::TMultiCounter SLITotal; + NPQ::TMultiCounter SLIErrors; TInstant StartTime; - NKikimr::NPQ::TPercentileCounter InitLatency; - NKikimr::NPQ::TPercentileCounter ReadLatency; - NKikimr::NPQ::TPercentileCounter ReadLatencyFromDisk; - NKikimr::NPQ::TPercentileCounter CommitLatency; - NKikimr::NPQ::TMultiCounter SLIBigLatency; - NKikimr::NPQ::TMultiCounter SLIBigReadLatency; - NKikimr::NPQ::TMultiCounter ReadsTotal; + NPQ::TPercentileCounter InitLatency; + NPQ::TPercentileCounter ReadLatency; + NPQ::TPercentileCounter ReadLatencyFromDisk; + NPQ::TPercentileCounter CommitLatency; + NPQ::TMultiCounter SLIBigLatency; + NPQ::TMultiCounter SLIBigReadLatency; + NPQ::TMultiCounter ReadsTotal; NPersQueue::TTopicsListController TopicsHandler; NPersQueue::TTopicsToConverter TopicsList; @@ -384,8 +408,7 @@ private: } 
-///////////////////////////////////////// // Implementation #define READ_SESSION_ACTOR_IMPL -#include "read_session_actor.ipp" + #include "read_session_actor.ipp" #undef READ_SESSION_ACTOR_IMPL diff --git a/ydb/services/persqueue_v1/actors/read_session_actor.ipp b/ydb/services/persqueue_v1/actors/read_session_actor.ipp index baad770c4e..ebe6f3764d 100644 --- a/ydb/services/persqueue_v1/actors/read_session_actor.ipp +++ b/ydb/services/persqueue_v1/actors/read_session_actor.ipp @@ -1,9 +1,9 @@ #ifndef READ_SESSION_ACTOR_IMPL -#error "Do not include this file directly" + #error "Do not include this file directly" #endif -#include "read_init_auth_actor.h" #include "helpers.h" +#include "read_init_auth_actor.h" #include <ydb/library/persqueue/topic_parser/counters.h> @@ -13,41 +13,33 @@ #include <util/string/join.h> #include <util/string/strip.h> -#include <util/charset/utf8.h> #include <utility> -using namespace NActors; -using namespace NKikimrClient; - -namespace NKikimr { +namespace NKikimr::NGRpcProxy::V1 { +using namespace NKikimrClient; using namespace NMsgBusProxy; - -namespace NGRpcProxy::V1 { - using namespace PersQueue::V1; -//TODO: add here tracking of bytes in/out +// TODO: add here tracking of bytes in/out template <bool UseMigrationProtocol> -TReadSessionActor<UseMigrationProtocol>::TReadSessionActor(TEvStreamPQReadRequest* request, const ui64 cookie, - const TActorId& schemeCache, const TActorId& newSchemeCache, - TIntrusivePtr<NMonitoring::TDynamicCounters> counters, - const TMaybe<TString> clientDC, - const NPersQueue::TTopicsListController& topicsHandler) +TReadSessionActor<UseMigrationProtocol>::TReadSessionActor( + TEvStreamReadRequest* request, const ui64 cookie, + const TActorId& schemeCache, const TActorId& newSchemeCache, + TIntrusivePtr<NMonitoring::TDynamicCounters> counters, + const TMaybe<TString> clientDC, + const NPersQueue::TTopicsListController& topicsHandler) : TRlHelpers(request, READ_BLOCK_SIZE, TDuration::Minutes(1)) , 
Request(request) - , ClientDC(clientDC ? *clientDC : "other") + , ClientDC(clientDC.GetOrElse("other")) , StartTimestamp(TInstant::Now()) , SchemeCache(schemeCache) , NewSchemeCache(newSchemeCache) - , AuthInitActor() - , ClientId() - , ClientPath() - , Session() , CommitsDisabled(false) , InitDone(false) + , RangesMode(false) , MaxReadMessagesCount(0) , MaxReadSize(0) , MaxTimeLagMs(0) @@ -63,57 +55,60 @@ TReadSessionActor<UseMigrationProtocol>::TReadSessionActor(TEvStreamPQReadReques , BytesInflight_(0) , RequestedBytes(0) , ReadsInfly(0) - , TopicsHandler(topicsHandler) { + , TopicsHandler(topicsHandler) +{ Y_ASSERT(Request); } -template<bool UseMigrationProtocol> -TReadSessionActor<UseMigrationProtocol>::~TReadSessionActor() = default; - - -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Bootstrap(const TActorContext& ctx) { - Y_VERIFY(Request); if (!AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { ++(*GetServiceCounters(Counters, "pqproxy|readSession") ->GetNamedCounter("sensor", "SessionsCreatedTotal", true)); } Request->GetStreamCtx()->Attach(ctx.SelfID); - if (!Request->GetStreamCtx()->Read()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); - Die(ctx); + if (!ReadFromStreamOrDie(ctx)) { return; } - StartTime = ctx.Now(); - TReadSessionActor<UseMigrationProtocol>::Become(&TReadSessionActor<UseMigrationProtocol>::TThis::StateFunc); + StartTime = ctx.Now(); + this->Become(&TReadSessionActor<UseMigrationProtocol>::TThis::StateFunc); } -template<bool UseMigrationProtocol> -void TReadSessionActor<UseMigrationProtocol>::HandleDone(const TActorContext& ctx) { - +template <bool UseMigrationProtocol> +void TReadSessionActor<UseMigrationProtocol>::Handle(typename IContext::TEvNotifiedWhenDone::TPtr&, const TActorContext& ctx) { LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc closed"); Die(ctx); } +template <bool 
UseMigrationProtocol> +bool TReadSessionActor<UseMigrationProtocol>::ReadFromStreamOrDie(const TActorContext& ctx) { + if (!Request->GetStreamCtx()->Read()) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); + Die(ctx); + return false; + } -template<bool UseMigrationProtocol> -void TReadSessionActor<UseMigrationProtocol>::Handle(typename IContext::TEvReadFinished::TPtr& ev, const TActorContext& ctx) { + return true; +} +template <bool UseMigrationProtocol> +void TReadSessionActor<UseMigrationProtocol>::Handle(typename IContext::TEvReadFinished::TPtr& ev, const TActorContext& ctx) { auto& request = ev->Get()->Record; if constexpr (UseMigrationProtocol) { - auto token = request.token(); + const auto token = request.token(); request.set_token(""); - if (!token.empty()) { //TODO refreshtoken here + if (!token.empty()) { // TODO: refresh token here ctx.Send(ctx.SelfID, new TEvPQProxy::TEvAuth(token)); } } - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, - PQ_LOG_PREFIX << " grpc read done: success: " << ev->Get()->Success << " data: " << request); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read done" + << ": success# " << ev->Get()->Success + << ", data# " << request); if (!ev->Get()->Success) { LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed"); @@ -121,7 +116,7 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(typename IContext::TEvReadF return; } - auto GetAssignId = [](auto& request) { + auto getAssignId = [](auto& request) { if constexpr (UseMigrationProtocol) { return request.assign_id(); } else { @@ -132,33 +127,23 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(typename IContext::TEvReadF if constexpr (UseMigrationProtocol) { switch (request.request_case()) { case TClientMessage::kInitRequest: { - ctx.Send(ctx.SelfID, new TEvReadInit(request, Request->GetStreamCtx()->GetPeerName())); - break; + return (void)ctx.Send(ctx.SelfID, new 
TEvReadInit(request, Request->GetStreamCtx()->GetPeerName())); } - case TClientMessage::kStatus: { - ctx.Send(ctx.SelfID, new TEvPQProxy::TEvGetStatus(GetAssignId(request.status()))); - if (!Request->GetStreamCtx()->Read()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); - Die(ctx); - return; - } - break; + case TClientMessage::kStatus: { + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvGetStatus(getAssignId(request.status()))); + return (void)ReadFromStreamOrDie(ctx); } + case TClientMessage::kRead: { - ctx.Send(ctx.SelfID, new TEvPQProxy::TEvRead()); // Proto read message have no parameters - break; + return (void)ctx.Send(ctx.SelfID, new TEvPQProxy::TEvRead()); } - case TClientMessage::kReleased: { - ctx.Send(ctx.SelfID, new TEvPQProxy::TEvReleased(GetAssignId(request.released()))); - if (!Request->GetStreamCtx()->Read()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); - Die(ctx); - return; - } - break; + case TClientMessage::kReleased: { + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvReleased(getAssignId(request.released()))); + return (void)ReadFromStreamOrDie(ctx); } + case TClientMessage::kStartRead: { const auto& req = request.start_read(); @@ -166,188 +151,186 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(typename IContext::TEvReadF const ui64 commitOffset = req.commit_offset(); const bool verifyReadOffset = req.verify_read_offset(); - ctx.Send(ctx.SelfID, new TEvPQProxy::TEvStartRead(GetAssignId(request.start_read()), readOffset, commitOffset, verifyReadOffset)); - if (!Request->GetStreamCtx()->Read()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); - Die(ctx); - return; - } - break; + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvStartRead(getAssignId(request.start_read()), readOffset, commitOffset, verifyReadOffset)); + return (void)ReadFromStreamOrDie(ctx); } + case TClientMessage::kCommit: { const auto& 
req = request.commit(); if (!req.cookies_size() && !RangesMode) { - CloseSession(TStringBuilder() << "can't commit without cookies", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "can't commit without cookies", ctx); } - if (RangesMode && !req.offset_ranges_size()) { - CloseSession(TStringBuilder() << "can't commit without offsets", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; + if (RangesMode && !req.offset_ranges_size()) { + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "can't commit without offsets", ctx); } THashMap<ui64, TEvPQProxy::TCommitCookie> commitCookie; THashMap<ui64, TEvPQProxy::TCommitRange> commitRange; - for (auto& c: req.cookies()) { + for (const auto& c : req.cookies()) { commitCookie[c.assign_id()].Cookies.push_back(c.partition_cookie()); } - for (auto& c: req.offset_ranges()) { - commitRange[c.assign_id()].Ranges.push_back(std::make_pair(c.start_offset(), c.end_offset())); - } - for (auto& c : commitCookie) { - ctx.Send(ctx.SelfID, new TEvPQProxy::TEvCommitCookie(c.first, std::move(c.second))); + for (const auto& c : req.offset_ranges()) { + commitRange[c.assign_id()].Ranges.emplace_back(c.start_offset(), c.end_offset()); } - for (auto& c : commitRange) { - ctx.Send(ctx.SelfID, new TEvPQProxy::TEvCommitRange(c.first, std::move(c.second))); + for (auto& [id, cookies] : commitCookie) { + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvCommitCookie(id, std::move(cookies))); } - if (!Request->GetStreamCtx()->Read()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); - Die(ctx); - return; + for (auto& [id, range] : commitRange) { + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvCommitRange(id, std::move(range))); } - break; + + return (void)ReadFromStreamOrDie(ctx); } default: { - CloseSession(TStringBuilder() << "unsupported request", PersQueue::ErrorCode::BAD_REQUEST, ctx); - break; + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, 
"unsupported request", ctx); } } } else { - switch(request.client_message_case()) { + switch (request.client_message_case()) { case TClientMessage::kInitRequest: { - ctx.Send(ctx.SelfID, new TEvReadInit(request, Request->GetStreamCtx()->GetPeerName())); - break; + return (void)ctx.Send(ctx.SelfID, new TEvReadInit(request, Request->GetStreamCtx()->GetPeerName())); } + case TClientMessage::kReadRequest: { - ctx.Send(ctx.SelfID, new TEvPQProxy::TEvRead(request.read_request().bytes_size())); - break; + return (void)ctx.Send(ctx.SelfID, new TEvPQProxy::TEvRead(request.read_request().bytes_size())); } - case TClientMessage::kPartitionSessionStatusRequest: { - ctx.Send(ctx.SelfID, new TEvPQProxy::TEvGetStatus(GetAssignId(request.partition_session_status_request()))); - if (!Request->GetStreamCtx()->Read()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); - Die(ctx); - return; - } - break; + case TClientMessage::kPartitionSessionStatusRequest: { + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvGetStatus(getAssignId(request.partition_session_status_request()))); + return (void)ReadFromStreamOrDie(ctx); } - case TClientMessage::kStopPartitionSessionResponse: { - ctx.Send(ctx.SelfID, new TEvPQProxy::TEvReleased(GetAssignId(request.stop_partition_session_response()))); - if (!Request->GetStreamCtx()->Read()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); - Die(ctx); - return; - } - break; + case TClientMessage::kStopPartitionSessionResponse: { + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvReleased(getAssignId(request.stop_partition_session_response()))); + return (void)ReadFromStreamOrDie(ctx); } + case TClientMessage::kStartPartitionSessionResponse: { const auto& req = request.start_partition_session_response(); const ui64 readOffset = req.read_offset(); const ui64 commitOffset = req.commit_offset(); - ctx.Send(ctx.SelfID, new TEvPQProxy::TEvStartRead(GetAssignId(req), readOffset, 
commitOffset, req.has_read_offset())); - if (!Request->GetStreamCtx()->Read()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); - Die(ctx); - return; - } - break; + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvStartRead(getAssignId(req), readOffset, commitOffset, req.has_read_offset())); + return (void)ReadFromStreamOrDie(ctx); } + case TClientMessage::kCommitOffsetRequest: { const auto& req = request.commit_offset_request(); if (!RangesMode || !req.commit_offsets_size()) { - CloseSession(TStringBuilder() << "can't commit without offsets", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "can't commit without offsets", ctx); } THashMap<ui64, TEvPQProxy::TCommitRange> commitRange; - for (auto& pc: req.commit_offsets()) { - auto id = pc.partition_session_id(); - for (auto& c: pc.offsets()) { - commitRange[id].Ranges.push_back(std::make_pair(c.start(), c.end())); + for (const auto& pc : req.commit_offsets()) { + for (const auto& c : pc.offsets()) { + commitRange[pc.partition_session_id()].Ranges.emplace_back(c.start(), c.end()); } } - for (auto& c : commitRange) { - ctx.Send(ctx.SelfID, new TEvPQProxy::TEvCommitRange(c.first, std::move(c.second))); + for (auto& [id, range] : commitRange) { + ctx.Send(ctx.SelfID, new TEvPQProxy::TEvCommitRange(id, std::move(range))); } - if (!Request->GetStreamCtx()->Read()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); - Die(ctx); - return; - } - break; + return (void)ReadFromStreamOrDie(ctx); } + case TClientMessage::kUpdateTokenRequest: { - auto token = request.update_token_request().token(); - if (!token.empty()) { //TODO refreshtoken here + if (const auto token = request.update_token_request().token()) { // TODO: refresh token here ctx.Send(ctx.SelfID, new TEvPQProxy::TEvAuth(token)); } break; } default: { - CloseSession(TStringBuilder() << "unsupported request", 
PersQueue::ErrorCode::BAD_REQUEST, ctx); - break; + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "unsupported request", ctx); } } } } +template <bool UseMigrationProtocol> +bool TReadSessionActor<UseMigrationProtocol>::WriteToStreamOrDie(const TActorContext& ctx, TServerMessage&& response, bool finish) { + const ui64 sz = response.ByteSize(); + ActiveWrites.push(sz); + + BytesInflight_ += sz; + if (BytesInflight) { + (*BytesInflight) += sz; + } + + bool res = false; + if (!finish) { + res = Request->GetStreamCtx()->Write(std::move(response)); + } else { + res = Request->GetStreamCtx()->WriteAndFinish(std::move(response), grpc::Status::OK); + } + + if (!res) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed at start"); + Die(ctx); + } + + return res; +} -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Handle(typename IContext::TEvWriteFinished::TPtr& ev, const TActorContext& ctx) { if (!ev->Get()->Success) { LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); - Die(ctx); + return Die(ctx); } + Y_VERIFY(!ActiveWrites.empty()); - ui64 sz = ActiveWrites.front(); + const auto sz = ActiveWrites.front(); ActiveWrites.pop(); + Y_VERIFY(BytesInflight_ >= sz); BytesInflight_ -= sz; - if (BytesInflight) (*BytesInflight) -= sz; + if (BytesInflight) { + (*BytesInflight) -= sz; + } ProcessReads(ctx); } - -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Die(const TActorContext& ctx) { + if (AuthInitActor) { + ctx.Send(AuthInitActor, new TEvents::TEvPoisonPill()); + } - ctx.Send(AuthInitActor, new TEvents::TEvPoisonPill()); - - for (auto& p : Partitions) { - ctx.Send(p.second.Actor, new TEvents::TEvPoisonPill()); + for (const auto& [_, info] : Partitions) { + if (info.Actor) { + ctx.Send(info.Actor, new TEvents::TEvPoisonPill()); + } - if 
(!p.second.Released) { - // ToDo[counters] - auto it = TopicCounters.find(p.second.Topic->GetInternalName()); + if (!info.Released) { + // TODO: counters + auto it = TopicCounters.find(info.Topic->GetInternalName()); Y_VERIFY(it != TopicCounters.end()); it->second.PartitionsInfly.Dec(); it->second.PartitionsReleased.Inc(); - if (p.second.Releasing) + if (info.Releasing) { it->second.PartitionsToBeReleased.Dec(); + } } } - for (auto& t : Topics) { - if (t.second.PipeClient) - NTabletPipe::CloseClient(ctx, t.second.PipeClient); + for (const auto& [_, holder] : Topics) { + if (holder.PipeClient) { + NTabletPipe::CloseClient(ctx, holder.PipeClient); + } } - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " is DEAD"); if (BytesInflight) { (*BytesInflight) -= BytesInflight_; @@ -359,96 +342,114 @@ void TReadSessionActor<UseMigrationProtocol>::Die(const TActorContext& ctx) { PartsPerSession.DecFor(Partitions.size(), 1); } + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " is DEAD"); ctx.Send(GetPQReadServiceActorID(), new TEvPQProxy::TEvSessionDead(Cookie)); TActorBootstrapped<TReadSessionActor>::Die(ctx); } -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvDone::TPtr&, const TActorContext& ctx) { - CloseSession(TStringBuilder() << "Reads done signal - closing everything", PersQueue::ErrorCode::OK, ctx); + CloseSession(PersQueue::ErrorCode::OK, "reads done signal, closing everything", ctx); } -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> +void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvCloseSession::TPtr& ev, const TActorContext& ctx) { + CloseSession(ev->Get()->ErrorCode, ev->Get()->Reason, ctx); +} + +template <bool UseMigrationProtocol> +void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvDieCommand::TPtr& ev, const TActorContext& ctx) { + CloseSession(ev->Get()->ErrorCode, 
ev->Get()->Reason, ctx); +} + +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvCommitCookie::TPtr& ev, const TActorContext& ctx) { RequestNotChecked = true; if (CommitsDisabled) { - CloseSession("commits in session are disabled by client option", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "commits in session are disabled by client option", ctx); } - const ui64& assignId = ev->Get()->AssignId; - auto it = Partitions.find(assignId); - if (it == Partitions.end()) //stale commit - ignore it + + auto it = Partitions.find(ev->Get()->AssignId); + if (it == Partitions.end()) { // stale commit - ignore it return; + } - for (auto& c : ev->Get()->CommitInfo.Cookies) { - if(RangesMode) { - CloseSession("Commits cookies in ranges commit mode is illegal", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; + for (const auto c : ev->Get()->CommitInfo.Cookies) { + if (RangesMode) { + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "commits cookies in ranges commit mode is prohibited", ctx); } + it->second.NextCommits.insert(c); } ctx.Send(it->second.Actor, new TEvPQProxy::TEvCommitCookie(ev->Get()->AssignId, std::move(ev->Get()->CommitInfo))); } -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvCommitRange::TPtr& ev, const TActorContext& ctx) { RequestNotChecked = true; if (CommitsDisabled) { - CloseSession("commits in session are disabled by client option", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "commits in session are disabled by client option", ctx); } - const ui64& assignId = ev->Get()->AssignId; - auto it = Partitions.find(assignId); - if (it == Partitions.end()) //stale commit - ignore it + + auto it = Partitions.find(ev->Get()->AssignId); + if (it == Partitions.end()) { // stale commit - 
ignore it return; + } - for (auto& c : ev->Get()->CommitInfo.Ranges) { - if(!RangesMode) { - CloseSession("Commits ranges in cookies commit mode is illegal", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; + for (const auto& [b, e] : ev->Get()->CommitInfo.Ranges) { + if (!RangesMode) { + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "commits ranges in cookies commit mode is prohibited", ctx); } - if (c.first >= c.second || it->second.NextRanges.Intersects(c.first, c.second) || c.first < it->second.Offset) { - CloseSession(TStringBuilder() << "Offsets range [" << c.first << ", " << c.second << ") has already committed offsets, double committing is forbiden; or incorrect", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; + if (b >= e || it->second.NextRanges.Intersects(b, e) || b < it->second.Offset) { + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TStringBuilder() + << "offsets range [" << b << ", " << e << ")" + << " has already committed offsets, double committing is forbiden or incorrect", ctx); } - it->second.NextRanges.InsertInterval(c.first, c.second); + + it->second.NextRanges.InsertInterval(b, e); } ctx.Send(it->second.Actor, new TEvPQProxy::TEvCommitRange(ev->Get()->AssignId, std::move(ev->Get()->CommitInfo))); } -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvAuth::TPtr& ev, const TActorContext& ctx) { - ProcessAuth(ev->Get()->Auth, ctx); + const auto& auth = ev->Get()->Auth; + if (!auth.empty() && auth != Auth) { + Auth = auth; + Request->RefreshToken(auth, ctx, ctx.SelfID); + } } -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvStartRead::TPtr& ev, const TActorContext& ctx) { RequestNotChecked = true; auto it = Partitions.find(ev->Get()->AssignId); if (it == Partitions.end() || it->second.Releasing) { - //do nothing - already released partition 
- LOG_WARN_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got NOTACTUAL StartRead from client for partition with assign id " << ev->Get()->AssignId - << " at offset " << ev->Get()->ReadOffset); + // do nothing - already released partition + LOG_WARN_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got irrelevant StartRead from client" + << ": partition# " << ev->Get()->AssignId + << ", offset# " << ev->Get()->ReadOffset); return; } - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got StartRead from client for " - << it->second.Partition << - " at readOffset " << ev->Get()->ReadOffset << - " commitOffset " << ev->Get()->CommitOffset); - //proxy request to partition - allow initing - //TODO: add here VerifyReadOffset too and check it againts Committed position + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got StartRead from client" + << ": partition# " << it->second.Partition + << ", readOffset# " << ev->Get()->ReadOffset + << ", commitOffset# " << ev->Get()->CommitOffset); + + // proxy request to partition - allow initing + // TODO: add here VerifyReadOffset too and check it againts Committed position ctx.Send(it->second.Actor, new TEvPQProxy::TEvLockPartition(ev->Get()->ReadOffset, ev->Get()->CommitOffset, ev->Get()->VerifyReadOffset, true)); } -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvReleased::TPtr& ev, const TActorContext& ctx) { RequestNotChecked = true; @@ -456,167 +457,150 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvReleased::TP if (it == Partitions.end()) { return; } - if (!it->second.Releasing) { - CloseSession(TStringBuilder() << "Release of partition that is not requested for release is forbiden for " << it->second.Partition, PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; + if (!it->second.Releasing) { + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, 
TStringBuilder() + << "release of partition that is not requested for release is forbiden for " << it->second.Partition, ctx); } - Y_VERIFY(it->second.LockSent); - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got Released from client for partition " << it->second.Partition); + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got Released from client" + << ": partition# " << it->second.Partition); + + Y_VERIFY(it->second.LockSent); ReleasePartition(it, true, ctx); } -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvGetStatus::TPtr& ev, const TActorContext& ctx) { auto it = Partitions.find(ev->Get()->AssignId); if (it == Partitions.end() || it->second.Releasing) { // Ignore request - client asking status after releasing of partition. return; } + ctx.Send(it->second.Actor, new TEvPQProxy::TEvGetStatus(ev->Get()->AssignId)); } -template<bool UseMigrationProtocol> -void TReadSessionActor<UseMigrationProtocol>::DropPartition(typename THashMap<ui64, TPartitionActorInfo>::iterator it, const TActorContext& ctx) { +template <bool UseMigrationProtocol> +void TReadSessionActor<UseMigrationProtocol>::DropPartition(typename TPartitionsMap::iterator it, const TActorContext& ctx) { ctx.Send(it->second.Actor, new TEvents::TEvPoisonPill()); + bool res = ActualPartitionActors.erase(it->second.Actor); Y_VERIFY(res); if (--NumPartitionsFromTopic[it->second.Topic->GetInternalName()] == 0) { - //ToDo[counters] - bool res_ = TopicCounters.erase(it->second.Topic->GetInternalName()); - Y_VERIFY(res_); + // TODO: counters + res = TopicCounters.erase(it->second.Topic->GetInternalName()); + Y_VERIFY(res); } if (SessionsActive) { PartsPerSession.DecFor(Partitions.size(), 1); } + BalancerGeneration.erase(it->first); Partitions.erase(it); + if (SessionsActive) { PartsPerSession.IncFor(Partitions.size(), 1); } } -template<bool UseMigrationProtocol> +template <bool 
UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvCommitDone::TPtr& ev, const TActorContext& ctx) { - Y_VERIFY(!CommitsDisabled); - if (!ActualPartitionActor(ev->Sender)) + if (!ActualPartitionActors.contains(ev->Sender)) { return; + } - ui64 assignId = ev->Get()->AssignId; - - auto it = Partitions.find(assignId); + auto it = Partitions.find(ev->Get()->AssignId); Y_VERIFY(it != Partitions.end()); Y_VERIFY(it->second.Offset < ev->Get()->Offset); it->second.NextRanges.EraseInterval(it->second.Offset, ev->Get()->Offset); - - if (ev->Get()->StartCookie == Max<ui64>()) //means commit at start + if (ev->Get()->StartCookie == Max<ui64>()) { // means commit at start return; + } TServerMessage result; result.set_status(Ydb::StatusIds::SUCCESS); + if (!RangesMode) { if constexpr (UseMigrationProtocol) { for (ui64 i = ev->Get()->StartCookie; i <= ev->Get()->LastCookie; ++i) { auto c = result.mutable_committed()->add_cookies(); c->set_partition_cookie(i); - c->set_assign_id(assignId); + c->set_assign_id(ev->Get()->AssignId); it->second.NextCommits.erase(i); it->second.ReadIdCommitted = i; } } else { // commit on cookies not supported in this case Y_VERIFY(false); } - } else { if constexpr (UseMigrationProtocol) { auto c = result.mutable_committed()->add_offset_ranges(); - c->set_assign_id(assignId); + c->set_assign_id(ev->Get()->AssignId); c->set_start_offset(it->second.Offset); c->set_end_offset(ev->Get()->Offset); - } else { auto c = result.mutable_commit_offset_response()->add_partitions_committed_offsets(); - c->set_partition_session_id(assignId); + c->set_partition_session_id(ev->Get()->AssignId); c->set_committed_offset(ev->Get()->Offset); } } it->second.Offset = ev->Get()->Offset; - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " replying for commits from assignId " << assignId << " from " << ev->Get()->StartCookie << " to " << ev->Get()->LastCookie << " to offset " << it->second.Offset); - if 
(!WriteResponse(std::move(result))) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); - Die(ctx); - return; - } + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " replying for commits" + << ": assignId# " << ev->Get()->AssignId + << ", from# " << ev->Get()->StartCookie + << ", to# " << ev->Get()->LastCookie + << ", offset# " << it->second.Offset); + WriteToStreamOrDie(ctx, std::move(result)); } - -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvReadSessionStatus::TPtr& ev, const TActorContext& ctx) { - THolder<TEvPQProxy::TEvReadSessionStatusResponse> result(new TEvPQProxy::TEvReadSessionStatusResponse()); - for (auto& p : Partitions) { + auto result = MakeHolder<TEvPQProxy::TEvReadSessionStatusResponse>(); + + for (const auto& [_, info] : Partitions) { auto part = result->Record.AddPartition(); - part->SetTopic(p.second.Partition.DiscoveryConverter->GetPrimaryPath()); - part->SetPartition(p.second.Partition.Partition); - part->SetAssignId(p.second.Partition.AssignId); - for (auto& c : p.second.NextCommits) { + part->SetTopic(info.Partition.DiscoveryConverter->GetPrimaryPath()); + part->SetPartition(info.Partition.Partition); + part->SetAssignId(info.Partition.AssignId); + part->SetReadIdCommitted(info.ReadIdCommitted); + part->SetLastReadId(info.ReadIdToResponse - 1); + part->SetTimestampMs(info.AssignTimestamp.MilliSeconds()); + + for (const auto c : info.NextCommits) { part->AddNextCommits(c); } - part->SetReadIdCommitted(p.second.ReadIdCommitted); - part->SetLastReadId(p.second.ReadIdToResponse - 1); - part->SetTimestampMs(p.second.AssignTimestamp.MilliSeconds()); } + result->Record.SetSession(Session); result->Record.SetTimestamp(StartTimestamp.MilliSeconds()); - result->Record.SetClientNode(PeerName); result->Record.SetProxyNodeId(ctx.SelfID.NodeId()); ctx.Send(ev->Sender, result.Release()); } -inline 
TString GetTopicSettingsPath(const PersQueue::V1::MigrationStreamingReadClientMessage::TopicReadSettings& settings) { - return settings.topic(); -} -inline TString GetTopicSettingsPath(const Topic::StreamReadMessage::InitRequest::TopicReadSettings& settings) { - return settings.path(); -} -inline i64 GetTopicSettingsReadFrom(const PersQueue::V1::MigrationStreamingReadClientMessage::TopicReadSettings& settings) { - return settings.start_from_written_at_ms(); -} -inline i64 GetTopicSettingsReadFrom(const Topic::StreamReadMessage::InitRequest::TopicReadSettings& settings) { - return ::google::protobuf::util::TimeUtil::TimestampToMilliseconds(settings.read_from()); -} - - -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Handle(typename TEvReadInit::TPtr& ev, const TActorContext& ctx) { - - THolder<TEvReadInit> event(ev->Release()); - if (!Topics.empty()) { - //answer error - CloseSession("got second init request", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "got second init request", ctx); } - const auto& init = event->Request.init_request(); + const auto& init = ev->Get()->Request.init_request(); if (!init.topics_read_settings_size()) { - CloseSession("no topics in init request", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "no topics in init request", ctx); } if (init.consumer().empty()) { - CloseSession("no consumer in init request", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "no consumer in init request", ctx); } ClientId = NPersQueue::ConvertNewConsumerName(init.consumer(), ctx); @@ -626,11 +610,15 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(typename TEvReadInit::TPtr& ClientPath = NPersQueue::StripLeadSlash(NPersQueue::MakeConsumerPath(init.consumer())); } - TStringBuilder session; - session 
<< ClientPath << "_" << ctx.SelfID.NodeId() << "_" << Cookie << "_" << TAppData::RandomProvider->GenRand64() << "_v1"; - Session = session; + Session = TStringBuilder() << ClientPath + << "_" << ctx.SelfID.NodeId() + << "_" << Cookie + << "_" << TAppData::RandomProvider->GenRand64() + << "_" << "v1"; CommitsDisabled = false; + PeerName = ev->Get()->PeerName; + if constexpr (UseMigrationProtocol) { RangesMode = init.ranges_mode(); MaxReadMessagesCount = NormalizeMaxReadMessagesCount(init.read_params().max_read_messages_count()); @@ -645,108 +633,121 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(typename TEvReadInit::TPtr& MaxTimeLagMs = 0; // max_lag per topic only ReadTimestampMs = 0; // read_from per topic only ReadOnlyLocal = true; + if (init.reader_name()) { + PeerName = init.reader_name(); + } } + if (MaxTimeLagMs < 0) { - CloseSession("max_lag_duration_ms must be nonnegative number", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "max_lag_duration_ms must be nonnegative number", ctx); } + if (ReadTimestampMs < 0) { - CloseSession("start_from_written_at_ms must be nonnegative number", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "start_from_written_at_ms must be nonnegative number", ctx); } - PeerName = event->PeerName; + + auto getTopicPath = [](const auto& settings) { + if constexpr (UseMigrationProtocol) { + return settings.topic(); + } else { + return settings.path(); + } + }; + + auto getReadFrom = [](const auto& settings) { + if constexpr (UseMigrationProtocol) { + return settings.start_from_written_at_ms(); + } else { + return ::google::protobuf::util::TimeUtil::TimestampToMilliseconds(settings.read_from()); + } + }; for (const auto& topic : init.topics_read_settings()) { - TString topic_path = GetTopicSettingsPath(topic); - if (topic_path.empty()) { - CloseSession("empty topic in init request", 
PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; + const TString path = getTopicPath(topic); + if (path.empty()) { + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "empty topic in init request", ctx); } - i64 read_from = GetTopicSettingsReadFrom(topic); + + const i64 read_from = getReadFrom(topic); if (read_from < 0) { - CloseSession("start_from_written_at_ms must be nonnegative number", PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, "start_from_written_at_ms must be nonnegative number", ctx); } - TopicsToResolve.insert(topic_path); + + TopicsToResolve.insert(path); } if (Request->GetInternalToken().empty()) { if (AppData(ctx)->PQConfig.GetRequireCredentialsInNewProtocol()) { - CloseSession("Unauthenticated access is forbidden, please provide credentials", PersQueue::ErrorCode::ACCESS_DENIED, ctx); - return; + return CloseSession(PersQueue::ErrorCode::ACCESS_DENIED, + "unauthenticated access is forbidden, please provide credentials", ctx); } } else { Y_VERIFY(Request->GetYdbToken()); Auth = *(Request->GetYdbToken()); Token = new NACLib::TUserToken(Request->GetInternalToken()); } - TopicsList = TopicsHandler.GetReadTopicsList( - TopicsToResolve, ReadOnlyLocal, Request->GetDatabaseName().GetOrElse(TString()) - ); + + TopicsList = TopicsHandler.GetReadTopicsList(TopicsToResolve, ReadOnlyLocal, + Request->GetDatabaseName().GetOrElse(TString())); if (!TopicsList.IsValid) { - return CloseSession( - TopicsList.Reason, - PersQueue::ErrorCode::BAD_REQUEST, ctx - ); + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TopicsList.Reason, ctx); } for (const auto& topic : init.topics_read_settings()) { - auto topicIter = TopicsList.ClientTopics.find(GetTopicSettingsPath(topic)); - Y_VERIFY(!topicIter.IsEnd()); - for (const auto& converter: topicIter->second) { + auto it = TopicsList.ClientTopics.find(getTopicPath(topic)); + Y_VERIFY(it != TopicsList.ClientTopics.end()); + + for (const auto& converter : 
it->second) { const auto internalName = converter->GetOriginalPath(); if constexpr (UseMigrationProtocol) { - for (i64 pg: topic.partition_group_ids()) { + for (const i64 pg : topic.partition_group_ids()) { if (pg <= 0) { - CloseSession("partition group id must be positive number", PersQueue::ErrorCode::BAD_REQUEST, - ctx); - return; + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, + "partition group id must be positive number", ctx); } + if (pg > Max<ui32>()) { - CloseSession( - TStringBuilder() << "partition group id is too big: " << pg << " > " << Max<ui32>(), - PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TStringBuilder() + << "partition group id is too big: " << pg << " > " << Max<ui32>(), ctx); } + TopicGroups[internalName].push_back(static_cast<ui32>(pg)); } + MaxLagByTopic[internalName] = MaxTimeLagMs; - ReadFromTimestamp[internalName] = GetTopicSettingsReadFrom(topic); + ReadFromTimestamp[internalName] = getReadFrom(topic); } else { - for (i64 p: topic.partition_ids()) { + for (const i64 p : topic.partition_ids()) { if (p < 0) { - CloseSession("partition id must be nonnegative number", PersQueue::ErrorCode::BAD_REQUEST, - ctx); - return; + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, + "partition id must be nonnegative number", ctx); } + if (p + 1 > Max<ui32>()) { - CloseSession( - TStringBuilder() << "partition id is too big: " << p << " > " << Max<ui32>() - 1, - PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TStringBuilder() + << "partition id is too big: " << p << " > " << Max<ui32>() - 1, ctx); } + TopicGroups[internalName].push_back(static_cast<ui32>(p + 1)); } - MaxLagByTopic[internalName] = - ::google::protobuf::util::TimeUtil::DurationToMilliseconds(topic.max_lag());; - ReadFromTimestamp[internalName] = GetTopicSettingsReadFrom(topic); + + MaxLagByTopic[internalName] = 
::google::protobuf::util::TimeUtil::DurationToMilliseconds(topic.max_lag());; + ReadFromTimestamp[internalName] = getReadFrom(topic); } } } - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " init: " << event->Request << " from " << PeerName); + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " read init" + << ": from# " << PeerName + << ", request# " << ev->Get()->Request); if (!AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { SetupCounters(); } - AuthInitActor = ctx.Register(new TReadInitAndAuthActor( - ctx, ctx.SelfID, ClientId, Cookie, Session, SchemeCache, NewSchemeCache, Counters, Token, TopicsList, - TopicsHandler.GetLocalCluster() - )); - + RunAuthActor(ctx); auto subGroup = GetServiceCounters(Counters, "pqproxy|SLI"); Aggr = {{{{"Account", ClientPath.substr(0, ClientPath.find("/"))}}, {"total"}}}; @@ -756,41 +757,8 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(typename TEvReadInit::TPtr& SLITotal.Inc(); } - -template<bool UseMigrationProtocol> -void TReadSessionActor<UseMigrationProtocol>::RegisterSession(const TActorId& pipe, const TString& topic, const TVector<ui32>& groups, const TActorContext& ctx) -{ - - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " register session to " << topic); - THolder<TEvPersQueue::TEvRegisterReadSession> request; - request.Reset(new TEvPersQueue::TEvRegisterReadSession); - auto& req = request->Record; - req.SetSession(Session); - req.SetClientNode(PeerName); - ActorIdToProto(pipe, req.MutablePipeClient()); - req.SetClientId(ClientId); - - for (ui32 i = 0; i < groups.size(); ++i) { - req.AddGroups(groups[i]); - } - - NTabletPipe::SendData(ctx, pipe, request.Release()); -} - -template<bool UseMigrationProtocol> -void TReadSessionActor<UseMigrationProtocol>::RegisterSessions(const TActorContext& ctx) { - InitDone = true; - - for (auto& t : Topics) { - RegisterSession(t.second.PipeClient, t.second.FullConverter->GetInternalName(), t.second.Groups, ctx); - 
NumPartitionsFromTopic[t.second.FullConverter->GetInternalName()] = 0; - } -} - - -template<bool UseMigrationProtocol> -void TReadSessionActor<UseMigrationProtocol>::SetupCounters() -{ +template <bool UseMigrationProtocol> +void TReadSessionActor<UseMigrationProtocol>::SetupCounters() { if (SessionsCreated) { return; } @@ -815,74 +783,65 @@ void TReadSessionActor<UseMigrationProtocol>::SetupCounters() ++(*SessionsCreated); ++(*SessionsActive); - PartsPerSession.IncFor(Partitions.size(), 1); //for 0 + PartsPerSession.IncFor(Partitions.size(), 1); // for 0 } - -template<bool UseMigrationProtocol> -void TReadSessionActor<UseMigrationProtocol>::SetupTopicCounters(const NPersQueue::TTopicConverterPtr& topic) -{ +template <bool UseMigrationProtocol> +void TReadSessionActor<UseMigrationProtocol>::SetupTopicCounters(const NPersQueue::TTopicConverterPtr& topic) { auto& topicCounters = TopicCounters[topic->GetInternalName()]; auto subGroup = GetServiceCounters(Counters, "pqproxy|readSession"); -//client/consumerPath Account/Producer OriginDC Topic/TopicPath auto aggr = NPersQueue::GetLabels(topic); - TVector<std::pair<TString, TString>> cons = {{"Client", ClientId}, {"ConsumerPath", ClientPath}}; + const TVector<std::pair<TString, TString>> cons = {{"Client", ClientId}, {"ConsumerPath", ClientPath}}; - topicCounters.PartitionsLocked = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"PartitionsLocked"}, true); - topicCounters.PartitionsReleased = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"PartitionsReleased"}, true); - topicCounters.PartitionsToBeReleased = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"PartitionsToBeReleased"}, false); - topicCounters.PartitionsToBeLocked = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"PartitionsToBeLocked"}, false); - topicCounters.PartitionsInfly = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"PartitionsInfly"}, false); - topicCounters.Errors = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, 
{"PartitionsErrors"}, true); - topicCounters.Commits = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"Commits"}, true); - topicCounters.WaitsForData = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"WaitsForData"}, true); + topicCounters.PartitionsLocked = NPQ::TMultiCounter(subGroup, aggr, cons, {"PartitionsLocked"}, true); + topicCounters.PartitionsReleased = NPQ::TMultiCounter(subGroup, aggr, cons, {"PartitionsReleased"}, true); + topicCounters.PartitionsToBeReleased = NPQ::TMultiCounter(subGroup, aggr, cons, {"PartitionsToBeReleased"}, false); + topicCounters.PartitionsToBeLocked = NPQ::TMultiCounter(subGroup, aggr, cons, {"PartitionsToBeLocked"}, false); + topicCounters.PartitionsInfly = NPQ::TMultiCounter(subGroup, aggr, cons, {"PartitionsInfly"}, false); + topicCounters.Errors = NPQ::TMultiCounter(subGroup, aggr, cons, {"PartitionsErrors"}, true); + topicCounters.Commits = NPQ::TMultiCounter(subGroup, aggr, cons, {"Commits"}, true); + topicCounters.WaitsForData = NPQ::TMultiCounter(subGroup, aggr, cons, {"WaitsForData"}, true); topicCounters.CommitLatency = CommitLatency; topicCounters.SLIBigLatency = SLIBigLatency; topicCounters.SLITotal = SLITotal; } -template<bool UseMigrationProtocol> -void TReadSessionActor<UseMigrationProtocol>::SetupTopicCounters(const NPersQueue::TTopicConverterPtr& topic, const TString& cloudId, - const TString& dbId, const TString& folderId) +template <bool UseMigrationProtocol> +void TReadSessionActor<UseMigrationProtocol>::SetupTopicCounters(const NPersQueue::TTopicConverterPtr& topic, + const TString& cloudId, const TString& dbId, const TString& dbPath, const bool isServerless, const TString& folderId) { auto& topicCounters = TopicCounters[topic->GetInternalName()]; - auto subGroup = NPersQueue::GetCountersForStream(Counters); -//client/consumerPath Account/Producer OriginDC Topic/TopicPath - auto aggr = NPersQueue::GetLabelsForStream(topic, cloudId, dbId, folderId); - TVector<std::pair<TString, TString>> 
cons{{"consumer", ClientPath}}; - - topicCounters.PartitionsLocked = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"stream.internal_read.partitions_locked_per_second"}, true, "name"); - topicCounters.PartitionsReleased = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"stream.internal_read.partitions_released_per_second"}, true, "name"); - topicCounters.PartitionsToBeReleased = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"stream.internal_read.partitions_to_be_released"}, false, "name"); - topicCounters.PartitionsToBeLocked = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"stream.internal_read.partitions_to_be_locked"}, false, "name"); - topicCounters.PartitionsInfly = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"stream.internal_read.partitions_locked"}, false, "name"); - topicCounters.Errors = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"stream.internal_read.partitions_errors_per_second"}, true, "name"); - topicCounters.Commits = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"stream.internal_read.commits_per_second"}, true, "name"); - topicCounters.WaitsForData = NKikimr::NPQ::TMultiCounter(subGroup, aggr, cons, {"stream.internal_read.waits_for_data"}, true, "name"); + auto subGroup = NPersQueue::GetCountersForTopic(Counters, isServerless); + auto subgroups = NPersQueue::GetSubgroupsForTopic(topic, cloudId, dbId, dbPath, folderId); + subgroups.push_back({"consumer", ClientPath}); + + topicCounters.PartitionsLocked = NPQ::TMultiCounter(subGroup, {}, subgroups, {"api.grpc.topic.stream_read.partition_session.started"}, true, "name"); + topicCounters.PartitionsReleased = NPQ::TMultiCounter(subGroup, {}, subgroups, {"api.grpc.topic.stream_read.partition_session.stopped"}, true, "name"); + topicCounters.PartitionsToBeReleased = NPQ::TMultiCounter(subGroup, {}, subgroups, {"api.grpc.topic.stream_read.partition_session.stopping_count"}, false, "name"); + topicCounters.PartitionsToBeLocked = NPQ::TMultiCounter(subGroup, {}, subgroups, 
{"api.grpc.topic.stream_read.partition_session.starting_count"}, false, "name"); + topicCounters.PartitionsInfly = NPQ::TMultiCounter(subGroup, {}, subgroups, {"api.grpc.topic.stream_read.partition_session.count"}, false, "name"); + topicCounters.Errors = NPQ::TMultiCounter(subGroup, {}, subgroups, {"api.grpc.topic.stream_read.partition_session.errors"}, true, "name"); + topicCounters.Commits = NPQ::TMultiCounter(subGroup, {}, subgroups, {"api.grpc.topic.stream_read.commits"}, true, "name"); topicCounters.CommitLatency = CommitLatency; topicCounters.SLIBigLatency = SLIBigLatency; topicCounters.SLITotal = SLITotal; } -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvAuthResultOk::TPtr& ev, const TActorContext& ctx) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " auth ok" + << ": topics# " << ev->Get()->TopicAndTablets.size() + << ", initDone# " << InitDone); LastACLCheckTimestamp = ctx.Now(); - - LOG_INFO_S( - ctx, - NKikimrServices::PQ_READ_PROXY, - PQ_LOG_PREFIX << " auth ok, got " << ev->Get()->TopicAndTablets.size() << " topics, init done " << InitDone - ); - AuthInitActor = TActorId(); if (!InitDone) { - ui32 initBorder = AppData(ctx)->PQConfig.GetReadInitLatencyBigMs(); - ui32 readBorder = AppData(ctx)->PQConfig.GetReadLatencyBigMs(); - ui32 readBorderFromDisk = AppData(ctx)->PQConfig.GetReadLatencyFromDiskBigMs(); + const ui32 initBorder = AppData(ctx)->PQConfig.GetReadInitLatencyBigMs(); + const ui32 readBorder = AppData(ctx)->PQConfig.GetReadLatencyBigMs(); + const ui32 readBorderFromDisk = AppData(ctx)->PQConfig.GetReadLatencyFromDiskBigMs(); auto subGroup = GetServiceCounters(Counters, "pqproxy|SLI"); InitLatency = NKikimr::NPQ::CreateSLIDurationCounter(subGroup, Aggr, "ReadInit", initBorder, {100, 200, 500, 1000, 1500, 2000, 5000, 10000, 30000, 99999999}); @@ -893,47 +852,48 @@ void 
TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvAuthResultOk SLIBigReadLatency = NKikimr::NPQ::TMultiCounter(subGroup, Aggr, {}, {"ReadBigLatency"}, true, "sensor", false); ReadsTotal = NKikimr::NPQ::TMultiCounter(subGroup, Aggr, {}, {"ReadsTotal"}, true, "sensor", false); - ui32 initDurationMs = (ctx.Now() - StartTime).MilliSeconds(); + const ui32 initDurationMs = (ctx.Now() - StartTime).MilliSeconds(); InitLatency.IncFor(initDurationMs, 1); if (initDurationMs >= initBorder) { SLIBigLatency.Inc(); } - for (auto& [name, t] : ev->Get()->TopicAndTablets) { // ToDo - return something from Init and Auth Actor (Full Path - ?) + for (const auto& [name, t] : ev->Get()->TopicAndTablets) { // TODO: return something from Init and Auth Actor (Full Path - ?) auto internalName = t.TopicNameConverter->GetInternalName(); - auto topicGrIter = TopicGroups.find(name); - if (!topicGrIter.IsEnd()) { - auto value = std::move(topicGrIter->second); - TopicGroups.erase(topicGrIter); - TopicGroups.insert(std::make_pair(internalName, std::move(value))); + { + auto it = TopicGroups.find(name); + if (it != TopicGroups.end()) { + auto value = std::move(it->second); + TopicGroups.erase(it); + TopicGroups[internalName] = std::move(value); + } } - auto rtfsIter = ReadFromTimestamp.find(name); - if (!rtfsIter.IsEnd()) { - auto value = std::move(rtfsIter->second); - ReadFromTimestamp.erase(rtfsIter); - ReadFromTimestamp[internalName] = value; + { + auto it = ReadFromTimestamp.find(name); + if (it != ReadFromTimestamp.end()) { + auto value = std::move(it->second); + ReadFromTimestamp.erase(it); + ReadFromTimestamp[internalName] = std::move(value); + } } - auto lagIter = MaxLagByTopic.find(name); - if (!lagIter.IsEnd()) { - auto value = std::move(lagIter->second); - MaxLagByTopic.erase(lagIter); - MaxLagByTopic[internalName] = value; + { + auto it = MaxLagByTopic.find(name); + if (it != MaxLagByTopic.end()) { + auto value = std::move(it->second); + MaxLagByTopic.erase(it); + 
MaxLagByTopic[internalName] = std::move(value); + } } - auto& topicHolder = Topics[internalName]; - topicHolder.TabletID = t.TabletID; - topicHolder.FullConverter = t.TopicNameConverter; - topicHolder.CloudId = t.CloudId; - topicHolder.DbId = t.DbId; - topicHolder.FolderId = t.FolderId; - topicHolder.MeteringMode = t.MeteringMode; + + Topics[internalName] = TTopicHolder::FromTopicInfo(t); FullPathToConverter[t.TopicNameConverter->GetPrimaryPath()] = t.TopicNameConverter; FullPathToConverter[t.TopicNameConverter->GetSecondaryPath()] = t.TopicNameConverter; if (!GetMeteringMode()) { SetMeteringMode(t.MeteringMode); } else if (*GetMeteringMode() != t.MeteringMode) { - return CloseSession("Cannot read from topics with different metering modes", - PersQueue::ErrorCode::BAD_REQUEST, ctx); + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, + "cannot read from topics with different metering modes", ctx); } } @@ -943,158 +903,168 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvAuthResultOk InitSession(ctx); } } else { - for (auto& [name, t] : ev->Get()->TopicAndTablets) { + for (const auto& [name, t] : ev->Get()->TopicAndTablets) { auto it = Topics.find(t.TopicNameConverter->GetInternalName()); if (it == Topics.end()) { - return CloseSession( - TStringBuilder() << "list of topics changed - new topic '" - << t.TopicNameConverter->GetPrintableString() << "' found", - PersQueue::ErrorCode::BAD_REQUEST, ctx - ); + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TStringBuilder() + << "list of topics changed, new topic found: " << t.TopicNameConverter->GetPrintableString(), ctx); } if (t.MeteringMode != *GetMeteringMode()) { - return CloseSession( - TStringBuilder() << "Metering mode of topic: " << name << " has been changed", - PersQueue::ErrorCode::OVERLOAD, ctx - ); + return CloseSession(PersQueue::ErrorCode::OVERLOAD, TStringBuilder() + << "metering mode of topic: " << name << " has been changed", ctx); } } } } -template<bool 
UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::InitSession(const TActorContext& ctx) { TServerMessage result; result.set_status(Ydb::StatusIds::SUCCESS); result.mutable_init_response()->set_session_id(Session); - if (!WriteResponse(std::move(result))) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); - Die(ctx); + if (!WriteToStreamOrDie(ctx, std::move(result))) { return; } - if (!Request->GetStreamCtx()->Read()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); - Die(ctx); + if (!ReadFromStreamOrDie(ctx)) { return; } - for (auto& t : Topics) { - NTabletPipe::TClientConfig clientConfig; - - clientConfig.CheckAliveness = false; - - clientConfig.RetryPolicy = RetryPolicyForPipes; - t.second.PipeClient = ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, t.second.TabletID, clientConfig)); - - Y_VERIFY(t.second.FullConverter); - auto it = TopicGroups.find(t.second.FullConverter->GetInternalName()); + for (auto& [_, holder] : Topics) { + holder.PipeClient = CreatePipeClient(holder.TabletID, ctx); + Y_VERIFY(holder.FullConverter); + auto it = TopicGroups.find(holder.FullConverter->GetInternalName()); if (it != TopicGroups.end()) { - t.second.Groups = it->second; + holder.Groups = it->second; } } - RegisterSessions(ctx); + InitDone = true; + + for (const auto& [_, holder] : Topics) { + RegisterSession(holder.FullConverter->GetInternalName(), holder.PipeClient, holder.Groups, ctx); + NumPartitionsFromTopic[holder.FullConverter->GetInternalName()] = 0; + } ctx.Schedule(CHECK_ACL_DELAY, new TEvents::TEvWakeup(EWakeupTag::RecheckAcl)); } -template<bool UseMigrationProtocol> -void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPersQueue::TEvLockPartition::TPtr& ev, const TActorContext& ctx) { +template <bool UseMigrationProtocol> +void TReadSessionActor<UseMigrationProtocol>::RegisterSession(const TString& 
topic, const TActorId& pipe, const TVector<ui32>& groups, const TActorContext& ctx) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " register session" + << ": topic# " << topic); + + auto request = MakeHolder<TEvPersQueue::TEvRegisterReadSession>(); + + auto& req = request->Record; + req.SetSession(Session); + req.SetClientNode(PeerName); + ActorIdToProto(pipe, req.MutablePipeClient()); + req.SetClientId(ClientId); + + for (ui32 i = 0; i < groups.size(); ++i) { + req.AddGroups(groups[i]); + } + + NTabletPipe::SendData(ctx, pipe, request.Release()); +} - auto& record = ev->Get()->Record; +template <bool UseMigrationProtocol> +void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPersQueue::TEvLockPartition::TPtr& ev, const TActorContext& ctx) { + const auto& record = ev->Get()->Record; Y_VERIFY(record.GetSession() == Session); Y_VERIFY(record.GetClientId() == ClientId); - TActorId pipe = ActorIdFromProto(record.GetPipeClient()); auto path = record.GetPath(); if (path.empty()) { path = record.GetTopic(); } - auto converterIter = FullPathToConverter.find(NPersQueue::NormalizeFullPath(path)); - if (converterIter.IsEnd()) { - LOG_DEBUG_S( - ctx, NKikimrServices::PQ_READ_PROXY, - PQ_LOG_PREFIX << " ignored ev lock for path = " << record.GetPath() << ", path not recognized" - ); - return; - } - //const auto& topic = converterIter->second->GetPrimaryPath(); - const auto& intName = converterIter->second->GetInternalName(); - auto jt = Topics.find(intName); - if (jt == Topics.end() || pipe != jt->second.PipeClient) { //this is message from old version of pipe - LOG_ALERT_S( - ctx, NKikimrServices::PQ_READ_PROXY, - PQ_LOG_PREFIX << " ignored ev lock for topic = " << intName - << " path recognized, but topic is unknown, this is unexpected" - ); + auto converterIter = FullPathToConverter.find(NPersQueue::NormalizeFullPath(path)); + if (converterIter == FullPathToConverter.end()) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " 
ignored ev lock" + << ": path# " << path + << ", reason# " << "path not recognized"); return; } - //ToDo[counters] - if (NumPartitionsFromTopic[converterIter->second->GetInternalName()]++ == 0) { - if (AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { - SetupTopicCounters(converterIter->second, jt->second.CloudId, jt->second.DbId, jt->second.FolderId); - } else { - SetupTopicCounters(converterIter->second); + const auto name = converterIter->second->GetInternalName(); + + { + auto it = Topics.find(name); + if (it == Topics.end() || it->second.PipeClient != ActorIdFromProto(record.GetPipeClient())) { + LOG_ALERT_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " ignored ev lock" + << ": path# " << name + << ", reason# " << "topic is unknown"); + return; + } + + // TODO: counters + if (NumPartitionsFromTopic[name]++ == 0) { + if (AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { + SetupTopicCounters(converterIter->second, it->second.CloudId, it->second.DbId, it->second.DbPath, it->second.IsServerless, it->second.FolderId); + } else { + SetupTopicCounters(converterIter->second); + } } } - //ToDo[counters] - auto it = TopicCounters.find(converterIter->second->GetInternalName()); + // TODO: counters + auto it = TopicCounters.find(name); Y_VERIFY(it != TopicCounters.end()); - ui64 assignId = NextAssignId++; + Y_VERIFY(record.GetGeneration() > 0); + const ui64 assignId = NextAssignId++; BalancerGeneration[assignId] = {record.GetGeneration(), record.GetStep()}; - TPartitionId partitionId{converterIter->second, record.GetPartition(), assignId}; + const TPartitionId partitionId{converterIter->second, record.GetPartition(), assignId}; - IActor* partitionActor = new TPartitionActor( - ctx.SelfID, ClientId, ClientPath, Cookie, Session, partitionId, record.GetGeneration(), - record.GetStep(), record.GetTabletId(), it->second, CommitsDisabled, ClientDC, RangesMode, - converterIter->second, UseMigrationProtocol); + const TActorId actorId = ctx.Register(new 
TPartitionActor( + ctx.SelfID, ClientId, ClientPath, Cookie, Session, partitionId, record.GetGeneration(), + record.GetStep(), record.GetTabletId(), it->second, CommitsDisabled, ClientDC, RangesMode, + converterIter->second, UseMigrationProtocol)); - TActorId actorId = ctx.Register(partitionActor); if (SessionsActive) { PartsPerSession.DecFor(Partitions.size(), 1); } - Y_VERIFY(record.GetGeneration() > 0); - auto pp = Partitions.insert(std::make_pair(assignId, TPartitionActorInfo{actorId, partitionId, converterIter->second, ctx})); - Y_VERIFY(pp.second); + + bool res = Partitions.emplace(assignId, TPartitionActorInfo(actorId, partitionId, converterIter->second, ctx.Now())).second; + Y_VERIFY(res); + if (SessionsActive) { PartsPerSession.IncFor(Partitions.size(), 1); } - bool res = ActualPartitionActors.insert(actorId).second; + res = ActualPartitionActors.insert(actorId).second; Y_VERIFY(res); it->second.PartitionsLocked.Inc(); it->second.PartitionsInfly.Inc(); - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " Assign: " << record); + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " assign" + << ": record# " << record); ctx.Send(actorId, new TEvPQProxy::TEvLockPartition(0, 0, false, false)); } -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvPartitionStatus::TPtr& ev, const TActorContext& ctx) { - if (!ActualPartitionActor(ev->Sender)) + if (!ActualPartitionActors.contains(ev->Sender)) { return; + } auto it = Partitions.find(ev->Get()->Partition.AssignId); Y_VERIFY(it != Partitions.end()); Y_VERIFY(!it->second.Releasing); // if releasing and no lock sent yet - then server must already release partition + TServerMessage result; + result.set_status(Ydb::StatusIds::SUCCESS); + if (ev->Get()->Init) { Y_VERIFY(!it->second.LockSent); - it->second.LockSent = true; it->second.Offset = ev->Get()->Offset; - TServerMessage result; - 
result.set_status(Ydb::StatusIds::SUCCESS); - if constexpr (UseMigrationProtocol) { result.mutable_assigned()->mutable_topic()->set_path(it->second.Topic->GetFederationPath()); result.mutable_assigned()->set_cluster(it->second.Topic->GetCluster()); @@ -1103,9 +1073,8 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvPartitionSta result.mutable_assigned()->set_read_offset(ev->Get()->Offset); result.mutable_assigned()->set_end_offset(ev->Get()->EndOffset); - } else { - // TODO GetFederationPath() -> GetFederationPathWithDC() + // TODO: GetFederationPath() -> GetFederationPathWithDC() result.mutable_start_partition_session_request()->mutable_partition_session()->set_path(it->second.Topic->GetFederationPath()); result.mutable_start_partition_session_request()->mutable_partition_session()->set_partition_id(ev->Get()->Partition.Partition); result.mutable_start_partition_session_request()->mutable_partition_session()->set_partition_session_id(it->first); @@ -1114,29 +1083,10 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvPartitionSta result.mutable_start_partition_session_request()->mutable_partition_offsets()->set_start(ev->Get()->Offset); result.mutable_start_partition_session_request()->mutable_partition_offsets()->set_end(ev->Get()->EndOffset); } - - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " sending to client create partition stream event"); - - auto pp = it->second.Partition; - pp.AssignId = 0; - auto jt = PartitionToControlMessages.find(pp); - if (jt == PartitionToControlMessages.end()) { - if (!WriteResponse(std::move(result))) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); - Die(ctx); - return; - } - } else { - Y_VERIFY(jt->second.Infly); - jt->second.ControlMessages.push_back(result); - } } else { Y_VERIFY(it->second.LockSent); - TServerMessage result; - result.set_status(Ydb::StatusIds::SUCCESS); - - if constexpr(UseMigrationProtocol) { + if 
constexpr (UseMigrationProtocol) { result.mutable_partition_status()->mutable_topic()->set_path(it->second.Topic->GetFederationPath()); result.mutable_partition_status()->set_cluster(it->second.Topic->GetCluster()); result.mutable_partition_status()->set_partition(ev->Get()->Partition.Partition); @@ -1145,7 +1095,6 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvPartitionSta result.mutable_partition_status()->set_committed_offset(ev->Get()->Offset); result.mutable_partition_status()->set_end_offset(ev->Get()->EndOffset); result.mutable_partition_status()->set_write_watermark_ms(ev->Get()->WriteTimestampEstimateMs); - } else { result.mutable_partition_session_status_response()->set_partition_session_id(it->first); @@ -1155,169 +1104,157 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvPartitionSta *result.mutable_partition_session_status_response()->mutable_write_time_high_watermark() = ::google::protobuf::util::TimeUtil::MillisecondsToTimestamp(ev->Get()->WriteTimestampEstimateMs); } + } - auto pp = it->second.Partition; - pp.AssignId = 0; - auto jt = PartitionToControlMessages.find(pp); - if (jt == PartitionToControlMessages.end()) { - if (!WriteResponse(std::move(result))) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); - Die(ctx); - return; - } - } else { - Y_VERIFY(jt->second.Infly); - jt->second.ControlMessages.push_back(result); - } + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " sending to client create partition stream event"); + SendControlMessage(it->second.Partition, std::move(result), ctx); +} + +template <bool UseMigrationProtocol> +bool TReadSessionActor<UseMigrationProtocol>::SendControlMessage(TPartitionId id, TServerMessage&& message, const TActorContext& ctx) { + id.AssignId = 0; + + auto it = PartitionToControlMessages.find(id); + if (it == PartitionToControlMessages.end()) { + return WriteToStreamOrDie(ctx, std::move(message)); + } else 
{ + Y_VERIFY(it->second.Infly); + it->second.ControlMessages.push_back(std::move(message)); } + + return true; } -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPersQueue::TEvError::TPtr& ev, const TActorContext& ctx) { - CloseSession(ev->Get()->Record.GetDescription(), ConvertOldCode(ev->Get()->Record.GetCode()), ctx); + CloseSession(ConvertOldCode(ev->Get()->Record.GetCode()), ev->Get()->Record.GetDescription(), ctx); } - -template<bool UseMigrationProtocol> -void TReadSessionActor<UseMigrationProtocol>::SendReleaseSignalToClient(const typename THashMap<ui64, TPartitionActorInfo>::iterator& it, bool kill, const TActorContext& ctx) -{ +template <bool UseMigrationProtocol> +void TReadSessionActor<UseMigrationProtocol>::SendReleaseSignal(typename TPartitionsMap::iterator it, bool kill, const TActorContext& ctx) { TServerMessage result; result.set_status(Ydb::StatusIds::SUCCESS); - if constexpr(UseMigrationProtocol) { + if constexpr (UseMigrationProtocol) { result.mutable_release()->mutable_topic()->set_path(it->second.Topic->GetFederationPath()); result.mutable_release()->set_cluster(it->second.Topic->GetCluster()); result.mutable_release()->set_partition(it->second.Partition.Partition); result.mutable_release()->set_assign_id(it->second.Partition.AssignId); result.mutable_release()->set_forceful_release(kill); result.mutable_release()->set_commit_offset(it->second.Offset); - } else { result.mutable_stop_partition_session_request()->set_partition_session_id(it->second.Partition.AssignId); result.mutable_stop_partition_session_request()->set_graceful(!kill); result.mutable_stop_partition_session_request()->set_committed_offset(it->second.Offset); } - auto pp = it->second.Partition; - pp.AssignId = 0; - auto jt = PartitionToControlMessages.find(pp); - if (jt == PartitionToControlMessages.end()) { - if (!WriteResponse(std::move(result))) { - LOG_INFO_S(ctx, 
NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); - Die(ctx); - return; - } - } else { - Y_VERIFY(jt->second.Infly); - jt->second.ControlMessages.push_back(result); + if (!SendControlMessage(it->second.Partition, std::move(result), ctx)) { + return; } + Y_VERIFY(it->second.LockSent); it->second.ReleaseSent = true; } - -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPersQueue::TEvReleasePartition::TPtr& ev, const TActorContext& ctx) { - auto& record = ev->Get()->Record; + const auto& record = ev->Get()->Record; Y_VERIFY(record.GetSession() == Session); Y_VERIFY(record.GetClientId() == ClientId); - TString topicPath = NPersQueue::NormalizeFullPath(record.GetPath()); - ui32 group = record.HasGroup() ? record.GetGroup() : 0; - auto pathIter = FullPathToConverter.find(topicPath); - Y_VERIFY(!pathIter.IsEnd()); - auto it = Topics.find(pathIter->second->GetInternalName()); - Y_VERIFY(!it.IsEnd()); - auto& converter = it->second.FullConverter; + const ui32 group = record.HasGroup() ? 
record.GetGroup() : 0; - TActorId pipe = ActorIdFromProto(record.GetPipeClient()); + auto pathIter = FullPathToConverter.find(NPersQueue::NormalizeFullPath(record.GetPath())); + Y_VERIFY(pathIter != FullPathToConverter.end()); - if (pipe != it->second.PipeClient) { //this is message from old version of pipe + auto it = Topics.find(pathIter->second->GetInternalName()); + Y_VERIFY(it != Topics.end()); + + if (it->second.PipeClient != ActorIdFromProto(record.GetPipeClient())) { return; } + auto& converter = it->second.FullConverter; + for (ui32 c = 0; c < record.GetCount(); ++c) { Y_VERIFY(!Partitions.empty()); - TActorId actorId = TActorId{}; + TActorId actorId; auto jt = Partitions.begin(); ui32 i = 0; + for (auto it = Partitions.begin(); it != Partitions.end(); ++it) { if (it->second.Topic->GetInternalName() == converter->GetInternalName() && !it->second.Releasing && (group == 0 || it->second.Partition.Partition + 1 == group) ) { ++i; - if (rand() % i == 0) { //will lead to 1/n probability for each of n partitions + if (rand() % i == 0) { // will lead to 1/n probability for each of n partitions actorId = it->second.Actor; jt = it; } } } + Y_VERIFY(actorId); - { - //ToDo[counters] - auto it = TopicCounters.find(converter->GetInternalName()); - Y_VERIFY(it != TopicCounters.end()); - it->second.PartitionsToBeReleased.Inc(); - } + // TODO: counters + auto it = TopicCounters.find(converter->GetInternalName()); + Y_VERIFY(it != TopicCounters.end()); + it->second.PartitionsToBeReleased.Inc(); - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " releasing " << jt->second.Partition); + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " releasing" + << ": partition# " << jt->second.Partition); jt->second.Releasing = true; - if (!jt->second.LockSent) { //no lock yet - can release silently + + if (!jt->second.LockSent) { // no lock yet - can release silently ReleasePartition(jt, true, ctx); } else { - SendReleaseSignalToClient(jt, false, ctx); + 
SendReleaseSignal(jt, false, ctx); } } } - -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvPartitionReleased::TPtr& ev, const TActorContext& ctx) { - if (!ActualPartitionActor(ev->Sender)) + if (!ActualPartitionActors.contains(ev->Sender)) { return; + } - const auto assignId = ev->Get()->Partition.AssignId; - - auto it = Partitions.find(assignId); + auto it = Partitions.find(ev->Get()->Partition.AssignId); Y_VERIFY(it != Partitions.end()); Y_VERIFY(it->second.Releasing); - ReleasePartition(it, false, ctx); //no reads could be here - this is release from partition + ReleasePartition(it, false, ctx); // no reads could be here - this is release from partition } -template<bool UseMigrationProtocol> -void TReadSessionActor<UseMigrationProtocol>::InformBalancerAboutRelease(const typename THashMap<ui64, TPartitionActorInfo>::iterator& it, const TActorContext& ctx) { - - THolder<TEvPersQueue::TEvPartitionReleased> request; - request.Reset(new TEvPersQueue::TEvPartitionReleased); - auto& req = request->Record; - +template <bool UseMigrationProtocol> +void TReadSessionActor<UseMigrationProtocol>::InformBalancerAboutRelease(typename TPartitionsMap::iterator it, const TActorContext& ctx) { const auto& converter = it->second.Topic; auto jt = Topics.find(converter->GetInternalName()); Y_VERIFY(jt != Topics.end()); + auto request = MakeHolder<TEvPersQueue::TEvPartitionReleased>(); + + auto& req = request->Record; req.SetSession(Session); ActorIdToProto(jt->second.PipeClient, req.MutablePipeClient()); req.SetClientId(ClientId); req.SetTopic(converter->GetPrimaryPath()); req.SetPartition(it->second.Partition.Partition); - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " released: " << it->second.Partition); - + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " released" + << ": partition# " << it->second.Partition); NTabletPipe::SendData(ctx, 
jt->second.PipeClient, request.Release()); } - -template<bool UseMigrationProtocol> -void TReadSessionActor<UseMigrationProtocol>::CloseSession(const TString& errorReason, const PersQueue::ErrorCode::ErrorCode errorCode, const NActors::TActorContext& ctx) { - - if (errorCode != PersQueue::ErrorCode::OK) { - if (InternalErrorCode(errorCode)) { +template <bool UseMigrationProtocol> +void TReadSessionActor<UseMigrationProtocol>::CloseSession(PersQueue::ErrorCode::ErrorCode code, const TString& reason, const TActorContext& ctx) { + if (code != PersQueue::ErrorCode::OK) { + if (InternalErrorCode(code)) { SLIErrors.Inc(); } + if (Errors) { ++(*Errors); } else if (!AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { @@ -1325,169 +1262,155 @@ void TReadSessionActor<UseMigrationProtocol>::CloseSession(const TString& errorR } TServerMessage result; - result.set_status(ConvertPersQueueInternalCodeToStatus(errorCode)); - - FillIssue(result.add_issues(), errorCode, errorReason); - - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " closed with error reason: " << errorReason); + result.set_status(ConvertPersQueueInternalCodeToStatus(code)); + FillIssue(result.add_issues(), code, reason); - if (!WriteResponse(std::move(result), true)) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); - Die(ctx); + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " closed with error" + << ": reason# " << reason); + if (!WriteToStreamOrDie(ctx, std::move(result), true)) { return; } } else { LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " closed"); - if (!Request->GetStreamCtx()->Finish(std::move(grpc::Status::OK))) { + if (!Request->GetStreamCtx()->Finish(grpc::Status::OK)) { LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc double finish failed"); - Die(ctx); - return; } - } Die(ctx); } - -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void 
TReadSessionActor<UseMigrationProtocol>::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) { - TEvTabletPipe::TEvClientConnected *msg = ev->Get(); + const auto* msg = ev->Get(); + if (msg->Status != NKikimrProto::OK) { if (msg->Dead) { - CloseSession(TStringBuilder() << "one of topics is deleted, tablet " << msg->TabletId, PersQueue::ErrorCode::BAD_REQUEST, ctx); - return; + return CloseSession(PersQueue::ErrorCode::BAD_REQUEST, TStringBuilder() + << "one of topics is deleted, tablet " << msg->TabletId, ctx); } - //TODO: remove it - CloseSession(TStringBuilder() << "unable to connect to one of topics, tablet " << msg->TabletId, PersQueue::ErrorCode::ERROR, ctx); - return; + + // TODO: remove it + return CloseSession(PersQueue::ErrorCode::ERROR, TStringBuilder() + << "unable to connect to one of topics, tablet " << msg->TabletId, ctx); #if 0 - const bool isAlive = ProcessBalancerDead(msg->TabletId, ctx); // returns false if actor died - Y_UNUSED(isAlive); + ProcessBalancerDead(msg->TabletId, ctx); // returns false if actor died return; #endif } } -template<bool UseMigrationProtocol> -bool TReadSessionActor<UseMigrationProtocol>::ActualPartitionActor(const TActorId& part) { - return ActualPartitionActors.contains(part); +template <bool UseMigrationProtocol> +void TReadSessionActor<UseMigrationProtocol>::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) { + ProcessBalancerDead(ev->Get()->TabletId, ctx); } +template <bool UseMigrationProtocol> +void TReadSessionActor<UseMigrationProtocol>::ReleasePartition(typename TPartitionsMap::iterator it, bool couldBeReads, const TActorContext& ctx) { + // TODO: counters + auto jt = TopicCounters.find(it->second.Topic->GetInternalName()); + Y_VERIFY(jt != TopicCounters.end()); + + jt->second.PartitionsReleased.Inc(); + jt->second.PartitionsInfly.Dec(); -template<bool UseMigrationProtocol> -void TReadSessionActor<UseMigrationProtocol>::ReleasePartition(const typename 
THashMap<ui64, TPartitionActorInfo>::iterator& it, - bool couldBeReads, const TActorContext& ctx) -{ - { - //ToDo[counters] - auto jt = TopicCounters.find(it->second.Topic->GetInternalName()); - Y_VERIFY(jt != TopicCounters.end()); - jt->second.PartitionsReleased.Inc(); - jt->second.PartitionsInfly.Dec(); - if (!it->second.Released && it->second.Releasing) { - jt->second.PartitionsToBeReleased.Dec(); - } + if (!it->second.Released && it->second.Releasing) { + jt->second.PartitionsToBeReleased.Dec(); } Y_VERIFY(couldBeReads || !it->second.Reading); - //process reads - typename TFormedReadResponse<TServerMessage>::TPtr formedResponseToAnswer; + typename TFormedReadResponse<TServerMessage>::TPtr response; + + // process reads if (it->second.Reading) { - const auto readIt = PartitionToReadResponse.find(it->second.Actor); + auto readIt = PartitionToReadResponse.find(it->second.Actor); Y_VERIFY(readIt != PartitionToReadResponse.end()); if (--readIt->second->RequestsInfly == 0) { - formedResponseToAnswer = readIt->second; + response = readIt->second; } } InformBalancerAboutRelease(it, ctx); - it->second.Released = true; //to force drop - DropPartition(it, ctx); //partition will be dropped + it->second.Released = true; // to force drop + DropPartition(it, ctx); // partition will be dropped - if (formedResponseToAnswer) { - if (const auto ru = CalcRuConsumption(PrepareResponse(formedResponseToAnswer))) { - formedResponseToAnswer->RequiredQuota = ru; + if (response) { + if (const auto ru = CalcRuConsumption(PrepareResponse(response))) { + response->RequiredQuota = ru; if (MaybeRequestQuota(ru, EWakeupTag::RlAllowed, ctx)) { Y_VERIFY(!PendingQuota); - PendingQuota = formedResponseToAnswer; + PendingQuota = response; } else { - WaitingQuota.push_back(formedResponseToAnswer); + WaitingQuota.push_back(response); } } else { - ProcessAnswer(ctx, formedResponseToAnswer); // returns false if actor died + ProcessAnswer(response, ctx); } } } -template<bool UseMigrationProtocol> -bool 
TReadSessionActor<UseMigrationProtocol>::ProcessBalancerDead(const ui64 tablet, const TActorContext& ctx) { +template <bool UseMigrationProtocol> +TActorId TReadSessionActor<UseMigrationProtocol>::CreatePipeClient(ui64 tabletId, const TActorContext& ctx) { + NTabletPipe::TClientConfig clientConfig; + clientConfig.CheckAliveness = false; + clientConfig.RetryPolicy = RetryPolicyForPipes; + return ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, tabletId, clientConfig)); +} + +template <bool UseMigrationProtocol> +void TReadSessionActor<UseMigrationProtocol>::ProcessBalancerDead(ui64 tabletId, const TActorContext& ctx) { for (auto& t : Topics) { - if (t.second.TabletID == tablet) { - LOG_INFO_S( - ctx, NKikimrServices::PQ_READ_PROXY, - PQ_LOG_PREFIX << " balancer for topic " << t.second.FullConverter->GetPrintableString() - << " is dead, restarting all from this topic" - ); - - //Drop all partitions from this topic + if (t.second.TabletID == tabletId) { + LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " balancer dead, restarting all from topic" + << ": topic# " << t.second.FullConverter->GetPrintableString()); + + // Drop all partitions from this topic for (auto it = Partitions.begin(); it != Partitions.end();) { - if (it->second.Topic->GetInternalName() == t.first) { //partition from this topic + if (it->second.Topic->GetInternalName() == t.first) { // partition from this topic // kill actor auto jt = it; ++it; + if (jt->second.LockSent) { - SendReleaseSignalToClient(jt, true, ctx); + SendReleaseSignal(jt, true, ctx); } + ReleasePartition(jt, true, ctx); } else { ++it; } } - //reconnect pipe - NTabletPipe::TClientConfig clientConfig; - clientConfig.CheckAliveness = false; - clientConfig.RetryPolicy = RetryPolicyForPipes; - t.second.PipeClient = ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, t.second.TabletID, clientConfig)); + t.second.PipeClient = CreatePipeClient(t.second.TabletID, ctx); + if (InitDone) { if 
(PipeReconnects) { ++(*PipeReconnects); } + if (Errors) { ++(*Errors); } - RegisterSession(t.second.PipeClient, t.first, t.second.Groups, ctx); + RegisterSession(t.first, t.second.PipeClient, t.second.Groups, ctx); } } } - return true; -} - - -template<bool UseMigrationProtocol> -void TReadSessionActor<UseMigrationProtocol>::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) { - const bool isAlive = ProcessBalancerDead(ev->Get()->TabletId, ctx); // returns false if actor died - Y_UNUSED(isAlive); } -template<bool UseMigrationProtocol> -void TReadSessionActor<UseMigrationProtocol>::Handle(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse::TPtr &ev , const TActorContext& ctx) { +template <bool UseMigrationProtocol> +void TReadSessionActor<UseMigrationProtocol>::Handle(NGRpcService::TGRpcRequestProxy::TEvRefreshTokenResponse::TPtr& ev , const TActorContext& ctx) { if (ev->Get()->Authenticated && !ev->Get()->InternalToken.empty()) { Token = new NACLib::TUserToken(ev->Get()->InternalToken); ForceACLCheck = true; + if constexpr (!UseMigrationProtocol) { TServerMessage result; result.set_status(Ydb::StatusIds::SUCCESS); result.mutable_update_token_response(); - if (!WriteResponse(std::move(result))) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); - Die(ctx); - return; - } + WriteToStreamOrDie(ctx, std::move(result)); } } else { Request->ReplyUnauthenticated("refreshed token is invalid"); @@ -1495,46 +1418,33 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(NGRpcService::TGRpcRequestP } } -template<bool UseMigrationProtocol> -void TReadSessionActor<UseMigrationProtocol>::ProcessAuth(const TString& auth, const TActorContext& ctx) { - if (!auth.empty() && auth != Auth) { - Auth = auth; - Request->RefreshToken(auth, ctx, ctx.SelfID); - } -} - -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void 
TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvRead::TPtr& ev, const TActorContext& ctx) { RequestNotChecked = true; - THolder<TEvPQProxy::TEvRead> event(ev->Release()); - - if (!Request->GetStreamCtx()->Read()) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc read failed at start"); - Die(ctx); + if (!ReadFromStreamOrDie(ctx)) { return; } - - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got read request with guid: " << event->Guid); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " got read request" + << ": guid# " << ev->Get()->Guid); if constexpr (UseMigrationProtocol) { - Reads.emplace_back(event.Release()); + Reads.emplace_back(ev->Release()); } else { - ReadSizeBudget += event->MaxSize; + ReadSizeBudget += ev->Get()->MaxSize; } ProcessReads(ctx); } - -template<typename TServerMessage> +template <typename TServerMessage> i64 TFormedReadResponse<TServerMessage>::ApplyResponse(TServerMessage&& resp) { constexpr bool UseMigrationProtocol = std::is_same_v<TServerMessage, PersQueue::V1::MigrationStreamingReadServerMessage>; + if constexpr (UseMigrationProtocol) { Y_VERIFY(resp.data_batch().partition_data_size() == 1); Response.mutable_data_batch()->add_partition_data()->Swap(resp.mutable_data_batch()->mutable_partition_data(0)); - } else { Y_VERIFY(resp.read_response().partition_data_size() == 1); Response.mutable_read_response()->add_partition_data()->Swap(resp.mutable_read_response()->mutable_partition_data(0)); @@ -1547,50 +1457,53 @@ i64 TFormedReadResponse<TServerMessage>::ApplyResponse(TServerMessage&& resp) { return ByteSize - prev; } -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Handle(typename TEvReadResponse::TPtr& ev, const TActorContext& ctx) { - TActorId sender = ev->Sender; - if (!ActualPartitionActor(sender)) + if (!ActualPartitionActors.contains(ev->Sender)) { return; + } - 
THolder<TEvReadResponse> event(ev->Release()); - + auto& response = ev->Get()->Response; ui64 partitionCookie; ui64 assignId; + if constexpr (UseMigrationProtocol) { - Y_VERIFY(event->Response.data_batch().partition_data_size() == 1); - partitionCookie = event->Response.data_batch().partition_data(0).cookie().partition_cookie(); + Y_VERIFY(response.data_batch().partition_data_size() == 1); + partitionCookie = response.data_batch().partition_data(0).cookie().partition_cookie(); Y_VERIFY(partitionCookie != 0); // cookie is assigned - assignId = event->Response.data_batch().partition_data(0).cookie().assign_id(); - + assignId = response.data_batch().partition_data(0).cookie().assign_id(); } else { - Y_VERIFY(event->Response.read_response().partition_data_size() == 1); - assignId = event->Response.read_response().partition_data(0).partition_session_id(); + Y_VERIFY(response.read_response().partition_data_size() == 1); + assignId = response.read_response().partition_data(0).partition_session_id(); } - const auto partitionIt = Partitions.find(assignId); - Y_VERIFY(partitionIt != Partitions.end()); - Y_VERIFY(partitionIt->second.Reading); - partitionIt->second.Reading = false; - - if constexpr (UseMigrationProtocol) { - partitionIt->second.ReadIdToResponse = partitionCookie + 1; + typename TFormedReadResponse<TServerMessage>::TPtr formedResponse; + { + auto it = PartitionToReadResponse.find(ev->Sender); + Y_VERIFY(it != PartitionToReadResponse.end()); + formedResponse = it->second; } - auto it = PartitionToReadResponse.find(sender); - Y_VERIFY(it != PartitionToReadResponse.end()); + auto it = Partitions.find(assignId); + Y_VERIFY(it != Partitions.end()); + Y_VERIFY(it->second.Reading); + it->second.Reading = false; - typename TFormedReadResponse<TServerMessage>::TPtr formedResponse = it->second; + if constexpr (UseMigrationProtocol) { + it->second.ReadIdToResponse = partitionCookie + 1; + } - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " read done 
guid " << formedResponse->Guid - << partitionIt->second.Partition - << " size " << event->Response.ByteSize()); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " read done" + << ": guid# " << formedResponse->Guid + << ", partition# " << it->second.Partition + << ", size# " << response.ByteSize()); - const i64 diff = formedResponse->ApplyResponse(std::move(event->Response)); - if (event->FromDisk) { + const i64 diff = formedResponse->ApplyResponse(std::move(response)); + if (ev->Get()->FromDisk) { formedResponse->FromDisk = true; } - formedResponse->WaitQuotaTime = Max(formedResponse->WaitQuotaTime, event->WaitQuotaTime); + + formedResponse->WaitQuotaTime = Max(formedResponse->WaitQuotaTime, ev->Get()->WaitQuotaTime); --formedResponse->RequestsInfly; BytesInflight_ += diff; @@ -1608,28 +1521,16 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(typename TEvReadResponse::T WaitingQuota.push_back(formedResponse); } } else { - ProcessAnswer(ctx, formedResponse); + ProcessAnswer(formedResponse, ctx); } } } -template<bool UseMigrationProtocol> -bool TReadSessionActor<UseMigrationProtocol>::WriteResponse(TServerMessage&& response, bool finish) { - ui64 sz = response.ByteSize(); - ActiveWrites.push(sz); - BytesInflight_ += sz; - if (BytesInflight) { - (*BytesInflight) += sz; - } - - return finish ? 
Request->GetStreamCtx()->WriteAndFinish(std::move(response), grpc::Status::OK) : Request->GetStreamCtx()->Write(std::move(response)); -} - -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> ui64 TReadSessionActor<UseMigrationProtocol>::PrepareResponse(typename TFormedReadResponse<TServerMessage>::TPtr formedResponse) { formedResponse->ByteSizeBeforeFiltering = formedResponse->Response.ByteSize(); - if constexpr(UseMigrationProtocol) { + if constexpr (UseMigrationProtocol) { formedResponse->HasMessages = RemoveEmptyMessages(*formedResponse->Response.mutable_data_batch()); } else { formedResponse->HasMessages = RemoveEmptyMessages(*formedResponse->Response.mutable_read_response()); @@ -1638,35 +1539,40 @@ ui64 TReadSessionActor<UseMigrationProtocol>::PrepareResponse(typename TFormedRe return formedResponse->HasMessages ? formedResponse->Response.ByteSize() : 0; } -template<bool UseMigrationProtocol> -void TReadSessionActor<UseMigrationProtocol>::ProcessAnswer(const TActorContext& ctx, typename TFormedReadResponse<TServerMessage>::TPtr formedResponse) { +template <bool UseMigrationProtocol> +void TReadSessionActor<UseMigrationProtocol>::ProcessAnswer(typename TFormedReadResponse<TServerMessage>::TPtr formedResponse, const TActorContext& ctx) { ui32 readDurationMs = (ctx.Now() - formedResponse->Start - formedResponse->WaitQuotaTime).MilliSeconds(); + if (formedResponse->FromDisk) { ReadLatencyFromDisk.IncFor(readDurationMs, 1); } else { ReadLatency.IncFor(readDurationMs, 1); } - if (readDurationMs >= (formedResponse->FromDisk ? AppData(ctx)->PQConfig.GetReadLatencyFromDiskBigMs() : AppData(ctx)->PQConfig.GetReadLatencyBigMs())) { + + const auto latencyThreshold = formedResponse->FromDisk + ? 
AppData(ctx)->PQConfig.GetReadLatencyFromDiskBigMs() + : AppData(ctx)->PQConfig.GetReadLatencyBigMs(); + if (readDurationMs >= latencyThreshold) { SLIBigReadLatency.Inc(); } Y_VERIFY(formedResponse->RequestsInfly == 0); const ui64 diff = formedResponse->ByteSizeBeforeFiltering; - ui64 sizeEstimation = formedResponse->HasMessages ? formedResponse->Response.ByteSize() : 0; + const ui64 sizeEstimation = formedResponse->HasMessages ? formedResponse->Response.ByteSize() : 0; + if constexpr (!UseMigrationProtocol) { formedResponse->Response.mutable_read_response()->set_bytes_size(sizeEstimation); } if (formedResponse->HasMessages) { - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " response to read " << formedResponse->Guid); - - if (!WriteResponse(std::move(formedResponse->Response))) { - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); - Die(ctx); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " response to read" + << ": guid# " << formedResponse->Guid); + if (!WriteToStreamOrDie(ctx, std::move(formedResponse->Response))) { return; } } else { - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " empty read result " << formedResponse->Guid << ", start new reading"); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " empty read result, start new reading" + << ": guid# " << formedResponse->Guid); } BytesInflight_ -= diff; @@ -1674,27 +1580,28 @@ void TReadSessionActor<UseMigrationProtocol>::ProcessAnswer(const TActorContext& (*BytesInflight) -= diff; } - for (auto& pp : formedResponse->PartitionsTookPartInControlMessages) { - auto it = PartitionToControlMessages.find(pp); + for (const auto& id : formedResponse->PartitionsTookPartInControlMessages) { + auto it = PartitionToControlMessages.find(id); Y_VERIFY(it != PartitionToControlMessages.end()); + if (--it->second.Infly == 0) { for (auto& r : it->second.ControlMessages) { - if (!WriteResponse(std::move(r))) 
{ - LOG_INFO_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " grpc write failed"); - Die(ctx); + if (!WriteToStreamOrDie(ctx, std::move(r))) { return; } } + PartitionToControlMessages.erase(it); } } - for (const TActorId& p : formedResponse->PartitionsTookPartInRead) { - PartitionToReadResponse.erase(p); + for (const auto& id : formedResponse->PartitionsTookPartInRead) { + PartitionToReadResponse.erase(id); } RequestedBytes -= formedResponse->RequestedBytes; ReadsInfly--; + if constexpr (!UseMigrationProtocol) { ReadSizeBudget += formedResponse->RequestedBytes; ReadSizeBudget -= sizeEstimation; @@ -1712,35 +1619,34 @@ void TReadSessionActor<UseMigrationProtocol>::ProcessAnswer(const TActorContext& } } - ProcessReads(ctx); // returns false if actor died -} - -template<bool UseMigrationProtocol> -void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvCloseSession::TPtr& ev, const TActorContext& ctx) { - CloseSession(ev->Get()->Reason, ev->Get()->ErrorCode, ctx); + ProcessReads(ctx); } -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> ui32 TReadSessionActor<UseMigrationProtocol>::NormalizeMaxReadMessagesCount(ui32 sourceValue) { ui32 count = Min<ui32>(sourceValue, Max<i32>()); + if (count == 0) { count = Max<i32>(); } + return count; } -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> ui32 TReadSessionActor<UseMigrationProtocol>::NormalizeMaxReadSize(ui32 sourceValue) { ui32 size = Min<ui32>(sourceValue, MAX_READ_SIZE); + if (size == 0) { size = MAX_READ_SIZE; } + return size; } -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::ProcessReads(const TActorContext& ctx) { - auto ShouldContinueReads = [this]() { + auto shouldContinueReads = [this]() { if constexpr (UseMigrationProtocol) { return !Reads.empty() && ReadsInfly < MAX_INFLY_READS; } else { @@ -1748,7 +1654,7 @@ void 
TReadSessionActor<UseMigrationProtocol>::ProcessReads(const TActorContext& } }; - while (ShouldContinueReads() && BytesInflight_ + RequestedBytes < MAX_INFLY_BYTES) { + while (shouldContinueReads() && BytesInflight_ + RequestedBytes < MAX_INFLY_BYTES) { ui32 count = MaxReadMessagesCount; ui64 size = MaxReadSize; ui32 partitionsAsked = 0; @@ -1759,127 +1665,141 @@ void TReadSessionActor<UseMigrationProtocol>::ProcessReads(const TActorContext& } else { guid = CreateGuidAsString(); } + typename TFormedReadResponse<TServerMessage>::TPtr formedResponse = new TFormedReadResponse<TServerMessage>(guid, ctx.Now()); + while (!AvailablePartitions.empty()) { auto part = *AvailablePartitions.begin(); AvailablePartitions.erase(AvailablePartitions.begin()); auto it = Partitions.find(part.AssignId); - if (it == Partitions.end() || it->second.Releasing) { //this is already released partition + if (it == Partitions.end() || it->second.Releasing) { // this is already released partition continue; } - //add this partition to reading - ++partitionsAsked; + + ++partitionsAsked; // add this partition to reading const ui32 ccount = Min<ui32>(part.MsgLag * LAG_GROW_MULTIPLIER, count); count -= ccount; + ui64 csize = (ui64)Min<double>(part.SizeLag * LAG_GROW_MULTIPLIER, size); if constexpr (!UseMigrationProtocol) { csize = Min<i64>(csize, ReadSizeBudget); } + size -= csize; Y_VERIFY(csize < Max<i32>()); auto jt = ReadFromTimestamp.find(it->second.Topic->GetInternalName()); if (jt == ReadFromTimestamp.end()) { - LOG_ALERT_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << "Error searching for topic: " << it->second.Topic->GetInternalName() - << " (" << it->second.Topic->GetPrintableString() << ")"); - for (const auto& [k, v] : ReadFromTimestamp) { - const auto& kk = k; - LOG_ALERT_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << "Have topic: " << kk); + LOG_ALERT_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " error searching for topic" + << ": internalName# " << 
it->second.Topic->GetInternalName() + << ", prettyName# " << it->second.Topic->GetPrintableString()); + + for (const auto& kv : ReadFromTimestamp) { + LOG_ALERT_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " have topic" + << ": topic# " << kv.first); } - CloseSession(TStringBuilder() << "Internal error", PersQueue::ErrorCode::ERROR, ctx); - return; + + return CloseSession(PersQueue::ErrorCode::ERROR, "internal error", ctx); } - ui64 readTimestampMs = Max(ReadTimestampMs, jt->second); - auto lags_it = MaxLagByTopic.find(it->second.Topic->GetInternalName()); - Y_VERIFY(lags_it != MaxLagByTopic.end()); - ui32 maxLag = lags_it->second; + ui64 readTimestampMs = Max(ReadTimestampMs, jt->second); - TAutoPtr<TEvPQProxy::TEvRead> read = new TEvPQProxy::TEvRead(guid, ccount, csize, maxLag, readTimestampMs); + auto lagsIt = MaxLagByTopic.find(it->second.Topic->GetInternalName()); + Y_VERIFY(lagsIt != MaxLagByTopic.end()); + const ui32 maxLag = lagsIt->second; - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX - << " performing read request with guid " << read->Guid - << " from " << it->second.Partition << " count " << ccount << " size " << csize - << " partitionsAsked " << partitionsAsked << " maxTimeLag " << maxLag << "ms"); + auto ev = MakeHolder<TEvPQProxy::TEvRead>(guid, ccount, csize, maxLag, readTimestampMs); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " performing read request" + << ": guid# " << ev->Guid + << ", from# " << it->second.Partition + << ", count# " << ccount + << ", size# " << csize + << ", partitionsAsked# " << partitionsAsked + << ", maxTimeLag# " << maxLag << "ms"); Y_VERIFY(!it->second.Reading); it->second.Reading = true; + formedResponse->PartitionsTookPartInRead.insert(it->second.Actor); - auto pp = it->second.Partition; - pp.AssignId = 0; - PartitionToControlMessages[pp].Infly++; - bool res = formedResponse->PartitionsTookPartInControlMessages.insert(pp).second; + auto id = it->second.Partition; + 
id.AssignId = 0; + PartitionToControlMessages[id].Infly++; + + bool res = formedResponse->PartitionsTookPartInControlMessages.insert(id).second; Y_VERIFY(res); RequestedBytes += csize; formedResponse->RequestedBytes += csize; ReadSizeBudget -= csize; - ctx.Send(it->second.Actor, read.Release()); - const auto insertResult = PartitionToReadResponse.insert(std::make_pair(it->second.Actor, formedResponse)); - Y_VERIFY(insertResult.second); + ctx.Send(it->second.Actor, ev.Release()); + res = PartitionToReadResponse.emplace(it->second.Actor, formedResponse).second; + Y_VERIFY(res); - // TODO (ildar-khisam@): Gather data from all partitions; - // For now send messages only from single partition + // TODO (ildar-khisam@): Gather data from all partitions. + // For now send messages only from single partition. if constexpr (!UseMigrationProtocol) { break; } - if (count == 0 || size == 0) + if (count == 0 || size == 0) { break; + } } - if (partitionsAsked == 0) + if (partitionsAsked == 0) { break; + } + ReadsTotal.Inc(); formedResponse->RequestsInfly = partitionsAsked; - ReadsInfly++; i64 diff = formedResponse->Response.ByteSize(); BytesInflight_ += diff; formedResponse->ByteSize = diff; + if (BytesInflight) { (*BytesInflight) += diff; } + if constexpr (UseMigrationProtocol) { Reads.pop_front(); } } } - -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Handle(TEvPQProxy::TEvPartitionReady::TPtr& ev, const TActorContext& ctx) { - - if (!ActualPartitionActor(ev->Sender)) + if (!ActualPartitionActors.contains(ev->Sender)) { return; + } - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " " << ev->Get()->Partition - << " ready for read with readOffset " - << ev->Get()->ReadOffset << " endOffset " << ev->Get()->EndOffset << " WTime " - << ev->Get()->WTime << " sizeLag " << ev->Get()->SizeLag); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " partition ready for read" + << 
": partition# " << ev->Get()->Partition + << ", readOffset# " << ev->Get()->ReadOffset + << ", endOffset# " << ev->Get()->EndOffset + << ", WTime# " << ev->Get()->WTime + << ", sizeLag# " << ev->Get()->SizeLag); - const auto it = PartitionToReadResponse.find(ev->Sender); // check whether this partition is taking part in read response + auto it = PartitionToReadResponse.find(ev->Sender); // check whether this partition is taking part in read response auto& container = it != PartitionToReadResponse.end() ? it->second->PartitionsBecameAvailable : AvailablePartitions; - auto res = container.insert(TPartitionInfo{ev->Get()->Partition.AssignId, ev->Get()->WTime, ev->Get()->SizeLag, - ev->Get()->EndOffset - ev->Get()->ReadOffset}); - Y_VERIFY(res.second); - ProcessReads(ctx); -} + bool res = container.emplace( + ev->Get()->Partition.AssignId, + ev->Get()->WTime, + ev->Get()->SizeLag, + ev->Get()->EndOffset - ev->Get()->ReadOffset).second; + Y_VERIFY(res); -template<bool UseMigrationProtocol> -void TReadSessionActor<UseMigrationProtocol>::HandlePoison(TEvPQProxy::TEvDieCommand::TPtr& ev, const TActorContext& ctx) { - CloseSession(ev->Get()->Reason, ev->Get()->ErrorCode, ctx); + ProcessReads(ctx); } - -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::Handle(TEvents::TEvWakeup::TPtr& ev, const TActorContext& ctx) { const auto tag = static_cast<EWakeupTag>(ev->Get()->Tag); OnWakeup(tag); @@ -1896,7 +1816,7 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(TEvents::TEvWakeup::TPtr& e counters->AddConsumedRequestUnits(PendingQuota->RequiredQuota); } - ProcessAnswer(ctx, PendingQuota); + ProcessAnswer(PendingQuota, ctx); if (!WaitingQuota.empty()) { PendingQuota = WaitingQuota.front(); @@ -1913,27 +1833,34 @@ void TReadSessionActor<UseMigrationProtocol>::Handle(TEvents::TEvWakeup::TPtr& e if (PendingQuota) { Y_VERIFY(MaybeRequestQuota(PendingQuota->RequiredQuota, EWakeupTag::RlAllowed, ctx)); } else { - 
return CloseSession("Throughput limit exceeded", PersQueue::ErrorCode::OVERLOAD, ctx); + return CloseSession(PersQueue::ErrorCode::OVERLOAD, "throughput limit exceeded", ctx); } break; } } -template<bool UseMigrationProtocol> +template <bool UseMigrationProtocol> void TReadSessionActor<UseMigrationProtocol>::RecheckACL(const TActorContext& ctx) { ctx.Schedule(CHECK_ACL_DELAY, new TEvents::TEvWakeup(EWakeupTag::RecheckAcl)); - if (Token && !AuthInitActor && (ForceACLCheck || (ctx.Now() - LastACLCheckTimestamp > TDuration::Seconds(AppData(ctx)->PQConfig.GetACLRetryTimeoutSec()) && RequestNotChecked))) { + + const auto timeout = TDuration::Seconds(AppData(ctx)->PQConfig.GetACLRetryTimeoutSec()); + const bool authTimedOut = (ctx.Now() - LastACLCheckTimestamp) > timeout; + + if (Token && !AuthInitActor && (ForceACLCheck || (authTimedOut && RequestNotChecked))) { ForceACLCheck = false; RequestNotChecked = false; - Y_VERIFY(!AuthInitActor); - LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " checking auth because of timeout"); - AuthInitActor = ctx.Register(new TReadInitAndAuthActor( - ctx, ctx.SelfID, ClientId, Cookie, Session, SchemeCache, NewSchemeCache, Counters, Token, TopicsList, - TopicsHandler.GetLocalCluster() - )); + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, PQ_LOG_PREFIX << " checking auth because of timeout"); + RunAuthActor(ctx); } } -} // namespace NGRpcProxy::V1 -} // namespace NKikimr +template <bool UseMigrationProtocol> +void TReadSessionActor<UseMigrationProtocol>::RunAuthActor(const TActorContext& ctx) { + Y_VERIFY(!AuthInitActor); + AuthInitActor = ctx.Register(new TReadInitAndAuthActor( + ctx, ctx.SelfID, ClientId, Cookie, Session, SchemeCache, NewSchemeCache, Counters, Token, TopicsList, + TopicsHandler.GetLocalCluster())); +} + +} diff --git a/ydb/services/persqueue_v1/actors/schema_actors.cpp b/ydb/services/persqueue_v1/actors/schema_actors.cpp index fe23b799d0..0eddfe0cf2 100644 --- 
a/ydb/services/persqueue_v1/actors/schema_actors.cpp +++ b/ydb/services/persqueue_v1/actors/schema_actors.cpp @@ -439,16 +439,423 @@ void TAlterTopicActor::ModifyPersqueueConfig( TDescribeTopicActor::TDescribeTopicActor(NKikimr::NGRpcService::TEvDescribeTopicRequest* request) : TBase(request, request->GetProtoRequest()->path()) + , TDescribeTopicActorImpl("") { } -void TDescribeTopicActor::StateWork(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx) { +TDescribeTopicActor::TDescribeTopicActor(NKikimr::NGRpcService::IRequestOpCtx * ctx) + : TBase(ctx, dynamic_cast<const Ydb::Topic::DescribeTopicRequest*>(ctx->GetRequest())->path()) + , TDescribeTopicActorImpl("") +{ +} + + + +TDescribeConsumerActor::TDescribeConsumerActor(NKikimr::NGRpcService::TEvDescribeConsumerRequest* request) + : TBase(request, request->GetProtoRequest()->path()) + , TDescribeTopicActorImpl(request->GetProtoRequest()->consumer()) +{ +} + +TDescribeConsumerActor::TDescribeConsumerActor(NKikimr::NGRpcService::IRequestOpCtx * ctx) + : TBase(ctx, dynamic_cast<const Ydb::Topic::DescribeConsumerRequest*>(ctx->GetRequest())->path()) + , TDescribeTopicActorImpl(dynamic_cast<const Ydb::Topic::DescribeConsumerRequest*>(ctx->GetRequest())->consumer()) +{ +} + + +TDescribeTopicActorImpl::TDescribeTopicActorImpl(const TString& consumer) + : Consumer(consumer) +{ +} + + +bool TDescribeTopicActorImpl::StateWork(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx) { switch (ev->GetTypeRewrite()) { - default: TBase::StateWork(ev, ctx); + HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); + HFunc(TEvTabletPipe::TEvClientConnected, Handle); + HFunc(NKikimr::TEvPersQueue::TEvStatusResponse, Handle); + HFunc(NKikimr::TEvPersQueue::TEvReadSessionsInfoResponse, Handle); + default: return false; + } + return true; +} + +void TDescribeTopicActor::StateWork(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx) { + if (!TDescribeTopicActorImpl::StateWork(ev, ctx)) { + TBase::StateWork(ev, ctx); + } +} + +void 
TDescribeConsumerActor::StateWork(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx) { + if (!TDescribeTopicActorImpl::StateWork(ev, ctx)) { + TBase::StateWork(ev, ctx); + } +} + + +void TDescribeTopicActorImpl::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) { + if (ev->Get()->Status != NKikimrProto::OK) { + RestartTablet(ev->Get()->TabletId, ctx, ev->Sender); + } else { + auto it = Tablets.find(ev->Get()->TabletId); + if (it == Tablets.end()) return; + it->second.NodeId = ev->Get()->ServerId.NodeId(); + } +} + +void TDescribeTopicActorImpl::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) { + RestartTablet(ev->Get()->TabletId, ctx, ev->Sender); +} + +void TDescribeTopicActor::RaiseError(const TString& error, const Ydb::PersQueue::ErrorCode::ErrorCode errorCode, const Ydb::StatusIds::StatusCode status, const TActorContext& ctx) { + this->Request_->RaiseIssue(FillIssue(error, errorCode)); + TBase::Reply(status, ctx); +} + +void TDescribeConsumerActor::RaiseError(const TString& error, const Ydb::PersQueue::ErrorCode::ErrorCode errorCode, const Ydb::StatusIds::StatusCode status, const TActorContext& ctx) { + this->Request_->RaiseIssue(FillIssue(error, errorCode)); + TBase::Reply(status, ctx); +} + + +void TDescribeTopicActorImpl::RestartTablet(ui64 tabletId, const TActorContext& ctx, TActorId pipe, const TDuration& delay) { + auto it = Tablets.find(tabletId); + if (it == Tablets.end()) return; + if (pipe && pipe != it->second.Pipe) return; + if (--it->second.RetriesLeft == 0) { + return RaiseError(TStringBuilder() << "Tablet " << tabletId << " unresponsible", Ydb::PersQueue::ErrorCode::ERROR, Ydb::StatusIds::INTERNAL_ERROR, ctx); + } + Y_VERIFY(RequestsInfly > 0); + --RequestsInfly; + if (delay == TDuration::Zero()) { + RequestTablet(it->second, ctx); + } else { + ++RequestsInfly; + ctx.Schedule(delay, new TEvPQProxy::TEvRequestTablet(tabletId)); + } +} + +void 
TDescribeTopicActorImpl::Handle(TEvPQProxy::TEvRequestTablet::TPtr& ev, const TActorContext& ctx) { + --RequestsInfly; + auto it = Tablets.find(ev->Get()->TabletId); + if (it == Tablets.end()) return; + RequestTablet(it->second, ctx); +} + +void TDescribeTopicActorImpl::RequestTablet(TTabletInfo& tablet, const TActorContext& ctx) { + tablet.Pipe = ctx.Register(NTabletPipe::CreateClient(ctx.SelfID, tablet.TabletId, NTabletPipe::TClientConfig(NTabletPipe::TClientRetryPolicy::WithRetries()))); + + if (tablet.TabletId == BalancerTabletId) { + THolder<NKikimr::TEvPersQueue::TEvGetReadSessionsInfo> ev(new NKikimr::TEvPersQueue::TEvGetReadSessionsInfo(Consumer)); + NTabletPipe::SendData(ctx, tablet.Pipe, ev.Release()); + + } else { + THolder<NKikimr::TEvPersQueue::TEvStatus> ev(new NKikimr::TEvPersQueue::TEvStatus(Consumer.empty() ? "" : NPersQueue::ConvertNewConsumerName(Consumer), Consumer.empty())); + NTabletPipe::SendData(ctx, tablet.Pipe, ev.Release()); + } + ++RequestsInfly; +} + +void TDescribeTopicActorImpl::Handle(NKikimr::TEvPersQueue::TEvStatusResponse::TPtr& ev, const TActorContext& ctx) { + auto it = Tablets.find(ev->Get()->Record.GetTabletId()); + if (it == Tablets.end()) return; + --RequestsInfly; + NTabletPipe::CloseClient(ctx, it->second.Pipe); + it->second.Pipe = TActorId{}; + + auto& record = ev->Get()->Record; + for (auto& partResult : record.GetPartResult()) { + if (partResult.GetStatus() == NKikimrPQ::TStatusResponse::STATUS_INITIALIZING || + partResult.GetStatus() == NKikimrPQ::TStatusResponse::STATUS_UNKNOWN) { + RestartTablet(record.GetTabletId(), ctx, {}, TDuration::MilliSeconds(100)); + return; + } + } + + ApplyResponse(it->second, ev, ctx); + + if (RequestsInfly == 0) { + RequestAdditionalInfo(ctx); + if (RequestsInfly == 0) { + Reply(ctx); + } + } +} + + +void TDescribeTopicActorImpl::Handle(NKikimr::TEvPersQueue::TEvReadSessionsInfoResponse::TPtr& ev, const TActorContext& ctx) { + if (BalancerTabletId == 0) + return; + auto it = 
Tablets.find(BalancerTabletId); + Y_VERIFY(it != Tablets.end()); + --RequestsInfly; + NTabletPipe::CloseClient(ctx, it->second.Pipe); + it->second.Pipe = TActorId{}; + BalancerTabletId = 0; + + ApplyResponse(it->second, ev, ctx); + + if (RequestsInfly == 0) { + RequestAdditionalInfo(ctx); + if (RequestsInfly == 0) { + Reply(ctx); + } + } +} + + +void TDescribeTopicActorImpl::RequestAdditionalInfo(const TActorContext& ctx) { + if (BalancerTabletId) { + RequestTablet(BalancerTabletId, ctx); + } +} + +void TDescribeTopicActorImpl::RequestTablet(ui64 tabletId, const TActorContext& ctx) { + auto it = Tablets.find(tabletId); + if (it != Tablets.end()) { + RequestTablet(it->second, ctx); + } +} + + +template<class T> +void SetProtoTime(T* proto, const ui64 ms) { + proto->set_seconds(ms / 1000); + proto->set_nanos((ms % 1000) * 1'000'000); +} + +template<class T> +void UpdateProtoTime(T* proto, const ui64 ms, bool storeMin) { + ui64 storedMs = proto->seconds() * 1000 + proto->nanos() / 1'000'000; + if ((ms < storedMs) == storeMin) { + SetProtoTime(proto, ms); + } +} + + +void TDescribeTopicActor::ApplyResponse(TTabletInfo& tabletInfo, NKikimr::TEvPersQueue::TEvReadSessionsInfoResponse::TPtr& ev, const TActorContext& ctx) { + Y_UNUSED(ctx); + Y_UNUSED(tabletInfo); + Y_UNUSED(ev); + Y_FAIL(""); +} + + +void AddWindowsStat(Ydb::Topic::MultipleWindowsStat *stat, ui64 perMin, ui64 perHour, ui64 perDay) { + stat->set_per_minute(stat->per_minute() + perMin); + stat->set_per_hour(stat->per_hour() + perHour); + stat->set_per_day(stat->per_day() + perDay); +} + +void TDescribeTopicActor::ApplyResponse(TTabletInfo& tabletInfo, NKikimr::TEvPersQueue::TEvStatusResponse::TPtr& ev, const TActorContext& ctx) { + Y_UNUSED(ctx); + + auto& record = ev->Get()->Record; + + std::map<ui32, NKikimrPQ::TStatusResponse::TPartResult> res; + + auto topicStats = Result.mutable_topic_stats(); + + if (record.PartResultSize() > 0) { // init with first value + + 
SetProtoTime(topicStats->mutable_min_last_write_time(), record.GetPartResult(0).GetLastWriteTimestampMs()); + SetProtoTime(topicStats->mutable_max_write_time_lag(), record.GetPartResult(0).GetWriteLagMs()); + } + + std::map<TString, Ydb::Topic::Consumer*> consumersInfo; + for (auto& consumer : *Result.mutable_consumers()) { + consumersInfo[NPersQueue::ConvertNewConsumerName(consumer.name(), ctx)] = &consumer; + } + + for (auto& partResult : record.GetPartResult()) { + res[partResult.GetPartition()] = partResult; + + topicStats->set_store_size_bytes(topicStats->store_size_bytes() + partResult.GetPartitionSize()); + + UpdateProtoTime(topicStats->mutable_min_last_write_time(), partResult.GetLastWriteTimestampMs(), true); + UpdateProtoTime(topicStats->mutable_max_write_time_lag(), partResult.GetWriteLagMs(), false); + + AddWindowsStat(topicStats->mutable_bytes_written(), partResult.GetAvgWriteSpeedPerMin(), partResult.GetAvgWriteSpeedPerHour(), partResult.GetAvgWriteSpeedPerDay()); + + + for (auto& cons : partResult.GetConsumerResult()) { + auto it = consumersInfo.find(cons.GetConsumer()); + if (it == consumersInfo.end()) continue; + + if (!it->second->has_consumer_stats()) { + auto* stats = it->second->mutable_consumer_stats(); + + SetProtoTime(stats->mutable_min_partitions_last_read_time(), cons.GetLastReadTimestampMs()); + SetProtoTime(stats->mutable_max_read_time_lag(), cons.GetReadLagMs()); + SetProtoTime(stats->mutable_max_write_time_lag(), cons.GetWriteLagMs()); + } else { + auto* stats = it->second->mutable_consumer_stats(); + + UpdateProtoTime(stats->mutable_min_partitions_last_read_time(), cons.GetLastReadTimestampMs(), true); + UpdateProtoTime(stats->mutable_max_read_time_lag(), cons.GetReadLagMs(), false); + UpdateProtoTime(stats->mutable_max_write_time_lag(), cons.GetWriteLagMs(), false); + } + + AddWindowsStat(it->second->mutable_consumer_stats()->mutable_bytes_read(), cons.GetAvgReadSpeedPerMin(), cons.GetAvgReadSpeedPerHour(), 
cons.GetAvgReadSpeedPerDay()); + } + } + + for (auto& partRes : *(Result.mutable_partitions())) { + auto it = res.find(partRes.partition_id()); + if (it == res.end()) continue; + + const auto& partResult = it->second; + auto partStats = partRes.mutable_partition_stats(); + + partStats->set_store_size_bytes(partResult.GetPartitionSize()); + partStats->mutable_partition_offsets()->set_start(partResult.GetStartOffset()); + partStats->mutable_partition_offsets()->set_end(partResult.GetEndOffset()); + + SetProtoTime(partStats->mutable_last_write_time(), partResult.GetLastWriteTimestampMs()); + SetProtoTime(partStats->mutable_max_write_time_lag(), partResult.GetWriteLagMs()); + + AddWindowsStat(partStats->mutable_bytes_written(), partResult.GetAvgWriteSpeedPerMin(), partResult.GetAvgWriteSpeedPerHour(), partResult.GetAvgWriteSpeedPerDay()); + + partStats->set_partition_node_id(tabletInfo.NodeId); + } +} + + +void TDescribeTopicActor::Reply(const TActorContext& ctx) { + return ReplyWithResult(Ydb::StatusIds::SUCCESS, Result, ctx); +} + +void TDescribeConsumerActor::Reply(const TActorContext& ctx) { + return ReplyWithResult(Ydb::StatusIds::SUCCESS, Result, ctx); +} + + +void TDescribeConsumerActor::ApplyResponse(TTabletInfo& tabletInfo, NKikimr::TEvPersQueue::TEvReadSessionsInfoResponse::TPtr& ev, const TActorContext& ctx) { + Y_UNUSED(ctx); + Y_UNUSED(tabletInfo); + + std::map<ui32, NKikimrPQ::TReadSessionsInfoResponse::TPartitionInfo> res; + + for (const auto& partInfo : ev->Get()->Record.GetPartitionInfo()) { + res[partInfo.GetPartition()] = partInfo; + } + for (auto& partRes : *(Result.mutable_partitions())) { + auto it = res.find(partRes.partition_id()); + if (it == res.end()) continue; + auto consRes = partRes.mutable_partition_consumer_stats(); + consRes->set_read_session_id(it->second.GetSession()); + SetProtoTime(consRes->mutable_partition_read_session_create_time(), it->second.GetTimestampMs()); + consRes->set_connection_node_id(it->second.GetProxyNodeId()); + 
consRes->set_reader_name(it->second.GetClientNode()); + } +} + + +void TDescribeConsumerActor::ApplyResponse(TTabletInfo& tabletInfo, NKikimr::TEvPersQueue::TEvStatusResponse::TPtr& ev, const TActorContext& ctx) { + Y_UNUSED(ctx); + Y_UNUSED(tabletInfo); + + auto& record = ev->Get()->Record; + + std::map<ui32, NKikimrPQ::TStatusResponse::TPartResult> res; + + for (auto& partResult : record.GetPartResult()) { + res[partResult.GetPartition()] = partResult; + } + + for (auto& partRes : *(Result.mutable_partitions())) { + auto it = res.find(partRes.partition_id()); + if (it == res.end()) continue; + + const auto& partResult = it->second; + auto partStats = partRes.mutable_partition_stats(); + + partStats->set_store_size_bytes(partResult.GetPartitionSize()); + partStats->mutable_partition_offsets()->set_start(partResult.GetStartOffset()); + partStats->mutable_partition_offsets()->set_end(partResult.GetEndOffset()); + + SetProtoTime(partStats->mutable_last_write_time(), partResult.GetLastWriteTimestampMs()); + SetProtoTime(partStats->mutable_max_write_time_lag(), partResult.GetWriteLagMs()); + + + AddWindowsStat(partStats->mutable_bytes_written(), partResult.GetAvgWriteSpeedPerMin(), partResult.GetAvgWriteSpeedPerHour(), partResult.GetAvgWriteSpeedPerDay()); + + partStats->set_partition_node_id(tabletInfo.NodeId); + + if (Consumer) { + auto consStats = partRes.mutable_partition_consumer_stats(); + + consStats->set_last_read_offset(partResult.GetLagsInfo().GetReadPosition().GetOffset()); + consStats->set_committed_offset(partResult.GetLagsInfo().GetWritePosition().GetOffset()); + + SetProtoTime(consStats->mutable_last_read_time(), partResult.GetLagsInfo().GetLastReadTimestampMs()); + SetProtoTime(consStats->mutable_max_read_time_lag(), partResult.GetLagsInfo().GetReadLagMs()); + SetProtoTime(consStats->mutable_max_write_time_lag(), partResult.GetLagsInfo().GetWriteLagMs()); + + AddWindowsStat(consStats->mutable_bytes_read(), partResult.GetAvgReadSpeedPerMin(), 
partResult.GetAvgReadSpeedPerHour(), partResult.GetAvgReadSpeedPerDay()); + + if (!Result.consumer().has_consumer_stats()) { + auto* stats = Result.mutable_consumer()->mutable_consumer_stats(); + + SetProtoTime(stats->mutable_min_partitions_last_read_time(), partResult.GetLagsInfo().GetLastReadTimestampMs()); + SetProtoTime(stats->mutable_max_read_time_lag(), partResult.GetLagsInfo().GetReadLagMs()); + SetProtoTime(stats->mutable_max_write_time_lag(), partResult.GetLagsInfo().GetWriteLagMs()); + + AddWindowsStat(consStats->mutable_bytes_read(), partResult.GetAvgReadSpeedPerMin(), partResult.GetAvgReadSpeedPerHour(), partResult.GetAvgReadSpeedPerDay()); + } else { + auto* stats = Result.mutable_consumer()->mutable_consumer_stats(); + + UpdateProtoTime(stats->mutable_min_partitions_last_read_time(), partResult.GetLagsInfo().GetLastReadTimestampMs(), true); + UpdateProtoTime(stats->mutable_max_read_time_lag(), partResult.GetLagsInfo().GetReadLagMs(), false); + UpdateProtoTime(stats->mutable_max_write_time_lag(), partResult.GetLagsInfo().GetWriteLagMs(), false); + + AddWindowsStat(consStats->mutable_bytes_read(), partResult.GetAvgReadSpeedPerMin(), partResult.GetAvgReadSpeedPerHour(), partResult.GetAvgReadSpeedPerDay()); + } + } } } + +bool FillConsumerProto(Ydb::Topic::Consumer *rr, const NKikimrPQ::TPQTabletConfig& config, ui32 i, + const NActors::TActorContext& ctx, Ydb::StatusIds::StatusCode& status, TString& error) +{ + const auto &partConfig = config.GetPartitionConfig(); + const auto& pqConfig = AppData(ctx)->PQConfig; + + auto consumerName = NPersQueue::ConvertOldConsumerName(config.GetReadRules(i), ctx); + rr->set_name(consumerName); + rr->mutable_read_from()->set_seconds(config.GetReadFromTimestampsMs(i) / 1000); + auto version = config.GetReadRuleVersions(i); + if (version != 0) + (*rr->mutable_attributes())["_version"] = TStringBuilder() << version; + for (const auto &codec : config.GetConsumerCodecs(i).GetIds()) { + 
rr->mutable_supported_codecs()->add_codecs((Ydb::Topic::Codec) (codec + 1)); + } + bool important = false; + for (const auto &c : partConfig.GetImportantClientId()) { + if (c == config.GetReadRules(i)) { + important = true; + break; + } + } + rr->set_important(important); + TString serviceType = ""; + if (i < config.ReadRuleServiceTypesSize()) { + serviceType = config.GetReadRuleServiceTypes(i); + } else { + if (pqConfig.GetDisallowDefaultClientServiceType()) { + error = "service type must be set for all read rules"; + status = Ydb::StatusIds::INTERNAL_ERROR; + return false; + } + serviceType = pqConfig.GetDefaultClientServiceType().GetName(); + } + (*rr->mutable_attributes())["_service_type"] = serviceType; + return true; +} + void TDescribeTopicActor::HandleCacheNavigateResponse(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx) { Y_VERIFY(ev->Get()->Request.Get()->ResultSet.size() == 1); // describe for only one topic if (ReplyIfNotTopic(ev, ctx)) { @@ -459,9 +866,7 @@ void TDescribeTopicActor::HandleCacheNavigateResponse(TEvTxProxySchemeCache::TEv const TString path = JoinSeq("/", response.Path); - Ydb::Topic::DescribeTopicResult result; - - Ydb::Scheme::Entry *selfEntry = result.mutable_self(); + Ydb::Scheme::Entry *selfEntry = Result.mutable_self(); ConvertDirectoryEntry(response.Self->Info, selfEntry, true); if (const auto& name = GetCdcStreamName()) { selfEntry->set_name(*name); @@ -469,109 +874,171 @@ void TDescribeTopicActor::HandleCacheNavigateResponse(TEvTxProxySchemeCache::TEv if (response.PQGroupInfo) { const auto &pqDescr = response.PQGroupInfo->Description; - result.mutable_partitioning_settings()->set_min_active_partitions(pqDescr.GetTotalGroupCount()); + Result.mutable_partitioning_settings()->set_min_active_partitions(pqDescr.GetTotalGroupCount()); for(ui32 i = 0; i < pqDescr.GetTotalGroupCount(); ++i) { - auto part = result.add_partitions(); + auto part = Result.add_partitions(); part->set_partition_id(i); 
part->set_active(true); } const auto &config = pqDescr.GetPQTabletConfig(); if (!config.GetRequireAuthWrite()) { - (*result.mutable_attributes())["_allow_unauthenticated_write"] = "true"; + (*Result.mutable_attributes())["_allow_unauthenticated_write"] = "true"; } if (!config.GetRequireAuthRead()) { - (*result.mutable_attributes())["_allow_unauthenticated_read"] = "true"; + (*Result.mutable_attributes())["_allow_unauthenticated_read"] = "true"; } if (pqDescr.GetPartitionPerTablet() != 2) { - (*result.mutable_attributes())["_partitions_per_tablet"] = + (*Result.mutable_attributes())["_partitions_per_tablet"] = TStringBuilder() << pqDescr.GetPartitionPerTablet(); } if (config.HasAbcId()) { - (*result.mutable_attributes())["_abc_id"] = TStringBuilder() << config.GetAbcId(); + (*Result.mutable_attributes())["_abc_id"] = TStringBuilder() << config.GetAbcId(); } if (config.HasAbcSlug()) { - (*result.mutable_attributes())["_abc_slug"] = config.GetAbcSlug(); + (*Result.mutable_attributes())["_abc_slug"] = config.GetAbcSlug(); } if (config.HasFederationAccount()) { - (*result.mutable_attributes())["_federation_account"] = config.GetFederationAccount(); + (*Result.mutable_attributes())["_federation_account"] = config.GetFederationAccount(); } bool local = config.GetLocalDC(); const auto &partConfig = config.GetPartitionConfig(); i64 msip = partConfig.GetMaxSizeInPartition(); - if (partConfig.HasMaxSizeInPartition() && msip != Max<i64>()) - (*result.mutable_attributes())["_max_partition_storage_size"] = TStringBuilder() << msip ; - result.mutable_retention_period()->set_seconds(partConfig.GetLifetimeSeconds()); - result.set_retention_storage_mb(partConfig.GetStorageLimitBytes() / 1024 / 1024); - (*result.mutable_attributes())["_message_group_seqno_retention_period_ms"] = TStringBuilder() << (partConfig.GetSourceIdLifetimeSeconds() * 1000); - (*result.mutable_attributes())["__max_partition_message_groups_seqno_stored"] = TStringBuilder() << partConfig.GetSourceIdMaxCounts(); + 
if (partConfig.HasMaxSizeInPartition() && msip != Max<i64>()) { + (*Result.mutable_attributes())["_max_partition_storage_size"] = TStringBuilder() << msip; + } + Result.mutable_retention_period()->set_seconds(partConfig.GetLifetimeSeconds()); + Result.set_retention_storage_mb(partConfig.GetStorageLimitBytes() / 1024 / 1024); + (*Result.mutable_attributes())["_message_group_seqno_retention_period_ms"] = TStringBuilder() << (partConfig.GetSourceIdLifetimeSeconds() * 1000); + (*Result.mutable_attributes())["__max_partition_message_groups_seqno_stored"] = TStringBuilder() << partConfig.GetSourceIdMaxCounts(); const auto& pqConfig = AppData(ctx)->PQConfig; if (local || pqConfig.GetTopicsAreFirstClassCitizen()) { - result.set_partition_write_speed_bytes_per_second(partConfig.GetWriteSpeedInBytesPerSecond()); - result.set_partition_write_burst_bytes(partConfig.GetBurstSize()); + Result.set_partition_write_speed_bytes_per_second(partConfig.GetWriteSpeedInBytesPerSecond()); + Result.set_partition_write_burst_bytes(partConfig.GetBurstSize()); } for (const auto &codec : config.GetCodecs().GetIds()) { - result.mutable_supported_codecs()->add_codecs((Ydb::Topic::Codec)(codec + 1)); + Result.mutable_supported_codecs()->add_codecs((Ydb::Topic::Codec)(codec + 1)); } if (pqConfig.GetBillingMeteringConfig().GetEnabled()) { switch (config.GetMeteringMode()) { case NKikimrPQ::TPQTabletConfig::METERING_MODE_RESERVED_CAPACITY: - result.set_metering_mode(Ydb::Topic::METERING_MODE_RESERVED_CAPACITY); + Result.set_metering_mode(Ydb::Topic::METERING_MODE_RESERVED_CAPACITY); break; case NKikimrPQ::TPQTabletConfig::METERING_MODE_REQUEST_UNITS: - result.set_metering_mode(Ydb::Topic::METERING_MODE_REQUEST_UNITS); + Result.set_metering_mode(Ydb::Topic::METERING_MODE_REQUEST_UNITS); break; default: break; } } - + auto consumerName = NPersQueue::ConvertNewConsumerName(Consumer, ctx); + bool found = false; for (ui32 i = 0; i < config.ReadRulesSize(); ++i) { - auto rr = result.add_consumers(); - 
auto consumerName = NPersQueue::ConvertOldConsumerName(config.GetReadRules(i), ctx); - rr->set_name(consumerName); - rr->mutable_read_from()->set_seconds(config.GetReadFromTimestampsMs(i) / 1000); - auto version = config.GetReadRuleVersions(i); - if (version != 0) - (*rr->mutable_attributes())["_version"] = TStringBuilder() << version; - for (const auto &codec : config.GetConsumerCodecs(i).GetIds()) { - rr->mutable_supported_codecs()->add_codecs((Ydb::Topic::Codec) (codec + 1)); + if (consumerName == config.GetReadRules(i)) found = true; + auto rr = Result.add_consumers(); + Ydb::StatusIds::StatusCode status; + TString error; + if (!FillConsumerProto(rr, config, i, ctx, status, error)) { + return RaiseError(error, Ydb::PersQueue::ErrorCode::ERROR, status, ctx); } - bool important = false; - for (const auto &c : partConfig.GetImportantClientId()) { - if (c == config.GetReadRules(i)) { - important = true; - break; - } + } + + if (GetProtoRequest()->include_stats()) { + if (Consumer && !found) { + Request_->RaiseIssue(FillIssue(TStringBuilder() << "no consumer '" << Consumer << "' in topic", Ydb::PersQueue::ErrorCode::ERROR)); + return ReplyWithResult(Ydb::StatusIds::SCHEME_ERROR, ctx); } - rr->set_important(important); - TString serviceType = ""; - if (i < config.ReadRuleServiceTypesSize()) { - serviceType = config.GetReadRuleServiceTypes(i); - } else { - if (pqConfig.GetDisallowDefaultClientServiceType()) { - this->Request_->RaiseIssue(FillIssue( - "service type must be set for all read rules", - Ydb::PersQueue::ErrorCode::ERROR - )); - Reply(Ydb::StatusIds::INTERNAL_ERROR, ctx); - return; - } - serviceType = pqConfig.GetDefaultClientServiceType().GetName(); + + ProcessTablets(pqDescr, ctx); + return; + } + } + return ReplyWithResult(Ydb::StatusIds::SUCCESS, Result, ctx); +} + +void TDescribeConsumerActor::HandleCacheNavigateResponse(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx) { + 
Y_VERIFY(ev->Get()->Request.Get()->ResultSet.size() == 1); // describe for only one topic + if (ReplyIfNotTopic(ev, ctx)) { + return; + } + const auto& response = ev->Get()->Request.Get()->ResultSet.front(); + + const TString path = JoinSeq("/", response.Path); + + Ydb::Scheme::Entry *selfEntry = Result.mutable_self(); + ConvertDirectoryEntry(response.Self->Info, selfEntry, true); + //TODO: change entry + if (const auto& name = GetCdcStreamName()) { + selfEntry->set_name(*name); + } + selfEntry->set_name(selfEntry->name() + "/" + Consumer); + + if (response.PQGroupInfo) { + const auto& pqDescr = response.PQGroupInfo->Description; + const auto& config = pqDescr.GetPQTabletConfig(); + + for(ui32 i = 0; i < pqDescr.GetTotalGroupCount(); ++i) { + auto part = Result.add_partitions(); + part->set_partition_id(i); + part->set_active(true); + } + + auto consumerName = NPersQueue::ConvertNewConsumerName(Consumer, ctx); + bool found = false; + for (ui32 i = 0; i < config.ReadRulesSize(); ++i) { + if (consumerName != config.GetReadRules(i)) + continue; + found = true; + auto rr = Result.mutable_consumer(); + Ydb::StatusIds::StatusCode status; + TString error; + if (!FillConsumerProto(rr, config, i, ctx, status, error)) { + return RaiseError(error, Ydb::PersQueue::ErrorCode::ERROR, status, ctx); } - (*rr->mutable_attributes())["_service_type"] = serviceType; + break; + } + if (!found) { + Request_->RaiseIssue(FillIssue(TStringBuilder() << "no consumer '" << Consumer << "' in topic", Ydb::PersQueue::ErrorCode::ERROR)); + return ReplyWithResult(Ydb::StatusIds::SCHEME_ERROR, ctx); + } + + if (GetProtoRequest()->include_stats()) { + ProcessTablets(pqDescr, ctx); + return; } } - return ReplyWithResult(Ydb::StatusIds::SUCCESS, result, ctx); + + return ReplyWithResult(Ydb::StatusIds::SUCCESS, Result, ctx); } +bool TDescribeTopicActorImpl::ProcessTablets(const NKikimrSchemeOp::TPersQueueGroupDescription& pqDescr, const TActorContext& ctx) { + for (ui32 i = 0; i < 
pqDescr.PartitionsSize(); ++i) { + const auto& pi = pqDescr.GetPartitions(i); + Tablets[pi.GetTabletId()].Partitions.push_back(pi.GetPartitionId()); + Tablets[pi.GetTabletId()].TabletId = pi.GetTabletId(); + } + for (auto& pair : Tablets) { + RequestTablet(pair.second, ctx); + } + if (!Consumer.empty()) { + BalancerTabletId = pqDescr.GetBalancerTabletID(); + Tablets[BalancerTabletId].TabletId = BalancerTabletId; + } + + if (RequestsInfly == 0) { + Reply(ctx); + return false; + } + return true; +} + void TDescribeTopicActor::Bootstrap(const NActors::TActorContext& ctx) { TBase::Bootstrap(ctx); @@ -580,7 +1047,12 @@ void TDescribeTopicActor::Bootstrap(const NActors::TActorContext& ctx) Become(&TDescribeTopicActor::StateWork); } +void TDescribeConsumerActor::Bootstrap(const NActors::TActorContext& ctx) +{ + TBase::Bootstrap(ctx); - + SendDescribeProposeRequest(ctx); + Become(&TDescribeConsumerActor::StateWork); +} } diff --git a/ydb/services/persqueue_v1/actors/schema_actors.h b/ydb/services/persqueue_v1/actors/schema_actors.h index 40dbe350a0..ec1182bd5c 100644 --- a/ydb/services/persqueue_v1/actors/schema_actors.h +++ b/ydb/services/persqueue_v1/actors/schema_actors.h @@ -1,7 +1,8 @@ #pragma once +#include "events.h" #include <ydb/services/lib/actors/pq_schema_actor.h> - +#include <ydb/core/persqueue/events/global.h> namespace NKikimr::NGRpcProxy::V1 { using namespace NKikimr::NGRpcService; @@ -40,7 +41,6 @@ public: void HandleCacheNavigateResponse(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx){ Y_UNUSED(ev); Y_UNUSED(ctx); } }; - class TPQDescribeTopicActor : public TPQGrpcSchemaBase<TPQDescribeTopicActor, NKikimr::NGRpcService::TEvPQDescribeTopicRequest> , public TCdcStreamCompatible { @@ -57,22 +57,113 @@ public: void HandleCacheNavigateResponse(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx); }; +class TDescribeTopicActorImpl +{ +protected: + struct TTabletInfo { + ui64 TabletId; + 
std::vector<ui32> Partitions; + TActorId Pipe; + ui32 NodeId = 0; + ui32 RetriesLeft = 3; + }; +public: + TDescribeTopicActorImpl(const TString& consumer); + virtual ~TDescribeTopicActorImpl() = default; + + void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx); + void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx); + + void Handle(NKikimr::TEvPersQueue::TEvStatusResponse::TPtr& ev, const TActorContext& ctx); + void Handle(NKikimr::TEvPersQueue::TEvReadSessionsInfoResponse::TPtr& ev, const TActorContext& ctx); + + void Handle(TEvPQProxy::TEvRequestTablet::TPtr& ev, const TActorContext& ctx); + + bool ProcessTablets(const NKikimrSchemeOp::TPersQueueGroupDescription& description, const TActorContext& ctx); + + void RequestTablet(TTabletInfo& tablet, const TActorContext& ctx); + void RequestTablet(ui64 tabletId, const TActorContext& ctx); + void RestartTablet(ui64 tabletId, const TActorContext& ctx, TActorId pipe = {}, const TDuration& delay = TDuration::Zero()); + void RequestAdditionalInfo(const TActorContext& ctx); + + bool StateWork(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx); + + void Bootstrap(const NActors::TActorContext& ctx); + + virtual void HandleCacheNavigateResponse(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx) = 0; + + virtual void RaiseError(const TString& error, const Ydb::PersQueue::ErrorCode::ErrorCode errorCode, const Ydb::StatusIds::StatusCode status, const TActorContext& ctx) = 0; + virtual void ApplyResponse(TTabletInfo& tabletInfo, NKikimr::TEvPersQueue::TEvStatusResponse::TPtr& ev, const TActorContext& ctx) = 0; + virtual void ApplyResponse(TTabletInfo& tabletInfo, NKikimr::TEvPersQueue::TEvReadSessionsInfoResponse::TPtr& ev, const TActorContext& ctx) = 0; + virtual void Reply(const TActorContext& ctx) = 0; + +private: + + std::map<ui64, TTabletInfo> Tablets; + ui32 RequestsInfly = 0; + + ui64 BalancerTabletId; + +protected: + TString 
Consumer; +}; + class TDescribeTopicActor : public TPQGrpcSchemaBase<TDescribeTopicActor, NKikimr::NGRpcService::TEvDescribeTopicRequest> , public TCdcStreamCompatible + , public TDescribeTopicActorImpl { -using TBase = TPQGrpcSchemaBase<TDescribeTopicActor, TEvDescribeTopicRequest>; +using TBase = TPQGrpcSchemaBase<TDescribeTopicActor, NKikimr::NGRpcService::TEvDescribeTopicRequest>; +using TTabletInfo = TDescribeTopicActorImpl::TTabletInfo; public: TDescribeTopicActor(NKikimr::NGRpcService::TEvDescribeTopicRequest* request); + TDescribeTopicActor(NKikimr::NGRpcService::IRequestOpCtx * ctx); + ~TDescribeTopicActor() = default; + void Bootstrap(const NActors::TActorContext& ctx); + void RaiseError(const TString& error, const Ydb::PersQueue::ErrorCode::ErrorCode errorCode, const Ydb::StatusIds::StatusCode status, const TActorContext& ctx) override; + void StateWork(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx); + void HandleCacheNavigateResponse(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx) override; + void ApplyResponse(TTabletInfo& tabletInfo, NKikimr::TEvPersQueue::TEvStatusResponse::TPtr& ev, const TActorContext& ctx) override; + void ApplyResponse(TTabletInfo& tabletInfo, NKikimr::TEvPersQueue::TEvReadSessionsInfoResponse::TPtr& ev, const TActorContext& ctx) override; + virtual void Reply(const TActorContext& ctx) override; + +private: + Ydb::Topic::DescribeTopicResult Result; +}; + +class TDescribeConsumerActor : public TPQGrpcSchemaBase<TDescribeConsumerActor, NKikimr::NGRpcService::TEvDescribeConsumerRequest> + , public TCdcStreamCompatible + , public TDescribeTopicActorImpl +{ +using TBase = TPQGrpcSchemaBase<TDescribeConsumerActor, NKikimr::NGRpcService::TEvDescribeConsumerRequest>; +using TTabletInfo = TDescribeTopicActorImpl::TTabletInfo; + +public: + TDescribeConsumerActor(NKikimr::NGRpcService::TEvDescribeConsumerRequest* request); + TDescribeConsumerActor(NKikimr::NGRpcService::IRequestOpCtx * ctx); + + 
~TDescribeConsumerActor() = default; + void Bootstrap(const NActors::TActorContext& ctx); - void HandleCacheNavigateResponse(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx); + void StateWork(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx); + + void RaiseError(const TString& error, const Ydb::PersQueue::ErrorCode::ErrorCode errorCode, const Ydb::StatusIds::StatusCode status, const TActorContext& ctx) override; + void HandleCacheNavigateResponse(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx) override; + void ApplyResponse(TTabletInfo& tabletInfo, NKikimr::TEvPersQueue::TEvStatusResponse::TPtr& ev, const TActorContext& ctx) override; + void ApplyResponse(TTabletInfo& tabletInfo, NKikimr::TEvPersQueue::TEvReadSessionsInfoResponse::TPtr& ev, const TActorContext& ctx) override; + virtual void Reply(const TActorContext& ctx) override; + +private: + Ydb::Topic::DescribeConsumerResult Result; }; + + class TAddReadRuleActor : public TUpdateSchemeActor<TAddReadRuleActor, TEvPQAddReadRuleRequest> , public TCdcStreamCompatible { diff --git a/ydb/services/persqueue_v1/actors/write_session_actor.h b/ydb/services/persqueue_v1/actors/write_session_actor.h index 44e5d71a23..9e4ae01448 100644 --- a/ydb/services/persqueue_v1/actors/write_session_actor.h +++ b/ydb/services/persqueue_v1/actors/write_session_actor.h @@ -184,7 +184,7 @@ private: void SendRequest(typename TWriteRequestInfo::TPtr&& request, const TActorContext& ctx); void SetupCounters(); - void SetupCounters(const TString& cloudId, const TString& dbId, const TString& folderId); + void SetupCounters(const TString& cloudId, const TString& dbId, const TString& dbPath, const bool isServerless, const TString& folderId); private: std::unique_ptr<TEvStreamWriteRequest> Request; diff --git a/ydb/services/persqueue_v1/actors/write_session_actor.ipp b/ydb/services/persqueue_v1/actors/write_session_actor.ipp index 2efca88d28..d6502a7656 100644 --- 
a/ydb/services/persqueue_v1/actors/write_session_actor.ipp +++ b/ydb/services/persqueue_v1/actors/write_session_actor.ipp @@ -286,8 +286,10 @@ void TWriteSessionActor<UseMigrationProtocol>::Die(const TActorContext& ctx) { if (SessionsActive) { SessionsActive.Dec(); - BytesInflight.Dec(BytesInflight_); - BytesInflightTotal.Dec(BytesInflightTotal_); + if (BytesInflight && BytesInflightTotal) { + BytesInflight.Dec(BytesInflight_); + BytesInflightTotal.Dec(BytesInflightTotal_); + } } LOG_INFO_S(ctx, NKikimrServices::PQ_WRITE_PROXY, "session v1 cookie: " << Cookie << " sessionId: " << OwnerCookie << " is DEAD"); @@ -512,22 +514,19 @@ void TWriteSessionActor<UseMigrationProtocol>::SetupCounters() } template<bool UseMigrationProtocol> -void TWriteSessionActor<UseMigrationProtocol>::SetupCounters(const TString& cloudId, const TString& dbId, const TString& folderId) +void TWriteSessionActor<UseMigrationProtocol>::SetupCounters(const TString& cloudId, const TString& dbId, const TString& dbPath, const bool isServerless, const TString& folderId) { if (SessionsCreated) { return; } //now topic is checked, can create group for real topic, not garbage - auto subGroup = NPersQueue::GetCountersForStream(Counters); - auto aggr = NPersQueue::GetLabelsForStream(FullConverter, cloudId, dbId, folderId); - - BytesInflight = NKikimr::NPQ::TMultiCounter(subGroup, aggr, {}, {"stream.internal_write.bytes_proceeding"}, false, "name"); - BytesInflightTotal = NKikimr::NPQ::TMultiCounter(subGroup, aggr, {}, {"stream.internal_write.bytes_proceeding_total"}, false, "name"); - SessionsCreated = NKikimr::NPQ::TMultiCounter(subGroup, aggr, {}, {"stream.internal_write.sessions_created_per_second"}, true, "name"); - SessionsActive = NKikimr::NPQ::TMultiCounter(subGroup, aggr, {}, {"stream.internal_write.sessions_active"}, false, "name"); - Errors = NKikimr::NPQ::TMultiCounter(subGroup, aggr, {}, {"stream.internal_write.errors_per_second"}, true, "name"); + auto subGroup = 
NPersQueue::GetCountersForTopic(Counters, isServerless); + auto subgroups = NPersQueue::GetSubgroupsForTopic(FullConverter, cloudId, dbId, dbPath, folderId); + SessionsCreated = NKikimr::NPQ::TMultiCounter(subGroup, {}, subgroups, {"api.grpc.topic.stream_write.sessions_created"}, true, "name"); + SessionsActive = NKikimr::NPQ::TMultiCounter(subGroup, {}, subgroups, {"api.grpc.topic.stream_write.sessions_active_count"}, false, "name"); + Errors = NKikimr::NPQ::TMultiCounter(subGroup, {}, subgroups, {"api.grpc.topic.stream_write.errors"}, true, "name"); SessionsCreated.Inc(); SessionsActive.Inc(); @@ -582,6 +581,7 @@ void TWriteSessionActor<UseMigrationProtocol>::Handle(TEvDescribeTopicsResponse: if (AppData(ctx)->PQConfig.GetTopicsAreFirstClassCitizen()) { const auto& tabletConfig = description.GetPQTabletConfig(); SetupCounters(tabletConfig.GetYcCloudId(), tabletConfig.GetYdbDatabaseId(), + tabletConfig.GetYdbDatabasePath(), entry.DomainInfo->IsServerless(), tabletConfig.GetYcFolderId()); } else { SetupCounters(); @@ -1022,8 +1022,9 @@ void TWriteSessionActor<UseMigrationProtocol>::Handle(NPQ::TEvPartitionWriter::T AcceptedRequests.emplace_back(std::move(writeRequest)); BytesInflight_ -= diff; - BytesInflight.Dec(diff); - + if (BytesInflight) { + BytesInflight.Dec(diff); + } if (!NextRequestInited && BytesInflight_ < MAX_BYTES_INFLIGHT) { //allow only one big request to be readed but not sended NextRequestInited = true; if (!Request->GetStreamCtx()->Read()) { @@ -1177,7 +1178,9 @@ void TWriteSessionActor<UseMigrationProtocol>::Handle(NPQ::TEvPartitionWriter::T ui64 diff = writeRequest->ByteSize; BytesInflightTotal_ -= diff; - BytesInflightTotal.Dec(diff); + if (BytesInflightTotal) { + BytesInflightTotal.Dec(diff); + } CheckFinish(ctx); } @@ -1276,8 +1279,10 @@ void TWriteSessionActor<UseMigrationProtocol>::SendRequest(typename TWriteReques diff += request->PartitionWriteRequest->Record.ByteSize(); BytesInflight_ += diff; BytesInflightTotal_ += diff; - 
BytesInflight.Inc(diff); - BytesInflightTotal.Inc(diff); + if (BytesInflight && BytesInflightTotal) { + BytesInflight.Inc(diff); + BytesInflightTotal.Inc(diff); + } ctx.Send(Writer, std::move(request->PartitionWriteRequest)); SentRequests.push_back(std::move(request)); @@ -1466,8 +1471,10 @@ void TWriteSessionActor<UseMigrationProtocol>::Handle(typename TEvWrite::TPtr& e ui64 diff = ev->Get()->Request.ByteSize(); BytesInflight_ += diff; BytesInflightTotal_ += diff; - BytesInflight.Inc(diff); - BytesInflightTotal.Inc(diff); + if (BytesInflight && BytesInflightTotal) { + BytesInflight.Inc(diff); + BytesInflightTotal.Inc(diff); + } if (BytesInflight_ < MAX_BYTES_INFLIGHT) { //allow only one big request to be readed but not sended Y_VERIFY(NextRequestInited); diff --git a/ydb/services/persqueue_v1/grpc_pq_schema.cpp b/ydb/services/persqueue_v1/grpc_pq_schema.cpp index 1c52b7149b..e020ffd64b 100644 --- a/ydb/services/persqueue_v1/grpc_pq_schema.cpp +++ b/ydb/services/persqueue_v1/grpc_pq_schema.cpp @@ -134,48 +134,75 @@ void TPQSchemaService::Handle(NKikimr::NGRpcService::TEvDescribeTopicRequest::TP ctx.Register(new TDescribeTopicActor(ev->Release().Release())); } - +void TPQSchemaService::Handle(NKikimr::NGRpcService::TEvDescribeConsumerRequest::TPtr& ev, const TActorContext& ctx) { + LOG_DEBUG_S(ctx, NKikimrServices::PQ_READ_PROXY, "new Describe consumer request"); + ctx.Register(new TDescribeConsumerActor(ev->Release().Release())); +} } +namespace NKikimr { +namespace NGRpcService { -void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvPQDropTopicRequest::TPtr& ev, const TActorContext& ctx) { +void TGRpcRequestProxy::Handle(TEvPQDropTopicRequest::TPtr& ev, const TActorContext& ctx) { ctx.Send(NKikimr::NGRpcProxy::V1::GetPQSchemaServiceActorID(), ev->Release().Release()); } -void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvPQCreateTopicRequest::TPtr& ev, const TActorContext& ctx) { +void 
TGRpcRequestProxy::Handle(TEvPQCreateTopicRequest::TPtr& ev, const TActorContext& ctx) { ctx.Send(NKikimr::NGRpcProxy::V1::GetPQSchemaServiceActorID(), ev->Release().Release()); } -void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvPQAlterTopicRequest::TPtr& ev, const TActorContext& ctx) { +void TGRpcRequestProxy::Handle(TEvPQAlterTopicRequest::TPtr& ev, const TActorContext& ctx) { ctx.Send(NKikimr::NGRpcProxy::V1::GetPQSchemaServiceActorID(), ev->Release().Release()); } -void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvPQDescribeTopicRequest::TPtr& ev, const TActorContext& ctx) { +void TGRpcRequestProxy::Handle(TEvPQDescribeTopicRequest::TPtr& ev, const TActorContext& ctx) { ctx.Send(NKikimr::NGRpcProxy::V1::GetPQSchemaServiceActorID(), ev->Release().Release()); } -void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvDropTopicRequest::TPtr& ev, const TActorContext& ctx) { +void TGRpcRequestProxy::Handle(TEvDropTopicRequest::TPtr& ev, const TActorContext& ctx) { ctx.Send(NKikimr::NGRpcProxy::V1::GetPQSchemaServiceActorID(), ev->Release().Release()); } -void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvCreateTopicRequest::TPtr& ev, const TActorContext& ctx) { +void TGRpcRequestProxy::Handle(TEvCreateTopicRequest::TPtr& ev, const TActorContext& ctx) { ctx.Send(NKikimr::NGRpcProxy::V1::GetPQSchemaServiceActorID(), ev->Release().Release()); } -void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvAlterTopicRequest::TPtr& ev, const TActorContext& ctx) { +void TGRpcRequestProxy::Handle(TEvAlterTopicRequest::TPtr& ev, const TActorContext& ctx) { ctx.Send(NKikimr::NGRpcProxy::V1::GetPQSchemaServiceActorID(), ev->Release().Release()); } -void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvDescribeTopicRequest::TPtr& ev, const TActorContext& ctx) { +void 
TGRpcRequestProxy::Handle(TEvDescribeTopicRequest::TPtr& ev, const TActorContext& ctx) { ctx.Send(NKikimr::NGRpcProxy::V1::GetPQSchemaServiceActorID(), ev->Release().Release()); } +void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvDescribeConsumerRequest::TPtr& ev, const TActorContext& ctx) { + ctx.Send(NKikimr::NGRpcProxy::V1::GetPQSchemaServiceActorID(), ev->Release().Release()); +} -void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvPQAddReadRuleRequest::TPtr& ev, const TActorContext& ctx) { +void TGRpcRequestProxy::Handle(TEvPQAddReadRuleRequest::TPtr& ev, const TActorContext& ctx) { ctx.Send(NKikimr::NGRpcProxy::V1::GetPQSchemaServiceActorID(), ev->Release().Release()); } -void NKikimr::NGRpcService::TGRpcRequestProxy::Handle(NKikimr::NGRpcService::TEvPQRemoveReadRuleRequest::TPtr& ev, const TActorContext& ctx) { +void TGRpcRequestProxy::Handle(TEvPQRemoveReadRuleRequest::TPtr& ev, const TActorContext& ctx) { ctx.Send(NKikimr::NGRpcProxy::V1::GetPQSchemaServiceActorID(), ev->Release().Release()); } + + +#ifdef DECLARE_RPC +#error DECLARE_RPC macro already defined +#endif + +#define DECLARE_RPC(name) template<> IActor* TEv##name##Request::CreateRpcActor(NKikimr::NGRpcService::IRequestOpCtx* msg) { \ + return new NKikimr::NGRpcProxy::V1::T##name##Actor(msg);\ + } + +DECLARE_RPC(DescribeTopic); +DECLARE_RPC(DescribeConsumer); + +#undef DECLARE_RPC + + + +} +}
\ No newline at end of file diff --git a/ydb/services/persqueue_v1/grpc_pq_schema.h b/ydb/services/persqueue_v1/grpc_pq_schema.h index 899cef430e..58ba33eb58 100644 --- a/ydb/services/persqueue_v1/grpc_pq_schema.h +++ b/ydb/services/persqueue_v1/grpc_pq_schema.h @@ -41,7 +41,7 @@ private: HFunc(NKikimr::NGRpcService::TEvCreateTopicRequest, Handle); HFunc(NKikimr::NGRpcService::TEvAlterTopicRequest, Handle); HFunc(NKikimr::NGRpcService::TEvDescribeTopicRequest, Handle); - + HFunc(NKikimr::NGRpcService::TEvDescribeConsumerRequest, Handle); hFunc(NPQ::NClusterTracker::TEvClusterTracker::TEvClustersUpdate, Handle); } } @@ -57,6 +57,7 @@ private: void Handle(NKikimr::NGRpcService::TEvCreateTopicRequest::TPtr& ev, const TActorContext& ctx); void Handle(NKikimr::NGRpcService::TEvAlterTopicRequest::TPtr& ev, const TActorContext& ctx); void Handle(NKikimr::NGRpcService::TEvDescribeTopicRequest::TPtr& ev, const TActorContext& ctx); + void Handle(NKikimr::NGRpcService::TEvDescribeConsumerRequest::TPtr& ev, const TActorContext& ctx); void Handle(NPQ::NClusterTracker::TEvClusterTracker::TEvClustersUpdate::TPtr& ev); diff --git a/ydb/services/persqueue_v1/persqueue_new_schemecache_ut.cpp b/ydb/services/persqueue_v1/persqueue_new_schemecache_ut.cpp index fe04c78c36..0ca23090ed 100644 --- a/ydb/services/persqueue_v1/persqueue_new_schemecache_ut.cpp +++ b/ydb/services/persqueue_v1/persqueue_new_schemecache_ut.cpp @@ -358,8 +358,8 @@ namespace NKikimr::NPersQueueTests { const std::set<std::string>& canonicalSensorNames, const TString& stream, const TString& consumer, const TString& host, const TString& shard) { - auto counters = GetCounters1stClass(monPort, "datastreams", cloudId, databaseId, - folderId, stream, consumer, host, shard); + auto counters = GetCounters1stClass(monPort, "datastreams", "%2FRoot", cloudId, + databaseId, folderId, stream, consumer, host, shard); const auto sensors = counters["sensors"].GetArray(); std::set<std::string> sensorNames; 
std::transform(sensors.begin(), sensors.end(), @@ -387,84 +387,63 @@ namespace NKikimr::NPersQueueTests { checkCounters(monPort, { - "stream.internal_read.commits_per_second", - "stream.internal_read.partitions_errors_per_second", - "stream.internal_read.partitions_locked", - "stream.internal_read.partitions_locked_per_second", - "stream.internal_read.partitions_released_per_second", - "stream.internal_read.partitions_to_be_locked", - "stream.internal_read.partitions_to_be_released", - "stream.internal_read.waits_for_data", - "stream.internal_write.bytes_proceeding", - "stream.internal_write.bytes_proceeding_total", - "stream.internal_write.errors_per_second", - "stream.internal_write.sessions_active", - "stream.internal_write.sessions_created_per_second", + "api.grpc.topic.stream_read.commits", + "api.grpc.topic.stream_read.partition_session.errors", + "api.grpc.topic.stream_read.partition_session.started", + "api.grpc.topic.stream_read.partition_session.stopped", + "api.grpc.topic.stream_read.partition_session.count", + "api.grpc.topic.stream_read.partition_session.starting_count", + "api.grpc.topic.stream_read.partition_session.stopping_count", + "api.grpc.topic.stream_write.errors", + "api.grpc.topic.stream_write.sessions_active_count", + "api.grpc.topic.stream_write.sessions_created", }, topicName, "", "", "" ); checkCounters(monPort, { - "stream.internal_read.commits_per_second", - "stream.internal_read.partitions_errors_per_second", - "stream.internal_read.partitions_locked", - "stream.internal_read.partitions_locked_per_second", - "stream.internal_read.partitions_released_per_second", - "stream.internal_read.partitions_to_be_locked", - "stream.internal_read.partitions_to_be_released", - "stream.internal_read.waits_for_data", + "api.grpc.topic.stream_read.commits", + "api.grpc.topic.stream_read.partition_session.errors", + "api.grpc.topic.stream_read.partition_session.started", + "api.grpc.topic.stream_read.partition_session.stopped", + 
"api.grpc.topic.stream_read.partition_session.count", + "api.grpc.topic.stream_read.partition_session.starting_count", + "api.grpc.topic.stream_read.partition_session.stopping_count", + }, topicName, consumerName, "", "" ); checkCounters(server.CleverServer->GetRuntime()->GetMonPort(), { - "stream.internal_read.time_lags_milliseconds", - "stream.incoming_bytes_per_second", - "stream.incoming_records_per_second", - "stream.internal_write.bytes_per_second", - "stream.internal_write.compacted_bytes_per_second", - "stream.internal_write.partition_write_quota_wait_milliseconds", - "stream.internal_write.record_size_bytes", - "stream.internal_write.records_per_second", - "stream.internal_write.time_lags_milliseconds", - "stream.internal_write.uncompressed_bytes_per_second", - "stream.await_operating_milliseconds", - "stream.internal_write.buffer_brimmed_duration_ms", - "stream.internal_read.bytes_per_second", - "stream.internal_read.records_per_second", - "stream.outgoing_bytes_per_second", - "stream.outgoing_records_per_second", + "topic.read.lag_milliseconds", + "topic.write.bytes", + "topic.write.messages", + "api.grpc.topic.stream_write.bytes", + "api.grpc.topic.stream_write.partition_throttled_milliseconds", + "topic.write.message_size_bytes", + "api.grpc.topic.stream_write.messages", + "topic.write.lag_milliseconds", + "topic.write.uncompressed_bytes", + "api.grpc.topic.stream_read.bytes", + "api.grpc.topic.stream_read.messages", + "topic.read.bytes", + "topic.read.messages", }, topicName, "", "", "" ); checkCounters(server.CleverServer->GetRuntime()->GetMonPort(), { - "stream.internal_read.time_lags_milliseconds", - "stream.await_operating_milliseconds", - "stream.internal_read.bytes_per_second", - "stream.internal_read.records_per_second", - "stream.outgoing_bytes_per_second", - "stream.outgoing_records_per_second", + "topic.read.lag_milliseconds", + "api.grpc.topic.stream_read.bytes", + "api.grpc.topic.stream_read.messages", + "topic.read.bytes", + 
"topic.read.messages", }, topicName, consumerName, "", "" ); - - checkCounters(server.CleverServer->GetRuntime()->GetMonPort(), - { - "stream.await_operating_milliseconds" - }, - topicName, consumerName, "1", "" - ); - - checkCounters(server.CleverServer->GetRuntime()->GetMonPort(), - { - "stream.internal_write.buffer_brimmed_duration_ms" - }, - topicName, "", "1", "" - ); } }; diff --git a/ydb/services/persqueue_v1/persqueue_ut.cpp b/ydb/services/persqueue_v1/persqueue_ut.cpp index ae10b8922b..f01e29cc70 100644 --- a/ydb/services/persqueue_v1/persqueue_ut.cpp +++ b/ydb/services/persqueue_v1/persqueue_ut.cpp @@ -2658,6 +2658,7 @@ Y_UNIT_TEST_SUITE(TPersQueueTest) { const TString& clientDc, const TString& originDc, const TString& client, const TString& consumerPath) { NJson::TJsonValue counters; + if (clientDc.empty() && originDc.empty()) { counters = GetClientCountersLegacy(monPort, "pqproxy", session, client, consumerPath); } else { @@ -2779,7 +2780,7 @@ Y_UNIT_TEST_SUITE(TPersQueueTest) { "PartitionsReleased", "PartitionsToBeLocked", "PartitionsToBeReleased", - "WaitsForData" + "WaitsForData", }, "", "cluster", "", "" ); @@ -3932,7 +3933,7 @@ Y_UNIT_TEST_SUITE(TPersQueueTest) { request.mutable_set_retention_period()->set_seconds(TDuration::Days(2).Seconds()); request.mutable_alter_partitioning_settings()->set_set_min_active_partitions(1); alter(request, Ydb::StatusIds::SCHEME_ERROR, true); - alter(request, Ydb::StatusIds::GENERIC_ERROR, false); + alter(request, Ydb::StatusIds::BAD_REQUEST, false); request.mutable_alter_partitioning_settings()->set_set_min_active_partitions(3); request.set_set_retention_storage_mb(-2); alter(request, Ydb::StatusIds::BAD_REQUEST, false); @@ -3988,7 +3989,7 @@ Y_UNIT_TEST_SUITE(TPersQueueTest) { alter(request, Ydb::StatusIds::SUCCESS, false); TString topic4 = "rt3.dc1--acc--topic4"; - server.AnnoyingClient->CreateTopic(topic4, 1); //ensure creation + server.AnnoyingClient->CreateTopic(topic4, 3); //ensure creation auto res = 
server.AnnoyingClient->DescribeTopic({topic3}); Cerr << res.DebugString(); TString resultDescribe = R"___(TopicInfo { @@ -4121,6 +4122,7 @@ Y_UNIT_TEST_SUITE(TPersQueueTest) { Ydb::Topic::DescribeTopicRequest request; Ydb::Topic::DescribeTopicResponse response; request.set_path(TStringBuilder() << "/Root/PQ/" << topic3); + grpc::ClientContext rcontext; auto status = TopicStubP_->DescribeTopic(&rcontext, request, &response); @@ -4129,6 +4131,8 @@ Y_UNIT_TEST_SUITE(TPersQueueTest) { Ydb::Topic::DescribeTopicResult res; response.operation().result().UnpackTo(&res); + Cerr << response.DebugString() << "\n" << res.DebugString() << "\n"; + UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SUCCESS); res1 = res; } @@ -4148,10 +4152,10 @@ Y_UNIT_TEST_SUITE(TPersQueueTest) { UNIT_ASSERT(status.ok()); Ydb::Topic::DescribeTopicResult descrRes; response.operation().result().UnpackTo(&descrRes); - Cerr << response << "\n" << descrRes << "\n"; + Cerr << response.DebugString() << "\n" << descrRes.DebugString() << "\n"; UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SUCCESS); - UNIT_ASSERT_VALUES_EQUAL(descrRes.DebugString(), res1.DebugString()); + UNIT_ASSERT_VALUES_EQUAL(descrRes.DebugString(), res1.DebugString()); { NYdb::TDriverConfig driverCfg; @@ -4163,8 +4167,10 @@ Y_UNIT_TEST_SUITE(TPersQueueTest) { res.Wait(); Cerr << res.GetValue().IsSuccess() << " " << res.GetValue().GetIssues().ToString() << "\n"; UNIT_ASSERT(res.GetValue().IsSuccess()); + auto res2 = NYdb::TProtoAccessor::GetProto(res.GetValue().GetTopicDescription()); Cerr << res2 << "\n"; + UNIT_ASSERT_VALUES_EQUAL(descrRes.DebugString(), res2.DebugString()); { NYdb::NTopic::TCreateTopicSettings settings; @@ -4221,6 +4227,7 @@ Y_UNIT_TEST_SUITE(TPersQueueTest) { } } + { Ydb::Topic::DropTopicRequest request; Ydb::Topic::DropTopicResponse response; @@ -4236,7 +4243,6 @@ Y_UNIT_TEST_SUITE(TPersQueueTest) { server.AnnoyingClient->RemoveTopic(topic3); } - { 
Ydb::Topic::DropTopicRequest request; Ydb::Topic::DropTopicResponse response; @@ -4266,6 +4272,179 @@ Y_UNIT_TEST_SUITE(TPersQueueTest) { res.Wait(); Cerr << res.GetValue().IsSuccess() << " " << res.GetValue().GetIssues().ToString() << "\n"; } + + for (ui32 i = 0; i < 5; ++ i) { + auto writer = CreateWriter(*driver, "acc/topic4", TStringBuilder() << "abacaba" << i); + auto ev = writer->GetEvent(true); + auto ct = std::get_if<NYdb::NPersQueue::TWriteSessionEvent::TReadyToAcceptEvent>(&*ev); + UNIT_ASSERT(ct); + writer->Write(std::move(ct->ContinuationToken), "1234567890"); + UNIT_ASSERT(ev.Defined()); + while(true) { + ev = writer->GetEvent(true); + auto ack = std::get_if<NYdb::NPersQueue::TWriteSessionEvent::TAcksEvent>(&*ev); + if (ack) { + break; + } + } + } + + { + NYdb::TDriverConfig driverCfg; + driverCfg.SetEndpoint(TStringBuilder() << "localhost:" << server.GrpcPort); + std::shared_ptr<NYdb::TDriver> ydbDriver(new NYdb::TDriver(driverCfg)); + auto topicClient = NYdb::NTopic::TTopicClient(*ydbDriver); + + auto res = topicClient.DescribeTopic("/Root/PQ/" + topic4, NYdb::NTopic::TDescribeTopicSettings{}.IncludeStats(true)); + res.Wait(); + Cerr << res.GetValue().IsSuccess() << " " << res.GetValue().GetIssues().ToString() << "\n"; + UNIT_ASSERT(res.GetValue().IsSuccess()); + + auto res2 = NYdb::TProtoAccessor::GetProto(res.GetValue().GetTopicDescription()); + Cerr << res2 << "\n"; + UNIT_ASSERT(res.GetValue().GetTopicDescription().GetPartitions().size() == 3); + UNIT_ASSERT(res.GetValue().GetTopicDescription().GetPartitions()[0].GetPartitionStats()); + UNIT_ASSERT(res.GetValue().GetTopicDescription().GetPartitions()[0].GetPartitionStats()->GetEndOffset() > 0); + } + + { + Ydb::Topic::DescribeTopicRequest request; + Ydb::Topic::DescribeTopicResponse response; + request.set_path(TStringBuilder() << "/Root/PQ/" << topic4); + request.set_include_stats(true); + + grpc::ClientContext rcontext; + + auto status = TopicStubP_->DescribeTopic(&rcontext, request, 
&response); + + UNIT_ASSERT(status.ok()); + Ydb::Topic::DescribeTopicResult res; + response.operation().result().UnpackTo(&res); + + Cerr << response.DebugString() << "\n" << res.DebugString() << "\n"; + + UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SUCCESS); + UNIT_ASSERT_VALUES_EQUAL(res.topic_stats().store_size_bytes(), 800); + UNIT_ASSERT_GE(res.partitions(0).partition_stats().partition_offsets().end(), 1); + } + + auto reader1 = CreateReader( + *driver, + NYdb::NPersQueue::TReadSessionSettings() + .AppendTopics( + NYdb::NPersQueue::TTopicReadSettings("acc/topic4") + ) + .ConsumerName("shared/user") + .ReadOnlyOriginal(true) + ); + int numLocks = 3; + while (numLocks > 0) { + auto msg = reader1->GetEvent(true, 1); + UNIT_ASSERT(msg); + + Cerr << "===Got message: " << NYdb::NPersQueue::DebugString(*msg) << "\n"; + + auto ev = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TCreatePartitionStreamEvent>(&*msg); + UNIT_ASSERT(ev); + --numLocks; + } + + auto reader2 = CreateReader( + *driver, + NYdb::NPersQueue::TReadSessionSettings() + .AppendTopics( + NYdb::NPersQueue::TTopicReadSettings("acc/topic4") + ) + .ConsumerName("shared/user") + .ReadOnlyOriginal(true) + ); + + numLocks = 1; + while (numLocks > 0) { + { + auto msg = reader1->GetEvent(true, 1); + UNIT_ASSERT(msg); + Cerr << "===Got message: " << NYdb::NPersQueue::DebugString(*msg) << "\n"; + + auto ev = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TDestroyPartitionStreamEvent>(&*msg); + UNIT_ASSERT(ev); + ev->Confirm(); + } + { + auto msg = reader2->GetEvent(true, 1); + UNIT_ASSERT(msg); + + Cerr << "===Got message: " << NYdb::NPersQueue::DebugString(*msg) << "\n"; + + auto ev = std::get_if<NYdb::NPersQueue::TReadSessionEvent::TCreatePartitionStreamEvent>(&*msg); + UNIT_ASSERT(ev); + } + --numLocks; + } + + { + Ydb::Topic::DescribeConsumerRequest request; + Ydb::Topic::DescribeConsumerResponse response; + request.set_path(TStringBuilder() << "/Root/PQ/" << topic4); + 
request.set_consumer("user"); + request.set_include_stats(true); + grpc::ClientContext rcontext; + + auto status = TopicStubP_->DescribeConsumer(&rcontext, request, &response); + + UNIT_ASSERT(status.ok()); + Ydb::Topic::DescribeConsumerResult res; + response.operation().result().UnpackTo(&res); + + Cerr << "DESCRIBE CONSUMER RESULT:\n" << response << "\n" << res.DebugString() << "\n"; + +// UNIT_ASSERT_GE(res.partitions(0).partition_stats().partition_offsets().end(), 1); + //TODO: check here some stats from describe consumer + UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SUCCESS); + UNIT_ASSERT_VALUES_EQUAL(res.partitions_size(), 3); + UNIT_ASSERT(res.partitions(0).partition_consumer_stats().read_session_id().size() > 0); + UNIT_ASSERT(res.partitions(1).partition_consumer_stats().read_session_id().size() > 0); + UNIT_ASSERT(res.partitions(2).partition_consumer_stats().read_session_id().size() > 0); + + } + + { + Ydb::Topic::DescribeConsumerRequest request; + Ydb::Topic::DescribeConsumerResponse response; + request.set_path(TStringBuilder() << "/Root/PQ/" << topic4); + request.set_consumer("not-consumer"); + request.set_include_stats(true); + + grpc::ClientContext rcontext; + + auto status = TopicStubP_->DescribeConsumer(&rcontext, request, &response); + + Cerr << response << "\n" << res << "\n"; + + UNIT_ASSERT(status.ok()); + Ydb::Topic::DescribeConsumerResult res; + response.operation().result().UnpackTo(&res); + + UNIT_ASSERT_VALUES_EQUAL(response.operation().status(), Ydb::StatusIds::SCHEME_ERROR); + } + { + NYdb::TDriverConfig driverCfg; + driverCfg.SetEndpoint(TStringBuilder() << "localhost:" << server.GrpcPort); + std::shared_ptr<NYdb::TDriver> ydbDriver(new NYdb::TDriver(driverCfg)); + auto topicClient = NYdb::NTopic::TTopicClient(*ydbDriver); + + auto res = topicClient.DescribeConsumer("/Root/PQ/" + topic4, "user", NYdb::NTopic::TDescribeConsumerSettings{}.IncludeStats(true)); + res.Wait(); + Cerr << res.GetValue().IsSuccess() 
<< " " << res.GetValue().GetIssues().ToString() << "\n"; + UNIT_ASSERT(res.GetValue().IsSuccess()); + + auto res2 = NYdb::TProtoAccessor::GetProto(res.GetValue().GetConsumerDescription()); + Cerr << res2 << "\n"; + UNIT_ASSERT(res.GetValue().GetConsumerDescription().GetPartitions().size() == 3); + UNIT_ASSERT(res.GetValue().GetConsumerDescription().GetPartitions()[0].GetPartitionStats()); + UNIT_ASSERT(res.GetValue().GetConsumerDescription().GetPartitions()[0].GetPartitionStats()->GetEndOffset() > 0); + UNIT_ASSERT(res.GetValue().GetConsumerDescription().GetPartitions()[0].GetPartitionConsumerStats()); + } } Y_UNIT_TEST(SchemeOperationFirstClassCitizen) { diff --git a/ydb/services/persqueue_v1/topic.cpp b/ydb/services/persqueue_v1/topic.cpp index c56e81faad..11d0866db1 100644 --- a/ydb/services/persqueue_v1/topic.cpp +++ b/ydb/services/persqueue_v1/topic.cpp @@ -119,7 +119,9 @@ void TGRpcTopicService::SetupIncomingRequests(NGrpc::TLoggerPtr logger) { ADD_REQUEST(DescribeTopic, TopicService, DescribeTopicRequest, DescribeTopicResponse, { ActorSystem_->Send(GRpcRequestProxyId_, new NGRpcService::TEvDescribeTopicRequest(ctx, IsRlAllowed())); }) - + ADD_REQUEST(DescribeConsumer, TopicService, DescribeConsumerRequest, DescribeConsumerResponse, { + ActorSystem_->Send(GRpcRequestProxyId_, new NGRpcService::TEvDescribeConsumerRequest(ctx, IsRlAllowed())); + }) #undef ADD_REQUEST diff --git a/ydb/services/ydb/ydb_bulk_upsert_olap_ut.cpp b/ydb/services/ydb/ydb_bulk_upsert_olap_ut.cpp index e03928e85c..35f6bc3874 100644 --- a/ydb/services/ydb/ydb_bulk_upsert_olap_ut.cpp +++ b/ydb/services/ydb/ydb_bulk_upsert_olap_ut.cpp @@ -11,18 +11,20 @@ using namespace NYdb; namespace { -ui32 ScanQuerySelect(NYdb::NTable::TTableClient client, const TString& tablePath, - const std::vector<std::pair<TString, NYdb::EPrimitiveType>>& ydbSchema = TTestOlap::PublicSchema()) { +std::vector<TString> ScanQuerySelect( + NYdb::NTable::TTableClient client, const TString& tablePath, + const 
std::vector<std::pair<TString, NYdb::EPrimitiveType>>& ydbSchema = TTestOlap::PublicSchema()) +{ auto query = Sprintf("SELECT * FROM `%s`", tablePath.c_str()); // Executes scan query auto result = client.StreamExecuteScanQuery(query).GetValueSync(); if (!result.IsSuccess()) { Cerr << "ScanQuery execution failure: " << result.GetIssues().ToString() << Endl; - return 0; + return {}; } - ui32 numRows = 0; + std::vector<TString> out; bool eos = false; Cout << "ScanQuery:" << Endl; while (!eos) { @@ -31,7 +33,7 @@ ui32 ScanQuerySelect(NYdb::NTable::TTableClient client, const TString& tablePath eos = true; if (!streamPart.EOS()) { Cerr << "ScanQuery execution failure: " << streamPart.GetIssues().ToString() << Endl; - return 0; + return {}; } continue; } @@ -42,31 +44,50 @@ ui32 ScanQuerySelect(NYdb::NTable::TTableClient client, const TString& tablePath TResultSetParser parser(rs); while (parser.TryNextRow()) { + TStringBuilder ss; + for (auto& [colName, colType] : ydbSchema) { switch (colType) { case NYdb::EPrimitiveType::Timestamp: - Cout << parser.ColumnParser(colName).GetOptionalTimestamp() << ", "; + ss << parser.ColumnParser(colName).GetOptionalTimestamp() << ","; + break; + case NYdb::EPrimitiveType::Datetime: + ss << parser.ColumnParser(colName).GetOptionalDatetime() << ","; + break; + case NYdb::EPrimitiveType::String: { + auto& col = parser.ColumnParser(colName); + if (col.GetKind() == TTypeParser::ETypeKind::Optional) { + ss << col.GetOptionalString() << ","; + } else { + ss << col.GetString() << ","; + } break; + } case NYdb::EPrimitiveType::Utf8: - Cout << parser.ColumnParser(colName).GetOptionalUtf8() << ", "; + ss << parser.ColumnParser(colName).GetOptionalUtf8() << ","; break; case NYdb::EPrimitiveType::Int32: - Cout << parser.ColumnParser(colName).GetOptionalInt32() << ", "; + ss << parser.ColumnParser(colName).GetOptionalInt32() << ","; break; case NYdb::EPrimitiveType::JsonDocument: - Cout << parser.ColumnParser(colName).GetOptionalJsonDocument() << ", 
"; + ss << parser.ColumnParser(colName).GetOptionalJsonDocument() << ","; break; default: - Cout << "<other>, "; + ss << "<other>,"; break; } } - Cout << Endl; - ++numRows; + + out.emplace_back(TString(ss)); + auto& str = out.back(); + if (str.size()) { + str.resize(str.size() - 1); + } + Cout << str << Endl; } } } - return numRows; + return out; } } @@ -105,8 +126,8 @@ Y_UNIT_TEST_SUITE(YdbTableBulkUpsertOlap) { Cerr << "Upsert done: " << TInstant::Now() - start << Endl; { // Read all - ui32 numRows = ScanQuerySelect(client, tablePath); - UNIT_ASSERT_GT(numRows, 0); + auto rows = ScanQuerySelect(client, tablePath); + UNIT_ASSERT_GT(rows.size(), 0); } // Negatives @@ -159,6 +180,89 @@ Y_UNIT_TEST_SUITE(YdbTableBulkUpsertOlap) { } } + Y_UNIT_TEST(UpsertCsvBug) { + NKikimrConfig::TAppConfig appConfig; + TKikimrWithGrpcAndRootSchema server(appConfig); + server.Server_->GetRuntime()->SetLogPriority(NKikimrServices::TX_COLUMNSHARD, NActors::NLog::PRI_DEBUG); + + ui16 grpc = server.GetPort(); + TString location = TStringBuilder() << "localhost:" << grpc; + auto connection = NYdb::TDriver(TDriverConfig().SetEndpoint(location)); + + NYdb::NTable::TTableClient client(connection); + auto session = client.GetSession().ExtractValueSync().GetSession(); + TString tablePath = TTestOlap::TablePath; + + { // KIKIMR-16411 +// CREATE TABLE subscriber ( +// id String NOT NULL, +// email Utf8, +// status String, +// subscribed_at Datetime, +// confirmed_at Datetime, +// unsubscribed_at Datetime, +// referrer Utf8, +// language Utf8, +// timezone Utf8, +// ip_address String, +// fields JsonDocument, +// PRIMARY KEY (id) +// ); + std::vector<std::pair<TString, NYdb::EPrimitiveType>> schema = { + { "id", NYdb::EPrimitiveType::String }, + { "email", NYdb::EPrimitiveType::Utf8 }, + { "status", NYdb::EPrimitiveType::String }, + { "subscribed_at", NYdb::EPrimitiveType::Datetime }, + { "confirmed_at", NYdb::EPrimitiveType::Datetime }, + { "unsubscribed_at", NYdb::EPrimitiveType::Datetime }, + 
{ "referrer", NYdb::EPrimitiveType::Utf8 }, + { "language", NYdb::EPrimitiveType::Utf8 }, + { "timezone", NYdb::EPrimitiveType::Utf8 }, + { "ip_address", NYdb::EPrimitiveType::String }, + { "fields", NYdb::EPrimitiveType::JsonDocument } + }; + + auto tableBuilder = client.GetTableBuilder(); + for (auto& [name, type] : schema) { + if (name == "id") { + tableBuilder.AddNonNullableColumn(name, type); + } else { + tableBuilder.AddNullableColumn(name, type); + } + } + tableBuilder.SetPrimaryKeyColumns({"id"}); + auto result = session.CreateTable(tablePath, tableBuilder.Build(), {}).ExtractValueSync(); + + UNIT_ASSERT_EQUAL(result.IsTransportError(), false); + UNIT_ASSERT_EQUAL(result.GetStatus(), EStatus::SUCCESS); + + TString csv = + "id|email|status|subscribed_at|confirmed_at|unsubscribed_at|referrer|language|timezone|ip_address|fields\n" + "123123bs|testd|subscr|1579301930|123213123||http|ru|AsiaNovo|hello|\"{}\"\n"; + + Ydb::Formats::CsvSettings csvSettings; + csvSettings.set_header(true); + csvSettings.set_delimiter("|"); + + TString formatSettings; + Y_PROTOBUF_SUPPRESS_NODISCARD csvSettings.SerializeToString(&formatSettings); + + NYdb::NTable::TBulkUpsertSettings upsertSettings; + upsertSettings.FormatSettings(formatSettings); + + auto res = client.BulkUpsert(tablePath, + NYdb::NTable::EDataFormat::CSV, csv, {}, upsertSettings).GetValueSync(); + + Cerr << res.GetStatus() << Endl; + UNIT_ASSERT_EQUAL_C(res.GetStatus(), EStatus::SUCCESS, res.GetIssues().ToString()); + + auto rows = ScanQuerySelect(client, tablePath, schema); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(rows[0], + "123123bs,testd,subscr,2020-01-17T22:58:50.000000Z,1973-11-27T01:52:03.000000Z,(empty maybe),http,ru,AsiaNovo,hello,{}"); + } + } + Y_UNIT_TEST(UpsertCSV) { NKikimrConfig::TAppConfig appConfig; appConfig.MutableFeatureFlags()->SetEnableOlapSchemaOperations(true); @@ -192,8 +296,8 @@ Y_UNIT_TEST_SUITE(YdbTableBulkUpsertOlap) { Cerr << "Upsert done: " << 
TInstant::Now() - start << Endl; { // Read all - ui32 numRows = ScanQuerySelect(client, tablePath); - UNIT_ASSERT_GT(numRows, 0); + auto rows = ScanQuerySelect(client, tablePath); + UNIT_ASSERT_GT(rows.size(), 0); } // Negatives @@ -341,8 +445,8 @@ Y_UNIT_TEST_SUITE(YdbTableBulkUpsertOlap) { Cerr << "Upsert done: " << TInstant::Now() - start << Endl; { // Read all - ui32 numRows = ScanQuerySelect(client, tablePath); - UNIT_ASSERT_GT(numRows, 0); + auto rows = ScanQuerySelect(client, tablePath); + UNIT_ASSERT_GT(rows.size(), 0); } // Read diff --git a/ydb/services/ydb/ydb_bulk_upsert_ut.cpp b/ydb/services/ydb/ydb_bulk_upsert_ut.cpp index 1b7655e158..27b2199506 100644 --- a/ydb/services/ydb/ydb_bulk_upsert_ut.cpp +++ b/ydb/services/ydb/ydb_bulk_upsert_ut.cpp @@ -1286,4 +1286,21 @@ Y_UNIT_TEST_SUITE(YdbTableBulkUpsert) { } } } + + Y_UNIT_TEST(ZeroRows) { + TKikimrWithGrpcAndRootSchema server; + ui16 grpc = server.GetPort(); + TString location = TStringBuilder() << "localhost:" << grpc; + auto connection = NYdb::TDriver(TDriverConfig().SetEndpoint(location)); + + NYdb::NTable::TTableClient db(connection); + + CreateTestTable(db); + + NYdb::TValueBuilder rows; + rows.BeginList() + .EndList(); + auto status = db.BulkUpsert("Root/Test", rows.Build()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(status.GetStatus(), EStatus::SUCCESS, status.GetIssues().ToString()); + } } diff --git a/ydb/services/ydb/ydb_logstore_ut.cpp b/ydb/services/ydb/ydb_logstore_ut.cpp index 9fca08cd0d..3f844db42d 100644 --- a/ydb/services/ydb/ydb_logstore_ut.cpp +++ b/ydb/services/ydb/ydb_logstore_ut.cpp @@ -501,7 +501,7 @@ Y_UNIT_TEST_SUITE(YdbLogStore) { NYdb::NLogStore::TAlterLogTableSettings alterLogTableSettings; alterLogTableSettings.AlterTtlSettings(NYdb::NTable::TAlterTtlSettings::Set("uint_timestamp", NYdb::NTable::TTtlSettings::EUnit::MilliSeconds, TDuration::Seconds(3600))); auto res = logStoreClient.AlterLogTable("/Root/LogStore/log1", std::move(alterLogTableSettings)).GetValueSync(); 
- UNIT_ASSERT_VALUES_EQUAL_C(res.GetStatus(), EStatus::GENERIC_ERROR, res.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL_C(res.GetStatus(), EStatus::BAD_REQUEST, res.GetIssues().ToString()); } { auto res = logStoreClient.DescribeLogTable("/Root/LogStore/log1").GetValueSync(); @@ -516,7 +516,7 @@ Y_UNIT_TEST_SUITE(YdbLogStore) { NYdb::NLogStore::TAlterLogTableSettings alterLogTableSettings; alterLogTableSettings.AlterTtlSettings(NYdb::NTable::TAlterTtlSettings::Set("ingested_at", TDuration::Seconds(86400))); auto res = logStoreClient.AlterLogTable("/Root/LogStore/log2", std::move(alterLogTableSettings)).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(res.GetStatus(), EStatus::GENERIC_ERROR, res.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL_C(res.GetStatus(), EStatus::BAD_REQUEST, res.GetIssues().ToString()); } { auto res = logStoreClient.DescribeLogTable("/Root/LogStore/log2").GetValueSync(); @@ -550,7 +550,7 @@ Y_UNIT_TEST_SUITE(YdbLogStore) { NYdb::NLogStore::TAlterLogTableSettings alterLogTableSettings; alterLogTableSettings.AlterTtlSettings(NYdb::NTable::TAlterTtlSettings::Drop()); auto res = logStoreClient.AlterLogTable("/Root/LogStore/log2", std::move(alterLogTableSettings)).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(res.GetStatus(), EStatus::GENERIC_ERROR, res.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL_C(res.GetStatus(), EStatus::BAD_REQUEST, res.GetIssues().ToString()); } { auto res = logStoreClient.DescribeLogTable("/Root/LogStore/log2").GetValueSync(); @@ -568,7 +568,7 @@ Y_UNIT_TEST_SUITE(YdbLogStore) { NYdb::NLogStore::TLogTableSharding sharding(NYdb::NLogStore::HASH_TYPE_LOGS_SPECIAL, {"timestamp", "uid"}, 4); NYdb::NLogStore::TLogTableDescription tableDescr("default", sharding, ttlSettings); auto res = logStoreClient.CreateLogTable("/Root/LogStore/log3", std::move(tableDescr)).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(res.GetStatus(), EStatus::GENERIC_ERROR, res.GetIssues().ToString()); + 
UNIT_ASSERT_VALUES_EQUAL_C(res.GetStatus(), EStatus::BAD_REQUEST, res.GetIssues().ToString()); } // Use column of invalid type for TTL @@ -577,7 +577,7 @@ Y_UNIT_TEST_SUITE(YdbLogStore) { NYdb::NLogStore::TLogTableSharding sharding(NYdb::NLogStore::HASH_TYPE_LOGS_SPECIAL, {"timestamp", "uid"}, 4); NYdb::NLogStore::TLogTableDescription tableDescr("default", sharding, ttlSettings); auto res = logStoreClient.CreateLogTable("/Root/LogStore/log4", std::move(tableDescr)).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(res.GetStatus(), EStatus::GENERIC_ERROR, res.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL_C(res.GetStatus(), EStatus::BAD_REQUEST, res.GetIssues().ToString()); } // Use non-Timestamp column for TTL @@ -586,7 +586,7 @@ Y_UNIT_TEST_SUITE(YdbLogStore) { NYdb::NLogStore::TLogTableSharding sharding(NYdb::NLogStore::HASH_TYPE_LOGS_SPECIAL, {"timestamp", "uid"}, 4); NYdb::NLogStore::TLogTableDescription tableDescr("default", sharding, ttlSettings); auto res = logStoreClient.CreateLogTable("/Root/LogStore/log5", std::move(tableDescr)).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(res.GetStatus(), EStatus::GENERIC_ERROR, res.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL_C(res.GetStatus(), EStatus::BAD_REQUEST, res.GetIssues().ToString()); } } } diff --git a/ydb/services/ydb/ydb_table_ut.cpp b/ydb/services/ydb/ydb_table_ut.cpp index bb827c5f45..41994e303e 100644 --- a/ydb/services/ydb/ydb_table_ut.cpp +++ b/ydb/services/ydb/ydb_table_ut.cpp @@ -3118,7 +3118,7 @@ R"___(<main>: Error: Transaction not found: , code: 2015 , {"/Root/Table-1", "/Root/Table-10"}} ).ExtractValueSync(); UNIT_ASSERT_EQUAL(result.IsTransportError(), false); - UNIT_ASSERT_EQUAL_C(result.GetStatus(), EStatus::GENERIC_ERROR, result.GetStatus()); + UNIT_ASSERT_EQUAL_C(result.GetStatus(), EStatus::BAD_REQUEST, result.GetStatus()); } { diff --git a/ydb/services/ydb/ydb_ut.cpp b/ydb/services/ydb/ydb_ut.cpp index 65d0827b65..aec88ec4e9 100644 --- a/ydb/services/ydb/ydb_ut.cpp +++ 
b/ydb/services/ydb/ydb_ut.cpp @@ -447,6 +447,7 @@ Y_UNIT_TEST_SUITE(TGRpcClientLowTest) { UNIT_ASSERT(deferred.ready() == true); Ydb::Scheme::DescribePathResult result; deferred.result().UnpackTo(&result); + result.mutable_self()->clear_created_at(); // variadic part TString tmp; google::protobuf::TextFormat::PrintToString(result, &tmp); const TString expected = R"___(self { @@ -949,6 +950,10 @@ Y_UNIT_TEST_SUITE(TGRpcYdbTest) { UNIT_ASSERT(response.operation().status() == Ydb::StatusIds::SUCCESS); Ydb::Scheme::ListDirectoryResult result; response.operation().result().UnpackTo(&result); + result.mutable_self()->clear_created_at(); // variadic part + for (auto& child : *result.mutable_children()) { + child.clear_created_at(); + } TString tmp; google::protobuf::TextFormat::PrintToString(result, &tmp); const TString expected = "self {\n" @@ -1237,6 +1242,7 @@ Y_UNIT_TEST_SUITE(TGRpcYdbTest) { UNIT_ASSERT(response.operation().status() == Ydb::StatusIds::SUCCESS); Ydb::Table::DescribeTableResult result; response.operation().result().UnpackTo(&result); + result.mutable_self()->clear_created_at(); // variadic part TString tmp; google::protobuf::TextFormat::PrintToString(result, &tmp); const TString expected = R"___(self { @@ -1289,6 +1295,7 @@ partitioning_settings { UNIT_ASSERT(response.operation().status() == Ydb::StatusIds::SUCCESS); Ydb::Scheme::DescribePathResult result; response.operation().result().UnpackTo(&result); + result.mutable_self()->clear_created_at(); // variadic part TString tmp; google::protobuf::TextFormat::PrintToString(result, &tmp); const TString expected = "self {\n" @@ -1572,6 +1579,7 @@ value { UNIT_ASSERT(response.operation().status() == Ydb::StatusIds::SUCCESS); Ydb::Table::DescribeTableResult result; response.operation().result().UnpackTo(&result); + result.mutable_self()->clear_created_at(); // variadic part TString tmp; google::protobuf::TextFormat::PrintToString(result, &tmp); const TString expected = R"___(self { diff --git 
a/ydb/tests/functional/limits/test_schemeshard_limits.py b/ydb/tests/functional/limits/test_schemeshard_limits.py index 330cb88377..03b0ed61a3 100644 --- a/ydb/tests/functional/limits/test_schemeshard_limits.py +++ b/ydb/tests/functional/limits/test_schemeshard_limits.py @@ -67,7 +67,7 @@ class TestSchemeShardLimitsCase0(Base): assert_that( callee, raises( - ydb.GenericError, + ydb.BadRequest, "ACL is too long" ) ) @@ -103,7 +103,7 @@ class TestSchemeShardLimitsCase1(Base): assert_that( callee, raises( - ydb.GenericError, + ydb.BadRequest, "ACL is too long" ) ) diff --git a/ydb/tests/functional/scheme_shard/test_alter_ops.py b/ydb/tests/functional/scheme_shard/test_alter_ops.py index d3869272bf..60e11f0fff 100644 --- a/ydb/tests/functional/scheme_shard/test_alter_ops.py +++ b/ydb/tests/functional/scheme_shard/test_alter_ops.py @@ -69,7 +69,7 @@ class TestSchemeShardAlterTest(object): assert_that( callee, raises( - ydb.GenericError, + ydb.BadRequest, "Cannot alter type for column" ) ) @@ -157,7 +157,7 @@ class TestSchemeShardAlterTest(object): assert_that( callee, raises( - ydb.GenericError, + ydb.BadRequest, "drop key column:" ) ) @@ -208,7 +208,7 @@ class TestSchemeShardAlterTest(object): assert_that( callee, raises( - ydb.GenericError, + ydb.BadRequest, "drop key column:" ) ) diff --git a/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema b/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema index 3595c4ceed..0937c94d86 100644 --- a/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema +++ b/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema @@ -6068,6 +6068,11 @@ ], "ColumnsAdded": [ { + "ColumnId": 
7, + "ColumnName": "VirtualTimestamps", + "ColumnType": "Bool" + }, + { "ColumnId": 1, "ColumnName": "OwnerPathId", "ColumnType": "Uint64" @@ -6102,6 +6107,7 @@ "ColumnFamilies": { "0": { "Columns": [ + 7, 1, 2, 3, @@ -6134,6 +6140,11 @@ ], "ColumnsAdded": [ { + "ColumnId": 7, + "ColumnName": "VirtualTimestamps", + "ColumnType": "Bool" + }, + { "ColumnId": 1, "ColumnName": "OwnerPathId", "ColumnType": "Uint64" @@ -6168,6 +6179,7 @@ "ColumnFamilies": { "0": { "Columns": [ + 7, 1, 2, 3, diff --git a/ydb/tests/functional/sqs/common/test_counters.py b/ydb/tests/functional/sqs/common/test_counters.py index aebfd1b14f..08db16c37c 100644 --- a/ydb/tests/functional/sqs/common/test_counters.py +++ b/ydb/tests/functional/sqs/common/test_counters.py @@ -23,8 +23,14 @@ class TestSqsCountersFeatures(KikimrSqsTestBase): attributes_path = self._smart_make_table_path(self._username, self.queue_name, version, None, 'Attributes') deadline = int((time.time() + 600) * 1000) - self._execute_yql_query('UPSERT INTO `{}` (State, ShowDetailedCountersDeadline) VALUES (0, {})' - .format(attributes_path, deadline)) + query_key = {'name': 'State', 'value': 0} + if self.get_tables_format() != 0: + query_key = {'name': 'QueueIdNumber', 'value': self._get_queue_version_number(self._username, self.queue_name)} + + self._execute_yql_query( + f'''UPDATE `{attributes_path}` SET ShowDetailedCountersDeadline = {deadline} + WHERE {query_key['name']} = {query_key['value']}''' + ) @pytest.mark.parametrize(**TABLES_FORMAT_PARAMS) def test_creates_counter(self, tables_format): diff --git a/ydb/tests/functional/sqs/common/test_queues_managing.py b/ydb/tests/functional/sqs/common/test_queues_managing.py index 198d38c2c5..015deec30a 100644 --- a/ydb/tests/functional/sqs/common/test_queues_managing.py +++ b/ydb/tests/functional/sqs/common/test_queues_managing.py @@ -297,7 +297,7 @@ class QueuesManagingTest(KikimrSqsTestBase): @pytest.mark.parametrize(**IS_FIFO_PARAMS) def test_delete_and_create_queue(self, 
is_fifo): - self._init_with_params(is_fifo) + self._init_with_params(is_fifo, tables_format=0) created_queue_url = self._create_queue_and_assert(self.queue_name, is_fifo=is_fifo, use_http=True) self.seq_no += 1 diff --git a/ydb/tests/functional/sqs/messaging/test_fifo_messaging.py b/ydb/tests/functional/sqs/messaging/test_fifo_messaging.py index dbc269bc0a..fbbe0c3eb4 100644 --- a/ydb/tests/functional/sqs/messaging/test_fifo_messaging.py +++ b/ydb/tests/functional/sqs/messaging/test_fifo_messaging.py @@ -54,6 +54,7 @@ class SqsFifoMessagingTest(KikimrSqsTestBase): return config_generator def test_only_single_read_infly_from_fifo(self): + self._init_with_params(tables_format=0) self._create_queue_send_x_messages_read_y_messages( self.queue_name, send_count=10, read_count=1, visibility_timeout=1000, msg_body_template=self._msg_body_template, is_fifo=True @@ -63,6 +64,7 @@ class SqsFifoMessagingTest(KikimrSqsTestBase): ) def test_fifo_read_delete_single_message(self): + self._init_with_params(tables_format=0) created_queue_url = self._create_queue_and_assert(self.queue_name, is_fifo=True) message_ids = self._send_messages( created_queue_url, message_count=10, msg_body_template=self._msg_body_template, is_fifo=True, group_id='group' diff --git a/ydb/tests/functional/sqs/multinode/test_multinode_cluster.py b/ydb/tests/functional/sqs/multinode/test_multinode_cluster.py index 8a431a90a4..c5a8af816c 100644 --- a/ydb/tests/functional/sqs/multinode/test_multinode_cluster.py +++ b/ydb/tests/functional/sqs/multinode/test_multinode_cluster.py @@ -59,7 +59,7 @@ class TestSqsMultinodeCluster(KikimrSqsTestBase): @pytest.mark.parametrize(**IS_FIFO_PARAMS) @pytest.mark.parametrize(**STOP_NODE_PARAMS) def test_has_messages_counters(self, is_fifo, stop_node): - self._init_with_params(is_fifo) + self._init_with_params(is_fifo, tables_format=0) self._create_queue_and_assert(self.queue_name, is_fifo=is_fifo) node_index = self._get_queue_master_node_index() logging.debug('Master node for 
queue "{}" is {}'.format(self.queue_name, node_index)) @@ -144,6 +144,7 @@ class TestSqsMultinodeCluster(KikimrSqsTestBase): @pytest.mark.parametrize(**STOP_NODE_PARAMS) def test_reassign_master(self, stop_node): + self._init_with_params(tables_format=0) self._create_queue_and_assert(self.queue_name) node_index = self._get_queue_master_node_index() proxy_node_index = self._other_node(node_index) @@ -192,6 +193,7 @@ class TestSqsMultinodeCluster(KikimrSqsTestBase): self.receive_message_finished = True def test_ends_request_after_kill(self): + self._init_with_params(tables_format=0) self._create_queue_and_assert(self.queue_name) node_index = self._get_queue_master_node_index() self.receive_message_finished = False diff --git a/ydb/tests/library/common/protobuf_ss.py b/ydb/tests/library/common/protobuf_ss.py index 90b0638eb4..ae905bad45 100644 --- a/ydb/tests/library/common/protobuf_ss.py +++ b/ydb/tests/library/common/protobuf_ss.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- import itertools import string +from datetime import timedelta from os.path import basename, dirname, join from ydb.core.protos import msgbus_pb2 @@ -330,7 +331,7 @@ class CreateTopicRequest(AbstractTSchemeOperationRequest): self.__important_client_ids = None self.__max_count_in_partition = None self.__max_size_in_partition = None - self.__lifetime_seconds = 0 + self.__lifetime_seconds = int(timedelta(days=1).total_seconds()) @property def partitions_count(self): diff --git a/ydb/tests/library/sqs/test_base.py b/ydb/tests/library/sqs/test_base.py index 9468280880..6572e94679 100644 --- a/ydb/tests/library/sqs/test_base.py +++ b/ydb/tests/library/sqs/test_base.py @@ -25,6 +25,8 @@ from concurrent import futures DEFAULT_VISIBILITY_TIMEOUT = 30 +DEFAULT_TABLES_FORMAT = 1 + logger = logging.getLogger(__name__) @@ -245,6 +247,11 @@ class KikimrSqsTestBase(object): ) ) + def get_tables_format(self, user=None): + if user is None: + user = self._username + return self.tables_format_per_user.get(user, 
DEFAULT_TABLES_FORMAT) + def check_all_users_queues_tables_consistency(self): users = [entry.name for entry in self._driver.scheme_client.list_directory(self.sqs_root).children] for user in users: @@ -642,7 +649,7 @@ class KikimrSqsTestBase(object): time.sleep(1) # wait node to start def _smart_make_table_path(self, user_name, queue_name, queue_version, shard, table_name): - tables_format = self.tables_format_per_user.get(user_name, 0) + tables_format = self.get_tables_format(user_name) table_path = self.sqs_root if tables_format == 0: |