author    | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300
commit    | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree      | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/actors/core
download  | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/actors/core')
94 files changed, 17947 insertions, 0 deletions
diff --git a/library/cpp/actors/core/README.md b/library/cpp/actors/core/README.md
new file mode 100644
index 0000000000..439a8dd459
--- /dev/null
+++ b/library/cpp/actors/core/README.md
@@ -0,0 +1,99 @@

## Memory tracker

https://a.yandex-team.ru/arc/trunk/arcadia/library/cpp/actors/core/memory_track.h

Usage:

* tracking allocations of instances of a specific class via new/delete and new[]/delete[]
* tracking allocations in containers
* manual tracking of allocation/deallocation points

----

### Tracking class allocations via new/delete

Usage with an automatically generated label:

```cpp
#include <library/cpp/actors/core/memory_track.h>

struct TTypeLabeled
    : public NActors::NMemory::TTrack<TTypeLabeled>
{
    char payload[16];
};
```

Usage with a user-defined label name:

```cpp
#include <library/cpp/actors/core/memory_track.h>

static const char NamedLabel[] = "NamedLabel";

struct TNameLabeled
    : public NActors::NMemory::TTrack<TNameLabeled, NamedLabel>
{
    char payload[32];
};
```

----

### Tracking allocations in containers

```cpp
#include <library/cpp/actors/core/memory_track.h>

static const char InContainerLabel[] = "InContainerLabel";

struct TInContainer {
    char payload[16];
};

std::vector<TInContainer, NActors::NMemory::TAlloc<TInContainer>> vecT;

std::vector<TInContainer, NActors::NMemory::TAlloc<TInContainer, InContainerLabel>> vecN;

using TKey = int;

std::map<TKey, TInContainer, std::less<TKey>,
    NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>>> mapT;

std::map<TKey, TInContainer, std::less<TKey>,
    NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>, InContainerLabel>> mapN;

std::unordered_map<TKey, TInContainer, std::hash<TKey>, std::equal_to<TKey>,
    NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>>> umapT;

std::unordered_map<TKey, TInContainer, std::hash<TKey>, std::equal_to<TKey>,
    NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>, InContainerLabel>> umapN;
```

----

### Manual tracking of allocations/deallocations

```cpp
#include <library/cpp/actors/core/memory_track.h>

static const char ManualLabel[] = "ManualLabel";

...
NActors::NMemory::TLabel<ManualLabel>::Add(size);

...
NActors::NMemory::TLabel<ManualLabel>::Sub(size);
```

----

### Collected metrics

Service **utils**, user label **label**, sensors:

- MT/Count: current number of allocations
- MT/Memory: currently allocated memory
- MT/PeakCount: peak number of allocations (sampled at a fixed rate)
- MT/PeakMemory: peak amount of allocated memory
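For exception safety, the manual `Add`/`Sub` calls can be paired in a small RAII guard. A minimal sketch built only on the calls shown above (`TManualLabelGuard` is a hypothetical helper, not part of this library):

```cpp
#include <library/cpp/actors/core/memory_track.h>

static const char ManualLabel[] = "ManualLabel";

// Charges `size` bytes to ManualLabel for the lifetime of the guard.
struct TManualLabelGuard {
    explicit TManualLabelGuard(size_t size)
        : Size(size)
    {
        NActors::NMemory::TLabel<ManualLabel>::Add(Size);
    }

    ~TManualLabelGuard() {
        NActors::NMemory::TLabel<ManualLabel>::Sub(Size);
    }

    const size_t Size;
};
```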
diff --git a/library/cpp/actors/core/actor.cpp b/library/cpp/actors/core/actor.cpp
new file mode 100644
index 0000000000..6f9ba6a42b
--- /dev/null
+++ b/library/cpp/actors/core/actor.cpp
@@ -0,0 +1,172 @@

#include "actor.h"
#include "executor_thread.h"
#include "mailbox.h"
#include <library/cpp/actors/util/datetime.h>

namespace NActors {
    Y_POD_THREAD(TActivationContext*)
    TlsActivationContext((TActivationContext*)nullptr);

    bool TActorContext::Send(const TActorId& recipient, IEventBase* ev, ui32 flags, ui64 cookie, NWilson::TTraceId traceId) const {
        return Send(new IEventHandle(recipient, SelfID, ev, flags, cookie, nullptr, std::move(traceId)));
    }

    bool TActorContext::Send(TAutoPtr<IEventHandle> ev) const {
        return ExecutorThread.Send(ev);
    }

    void IActor::Registered(TActorSystem* sys, const TActorId& owner) {
        // fallback to the legacy method; do not use it anymore
        if (auto eh = AfterRegister(SelfId(), owner))
            sys->Send(eh);
    }

    void IActor::Describe(IOutputStream &out) const noexcept {
        SelfActorId.Out(out);
    }

    bool IActor::Send(const TActorId& recipient, IEventBase* ev, ui32 flags, ui64 cookie, NWilson::TTraceId traceId) const noexcept {
        return SelfActorId.Send(recipient, ev, flags, cookie, std::move(traceId));
    }

    bool TActivationContext::Send(TAutoPtr<IEventHandle> ev) {
        return TlsActivationContext->ExecutorThread.Send(ev);
    }

    void TActivationContext::Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) {
        TlsActivationContext->ExecutorThread.Schedule(deadline, ev, cookie);
    }

    void TActivationContext::Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) {
        TlsActivationContext->ExecutorThread.Schedule(deadline, ev, cookie);
    }

    void TActivationContext::Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) {
        TlsActivationContext->ExecutorThread.Schedule(delta, ev, cookie);
    }

    bool TActorIdentity::Send(const TActorId& recipient, IEventBase* ev, ui32 flags, ui64 cookie, NWilson::TTraceId traceId) const {
        return TActivationContext::Send(new IEventHandle(recipient, *this, ev, flags, cookie, nullptr, std::move(traceId)));
    }

    void TActorIdentity::Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie) const {
        return TActivationContext::Schedule(deadline, new IEventHandle(*this, {}, ev), cookie);
    }

    void TActorIdentity::Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie) const {
        return TActivationContext::Schedule(deadline, new IEventHandle(*this, {}, ev), cookie);
    }

    void TActorIdentity::Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie) const {
        return TActivationContext::Schedule(delta, new IEventHandle(*this, {}, ev), cookie);
    }

    TActorId TActivationContext::RegisterWithSameMailbox(IActor* actor, TActorId parentId) {
        Y_VERIFY_DEBUG(parentId);
        auto& ctx = *TlsActivationContext;
        return ctx.ExecutorThread.RegisterActor(actor, &ctx.Mailbox, parentId.Hint(), parentId);
    }

    TActorId TActorContext::RegisterWithSameMailbox(IActor* actor) const {
        return ExecutorThread.RegisterActor(actor, &Mailbox, SelfID.Hint(), SelfID);
    }
    TActorId IActor::RegisterWithSameMailbox(IActor* actor) const noexcept {
        return TlsActivationContext->ExecutorThread.RegisterActor(actor, &TlsActivationContext->Mailbox, SelfActorId.Hint(), SelfActorId);
    }

    TActorId TActivationContext::Register(IActor* actor, TActorId parentId, TMailboxType::EType mailboxType, ui32 poolId) {
        return TlsActivationContext->ExecutorThread.RegisterActor(actor, mailboxType, poolId, parentId);
    }

    TActorId TActivationContext::InterconnectProxy(ui32 destinationNodeId) {
        return TlsActivationContext->ExecutorThread.ActorSystem->InterconnectProxy(destinationNodeId);
    }

    TActorSystem* TActivationContext::ActorSystem() {
        return TlsActivationContext->ExecutorThread.ActorSystem;
    }

    i64 TActivationContext::GetCurrentEventTicks() {
        return GetCycleCountFast() - TlsActivationContext->EventStart;
    }

    double TActivationContext::GetCurrentEventTicksAsSeconds() {
        return NHPTimer::GetSeconds(GetCurrentEventTicks());
    }

    TActorId TActorContext::Register(IActor* actor, TMailboxType::EType mailboxType, ui32 poolId) const {
        return ExecutorThread.RegisterActor(actor, mailboxType, poolId, SelfID);
    }

    TActorId IActor::Register(IActor* actor, TMailboxType::EType mailboxType, ui32 poolId) const noexcept {
        return TlsActivationContext->ExecutorThread.RegisterActor(actor, mailboxType, poolId, SelfActorId);
    }

    void TActorContext::Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie) const {
        ExecutorThread.Schedule(deadline, new IEventHandle(SelfID, TActorId(), ev), cookie);
    }

    void TActorContext::Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie) const {
        ExecutorThread.Schedule(deadline, new IEventHandle(SelfID, TActorId(), ev), cookie);
    }

    void TActorContext::Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie) const {
        ExecutorThread.Schedule(delta, new IEventHandle(SelfID, TActorId(), ev), cookie);
    }

    void IActor::Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie) const noexcept {
        TlsActivationContext->ExecutorThread.Schedule(deadline, new IEventHandle(SelfActorId, TActorId(), ev), cookie);
    }

    void IActor::Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie) const noexcept {
        TlsActivationContext->ExecutorThread.Schedule(deadline, new IEventHandle(SelfActorId, TActorId(), ev), cookie);
    }

    void IActor::Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie) const noexcept {
        TlsActivationContext->ExecutorThread.Schedule(delta, new IEventHandle(SelfActorId, TActorId(), ev), cookie);
    }

    TInstant TActivationContext::Now() {
        return TlsActivationContext->ExecutorThread.ActorSystem->Timestamp();
    }

    TMonotonic TActivationContext::Monotonic() {
        return TlsActivationContext->ExecutorThread.ActorSystem->Monotonic();
    }

    TInstant TActorContext::Now() const {
        return ExecutorThread.ActorSystem->Timestamp();
    }

    TMonotonic TActorContext::Monotonic() const {
        return ExecutorThread.ActorSystem->Monotonic();
    }

    NLog::TSettings* TActivationContext::LoggerSettings() const {
        return ExecutorThread.ActorSystem->LoggerSettings();
    }

    std::pair<ui32, ui32> TActorContext::CountMailboxEvents(ui32 maxTraverse) const {
        return Mailbox.CountMailboxEvents(SelfID.LocalId(), maxTraverse);
    }

    std::pair<ui32, ui32> IActor::CountMailboxEvents(ui32 maxTraverse) const {
        return TlsActivationContext->Mailbox.CountMailboxEvents(SelfActorId.LocalId(), maxTraverse);
    }
    void IActor::Die(const TActorContext& ctx) {
        if (ctx.SelfID)
            Y_VERIFY(ctx.SelfID == SelfActorId);
        PassAway();
    }

    void IActor::PassAway() {
        auto& cx = *TlsActivationContext;
        cx.ExecutorThread.UnregisterActor(&cx.Mailbox, SelfActorId.LocalId());
    }

    double IActor::GetElapsedTicksAsSeconds() const {
        return NHPTimer::GetSeconds(ElapsedTicks);
    }
}
diff --git a/library/cpp/actors/core/actor.h b/library/cpp/actors/core/actor.h
new file mode 100644
index 0000000000..ed29bd14b9
--- /dev/null
+++ b/library/cpp/actors/core/actor.h
@@ -0,0 +1,530 @@

#pragma once

#include "event.h"
#include "monotonic.h"
#include <util/system/tls.h>
#include <library/cpp/actors/util/local_process_key.h>

namespace NActors {
    class TActorSystem;
    class TMailboxTable;
    struct TMailboxHeader;

    class TExecutorThread;
    class IActor;
    class ISchedulerCookie;

    namespace NLog {
        struct TSettings;
    }

    struct TActorContext;

    struct TActivationContext {
    public:
        TMailboxHeader& Mailbox;
        TExecutorThread& ExecutorThread;
        const NHPTimer::STime EventStart;

    protected:
        explicit TActivationContext(TMailboxHeader& mailbox, TExecutorThread& executorThread, NHPTimer::STime eventStart)
            : Mailbox(mailbox)
            , ExecutorThread(executorThread)
            , EventStart(eventStart)
        {
        }

    public:
        static bool Send(TAutoPtr<IEventHandle> ev);

        /**
         * Schedule a one-shot event that will be sent at the given time point in the future.
         *
         * @param deadline the wallclock time point in the future when the event must be sent
         * @param ev       the event to send
         * @param cookie   cookie that will be piggybacked with the event
         */
        static void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr);

        /**
         * Schedule a one-shot event that will be sent at the given time point in the future.
         *
         * @param deadline the monotonic time point in the future when the event must be sent
         * @param ev       the event to send
         * @param cookie   cookie that will be piggybacked with the event
         */
        static void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr);

        /**
         * Schedule a one-shot event that will be sent after the given delay.
         *
         * @param delta  the time from now to delay sending the event
         * @param ev     the event to send
         * @param cookie cookie that will be piggybacked with the event
         */
        static void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr);

        static TInstant Now();
        static TMonotonic Monotonic();
        NLog::TSettings* LoggerSettings() const;
        // Register a new actor in the ActorSystem on a fresh mailbox.
        static TActorId Register(IActor* actor, TActorId parentId = TActorId(), TMailboxType::EType mailboxType = TMailboxType::HTSwap, ui32 poolId = Max<ui32>());

        // Register a new actor in the ActorSystem on the same _mailbox_ as the current actor.
        // All actors on one mailbox are executed by a single thread, which means
        // no _cpu core scalability_ for such actors.
        // This method of registration can be useful if multiple actors share
        // some memory.
        static TActorId RegisterWithSameMailbox(IActor* actor, TActorId parentId);

        static const TActorContext& AsActorContext();
        static TActorContext ActorContextFor(TActorId id);

        static TActorId InterconnectProxy(ui32 nodeid);
        static TActorSystem* ActorSystem();

        static i64 GetCurrentEventTicks();
        static double GetCurrentEventTicksAsSeconds();
    };
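    // Editor's sketch (not part of this commit): using the same-mailbox
    // registration declared above from inside an actor's event handler;
    // THelperActor and SharedState are hypothetical user declarations.
    //
    //     TActorId helper = TActivationContext::RegisterWithSameMailbox(
    //         new THelperActor(&SharedState), SelfId());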
    struct TActorContext: public TActivationContext {
        const TActorId SelfID;

        explicit TActorContext(TMailboxHeader& mailbox, TExecutorThread& executorThread, NHPTimer::STime eventStart, const TActorId& selfID)
            : TActivationContext(mailbox, executorThread, eventStart)
            , SelfID(selfID)
        {
        }

        bool Send(const TActorId& recipient, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) const;
        template <typename TEvent>
        bool Send(const TActorId& recipient, THolder<TEvent> ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) const {
            return Send(recipient, static_cast<IEventBase*>(ev.Release()), flags, cookie, std::move(traceId));
        }
        bool Send(TAutoPtr<IEventHandle> ev) const;

        TInstant Now() const;
        TMonotonic Monotonic() const;

        /**
         * Schedule a one-shot event that will be sent at the given time point in the future.
         *
         * @param deadline the wallclock time point in the future when the event must be sent
         * @param ev       the event to send
         * @param cookie   cookie that will be piggybacked with the event
         */
        void Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const;

        /**
         * Schedule a one-shot event that will be sent at the given time point in the future.
         *
         * @param deadline the monotonic time point in the future when the event must be sent
         * @param ev       the event to send
         * @param cookie   cookie that will be piggybacked with the event
         */
        void Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const;

        /**
         * Schedule a one-shot event that will be sent after the given delay.
         *
         * @param delta  the time from now to delay sending the event
         * @param ev     the event to send
         * @param cookie cookie that will be piggybacked with the event
         */
        void Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const;

        TActorContext MakeFor(const TActorId& otherId) const {
            return TActorContext(Mailbox, ExecutorThread, EventStart, otherId);
        }

        // Register a new actor in the ActorSystem on a fresh mailbox.
        TActorId Register(IActor* actor, TMailboxType::EType mailboxType = TMailboxType::HTSwap, ui32 poolId = Max<ui32>()) const;

        // Register a new actor in the ActorSystem on the same _mailbox_ as the current actor.
        // All actors on one mailbox are executed by a single thread, which means
        // no _cpu core scalability_ for such actors.
        // This method of registration can be useful if multiple actors share
        // some memory.
        TActorId RegisterWithSameMailbox(IActor* actor) const;

        std::pair<ui32, ui32> CountMailboxEvents(ui32 maxTraverse = Max<ui32>()) const;
    };
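    // Editor's sketch (not part of this commit): typical use of the TDuration
    // overload of Schedule() from inside a state function; TEvents::TEvWakeup
    // is the stock wakeup event, used the same way by the unit tests later in
    // this diff.
    //
    //     ctx.Schedule(TDuration::Seconds(1), new TEvents::TEvWakeup());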
    extern Y_POD_THREAD(TActivationContext*) TlsActivationContext;

    struct TActorIdentity: public TActorId {
        explicit TActorIdentity(TActorId actorId)
            : TActorId(actorId)
        {
        }

        void operator=(TActorId actorId) {
            *this = TActorIdentity(actorId);
        }

        bool Send(const TActorId& recipient, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) const;
        void Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const;
        void Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const;
        void Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const;
    };

    class IActor;

    class IActorOps : TNonCopyable {
    public:
        virtual void Describe(IOutputStream&) const noexcept = 0;
        virtual bool Send(const TActorId& recipient, IEventBase*, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) const noexcept = 0;

        /**
         * Schedule a one-shot event that will be sent at the given time point in the future.
         *
         * @param deadline the wallclock time point in the future when the event must be sent
         * @param ev       the event to send
         * @param cookie   cookie that will be piggybacked with the event
         */
        virtual void Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const noexcept = 0;

        /**
         * Schedule a one-shot event that will be sent at the given time point in the future.
         *
         * @param deadline the monotonic time point in the future when the event must be sent
         * @param ev       the event to send
         * @param cookie   cookie that will be piggybacked with the event
         */
        virtual void Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const noexcept = 0;

        /**
         * Schedule a one-shot event that will be sent after the given delay.
         *
         * @param delta  the time from now to delay sending the event
         * @param ev     the event to send
         * @param cookie cookie that will be piggybacked with the event
         */
        virtual void Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const noexcept = 0;

        virtual TActorId Register(IActor*, TMailboxType::EType mailboxType = TMailboxType::HTSwap, ui32 poolId = Max<ui32>()) const noexcept = 0;
        virtual TActorId RegisterWithSameMailbox(IActor*) const noexcept = 0;
    };

    class TDecorator;

    class IActor : protected IActorOps {
    public:
        typedef void (IActor::*TReceiveFunc)(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx);

    private:
        TReceiveFunc StateFunc;
        TActorIdentity SelfActorId;
        i64 ElapsedTicks;
        ui64 HandledEvents;

        friend void DoActorInit(TActorSystem*, IActor*, const TActorId&, const TActorId&);
        friend class TDecorator;

    public:
        /// @sa services.proto NKikimrServices::TActivity::EType
        enum EActorActivity {
            OTHER = 0,
            ACTOR_SYSTEM = 1,
            ACTORLIB_COMMON = 2,
            ACTORLIB_STATS = 3,
            LOG_ACTOR = 4,
            INTERCONNECT_PROXY_TCP = 12,
            INTERCONNECT_SESSION_TCP = 13,
            INTERCONNECT_COMMON = 171,
            SELF_PING_ACTOR = 207,
            TEST_ACTOR_RUNTIME = 283,
            INTERCONNECT_HANDSHAKE = 284,
            INTERCONNECT_POLLER = 285,
            INTERCONNECT_SESSION_KILLER = 286,
            ACTOR_SYSTEM_SCHEDULER_ACTOR = 312,
            ACTOR_FUTURE_CALLBACK = 337,
            INTERCONNECT_MONACTOR = 362,
            INTERCONNECT_LOAD_ACTOR = 376,
            INTERCONNECT_LOAD_RESPONDER = 377,
            NAMESERVICE = 450,
            DNS_RESOLVER = 481,
            INTERCONNECT_PROXY_WRAPPER = 546,
        };

        using EActivityType = EActorActivity;
        ui32 ActivityType;

    protected:
        IActor(TReceiveFunc stateFunc, ui32 activityType = OTHER)
            : StateFunc(stateFunc)
            , SelfActorId(TActorId())
            , ElapsedTicks(0)
            , HandledEvents(0)
            , ActivityType(activityType)
        {
        }

    public:
        virtual ~IActor() {
        } // must not be called for registered actors, see the Die method instead

    protected:
        virtual void Die(const TActorContext& ctx); // unregisters the actor, so call it exactly once and only from inside message processing
        virtual void PassAway();

    public:
        template <typename T>
        void Become(T stateFunc) {
            StateFunc = static_cast<TReceiveFunc>(stateFunc);
        }

        template <typename T, typename... TArgs>
        void Become(T stateFunc, const TActorContext& ctx, TArgs&&... args) {
            StateFunc = static_cast<TReceiveFunc>(stateFunc);
            ctx.Schedule(std::forward<TArgs>(args)...);
        }

        template <typename T, typename... TArgs>
        void Become(T stateFunc, TArgs&&... args) {
            StateFunc = static_cast<TReceiveFunc>(stateFunc);
            Schedule(std::forward<TArgs>(args)...);
        }

    protected:
        void SetActivityType(ui32 activityType) {
            ActivityType = activityType;
        }

    public:
        TReceiveFunc CurrentStateFunc() const {
            return StateFunc;
        }

        // NOTE: exceptions must not escape the state function, but if an exception hasn't been caught
        // by the actor then we want to crash and see the stack
        void Receive(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx) {
            (this->*StateFunc)(ev, ctx);
            HandledEvents++;
        }
        // must be called to wrap any call transitions from one actor to another
        template<typename TActor, typename TMethod, typename... TArgs>
        static decltype((std::declval<TActor>().*std::declval<TMethod>())(std::declval<TArgs>()...))
        InvokeOtherActor(TActor& actor, TMethod&& method, TArgs&&... args) {
            struct TRecurseContext : TActorContext {
                TActivationContext *Prev;
                TRecurseContext(const TActorId& actorId)
                    : TActorContext(TActivationContext::ActorContextFor(actorId))
                    , Prev(TlsActivationContext)
                {
                    TlsActivationContext = this;
                }
                ~TRecurseContext() {
                    TlsActivationContext = Prev;
                }
            } context(actor.SelfId());
            return (actor.*method)(std::forward<TArgs>(args)...);
        }

        virtual void Registered(TActorSystem* sys, const TActorId& owner);

        virtual TAutoPtr<IEventHandle> AfterRegister(const TActorId& self, const TActorId& parentId) {
            Y_UNUSED(self);
            Y_UNUSED(parentId);
            return TAutoPtr<IEventHandle>();
        }

        i64 GetElapsedTicks() const {
            return ElapsedTicks;
        }
        double GetElapsedTicksAsSeconds() const;
        void AddElapsedTicks(i64 ticks) {
            ElapsedTicks += ticks;
        }
        auto GetActivityType() const {
            return ActivityType;
        }
        ui64 GetHandledEvents() const {
            return HandledEvents;
        }
        TActorIdentity SelfId() const {
            return SelfActorId;
        }

    protected:
        void Describe(IOutputStream&) const noexcept override;
        bool Send(const TActorId& recipient, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) const noexcept final;
        template <typename TEvent>
        bool Send(const TActorId& recipient, THolder<TEvent> ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) const {
            return Send(recipient, static_cast<IEventBase*>(ev.Release()), flags, cookie, std::move(traceId));
        }

        template <class TEvent, class ... TEventArgs>
        bool Send(TActorId recipient, TEventArgs&& ... args) const {
            return Send(recipient, MakeHolder<TEvent>(std::forward<TEventArgs>(args)...));
        }

        void Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const noexcept final;
        void Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const noexcept final;
        void Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const noexcept final;

        // Register a new actor in the ActorSystem on a fresh mailbox.
        TActorId Register(IActor* actor, TMailboxType::EType mailboxType = TMailboxType::HTSwap, ui32 poolId = Max<ui32>()) const noexcept final;

        // Register a new actor in the ActorSystem on the same _mailbox_ as the current actor.
        // All actors on one mailbox are executed by a single thread, which means
        // no _cpu core scalability_ for such actors.
        // This method of registration can be useful if multiple actors share
        // some memory.
        TActorId RegisterWithSameMailbox(IActor* actor) const noexcept final;

        std::pair<ui32, ui32> CountMailboxEvents(ui32 maxTraverse = Max<ui32>()) const;

    private:
        void ChangeSelfId(TActorId actorId) {
            SelfActorId = actorId;
        }
    };

    struct TActorActivityTag {};

    inline size_t GetActivityTypeCount() {
        return TLocalProcessKeyState<TActorActivityTag>::GetInstance().GetCount();
    }

    inline TStringBuf GetActivityTypeName(size_t index) {
        return TLocalProcessKeyState<TActorActivityTag>::GetInstance().GetNameByIndex(index);
    }

    template <typename TDerived>
    class TActor: public IActor {
    private:
        template <typename T, typename = const char*>
        struct HasActorName: std::false_type { };
        template <typename T>
        struct HasActorName<T, decltype((void)T::ActorName, (const char*)nullptr)>: std::true_type { };

        static ui32 GetActivityTypeIndex() {
            if constexpr(HasActorName<TDerived>::value) {
                return TLocalProcessKey<TActorActivityTag, TDerived::ActorName>::GetIndex();
            } else {
                using TActorActivity = decltype(((TDerived*)nullptr)->ActorActivityType());
                // if constexpr(std::is_enum<TActorActivity>::value) {
                return TEnumProcessKey<TActorActivityTag, TActorActivity>::GetIndex(
                    TDerived::ActorActivityType());
                //} else {
                //    // for int, ui32, ...
                //    return TEnumProcessKey<TActorActivityTag, IActor::EActorActivity>::GetIndex(
                //        static_cast<IActor::EActorActivity>(TDerived::ActorActivityType()));
                //}
            }
        }

    protected:
        //* Comment this function to find unmarked activities
        static constexpr IActor::EActivityType ActorActivityType() {
            return EActorActivity::OTHER;
        } //*/

        // static constexpr char ActorName[] = "UNNAMED";

        TActor(void (TDerived::*func)(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx), ui32 activityType = GetActivityTypeIndex())
            : IActor(static_cast<TReceiveFunc>(func), activityType)
        { }

    public:
        typedef TDerived TThis;
    };


#define STFUNC_SIG TAutoPtr< ::NActors::IEventHandle>& ev, const ::NActors::TActorContext& ctx
#define STATEFN_SIG TAutoPtr<::NActors::IEventHandle>& ev
#define STFUNC(funcName) void funcName(TAutoPtr< ::NActors::IEventHandle>& ev, const ::NActors::TActorContext& ctx)
#define STATEFN(funcName) void funcName(TAutoPtr< ::NActors::IEventHandle>& ev, const ::NActors::TActorContext& )

#define STRICT_STFUNC(NAME, HANDLERS)                                                               \
    void NAME(STFUNC_SIG) {                                                                         \
        Y_UNUSED(ctx);                                                                              \
        switch (const ui32 etype = ev->GetTypeRewrite()) {                                          \
            HANDLERS                                                                                \
            default:                                                                                \
                Y_VERIFY_DEBUG(false, "%s: unexpected message type 0x%08" PRIx32, __func__, etype); \
        }                                                                                           \
    }
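// Editor's sketch (not part of this commit): a strict state function built
// with the macro above; HFunc comes from hfunc.h (included by the unit tests
// below), and TEvRequest/Handle are hypothetical user declarations. Any
// unlisted event type trips the Y_VERIFY_DEBUG in the default branch.
//
//     STRICT_STFUNC(StateWork,
//         HFunc(TEvRequest, Handle)
//     )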
    inline const TActorContext& TActivationContext::AsActorContext() {
        TActivationContext* tls = TlsActivationContext;
        return *static_cast<TActorContext*>(tls);
    }

    inline TActorContext TActivationContext::ActorContextFor(TActorId id) {
        auto& tls = *TlsActivationContext;
        return TActorContext(tls.Mailbox, tls.ExecutorThread, tls.EventStart, id);
    }

    class TDecorator : public IActor {
    protected:
        THolder<IActor> Actor;

    public:
        TDecorator(THolder<IActor>&& actor)
            : IActor(static_cast<TReceiveFunc>(&TDecorator::State), actor->GetActivityType())
            , Actor(std::move(actor))
        {
        }

        void Registered(TActorSystem* sys, const TActorId& owner) override {
            Actor->ChangeSelfId(SelfId());
            Actor->Registered(sys, owner);
        }

        virtual bool DoBeforeReceiving(TAutoPtr<IEventHandle>& /*ev*/, const TActorContext& /*ctx*/) {
            return true;
        }

        virtual void DoAfterReceiving(const TActorContext& /*ctx*/)
        {
        }

        STFUNC(State) {
            if (DoBeforeReceiving(ev, ctx)) {
                Actor->Receive(ev, ctx);
                DoAfterReceiving(ctx);
            }
        }
    };

    // TTestDecorator doesn't work with the real actor system
    struct TTestDecorator : public TDecorator {
        TTestDecorator(THolder<IActor>&& actor)
            : TDecorator(std::move(actor))
        {
        }

        virtual ~TTestDecorator() = default;

        // This method must be called in the test actor system
        bool BeforeSending(TAutoPtr<IEventHandle>& ev)
        {
            bool send = true;
            TTestDecorator *decorator = dynamic_cast<TTestDecorator*>(Actor.Get());
            if (decorator) {
                send = decorator->BeforeSending(ev);
            }
            return send && ev && DoBeforeSending(ev);
        }

        virtual bool DoBeforeSending(TAutoPtr<IEventHandle>& /*ev*/) {
            return true;
        }
    };
}

template <>
inline void Out<NActors::TActorIdentity>(IOutputStream& o, const NActors::TActorIdentity& x) {
    return x.Out(o);
}

template <>
struct THash<NActors::TActorIdentity> {
    inline ui64 operator()(const NActors::TActorIdentity& x) const {
        return x.Hash();
    }
};
diff --git a/library/cpp/actors/core/actor_bootstrapped.h b/library/cpp/actors/core/actor_bootstrapped.h
new file mode 100644
index 0000000000..a37887c939
--- /dev/null
+++ b/library/cpp/actors/core/actor_bootstrapped.h
@@ -0,0 +1,37 @@

#pragma once

#include "actor.h"
#include "events.h"

namespace NActors {
    template<typename T> struct dependent_false : std::false_type {};

    template<typename TDerived>
    class TActorBootstrapped : public TActor<TDerived> {
    protected:
        TAutoPtr<IEventHandle> AfterRegister(const TActorId& self, const TActorId& parentId) override {
            return new IEventHandle(TEvents::TSystem::Bootstrap, 0, self, parentId, {}, 0);
        }

        STFUNC(StateBootstrap) {
            Y_VERIFY(ev->GetTypeRewrite() == TEvents::TSystem::Bootstrap, "Unexpected bootstrap message");
            using T = decltype(&TDerived::Bootstrap);
            TDerived& self = static_cast<TDerived&>(*this);
            if constexpr (std::is_invocable_v<T, TDerived, const TActorContext&>) {
                self.Bootstrap(ctx);
            } else if constexpr (std::is_invocable_v<T, TDerived, const TActorId&, const TActorContext&>) {
                self.Bootstrap(ev->Sender, ctx);
            } else if constexpr (std::is_invocable_v<T, TDerived>) {
                self.Bootstrap();
            } else if constexpr (std::is_invocable_v<T, TDerived, const TActorId&>) {
                self.Bootstrap(ev->Sender);
            } else {
                static_assert(dependent_false<TDerived>::value, "No correct Bootstrap() signature");
            }
        }

        TActorBootstrapped()
            : TActor<TDerived>(&TDerived::StateBootstrap)
        {}
    };
}
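A minimal sketch of subclassing `TActorBootstrapped`, matching one of the `Bootstrap()` signatures dispatched by `StateBootstrap` above. The actor itself and its state function are hypothetical; `STFUNC`, `Become`, `TEvents::TEvWakeup`, and `PassAway` come from this commit:

```cpp
#include "actor_bootstrapped.h"
#include "events.h"

class TMyActor: public NActors::TActorBootstrapped<TMyActor> {
public:
    void Bootstrap(const NActors::TActorContext& ctx) {
        // Switch from StateBootstrap to the working state and arm a timer.
        Become(&TMyActor::StateWork);
        ctx.Schedule(TDuration::Seconds(1), new NActors::TEvents::TEvWakeup());
    }

    STFUNC(StateWork) {
        Y_UNUSED(ctx);
        switch (ev->GetTypeRewrite()) {
            case NActors::TEvents::TSystem::Wakeup:
                PassAway(); // single wakeup, then unregister
                break;
        }
    }
};
```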
diff --git a/library/cpp/actors/core/actor_coroutine.cpp b/library/cpp/actors/core/actor_coroutine.cpp
new file mode 100644
index 0000000000..0ab4d2b24d
--- /dev/null
+++ b/library/cpp/actors/core/actor_coroutine.cpp
@@ -0,0 +1,165 @@

#include "actor_coroutine.h"
#include "executor_thread.h"

#include <util/system/sanitizers.h>
#include <util/system/type_name.h>

namespace NActors {
    static constexpr size_t StackOverflowGap = 4096;
    static char GoodStack[StackOverflowGap];

    static struct TInitGoodStack {
        TInitGoodStack() {
            // fill the stack with a pseudo-random pattern
            for (size_t k = 0; k < StackOverflowGap; ++k) {
                GoodStack[k] = k + k * 91;
            }
        }
    } initGoodStack;

    TActorCoroImpl::TActorCoroImpl(size_t stackSize, bool allowUnhandledPoisonPill, bool allowUnhandledDtor)
        : Stack(stackSize)
        , AllowUnhandledPoisonPill(allowUnhandledPoisonPill)
        , AllowUnhandledDtor(allowUnhandledDtor)
        , FiberClosure{this, TArrayRef(Stack.Begin(), Stack.End())}
        , FiberContext(FiberClosure)
    {
#ifndef NDEBUG
        char* p;
#if STACK_GROW_DOWN
        p = Stack.Begin();
#else
        p = Stack.End() - StackOverflowGap;
#endif
        memcpy(p, GoodStack, StackOverflowGap);
#endif
    }

    TActorCoroImpl::~TActorCoroImpl() {
        if (!Finished && !NSan::TSanIsOn()) { // only resume when we have bootstrapped and Run() was entered and not yet finished; otherwise simply terminate
            Y_VERIFY(!PendingEvent);
            Resume();
        }
    }

    bool TActorCoroImpl::Send(TAutoPtr<IEventHandle> ev) {
        return GetActorContext().ExecutorThread.Send(ev);
    }

    THolder<IEventHandle> TActorCoroImpl::WaitForEvent(TInstant deadline) {
        const ui64 cookie = ++WaitCookie;
        if (deadline != TInstant::Max()) {
            ActorContext->ExecutorThread.Schedule(deadline - Now(), new IEventHandle(SelfActorId, {}, new TEvCoroTimeout,
                0, cookie));
        }

        // ensure we have no unprocessed event and return to the actor system to receive one
        Y_VERIFY(!PendingEvent);
        ReturnToActorSystem();

        // obtain the pending event and ensure we've got one
        while (THolder<IEventHandle> event = std::exchange(PendingEvent, {})) {
            if (event->GetTypeRewrite() != TEvents::TSystem::CoroTimeout) {
                // special handling for the poison pill -- we throw an exception
                if (event->GetTypeRewrite() == TEvents::TEvPoisonPill::EventType) {
                    throw TPoisonPillException();
                }

                // otherwise just return the received event
                return event;
            } else if (event->Cookie == cookie) {
                return nullptr; // it is not a race -- we've got a timeout exactly for our current wait
            } else {
                ReturnToActorSystem(); // drop this event and wait for the next one
            }
        }
        Y_FAIL("no pending event");
    }

    const TActorContext& TActorCoroImpl::GetActorContext() const {
        Y_VERIFY(ActorContext);
        return *ActorContext;
    }

    bool TActorCoroImpl::ProcessEvent(THolder<IEventHandle> ev) {
        Y_VERIFY(!PendingEvent);
        if (!SelfActorId) { // process the bootstrap message, extract actor ids
            Y_VERIFY(ev->GetTypeRewrite() == TEvents::TSystem::Bootstrap);
            SelfActorId = ev->Recipient;
            ParentActorId = ev->Sender;
        } else { // process further messages
            PendingEvent = std::move(ev);
        }

        // prepare the actor context for in-coroutine use
        TActivationContext *ac = TlsActivationContext;
        TlsActivationContext = nullptr;
        TActorContext ctx(ac->Mailbox, ac->ExecutorThread, ac->EventStart, SelfActorId);
        ActorContext = &ctx;

        Resume();

        // drop the actor context
        TlsActivationContext = ac;
        ActorContext = nullptr;

        return Finished;
    }

    void TActorCoroImpl::Resume() {
        // save the caller context for a later return
        Y_VERIFY(!ActorSystemContext);
        TExceptionSafeContext actorSystemContext;
        ActorSystemContext = &actorSystemContext;

        // go to the actor coroutine
        BeforeResume();
        ActorSystemContext->SwitchTo(&FiberContext);

        // check for stack overflow
#ifndef NDEBUG
        const char* p;
#if STACK_GROW_DOWN
        p = Stack.Begin();
#else
        p = Stack.End() - StackOverflowGap;
#endif
        Y_VERIFY_DEBUG(memcmp(p, GoodStack, StackOverflowGap) == 0);
#endif
    }

    void TActorCoroImpl::DoRun() {
        try {
            if (ActorContext) { // ActorContext may be nullptr here if the destructor was invoked before bootstrapping
                Y_VERIFY(!PendingEvent);
                Run();
            }
        } catch (const TPoisonPillException& /*ex*/) {
            if (!AllowUnhandledPoisonPill) {
                Y_FAIL("unhandled TPoisonPillException");
            }
        } catch (const TDtorException& /*ex*/) {
            if (!AllowUnhandledDtor) {
                Y_FAIL("unhandled TDtorException");
            }
        } catch (const std::exception& ex) {
            Y_FAIL("unhandled exception of type %s", TypeName(ex).data());
        } catch (...) {
            Y_FAIL("unhandled exception of a type not derived from std::exception");
        }
        Finished = true;
        ReturnToActorSystem();
    }

    void TActorCoroImpl::ReturnToActorSystem() {
        TExceptionSafeContext* returnContext = std::exchange(ActorSystemContext, nullptr);
        Y_VERIFY(returnContext);
        FiberContext.SwitchTo(returnContext);
        if (!PendingEvent) {
            // we have returned from the actor system and it kindly asks us to terminate the coroutine as it is being
            // stopped
            throw TDtorException();
        }
    }

}
diff --git a/library/cpp/actors/core/actor_coroutine.h b/library/cpp/actors/core/actor_coroutine.h
new file mode 100644
index 0000000000..6bcb768eaf
--- /dev/null
+++ b/library/cpp/actors/core/actor_coroutine.h
@@ -0,0 +1,174 @@

#pragma once

#include <util/system/context.h>
#include <util/system/filemap.h>

#include "actor_bootstrapped.h"
#include "executor_thread.h"
#include "event_local.h"

namespace NActors {

    class TActorCoro;

    class TActorCoroImpl : public ITrampoLine {
        TMappedAllocation Stack;
        bool AllowUnhandledPoisonPill;
        bool AllowUnhandledDtor;
        TContClosure FiberClosure;
        TExceptionSafeContext FiberContext;
        TExceptionSafeContext* ActorSystemContext = nullptr;
        THolder<IEventHandle> PendingEvent;
        bool Finished = false;
        ui64 WaitCookie = 0;
        TActorContext *ActorContext = nullptr;

    protected:
        TActorIdentity SelfActorId = TActorIdentity(TActorId());
        TActorId ParentActorId;

    private:
        template <typename TFirstEvent, typename... TOtherEvents>
        struct TIsOneOf: public TIsOneOf<TOtherEvents...> {
            bool operator()(IEventHandle& ev) const {
                return ev.GetTypeRewrite() == TFirstEvent::EventType || TIsOneOf<TOtherEvents...>()(ev);
            }
        };

        template <typename TSingleEvent>
        struct TIsOneOf<TSingleEvent> {
            bool operator()(IEventHandle& ev) const {
                return ev.GetTypeRewrite() == TSingleEvent::EventType;
            }
        };

        struct TEvCoroTimeout : TEventLocal<TEvCoroTimeout, TEvents::TSystem::CoroTimeout> {};

    protected:
        struct TPoisonPillException : yexception {};
        struct TDtorException : yexception {};

    public:
        TActorCoroImpl(size_t stackSize, bool allowUnhandledPoisonPill = false, bool allowUnhandledDtor = false);
        // specify stackSize explicitly for each actor; don't forget about the overflow-control gap

        virtual ~TActorCoroImpl();

        virtual void Run() = 0;

        virtual void BeforeResume() {}

        // Handle all events that are not expected in wait loops.
        virtual void ProcessUnexpectedEvent(TAutoPtr<IEventHandle> ev) = 0;

        // Release execution ownership and wait for some event to arrive. When a PoisonPill event is received,
        // a TPoisonPillException is thrown.
        THolder<IEventHandle> WaitForEvent(TInstant deadline = TInstant::Max());

        // Wait for a specific event selected by the filter functor. The function returns the first event that
        // matches the filter. On any other kind of event ProcessUnexpectedEvent() is called.
        //
        // Example: WaitForSpecificEvent([](IEventHandle& ev) { return ev.Cookie == 42; });
        template <typename TFunc>
        THolder<IEventHandle> WaitForSpecificEvent(TFunc&& filter, TInstant deadline = TInstant::Max()) {
            for (;;) {
                if (THolder<IEventHandle> event = WaitForEvent(deadline); !event) {
                    return nullptr;
                } else if (filter(*event)) {
                    return event;
                } else {
                    ProcessUnexpectedEvent(event);
                }
            }
        }
        // Wait for a specific event from a set of events. The function returns the first event that matches one of
        // the listed types. On any other kind of event ProcessUnexpectedEvent() is called.
        //
        // Example: WaitForSpecificEvent<TEvReadResult, TEvFinished>();
        template <typename TFirstEvent, typename TSecondEvent, typename... TOtherEvents>
        THolder<IEventHandle> WaitForSpecificEvent(TInstant deadline = TInstant::Max()) {
            TIsOneOf<TFirstEvent, TSecondEvent, TOtherEvents...> filter;
            return WaitForSpecificEvent(filter, deadline);
        }

        // Wait for a single specific event.
        template <typename TEventType>
        THolder<typename TEventType::THandle> WaitForSpecificEvent(TInstant deadline = TInstant::Max()) {
            auto filter = [](IEventHandle& ev) {
                return ev.GetTypeRewrite() == TEventType::EventType;
            };
            THolder<IEventHandle> event = WaitForSpecificEvent(filter, deadline);
            return THolder<typename TEventType::THandle>(static_cast<typename TEventType::THandle*>(event ? event.Release() : nullptr));
        }

    protected: // Actor System compatibility section
        const TActorContext& GetActorContext() const;
        TActorSystem *GetActorSystem() const { return GetActorContext().ExecutorThread.ActorSystem; }
        TInstant Now() const { return GetActorContext().Now(); }

        bool Send(const TActorId& recipient, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) {
            return GetActorContext().Send(recipient, ev, flags, cookie, std::move(traceId));
        }

        template <typename TEvent>
        bool Send(const TActorId& recipient, THolder<TEvent> ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) {
            return GetActorContext().Send(recipient, ev.Release(), flags, cookie, std::move(traceId));
        }

        bool Send(TAutoPtr<IEventHandle> ev);

        void Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie = nullptr) {
            return GetActorContext().Schedule(delta, ev, cookie);
        }

        void Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) {
            return GetActorContext().Schedule(deadline, ev, cookie);
        }

        void Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) {
            return GetActorContext().Schedule(deadline, ev, cookie);
        }

        TActorId Register(IActor* actor, TMailboxType::EType mailboxType = TMailboxType::HTSwap, ui32 poolId = Max<ui32>()) {
            return GetActorContext().Register(actor, mailboxType, poolId);
        }

        TActorId RegisterWithSameMailbox(IActor* actor) {
            return GetActorContext().RegisterWithSameMailbox(actor);
        }

    private:
        friend class TActorCoro;
        bool ProcessEvent(THolder<IEventHandle> ev);

    private:
        /* Resume() switches to the actor coroutine context and continues (or starts) executing it until the actor
         * finishes its job or blocks on WaitForEvent. Then the function returns. */
        void Resume();
        void ReturnToActorSystem();
        void DoRun() override final;
    };

    class TActorCoro : public IActor {
        THolder<TActorCoroImpl> Impl;

    public:
        TActorCoro(THolder<TActorCoroImpl> impl, ui32 activityType = IActor::ACTORLIB_COMMON)
            : IActor(static_cast<TReceiveFunc>(&TActorCoro::StateFunc), activityType)
            , Impl(std::move(impl))
        {}

        TAutoPtr<IEventHandle> AfterRegister(const TActorId& self, const TActorId& parent) override {
            return new IEventHandle(TEvents::TSystem::Bootstrap, 0, self, parent, {}, 0);
        }

    private:
        STATEFN(StateFunc) {
            if (Impl->ProcessEvent(ev)) {
                PassAway();
            }
        }
    };

}
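A condensed sketch of a coroutine actor built on `TActorCoroImpl`, mirroring `TCoroActor` from the unit test that follows; `TEvResponse` stands in for some user event type (the test defines its own):

```cpp
class TMyCoro: public NActors::TActorCoroImpl {
public:
    TMyCoro()
        : TActorCoroImpl(1 << 20) // explicit stack size, with the overflow-control gap in mind
    {}

    void Run() override {
        try {
            for (;;) {
                // Suspends the coroutine until a TEvResponse arrives; other
                // events are routed to ProcessUnexpectedEvent().
                auto ev = WaitForSpecificEvent<TEvResponse>();
                // ... handle ev ...
            }
        } catch (const TPoisonPillException&) {
            // an unhandled poison pill would abort; catching it ends Run() cleanly
        }
    }

    void ProcessUnexpectedEvent(TAutoPtr<NActors::IEventHandle> /*ev*/) override {}
};

// Wiring: the impl runs inside a TActorCoro shell actor:
//     actorSystem.Register(new NActors::TActorCoro(MakeHolder<TMyCoro>()));
```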
diff --git a/library/cpp/actors/core/actor_coroutine_ut.cpp b/library/cpp/actors/core/actor_coroutine_ut.cpp
new file mode 100644
index 0000000000..951512b877
--- /dev/null
+++ b/library/cpp/actors/core/actor_coroutine_ut.cpp
@@ -0,0 +1,141 @@

#include "actor_coroutine.h"
#include "actorsystem.h"
#include "executor_pool_basic.h"
#include "scheduler_basic.h"
#include "events.h"
#include "event_local.h"
#include "hfunc.h"
#include <library/cpp/testing/unittest/registar.h>

#include <util/system/sanitizers.h>

using namespace NActors;

Y_UNIT_TEST_SUITE(ActorCoro) {
    enum {
        Begin = EventSpaceBegin(TEvents::ES_USERSPACE),
        Request,
        Response,
        Enough
    };

    struct TEvRequest: public TEventLocal<TEvRequest, Request> {
    };

    struct TEvResponse: public TEventLocal<TEvResponse, Response> {
    };

    struct TEvEnough: public TEventLocal<TEvEnough, Enough> {
    };

    class TBasicResponderActor: public TActorBootstrapped<TBasicResponderActor> {
        TDeque<TActorId> RespondTo;

    public:
        TBasicResponderActor() {
        }

        void Bootstrap(const TActorContext& /*ctx*/) {
            Become(&TBasicResponderActor::StateFunc);
        }

        STFUNC(StateFunc) {
            switch (ev->GetTypeRewrite()) {
                HFunc(TEvRequest, Handle);
                HFunc(TEvents::TEvWakeup, Handle);
                HFunc(TEvents::TEvPoisonPill, Handle);
            }
        }

        void Handle(TEvRequest::TPtr& ev, const TActorContext& ctx) {
            RespondTo.push_back(ev->Sender);
            ctx.Schedule(TDuration::Seconds(1), new TEvents::TEvWakeup);
        }

        void Handle(TEvents::TEvWakeup::TPtr& /*ev*/, const TActorContext& ctx) {
            ctx.Send(RespondTo.front(), new TEvResponse());
            RespondTo.pop_front();
        }

        void Handle(TEvents::TEvPoisonPill::TPtr& /*ev*/, const TActorContext& ctx) {
            Die(ctx);
        }
    };

    class TCoroActor: public TActorCoroImpl {
        TManualEvent& DoneEvent;
        TAtomic& ItemsProcessed;
        bool Finish;

    public:
        TCoroActor(TManualEvent& doneEvent, TAtomic& itemsProcessed)
            : TActorCoroImpl(1 << 20)
            , DoneEvent(doneEvent)
            , ItemsProcessed(itemsProcessed)
            , Finish(false)
        {
        }

        void Run() override {
            TActorId child = GetActorContext().Register(new TBasicResponderActor);
            ui32 itemsProcessed = 0;
            try {
                while (!Finish) {
                    GetActorContext().Send(child, new TEvRequest());
                    THolder<IEventHandle> resp = WaitForSpecificEvent<TEvResponse>();
                    UNIT_ASSERT_EQUAL(resp->GetTypeRewrite(), TEvResponse::EventType);
                    ++itemsProcessed;
                }
            } catch (const TPoisonPillException& /*ex*/) {
            }
            GetActorContext().Send(child, new TEvents::TEvPoisonPill);

            AtomicSet(ItemsProcessed, itemsProcessed);
            DoneEvent.Signal();
        }

        void ProcessUnexpectedEvent(TAutoPtr<IEventHandle> event) override {
            if (event->GetTypeRewrite() == Enough) {
                Finish = true;
            }
        }
    };

    void Check(THolder<IEventBase>&& message) {
        THolder<TActorSystemSetup> setup = MakeHolder<TActorSystemSetup>();
        setup->NodeId = 0;
        setup->ExecutorsCount = 1;
        setup->Executors.Reset(new TAutoPtr<IExecutorPool>[setup->ExecutorsCount]);
        for (ui32 i = 0; i < setup->ExecutorsCount; ++i) {
            setup->Executors[i] = new TBasicExecutorPool(i, 5, 10, "basic");
        }
        setup->Scheduler = new TBasicSchedulerThread;

        TActorSystem actorSystem(setup);

        actorSystem.Start();

        TManualEvent doneEvent;
        TAtomic itemsProcessed = 0;
        TActorId actor = actorSystem.Register(new TActorCoro(MakeHolder<TCoroActor>(doneEvent, itemsProcessed)));
        NanoSleep(3UL * 1000 * 1000 * 1000);
        actorSystem.Send(actor, message.Release());
        doneEvent.WaitI();

        UNIT_ASSERT(AtomicGet(itemsProcessed) >= 2);

        actorSystem.Stop();
    }

    Y_UNIT_TEST(Basic) {
        if (NSan::TSanIsOn()) {
            // TODO https://st.yandex-team.ru/DEVTOOLS-3154
            return;
        }
        Check(MakeHolder<TEvEnough>());
    }

    Y_UNIT_TEST(PoisonPill) {
        Check(MakeHolder<TEvents::TEvPoisonPill>());
    }
}
diff --git a/library/cpp/actors/core/actor_ut.cpp b/library/cpp/actors/core/actor_ut.cpp
new file mode 100644
index 0000000000..e1b765ec72
--- /dev/null
+++ b/library/cpp/actors/core/actor_ut.cpp
@@ -0,0 +1,578 @@

#include "actor.cpp"
#include "events.h"
#include "actorsystem.h"
#include "executor_pool_basic.h"
#include "scheduler_basic.h"
#include "actor_bootstrapped.h"

#include <library/cpp/actors/util/threadparkpad.h>
#include <library/cpp/testing/unittest/registar.h>

#include <util/generic/algorithm.h>
#include <util/system/atomic.h>
#include <util/system/rwlock.h>
#include <util/system/hp_timer.h>

using namespace NActors;

struct TTestEndDecorator : TDecorator {
    TThreadParkPad* Pad;
    TAtomic* ActorsAlive;

    TTestEndDecorator(THolder<IActor>&& actor, TThreadParkPad* pad, TAtomic* actorsAlive)
        : TDecorator(std::move(actor))
        , Pad(pad)
        , ActorsAlive(actorsAlive)
    {
        AtomicIncrement(*ActorsAlive);
    }

    ~TTestEndDecorator() {
        if (AtomicDecrement(*ActorsAlive) == 0) {
            Pad->Unpark();
        }
    }
};

Y_UNIT_TEST_SUITE(ActorBenchmark) {
    static constexpr bool DefaultNoRealtime = true;
    static constexpr ui32 DefaultSpinThreshold = 1000000;
    static constexpr ui32 TotalEventsAmount = 1000;

    class TDummyActor : public TActor<TDummyActor> {
    public:
        TDummyActor() : TActor<TDummyActor>(&TDummyActor::StateFunc) {}
        STFUNC(StateFunc) {
            (void)ev;
            (void)ctx;
        }
    };

    enum ERole {
        Leader,
        Follower
    };

    class TSendReceiveActor : public TActorBootstrapped<TSendReceiveActor> {
    public:
        static constexpr auto ActorActivityType() {
            return ACTORLIB_COMMON;
        }

        TSendReceiveActor(double* elapsedTime, TActorId receiver, bool allocation, ERole role, ui32 neighbours = 0)
            : EventsCounter(TotalEventsAmount)
            , ElapsedTime(elapsedTime)
            , Receiver(receiver)
            , AllocatesMemory(allocation)
            , Role(role)
            , MailboxNeighboursCount(neighbours)
        {}

        void Bootstrap(const TActorContext &ctx) {
            if (!Receiver) {
                this->Receiver = SelfId();
            } else {
                EventsCounter /= 2; // We want to measure the CPU requirement for a one-way send
            }
            Timer.Reset();
            Become(&TThis::StateFunc);
            for (ui32 i = 0; i < MailboxNeighboursCount; ++i) {
                ctx.RegisterWithSameMailbox(new TDummyActor());
            }
            if (Role == Leader) {
                Send(Receiver, new TEvents::TEvPing());
            }
        }

        STATEFN(StateFunc) {
            if (EventsCounter == 0 && ElapsedTime != nullptr) {
                *ElapsedTime = Timer.Passed() / TotalEventsAmount;
                PassAway();
            }

            if (AllocatesMemory) {
                Send(ev->Sender, new TEvents::TEvPing());
            } else {
                std::swap(*const_cast<TActorId*>(&ev->Sender), *const_cast<TActorId*>(&ev->Recipient));
                ev->DropRewrite();
                TActivationContext::Send(ev.Release());
            }
            EventsCounter--;
        }

    private:
        THPTimer Timer;
        ui64 EventsCounter;
        double* ElapsedTime;
        TActorId Receiver;
        bool AllocatesMemory;
        ERole Role;
        ui32 MailboxNeighboursCount;
    };

    void AddBasicPool(THolder<TActorSystemSetup>& setup, ui32 threads, bool activateEveryEvent) {
        TBasicExecutorPoolConfig basic;
        basic.PoolId = setup->GetExecutorsCount();
        basic.PoolName = TStringBuilder() << "b" << basic.PoolId;
        basic.Threads = threads;
        basic.SpinThreshold = DefaultSpinThreshold;
        basic.TimePerMailbox = TDuration::Hours(1);
        if (activateEveryEvent) {
            basic.EventsPerMailbox = 1;
        } else {
            basic.EventsPerMailbox = Max<ui32>();
        }
        setup->CpuManager.Basic.emplace_back(std::move(basic));
    }

    void AddUnitedPool(THolder<TActorSystemSetup>& setup, ui32 concurrency, bool activateEveryEvent) {
        TUnitedExecutorPoolConfig united;
        united.PoolId = setup->GetExecutorsCount();
        united.PoolName = TStringBuilder() << "u" << united.PoolId;
        united.Concurrency = concurrency;
        united.TimePerMailbox = TDuration::Hours(1);
        if (activateEveryEvent) {
            united.EventsPerMailbox = 1;
        } else {
            united.EventsPerMailbox = Max<ui32>();
        }
        setup->CpuManager.United.emplace_back(std::move(united));
    }

    THolder<TActorSystemSetup> GetActorSystemSetup(ui32 unitedCpuCount, bool preemption) {
        auto setup = MakeHolder<NActors::TActorSystemSetup>();
        setup->NodeId = 1;
        setup->CpuManager.UnitedWorkers.CpuCount = unitedCpuCount;
        setup->CpuManager.UnitedWorkers.SpinThresholdUs = DefaultSpinThreshold;
        setup->CpuManager.UnitedWorkers.NoRealtime = DefaultNoRealtime;
        if (preemption) {
            setup->CpuManager.UnitedWorkers.PoolLimitUs = 500;
            setup->CpuManager.UnitedWorkers.EventLimitUs = 100;
            setup->CpuManager.UnitedWorkers.LimitPrecisionUs = 100;
        } else {
            setup->CpuManager.UnitedWorkers.PoolLimitUs = 100'000'000'000;
            setup->CpuManager.UnitedWorkers.EventLimitUs = 10'000'000'000;
            setup->CpuManager.UnitedWorkers.LimitPrecisionUs = 10'000'000'000;
        }
        setup->Scheduler = new TBasicSchedulerThread(NActors::TSchedulerConfig(512, 0));
        return setup;
    }

    enum class EPoolType {
        Basic,
        United
    };

    THolder<TActorSystemSetup> InitActorSystemSetup(EPoolType poolType, ui32 poolsCount, ui32 threads, bool activateEveryEvent, bool preemption) {
        if (poolType == EPoolType::Basic) {
            THolder<TActorSystemSetup> setup = GetActorSystemSetup(0, false);
            for (ui32 i = 0; i < poolsCount; ++i) {
                AddBasicPool(setup, threads, activateEveryEvent);
            }
            return setup;
        } else if (poolType == EPoolType::United) {
            THolder<TActorSystemSetup> setup = GetActorSystemSetup(poolsCount * threads, preemption);
            for (ui32 i = 0; i < poolsCount; ++i) {
                AddUnitedPool(setup, threads, activateEveryEvent);
            }
            return setup;
        }
        Y_FAIL();
    }

    double BenchSendReceive(bool allocation, NActors::TMailboxType::EType mType, EPoolType poolType) {
        THolder<TActorSystemSetup> setup = InitActorSystemSetup(poolType, 1, 1, false, false);
        TActorSystem actorSystem(setup);
        actorSystem.Start();

        TThreadParkPad pad;
        TAtomic actorsAlive = 0;
        double elapsedTime = 0;
        THolder<IActor> endActor{
            new TTestEndDecorator(THolder(
                new TSendReceiveActor(&elapsedTime, {}, allocation, Leader)), &pad, &actorsAlive)};

        actorSystem.Register(endActor.Release(), mType);

        pad.Park();
        actorSystem.Stop();

        return 1e9 * elapsedTime;
    }
    double BenchSendActivateReceive(ui32 poolsCount, ui32 threads, bool allocation, EPoolType poolType) {
        THolder<TActorSystemSetup> setup = InitActorSystemSetup(poolType, poolsCount, threads, true, false);
        TActorSystem actorSystem(setup);
        actorSystem.Start();

        TThreadParkPad pad;
        TAtomic actorsAlive = 0;
        double elapsedTime = 0;
        ui32 followerPoolId = 0;

        ui32 leaderPoolId = poolsCount == 1 ? 0 : 1;
        TActorId followerId = actorSystem.Register(
            new TSendReceiveActor(nullptr, {}, allocation, Follower), TMailboxType::HTSwap, followerPoolId);
        THolder<IActor> leader{
            new TTestEndDecorator(THolder(
                new TSendReceiveActor(&elapsedTime, followerId, allocation, Leader)), &pad, &actorsAlive)};
        actorSystem.Register(leader.Release(), TMailboxType::HTSwap, leaderPoolId);

        pad.Park();
        actorSystem.Stop();

        return 1e9 * elapsedTime;
    }

    double BenchSendActivateReceiveWithMailboxNeighbours(ui32 MailboxNeighbourActors, EPoolType poolType) {
        THolder<TActorSystemSetup> setup = InitActorSystemSetup(poolType, 1, 1, false, false);
        TActorSystem actorSystem(setup);
        actorSystem.Start();

        TThreadParkPad pad;
        TAtomic actorsAlive = 0;
        double elapsedTime = 0;

        TActorId followerId = actorSystem.Register(
            new TSendReceiveActor(nullptr, {}, false, Follower, MailboxNeighbourActors), TMailboxType::HTSwap);
        THolder<IActor> leader{
            new TTestEndDecorator(THolder(
                new TSendReceiveActor(&elapsedTime, followerId, false, Leader, MailboxNeighbourActors)), &pad, &actorsAlive)};
        actorSystem.Register(leader.Release(), TMailboxType::HTSwap);

        pad.Park();
        actorSystem.Stop();

        return 1e9 * elapsedTime;
    }

    double BenchContentedThreads(ui32 threads, ui32 actorsPairsCount, EPoolType poolType) {
        THolder<TActorSystemSetup> setup = InitActorSystemSetup(poolType, 1, threads, true, false);
        TActorSystem actorSystem(setup);
        actorSystem.Start();

        TThreadParkPad pad;
        TAtomic actorsAlive = 0;
        THPTimer Timer;

        TVector<double> dummy(actorsPairsCount);
        Timer.Reset();
        for (ui32 i = 0; i < actorsPairsCount; ++i) {
            ui32 followerPoolId = 0;
            ui32 leaderPoolId = 0;
            TActorId followerId = actorSystem.Register(
                new TSendReceiveActor(nullptr, {}, true, Follower), TMailboxType::HTSwap, followerPoolId);
            THolder<IActor> leader{
                new TTestEndDecorator(THolder(
                    new TSendReceiveActor(&dummy[i], followerId, true, Leader)), &pad, &actorsAlive)};
            actorSystem.Register(leader.Release(), TMailboxType::HTSwap, leaderPoolId);
        }

        pad.Park();
        auto elapsedTime = Timer.Passed() / TotalEventsAmount;
        actorSystem.Stop();

        return 1e9 * elapsedTime;
    }

    auto Mean(const TVector<double>& data) {
        return Accumulate(data.begin(), data.end(), 0.0) / data.size();
    }

    auto Deviation(const TVector<double>& data) {
        auto mean = Mean(data);
        double deviation = 0.0;
        for (const auto& x : data) {
            deviation += (x - mean) * (x - mean);
        }
        return std::sqrt(deviation / data.size());
    }

    struct TStats {
        double Mean;
        double Deviation;
        TString ToString() {
            return TStringBuilder() << Mean << " ± " << Deviation << " ns " << std::ceil(Deviation / Mean * 1000) / 10.0 << "%";
        }
    };

    template <typename Func>
    TStats CountStats(Func func, ui32 itersCount = 5) {
        TVector<double> elapsedTimes;
        for (ui32 i = 0; i < itersCount; ++i) {
            auto elapsedTime = func();
            elapsedTimes.push_back(elapsedTime);
        }
        return {Mean(elapsedTimes), Deviation(elapsedTimes)};
    }
    TVector<NActors::TMailboxType::EType> MailboxTypes = {
        TMailboxType::Simple,
        TMailboxType::Revolving,
        TMailboxType::HTSwap,
        TMailboxType::ReadAsFilled,
        TMailboxType::TinyReadAsFilled
    };

    Y_UNIT_TEST(SendReceive1Pool1ThreadAlloc) {
        for (const auto& mType : MailboxTypes) {
            auto stats = CountStats([mType] {
                return BenchSendReceive(true, mType, EPoolType::Basic);
            });
            Cerr << stats.ToString() << " " << mType << Endl;
        }
    }

    Y_UNIT_TEST(SendReceive1Pool1ThreadAllocUnited) {
        for (const auto& mType : MailboxTypes) {
            auto stats = CountStats([mType] {
                return BenchSendReceive(true, mType, EPoolType::United);
            });
            Cerr << stats.ToString() << " " << mType << Endl;
        }
    }

    Y_UNIT_TEST(SendReceive1Pool1ThreadNoAlloc) {
        for (const auto& mType : MailboxTypes) {
            auto stats = CountStats([mType] {
                return BenchSendReceive(false, mType, EPoolType::Basic);
            });
            Cerr << stats.ToString() << " " << mType << Endl;
        }
    }

    Y_UNIT_TEST(SendReceive1Pool1ThreadNoAllocUnited) {
        for (const auto& mType : MailboxTypes) {
            auto stats = CountStats([mType] {
                return BenchSendReceive(false, mType, EPoolType::United);
            });
            Cerr << stats.ToString() << " " << mType << Endl;
        }
    }

    Y_UNIT_TEST(SendActivateReceive1Pool1ThreadAlloc) {
        auto stats = CountStats([] {
            return BenchSendActivateReceive(1, 1, true, EPoolType::Basic);
        });
        Cerr << stats.ToString() << Endl;
    }

    Y_UNIT_TEST(SendActivateReceive1Pool1ThreadAllocUnited) {
        auto stats = CountStats([] {
            return BenchSendActivateReceive(1, 1, true, EPoolType::United);
        });
        Cerr << stats.ToString() << Endl;
    }

    Y_UNIT_TEST(SendActivateReceive1Pool1ThreadNoAlloc) {
        auto stats = CountStats([] {
            return BenchSendActivateReceive(1, 1, false, EPoolType::Basic);
        });
        Cerr << stats.ToString() << Endl;
    }

    Y_UNIT_TEST(SendActivateReceive1Pool1ThreadNoAllocUnited) {
        auto stats = CountStats([] {
            return BenchSendActivateReceive(1, 1, false, EPoolType::United);
        });
        Cerr << stats.ToString() << Endl;
    }

    Y_UNIT_TEST(SendActivateReceive1Pool2ThreadsAlloc) {
        auto stats = CountStats([] {
            return BenchSendActivateReceive(1, 2, true, EPoolType::Basic);
        });
        Cerr << stats.ToString() << Endl;
    }

    Y_UNIT_TEST(SendActivateReceive1Pool2ThreadsAllocUnited) {
        auto stats = CountStats([] {
            return BenchSendActivateReceive(1, 2, true, EPoolType::United);
        });
        Cerr << stats.ToString() << Endl;
    }

    Y_UNIT_TEST(SendActivateReceive1Pool2ThreadsNoAlloc) {
        auto stats = CountStats([] {
            return BenchSendActivateReceive(1, 2, false, EPoolType::Basic);
        });
        Cerr << stats.ToString() << Endl;
    }

    Y_UNIT_TEST(SendActivateReceive1Pool2ThreadsNoAllocUnited) {
        auto stats = CountStats([] {
            return BenchSendActivateReceive(1, 2, false, EPoolType::United);
        });
        Cerr << stats.ToString() << Endl;
    }

    Y_UNIT_TEST(SendActivateReceive2Pool1ThreadAlloc) {
        auto stats = CountStats([] {
            return BenchSendActivateReceive(2, 1, true, EPoolType::Basic);
        });
        Cerr << stats.ToString() << Endl;
    }

    Y_UNIT_TEST(SendActivateReceive2Pool1ThreadAllocUnited) {
        auto stats = CountStats([] {
            return BenchSendActivateReceive(2, 1, true, EPoolType::United);
        });
        Cerr << stats.ToString() << Endl;
    }

    Y_UNIT_TEST(SendActivateReceive2Pool1ThreadNoAlloc) {
        auto stats = CountStats([] {
            return BenchSendActivateReceive(2, 1, false, EPoolType::Basic);
        });
        Cerr << stats.ToString() << Endl;
    }

    Y_UNIT_TEST(SendActivateReceive2Pool1ThreadNoAllocUnited) {
        auto stats = CountStats([] {
            return BenchSendActivateReceive(2, 1, false, EPoolType::United);
        });
        Cerr << stats.ToString() << Endl;
    }
+ + void RunBenchContentedThreads(ui32 threads, EPoolType poolType) { + for (ui32 actorPairs = 1; actorPairs <= 2 * threads; actorPairs++) { + auto stats = CountStats([threads, actorPairs, poolType] { + return BenchContentedThreads(threads, actorPairs, poolType); + }); + Cerr << stats.ToString() << " actorPairs: " << actorPairs << Endl; + } + } + + Y_UNIT_TEST(SendActivateReceive1Pool1Threads) { RunBenchContentedThreads(1, EPoolType::Basic); } + Y_UNIT_TEST(SendActivateReceive1Pool1ThreadsUnited) { RunBenchContentedThreads(1, EPoolType::United); } + Y_UNIT_TEST(SendActivateReceive1Pool2Threads) { RunBenchContentedThreads(2, EPoolType::Basic); } + Y_UNIT_TEST(SendActivateReceive1Pool2ThreadsUnited) { RunBenchContentedThreads(2, EPoolType::United); } + Y_UNIT_TEST(SendActivateReceive1Pool3Threads) { RunBenchContentedThreads(3, EPoolType::Basic); } + Y_UNIT_TEST(SendActivateReceive1Pool3ThreadsUnited) { RunBenchContentedThreads(3, EPoolType::United); } + Y_UNIT_TEST(SendActivateReceive1Pool4Threads) { RunBenchContentedThreads(4, EPoolType::Basic); } + Y_UNIT_TEST(SendActivateReceive1Pool4ThreadsUnited) { RunBenchContentedThreads(4, EPoolType::United); } + Y_UNIT_TEST(SendActivateReceive1Pool5Threads) { RunBenchContentedThreads(5, EPoolType::Basic); } + Y_UNIT_TEST(SendActivateReceive1Pool5ThreadsUnited) { RunBenchContentedThreads(5, EPoolType::United); } + Y_UNIT_TEST(SendActivateReceive1Pool6Threads) { RunBenchContentedThreads(6, EPoolType::Basic); } + Y_UNIT_TEST(SendActivateReceive1Pool6ThreadsUnited) { RunBenchContentedThreads(6, EPoolType::United); } + Y_UNIT_TEST(SendActivateReceive1Pool7Threads) { RunBenchContentedThreads(7, EPoolType::Basic); } + Y_UNIT_TEST(SendActivateReceive1Pool7ThreadsUnited) { RunBenchContentedThreads(7, EPoolType::United); } + Y_UNIT_TEST(SendActivateReceive1Pool8Threads) { RunBenchContentedThreads(8, EPoolType::Basic); } + Y_UNIT_TEST(SendActivateReceive1Pool8ThreadsUnited) { RunBenchContentedThreads(8, EPoolType::United); } + + Y_UNIT_TEST(SendActivateReceiveWithMailboxNeighbours) { + TVector<ui32> NeighbourActors = {0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 32, 64, 128, 256}; + for (const auto& neighbour : NeighbourActors) { + auto stats = CountStats([neighbour] { + return BenchSendActivateReceiveWithMailboxNeighbours(neighbour, EPoolType::Basic); + }); + Cerr << stats.ToString() << " neighbourActors: " << neighbour << Endl; + } + } + + Y_UNIT_TEST(SendActivateReceiveWithMailboxNeighboursUnited) { + TVector<ui32> NeighbourActors = {0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 32, 64, 128, 256}; + for (const auto& neighbour : NeighbourActors) { + auto stats = CountStats([neighbour] { + return BenchSendActivateReceiveWithMailboxNeighbours(neighbour, EPoolType::United); + }); + Cerr << stats.ToString() << " neighbourActors: " << neighbour << Endl; + } + } +} + +Y_UNIT_TEST_SUITE(TestDecorator) { + struct TPingDecorator : TDecorator { + TAutoPtr<IEventHandle> SavedEvent = nullptr; + ui64* Counter; + + TPingDecorator(THolder<IActor>&& actor, ui64* counter) + : TDecorator(std::move(actor)) + , Counter(counter) + { + } + + bool DoBeforeReceiving(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx) override { + *Counter += 1; + if (ev->Type != TEvents::THelloWorld::Pong) { + TAutoPtr<IEventHandle> pingEv = new IEventHandle(SelfId(), SelfId(), new TEvents::TEvPing()); + SavedEvent = ev; + Actor->Receive(pingEv, ctx); + } else { + Actor->Receive(SavedEvent, ctx); + } + return false; + } + }; + + struct TPongDecorator : TDecorator { + ui64* Counter; + + TPongDecorator(THolder<IActor>&& actor, 
ui64* counter)
+            : TDecorator(std::move(actor))
+            , Counter(counter)
+        {
+        }
+
+        bool DoBeforeReceiving(TAutoPtr<IEventHandle>& ev, const TActorContext&) override {
+            *Counter += 1;
+            if (ev->Type == TEvents::THelloWorld::Ping) {
+                Send(SelfId(), new TEvents::TEvPong());
+                return false;
+            }
+            return true;
+        }
+    };
+
+    struct TTestActor : TActorBootstrapped<TTestActor> {
+        static constexpr char ActorName[] = "TestActor";
+
+        void Bootstrap()
+        {
+            const auto& activityTypeIndex = GetActivityType();
+            Y_ENSURE(activityTypeIndex < GetActivityTypeCount());
+            Y_ENSURE(GetActivityTypeName(activityTypeIndex) == "TestActor");
+            PassAway();
+        }
+    };
+
+    Y_UNIT_TEST(Basic) {
+        THolder<TActorSystemSetup> setup = MakeHolder<TActorSystemSetup>();
+        setup->NodeId = 0;
+        setup->ExecutorsCount = 1;
+        setup->Executors.Reset(new TAutoPtr<IExecutorPool>[setup->ExecutorsCount]);
+        for (ui32 i = 0; i < setup->ExecutorsCount; ++i) {
+            setup->Executors[i] = new TBasicExecutorPool(i, 1, 10, "basic");
+        }
+        setup->Scheduler = new TBasicSchedulerThread;
+
+        TActorSystem actorSystem(setup);
+        actorSystem.Start();
+
+        THolder<IActor> innerActor = MakeHolder<TTestActor>();
+        ui64 pongCounter = 0;
+        THolder<IActor> pongActor = MakeHolder<TPongDecorator>(std::move(innerActor), &pongCounter);
+        ui64 pingCounter = 0;
+        THolder<IActor> pingActor = MakeHolder<TPingDecorator>(std::move(pongActor), &pingCounter);
+
+        TThreadParkPad pad;
+        TAtomic actorsAlive = 0;
+
+        THolder<IActor> endActor = MakeHolder<TTestEndDecorator>(std::move(pingActor), &pad, &actorsAlive);
+        actorSystem.Register(endActor.Release(), TMailboxType::HTSwap);
+
+        pad.Park();
+        actorSystem.Stop();
+        UNIT_ASSERT(pongCounter == 2 && pingCounter == 2);
+    }
+
+    Y_UNIT_TEST(LocalProcessKey) {
+        static constexpr char ActorName[] = "TestActor";
+
+        UNIT_ASSERT((TEnumProcessKey<TActorActivityTag, IActor::EActorActivity>::GetName(IActor::INTERCONNECT_PROXY_TCP) == "INTERCONNECT_PROXY_TCP"));
+
+        UNIT_ASSERT((TLocalProcessKey<TActorActivityTag, ActorName>::GetName() == ActorName));
+        UNIT_ASSERT((TEnumProcessKey<TActorActivityTag, IActor::EActorActivity>::GetIndex(IActor::INTERCONNECT_PROXY_TCP) == IActor::INTERCONNECT_PROXY_TCP));
+    }
+}
diff --git a/library/cpp/actors/core/actorid.cpp b/library/cpp/actors/core/actorid.cpp
new file mode 100644
index 0000000000..ccda035eac
--- /dev/null
+++ b/library/cpp/actors/core/actorid.cpp
@@ -0,0 +1,34 @@
+#include "actorid.h"
+#include <util/string/builder.h>
+#include <util/string/cast.h>
+
+namespace NActors {
+    void TActorId::Out(IOutputStream& o) const {
+        o << "[" << NodeId() << ":" << LocalId() << ":" << Hint() << "]";
+    }
+
+    TString TActorId::ToString() const {
+        TString x;
+        TStringOutput o(x);
+        Out(o);
+        return x;
+    }
+
+    bool TActorId::Parse(const char* buf, ui32 sz) {
+        if (sz < 4 || buf[0] != '[' || buf[sz - 1] != ']')
+            return false;
+
+        size_t semicolons[2];
+        TStringBuf str(buf, sz);
+        semicolons[0] = str.find(':', 1);
+        if (semicolons[0] == TStringBuf::npos)
+            return false;
+        semicolons[1] = str.find(':', semicolons[0] + 1);
+        if (semicolons[1] == TStringBuf::npos)
+            return false;
+
+        bool success = TryFromString(buf + 1, semicolons[0] - 1, Raw.N.NodeId) && TryFromString(buf + semicolons[0] + 1, semicolons[1] - semicolons[0] - 1, Raw.N.LocalId) && TryFromString(buf + semicolons[1] + 1, sz - semicolons[1] - 2, Raw.N.Hint);
+
+        return success;
+    }
+}
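Editor's note: `Out`/`ToString` and `Parse` above round-trip the `[NodeId:LocalId:Hint]` text form; the pool bits and the service-id variant are not part of that form. A minimal sketch against this API (illustration only, not part of the commit):

```cpp
#include <library/cpp/actors/core/actorid.h>

void RoundTrip() {
    NActors::TActorId id(/* nodeId */ 7, /* poolId */ 2, /* localId */ 42, /* hint */ 1);
    TString text = id.ToString(); // "[7:42:1]" - NodeId() masks the pool bits off before printing

    NActors::TActorId parsed;
    Y_VERIFY(parsed.Parse(text.data(), (ui32)text.size()));
    Y_VERIFY(parsed.NodeId() == 7 && parsed.LocalId() == 42 && parsed.Hint() == 1);
    // note: parsed.PoolID() is 0 - the pool index is not encoded in the text form
}
```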
diff --git a/library/cpp/actors/core/actorid.h b/library/cpp/actors/core/actorid.h
new file mode 100644
index 0000000000..d972b1a0ff
--- /dev/null
+++ b/library/cpp/actors/core/actorid.h
@@ -0,0 +1,196 @@
+#pragma once
+
+#include "defs.h"
+#include <util/stream/output.h> // for IOutputStream
+#include <util/generic/hash.h>
+
+namespace NActors {
+    // Used as a globally unique actor address.
+    // It can also transport a service id (a 12-byte string stored across the hint and localid fields).
+    // The highest bit of the node field marks a service id,
+    // the next 11 bits of the node id hold the pool id,
+    // and the low 20 bits hold the node id itself.
+
+    struct TActorId {
+        static constexpr ui32 MaxServiceIDLength = 12;
+        static constexpr ui32 MaxPoolID = 0x000007FF;
+        static constexpr ui32 MaxNodeId = 0x000FFFFF;
+        static constexpr ui32 PoolIndexShift = 20;
+        static constexpr ui32 PoolIndexMask = MaxPoolID << PoolIndexShift;
+        static constexpr ui32 ServiceMask = 0x80000000;
+        static constexpr ui32 NodeIdMask = MaxNodeId;
+
+    private:
+        union {
+            struct {
+                ui64 LocalId;
+                ui32 Hint;
+                ui32 NodeId;
+            } N;
+
+            struct {
+                ui64 X1;
+                ui64 X2;
+            } X;
+
+            ui8 Buf[16];
+        } Raw;
+
+    public:
+        TActorId() noexcept {
+            Raw.X.X1 = 0;
+            Raw.X.X2 = 0;
+        }
+
+        explicit TActorId(ui32 nodeId, ui32 poolId, ui64 localId, ui32 hint) noexcept {
+            Y_VERIFY_DEBUG(poolId <= MaxPoolID);
+            Raw.N.LocalId = localId;
+            Raw.N.Hint = hint;
+            Raw.N.NodeId = nodeId | (poolId << PoolIndexShift);
+        }
+
+        explicit TActorId(ui32 nodeId, const TStringBuf& x) noexcept {
+            Y_VERIFY(x.size() <= MaxServiceIDLength, "service id is too long");
+            Raw.N.LocalId = 0;
+            Raw.N.Hint = 0;
+            Raw.N.NodeId = nodeId | ServiceMask;
+            memcpy(Raw.Buf, x.data(), x.size());
+        }
+
+        explicit TActorId(ui64 x1, ui64 x2) noexcept {
+            Raw.X.X1 = x1;
+            Raw.X.X2 = x2;
+        }
+
+        explicit operator bool() const noexcept {
+            return Raw.X.X1 != 0 || Raw.X.X2 != 0;
+        }
+
+        ui64 LocalId() const noexcept {
+            return Raw.N.LocalId;
+        }
+
+        ui32 Hint() const noexcept {
+            return Raw.N.Hint;
+        }
+
+        ui32 NodeId() const noexcept {
+            return Raw.N.NodeId & NodeIdMask;
+        }
+
+        bool IsService() const noexcept {
+            return (Raw.N.NodeId & ServiceMask);
+        }
+
+        TStringBuf ServiceId() const noexcept {
+            Y_VERIFY_DEBUG(IsService());
+            return TStringBuf((const char*)Raw.Buf, MaxServiceIDLength);
+        }
+
+        static ui32 PoolIndex(ui32 nodeid) noexcept {
+            return ((nodeid & PoolIndexMask) >> PoolIndexShift);
+        }
+
+        ui32 PoolID() const noexcept {
+            return PoolIndex(Raw.N.NodeId);
+        }
+
+        ui64 RawX1() const noexcept {
+            return Raw.X.X1;
+        }
+
+        ui64 RawX2() const noexcept {
+            return Raw.X.X2;
+        }
+
+        bool operator<(const TActorId& x) const noexcept {
+            const ui64 s1 = Raw.X.X1;
+            const ui64 s2 = Raw.X.X2;
+            const ui64 x1 = x.Raw.X.X1;
+            const ui64 x2 = x.Raw.X.X2;
+
+            return (s1 != x1) ?
(s1 < x1) : (s2 < x2); + } + + bool operator!=(const TActorId& x) const noexcept { + return Raw.X.X1 != x.Raw.X.X1 || Raw.X.X2 != x.Raw.X.X2; + } + + bool operator==(const TActorId& x) const noexcept { + return !(x != *this); + } + + ui64 Hash() const noexcept { + const ui32* x = (const ui32*)Raw.Buf; + + const ui64 x1 = x[0] * 0x001DFF3D8DC48F5Dull; + const ui64 x2 = x[1] * 0x179CA10C9242235Dull; + const ui64 x3 = x[2] * 0x0F530CAD458B0FB1ull; + const ui64 x4 = x[3] * 0xB5026F5AA96619E9ull; + + const ui64 z1 = x1 + x2; + const ui64 z2 = x3 + x4; + + const ui64 sum = 0x5851F42D4C957F2D + z1 + z2; + + return (sum >> 32) | (sum << 32); + } + + ui32 Hash32() const noexcept { + const ui32* x = (const ui32*)Raw.Buf; + + const ui64 x1 = x[0] * 0x001DFF3D8DC48F5Dull; + const ui64 x2 = x[1] * 0x179CA10C9242235Dull; + const ui64 x3 = x[2] * 0x0F530CAD458B0FB1ull; + const ui64 x4 = x[3] * 0xB5026F5AA96619E9ull; + + const ui64 z1 = x1 + x2; + const ui64 z2 = x3 + x4; + + const ui64 sum = 0x5851F42D4C957F2D + z1 + z2; + + return sum >> 32; + } + + struct THash { + ui64 operator()(const TActorId& actorId) const noexcept { + return actorId.Hash(); + } + }; + + struct THash32 { + ui64 operator()(const TActorId& actorId) const noexcept { + return actorId.Hash(); + } + }; + + struct TOrderedCmp { + bool operator()(const TActorId &left, const TActorId &right) const noexcept { + Y_VERIFY_DEBUG(!left.IsService() && !right.IsService(), "ordered compare works for plain actorids only"); + const ui32 n1 = left.NodeId(); + const ui32 n2 = right.NodeId(); + + return (n1 != n2) ? (n1 < n2) : left.LocalId() < right.LocalId(); + } + }; + + TString ToString() const; + void Out(IOutputStream& o) const; + bool Parse(const char* buf, ui32 sz); + }; + + static_assert(sizeof(TActorId) == 16, "expect sizeof(TActorId) == 16"); + static_assert(MaxPools < TActorId::MaxPoolID); // current implementation of united pool has limit MaxPools on pool id +} + +template <> +inline void Out<NActors::TActorId>(IOutputStream& o, const NActors::TActorId& x) { + return x.Out(o); +} + +template <> +struct THash<NActors::TActorId> { + inline ui64 operator()(const NActors::TActorId& x) const { + return x.Hash(); + } +}; diff --git a/library/cpp/actors/core/actorsystem.cpp b/library/cpp/actors/core/actorsystem.cpp new file mode 100644 index 0000000000..c58698a206 --- /dev/null +++ b/library/cpp/actors/core/actorsystem.cpp @@ -0,0 +1,277 @@ +#include "defs.h" +#include "actorsystem.h" +#include "callstack.h" +#include "cpu_manager.h" +#include "mailbox.h" +#include "events.h" +#include "interconnect.h" +#include "servicemap.h" +#include "scheduler_queue.h" +#include "scheduler_actor.h" +#include "log.h" +#include "probes.h" +#include "ask.h" +#include <library/cpp/actors/util/affinity.h> +#include <library/cpp/actors/util/datetime.h> +#include <util/generic/hash.h> +#include <util/system/rwlock.h> +#include <util/random/random.h> + +namespace NActors { + LWTRACE_USING(ACTORLIB_PROVIDER); + + struct TActorSystem::TServiceMap : TNonCopyable { + NActors::TServiceMap<TActorId, TActorId, TActorId::THash> LocalMap; + TTicketLock Lock; + + TActorId RegisterLocalService(const TActorId& serviceId, const TActorId& actorId) { + TTicketLock::TGuard guard(&Lock); + const TActorId old = LocalMap.Update(serviceId, actorId); + return old; + } + + TActorId LookupLocal(const TActorId& x) { + return LocalMap.Find(x); + } + }; + + TActorSystem::TActorSystem(THolder<TActorSystemSetup>& setup, void* appData, + TIntrusivePtr<NLog::TSettings> loggerSettings) + : 
NodeId(setup->NodeId)
+        , CpuManager(new TCpuManager(setup))
+        , ExecutorPoolCount(CpuManager->GetExecutorsCount())
+        , Scheduler(setup->Scheduler)
+        , InterconnectCount((ui32)setup->Interconnect.ProxyActors.size())
+        , CurrentTimestamp(0)
+        , CurrentMonotonic(0)
+        , CurrentIDCounter(RandomNumber<ui64>())
+        , SystemSetup(setup.Release())
+        , DefSelfID(NodeId, "actorsystem")
+        , AppData0(appData)
+        , LoggerSettings0(loggerSettings)
+        , StartExecuted(false)
+        , StopExecuted(false)
+        , CleanupExecuted(false)
+    {
+        ServiceMap.Reset(new TServiceMap());
+    }
+
+    TActorSystem::~TActorSystem() {
+        Cleanup();
+    }
+
+    bool TActorSystem::Send(TAutoPtr<IEventHandle> ev) const {
+        if (Y_UNLIKELY(!ev))
+            return false;
+
+#ifdef USE_ACTOR_CALLSTACK
+        ev->Callstack.TraceIfEmpty();
+#endif
+
+        TActorId recipient = ev->GetRecipientRewrite();
+        const ui32 recpNodeId = recipient.NodeId();
+
+        if (recpNodeId != NodeId && recpNodeId != 0) {
+            // if the recipient is not local, rewrite with a forward instruction
+            Y_VERIFY_DEBUG(!ev->HasEvent() || ev->GetBase()->IsSerializable());
+            Y_VERIFY(ev->Recipient == recipient,
+                "Event rewrite from %s to %s would be lost via interconnect",
+                ev->Recipient.ToString().c_str(),
+                recipient.ToString().c_str());
+            recipient = InterconnectProxy(recpNodeId);
+            ev->Rewrite(TEvInterconnect::EvForward, recipient);
+        }
+        if (recipient.IsService()) {
+            TActorId target = ServiceMap->LookupLocal(recipient);
+            if (!target && IsInterconnectProxyId(recipient) && ProxyWrapperFactory) {
+                const TActorId actorId = ProxyWrapperFactory(const_cast<TActorSystem*>(this),
+                    GetInterconnectProxyNode(recipient));
+                with_lock(ProxyCreationLock) {
+                    target = ServiceMap->LookupLocal(recipient);
+                    if (!target) {
+                        target = actorId;
+                        ServiceMap->RegisterLocalService(recipient, target);
+                    }
+                }
+                if (target != actorId) {
+                    // a race has occurred, terminate the newly created actor
+                    Send(new IEventHandle(TEvents::TSystem::Poison, 0, actorId, {}, nullptr, 0));
+                }
+            }
+            recipient = target;
+            ev->Rewrite(ev->GetTypeRewrite(), recipient);
+        }
+
+        Y_VERIFY_DEBUG(recipient == ev->GetRecipientRewrite());
+        const ui32 recpPool = recipient.PoolID();
+        if (recipient && recpPool < ExecutorPoolCount) {
+            if (CpuManager->GetExecutorPool(recpPool)->Send(ev)) {
+                return true;
+            }
+        }
+
+        Send(ev->ForwardOnNondelivery(TEvents::TEvUndelivered::ReasonActorUnknown));
+        return false;
+    }
+
+    bool TActorSystem::Send(const TActorId& recipient, IEventBase* ev, ui32 flags) const {
+        return this->Send(new IEventHandle(recipient, DefSelfID, ev, flags));
+    }
+
+    void TActorSystem::Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) const {
+        Schedule(deadline - Timestamp(), ev, cookie);
+    }
+
+    void TActorSystem::Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) const {
+        const auto current = Monotonic();
+        if (deadline < current)
+            deadline = current;
+
+        TTicketLock::TGuard guard(&ScheduleLock);
+        ScheduleQueue->Writer.Push(deadline.MicroSeconds(), ev.Release(), cookie);
+    }
+
+    void TActorSystem::Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) const {
+        const auto deadline = Monotonic() + delta;
+
+        TTicketLock::TGuard guard(&ScheduleLock);
+        ScheduleQueue->Writer.Push(deadline.MicroSeconds(), ev.Release(), cookie);
+    }
+
+    TActorId TActorSystem::Register(IActor* actor, TMailboxType::EType mailboxType, ui32 executorPool, ui64 revolvingCounter,
+                                    const TActorId& parentId) {
+        Y_VERIFY(executorPool < ExecutorPoolCount, "executorPool# %" PRIu32 ", ExecutorPoolCount# %" PRIu32,
+                 (ui32)executorPool, (ui32)ExecutorPoolCount);
+        return CpuManager->GetExecutorPool(executorPool)->Register(actor, mailboxType, revolvingCounter, parentId);
+    }
+
+    NThreading::TFuture<THolder<IEventBase>> TActorSystem::AskGeneric(TMaybe<ui32> expectedEventType,
+                                                                      TActorId recipient, THolder<IEventBase> event,
+                                                                      TDuration timeout) {
+        auto promise = NThreading::NewPromise<THolder<IEventBase>>();
+        Register(MakeAskActor(expectedEventType, recipient, std::move(event), timeout, promise).Release());
+        return promise.GetFuture();
+    }
+
+    ui64 TActorSystem::AllocateIDSpace(ui64 count) {
+        Y_VERIFY_DEBUG(count < Max<ui32>() / 65536);
+
+        static_assert(sizeof(TAtomic) == sizeof(ui64), "expect sizeof(TAtomic) == sizeof(ui64)");
+
+        // use the high 32 bits for seconds since epoch
+        // this could wrap every century, but we don't expect any actor reference to live that long, so such a wrap will do no harm
+        const ui64 timeFromEpoch = TInstant::MicroSeconds(RelaxedLoad(&CurrentTimestamp)).Seconds();
+
+        // use the low 32 bits as the counter value
+        ui32 lowPartEnd = (ui32)(AtomicAdd(CurrentIDCounter, count));
+        while (lowPartEnd < count) // if our request crosses the 32-bit boundary - retry
+            lowPartEnd = (ui32)(AtomicAdd(CurrentIDCounter, count));
+
+        const ui64 lowPart = lowPartEnd - count;
+        const ui64 ret = (timeFromEpoch << 32) | lowPart;
+
+        return ret;
+    }
+
+    TActorId TActorSystem::InterconnectProxy(ui32 destinationNode) const {
+        if (destinationNode < InterconnectCount)
+            return Interconnect[destinationNode];
+        else if (destinationNode != NodeId)
+            return MakeInterconnectProxyId(destinationNode);
+        else
+            return TActorId();
+    }
+
+    ui32 TActorSystem::BroadcastToProxies(const std::function<IEventHandle*(const TActorId&)>& eventFabric) {
+        // TODO: get rid of this method
+        for (ui32 i = 0; i < InterconnectCount; ++i) {
+            Send(eventFabric(Interconnect[i]));
+        }
+        return InterconnectCount;
+    }
+
+    TActorId TActorSystem::LookupLocalService(const TActorId& x) const {
+        return ServiceMap->LookupLocal(x);
+    }
+
+    TActorId TActorSystem::RegisterLocalService(const TActorId& serviceId, const TActorId& actorId) {
+        // TODO: notify the old actor about demotion
+        return ServiceMap->RegisterLocalService(serviceId, actorId);
+    }
+
+    void TActorSystem::GetPoolStats(ui32 poolId, TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const {
+        CpuManager->GetPoolStats(poolId, poolStats, statsCopy);
+    }
+
+    void TActorSystem::Start() {
+        Y_VERIFY(StartExecuted == false);
+        StartExecuted = true;
+
+        ScheduleQueue.Reset(new NSchedulerQueue::TQueueType());
+        TVector<NSchedulerQueue::TReader*> scheduleReaders;
+        scheduleReaders.push_back(&ScheduleQueue->Reader);
+        CpuManager->PrepareStart(scheduleReaders, this);
+        Scheduler->Prepare(this, &CurrentTimestamp, &CurrentMonotonic);
+        Scheduler->PrepareSchedules(&scheduleReaders.front(), (ui32)scheduleReaders.size());
+
+        // setup interconnect proxies
+        {
+            const TInterconnectSetup& setup = SystemSetup->Interconnect;
+            Interconnect.Reset(new TActorId[InterconnectCount + 1]);
+            for (ui32 i = 0, e = InterconnectCount; i != e; ++i) {
+                const TActorSetupCmd& x = setup.ProxyActors[i];
+                if (x.Actor) {
+                    Interconnect[i] = Register(x.Actor, x.MailboxType, x.PoolId, i);
+                    Y_VERIFY(!!Interconnect[i]);
+                }
+            }
+            ProxyWrapperFactory = std::move(SystemSetup->Interconnect.ProxyWrapperFactory);
+        }
+
+        // setup local services
+        {
+            for (ui32 i = 0, e = (ui32)SystemSetup->LocalServices.size(); i != e; ++i) {
+                const std::pair<TActorId, TActorSetupCmd>& x =
SystemSetup->LocalServices[i];
+                const TActorId xid = Register(x.second.Actor, x.second.MailboxType, x.second.PoolId, i);
+                Y_VERIFY(!!xid);
+                if (!!x.first)
+                    RegisterLocalService(x.first, xid);
+            }
+        }
+
+        // setup is complete, so we can destroy the setup config
+        SystemSetup.Destroy();
+
+        Scheduler->PrepareStart();
+        CpuManager->Start();
+        Send(MakeSchedulerActorId(), new TEvSchedulerInitialize(scheduleReaders, &CurrentTimestamp, &CurrentMonotonic));
+        Scheduler->Start();
+    }
+
+    void TActorSystem::Stop() {
+        if (StopExecuted || !StartExecuted)
+            return;
+
+        StopExecuted = true;
+
+        for (auto&& fn : std::exchange(DeferredPreStop, {})) {
+            fn();
+        }
+
+        Scheduler->PrepareStop();
+        CpuManager->PrepareStop();
+        Scheduler->Stop();
+        CpuManager->Shutdown();
+    }
+
+    void TActorSystem::Cleanup() {
+        Stop();
+        if (CleanupExecuted || !StartExecuted)
+            return;
+        CleanupExecuted = true;
+        CpuManager->Cleanup();
+        Scheduler.Destroy();
+    }
+
+    ui32 TActorSystem::MemProfActivityBase;
+}
diff --git a/library/cpp/actors/core/actorsystem.h b/library/cpp/actors/core/actorsystem.h
new file mode 100644
index 0000000000..40499d7586
--- /dev/null
+++ b/library/cpp/actors/core/actorsystem.h
@@ -0,0 +1,367 @@
+#pragma once
+
+#include "defs.h"
+
+#include "actor.h"
+#include "balancer.h"
+#include "config.h"
+#include "event.h"
+#include "log_settings.h"
+#include "scheduler_cookie.h"
+#include "mon_stats.h"
+
+#include <library/cpp/threading/future/future.h>
+#include <library/cpp/actors/util/ticket_lock.h>
+
+#include <util/generic/vector.h>
+#include <util/datetime/base.h>
+#include <util/system/mutex.h>
+
+namespace NActors {
+    class TActorSystem;
+    class TCpuManager;
+    class IExecutorPool;
+    struct TWorkerContext;
+
+    inline TActorId MakeInterconnectProxyId(ui32 destNodeId) {
+        char data[12];
+        memcpy(data, "ICProxy@", 8);
+        memcpy(data + 8, &destNodeId, sizeof(ui32));
+        return TActorId(0, TStringBuf(data, 12));
+    }
+
+    inline bool IsInterconnectProxyId(const TActorId& actorId) {
+        return actorId.IsService() && !memcmp(actorId.ServiceId().data(), "ICProxy@", 8);
+    }
+
+    inline ui32 GetInterconnectProxyNode(const TActorId& actorId) {
+        ui32 nodeId;
+        memcpy(&nodeId, actorId.ServiceId().data() + 8, sizeof(ui32));
+        return nodeId;
+    }
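+
+    // Illustration (editor's note, not part of the original commit; uses only the
+    // three helpers above): an interconnect proxy is addressed through the
+    // service-id form of TActorId - the 12-byte service id carries the 8-byte
+    // "ICProxy@" tag followed by the ui32 destination node id:
+    //
+    //     TActorId proxy = MakeInterconnectProxyId(5);
+    //     Y_VERIFY(IsInterconnectProxyId(proxy));
+    //     Y_VERIFY(GetInterconnectProxyNode(proxy) == 5);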
+
+    namespace NSchedulerQueue {
+        class TReader;
+        struct TQueueType;
+    }
+
+    class IExecutorPool : TNonCopyable {
+    public:
+        const ui32 PoolId;
+
+        TAtomic ActorRegistrations;
+        TAtomic DestroyedActors;
+
+        IExecutorPool(ui32 poolId)
+            : PoolId(poolId)
+            , ActorRegistrations(0)
+            , DestroyedActors(0)
+        {
+        }
+
+        virtual ~IExecutorPool() {
+        }
+
+        // for workers
+        virtual ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) = 0;
+        virtual void ReclaimMailbox(TMailboxType::EType mailboxType, ui32 hint, TWorkerId workerId, ui64 revolvingCounter) = 0;
+
+        /**
+         * Schedule one-shot event that will be sent at the given time point in the future.
+         *
+         * @param deadline the wallclock time point in the future when the event must be sent
+         * @param ev the event to send
+         * @param cookie cookie that will be piggybacked with the event
+         * @param workerId index of the thread which will perform event dispatching
+         */
+        virtual void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) = 0;
+
+        /**
+         * Schedule one-shot event that will be sent at the given time point in the future.
+         *
+         * @param deadline the monotonic time point in the future when the event must be sent
+         * @param ev the event to send
+         * @param cookie cookie that will be piggybacked with the event
+         * @param workerId index of the thread which will perform event dispatching
+         */
+        virtual void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) = 0;
+
+        /**
+         * Schedule one-shot event that will be sent after the given delay.
+         *
+         * @param delta the time from now to delay event sending
+         * @param ev the event to send
+         * @param cookie cookie that will be piggybacked with the event
+         * @param workerId index of the thread which will perform event dispatching
+         */
+        virtual void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) = 0;
+
+        // for actorsystem
+        virtual bool Send(TAutoPtr<IEventHandle>& ev) = 0;
+        virtual void ScheduleActivation(ui32 activation) = 0;
+        virtual void ScheduleActivationEx(ui32 activation, ui64 revolvingCounter) = 0;
+        virtual TActorId Register(IActor* actor, TMailboxType::EType mailboxType, ui64 revolvingCounter, const TActorId& parentId) = 0;
+        virtual TActorId Register(IActor* actor, TMailboxHeader* mailbox, ui32 hint, const TActorId& parentId) = 0;
+
+        // lifecycle stuff
+        virtual void Prepare(TActorSystem* actorSystem, NSchedulerQueue::TReader** scheduleReaders, ui32* scheduleSz) = 0;
+        virtual void Start() = 0;
+        virtual void PrepareStop() = 0;
+        virtual void Shutdown() = 0;
+        virtual bool Cleanup() = 0;
+
+        virtual void GetCurrentStats(TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const {
+            // TODO: make pure virtual and override everywhere
+            Y_UNUSED(poolStats);
+            Y_UNUSED(statsCopy);
+        }
+
+        virtual TString GetName() const {
+            return TString();
+        }
+
+        virtual ui32 GetThreads() const {
+            return 1;
+        }
+
+        // generic
+        virtual TAffinity* Affinity() const = 0;
+
+        virtual void SetRealTimeMode() const {}
+    };
+
+    // could be a proxy to in-pool schedulers (for NUMA-aware executors)
+    class ISchedulerThread : TNonCopyable {
+    public:
+        virtual ~ISchedulerThread() {
+        }
+
+        virtual void Prepare(TActorSystem* actorSystem, volatile ui64* currentTimestamp, volatile ui64* currentMonotonic) = 0;
+        virtual void PrepareSchedules(NSchedulerQueue::TReader** readers, ui32 scheduleReadersCount) = 0;
+        virtual void PrepareStart() { /* empty */ }
+        virtual void Start() = 0;
+        virtual void PrepareStop() = 0;
+        virtual void Stop() = 0;
+    };
+
+    struct TActorSetupCmd {
+        TMailboxType::EType MailboxType;
+        ui32 PoolId;
+        IActor* Actor;
+
+        TActorSetupCmd()
+            : MailboxType(TMailboxType::HTSwap)
+            , PoolId(0)
+            , Actor(nullptr)
+        {
+        }
+
+        TActorSetupCmd(IActor* actor, TMailboxType::EType mailboxType, ui32 poolId)
+            : MailboxType(mailboxType)
+            , PoolId(poolId)
+            , Actor(actor)
+        {
+        }
+
+        void Set(IActor* actor, TMailboxType::EType mailboxType, ui32 poolId) {
+            MailboxType = mailboxType;
+            PoolId = poolId;
+            Actor = actor;
+        }
+    };
+
+    using TProxyWrapperFactory = std::function<TActorId(TActorSystem*, ui32)>;
+
+    struct TInterconnectSetup {
+        TVector<TActorSetupCmd> ProxyActors;
+        TProxyWrapperFactory ProxyWrapperFactory;
+    };
+
+    struct TActorSystemSetup {
+        ui32 NodeId = 0;
+
+        // Either Executors or CpuManager must be initialized
+        ui32 ExecutorsCount = 0;
+        TArrayHolder<TAutoPtr<IExecutorPool>> Executors;
+
+        TAutoPtr<IBalancer> Balancer; // the main implementation will be created implicitly if not set
+
+        TCpuManagerConfig CpuManager;
+
+        TAutoPtr<ISchedulerThread> Scheduler;
+        ui32 MaxActivityType = 5; // for default entries
+
+        TInterconnectSetup Interconnect;
+
+        using TLocalServices = TVector<std::pair<TActorId, TActorSetupCmd>>;
+        TLocalServices LocalServices;
+
+        ui32 GetExecutorsCount() const {
+            return Executors ? ExecutorsCount : CpuManager.GetExecutorsCount();
+        }
+
+        TString GetPoolName(ui32 poolId) const {
+            return Executors ? Executors[poolId]->GetName() : CpuManager.GetPoolName(poolId);
+        }
+
+        ui32 GetThreads(ui32 poolId) const {
+            return Executors ? Executors[poolId]->GetThreads() : CpuManager.GetThreads(poolId);
+        }
+    };
+
+    class TActorSystem : TNonCopyable {
+        struct TServiceMap;
+
+    public:
+        const ui32 NodeId;
+
+    private:
+        THolder<TCpuManager> CpuManager;
+        const ui32 ExecutorPoolCount;
+
+        TAutoPtr<ISchedulerThread> Scheduler;
+        THolder<TServiceMap> ServiceMap;
+
+        const ui32 InterconnectCount;
+        TArrayHolder<TActorId> Interconnect;
+
+        volatile ui64 CurrentTimestamp;
+        volatile ui64 CurrentMonotonic;
+        volatile ui64 CurrentIDCounter;
+
+        THolder<NSchedulerQueue::TQueueType> ScheduleQueue;
+        mutable TTicketLock ScheduleLock;
+
+        friend class TExecutorThread;
+
+        THolder<TActorSystemSetup> SystemSetup;
+        TActorId DefSelfID;
+        void* AppData0;
+        TIntrusivePtr<NLog::TSettings> LoggerSettings0;
+        TProxyWrapperFactory ProxyWrapperFactory;
+        TMutex ProxyCreationLock;
+
+        bool StartExecuted;
+        bool StopExecuted;
+        bool CleanupExecuted;
+
+        std::deque<std::function<void()>> DeferredPreStop;
+    public:
+        TActorSystem(THolder<TActorSystemSetup>& setup, void* appData = nullptr,
+                     TIntrusivePtr<NLog::TSettings> loggerSettings = TIntrusivePtr<NLog::TSettings>(nullptr));
+        ~TActorSystem();
+
+        void Start();
+        void Stop();
+        void Cleanup();
+
+        TActorId Register(IActor* actor, TMailboxType::EType mailboxType = TMailboxType::HTSwap, ui32 executorPool = 0,
+                          ui64 revolvingCounter = 0, const TActorId& parentId = TActorId());
+
+        bool Send(TAutoPtr<IEventHandle> ev) const;
+        bool Send(const TActorId& recipient, IEventBase* ev, ui32 flags = 0) const;
+
+        /**
+         * Schedule one-shot event that will be sent at the given time point in the future.
+         *
+         * @param deadline the wallclock time point in the future when the event must be sent
+         * @param ev the event to send
+         * @param cookie cookie that will be piggybacked with the event
+         */
+        void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr) const;
+
+        /**
+         * Schedule one-shot event that will be sent at the given time point in the future.
+         *
+         * @param deadline the monotonic time point in the future when the event must be sent
+         * @param ev the event to send
+         * @param cookie cookie that will be piggybacked with the event
+         */
+        void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr) const;
+
+        /**
+         * Schedule one-shot event that will be sent after the given delay.
+         *
+         * @param delta the time from now to delay event sending
+         * @param ev the event to send
+         * @param cookie cookie that will be piggybacked with the event
+         */
+        void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr) const;
+
+        /**
+         * A way to interact with actors from a non-actor context.
+         *
+         * This method will send the `event` to the `recipient` and then wait for a response. When the response arrives,
+         * it will be passed to the future. If the response is not of type `T`, the future will resolve into an exception.
+         *
+         * @tparam T expected response type. Must be derived from `IEventBase`;
+         *           use `IEventBase` itself to catch any response.
+         * @param recipient who will receive the request.
+         * @param event a request message.
+         * @return future that will be resolved when a message from `recipient` arrives.
+         */
+        template <typename T>
+        [[nodiscard]]
+        NThreading::TFuture<THolder<T>> Ask(TActorId recipient, THolder<IEventBase> event, TDuration timeout = TDuration::Max()) {
+            if constexpr (std::is_same_v<T, IEventBase>) {
+                return AskGeneric(Nothing(), recipient, std::move(event), timeout);
+            } else {
+                return AskGeneric(T::EventType, recipient, std::move(event), timeout)
+                    .Apply([](const NThreading::TFuture<THolder<IEventBase>>& ev) {
+                        return THolder<T>(static_cast<T*>(const_cast<THolder<IEventBase>&>(ev.GetValueSync()).Release())); // =(
+                    });
+            }
+        }
+
+        [[nodiscard]]
+        NThreading::TFuture<THolder<IEventBase>> AskGeneric(
+            TMaybe<ui32> expectedEventType,
+            TActorId recipient,
+            THolder<IEventBase> event,
+            TDuration timeout);
+
+        ui64 AllocateIDSpace(ui64 count);
+
+        TActorId InterconnectProxy(ui32 destinationNode) const;
+        ui32 BroadcastToProxies(const std::function<IEventHandle*(const TActorId&)>&);
+
+        void UpdateLinkStatus(ui8 status, ui32 destinationNode);
+        ui8 LinkStatus(ui32 destinationNode);
+
+        TActorId LookupLocalService(const TActorId& x) const;
+        TActorId RegisterLocalService(const TActorId& serviceId, const TActorId& actorId);
+
+        ui32 GetMaxActivityType() const {
+            return SystemSetup ? SystemSetup->MaxActivityType : 1;
+        }
+
+        TInstant Timestamp() const {
+            return TInstant::MicroSeconds(RelaxedLoad(&CurrentTimestamp));
+        }
+
+        TMonotonic Monotonic() const {
+            return TMonotonic::MicroSeconds(RelaxedLoad(&CurrentMonotonic));
+        }
+
+        template <typename T>
+        T* AppData() const {
+            return (T*)AppData0;
+        }
+
+        NLog::TSettings* LoggerSettings() const {
+            return LoggerSettings0.Get();
+        }
+
+        void GetPoolStats(ui32 poolId, TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const;
+
+        void DeferPreStop(std::function<void()> fn) {
+            DeferredPreStop.push_back(std::move(fn));
+        }
+
+        /* This is the base for memory profiling tags.
+           The system sets the memory profiling tag for the debug version of lfalloc.
+           The tag is set as "base_tag + actor_activity_type".
*/ + static ui32 MemProfActivityBase; + }; +} diff --git a/library/cpp/actors/core/actorsystem_ut.cpp b/library/cpp/actors/core/actorsystem_ut.cpp new file mode 100644 index 0000000000..231d6f0ca1 --- /dev/null +++ b/library/cpp/actors/core/actorsystem_ut.cpp @@ -0,0 +1,45 @@ +#include "actorsystem.h" + +#include <library/cpp/actors/testlib/test_runtime.h> +#include <library/cpp/testing/unittest/registar.h> + +using namespace NActors; + +Y_UNIT_TEST_SUITE(TActorSystemTest) { + + class TTestActor: public TActor<TTestActor> { + public: + TTestActor() + : TActor{&TThis::Main} + { + } + + STATEFN(Main) { + Y_UNUSED(ev); + } + }; + + THolder<TTestActorRuntimeBase> CreateRuntime() { + auto runtime = MakeHolder<TTestActorRuntimeBase>(); + runtime->SetScheduledEventFilter([](auto&&, auto&&, auto&&, auto&&) { return false; }); + runtime->Initialize(); + return runtime; + } + + Y_UNIT_TEST(LocalService) { + THolder<TTestActorRuntimeBase> runtime = CreateRuntime(); + auto actorA = runtime->Register(new TTestActor); + auto actorB = runtime->Register(new TTestActor); + + TActorId myServiceId{0, TStringBuf{"my-service"}}; + + auto prevActorId = runtime->RegisterService(myServiceId, actorA); + UNIT_ASSERT(!prevActorId); + UNIT_ASSERT_EQUAL(runtime->GetLocalServiceId(myServiceId), actorA); + + prevActorId = runtime->RegisterService(myServiceId, actorB); + UNIT_ASSERT(prevActorId); + UNIT_ASSERT_EQUAL(prevActorId, actorA); + UNIT_ASSERT_EQUAL(runtime->GetLocalServiceId(myServiceId), actorB); + } +} diff --git a/library/cpp/actors/core/ask.cpp b/library/cpp/actors/core/ask.cpp new file mode 100644 index 0000000000..0054c9a906 --- /dev/null +++ b/library/cpp/actors/core/ask.cpp @@ -0,0 +1,74 @@ +#include "ask.h" + +#include "actor_bootstrapped.h" +#include "actorid.h" +#include "event.h" +#include "hfunc.h" + +namespace NActors { + namespace { + class TAskActor: public TActorBootstrapped<TAskActor> { + enum { + Timeout = EventSpaceBegin(TEvents::ES_PRIVATE), + }; + + // We can't use the standard timeout event because recipient may send us one. 
+ struct TTimeout: public TEventLocal<TTimeout, Timeout> { + }; + + public: + TAskActor( + TMaybe<ui32> expectedEventType, + TActorId recipient, + THolder<IEventBase> event, + TDuration timeout, + const NThreading::TPromise<THolder<IEventBase>>& promise) + : ExpectedEventType_(expectedEventType) + , Recipient_(recipient) + , Event_(std::move(event)) + , Timeout_(timeout) + , Promise_(promise) + { + } + + public: + void Bootstrap() { + Send(Recipient_, std::move(Event_)); + Become(&TAskActor::Waiting); + + if (Timeout_ != TDuration::Max()) { + Schedule(Timeout_, new TTimeout); + } + } + + STATEFN(Waiting) { + if (ev->GetTypeRewrite() == TTimeout::EventType) { + Promise_.SetException(std::make_exception_ptr(yexception() << "ask timeout")); + } else if (!ExpectedEventType_ || ev->GetTypeRewrite() == ExpectedEventType_) { + Promise_.SetValue(ev->ReleaseBase()); + } else { + Promise_.SetException(std::make_exception_ptr(yexception() << "received unexpected response " << ev->GetBase()->ToString())); + } + + PassAway(); + } + + public: + TMaybe<ui32> ExpectedEventType_; + TActorId Recipient_; + THolder<IEventBase> Event_; + TDuration Timeout_; + NThreading::TPromise<THolder<IEventBase>> Promise_; + }; + } + + THolder<IActor> MakeAskActor( + TMaybe<ui32> expectedEventType, + TActorId recipient, + THolder<IEventBase> event, + TDuration timeout, + const NThreading::TPromise<THolder<IEventBase>>& promise) + { + return MakeHolder<TAskActor>(expectedEventType, std::move(recipient), std::move(event), timeout, promise); + } +} diff --git a/library/cpp/actors/core/ask.h b/library/cpp/actors/core/ask.h new file mode 100644 index 0000000000..036f1833a4 --- /dev/null +++ b/library/cpp/actors/core/ask.h @@ -0,0 +1,18 @@ +#pragma once + +#include "actor.h" +#include "event.h" + +#include <library/cpp/threading/future/future.h> + +namespace NActors { + /** + * See `TActorSystem::Ask`. 
+ */ + THolder<IActor> MakeAskActor( + TMaybe<ui32> expectedEventType, + TActorId recipient, + THolder<IEventBase> event, + TDuration timeout, + const NThreading::TPromise<THolder<IEventBase>>& promise); +} diff --git a/library/cpp/actors/core/ask_ut.cpp b/library/cpp/actors/core/ask_ut.cpp new file mode 100644 index 0000000000..e72ebdba9b --- /dev/null +++ b/library/cpp/actors/core/ask_ut.cpp @@ -0,0 +1,131 @@ +#include <library/cpp/testing/unittest/registar.h> + +#include "actorsystem.h" + +#include <library/cpp/actors/testlib/test_runtime.h> + +using namespace NActors; + +class TPingPong: public TActor<TPingPong> { +public: + TPingPong() + : TActor(&TPingPong::Main) + { + } + + STATEFN(Main) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvents::TEvPing, OnPing); + hFunc(TEvents::TEvBlob, OnBlob); + } + } + + void OnPing(const TEvents::TEvPing::TPtr& ev) { + Send(ev->Sender, new TEvents::TEvPong); + } + + void OnBlob(const TEvents::TEvBlob::TPtr& ev) { + Send(ev->Sender, ev->Release().Release()); + } +}; + +class TPing: public TActor<TPing> { +public: + TPing() + : TActor(&TPing::Main) + { + } + + STATEFN(Main) { + Y_UNUSED(ev); + } +}; + +THolder<TTestActorRuntimeBase> CreateRuntime() { + auto runtime = MakeHolder<TTestActorRuntimeBase>(); + runtime->SetScheduledEventFilter([](auto&&, auto&&, auto&&, auto&&) { return false; }); + runtime->Initialize(); + return runtime; +} + +Y_UNIT_TEST_SUITE(AskActor) { + Y_UNIT_TEST(Ok) { + auto runtime = CreateRuntime(); + auto pingpong = runtime->Register(new TPingPong); + + { + auto fut = runtime->GetAnyNodeActorSystem()->Ask<TEvents::TEvPong>( + pingpong, + THolder(new TEvents::TEvPing)); + runtime->DispatchEvents(); + fut.ExtractValueSync(); + } + + { + auto fut = runtime->GetAnyNodeActorSystem()->Ask<TEvents::TEvBlob>( + pingpong, + THolder(new TEvents::TEvBlob("hello!"))); + runtime->DispatchEvents(); + auto ev = fut.ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(ev->Blob, "hello!"); + } + + { + auto fut = runtime->GetAnyNodeActorSystem()->Ask<IEventBase>( + pingpong, + THolder(new TEvents::TEvPing)); + runtime->DispatchEvents(); + auto ev = fut.ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(ev->Type(), TEvents::TEvPong::EventType); + } + } + + Y_UNIT_TEST(Err) { + auto runtime = CreateRuntime(); + auto pingpong = runtime->Register(new TPingPong); + + { + auto fut = runtime->GetAnyNodeActorSystem()->Ask<TEvents::TEvBlob>( + pingpong, + THolder(new TEvents::TEvPing)); + runtime->DispatchEvents(); + UNIT_ASSERT_EXCEPTION_CONTAINS( + fut.ExtractValueSync(), + yexception, + "received unexpected response HelloWorld: Pong"); + } + } + + Y_UNIT_TEST(Timeout) { + auto runtime = CreateRuntime(); + auto ping = runtime->Register(new TPing); + + { + auto fut = runtime->GetAnyNodeActorSystem()->Ask<TEvents::TEvPong>( + ping, + THolder(new TEvents::TEvPing), + TDuration::Seconds(1)); + auto start = runtime->GetCurrentTime(); + runtime->DispatchEvents({}, TDuration::Seconds(5)); + UNIT_ASSERT_EXCEPTION_CONTAINS( + fut.ExtractValueSync(), + yexception, + "ask timeout"); + UNIT_ASSERT_VALUES_EQUAL(runtime->GetCurrentTime() - start, TDuration::Seconds(1)); + } + + { + auto fut = runtime->GetAnyNodeActorSystem()->Ask<IEventBase>( + ping, + THolder(new TEvents::TEvPing), + TDuration::Seconds(1)); + auto start = runtime->GetCurrentTime(); + runtime->DispatchEvents({}, TDuration::Seconds(5)); + UNIT_ASSERT_EXCEPTION_CONTAINS( + fut.ExtractValueSync(), + yexception, + "ask timeout"); + UNIT_ASSERT_VALUES_EQUAL(runtime->GetCurrentTime() - start, 
TDuration::Seconds(1));
+        }
+    }
+}
diff --git a/library/cpp/actors/core/balancer.cpp b/library/cpp/actors/core/balancer.cpp
new file mode 100644
index 0000000000..cc5417b0b5
--- /dev/null
+++ b/library/cpp/actors/core/balancer.cpp
@@ -0,0 +1,293 @@
+#include "balancer.h"
+
+#include "probes.h"
+
+#include <library/cpp/actors/util/intrinsics.h>
+#include <library/cpp/actors/util/datetime.h>
+
+#include <util/system/spinlock.h>
+
+#include <algorithm>
+
+namespace NActors {
+    LWTRACE_USING(ACTORLIB_PROVIDER);
+
+    // Describes the balancing-related state of a pool; the most notable field is `Importance`, used to decide which pool gets a new cpu
+    struct TLevel {
+        // Balancer will try to give more cpu to overloaded pools
+        enum ELoadClass {
+            Underloaded = 0,
+            Moderate = 1,
+            Overloaded = 2,
+        };
+
+        double ScaleFactor;
+        ELoadClass LoadClass;
+        ui64 Importance; // a pool with lower importance may pass cpu to a pool with higher importance, but not the other way around
+
+        TLevel() {}
+
+        TLevel(const TBalancingConfig& cfg, TPoolId poolId, ui64 currentCpus, double cpuIdle) {
+            ScaleFactor = double(currentCpus) / cfg.Cpus;
+            if (cpuIdle > 1.3) { // TODO: add a better underload criterion, based on estimated latency w/o 1 cpu
+                LoadClass = Underloaded;
+            } else if (cpuIdle < 0.2) { // TODO: add a better overload criterion, based on latency
+                LoadClass = Overloaded;
+            } else {
+                LoadClass = Moderate;
+            }
+            Importance = MakeImportance(LoadClass, cfg.Priority, ScaleFactor, cpuIdle, poolId);
+        }
+
+    private:
+        // Importance is a simple ui64 value (bit fields from highest to lowest):
+        //   2 Bits: LoadClass
+        //   8 Bits: Priority
+        //  10 Bits: -ScaleFactor (for max-min fairness with weights equal to TBalancingConfig::Cpus)
+        //  10 Bits: -CpuIdle
+        //   6 Bits: PoolId
+        static ui64 MakeImportance(ELoadClass load, ui8 priority, double scaleFactor, double cpuIdle, TPoolId poolId) {
+            ui64 idle = std::clamp<i64>(1024 - cpuIdle * 512, 0, 1023);
+            ui64 scale = std::clamp<i64>(1024 - scaleFactor * 32, 0, 1023);
+
+            Y_VERIFY(ui64(load) < (1ull << 2ull));
+            Y_VERIFY(ui64(priority) < (1ull << 8ull));
+            Y_VERIFY(ui64(scale) < (1ull << 10ull));
+            Y_VERIFY(ui64(idle) < (1ull << 10ull));
+            Y_VERIFY(ui64(poolId) < (1ull << 6ull));
+
+            static_assert(ui64(MaxPools) <= (1ull << 6ull));
+
+            ui64 importance =
+                (ui64(load) << ui64(6 + 10 + 10 + 8)) |
+                (ui64(priority) << ui64(6 + 10 + 10)) |
+                (ui64(scale) << ui64(6 + 10)) |
+                (ui64(idle) << ui64(6)) |
+                ui64(poolId);
+            return importance;
+        }
+    };
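+
+    // Worked example (editor's note, not part of the original commit): for an
+    // Overloaded pool (load = 2) with priority = 1, scaleFactor = 1.0 and
+    // cpuIdle = 0.1, the fields are idle = clamp(1024 - 0.1 * 512) = 972 and
+    // scale = clamp(1024 - 1.0 * 32) = 992, so
+    //     Importance = (2 << 34) | (1 << 26) | (992 << 16) | (972 << 6) | poolId;
+    // i.e. values compare first by load class, then priority, then the two
+    // fairness terms, with the pool id as the final tie-breaker.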
+
+    // Main balancer implementation
+    class TBalancer: public IBalancer {
+    private:
+        struct TCpu;
+        struct TPool;
+
+        bool Disabled = true;
+        TSpinLock Lock;
+        ui64 NextBalanceTs;
+        TVector<TCpu> Cpus; // Indexed by CpuId, can have gaps
+        TVector<TPool> Pools; // Indexed by PoolId, can have gaps
+        TBalancerConfig Config;
+
+    public:
+        // Setup
+        TBalancer(const TBalancerConfig& config, const TVector<TUnitedExecutorPoolConfig>& unitedPools, ui64 ts);
+        bool AddCpu(const TCpuAllocation& cpuAlloc, TCpuState* cpu) override;
+        ~TBalancer();
+
+        // Balancing
+        bool TryLock(ui64 ts) override;
+        void SetPoolStats(TPoolId pool, const TBalancerStats& stats) override;
+        void Balance() override;
+        void Unlock() override;
+
+    private:
+        void MoveCpu(TPool& from, TPool& to);
+    };
+
+    struct TBalancer::TPool {
+        TBalancingConfig Config;
+        TPoolId PoolId;
+        TString PoolName;
+
+        // Input data for balancing
+        TBalancerStats Prev;
+        TBalancerStats Next;
+
+        // Derived stats
+        double CpuLoad;
+        double CpuIdle;
+
+        // Classification
+        // NOTE: We want to avoid passing cpu back and forth, so we must consider
+        // NOTE: not only the current level, but also the expected levels after movements
+        TLevel CurLevel; // Level with the current amount of cpu
+        TLevel AddLevel; // Level after accepting one more cpu
+        TLevel SubLevel; // Level after donating one cpu
+
+        // Balancing state
+        ui64 CurrentCpus = 0; // Total number of cpus assigned to this pool (zero means the pool is not balanced)
+        ui64 PrevCpus = 0; // Cpus in the last period
+
+        explicit TPool(const TBalancingConfig& cfg = {})
+            : Config(cfg)
+        {}
+
+        void Configure(const TBalancingConfig& cfg, const TString& poolName) {
+            Config = cfg;
+            // Enforce constraints
+            Config.MinCpus = std::clamp<ui32>(Config.MinCpus, 1, Config.Cpus);
+            Config.MaxCpus = Max<ui32>(Config.MaxCpus, Config.Cpus);
+            PoolName = poolName;
+        }
+    };
+
+    struct TBalancer::TCpu {
+        TCpuState* State = nullptr; // Cpu state, nullptr means the cpu is not used (gap)
+        TCpuAllocation Alloc;
+        TPoolId Current;
+        TPoolId Assigned;
+    };
+
+    TBalancer::TBalancer(const TBalancerConfig& config, const TVector<TUnitedExecutorPoolConfig>& unitedPools, ui64 ts)
+        : NextBalanceTs(ts)
+        , Config(config)
+    {
+        for (TPoolId pool = 0; pool < MaxPools; pool++) {
+            Pools.emplace_back();
+            Pools.back().PoolId = pool;
+        }
+        for (const TUnitedExecutorPoolConfig& united : unitedPools) {
+            Pools[united.PoolId].Configure(united.Balancing, united.PoolName);
+        }
+    }
+
+    TBalancer::~TBalancer() {
+    }
+
+    bool TBalancer::AddCpu(const TCpuAllocation& cpuAlloc, TCpuState* state) {
+        // Setup
+        TCpuId cpuId = cpuAlloc.CpuId;
+        if (Cpus.size() <= cpuId) {
+            Cpus.resize(cpuId + 1);
+        }
+        TCpu& cpu = Cpus[cpuId];
+        cpu.State = state;
+        cpu.Alloc = cpuAlloc;
+
+        // Fill every pool with cpus up to TBalancingConfig::Cpus
+        TPoolId pool = 0;
+        for (TPool& p : Pools) {
+            if (p.CurrentCpus < p.Config.Cpus) {
+                p.CurrentCpus++;
+                break;
+            }
+            pool++;
+        }
+        if (pool != MaxPools) { // cpu under balancer control
+            state->SwitchPool(pool);
+            state->AssignPool(pool);
+            Disabled = false;
+            return true;
+        }
+        return false; // non-balanced cpu
+    }
+
+    bool TBalancer::TryLock(ui64 ts) {
+        if (!Disabled && NextBalanceTs < ts && Lock.TryAcquire()) {
+            NextBalanceTs = ts + Us2Ts(Config.PeriodUs);
+            return true;
+        }
+        return false;
+    }
+
+    void TBalancer::SetPoolStats(TPoolId pool, const TBalancerStats& stats) {
+        Y_VERIFY(pool < MaxPools);
+        TPool& p = Pools[pool];
+        p.Prev = p.Next;
+        p.Next = stats;
+    }
+
+    void TBalancer::Balance() {
+        // Update every cpu state
+        for (TCpu& cpu : Cpus) {
+            if (cpu.State) {
+                cpu.State->Load(cpu.Assigned, cpu.Current);
+                if (cpu.Current < MaxPools && cpu.Current != cpu.Assigned) {
+                    return; // previous movement has not been applied yet, wait
+                }
+            }
+        }
+
+        // Process stats, classify and compute pool importance
+        TStackVec<TPool*, MaxPools> order;
+        for (TPool& pool : Pools) {
+            if (pool.Config.Cpus == 0) {
+                continue; // skip gaps (non-existent or non-united pools)
+            }
+            if (pool.Prev.Ts == 0 || pool.Prev.Ts >= pool.Next.Ts) {
+                return; // invalid stats
+            }
+
+            // Compute derived stats
+            pool.CpuLoad = (pool.Next.CpuUs - pool.Prev.CpuUs) / Ts2Us(pool.Next.Ts - pool.Prev.Ts);
+            if (pool.Prev.IdleUs == ui64(-1) || pool.Next.IdleUs == ui64(-1)) {
+                pool.CpuIdle = pool.CurrentCpus - pool.CpuLoad; // for tests
+            } else {
+                pool.CpuIdle = (pool.Next.IdleUs - pool.Prev.IdleUs) / Ts2Us(pool.Next.Ts - pool.Prev.Ts);
+            }
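+
+            // Numeric sketch (editor's note, not in the original commit): with a
+            // 100ms period, Next.CpuUs - Prev.CpuUs == 150'000 gives CpuLoad = 1.5
+            // cpus; a pool holding 2 cpus then has roughly CpuIdle = 0.5 (exact in
+            // the test branch above; real runs derive it from IdleUs), which the
+            // TLevel constructor classifies as Moderate (0.2 <= 0.5 <= 1.3).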
+
+            // Compute levels
+            pool.CurLevel = TLevel(pool.Config, pool.PoolId, pool.CurrentCpus, pool.CpuIdle);
+            pool.AddLevel = TLevel(pool.Config, pool.PoolId, pool.CurrentCpus + 1, pool.CpuIdle); // we expect the taken cpu to become utilized
+            pool.SubLevel = TLevel(pool.Config, pool.PoolId, pool.CurrentCpus - 1, pool.CpuIdle - 1);
+
+            // Prepare for balancing
+            pool.PrevCpus = pool.CurrentCpus;
+            order.push_back(&pool);
+        }
+
+        // Sort pools by importance
+        std::sort(order.begin(), order.end(), [] (TPool* l, TPool* r) {return l->CurLevel.Importance < r->CurLevel.Importance; });
+        for (TPool* pool : order) {
+            LWPROBE(PoolStats, pool->PoolId, pool->PoolName, pool->CurrentCpus, pool->CurLevel.LoadClass, pool->Config.Priority, pool->CurLevel.ScaleFactor, pool->CpuIdle, pool->CpuLoad, pool->CurLevel.Importance, pool->AddLevel.Importance, pool->SubLevel.Importance);
+        }
+
+        // Move cpus from lower importance to higher importance pools
+        for (auto toIter = order.rbegin(); toIter != order.rend(); ++toIter) {
+            TPool& to = **toIter;
+            if (to.CurLevel.LoadClass == TLevel::Overloaded && // if the pool is overloaded
+                to.CurrentCpus < to.Config.MaxCpus) // and constraints would not be violated
+            {
+                for (auto fromIter = order.begin(); (*fromIter)->CurLevel.Importance < to.CurLevel.Importance; ++fromIter) {
+                    TPool& from = **fromIter;
+                    if (from.CurrentCpus == from.PrevCpus && // if not balanced yet
+                        from.CurrentCpus > from.Config.MinCpus && // and constraints would not be violated
+                        from.SubLevel.Importance < to.AddLevel.Importance) // and the relative importance of the two pools would not change after the cpu movement
+                    {
+                        MoveCpu(from, to);
+                        from.CurrentCpus--;
+                        to.CurrentCpus++;
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    void TBalancer::MoveCpu(TBalancer::TPool& from, TBalancer::TPool& to) {
+        for (auto ci = Cpus.rbegin(), ce = Cpus.rend(); ci != ce; ci++) {
+            TCpu& cpu = *ci;
+            if (!cpu.State) {
+                continue;
+            }
+            if (cpu.Assigned == from.PoolId) {
+                cpu.State->AssignPool(to.PoolId);
+                cpu.Assigned = to.PoolId;
+                LWPROBE(MoveCpu, from.PoolId, to.PoolId, from.PoolName, to.PoolName, cpu.Alloc.CpuId);
+                return;
+            }
+        }
+        Y_FAIL();
+    }
+
+    void TBalancer::Unlock() {
+        Lock.Release();
+    }
+
+    IBalancer* MakeBalancer(const TBalancerConfig& config, const TVector<TUnitedExecutorPoolConfig>& unitedPools, ui64 ts) {
+        return new TBalancer(config, unitedPools, ts);
+    }
+}
diff --git a/library/cpp/actors/core/balancer.h b/library/cpp/actors/core/balancer.h
new file mode 100644
index 0000000000..9763ec79e1
--- /dev/null
+++ b/library/cpp/actors/core/balancer.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include "defs.h"
+#include "config.h"
+#include "cpu_state.h"
+
+namespace NActors {
+    // Per-pool statistics used by the balancer
+    struct TBalancerStats {
+        ui64 Ts = 0; // Measurement timestamp
+        ui64 CpuUs = 0; // Total cpu microseconds consumed by the pool on all cpus since start
+        ui64 IdleUs = ui64(-1); // Total cpu microseconds spent spinning or waiting on a futex
+    };
+
+    // Pool cpu balancer
+    struct IBalancer {
+        virtual ~IBalancer() {}
+        virtual bool AddCpu(const TCpuAllocation& cpuAlloc, TCpuState* cpu) = 0;
+        virtual bool TryLock(ui64 ts) = 0;
+        virtual void SetPoolStats(TPoolId pool, const TBalancerStats& stats) = 0;
+        virtual void Balance() = 0;
+        virtual void Unlock() = 0;
+        // TODO: add a method for reconfiguration on the fly
+    };
+
+    IBalancer* MakeBalancer(const TBalancerConfig& config, const TVector<TUnitedExecutorPoolConfig>& unitedPools, ui64 ts);
+}
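Editor's note: a minimal sketch of the driving cycle implied by the `IBalancer` interface above (the helper name `BalanceTick` and the `cpuUsSinceStart` input are placeholders; the unit tests below exercise the same sequence via `TTest::Balance`):

```cpp
#include <library/cpp/actors/core/balancer.h>

using namespace NActors;

// One balancing tick: feed fresh per-pool stats, then let the balancer move cpus.
void BalanceTick(IBalancer* balancer, ui64 ts, const TVector<ui64>& cpuUsSinceStart) {
    if (!balancer->TryLock(ts)) { // rate-limited by TBalancerConfig::PeriodUs
        return;
    }
    for (TPoolId pool = 0; pool < cpuUsSinceStart.size(); ++pool) {
        TBalancerStats stats;
        stats.Ts = ts;                       // measurement timestamp
        stats.CpuUs = cpuUsSinceStart[pool]; // cumulative since start, not per-period
        balancer->SetPoolStats(pool, stats);
    }
    balancer->Balance(); // may reassign cpus between united pools
    balancer->Unlock();
}
```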
diff --git a/library/cpp/actors/core/balancer_ut.cpp b/library/cpp/actors/core/balancer_ut.cpp
new file mode 100644
index 0000000000..7e5e95f4b9
--- /dev/null
+++ b/library/cpp/actors/core/balancer_ut.cpp
@@ -0,0 +1,225 @@
+#include "balancer.h"
+
+#include <library/cpp/actors/util/datetime.h>
+#include <library/cpp/lwtrace/all.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/stream/str.h>
+
+using namespace NActors;
+
+////////////////////////////////////////////////////////////////////////////////
+
+Y_UNIT_TEST_SUITE(PoolCpuBalancer) {
+    struct TTest {
+        TCpuManagerConfig Config;
+        TCpuMask Available;
+        THolder<IBalancer> Balancer;
+        TVector<TCpuState> CpuStates;
+        TVector<ui64> CpuUs;
+        ui64 Now = 0;
+
+        void SetCpuCount(size_t count) {
+            Config.UnitedWorkers.CpuCount = count;
+            for (TCpuId cpuId = 0; cpuId < count; cpuId++) {
+                Available.Set(cpuId);
+            }
+        }
+
+        void AddPool(ui32 minCpus, ui32 cpus, ui32 maxCpus, ui8 priority = 0) {
+            TUnitedExecutorPoolConfig u;
+            u.PoolId = TPoolId(Config.United.size());
+            u.Balancing.Cpus = cpus;
+            u.Balancing.MinCpus = minCpus;
+            u.Balancing.MaxCpus = maxCpus;
+            u.Balancing.Priority = priority;
+            Config.United.push_back(u);
+        }
+
+        void Start() {
+            TCpuAllocationConfig allocation(Available, Config);
+            Balancer.Reset(MakeBalancer(Config.UnitedWorkers.Balancer, Config.United, 0));
+            CpuStates.resize(allocation.Items.size()); // do not resize it later to avoid dangling pointers
+            CpuUs.resize(CpuStates.size());
+            for (const TCpuAllocation& cpuAlloc : allocation.Items) {
+                bool added = Balancer->AddCpu(cpuAlloc, &CpuStates[cpuAlloc.CpuId]);
+                UNIT_ASSERT(added);
+            }
+        }
+
+        void Balance(ui64 deltaTs, const TVector<ui64>& cpuUs) {
+            Now += deltaTs;
+            ui64 ts = Now;
+            if (Balancer->TryLock(ts)) {
+                for (TPoolId pool = 0; pool < cpuUs.size(); pool++) {
+                    CpuUs[pool] += cpuUs[pool];
+                    TBalancerStats stats;
+                    stats.Ts = ts;
+                    stats.CpuUs = CpuUs[pool];
+                    Balancer->SetPoolStats(pool, stats);
+                }
+                Balancer->Balance();
+                Balancer->Unlock();
+            }
+        }
+
+        void ApplyMovements() {
+            for (TCpuState& state : CpuStates) {
+                TPoolId current;
+                TPoolId assigned;
+                state.Load(assigned, current);
+                state.SwitchPool(assigned);
+            }
+        }
+
+        static TString ToStr(const TVector<ui64>& values) {
+            TStringStream ss;
+            ss << "{";
+            for (auto v : values) {
+                ss << " " << v;
+            }
+            ss << " }";
+            return ss.Str();
+        }
+
+        void AssertPoolsCurrentCpus(const TVector<ui64>& cpuRequired) {
+            TVector<ui64> cpuCurrent;
+            cpuCurrent.resize(cpuRequired.size());
+            for (TCpuState& state : CpuStates) {
+                TPoolId current;
+                TPoolId assigned;
+                state.Load(assigned, current);
+                cpuCurrent[current]++;
+            }
+            for (TPoolId pool = 0; pool < cpuRequired.size(); pool++) {
+                UNIT_ASSERT_C(cpuCurrent[pool] == cpuRequired[pool],
+                    "cpu distribution mismatch, required " << ToStr(cpuRequired) << " but got " << ToStr(cpuCurrent));
+            }
+        }
+    };
+
+    Y_UNIT_TEST(StartLwtrace) {
+        NLWTrace::StartLwtraceFromEnv();
+    }
+
+    Y_UNIT_TEST(AllOverloaded) {
+        TTest t;
+        int cpus = 10;
+        t.SetCpuCount(cpus);
+        t.AddPool(1, 1, 10); // pool=0
+        t.AddPool(1, 2, 10); // pool=1
+        t.AddPool(1, 3, 10); // pool=2
+        t.AddPool(1, 4, 10); // pool=3
+        t.Start();
+        ui64 dts = 1.01 * Us2Ts(t.Config.UnitedWorkers.Balancer.PeriodUs);
+        ui64 totalCpuUs = cpus * Ts2Us(dts); // pretend every pool has consumed as much as the whole actor system - overload
+        for (int i = 0; i < cpus; i++) {
+            t.Balance(dts, {totalCpuUs, totalCpuUs, totalCpuUs, totalCpuUs});
+            t.ApplyMovements();
+        }
+        t.AssertPoolsCurrentCpus({1, 2, 3, 4});
+    }
+
+    Y_UNIT_TEST(OneOverloaded) {
+        TTest t;
+        int cpus = 10;
+        t.SetCpuCount(cpus);
+        t.AddPool(1, 1, 10); // pool=0
+        t.AddPool(1, 2, 10); // pool=1
+        t.AddPool(1, 3, 10); // pool=2
+        t.AddPool(1, 4, 10); // pool=3
+        t.Start();
+        ui64 dts = 1.01 * Us2Ts(t.Config.UnitedWorkers.Balancer.PeriodUs);
+        ui64 totalCpuUs = cpus * Ts2Us(dts);
+        for (int i = 0; i < cpus; i++) {
+            t.Balance(dts, {totalCpuUs, 0, 0, 0});
+            t.ApplyMovements();
+        }
+        t.AssertPoolsCurrentCpus({7, 1, 1, 1});
+        for (int i = 0; i < cpus; i++) {
+            t.Balance(dts, {0, totalCpuUs, 0, 0});
+            t.ApplyMovements();
+        }
+        t.AssertPoolsCurrentCpus({1, 7, 1, 1});
+        for (int i = 0; i < cpus; i++) {
+            t.Balance(dts, {0, 0, totalCpuUs, 0});
+            t.ApplyMovements();
+        }
+        t.AssertPoolsCurrentCpus({1, 1, 7, 1});
+        for (int i = 0; i < cpus; i++) {
+            t.Balance(dts, {0, 0, 0, totalCpuUs});
+            t.ApplyMovements();
+        }
+        t.AssertPoolsCurrentCpus({1, 1, 1, 7});
+    }
+
+    Y_UNIT_TEST(TwoOverloadedFairness) {
+        TTest t;
+        int cpus = 10;
+        t.SetCpuCount(cpus);
+        t.AddPool(1, 1, 10); // pool=0
+        t.AddPool(1, 2, 10); // pool=1
+        t.AddPool(1, 3, 10); // pool=2
+        t.AddPool(1, 4, 10); // pool=3
+        t.Start();
+        ui64 dts = 1.01 * Us2Ts(t.Config.UnitedWorkers.Balancer.PeriodUs);
+        ui64 totalCpuUs = cpus * Ts2Us(dts);
+        for (int i = 0; i < cpus; i++) {
+            t.Balance(dts, {totalCpuUs, totalCpuUs, 0, 0});
+            t.ApplyMovements();
+        }
+        t.AssertPoolsCurrentCpus({3, 5, 1, 1});
+        for (int i = 0; i < cpus; i++) {
+            t.Balance(dts, {totalCpuUs, 0, totalCpuUs, 0});
+            t.ApplyMovements();
+        }
+        t.AssertPoolsCurrentCpus({2, 1, 6, 1});
+        for (int i = 0; i < cpus; i++) {
+            t.Balance(dts, {totalCpuUs, 0, 0, totalCpuUs});
+            t.ApplyMovements();
+        }
+        t.AssertPoolsCurrentCpus({2, 1, 1, 6});
+        for (int i = 0; i < cpus; i++) {
+            t.Balance(dts, {0, totalCpuUs, totalCpuUs, 0});
+            t.ApplyMovements();
+        }
+        t.AssertPoolsCurrentCpus({1, 3, 5, 1});
+        for (int i = 0; i < cpus; i++) {
+            t.Balance(dts, {0, totalCpuUs, 0, totalCpuUs});
+            t.ApplyMovements();
+        }
+        t.AssertPoolsCurrentCpus({1, 3, 1, 5});
+        for (int i = 0; i < cpus; i++) {
+            t.Balance(dts, {0, 0, totalCpuUs, totalCpuUs});
+            t.ApplyMovements();
+        }
+        t.AssertPoolsCurrentCpus({1, 1, 3, 5});
+    }
+
+    Y_UNIT_TEST(TwoOverloadedPriority) {
+        TTest t;
+        int cpus = 20;
+        t.SetCpuCount(cpus);
+        t.AddPool(1, 5, 20, 0); // pool=0
+        t.AddPool(1, 5, 20, 1); // pool=1
+        t.AddPool(1, 5, 20, 2); // pool=2
+        t.AddPool(1, 5, 20, 3); // pool=3
+        t.Start();
+        ui64 dts = 1.01 * Us2Ts(t.Config.UnitedWorkers.Balancer.PeriodUs);
+        ui64 mErlang = Ts2Us(dts) / 1000;
+        for (int i = 0; i < cpus; i++) {
+            t.Balance(dts, {20000 * mErlang, 2500 * mErlang, 4500 * mErlang, 9500 * mErlang});
+            t.ApplyMovements();
+        }
+        t.AssertPoolsCurrentCpus({2, 3, 5, 10});
+        t.Balance(dts, {20000 * mErlang, 2500 * mErlang, 4500 * mErlang, 8500 * mErlang});
+        t.ApplyMovements();
+        t.AssertPoolsCurrentCpus({3, 3, 5, 9});
+        // NOTE: this operation requires one move, but we do not do a global analysis, so multiple steps (1->2 & 0->1) are required (can be optimized later)
+        for (int i = 0; i < 3; i++) {
+            t.Balance(dts, {20000 * mErlang, 2500 * mErlang, 5500 * mErlang, 8500 * mErlang});
+            t.ApplyMovements();
+        }
+        t.AssertPoolsCurrentCpus({2, 3, 6, 9});
+    }
+}
diff --git a/library/cpp/actors/core/buffer.cpp b/library/cpp/actors/core/buffer.cpp
new file mode 100644
index 0000000000..48128d76ef
--- /dev/null
+++ b/library/cpp/actors/core/buffer.cpp
@@ -0,0 +1,93 @@
+#include "buffer.h"
+
+#include <util/system/yassert.h>
+
+#include <algorithm>
+
+TBufferBase::TBufferBase(size_t size) noexcept
+    : Size(size)
+{
+}
+
+size_t
+TBufferBase::GetSize() const noexcept {
+    return Size;
+}
+
+void TBufferBase::SetSize(size_t size) noexcept {
+    Size = size;
+}
+
+/////////////////////////////////////////////////////////////////////
+
+TBufferBaseT<PointerType>::TBufferBaseT(PointerType data, size_t size) noexcept
+    : TBufferBase(size)
+    , Data(data)
+{
+}
+
+template <typename PointerType>
+PointerType
+TBufferBaseT<PointerType>::GetPointer() const noexcept {
+    return Data;
+}
+
+template <typename PointerType>
+void TBufferBaseT<PointerType>::Assign(PointerType data, size_t size) noexcept {
+    Data = data;
+    Size = size;
+}
+
+template <>
+void TBufferBaseT<void*>::Cut(size_t offset) noexcept {
+    Y_VERIFY_DEBUG(offset <= Size);
+    Data = static_cast<char*>(Data) + offset;
+    TBufferBase::Size -= offset;
+}
+
+template <>
+void TBufferBaseT<const void*>::Cut(size_t offset) noexcept {
+    Y_VERIFY_DEBUG(offset <= Size);
+    Data = static_cast<const char*>(Data) + offset;
+    TBufferBase::Size -= offset;
+}
+
+template class TBufferBaseT<void*>;
+template class TBufferBaseT<const void*>;
+
+/////////////////////////////////////////////////////////////////////
+
+TConstBuffer::TConstBuffer(const void* data, size_t size) noexcept
+    : TBufferBaseT<const void*>(data, size)
+{
+}
+
+TConstBuffer::TConstBuffer(const TMutableBuffer& buffer) noexcept
+    : TBufferBaseT<const void*>(buffer.GetPointer(), buffer.GetSize())
+{
+}
+
+TConstBuffer
+TConstBuffer::Offset(ptrdiff_t offset, size_t size) const noexcept {
+    return TConstBuffer(static_cast<const char*>(Data) + offset, std::min(Size - offset, size));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TMutableBuffer::TMutableBuffer(void* data, size_t size) noexcept
+    : TBufferBaseT<void*>(data, size)
+{
+}
+
+TMutableBuffer
+TMutableBuffer::Offset(ptrdiff_t offset, size_t size) const noexcept {
+    return TMutableBuffer(static_cast<char*>(Data) + offset, std::min(Size - offset, size));
+}
+
+size_t
+TMutableBuffer::CopyFrom(const TConstBuffer& buffer) const noexcept {
+    const auto size = std::min(Size, buffer.Size);
+    std::memcpy(Data, buffer.Data, size);
+    return size;
+}
diff --git a/library/cpp/actors/core/buffer.h b/library/cpp/actors/core/buffer.h
new file mode 100644
index 0000000000..95425046d6
--- /dev/null
+++ b/library/cpp/actors/core/buffer.h
@@ -0,0 +1,62 @@
+#pragma once
+
+#include <limits>
+
+class TConstBuffer;
+class TMutableBuffer;
+
+class TBufferBase {
+public:
+    size_t GetSize() const noexcept;
+
+    void SetSize(size_t newSize) noexcept;
+
+protected:
+    TBufferBase(size_t size = 0) noexcept;
+
+    size_t Size;
+};
+
+template <typename PointerType>
+class TBufferBaseT: public TBufferBase {
+public:
+    PointerType GetPointer() const noexcept;
+
+    void Cut(size_t offset) noexcept;
+
+    void Assign(PointerType data = nullptr, size_t size = 0U) noexcept;
+
+protected:
+    TBufferBaseT(PointerType data, size_t size) noexcept;
+
+    PointerType Data;
+};
+
+/// Represents a constant memory buffer; it does not own the memory.
+class TConstBuffer: public TBufferBaseT<const void*> {
+    friend class TMutableBuffer;
+
+public:
+    TConstBuffer(const TMutableBuffer& buffer) noexcept;
+
+    TConstBuffer(const void* data = nullptr, size_t size = 0U) noexcept;
+
+    TConstBuffer Offset(ptrdiff_t offset, size_t size = std::numeric_limits<size_t>::max()) const noexcept;
+};
+
+/// Represents a mutable memory buffer; it does not own the memory.
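/// Illustrative usage (a sketch, not part of the original commit; `storage` and
/// `src` are hypothetical names):
///     char storage[64];
///     TMutableBuffer buf(storage, sizeof(storage));
///     TMutableBuffer tail = buf.Offset(16);       // non-owning view of bytes 16..63
///     const char src[] = "hello";
///     size_t copied = tail.CopyFrom(TConstBuffer(src, sizeof(src))); // copies min(48, 6) = 6 bytes
///     buf.Cut(8);                                 // advance the view past its first 8 bytes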
+class TMutableBuffer: public TBufferBaseT<void*> { + friend class TConstBuffer; + +public: + TMutableBuffer(void* data = nullptr, size_t size = 0U) noexcept; + + TMutableBuffer(const TMutableBuffer& value) noexcept + : TBufferBaseT<void*>(value) + { + } + + TMutableBuffer Offset(ptrdiff_t offset, size_t size = std::numeric_limits<size_t>::max()) const noexcept; + + size_t CopyFrom(const TConstBuffer& buffer) const noexcept; +}; diff --git a/library/cpp/actors/core/callstack.cpp b/library/cpp/actors/core/callstack.cpp new file mode 100644 index 0000000000..9297c1a079 --- /dev/null +++ b/library/cpp/actors/core/callstack.cpp @@ -0,0 +1,93 @@ +#include "callstack.h" +#include <util/thread/singleton.h> + +#ifdef USE_ACTOR_CALLSTACK + +namespace NActors { + namespace { + void (*PreviousFormatBackTrace)(IOutputStream*) = 0; + ui32 ActorBackTraceEnableCounter = 0; + } + + void ActorFormatBackTrace(IOutputStream* out) { + TStringStream str; + PreviousFormatBackTrace(&str); + str << Endl; + TCallstack::DumpCallstack(str); + *out << str.Str(); + } + + void EnableActorCallstack() { + if (ActorBackTraceEnableCounter == 0) { + Y_VERIFY(PreviousFormatBackTrace == 0); + PreviousFormatBackTrace = SetFormatBackTraceFn(ActorFormatBackTrace); + } + + ++ActorBackTraceEnableCounter; + } + + void DisableActorCallstack() { + --ActorBackTraceEnableCounter; + + if (ActorBackTraceEnableCounter == 0) { + Y_VERIFY(PreviousFormatBackTrace); + SetFormatBackTraceFn(PreviousFormatBackTrace); + PreviousFormatBackTrace = 0; + } + } + + TCallstack::TCallstack() + : BeginIdx(0) + , Size(0) + , LinesToSkip(0) + { + } + + void TCallstack::SetLinesToSkip() { + TTrace record; + LinesToSkip = BackTrace(record.Data, TTrace::CAPACITY); + } + + void TCallstack::Trace() { + size_t currentIdx = (BeginIdx + Size) % RECORDS; + if (Size == RECORDS) { + ++BeginIdx; + } else { + ++Size; + } + TTrace& record = Record[currentIdx]; + record.Size = BackTrace(record.Data, TTrace::CAPACITY); + record.LinesToSkip = LinesToSkip; + } + + void TCallstack::TraceIfEmpty() { + if (Size == 0) { + LinesToSkip = 0; + Trace(); + } + } + + TCallstack& TCallstack::GetTlsCallstack() { + return *FastTlsSingleton<TCallstack>(); + } + + void TCallstack::DumpCallstack(TStringStream& str) { + TCallstack& callstack = GetTlsCallstack(); + for (int i = callstack.Size - 1; i >= 0; --i) { + TTrace& record = callstack.Record[(callstack.BeginIdx + i) % RECORDS]; + str << Endl << "Trace entry " << i << Endl << Endl; + size_t size = record.Size; + if (size > record.LinesToSkip && size < TTrace::CAPACITY) { + size -= record.LinesToSkip; + } + if (size > RECORDS_TO_SKIP) { + FormatBackTrace(&str, &record.Data[RECORDS_TO_SKIP], size - RECORDS_TO_SKIP); + } else { + FormatBackTrace(&str, record.Data, size); + } + str << Endl; + } + } +} + +#endif diff --git a/library/cpp/actors/core/callstack.h b/library/cpp/actors/core/callstack.h new file mode 100644 index 0000000000..176717d2ae --- /dev/null +++ b/library/cpp/actors/core/callstack.h @@ -0,0 +1,58 @@ +#pragma once + +#ifndef NDEBUG +//#define ENABLE_ACTOR_CALLSTACK +#endif + +#ifdef ENABLE_ACTOR_CALLSTACK +#include "defs.h" +#include <util/system/backtrace.h> +#include <util/stream/str.h> +#include <util/generic/deque.h> +#define USE_ACTOR_CALLSTACK + +namespace NActors { + struct TCallstack { + struct TTrace { + static const size_t CAPACITY = 50; + void* Data[CAPACITY]; + size_t Size; + size_t LinesToSkip; + + TTrace() + : Size(0) + , LinesToSkip(0) + { + } + }; + + static const size_t RECORDS = 8; + static const size_t 
RECORDS_TO_SKIP = 2; + TTrace Record[RECORDS]; + size_t BeginIdx; + size_t Size; + size_t LinesToSkip; + + TCallstack(); + void SetLinesToSkip(); + void Trace(); + void TraceIfEmpty(); + static TCallstack& GetTlsCallstack(); + static void DumpCallstack(TStringStream& str); + }; + + void EnableActorCallstack(); + void DisableActorCallstack(); + +} + +#else + +namespace NActors { + inline void EnableActorCallstack(){}; + + inline void DisableActorCallstack(){}; + +} + +#endif diff --git a/library/cpp/actors/core/config.h b/library/cpp/actors/core/config.h new file mode 100644 index 0000000000..2486bf4c43 --- /dev/null +++ b/library/cpp/actors/core/config.h @@ -0,0 +1,239 @@ +#pragma once + +#include "defs.h" +#include <library/cpp/actors/util/cpumask.h> +#include <library/cpp/monlib/dynamic_counters/counters.h> +#include <util/datetime/base.h> +#include <util/generic/ptr.h> +#include <util/generic/string.h> +#include <util/generic/vector.h> + +namespace NActors { + + struct TBalancingConfig { + // Default cpu count (used during overload). Zero value disables this pool balancing + // 1) Sum of `Cpus` on all pools cannot be changed without restart + // (changing cpu mode between Shared and Assigned is not implemented yet) + // 2) This sum must be equal to TUnitedWorkersConfig::CpuCount, + // otherwise `CpuCount - SUM(Cpus)` cpus will be in Shared mode (i.e. actorsystem 2.0) + ui32 Cpus = 0; + + ui32 MinCpus = 0; // Lower balancing bound, should be at least 1, and not greater than `Cpus` + ui32 MaxCpus = 0; // Higher balancing bound, should be not lower than `Cpus` + ui8 Priority = 0; // Priority of pool to obtain cpu due to balancing (higher is better) + ui64 ToleratedLatencyUs = 0; // p100-latency threshold indicating that more cpus are required by pool + }; + + struct TBalancerConfig { + ui64 PeriodUs = 15000000; // Time between balancer steps + }; + + struct TBasicExecutorPoolConfig { + static constexpr TDuration DEFAULT_TIME_PER_MAILBOX = TDuration::MilliSeconds(10); + static constexpr ui32 DEFAULT_EVENTS_PER_MAILBOX = 100; + + ui32 PoolId = 0; + TString PoolName; + ui32 Threads = 1; + ui64 SpinThreshold = 100; + TCpuMask Affinity; // Executor thread affinity + TDuration TimePerMailbox = DEFAULT_TIME_PER_MAILBOX; + ui32 EventsPerMailbox = DEFAULT_EVENTS_PER_MAILBOX; + int RealtimePriority = 0; + ui32 MaxActivityType = 1; + }; + + struct TIOExecutorPoolConfig { + ui32 PoolId = 0; + TString PoolName; + ui32 Threads = 1; + TCpuMask Affinity; // Executor thread affinity + ui32 MaxActivityType = 1; + }; + + struct TUnitedExecutorPoolConfig { + static constexpr TDuration DEFAULT_TIME_PER_MAILBOX = TDuration::MilliSeconds(10); + static constexpr ui32 DEFAULT_EVENTS_PER_MAILBOX = 100; + + ui32 PoolId = 0; + TString PoolName; + + // Resource sharing + ui32 Concurrency = 0; // Limits simultaneously running mailboxes count if set to non-zero value (do not set if Balancing.Cpus != 0) + TPoolWeight Weight = 0; // Weight in fair cpu-local pool scheduler + TCpuMask Allowed; // Allowed CPUs for workers to run this pool on (ignored if balancer works, i.e. actorsystem 1.5) + + // Single mailbox execution limits + TDuration TimePerMailbox = DEFAULT_TIME_PER_MAILBOX; + ui32 EventsPerMailbox = DEFAULT_EVENTS_PER_MAILBOX; + + // Introspection + ui32 MaxActivityType = 1; + + // Long-term balancing + TBalancingConfig Balancing; + }; + + struct TUnitedWorkersConfig { + ui32 CpuCount = 0; // Total CPUs running united workers (i.e. 
TBasicExecutorPoolConfig::Threads analog); set to zero to disable united workers + ui64 SpinThresholdUs = 100; // Limit for active spinning in case all pools became idle + ui64 PoolLimitUs = 500; // Soft limit on pool execution + ui64 EventLimitUs = 100; // Hard limit on last event execution exceeding pool limit + ui64 LimitPrecisionUs = 100; // Maximum delay of timer on limit excess (delay needed to avoid settimer syscall on every pool switch) + ui64 FastWorkerPriority = 10; // Real-time priority of workers not exceeding hard limits + ui64 IdleWorkerPriority = 20; // Real-time priority of standby workers waiting for hard preemption on timers (should be greater than FastWorkerPriority) + TCpuMask Allowed; // Allowed CPUs for workers to run on (every worker has affinity for exactly one cpu) + bool NoRealtime = false; // For environments w/o permissions for RT-threads + bool NoAffinity = false; // For environments w/o permissions for cpu affinity + TBalancerConfig Balancer; + }; + + struct TCpuManagerConfig { + TUnitedWorkersConfig UnitedWorkers; + TVector<TBasicExecutorPoolConfig> Basic; + TVector<TIOExecutorPoolConfig> IO; + TVector<TUnitedExecutorPoolConfig> United; + + ui32 GetExecutorsCount() const { + return Basic.size() + IO.size() + United.size(); + } + + TString GetPoolName(ui32 poolId) const { + for (const auto& p : Basic) { + if (p.PoolId == poolId) { + return p.PoolName; + } + } + for (const auto& p : IO) { + if (p.PoolId == poolId) { + return p.PoolName; + } + } + for (const auto& p : United) { + if (p.PoolId == poolId) { + return p.PoolName; + } + } + Y_FAIL("undefined pool id: %" PRIu32, (ui32)poolId); + } + + ui32 GetThreads(ui32 poolId) const { + for (const auto& p : Basic) { + if (p.PoolId == poolId) { + return p.Threads; + } + } + for (const auto& p : IO) { + if (p.PoolId == poolId) { + return p.Threads; + } + } + for (const auto& p : United) { + if (p.PoolId == poolId) { + return p.Concurrency ? 
p.Concurrency : UnitedWorkers.CpuCount;
+                }
+            }
+            Y_FAIL("undefined pool id: %" PRIu32, (ui32)poolId);
+        }
+    };
+
+    struct TSchedulerConfig {
+        TSchedulerConfig(
+                ui64 resolution = 1024,
+                ui64 spinThreshold = 100,
+                ui64 progress = 10000,
+                bool useSchedulerActor = false)
+            : ResolutionMicroseconds(resolution)
+            , SpinThreshold(spinThreshold)
+            , ProgressThreshold(progress)
+            , UseSchedulerActor(useSchedulerActor)
+        {}
+
+        ui64 ResolutionMicroseconds = 1024;
+        ui64 SpinThreshold = 100;
+        ui64 ProgressThreshold = 10000;
+        bool UseSchedulerActor = false; // False is default because tests use scheduler thread
+        ui64 RelaxedSendPaceEventsPerSecond = 200000;
+        ui64 RelaxedSendPaceEventsPerCycle = RelaxedSendPaceEventsPerSecond * ResolutionMicroseconds / 1000000;
+        // For resolution >= 250000 microseconds threshold is SendPace
+        // For resolution <= 250 microseconds threshold is 20 * SendPace
+        ui64 RelaxedSendThresholdEventsPerSecond = RelaxedSendPaceEventsPerSecond *
+            (20 - ((20 - 1) * ClampVal(ResolutionMicroseconds, ui64(250), ui64(250000)) - 250) / (250000 - 250));
+        ui64 RelaxedSendThresholdEventsPerCycle = RelaxedSendThresholdEventsPerSecond * ResolutionMicroseconds / 1000000;
+
+        // Optional subsection for scheduler counters (usually subsystem=utils)
+        NMonitoring::TDynamicCounterPtr MonCounters = nullptr;
+    };
+
+    struct TCpuAllocation {
+        struct TPoolAllocation {
+            TPoolId PoolId;
+            TPoolWeight Weight;
+
+            TPoolAllocation(TPoolId poolId = 0, TPoolWeight weight = 0)
+                : PoolId(poolId)
+                , Weight(weight)
+            {}
+        };
+
+        TCpuId CpuId;
+        TVector<TPoolAllocation> AllowedPools;
+
+        TPoolsMask GetPoolsMask() const {
+            TPoolsMask mask = 0;
+            for (const auto& pa : AllowedPools) {
+                if (pa.PoolId < MaxPools) {
+                    mask |= (1ull << pa.PoolId);
+                }
+            }
+            return mask;
+        }
+
+        bool HasPool(TPoolId pool) const {
+            for (const auto& pa : AllowedPools) {
+                if (pa.PoolId == pool) {
+                    return true;
+                }
+            }
+            return false;
+        }
+    };
+
+    struct TCpuAllocationConfig {
+        TVector<TCpuAllocation> Items;
+
+        TCpuAllocationConfig(const TCpuMask& available, const TCpuManagerConfig& cfg) {
+            for (const TUnitedExecutorPoolConfig& pool : cfg.United) {
+                Y_VERIFY(pool.PoolId < MaxPools, "wrong PoolId of united executor pool: %s(%d)",
+                    pool.PoolName.c_str(), (pool.PoolId));
+            }
+            ui32 allocated[MaxPools] = {0};
+            for (TCpuId cpu = 0; cpu < available.Size() && Items.size() < cfg.UnitedWorkers.CpuCount; cpu++) {
+                if (available.IsSet(cpu)) {
+                    TCpuAllocation item;
+                    item.CpuId = cpu;
+                    for (const TUnitedExecutorPoolConfig& pool : cfg.United) {
+                        if (cfg.UnitedWorkers.Allowed.IsEmpty() || cfg.UnitedWorkers.Allowed.IsSet(cpu)) {
+                            if (pool.Allowed.IsEmpty() || pool.Allowed.IsSet(cpu)) {
+                                item.AllowedPools.emplace_back(pool.PoolId, pool.Weight);
+                                allocated[pool.PoolId]++;
+                            }
+                        }
+                    }
+                    if (!item.AllowedPools.empty()) {
+                        Items.push_back(item);
+                    }
+                }
+            }
+            for (const TUnitedExecutorPoolConfig& pool : cfg.United) {
+                Y_VERIFY(allocated[pool.PoolId] > 0, "unable to allocate cpu for united executor pool: %s(%d)",
+                    pool.PoolName.c_str(), (pool.PoolId));
+            }
+        }
+
+        operator bool() const {
+            return !Items.empty();
+        }
+    };
+
+}
diff --git a/library/cpp/actors/core/cpu_manager.cpp b/library/cpp/actors/core/cpu_manager.cpp
new file mode 100644
index 0000000000..39089b5d83
--- /dev/null
+++ b/library/cpp/actors/core/cpu_manager.cpp
@@ -0,0 +1,108 @@
+#include "cpu_manager.h"
+#include "probes.h"
+
+namespace NActors {
+    LWTRACE_USING(ACTORLIB_PROVIDER);
+
+    void TCpuManager::Setup() {
+        TAffinity available;
+        available.Current();
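+        // Illustration (not part of the original commit): TCpuAllocationConfig walks the
+        // available cpu mask and assigns up to Config.UnitedWorkers.CpuCount cpus to the
+        // united pools. For example, with CpuCount = 2 and two united pools that leave
+        // their `Allowed` masks empty, every allocated cpu lists both pools:
+        //     cpu 0: AllowedPools = { {PoolId=0, Weight0}, {PoolId=1, Weight1} }
+        //     cpu 1: AllowedPools = { {PoolId=0, Weight0}, {PoolId=1, Weight1} }
+        // An empty allocation (no united pools configured) skips united workers entirely.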
+ TCpuAllocationConfig allocation(available, Config); + + if (allocation) { + if (!Balancer) { + Balancer.Reset(MakeBalancer(Config.UnitedWorkers.Balancer, Config.United, GetCycleCountFast())); + } + UnitedWorkers.Reset(new TUnitedWorkers(Config.UnitedWorkers, Config.United, allocation, Balancer.Get())); + } + + Executors.Reset(new TAutoPtr<IExecutorPool>[ExecutorPoolCount]); + + for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) { + Executors[excIdx].Reset(CreateExecutorPool(excIdx)); + } + } + + void TCpuManager::PrepareStart(TVector<NSchedulerQueue::TReader*>& scheduleReaders, TActorSystem* actorSystem) { + if (UnitedWorkers) { + UnitedWorkers->Prepare(actorSystem, scheduleReaders); + } + for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) { + NSchedulerQueue::TReader* readers; + ui32 readersCount = 0; + Executors[excIdx]->Prepare(actorSystem, &readers, &readersCount); + for (ui32 i = 0; i != readersCount; ++i, ++readers) { + scheduleReaders.push_back(readers); + } + } + } + + void TCpuManager::Start() { + if (UnitedWorkers) { + UnitedWorkers->Start(); + } + for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) { + Executors[excIdx]->Start(); + } + } + + void TCpuManager::PrepareStop() { + for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) { + Executors[excIdx]->PrepareStop(); + } + if (UnitedWorkers) { + UnitedWorkers->PrepareStop(); + } + } + + void TCpuManager::Shutdown() { + for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) { + Executors[excIdx]->Shutdown(); + } + if (UnitedWorkers) { + UnitedWorkers->Shutdown(); + } + for (ui32 round = 0, done = 0; done < ExecutorPoolCount && round < 3; ++round) { + done = 0; + for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) { + if (Executors[excIdx]->Cleanup()) { + ++done; + } + } + } + } + + void TCpuManager::Cleanup() { + for (ui32 round = 0, done = 0; done < ExecutorPoolCount; ++round) { + Y_VERIFY(round < 10, "actorsystem cleanup could not be completed in 10 rounds"); + done = 0; + for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) { + if (Executors[excIdx]->Cleanup()) { + ++done; + } + } + } + Executors.Destroy(); + UnitedWorkers.Destroy(); + } + + IExecutorPool* TCpuManager::CreateExecutorPool(ui32 poolId) { + for (TBasicExecutorPoolConfig& cfg : Config.Basic) { + if (cfg.PoolId == poolId) { + return new TBasicExecutorPool(cfg); + } + } + for (TIOExecutorPoolConfig& cfg : Config.IO) { + if (cfg.PoolId == poolId) { + return new TIOExecutorPool(cfg); + } + } + for (TUnitedExecutorPoolConfig& cfg : Config.United) { + if (cfg.PoolId == poolId) { + IExecutorPool* result = new TUnitedExecutorPool(cfg, UnitedWorkers.Get()); + return result; + } + } + Y_FAIL("missing PoolId: %d", int(poolId)); + } +} diff --git a/library/cpp/actors/core/cpu_manager.h b/library/cpp/actors/core/cpu_manager.h new file mode 100644 index 0000000000..454035477b --- /dev/null +++ b/library/cpp/actors/core/cpu_manager.h @@ -0,0 +1,57 @@ +#pragma once + +#include "actorsystem.h" +#include "executor_pool_basic.h" +#include "executor_pool_io.h" +#include "executor_pool_united.h" + +namespace NActors { + class TCpuManager : public TNonCopyable { + const ui32 ExecutorPoolCount; + TArrayHolder<TAutoPtr<IExecutorPool>> Executors; + THolder<TUnitedWorkers> UnitedWorkers; + THolder<IBalancer> Balancer; + TCpuManagerConfig Config; + public: + explicit TCpuManager(THolder<TActorSystemSetup>& setup) + : ExecutorPoolCount(setup->GetExecutorsCount()) + , Balancer(setup->Balancer) + , Config(setup->CpuManager) + { + if 
(setup->Executors) { // Explicit mode w/o united pools
+                Executors.Reset(setup->Executors.Release());
+                for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) {
+                    IExecutorPool* pool = Executors[excIdx].Get();
+                    Y_VERIFY(dynamic_cast<TUnitedExecutorPool*>(pool) == nullptr,
+                        "united executor pool is prohibited in explicit mode of NActors::TCpuManager");
+                }
+            } else {
+                Setup();
+            }
+        }
+
+        void Setup();
+        void PrepareStart(TVector<NSchedulerQueue::TReader*>& scheduleReaders, TActorSystem* actorSystem);
+        void Start();
+        void PrepareStop();
+        void Shutdown();
+        void Cleanup();
+
+        ui32 GetExecutorsCount() const {
+            return ExecutorPoolCount;
+        }
+
+        IExecutorPool* GetExecutorPool(ui32 poolId) {
+            return Executors[poolId].Get();
+        }
+
+        void GetPoolStats(ui32 poolId, TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const {
+            if (poolId < ExecutorPoolCount) {
+                Executors[poolId]->GetCurrentStats(poolStats, statsCopy);
+            }
+        }
+
+    private:
+        IExecutorPool* CreateExecutorPool(ui32 poolId);
+    };
+}
diff --git a/library/cpp/actors/core/cpu_state.h b/library/cpp/actors/core/cpu_state.h
new file mode 100644
index 0000000000..b8030149a7
--- /dev/null
+++ b/library/cpp/actors/core/cpu_state.h
@@ -0,0 +1,215 @@
+#pragma once
+
+#include "defs.h"
+
+#include <library/cpp/actors/util/futex.h>
+
+namespace NActors {
+
+    class alignas(64) TCpuState {
+        // Atomic cacheline-aligned 64-bit state, see description below
+        TAtomic State = 0;
+        char Padding[64 - sizeof(TAtomic)];
+
+        // Bits 0-31: Currently executing pool
+        //  - value less than MaxPools means cpu is executing corresponding pool (fast-worker is executing or waiting for slow-workers)
+        //  - one of Cpu* values in case of idle cpu
+        //  - used as futex by blocked fast-worker
+        static constexpr ui64 CurrentBits = 32;
+        static constexpr ui64 CurrentMask = ui64((1ull << CurrentBits) - 1);
+
+        // Bits 32-63: Assigned pool
+        //  - value is set by balancer
+        //  - NOT used as futex
+        //  - Not balanced
+        static constexpr ui64 AssignedOffs = 32;
+        static constexpr ui64 AssignedMask = ~CurrentMask;
+
+    public:
+        TCpuState() {
+            Y_UNUSED(Padding);
+        }
+
+        void Load(TPoolId& assigned, TPoolId& current) const {
+            TAtomicBase state = AtomicLoad(&State);
+            assigned = (state & AssignedMask) >> AssignedOffs;
+            current = state & CurrentMask;
+        }
+
+        TPoolId CurrentPool() const {
+            return TPoolId(AtomicLoad(&State) & CurrentMask);
+        }
+
+        void SwitchPool(TPoolId pool) {
+            while (true) {
+                TAtomicBase state = AtomicLoad(&State);
+                if (AtomicCas(&State, (state & ~CurrentMask) | pool, state)) {
+                    return;
+                }
+            }
+        }
+
+        TPoolId AssignedPool() const {
+            return TPoolId((AtomicLoad(&State) & AssignedMask) >> AssignedOffs);
+        }
+
+        // Assigns new pool to cpu and wakes it up if cpu is idle
+        void AssignPool(TPoolId pool) {
+            while (true) {
+                TAtomicBase state = AtomicLoad(&State);
+                TPoolId current(state & CurrentMask);
+                if (Y_UNLIKELY(current == CpuStopped)) {
+                    return; // it would be better to shut down instead of balancing
+                }
+                // Idle cpu must be woken up after balancing to handle pending tokens (if any) in assigned/schedulable pool(s)
+                if (current == CpuSpinning) {
+                    if (AtomicCas(&State, (ui64(pool) << AssignedOffs) | pool, state)) {
+                        return; // successfully woken up
+                    }
+                } else if (current == CpuBlocked) {
+                    if (AtomicCas(&State, (ui64(pool) << AssignedOffs) | pool, state)) {
+                        FutexWake();
+                        return; // successfully woken up
+                    }
+                } else {
+                    if (AtomicCas(&State, (ui64(pool) << AssignedOffs) | (state & ~AssignedMask), state)) {
+                        return;
// wakeup is not required + } + } + } + } + + void Stop() { + while (true) { + TAtomicBase state = AtomicLoad(&State); + if (AtomicCas(&State, (state & ~CurrentMask) | CpuStopped, state)) { + FutexWake(); + return; // successfully stopped + } + } + } + + // Start waiting, returns false in case of actorsystem shutdown + bool StartSpinning() { + while (true) { + TAtomicBase state = AtomicLoad(&State); + TPoolId current(state & CurrentMask); + if (Y_UNLIKELY(current == CpuStopped)) { + return false; + } + Y_VERIFY_DEBUG(current < MaxPools, "unexpected already waiting state of cpu (%d)", (int)current); + if (AtomicCas(&State, (state & ~CurrentMask) | CpuSpinning, state)) { // successfully marked as spinning + return true; + } + } + } + + bool StartBlocking() { + while (true) { + TAtomicBase state = AtomicLoad(&State); + TPoolId current(state & CurrentMask); + if (current == CpuSpinning) { + if (AtomicCas(&State, (state & ~CurrentMask) | CpuBlocked, state)) { + return false; // successful switch + } + } else { + return true; // wakeup + } + } + } + + bool Block(ui64 timeoutNs, TPoolId& result) { +#ifdef _linux_ + timespec timeout; + timeout.tv_sec = timeoutNs / 1'000'000'000; + timeout.tv_nsec = timeoutNs % 1'000'000'000; + SysFutex(Futex(), FUTEX_WAIT_PRIVATE, CpuBlocked, &timeout, nullptr, 0); +#else + NanoSleep(timeoutNs); // non-linux wake is not supported, cpu will go idle on wake after blocked state +#endif + TAtomicBase state = AtomicLoad(&State); + TPoolId current(state & CurrentMask); + if (current == CpuBlocked) { + return false; // timeout + } else { + result = current; + return true; // wakeup + } + } + + enum EWakeResult { + Woken, // successfully woken up + NotIdle, // cpu is already not idle + Forbidden, // cpu is assigned to another pool + Stopped, // cpu is shutdown + }; + + EWakeResult WakeWithoutToken(TPoolId pool) { + while (true) { + TAtomicBase state = RelaxedLoad(&State); + TPoolId current(state & CurrentMask); + TPoolId assigned((state & AssignedMask) >> AssignedOffs); + if (assigned == CpuShared || assigned == pool) { + if (current == CpuSpinning) { + if (AtomicCas(&State, (state & ~CurrentMask) | pool, state)) { + return Woken; + } + } else if (current == CpuBlocked) { + if (AtomicCas(&State, (state & ~CurrentMask) | pool, state)) { + FutexWake(); + return Woken; + } + } else if (current == CpuStopped) { + return Stopped; + } else { + return NotIdle; + } + } else { + return Forbidden; + } + } + } + + EWakeResult WakeWithTokenAcquired(TPoolId token) { + while (true) { + TAtomicBase state = RelaxedLoad(&State); + TPoolId current(state & CurrentMask); + // NOTE: We ignore assigned value because we already have token, so + // NOTE: not assigned pool may be run here. 
This will be fixed + // NOTE: after we finish with current activation + if (current == CpuSpinning) { + if (AtomicCas(&State, (state & ~CurrentMask) | token, state)) { + return Woken; + } + } else if (current == CpuBlocked) { + if (AtomicCas(&State, (state & ~CurrentMask) | token, state)) { + FutexWake(); + return Woken; + } + } else if (current == CpuStopped) { + return Stopped; + } else { + return NotIdle; + } + } + } + + bool IsPoolReassigned(TPoolId current) const { + TAtomicBase state = AtomicLoad(&State); + TPoolId assigned((state & AssignedMask) >> AssignedOffs); + return assigned != current; + } + + private: + void* Futex() { + return (void*)&State; // little endian assumed + } + + void FutexWake() { +#ifdef _linux_ + SysFutex(Futex(), FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0); +#endif + } + }; + +} diff --git a/library/cpp/actors/core/defs.h b/library/cpp/actors/core/defs.h new file mode 100644 index 0000000000..980b7d767b --- /dev/null +++ b/library/cpp/actors/core/defs.h @@ -0,0 +1,69 @@ +#pragma once + +// unique tag to fix pragma once gcc glueing: ./library/actorlib/core/defs.h + +#include <library/cpp/actors/util/defs.h> +#include <util/generic/hash.h> +#include <util/string/printf.h> + +// Enables collection of +// event send/receive counts +// activation time histograms +// event processing time histograms +#define ACTORSLIB_COLLECT_EXEC_STATS + +namespace NActors { + using TPoolId = ui8; + using TPoolsMask = ui64; + static constexpr TPoolId PoolBits = 6; + static constexpr TPoolId MaxPools = (1 << PoolBits) - 1; // maximum amount of pools (poolid=63 is reserved) + static constexpr TPoolsMask WaitPoolsFlag = (1ull << MaxPools); // wait-for-slow-workers flag bitmask + + // Special TPoolId values used by TCpuState + static constexpr TPoolId CpuSpinning = MaxPools; // fast-worker is actively spinning, no slow-workers + static constexpr TPoolId CpuBlocked = MaxPools + 1; // fast-worker is blocked, no slow-workers + static constexpr TPoolId CpuStopped = TPoolId(-1); // special value indicating worker should stop + static constexpr TPoolId CpuShared = MaxPools; // special value for `assigned` meaning balancer disabled, pool scheduler is used instead + + using TPoolWeight = ui16; + static constexpr TPoolWeight MinPoolWeight = 1; + static constexpr TPoolWeight DefPoolWeight = 32; + static constexpr TPoolWeight MaxPoolWeight = 1024; + + using TWorkerId = ui16; + static constexpr TWorkerId WorkerBits = 11; + static constexpr TWorkerId MaxWorkers = 1 << WorkerBits; + + using TThreadId = ui64; + static constexpr TThreadId UnknownThreadId = ui64(-1); + + struct TMailboxType { + enum EType { + Inherited = -1, // inherit mailbox from parent + Simple = 0, // simplest queue under producer lock. fastest in no-contention case + Revolving = 1, // somewhat outdated, tries to be wait-free. 
replaced by ReadAsFilled + HTSwap = 2, // other simple lf queue, suggested for low-contention case + ReadAsFilled = 3, // wait-free queue, suggested for high-contention or latency critical + TinyReadAsFilled = 4, // same as 3 but with lower overhead + //Inplace; + //Direct; + //Virtual + }; + }; + + struct TScopeId : std::pair<ui64, ui64> { + using TBase = std::pair<ui64, ui64>; + using TBase::TBase; + static const TScopeId LocallyGenerated; + }; + + static inline TString ScopeIdToString(const TScopeId& scopeId) { + return Sprintf("<%" PRIu64 ":%" PRIu64 ">", scopeId.first, scopeId.second); + } + +} + +template<> +struct hash<NActors::TScopeId> : hash<std::pair<ui64, ui64>> {}; + +class TAffinity; diff --git a/library/cpp/actors/core/event.cpp b/library/cpp/actors/core/event.cpp new file mode 100644 index 0000000000..33f8ce2aaf --- /dev/null +++ b/library/cpp/actors/core/event.cpp @@ -0,0 +1,38 @@ +#include "event.h" +#include "event_pb.h" + +namespace NActors { + + const TScopeId TScopeId::LocallyGenerated{ + Max<ui64>(), Max<ui64>() + }; + + TIntrusivePtr<TEventSerializedData> IEventHandle::ReleaseChainBuffer() { + if (Buffer) { + TIntrusivePtr<TEventSerializedData> result; + DoSwap(result, Buffer); + Event.Reset(); + return result; + } + if (Event) { + TAllocChunkSerializer serializer; + Event->SerializeToArcadiaStream(&serializer); + auto chainBuf = serializer.Release(Event->IsExtendedFormat()); + Event.Reset(); + return chainBuf; + } + return new TEventSerializedData; + } + + TIntrusivePtr<TEventSerializedData> IEventHandle::GetChainBuffer() { + if (Buffer) + return Buffer; + if (Event) { + TAllocChunkSerializer serializer; + Event->SerializeToArcadiaStream(&serializer); + Buffer = serializer.Release(Event->IsExtendedFormat()); + return Buffer; + } + return new TEventSerializedData; + } +} diff --git a/library/cpp/actors/core/event.h b/library/cpp/actors/core/event.h new file mode 100644 index 0000000000..6ff02aaf94 --- /dev/null +++ b/library/cpp/actors/core/event.h @@ -0,0 +1,344 @@ +#pragma once + +#include "defs.h" +#include "actorid.h" +#include "callstack.h" +#include "event_load.h" + +#include <library/cpp/actors/wilson/wilson_trace.h> + +#include <util/system/hp_timer.h> +#include <util/generic/maybe.h> + +namespace NActors { + class TChunkSerializer; + + class ISerializerToStream { + public: + virtual bool SerializeToArcadiaStream(TChunkSerializer*) const = 0; + }; + + class IEventBase + : TNonCopyable, + public ISerializerToStream { + public: + // actual typing is performed by IEventHandle + + virtual ~IEventBase() { + } + + virtual TString ToStringHeader() const = 0; + virtual TString ToString() const { + return ToStringHeader(); + } + virtual ui32 CalculateSerializedSize() const { + return 0; + } + virtual ui32 Type() const = 0; + virtual bool SerializeToArcadiaStream(TChunkSerializer*) const = 0; + virtual bool IsSerializable() const = 0; + virtual bool IsExtendedFormat() const { + return false; + } + virtual ui32 CalculateSerializedSizeCached() const { + return CalculateSerializedSize(); + } + }; + + // fat handle + class IEventHandle : TNonCopyable { + struct TOnNondelivery { + TActorId Recipient; + + TOnNondelivery(const TActorId& recipient) + : Recipient(recipient) + { + } + }; + + public: + template <typename TEv> + inline TEv* CastAsLocal() const noexcept { + auto fits = GetTypeRewrite() == TEv::EventType; + + return fits ? 
static_cast<TEv*>(Event.Get()) : nullptr;
+        }
+
+        template <typename TEventType>
+        TEventType* Get() {
+            if (Type != TEventType::EventType)
+                Y_FAIL("Event type %" PRIu32 " doesn't match the expected type %" PRIu32, Type, TEventType::EventType);
+
+            if (!Event) {
+                Event.Reset(TEventType::Load(Buffer.Get()));
+            }
+
+            if (Event) {
+                return static_cast<TEventType*>(Event.Get());
+            }
+
+            Y_FAIL("Failed to Load() event type %" PRIu32 " class %s", Type, TypeName<TEventType>().data());
+        }
+
+        template <typename T>
+        TAutoPtr<T> Release() {
+            TAutoPtr<T> x = Get<T>();
+            Y_UNUSED(Event.Release());
+            Buffer.Reset();
+            return x;
+        }
+
+        enum EFlags {
+            FlagTrackDelivery = 1 << 0,
+            FlagForwardOnNondelivery = 1 << 1,
+            FlagSubscribeOnSession = 1 << 2,
+            FlagUseSubChannel = 1 << 3,
+            FlagGenerateUnsureUndelivered = 1 << 4,
+            FlagExtendedFormat = 1 << 5,
+        };
+
+        const ui32 Type;
+        const ui32 Flags;
+        const TActorId Recipient;
+        const TActorId Sender;
+        const ui64 Cookie;
+        const TScopeId OriginScopeId = TScopeId::LocallyGenerated; // filled in when the message is received from Interconnect
+
+        // if set, used by ActorSystem/Interconnect to report tracepoints
+        NWilson::TTraceId TraceId;
+
+        // filled if the event was fed by an interconnect session
+        const TActorId InterconnectSession;
+
+#ifdef ACTORSLIB_COLLECT_EXEC_STATS
+        ::NHPTimer::STime SendTime;
+#endif
+
+        static const size_t ChannelBits = 12;
+        static const size_t ChannelShift = (sizeof(ui32) << 3) - ChannelBits;
+
+#ifdef USE_ACTOR_CALLSTACK
+        TCallstack Callstack;
+#endif
+        ui16 GetChannel() const noexcept {
+            return Flags >> ChannelShift;
+        }
+
+        ui64 GetSubChannel() const noexcept {
+            return Flags & FlagUseSubChannel ? Sender.LocalId() : 0ULL;
+        }
+
+        static ui32 MakeFlags(ui32 channel, ui32 flags) {
+            Y_VERIFY(channel < (1 << ChannelBits));
+            Y_VERIFY(flags < (1 << ChannelShift));
+            return (flags | (channel << ChannelShift));
+        }
+
+    private:
+        THolder<IEventBase> Event;
+        TIntrusivePtr<TEventSerializedData> Buffer;
+
+        TActorId RewriteRecipient;
+        ui32 RewriteType;
+
+        THolder<TOnNondelivery> OnNondeliveryHolder; // only for local events
+
+    public:
+        void Rewrite(ui32 typeRewrite, TActorId recipientRewrite) {
+            RewriteRecipient = recipientRewrite;
+            RewriteType = typeRewrite;
+        }
+
+        void DropRewrite() {
+            RewriteRecipient = Recipient;
+            RewriteType = Type;
+        }
+
+        const TActorId& GetRecipientRewrite() const {
+            return RewriteRecipient;
+        }
+
+        ui32 GetTypeRewrite() const {
+            return RewriteType;
+        }
+
+        TActorId GetForwardOnNondeliveryRecipient() const {
+            return OnNondeliveryHolder.Get() ?
OnNondeliveryHolder->Recipient : TActorId(); + } + + IEventHandle(const TActorId& recipient, const TActorId& sender, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0, + const TActorId* forwardOnNondelivery = nullptr, NWilson::TTraceId traceId = {}) + : Type(ev->Type()) + , Flags(flags) + , Recipient(recipient) + , Sender(sender) + , Cookie(cookie) + , TraceId(std::move(traceId)) +#ifdef ACTORSLIB_COLLECT_EXEC_STATS + , SendTime(0) +#endif + , Event(ev) + , RewriteRecipient(Recipient) + , RewriteType(Type) + { + if (forwardOnNondelivery) + OnNondeliveryHolder.Reset(new TOnNondelivery(*forwardOnNondelivery)); + } + + IEventHandle(ui32 type, + ui32 flags, + const TActorId& recipient, + const TActorId& sender, + TIntrusivePtr<TEventSerializedData> buffer, + ui64 cookie, + const TActorId* forwardOnNondelivery = nullptr, + NWilson::TTraceId traceId = {}) + : Type(type) + , Flags(flags) + , Recipient(recipient) + , Sender(sender) + , Cookie(cookie) + , TraceId(std::move(traceId)) +#ifdef ACTORSLIB_COLLECT_EXEC_STATS + , SendTime(0) +#endif + , Buffer(std::move(buffer)) + , RewriteRecipient(Recipient) + , RewriteType(Type) + { + if (forwardOnNondelivery) + OnNondeliveryHolder.Reset(new TOnNondelivery(*forwardOnNondelivery)); + } + + // Special ctor for events from interconnect. + IEventHandle(const TActorId& session, + ui32 type, + ui32 flags, + const TActorId& recipient, + const TActorId& sender, + TIntrusivePtr<TEventSerializedData> buffer, + ui64 cookie, + TScopeId originScopeId, + NWilson::TTraceId traceId) noexcept + : Type(type) + , Flags(flags) + , Recipient(recipient) + , Sender(sender) + , Cookie(cookie) + , OriginScopeId(originScopeId) + , TraceId(std::move(traceId)) + , InterconnectSession(session) +#ifdef ACTORSLIB_COLLECT_EXEC_STATS + , SendTime(0) +#endif + , Buffer(std::move(buffer)) + , RewriteRecipient(Recipient) + , RewriteType(Type) + { + } + + TIntrusivePtr<TEventSerializedData> GetChainBuffer(); + TIntrusivePtr<TEventSerializedData> ReleaseChainBuffer(); + + ui32 GetSize() const { + if (Buffer) { + return Buffer->GetSize(); + } else if (Event) { + return Event->CalculateSerializedSize(); + } else { + return 0; + } + } + + bool HasBuffer() const { + return bool(Buffer); + } + + bool HasEvent() const { + return bool(Event); + } + + IEventBase* GetBase() { + if (!Event) { + if (!Buffer) + return nullptr; + else + ythrow TWithBackTrace<yexception>() << "don't know how to load the event from buffer"; + } + + return Event.Get(); + } + + TAutoPtr<IEventBase> ReleaseBase() { + TAutoPtr<IEventBase> x = GetBase(); + Y_UNUSED(Event.Release()); + Buffer.Reset(); + return x; + } + + TAutoPtr<IEventHandle> Forward(const TActorId& dest) { + if (Event) + return new IEventHandle(dest, Sender, Event.Release(), Flags, Cookie, nullptr, std::move(TraceId)); + else + return new IEventHandle(Type, Flags, dest, Sender, Buffer, Cookie, nullptr, std::move(TraceId)); + } + + TAutoPtr<IEventHandle> ForwardOnNondelivery(ui32 reason, bool unsure = false); + }; + + template <typename TEventType> + class TEventHandle: public IEventHandle { + TEventHandle(); // we never made instance of TEventHandle + public: + TEventType* Get() { + return IEventHandle::Get<TEventType>(); + } + + TAutoPtr<TEventType> Release() { + return IEventHandle::Release<TEventType>(); + } + }; + + static_assert(sizeof(TEventHandle<IEventBase>) == sizeof(IEventHandle), "expect sizeof(TEventHandle<IEventBase>) == sizeof(IEventHandle)"); + + template <typename TEventType, ui32 EventType0> + class TEventBase: public IEventBase { + public: + 
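+        // Illustrative subclass (a sketch, not part of the original commit; TEvPing and
+        // its event id are hypothetical, following the pattern used in this library's
+        // unit tests):
+        //     enum { EvPing = EventSpaceBegin(TEvents::ES_PRIVATE) };
+        //     struct TEvPing: TEventBase<TEvPing, EvPing> {
+        //         DEFINE_SIMPLE_LOCAL_EVENT(TEvPing, "TEvPing")
+        //     };
+        // TEvPing::THandle is then TEventHandle<TEvPing>, and TEvPing::TPtr is the
+        // smart pointer type actors receive in their event handlers.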
static constexpr ui32 EventType = EventType0; + ui32 Type() const override { + return EventType0; + } + // still abstract + + typedef TEventHandle<TEventType> THandle; + typedef TAutoPtr<THandle> TPtr; + }; + +#define DEFINE_SIMPLE_LOCAL_EVENT(eventType, header) \ + TString ToStringHeader() const override { \ + return TString(header); \ + } \ + bool SerializeToArcadiaStream(NActors::TChunkSerializer*) const override { \ + Y_FAIL("Local event " #eventType " is not serializable"); \ + } \ + static IEventBase* Load(NActors::TEventSerializedData*) { \ + Y_FAIL("Local event " #eventType " has no load method"); \ + } \ + bool IsSerializable() const override { \ + return false; \ + } + +#define DEFINE_SIMPLE_NONLOCAL_EVENT(eventType, header) \ + TString ToStringHeader() const override { \ + return TString(header); \ + } \ + bool SerializeToArcadiaStream(NActors::TChunkSerializer*) const override { \ + return true; \ + } \ + static IEventBase* Load(NActors::TEventSerializedData*) { \ + return new eventType(); \ + } \ + bool IsSerializable() const override { \ + return true; \ + } +} diff --git a/library/cpp/actors/core/event_load.h b/library/cpp/actors/core/event_load.h new file mode 100644 index 0000000000..0dab1dd374 --- /dev/null +++ b/library/cpp/actors/core/event_load.h @@ -0,0 +1,112 @@ +#pragma once + +#include <util/stream/walk.h> +#include <util/system/types.h> +#include <util/generic/string.h> +#include <library/cpp/actors/util/rope.h> +#include <library/cpp/actors/wilson/wilson_trace.h> + +namespace NActors { + class IEventHandle; + + struct TConstIoVec { + const void* Data; + size_t Size; + }; + + struct TIoVec { + void* Data; + size_t Size; + }; + + class TEventSerializedData + : public TThrRefBase + { + TRope Rope; + bool ExtendedFormat = false; + + public: + TEventSerializedData() = default; + + TEventSerializedData(TRope&& rope, bool extendedFormat) + : Rope(std::move(rope)) + , ExtendedFormat(extendedFormat) + {} + + TEventSerializedData(const TEventSerializedData& original, TString extraBuffer) + : Rope(original.Rope) + , ExtendedFormat(original.ExtendedFormat) + { + Append(std::move(extraBuffer)); + } + + TEventSerializedData(TString buffer, bool extendedFormat) + : ExtendedFormat(extendedFormat) + { + Append(std::move(buffer)); + } + + void SetExtendedFormat() { + ExtendedFormat = true; + } + + bool IsExtendedFormat() const { + return ExtendedFormat; + } + + TRope::TConstIterator GetBeginIter() const { + return Rope.Begin(); + } + + size_t GetSize() const { + return Rope.GetSize(); + } + + TString GetString() const { + TString result; + result.reserve(GetSize()); + for (auto it = Rope.Begin(); it.Valid(); it.AdvanceToNextContiguousBlock()) { + result.append(it.ContiguousData(), it.ContiguousSize()); + } + return result; + } + + TRope EraseBack(size_t count) { + Y_VERIFY(count <= Rope.GetSize()); + TRope::TIterator iter = Rope.End(); + iter -= count; + return Rope.Extract(iter, Rope.End()); + } + + void Append(TRope&& from) { + Rope.Insert(Rope.End(), std::move(from)); + } + + void Append(TString buffer) { + if (buffer) { + Rope.Insert(Rope.End(), TRope(std::move(buffer))); + } + } + }; +} + +class TChainBufWalk : public IWalkInput { + TIntrusivePtr<NActors::TEventSerializedData> Buffer; + TRope::TConstIterator Iter; + +public: + TChainBufWalk(TIntrusivePtr<NActors::TEventSerializedData> buffer) + : Buffer(std::move(buffer)) + , Iter(Buffer->GetBeginIter()) + {} + +private: + size_t DoUnboundedNext(const void **ptr) override { + const size_t size = Iter.ContiguousSize(); + *ptr 
= Iter.ContiguousData(); + if (Iter.Valid()) { + Iter.AdvanceToNextContiguousBlock(); + } + return size; + } +}; diff --git a/library/cpp/actors/core/event_local.h b/library/cpp/actors/core/event_local.h new file mode 100644 index 0000000000..2845aa94dd --- /dev/null +++ b/library/cpp/actors/core/event_local.h @@ -0,0 +1,74 @@ +#pragma once + +#include "event.h" +#include "scheduler_cookie.h" +#include "event_load.h" +#include <util/system/type_name.h> + +namespace NActors { + template <typename TEv, ui32 TEventType> + class TEventLocal: public TEventBase<TEv, TEventType> { + public: + TString ToStringHeader() const override { + return TypeName<TEv>(); + } + + bool SerializeToArcadiaStream(TChunkSerializer* /*serializer*/) const override { + Y_FAIL("Serialization of local event %s type %" PRIu32, TypeName<TEv>().data(), TEventType); + } + + bool IsSerializable() const override { + return false; + } + + static IEventBase* Load(TEventSerializedData*) { + Y_FAIL("Loading of local event %s type %" PRIu32, TypeName<TEv>().data(), TEventType); + } + }; + + template <typename TEv, ui32 TEventType> + class TEventScheduler: public TEventLocal<TEv, TEventType> { + public: + TSchedulerCookieHolder Cookie; + + TEventScheduler(ISchedulerCookie* cookie) + : Cookie(cookie) + { + } + }; + + template <ui32 TEventType> + class TEventSchedulerEv: public TEventScheduler<TEventSchedulerEv<TEventType>, TEventType> { + public: + TEventSchedulerEv(ISchedulerCookie* cookie) + : TEventScheduler<TEventSchedulerEv<TEventType>, TEventType>(cookie) + { + } + }; + + template <typename TEv, ui32 TEventType> + class TEventSimple: public TEventBase<TEv, TEventType> { + public: + TString ToStringHeader() const override { + static TString header(TypeName<TEv>()); + return header; + } + + bool SerializeToArcadiaStream(TChunkSerializer* /*serializer*/) const override { + static_assert(sizeof(TEv) == sizeof(TEventSimple<TEv, TEventType>), "Descendant should be an empty class"); + return true; + } + + bool IsSerializable() const override { + return true; + } + + static IEventBase* Load(NActors::TEventSerializedData*) { + return new TEv(); + } + + static IEventBase* Load(const TString&) { + return new TEv(); + } + }; +} diff --git a/library/cpp/actors/core/event_pb.cpp b/library/cpp/actors/core/event_pb.cpp new file mode 100644 index 0000000000..018ff9ac34 --- /dev/null +++ b/library/cpp/actors/core/event_pb.cpp @@ -0,0 +1,223 @@ +#include "event_pb.h" + +namespace NActors { + bool TRopeStream::Next(const void** data, int* size) { + *data = Iter.ContiguousData(); + *size = Iter.ContiguousSize(); + if (size_t(*size + TotalByteCount) > Size) { + *size = Size - TotalByteCount; + Iter += *size; + } else if (Iter.Valid()) { + Iter.AdvanceToNextContiguousBlock(); + } + TotalByteCount += *size; + return *size != 0; + } + + void TRopeStream::BackUp(int count) { + Y_VERIFY(count <= TotalByteCount); + Iter -= count; + TotalByteCount -= count; + } + + bool TRopeStream::Skip(int count) { + if (static_cast<size_t>(TotalByteCount + count) > Size) { + count = Size - TotalByteCount; + } + Iter += count; + TotalByteCount += count; + return static_cast<size_t>(TotalByteCount) != Size; + } + + TCoroutineChunkSerializer::TCoroutineChunkSerializer() + : TotalSerializedDataSize(0) + , Stack(64 * 1024) + , SelfClosure{this, TArrayRef(Stack.Begin(), Stack.End())} + , InnerContext(SelfClosure) + {} + + TCoroutineChunkSerializer::~TCoroutineChunkSerializer() { + CancelFlag = true; + Resume(); + Y_VERIFY(Finished); + } + + bool 
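+    // Usage sketch (an assumption based on the interfaces defined here, not part of the
+    // original commit; `sink` is a hypothetical byte consumer): the coroutine serializer
+    // is driven by repeatedly feeding it output buffers; each FeedBuf() call resumes the
+    // serialization coroutine and returns the chunks produced into that buffer:
+    //
+    //     TCoroutineChunkSerializer chunker;
+    //     chunker.SetSerializingEvent(event);              // event: const IEventBase*
+    //     char buf[4096];
+    //     while (!chunker.IsComplete()) {
+    //         auto [first, last] = chunker.FeedBuf(buf, sizeof(buf));
+    //         for (auto* chunk = first; chunk != last; ++chunk) {
+    //             sink.Write(chunk->first, chunk->second); // TChunk = {pointer, size}
+    //         }
+    //     }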
TCoroutineChunkSerializer::AllowsAliasing() const { + return true; + } + + bool TCoroutineChunkSerializer::Produce(const void *data, size_t size) { + Y_VERIFY(size <= SizeRemain); + SizeRemain -= size; + TotalSerializedDataSize += size; + + if (NumChunks) { + auto& last = Chunks[NumChunks - 1]; + if (last.first + last.second == data) { + last.second += size; // just extend the last buffer + return true; + } + } + + if (NumChunks == MaxChunks) { + InnerContext.SwitchTo(BufFeedContext); + if (CancelFlag || AbortFlag) { + return false; + } + } + + Y_VERIFY(NumChunks < MaxChunks); + Chunks[NumChunks++] = {static_cast<const char*>(data), size}; + return true; + } + + bool TCoroutineChunkSerializer::WriteAliasedRaw(const void* data, int size) { + Y_VERIFY(size >= 0); + while (size) { + if (CancelFlag || AbortFlag) { + return false; + } else if (const size_t bytesToAppend = Min<size_t>(size, SizeRemain)) { + if (!Produce(data, bytesToAppend)) { + return false; + } + data = static_cast<const char*>(data) + bytesToAppend; + size -= bytesToAppend; + } else { + InnerContext.SwitchTo(BufFeedContext); + } + } + return true; + } + + bool TCoroutineChunkSerializer::Next(void** data, int* size) { + if (CancelFlag || AbortFlag) { + return false; + } + if (!SizeRemain) { + InnerContext.SwitchTo(BufFeedContext); + if (CancelFlag || AbortFlag) { + return false; + } + } + Y_VERIFY(SizeRemain); + *data = BufferPtr; + *size = SizeRemain; + BufferPtr += SizeRemain; + return Produce(*data, *size); + } + + void TCoroutineChunkSerializer::BackUp(int count) { + if (!count) { + return; + } + Y_VERIFY(count > 0); + Y_VERIFY(NumChunks); + TChunk& buf = Chunks[NumChunks - 1]; + Y_VERIFY((size_t)count <= buf.second); + Y_VERIFY(buf.first + buf.second == BufferPtr); + buf.second -= count; + if (!buf.second) { + --NumChunks; + } + BufferPtr -= count; + SizeRemain += count; + TotalSerializedDataSize -= count; + } + + void TCoroutineChunkSerializer::Resume() { + TContMachineContext feedContext; + BufFeedContext = &feedContext; + feedContext.SwitchTo(&InnerContext); + BufFeedContext = nullptr; + } + + bool TCoroutineChunkSerializer::WriteRope(const TRope *rope) { + for (auto iter = rope->Begin(); iter.Valid(); iter.AdvanceToNextContiguousBlock()) { + if (!WriteAliasedRaw(iter.ContiguousData(), iter.ContiguousSize())) { + return false; + } + } + return true; + } + + bool TCoroutineChunkSerializer::WriteString(const TString *s) { + return WriteAliasedRaw(s->data(), s->length()); + } + + std::pair<TCoroutineChunkSerializer::TChunk*, TCoroutineChunkSerializer::TChunk*> TCoroutineChunkSerializer::FeedBuf(void* data, size_t size) { + // fill in base params + BufferPtr = static_cast<char*>(data); + SizeRemain = size; + + // transfer control to the coroutine + Y_VERIFY(Event); + NumChunks = 0; + Resume(); + + return {Chunks, Chunks + NumChunks}; + } + + void TCoroutineChunkSerializer::SetSerializingEvent(const IEventBase *event) { + Y_VERIFY(Event == nullptr); + Event = event; + TotalSerializedDataSize = 0; + AbortFlag = false; + } + + void TCoroutineChunkSerializer::Abort() { + Y_VERIFY(Event); + AbortFlag = true; + Resume(); + } + + void TCoroutineChunkSerializer::DoRun() { + while (!CancelFlag) { + Y_VERIFY(Event); + SerializationSuccess = Event->SerializeToArcadiaStream(this); + Event = nullptr; + if (!CancelFlag) { // cancel flag may have been received during serialization + InnerContext.SwitchTo(BufFeedContext); + } + } + Finished = true; + InnerContext.SwitchTo(BufFeedContext); + } + + bool TAllocChunkSerializer::Next(void** 
pdata, int* psize) { + if (Backup) { + // we have some data in backup rope -- move the first chunk from the backup rope to the buffer and return + // pointer to the buffer; it is safe to remove 'const' here as we uniquely own this buffer + TRope::TIterator iter = Backup.Begin(); + *pdata = const_cast<char*>(iter.ContiguousData()); + *psize = iter.ContiguousSize(); + iter.AdvanceToNextContiguousBlock(); + Buffers->Append(Backup.Extract(Backup.Begin(), iter)); + } else { + // no backup buffer, so we have to create new one + auto item = TRopeAlignedBuffer::Allocate(4096); + *pdata = item->GetBuffer(); + *psize = item->GetCapacity(); + Buffers->Append(TRope(std::move(item))); + } + return true; + } + + void TAllocChunkSerializer::BackUp(int count) { + Backup.Insert(Backup.Begin(), Buffers->EraseBack(count)); + } + + bool TAllocChunkSerializer::WriteAliasedRaw(const void*, int) { + Y_VERIFY(false); + return false; + } + + bool TAllocChunkSerializer::WriteRope(const TRope *rope) { + Buffers->Append(TRope(*rope)); + return true; + } + + bool TAllocChunkSerializer::WriteString(const TString *s) { + Buffers->Append(*s); + return true; + } +} diff --git a/library/cpp/actors/core/event_pb.h b/library/cpp/actors/core/event_pb.h new file mode 100644 index 0000000000..d7546b901a --- /dev/null +++ b/library/cpp/actors/core/event_pb.h @@ -0,0 +1,500 @@ +#pragma once + +#include "event.h" +#include "event_load.h" + +#include <google/protobuf/io/zero_copy_stream.h> +#include <google/protobuf/arena.h> +#include <library/cpp/actors/protos/actors.pb.h> +#include <util/generic/deque.h> +#include <util/system/context.h> +#include <util/system/filemap.h> +#include <array> + +namespace NActors { + + class TRopeStream : public NProtoBuf::io::ZeroCopyInputStream { + TRope::TConstIterator Iter; + const size_t Size; + + public: + TRopeStream(TRope::TConstIterator iter, size_t size) + : Iter(iter) + , Size(size) + {} + + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override { + return TotalByteCount; + } + + private: + int64_t TotalByteCount = 0; + }; + + class TChunkSerializer : public NProtoBuf::io::ZeroCopyOutputStream { + public: + TChunkSerializer() = default; + virtual ~TChunkSerializer() = default; + + virtual bool WriteRope(const TRope *rope) = 0; + virtual bool WriteString(const TString *s) = 0; + }; + + class TAllocChunkSerializer final : public TChunkSerializer { + public: + bool Next(void** data, int* size) override; + void BackUp(int count) override; + int64_t ByteCount() const override { + return Buffers->GetSize(); + } + bool WriteAliasedRaw(const void* data, int size) override; + + // WARNING: these methods require owner to retain ownership and immutability of passed objects + bool WriteRope(const TRope *rope) override; + bool WriteString(const TString *s) override; + + inline TIntrusivePtr<TEventSerializedData> Release(bool extendedFormat) { + if (extendedFormat) { + Buffers->SetExtendedFormat(); + } + return std::move(Buffers); + } + + protected: + TIntrusivePtr<TEventSerializedData> Buffers = new TEventSerializedData; + TRope Backup; + }; + + class TCoroutineChunkSerializer final : public TChunkSerializer, protected ITrampoLine { + public: + using TChunk = std::pair<const char*, size_t>; + + TCoroutineChunkSerializer(); + ~TCoroutineChunkSerializer(); + + void SetSerializingEvent(const IEventBase *event); + void Abort(); + std::pair<TChunk*, TChunk*> FeedBuf(void* data, size_t size); + bool 
IsComplete() const { + return !Event; + } + bool IsSuccessfull() const { + return SerializationSuccess; + } + const IEventBase *GetCurrentEvent() const { + return Event; + } + + bool Next(void** data, int* size) override; + void BackUp(int count) override; + int64_t ByteCount() const override { + return TotalSerializedDataSize; + } + bool WriteAliasedRaw(const void* data, int size) override; + bool AllowsAliasing() const override; + + bool WriteRope(const TRope *rope) override; + bool WriteString(const TString *s) override; + + protected: + void DoRun() override; + void Resume(); + bool Produce(const void *data, size_t size); + + i64 TotalSerializedDataSize; + TMappedAllocation Stack; + TContClosure SelfClosure; + TContMachineContext InnerContext; + TContMachineContext *BufFeedContext = nullptr; + char *BufferPtr; + size_t SizeRemain; + static constexpr size_t MaxChunks = 16; + TChunk Chunks[MaxChunks]; + size_t NumChunks = 0; + const IEventBase *Event = nullptr; + bool CancelFlag = false; + bool AbortFlag; + bool SerializationSuccess; + bool Finished = false; + }; + +#ifdef ACTORLIB_HUGE_PB_SIZE + static const size_t EventMaxByteSize = 140 << 20; // (140MB) +#else + static const size_t EventMaxByteSize = 67108000; +#endif + + template <typename TEv, typename TRecord /*protobuf record*/, ui32 TEventType, typename TRecHolder> + class TEventPBBase: public TEventBase<TEv, TEventType> , public TRecHolder { + // a vector of data buffers referenced by record; if filled, then extended serialization mechanism applies + TVector<TRope> Payload; + + public: + using TRecHolder::Record; + + public: + using ProtoRecordType = TRecord; + + TEventPBBase() = default; + + explicit TEventPBBase(const TRecord& rec) + { + Record = rec; + } + + explicit TEventPBBase(TRecord&& rec) + { + Record = std::move(rec); + } + + TString ToStringHeader() const override { + return Record.GetTypeName(); + } + + TString ToString() const override { + return Record.ShortDebugString(); + } + + bool IsSerializable() const override { + return true; + } + + bool IsExtendedFormat() const override { + return static_cast<bool>(Payload); + } + + bool SerializeToArcadiaStream(TChunkSerializer* chunker) const override { + // serialize payload first + if (Payload) { + void *data; + int size = 0; + auto append = [&](const char *p, size_t len) { + while (len) { + if (size) { + const size_t numBytesToCopy = std::min<size_t>(size, len); + memcpy(data, p, numBytesToCopy); + data = static_cast<char*>(data) + numBytesToCopy; + size -= numBytesToCopy; + p += numBytesToCopy; + len -= numBytesToCopy; + } else if (!chunker->Next(&data, &size)) { + return false; + } + } + return true; + }; + auto appendNumber = [&](size_t number) { + char buf[MaxNumberBytes]; + return append(buf, SerializeNumber(number, buf)); + }; + char marker = PayloadMarker; + append(&marker, 1); + if (!appendNumber(Payload.size())) { + return false; + } + for (const TRope& rope : Payload) { + if (!appendNumber(rope.GetSize())) { + return false; + } + if (rope) { + if (size) { + chunker->BackUp(std::exchange(size, 0)); + } + if (!chunker->WriteRope(&rope)) { + return false; + } + } + } + if (size) { + chunker->BackUp(size); + } + } + + return Record.SerializeToZeroCopyStream(chunker); + } + + ui32 CalculateSerializedSize() const override { + ssize_t result = Record.ByteSize(); + if (result >= 0 && Payload) { + ++result; // marker + char buf[MaxNumberBytes]; + result += SerializeNumber(Payload.size(), buf); + for (const TRope& rope : Payload) { + result += 
SerializeNumber(rope.GetSize(), buf); + result += rope.GetSize(); + } + } + return result; + } + + static IEventBase* Load(TIntrusivePtr<TEventSerializedData> input) { + THolder<TEventPBBase> ev(new TEv()); + if (!input->GetSize()) { + Y_PROTOBUF_SUPPRESS_NODISCARD ev->Record.ParseFromString(TString()); + } else { + TRope::TConstIterator iter = input->GetBeginIter(); + ui64 size = input->GetSize(); + + if (input->IsExtendedFormat()) { + // check marker + if (!iter.Valid() || *iter.ContiguousData() != PayloadMarker) { + Y_FAIL("invalid event"); + } + // skip marker + iter += 1; + --size; + // parse number of payload ropes + size_t numRopes = DeserializeNumber(iter, size); + if (numRopes == Max<size_t>()) { + Y_FAIL("invalid event"); + } + while (numRopes--) { + // parse length of the rope + const size_t len = DeserializeNumber(iter, size); + if (len == Max<size_t>() || size < len) { + Y_FAIL("invalid event len# %zu size# %" PRIu64, len, size); + } + // extract the rope + TRope::TConstIterator begin = iter; + iter += len; + size -= len; + ev->Payload.emplace_back(begin, iter); + } + } + + // parse the protobuf + TRopeStream stream(iter, size); + if (!ev->Record.ParseFromZeroCopyStream(&stream)) { + Y_FAIL("Failed to parse protobuf event type %" PRIu32 " class %s", TEventType, TypeName(ev->Record).data()); + } + } + ev->CachedByteSize = input->GetSize(); + return ev.Release(); + } + + size_t GetCachedByteSize() const { + if (CachedByteSize == 0) { + CachedByteSize = CalculateSerializedSize(); + } + return CachedByteSize; + } + + ui32 CalculateSerializedSizeCached() const override { + return GetCachedByteSize(); + } + + void InvalidateCachedByteSize() { + CachedByteSize = 0; + } + + public: + void ReservePayload(size_t size) { + Payload.reserve(size); + } + + ui32 AddPayload(TRope&& rope) { + const ui32 id = Payload.size(); + Payload.push_back(std::move(rope)); + InvalidateCachedByteSize(); + return id; + } + + const TRope& GetPayload(ui32 id) const { + Y_VERIFY(id < Payload.size()); + return Payload[id]; + } + + ui32 GetPayloadCount() const { + return Payload.size(); + } + + void StripPayload() { + Payload.clear(); + } + + protected: + mutable size_t CachedByteSize = 0; + + static constexpr char PayloadMarker = 0x07; + static constexpr size_t MaxNumberBytes = (sizeof(size_t) * CHAR_BIT + 6) / 7; + + static size_t SerializeNumber(size_t num, char *buffer) { + char *begin = buffer; + do { + *buffer++ = (num & 0x7F) | (num >= 128 ? 
0x80 : 0x00);
+                num >>= 7;
+            } while (num);
+            return buffer - begin;
+        }
+
+        static size_t DeserializeNumber(const char **ptr, const char *end) {
+            const char *p = *ptr;
+            size_t res = 0;
+            size_t offset = 0;
+            for (;;) {
+                if (p == end) {
+                    return Max<size_t>();
+                }
+                const char byte = *p++;
+                res |= (static_cast<size_t>(byte) & 0x7F) << offset;
+                offset += 7;
+                if (!(byte & 0x80)) {
+                    break;
+                }
+            }
+            *ptr = p;
+            return res;
+        }
+
+        static size_t DeserializeNumber(TRope::TConstIterator& iter, ui64& size) {
+            size_t res = 0;
+            size_t offset = 0;
+            for (;;) {
+                if (!iter.Valid()) {
+                    return Max<size_t>();
+                }
+                const char byte = *iter.ContiguousData();
+                iter += 1;
+                --size;
+                res |= (static_cast<size_t>(byte) & 0x7F) << offset;
+                offset += 7;
+                if (!(byte & 0x80)) {
+                    break;
+                }
+            }
+            return res;
+        }
+    };
+
+    // Protobuf record not using arena
+    template <typename TRecord>
+    struct TRecordHolder {
+        TRecord Record;
+    };
+
+    // Protobuf arena and a record allocated on it
+    template <typename TRecord, size_t InitialBlockSize, size_t MaxBlockSize>
+    struct TArenaRecordHolder {
+        google::protobuf::Arena PbArena;
+        TRecord& Record;
+
+        static const google::protobuf::ArenaOptions GetArenaOptions() {
+            google::protobuf::ArenaOptions opts;
+            opts.initial_block_size = InitialBlockSize;
+            opts.max_block_size = MaxBlockSize;
+            return opts;
+        }
+
+        TArenaRecordHolder()
+            : PbArena(GetArenaOptions())
+            , Record(*google::protobuf::Arena::CreateMessage<TRecord>(&PbArena))
+        {}
+    };
+
+    template <typename TEv, typename TRecord, ui32 TEventType>
+    class TEventPB : public TEventPBBase<TEv, TRecord, TEventType, TRecordHolder<TRecord> > {
+        typedef TEventPBBase<TEv, TRecord, TEventType, TRecordHolder<TRecord> > TPbBase;
+        // NOTE: No extra fields allowed: TEventPB must be a "template typedef"
+    public:
+        using TPbBase::TPbBase;
+    };
+
+    template <typename TEv, typename TRecord, ui32 TEventType, size_t InitialBlockSize = 512, size_t MaxBlockSize = 16*1024>
+    using TEventPBWithArena = TEventPBBase<TEv, TRecord, TEventType, TArenaRecordHolder<TRecord, InitialBlockSize, MaxBlockSize> >;
+
+    template <typename TEv, typename TRecord, ui32 TEventType>
+    class TEventShortDebugPB: public TEventPB<TEv, TRecord, TEventType> {
+    public:
+        using TBase = TEventPB<TEv, TRecord, TEventType>;
+        TEventShortDebugPB() = default;
+        explicit TEventShortDebugPB(const TRecord& rec)
+            : TBase(rec)
+        {
+        }
+        explicit TEventShortDebugPB(TRecord&& rec)
+            : TBase(std::move(rec))
+        {
+        }
+        TString ToString() const override {
+            return TypeName<TEv>() + " { " + TBase::Record.ShortDebugString() + " }";
+        }
+    };
+
+    template <typename TEv, typename TRecord, ui32 TEventType>
+    class TEventPreSerializedPB: public TEventPB<TEv, TRecord, TEventType> {
+    protected:
+        using TBase = TEventPB<TEv, TRecord, TEventType>;
+        using TSelf = TEventPreSerializedPB<TEv, TRecord, TEventType>;
+        using TBase::Record;
+
+    public:
+        TString PreSerializedData; // already serialized PB data (using message::SerializeToString)
+
+        TEventPreSerializedPB() = default;
+
+        explicit TEventPreSerializedPB(const TRecord& rec)
+            : TBase(rec)
+        {
+        }
+
+        explicit TEventPreSerializedPB(TRecord&& rec)
+            : TBase(std::move(rec))
+        {
+        }
+
+        // when a remote event is received locally, this method merges the pre-serialized data into the record
+        const TRecord& GetRecord() {
+            TRecord& base(TBase::Record);
+            if (!PreSerializedData.empty()) {
+                TRecord copy;
+                Y_PROTOBUF_SUPPRESS_NODISCARD copy.ParseFromString(PreSerializedData);
+                copy.MergeFrom(base);
+                base.Swap(&copy);
+                PreSerializedData.clear();
+            }
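+            // Record now holds the pre-serialized fields merged with any locally set
+            // ones, and PreSerializedData has been cleared, so subsequent GetRecord()
+            // calls return the cached result without re-parsing.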
return TBase::Record; + } + + const TRecord& GetRecord() const { + return const_cast<TSelf*>(this)->GetRecord(); + } + + TRecord* MutableRecord() { + GetRecord(); // Make sure PreSerializedData is parsed + return &(TBase::Record); + } + + TString ToString() const override { + return GetRecord().ShortDebugString(); + } + + bool SerializeToArcadiaStream(TChunkSerializer* chunker) const override { + return chunker->WriteString(&PreSerializedData) && TBase::SerializeToArcadiaStream(chunker); + } + + ui32 CalculateSerializedSize() const override { + return PreSerializedData.size() + TBase::CalculateSerializedSize(); + } + + size_t GetCachedByteSize() const { + return PreSerializedData.size() + TBase::GetCachedByteSize(); + } + + ui32 CalculateSerializedSizeCached() const override { + return GetCachedByteSize(); + } + }; + + inline TActorId ActorIdFromProto(const NActorsProto::TActorId& actorId) { + return TActorId(actorId.GetRawX1(), actorId.GetRawX2()); + } + + inline void ActorIdToProto(const TActorId& src, NActorsProto::TActorId* dest) { + Y_VERIFY_DEBUG(dest); + dest->SetRawX1(src.RawX1()); + dest->SetRawX2(src.RawX2()); + } +} diff --git a/library/cpp/actors/core/event_pb_payload_ut.cpp b/library/cpp/actors/core/event_pb_payload_ut.cpp new file mode 100644 index 0000000000..eab007bc15 --- /dev/null +++ b/library/cpp/actors/core/event_pb_payload_ut.cpp @@ -0,0 +1,154 @@ +#include "event_pb.h" +#include "events.h" + +#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/actors/protos/unittests.pb.h> + +using namespace NActors; + +enum { + EvMessageWithPayload = EventSpaceBegin(TEvents::ES_PRIVATE), + EvArenaMessage, + EvArenaMessageBig, + EvMessageWithPayloadPreSerialized +}; + +struct TEvMessageWithPayload : TEventPB<TEvMessageWithPayload, TMessageWithPayload, EvMessageWithPayload> { + TEvMessageWithPayload() = default; + explicit TEvMessageWithPayload(const TMessageWithPayload& p) + : TEventPB<TEvMessageWithPayload, TMessageWithPayload, EvMessageWithPayload>(p) + {} +}; + +struct TEvMessageWithPayloadPreSerialized : TEventPreSerializedPB<TEvMessageWithPayloadPreSerialized, TMessageWithPayload, EvMessageWithPayloadPreSerialized> { +}; + + +TRope MakeStringRope(const TString& message) { + return message ? 
TRope(message) : TRope();
+}
+
+TString MakeString(size_t len) {
+    TString res;
+    for (size_t i = 0; i < len; ++i) {
+        res += RandomNumber<char>();
+    }
+    return res;
+}
+
+Y_UNIT_TEST_SUITE(TEventProtoWithPayload) {
+
+    template <class TEventFrom, class TEventTo>
+    void TestSerializeDeserialize(size_t size1, size_t size2) {
+        static_assert(TEventFrom::EventType == TEventTo::EventType, "Must be same event type");
+
+        TEventFrom msg;
+        msg.Record.SetMeta("hello, world!");
+        msg.Record.AddPayloadId(msg.AddPayload(MakeStringRope(MakeString(size1))));
+        msg.Record.AddPayloadId(msg.AddPayload(MakeStringRope(MakeString(size2))));
+        msg.Record.AddSomeData(MakeString((size1 + size2) % 50 + 11));
+
+        auto serializer = MakeHolder<TAllocChunkSerializer>();
+        msg.SerializeToArcadiaStream(serializer.Get());
+        auto buffers = serializer->Release(msg.IsExtendedFormat());
+        UNIT_ASSERT_VALUES_EQUAL(buffers->GetSize(), msg.CalculateSerializedSize());
+        TString ser = buffers->GetString();
+
+        TString chunkerRes;
+        TCoroutineChunkSerializer chunker;
+        chunker.SetSerializingEvent(&msg);
+        while (!chunker.IsComplete()) {
+            char buffer[4096];
+            auto range = chunker.FeedBuf(buffer, sizeof(buffer));
+            for (auto p = range.first; p != range.second; ++p) {
+                chunkerRes += TString(p->first, p->second);
+            }
+        }
+        UNIT_ASSERT_VALUES_EQUAL(chunkerRes, ser);
+
+        THolder<IEventBase> ev2 = THolder(TEventTo::Load(buffers));
+        TEventTo& msg2 = static_cast<TEventTo&>(*ev2);
+        UNIT_ASSERT_VALUES_EQUAL(msg2.Record.GetMeta(), msg.Record.GetMeta());
+        UNIT_ASSERT_EQUAL(msg2.GetPayload(msg2.Record.GetPayloadId(0)), msg.GetPayload(msg.Record.GetPayloadId(0)));
+        UNIT_ASSERT_EQUAL(msg2.GetPayload(msg2.Record.GetPayloadId(1)), msg.GetPayload(msg.Record.GetPayloadId(1)));
+    }
+
+    template <class TEvent>
+    void TestAllSizes(size_t step1 = 100, size_t step2 = 111) {
+        for (size_t size1 = 0; size1 < 10000; size1 += step1) {
+            for (size_t size2 = 0; size2 < 10000; size2 += step2) {
+                TestSerializeDeserialize<TEvent, TEvent>(size1, size2);
+            }
+        }
+    }
+
+#if (!defined(_tsan_enabled_))
+    Y_UNIT_TEST(SerializeDeserialize) {
+        TestAllSizes<TEvMessageWithPayload>();
+    }
+#endif
+
+
+    struct TEvArenaMessage : TEventPBWithArena<TEvArenaMessage, TMessageWithPayload, EvArenaMessage> {
+    };
+
+    Y_UNIT_TEST(SerializeDeserializeArena) {
+        TestAllSizes<TEvArenaMessage>(500, 111);
+    }
+
+
+    struct TEvArenaMessageBig : TEventPBWithArena<TEvArenaMessageBig, TMessageWithPayload, EvArenaMessageBig, 4000, 32000> {
+    };
+
+    Y_UNIT_TEST(SerializeDeserializeArenaBig) {
+        TestAllSizes<TEvArenaMessageBig>(111, 500);
+    }
+
+
+    // Compatible with TEvArenaMessage but doesn't use arenas
+    struct TEvArenaMessageWithoutArena : TEventPB<TEvArenaMessageWithoutArena, TMessageWithPayload, EvArenaMessage> {
+    };
+
+    Y_UNIT_TEST(Compatibility) {
+        TestSerializeDeserialize<TEvArenaMessage, TEvArenaMessageWithoutArena>(200, 14010);
+        TestSerializeDeserialize<TEvArenaMessageWithoutArena, TEvArenaMessage>(2000, 4010);
+    }
+
+    Y_UNIT_TEST(PreSerializedCompatibility) {
+        // ensure TEventPreSerializedPB and TEventPB are interchangeable with no compatibility issues
+        TMessageWithPayload msg;
+        msg.SetMeta("hello, world!");
+        msg.AddPayloadId(123);
+        msg.AddPayloadId(999);
+        msg.AddSomeData("abc");
+        msg.AddSomeData("xyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz");
+
+        TEvMessageWithPayloadPreSerialized e1;
+        Y_PROTOBUF_SUPPRESS_NODISCARD msg.SerializeToString(&e1.PreSerializedData);
+
+        auto serializer1 = MakeHolder<TAllocChunkSerializer>();
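+        // e1 carries the message only in PreSerializedData (its own Record is empty),
+        // so its serialized form below must be byte-identical to that of a plain
+        // TEventPB constructed from the same message.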
e1.SerializeToArcadiaStream(serializer1.Get()); + auto buffers1 = serializer1->Release(e1.IsExtendedFormat()); + UNIT_ASSERT_VALUES_EQUAL(buffers1->GetSize(), e1.CalculateSerializedSize()); + TString ser1 = buffers1->GetString(); + + TEvMessageWithPayload e2(msg); + auto serializer2 = MakeHolder<TAllocChunkSerializer>(); + e2.SerializeToArcadiaStream(serializer2.Get()); + auto buffers2 = serializer2->Release(e2.IsExtendedFormat()); + UNIT_ASSERT_VALUES_EQUAL(buffers2->GetSize(), e2.CalculateSerializedSize()); + TString ser2 = buffers2->GetString(); + UNIT_ASSERT_VALUES_EQUAL(ser1, ser2); + + // deserialize + auto data = MakeIntrusive<TEventSerializedData>(ser1, false); + THolder<TEvMessageWithPayloadPreSerialized> parsedEvent(static_cast<TEvMessageWithPayloadPreSerialized*>(TEvMessageWithPayloadPreSerialized::Load(data))); + UNIT_ASSERT_VALUES_EQUAL(parsedEvent->PreSerializedData, ""); // this field is empty after deserialization + auto& record = parsedEvent->GetRecord(); + UNIT_ASSERT_VALUES_EQUAL(record.GetMeta(), msg.GetMeta()); + UNIT_ASSERT_VALUES_EQUAL(record.PayloadIdSize(), msg.PayloadIdSize()); + UNIT_ASSERT_VALUES_EQUAL(record.PayloadIdSize(), 2); + UNIT_ASSERT_VALUES_EQUAL(record.GetPayloadId(0), msg.GetPayloadId(0)); + UNIT_ASSERT_VALUES_EQUAL(record.GetPayloadId(1), msg.GetPayloadId(1)); + } +} diff --git a/library/cpp/actors/core/event_pb_ut.cpp b/library/cpp/actors/core/event_pb_ut.cpp new file mode 100644 index 0000000000..a16c3092b3 --- /dev/null +++ b/library/cpp/actors/core/event_pb_ut.cpp @@ -0,0 +1,71 @@ +#include "event_pb.h" + +#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/actors/protos/unittests.pb.h> + +Y_UNIT_TEST_SUITE(TEventSerialization) { + struct TMockEvent: public NActors::IEventBase { + TBigMessage* msg; + bool + SerializeToArcadiaStream(NActors::TChunkSerializer* chunker) const override { + return msg->SerializeToZeroCopyStream(chunker); + } + bool IsSerializable() const override { + return true; + } + TString ToStringHeader() const override { + return TString(); + } + virtual TString Serialize() const { + return TString(); + } + ui32 Type() const override { + return 0; + }; + }; + + Y_UNIT_TEST(Coroutine) { + TString strA(507, 'a'); + TString strB(814, 'b'); + TString strC(198, 'c'); + + TBigMessage bm; + + TSimple* simple0 = bm.AddSimples(); + simple0->SetStr1(strA); + simple0->SetStr2(strB); + simple0->SetNumber1(213431324); + + TSimple* simple1 = bm.AddSimples(); + simple1->SetStr1(strC); + simple1->SetStr2(strA); + simple1->SetNumber1(21039313); + + bm.AddManyStr(strA); + bm.AddManyStr(strC); + bm.AddManyStr(strB); + + bm.SetOneMoreStr(strB); + bm.SetYANumber(394143); + + TString bmSerialized; + Y_PROTOBUF_SUPPRESS_NODISCARD bm.SerializeToString(&bmSerialized); + UNIT_ASSERT_UNEQUAL(bmSerialized.size(), 0); + + NActors::TCoroutineChunkSerializer chunker; + for (int i = 0; i < 4; ++i) { + TMockEvent event; + event.msg = &bm; + chunker.SetSerializingEvent(&event); + char buf1[87]; + TString bmChunkedSerialized; + while (!chunker.IsComplete()) { + auto range = chunker.FeedBuf(&buf1[0], sizeof(buf1)); + for (auto p = range.first; p != range.second; ++p) { + bmChunkedSerialized.append(p->first, p->second); + } + } + UNIT_ASSERT_EQUAL(bmSerialized, bmChunkedSerialized); + } + } +} diff --git a/library/cpp/actors/core/events.h b/library/cpp/actors/core/events.h new file mode 100644 index 0000000000..702cf50fad --- /dev/null +++ b/library/cpp/actors/core/events.h @@ -0,0 +1,222 @@ +#pragma once + +#include "event.h" +#include 
"event_pb.h" + +#include <library/cpp/actors/protos/actors.pb.h> +#include <util/system/unaligned_mem.h> + +namespace NActors { + struct TEvents { + enum EEventSpace { + ES_HELLOWORLD = 0, + ES_SYSTEM = 1, + ES_INTERCONNECT = 2, + ES_INTERCONNECT_MSGBUS = 3, + ES_DNS = 4, + ES_SOCKET_POLLER = 5, + ES_LOGGER = 6, + ES_MON = 7, + ES_INTERCONNECT_TCP = 8, + ES_PROFILER = 9, + ES_YF = 10, + ES_HTTP = 11, + + ES_USERSPACE = 4096, + + ES_PRIVATE = (1 << 15) - 16, + ES_MAX = (1 << 15), + }; + +#define EventSpaceBegin(eventSpace) (eventSpace << 16u) +#define EventSpaceEnd(eventSpace) ((eventSpace << 16u) + (1u << 16u)) + + struct THelloWorld { + enum { + Start = EventSpaceBegin(ES_HELLOWORLD), + Ping, + Pong, + Blob, + End + }; + + static_assert(End < EventSpaceEnd(ES_HELLOWORLD), "expect End < EventSpaceEnd(ES_HELLOWORLD)"); + }; + + struct TEvPing: public TEventBase<TEvPing, THelloWorld::Ping> { + DEFINE_SIMPLE_NONLOCAL_EVENT(TEvPing, "HelloWorld: Ping"); + }; + + struct TEvPong: public TEventBase<TEvPong, THelloWorld::Pong> { + DEFINE_SIMPLE_NONLOCAL_EVENT(TEvPong, "HelloWorld: Pong"); + }; + + struct TEvBlob: public TEventBase<TEvBlob, THelloWorld::Blob> { + const TString Blob; + + TEvBlob(const TString& blob) noexcept + : Blob(blob) + { + } + + TString ToStringHeader() const noexcept override { + return "THelloWorld::Blob"; + } + + bool SerializeToArcadiaStream(TChunkSerializer *serializer) const override { + return serializer->WriteString(&Blob); + } + + static IEventBase* Load(TEventSerializedData* bufs) noexcept { + return new TEvBlob(bufs->GetString()); + } + + bool IsSerializable() const override { + return true; + } + }; + + struct TSystem { + enum { + Start = EventSpaceBegin(ES_SYSTEM), + Bootstrap, // generic bootstrap event + Wakeup, // generic timeout + Subscribe, // generic subscribe to something + Unsubscribe, // generic unsubscribe from something + Delivered, // event delivered + Undelivered, // event undelivered + Poison, // request actor to shutdown + Completed, // generic async job result event + PoisonTaken, // generic Poison taken (reply to PoisonPill event, i.e. 
died completely) + FlushLog, + CallbackCompletion, + CallbackException, + Gone, // Generic notification of actor death + TrackActor, + UntrackActor, + InvokeResult, + CoroTimeout, + InvokeQuery, + End, + + // Compatibility section + PoisonPill = Poison, + ActorDied = Gone, + }; + + static_assert(End < EventSpaceEnd(ES_SYSTEM), "expect End < EventSpaceEnd(ES_SYSTEM)"); + }; + + struct TEvBootstrap: public TEventBase<TEvBootstrap, TSystem::Bootstrap> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvBootstrap, "System: TEvBootstrap") + }; + + struct TEvPoison : public TEventBase<TEvPoison, TSystem::Poison> { + DEFINE_SIMPLE_NONLOCAL_EVENT(TEvPoison, "System: TEvPoison") + }; + + struct TEvWakeup: public TEventBase<TEvWakeup, TSystem::Wakeup> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvWakeup, "System: TEvWakeup") + + TEvWakeup(ui64 tag = 0) : Tag(tag) { } + + const ui64 Tag = 0; + }; + + struct TEvSubscribe: public TEventBase<TEvSubscribe, TSystem::Subscribe> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvSubscribe, "System: TEvSubscribe") + }; + + struct TEvUnsubscribe: public TEventBase<TEvUnsubscribe, TSystem::Unsubscribe> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvUnsubscribe, "System: TEvUnsubscribe") + }; + + struct TEvUndelivered: public TEventBase<TEvUndelivered, TSystem::Undelivered> { + enum EReason { + ReasonUnknown, + ReasonActorUnknown, + Disconnected + }; + const ui32 SourceType; + const EReason Reason; + const bool Unsure; + const TString Data; + + TEvUndelivered(ui32 sourceType, ui32 reason, bool unsure = false) + : SourceType(sourceType) + , Reason(static_cast<EReason>(reason)) + , Unsure(unsure) + , Data(MakeData(sourceType, reason)) + {} + + TString ToStringHeader() const override; + bool SerializeToArcadiaStream(TChunkSerializer *serializer) const override; + static IEventBase* Load(TEventSerializedData* bufs); + bool IsSerializable() const override; + + ui32 CalculateSerializedSize() const override { return 2 * sizeof(ui32); } + + static void Out(IOutputStream& o, EReason x); + + private: + static TString MakeData(ui32 sourceType, ui32 reason) { + TString s = TString::Uninitialized(sizeof(ui32) + sizeof(ui32)); + char *p = s.Detach(); + WriteUnaligned<ui32>(p + 0, sourceType); + WriteUnaligned<ui32>(p + 4, reason); + return s; + } + }; + + struct TEvCompleted: public TEventBase<TEvCompleted, TSystem::Completed> { + const ui32 Id; + const ui32 Status; + TEvCompleted(ui32 id = 0, ui32 status = 0) + : Id(id) + , Status(status) + { + } + + DEFINE_SIMPLE_LOCAL_EVENT(TEvCompleted, "System: TEvCompleted") + }; + + struct TEvPoisonTaken: public TEventBase<TEvPoisonTaken, TSystem::PoisonTaken> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvPoisonTaken, "System: TEvPoisonTaken") + }; + + struct TEvFlushLog: public TEventBase<TEvFlushLog, TSystem::FlushLog> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvFlushLog, "System: TEvFlushLog") + }; + + struct TEvCallbackException: public TEventPB<TEvCallbackException, + NActorsProto::TCallbackException, + TSystem::CallbackException> { + TEvCallbackException(const TActorId& id, const TString& msg) { + ActorIdToProto(id, Record.MutableActorId()); + Record.SetExceptionMessage(msg); + } + }; + + struct TEvCallbackCompletion: public TEventPB<TEvCallbackCompletion, + NActorsProto::TActorId, + TSystem::CallbackCompletion> { + TEvCallbackCompletion(const TActorId& id) { + ActorIdToProto(id, &Record); + } + }; + + struct TEvGone: public TEventBase<TEvGone, TSystem::Gone> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvGone, "System: TEvGone") + }; + + struct TEvInvokeResult; + + using TEvPoisonPill = TEvPoison; // Legacy name, deprecated + 
using TEvActorDied = TEvGone; + }; +} + +template <> +inline void Out<NActors::TEvents::TEvUndelivered::EReason>(IOutputStream& o, NActors::TEvents::TEvUndelivered::EReason x) { + NActors::TEvents::TEvUndelivered::Out(o, x); +} diff --git a/library/cpp/actors/core/events_undelivered.cpp b/library/cpp/actors/core/events_undelivered.cpp new file mode 100644 index 0000000000..23deaffd10 --- /dev/null +++ b/library/cpp/actors/core/events_undelivered.cpp @@ -0,0 +1,60 @@ +#include "events.h" +#include "actorsystem.h" + +namespace NActors { + TString TEvents::TEvUndelivered::ToStringHeader() const { + return "TSystem::Undelivered"; + } + + bool TEvents::TEvUndelivered::SerializeToArcadiaStream(TChunkSerializer *serializer) const { + Y_VERIFY(!Unsure); // these are local-only events generated by Interconnect + return serializer->WriteString(&Data); + } + + void TEvents::TEvUndelivered::Out(IOutputStream& o, EReason x) { + switch (x) { + case ReasonActorUnknown: + o << "ActorUnknown"; + break; + case Disconnected: + o << "Disconnected"; + break; + default: + o << "Undefined"; + break; + } + } + + bool TEvents::TEvUndelivered::IsSerializable() const { + return true; + } + + IEventBase* TEvents::TEvUndelivered::Load(TEventSerializedData* bufs) { + TString str = bufs->GetString(); + Y_VERIFY(str.size() == (sizeof(ui32) + sizeof(ui32))); + const char* p = str.data(); + const ui64 sourceType = ReadUnaligned<ui32>(p + 0); + const ui64 reason = ReadUnaligned<ui32>(p + 4); + return new TEvUndelivered(sourceType, reason); + } + + TAutoPtr<IEventHandle> IEventHandle::ForwardOnNondelivery(ui32 reason, bool unsure) { + if (Flags & FlagForwardOnNondelivery) { + const ui32 updatedFlags = Flags & ~(FlagForwardOnNondelivery | FlagSubscribeOnSession); + const TActorId recp = OnNondeliveryHolder ? 
OnNondeliveryHolder->Recipient : TActorId(); + + if (Event) + return new IEventHandle(recp, Sender, Event.Release(), updatedFlags, Cookie, &Recipient, TraceId.Clone()); + else + return new IEventHandle(Type, updatedFlags, recp, Sender, Buffer, Cookie, &Recipient, TraceId.Clone()); + } + + if (Flags & FlagTrackDelivery) { + const ui32 updatedFlags = Flags & ~(FlagTrackDelivery | FlagSubscribeOnSession | FlagGenerateUnsureUndelivered); + return new IEventHandle(Sender, Recipient, new TEvents::TEvUndelivered(Type, reason, unsure), updatedFlags, + Cookie, nullptr, TraceId.Clone()); + } + + return nullptr; + } +} diff --git a/library/cpp/actors/core/executelater.h b/library/cpp/actors/core/executelater.h new file mode 100644 index 0000000000..e7a13c1005 --- /dev/null +++ b/library/cpp/actors/core/executelater.h @@ -0,0 +1,87 @@ +#pragma once + +#include "actor_bootstrapped.h" + +#include <utility> + +namespace NActors { + template <typename TCallback> + class TExecuteLater: public TActorBootstrapped<TExecuteLater<TCallback>> { + public: + static constexpr IActor::EActivityType ActorActivityType() { + return IActor::ACTORLIB_COMMON; + } + + TExecuteLater( + TCallback&& callback, + IActor::EActivityType activityType, + ui32 channel = 0, + ui64 cookie = 0, + const TActorId& reportCompletionTo = TActorId(), + const TActorId& reportExceptionTo = TActorId()) noexcept + : Callback(std::move(callback)) + , Channel(channel) + , Cookie(cookie) + , ReportCompletionTo(reportCompletionTo) + , ReportExceptionTo(reportExceptionTo) + { + this->SetActivityType(activityType); + } + + void Bootstrap(const TActorContext& ctx) noexcept { + try { + { + /* RAII, Callback should be destroyed right before sending + TEvCallbackCompletion */ + + auto local = std::move(Callback); + using T = decltype(local); + + if constexpr (std::is_invocable_v<T, const TActorContext&>) { + local(ctx); + } else { + local(); + } + } + + if (ReportCompletionTo) { + ctx.Send(ReportCompletionTo, + new TEvents::TEvCallbackCompletion(ctx.SelfID), + Channel, Cookie); + } + } catch (...) 
{ + if (ReportExceptionTo) { + const TString msg = CurrentExceptionMessage(); + ctx.Send(ReportExceptionTo, + new TEvents::TEvCallbackException(ctx.SelfID, msg), + Channel, Cookie); + } + } + + this->Die(ctx); + } + + private: + TCallback Callback; + const ui32 Channel; + const ui64 Cookie; + const TActorId ReportCompletionTo; + const TActorId ReportExceptionTo; + }; + + template <typename T> + IActor* CreateExecuteLaterActor( + T&& func, + IActor::EActivityType activityType, + ui32 channel = 0, + ui64 cookie = 0, + const TActorId& reportCompletionTo = TActorId(), + const TActorId& reportExceptionTo = TActorId()) noexcept { + return new TExecuteLater<T>(std::forward<T>(func), + activityType, + channel, + cookie, + reportCompletionTo, + reportExceptionTo); + } +} diff --git a/library/cpp/actors/core/executor_pool_base.cpp b/library/cpp/actors/core/executor_pool_base.cpp new file mode 100644 index 0000000000..c3b9999168 --- /dev/null +++ b/library/cpp/actors/core/executor_pool_base.cpp @@ -0,0 +1,168 @@ +#include "executor_pool_base.h" +#include "executor_thread.h" +#include "mailbox.h" +#include "probes.h" +#include <library/cpp/actors/util/datetime.h> + +namespace NActors { + LWTRACE_USING(ACTORLIB_PROVIDER); + + void DoActorInit(TActorSystem* sys, IActor* actor, const TActorId& self, const TActorId& owner) { + actor->SelfActorId = self; + actor->Registered(sys, owner); + } + + TExecutorPoolBaseMailboxed::TExecutorPoolBaseMailboxed(ui32 poolId, ui32 maxActivityType) + : IExecutorPool(poolId) + , ActorSystem(nullptr) + , MailboxTable(new TMailboxTable) +#ifdef ACTORSLIB_COLLECT_EXEC_STATS + , Stats(maxActivityType) +#endif + {} + + TExecutorPoolBaseMailboxed::~TExecutorPoolBaseMailboxed() { + MailboxTable.Destroy(); + } + + TExecutorPoolBase::TExecutorPoolBase(ui32 poolId, ui32 threads, TAffinity* affinity, ui32 maxActivityType) + : TExecutorPoolBaseMailboxed(poolId, maxActivityType) + , PoolThreads(threads) + , ThreadsAffinity(affinity) + {} + + TExecutorPoolBase::~TExecutorPoolBase() { + while (Activations.Pop(0)) + ; + } + + void TExecutorPoolBaseMailboxed::ReclaimMailbox(TMailboxType::EType mailboxType, ui32 hint, TWorkerId workerId, ui64 revolvingWriteCounter) { + Y_UNUSED(workerId); + MailboxTable->ReclaimMailbox(mailboxType, hint, revolvingWriteCounter); + } + + ui64 TExecutorPoolBaseMailboxed::AllocateID() { + return ActorSystem->AllocateIDSpace(1); + } + + bool TExecutorPoolBaseMailboxed::Send(TAutoPtr<IEventHandle>& ev) { + Y_VERIFY_DEBUG(ev->GetRecipientRewrite().PoolID() == PoolId); +#ifdef ACTORSLIB_COLLECT_EXEC_STATS + RelaxedStore(&ev->SendTime, (::NHPTimer::STime)GetCycleCountFast()); +#endif + return MailboxTable->SendTo(ev, this); + } + + void TExecutorPoolBase::ScheduleActivation(ui32 activation) { + ScheduleActivationEx(activation, AtomicIncrement(ActivationsRevolvingCounter)); + } + + TActorId TExecutorPoolBaseMailboxed::Register(IActor* actor, TMailboxType::EType mailboxType, ui64 revolvingWriteCounter, const TActorId& parentId) { + NHPTimer::STime hpstart = GetCycleCountFast(); +#ifdef ACTORSLIB_COLLECT_EXEC_STATS + ui32 at = actor->GetActivityType(); + if (at >= Stats.MaxActivityType()) + at = 0; + AtomicIncrement(Stats.ActorsAliveByActivity[at]); +#endif + AtomicIncrement(ActorRegistrations); + + // first step - find good enough mailbox + ui32 hint = 0; + TMailboxHeader* mailbox = nullptr; + + if (revolvingWriteCounter == 0) + revolvingWriteCounter = AtomicIncrement(RegisterRevolvingCounter); + + { + ui32 hintBackoff = 0; + + while (hint == 0) { + hint = 
MailboxTable->AllocateMailbox(mailboxType, ++revolvingWriteCounter); + mailbox = MailboxTable->Get(hint); + + if (!mailbox->LockFromFree()) { + MailboxTable->ReclaimMailbox(mailboxType, hintBackoff, ++revolvingWriteCounter); + hintBackoff = hint; + hint = 0; + } + } + + MailboxTable->ReclaimMailbox(mailboxType, hintBackoff, ++revolvingWriteCounter); + } + + const ui64 localActorId = AllocateID(); + + // ok, got mailbox + mailbox->AttachActor(localActorId, actor); + + // do init + const TActorId actorId(ActorSystem->NodeId, PoolId, localActorId, hint); + DoActorInit(ActorSystem, actor, actorId, parentId); + + // Once we unlock the mailbox the actor starts running and we cannot use the pointer any more + actor = nullptr; + + switch (mailboxType) { + case TMailboxType::Simple: + UnlockFromExecution((TMailboxTable::TSimpleMailbox*)mailbox, this, false, hint, MaxWorkers, ++revolvingWriteCounter); + break; + case TMailboxType::Revolving: + UnlockFromExecution((TMailboxTable::TRevolvingMailbox*)mailbox, this, false, hint, MaxWorkers, ++revolvingWriteCounter); + break; + case TMailboxType::HTSwap: + UnlockFromExecution((TMailboxTable::THTSwapMailbox*)mailbox, this, false, hint, MaxWorkers, ++revolvingWriteCounter); + break; + case TMailboxType::ReadAsFilled: + UnlockFromExecution((TMailboxTable::TReadAsFilledMailbox*)mailbox, this, false, hint, MaxWorkers, ++revolvingWriteCounter); + break; + case TMailboxType::TinyReadAsFilled: + UnlockFromExecution((TMailboxTable::TTinyReadAsFilledMailbox*)mailbox, this, false, hint, MaxWorkers, ++revolvingWriteCounter); + break; + default: + Y_FAIL(); + } + + NHPTimer::STime elapsed = GetCycleCountFast() - hpstart; + if (elapsed > 1000000) { + LWPROBE(SlowRegisterNew, PoolId, NHPTimer::GetSeconds(elapsed) * 1000.0); + } + + return actorId; + } + + TActorId TExecutorPoolBaseMailboxed::Register(IActor* actor, TMailboxHeader* mailbox, ui32 hint, const TActorId& parentId) { + NHPTimer::STime hpstart = GetCycleCountFast(); +#ifdef ACTORSLIB_COLLECT_EXEC_STATS + ui32 at = actor->GetActivityType(); + if (at >= Stats.MaxActivityType()) + at = 0; + AtomicIncrement(Stats.ActorsAliveByActivity[at]); +#endif + AtomicIncrement(ActorRegistrations); + + const ui64 localActorId = AllocateID(); + mailbox->AttachActor(localActorId, actor); + + const TActorId actorId(ActorSystem->NodeId, PoolId, localActorId, hint); + DoActorInit(ActorSystem, actor, actorId, parentId); + NHPTimer::STime elapsed = GetCycleCountFast() - hpstart; + if (elapsed > 1000000) { + LWPROBE(SlowRegisterAdd, PoolId, NHPTimer::GetSeconds(elapsed) * 1000.0); + } + + return actorId; + } + + TAffinity* TExecutorPoolBase::Affinity() const { + return ThreadsAffinity.Get(); + } + + bool TExecutorPoolBaseMailboxed::Cleanup() { + return MailboxTable->Cleanup(); + } + + ui32 TExecutorPoolBase::GetThreads() const { + return PoolThreads; + } +} diff --git a/library/cpp/actors/core/executor_pool_base.h b/library/cpp/actors/core/executor_pool_base.h new file mode 100644 index 0000000000..c84ce1af77 --- /dev/null +++ b/library/cpp/actors/core/executor_pool_base.h @@ -0,0 +1,49 @@ +#pragma once + +#include "actorsystem.h" +#include "executor_thread.h" +#include "scheduler_queue.h" +#include <library/cpp/actors/util/affinity.h> +#include <library/cpp/actors/util/unordered_cache.h> +#include <library/cpp/actors/util/threadparkpad.h> + +namespace NActors { + class TExecutorPoolBaseMailboxed: public IExecutorPool { + protected: + TActorSystem* ActorSystem; + THolder<TMailboxTable> MailboxTable; +#ifdef 
ACTORSLIB_COLLECT_EXEC_STATS + // Need to have per pool object to collect stats like actor registrations (because + // registrations might be done in threads from other pools) + TExecutorThreadStats Stats; +#endif + TAtomic RegisterRevolvingCounter = 0; + ui64 AllocateID(); + public: + TExecutorPoolBaseMailboxed(ui32 poolId, ui32 maxActivityType); + ~TExecutorPoolBaseMailboxed(); + void ReclaimMailbox(TMailboxType::EType mailboxType, ui32 hint, TWorkerId workerId, ui64 revolvingWriteCounter) override; + bool Send(TAutoPtr<IEventHandle>& ev) override; + TActorId Register(IActor* actor, TMailboxType::EType mailboxType, ui64 revolvingWriteCounter, const TActorId& parentId) override; + TActorId Register(IActor* actor, TMailboxHeader* mailbox, ui32 hint, const TActorId& parentId) override; + bool Cleanup() override; + }; + + class TExecutorPoolBase: public TExecutorPoolBaseMailboxed { + protected: + const ui32 PoolThreads; + TIntrusivePtr<TAffinity> ThreadsAffinity; + TAtomic Semaphore = 0; + TUnorderedCache<ui32, 512, 4> Activations; + TAtomic ActivationsRevolvingCounter = 0; + volatile bool StopFlag = false; + public: + TExecutorPoolBase(ui32 poolId, ui32 threads, TAffinity* affinity, ui32 maxActivityType); + ~TExecutorPoolBase(); + void ScheduleActivation(ui32 activation) override; + TAffinity* Affinity() const override; + ui32 GetThreads() const override; + }; + + void DoActorInit(TActorSystem*, IActor*, const TActorId&, const TActorId&); +} diff --git a/library/cpp/actors/core/executor_pool_basic.cpp b/library/cpp/actors/core/executor_pool_basic.cpp new file mode 100644 index 0000000000..4dce16939a --- /dev/null +++ b/library/cpp/actors/core/executor_pool_basic.cpp @@ -0,0 +1,431 @@ +#include "executor_pool_basic.h" +#include "probes.h" +#include "mailbox.h" +#include <library/cpp/actors/util/affinity.h> +#include <library/cpp/actors/util/datetime.h> + +#ifdef _linux_ +#include <pthread.h> +#endif + +namespace NActors { + LWTRACE_USING(ACTORLIB_PROVIDER); + + constexpr TDuration TBasicExecutorPool::DEFAULT_TIME_PER_MAILBOX; + + TBasicExecutorPool::TBasicExecutorPool( + ui32 poolId, + ui32 threads, + ui64 spinThreshold, + const TString& poolName, + TAffinity* affinity, + TDuration timePerMailbox, + ui32 eventsPerMailbox, + int realtimePriority, + ui32 maxActivityType) + : TExecutorPoolBase(poolId, threads, affinity, maxActivityType) + , SpinThreshold(spinThreshold) + , SpinThresholdCycles(spinThreshold * NHPTimer::GetCyclesPerSecond() * 0.000001) // convert microseconds to cycles + , Threads(new TThreadCtx[threads]) + , PoolName(poolName) + , TimePerMailbox(timePerMailbox) + , EventsPerMailbox(eventsPerMailbox) + , RealtimePriority(realtimePriority) + , ThreadUtilization(0) + , MaxUtilizationCounter(0) + , MaxUtilizationAccumulator(0) + , ThreadCount(threads) + { + } + + TBasicExecutorPool::TBasicExecutorPool(const TBasicExecutorPoolConfig& cfg) + : TBasicExecutorPool( + cfg.PoolId, + cfg.Threads, + cfg.SpinThreshold, + cfg.PoolName, + new TAffinity(cfg.Affinity), + cfg.TimePerMailbox, + cfg.EventsPerMailbox, + cfg.RealtimePriority, + cfg.MaxActivityType + ) + {} + + TBasicExecutorPool::~TBasicExecutorPool() { + Threads.Destroy(); + } + + ui32 TBasicExecutorPool::GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) { + ui32 workerId = wctx.WorkerId; + Y_VERIFY_DEBUG(workerId < PoolThreads); + + NHPTimer::STime elapsed = 0; + NHPTimer::STime parked = 0; + NHPTimer::STime blocked = 0; + NHPTimer::STime hpstart = GetCycleCountFast(); + NHPTimer::STime hpnow; + + TThreadCtx& 
threadCtx = Threads[workerId]; + AtomicSet(threadCtx.WaitingFlag, TThreadCtx::WS_NONE); + + if (Y_UNLIKELY(AtomicGet(threadCtx.BlockedFlag) != TThreadCtx::BS_NONE)) { + do { + if (AtomicCas(&threadCtx.BlockedFlag, TThreadCtx::BS_BLOCKED, TThreadCtx::BS_BLOCKING)) { + hpnow = GetCycleCountFast(); + elapsed += hpnow - hpstart; + if (threadCtx.BlockedPad.Park()) // interrupted + return 0; + hpstart = GetCycleCountFast(); + blocked += hpstart - hpnow; + } + } while (AtomicGet(threadCtx.BlockedFlag) != TThreadCtx::BS_NONE && !AtomicLoad(&StopFlag)); + } + + const TAtomic x = AtomicDecrement(Semaphore); + + if (x < 0) { +#if defined ACTORSLIB_COLLECT_EXEC_STATS + if (AtomicGetAndIncrement(ThreadUtilization) == 0) { + // Initially counter contains -t0, the pool start timestamp + // When the first thread goes to sleep we add t1, so the counter + // becomes t1-t0 >= 0, or the duration of max utilization so far. + // If the counter was negative and becomes positive, that means + // counter just turned into a duration and we should store that + // duration. Otherwise another thread raced with us and + // subtracted some other timestamp t2. + const i64 t = GetCycleCountFast(); + const i64 x = AtomicGetAndAdd(MaxUtilizationCounter, t); + if (x < 0 && x + t > 0) + AtomicStore(&MaxUtilizationAccumulator, x + t); + } +#endif + + Y_VERIFY(AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_NONE); + + if (SpinThreshold > 0) { + // spin configured period + AtomicSet(threadCtx.WaitingFlag, TThreadCtx::WS_ACTIVE); + ui64 start = GetCycleCountFast(); + bool doSpin = true; + while (true) { + for (ui32 j = 0; doSpin && j < 12; ++j) { + if (GetCycleCountFast() >= (start + SpinThresholdCycles)) { + doSpin = false; + break; + } + for (ui32 i = 0; i < 12; ++i) { + if (AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_ACTIVE) { + SpinLockPause(); + } else { + doSpin = false; + break; + } + } + } + if (!doSpin) { + break; + } + if (RelaxedLoad(&StopFlag)) { + break; + } + } + // then - sleep + if (AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_ACTIVE) { + if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_BLOCKED, TThreadCtx::WS_ACTIVE)) { + do { + hpnow = GetCycleCountFast(); + elapsed += hpnow - hpstart; + if (threadCtx.Pad.Park()) // interrupted + return 0; + hpstart = GetCycleCountFast(); + parked += hpstart - hpnow; + } while (AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_BLOCKED); + } + } + } else { + AtomicSet(threadCtx.WaitingFlag, TThreadCtx::WS_BLOCKED); + do { + hpnow = GetCycleCountFast(); + elapsed += hpnow - hpstart; + if (threadCtx.Pad.Park()) // interrupted + return 0; + hpstart = GetCycleCountFast(); + parked += hpstart - hpnow; + } while (AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_BLOCKED); + } + + Y_VERIFY_DEBUG(AtomicLoad(&StopFlag) || AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_RUNNING); + +#if defined ACTORSLIB_COLLECT_EXEC_STATS + if (AtomicDecrement(ThreadUtilization) == 0) { + // When we started sleeping counter contained t1-t0, or the + // last duration of max utilization. Now we subtract t2 >= t1, + // which turns counter negative again, and the next sleep cycle + // at timestamp t3 would be adding some new duration t3-t2. + // If the counter was positive and becomes negative that means + // there are no current races with other threads and we should + // store the last positive duration we observed. Multiple + // threads may be adding and subtracting values in potentially + // arbitrary order, which would cause counter to oscillate + // around zero. 
When it crosses zero is a good indication of a + // correct value. + const i64 t = GetCycleCountFast(); + const i64 x = AtomicGetAndAdd(MaxUtilizationCounter, -t); + if (x > 0 && x - t < 0) + AtomicStore(&MaxUtilizationAccumulator, x); + } +#endif + } else { + AtomicSet(threadCtx.WaitingFlag, TThreadCtx::WS_RUNNING); + } + + // ok, has work suggested, must dequeue + while (!RelaxedLoad(&StopFlag)) { + if (const ui32 activation = Activations.Pop(++revolvingCounter)) { + hpnow = GetCycleCountFast(); + elapsed += hpnow - hpstart; + wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, elapsed); + if (parked > 0) { + wctx.AddParkedCycles(parked); + } + if (blocked > 0) { + wctx.AddBlockedCycles(blocked); + } + return activation; + } + SpinLockPause(); + } + + // stopping, die! + return 0; + } + + inline void TBasicExecutorPool::WakeUpLoop() { + for (ui32 i = 0;;) { + TThreadCtx& threadCtx = Threads[i % PoolThreads]; + switch (AtomicLoad(&threadCtx.WaitingFlag)) { + case TThreadCtx::WS_NONE: + case TThreadCtx::WS_RUNNING: + ++i; + break; + case TThreadCtx::WS_ACTIVE: // in active spin-lock, just set flag + if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_RUNNING, TThreadCtx::WS_ACTIVE)) { + return; + } + break; + case TThreadCtx::WS_BLOCKED: + if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_RUNNING, TThreadCtx::WS_BLOCKED)) { + threadCtx.Pad.Unpark(); + return; + } + break; + default: + Y_FAIL(); + } + } + } + + void TBasicExecutorPool::ScheduleActivationEx(ui32 activation, ui64 revolvingCounter) { + Activations.Push(activation, revolvingCounter); + const TAtomic x = AtomicIncrement(Semaphore); + if (x <= 0) { // we must find someone to wake-up + WakeUpLoop(); + } + } + + void TBasicExecutorPool::GetCurrentStats(TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const { + poolStats.MaxUtilizationTime = RelaxedLoad(&MaxUtilizationAccumulator) / (i64)(NHPTimer::GetCyclesPerSecond() / 1000); + + statsCopy.resize(PoolThreads + 1); + // Save counters from the pool object + statsCopy[0] = TExecutorThreadStats(); + statsCopy[0].Aggregate(Stats); + // Per-thread stats + for (size_t i = 0; i < PoolThreads; ++i) { + Threads[i].Thread->GetCurrentStats(statsCopy[i + 1]); + } + } + + void TBasicExecutorPool::Prepare(TActorSystem* actorSystem, NSchedulerQueue::TReader** scheduleReaders, ui32* scheduleSz) { + TAffinityGuard affinityGuard(Affinity()); + + ActorSystem = actorSystem; + + ScheduleReaders.Reset(new NSchedulerQueue::TReader[PoolThreads]); + ScheduleWriters.Reset(new NSchedulerQueue::TWriter[PoolThreads]); + + for (ui32 i = 0; i != PoolThreads; ++i) { + Threads[i].Thread.Reset( + new TExecutorThread( + i, + 0, // CpuId is not used in BASIC pool + actorSystem, + this, + MailboxTable.Get(), + PoolName, + TimePerMailbox, + EventsPerMailbox)); + ScheduleWriters[i].Init(ScheduleReaders[i]); + } + + *scheduleReaders = ScheduleReaders.Get(); + *scheduleSz = PoolThreads; + } + + void TBasicExecutorPool::Start() { + TAffinityGuard affinityGuard(Affinity()); + + ThreadUtilization = 0; + AtomicAdd(MaxUtilizationCounter, -(i64)GetCycleCountFast()); + + for (ui32 i = 0; i != PoolThreads; ++i) { + Threads[i].Thread->Start(); + } + } + + void TBasicExecutorPool::PrepareStop() { + AtomicStore(&StopFlag, true); + for (ui32 i = 0; i != PoolThreads; ++i) { + Threads[i].Pad.Interrupt(); + Threads[i].BlockedPad.Interrupt(); + } + } + + void TBasicExecutorPool::Shutdown() { + for (ui32 i = 0; i != PoolThreads; ++i) + Threads[i].Thread->Join(); + } + + void TBasicExecutorPool::Schedule(TInstant 
deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) {
+        Y_VERIFY_DEBUG(workerId < PoolThreads);
+
+        Schedule(deadline - ActorSystem->Timestamp(), ev, cookie, workerId);
+    }
+
+    void TBasicExecutorPool::Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) {
+        Y_VERIFY_DEBUG(workerId < PoolThreads);
+
+        const auto current = ActorSystem->Monotonic();
+        if (deadline < current)
+            deadline = current;
+
+        ScheduleWriters[workerId].Push(deadline.MicroSeconds(), ev.Release(), cookie);
+    }
+
+    void TBasicExecutorPool::Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) {
+        Y_VERIFY_DEBUG(workerId < PoolThreads);
+
+        const auto deadline = ActorSystem->Monotonic() + delta;
+        ScheduleWriters[workerId].Push(deadline.MicroSeconds(), ev.Release(), cookie);
+    }
+
+    void TBasicExecutorPool::SetRealTimeMode() const {
+// TODO: musl-libc version of `sched_param` struct is for some reason different from pthread
+// version in Ubuntu 12.04
+#if defined(_linux_) && !defined(_musl_)
+        if (RealtimePriority != 0) {
+            pthread_t threadSelf = pthread_self();
+            sched_param param = {RealtimePriority};
+            if (pthread_setschedparam(threadSelf, SCHED_FIFO, &param)) {
+                Y_FAIL("Cannot set realtime priority");
+            }
+        }
+#else
+        Y_UNUSED(RealtimePriority);
+#endif
+    }
+
+    ui32 TBasicExecutorPool::GetThreadCount() const {
+        return AtomicGet(ThreadCount);
+    }
+
+    void TBasicExecutorPool::SetThreadCount(ui32 threads) {
+        threads = Max(1u, Min(PoolThreads, threads));
+        with_lock (ChangeThreadsLock) {
+            size_t prevCount = GetThreadCount();
+            AtomicSet(ThreadCount, threads);
+            if (prevCount < threads) {
+                for (size_t i = prevCount; i < threads; ++i) {
+                    bool repeat = true;
+                    while (repeat) {
+                        switch (AtomicGet(Threads[i].BlockedFlag)) {
+                        case TThreadCtx::BS_BLOCKING:
+                            if (AtomicCas(&Threads[i].BlockedFlag, TThreadCtx::BS_NONE, TThreadCtx::BS_BLOCKING)) {
+                                // the thread has not entered the blocked loop yet
+                                repeat = false;
+                            }
+                            break;
+                        case TThreadCtx::BS_BLOCKED:
+                            // the thread has entered the blocked loop, wake it up
+                            AtomicSet(Threads[i].BlockedFlag, TThreadCtx::BS_NONE);
+                            Threads[i].BlockedPad.Unpark();
+                            repeat = false;
+                            break;
+                        default:
+                            // the flag must not be TThreadCtx::BS_NONE here, because this thread was told to block last time
+                            Y_FAIL("BlockedFlag is neither TThreadCtx::BS_BLOCKING nor TThreadCtx::BS_BLOCKED when thread was woken up");
+                        }
+                    }
+                }
+            } else if (prevCount > threads) {
+                // first, ask the excess threads to block
+                for (size_t i = threads; i < prevCount; ++i) {
+                    Y_VERIFY(AtomicGet(Threads[i].BlockedFlag) == TThreadCtx::BS_NONE);
+                    AtomicSet(Threads[i].BlockedFlag, TThreadCtx::BS_BLOCKING);
+                }
+                // then wake up any of those threads that are waiting
+                for (size_t idx = threads; idx < prevCount; ++idx) {
+                    TThreadCtx& threadCtx = Threads[idx];
+                    auto waitingFlag = AtomicGet(threadCtx.WaitingFlag);
+                    auto blockedFlag = AtomicGet(threadCtx.BlockedFlag);
+                    // while the thread is in this state pair (WS_NONE and BS_BLOCKING) we can't tell which way it will go:
+                    // either it goes to sleep and will have to be woken up,
+                    // or it starts executing a task and will block after completion.
+                    while (waitingFlag == TThreadCtx::WS_NONE && blockedFlag == TThreadCtx::BS_BLOCKING) {
+                        waitingFlag = AtomicGet(threadCtx.WaitingFlag);
+                        blockedFlag = AtomicGet(threadCtx.BlockedFlag);
+                    }
+                    // next states:
+                    // 1) WS_ACTIVE  BS_BLOCKING - waiting, started spinning | needs a wake-up to block
+                    // 2) WS_BLOCKED BS_BLOCKING - waiting, started sleeping | needs a wake-up to block
+                    // 3) WS_RUNNING BS_BLOCKING - started executing         | no wake-up needed, will block after executing
+                    // 4) WS_NONE    BS_BLOCKED  - blocked                   | no wake-up needed, already blocked
+
+                    if (waitingFlag == TThreadCtx::WS_ACTIVE || waitingFlag == TThreadCtx::WS_BLOCKED) {
+                        // needs a wake-up
+                        Y_VERIFY(blockedFlag == TThreadCtx::BS_BLOCKING);
+
+                        // create an empty mailbox hint, where LineIndex == 1 and LineHint == 0, whose activation will be ignored
+                        constexpr auto emptyMailBoxHint = TMailboxTable::LineIndexMask & -TMailboxTable::LineIndexMask;
+                        ui64 revolvingCounter = AtomicGet(ActivationsRevolvingCounter);
+
+                        Activations.Push(emptyMailBoxHint, revolvingCounter);
+
+                        auto x = AtomicIncrement(Semaphore);
+                        if (x <= 0) {
+                            // try to wake the thread up; on success move on to the next one
+                            switch (waitingFlag) {
+                            case TThreadCtx::WS_ACTIVE: // in active spin-lock, just set the flag
+                                if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_RUNNING, TThreadCtx::WS_ACTIVE)) {
+                                    continue;
+                                }
+                                break;
+                            case TThreadCtx::WS_BLOCKED:
+                                if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_RUNNING, TThreadCtx::WS_BLOCKED)) {
+                                    threadCtx.Pad.Unpark();
+                                    continue;
+                                }
+                                break;
+                            default:
+                                ; // another thread already woke this sleeping one
+                            }
+                            // if the thread has already been woken by someone else, we must wake up another one
+                            WakeUpLoop();
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/library/cpp/actors/core/executor_pool_basic.h b/library/cpp/actors/core/executor_pool_basic.h
new file mode 100644
index 0000000000..023190f7fe
--- /dev/null
+++ b/library/cpp/actors/core/executor_pool_basic.h
@@ -0,0 +1,111 @@
+#pragma once
+
+#include "actorsystem.h"
+#include "executor_thread.h"
+#include "scheduler_queue.h"
+#include "executor_pool_base.h"
+#include <library/cpp/actors/util/unordered_cache.h>
+#include <library/cpp/actors/util/threadparkpad.h>
+#include <library/cpp/monlib/dynamic_counters/counters.h>
+
+#include <util/system/mutex.h>
+
+namespace NActors {
+    class TBasicExecutorPool: public TExecutorPoolBase {
+        struct TThreadCtx {
+            TAutoPtr<TExecutorThread> Thread;
+            TThreadParkPad Pad;
+            TThreadParkPad BlockedPad;
+            TAtomic WaitingFlag;
+            TAtomic BlockedFlag;
+
+            // different threads must spin/block on different cache-lines.
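+            // (otherwise Park/Unpark traffic on one thread's flags would false-share
+            // with its neighbours')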
+ // we add some padding bytes to enforce this rule + static const size_t SizeWithoutPadding = sizeof(TAutoPtr<TExecutorThread>) + 2 * sizeof(TThreadParkPad) + 2 * sizeof(TAtomic); + ui8 Padding[64 - SizeWithoutPadding]; + static_assert(64 >= SizeWithoutPadding); + + enum EWaitState { + WS_NONE, + WS_ACTIVE, + WS_BLOCKED, + WS_RUNNING + }; + + enum EBlockedState { + BS_NONE, + BS_BLOCKING, + BS_BLOCKED + }; + + TThreadCtx() + : WaitingFlag(WS_NONE) + , BlockedFlag(BS_NONE) + { + } + }; + + const ui64 SpinThreshold; + const ui64 SpinThresholdCycles; + + TArrayHolder<TThreadCtx> Threads; + + TArrayHolder<NSchedulerQueue::TReader> ScheduleReaders; + TArrayHolder<NSchedulerQueue::TWriter> ScheduleWriters; + + const TString PoolName; + const TDuration TimePerMailbox; + const ui32 EventsPerMailbox; + + const int RealtimePriority; + + TAtomic ThreadUtilization; + TAtomic MaxUtilizationCounter; + TAtomic MaxUtilizationAccumulator; + + TAtomic ThreadCount; + TMutex ChangeThreadsLock; + + public: + static constexpr TDuration DEFAULT_TIME_PER_MAILBOX = TBasicExecutorPoolConfig::DEFAULT_TIME_PER_MAILBOX; + static constexpr ui32 DEFAULT_EVENTS_PER_MAILBOX = TBasicExecutorPoolConfig::DEFAULT_EVENTS_PER_MAILBOX; + + TBasicExecutorPool(ui32 poolId, + ui32 threads, + ui64 spinThreshold, + const TString& poolName = "", + TAffinity* affinity = nullptr, + TDuration timePerMailbox = DEFAULT_TIME_PER_MAILBOX, + ui32 eventsPerMailbox = DEFAULT_EVENTS_PER_MAILBOX, + int realtimePriority = 0, + ui32 maxActivityType = 1); + explicit TBasicExecutorPool(const TBasicExecutorPoolConfig& cfg); + ~TBasicExecutorPool(); + + ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingReadCounter) override; + + void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override; + void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override; + void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override; + + void ScheduleActivationEx(ui32 activation, ui64 revolvingWriteCounter) override; + + void Prepare(TActorSystem* actorSystem, NSchedulerQueue::TReader** scheduleReaders, ui32* scheduleSz) override; + void Start() override; + void PrepareStop() override; + void Shutdown() override; + + void GetCurrentStats(TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const override; + TString GetName() const override { + return PoolName; + } + + void SetRealTimeMode() const override; + + ui32 GetThreadCount() const; + void SetThreadCount(ui32 threads); + + private: + void WakeUpLoop(); + }; +} diff --git a/library/cpp/actors/core/executor_pool_basic_ut.cpp b/library/cpp/actors/core/executor_pool_basic_ut.cpp new file mode 100644 index 0000000000..76dff693af --- /dev/null +++ b/library/cpp/actors/core/executor_pool_basic_ut.cpp @@ -0,0 +1,435 @@ +#include "actorsystem.h" +#include "executor_pool_basic.h" +#include "hfunc.h" +#include "scheduler_basic.h" + +#include <library/cpp/actors/util/should_continue.h> + +#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/actors/protos/unittests.pb.h> + +using namespace NActors; + +//////////////////////////////////////////////////////////////////////////////// + +struct TEvMsg : public NActors::TEventBase<TEvMsg, 10347> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvMsg, "ExecutorPoolTest: Msg"); +}; + +//////////////////////////////////////////////////////////////////////////////// + +class TTestSenderActor : 
public IActor { +private: + using EActivityType = IActor::EActivityType ; + using EActorActivity = IActor::EActorActivity; + +private: + TAtomic Counter; + TActorId Receiver; + + std::function<void(void)> Action; + +public: + TTestSenderActor(std::function<void(void)> action = [](){}, + EActivityType activityType = EActorActivity::OTHER) + : IActor(static_cast<TReceiveFunc>(&TTestSenderActor::Execute), activityType) + , Action(action) + {} + + void Start(TActorId receiver, size_t count) + { + AtomicSet(Counter, count); + Receiver = receiver; + } + + void Stop() { + while (true) { + if (GetCounter() == 0) { + break; + } + + Sleep(TDuration::MilliSeconds(1)); + } + } + + size_t GetCounter() const { + return AtomicGet(Counter); + } + +private: + STFUNC(Execute) + { + Y_UNUSED(ctx); + switch (ev->GetTypeRewrite()) { + hFunc(TEvMsg, Handle); + } + } + + void Handle(TEvMsg::TPtr &ev) + { + Y_UNUSED(ev); + Action(); + TAtomicBase count = AtomicDecrement(Counter); + Y_VERIFY(count != Max<TAtomicBase>()); + if (count) { + Send(Receiver, new TEvMsg()); + } + } +}; + +THolder<TActorSystemSetup> GetActorSystemSetup(TBasicExecutorPool* pool) +{ + auto setup = MakeHolder<NActors::TActorSystemSetup>(); + setup->NodeId = 1; + setup->ExecutorsCount = 1; + setup->Executors.Reset(new TAutoPtr<NActors::IExecutorPool>[1]); + setup->Executors[0] = pool; + setup->Scheduler = new TBasicSchedulerThread(NActors::TSchedulerConfig(512, 0)); + return setup; +} + +Y_UNIT_TEST_SUITE(BasicExecutorPool) { + + Y_UNIT_TEST(DecreaseIncreaseThreadsCount) { + const size_t msgCount = 1e4; + const size_t size = 4; + const size_t halfSize = size / 2; + TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50); + + auto setup = GetActorSystemSetup(executorPool); + TActorSystem actorSystem(setup); + actorSystem.Start(); + + executorPool->SetThreadCount(halfSize); + TTestSenderActor* actors[size]; + TActorId actorIds[size]; + for (size_t i = 0; i < size; ++i) { + actors[i] = new TTestSenderActor(); + actorIds[i] = actorSystem.Register(actors[i]); + } + + const int testCount = 2; + + TExecutorPoolStats poolStats[testCount]; + TVector<TExecutorThreadStats> statsCopy[testCount]; + + for (size_t testIdx = 0; testIdx < testCount; ++testIdx) { + for (size_t i = 0; i < size; ++i) { + actors[i]->Start(actors[i]->SelfId(), msgCount); + } + for (size_t i = 0; i < size; ++i) { + actorSystem.Send(actorIds[i], new TEvMsg()); + } + + Sleep(TDuration::MilliSeconds(100)); + + for (size_t i = 0; i < size; ++i) { + actors[i]->Stop(); + } + + executorPool->GetCurrentStats(poolStats[testIdx], statsCopy[testIdx]); + } + + for (size_t i = 1; i <= halfSize; ++i) { + UNIT_ASSERT_UNEQUAL(statsCopy[0][i].ReceivedEvents, statsCopy[1][i].ReceivedEvents); + } + + for (size_t i = halfSize + 1; i <= size; ++i) { + UNIT_ASSERT_EQUAL(statsCopy[0][i].ReceivedEvents, statsCopy[1][i].ReceivedEvents); + } + + executorPool->SetThreadCount(size); + + for (size_t testIdx = 0; testIdx < testCount; ++testIdx) { + for (size_t i = 0; i < size; ++i) { + actors[i]->Start(actors[i]->SelfId(), msgCount); + } + for (size_t i = 0; i < size; ++i) { + actorSystem.Send(actorIds[i], new TEvMsg()); + } + + Sleep(TDuration::MilliSeconds(100)); + + for (size_t i = 0; i < size; ++i) { + actors[i]->Stop(); + } + + executorPool->GetCurrentStats(poolStats[testIdx], statsCopy[testIdx]); + } + + for (size_t i = 1; i <= size; ++i) { + UNIT_ASSERT_UNEQUAL(statsCopy[0][i].ReceivedEvents, statsCopy[1][i].ReceivedEvents); + } + } + + Y_UNIT_TEST(ChangeCount) { + const size_t msgCount = 
1e3; + const size_t size = 4; + const size_t halfSize = size / 2; + TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50); + + auto begin = TInstant::Now(); + + auto setup = GetActorSystemSetup(executorPool); + TActorSystem actorSystem(setup); + actorSystem.Start(); + executorPool->SetThreadCount(halfSize); + + TTestSenderActor* actors[size]; + TActorId actorIds[size]; + for (size_t i = 0; i < size; ++i) { + actors[i] = new TTestSenderActor(); + actorIds[i] = actorSystem.Register(actors[i]); + } + + for (size_t i = 0; i < size; ++i) { + actors[i]->Start(actorIds[i], msgCount); + } + for (size_t i = 0; i < size; ++i) { + actorSystem.Send(actorIds[i], new TEvMsg()); + } + + const i32 N = 6; + const i32 threadsCouns[N] = { 1, 3, 2, 3, 1, 4 }; + + ui64 counter = 0; + + TTestSenderActor* changerActor = new TTestSenderActor([&]{ + executorPool->SetThreadCount(threadsCouns[counter]); + counter++; + if (counter == N) { + counter = 0; + } + }); + TActorId changerActorId = actorSystem.Register(changerActor); + changerActor->Start(changerActorId, msgCount); + actorSystem.Send(changerActorId, new TEvMsg()); + + while (true) { + size_t maxCounter = 0; + for (size_t i = 0; i < size; ++i) { + maxCounter = Max(maxCounter, actors[i]->GetCounter()); + } + + if (maxCounter == 0) { + break; + } + + auto now = TInstant::Now(); + UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Max counter is " << maxCounter); + + Sleep(TDuration::MilliSeconds(1)); + } + + changerActor->Stop(); + } + + Y_UNIT_TEST(CheckCompleteOne) { + const size_t size = 4; + const size_t msgCount = 1e4; + TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50); + + auto setup = GetActorSystemSetup(executorPool); + TActorSystem actorSystem(setup); + actorSystem.Start(); + + auto begin = TInstant::Now(); + + auto actor = new TTestSenderActor(); + auto actorId = actorSystem.Register(actor); + actor->Start(actor->SelfId(), msgCount); + actorSystem.Send(actorId, new TEvMsg()); + + while (actor->GetCounter()) { + auto now = TInstant::Now(); + UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Counter is " << actor->GetCounter()); + + Sleep(TDuration::MilliSeconds(1)); + } + } + + Y_UNIT_TEST(CheckCompleteAll) { + const size_t size = 4; + const size_t msgCount = 1e4; + TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50); + + auto setup = GetActorSystemSetup(executorPool); + TActorSystem actorSystem(setup); + actorSystem.Start(); + + auto begin = TInstant::Now(); + + TTestSenderActor* actors[size]; + TActorId actorIds[size]; + + for (size_t i = 0; i < size; ++i) { + actors[i] = new TTestSenderActor(); + actorIds[i] = actorSystem.Register(actors[i]); + } + for (size_t i = 0; i < size; ++i) { + actors[i]->Start(actors[i]->SelfId(), msgCount); + } + for (size_t i = 0; i < size; ++i) { + actorSystem.Send(actorIds[i], new TEvMsg()); + } + + + while (true) { + size_t maxCounter = 0; + for (size_t i = 0; i < size; ++i) { + maxCounter = Max(maxCounter, actors[i]->GetCounter()); + } + + if (maxCounter == 0) { + break; + } + + auto now = TInstant::Now(); + UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Max counter is " << maxCounter); + + Sleep(TDuration::MilliSeconds(1)); + } + } + + Y_UNIT_TEST(CheckCompleteOver) { + const size_t size = 4; + const size_t actorsCount = size * 2; + const size_t msgCount = 1e4; + TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50); + + auto setup = GetActorSystemSetup(executorPool); + TActorSystem actorSystem(setup); + actorSystem.Start(); + + auto 
begin = TInstant::Now(); + + TTestSenderActor* actors[actorsCount]; + TActorId actorIds[actorsCount]; + + for (size_t i = 0; i < actorsCount; ++i) { + actors[i] = new TTestSenderActor(); + actorIds[i] = actorSystem.Register(actors[i]); + } + for (size_t i = 0; i < actorsCount; ++i) { + actors[i]->Start(actors[i]->SelfId(), msgCount); + } + for (size_t i = 0; i < actorsCount; ++i) { + actorSystem.Send(actorIds[i], new TEvMsg()); + } + + + while (true) { + size_t maxCounter = 0; + for (size_t i = 0; i < actorsCount; ++i) { + maxCounter = Max(maxCounter, actors[i]->GetCounter()); + } + + if (maxCounter == 0) { + break; + } + + auto now = TInstant::Now(); + UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Max counter is " << maxCounter); + + Sleep(TDuration::MilliSeconds(1)); + } + } + + Y_UNIT_TEST(CheckCompleteRoundRobinOver) { + const size_t size = 4; + const size_t actorsCount = size * 2; + const size_t msgCount = 1e2; + TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50); + + auto setup = GetActorSystemSetup(executorPool); + TActorSystem actorSystem(setup); + actorSystem.Start(); + + auto begin = TInstant::Now(); + + TTestSenderActor* actors[actorsCount]; + TActorId actorIds[actorsCount]; + + for (size_t i = 0; i < actorsCount; ++i) { + actors[i] = new TTestSenderActor(); + actorIds[i] = actorSystem.Register(actors[i]); + } + for (size_t i = 0; i < actorsCount; ++i) { + actors[i]->Start(actorIds[(i + 1) % actorsCount], msgCount); + } + for (size_t i = 0; i < actorsCount; ++i) { + actorSystem.Send(actorIds[i], new TEvMsg()); + } + + while (true) { + size_t maxCounter = 0; + for (size_t i = 0; i < actorsCount; ++i) { + maxCounter = Max(maxCounter, actors[i]->GetCounter()); + } + + if (maxCounter == 0) { + break; + } + + auto now = TInstant::Now(); + UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Max counter is " << maxCounter); + + Sleep(TDuration::MilliSeconds(1)); + } + } + + Y_UNIT_TEST(CheckStats) { + const size_t size = 4; + const size_t msgCount = 1e4; + TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50); + + auto setup = GetActorSystemSetup(executorPool); + TActorSystem actorSystem(setup); + actorSystem.Start(); + + auto begin = TInstant::Now(); + + auto actor = new TTestSenderActor(); + auto actorId = actorSystem.Register(actor); + actor->Start(actor->SelfId(), msgCount); + actorSystem.Send(actorId, new TEvMsg()); + + while (actor->GetCounter()) { + auto now = TInstant::Now(); + UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Counter is " << actor->GetCounter()); + + Sleep(TDuration::MilliSeconds(1)); + } + + TVector<TExecutorThreadStats> stats; + TExecutorPoolStats poolStats; + actorSystem.GetPoolStats(0, poolStats, stats); + // Sum all per-thread counters into the 0th element + for (ui32 idx = 1; idx < stats.size(); ++idx) { + stats[0].Aggregate(stats[idx]); + } + + UNIT_ASSERT_VALUES_EQUAL(stats[0].SentEvents, msgCount - 1); + UNIT_ASSERT_VALUES_EQUAL(stats[0].ReceivedEvents, msgCount); + UNIT_ASSERT_VALUES_EQUAL(stats[0].PreemptedEvents, 0); + UNIT_ASSERT_VALUES_EQUAL(stats[0].NonDeliveredEvents, 0); + UNIT_ASSERT_VALUES_EQUAL(stats[0].EmptyMailboxActivation, 0); + //UNIT_ASSERT_VALUES_EQUAL(stats[0].CpuNs, 0); // depends on total duration of test, so undefined + UNIT_ASSERT(stats[0].ElapsedTicks > 0); + UNIT_ASSERT(stats[0].ParkedTicks > 0); + UNIT_ASSERT_VALUES_EQUAL(stats[0].BlockedTicks, 0); + UNIT_ASSERT(stats[0].ActivationTimeHistogram.TotalSamples >= msgCount / TBasicExecutorPoolConfig::DEFAULT_EVENTS_PER_MAILBOX); + 
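+        // every message is delivered exactly once, so the per-event histograms
+        // below must account for all msgCount events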
UNIT_ASSERT_VALUES_EQUAL(stats[0].EventDeliveryTimeHistogram.TotalSamples, msgCount); + UNIT_ASSERT_VALUES_EQUAL(stats[0].EventProcessingCountHistogram.TotalSamples, msgCount); + UNIT_ASSERT(stats[0].EventProcessingTimeHistogram.TotalSamples > 0); + UNIT_ASSERT(stats[0].ElapsedTicksByActivity[0] > 0); + UNIT_ASSERT_VALUES_EQUAL(stats[0].ReceivedEventsByActivity[0], msgCount); + UNIT_ASSERT_VALUES_EQUAL(stats[0].ActorsAliveByActivity[0], 1); + UNIT_ASSERT_VALUES_EQUAL(stats[0].ScheduledEventsByActivity[0], 0); + UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolActorRegistrations, 1); + UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolDestroyedActors, 0); + UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolAllocatedMailboxes, 4095); // one line + UNIT_ASSERT(stats[0].MailboxPushedOutByTime + stats[0].MailboxPushedOutByEventCount >= msgCount / TBasicExecutorPoolConfig::DEFAULT_EVENTS_PER_MAILBOX); + UNIT_ASSERT_VALUES_EQUAL(stats[0].MailboxPushedOutBySoftPreemption, 0); + } +} diff --git a/library/cpp/actors/core/executor_pool_io.cpp b/library/cpp/actors/core/executor_pool_io.cpp new file mode 100644 index 0000000000..fb557ae6b0 --- /dev/null +++ b/library/cpp/actors/core/executor_pool_io.cpp @@ -0,0 +1,151 @@ +#include "executor_pool_io.h" +#include "mailbox.h" +#include <library/cpp/actors/util/affinity.h> +#include <library/cpp/actors/util/datetime.h> + +namespace NActors { + TIOExecutorPool::TIOExecutorPool(ui32 poolId, ui32 threads, const TString& poolName, TAffinity* affinity, ui32 maxActivityType) + : TExecutorPoolBase(poolId, threads, affinity, maxActivityType) + , Threads(new TThreadCtx[threads]) + , PoolName(poolName) + {} + + TIOExecutorPool::TIOExecutorPool(const TIOExecutorPoolConfig& cfg) + : TIOExecutorPool( + cfg.PoolId, + cfg.Threads, + cfg.PoolName, + new TAffinity(cfg.Affinity), + cfg.MaxActivityType + ) + {} + + TIOExecutorPool::~TIOExecutorPool() { + Threads.Destroy(); + while (ThreadQueue.Pop(0)) + ; + } + + ui32 TIOExecutorPool::GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) { + ui32 workerId = wctx.WorkerId; + Y_VERIFY_DEBUG(workerId < PoolThreads); + + NHPTimer::STime elapsed = 0; + NHPTimer::STime parked = 0; + NHPTimer::STime hpstart = GetCycleCountFast(); + NHPTimer::STime hpnow; + + const TAtomic x = AtomicDecrement(Semaphore); + if (x < 0) { + TThreadCtx& threadCtx = Threads[workerId]; + ThreadQueue.Push(workerId + 1, revolvingCounter); + hpnow = GetCycleCountFast(); + elapsed += hpnow - hpstart; + if (threadCtx.Pad.Park()) + return 0; + hpstart = GetCycleCountFast(); + parked += hpstart - hpnow; + } + + while (!RelaxedLoad(&StopFlag)) { + if (const ui32 activation = Activations.Pop(++revolvingCounter)) { + hpnow = GetCycleCountFast(); + elapsed += hpnow - hpstart; + wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, elapsed); + if (parked > 0) { + wctx.AddParkedCycles(parked); + } + return activation; + } + SpinLockPause(); + } + + return 0; + } + + void TIOExecutorPool::Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) { + Schedule(deadline - ActorSystem->Timestamp(), ev, cookie, workerId); + } + + void TIOExecutorPool::Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) { + Y_UNUSED(workerId); + + const auto current = ActorSystem->Monotonic(); + if (deadline < current) + deadline = current; + + TTicketLock::TGuard guard(&ScheduleLock); + ScheduleQueue->Writer.Push(deadline.MicroSeconds(), ev.Release(), cookie); + } + + void TIOExecutorPool::Schedule(TDuration delta, 
TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) { + Y_UNUSED(workerId); + const auto deadline = ActorSystem->Monotonic() + delta; + + TTicketLock::TGuard guard(&ScheduleLock); + ScheduleQueue->Writer.Push(deadline.MicroSeconds(), ev.Release(), cookie); + } + + void TIOExecutorPool::ScheduleActivationEx(ui32 activation, ui64 revolvingWriteCounter) { + Activations.Push(activation, revolvingWriteCounter); + const TAtomic x = AtomicIncrement(Semaphore); + if (x <= 0) { + for (;; ++revolvingWriteCounter) { + if (const ui32 x = ThreadQueue.Pop(revolvingWriteCounter)) { + const ui32 threadIdx = x - 1; + Threads[threadIdx].Pad.Unpark(); + return; + } + SpinLockPause(); + } + } + } + + void TIOExecutorPool::Prepare(TActorSystem* actorSystem, NSchedulerQueue::TReader** scheduleReaders, ui32* scheduleSz) { + TAffinityGuard affinityGuard(Affinity()); + + ActorSystem = actorSystem; + + ScheduleQueue.Reset(new NSchedulerQueue::TQueueType()); + + for (ui32 i = 0; i != PoolThreads; ++i) { + Threads[i].Thread.Reset(new TExecutorThread(i, 0, actorSystem, this, MailboxTable.Get(), PoolName)); + } + + *scheduleReaders = &ScheduleQueue->Reader; + *scheduleSz = 1; + } + + void TIOExecutorPool::Start() { + TAffinityGuard affinityGuard(Affinity()); + + for (ui32 i = 0; i != PoolThreads; ++i) + Threads[i].Thread->Start(); + } + + void TIOExecutorPool::PrepareStop() { + AtomicStore(&StopFlag, true); + for (ui32 i = 0; i != PoolThreads; ++i) + Threads[i].Pad.Interrupt(); + } + + void TIOExecutorPool::Shutdown() { + for (ui32 i = 0; i != PoolThreads; ++i) + Threads[i].Thread->Join(); + } + + void TIOExecutorPool::GetCurrentStats(TExecutorPoolStats& /*poolStats*/, TVector<TExecutorThreadStats>& statsCopy) const { + statsCopy.resize(PoolThreads + 1); + // Save counters from the pool object + statsCopy[0] = TExecutorThreadStats(); + statsCopy[0].Aggregate(Stats); + // Per-thread stats + for (size_t i = 0; i < PoolThreads; ++i) { + Threads[i].Thread->GetCurrentStats(statsCopy[i + 1]); + } + } + + TString TIOExecutorPool::GetName() const { + return PoolName; + } +} diff --git a/library/cpp/actors/core/executor_pool_io.h b/library/cpp/actors/core/executor_pool_io.h new file mode 100644 index 0000000000..e576d642a1 --- /dev/null +++ b/library/cpp/actors/core/executor_pool_io.h @@ -0,0 +1,49 @@ +#pragma once + +#include "actorsystem.h" +#include "executor_thread.h" +#include "scheduler_queue.h" +#include "executor_pool_base.h" +#include <library/cpp/actors/util/ticket_lock.h> +#include <library/cpp/actors/util/unordered_cache.h> +#include <library/cpp/actors/util/threadparkpad.h> +#include <util/system/condvar.h> + +namespace NActors { + class TIOExecutorPool: public TExecutorPoolBase { + struct TThreadCtx { + TAutoPtr<TExecutorThread> Thread; + TThreadParkPad Pad; + }; + + TArrayHolder<TThreadCtx> Threads; + TUnorderedCache<ui32, 512, 4> ThreadQueue; + + THolder<NSchedulerQueue::TQueueType> ScheduleQueue; + TTicketLock ScheduleLock; + + const TString PoolName; + + public: + TIOExecutorPool(ui32 poolId, ui32 threads, const TString& poolName = "", TAffinity* affinity = nullptr, + ui32 maxActivityType = 1); + explicit TIOExecutorPool(const TIOExecutorPoolConfig& cfg); + ~TIOExecutorPool(); + + ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) override; + + void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override; + void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) 
override; + void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override; + + void ScheduleActivationEx(ui32 activation, ui64 revolvingWriteCounter) override; + + void Prepare(TActorSystem* actorSystem, NSchedulerQueue::TReader** scheduleReaders, ui32* scheduleSz) override; + void Start() override; + void PrepareStop() override; + void Shutdown() override; + + void GetCurrentStats(TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const override; + TString GetName() const override; + }; +} diff --git a/library/cpp/actors/core/executor_pool_united.cpp b/library/cpp/actors/core/executor_pool_united.cpp new file mode 100644 index 0000000000..dac6245635 --- /dev/null +++ b/library/cpp/actors/core/executor_pool_united.cpp @@ -0,0 +1,1428 @@ +#include "executor_pool_united.h" + +#include "balancer.h" +#include "cpu_state.h" +#include "executor_thread.h" +#include "probes.h" +#include "mailbox.h" +#include "scheduler_queue.h" +#include <library/cpp/actors/util/affinity.h> +#include <library/cpp/actors/util/datetime.h> +#include <library/cpp/actors/util/futex.h> +#include <library/cpp/actors/util/intrinsics.h> +#include <library/cpp/actors/util/timerfd.h> + +#include <util/system/datetime.h> +#include <util/system/hp_timer.h> + +#include <algorithm> + +namespace NActors { + LWTRACE_USING(ACTORLIB_PROVIDER); + + struct TUnitedWorkers::TWorker: public TNonCopyable { + TAutoPtr<TExecutorThread> Thread; + volatile TThreadId ThreadId = UnknownThreadId; + NSchedulerQueue::TQueueType SchedulerQueue; + }; + + struct TUnitedWorkers::TPool: public TNonCopyable { + TAtomic Waiters = 0; // Number of idle cpus, waiting for activations in this pool + char Padding[64 - sizeof(TAtomic)]; + + TUnorderedCache<ui32, 512, 4> Activations; // MPMC-queue for mailbox activations + TAtomic Active = 0; // Number of mailboxes ready for execution or currently executing + TAtomic Tokens = 0; // Pending tokens (token is required for worker to start execution, guarantees concurrency limit and activation availability) + volatile bool StopFlag = false; + + // Configuration + TPoolId PoolId; + TAtomicBase Concurrency; // Max concurrent workers running this pool + IExecutorPool* ExecutorPool; + TMailboxTable* MailboxTable; + ui64 TimePerMailboxTs; + ui32 EventsPerMailbox; + + // Cpus this pool is allowed to run on + // Cpus are specified in wake order + TStackVec<TCpu*, 15> WakeOrderCpus; + + ~TPool() { + while (Activations.Pop(0)) {} + } + + void Stop() { + AtomicStore(&StopFlag, true); + } + + bool IsUnited() const { + return WakeOrderCpus.size(); + } + + // Add activation of newly scheduled mailbox. Returns generated token (unless concurrency is exceeded) + bool PushActivation(ui32 activation, ui64 revolvingCounter) { + Activations.Push(activation, revolvingCounter); + TAtomicBase active = AtomicIncrement(Active); + if (active <= Concurrency) { // token generated + AtomicIncrement(Tokens); + return true; + } + return false; + } + + template <bool Relaxed> + static bool TryAcquireTokenImpl(TAtomic* tokens) { + while (true) { + TAtomicBase value; + if constexpr (Relaxed) { + value = RelaxedLoad(tokens); + } else { + value = AtomicLoad(tokens); + } + if (value > 0) { + if (AtomicCas(tokens, value - 1, value)) { + return true; // token acquired + } + } else { + return false; // no more tokens + } + } + } + + // Try acquire pending token. 
Must be done before execution
+        bool TryAcquireToken() {
+            return TryAcquireTokenImpl<false>(&Tokens);
+        }
+
+        // Try acquire pending token. Must be done before execution
+        bool TryAcquireTokenRelaxed() {
+            return TryAcquireTokenImpl<true>(&Tokens);
+        }
+
+        // Get activation. Requires acquired token.
+        void BeginExecution(ui32& activation, ui64 revolvingCounter) {
+            while (!RelaxedLoad(&StopFlag)) {
+                if (activation = Activations.Pop(++revolvingCounter)) {
+                    return;
+                }
+                SpinLockPause();
+            }
+            activation = 0; // should stop
+        }
+
+        // End currently active execution and start new one if token is available.
+        // Reuses token if it's not destroyed.
+        // Returned `true` means successful switch, `activation` is filled.
+        // Returned `false` means execution has ended, no need to call StopExecution()
+        bool NextExecution(ui32& activation, ui64 revolvingCounter) {
+            if (AtomicDecrement(Active) >= Concurrency) { // reuse just released token
+                BeginExecution(activation, revolvingCounter);
+                return true;
+            } else if (TryAcquireToken()) { // another token acquired
+                BeginExecution(activation, revolvingCounter);
+                return true;
+            }
+            return false; // no more tokens available
+        }
+
+        // Stop active execution. Returns released token (unless it is destroyed)
+        bool StopExecution() {
+            TAtomicBase active = AtomicDecrement(Active);
+            if (active >= Concurrency) { // token released
+                AtomicIncrement(Tokens);
+                return true;
+            }
+            return false; // token destroyed
+        }
+
+        // Switch worker context into this pool
+        void Switch(TWorkerContext& wctx, ui64 softDeadlineTs, TExecutorThreadStats& stats) {
+            wctx.Switch(ExecutorPool, MailboxTable, TimePerMailboxTs, EventsPerMailbox, softDeadlineTs, &stats);
+        }
+    };
+
+    class TPoolScheduler {
+        class TSchedulable {
+            // Lower PoolBits store PoolId
+            // All other higher bits store virtual runtime in cycles
+            using TValue = ui64;
+            TValue Value;
+
+            static constexpr ui64 PoolIdMask = ui64((1ull << PoolBits) - 1);
+            static constexpr ui64 VRunTsMask = ~PoolIdMask;
+
+        public:
+            explicit TSchedulable(TPoolId poolId = MaxPools, ui64 vrunts = 0)
+                : Value((poolId & PoolIdMask) | (vrunts & VRunTsMask))
+            {}
+
+            TPoolId GetPoolId() const {
+                return Value & PoolIdMask;
+            }
+
+            ui64 GetVRunTs() const {
+                // Do not truncate pool id
+                // NOTE: it decreases accuracy, but improves performance
+                return Value;
+            }
+
+            ui64 GetPreciseVRunTs() const {
+                return Value & VRunTsMask;
+            }
+
+            void SetVRunTs(ui64 vrunts) {
+                Value = (Value & PoolIdMask) | (vrunts & VRunTsMask);
+            }
+
+            void Account(ui64 base, ui64 ts) {
+                // Add at least minimum amount to change Value
+                SetVRunTs(base + Max(ts, PoolIdMask + 1));
+            }
+        };
+
+        // For min-heap of Items
+        struct TCmp {
+            bool operator()(TSchedulable lhs, TSchedulable rhs) const {
+                return lhs.GetVRunTs() > rhs.GetVRunTs();
+            }
+        };
+
+        TPoolId Size = 0; // total number of pools on this cpu
+        TPoolId Current = 0; // index of current pool in `Items`
+
+        // At the beginning `Current` items are organized as a binary min-heap -- ready to be scheduled
+        // The rest `Size - Current` items are unordered (required to keep track of last vrunts)
+        TSchedulable Items[MaxPools]; // virtual runtime in cycles for each pool
+        ui64 MinVRunTs = 0; // virtual runtime used by waking pools (system's vrunts)
+        ui64 Ts = 0; // real timestamp of current execution start (for accounting)
+
+        // Maps PoolId into its inverse weight
+        ui64 InvWeights[MaxPools];
+        static constexpr ui64 VRunTsOverflow = ui64(1ull << 62ull) / MaxPoolWeight;
+
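+        // A small worked example of the weight math below (the numbers are
+        // assumed for illustration only, not taken from the actual constants):
+        // suppose MaxPoolWeight = 1024 and DefPoolWeight = 32. Then
+        //   AddPool(p, 64) -> InvWeights[p] = 1024 / 64 = 16
+        //   AddPool(q, 0)  -> weight defaults to 32, InvWeights[q] = 32
+        // After both pools run for the same 1000 cycles, Account() advances
+        // p's vruntime by 16000 and q's by 32000, so the min-heap built in
+        // Begin()/Next() picks the heavier pool p about twice as often.
+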
+    public:
+        void AddPool(TPoolId pool, TPoolWeight weight) {
+            Items[Size] = TSchedulable(pool, MinVRunTs);
+            Size++;
+            InvWeights[pool] = MaxPoolWeight / std::clamp(weight ? weight : DefPoolWeight, MinPoolWeight, MaxPoolWeight);
+        }
+
+        // Iterate over pools in scheduling order
+        // Intended usage:
+        //     for (TPoolId pool = Begin(); pool != End(); pool = Next())
+        TPoolId Begin() {
+            // Wrap vruntime around to avoid overflow, if required
+            if (Y_UNLIKELY(MinVRunTs >= VRunTsOverflow)) {
+                for (TPoolId i = 0; i < Size; i++) {
+                    ui64 ts = Items[i].GetPreciseVRunTs();
+                    Items[i].SetVRunTs(ts >= VRunTsOverflow ? ts - VRunTsOverflow : 0);
+                }
+                MinVRunTs -= VRunTsOverflow;
+            }
+            Current = Size;
+            std::make_heap(Items, Items + Current, TCmp());
+            return Next();
+        }
+
+        constexpr TPoolId End() const {
+            return MaxPools;
+        }
+
+        TPoolId Next() {
+            if (Current > 0) {
+                std::pop_heap(Items, Items + Current, TCmp());
+                Current--;
+                return CurrentPool();
+            } else {
+                return End();
+            }
+        }
+
+        // Scheduling was successful, we are going to run CurrentPool()
+        void Scheduled() {
+            MinVRunTs = Max(MinVRunTs, Items[Current].GetPreciseVRunTs());
+            // NOTE: Ts is propagated on Account() to avoid gaps
+        }
+
+        // Schedule specific pool that woke up cpu after idle
+        void ScheduledAfterIdle(TPoolId pool, ui64 ts) {
+            if (Y_UNLIKELY(ts < Ts)) { // anomaly: time goes backwards (e.g. rdtsc is reset to zero on cpu reset)
+                Ts = ts; // just skip anomalous time slice
+                return;
+            }
+            MinVRunTs += (ts - Ts) * (MaxPoolWeight / DefPoolWeight); // propagate system's vrunts to blur difference between pools
+            Ts = ts; // propagate time w/o accounting to any pool
+
+            // Set specified pool as current, it requires scan
+            for (Current = 0; Current < Size && pool != Items[Current].GetPoolId(); Current++) {}
+            Y_VERIFY(Current < Size);
+        }
+
+        // Account currently running pool till now (ts)
+        void Account(ui64 ts) {
+            // Skip time slice for the first run and when time goes backwards (e.g. rdtsc is reset to zero on cpu reset)
+            if (Y_LIKELY(Ts > 0 && Ts <= ts)) {
+                TPoolId pool = CurrentPool();
+                Y_VERIFY(pool < MaxPools);
+                Items[Current].Account(MinVRunTs, (ts - Ts) * InvWeights[pool]);
+            }
+            Ts = ts; // propagate time
+        }
+
+        TPoolId CurrentPool() const {
+            return Items[Current].GetPoolId();
+        }
+    };
+
+    // Cyclic array of timers for idle workers to wait for hard preemption on
+    struct TIdleQueue: public TNonCopyable {
+        TArrayHolder<TTimerFd> Timers;
+        size_t Size;
+        TAtomic EnqueueCounter = 0;
+        TAtomic DequeueCounter = 0;
+
+        explicit TIdleQueue(size_t size)
+            : Timers(new TTimerFd[size])
+            , Size(size)
+        {}
+
+        void Stop() {
+            for (size_t i = 0; i < Size; i++) {
+                Timers[i].Wake();
+            }
+        }
+
+        // Returns the timer a new idle-worker should wait on
+        TTimerFd* Enqueue() {
+            return &Timers[AtomicGetAndIncrement(EnqueueCounter) % Size];
+        }
+
+        // Returns the timer that hard preemption should trigger to wake an idle-worker
+        TTimerFd* Dequeue() {
+            return &Timers[AtomicGetAndIncrement(DequeueCounter) % Size];
+        }
+    };
+
+    // Base class for cpu-local managers that help workers on a single cpu to cooperate
+    struct TCpuLocalManager: public TThrRefBase {
+        TUnitedWorkers* United;
+
+        explicit TCpuLocalManager(TUnitedWorkers* united)
+            : United(united)
+        {}
+
+        virtual TWorkerId WorkerCount() const = 0;
+        virtual void AddWorker(TWorkerId workerId) = 0;
+        virtual void Stop() = 0;
+    };
+
+    // Represents a cpu with a single associated worker that is able to execute any pool.
+ // It always executes pool assigned by balancer and switch pool only if assigned pool has changed + struct TAssignedCpu: public TCpuLocalManager { + bool Started = false; + + TAssignedCpu(TUnitedWorkers* united) + : TCpuLocalManager(united) + {} + + TWorkerId WorkerCount() const override { + return 1; + } + + void AddWorker(TWorkerId workerId) override { + Y_UNUSED(workerId); + } + + ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) { + ui32 activation; + if (Y_UNLIKELY(!Started)) { + Started = true; + } else if (Y_UNLIKELY(United->IsPoolReassigned(wctx))) { + United->StopExecution(wctx.PoolId); // stop current execution and switch pool if reassigned + } else if (United->NextExecution(wctx.PoolId, activation, revolvingCounter)) { + return activation; // another activation from currently executing pool (or 0 if stopped) + } + + // Switch to another pool, it blocks until token is acquired + if (Y_UNLIKELY(!SwitchPool(wctx))) { + return 0; // stopped + } + United->SwitchPool(wctx, 0); + United->BeginExecution(wctx.PoolId, activation, revolvingCounter); + return activation; + } + + void Stop() override { + } + + private: + // Sets next pool to run, and acquires token, blocks if there are no tokens + bool SwitchPool(TWorkerContext& wctx) { + if (Y_UNLIKELY(United->IsStopped())) { + return false; + } + + // Run balancer (if it's time to) + United->Balance(); + + // Select pool to execute + wctx.PoolId = United->AssignedPool(wctx); + Y_VERIFY(wctx.PoolId != CpuShared); + if (United->TryAcquireToken(wctx.PoolId)) { + return true; + } + + // No more work -- wait for activations (spinning, then blocked) + wctx.PoolId = United->Idle(wctx.PoolId, wctx); + + // Wakeup or stop occured + if (Y_UNLIKELY(wctx.PoolId == CpuStopped)) { + return false; + } + return true; // United->Idle() has already acquired token + } + }; + + // Lock-free data structure that help workers on single cpu to discover their state and do hard preemptions + struct TSharedCpu: public TCpuLocalManager { + // Current lease + volatile TLease::TValue CurrentLease; + char Padding1[64 - sizeof(TLease)]; + + // Slow pools + // the highest bit: 1=wait-for-slow-workers mode 0=else + // any lower bit (poolId is bit position): 1=pool-is-slow 0=pool-is-fast + volatile TPoolsMask SlowPoolsMask = 0; + char Padding2[64 - sizeof(TPoolsMask)]; + + // Must be accessed under never expiring lease to avoid races + TPoolScheduler PoolSched; + TWorkerId FastWorker = MaxWorkers; + TTimerFd* PreemptionTimer = nullptr; + ui64 HardPreemptionTs = 0; + bool Started = false; + + TIdleQueue IdleQueue; + + struct TConfig { + const TCpuId CpuId; + const TWorkerId Workers; + ui64 SoftLimitTs; + ui64 HardLimitTs; + ui64 EventLimitTs; + ui64 LimitPrecisionTs; + const int IdleWorkerPriority; + const int FastWorkerPriority; + const bool NoRealtime; + const bool NoAffinity; + const TCpuAllocation CpuAlloc; + + TConfig(const TCpuAllocation& allocation, const TUnitedWorkersConfig& united) + : CpuId(allocation.CpuId) + , Workers(allocation.AllowedPools.size() + 1) + , SoftLimitTs(Us2Ts(united.PoolLimitUs)) + , HardLimitTs(Us2Ts(united.PoolLimitUs + united.EventLimitUs)) + , EventLimitTs(Us2Ts(united.EventLimitUs)) + , LimitPrecisionTs(Us2Ts(united.LimitPrecisionUs)) + , IdleWorkerPriority(std::clamp<ui64>(united.IdleWorkerPriority ? united.IdleWorkerPriority : 20, 1, 99)) + , FastWorkerPriority(std::clamp<ui64>(united.FastWorkerPriority ? 
united.FastWorkerPriority : 10, 1, IdleWorkerPriority - 1)) + , NoRealtime(united.NoRealtime) + , NoAffinity(united.NoAffinity) + , CpuAlloc(allocation) + {} + }; + + TConfig Config; + TVector<TWorkerId> Workers; + + TSharedCpu(const TConfig& cfg, TUnitedWorkers* united) + : TCpuLocalManager(united) + , IdleQueue(cfg.Workers) + , Config(cfg) + { + for (const auto& pa : Config.CpuAlloc.AllowedPools) { + PoolSched.AddPool(pa.PoolId, pa.Weight); + } + } + + TWorkerId WorkerCount() const override { + return Config.Workers; + } + + void AddWorker(TWorkerId workerId) override { + if (Workers.empty()) { + // Grant lease to the first worker + AtomicStore(&CurrentLease, TLease(workerId, NeverExpire).Value); + } + Workers.push_back(workerId); + } + + ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) { + ui32 activation; + if (!wctx.Lease.IsNeverExpiring()) { + if (wctx.SoftDeadlineTs < GetCycleCountFast()) { // stop if lease has expired or is near to be expired + United->StopExecution(wctx.PoolId); + } else if (United->NextExecution(wctx.PoolId, activation, revolvingCounter)) { + return activation; // another activation from currently executing pool (or 0 if stopped) + } + } + + // Switch to another pool, it blocks until token is acquired + if (Y_UNLIKELY(!SwitchPool(wctx))) { + return 0; // stopped + } + United->BeginExecution(wctx.PoolId, activation, revolvingCounter); + return activation; + } + + void Stop() override { + IdleQueue.Stop(); + } + + private: + enum EPriority { + IdlePriority, // highest (real-time, Config.IdleWorkerPriority) + FastPriority, // normal (real-time, Config.FastWorkerPriority) + SlowPriority, // lowest (not real-time) + }; + + enum EWorkerAction { + // Fast-worker + ExecuteFast, + WaitForSlow, + + // Slow-worker + BecameIdle, + WakeFast, + + // Idle-worker + BecameFast, + Standby, + + // Common + Stopped, + }; + + // Thread-safe; should be called from worker + // Blocks for idle-workers; sets lease and next pool to run + bool SwitchPool(TWorkerContext& wctx) { + TTimerFd* idleTimer = nullptr; + while (true) { + if (DisablePreemptionAndTryExtend(wctx.Lease)) { // if fast-worker + if (Y_UNLIKELY(!Started)) { + SetPriority(0, FastPriority); + Started = true; + } + while (true) { + switch (FastWorkerAction(wctx)) { + case ExecuteFast: + United->SwitchPool(wctx, wctx.Lease.GetPreciseExpireTs() - Config.EventLimitTs); + EnablePreemptionAndGrant(wctx.Lease); + return true; + case WaitForSlow: + FastWorkerSleep(GetCycleCountFast() + Config.SoftLimitTs); + break; + case Stopped: return false; + default: Y_FAIL(); + } + } + } else if (wctx.Lease.IsNeverExpiring()) { // if idle-worker + switch (IdleWorkerAction(idleTimer, wctx.Lease.GetWorkerId())) { + case BecameFast: + SetPriority(0, FastPriority); + break; // try acquire new lease + case Standby: + if (!idleTimer) { + idleTimer = IdleQueue.Enqueue(); + } + SetPriority(0, IdlePriority); + idleTimer->Wait(); + break; + case Stopped: return false; + default: Y_FAIL(); + } + } else { // lease has expired and hard preemption occured, so we are executing in a slow-worker + wctx.IncrementPreemptedEvents(); + switch (SlowWorkerAction(wctx.PoolId)) { + case WakeFast: + WakeFastWorker(); + [[fallthrough]]; // no break; pass through + case BecameIdle: + wctx.Lease = wctx.Lease.NeverExpire(); + wctx.PoolId = MaxPools; + idleTimer = nullptr; + break; + case Stopped: return false; + default: Y_FAIL(); + } + } + } + } + + enum ETryRunPool { + RunFastPool, + RunSlowPool, + NoTokens, + }; + + ETryRunPool TryRun(TPoolId pool) { 
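+            // Illustrative walkthrough of the mask protocol used below (bit
+            // values assumed for the example): with pool 2 marked slow and the
+            // wait flag raised,
+            //   slow == WaitPoolsFlag | (1ull << 2)
+            // TryRun(2) sees the pool-is-slow bit and the already-set wait
+            // flag and returns RunSlowPool without taking a token, while
+            // TryRun(0) first clears WaitPoolsFlag via AtomicCas and then
+            // still needs United->TryAcquireToken(0) to return RunFastPool.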
+            while (true) {
+                // updates WaitPoolsFlag in SlowPoolsMask according to scheduled pool slowness
+                TPoolsMask slow = AtomicLoad(&SlowPoolsMask);
+                if ((1ull << pool) & slow) { // we are about to execute slow pool (fast-worker will just wait, token is NOT required)
+                    if (slow & WaitPoolsFlag) {
+                        return RunSlowPool; // wait flag is already set
+                    } else {
+                        if (AtomicCas(&SlowPoolsMask, slow | WaitPoolsFlag, slow)) { // try set wait flag
+                            return RunSlowPool; // wait flag has been successfully set
+                        }
+                    }
+                } else { // we are about to execute fast pool, token required
+                    if (slow & WaitPoolsFlag) { // reset wait flag if required
+                        if (AtomicCas(&SlowPoolsMask, slow & ~WaitPoolsFlag, slow)) { // try reset wait flag
+                            return United->TryAcquireToken(pool) ? RunFastPool : NoTokens; // wait flag has been successfully reset
+                        }
+                    } else {
+                        return United->TryAcquireToken(pool) ? RunFastPool : NoTokens; // wait flag is already reset
+                    }
+                }
+            }
+        }
+
+        EWorkerAction FastWorkerAction(TWorkerContext& wctx) {
+            if (Y_UNLIKELY(United->IsStopped())) {
+                return Stopped;
+            }
+
+            // Account current pool
+            ui64 ts = GetCycleCountFast();
+            PoolSched.Account(ts);
+
+            // Select next pool to execute
+            for (wctx.PoolId = PoolSched.Begin(); wctx.PoolId != PoolSched.End(); wctx.PoolId = PoolSched.Next()) {
+                switch (TryRun(wctx.PoolId)) {
+                case RunFastPool:
+                    PoolSched.Scheduled();
+                    wctx.Lease = PostponePreemption(wctx.Lease.GetWorkerId(), ts);
+                    return ExecuteFast;
+                case RunSlowPool:
+                    PoolSched.Scheduled();
+                    ResetPreemption(wctx.Lease.GetWorkerId(), ts); // there is no point in preemption during wait
+                    return WaitForSlow;
+                case NoTokens: // concurrency limit reached, or no more work in pool
+                    break; // just try next pool (if any)
+                }
+            }
+
+            // No more work, no slow-workers -- wait for activations (active, then blocked)
+            wctx.PoolId = United->Idle(CpuShared, wctx);
+
+            // Wakeup or stop occurred
+            if (Y_UNLIKELY(wctx.PoolId == CpuStopped)) {
+                return Stopped;
+            }
+            ts = GetCycleCountFast();
+            PoolSched.ScheduledAfterIdle(wctx.PoolId, ts);
+            wctx.Lease = PostponePreemption(wctx.Lease.GetWorkerId(), ts);
+            return ExecuteFast; // United->Idle() has already acquired token
+        }
+
+        EWorkerAction IdleWorkerAction(TTimerFd* idleTimer, TWorkerId workerId) {
+            if (Y_UNLIKELY(United->IsStopped())) {
+                return Stopped;
+            }
+            if (!idleTimer) { // either worker start or became idle -- hard preemption is not required
+                return Standby;
+            }
+
+            TLease lease = TLease(AtomicLoad(&CurrentLease));
+            ui64 ts = GetCycleCountFast();
+            if (lease.GetExpireTs() < ts) { // current lease has expired
+                if (TryBeginHardPreemption(lease)) {
+                    SetPoolIsSlowFlag(PoolSched.CurrentPool());
+                    TWorkerId preempted = lease.GetWorkerId();
+                    SetPriority(United->GetWorkerThreadId(preempted), SlowPriority);
+                    LWPROBE(HardPreemption, Config.CpuId, PoolSched.CurrentPool(), preempted, workerId);
+                    EndHardPreemption(workerId);
+                    return BecameFast;
+                } else {
+                    // Lease has been changed just now, no way we need preemption right now, so no retry needed
+                    return Standby;
+                }
+            } else {
+                // Lease has not expired yet (maybe never expiring lease)
+                return Standby;
+            }
+        }
+
+        EWorkerAction SlowWorkerAction(TPoolId pool) {
+            if (Y_UNLIKELY(United->IsStopped())) {
+                return Stopped;
+            }
+            while (true) {
+                TPoolsMask slow = AtomicLoad(&SlowPoolsMask);
+                if (slow & (1ull << pool)) {
+                    if (slow == ((1ull << pool) | WaitPoolsFlag)) { // the last slow pool is going to become fast
+                        if (AtomicCas(&SlowPoolsMask, 0, slow)) { // reset both pool-is-slow flag and WaitPoolsFlag
+                            return WakeFast;
+                        }
+                    } else { // there are (a) several slow-workers or (b) one slow-worker w/o a waiting fast-worker
+                        if (AtomicCas(&SlowPoolsMask, slow & ~(1ull << pool), slow)) { // reset pool-is-slow flag
+                            return BecameIdle;
+                        }
+                    }
+                } else {
+                    // SlowWorkerAction has been called between TryBeginHardPreemption and SetPoolIsSlowFlag
+                    // flag for this pool is not set yet, but we can be sure pool is slow:
+                    // - because SlowWorkerAction has been called;
+                    // - this means the lease has expired and hard preemption has occurred.
+                    // So just wait for the other worker to call SetPoolIsSlowFlag
+                    LWPROBE(SlowWorkerActionRace, Config.CpuId, pool, slow);
+                }
+            }
+        }
+
+        void SetPoolIsSlowFlag(TPoolId pool) {
+            while (true) {
+                TPoolsMask slow = AtomicLoad(&SlowPoolsMask);
+                if ((slow & (1ull << pool)) == 0) { // if pool is fast
+                    if (AtomicCas(&SlowPoolsMask, slow | (1ull << pool), slow)) { // set pool-is-slow flag
+                        return;
+                    }
+                } else {
+                    Y_FAIL("two slow-workers executing the same pool on the same core");
+                    return; // pool is already slow
+                }
+            }
+        }
+
+        bool TryBeginHardPreemption(TLease lease) {
+            return AtomicCas(&CurrentLease, HardPreemptionLease, lease);
+        }
+
+        void EndHardPreemption(TWorkerId to) {
+            ATOMIC_COMPILER_BARRIER();
+            if (!AtomicCas(&CurrentLease, TLease(to, NeverExpire), HardPreemptionLease)) {
+                Y_FAIL("hard preemption failed");
+            }
+        }
+
+        bool DisablePreemptionAndTryExtend(TLease lease) {
+            return AtomicCas(&CurrentLease, lease.NeverExpire(), lease);
+        }
+
+        void EnablePreemptionAndGrant(TLease lease) {
+            ATOMIC_COMPILER_BARRIER();
+            if (!AtomicCas(&CurrentLease, lease, lease.NeverExpire())) {
+                Y_FAIL("lease grant failed");
+            }
+        }
+
+        void FastWorkerSleep(ui64 deadlineTs) {
+            while (true) {
+                TPoolsMask slow = AtomicLoad(&SlowPoolsMask);
+                if ((slow & WaitPoolsFlag) == 0) {
+                    return; // woken by WakeFast action
+                }
+                ui64 ts = GetCycleCountFast();
+                if (deadlineTs <= ts) {
+                    if (AtomicCas(&SlowPoolsMask, slow & ~WaitPoolsFlag, slow)) { // try reset wait flag
+                        return; // wait flag has been successfully reset after timeout
+                    }
+                } else { // should wait
+                    ui64 timeoutNs = Ts2Ns(deadlineTs - ts);
+#ifdef _linux_
+                    timespec timeout;
+                    timeout.tv_sec = timeoutNs / 1'000'000'000;
+                    timeout.tv_nsec = timeoutNs % 1'000'000'000;
+                    SysFutex(FastWorkerFutex(), FUTEX_WAIT_PRIVATE, FastWorkerFutexValue(slow), &timeout, nullptr, 0);
+#else
+                    NanoSleep(timeoutNs); // non-linux wake is not supported, cpu will go idle on slow -> fast switch
+#endif
+                }
+            }
+        }
+
+        void WakeFastWorker() {
+#ifdef _linux_
+            SysFutex(FastWorkerFutex(), FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0);
+#endif
+        }
+
+#ifdef _linux_
+        ui32* FastWorkerFutex() {
+            // Actually we wait on one highest bit, but futex value size is 4 bytes on all platforms
+            static_assert(sizeof(TPoolsMask) >= 4, "cannot be used as futex value on linux");
+            return (ui32*)&SlowPoolsMask + 1; // higher 32 bits (little endian assumed)
+        }
+
+        ui32 FastWorkerFutexValue(TPoolsMask slow) {
+            return ui32(slow >> 32); // higher 32 bits
+        }
+#endif
+
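+        // Sketch of how the futex helpers above pair a waiter with a waker
+        // (assumed flow, condensed from FastWorkerSleep()/WakeFastWorker()):
+        // the fast-worker waits on the upper 32 bits of SlowPoolsMask, so a
+        // FUTEX_WAKE is needed only when the last slow pool clears WaitPoolsFlag:
+        //   fast-worker:  slow = AtomicLoad(&SlowPoolsMask);  // wait flag set
+        //                 SysFutex(FastWorkerFutex(), FUTEX_WAIT_PRIVATE,
+        //                          FastWorkerFutexValue(slow), &timeout, nullptr, 0);
+        //   slow-worker:  AtomicCas(&SlowPoolsMask, 0, slow); // flag cleared
+        //                 SysFutex(FastWorkerFutex(), FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0);
+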
+        void SetPriority(TThreadId tid, EPriority priority) {
+            if (Config.NoRealtime) {
+                return;
+            }
+#ifdef _linux_
+            int policy;
+            struct sched_param param;
+            switch (priority) {
+            case IdlePriority:
+                policy = SCHED_FIFO;
+                param.sched_priority = Config.IdleWorkerPriority;
+                break;
+            case FastPriority:
+                policy = SCHED_FIFO;
+                param.sched_priority = Config.FastWorkerPriority;
+                break;
+            case SlowPriority:
+                policy = SCHED_OTHER;
+                param.sched_priority = 0;
+                break;
+            }
+            int ret = sched_setscheduler(tid, policy, &param);
+            switch (ret) {
+            case 0: return;
+            case EINVAL:
+                Y_FAIL("sched_setscheduler(%" PRIu64 ", %d, %d) -> EINVAL", tid, policy, param.sched_priority);
+            case EPERM:
+                // Requirements:
+                // * CAP_SYS_NICE capability to run real-time processes and set cpu affinity.
+                //   Either run under root or set application capabilities:
+                //     sudo setcap cap_sys_nice=eip BINARY
+                // * Non-zero rt-runtime (in case cgroups are used).
+                //   Either (a) disable global limit on RT processes bandwidth:
+                //     sudo sysctl -w kernel.sched_rt_runtime_us=-1
+                //   Or (b) set non-zero rt-runtime for your cgroup:
+                //     echo -1 > /sys/fs/cgroup/cpu/[cgroup]/cpu.rt_runtime_us
+                //     (also set the same value for every parent cgroup)
+                //   https://www.kernel.org/doc/Documentation/scheduler/sched-rt-group.txt
+                Y_FAIL("sched_setscheduler(%" PRIu64 ", %d, %d) -> EPERM", tid, policy, param.sched_priority);
+            case ESRCH:
+                Y_FAIL("sched_setscheduler(%" PRIu64 ", %d, %d) -> ESRCH", tid, policy, param.sched_priority);
+            default:
+                Y_FAIL("sched_setscheduler(%" PRIu64 ", %d, %d) -> %d", tid, policy, param.sched_priority, ret);
+            }
+#else
+            Y_UNUSED(tid);
+            Y_UNUSED(priority);
+#endif
+        }
+
+        void ResetPreemption(TWorkerId fastWorkerId, ui64 ts) {
+            if (Y_UNLIKELY(!PreemptionTimer)) {
+                return;
+            }
+            if (FastWorker == fastWorkerId && HardPreemptionTs > 0) {
+                PreemptionTimer->Reset();
+                LWPROBE(ResetPreemptionTimer, Config.CpuId, FastWorker, PreemptionTimer->Fd, Ts2Ms(ts), Ts2Ms(HardPreemptionTs));
+                HardPreemptionTs = 0;
+            }
+        }
+
+        TLease PostponePreemption(TWorkerId fastWorkerId, ui64 ts) {
+            // Select new timer after hard preemption
+            if (FastWorker != fastWorkerId) {
+                FastWorker = fastWorkerId;
+                PreemptionTimer = IdleQueue.Dequeue();
+                HardPreemptionTs = 0;
+            }
+
+            ui64 hardPreemptionTs = ts + Config.HardLimitTs;
+            if (hardPreemptionTs > HardPreemptionTs) {
+                // Reset timer (at most once in TickIntervalTs, sacrifice precision)
+                HardPreemptionTs = hardPreemptionTs + Config.LimitPrecisionTs;
+                PreemptionTimer->Set(HardPreemptionTs);
+                LWPROBE(SetPreemptionTimer, Config.CpuId, FastWorker, PreemptionTimer->Fd, Ts2Ms(ts), Ts2Ms(HardPreemptionTs));
+            }
+
+            return TLease(fastWorkerId, hardPreemptionTs);
+        }
+    };
+
+    // Proxy for starting and switching TUnitedExecutorPool-s on a single cpu via GetReadyActivation()
+    // (does not implement any other method in IExecutorPool)
+    class TCpuExecutorPool: public IExecutorPool {
+        const TString Name;
+
+    public:
+        explicit TCpuExecutorPool(const TString& name)
+            : IExecutorPool(MaxPools)
+            , Name(name)
+        {}
+
+        TString GetName() const override {
+            return Name;
+        }
+
+        void SetRealTimeMode() const override {
+            // derived classes control rt-priority - do nothing
+        }
+
+        // Should never be called
+        void ReclaimMailbox(TMailboxType::EType, ui32, TWorkerId, ui64) override { Y_FAIL(); }
+        void Schedule(TInstant, TAutoPtr<IEventHandle>, ISchedulerCookie*, TWorkerId) override { Y_FAIL(); }
+        void Schedule(TMonotonic, TAutoPtr<IEventHandle>, ISchedulerCookie*, TWorkerId) override { Y_FAIL(); }
+        void Schedule(TDuration, TAutoPtr<IEventHandle>, ISchedulerCookie*, TWorkerId) override { Y_FAIL(); }
+        bool Send(TAutoPtr<IEventHandle>&) override { Y_FAIL(); }
+        void ScheduleActivation(ui32) override { Y_FAIL(); }
+        void ScheduleActivationEx(ui32, ui64) override { Y_FAIL(); }
+        TActorId Register(IActor*, TMailboxType::EType, ui64, const TActorId&) override { Y_FAIL(); }
+        TActorId Register(IActor*, TMailboxHeader*, ui32, const TActorId&) override { Y_FAIL(); }
+        void 
Prepare(TActorSystem*, NSchedulerQueue::TReader**, ui32*) override { Y_FAIL(); } + void Start() override { Y_FAIL(); } + void PrepareStop() override { Y_FAIL(); } + void Shutdown() override { Y_FAIL(); } + bool Cleanup() override { Y_FAIL(); } + }; + + // Proxy executor pool working with cpu-local scheduler (aka actorsystem 2.0) + class TSharedCpuExecutorPool: public TCpuExecutorPool { + TSharedCpu* Local; + TIntrusivePtr<TAffinity> SingleCpuAffinity; // no migration support yet + public: + explicit TSharedCpuExecutorPool(TSharedCpu* local, const TUnitedWorkersConfig& config) + : TCpuExecutorPool("u-" + ToString(local->Config.CpuId)) + , Local(local) + , SingleCpuAffinity(config.NoAffinity ? nullptr : new TAffinity(TCpuMask(local->Config.CpuId))) + {} + + TAffinity* Affinity() const override { + return SingleCpuAffinity.Get(); + } + + ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) override { + return Local->GetReadyActivation(wctx, revolvingCounter); + } + }; + + // Proxy executor pool working with balancer and assigned pools (aka actorsystem 1.5) + class TAssignedCpuExecutorPool: public TCpuExecutorPool { + TAssignedCpu* Local; + TIntrusivePtr<TAffinity> CpuAffinity; + public: + explicit TAssignedCpuExecutorPool(TAssignedCpu* local, const TUnitedWorkersConfig& config) + : TCpuExecutorPool("United") + , Local(local) + , CpuAffinity(config.NoAffinity ? nullptr : new TAffinity(config.Allowed)) + {} + + TAffinity* Affinity() const override { + return CpuAffinity.Get(); + } + + ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) override { + return Local->GetReadyActivation(wctx, revolvingCounter); + } + }; + + // Representation of a single cpu and it's state visible to other cpus and pools + struct TUnitedWorkers::TCpu: public TNonCopyable { + struct TScopedWaiters { + TCpu& Cpu; + TPool* AssignedPool; // nullptr if CpuShared + + // Subscribe on wakeups from allowed pools + TScopedWaiters(TCpu& cpu, TPool* assignedPool) : Cpu(cpu), AssignedPool(assignedPool) { + if (!AssignedPool) { + for (TPool* pool : Cpu.AllowedPools) { + AtomicIncrement(pool->Waiters); + } + } else { + AtomicIncrement(AssignedPool->Waiters); + } + } + + // Unsubscribe from pools we've subscribed on + ~TScopedWaiters() { + if (!AssignedPool) { + for (TPool* pool : Cpu.AllowedPools) { + AtomicDecrement(pool->Waiters); + } + } else { + AtomicDecrement(AssignedPool->Waiters); + } + } + }; + + // Current cpu state important for other cpus and balancer + TCpuState State; + + // Thread-safe per pool stats + // NOTE: It's guaranteed that cpu never executes two instance of the same pool + TVector<TExecutorThreadStats> PoolStats; + + // Configuration + TCpuId CpuId; + THolder<TCpuLocalManager> LocalManager; + THolder<TCpuExecutorPool> ExecutorPool; + + // Pools allowed to run on this cpu + TStackVec<TPool*, 15> AllowedPools; + + void Stop() { + if (LocalManager) { + State.Stop(); + LocalManager->Stop(); + } + } + + bool StartSpinning(TUnitedWorkers* united, TPool* assignedPool, TPoolId& result) { + // Mark cpu as idle + if (Y_UNLIKELY(!State.StartSpinning())) { + result = CpuStopped; + return true; + } + + // Avoid using multiple atomic seq_cst loads in cycle, use barrier once and relaxed ops + AtomicBarrier(); + + // Check there is no pending tokens (can be released before Waiters increment) + if (!assignedPool) { + for (TPool* pool : AllowedPools) { + if (pool->TryAcquireTokenRelaxed()) { + result = WakeWithTokenAcquired(united, pool->PoolId); + return true; // token acquired or stop + } + } 
+ } else { + if (assignedPool->TryAcquireTokenRelaxed()) { + result = WakeWithTokenAcquired(united, assignedPool->PoolId); + return true; // token acquired or stop + } + } + + // At this point we can be sure wakeup won't be lost + // So we can actively spin or block w/o checking for pending tokens + return false; + } + + bool ActiveWait(ui64 spinThresholdTs, TPoolId& result) { + ui64 deadline = GetCycleCountFast() + spinThresholdTs; + while (GetCycleCountFast() < deadline) { + for (ui32 i = 0; i < 12; ++i) { + TPoolId current = State.CurrentPool(); + if (current == CpuSpinning) { + SpinLockPause(); + } else { + result = current; + return true; // wakeup + } + } + } + return false; // spin threshold exceeded, no wakeups + } + + bool StartBlocking(TPoolId& result) { + // Switch into blocked state + if (State.StartBlocking()) { + result = State.CurrentPool(); + return true; + } else { + return false; + } + } + + bool BlockedWait(TPoolId& result, ui64 timeoutNs) { + return State.Block(timeoutNs, result); + } + + void SwitchPool(TPoolId pool) { + return State.SwitchPool(pool); + } + + private: + TPoolId WakeWithTokenAcquired(TUnitedWorkers* united, TPoolId token) { + switch (State.WakeWithTokenAcquired(token)) { + case TCpuState::Woken: // we've got token and successfully woken up this cpu + // NOTE: sending thread may also wakeup another worker, which wont be able to acquire token and will go idle (it's ok) + return token; + case TCpuState::NotIdle: { // wakeup event has also occured + TPoolId wakeup = State.CurrentPool(); + if (wakeup != token) { // token and wakeup for different pools + united->TryWake(wakeup); // rewake another cpu to avoid losing wakeup + } + return token; + } + case TCpuState::Forbidden: + Y_FAIL(); + case TCpuState::Stopped: + return CpuStopped; + } + } + }; + + TUnitedWorkers::TUnitedWorkers( + const TUnitedWorkersConfig& config, + const TVector<TUnitedExecutorPoolConfig>& unitedPools, + const TCpuAllocationConfig& allocation, + IBalancer* balancer) + : Balancer(balancer) + , Config(config) + , Allocation(allocation) + { + // Find max pool id and initialize pools + PoolCount = 0; + for (const TCpuAllocation& cpuAlloc : allocation.Items) { + for (const auto& pa : cpuAlloc.AllowedPools) { + PoolCount = Max<size_t>(PoolCount, pa.PoolId + 1); + } + } + Pools.Reset(new TPool[PoolCount]); + + // Find max cpu id and initialize cpus + CpuCount = 0; + for (const TCpuAllocation& cpuAlloc : allocation.Items) { + CpuCount = Max<size_t>(CpuCount, cpuAlloc.CpuId + 1); + } + Cpus.Reset(new TCpu[CpuCount]); + + // Setup allocated cpus + // NOTE: leave gaps for not allocated cpus (default-initialized) + WorkerCount = 0; + for (const TCpuAllocation& cpuAlloc : allocation.Items) { + TCpu& cpu = Cpus[cpuAlloc.CpuId]; + cpu.CpuId = cpuAlloc.CpuId; + cpu.PoolStats.resize(PoolCount); // NOTE: also may have gaps + for (const auto& pa : cpuAlloc.AllowedPools) { + cpu.AllowedPools.emplace_back(&Pools[pa.PoolId]); + } + + // Setup balancing and cpu-local manager + if (!Balancer->AddCpu(cpuAlloc, &cpu.State)) { + cpu.State.SwitchPool(0); // set initial state to non-idle to avoid losing wakeups on start + cpu.State.AssignPool(CpuShared); + TSharedCpu* local = new TSharedCpu(TSharedCpu::TConfig(cpuAlloc, Config), this); + cpu.LocalManager.Reset(local); + cpu.ExecutorPool.Reset(new TSharedCpuExecutorPool(local, Config)); + } else { + TAssignedCpu* local = new TAssignedCpu(this); + cpu.LocalManager.Reset(local); + cpu.ExecutorPool.Reset(new TAssignedCpuExecutorPool(local, Config)); + } + WorkerCount 
+= cpu.LocalManager->WorkerCount(); + } + + // Initialize workers + Workers.Reset(new TWorker[WorkerCount]); + + // Setup pools + // NOTE: leave gaps for not united pools (default-initialized) + for (const TUnitedExecutorPoolConfig& cfg : unitedPools) { + TPool& pool = Pools[cfg.PoolId]; + Y_VERIFY(cfg.PoolId < MaxPools); + pool.PoolId = cfg.PoolId; + pool.Concurrency = cfg.Concurrency ? cfg.Concurrency : Config.CpuCount; + pool.ExecutorPool = nullptr; // should be set later using SetupPool() + pool.MailboxTable = nullptr; // should be set later using SetupPool() + pool.TimePerMailboxTs = DurationToCycles(cfg.TimePerMailbox); + pool.EventsPerMailbox = cfg.EventsPerMailbox; + + // Reinitialize per cpu pool stats with right MaxActivityType + for (const TCpuAllocation& cpuAlloc : allocation.Items) { + TCpu& cpu = Cpus[cpuAlloc.CpuId]; + cpu.PoolStats[cfg.PoolId] = TExecutorThreadStats(cfg.MaxActivityType); + } + + // Setup WakeOrderCpus: left to right exclusive cpus, then left to right shared cpus. + // Waking exclusive cpus first reduce load on shared cpu and improve latency isolation, which is + // the point of using exclusive cpu. But note that number of actively spinning idle cpus may increase, + // so cpu consumption on light load is higher. + for (const TCpuAllocation& cpuAlloc : allocation.Items) { + TCpu& cpu = Cpus[cpuAlloc.CpuId]; + if (cpu.AllowedPools.size() == 1 && cpu.AllowedPools[0] == &pool) { + pool.WakeOrderCpus.emplace_back(&cpu); + } + } + for (const TCpuAllocation& cpuAlloc : allocation.Items) { + TCpu& cpu = Cpus[cpuAlloc.CpuId]; + if (cpu.AllowedPools.size() > 1 && cpuAlloc.HasPool(pool.PoolId)) { + pool.WakeOrderCpus.emplace_back(&cpu); + } + } + } + } + + TUnitedWorkers::~TUnitedWorkers() { + } + + void TUnitedWorkers::Prepare(TActorSystem* actorSystem, TVector<NSchedulerQueue::TReader*>& scheduleReaders) { + // Setup allocated cpus + // NOTE: leave gaps for not allocated cpus (default-initialized) + TWorkerId workers = 0; + for (TCpuId cpuId = 0; cpuId < CpuCount; cpuId++) { + TCpu& cpu = Cpus[cpuId]; + + // Setup cpu-local workers + if (cpu.LocalManager) { + for (size_t i = 0; i < cpu.LocalManager->WorkerCount(); i++) { + TWorkerId workerId = workers++; + cpu.LocalManager->AddWorker(workerId); + + // Setup worker + Y_VERIFY(workerId < WorkerCount); + Workers[workerId].Thread.Reset(new TExecutorThread( + workerId, + cpu.CpuId, + actorSystem, + cpu.ExecutorPool.Get(), // use cpu-local manager as proxy executor for all workers on cpu + nullptr, // MailboxTable is pool-specific, will be set on pool switch + cpu.ExecutorPool->GetName())); + // NOTE: TWorker::ThreadId will be initialized after in Start() + + scheduleReaders.push_back(&Workers[workerId].SchedulerQueue.Reader); + } + } + } + } + + void TUnitedWorkers::Start() { + for (TWorkerId workerId = 0; workerId < WorkerCount; workerId++) { + Workers[workerId].Thread->Start(); + } + for (TWorkerId workerId = 0; workerId < WorkerCount; workerId++) { + AtomicStore(&Workers[workerId].ThreadId, Workers[workerId].Thread->GetThreadId()); + } + } + + inline TThreadId TUnitedWorkers::GetWorkerThreadId(TWorkerId workerId) const { + volatile TThreadId* threadId = &Workers[workerId].ThreadId; +#ifdef _linux_ + while (AtomicLoad(threadId) == UnknownThreadId) { + NanoSleep(1000); + } +#endif + return AtomicLoad(threadId); + } + + inline NSchedulerQueue::TWriter* TUnitedWorkers::GetScheduleWriter(TWorkerId workerId) const { + return &Workers[workerId].SchedulerQueue.Writer; + } + + void TUnitedWorkers::SetupPool(TPoolId pool, 
IExecutorPool* executorPool, TMailboxTable* mailboxTable) { + Pools[pool].ExecutorPool = executorPool; + Pools[pool].MailboxTable = mailboxTable; + } + + void TUnitedWorkers::PrepareStop() { + AtomicStore(&StopFlag, true); + for (TPoolId pool = 0; pool < PoolCount; pool++) { + Pools[pool].Stop(); + } + for (TCpuId cpuId = 0; cpuId < CpuCount; cpuId++) { + Cpus[cpuId].Stop(); + } + } + + void TUnitedWorkers::Shutdown() { + for (TWorkerId workerId = 0; workerId < WorkerCount; workerId++) { + Workers[workerId].Thread->Join(); + } + } + + inline void TUnitedWorkers::PushActivation(TPoolId pool, ui32 activation, ui64 revolvingCounter) { + if (Pools[pool].PushActivation(activation, revolvingCounter)) { // token generated + TryWake(pool); + } + } + + inline bool TUnitedWorkers::TryAcquireToken(TPoolId pool) { + return Pools[pool].TryAcquireToken(); + } + + inline void TUnitedWorkers::TryWake(TPoolId pool) { + // Avoid using multiple atomic seq_cst loads in cycle, use barrier once + AtomicBarrier(); + + // Scan every allowed cpu in pool's wakeup order and try to wake the first idle cpu + if (RelaxedLoad(&Pools[pool].Waiters) > 0) { + for (TCpu* cpu : Pools[pool].WakeOrderCpus) { + if (cpu->State.WakeWithoutToken(pool) == TCpuState::Woken) { + return; // successful wake up + } + } + } + + // Cpu has not been woken up + } + + inline void TUnitedWorkers::BeginExecution(TPoolId pool, ui32& activation, ui64 revolvingCounter) { + Pools[pool].BeginExecution(activation, revolvingCounter); + } + + inline bool TUnitedWorkers::NextExecution(TPoolId pool, ui32& activation, ui64 revolvingCounter) { + return Pools[pool].NextExecution(activation, revolvingCounter); + } + + inline void TUnitedWorkers::StopExecution(TPoolId pool) { + if (Pools[pool].StopExecution()) { // pending token + TryWake(pool); + } + } + + inline void TUnitedWorkers::Balance() { + ui64 ts = GetCycleCountFast(); + if (Balancer->TryLock(ts)) { + for (TPoolId pool = 0; pool < PoolCount; pool++) { + if (Pools[pool].IsUnited()) { + ui64 ElapsedTs = 0; + ui64 ParkedTs = 0; + for (TCpu* cpu : Pools[pool].WakeOrderCpus) { + const TExecutorThreadStats& cpuStats = cpu->PoolStats[pool]; + ElapsedTs += cpuStats.ElapsedTicks; + ParkedTs += cpuStats.ParkedTicks; + } + TBalancerStats stats; + stats.Ts = ts; + stats.CpuUs = Ts2Us(ElapsedTs); + stats.IdleUs = Ts2Us(ParkedTs); + Balancer->SetPoolStats(pool, stats); + } + } + Balancer->Balance(); + Balancer->Unlock(); + } + } + + inline TPoolId TUnitedWorkers::AssignedPool(TWorkerContext& wctx) { + return Cpus[wctx.CpuId].State.AssignedPool(); + } + + inline bool TUnitedWorkers::IsPoolReassigned(TWorkerContext& wctx) { + return Cpus[wctx.CpuId].State.IsPoolReassigned(wctx.PoolId); + } + + inline void TUnitedWorkers::SwitchPool(TWorkerContext& wctx, ui64 softDeadlineTs) { + Pools[wctx.PoolId].Switch(wctx, softDeadlineTs, Cpus[wctx.CpuId].PoolStats[wctx.PoolId]); + Cpus[wctx.CpuId].SwitchPool(wctx.PoolId); + } + + TPoolId TUnitedWorkers::Idle(TPoolId assigned, TWorkerContext& wctx) { + wctx.SwitchToIdle(); + + TPoolId result; + TTimeTracker timeTracker; + TCpu& cpu = Cpus[wctx.CpuId]; + TPool* assignedPool = assigned == CpuShared ? 
nullptr : &Pools[assigned]; + TCpu::TScopedWaiters scopedWaiters(cpu, assignedPool); + while (true) { + if (cpu.StartSpinning(this, assignedPool, result)) { + break; // token already acquired (or stop) + } + result = WaitSequence(cpu, wctx, timeTracker); + if (Y_UNLIKELY(result == CpuStopped) || TryAcquireToken(result)) { + break; // token acquired (or stop) + } + } + + wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, timeTracker.Elapsed()); + return result; + } + + TPoolId TUnitedWorkers::WaitSequence(TCpu& cpu, TWorkerContext& wctx, TTimeTracker& timeTracker) { + TPoolId result; + if (cpu.ActiveWait(Us2Ts(Config.SpinThresholdUs), result)) { + wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, timeTracker.Elapsed()); + return result; + } + if (cpu.StartBlocking(result)) { + wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, timeTracker.Elapsed()); + return result; + } + wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, timeTracker.Elapsed()); + bool wakeup; + do { + wakeup = cpu.BlockedWait(result, Config.Balancer.PeriodUs * 1000); + wctx.AddParkedCycles(timeTracker.Elapsed()); + } while (!wakeup); + return result; + } + + void TUnitedWorkers::GetCurrentStats(TPoolId pool, TVector<TExecutorThreadStats>& statsCopy) const { + size_t idx = 1; + statsCopy.resize(idx + Pools[pool].WakeOrderCpus.size()); + for (TCpu* cpu : Pools[pool].WakeOrderCpus) { + TExecutorThreadStats& s = statsCopy[idx++]; + s = TExecutorThreadStats(); + s.Aggregate(cpu->PoolStats[pool]); + } + } + + TUnitedExecutorPool::TUnitedExecutorPool(const TUnitedExecutorPoolConfig& cfg, TUnitedWorkers* united) + : TExecutorPoolBaseMailboxed(cfg.PoolId, cfg.MaxActivityType) + , United(united) + , PoolName(cfg.PoolName) + { + United->SetupPool(TPoolId(cfg.PoolId), this, MailboxTable.Get()); + } + + void TUnitedExecutorPool::Prepare(TActorSystem* actorSystem, NSchedulerQueue::TReader** scheduleReaders, ui32* scheduleSz) { + ActorSystem = actorSystem; + + // Schedule readers are initialized through TUnitedWorkers::Prepare + *scheduleReaders = nullptr; + *scheduleSz = 0; + } + + void TUnitedExecutorPool::Start() { + // workers are actually started in TUnitedWorkers::Start() + } + + void TUnitedExecutorPool::PrepareStop() { + } + + void TUnitedExecutorPool::Shutdown() { + // workers are actually joined in TUnitedWorkers::Shutdown() + } + + TAffinity* TUnitedExecutorPool::Affinity() const { + Y_FAIL(); // should never be called, TCpuExecutorPool is used instead + } + + ui32 TUnitedExecutorPool::GetThreads() const { + return 0; + } + + ui32 TUnitedExecutorPool::GetReadyActivation(TWorkerContext&, ui64) { + Y_FAIL(); // should never be called, TCpu*ExecutorPool is used instead + } + + inline void TUnitedExecutorPool::ScheduleActivation(ui32 activation) { + TUnitedExecutorPool::ScheduleActivationEx(activation, AtomicIncrement(ActivationsRevolvingCounter)); + } + + inline void TUnitedExecutorPool::ScheduleActivationEx(ui32 activation, ui64 revolvingCounter) { + United->PushActivation(PoolId, activation, revolvingCounter); + } + + void TUnitedExecutorPool::Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) { + TUnitedExecutorPool::Schedule(deadline - ActorSystem->Timestamp(), ev, cookie, workerId); + } + + void TUnitedExecutorPool::Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) { + Y_VERIFY_DEBUG(workerId < United->GetWorkerCount()); + const auto current = ActorSystem->Monotonic(); + if (deadline < current) { + deadline = current; + } + 
United->GetScheduleWriter(workerId)->Push(deadline.MicroSeconds(), ev.Release(), cookie); + } + + void TUnitedExecutorPool::Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) { + Y_VERIFY_DEBUG(workerId < United->GetWorkerCount()); + const auto deadline = ActorSystem->Monotonic() + delta; + United->GetScheduleWriter(workerId)->Push(deadline.MicroSeconds(), ev.Release(), cookie); + } + + void TUnitedExecutorPool::GetCurrentStats(TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const { + Y_UNUSED(poolStats); + if (statsCopy.empty()) { + statsCopy.resize(1); + } + statsCopy[0] = TExecutorThreadStats(); + statsCopy[0].Aggregate(Stats); + United->GetCurrentStats(PoolId, statsCopy); + } +} diff --git a/library/cpp/actors/core/executor_pool_united.h b/library/cpp/actors/core/executor_pool_united.h new file mode 100644 index 0000000000..a090ba2466 --- /dev/null +++ b/library/cpp/actors/core/executor_pool_united.h @@ -0,0 +1,135 @@ +#pragma once + +#include "actorsystem.h" +#include "balancer.h" +#include "scheduler_queue.h" +#include "executor_pool_base.h" + +#include <library/cpp/actors/util/unordered_cache.h> + +#include <library/cpp/monlib/dynamic_counters/counters.h> +#include <library/cpp/actors/util/unordered_cache.h> +#include <library/cpp/containers/stack_vector/stack_vec.h> + +#include <util/generic/noncopyable.h> + +namespace NActors { + class TMailboxTable; + + class TUnitedWorkers: public TNonCopyable { + struct TWorker; + struct TPool; + struct TCpu; + + size_t WorkerCount; + TArrayHolder<TWorker> Workers; // indexed by WorkerId + size_t PoolCount; + TArrayHolder<TPool> Pools; // indexed by PoolId, so may include not used (not united) pools + size_t CpuCount; + TArrayHolder<TCpu> Cpus; // indexed by CpuId, so may include not allocated CPUs + + IBalancer* Balancer; // external pool cpu balancer + + TUnitedWorkersConfig Config; + TCpuAllocationConfig Allocation; + + volatile bool StopFlag = false; + + public: + TUnitedWorkers( + const TUnitedWorkersConfig& config, + const TVector<TUnitedExecutorPoolConfig>& unitedPools, + const TCpuAllocationConfig& allocation, + IBalancer* balancer); + ~TUnitedWorkers(); + void Prepare(TActorSystem* actorSystem, TVector<NSchedulerQueue::TReader*>& scheduleReaders); + void Start(); + void PrepareStop(); + void Shutdown(); + + bool IsStopped() const { + return RelaxedLoad(&StopFlag); + } + + TWorkerId GetWorkerCount() const { + return WorkerCount; + } + + // Returns thread id of a worker + TThreadId GetWorkerThreadId(TWorkerId workerId) const; + + // Returns per worker schedule writers + NSchedulerQueue::TWriter* GetScheduleWriter(TWorkerId workerId) const; + + // Sets executor for specified pool + void SetupPool(TPoolId pool, IExecutorPool* executorPool, TMailboxTable* mailboxTable); + + // Add activation of newly scheduled mailbox and wake cpu to execute it if required + void PushActivation(TPoolId pool, ui32 activation, ui64 revolvingCounter); + + // Try acquire pending token. 
Must be done before execution + bool TryAcquireToken(TPoolId pool); + + // Try to wake idle cpu waiting for tokens on specified pool + void TryWake(TPoolId pool); + + // Get activation from pool; requires pool's token + void BeginExecution(TPoolId pool, ui32& activation, ui64 revolvingCounter); + + // Stop currently active execution and start new one if token is available + // NOTE: Reuses token if it's not destroyed + bool NextExecution(TPoolId pool, ui32& activation, ui64 revolvingCounter); + + // Stop active execution + void StopExecution(TPoolId pool); + + // Runs balancer to assign pools to cpus + void Balance(); + + // Returns pool to be executed by worker or `CpuShared` + TPoolId AssignedPool(TWorkerContext& wctx); + + // Checks if balancer has assigned another pool for worker's cpu + bool IsPoolReassigned(TWorkerContext& wctx); + + // Switch worker context into specified pool + void SwitchPool(TWorkerContext& wctx, ui64 softDeadlineTs); + + // Wait for tokens from any pool allowed on specified cpu + TPoolId Idle(TPoolId assigned, TWorkerContext& wctx); + + // Fill stats for specified pool + void GetCurrentStats(TPoolId pool, TVector<TExecutorThreadStats>& statsCopy) const; + + private: + TPoolId WaitSequence(TCpu& cpu, TWorkerContext& wctx, TTimeTracker& timeTracker); + }; + + class TUnitedExecutorPool: public TExecutorPoolBaseMailboxed { + TUnitedWorkers* United; + const TString PoolName; + TAtomic ActivationsRevolvingCounter = 0; + public: + TUnitedExecutorPool(const TUnitedExecutorPoolConfig& cfg, TUnitedWorkers* united); + + void Prepare(TActorSystem* actorSystem, NSchedulerQueue::TReader** scheduleReaders, ui32* scheduleSz) override; + void Start() override; + void PrepareStop() override; + void Shutdown() override; + + TAffinity* Affinity() const override; + ui32 GetThreads() const override; + ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingReadCounter) override; + void ScheduleActivation(ui32 activation) override; + void ScheduleActivationEx(ui32 activation, ui64 revolvingWriteCounter) override; + void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override; + void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override; + void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override; + + void GetCurrentStats(TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const override; + + TString GetName() const override { + return PoolName; + } + }; +} diff --git a/library/cpp/actors/core/executor_pool_united_ut.cpp b/library/cpp/actors/core/executor_pool_united_ut.cpp new file mode 100644 index 0000000000..d4df17f1b8 --- /dev/null +++ b/library/cpp/actors/core/executor_pool_united_ut.cpp @@ -0,0 +1,338 @@ +#include "actorsystem.h" +#include "executor_pool_basic.h" +#include "hfunc.h" +#include "scheduler_basic.h" + +#include <library/cpp/actors/util/should_continue.h> + +#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/actors/protos/unittests.pb.h> + +using namespace NActors; + +//////////////////////////////////////////////////////////////////////////////// + +struct TEvMsg : public NActors::TEventBase<TEvMsg, 10347> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvMsg, "ExecutorPoolTest: Msg"); +}; + +//////////////////////////////////////////////////////////////////////////////// + +inline ui64 DoTimedWork(ui64 workUs) { + ui64 startUs = ThreadCPUTime(); + ui64 endUs = startUs + 
workUs; + ui64 nowUs = startUs; + do { + ui64 endTs = GetCycleCountFast() + Us2Ts(endUs - nowUs); + while (GetCycleCountFast() <= endTs) {} + nowUs = ThreadCPUTime(); + } while (nowUs <= endUs); + return nowUs - startUs; +} + +class TTestSenderActor : public IActor { +private: + using EActivityType = IActor::EActivityType ; + using EActorActivity = IActor::EActorActivity; + +private: + TAtomic Counter; + TActorId Receiver; + + std::function<void(void)> Action; + +public: + TTestSenderActor(std::function<void(void)> action = [](){}, + EActivityType activityType = EActorActivity::OTHER) + : IActor(static_cast<TReceiveFunc>(&TTestSenderActor::Execute), activityType) + , Action(action) + {} + + void Start(TActorId receiver, size_t count) { + AtomicSet(Counter, count); + Receiver = receiver; + } + + void Stop() { + while (true) { + if (GetCounter() == 0) { + break; + } + + Sleep(TDuration::MilliSeconds(1)); + } + } + + size_t GetCounter() const { + return AtomicGet(Counter); + } + +private: + STFUNC(Execute) { + Y_UNUSED(ctx); + switch (ev->GetTypeRewrite()) { + hFunc(TEvMsg, Handle); + } + } + + void Handle(TEvMsg::TPtr &ev) { + Y_UNUSED(ev); + Action(); + TAtomicBase count = AtomicDecrement(Counter); + Y_VERIFY(count != Max<TAtomicBase>()); + if (count) { + Send(Receiver, new TEvMsg()); + } + } +}; + +// Single cpu balancer that switches pool on every activation; not thread-safe +struct TRoundRobinBalancer: public IBalancer { + TCpuState* State; + TMap<TPoolId, TPoolId> NextPool; + + bool AddCpu(const TCpuAllocation& cpuAlloc, TCpuState* cpu) override { + State = cpu; + TPoolId prev = cpuAlloc.AllowedPools.rbegin()->PoolId; + for (auto& p : cpuAlloc.AllowedPools) { + NextPool[prev] = p.PoolId; + prev = p.PoolId; + } + return true; + } + + bool TryLock(ui64) override { return true; } + void SetPoolStats(TPoolId, const TBalancerStats&) override {} + void Unlock() override {} + + void Balance() override { + TPoolId assigned; + TPoolId current; + State->Load(assigned, current); + State->AssignPool(NextPool[assigned]); + } +}; + +void AddUnitedPool(THolder<TActorSystemSetup>& setup, ui32 concurrency = 0) { + TUnitedExecutorPoolConfig united; + united.PoolId = setup->GetExecutorsCount(); + united.Concurrency = concurrency; + setup->CpuManager.United.emplace_back(std::move(united)); +} + +THolder<TActorSystemSetup> GetActorSystemSetup(ui32 cpuCount) { + auto setup = MakeHolder<NActors::TActorSystemSetup>(); + setup->NodeId = 1; + setup->CpuManager.UnitedWorkers.CpuCount = cpuCount; + setup->CpuManager.UnitedWorkers.NoRealtime = true; // unavailable in test environment + setup->Scheduler = new TBasicSchedulerThread(NActors::TSchedulerConfig(512, 0)); + return setup; +} + +Y_UNIT_TEST_SUITE(UnitedExecutorPool) { + +#ifdef _linux_ + + Y_UNIT_TEST(OnePoolManyCpus) { + const size_t msgCount = 1e4; + auto setup = GetActorSystemSetup(4); + AddUnitedPool(setup); + TActorSystem actorSystem(setup); + actorSystem.Start(); + + auto begin = TInstant::Now(); + + auto actor = new TTestSenderActor(); + auto actorId = actorSystem.Register(actor); + actor->Start(actor->SelfId(), msgCount); + actorSystem.Send(actorId, new TEvMsg()); + + while (actor->GetCounter()) { + auto now = TInstant::Now(); + UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Counter is " << actor->GetCounter()); + + Sleep(TDuration::MilliSeconds(1)); + } + + TVector<TExecutorThreadStats> stats; + TExecutorPoolStats poolStats; + actorSystem.GetPoolStats(0, poolStats, stats); + // Sum all per-thread counters into the 0th element + for (ui32 idx = 
1; idx < stats.size(); ++idx) { + stats[0].Aggregate(stats[idx]); + } + + UNIT_ASSERT_VALUES_EQUAL(stats[0].SentEvents, msgCount - 1); + UNIT_ASSERT_VALUES_EQUAL(stats[0].ReceivedEvents, msgCount); + //UNIT_ASSERT_VALUES_EQUAL(stats[0].PreemptedEvents, 0); // depends on execution time and system load, so may be non-zero + UNIT_ASSERT_VALUES_EQUAL(stats[0].NonDeliveredEvents, 0); + UNIT_ASSERT_VALUES_EQUAL(stats[0].EmptyMailboxActivation, 0); + //UNIT_ASSERT_VALUES_EQUAL(stats[0].CpuNs, 0); // depends on total duration of test, so undefined + UNIT_ASSERT(stats[0].ElapsedTicks > 0); + UNIT_ASSERT(stats[0].ParkedTicks == 0); // per-pool parked time does not make sense for united pools + UNIT_ASSERT_VALUES_EQUAL(stats[0].BlockedTicks, 0); + UNIT_ASSERT(stats[0].ActivationTimeHistogram.TotalSamples >= msgCount / TBasicExecutorPoolConfig::DEFAULT_EVENTS_PER_MAILBOX); + UNIT_ASSERT_VALUES_EQUAL(stats[0].EventDeliveryTimeHistogram.TotalSamples, msgCount); + UNIT_ASSERT_VALUES_EQUAL(stats[0].EventProcessingCountHistogram.TotalSamples, msgCount); + UNIT_ASSERT(stats[0].EventProcessingTimeHistogram.TotalSamples > 0); + UNIT_ASSERT(stats[0].ElapsedTicksByActivity[0] > 0); + UNIT_ASSERT_VALUES_EQUAL(stats[0].ReceivedEventsByActivity[0], msgCount); + UNIT_ASSERT_VALUES_EQUAL(stats[0].ActorsAliveByActivity[0], 1); + UNIT_ASSERT_VALUES_EQUAL(stats[0].ScheduledEventsByActivity[0], 0); + UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolActorRegistrations, 1); + UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolDestroyedActors, 0); + UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolAllocatedMailboxes, 4095); // one line + UNIT_ASSERT(stats[0].MailboxPushedOutByTime + stats[0].MailboxPushedOutByEventCount + stats[0].MailboxPushedOutBySoftPreemption >= msgCount / TBasicExecutorPoolConfig::DEFAULT_EVENTS_PER_MAILBOX); + } + + Y_UNIT_TEST(ManyPoolsOneSharedCpu) { + const size_t msgCount = 1e4; + const size_t pools = 4; + auto setup = GetActorSystemSetup(1); + for (size_t pool = 0; pool < pools; pool++) { + AddUnitedPool(setup); + } + TActorSystem actorSystem(setup); + actorSystem.Start(); + + auto begin = TInstant::Now(); + + TVector<TTestSenderActor*> actors; + for (size_t pool = 0; pool < pools; pool++) { + auto actor = new TTestSenderActor(); + auto actorId = actorSystem.Register(actor, TMailboxType::HTSwap, pool); + actor->Start(actor->SelfId(), msgCount); + actorSystem.Send(actorId, new TEvMsg()); + actors.push_back(actor); + } + + while (true) { + size_t left = 0; + for (auto actor : actors) { + left += actor->GetCounter(); + } + if (left == 0) { + break; + } + auto now = TInstant::Now(); + UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "left " << left); + Sleep(TDuration::MilliSeconds(1)); + } + + for (size_t pool = 0; pool < pools; pool++) { + TVector<TExecutorThreadStats> stats; + TExecutorPoolStats poolStats; + actorSystem.GetPoolStats(pool, poolStats, stats); + // Sum all per-thread counters into the 0th element + for (ui32 idx = 1; idx < stats.size(); ++idx) { + stats[0].Aggregate(stats[idx]); + } + + UNIT_ASSERT_VALUES_EQUAL(stats[0].ReceivedEvents, msgCount); + UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolActorRegistrations, 1); + } + } + + Y_UNIT_TEST(ManyPoolsOneAssignedCpu) { + const size_t msgCount = 1e4; + const size_t pools = 4; + auto setup = GetActorSystemSetup(1); + setup->Balancer.Reset(new TRoundRobinBalancer()); + for (size_t pool = 0; pool < pools; pool++) { + AddUnitedPool(setup); + } + TActorSystem actorSystem(setup); + actorSystem.Start(); + + auto begin = TInstant::Now(); + + TVector<TTestSenderActor*> actors; + for 
(size_t pool = 0; pool < pools; pool++) { + auto actor = new TTestSenderActor(); + auto actorId = actorSystem.Register(actor, TMailboxType::HTSwap, pool); + actor->Start(actor->SelfId(), msgCount); + actorSystem.Send(actorId, new TEvMsg()); + actors.push_back(actor); + } + + while (true) { + size_t left = 0; + for (auto actor : actors) { + left += actor->GetCounter(); + } + if (left == 0) { + break; + } + auto now = TInstant::Now(); + UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "left " << left); + Sleep(TDuration::MilliSeconds(1)); + } + + for (size_t pool = 0; pool < pools; pool++) { + TVector<TExecutorThreadStats> stats; + TExecutorPoolStats poolStats; + actorSystem.GetPoolStats(pool, poolStats, stats); + // Sum all per-thread counters into the 0th element + for (ui32 idx = 1; idx < stats.size(); ++idx) { + stats[0].Aggregate(stats[idx]); + } + + UNIT_ASSERT_VALUES_EQUAL(stats[0].ReceivedEvents, msgCount); + UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolActorRegistrations, 1); + } + } + + Y_UNIT_TEST(ManyPoolsOneCpuSlowEvents) { + const size_t msgCount = 3; + const size_t pools = 4; + auto setup = GetActorSystemSetup(1); + for (size_t pool = 0; pool < pools; pool++) { + AddUnitedPool(setup); + } + TActorSystem actorSystem(setup); + actorSystem.Start(); + + auto begin = TInstant::Now(); + + TVector<TTestSenderActor*> actors; + for (size_t pool = 0; pool < pools; pool++) { + auto actor = new TTestSenderActor([]() { + DoTimedWork(100'000); + }); + auto actorId = actorSystem.Register(actor, TMailboxType::HTSwap, pool); + actor->Start(actor->SelfId(), msgCount); + actorSystem.Send(actorId, new TEvMsg()); + actors.push_back(actor); + } + + while (true) { + size_t left = 0; + for (auto actor : actors) { + left += actor->GetCounter(); + } + if (left == 0) { + break; + } + auto now = TInstant::Now(); + UNIT_ASSERT_C(now - begin < TDuration::Seconds(15), "left " << left); + Sleep(TDuration::MilliSeconds(1)); + } + + for (size_t pool = 0; pool < pools; pool++) { + TVector<TExecutorThreadStats> stats; + TExecutorPoolStats poolStats; + actorSystem.GetPoolStats(pool, poolStats, stats); + // Sum all per-thread counters into the 0th element + for (ui32 idx = 1; idx < stats.size(); ++idx) { + stats[0].Aggregate(stats[idx]); + } + + UNIT_ASSERT_VALUES_EQUAL(stats[0].ReceivedEvents, msgCount); + UNIT_ASSERT_VALUES_EQUAL(stats[0].PreemptedEvents, msgCount); // every 100ms event should be preempted + UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolActorRegistrations, 1); + } + } + +#endif + +} diff --git a/library/cpp/actors/core/executor_thread.cpp b/library/cpp/actors/core/executor_thread.cpp new file mode 100644 index 0000000000..446b651efd --- /dev/null +++ b/library/cpp/actors/core/executor_thread.cpp @@ -0,0 +1,563 @@ +#include "executor_thread.h" +#include "actorsystem.h" +#include "callstack.h" +#include "mailbox.h" +#include "event.h" +#include "events.h" + +#include <library/cpp/actors/prof/tag.h> +#include <library/cpp/actors/util/affinity.h> +#include <library/cpp/actors/util/datetime.h> +#include <library/cpp/actors/util/thread.h> + +#ifdef BALLOC +#include <library/cpp/balloc/optional/operators.h> +#endif + +#ifdef _linux_ +#include <sys/syscall.h> +#include <unistd.h> +#endif + +#include <util/system/type_name.h> +#include <util/system/datetime.h> + +LWTRACE_USING(ACTORLIB_PROVIDER) + +namespace NActors { + constexpr TDuration TExecutorThread::DEFAULT_TIME_PER_MAILBOX; + + TExecutorThread::TExecutorThread( + TWorkerId workerId, + TWorkerId cpuId, + TActorSystem* actorSystem, + IExecutorPool* 
executorPool, + TMailboxTable* mailboxTable, + const TString& threadName, + TDuration timePerMailbox, + ui32 eventsPerMailbox) + : ActorSystem(actorSystem) + , ExecutorPool(executorPool) + , Ctx(workerId, cpuId, actorSystem ? actorSystem->GetMaxActivityType() : 1) + , ThreadName(threadName) + { + Ctx.Switch( + ExecutorPool, + mailboxTable, + NHPTimer::GetClockRate() * timePerMailbox.SecondsFloat(), + eventsPerMailbox, + ui64(-1), // infinite soft deadline + &Ctx.WorkerStats); + } + + TActorId TExecutorThread::RegisterActor(IActor* actor, TMailboxType::EType mailboxType, ui32 poolId, const TActorId& parentId) { + if (poolId == Max<ui32>()) + return Ctx.Executor->Register(actor, mailboxType, ++RevolvingWriteCounter, parentId ? parentId : CurrentRecipient); + else + return ActorSystem->Register(actor, mailboxType, poolId, ++RevolvingWriteCounter, parentId ? parentId : CurrentRecipient); + } + + TActorId TExecutorThread::RegisterActor(IActor* actor, TMailboxHeader* mailbox, ui32 hint, const TActorId& parentId) { + return Ctx.Executor->Register(actor, mailbox, hint, parentId ? parentId : CurrentRecipient); + } + + void TExecutorThread::UnregisterActor(TMailboxHeader* mailbox, ui64 localActorId) { + IActor* actor = mailbox->DetachActor(localActorId); + Ctx.DecrementActorsAliveByActivity(actor->GetActivityType()); + DyingActors.push_back(THolder(actor)); + } + + void TExecutorThread::DropUnregistered() { + DyingActors.clear(); // here is actual destruction of actors + } + + void TExecutorThread::Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) { + ++CurrentActorScheduledEventsCounter; + Ctx.Executor->Schedule(deadline, ev, cookie, Ctx.WorkerId); + } + + void TExecutorThread::Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) { + ++CurrentActorScheduledEventsCounter; + Ctx.Executor->Schedule(deadline, ev, cookie, Ctx.WorkerId); + } + + void TExecutorThread::Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) { + ++CurrentActorScheduledEventsCounter; + Ctx.Executor->Schedule(delta, ev, cookie, Ctx.WorkerId); + } + + template <class T> + inline TString SafeTypeName(T* t) { + if (t == nullptr) { + return "nullptr"; + } + try { + return TypeName(*t); + } catch (...) { + return "unknown-type"; + } + } + + inline TString ActorTypeName(const IActor* actor, ui32 activityType) { + return actor ? SafeTypeName(actor) : ("activityType_" + ToString(activityType) + " (destroyed)"); + } + + inline void LwTraceSlowDelivery(IEventHandle* ev, const IActor* actor, ui32 poolId, const TActorId& currentRecipient, + double delivMs, double sinceActivationMs, ui32 eventsExecutedBefore) { + const auto baseEv = (ev && ev->HasEvent()) ? ev->GetBase() : nullptr; + LWPROBE(EventSlowDelivery, + poolId, + delivMs, + sinceActivationMs, + eventsExecutedBefore, + baseEv ? SafeTypeName(baseEv) : (ev ? ToString(ev->Type) : TString("nullptr")), + currentRecipient.ToString(), + SafeTypeName(actor)); + } + + inline void LwTraceSlowEvent(IEventHandle* ev, ui32 evTypeForTracing, const IActor* actor, ui32 poolId, ui32 activityType, + const TActorId& currentRecipient, double eventMs) { + // Event could have been destroyed by actor->Receive(); + const auto baseEv = (ev && ev->HasEvent()) ? ev->GetBase() : nullptr; + LWPROBE(SlowEvent, + poolId, + eventMs, + baseEv ? 
SafeTypeName(baseEv) : ToString(evTypeForTracing), + currentRecipient.ToString(), + ActorTypeName(actor, activityType)); + } + + template <typename TMailbox> + void TExecutorThread::Execute(TMailbox* mailbox, ui32 hint) { + Y_VERIFY_DEBUG(DyingActors.empty()); + + bool reclaimAsFree = false; + + NHPTimer::STime hpstart = GetCycleCountFast(); + NHPTimer::STime hpprev = hpstart; + + IActor* actor = nullptr; + ui32 prevActivityType = std::numeric_limits<ui32>::max(); + TActorId recipient; + for (ui32 executed = 0; executed < Ctx.EventsPerMailbox; ++executed) { + TAutoPtr<IEventHandle> ev(mailbox->Pop()); + if (!!ev) { + NHPTimer::STime hpnow; + recipient = ev->GetRecipientRewrite(); + if (actor = mailbox->FindActor(recipient.LocalId())) { + TActorContext ctx(*mailbox, *this, hpprev, recipient); + TlsActivationContext = &ctx; + +#ifdef USE_ACTOR_CALLSTACK + TCallstack::GetTlsCallstack() = ev->Callstack; + TCallstack::GetTlsCallstack().SetLinesToSkip(); +#endif + CurrentRecipient = recipient; + CurrentActorScheduledEventsCounter = 0; + + if (executed == 0) { + double usec = Ctx.AddActivationStats(AtomicLoad(&mailbox->ScheduleMoment), hpprev); + if (usec > 500) { + GLOBAL_LWPROBE(ACTORLIB_PROVIDER, SlowActivation, Ctx.PoolId, usec / 1000.0); + } + } + + i64 usecDeliv = Ctx.AddEventDeliveryStats(ev->SendTime, hpprev); + if (usecDeliv > 5000) { + double sinceActivationMs = NHPTimer::GetSeconds(hpprev - hpstart) * 1000.0; + LwTraceSlowDelivery(ev.Get(), actor, Ctx.PoolId, CurrentRecipient, NHPTimer::GetSeconds(hpprev - ev->SendTime) * 1000.0, sinceActivationMs, executed); + } + + ui32 evTypeForTracing = ev->Type; + + ui32 activityType = actor->GetActivityType(); + if (activityType != prevActivityType) { + prevActivityType = activityType; + NProfiling::TMemoryTagScope::Reset(ActorSystem->MemProfActivityBase + activityType); + } + + actor->Receive(ev, ctx); + + size_t dyingActorsCnt = DyingActors.size(); + Ctx.UpdateActorsStats(dyingActorsCnt); + if (dyingActorsCnt) { + DropUnregistered(); + actor = nullptr; + } + + if (mailbox->IsEmpty()) // was not-free and become free, we must reclaim mailbox + reclaimAsFree = true; + + hpnow = GetCycleCountFast(); + NHPTimer::STime elapsed = Ctx.AddEventProcessingStats(hpprev, hpnow, activityType, CurrentActorScheduledEventsCounter); + if (elapsed > 1000000) { + LwTraceSlowEvent(ev.Get(), evTypeForTracing, actor, Ctx.PoolId, activityType, CurrentRecipient, NHPTimer::GetSeconds(elapsed) * 1000.0); + } + + // The actor might have been destroyed + if (actor) + actor->AddElapsedTicks(elapsed); + + CurrentRecipient = TActorId(); + } else { + TAutoPtr<IEventHandle> nonDelivered = ev->ForwardOnNondelivery(TEvents::TEvUndelivered::ReasonActorUnknown); + if (nonDelivered.Get()) { + ActorSystem->Send(nonDelivered); + } else { + Ctx.IncrementNonDeliveredEvents(); + } + hpnow = GetCycleCountFast(); + } + + hpprev = hpnow; + + // Soft preemption in united pool + if (Ctx.SoftDeadlineTs < (ui64)hpnow) { + AtomicStore(&mailbox->ScheduleMoment, hpnow); + Ctx.IncrementMailboxPushedOutBySoftPreemption(); + LWTRACK(MailboxPushedOutBySoftPreemption, + Ctx.Orbit, + Ctx.PoolId, + Ctx.Executor->GetName(), + executed + 1, + CyclesToDuration(hpnow - hpstart), + Ctx.WorkerId, + recipient.ToString(), + SafeTypeName(actor)); + break; + } + + // time limit inside one mailbox passed, let others do some work + if (hpnow - hpstart > (i64)Ctx.TimePerMailboxTs) { + AtomicStore(&mailbox->ScheduleMoment, hpnow); + Ctx.IncrementMailboxPushedOutByTime(); + LWTRACK(MailboxPushedOutByTime, + Ctx.Orbit, 
+ Ctx.PoolId, + Ctx.Executor->GetName(), + executed + 1, + CyclesToDuration(hpnow - hpstart), + Ctx.WorkerId, + recipient.ToString(), + SafeTypeName(actor)); + break; + } + + if (executed + 1 == Ctx.EventsPerMailbox) { + AtomicStore(&mailbox->ScheduleMoment, hpnow); + Ctx.IncrementMailboxPushedOutByEventCount(); + LWTRACK(MailboxPushedOutByEventCount, + Ctx.Orbit, + Ctx.PoolId, + Ctx.Executor->GetName(), + executed + 1, + CyclesToDuration(hpnow - hpstart), + Ctx.WorkerId, + recipient.ToString(), + SafeTypeName(actor)); + break; + } + } else { + if (executed == 0) + Ctx.IncrementEmptyMailboxActivation(); + LWTRACK(MailboxEmpty, + Ctx.Orbit, + Ctx.PoolId, + Ctx.Executor->GetName(), + executed, + CyclesToDuration(GetCycleCountFast() - hpstart), + Ctx.WorkerId, + recipient.ToString(), + SafeTypeName(actor)); + break; // empty queue, leave + } + } + + NProfiling::TMemoryTagScope::Reset(0); + TlsActivationContext = nullptr; + UnlockFromExecution(mailbox, Ctx.Executor, reclaimAsFree, hint, Ctx.WorkerId, RevolvingWriteCounter); + } + + TThreadId TExecutorThread::GetThreadId() const { +#ifdef _linux_ + while (AtomicLoad(&ThreadId) == UnknownThreadId) { + NanoSleep(1000); + } +#endif + return ThreadId; + } + + void* TExecutorThread::ThreadProc() { +#ifdef _linux_ + pid_t tid = syscall(SYS_gettid); + AtomicSet(ThreadId, (ui64)tid); +#endif + +#ifdef BALLOC + ThreadDisableBalloc(); +#endif + + if (ThreadName) { + ::SetCurrentThreadName(ThreadName); + } + + ExecutorPool->SetRealTimeMode(); + TAffinityGuard affinity(ExecutorPool->Affinity()); + + NHPTimer::STime hpnow = GetCycleCountFast(); + NHPTimer::STime hpprev = hpnow; + ui64 execCount = 0; + ui64 readyActivationCount = 0; + i64 execCycles = 0; + i64 nonExecCycles = 0; + + for (;;) { + if (ui32 activation = ExecutorPool->GetReadyActivation(Ctx, ++RevolvingReadCounter)) { + LWTRACK(ActivationBegin, Ctx.Orbit, Ctx.CpuId, Ctx.PoolId, Ctx.WorkerId, NHPTimer::GetSeconds(Ctx.Lease.GetPreciseExpireTs()) * 1e3); + readyActivationCount++; + if (TMailboxHeader* header = Ctx.MailboxTable->Get(activation)) { + if (header->LockForExecution()) { + hpnow = GetCycleCountFast(); + nonExecCycles += hpnow - hpprev; + hpprev = hpnow; + switch (header->Type) { + case TMailboxType::Simple: + Execute(static_cast<TMailboxTable::TSimpleMailbox*>(header), activation); + break; + case TMailboxType::Revolving: + Execute(static_cast<TMailboxTable::TRevolvingMailbox*>(header), activation); + break; + case TMailboxType::HTSwap: + Execute(static_cast<TMailboxTable::THTSwapMailbox*>(header), activation); + break; + case TMailboxType::ReadAsFilled: + Execute(static_cast<TMailboxTable::TReadAsFilledMailbox*>(header), activation); + break; + case TMailboxType::TinyReadAsFilled: + Execute(static_cast<TMailboxTable::TTinyReadAsFilledMailbox*>(header), activation); + break; + } + hpnow = GetCycleCountFast(); + execCycles += hpnow - hpprev; + hpprev = hpnow; + execCount++; + if (execCycles + nonExecCycles > 39000000) { // every 15 ms at 2.6GHz, so 1000 items is 15 sec (solomon interval) + LWPROBE(ExecutorThreadStats, ExecutorPool->PoolId, ExecutorPool->GetName(), Ctx.WorkerId, + execCount, readyActivationCount, + NHPTimer::GetSeconds(execCycles) * 1000.0, NHPTimer::GetSeconds(nonExecCycles) * 1000.0); + execCount = 0; + readyActivationCount = 0; + execCycles = 0; + nonExecCycles = 0; + Ctx.UpdateThreadTime(); + } + } + } + LWTRACK(ActivationEnd, Ctx.Orbit, Ctx.CpuId, Ctx.PoolId, Ctx.WorkerId); + Ctx.Orbit.Reset(); + } else { // no activation means PrepareStop was called so thread must 
terminate + break; + } + } + return nullptr; + } + + // there must be barrier and check-read with following cas + // or just cas w/o read. + // or queue unlocks must be performed with exchange and not generic write + // TODO: check performance of those options under contention + + // placed here in hope for better compiler optimization + + bool TMailboxHeader::MarkForSchedule() { + AtomicBarrier(); + for (;;) { + const ui32 state = AtomicLoad(&ExecutionState); + switch (state) { + case TExecutionState::Inactive: + if (AtomicUi32Cas(&ExecutionState, TExecutionState::Scheduled, TExecutionState::Inactive)) + return true; + break; + case TExecutionState::Scheduled: + return false; + case TExecutionState::Leaving: + if (AtomicUi32Cas(&ExecutionState, TExecutionState::LeavingMarked, TExecutionState::Leaving)) + return true; + break; + case TExecutionState::Executing: + case TExecutionState::LeavingMarked: + return false; + case TExecutionState::Free: + if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeScheduled, TExecutionState::Free)) + return true; + break; + case TExecutionState::FreeScheduled: + return false; + case TExecutionState::FreeLeaving: + if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeLeavingMarked, TExecutionState::FreeLeaving)) + return true; + break; + case TExecutionState::FreeExecuting: + case TExecutionState::FreeLeavingMarked: + return false; + default: + Y_FAIL(); + } + } + } + + bool TMailboxHeader::LockForExecution() { + AtomicBarrier(); // strictly speaking here should be AtomicBarrier, but as we got mailboxes from queue - this barrier is already set implicitly and could be removed + for (;;) { + const ui32 state = AtomicLoad(&ExecutionState); + switch (state) { + case TExecutionState::Inactive: + return false; + case TExecutionState::Scheduled: + if (AtomicUi32Cas(&ExecutionState, TExecutionState::Executing, TExecutionState::Scheduled)) + return true; + break; + case TExecutionState::Leaving: + case TExecutionState::Executing: + case TExecutionState::LeavingMarked: + return false; + case TExecutionState::Free: + if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeExecuting, TExecutionState::Free)) + return true; + break; + case TExecutionState::FreeScheduled: + if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeExecuting, TExecutionState::FreeScheduled)) + return true; + break; + case TExecutionState::FreeLeaving: + case TExecutionState::FreeExecuting: + case TExecutionState::FreeLeavingMarked: + return false; + default: + Y_FAIL(); + } + } + } + + bool TMailboxHeader::LockFromFree() { + AtomicBarrier(); + for (;;) { + const ui32 state = AtomicLoad(&ExecutionState); + switch (state) { + case TExecutionState::Inactive: + case TExecutionState::Scheduled: + case TExecutionState::Leaving: + case TExecutionState::Executing: + case TExecutionState::LeavingMarked: + Y_FAIL(); + case TExecutionState::Free: + if (AtomicUi32Cas(&ExecutionState, TExecutionState::Executing, TExecutionState::Free)) + return true; + break; + case TExecutionState::FreeScheduled: + if (AtomicUi32Cas(&ExecutionState, TExecutionState::Executing, TExecutionState::FreeScheduled)) + return true; + break; + case TExecutionState::FreeLeaving: + case TExecutionState::FreeExecuting: + case TExecutionState::FreeLeavingMarked: + return false; + default: + Y_FAIL(); + } + } + } + + void TMailboxHeader::UnlockFromExecution1() { + const ui32 state = AtomicLoad(&ExecutionState); + if (state == TExecutionState::Executing) + AtomicStore(&ExecutionState, (ui32)TExecutionState::Leaving); + else if 
(state == TExecutionState::FreeExecuting) + AtomicStore(&ExecutionState, (ui32)TExecutionState::FreeLeaving); + else + Y_FAIL(); + AtomicBarrier(); + } + + bool TMailboxHeader::UnlockFromExecution2(bool wouldReschedule) { + AtomicBarrier(); + for (;;) { + const ui32 state = AtomicLoad(&ExecutionState); + switch (state) { + case TExecutionState::Inactive: + case TExecutionState::Scheduled: + Y_FAIL(); + case TExecutionState::Leaving: + if (!wouldReschedule) { + if (AtomicUi32Cas(&ExecutionState, TExecutionState::Inactive, TExecutionState::Leaving)) + return false; + } else { + if (AtomicUi32Cas(&ExecutionState, TExecutionState::Scheduled, TExecutionState::Leaving)) + return true; + } + break; + case TExecutionState::Executing: + Y_FAIL(); + case TExecutionState::LeavingMarked: + if (AtomicUi32Cas(&ExecutionState, TExecutionState::Scheduled, TExecutionState::LeavingMarked)) + return true; + break; + case TExecutionState::Free: + case TExecutionState::FreeScheduled: + Y_FAIL(); + case TExecutionState::FreeLeaving: + if (!wouldReschedule) { + if (AtomicUi32Cas(&ExecutionState, TExecutionState::Free, TExecutionState::FreeLeaving)) + return false; + } else { + if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeScheduled, TExecutionState::FreeLeaving)) + return true; + } + break; + case TExecutionState::FreeExecuting: + Y_FAIL(); + case TExecutionState::FreeLeavingMarked: + if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeScheduled, TExecutionState::FreeLeavingMarked)) + return true; + break; + default: + Y_FAIL(); + } + } + } + + bool TMailboxHeader::UnlockAsFree(bool wouldReschedule) { + AtomicBarrier(); + for (;;) { + const ui32 state = AtomicLoad(&ExecutionState); + switch (state) { + case TExecutionState::Inactive: + case TExecutionState::Scheduled: + Y_FAIL(); + case TExecutionState::Leaving: + if (!wouldReschedule) { + if (AtomicUi32Cas(&ExecutionState, TExecutionState::Free, TExecutionState::Leaving)) + return false; + } else { + if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeScheduled, TExecutionState::Leaving)) + return true; + } + break; + case TExecutionState::Executing: + Y_FAIL(); + case TExecutionState::LeavingMarked: + if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeScheduled, TExecutionState::LeavingMarked)) + return true; + break; + case TExecutionState::Free: + case TExecutionState::FreeScheduled: + case TExecutionState::FreeLeaving: + case TExecutionState::FreeExecuting: + case TExecutionState::FreeLeavingMarked: + Y_FAIL(); + default: + Y_FAIL(); + } + } + } +} diff --git a/library/cpp/actors/core/executor_thread.h b/library/cpp/actors/core/executor_thread.h new file mode 100644 index 0000000000..9d3c573f0d --- /dev/null +++ b/library/cpp/actors/core/executor_thread.h @@ -0,0 +1,112 @@ +#pragma once + +#include "defs.h" +#include "event.h" +#include "actor.h" +#include "actorsystem.h" +#include "callstack.h" +#include "probes.h" +#include "worker_context.h" + +#include <library/cpp/actors/util/datetime.h> + +#include <util/system/thread.h> + +namespace NActors { + + class TExecutorThread: public ISimpleThread { + public: + static constexpr TDuration DEFAULT_TIME_PER_MAILBOX = + TDuration::MilliSeconds(10); + static constexpr ui32 DEFAULT_EVENTS_PER_MAILBOX = 100; + + TExecutorThread(TWorkerId workerId, + TWorkerId cpuId, + TActorSystem* actorSystem, + IExecutorPool* executorPool, + TMailboxTable* mailboxTable, + const TString& threadName, + TDuration timePerMailbox = DEFAULT_TIME_PER_MAILBOX, + ui32 eventsPerMailbox = DEFAULT_EVENTS_PER_MAILBOX); 
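+
+        // Convenience overload for threads that are not bound to a specific CPU:
+        // the constructor below delegates to the one above with cpuId = 0.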
+ + TExecutorThread(TWorkerId workerId, + TActorSystem* actorSystem, + IExecutorPool* executorPool, + TMailboxTable* mailboxTable, + const TString& threadName, + TDuration timePerMailbox = DEFAULT_TIME_PER_MAILBOX, + ui32 eventsPerMailbox = DEFAULT_EVENTS_PER_MAILBOX) + : TExecutorThread(workerId, 0, actorSystem, executorPool, mailboxTable, threadName, timePerMailbox, eventsPerMailbox) + {} + + TActorId RegisterActor(IActor* actor, TMailboxType::EType mailboxType = TMailboxType::HTSwap, ui32 poolId = Max<ui32>(), + const TActorId& parentId = TActorId()); + TActorId RegisterActor(IActor* actor, TMailboxHeader* mailbox, ui32 hint, const TActorId& parentId = TActorId()); + void UnregisterActor(TMailboxHeader* mailbox, ui64 localActorId); + void DropUnregistered(); + const std::vector<THolder<IActor>>& GetUnregistered() const { return DyingActors; } + + void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr); + void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr); + void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr); + + bool Send(TAutoPtr<IEventHandle> ev) { +#ifdef USE_ACTOR_CALLSTACK + ev->Callstack = TCallstack::GetTlsCallstack(); + ev->Callstack.Trace(); +#endif + Ctx.IncrementSentEvents(); + return ActorSystem->Send(ev); + } + + void GetCurrentStats(TExecutorThreadStats& statsCopy) const { + Ctx.GetCurrentStats(statsCopy); + } + + TThreadId GetThreadId() const; // blocks, must be called after Start() + TWorkerId GetWorkerId() const { return Ctx.WorkerId; } + + private: + void* ThreadProc(); + + template <typename TMailbox> + void Execute(TMailbox* mailbox, ui32 hint); + + public: + TActorSystem* const ActorSystem; + + private: + // Pool-specific + IExecutorPool* const ExecutorPool; + + // Event-specific (currently executing) + TVector<THolder<IActor>> DyingActors; + TActorId CurrentRecipient; + ui64 CurrentActorScheduledEventsCounter = 0; + + // Thread-specific + TWorkerContext Ctx; + ui64 RevolvingReadCounter = 0; + ui64 RevolvingWriteCounter = 0; + const TString ThreadName; + volatile TThreadId ThreadId = UnknownThreadId; + }; + + template <typename TMailbox> + void UnlockFromExecution(TMailbox* mailbox, IExecutorPool* executorPool, bool asFree, ui32 hint, TWorkerId workerId, ui64& revolvingWriteCounter) { + mailbox->UnlockFromExecution1(); + const bool needReschedule1 = (nullptr != mailbox->Head()); + if (!asFree) { + if (mailbox->UnlockFromExecution2(needReschedule1)) { + RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast()); + executorPool->ScheduleActivationEx(hint, ++revolvingWriteCounter); + } + } else { + if (mailbox->UnlockAsFree(needReschedule1)) { + RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast()); + executorPool->ScheduleActivationEx(hint, ++revolvingWriteCounter); + } + executorPool->ReclaimMailbox(TMailbox::MailboxType, hint, workerId, ++revolvingWriteCounter); + } + } +} diff --git a/library/cpp/actors/core/hfunc.h b/library/cpp/actors/core/hfunc.h new file mode 100644 index 0000000000..26f3c65013 --- /dev/null +++ b/library/cpp/actors/core/hfunc.h @@ -0,0 +1,84 @@ +#pragma once + +#include "actor.h" +#include "executor_thread.h" + +#include <util/system/defaults.h> + +#define HFunc(TEvType, HandleFunc) \ + case TEvType::EventType: { \ + typename TEvType::TPtr* x = reinterpret_cast<typename TEvType::TPtr*>(&ev); \ + HandleFunc(*x, ctx); \ + break; \ + } + +#define hFunc(TEvType, HandleFunc) \ + 
case TEvType::EventType: { \ + typename TEvType::TPtr* x = reinterpret_cast<typename TEvType::TPtr*>(&ev); \ + HandleFunc(*x); \ + break; \ + } + +#define HFuncTraced(TEvType, HandleFunc) \ + case TEvType::EventType: { \ + TRACE_EVENT_TYPE(Y_STRINGIZE(TEvType)); \ + TEvType::TPtr* x = reinterpret_cast<TEvType::TPtr*>(&ev); \ + HandleFunc(*x, ctx); \ + break; \ + } + +#define hFuncTraced(TEvType, HandleFunc) \ + case TEvType::EventType: { \ + TRACE_EVENT_TYPE(Y_STRINGIZE(TEvType)); \ + typename TEvType::TPtr* x = reinterpret_cast<typename TEvType::TPtr*>(&ev); \ + HandleFunc(*x); \ + break; \ + } + +#define HTemplFunc(TEvType, HandleFunc) \ + case TEvType::EventType: { \ + typename TEvType::TPtr* x = reinterpret_cast<typename TEvType::TPtr*>(&ev); \ + HandleFunc(*x, ctx); \ + break; \ + } + +#define hTemplFunc(TEvType, HandleFunc) \ + case TEvType::EventType: { \ + typename TEvType::TPtr* x = reinterpret_cast<typename TEvType::TPtr*>(&ev); \ + HandleFunc(*x); \ + break; \ + } + +#define SFunc(TEvType, HandleFunc) \ + case TEvType::EventType: \ + HandleFunc(ctx); \ + break; + +#define sFunc(TEvType, HandleFunc) \ + case TEvType::EventType: \ + HandleFunc(); \ + break; + +#define CFunc(TEventType, HandleFunc) \ + case TEventType: \ + HandleFunc(ctx); \ + break; + +#define cFunc(TEventType, HandleFunc) \ + case TEventType: \ + HandleFunc(); \ + break; + +#define FFunc(TEventType, HandleFunc) \ + case TEventType: \ + HandleFunc(ev, ctx); \ + break; + +#define fFunc(TEventType, HandleFunc) \ + case TEventType: \ + HandleFunc(ev); \ + break; + +#define IgnoreFunc(TEvType) \ + case TEvType::EventType: \ + break; diff --git a/library/cpp/actors/core/interconnect.cpp b/library/cpp/actors/core/interconnect.cpp new file mode 100644 index 0000000000..9fb33413b2 --- /dev/null +++ b/library/cpp/actors/core/interconnect.cpp @@ -0,0 +1,170 @@ +#include "interconnect.h" +#include <util/digest/murmur.h> +#include <google/protobuf/text_format.h> + +namespace NActors { + + TNodeLocation::TNodeLocation(const NActorsInterconnect::TNodeLocation& location) { + const NProtoBuf::Descriptor *descriptor = NActorsInterconnect::TNodeLocation::descriptor(); + const NActorsInterconnect::TNodeLocation *locp = &location; + NActorsInterconnect::TNodeLocation temp; // for legacy location case + + // WalleConfig compatibility section + if (locp->HasBody()) { + if (locp == &location) { + temp.CopyFrom(*locp); + locp = &temp; + } + temp.SetUnit(::ToString(temp.GetBody())); + temp.ClearBody(); + } + + // legacy value processing + if (locp->HasDataCenterNum() || locp->HasRoomNum() || locp->HasRackNum() || locp->HasBodyNum()) { + if (locp == &location) { + temp.CopyFrom(*locp); + locp = &temp; + } + LegacyValue = TLegacyValue{temp.GetDataCenterNum(), temp.GetRoomNum(), temp.GetRackNum(), temp.GetBodyNum()}; + temp.ClearDataCenterNum(); + temp.ClearRoomNum(); + temp.ClearRackNum(); + temp.ClearBodyNum(); + + // legacy format must not interfere with new one + const NProtoBuf::Reflection *reflection = temp.GetReflection(); + for (int i = 0, count = descriptor->field_count(); i < count; ++i) { + Y_VERIFY(!reflection->HasField(temp, descriptor->field(i))); + } + + const auto& v = LegacyValue->DataCenter; + const char *p = reinterpret_cast<const char*>(&v); + temp.SetDataCenter(TString(p, strnlen(p, sizeof(ui32)))); + temp.SetModule(::ToString(LegacyValue->Room)); + temp.SetRack(::ToString(LegacyValue->Rack)); + temp.SetUnit(::ToString(LegacyValue->Body)); + } + + auto makeString = [&] { + NProtoBuf::TextFormat::Printer p; + 
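+            // render the location proto on a single line for use in the error messages below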
p.SetSingleLineMode(true); + TString s; + p.PrintToString(*locp, &s); + return s; + }; + + // modern format parsing + const NProtoBuf::Reflection *reflection = locp->GetReflection(); + for (int i = 0, count = descriptor->field_count(); i < count; ++i) { + const NProtoBuf::FieldDescriptor *field = descriptor->field(i); + if (reflection->HasField(*locp, field)) { + Y_VERIFY(field->type() == NProtoBuf::FieldDescriptor::TYPE_STRING, "Location# %s", makeString().data()); + Items.emplace_back(TKeys::E(field->number()), reflection->GetString(*locp, field)); + } + } + const NProtoBuf::UnknownFieldSet& unknown = locp->unknown_fields(); + for (int i = 0, count = unknown.field_count(); i < count; ++i) { + const NProtoBuf::UnknownField& field = unknown.field(i); + Y_VERIFY(field.type() == NProtoBuf::UnknownField::TYPE_LENGTH_DELIMITED, "Location# %s", makeString().data()); + Items.emplace_back(TKeys::E(field.number()), field.length_delimited()); + } + std::sort(Items.begin(), Items.end()); + } + + TNodeLocation::TNodeLocation(TFromSerialized, const TString& s) + : TNodeLocation(ParseLocation(s)) + {} + + NActorsInterconnect::TNodeLocation TNodeLocation::ParseLocation(const TString& s) { + NActorsInterconnect::TNodeLocation res; + const bool success = res.ParseFromString(s); + Y_VERIFY(success); + return res; + } + + TString TNodeLocation::ToStringUpTo(TKeys::E upToKey) const { + const NProtoBuf::Descriptor *descriptor = NActorsInterconnect::TNodeLocation::descriptor(); + + TStringBuilder res; + for (const auto& [key, value] : Items) { + if (upToKey < key) { + break; + } + TString name; + if (const NProtoBuf::FieldDescriptor *field = descriptor->FindFieldByNumber(key)) { + name = field->options().GetExtension(NActorsInterconnect::PrintName); + } else { + name = ::ToString(int(key)); + } + if (key != upToKey) { + res << name << "=" << value << "/"; + } else { + res << value; + } + } + return res; + } + + void TNodeLocation::Serialize(NActorsInterconnect::TNodeLocation *pb) const { + const NProtoBuf::Descriptor *descriptor = NActorsInterconnect::TNodeLocation::descriptor(); + const NProtoBuf::Reflection *reflection = pb->GetReflection(); + NProtoBuf::UnknownFieldSet *unknown = pb->mutable_unknown_fields(); + for (const auto& [key, value] : Items) { + if (const NProtoBuf::FieldDescriptor *field = descriptor->FindFieldByNumber(key)) { + reflection->SetString(pb, field, value); + } else { + unknown->AddLengthDelimited(key)->assign(value); + } + } + } + + TString TNodeLocation::GetSerializedLocation() const { + NActorsInterconnect::TNodeLocation pb; + Serialize(&pb); + TString s; + const bool success = pb.SerializeToString(&s); + Y_VERIFY(success); + return s; + } + + TNodeLocation::TLegacyValue TNodeLocation::GetLegacyValue() const { + if (LegacyValue) { + return *LegacyValue; + } + + ui32 dataCenterId = 0, moduleId = 0, rackId = 0, unitId = 0; + + for (const auto& [key, value] : Items) { + switch (key) { + case TKeys::DataCenter: + memcpy(&dataCenterId, value.data(), Min<size_t>(sizeof(dataCenterId), value.length())); + break; + + case TKeys::Module: { + const bool success = TryFromString(value, moduleId); + Y_VERIFY(success); + break; + } + + case TKeys::Rack: + // hacky way to obtain numeric id by a rack name + if (!TryFromString(value, rackId)) { + rackId = MurmurHash<ui32>(value.data(), value.length()); + } + break; + + case TKeys::Unit: { + const bool success = TryFromString(value, unitId); + Y_VERIFY(success); + break; + } + + default: + Y_FAIL("unexpected legacy key# %d", key); + } + } + + return 
{dataCenterId, moduleId, rackId, unitId}; + } + +} // NActors diff --git a/library/cpp/actors/core/interconnect.h b/library/cpp/actors/core/interconnect.h new file mode 100644 index 0000000000..8d1cbd1e77 --- /dev/null +++ b/library/cpp/actors/core/interconnect.h @@ -0,0 +1,248 @@ +#pragma once + +#include "events.h" +#include "event_local.h" +#include <library/cpp/actors/protos/interconnect.pb.h> +#include <util/string/cast.h> +#include <util/string/builder.h> + +namespace NActors { + class TNodeLocation { + public: + struct TKeys { + enum E : int { + DataCenter = 10, + Module = 20, + Rack = 30, + Unit = 40, + }; + }; + + struct TLegacyValue { + ui32 DataCenter; + ui32 Room; + ui32 Rack; + ui32 Body; + + auto ConvertToTuple() const { return std::make_tuple(DataCenter, Room, Rack, Body); } + + int Compare(const TLegacyValue& other) const { + const auto x = ConvertToTuple(); + const auto y = other.ConvertToTuple(); + if (x < y) { + return -1; + } else if (y < x) { + return 1; + } else { + return 0; + } + } + + friend bool operator ==(const TLegacyValue& x, const TLegacyValue& y) { return x.Compare(y) == 0; } + }; + + private: + std::optional<TLegacyValue> LegacyValue; + std::vector<std::pair<TKeys::E, TString>> Items; + + public: + // generic ctors + TNodeLocation() = default; + TNodeLocation(const TNodeLocation&) = default; + TNodeLocation(TNodeLocation&&) = default; + + // protobuf-parser ctor + explicit TNodeLocation(const NActorsInterconnect::TNodeLocation& location); + + // serialized protobuf ctor + static constexpr struct TFromSerialized {} FromSerialized {}; + TNodeLocation(TFromSerialized, const TString& s); + + // parser helper function + static NActorsInterconnect::TNodeLocation ParseLocation(const TString& s); + + // assignment operators + TNodeLocation& operator =(const TNodeLocation&) = default; + TNodeLocation& operator =(TNodeLocation&&) = default; + + void Serialize(NActorsInterconnect::TNodeLocation *pb) const; + TString GetSerializedLocation() const; + + TString GetDataCenterId() const { return ToStringUpTo(TKeys::DataCenter); } + TString GetModuleId() const { return ToStringUpTo(TKeys::Module); } + TString GetRackId() const { return ToStringUpTo(TKeys::Rack); } + TString ToString() const { return ToStringUpTo(TKeys::E(Max<int>())); } + TString ToStringUpTo(TKeys::E upToKey) const; + + TLegacyValue GetLegacyValue() const; + + const std::vector<std::pair<TKeys::E, TString>>& GetItems() const { return Items; } + + bool HasKey(TKeys::E key) const { + auto comp = [](const auto& p, TKeys::E value) { return p.first < value; }; + const auto it = std::lower_bound(Items.begin(), Items.end(), key, comp); + return it != Items.end() && it->first == key; + } + + int Compare(const TNodeLocation& other) const { + if (LegacyValue || other.LegacyValue) { + return GetLegacyValue().Compare(other.GetLegacyValue()); + } else if (Items < other.Items) { + return -1; + } else if (other.Items < Items) { + return 1; + } else { + return 0; + } + } + + void InheritLegacyValue(const TNodeLocation& other) { + LegacyValue = other.GetLegacyValue(); + } + + friend bool operator ==(const TNodeLocation& x, const TNodeLocation& y) { return x.Compare(y) == 0; } + friend bool operator !=(const TNodeLocation& x, const TNodeLocation& y) { return x.Compare(y) != 0; } + friend bool operator < (const TNodeLocation& x, const TNodeLocation& y) { return x.Compare(y) < 0; } + friend bool operator <=(const TNodeLocation& x, const TNodeLocation& y) { return x.Compare(y) <= 0; } + friend bool operator > (const 
TNodeLocation& x, const TNodeLocation& y) { return x.Compare(y) > 0; } + friend bool operator >=(const TNodeLocation& x, const TNodeLocation& y) { return x.Compare(y) >= 0; } + }; + + struct TEvInterconnect { + enum EEv { + EvForward = EventSpaceBegin(TEvents::ES_INTERCONNECT), + EvResolveNode, // resolve info about node (internal) + EvNodeAddress, // node info (internal) + EvConnectNode, // request proxy to establish connection (like: we would send something there soon) + EvAcceptIncoming, + EvNodeConnected, // node connected notify + EvNodeDisconnected, // node disconnected notify + EvRegisterNode, + EvRegisterNodeResult, + EvListNodes, + EvNodesInfo, + EvDisconnect, + EvGetNode, + EvNodeInfo, + EvClosePeerSocket, + EvCloseInputSession, + EvPoisonSession, + EvTerminate, + EvEnd + }; + + enum ESubscribes { + SubConnected, + SubDisconnected, + }; + + static_assert(EvEnd < EventSpaceEnd(TEvents::ES_INTERCONNECT), "expect EvEnd < EventSpaceEnd(TEvents::ES_INTERCONNECT)"); + + struct TEvResolveNode; + struct TEvNodeAddress; + + struct TEvConnectNode: public TEventBase<TEvConnectNode, EvConnectNode> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvConnectNode, "TEvInterconnect::TEvConnectNode") + }; + + struct TEvAcceptIncoming; + + struct TEvNodeConnected: public TEventLocal<TEvNodeConnected, EvNodeConnected> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvNodeConnected, "TEvInterconnect::TEvNodeConnected") + TEvNodeConnected(ui32 node) noexcept + : NodeId(node) + { + } + const ui32 NodeId; + }; + + struct TEvNodeDisconnected: public TEventLocal<TEvNodeDisconnected, EvNodeDisconnected> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvNodeDisconnected, "TEvInterconnect::TEvNodeDisconnected") + TEvNodeDisconnected(ui32 node) noexcept + : NodeId(node) + { + } + const ui32 NodeId; + }; + + struct TEvRegisterNode; + struct TEvRegisterNodeResult; + + struct TEvListNodes: public TEventLocal<TEvListNodes, EvListNodes> { + }; + + struct TNodeInfo { + ui32 NodeId; + TString Address; + TString Host; + TString ResolveHost; + ui16 Port; + TNodeLocation Location; + + TNodeInfo() = default; + TNodeInfo(const TNodeInfo&) = default; + TNodeInfo& operator =(const TNodeInfo&) = default; + TNodeInfo(ui32 nodeId, + const TString& address, + const TString& host, + const TString& resolveHost, + ui16 port, + const TNodeLocation& location) + : NodeId(nodeId) + , Address(address) + , Host(host) + , ResolveHost(resolveHost) + , Port(port) + , Location(location) + { + } + + operator ui32() const { + return NodeId; + } + }; + + struct TEvNodesInfo: public TEventLocal<TEvNodesInfo, EvNodesInfo> { + TVector<TNodeInfo> Nodes; + + const TNodeInfo* GetNodeInfo(ui32 nodeId) const { + for (const auto& x : Nodes) { + if (x.NodeId == nodeId) + return &x; + } + return nullptr; + } + }; + + struct TEvDisconnect; + + struct TEvGetNode: public TEventLocal<TEvGetNode, EvGetNode> { + ui32 NodeId; + TInstant Deadline; + + TEvGetNode(ui32 nodeId, TInstant deadline = TInstant::Max()) + : NodeId(nodeId) + , Deadline(deadline) + { + } + }; + + struct TEvNodeInfo: public TEventLocal<TEvNodeInfo, EvNodeInfo> { + TEvNodeInfo(ui32 nodeId) + : NodeId(nodeId) + { + } + + ui32 NodeId; + THolder<TNodeInfo> Node; + }; + + struct TEvClosePeerSocket : TEventLocal<TEvClosePeerSocket, EvClosePeerSocket> {}; + + struct TEvCloseInputSession : TEventLocal<TEvCloseInputSession, EvCloseInputSession> {}; + + struct TEvPoisonSession : TEventLocal<TEvPoisonSession, EvPoisonSession> {}; + + struct TEvTerminate : TEventLocal<TEvTerminate, EvTerminate> {}; + }; +} diff --git a/library/cpp/actors/core/invoke.h 
b/library/cpp/actors/core/invoke.h new file mode 100644 index 0000000000..931a9767dd --- /dev/null +++ b/library/cpp/actors/core/invoke.h @@ -0,0 +1,110 @@
+#pragma once
+
+#include "actor_bootstrapped.h"
+#include "events.h"
+#include "event_local.h"
+
+#include <any>
+#include <type_traits>
+#include <utility>
+#include <variant>
+
+#include <util/system/type_name.h>
+
+namespace NActors {
+
+    struct TEvents::TEvInvokeResult
+        : TEventLocal<TEvInvokeResult, TSystem::InvokeResult>
+    {
+        using TProcessCallback = std::function<void(TEvInvokeResult&, const TActorContext&)>;
+        TProcessCallback ProcessCallback;
+        std::variant<std::any /* the value */, std::exception_ptr> Result;
+
+        // This constructor creates a TEvInvokeResult holding the result of calling callback(args...), or an
+        // exception_ptr if an exception occurs during evaluation.
+        template<typename TCallback, typename... TArgs>
+        TEvInvokeResult(TProcessCallback&& process, TCallback&& callback, TArgs&&... args)
+            : ProcessCallback(std::move(process))
+        {
+            try {
+                if constexpr (std::is_void_v<std::invoke_result_t<TCallback, TArgs...>>) {
+                    // just invoke the callback without saving any value
+                    std::invoke(std::forward<TCallback>(callback), std::forward<TArgs>(args)...);
+                } else {
+                    Result.emplace<std::any>(std::invoke(std::forward<TCallback>(callback), std::forward<TArgs>(args)...));
+                }
+            } catch (...) {
+                Result.emplace<std::exception_ptr>(std::current_exception());
+            }
+        }
+
+        void Process(const TActorContext& ctx) {
+            ProcessCallback(*this, ctx);
+        }
+
+        template<typename TCallback>
+        std::invoke_result_t<TCallback, const TActorContext&> GetResult() {
+            using T = std::invoke_result_t<TCallback, const TActorContext&>;
+            return std::visit([](auto& arg) -> T {
+                using TArg = std::decay_t<decltype(arg)>;
+                if constexpr (std::is_same_v<TArg, std::exception_ptr>) {
+                    std::rethrow_exception(arg);
+                } else if constexpr (std::is_void_v<T>) {
+                    Y_VERIFY(!arg.has_value());
+                } else if (auto *value = std::any_cast<T>(&arg)) {
+                    return std::move(*value);
+                } else {
+                    Y_FAIL("unsupported return type for TEvInvokeResult: actual# %s != expected# %s",
+                        TypeName(arg.type()).data(), TypeName<T>().data());
+                }
+            }, Result);
+        }
+    };
+
+    // Invoke Actor is used to make procedure calls in specific thread pools.
+    //
+    // The actor is created by CreateInvokeActor(callback, complete), where `callback` is the function that will be
+    // invoked upon actor registration; a TEvInvokeResult with the result of the called function is then issued to
+    // the parent actor. If the called function throws an exception, the exception arrives in the result. The
+    // receiver of this message can either handle it by its own means, calling ev.GetResult() (which rethrows the
+    // exception if one occurred in the called function, or returns its return value; notice that even when there is
+    // no return value, GetResult() should still be called to avoid losing an exception), or invoke ev.Process(),
+    // which invokes the callback provided as the `complete` parameter of CreateInvokeActor. The complete handler is
+    // invoked with the result-getter lambda as the first argument and the actor system context as the second one.
+    // The result-getter should be called to obtain the resulting value or exception, like the GetResult() method of
+    // the TEvInvokeResult event.
+    //
+    // Notice that `callback` execution usually occurs in a separate actor on a separate mailbox and should not use
+    // the parent actor's state. The `complete` handler, however, is invoked in the parent's context and can use its contents.
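+    //
+    // A minimal usage sketch (illustrative only: the activity value, the lambdas and the
+    // registration call are placeholders, not a fixed part of this API):
+    //
+    //     ctx.Register(CreateInvokeActor</*Activity*/ 0>(
+    //         [](const TActorContext&) { return 42; },      // runs in the invoke actor
+    //         [](auto getResult, const TActorContext&) {
+    //             int value = getResult();                  // rethrows if the callback threw
+    //             Y_UNUSED(value);
+    //         }).release());
+    //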
Do not forget to + // handle TEvInvokeResult event by calling Process/GetResult method, whichever is necessary. + + template<typename TCallback, typename TCompletion, ui32 Activity> + class TInvokeActor : public TActorBootstrapped<TInvokeActor<TCallback, TCompletion, Activity>> { + TCallback Callback; + TCompletion Complete; + + public: + static constexpr auto ActorActivityType() { + return static_cast<IActor::EActorActivity>(Activity); + } + + TInvokeActor(TCallback&& callback, TCompletion&& complete) + : Callback(std::move(callback)) + , Complete(std::move(complete)) + {} + + void Bootstrap(const TActorId& parentId, const TActorContext& ctx) { + auto process = [complete = std::move(Complete)](TEvents::TEvInvokeResult& res, const TActorContext& ctx) { + complete([&] { return res.GetResult<TCallback>(); }, ctx); + }; + ctx.Send(parentId, new TEvents::TEvInvokeResult(std::move(process), std::move(Callback), ctx)); + TActorBootstrapped<TInvokeActor>::Die(ctx); + } + }; + + template<ui32 Activity, typename TCallback, typename TCompletion> + std::unique_ptr<IActor> CreateInvokeActor(TCallback&& callback, TCompletion&& complete) { + return std::make_unique<TInvokeActor<std::decay_t<TCallback>, std::decay_t<TCompletion>, Activity>>( + std::forward<TCallback>(callback), std::forward<TCompletion>(complete)); + } + +} // NActors diff --git a/library/cpp/actors/core/io_dispatcher.cpp b/library/cpp/actors/core/io_dispatcher.cpp new file mode 100644 index 0000000000..90699ff16c --- /dev/null +++ b/library/cpp/actors/core/io_dispatcher.cpp @@ -0,0 +1,234 @@ +#include "io_dispatcher.h" +#include "actor_bootstrapped.h" +#include "hfunc.h" +#include <util/system/mutex.h> +#include <util/system/condvar.h> +#include <util/system/thread.h> +#include <map> +#include <list> + +namespace NActors { + + class TIoDispatcherActor : public TActorBootstrapped<TIoDispatcherActor> { + enum { + EvNotifyThreadStopped = EventSpaceBegin(TEvents::ES_PRIVATE), + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // IO task queue + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + class TTask { + TInstant Timestamp; + std::function<void()> Callback; + + public: + TTask(TInstant timestamp, TEvInvokeQuery *ev) + : Timestamp(timestamp) + , Callback(std::move(ev->Callback)) + {} + + void Execute() { + Callback(); + } + + TInstant GetTimestamp() const { + return Timestamp; + } + }; + + class TTaskQueue { + std::list<TTask> Tasks; + TMutex Mutex; + TCondVar CondVar; + size_t NumThreadsToStop = 0; + + public: + void Enqueue(TInstant timestamp, TEvInvokeQuery *ev) { + std::list<TTask> list; + list.emplace_back(timestamp, ev); + with_lock (Mutex) { + Tasks.splice(Tasks.end(), std::move(list)); + } + CondVar.Signal(); + } + + bool Dequeue(std::list<TTask>& list, bool *sendNotify) { + with_lock (Mutex) { + CondVar.Wait(Mutex, [&] { return NumThreadsToStop || !Tasks.empty(); }); + if (NumThreadsToStop) { + *sendNotify = NumThreadsToStop != Max<size_t>(); + if (*sendNotify) { + --NumThreadsToStop; + } + return false; + } else { + list.splice(list.end(), Tasks, Tasks.begin()); + return true; + } + } + } + + void Stop() { + with_lock (Mutex) { + NumThreadsToStop = Max<size_t>(); + } + CondVar.BroadCast(); + } + + void StopOne() { + with_lock (Mutex) { + ++NumThreadsToStop; + Y_VERIFY(NumThreadsToStop); + } + CondVar.Signal(); + } + + std::optional<TInstant> GetEarliestTaskTimestamp() { + with_lock (Mutex) 
{ + return Tasks.empty() ? std::nullopt : std::make_optional(Tasks.front().GetTimestamp()); + } + } + }; + + TTaskQueue TaskQueue; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // IO dispatcher threads + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + class TThread : public ISimpleThread { + TIoDispatcherActor& Actor; + TActorSystem* const ActorSystem; + + public: + TThread(TIoDispatcherActor& actor, TActorSystem *actorSystem) + : Actor(actor) + , ActorSystem(actorSystem) + { + Start(); + } + + void *ThreadProc() override { + SetCurrentThreadName("kikimr IO"); + for (;;) { + std::list<TTask> tasks; + bool sendNotify; + if (!Actor.TaskQueue.Dequeue(tasks, &sendNotify)) { + if (sendNotify) { + ActorSystem->Send(new IEventHandle(EvNotifyThreadStopped, 0, Actor.SelfId(), TActorId(), + nullptr, TThread::CurrentThreadId())); + } + break; + } + for (TTask& task : tasks) { + task.Execute(); + ++*Actor.TasksCompleted; + } + } + return nullptr; + } + }; + + static constexpr size_t MinThreadCount = 4; + static constexpr size_t MaxThreadCount = 64; + std::map<TThread::TId, std::unique_ptr<TThread>> Threads; + size_t NumRunningThreads = 0; + + void StartThread() { + auto thread = std::make_unique<TThread>(*this, TlsActivationContext->ExecutorThread.ActorSystem); + const TThread::TId id = thread->Id(); + Threads.emplace(id, std::move(thread)); + *NumThreads = ++NumRunningThreads; + ++*ThreadsStarted; + } + + void StopThread() { + Y_VERIFY(Threads.size()); + TaskQueue.StopOne(); + *NumThreads = --NumRunningThreads; + ++*ThreadsStopped; + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Counters + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + NMonitoring::TDynamicCounters::TCounterPtr NumThreads; + NMonitoring::TDynamicCounters::TCounterPtr TasksAdded; + NMonitoring::TDynamicCounters::TCounterPtr TasksCompleted; + NMonitoring::TDynamicCounters::TCounterPtr ThreadsStarted; + NMonitoring::TDynamicCounters::TCounterPtr ThreadsStopped; + + public: + TIoDispatcherActor(const NMonitoring::TDynamicCounterPtr& counters) + : NumThreads(counters->GetCounter("NumThreads")) + , TasksAdded(counters->GetCounter("TasksAdded", true)) + , TasksCompleted(counters->GetCounter("TasksCompleted", true)) + , ThreadsStarted(counters->GetCounter("ThreadsStarted", true)) + , ThreadsStopped(counters->GetCounter("ThreadsStopped", true)) + {} + + ~TIoDispatcherActor() override { + TaskQueue.Stop(); + } + + void Bootstrap() { + while (NumRunningThreads < MinThreadCount) { + StartThread(); + } + HandleWakeup(); + Become(&TThis::StateFunc); + } + + void HandleThreadStopped(TAutoPtr<IEventHandle> ev) { + auto it = Threads.find(ev->Cookie); + Y_VERIFY(it != Threads.end()); + it->second->Join(); + Threads.erase(it); + } + + void Handle(TEvInvokeQuery::TPtr ev) { + ++*TasksAdded; + TaskQueue.Enqueue(TActivationContext::Now(), ev->Get()); + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Thread usage counter logic + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + std::optional<TInstant> IdleTimestamp; + static constexpr TDuration ThreadStartTime = TDuration::MilliSeconds(500); + static constexpr TDuration 
ThreadStopTime = TDuration::MilliSeconds(500); + + void HandleWakeup() { + const TInstant now = TActivationContext::Now(); + std::optional<TInstant> earliest = TaskQueue.GetEarliestTaskTimestamp(); + if (earliest) { + if (now >= *earliest + ThreadStartTime && NumRunningThreads < MaxThreadCount) { + StartThread(); + } + IdleTimestamp.reset(); + } else if (!IdleTimestamp) { + IdleTimestamp = now; + } else if (now >= *IdleTimestamp + ThreadStopTime) { + IdleTimestamp.reset(); + if (NumRunningThreads > MinThreadCount) { + StopThread(); + } + } + Schedule(TDuration::MilliSeconds(100), new TEvents::TEvWakeup); + } + + STRICT_STFUNC(StateFunc, { + fFunc(EvNotifyThreadStopped, HandleThreadStopped); + hFunc(TEvInvokeQuery, Handle); + cFunc(TEvents::TSystem::Wakeup, HandleWakeup); + cFunc(TEvents::TSystem::Poison, PassAway); + }) + }; + + IActor *CreateIoDispatcherActor(const NMonitoring::TDynamicCounterPtr& counters) { + return new TIoDispatcherActor(counters); + } + +} // NActors diff --git a/library/cpp/actors/core/io_dispatcher.h b/library/cpp/actors/core/io_dispatcher.h new file mode 100644 index 0000000000..b0e4e60d1a --- /dev/null +++ b/library/cpp/actors/core/io_dispatcher.h @@ -0,0 +1,38 @@ +#pragma once + +#include "actor.h" +#include "event_local.h" +#include "events.h" +#include "actorsystem.h" +#include "executor_thread.h" +#include "executelater.h" + +namespace NActors { + + struct TEvInvokeQuery : TEventLocal<TEvInvokeQuery, TEvents::TSystem::InvokeQuery> { + std::function<void()> Callback; + + TEvInvokeQuery(std::function<void()>&& callback) + : Callback(std::move(callback)) + {} + }; + + inline TActorId MakeIoDispatcherActorId() { + return TActorId(0, TStringBuf("IoDispatcher", 12)); + } + + extern IActor *CreateIoDispatcherActor(const NMonitoring::TDynamicCounterPtr& counters); + + /* InvokeIoCallback enqueues callback() to be executed in IO thread pool and then return result in TEvInvokeResult + * message to parentId actor. 
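+     * A minimal usage sketch (poolId, DoBlockingWork and the activity value below are
+     * illustrative placeholders only):
+     *
+     *   InvokeIoCallback([] { DoBlockingWork(); }, poolId, IActor::EActorActivity::OTHER);
+     *
+     * If the dispatcher actor is not registered, the callback is instead executed by a
+     * one-shot ExecuteLater actor in the given pool.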
+     */
+    template<typename TCallback>
+    static void InvokeIoCallback(TCallback&& callback, ui32 poolId, IActor::EActivityType activityType) {
+        if (!TActivationContext::Send(new IEventHandle(MakeIoDispatcherActorId(), TActorId(),
+                new TEvInvokeQuery(callback)))) {
+            TActivationContext::Register(CreateExecuteLaterActor(std::move(callback), activityType), TActorId(),
+                TMailboxType::HTSwap, poolId);
+        }
+    }
+
+} // NActors
diff --git a/library/cpp/actors/core/lease.h b/library/cpp/actors/core/lease.h
new file mode 100644
index 0000000000..650ae7b122
--- /dev/null
+++ b/library/cpp/actors/core/lease.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include "defs.h"
+
+namespace NActors {
+    // Value representing a specific worker's permission for exclusive use of a CPU until a specific deadline
+    struct TLease {
+        // The lower WorkerBits store the current fast worker id
+        // All other, higher bits store the expiration (hard preemption) timestamp
+        using TValue = ui64;
+        TValue Value;
+
+        static constexpr ui64 WorkerIdMask = ui64((1ull << WorkerBits) - 1);
+        static constexpr ui64 ExpireTsMask = ~WorkerIdMask;
+
+        explicit constexpr TLease(ui64 value)
+            : Value(value)
+        {}
+
+        constexpr TLease(TWorkerId workerId, ui64 expireTs)
+            : Value((workerId & WorkerIdMask) | (expireTs & ExpireTsMask))
+        {}
+
+        TWorkerId GetWorkerId() const {
+            return Value & WorkerIdMask;
+        }
+
+        TLease NeverExpire() const {
+            return TLease(Value | ExpireTsMask);
+        }
+
+        bool IsNeverExpiring() const {
+            return (Value & ExpireTsMask) == ExpireTsMask;
+        }
+
+        ui64 GetExpireTs() const {
+            // Do not truncate the worker id
+            // NOTE: it decreases accuracy, but improves performance
+            return Value;
+        }
+
+        ui64 GetPreciseExpireTs() const {
+            return Value & ExpireTsMask;
+        }
+
+        operator TValue() const {
+            return Value;
+        }
+    };
+
+    // Special expire timestamp values
+    static constexpr ui64 NeverExpire = ui64(-1);
+
+    // Special hard-preemption-in-progress lease
+    static constexpr TLease HardPreemptionLease = TLease(TLease::WorkerIdMask, NeverExpire);
+}
diff --git a/library/cpp/actors/core/log.cpp b/library/cpp/actors/core/log.cpp
new file mode 100644
index 0000000000..5f63b5af58
--- /dev/null
+++ b/library/cpp/actors/core/log.cpp
@@ -0,0 +1,753 @@
+#include "log.h"
+#include "log_settings.h"
+
+#include <library/cpp/monlib/service/pages/templates.h>
+
+static_assert(int(NActors::NLog::PRI_EMERG) == int(::TLOG_EMERG), "expect int(NActors::NLog::PRI_EMERG) == int(::TLOG_EMERG)");
+static_assert(int(NActors::NLog::PRI_ALERT) == int(::TLOG_ALERT), "expect int(NActors::NLog::PRI_ALERT) == int(::TLOG_ALERT)");
+static_assert(int(NActors::NLog::PRI_CRIT) == int(::TLOG_CRIT), "expect int(NActors::NLog::PRI_CRIT) == int(::TLOG_CRIT)");
+static_assert(int(NActors::NLog::PRI_ERROR) == int(::TLOG_ERR), "expect int(NActors::NLog::PRI_ERROR) == int(::TLOG_ERR)");
+static_assert(int(NActors::NLog::PRI_WARN) == int(::TLOG_WARNING), "expect int(NActors::NLog::PRI_WARN) == int(::TLOG_WARNING)");
+static_assert(int(NActors::NLog::PRI_NOTICE) == int(::TLOG_NOTICE), "expect int(NActors::NLog::PRI_NOTICE) == int(::TLOG_NOTICE)");
+static_assert(int(NActors::NLog::PRI_INFO) == int(::TLOG_INFO), "expect int(NActors::NLog::PRI_INFO) == int(::TLOG_INFO)");
+static_assert(int(NActors::NLog::PRI_DEBUG) == int(::TLOG_DEBUG), "expect int(NActors::NLog::PRI_DEBUG) == int(::TLOG_DEBUG)");
+static_assert(int(NActors::NLog::PRI_TRACE) == int(::TLOG_RESOURCES), "expect int(NActors::NLog::PRI_TRACE) == int(::TLOG_RESOURCES)");
+
+namespace {
+    struct TRecordWithNewline {
+        ELogPriority Priority;
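+        // Record payload with a trailing '\n' appended; TTempBuf serves small records
+        // from a reusable temporary buffer rather than a fresh heap allocation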
TTempBuf Buf; + + TRecordWithNewline(const TLogRecord& rec) + : Priority(rec.Priority) + , Buf(rec.Len + 1) + { + Buf.Append(rec.Data, rec.Len); + *Buf.Proceed(1) = '\n'; + } + + operator TLogRecord() const { + return TLogRecord(Priority, Buf.Data(), Buf.Filled()); + } + }; +} + +namespace NActors { + + class TLoggerCounters : public ILoggerMetrics { + public: + TLoggerCounters(TIntrusivePtr<NMonitoring::TDynamicCounters> counters) + : DynamicCounters(counters) + { + ActorMsgs_ = DynamicCounters->GetCounter("ActorMsgs", true); + DirectMsgs_ = DynamicCounters->GetCounter("DirectMsgs", true); + LevelRequests_ = DynamicCounters->GetCounter("LevelRequests", true); + IgnoredMsgs_ = DynamicCounters->GetCounter("IgnoredMsgs", true); + DroppedMsgs_ = DynamicCounters->GetCounter("DroppedMsgs", true); + + AlertMsgs_ = DynamicCounters->GetCounter("AlertMsgs", true); + EmergMsgs_ = DynamicCounters->GetCounter("EmergMsgs", true); + } + + ~TLoggerCounters() = default; + + void IncActorMsgs() override { + ++*ActorMsgs_; + } + void IncDirectMsgs() override { + ++*DirectMsgs_; + } + void IncLevelRequests() override { + ++*LevelRequests_; + } + void IncIgnoredMsgs() override { + ++*IgnoredMsgs_; + } + void IncAlertMsgs() override { + ++*AlertMsgs_; + } + void IncEmergMsgs() override { + ++*EmergMsgs_; + } + void IncDroppedMsgs() override { + DroppedMsgs_->Inc(); + }; + + void GetOutputHtml(IOutputStream& str) override { + HTML(str) { + DIV_CLASS("row") { + DIV_CLASS("col-md-12") { + H4() { + str << "Counters" << Endl; + } + DynamicCounters->OutputHtml(str); + } + } + } + } + + private: + NMonitoring::TDynamicCounters::TCounterPtr ActorMsgs_; + NMonitoring::TDynamicCounters::TCounterPtr DirectMsgs_; + NMonitoring::TDynamicCounters::TCounterPtr LevelRequests_; + NMonitoring::TDynamicCounters::TCounterPtr IgnoredMsgs_; + NMonitoring::TDynamicCounters::TCounterPtr AlertMsgs_; + NMonitoring::TDynamicCounters::TCounterPtr EmergMsgs_; + // Dropped while the logger backend was unavailable + NMonitoring::TDynamicCounters::TCounterPtr DroppedMsgs_; + + TIntrusivePtr<NMonitoring::TDynamicCounters> DynamicCounters; + }; + + class TLoggerMetrics : public ILoggerMetrics { + public: + TLoggerMetrics(std::shared_ptr<NMonitoring::TMetricRegistry> metrics) + : Metrics(metrics) + { + ActorMsgs_ = Metrics->Rate(NMonitoring::TLabels{{"sensor", "logger.actor_msgs"}}); + DirectMsgs_ = Metrics->Rate(NMonitoring::TLabels{{"sensor", "logger.direct_msgs"}}); + LevelRequests_ = Metrics->Rate(NMonitoring::TLabels{{"sensor", "logger.level_requests"}}); + IgnoredMsgs_ = Metrics->Rate(NMonitoring::TLabels{{"sensor", "logger.ignored_msgs"}}); + DroppedMsgs_ = Metrics->Rate(NMonitoring::TLabels{{"sensor", "logger.dropped_msgs"}}); + + AlertMsgs_ = Metrics->Rate(NMonitoring::TLabels{{"sensor", "logger.alert_msgs"}}); + EmergMsgs_ = Metrics->Rate(NMonitoring::TLabels{{"sensor", "logger.emerg_msgs"}}); + } + + ~TLoggerMetrics() = default; + + void IncActorMsgs() override { + ActorMsgs_->Inc(); + } + void IncDirectMsgs() override { + DirectMsgs_->Inc(); + } + void IncLevelRequests() override { + LevelRequests_->Inc(); + } + void IncIgnoredMsgs() override { + IgnoredMsgs_->Inc(); + } + void IncAlertMsgs() override { + AlertMsgs_->Inc(); + } + void IncEmergMsgs() override { + EmergMsgs_->Inc(); + } + void IncDroppedMsgs() override { + DroppedMsgs_->Inc(); + }; + + void GetOutputHtml(IOutputStream& str) override { + HTML(str) { + DIV_CLASS("row") { + DIV_CLASS("col-md-12") { + H4() { + str << "Metrics" << Endl; + } + // TODO: Now, TMetricRegistry 
does not have the GetOutputHtml function + } + } + } + } + + private: + NMonitoring::TRate* ActorMsgs_; + NMonitoring::TRate* DirectMsgs_; + NMonitoring::TRate* LevelRequests_; + NMonitoring::TRate* IgnoredMsgs_; + NMonitoring::TRate* AlertMsgs_; + NMonitoring::TRate* EmergMsgs_; + // Dropped while the logger backend was unavailable + NMonitoring::TRate* DroppedMsgs_; + + std::shared_ptr<NMonitoring::TMetricRegistry> Metrics; + }; + + TAtomic TLoggerActor::IsOverflow = 0; + + TLoggerActor::TLoggerActor(TIntrusivePtr<NLog::TSettings> settings, + TAutoPtr<TLogBackend> logBackend, + TIntrusivePtr<NMonitoring::TDynamicCounters> counters) + : TActor(&TLoggerActor::StateFunc) + , Settings(settings) + , LogBackend(logBackend.Release()) + , Metrics(std::make_unique<TLoggerCounters>(counters)) + { + } + + TLoggerActor::TLoggerActor(TIntrusivePtr<NLog::TSettings> settings, + std::shared_ptr<TLogBackend> logBackend, + TIntrusivePtr<NMonitoring::TDynamicCounters> counters) + : TActor(&TLoggerActor::StateFunc) + , Settings(settings) + , LogBackend(logBackend) + , Metrics(std::make_unique<TLoggerCounters>(counters)) + { + } + + TLoggerActor::TLoggerActor(TIntrusivePtr<NLog::TSettings> settings, + TAutoPtr<TLogBackend> logBackend, + std::shared_ptr<NMonitoring::TMetricRegistry> metrics) + : TActor(&TLoggerActor::StateFunc) + , Settings(settings) + , LogBackend(logBackend.Release()) + , Metrics(std::make_unique<TLoggerMetrics>(metrics)) + { + } + + TLoggerActor::TLoggerActor(TIntrusivePtr<NLog::TSettings> settings, + std::shared_ptr<TLogBackend> logBackend, + std::shared_ptr<NMonitoring::TMetricRegistry> metrics) + : TActor(&TLoggerActor::StateFunc) + , Settings(settings) + , LogBackend(logBackend) + , Metrics(std::make_unique<TLoggerMetrics>(metrics)) + { + } + + TLoggerActor::~TLoggerActor() { + } + + void TLoggerActor::Log(TInstant time, NLog::EPriority priority, NLog::EComponent component, const char* c, ...) 
{
+        Metrics->IncDirectMsgs();
+        if (Settings && Settings->Satisfies(priority, component, 0ull)) {
+            va_list params;
+            va_start(params, c);
+            TString formatted;
+            vsprintf(formatted, c, params);
+
+            auto ok = OutputRecord(time, NLog::EPrio(priority), component, formatted);
+            Y_UNUSED(ok);
+            va_end(params);
+        }
+    }
+
+    void TLoggerActor::Throttle(const NLog::TSettings& settings) {
+        if (AtomicGet(IsOverflow))
+            Sleep(settings.ThrottleDelay);
+    }
+
+    void TLoggerActor::LogIgnoredCount(TInstant now) {
+        TString message = Sprintf("Ignored IgnoredCount# %" PRIu64 " log records due to logger overflow!", IgnoredCount);
+        if (!OutputRecord(now, NActors::NLog::EPrio::Error, Settings->LoggerComponent, message)) {
+            BecomeDefunct();
+        }
+    }
+
+    void TLoggerActor::HandleIgnoredEvent(TLogIgnored::TPtr& ev, const NActors::TActorContext& ctx) {
+        Y_UNUSED(ev);
+        LogIgnoredCount(ctx.Now());
+        IgnoredCount = 0;
+        PassedCount = 0;
+    }
+
+    void TLoggerActor::HandleIgnoredEventDrop() {
+        // logger backend is unavailable, just ignore
+    }
+
+    void TLoggerActor::WriteMessageStat(const NLog::TEvLog& ev) {
+        Metrics->IncActorMsgs();
+
+        const auto prio = ev.Level.ToPrio();
+
+        switch (prio) {
+            case ::NActors::NLog::EPrio::Alert:
+                Metrics->IncAlertMsgs();
+                break;
+            case ::NActors::NLog::EPrio::Emerg:
+                Metrics->IncEmergMsgs();
+                break;
+            default:
+                break;
+        }
+    }
+
+    void TLoggerActor::HandleLogEvent(NLog::TEvLog::TPtr& ev, const NActors::TActorContext& ctx) {
+        i64 delayMillisec = (ctx.Now() - ev->Get()->Stamp).MilliSeconds();
+        WriteMessageStat(*ev->Get());
+        if (Settings->AllowDrop) {
+            // Disable throttling if it was enabled previously
+            if (AtomicGet(IsOverflow))
+                AtomicSet(IsOverflow, 0);
+
+            // Check if some records have to be dropped
+            if ((PassedCount > 10 && delayMillisec > (i64)Settings->TimeThresholdMs) || IgnoredCount > 0) {
+                Metrics->IncIgnoredMsgs();
+                if (IgnoredCount == 0) {
+                    ctx.Send(ctx.SelfID, new TLogIgnored());
+                }
+                ++IgnoredCount;
+                PassedCount = 0;
+                return;
+            }
+            PassedCount++;
+        } else {
+            // Enable or disable throttling depending on the load
+            if (delayMillisec > (i64)Settings->TimeThresholdMs && !AtomicGet(IsOverflow))
+                AtomicSet(IsOverflow, 1);
+            else if (delayMillisec <= (i64)Settings->TimeThresholdMs && AtomicGet(IsOverflow))
+                AtomicSet(IsOverflow, 0);
+        }
+
+        const auto prio = ev->Get()->Level.ToPrio();
+        if (!OutputRecord(ev->Get()->Stamp, prio, ev->Get()->Component, ev->Get()->Line)) {
+            BecomeDefunct();
+        }
+    }
+
+    void TLoggerActor::BecomeDefunct() {
+        Become(&TThis::StateDefunct);
+        Schedule(WakeupInterval, new TEvents::TEvWakeup);
+    }
+
+    void TLoggerActor::HandleLogComponentLevelRequest(TLogComponentLevelRequest::TPtr& ev, const NActors::TActorContext& ctx) {
+        Metrics->IncLevelRequests();
+        TString explanation;
+        int code = Settings->SetLevel(ev->Get()->Priority, ev->Get()->Component, explanation);
+        ctx.Send(ev->Sender, new TLogComponentLevelResponse(code, explanation));
+    }
+
+    void TLoggerActor::RenderComponentPriorities(IOutputStream& str) {
+        using namespace NLog;
+        HTML(str) {
+            H4() {
+                str << "Priority Settings for the Components";
+            }
+            TABLE_SORTABLE_CLASS("table") {
+                TABLEHEAD() {
+                    TABLER() {
+                        TABLEH() {
+                            str << "Component";
+                        }
+                        TABLEH() {
+                            str << "Level";
+                        }
+                        TABLEH() {
+                            str << "Sampling Level";
+                        }
+                        TABLEH() {
+                            str << "Sampling Rate";
+                        }
+                    }
+                }
+                TABLEBODY() {
+                    for (EComponent i = Settings->MinVal; i < Settings->MaxVal; i++) {
+                        auto name = Settings->ComponentName(i);
+                        if (!*name)
+                            continue;
+                        NLog::TComponentSettings componentSettings
= Settings->GetComponentSettings(i); + + TABLER() { + TABLED() { + str << "<a href='logger?c=" << i << "'>" << name << "</a>"; + } + TABLED() { + str << PriorityToString(EPrio(componentSettings.Raw.X.Level)); + } + TABLED() { + str << PriorityToString(EPrio(componentSettings.Raw.X.SamplingLevel)); + } + TABLED() { + str << componentSettings.Raw.X.SamplingRate; + } + } + } + } + } + } + } + + /* + * Logger INFO: + * 1. Current priority settings from components + * 2. Number of log messages (via actors events, directly) + * 3. Number of messages per components, per priority + * 4. Log level changes (last N changes) + */ + void TLoggerActor::HandleMonInfo(NMon::TEvHttpInfo::TPtr& ev, const TActorContext& ctx) { + const auto& params = ev->Get()->Request.GetParams(); + NLog::EComponent component = NLog::InvalidComponent; + NLog::EPriority priority = NLog::PRI_DEBUG; + NLog::EPriority samplingPriority = NLog::PRI_DEBUG; + ui32 samplingRate = 0; + bool hasComponent = false; + bool hasPriority = false; + bool hasSamplingPriority = false; + bool hasSamplingRate = false; + bool hasAllowDrop = false; + int allowDrop = 0; + if (params.Has("c")) { + if (TryFromString(params.Get("c"), component) && (component == NLog::InvalidComponent || Settings->IsValidComponent(component))) { + hasComponent = true; + if (params.Has("p")) { + int rawPriority; + if (TryFromString(params.Get("p"), rawPriority) && NLog::TSettings::IsValidPriority((NLog::EPriority)rawPriority)) { + priority = (NLog::EPriority)rawPriority; + hasPriority = true; + } + } + if (params.Has("sp")) { + int rawPriority; + if (TryFromString(params.Get("sp"), rawPriority) && NLog::TSettings::IsValidPriority((NLog::EPriority)rawPriority)) { + samplingPriority = (NLog::EPriority)rawPriority; + hasSamplingPriority = true; + } + } + if (params.Has("sr")) { + if (TryFromString(params.Get("sr"), samplingRate)) { + hasSamplingRate = true; + } + } + } + } + if (params.Has("allowdrop")) { + if (TryFromString(params.Get("allowdrop"), allowDrop)) { + hasAllowDrop = true; + } + } + + TStringStream str; + if (hasComponent && !hasPriority && !hasSamplingPriority && !hasSamplingRate) { + NLog::TComponentSettings componentSettings = Settings->GetComponentSettings(component); + ui32 samplingRate = componentSettings.Raw.X.SamplingRate; + HTML(str) { + DIV_CLASS("row") { + DIV_CLASS("col-md-12") { + H4() { + str << "Current log settings for " << Settings->ComponentName(component) << Endl; + } + UL() { + LI() { + str << "Priority: " + << NLog::PriorityToString(NLog::EPrio(componentSettings.Raw.X.Level)); + } + LI() { + str << "Sampling priority: " + << NLog::PriorityToString(NLog::EPrio(componentSettings.Raw.X.SamplingLevel)); + } + LI() { + str << "Sampling rate: " + << samplingRate; + } + } + } + } + + DIV_CLASS("row") { + DIV_CLASS("col-md-12") { + H4() { + str << "Change priority" << Endl; + } + UL() { + for (int p = NLog::PRI_EMERG; p <= NLog::PRI_TRACE; ++p) { + LI() { + str << "<a href='logger?c=" << component << "&p=" << p << "'>" + << NLog::PriorityToString(NLog::EPrio(p)) << "</a>"; + } + } + } + H4() { + str << "Change sampling priority" << Endl; + } + UL() { + for (int p = NLog::PRI_EMERG; p <= NLog::PRI_TRACE; ++p) { + LI() { + str << "<a href='logger?c=" << component << "&sp=" << p << "'>" + << NLog::PriorityToString(NLog::EPrio(p)) << "</a>"; + } + } + } + H4() { + str << "Change sampling rate" << Endl; + } + str << "<form method=\"GET\">" << Endl; + str << "Rate: <input type=\"number\" name=\"sr\" value=\"" << samplingRate << "\"/>" << Endl; + str << 
"<input type=\"hidden\" name=\"c\" value=\"" << component << "\">" << Endl; + str << "<input class=\"btn btn-primary\" type=\"submit\" value=\"Change\"/>" << Endl; + str << "</form>" << Endl; + H4() { + str << "<a href='logger'>Cancel</a>" << Endl; + } + } + } + } + + } else { + TString explanation; + if (hasComponent && hasPriority) { + Settings->SetLevel(priority, component, explanation); + } + if (hasComponent && hasSamplingPriority) { + Settings->SetSamplingLevel(samplingPriority, component, explanation); + } + if (hasComponent && hasSamplingRate) { + Settings->SetSamplingRate(samplingRate, component, explanation); + } + if (hasAllowDrop) { + Settings->SetAllowDrop(allowDrop); + } + + HTML(str) { + if (!explanation.empty()) { + DIV_CLASS("row") { + DIV_CLASS("col-md-12 alert alert-info") { + str << explanation; + } + } + } + + DIV_CLASS("row") { + DIV_CLASS("col-md-6") { + RenderComponentPriorities(str); + } + DIV_CLASS("col-md-6") { + H4() { + str << "Change priority for all components"; + } + TABLE_CLASS("table table-condensed") { + TABLEHEAD() { + TABLER() { + TABLEH() { + str << "Priority"; + } + } + } + TABLEBODY() { + for (int p = NLog::PRI_EMERG; p <= NLog::PRI_TRACE; ++p) { + TABLER() { + TABLED() { + str << "<a href = 'logger?c=-1&p=" << p << "'>" + << NLog::PriorityToString(NLog::EPrio(p)) << "</a>"; + } + } + } + } + } + H4() { + str << "Change sampling priority for all components"; + } + TABLE_CLASS("table table-condensed") { + TABLEHEAD() { + TABLER() { + TABLEH() { + str << "Priority"; + } + } + } + TABLEBODY() { + for (int p = NLog::PRI_EMERG; p <= NLog::PRI_TRACE; ++p) { + TABLER() { + TABLED() { + str << "<a href = 'logger?c=-1&sp=" << p << "'>" + << NLog::PriorityToString(NLog::EPrio(p)) << "</a>"; + } + } + } + } + } + H4() { + str << "Change sampling rate for all components"; + } + str << "<form method=\"GET\">" << Endl; + str << "Rate: <input type=\"number\" name=\"sr\" value=\"0\"/>" << Endl; + str << "<input type=\"hidden\" name=\"c\" value=\"-1\">" << Endl; + str << "<input class=\"btn btn-primary\" type=\"submit\" value=\"Change\"/>" << Endl; + str << "</form>" << Endl; + H4() { + str << "Drop log entries in case of overflow: " + << (Settings->AllowDrop ? "Enabled" : "Disabled"); + } + str << "<form method=\"GET\">" << Endl; + str << "<input type=\"hidden\" name=\"allowdrop\" value=\"" << (Settings->AllowDrop ? "0" : "1") << "\"/>" << Endl; + str << "<input class=\"btn btn-primary\" type=\"submit\" value=\"" << (Settings->AllowDrop ? 
"Disable" : "Enable") << "\"/>" << Endl; + str << "</form>" << Endl; + } + } + Metrics->GetOutputHtml(str); + } + } + + ctx.Send(ev->Sender, new NMon::TEvHttpInfoRes(str.Str())); + } + + constexpr size_t TimeBufSize = 512; + + bool TLoggerActor::OutputRecord(TInstant time, NLog::EPrio priority, NLog::EComponent component, + const TString& formatted) noexcept try { + const auto logPrio = ::ELogPriority(ui16(priority)); + + char buf[TimeBufSize]; + switch (Settings->Format) { + case NActors::NLog::TSettings::PLAIN_FULL_FORMAT: { + TStringBuilder logRecord; + if (Settings->UseLocalTimestamps) { + logRecord << FormatLocalTimestamp(time, buf); + } else { + logRecord << time; + } + logRecord + << Settings->MessagePrefix + << " :" << Settings->ComponentName(component) + << " " << PriorityToString(priority) + << ": " << formatted; + LogBackend->WriteData( + TLogRecord(logPrio, logRecord.data(), logRecord.size())); + } break; + + case NActors::NLog::TSettings::PLAIN_SHORT_FORMAT: { + TStringBuilder logRecord; + logRecord + << Settings->ComponentName(component) + << ": " << formatted; + LogBackend->WriteData( + TLogRecord(logPrio, logRecord.data(), logRecord.size())); + } break; + + case NActors::NLog::TSettings::JSON_FORMAT: { + NJsonWriter::TBuf json; + json.BeginObject() + .WriteKey("@timestamp") + .WriteString(Settings->UseLocalTimestamps ? FormatLocalTimestamp(time, buf) : time.ToString().data()) + .WriteKey("microseconds") + .WriteULongLong(time.MicroSeconds()) + .WriteKey("host") + .WriteString(Settings->ShortHostName) + .WriteKey("cluster") + .WriteString(Settings->ClusterName) + .WriteKey("priority") + .WriteString(PriorityToString(priority)) + .WriteKey("npriority") + .WriteInt((int)priority) + .WriteKey("component") + .WriteString(Settings->ComponentName(component)) + .WriteKey("tag") + .WriteString("KIKIMR") + .WriteKey("revision") + .WriteInt(GetProgramSvnRevision()) + .WriteKey("message") + .WriteString(formatted) + .EndObject(); + auto logRecord = json.Str(); + LogBackend->WriteData( + TLogRecord(logPrio, logRecord.data(), logRecord.size())); + } break; + } + + return true; + } catch (...) 
{ + return false; + } + + void TLoggerActor::HandleLogEventDrop(const NLog::TEvLog::TPtr& ev) { + WriteMessageStat(*ev->Get()); + Metrics->IncDroppedMsgs(); + } + + void TLoggerActor::HandleWakeup() { + Become(&TThis::StateFunc); + } + + const char* TLoggerActor::FormatLocalTimestamp(TInstant time, char* buf) { + struct tm localTime; + time.LocalTime(&localTime); + int r = strftime(buf, TimeBufSize, "%Y-%m-%d-%H-%M-%S", &localTime); + Y_VERIFY(r != 0); + return buf; + } + + TAutoPtr<TLogBackend> CreateSysLogBackend(const TString& ident, + bool logPError, bool logCons) { + int flags = 0; + if (logPError) + flags |= TSysLogBackend::LogPerror; + if (logCons) + flags |= TSysLogBackend::LogCons; + + return new TSysLogBackend(ident.data(), TSysLogBackend::TSYSLOG_LOCAL1, flags); + } + + class TStderrBackend: public TLogBackend { + public: + TStderrBackend() { + } + void WriteData(const TLogRecord& rec) override { +#ifdef _MSC_VER + if (IsDebuggerPresent()) { + TString x; + x.reserve(rec.Len + 2); + x.append(rec.Data, rec.Len); + x.append('\n'); + OutputDebugString(x.c_str()); + } +#endif + bool isOk = false; + do { + try { + TRecordWithNewline r(rec); + Cerr.Write(r.Buf.Data(), r.Buf.Filled()); + isOk = true; + } catch (TSystemError err) { + // Interrupted system call + Y_UNUSED(err); + } + } while (!isOk); + } + + void ReopenLog() override { + } + + private: + const TString Indent; + }; + + class TLineFileLogBackend: public TFileLogBackend { + public: + TLineFileLogBackend(const TString& path) + : TFileLogBackend(path) + { + } + + // Append newline after every record + void WriteData(const TLogRecord& rec) override { + TFileLogBackend::WriteData(TRecordWithNewline(rec)); + } + }; + + class TCompositeLogBackend: public TLogBackend { + public: + TCompositeLogBackend(TVector<TAutoPtr<TLogBackend>>&& underlyingBackends) + : UnderlyingBackends(std::move(underlyingBackends)) + { + } + + void WriteData(const TLogRecord& rec) override { + for (auto& b: UnderlyingBackends) { + b->WriteData(rec); + } + } + + void ReopenLog() override { + } + + private: + TVector<TAutoPtr<TLogBackend>> UnderlyingBackends; + }; + + TAutoPtr<TLogBackend> CreateStderrBackend() { + return new TStderrBackend(); + } + + TAutoPtr<TLogBackend> CreateFileBackend(const TString& fileName) { + return new TLineFileLogBackend(fileName); + } + + TAutoPtr<TLogBackend> CreateNullBackend() { + return new TNullLogBackend(); + } + + TAutoPtr<TLogBackend> CreateCompositeLogBackend(TVector<TAutoPtr<TLogBackend>>&& underlyingBackends) { + return new TCompositeLogBackend(std::move(underlyingBackends)); + } +} diff --git a/library/cpp/actors/core/log.h b/library/cpp/actors/core/log.h new file mode 100644 index 0000000000..c11a7cf3c1 --- /dev/null +++ b/library/cpp/actors/core/log.h @@ -0,0 +1,369 @@ +#pragma once + +#include "defs.h" + +#include "log_iface.h" +#include "log_settings.h" +#include "actorsystem.h" +#include "events.h" +#include "event_local.h" +#include "hfunc.h" +#include "mon.h" + +#include <util/generic/vector.h> +#include <util/string/printf.h> +#include <util/string/builder.h> +#include <library/cpp/logger/all.h> +#include <library/cpp/monlib/dynamic_counters/counters.h> +#include <library/cpp/monlib/metrics/metric_registry.h> +#include <library/cpp/json/writer/json.h> +#include <library/cpp/svnversion/svnversion.h> + +#include <library/cpp/actors/memory_log/memlog.h> + +// TODO: limit number of messages per second +// TODO: make TLogComponentLevelRequest/Response network messages + +#define 
IS_LOG_PRIORITY_ENABLED(actorCtxOrSystem, priority, component) \ + (static_cast<::NActors::NLog::TSettings*>((actorCtxOrSystem).LoggerSettings()) && \ + static_cast<::NActors::NLog::TSettings*>((actorCtxOrSystem).LoggerSettings())->Satisfies( \ + static_cast<::NActors::NLog::EPriority>(priority), \ + static_cast<::NActors::NLog::EComponent>(component), \ + 0ull) \ + ) + +#define LOG_LOG_SAMPLED_BY(actorCtxOrSystem, priority, component, sampleBy, ...) \ + do { \ + ::NActors::NLog::TSettings* mSettings = static_cast<::NActors::NLog::TSettings*>((actorCtxOrSystem).LoggerSettings()); \ + ::NActors::NLog::EPriority mPriority = static_cast<::NActors::NLog::EPriority>(priority); \ + ::NActors::NLog::EComponent mComponent = static_cast<::NActors::NLog::EComponent>(component); \ + if (mSettings && mSettings->Satisfies(mPriority, mComponent, sampleBy)) { \ + ::NActors::MemLogAdapter( \ + actorCtxOrSystem, priority, component, __VA_ARGS__); \ + } \ + } while (0) /**/ + +#define LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, priority, component, sampleBy, stream) \ + LOG_LOG_SAMPLED_BY(actorCtxOrSystem, priority, component, sampleBy, "%s", [&]() { \ + TStringBuilder logStringBuilder; \ + logStringBuilder << stream; \ + return static_cast<TString>(logStringBuilder); \ + }().data()) + +#define LOG_LOG(actorCtxOrSystem, priority, component, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, priority, component, 0ull, __VA_ARGS__) +#define LOG_LOG_S(actorCtxOrSystem, priority, component, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, priority, component, 0ull, stream) + +// use these macros for logging via actor system or actor context +#define LOG_EMERG(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_EMERG, component, __VA_ARGS__) +#define LOG_ALERT(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_ALERT, component, __VA_ARGS__) +#define LOG_CRIT(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_CRIT, component, __VA_ARGS__) +#define LOG_ERROR(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_ERROR, component, __VA_ARGS__) +#define LOG_WARN(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_WARN, component, __VA_ARGS__) +#define LOG_NOTICE(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_NOTICE, component, __VA_ARGS__) +#define LOG_INFO(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_INFO, component, __VA_ARGS__) +#define LOG_DEBUG(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_DEBUG, component, __VA_ARGS__) +#define LOG_TRACE(actorCtxOrSystem, component, ...) 
LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_TRACE, component, __VA_ARGS__) + +#define LOG_EMERG_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_EMERG, component, stream) +#define LOG_ALERT_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_ALERT, component, stream) +#define LOG_CRIT_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_CRIT, component, stream) +#define LOG_ERROR_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_ERROR, component, stream) +#define LOG_WARN_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_WARN, component, stream) +#define LOG_NOTICE_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_NOTICE, component, stream) +#define LOG_INFO_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_INFO, component, stream) +#define LOG_DEBUG_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_DEBUG, component, stream) +#define LOG_TRACE_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_TRACE, component, stream) + +#define LOG_EMERG_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_EMERG, component, sampleBy, __VA_ARGS__) +#define LOG_ALERT_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_ALERT, component, sampleBy, __VA_ARGS__) +#define LOG_CRIT_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_CRIT, component, sampleBy, __VA_ARGS__) +#define LOG_ERROR_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_ERROR, component, sampleBy, __VA_ARGS__) +#define LOG_WARN_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_WARN, component, sampleBy, __VA_ARGS__) +#define LOG_NOTICE_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_NOTICE, component, sampleBy, __VA_ARGS__) +#define LOG_INFO_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_INFO, component, sampleBy, __VA_ARGS__) +#define LOG_DEBUG_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_DEBUG, component, sampleBy, __VA_ARGS__) +#define LOG_TRACE_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) 
LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_TRACE, component, sampleBy, __VA_ARGS__) + +#define LOG_EMERG_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_EMERG, component, sampleBy, stream) +#define LOG_ALERT_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_ALERT, component, sampleBy, stream) +#define LOG_CRIT_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_CRIT, component, sampleBy, stream) +#define LOG_ERROR_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_ERROR, component, sampleBy, stream) +#define LOG_WARN_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_WARN, component, sampleBy, stream) +#define LOG_NOTICE_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_NOTICE, component, sampleBy, stream) +#define LOG_INFO_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_INFO, component, sampleBy, stream) +#define LOG_DEBUG_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_DEBUG, component, sampleBy, stream) +#define LOG_TRACE_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_TRACE, component, sampleBy, stream) + +// Log Throttling +#define LOG_LOG_THROTTLE(throttler, actorCtxOrSystem, priority, component, ...) \ + do { \ + if ((throttler).Kick()) { \ + LOG_LOG(actorCtxOrSystem, priority, component, __VA_ARGS__); \ + } \ + } while (0) /**/ + +#define TRACE_EVENT(component) \ + const auto& currentTracer = component; \ + if (ev->HasEvent()) { \ + LOG_TRACE(*TlsActivationContext, currentTracer, "%s, received event# %" PRIu32 ", Sender %s, Recipient %s: %s", \ + __FUNCTION__, ev->Type, ev->Sender.ToString().data(), SelfId().ToString().data(), ev->GetBase()->ToString().substr(0, 1000).data()); \ + } else { \ + LOG_TRACE(*TlsActivationContext, currentTracer, "%s, received event# %" PRIu32 ", Sender %s, Recipient %s", \ + __FUNCTION__, ev->Type, ev->Sender.ToString().data(), ev->Recipient.ToString().data()); \ + } +#define TRACE_EVENT_TYPE(eventType) LOG_TRACE(*TlsActivationContext, currentTracer, "%s, processing event %s", __FUNCTION__, eventType) + +class TLog; +class TLogBackend; + +namespace NActors { + class TLoggerActor; + + //////////////////////////////////////////////////////////////////////////////// + // SET LOG LEVEL FOR A COMPONENT + //////////////////////////////////////////////////////////////////////////////// + class TLogComponentLevelRequest: public TEventLocal<TLogComponentLevelRequest, int(NLog::EEv::LevelReq)> { + public: + // set given priority for the component + TLogComponentLevelRequest(NLog::EPriority priority, NLog::EComponent component) + : Priority(priority) + , Component(component) + { + } + + // set given priority for all components + TLogComponentLevelRequest(NLog::EPriority priority) + : Priority(priority) + , Component(NLog::InvalidComponent) + { + } + + protected: + NLog::EPriority Priority; + NLog::EComponent Component; + + friend class TLoggerActor; + }; + + class TLogComponentLevelResponse: public TEventLocal<TLogComponentLevelResponse, 
int(NLog::EEv::LevelResp)> { + public: + TLogComponentLevelResponse(int code, const TString& explanation) + : Code(code) + , Explanation(explanation) + { + } + + int GetCode() const { + return Code; + } + + const TString& GetExplanation() const { + return Explanation; + } + + protected: + int Code; + TString Explanation; + }; + + class TLogIgnored: public TEventLocal<TLogIgnored, int(NLog::EEv::Ignored)> { + public: + TLogIgnored() { + } + }; + + //////////////////////////////////////////////////////////////////////////////// + // LOGGER ACTOR + //////////////////////////////////////////////////////////////////////////////// + class ILoggerMetrics { + public: + virtual ~ILoggerMetrics() = default; + + virtual void IncActorMsgs() = 0; + virtual void IncDirectMsgs() = 0; + virtual void IncLevelRequests() = 0; + virtual void IncIgnoredMsgs() = 0; + virtual void IncAlertMsgs() = 0; + virtual void IncEmergMsgs() = 0; + virtual void IncDroppedMsgs() = 0; + + virtual void GetOutputHtml(IOutputStream&) = 0; + }; + + class TLoggerActor: public TActor<TLoggerActor> { + public: + static constexpr IActor::EActivityType ActorActivityType() { + return IActor::LOG_ACTOR; + } + + TLoggerActor(TIntrusivePtr<NLog::TSettings> settings, + TAutoPtr<TLogBackend> logBackend, + TIntrusivePtr<NMonitoring::TDynamicCounters> counters); + TLoggerActor(TIntrusivePtr<NLog::TSettings> settings, + std::shared_ptr<TLogBackend> logBackend, + TIntrusivePtr<NMonitoring::TDynamicCounters> counters); + TLoggerActor(TIntrusivePtr<NLog::TSettings> settings, + TAutoPtr<TLogBackend> logBackend, + std::shared_ptr<NMonitoring::TMetricRegistry> metrics); + TLoggerActor(TIntrusivePtr<NLog::TSettings> settings, + std::shared_ptr<TLogBackend> logBackend, + std::shared_ptr<NMonitoring::TMetricRegistry> metrics); + ~TLoggerActor(); + + void StateFunc(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx) { + switch (ev->GetTypeRewrite()) { + HFunc(TLogIgnored, HandleIgnoredEvent); + HFunc(NLog::TEvLog, HandleLogEvent); + HFunc(TLogComponentLevelRequest, HandleLogComponentLevelRequest); + HFunc(NMon::TEvHttpInfo, HandleMonInfo); + } + } + + STFUNC(StateDefunct) { + switch (ev->GetTypeRewrite()) { + cFunc(TLogIgnored::EventType, HandleIgnoredEventDrop); + hFunc(NLog::TEvLog, HandleLogEventDrop); + HFunc(TLogComponentLevelRequest, HandleLogComponentLevelRequest); + HFunc(NMon::TEvHttpInfo, HandleMonInfo); + cFunc(TEvents::TEvWakeup::EventType, HandleWakeup); + } + } + + // Directly call logger instead of sending a message + void Log(TInstant time, NLog::EPriority priority, NLog::EComponent component, const char* c, ...); + + static void Throttle(const NLog::TSettings& settings); + + private: + TIntrusivePtr<NLog::TSettings> Settings; + std::shared_ptr<TLogBackend> LogBackend; + ui64 IgnoredCount = 0; + ui64 PassedCount = 0; + static TAtomic IsOverflow; + TDuration WakeupInterval{TDuration::Seconds(5)}; + std::unique_ptr<ILoggerMetrics> Metrics; + + void BecomeDefunct(); + void HandleIgnoredEvent(TLogIgnored::TPtr& ev, const NActors::TActorContext& ctx); + void HandleIgnoredEventDrop(); + void HandleLogEvent(NLog::TEvLog::TPtr& ev, const TActorContext& ctx); + void HandleLogEventDrop(const NLog::TEvLog::TPtr& ev); + void HandleLogComponentLevelRequest(TLogComponentLevelRequest::TPtr& ev, const TActorContext& ctx); + void HandleMonInfo(NMon::TEvHttpInfo::TPtr& ev, const TActorContext& ctx); + void HandleWakeup(); + [[nodiscard]] bool OutputRecord(TInstant time, NLog::EPrio priority, NLog::EComponent component, const TString& formatted) 
noexcept; + void RenderComponentPriorities(IOutputStream& str); + void LogIgnoredCount(TInstant now); + void WriteMessageStat(const NLog::TEvLog& ev); + static const char* FormatLocalTimestamp(TInstant time, char* buf); + }; + + //////////////////////////////////////////////////////////////////////////////// + // LOG THROTTLING + // TTrivialLogThrottler -- log a message every 'period' duration + // Use case: + // TTrivialLogThrottler throttler(TDuration::Minutes(1)); + // .... + // LOG_LOG_THROTTLE(throttler, ctx, NActors::NLog::PRI_ERROR, SOME, "Error"); + //////////////////////////////////////////////////////////////////////////////// + class TTrivialLogThrottler { + public: + TTrivialLogThrottler(TDuration period) + : Period(period) + { + } + + // return value: + // true -- write to log + // false -- don't write to log, throttle + bool Kick() { + auto now = TInstant::Now(); + if (now >= (LastWrite + Period)) { + LastWrite = now; + return true; + } else { + return false; + } + } + + private: + TInstant LastWrite; + TDuration Period; + }; + + //////////////////////////////////////////////////////////////////////////////// + // SYSLOG BACKEND + //////////////////////////////////////////////////////////////////////////////// + TAutoPtr<TLogBackend> CreateSysLogBackend(const TString& ident, + bool logPError, bool logCons); + TAutoPtr<TLogBackend> CreateStderrBackend(); + TAutoPtr<TLogBackend> CreateFileBackend(const TString& fileName); + TAutoPtr<TLogBackend> CreateNullBackend(); + TAutoPtr<TLogBackend> CreateCompositeLogBackend(TVector<TAutoPtr<TLogBackend>>&& underlyingBackends); + + ///////////////////////////////////////////////////////////////////// + // Logging adaptors for memory log and logging into filesystem + ///////////////////////////////////////////////////////////////////// + + namespace NDetail { + inline void Y_PRINTF_FORMAT(2, 3) PrintfV(TString& dst, const char* format, ...) { + va_list params; + va_start(params, format); + vsprintf(dst, format, params); + va_end(params); + } + + inline void PrintfV(TString& dst, const char* format, va_list params) { + vsprintf(dst, format, params); + } + } // namespace NDetail + + template <typename TCtx> + inline void DeliverLogMessage(TCtx& ctx, NLog::EPriority mPriority, NLog::EComponent mComponent, TString &&str) + { + const NLog::TSettings *mSettings = ctx.LoggerSettings(); + TLoggerActor::Throttle(*mSettings); + ctx.Send(new IEventHandle(mSettings->LoggerActorId, TActorId(), new NLog::TEvLog(mPriority, mComponent, std::move(str)))); + } + + template <typename TCtx, typename... TArgs> + inline void MemLogAdapter( + TCtx& actorCtxOrSystem, + NLog::EPriority mPriority, + NLog::EComponent mComponent, + const char* format, TArgs&&... params) { + TString Formatted; + + + if constexpr (sizeof... 
(params) > 0) {
+            NDetail::PrintfV(Formatted, format, std::forward<TArgs>(params)...);
+        } else {
+            NDetail::PrintfV(Formatted, "%s", format);
+        }
+
+        MemLogWrite(Formatted.data(), Formatted.size(), true);
+        DeliverLogMessage(actorCtxOrSystem, mPriority, mComponent, std::move(Formatted));
+    }
+
+    template <typename TCtx>
+    Y_WRAPPER inline void MemLogAdapter(
+        TCtx& actorCtxOrSystem,
+        NLog::EPriority mPriority,
+        NLog::EComponent mComponent,
+        const TString& str) {
+
+        MemLogWrite(str.data(), str.size(), true);
+        DeliverLogMessage(actorCtxOrSystem, mPriority, mComponent, TString(str));
+    }
+
+    template <typename TCtx>
+    Y_WRAPPER inline void MemLogAdapter(
+        TCtx& actorCtxOrSystem,
+        NLog::EPriority mPriority,
+        NLog::EComponent mComponent,
+        TString&& str) {
+
+        MemLogWrite(str.data(), str.size(), true);
+        DeliverLogMessage(actorCtxOrSystem, mPriority, mComponent, std::move(str));
+    }
+}
diff --git a/library/cpp/actors/core/log_iface.h b/library/cpp/actors/core/log_iface.h
new file mode 100644
index 0000000000..b331db9ca8
--- /dev/null
+++ b/library/cpp/actors/core/log_iface.h
@@ -0,0 +1,109 @@
+#pragma once
+
+#include "events.h"
+#include "event_local.h"
+
+namespace NActors {
+    namespace NLog {
+        using EComponent = int;
+
+        enum EPriority : ui16 { // migrate it to EPrio whenever possible
+            PRI_EMERG,
+            PRI_ALERT,
+            PRI_CRIT,
+            PRI_ERROR,
+            PRI_WARN,
+            PRI_NOTICE,
+            PRI_INFO,
+            PRI_DEBUG,
+            PRI_TRACE
+        };
+
+        enum class EPrio : ui16 {
+            Emerg = 0,
+            Alert = 1,
+            Crit = 2,
+            Error = 3,
+            Warn = 4,
+            Notice = 5,
+            Info = 6,
+            Debug = 7,
+            Trace = 8,
+        };
+
+        struct TLevel {
+            TLevel(ui32 raw)
+                : Raw(raw)
+            {
+            }
+
+            TLevel(EPrio prio)
+                : Raw((ui16(prio) + 1) << 8)
+            {
+            }
+
+            EPrio ToPrio() const noexcept {
+                const auto major = Raw >> 8;
+
+                return major > 0 ? EPrio(major - 1) : EPrio::Emerg;
+            }
+
+            bool IsUrgentAbortion() const noexcept {
+                return (Raw >> 8) == 0;
+            }
+
+            /* Generalized monotonic level value composed of major and minor
+               levels. Minor is used for verbosity within major; basic EPrio
+               is mapped to (EPrio + 1, 0), and Major = 0 is reserved as a special
+               space with a meaning like EPrio::Emerg but with extended actions.
+               Thus the logger should map Major = 0 to EPrio::Emerg if it has no
+               idea how to handle special emergency actions.
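+
+               A worked example of the encoding: TLevel(EPrio::Debug).Raw ==
+               (7 + 1) << 8 == 0x0800, and ToPrio() recovers Debug because
+               (0x0800 >> 8) - 1 == 7; any raw value below 0x100 has Major = 0,
+               so IsUrgentAbortion() is true and ToPrio() degrades it to EPrio::Emerg.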
+ */ + + ui32 Raw = 0; // ((ui16(EPrio) + 1) << 8) | ui8(minor) + }; + + enum class EEv { + Log = EventSpaceBegin(TEvents::ES_LOGGER), + LevelReq, + LevelResp, + Ignored, + End + }; + + static_assert(int(EEv::End) < EventSpaceEnd(TEvents::ES_LOGGER), ""); + + class TEvLog: public TEventLocal<TEvLog, int(EEv::Log)> { + public: + TEvLog(TInstant stamp, TLevel level, EComponent comp, const TString &line) + : Stamp(stamp) + , Level(level) + , Component(comp) + , Line(line) + { + } + + TEvLog(TInstant stamp, TLevel level, EComponent comp, TString &&line) + : Stamp(stamp) + , Level(level) + , Component(comp) + , Line(std::move(line)) + { + } + + TEvLog(EPriority prio, EComponent comp, TString line, TInstant time = TInstant::Now()) + : Stamp(time) + , Level(EPrio(prio)) + , Component(comp) + , Line(std::move(line)) + { + } + + const TInstant Stamp = TInstant::Max(); + const TLevel Level; + const EComponent Component = 0; + TString Line; + }; + + } +} diff --git a/library/cpp/actors/core/log_settings.cpp b/library/cpp/actors/core/log_settings.cpp new file mode 100644 index 0000000000..f52f2fc5d2 --- /dev/null +++ b/library/cpp/actors/core/log_settings.cpp @@ -0,0 +1,230 @@ +#include "log_settings.h" + +#include <util/stream/str.h> + +namespace NActors { + namespace NLog { + TSettings::TSettings(const TActorId& loggerActorId, const EComponent loggerComponent, + EComponent minVal, EComponent maxVal, EComponentToStringFunc func, + EPriority defPriority, EPriority defSamplingPriority, + ui32 defSamplingRate, ui64 timeThresholdMs) + : LoggerActorId(loggerActorId) + , LoggerComponent(loggerComponent) + , TimeThresholdMs(timeThresholdMs) + , AllowDrop(true) + , ThrottleDelay(TDuration::MilliSeconds(100)) + , MinVal(0) + , MaxVal(0) + , Mask(0) + , DefPriority(defPriority) + , DefSamplingPriority(defSamplingPriority) + , DefSamplingRate(defSamplingRate) + , UseLocalTimestamps(false) + , Format(PLAIN_FULL_FORMAT) + , ShortHostName("") + , ClusterName("") + { + Append(minVal, maxVal, func); + } + + TSettings::TSettings(const TActorId& loggerActorId, const EComponent loggerComponent, + EPriority defPriority, EPriority defSamplingPriority, + ui32 defSamplingRate, ui64 timeThresholdMs) + : LoggerActorId(loggerActorId) + , LoggerComponent(loggerComponent) + , TimeThresholdMs(timeThresholdMs) + , AllowDrop(true) + , ThrottleDelay(TDuration::MilliSeconds(100)) + , MinVal(0) + , MaxVal(0) + , Mask(0) + , DefPriority(defPriority) + , DefSamplingPriority(defSamplingPriority) + , DefSamplingRate(defSamplingRate) + , UseLocalTimestamps(false) + , Format(PLAIN_FULL_FORMAT) + , ShortHostName("") + , ClusterName("") + { + } + + void TSettings::Append(EComponent minVal, EComponent maxVal, EComponentToStringFunc func) { + Y_VERIFY(minVal >= 0, "NLog::TSettings: minVal must be non-negative"); + Y_VERIFY(maxVal > minVal, "NLog::TSettings: maxVal must be greater than minVal"); + + // update bounds + if (!MaxVal || minVal < MinVal) { + MinVal = minVal; + } + + if (!MaxVal || maxVal > MaxVal) { + MaxVal = maxVal; + + // expand ComponentNames to the new bounds + auto oldMask = Mask; + Mask = PowerOf2Mask(MaxVal); + + TArrayHolder<TAtomic> oldComponentInfo(new TAtomic[Mask + 1]); + ComponentInfo.Swap(oldComponentInfo); + int startVal = oldMask ? 
oldMask + 1 : 0; + for (int i = 0; i < startVal; i++) { + AtomicSet(ComponentInfo[i], AtomicGet(oldComponentInfo[i])); + } + + TComponentSettings defSetting(DefPriority, DefSamplingPriority, DefSamplingRate); + for (int i = startVal; i < Mask + 1; i++) { + AtomicSet(ComponentInfo[i], defSetting.Raw.Data); + } + + ComponentNames.resize(Mask + 1); + } + + // assign new names but validate if newly added members were not used before + for (int i = minVal; i <= maxVal; i++) { + Y_VERIFY(!ComponentNames[i], "component name at %d already set: %s", + i, ComponentNames[i].data()); + ComponentNames[i] = func(i); + } + } + + int TSettings::SetLevelImpl( + const TString& name, bool isSampling, + EPriority priority, EComponent component, TString& explanation) { + TString titleName(name); + titleName.to_title(); + + // check priority + if (!IsValidPriority(priority)) { + TStringStream str; + str << "Invalid " << name; + explanation = str.Str(); + return 1; + } + + if (component == InvalidComponent) { + for (int i = 0; i < Mask + 1; i++) { + TComponentSettings settings = AtomicGet(ComponentInfo[i]); + if (isSampling) { + settings.Raw.X.SamplingLevel = priority; + } else { + settings.Raw.X.Level = priority; + } + AtomicSet(ComponentInfo[i], settings.Raw.Data); + } + + TStringStream str; + + str << titleName + << " for all components has been changed to " + << PriorityToString(EPrio(priority)); + explanation = str.Str(); + return 0; + } else { + if (!IsValidComponent(component)) { + explanation = "Invalid component"; + return 1; + } + TComponentSettings settings = AtomicGet(ComponentInfo[component]); + EPriority oldPriority; + if (isSampling) { + oldPriority = (EPriority)settings.Raw.X.SamplingLevel; + settings.Raw.X.SamplingLevel = priority; + } else { + oldPriority = (EPriority)settings.Raw.X.Level; + settings.Raw.X.Level = priority; + } + AtomicSet(ComponentInfo[component], settings.Raw.Data); + TStringStream str; + str << titleName << " for the component " << ComponentNames[component] + << " has been changed from " << PriorityToString(EPrio(oldPriority)) + << " to " << PriorityToString(EPrio(priority)); + explanation = str.Str(); + return 0; + } + } + + int TSettings::SetLevel(EPriority priority, EComponent component, TString& explanation) { + return SetLevelImpl("priority", false, + priority, component, explanation); + } + + int TSettings::SetSamplingLevel(EPriority priority, EComponent component, TString& explanation) { + return SetLevelImpl("sampling priority", true, + priority, component, explanation); + } + + int TSettings::SetSamplingRate(ui32 sampling, EComponent component, TString& explanation) { + if (component == InvalidComponent) { + for (int i = 0; i < Mask + 1; i++) { + TComponentSettings settings = AtomicGet(ComponentInfo[i]); + settings.Raw.X.SamplingRate = sampling; + AtomicSet(ComponentInfo[i], settings.Raw.Data); + } + TStringStream str; + str << "Sampling rate for all components has been changed to " << sampling; + explanation = str.Str(); + } else { + if (!IsValidComponent(component)) { + explanation = "Invalid component"; + return 1; + } + TComponentSettings settings = AtomicGet(ComponentInfo[component]); + ui32 oldSampling = settings.Raw.X.SamplingRate; + settings.Raw.X.SamplingRate = sampling; + AtomicSet(ComponentInfo[component], settings.Raw.Data); + TStringStream str; + str << "Sampling rate for the component " << ComponentNames[component] + << " has been changed from " << oldSampling + << " to " << sampling; + explanation = str.Str(); + } + return 0; + } + + int 
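+        // Smallest mask of the form 2^k - 1 that covers val, e.g. PowerOf2Mask(5) == 7
+        // and PowerOf2Mask(8) == 15 (a worked example, traceable from the loop below)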
TSettings::PowerOf2Mask(int val) { + int mask = 1; + while ((val & mask) != val) { + mask <<= 1; + mask |= 1; + } + return mask; + } + + bool TSettings::IsValidPriority(EPriority priority) { + return priority == PRI_EMERG || priority == PRI_ALERT || + priority == PRI_CRIT || priority == PRI_ERROR || + priority == PRI_WARN || priority == PRI_NOTICE || + priority == PRI_INFO || priority == PRI_DEBUG || priority == PRI_TRACE; + } + + bool TSettings::IsValidComponent(EComponent component) { + return (MinVal <= component) && (component <= MaxVal) && !ComponentNames[component].empty(); + } + + void TSettings::SetAllowDrop(bool val) { + AllowDrop = val; + } + + void TSettings::SetThrottleDelay(TDuration value) { + ThrottleDelay = value; + } + + void TSettings::SetUseLocalTimestamps(bool value) { + UseLocalTimestamps = value; + } + + EComponent TSettings::FindComponent(const TStringBuf& componentName) const { + if (componentName.empty()) + return InvalidComponent; + + for (EComponent component = MinVal; component <= MaxVal; ++component) { + if (ComponentNames[component] == componentName) + return component; + } + + return InvalidComponent; + } + + } + +} diff --git a/library/cpp/actors/core/log_settings.h b/library/cpp/actors/core/log_settings.h new file mode 100644 index 0000000000..7fe4504edd --- /dev/null +++ b/library/cpp/actors/core/log_settings.h @@ -0,0 +1,176 @@ +#pragma once + +#include "actor.h" +#include "log_iface.h" +#include <util/generic/vector.h> +#include <util/digest/murmur.h> +#include <util/random/easy.h> + +namespace NActors { + namespace NLog { + inline const char* PriorityToString(EPrio priority) { + switch (priority) { + case EPrio::Emerg: + return "EMERG"; + case EPrio::Alert: + return "ALERT"; + case EPrio::Crit: + return "CRIT"; + case EPrio::Error: + return "ERROR"; + case EPrio::Warn: + return "WARN"; + case EPrio::Notice: + return "NOTICE"; + case EPrio::Info: + return "INFO"; + case EPrio::Debug: + return "DEBUG"; + case EPrio::Trace: + return "TRACE"; + default: + return "UNKNOWN"; + } + } + + // You can structure your program to have multiple logical components. + // In this case you can set different log priorities for different + // components. And you can change component's priority while system + // is running. Suspect a component has a bug? Turn DEBUG priority level on + // for this component. 
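+        //
+        // A sketch of the typical wiring (the enum and the name function are illustrative,
+        // not part of this header):
+        //
+        //   enum EServices : int { GLOBAL = 0, STORAGE = 1, SERVICES_MAX };
+        //   const TString& ServiceName(EServices);  // e.g. generated by protoc
+        //   settings->Append(GLOBAL, SERVICES_MAX, ServiceName);
+        //   TString explanation;
+        //   settings->SetLevel(PRI_DEBUG, STORAGE, explanation);  // verbose logs for one component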
+        static const int InvalidComponent = -1;
+
+        // Converts an EComponent id to its string name
+        using EComponentToStringFunc = std::function<const TString&(EComponent)>;
+
+        // Log settings
+        struct TComponentSettings {
+            union {
+                struct {
+                    ui32 SamplingRate;
+                    ui8 SamplingLevel;
+                    ui8 Level;
+                } X;
+
+                ui64 Data;
+            } Raw;
+
+            TComponentSettings(TAtomicBase data) {
+                Raw.Data = (ui64)data;
+            }
+
+            TComponentSettings(ui8 level, ui8 samplingLevel, ui32 samplingRate) {
+                Raw.X.Level = level;
+                Raw.X.SamplingLevel = samplingLevel;
+                Raw.X.SamplingRate = samplingRate;
+            }
+        };
+
+        struct TSettings: public TThrRefBase {
+        public:
+            TActorId LoggerActorId;
+            EComponent LoggerComponent;
+            ui64 TimeThresholdMs;
+            bool AllowDrop;
+            TDuration ThrottleDelay;
+            TArrayHolder<TAtomic> ComponentInfo;
+            TVector<TString> ComponentNames;
+            EComponent MinVal;
+            EComponent MaxVal;
+            EComponent Mask;
+            EPriority DefPriority;
+            EPriority DefSamplingPriority;
+            ui32 DefSamplingRate;
+            bool UseLocalTimestamps;
+
+            enum ELogFormat {
+                PLAIN_FULL_FORMAT,
+                PLAIN_SHORT_FORMAT,
+                JSON_FORMAT
+            };
+            ELogFormat Format;
+            TString ShortHostName;
+            TString ClusterName;
+            TString MessagePrefix;
+
+            // The best way to provide minVal, maxVal and func is to have
+            // a protobuf enumeration of components. In this case protoc
+            // automatically generates YOURTYPE_MIN, YOURTYPE_MAX and
+            // YOURTYPE_Name for you.
+            TSettings(const TActorId& loggerActorId, const EComponent loggerComponent,
+                      EComponent minVal, EComponent maxVal, EComponentToStringFunc func,
+                      EPriority defPriority, EPriority defSamplingPriority = PRI_DEBUG,
+                      ui32 defSamplingRate = 0, ui64 timeThresholdMs = 1000);
+
+            TSettings(const TActorId& loggerActorId, const EComponent loggerComponent,
+                      EPriority defPriority, EPriority defSamplingPriority = PRI_DEBUG,
+                      ui32 defSamplingRate = 0, ui64 timeThresholdMs = 1000);
+
+            void Append(EComponent minVal, EComponent maxVal, EComponentToStringFunc func);
+
+            template <typename T>
+            void Append(T minVal, T maxVal, const TString& (*func)(T)) {
+                Append(
+                    static_cast<EComponent>(minVal),
+                    static_cast<EComponent>(maxVal),
+                    [func](EComponent c) -> const TString& {
+                        return func(static_cast<T>(c));
+                    }
+                );
+            }
+
+            inline bool Satisfies(EPriority priority, EComponent component, ui64 sampleBy = 0) const {
+                // by using Mask we don't get outside of array boundaries
+                TComponentSettings settings = GetComponentSettings(component);
+                if (priority > settings.Raw.X.Level) {
+                    if (priority > settings.Raw.X.SamplingLevel) {
+                        return false; // priority > both levels ==> do not log
+                    }
+                    // priority <= sampling level ==> apply sampling
+                    ui32 samplingRate = settings.Raw.X.SamplingRate;
+                    if (samplingRate) {
+                        ui32 samplingValue = sampleBy ? MurmurHash<ui32>((const char*)&sampleBy, sizeof(sampleBy))
+                            : samplingRate != 1 ?
RandomNumber<ui32>() : 0; + return (samplingValue % samplingRate == 0); + } else { + // sampling rate not set ==> do not log + return false; + } + } else { + // priority <= log level ==> log + return true; + } + } + + inline TComponentSettings GetComponentSettings(EComponent component) const { + Y_VERIFY_DEBUG((component & Mask) == component); + // by using Mask we don't get outside of array boundaries + return TComponentSettings(AtomicGet(ComponentInfo[component & Mask])); + } + + const char* ComponentName(EComponent component) const { + Y_VERIFY_DEBUG((component & Mask) == component); + return ComponentNames[component & Mask].data(); + } + + int SetLevel(EPriority priority, EComponent component, TString& explanation); + int SetSamplingLevel(EPriority priority, EComponent component, TString& explanation); + int SetSamplingRate(ui32 sampling, EComponent component, TString& explanation); + EComponent FindComponent(const TStringBuf& componentName) const; + static int PowerOf2Mask(int val); + static bool IsValidPriority(EPriority priority); + bool IsValidComponent(EComponent component); + void SetAllowDrop(bool val); + void SetThrottleDelay(TDuration value); + void SetUseLocalTimestamps(bool value); + + private: + int SetLevelImpl( + const TString& name, bool isSampling, + EPriority priority, EComponent component, TString& explanation); + }; + + } + +} diff --git a/library/cpp/actors/core/log_ut.cpp b/library/cpp/actors/core/log_ut.cpp new file mode 100644 index 0000000000..09b5f88ea2 --- /dev/null +++ b/library/cpp/actors/core/log_ut.cpp @@ -0,0 +1,185 @@ +#include "log.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <library/cpp/actors/testlib/test_runtime.h> + +using namespace NMonitoring; +using namespace NActors; +using namespace NActors::NLog; + +namespace { + const TString& ServiceToString(int) { + static const TString FAKE{"FAKE"}; + return FAKE; + } + + TIntrusivePtr<TSettings> DefaultSettings() { + auto loggerId = TActorId{0, "Logger"}; + auto s = MakeIntrusive<TSettings>(loggerId, 0, EPriority::PRI_TRACE); + s->SetAllowDrop(false); + s->Append(0, 1, ServiceToString); + return s; + } + + TIntrusivePtr<TSettings> DroppingSettings(ui64 timeThresholdMs) { + auto loggerId = TActorId{0, "Logger"}; + auto s = MakeIntrusive<TSettings>( + loggerId, + 0, + EPriority::PRI_TRACE, + EPriority::PRI_DEBUG, + (ui32)0, + timeThresholdMs); + s->Append(0, 1, ServiceToString); + return s; + } + + class TMockBackend: public TLogBackend { + public: + using TWriteImpl = std::function<void(const TLogRecord&)>; + using TReopenImpl = std::function<void()>; + + static void REOPEN_NOP() { } + + TMockBackend(TWriteImpl writeImpl, TReopenImpl reopenImpl = REOPEN_NOP) + : WriteImpl_{writeImpl} + , ReopenImpl_{reopenImpl} + { + } + + void WriteData(const TLogRecord& r) override { + WriteImpl_(r); + } + + void ReopenLog() override { } + + void SetWriteImpl(TWriteImpl writeImpl) { + WriteImpl_ = writeImpl; + } + + private: + TWriteImpl WriteImpl_; + TReopenImpl ReopenImpl_; + }; + + void ThrowAlways(const TLogRecord&) { + ythrow yexception(); + }; + + struct TFixture { + TFixture( + TIntrusivePtr<TSettings> settings, + TMockBackend::TWriteImpl writeImpl = ThrowAlways) + { + Runtime.Initialize(); + LogBackend.reset(new TMockBackend{writeImpl}); + LoggerActor = Runtime.Register(new TLoggerActor{settings, LogBackend, Counters}); + Runtime.SetScheduledEventFilter([] (auto&&, auto&&, auto&&, auto) { + return false; + }); + } + + TFixture(TMockBackend::TWriteImpl writeImpl = ThrowAlways) + : 
TFixture(DefaultSettings(), writeImpl) + {} + + void WriteLog() { + Runtime.Send(new IEventHandle{LoggerActor, {}, new TEvLog(TInstant::Zero(), TLevel{EPrio::Emerg}, 0, "foo")}); + } + + void WriteLog(TInstant ts) { + Runtime.Send(new IEventHandle{LoggerActor, {}, new TEvLog(ts, TLevel{EPrio::Emerg}, 0, "foo")}); + } + + void Wakeup() { + Runtime.Send(new IEventHandle{LoggerActor, {}, new TEvents::TEvWakeup}); + } + + TIntrusivePtr<TDynamicCounters> Counters{MakeIntrusive<TDynamicCounters>()}; + std::shared_ptr<TMockBackend> LogBackend; + TActorId LoggerActor; + TTestActorRuntimeBase Runtime; + }; +} + + +Y_UNIT_TEST_SUITE(TLoggerActorTest) { + Y_UNIT_TEST(NoCrashOnWriteFailure) { + TFixture test; + test.WriteLog(); + // everything is okay as long as we get here + } + + Y_UNIT_TEST(SubsequentWritesAreIgnored) { + size_t count{0}; + auto countWrites = [&count] (auto&& r) { + count++; + ThrowAlways(r); + }; + + TFixture test{countWrites}; + test.WriteLog(); + UNIT_ASSERT_VALUES_EQUAL(count, 1); + + // at this point we should have started dropping messages + for (auto i = 0; i < 5; ++i) { + test.WriteLog(); + } + + UNIT_ASSERT_VALUES_EQUAL(count, 1); + } + + Y_UNIT_TEST(LoggerCanRecover) { + TFixture test; + test.WriteLog(); + + TVector<TString> messages; + auto acceptWrites = [&] (const TLogRecord& r) { + messages.emplace_back(r.Data, r.Len); + }; + + auto scheduled = test.Runtime.CaptureScheduledEvents(); + UNIT_ASSERT_VALUES_EQUAL(scheduled.size(), 1); + + test.LogBackend->SetWriteImpl(acceptWrites); + test.Wakeup(); + + const auto COUNT = 10; + for (auto i = 0; i < COUNT; ++i) { + test.WriteLog(); + } + + UNIT_ASSERT_VALUES_EQUAL(messages.size(), COUNT); + } + + Y_UNIT_TEST(ShouldObeyTimeThresholdMsWhenOverloaded) { + TFixture test{DroppingSettings(5000)}; + + TVector<TString> messages; + auto acceptWrites = [&] (const TLogRecord& r) { + messages.emplace_back(r.Data, r.Len); + }; + + test.LogBackend->SetWriteImpl(acceptWrites); + test.Wakeup(); + + const auto COUNT = 11; + for (auto i = 0; i < COUNT; ++i) { + test.WriteLog(); + } + + UNIT_ASSERT_VALUES_EQUAL(messages.size(), COUNT); + + test.Runtime.AdvanceCurrentTime(TDuration::Seconds(20)); + auto now = test.Runtime.GetCurrentTime(); + + test.WriteLog(now - TDuration::Seconds(5)); + + UNIT_ASSERT_VALUES_EQUAL(messages.size(), COUNT + 1); + + test.WriteLog(now - TDuration::Seconds(6)); + + UNIT_ASSERT_VALUES_EQUAL(messages.size(), COUNT + 1); + } +} diff --git a/library/cpp/actors/core/mailbox.cpp b/library/cpp/actors/core/mailbox.cpp new file mode 100644 index 0000000000..d84b4f9e46 --- /dev/null +++ b/library/cpp/actors/core/mailbox.cpp @@ -0,0 +1,551 @@ +#include "mailbox.h" +#include "actorsystem.h" + +#include <library/cpp/actors/util/datetime.h> + +#include <util/system/sanitizers.h> + +namespace NActors { + TMailboxTable::TMailboxTable() + : LastAllocatedLine(0) + , AllocatedMailboxCount(0) + , CachedSimpleMailboxes(0) + , CachedRevolvingMailboxes(0) + , CachedHTSwapMailboxes(0) + , CachedReadAsFilledMailboxes(0) + , CachedTinyReadAsFilledMailboxes(0) + { + memset((void*)Lines, 0, sizeof(Lines)); + } + + bool IsGoodForCleanup(const TMailboxHeader* header) { + switch (AtomicLoad(&header->ExecutionState)) { + case TMailboxHeader::TExecutionState::Inactive: + case TMailboxHeader::TExecutionState::Scheduled: + return true; + case TMailboxHeader::TExecutionState::Leaving: + case TMailboxHeader::TExecutionState::Executing: + case TMailboxHeader::TExecutionState::LeavingMarked: + return false; + case 
TMailboxHeader::TExecutionState::Free: + case TMailboxHeader::TExecutionState::FreeScheduled: + return true; + case TMailboxHeader::TExecutionState::FreeLeaving: + case TMailboxHeader::TExecutionState::FreeExecuting: + case TMailboxHeader::TExecutionState::FreeLeavingMarked: + return false; + default: + Y_FAIL(); + } + } + + template <typename TMailbox> + void DestructMailboxLine(ui8* begin, ui8* end) { + const ui32 sx = TMailbox::AlignedSize(); + for (ui8* x = begin; x + sx <= end; x += sx) { + TMailbox* mailbox = reinterpret_cast<TMailbox*>(x); + Y_VERIFY(IsGoodForCleanup(mailbox)); + mailbox->ExecutionState = Max<ui32>(); + mailbox->~TMailbox(); + } + } + + template <typename TMailbox> + bool CleanupMailboxLine(ui8* begin, ui8* end) { + const ui32 sx = TMailbox::AlignedSize(); + bool done = true; + for (ui8* x = begin; x + sx <= end; x += sx) { + TMailbox* mailbox = reinterpret_cast<TMailbox*>(x); + Y_VERIFY(IsGoodForCleanup(mailbox)); + done &= mailbox->CleanupActors() && mailbox->CleanupEvents(); + } + return done; + } + + TMailboxTable::~TMailboxTable() { + // on cleanup we must traverse everything and free stuff + for (ui32 i = 0; i < LastAllocatedLine; ++i) { + if (TMailboxLineHeader* lineHeader = Lines[i]) { + switch (lineHeader->MailboxType) { + case TMailboxType::Simple: + DestructMailboxLine<TSimpleMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize); + break; + case TMailboxType::Revolving: + DestructMailboxLine<TRevolvingMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize); + break; + case TMailboxType::HTSwap: + DestructMailboxLine<THTSwapMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize); + break; + case TMailboxType::ReadAsFilled: + DestructMailboxLine<TReadAsFilledMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize); + break; + case TMailboxType::TinyReadAsFilled: + DestructMailboxLine<TTinyReadAsFilledMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize); + break; + default: + Y_FAIL(); + } + + lineHeader->~TMailboxLineHeader(); + free(lineHeader); + Lines[i] = nullptr; + } + } + + while (MailboxCacheSimple.Pop(0)) + ; + while (MailboxCacheRevolving.Pop(0)) + ; + while (MailboxCacheHTSwap.Pop(0)) + ; + while (MailboxCacheReadAsFilled.Pop(0)) + ; + while (MailboxCacheTinyReadAsFilled.Pop(0)) + ; + } + + bool TMailboxTable::Cleanup() { + bool done = true; + for (ui32 i = 0; i < LastAllocatedLine; ++i) { + if (TMailboxLineHeader* lineHeader = Lines[i]) { + switch (lineHeader->MailboxType) { + case TMailboxType::Simple: + done &= CleanupMailboxLine<TSimpleMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize); + break; + case TMailboxType::Revolving: + done &= CleanupMailboxLine<TRevolvingMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize); + break; + case TMailboxType::HTSwap: + done &= CleanupMailboxLine<THTSwapMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize); + break; + case TMailboxType::ReadAsFilled: + done &= CleanupMailboxLine<TReadAsFilledMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize); + break; + case TMailboxType::TinyReadAsFilled: + done &= CleanupMailboxLine<TTinyReadAsFilledMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize); + break; + default: + Y_FAIL(); + } + } + } + return done; + } + + TMailboxHeader* TMailboxTable::Get(ui32 hint) { + // get line + const ui32 lineIndex = (hint & LineIndexMask) >> LineIndexShift; + const ui32 lineHint = hint & LineHintMask; + + Y_VERIFY((lineIndex < MaxLines) && (lineHint < LineSize / 64)); + if (lineHint == 0) + return nullptr; + + if 
(TMailboxLineHeader* const x = AtomicLoad(Lines + lineIndex)) {
+            switch (x->MailboxType) {
+                case TMailboxType::Simple:
+                    return TSimpleMailbox::Get(lineHint, x);
+                case TMailboxType::Revolving:
+                    return TRevolvingMailbox::Get(lineHint, x);
+                case TMailboxType::HTSwap:
+                    return THTSwapMailbox::Get(lineHint, x);
+                case TMailboxType::ReadAsFilled:
+                    return TReadAsFilledMailbox::Get(lineHint, x);
+                case TMailboxType::TinyReadAsFilled:
+                    return TTinyReadAsFilledMailbox::Get(lineHint, x);
+                default:
+                    Y_VERIFY_DEBUG(false);
+                    break;
+            }
+        }
+
+        return nullptr;
+    }
+
+    bool TMailboxTable::SendTo(TAutoPtr<IEventHandle>& ev, IExecutorPool* executorPool) {
+        const TActorId& recipient = ev->GetRecipientRewrite();
+        const ui32 hint = recipient.Hint();
+
+        // copy-paste from Get to avoid duplicated type-switches
+        const ui32 lineIndex = (hint & LineIndexMask) >> LineIndexShift;
+        const ui32 lineHint = hint & LineHintMask;
+
+        Y_VERIFY((lineIndex < MaxLines) && (lineHint < LineSize / 64));
+        if (lineHint == 0)
+            return false;
+
+        if (TMailboxLineHeader* const x = AtomicLoad(Lines + lineIndex)) {
+            switch (x->MailboxType) {
+                case TMailboxType::Simple: {
+                    TSimpleMailbox* const mailbox = TSimpleMailbox::Get(lineHint, x);
+#if (!defined(_tsan_enabled_))
+                    Y_VERIFY_DEBUG(mailbox->Type == (ui32)x->MailboxType);
+#endif
+                    mailbox->Queue.Push(ev.Release());
+                    if (mailbox->MarkForSchedule()) {
+                        RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast());
+                        executorPool->ScheduleActivation(hint);
+                    }
+                }
+                    return true;
+                case TMailboxType::Revolving: {
+                    // The ActorId could be stale and could come from a different machine. If the local process
+                    // has restarted, then a stale ActorId from a remote machine might reference an actor with a
+                    // simple mailbox, which is smaller than a revolving mailbox. In that case the 'lineHint' index
+                    // might be greater than the actual array size. Normally it is ok to store a stale event into
+                    // another actor's valid mailbox, because Receive will compare the receiver actor id and discard
+                    // the stale event. But in this case we should discard the event right away instead of trying
+                    // to enqueue it to a mailbox at an invalid address.
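The bounds check this comment motivates is plain slot arithmetic: every line is a 256 KiB slab whose first 64 bytes hold the line header, and the remainder is carved into fixed-size slots, so larger mailbox types fit fewer slots per line. A standalone sketch of the numbers involved (illustrative only; it mirrors the constants declared in mailbox.h):

```cpp
#include <cstdint>

constexpr uint64_t LineSize = 262144; // 64 * 2^12, as in TMailboxTable
constexpr uint64_t HeaderSize = 64;   // TMailboxLineHeader occupies the first cache line

constexpr uint64_t SlotsPerLine(uint64_t alignedMailboxSize) {
    return (LineSize - HeaderSize) / alignedMailboxSize;
}

static_assert(SlotsPerLine(64) == 4095, "64-byte simple mailboxes per line");
static_assert(SlotsPerLine(128) == 2047, "128-byte revolving mailboxes per line");

// A stale hint minted for a line of simple mailboxes can be as large as
// 4095, which is out of range for a revolving line; hence the
// `lineHint > MaxMailboxesInLine()` guard below.
```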
+ // NOTE: lineHint is 1-based + static_assert(TSimpleMailbox::AlignedSize() <= TRevolvingMailbox::AlignedSize(), + "We expect that one line can store more simple mailboxes than revolving mailboxes"); + if (lineHint > TRevolvingMailbox::MaxMailboxesInLine()) + return false; + + TRevolvingMailbox* const mailbox = TRevolvingMailbox::Get(lineHint, x); +#if (!defined(_tsan_enabled_)) + Y_VERIFY_DEBUG(mailbox->Type == (ui32)x->MailboxType); +#endif + mailbox->QueueWriter.Push(ev.Release()); + if (mailbox->MarkForSchedule()) { + RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast()); + executorPool->ScheduleActivation(hint); + } + } + return true; + case TMailboxType::HTSwap: { + THTSwapMailbox* const mailbox = THTSwapMailbox::Get(lineHint, x); +#if (!defined(_tsan_enabled_)) + Y_VERIFY_DEBUG(mailbox->Type == (ui32)x->MailboxType); +#endif + mailbox->Queue.Push(ev.Release()); + if (mailbox->MarkForSchedule()) { + RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast()); + executorPool->ScheduleActivation(hint); + } + } + return true; + case TMailboxType::ReadAsFilled: { + if (lineHint > TReadAsFilledMailbox::MaxMailboxesInLine()) + return false; + + TReadAsFilledMailbox* const mailbox = TReadAsFilledMailbox::Get(lineHint, x); +#if (!defined(_tsan_enabled_)) + Y_VERIFY_DEBUG(mailbox->Type == (ui32)x->MailboxType); +#endif + mailbox->Queue.Push(ev.Release()); + if (mailbox->MarkForSchedule()) { + RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast()); + executorPool->ScheduleActivation(hint); + } + } + return true; + case TMailboxType::TinyReadAsFilled: { + if (lineHint > TTinyReadAsFilledMailbox::MaxMailboxesInLine()) + return false; + + TTinyReadAsFilledMailbox* const mailbox = TTinyReadAsFilledMailbox::Get(lineHint, x); +#if (!defined(_tsan_enabled_)) + Y_VERIFY_DEBUG(mailbox->Type == (ui32)x->MailboxType); +#endif + mailbox->Queue.Push(ev.Release()); + if (mailbox->MarkForSchedule()) { + RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast()); + executorPool->ScheduleActivation(hint); + } + } + return true; + default: + Y_FAIL("unknown mailbox type"); + } + } + + return false; + } + + ui32 TMailboxTable::AllocateMailbox(TMailboxType::EType type, ui64 revolvingCounter) { + ui32 x = TryAllocateMailbox(type, revolvingCounter); + if (x == 0) + x = AllocateNewLine(type); + return x; + } + + ui32 TMailboxTable::TryAllocateMailbox(TMailboxType::EType type, ui64 revolvingCounter) { + switch (type) { + case TMailboxType::Simple: + do { + if (ui32 ret = MailboxCacheSimple.Pop(revolvingCounter)) { + AtomicDecrement(CachedSimpleMailboxes); + return ret; + } + } while (AtomicGet(CachedSimpleMailboxes) > (MailboxCacheSimple.Concurrency * 512)); + return 0; + case TMailboxType::Revolving: + do { + if (ui32 ret = MailboxCacheRevolving.Pop(revolvingCounter)) { + AtomicDecrement(CachedRevolvingMailboxes); + return ret; + } + } while (AtomicGet(CachedRevolvingMailboxes) > (MailboxCacheRevolving.Concurrency * 512)); + return 0; + case TMailboxType::HTSwap: + do { + if (ui32 ret = MailboxCacheHTSwap.Pop(revolvingCounter)) { + AtomicDecrement(CachedHTSwapMailboxes); + return ret; + } + } while (AtomicGet(CachedHTSwapMailboxes) > (MailboxCacheHTSwap.Concurrency * 512)); + return 0; + case TMailboxType::ReadAsFilled: + do { + if (ui32 ret = MailboxCacheReadAsFilled.Pop(revolvingCounter)) { + AtomicDecrement(CachedReadAsFilledMailboxes); + return ret; + } + } while (AtomicGet(CachedReadAsFilledMailboxes) > 
(MailboxCacheReadAsFilled.Concurrency * 512)); + return 0; + case TMailboxType::TinyReadAsFilled: + do { + if (ui32 ret = MailboxCacheTinyReadAsFilled.Pop(revolvingCounter)) { + AtomicDecrement(CachedTinyReadAsFilledMailboxes); + return ret; + } + } while (AtomicGet(CachedTinyReadAsFilledMailboxes) > (MailboxCacheTinyReadAsFilled.Concurrency * 512)); + return 0; + default: + Y_FAIL("Unknown mailbox type"); + } + } + + void TMailboxTable::ReclaimMailbox(TMailboxType::EType type, ui32 hint, ui64 revolvingCounter) { + if (hint != 0) { + switch (type) { + case TMailboxType::Simple: + MailboxCacheSimple.Push(hint, revolvingCounter); + AtomicIncrement(CachedSimpleMailboxes); + break; + case TMailboxType::Revolving: + MailboxCacheRevolving.Push(hint, revolvingCounter); + AtomicIncrement(CachedRevolvingMailboxes); + break; + case TMailboxType::HTSwap: + MailboxCacheHTSwap.Push(hint, revolvingCounter); + AtomicIncrement(CachedHTSwapMailboxes); + break; + case TMailboxType::ReadAsFilled: + MailboxCacheReadAsFilled.Push(hint, revolvingCounter); + AtomicIncrement(CachedReadAsFilledMailboxes); + break; + case TMailboxType::TinyReadAsFilled: + MailboxCacheTinyReadAsFilled.Push(hint, revolvingCounter); + AtomicIncrement(CachedTinyReadAsFilledMailboxes); + break; + default: + Y_FAIL(); + } + } + } + + TMailboxHeader::TMailboxHeader(TMailboxType::EType type) + : ExecutionState(TExecutionState::Free) + , Reserved(0) + , Type(type) + , ActorPack(TMailboxActorPack::Simple) + , Knobs(0) + { + ActorsInfo.Simple.ActorId = 0; + ActorsInfo.Simple.Actor = nullptr; + } + + TMailboxHeader::~TMailboxHeader() { + CleanupActors(); + } + + bool TMailboxHeader::CleanupActors() { + bool done = true; + switch (ActorPack) { + case TMailboxActorPack::Simple: { + if (ActorsInfo.Simple.ActorId != 0) { + delete ActorsInfo.Simple.Actor; + done = false; + } + break; + } + case TMailboxActorPack::Map: { + for (auto& [actorId, actor] : *ActorsInfo.Map.ActorsMap) { + delete actor; + } + delete ActorsInfo.Map.ActorsMap; + done = false; + break; + } + case TMailboxActorPack::Array: { + for (ui64 i = 0; i < ActorsInfo.Array.ActorsCount; ++i) { + delete ActorsInfo.Array.ActorsArray->Actors[i].Actor; + } + delete ActorsInfo.Array.ActorsArray; + done = false; + break; + } + } + ActorPack = TMailboxActorPack::Simple; + ActorsInfo.Simple.ActorId = 0; + ActorsInfo.Simple.Actor = nullptr; + return done; + } + + std::pair<ui32, ui32> TMailboxHeader::CountMailboxEvents(ui64 localActorId, ui32 maxTraverse) { + switch (Type) { + case TMailboxType::Simple: + return static_cast<TMailboxTable::TSimpleMailbox*>(this)->CountSimpleMailboxEvents(localActorId, maxTraverse); + case TMailboxType::Revolving: + return static_cast<TMailboxTable::TRevolvingMailbox*>(this)->CountRevolvingMailboxEvents(localActorId, maxTraverse); + default: + return {0, 0}; + } + } + + TMailboxTable::TSimpleMailbox::TSimpleMailbox() + : TMailboxHeader(TMailboxType::Simple) + , ScheduleMoment(0) + { + } + + TMailboxTable::TSimpleMailbox::~TSimpleMailbox() { + CleanupEvents(); + } + + bool TMailboxTable::TSimpleMailbox::CleanupEvents() { + const bool done = (Queue.Head() == nullptr); + while (IEventHandle* ev = Queue.Pop()) + delete ev; + return done; + } + + std::pair<ui32, ui32> TMailboxTable::TSimpleMailbox::CountSimpleMailboxEvents(ui64 localActorId, ui32 maxTraverse) { + ui32 local = 0; + ui32 total = 0; + + auto it = Queue.ReadIterator(); + while (IEventHandle* x = it.Next()) { + ++total; + if (x->GetRecipientRewrite().LocalId() == localActorId) + ++local; + if (total >= 
maxTraverse) + break; + } + + return std::make_pair(local, total); + } + + TMailboxTable::TRevolvingMailbox::TRevolvingMailbox() + : TMailboxHeader(TMailboxType::Revolving) + , QueueWriter(QueueReader) + , Reserved1(0) + , Reserved2(0) + , ScheduleMoment(0) + { + } + + TMailboxTable::TRevolvingMailbox::~TRevolvingMailbox() { + CleanupEvents(); + } + + bool TMailboxTable::TRevolvingMailbox::CleanupEvents() { + const bool done = (QueueReader.Head() == nullptr); + while (IEventHandle* ev = QueueReader.Pop()) + delete ev; + return done; + } + + std::pair<ui32, ui32> TMailboxTable::TRevolvingMailbox::CountRevolvingMailboxEvents(ui64 localActorId, ui32 maxTraverse) { + ui32 local = 0; + ui32 total = 0; + + auto it = QueueReader.Iterator(); + + while (IEventHandle* x = it.Next()) { + ++total; + if (x->GetRecipientRewrite().LocalId() == localActorId) + ++local; + if (total >= maxTraverse) + break; + } + + return std::make_pair(local, total); + } + + template <typename T> + static ui32 InitNewLine(ui8* x, ui8* end) { + const ui32 sx = T::AlignedSize(); + + for (ui32 index = 1; x + sx <= end; x += sx, ++index) + ::new (x) T(); + + return sx; + } + + ui32 TMailboxTable::AllocateNewLine(TMailboxType::EType type) { + ui8* ptr = (ui8*)malloc(LineSize); + ui8* end = ptr + LineSize; + + const ui32 lineIndex = (ui32)AtomicIncrement(LastAllocatedLine) - 1; + const ui32 lineIndexMask = (lineIndex << LineIndexShift) & LineIndexMask; + + // first 64 bytes is TMailboxLineHeader + TMailboxLineHeader* header = ::new (ptr) TMailboxLineHeader(type, lineIndex); + + ui8* x = ptr + 64; + ui32 sx = 0; + TMailboxCache* cache = nullptr; + TAtomic* counter = nullptr; + + switch (type) { + case TMailboxType::Simple: + sx = InitNewLine<TSimpleMailbox>(x, end); + cache = &MailboxCacheSimple; + counter = &CachedSimpleMailboxes; + break; + case TMailboxType::Revolving: + sx = InitNewLine<TRevolvingMailbox>(x, end); + cache = &MailboxCacheRevolving; + counter = &CachedRevolvingMailboxes; + break; + case TMailboxType::HTSwap: + sx = InitNewLine<THTSwapMailbox>(x, end); + cache = &MailboxCacheHTSwap; + counter = &CachedHTSwapMailboxes; + break; + case TMailboxType::ReadAsFilled: + sx = InitNewLine<TReadAsFilledMailbox>(x, end); + cache = &MailboxCacheReadAsFilled; + counter = &CachedReadAsFilledMailboxes; + break; + case TMailboxType::TinyReadAsFilled: + sx = InitNewLine<TTinyReadAsFilledMailbox>(x, end); + cache = &MailboxCacheTinyReadAsFilled; + counter = &CachedTinyReadAsFilledMailboxes; + break; + default: + Y_FAIL(); + } + + AtomicStore(Lines + lineIndex, header); + + ui32 ret = lineIndexMask | 1; + + ui32 index = 2; + for (ui32 endIndex = LineSize / sx; index != endIndex;) { + const ui32 bufSize = 8; + ui32 buf[bufSize]; + ui32 bufIndex; + for (bufIndex = 0; index != endIndex && bufIndex != bufSize; ++bufIndex, ++index) + buf[bufIndex] = lineIndexMask | index; + cache->PushBulk(buf, bufIndex, index); + AtomicAdd(*counter, bufIndex); + } + + AtomicAdd(AllocatedMailboxCount, index - 1); + + return ret; + } +} diff --git a/library/cpp/actors/core/mailbox.h b/library/cpp/actors/core/mailbox.h new file mode 100644 index 0000000000..0bd9c4d314 --- /dev/null +++ b/library/cpp/actors/core/mailbox.h @@ -0,0 +1,553 @@ +#pragma once + +#include "defs.h" +#include "event.h" +#include "actor.h" +#include "mailbox_queue_simple.h" +#include "mailbox_queue_revolving.h" +#include <library/cpp/actors/util/unordered_cache.h> +#include <library/cpp/threading/queue/mpsc_htswap.h> +#include <library/cpp/threading/queue/mpsc_read_as_filled.h> 
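`AllocateNewLine` above hands hint `lineIndexMask | 1` to the caller and precharges the per-type cache with the remaining slot hints in bulks of eight. The bit packing it relies on can be sketched as follows (constants copied from `TMailboxTable`; the `MakeHint` helper is illustrative, not library API):

```cpp
#include <cassert>
#include <cstdint>

// Hint layout (see LineIndexShift/LineIndexMask/LineHintMask in TMailboxTable):
// 17 bits select the line, the low 12 bits select the slot inside the line.
constexpr uint32_t LineIndexShift = 12;
constexpr uint32_t LineIndexMask = 0x1FFFFu << LineIndexShift;
constexpr uint32_t LineHintMask = 0xFFFu;

constexpr uint32_t MakeHint(uint32_t lineIndex, uint32_t slot) {
    return ((lineIndex << LineIndexShift) & LineIndexMask) | (slot & LineHintMask);
}

int main() {
    // Slot 0 is reserved: a zero lineHint means "no mailbox".
    const uint32_t hint = MakeHint(3, 1);
    assert(((hint & LineIndexMask) >> LineIndexShift) == 3);
    assert((hint & LineHintMask) == 1);
    return 0;
}
```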
+#include <util/generic/hash.h> +#include <util/system/hp_timer.h> +#include <util/generic/ptr.h> +// TODO: clean all broken arcadia atomic stuff and replace with intrinsics + +namespace NActors { + class IActor; + class IExecutorPool; + + const ui64 ARRAY_CAPACITY = 8; + + // structure of hint: + // 1 bit: is service or direct hint + // 2 bits: pool index + // 17 bits: line + // 12 bits: index of mailbox inside of line + + struct TMailboxHeader { + struct TMailboxActorPack { + enum EType { + Simple = 0, + Array = 1, + Map = 2 + }; + }; + + using TActorMap = THashMap<ui64, IActor*>; + + struct TExecutionState { + enum EState { + // normal states + Inactive = 0, + Scheduled = 1, + Leaving = 2, + Executing = 3, + LeavingMarked = 4, + // states for free mailboxes (they can still be scheduled so we need duplicates) + Free = 5, + FreeScheduled = 6, + FreeLeaving = 7, + FreeExecuting = 8, + FreeLeavingMarked = 9, + }; + }; + + volatile ui32 ExecutionState; + ui32 Reserved : 4; // never changes, always zero + ui32 Type : 4; // never changes + ui32 ActorPack : 2; + ui32 Knobs : 22; + + struct TActorPair { + IActor *Actor; + ui64 ActorId; + }; + + struct alignas(64) TActorArray { + TActorPair Actors[ARRAY_CAPACITY]; + }; + + union TActorsInfo { + TActorPair Simple; + struct { + TActorArray* ActorsArray; + ui64 ActorsCount; + } Array; + struct { + TActorMap* ActorsMap; + } Map; + } ActorsInfo; + + TMailboxHeader(TMailboxType::EType type); + ~TMailboxHeader(); + + bool CleanupActors(); + + // this interface is used exclusively by executor thread, so implementation is there + + bool MarkForSchedule(); // we put something in queue, check should we schedule? + + bool LockForExecution(); // we got activation, try to lock mailbox + bool LockFromFree(); // try to claim mailbox from recycled (could fail if other thread process garbage) + + void UnlockFromExecution1(); // prepare for releasing lock + bool UnlockFromExecution2(bool wouldReschedule); // proceed with releasing lock + bool UnlockAsFree(bool wouldReschedule); // preceed with releasing lock, but mark as free one + + bool IsEmpty() const noexcept { + return (ActorPack == TMailboxActorPack::Simple && ActorsInfo.Simple.ActorId == 0); + } + + template<typename T> + void ForEach(T&& callback) noexcept { + switch (ActorPack) { + case TMailboxActorPack::Simple: + if (ActorsInfo.Simple.ActorId) { + callback(ActorsInfo.Simple.ActorId, ActorsInfo.Simple.Actor); + } + break; + + case TMailboxActorPack::Map: + for (const auto& [actorId, actor] : *ActorsInfo.Map.ActorsMap) { + callback(actorId, actor); + } + break; + + case TMailboxActorPack::Array: + for (ui64 i = 0; i < ActorsInfo.Array.ActorsCount; ++i) { + auto& row = ActorsInfo.Array.ActorsArray->Actors[i]; + callback(row.ActorId, row.Actor); + } + break; + } + } + + IActor* FindActor(ui64 localActorId) noexcept { + switch (ActorPack) { + case TMailboxActorPack::Simple: { + if (ActorsInfo.Simple.ActorId == localActorId) + return ActorsInfo.Simple.Actor; + break; + } + case TMailboxActorPack::Map: { + TActorMap::iterator it = ActorsInfo.Map.ActorsMap->find(localActorId); + if (it != ActorsInfo.Map.ActorsMap->end()) + return it->second; + break; + } + case TMailboxActorPack::Array: { + for (ui64 i = 0; i < ActorsInfo.Array.ActorsCount; ++i) { + if (ActorsInfo.Array.ActorsArray->Actors[i].ActorId == localActorId) { + return ActorsInfo.Array.ActorsArray->Actors[i].Actor; + } + } + break; + } + default: + Y_FAIL(); + } + return nullptr; + } + + void AttachActor(ui64 localActorId, IActor* actor) noexcept { + 
switch (ActorPack) { + case TMailboxActorPack::Simple: { + if (ActorsInfo.Simple.ActorId == 0) { + ActorsInfo.Simple.ActorId = localActorId; + ActorsInfo.Simple.Actor = actor; + return; + } else { + auto ar = new TActorArray; + ar->Actors[0] = ActorsInfo.Simple; + ar->Actors[1] = TActorPair{actor, localActorId}; + ActorsInfo.Array.ActorsCount = 2; + ActorPack = TMailboxActorPack::Array; + ActorsInfo.Array.ActorsArray = ar; + } + break; + } + case TMailboxActorPack::Map: { + ActorsInfo.Map.ActorsMap->insert(TActorMap::value_type(localActorId, actor)); + break; + } + case TMailboxActorPack::Array: { + if (ActorsInfo.Array.ActorsCount == ARRAY_CAPACITY) { + TActorMap* mp = new TActorMap(); + for (ui64 i = 0; i < ARRAY_CAPACITY; ++i) { + mp->emplace(ActorsInfo.Array.ActorsArray->Actors[i].ActorId, ActorsInfo.Array.ActorsArray->Actors[i].Actor); + } + mp->emplace(localActorId, actor); + ActorPack = TMailboxActorPack::Map; + ActorsInfo.Array.ActorsCount = 0; + delete ActorsInfo.Array.ActorsArray; + ActorsInfo.Map.ActorsMap = mp; + } else { + ActorsInfo.Array.ActorsArray->Actors[ActorsInfo.Array.ActorsCount++] = TActorPair{actor, localActorId}; + } + break; + } + default: + Y_FAIL(); + } + } + + IActor* DetachActor(ui64 localActorId) noexcept { + Y_VERIFY_DEBUG(FindActor(localActorId) != nullptr); + + IActor* actorToDestruct = nullptr; + + switch (ActorPack) { + case TMailboxActorPack::Simple: { + Y_VERIFY(ActorsInfo.Simple.ActorId == localActorId); + actorToDestruct = ActorsInfo.Simple.Actor; + + ActorsInfo.Simple.ActorId = 0; + ActorsInfo.Simple.Actor = nullptr; + break; + } + case TMailboxActorPack::Map: { + TActorMap::iterator it = ActorsInfo.Map.ActorsMap->find(localActorId); + Y_VERIFY(it != ActorsInfo.Map.ActorsMap->end()); + + actorToDestruct = it->second; + ActorsInfo.Map.ActorsMap->erase(it); + + if (ActorsInfo.Map.ActorsMap->size() == ARRAY_CAPACITY) { + auto ar = new TActorArray; + ui64 i = 0; + for (auto& [actorId, actor] : *ActorsInfo.Map.ActorsMap) { + ar->Actors[i++] = TActorPair{actor, actorId}; + } + delete ActorsInfo.Map.ActorsMap; + ActorPack = TMailboxActorPack::Array; + ActorsInfo.Array.ActorsArray = ar; + ActorsInfo.Array.ActorsCount = ARRAY_CAPACITY; + } + break; + } + case TMailboxActorPack::Array: { + bool found = false; + for (ui64 i = 0; i < ActorsInfo.Array.ActorsCount; ++i) { + if (ActorsInfo.Array.ActorsArray->Actors[i].ActorId == localActorId) { + found = true; + actorToDestruct = ActorsInfo.Array.ActorsArray->Actors[i].Actor; + ActorsInfo.Array.ActorsArray->Actors[i] = ActorsInfo.Array.ActorsArray->Actors[ActorsInfo.Array.ActorsCount - 1]; + ActorsInfo.Array.ActorsCount -= 1; + break; + } + } + Y_VERIFY(found); + + if (ActorsInfo.Array.ActorsCount == 1) { + const TActorPair Actor = ActorsInfo.Array.ActorsArray->Actors[0]; + delete ActorsInfo.Array.ActorsArray; + ActorPack = TMailboxActorPack::Simple; + ActorsInfo.Simple = Actor; + } + break; + } + } + + return actorToDestruct; + } + + std::pair<ui32, ui32> CountMailboxEvents(ui64 localActorId, ui32 maxTraverse); + }; + + class TMailboxTable : TNonCopyable { + private: + struct TMailboxLineHeader { + const TMailboxType::EType MailboxType; + const ui32 Index; + // some more stuff in first cache line, then goes mailboxes + ui8 Padding[52]; + + TMailboxLineHeader(TMailboxType::EType type, ui32 index) + : MailboxType(type) + , Index(index) + { + } + }; + static_assert(sizeof(TMailboxLineHeader) <= 64, "expect sizeof(TMailboxLineHeader) <= 64"); + + constexpr static ui64 MaxLines = 131000; // somewhat less then 2^17. 
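These two constants fix the table's addressing budget: `MaxLines` stays just below 2^17 so that a line index always fits the 17 bits the hint layout reserves for it, and every line is a fixed 256 KiB slab allocated on demand. A quick back-of-the-envelope check (a sketch, not library code):

```cpp
#include <cstdint>

static_assert(131000 < (1u << 17), "a line index must fit in 17 hint bits");

// Upper bound on memory pinned by mailbox lines if the table were ever
// fully populated; in practice lines are allocated lazily.
constexpr uint64_t MaxLineBytes = 131000ull * 262144ull;
static_assert(MaxLineBytes == 34340864000ull, "roughly 32 GiB");
```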
+ constexpr static ui64 LineSize = 262144; // 64 * 2^12. + + TAtomic LastAllocatedLine; + TAtomic AllocatedMailboxCount; + + typedef TUnorderedCache<ui32, 512, 4> TMailboxCache; + TMailboxCache MailboxCacheSimple; + TAtomic CachedSimpleMailboxes; + TMailboxCache MailboxCacheRevolving; + TAtomic CachedRevolvingMailboxes; + TMailboxCache MailboxCacheHTSwap; + TAtomic CachedHTSwapMailboxes; + TMailboxCache MailboxCacheReadAsFilled; + TAtomic CachedReadAsFilledMailboxes; + TMailboxCache MailboxCacheTinyReadAsFilled; + TAtomic CachedTinyReadAsFilledMailboxes; + + // and here goes large chunk of lines + // presented as array of static size to avoid sync on access + TMailboxLineHeader* volatile Lines[MaxLines]; + + ui32 AllocateNewLine(TMailboxType::EType type); + ui32 TryAllocateMailbox(TMailboxType::EType type, ui64 revolvingCounter); + + public: + TMailboxTable(); + ~TMailboxTable(); + + bool Cleanup(); // returns true if nothing found to destruct (so nothing new is possible to be created) + + static const ui32 LineIndexShift = 12; + static const ui32 LineIndexMask = 0x1FFFFu << LineIndexShift; + static const ui32 LineHintMask = 0xFFFu; + static const ui32 PoolIndexShift = TActorId::PoolIndexShift; + static const ui32 PoolIndexMask = TActorId::PoolIndexMask; + + static ui32 LineIndex(ui32 hint) { + return ((hint & LineIndexMask) >> LineIndexShift); + } + static ui32 PoolIndex(ui32 hint) { + return TActorId::PoolIndex(hint); + } + + TMailboxHeader* Get(ui32 hint); + ui32 AllocateMailbox(TMailboxType::EType type, ui64 revolvingCounter); + void ReclaimMailbox(TMailboxType::EType type, ui32 hint, ui64 revolvingCounter); + ui64 GetAllocatedMailboxCount() const { + return RelaxedLoad(&AllocatedMailboxCount); + } + + bool SendTo(TAutoPtr<IEventHandle>& ev, IExecutorPool* executorPool); + + struct TSimpleMailbox: public TMailboxHeader { + // 4 bytes - state + // 4 bytes - knobs + // 8 bytes - actorid + // 8 bytes - actor* + TSimpleMailboxQueue<IEventHandle*, 64> Queue; // 24 + 8 bytes (body, lock) + NHPTimer::STime ScheduleMoment; + + TSimpleMailbox(); + ~TSimpleMailbox(); + + IEventHandle* Pop() { + return Queue.Pop(); + } + IEventHandle* Head() { + return Queue.Head(); + } + + static TSimpleMailbox* Get(ui32 hint, void* line) { + return (TSimpleMailbox*)((ui8*)line + hint * 64); // + } + static const TMailboxType::EType MailboxType = TMailboxType::Simple; + constexpr static ui32 AlignedSize() { + return ((sizeof(TSimpleMailbox) + 63) / 64) * 64; + } + + std::pair<ui32, ui32> CountSimpleMailboxEvents(ui64 localActorId, ui32 maxTraverse); + bool CleanupEvents(); + }; + + static_assert(sizeof(TSimpleMailbox) == 64, "expect sizeof(TSimpleMailbox) == 64"); + + struct TRevolvingMailbox: public TMailboxHeader { + // 4 bytes - state + // 4 bytes - knobs + // 8 bytes - actorid + // 8 bytes - actor* + TRevolvingMailboxQueue<IEventHandle*, 3, 128>::TReader QueueReader; // 8 * 3 + 4 * 3 + (padding): 40 bytes + // here goes next cache-line, so less writers<-> reader interference + TRevolvingMailboxQueue<IEventHandle*, 3, 128>::TWriter QueueWriter; // 8 * 3 + 4 * 3 + 8 : 48 bytes + ui32 Reserved1; + ui32 Reserved2; + NHPTimer::STime ScheduleMoment; + + TRevolvingMailbox(); + ~TRevolvingMailbox(); + + IEventHandle* Pop() { + return QueueReader.Pop(); + } + IEventHandle* Head() { + return QueueReader.Head(); + } + + static TRevolvingMailbox* Get(ui32 hint, void* line) { + return (TRevolvingMailbox*)((ui8*)line + 64 + (hint - 1) * 128); + } + + constexpr static ui64 MaxMailboxesInLine() { + return (LineSize - 64) / 
AlignedSize(); + } + static const TMailboxType::EType MailboxType = TMailboxType::Revolving; + constexpr static ui32 AlignedSize() { + return ((sizeof(TRevolvingMailbox) + 63) / 64) * 64; + } + + std::pair<ui32, ui32> CountRevolvingMailboxEvents(ui64 localActorId, ui32 maxTraverse); + bool CleanupEvents(); + }; + + static_assert(sizeof(TRevolvingMailbox) == 128, "expect sizeof(TRevolvingMailbox) == 128"); + + struct THTSwapMailbox: public TMailboxHeader { + using TQueueType = NThreading::THTSwapQueue<IEventHandle*>; + + TQueueType Queue; + NHPTimer::STime ScheduleMoment; + char Padding_[16]; + + THTSwapMailbox() + : TMailboxHeader(TMailboxType::HTSwap) + , ScheduleMoment(0) + { + } + + ~THTSwapMailbox() { + CleanupEvents(); + } + + IEventHandle* Pop() { + return Queue.Pop(); + } + + IEventHandle* Head() { + return Queue.Peek(); + } + + static THTSwapMailbox* Get(ui32 hint, void* line) { + return (THTSwapMailbox*)((ui8*)line + 64 + (hint - 1) * 64); + } + + constexpr static ui64 MaxMailboxesInLine() { + return (LineSize - 64) / AlignedSize(); + } + + static const TMailboxType::EType MailboxType = TMailboxType::HTSwap; + + constexpr static ui32 AlignedSize() { + return ((sizeof(THTSwapMailbox) + 63) / 64) * 64; + } + + bool CleanupEvents() { + const bool done = (Queue.Peek() == nullptr); + while (IEventHandle* ev = Queue.Pop()) + delete ev; + return done; + } + }; + + static_assert(sizeof(THTSwapMailbox) == 64, + "expect sizeof(THTSwapMailbox) == 64"); + + struct TReadAsFilledMailbox: public TMailboxHeader { + using TQueueType = NThreading::TReadAsFilledQueue<IEventHandle>; + + TQueueType Queue; + NHPTimer::STime ScheduleMoment; + char Padding_[8]; + + TReadAsFilledMailbox() + : TMailboxHeader(TMailboxType::ReadAsFilled) + , ScheduleMoment(0) + { + } + + ~TReadAsFilledMailbox() { + CleanupEvents(); + } + + IEventHandle* Pop() { + return Queue.Pop(); + } + + IEventHandle* Head() { + return Queue.Peek(); + } + + static TReadAsFilledMailbox* Get(ui32 hint, void* line) { + return (TReadAsFilledMailbox*)((ui8*)line + 64 + (hint - 1) * 192); + } + + constexpr static ui64 MaxMailboxesInLine() { + return (LineSize - 64) / AlignedSize(); + } + + static const TMailboxType::EType MailboxType = + TMailboxType::ReadAsFilled; + + constexpr static ui32 AlignedSize() { + return ((sizeof(TReadAsFilledMailbox) + 63) / 64) * 64; + } + + bool CleanupEvents() { + const bool done = (Queue.Peek() == nullptr); + while (IEventHandle* ev = Queue.Pop()) + delete ev; + return done; + } + }; + + static_assert(sizeof(TReadAsFilledMailbox) == 192, + "expect sizeof(TReadAsFilledMailbox) == 192"); + + struct TTinyReadAsFilledMailbox: public TMailboxHeader { + using TQueueType = NThreading::TReadAsFilledQueue< + IEventHandle, + NThreading::TRaFQueueBunchSize<4>>; + + TQueueType Queue; + NHPTimer::STime ScheduleMoment; + char Padding_[8]; + + TTinyReadAsFilledMailbox() + : TMailboxHeader(TMailboxType::TinyReadAsFilled) + , ScheduleMoment(0) + { + } + + ~TTinyReadAsFilledMailbox() { + CleanupEvents(); + } + + IEventHandle* Pop() { + return Queue.Pop(); + } + + IEventHandle* Head() { + return Queue.Peek(); + } + + static TTinyReadAsFilledMailbox* Get(ui32 hint, void* line) { + return (TTinyReadAsFilledMailbox*)((ui8*)line + 64 + (hint - 1) * 192); + } + + constexpr static ui64 MaxMailboxesInLine() { + return (LineSize - 64) / AlignedSize(); + } + + static const TMailboxType::EType MailboxType = + TMailboxType::TinyReadAsFilled; + + constexpr static ui32 AlignedSize() { + return ((sizeof(TTinyReadAsFilledMailbox) + 63) / 64) * 
64; + } + + bool CleanupEvents() { + const bool done = (Queue.Peek() == nullptr); + while (IEventHandle* ev = Queue.Pop()) + delete ev; + return done; + } + }; + + static_assert(sizeof(TTinyReadAsFilledMailbox) == 192, + "expect sizeof(TTinyReadAsFilledMailbox) == 192"); + }; +} diff --git a/library/cpp/actors/core/mailbox_queue_revolving.h b/library/cpp/actors/core/mailbox_queue_revolving.h new file mode 100644 index 0000000000..b0e78a18db --- /dev/null +++ b/library/cpp/actors/core/mailbox_queue_revolving.h @@ -0,0 +1,214 @@ +#pragma once + +#include "defs.h" +#include <library/cpp/actors/util/queue_chunk.h> + +namespace NActors { + // add some concurrency to basic queue to avoid hangs under contention (we pay with memory, so use only when really expect contention) + // ordering: every completed push guarantied to seen before any not-yet-initiated push. parallel pushes could reorder (and that is natural for concurrent queues). + // try to place reader/writer on different cache-lines to avoid congestion b/w reader and writers. + // if strict ordering does not matter - look at TManyOneQueue. + + template <typename T, ui32 TWriteConcurrency = 3, ui32 TSize = 128> + class TRevolvingMailboxQueue { + static_assert(std::is_integral<T>::value || std::is_pointer<T>::value, "expect std::is_integral<T>::value || std::is_pointer<T>::value"); + + struct TValTagPair { + volatile T Value; + volatile ui64 Tag; + }; + + typedef TQueueChunk<TValTagPair, TSize> TChunk; + + static_assert(sizeof(TAtomic) == sizeof(TChunk*), "expect sizeof(TAtomic) == sizeof(TChunk*)"); + static_assert(sizeof(TAtomic) == sizeof(ui64), "expect sizeof(TAtomic) == sizeof(ui64)"); + + public: + class TWriter; + + class TReader { + TChunk* ReadFrom[TWriteConcurrency]; + ui32 ReadPosition[TWriteConcurrency]; + + friend class TRevolvingMailboxQueue<T, TWriteConcurrency, TSize>::TWriter; // for access to ReadFrom in constructor + + bool ChunkHead(ui32 idx, ui64* tag, T* value) { + TChunk* head = ReadFrom[idx]; + const ui32 pos = ReadPosition[idx]; + if (pos != TChunk::EntriesCount) { + if (const T xval = AtomicLoad(&head->Entries[pos].Value)) { + const ui64 xtag = head->Entries[pos].Tag; + if (xtag < *tag) { + *value = xval; + *tag = xtag; + return true; + } + } + } else if (TChunk* next = AtomicLoad(&head->Next)) { + ReadFrom[idx] = next; + delete head; + ReadPosition[idx] = 0; + return ChunkHead(idx, tag, value); + } + + return false; + } + + T Head(bool pop) { + ui64 tag = Max<ui64>(); + T ret = T{}; + ui32 idx = 0; + + for (ui32 i = 0; i < TWriteConcurrency; ++i) + if (ChunkHead(i, &tag, &ret)) + idx = i; + + // w/o second pass we could reorder updates with 'already scanned' range + if (ret) { + for (ui32 i = 0; i < TWriteConcurrency; ++i) + if (ChunkHead(i, &tag, &ret)) + idx = i; + } + + if (pop && ret) + ++ReadPosition[idx]; + + return ret; + } + + public: + TReader() { + for (ui32 i = 0; i != TWriteConcurrency; ++i) { + ReadFrom[i] = new TChunk(); + ReadPosition[i] = 0; + } + } + + ~TReader() { + Y_VERIFY_DEBUG(Head() == 0); + for (ui32 i = 0; i < TWriteConcurrency; ++i) + delete ReadFrom[i]; + } + + T Pop() { + return Head(true); + } + + T Head() { + return Head(false); + } + + class TReadIterator { + TChunk* ReadFrom[TWriteConcurrency]; + ui32 ReadPosition[TWriteConcurrency]; + + bool ChunkHead(ui32 idx, ui64* tag, T* value) { + TChunk* head = ReadFrom[idx]; + const ui32 pos = ReadPosition[idx]; + if (pos != TChunk::EntriesCount) { + if (const T xval = AtomicLoad(&head->Entries[pos].Value)) { + const ui64 xtag = 
head->Entries[pos].Tag; + if (xtag < *tag) { + *value = xval; + *tag = xtag; + return true; + } + } + } else if (TChunk* next = AtomicLoad(&head->Next)) { + ReadFrom[idx] = next; + ReadPosition[idx] = 0; + return ChunkHead(idx, tag, value); + } + + return false; + } + + public: + TReadIterator(TChunk* const* readFrom, const ui32* readPosition) { + memcpy(ReadFrom, readFrom, TWriteConcurrency * sizeof(TChunk*)); + memcpy(ReadPosition, readPosition, TWriteConcurrency * sizeof(ui32)); + } + + T Next() { + ui64 tag = Max<ui64>(); + T ret = T{}; + ui32 idx = 0; + + for (ui32 i = 0; i < TWriteConcurrency; ++i) + if (ChunkHead(i, &tag, &ret)) + idx = i; + + // w/o second pass we could reorder updates with 'already scanned' range + if (ret) { + for (ui32 i = 0; i < TWriteConcurrency; ++i) + if (ChunkHead(i, &tag, &ret)) + idx = i; + } + + if (ret) + ++ReadPosition[idx]; + + return ret; + } + }; + + TReadIterator Iterator() const { + return TReadIterator(ReadFrom, ReadPosition); + } + }; + + class TWriter { + TChunk* volatile WriteTo[TWriteConcurrency]; + volatile ui64 Tag; + ui32 WritePosition[TWriteConcurrency]; + + public: + TWriter(const TReader& reader) + : Tag(0) + { + for (ui32 i = 0; i != TWriteConcurrency; ++i) { + WriteTo[i] = reader.ReadFrom[i]; + WritePosition[i] = 0; + } + } + + bool TryPush(T x) { + Y_VERIFY(x != 0); + + for (ui32 i = 0; i != TWriteConcurrency; ++i) { + if (RelaxedLoad(&WriteTo[i]) != nullptr) { + if (TChunk* writeTo = AtomicSwap(&WriteTo[i], nullptr)) { + const ui64 nextTag = AtomicIncrement(Tag); + Y_VERIFY_DEBUG(nextTag < Max<ui64>()); + const ui32 writePosition = WritePosition[i]; + if (writePosition != TChunk::EntriesCount) { + writeTo->Entries[writePosition].Tag = nextTag; + AtomicStore(&writeTo->Entries[writePosition].Value, x); + ++WritePosition[i]; + } else { + TChunk* next = new TChunk(); + next->Entries[0].Tag = nextTag; + next->Entries[0].Value = x; + AtomicStore(&writeTo->Next, next); + writeTo = next; + WritePosition[i] = 1; + } + AtomicStore(WriteTo + i, writeTo); + return true; + } + } + } + return false; + } + + ui32 Push(T x) { + ui32 spins = 0; + while (!TryPush(x)) { + ++spins; + SpinLockPause(); + } + return spins; + } + }; + }; +} diff --git a/library/cpp/actors/core/mailbox_queue_simple.h b/library/cpp/actors/core/mailbox_queue_simple.h new file mode 100644 index 0000000000..2e44c21adb --- /dev/null +++ b/library/cpp/actors/core/mailbox_queue_simple.h @@ -0,0 +1,34 @@ +#pragma once + +#include "defs.h" +#include <library/cpp/actors/util/ticket_lock.h> +#include <library/cpp/actors/util/queue_oneone_inplace.h> + +namespace NActors { + // dead-simple one-one queue, based on serializability guaranties of x64 and ticket lock to ensure writer unicity. 
+ template <typename T, ui32 TSize> + class TSimpleMailboxQueue { + TOneOneQueueInplace<T, TSize> Queue; + TTicketLock Lock; + + public: + ui32 Push(T x) noexcept { + const ui32 spins = Lock.Acquire(); + Queue.Push(x); + Lock.Release(); + return spins; + } + + T Head() { + return Queue.Head(); + } + + T Pop() { + return Queue.Pop(); + } + + typename TOneOneQueueInplace<T, TSize>::TReadIterator ReadIterator() { + return Queue.Iterator(); + } + }; +} diff --git a/library/cpp/actors/core/memory_track.cpp b/library/cpp/actors/core/memory_track.cpp new file mode 100644 index 0000000000..5f422116be --- /dev/null +++ b/library/cpp/actors/core/memory_track.cpp @@ -0,0 +1,38 @@ +#include "memory_track.h" +#include "memory_tracker.h" + +namespace NActors { +namespace NMemory { + +namespace NPrivate { + +TThreadLocalInfo::TThreadLocalInfo() + : Metrics(TMemoryTracker::Instance()->GetCount()) +{ + TMemoryTracker::Instance()->OnCreateThread(this); +} + +TThreadLocalInfo::~TThreadLocalInfo() { + TMemoryTracker::Instance()->OnDestroyThread(this); +} + +TMetric* TThreadLocalInfo::GetMetric(size_t index) { + if (Y_UNLIKELY(index >= Metrics.size())) { + return &Null; + } + return &Metrics[index]; +} + +const std::vector<TMetric>& TThreadLocalInfo::GetMetrics() const { + return Metrics; +} + +size_t TBaseLabel::RegisterStaticMemoryLabel(const char* name, bool hasSensor) { + return TMemoryTracker::Instance()->RegisterStaticMemoryLabel(name, hasSensor); +} + +} + +} +} + diff --git a/library/cpp/actors/core/memory_track.h b/library/cpp/actors/core/memory_track.h new file mode 100644 index 0000000000..6035333eeb --- /dev/null +++ b/library/cpp/actors/core/memory_track.h @@ -0,0 +1,293 @@ +#pragma once + +#include <vector> + +#include <util/system/type_name.h> +#include <util/thread/singleton.h> + +#define ENABLE_MEMORY_TRACKING + +namespace NActors { +namespace NMemory { + +namespace NPrivate { + +class TMetric { + std::atomic<ssize_t> Memory; + std::atomic<ssize_t> Count; + + void Copy(const TMetric& other) { + Memory.store(other.GetMemory(), std::memory_order_relaxed); + Count.store(other.GetCount(), std::memory_order_relaxed); + } + +public: + TMetric() + : Memory(0) + , Count(0) + {} + + inline TMetric(const TMetric& other) { + Copy(other); + } + + inline TMetric(TMetric&& other) { + Copy(other); + } + + inline TMetric& operator=(const TMetric& other) { + Copy(other); + return *this; + } + + inline TMetric& operator=(TMetric&& other) { + Copy(other); + return *this; + } + + inline ssize_t GetMemory() const { + return Memory.load(std::memory_order_relaxed); + } + inline void SetMemory(ssize_t value) { + Memory.store(value, std::memory_order_relaxed); + } + + inline ssize_t GetCount() const { + return Count.load(std::memory_order_relaxed); + } + inline void SetCount(ssize_t value) { + Count.store(value, std::memory_order_relaxed); + } + + inline void operator+=(const TMetric& other) { + SetMemory(GetMemory() + other.GetMemory()); + SetCount(GetCount() + other.GetCount()); + } + + inline void CalculatePeak(const TMetric& other) { + SetMemory(Max(GetMemory(), other.GetMemory())); + SetCount(Max(GetCount(), other.GetCount())); + } + + inline void Add(size_t size) { + SetMemory(GetMemory() + size); + SetCount(GetCount() + 1); + } + + inline void Sub(size_t size) { + SetMemory(GetMemory() - size); + SetCount(GetCount() - 1); + } +}; + + +class TThreadLocalInfo { +public: + TThreadLocalInfo(); + ~TThreadLocalInfo(); + + TMetric* GetMetric(size_t index); + const std::vector<TMetric>& GetMetrics() const; + +private: 
+ std::vector<TMetric> Metrics; + + inline static TMetric Null = {}; +}; + + +class TBaseLabel { +protected: + static size_t RegisterStaticMemoryLabel(const char* name, bool hasSensor); + + inline static TMetric* GetLocalMetric(size_t index) { + return FastTlsSingleton<TThreadLocalInfo>()->GetMetric(index); + } +}; + + +template <const char* Name> +class TNameLabel + : TBaseLabel +{ +public: + static void Add(size_t size) { +#if defined(ENABLE_MEMORY_TRACKING) + Y_UNUSED(MetricInit); + + if (Y_UNLIKELY(!Metric)) { + Metric = GetLocalMetric(Index); + } + + Metric->Add(size); +#else + Y_UNUSED(size); +#endif + } + + static void Sub(size_t size) { +#if defined(ENABLE_MEMORY_TRACKING) + Y_UNUSED(MetricInit); + + if (Y_UNLIKELY(!Metric)) { + Metric = GetLocalMetric(Index); + } + + Metric->Sub(size); +#else + Y_UNUSED(size); +#endif + } + +private: +#if defined(ENABLE_MEMORY_TRACKING) + inline static size_t Index = Max<size_t>(); + inline static struct TMetricInit { + TMetricInit() { + Index = RegisterStaticMemoryLabel(Name, true); + } + } MetricInit; + + inline static thread_local TMetric* Metric = nullptr; +#endif +}; + + +template <typename TType> +class TTypeLabel + : TBaseLabel +{ +public: + static void Add(size_t size) { +#if defined(ENABLE_MEMORY_TRACKING) + Y_UNUSED(MetricInit); + + if (Y_UNLIKELY(!Metric)) { + Metric = GetLocalMetric(Index); + } + + Metric->Add(size); +#else + Y_UNUSED(size); +#endif + } + + static void Sub(size_t size) { +#if defined(ENABLE_MEMORY_TRACKING) + Y_UNUSED(MetricInit); + + if (Y_UNLIKELY(!Metric)) { + Metric = GetLocalMetric(Index); + } + + Metric->Sub(size); +#else + Y_UNUSED(size); +#endif + } + +private: +#if defined(ENABLE_MEMORY_TRACKING) + inline static size_t Index = Max<size_t>(); + inline static struct TMetricInit { + TMetricInit() { + Index = RegisterStaticMemoryLabel(TypeName<TType>().c_str(), false); + } + } MetricInit; + + inline static thread_local TMetric* Metric = nullptr; +#endif +}; + + +template <typename T> +struct TTrackHelper { +#if defined(ENABLE_MEMORY_TRACKING) + void* operator new(size_t size) { + T::Add(size); + return malloc(size); + } + + void* operator new[](size_t size) { + T::Add(size); + return malloc(size); + } + + void operator delete(void* ptr, size_t size) { + T::Sub(size); + free(ptr); + } + + void operator delete[](void* ptr, size_t size) { + T::Sub(size); + free(ptr); + } +#endif +}; + +template <typename TType, typename T> +struct TAllocHelper { + typedef size_t size_type; + typedef TType value_type; + typedef TType* pointer; + typedef const TType* const_pointer; + + struct propagate_on_container_copy_assignment : public std::false_type {}; + struct propagate_on_container_move_assignment : public std::false_type {}; + struct propagate_on_container_swap : public std::false_type {}; + + pointer allocate(size_type n, const void* hint = nullptr) { + Y_UNUSED(hint); + auto size = n * sizeof(TType); + T::Add(size); + return (pointer)malloc(size); + } + + void deallocate(pointer ptr, size_t n) { + auto size = n * sizeof(TType); + T::Sub(size); + free((void*)ptr); + } +}; + +} // NPrivate + + +template <const char* Name> +using TLabel = NPrivate::TNameLabel<Name>; + +template <typename TType, const char* Name = nullptr> +struct TTrack + : public NPrivate::TTrackHelper<NPrivate::TNameLabel<Name>> +{ +}; + +template <typename TType> +struct TTrack<TType, nullptr> + : public NPrivate::TTrackHelper<NPrivate::TTypeLabel<TType>> +{ +}; + +template <typename TType, const char* Name = nullptr> +struct TAlloc + : public 
NPrivate::TAllocHelper<TType, NPrivate::TNameLabel<Name>> +{ + template<typename U> + struct rebind { + typedef TAlloc<U, Name> other; + }; +}; + +template <typename TType> +struct TAlloc<TType, nullptr> + : public NPrivate::TAllocHelper<TType, NPrivate::TTypeLabel<TType>> +{ + template<typename U> + struct rebind { + typedef TAlloc<U> other; + }; +}; + +} +} + diff --git a/library/cpp/actors/core/memory_tracker.cpp b/library/cpp/actors/core/memory_tracker.cpp new file mode 100644 index 0000000000..8a12452c71 --- /dev/null +++ b/library/cpp/actors/core/memory_tracker.cpp @@ -0,0 +1,103 @@ +#include "memory_tracker.h" + +#include <util/generic/xrange.h> + +namespace NActors { +namespace NMemory { + +namespace NPrivate { + +TMemoryTracker* TMemoryTracker::Instance() { + return SingletonWithPriority<TMemoryTracker, 0>(); +} + +void TMemoryTracker::Initialize() { + GlobalMetrics.resize(Indices.size()); +} + +const std::map<TString, size_t>& TMemoryTracker::GetMetricIndices() const { + return Indices; +} + +const std::unordered_set<size_t>& TMemoryTracker::GetSensors() const { + return Sensors; +} + +TString TMemoryTracker::GetName(size_t index) const { + return Names[index]; +} + +size_t TMemoryTracker::GetCount() const { + return Indices.size(); +} + +void TMemoryTracker::GatherMetrics(std::vector<TMetric>& metrics) const { + metrics.resize(0); + auto count = GetCount(); + + if (!count || GlobalMetrics.size() != count) { + return; + } + + TReadGuard guard(LockThreadInfo); + + metrics.resize(count); + for (size_t i : xrange(count)) { + metrics[i] += GlobalMetrics[i]; + } + + for (auto info : ThreadInfo) { + auto& localMetrics = info->GetMetrics(); + if (localMetrics.size() == count) { + for (size_t i : xrange(count)) { + metrics[i] += localMetrics[i]; + } + } + } +} + +size_t TMemoryTracker::RegisterStaticMemoryLabel(const char* name, bool hasSensor) { + size_t index = 0; + auto found = Indices.find(name); + if (found == Indices.end()) { + TString str(name); + auto next = Names.size(); + Indices.emplace(str, next); + Names.push_back(str); + index = next; + } else { + index = found->second; + } + + if (hasSensor) { + Sensors.emplace(index); + } + return index; +} + +void TMemoryTracker::OnCreateThread(TThreadLocalInfo* info) { + TWriteGuard guard(LockThreadInfo); + ThreadInfo.insert(info); +} + +void TMemoryTracker::OnDestroyThread(TThreadLocalInfo* info) { + TWriteGuard guard(LockThreadInfo); + + auto count = GetCount(); + if (count && GlobalMetrics.size() == count) { + const auto& localMetrics = info->GetMetrics(); + if (localMetrics.size() == count) { + for (size_t i : xrange(count)) { + GlobalMetrics[i] += localMetrics[i]; + } + } + } + + ThreadInfo.erase(info); +} + +} + +} +} + diff --git a/library/cpp/actors/core/memory_tracker.h b/library/cpp/actors/core/memory_tracker.h new file mode 100644 index 0000000000..e74508191b --- /dev/null +++ b/library/cpp/actors/core/memory_tracker.h @@ -0,0 +1,53 @@ +#pragma once + +#include "memory_track.h" + +#include <map> +#include <unordered_map> +#include <unordered_set> + +#include <util/system/rwlock.h> + +namespace NActors { +namespace NMemory { + +namespace NPrivate { + +class TMemoryTracker { +public: + static TMemoryTracker* Instance(); + + void Initialize(); + + const std::map<TString, size_t>& GetMetricIndices() const; + const std::unordered_set<size_t>& GetSensors() const; + TString GetName(size_t index) const; + size_t GetCount() const; + + void GatherMetrics(std::vector<TMetric>& metrics) const; + +private: + size_t 
RegisterStaticMemoryLabel(const char* name, bool hasSensor); + + void OnCreateThread(TThreadLocalInfo* info); + void OnDestroyThread(TThreadLocalInfo* info); + +private: + std::map<TString, size_t> Indices; + std::vector<TString> Names; + + std::vector<TMetric> GlobalMetrics; + + std::unordered_set<size_t> Sensors; + + std::unordered_set<TThreadLocalInfo*> ThreadInfo; + TRWMutex LockThreadInfo; + + friend class TThreadLocalInfo; + friend class TBaseLabel; +}; + +} + +} +} diff --git a/library/cpp/actors/core/memory_tracker_ut.cpp b/library/cpp/actors/core/memory_tracker_ut.cpp new file mode 100644 index 0000000000..d168214da6 --- /dev/null +++ b/library/cpp/actors/core/memory_tracker_ut.cpp @@ -0,0 +1,262 @@ +#include "memory_tracker.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/system/hp_timer.h> +#include <util/system/thread.h> + +namespace NActors { +namespace NMemory { + +Y_UNIT_TEST_SUITE(TMemoryTrackerTest) { + +#if defined(ENABLE_MEMORY_TRACKING) + +using namespace NPrivate; + +size_t FindLabelIndex(const char* label) { + auto indices = TMemoryTracker::Instance()->GetMetricIndices(); + auto it = indices.find(label); + UNIT_ASSERT(it != indices.end()); + return it->second; +} + + +struct TTypeLabeled + : public NActors::NMemory::TTrack<TTypeLabeled> +{ + char payload[16]; +}; + +static constexpr char NamedLabel[] = "NamedLabel"; + +struct TNameLabeled + : public NActors::NMemory::TTrack<TNameLabeled, NamedLabel> +{ + char payload[32]; +}; + +Y_UNIT_TEST(Gathering) +{ + TMemoryTracker::Instance()->Initialize(); + + auto* typed = new TTypeLabeled; + auto* typedArray = new TTypeLabeled[3]; + + auto* named = new TNameLabeled; + auto* namedArray = new TNameLabeled[5]; + NActors::NMemory::TLabel<NamedLabel>::Add(100); + + std::vector<TMetric> metrics; + TMemoryTracker::Instance()->GatherMetrics(metrics); + + auto typeIndex = FindLabelIndex(TypeName<TTypeLabeled>().c_str()); + UNIT_ASSERT(typeIndex < metrics.size()); + UNIT_ASSERT(metrics[typeIndex].GetMemory() == sizeof(TTypeLabeled) * 4 + sizeof(size_t)); + UNIT_ASSERT(metrics[typeIndex].GetCount() == 2); + + auto nameIndex = FindLabelIndex(NamedLabel); + UNIT_ASSERT(nameIndex < metrics.size()); + UNIT_ASSERT(metrics[nameIndex].GetMemory() == sizeof(TNameLabeled) * 6 + sizeof(size_t) + 100); + UNIT_ASSERT(metrics[nameIndex].GetCount() == 3); + + NActors::NMemory::TLabel<NamedLabel>::Sub(100); + delete [] namedArray; + delete named; + + delete [] typedArray; + delete typed; + + TMemoryTracker::Instance()->GatherMetrics(metrics); + + UNIT_ASSERT(metrics[typeIndex].GetMemory() == 0); + UNIT_ASSERT(metrics[typeIndex].GetCount() == 0); + + UNIT_ASSERT(metrics[nameIndex].GetMemory() == 0); + UNIT_ASSERT(metrics[nameIndex].GetCount() == 0); +} + + +static constexpr char InContainerLabel[] = "InContainerLabel"; + +struct TInContainer { + char payload[16]; +}; + +Y_UNIT_TEST(Containers) { + TMemoryTracker::Instance()->Initialize(); + + std::vector<TInContainer, NActors::NMemory::TAlloc<TInContainer>> vecT; + vecT.resize(5); + + std::vector<TInContainer, NActors::NMemory::TAlloc<TInContainer, InContainerLabel>> vecN; + vecN.resize(7); + + using TKey = int; + + std::map<TKey, TInContainer, std::less<TKey>, + NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>>> mapT; + mapT.emplace(0, TInContainer()); + mapT.emplace(1, TInContainer()); + + std::map<TKey, TInContainer, std::less<TKey>, + NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>, InContainerLabel>> mapN; + mapN.emplace(0, TInContainer()); + + 
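The assertions that follow check lower bounds rather than exact byte counts, because node-based containers route their internal node allocations through the rebound allocator: each label therefore also accumulates implementation-defined node overhead, not just `sizeof(value_type)` per element. A minimal sketch of what gets charged (a hypothetical helper reusing `TInContainer` and `InContainerLabel` from above):

```cpp
#include <vector>

void SketchCharges() {
    // Every allocation made through TAlloc is debited and credited on the label.
    std::vector<TInContainer, NActors::NMemory::TAlloc<TInContainer, InContainerLabel>> v;
    v.reserve(4);      // adds 4 * sizeof(TInContainer) to InContainerLabel
    v.clear();
    v.shrink_to_fit(); // buffer released: the same amount is subtracted
}
```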
std::unordered_map<TKey, TInContainer, std::hash<TKey>, std::equal_to<TKey>, + NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>>> umapT; + umapT.emplace(0, TInContainer()); + + std::unordered_map<TKey, TInContainer, std::hash<TKey>, std::equal_to<TKey>, + NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>, InContainerLabel>> umapN; + umapN.emplace(0, TInContainer()); + umapN.emplace(1, TInContainer()); + + std::vector<TMetric> metrics; + TMemoryTracker::Instance()->GatherMetrics(metrics); + + auto indices = TMemoryTracker::Instance()->GetMetricIndices(); + for (auto& [name, index] : indices) { + Cerr << "---- " << name + << ": memory = " << metrics[index].GetMemory() + << ", count = " << metrics[index].GetCount() << Endl; + } + + auto vecTIndex = FindLabelIndex(TypeName<TInContainer>().c_str()); + UNIT_ASSERT(metrics[vecTIndex].GetMemory() >= ssize_t(sizeof(TInContainer) * 5)); + UNIT_ASSERT(metrics[vecTIndex].GetCount() == 1); + + auto labelIndex = FindLabelIndex(InContainerLabel); + UNIT_ASSERT(metrics[labelIndex].GetCount() == 5); + UNIT_ASSERT(metrics[labelIndex].GetMemory() >= ssize_t( + sizeof(TInContainer) * 7 + + sizeof(decltype(mapN)::value_type) + + sizeof(decltype(umapN)::value_type) * 2)); +} + + +static constexpr char InThreadLabel[] = "InThreadLabel"; + +struct TInThread + : public NActors::NMemory::TTrack<TInThread, InThreadLabel> +{ + char payload[16]; +}; + +void* ThreadProc(void*) { + return new TInThread; +} + +Y_UNIT_TEST(Threads) { + TMemoryTracker::Instance()->Initialize(); + + auto index = FindLabelIndex(InThreadLabel); + + auto* object1 = new TInThread; + + std::vector<TMetric> metrics; + TMemoryTracker::Instance()->GatherMetrics(metrics); + UNIT_ASSERT(metrics[index].GetMemory() == sizeof(TInThread)); + UNIT_ASSERT(metrics[index].GetCount() == 1); + + TThread thread(&ThreadProc, nullptr); + thread.Start(); + auto* object2 = static_cast<TInThread*>(thread.Join()); + + TMemoryTracker::Instance()->GatherMetrics(metrics); + UNIT_ASSERT(metrics[index].GetMemory() == sizeof(TInThread) * 2); + UNIT_ASSERT(metrics[index].GetCount() == 2); + + delete object2; + + TMemoryTracker::Instance()->GatherMetrics(metrics); + UNIT_ASSERT(metrics[index].GetMemory() == sizeof(TInThread)); + UNIT_ASSERT(metrics[index].GetCount() == 1); + + delete object1; +} + + +struct TNotTracked { + char payload[16]; +}; + +struct TTracked + : public NActors::NMemory::TTrack<TTracked> +{ + char payload[16]; +}; + +template <typename T> +double MeasureAllocations() { + constexpr size_t objectsCount = 4 << 20; + + std::vector<T*> objects; + objects.resize(objectsCount); + + THPTimer timer; + + for (size_t i = 0; i < objectsCount; ++i) { + objects[i] = new T; + } + + for (size_t i = 0; i < objectsCount; ++i) { + delete objects[i]; + } + + auto seconds = timer.Passed(); + Cerr << "---- objects: " << objectsCount << ", time: " << seconds << Endl; + return seconds; +} + +Y_UNIT_TEST(Performance) { + TMemoryTracker::Instance()->Initialize(); + + constexpr size_t Runs = 16; + + Cerr << "---- warmup" << Endl; + MeasureAllocations<TNotTracked>(); + MeasureAllocations<TTracked>(); + + std::vector<double> noTrack; + std::vector<double> track; + + for (size_t run = 0; run < Runs; ++run) { + Cerr << "---- no track" << Endl; + auto time = MeasureAllocations<TNotTracked>(); + noTrack.push_back(time); + + Cerr << "---- track" << Endl; + time = MeasureAllocations<TTracked>(); + track.push_back(time); + } + + double meanNoTrack = 0, stddevNoTrack = 0; + double meanTrack = 0, stddevTrack = 0; + for 
(size_t i = 0; i < Runs; ++i) { + meanNoTrack += noTrack[i]; + meanTrack += track[i]; + } + meanNoTrack /= Runs; + meanTrack /= Runs; + + auto sqr = [](double val) { return val * val; }; + + for (size_t i = 0; i < Runs; ++i) { + stddevNoTrack += sqr(noTrack[i] - meanNoTrack); + stddevTrack += sqr(track[i] - meanTrack); + } + stddevNoTrack = sqrt(stddevNoTrack / (Runs - 1)); + stddevTrack = sqrt(stddevTrack / (Runs - 1)); + + Cerr << "---- no track - mean: " << meanNoTrack << ", stddev: " << stddevNoTrack << Endl; + Cerr << "---- track - mean: " << meanTrack << ", stddev: " << stddevTrack << Endl; + Cerr << "---- tracking is slower by " << int((meanTrack / meanNoTrack - 1.0) * 100) << "%" << Endl; +} + +#endif + +} + +} +} diff --git a/library/cpp/actors/core/mon.h b/library/cpp/actors/core/mon.h new file mode 100644 index 0000000000..c450f2338e --- /dev/null +++ b/library/cpp/actors/core/mon.h @@ -0,0 +1,234 @@ +#pragma once + +#include "events.h" +#include "event_local.h" +#include <library/cpp/monlib/service/monservice.h> +#include <library/cpp/monlib/service/pages/mon_page.h> + +namespace NActors { + namespace NMon { + enum { + HttpInfo = EventSpaceBegin(NActors::TEvents::ES_MON), + HttpInfoRes, + RemoteHttpInfo, + RemoteHttpInfoRes, + RemoteJsonInfoRes, + RemoteBinaryInfoRes, + End + }; + + static_assert(End < EventSpaceEnd(NActors::TEvents::ES_MON), "expect End < EventSpaceEnd(NActors::TEvents::ES_MON)"); + + // request info from an actor in HTML format + struct TEvHttpInfo: public NActors::TEventLocal<TEvHttpInfo, HttpInfo> { + TEvHttpInfo(const NMonitoring::IMonHttpRequest& request, int subReqId = 0) + : Request(request) + , SubRequestId(subReqId) + { + } + + TEvHttpInfo(const NMonitoring::IMonHttpRequest& request, const TString& userToken) + : Request(request) + , UserToken(userToken) + , SubRequestId(0) + { + } + + const NMonitoring::IMonHttpRequest& Request; + TString UserToken; // built and serialized + // SubRequestId != 0 means that we assemble reply from multiple parts and SubRequestId contains this part id + int SubRequestId; + }; + + // base class for HTTP info response + struct IEvHttpInfoRes: public NActors::TEventLocal<IEvHttpInfoRes, HttpInfoRes> { + enum EContentType { + Html, + Custom, + }; + + IEvHttpInfoRes() { + } + + virtual ~IEvHttpInfoRes() { + } + + virtual void Output(IOutputStream& out) const = 0; + virtual EContentType GetContentType() const = 0; + }; + + // Ready to output HTML in TString + struct TEvHttpInfoRes: public IEvHttpInfoRes { + TEvHttpInfoRes(const TString& answer, int subReqId = 0, EContentType contentType = Html) + : Answer(answer) + , SubRequestId(subReqId) + , ContentType(contentType) + { + } + + void Output(IOutputStream& out) const override { + out << Answer; + } + + EContentType GetContentType() const override { + return ContentType; + } + + const TString Answer; + const int SubRequestId; + const EContentType ContentType; + }; + + struct TEvRemoteHttpInfo: public NActors::TEventBase<TEvRemoteHttpInfo, RemoteHttpInfo> { + TEvRemoteHttpInfo() { + } + + TEvRemoteHttpInfo(const TString& query) + : Query(query) + { + } + + TEvRemoteHttpInfo(const TString& query, HTTP_METHOD method) + : Query(query) + , Method(method) + { + } + + TString Query; + HTTP_METHOD Method; + + TString PathInfo() const { + const size_t pos = Query.find('?'); + return (pos == TString::npos) ? TString() : Query.substr(0, pos); + } + + TCgiParameters Cgi() const { + const size_t pos = Query.find('?'); + return TCgiParameters((pos == TString::npos) ? 
TString() : Query.substr(pos + 1)); + } + + TString ToStringHeader() const override { + return "TEvRemoteHttpInfo"; + } + + bool SerializeToArcadiaStream(TChunkSerializer *serializer) const override { + return serializer->WriteString(&Query); + } + + ui32 CalculateSerializedSize() const override { + return Query.size(); + } + + bool IsSerializable() const override { + return true; + } + + static IEventBase* Load(TEventSerializedData* bufs) { + return new TEvRemoteHttpInfo(bufs->GetString()); + } + + HTTP_METHOD GetMethod() const + { + return Method; + } + }; + + struct TEvRemoteHttpInfoRes: public NActors::TEventBase<TEvRemoteHttpInfoRes, RemoteHttpInfoRes> { + TEvRemoteHttpInfoRes() { + } + + TEvRemoteHttpInfoRes(const TString& html) + : Html(html) + { + } + + TString Html; + + TString ToStringHeader() const override { + return "TEvRemoteHttpInfoRes"; + } + + bool SerializeToArcadiaStream(TChunkSerializer *serializer) const override { + return serializer->WriteString(&Html); + } + + ui32 CalculateSerializedSize() const override { + return Html.size(); + } + + bool IsSerializable() const override { + return true; + } + + static IEventBase* Load(TEventSerializedData* bufs) { + return new TEvRemoteHttpInfoRes(bufs->GetString()); + } + }; + + struct TEvRemoteJsonInfoRes: public NActors::TEventBase<TEvRemoteJsonInfoRes, RemoteJsonInfoRes> { + TEvRemoteJsonInfoRes() { + } + + TEvRemoteJsonInfoRes(const TString& json) + : Json(json) + { + } + + TString Json; + + TString ToStringHeader() const override { + return "TEvRemoteJsonInfoRes"; + } + + bool SerializeToArcadiaStream(TChunkSerializer *serializer) const override { + return serializer->WriteString(&Json); + } + + ui32 CalculateSerializedSize() const override { + return Json.size(); + } + + bool IsSerializable() const override { + return true; + } + + static IEventBase* Load(TEventSerializedData* bufs) { + return new TEvRemoteJsonInfoRes(bufs->GetString()); + } + }; + + struct TEvRemoteBinaryInfoRes: public NActors::TEventBase<TEvRemoteBinaryInfoRes, RemoteBinaryInfoRes> { + TEvRemoteBinaryInfoRes() { + } + + TEvRemoteBinaryInfoRes(const TString& blob) + : Blob(blob) + { + } + + TString Blob; + + TString ToStringHeader() const override { + return "TEvRemoteBinaryInfoRes"; + } + + bool SerializeToArcadiaStream(TChunkSerializer *serializer) const override { + return serializer->WriteString(&Blob); + } + + ui32 CalculateSerializedSize() const override { + return Blob.size(); + } + + bool IsSerializable() const override { + return true; + } + + static IEventBase* Load(TEventSerializedData* bufs) { + return new TEvRemoteBinaryInfoRes(bufs->GetString()); + } + }; + + } + +} diff --git a/library/cpp/actors/core/mon_stats.h b/library/cpp/actors/core/mon_stats.h new file mode 100644 index 0000000000..d55552af0c --- /dev/null +++ b/library/cpp/actors/core/mon_stats.h @@ -0,0 +1,147 @@ +#pragma once + +#include "defs.h" +#include "actor.h" +#include <library/cpp/monlib/metrics/histogram_snapshot.h> +#include <util/system/hp_timer.h> + +namespace NActors { + struct TLogHistogram : public NMonitoring::IHistogramSnapshot { + TLogHistogram() { + memset(Buckets, 0, sizeof(Buckets)); + } + + inline void Add(ui64 val, ui64 inc = 1) { + size_t ind = 0; +#if defined(__clang__) && __clang_major__ == 3 && __clang_minor__ == 7 + asm volatile("" :: + : "memory"); +#endif + if (val > 1) { + ind = GetValueBitCount(val - 1); + } +#if defined(__clang__) && __clang_major__ == 3 && __clang_minor__ == 7 + asm volatile("" :: + : "memory"); +#endif + 
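+            // ind is ceil(log2(val)): values 0 and 1 fall into bucket 0, and any
+            // val in (2^(k-1), 2^k] falls into bucket k, via GetValueBitCount(val - 1).
+            // Relaxed load/store pairs are used instead of atomic increments since
+            // these per-thread histograms are merged by a reader via Aggregate().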
+            RelaxedStore(&TotalSamples, RelaxedLoad(&TotalSamples) + inc);
+            RelaxedStore(&Buckets[ind], RelaxedLoad(&Buckets[ind]) + inc);
+        }
+
+        void Aggregate(const TLogHistogram& other) {
+            const ui64 inc = RelaxedLoad(&other.TotalSamples);
+            RelaxedStore(&TotalSamples, RelaxedLoad(&TotalSamples) + inc);
+            for (size_t i = 0; i < Y_ARRAY_SIZE(Buckets); ++i) {
+                Buckets[i] += RelaxedLoad(&other.Buckets[i]);
+            }
+        }
+
+        // IHistogramSnapshot
+        ui32 Count() const override {
+            return Y_ARRAY_SIZE(Buckets);
+        }
+
+        NMonitoring::TBucketBound UpperBound(ui32 index) const override {
+            Y_ASSERT(index < Y_ARRAY_SIZE(Buckets));
+            if (index == 0) {
+                return 1;
+            }
+            return NMonitoring::TBucketBound(1ull << (index - 1)) * 2.0;
+        }
+
+        NMonitoring::TBucketValue Value(ui32 index) const override {
+            Y_ASSERT(index < Y_ARRAY_SIZE(Buckets));
+            return Buckets[index];
+        }
+
+        ui64 TotalSamples = 0;
+        ui64 Buckets[65];
+    };
+
+    struct TExecutorPoolStats {
+        ui64 MaxUtilizationTime = 0;
+    };
+
+    struct TExecutorThreadStats {
+        ui64 SentEvents = 0;
+        ui64 ReceivedEvents = 0;
+        ui64 PreemptedEvents = 0; // Number of events that experienced hard preemption
+        ui64 NonDeliveredEvents = 0;
+        ui64 EmptyMailboxActivation = 0;
+        ui64 CpuNs = 0; // nanoseconds the thread was executing on CPU (accounts for preemption)
+        NHPTimer::STime ElapsedTicks = 0;
+        NHPTimer::STime ParkedTicks = 0;
+        NHPTimer::STime BlockedTicks = 0;
+        TLogHistogram ActivationTimeHistogram;
+        TLogHistogram EventDeliveryTimeHistogram;
+        TLogHistogram EventProcessingCountHistogram;
+        TLogHistogram EventProcessingTimeHistogram;
+        TVector<NHPTimer::STime> ElapsedTicksByActivity;
+        TVector<ui64> ReceivedEventsByActivity;
+        TVector<i64> ActorsAliveByActivity; // the sum over all threads should be positive, but a per-thread value might be negative
+        TVector<ui64> ScheduledEventsByActivity;
+        ui64 PoolActorRegistrations = 0;
+        ui64 PoolDestroyedActors = 0;
+        ui64 PoolAllocatedMailboxes = 0;
+        ui64 MailboxPushedOutBySoftPreemption = 0;
+        ui64 MailboxPushedOutByTime = 0;
+        ui64 MailboxPushedOutByEventCount = 0;
+
+        TExecutorThreadStats(size_t activityVecSize = 1) // must not be empty, since 0 is used as the default
+            : ElapsedTicksByActivity(activityVecSize)
+            , ReceivedEventsByActivity(activityVecSize)
+            , ActorsAliveByActivity(activityVecSize)
+            , ScheduledEventsByActivity(activityVecSize)
+        {}
+
+        template <typename T>
+        static void AggregateOne(TVector<T>& self, const TVector<T>& other) {
+            const size_t selfSize = self.size();
+            const size_t otherSize = other.size();
+            if (selfSize < otherSize)
+                self.resize(otherSize);
+            for (size_t at = 0; at < otherSize; ++at)
+                self[at] += RelaxedLoad(&other[at]);
+        }
+
+        void Aggregate(const TExecutorThreadStats& other) {
+            SentEvents += RelaxedLoad(&other.SentEvents);
+            ReceivedEvents += RelaxedLoad(&other.ReceivedEvents);
+            PreemptedEvents += RelaxedLoad(&other.PreemptedEvents);
+            NonDeliveredEvents += RelaxedLoad(&other.NonDeliveredEvents);
+            EmptyMailboxActivation += RelaxedLoad(&other.EmptyMailboxActivation);
+            CpuNs += RelaxedLoad(&other.CpuNs);
+            ElapsedTicks += RelaxedLoad(&other.ElapsedTicks);
+            ParkedTicks += RelaxedLoad(&other.ParkedTicks);
+            BlockedTicks += RelaxedLoad(&other.BlockedTicks);
+            MailboxPushedOutBySoftPreemption += RelaxedLoad(&other.MailboxPushedOutBySoftPreemption);
+            MailboxPushedOutByTime += RelaxedLoad(&other.MailboxPushedOutByTime);
+            MailboxPushedOutByEventCount += RelaxedLoad(&other.MailboxPushedOutByEventCount);
+
+            ActivationTimeHistogram.Aggregate(other.ActivationTimeHistogram);
EventDeliveryTimeHistogram.Aggregate(other.EventDeliveryTimeHistogram); + EventProcessingCountHistogram.Aggregate(other.EventProcessingCountHistogram); + EventProcessingTimeHistogram.Aggregate(other.EventProcessingTimeHistogram); + + AggregateOne(ElapsedTicksByActivity, other.ElapsedTicksByActivity); + AggregateOne(ReceivedEventsByActivity, other.ReceivedEventsByActivity); + AggregateOne(ActorsAliveByActivity, other.ActorsAliveByActivity); + AggregateOne(ScheduledEventsByActivity, other.ScheduledEventsByActivity); + + RelaxedStore( + &PoolActorRegistrations, + std::max(RelaxedLoad(&PoolActorRegistrations), RelaxedLoad(&other.PoolActorRegistrations))); + RelaxedStore( + &PoolDestroyedActors, + std::max(RelaxedLoad(&PoolDestroyedActors), RelaxedLoad(&other.PoolDestroyedActors))); + RelaxedStore( + &PoolAllocatedMailboxes, + std::max(RelaxedLoad(&PoolAllocatedMailboxes), RelaxedLoad(&other.PoolAllocatedMailboxes))); + } + + size_t MaxActivityType() const { + return ActorsAliveByActivity.size(); + } + }; + +} diff --git a/library/cpp/actors/core/monotonic.cpp b/library/cpp/actors/core/monotonic.cpp new file mode 100644 index 0000000000..3465149dbe --- /dev/null +++ b/library/cpp/actors/core/monotonic.cpp @@ -0,0 +1,23 @@ +#include "monotonic.h" + +#include <chrono> + +namespace NActors { + + namespace { + // Unfortunately time_since_epoch() is sometimes negative on wine + // Remember initial time point at program start and use offsets from that + std::chrono::steady_clock::time_point MonotonicOffset = std::chrono::steady_clock::now(); + } + + ui64 GetMonotonicMicroSeconds() { + auto microseconds = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - MonotonicOffset).count(); + // Steady clock is supposed to never jump backwards, but it's better to be safe in case of buggy implementations + if (Y_UNLIKELY(microseconds < 0)) { + microseconds = 0; + } + // Add one so we never return zero + return microseconds + 1; + } + +} // namespace NActors diff --git a/library/cpp/actors/core/monotonic.h b/library/cpp/actors/core/monotonic.h new file mode 100644 index 0000000000..6fceb91dbe --- /dev/null +++ b/library/cpp/actors/core/monotonic.h @@ -0,0 +1,111 @@ +#pragma once + +#include <util/datetime/base.h> + +namespace NActors { + + /** + * Returns current monotonic time in microseconds + */ + ui64 GetMonotonicMicroSeconds(); + + /** + * Similar to TInstant, but measuring monotonic time + */ + class TMonotonic : public TTimeBase<TMonotonic> { + using TBase = TTimeBase<TMonotonic>; + + private: + constexpr explicit TMonotonic(TValue value) noexcept + : TBase(value) + { } + + public: + constexpr TMonotonic() noexcept { + } + + static constexpr TMonotonic FromValue(TValue value) noexcept { + return TMonotonic(value); + } + + static inline TMonotonic Now() { + return TMonotonic::MicroSeconds(GetMonotonicMicroSeconds()); + } + + using TBase::Days; + using TBase::Hours; + using TBase::MicroSeconds; + using TBase::MilliSeconds; + using TBase::Minutes; + using TBase::Seconds; + + static constexpr TMonotonic Max() noexcept { + return TMonotonic(::Max<ui64>()); + } + + static constexpr TMonotonic Zero() noexcept { + return TMonotonic(); + } + + static constexpr TMonotonic MicroSeconds(ui64 us) noexcept { + return TMonotonic(TInstant::MicroSeconds(us).GetValue()); + } + + static constexpr TMonotonic MilliSeconds(ui64 ms) noexcept { + return TMonotonic(TInstant::MilliSeconds(ms).GetValue()); + } + + static constexpr TMonotonic Seconds(ui64 s) noexcept { + return 
TMonotonic(TInstant::Seconds(s).GetValue()); + } + + static constexpr TMonotonic Minutes(ui64 m) noexcept { + return TMonotonic(TInstant::Minutes(m).GetValue()); + } + + static constexpr TMonotonic Hours(ui64 h) noexcept { + return TMonotonic(TInstant::Hours(h).GetValue()); + } + + static constexpr TMonotonic Days(ui64 d) noexcept { + return TMonotonic(TInstant::Days(d).GetValue()); + } + + template<class T> + inline TMonotonic& operator+=(const T& t) noexcept { + return (*this = (*this + t)); + } + + template<class T> + inline TMonotonic& operator-=(const T& t) noexcept { + return (*this = (*this - t)); + } + }; +} // namespace NActors + +Y_DECLARE_PODTYPE(NActors::TMonotonic); + +template<> +struct THash<NActors::TMonotonic> { + size_t operator()(const NActors::TMonotonic& key) const { + return THash<NActors::TMonotonic::TValue>()(key.GetValue()); + } +}; + +namespace NActors { + + constexpr TDuration operator-(const TMonotonic& l, const TMonotonic& r) { + return TInstant::FromValue(l.GetValue()) - TInstant::FromValue(r.GetValue()); + } + + constexpr TMonotonic operator+(const TMonotonic& l, const TDuration& r) { + TInstant result = TInstant::FromValue(l.GetValue()) + r; + return TMonotonic::FromValue(result.GetValue()); + } + + constexpr TMonotonic operator-(const TMonotonic& l, const TDuration& r) { + TInstant result = TInstant::FromValue(l.GetValue()) - r; + return TMonotonic::FromValue(result.GetValue()); + } + +} // namespace NActors diff --git a/library/cpp/actors/core/probes.cpp b/library/cpp/actors/core/probes.cpp new file mode 100644 index 0000000000..7ace83e102 --- /dev/null +++ b/library/cpp/actors/core/probes.cpp @@ -0,0 +1,28 @@ +#include "probes.h" + +#include "actorsystem.h" + +#include <util/string/builder.h> + +LWTRACE_DEFINE_PROVIDER(ACTORLIB_PROVIDER); + +namespace NActors { + TVector<NLWTrace::TDashboard> LWTraceDashboards(TActorSystemSetup* setup) { + TVector<NLWTrace::TDashboard> result; + + NLWTrace::TDashboard slowDash; + ui32 pools = setup->GetExecutorsCount(); + size_t top = 30; + slowDash.SetName("ActorSystem slow events"); + slowDash.SetDescription(TStringBuilder() << "TOP" << top << " slow event executions >1M cycles for every pool (refresh page to update)"); + for (ui32 pool = 0; pool < pools; pool++) { + auto* row = slowDash.AddRows(); + auto* cell = row->AddCells(); + cell->SetTitle(TStringBuilder() << pool << ":" << setup->GetPoolName(pool)); + cell->SetUrl(TStringBuilder() << "?mode=log&id=.ACTORLIB_PROVIDER.SlowEvent.ppoolId=" << pool << "&s=eventMs&reverse=y&head=30"); + } + result.push_back(slowDash); + + return result; + } +} diff --git a/library/cpp/actors/core/probes.h b/library/cpp/actors/core/probes.h new file mode 100644 index 0000000000..4912d6dd26 --- /dev/null +++ b/library/cpp/actors/core/probes.h @@ -0,0 +1,176 @@ +#pragma once + +#include <library/cpp/lwtrace/all.h> +#include <util/generic/vector.h> + +#define LWACTORID(x) (x).RawX1(), (x).RawX2(), (x).NodeId(), (x).PoolID() +#define LWTYPE_ACTORID ui64, ui64, ui32, ui32 +#define LWNAME_ACTORID(n) n "Raw1", n "Raw2", n "NodeId", n "PoolId" + +#define ACTORLIB_PROVIDER(PROBE, EVENT, GROUPS, TYPES, NAMES) \ + PROBE(SlowEvent, GROUPS("ActorLibSlow"), \ + TYPES(ui32, double, TString, TString, TString), \ + NAMES("poolId", "eventMs", "eventType", "actorId", "actorType")) \ + PROBE(EventSlowDelivery, GROUPS("ActorLibSlow"), \ + TYPES(ui32, double, double, ui64, TString, TString, TString), \ + NAMES("poolId", "deliveryMs", "sinceActivationMs", "eventProcessedBefore", "eventType", "actorId", 
"actorType")) \ + PROBE(SlowActivation, GROUPS("ActorLibSlow"), \ + TYPES(ui32, double), \ + NAMES("poolId", "activationMs")) \ + PROBE(SlowRegisterNew, GROUPS("ActorLibSlow"), \ + TYPES(ui32, double), \ + NAMES("poolId", "registerNewMs")) \ + PROBE(SlowRegisterAdd, GROUPS("ActorLibSlow"), \ + TYPES(ui32, double), \ + NAMES("poolId", "registerAddMs")) \ + PROBE(MailboxPushedOutBySoftPreemption, GROUPS("ActorLibMailbox", "ActorLibMailboxPushedOut"), \ + TYPES(ui32, TString, ui32, TDuration, ui64, TString, TString), \ + NAMES("poolId", "pool", "eventsProcessed", "procTimeMs", "workerId", "actorId", "actorType")) \ + PROBE(MailboxPushedOutByTime, GROUPS("ActorLibMailbox", "ActorLibMailboxPushedOut"), \ + TYPES(ui32, TString, ui32, TDuration, ui64, TString, TString), \ + NAMES("poolId", "pool", "eventsProcessed", "procTimeMs", "workerId", "actorId", "actorType")) \ + PROBE(MailboxPushedOutByEventCount, GROUPS("ActorLibMailbox", "ActorLibMailboxPushedOut"), \ + TYPES(ui32, TString, ui32, TDuration, ui64, TString, TString), \ + NAMES("poolId", "pool", "eventsProcessed", "procTimeMs", "workerId", "actorId", "actorType")) \ + PROBE(MailboxEmpty, GROUPS("ActorLibMailbox"), \ + TYPES(ui32, TString, ui32, TDuration, ui64, TString, TString), \ + NAMES("poolId", "pool", "eventsProcessed", "procTimeMs", "workerId", "actorId", "actorType")) \ + PROBE(ActivationBegin, GROUPS(), \ + TYPES(ui32, ui32, ui32, double), \ + NAMES("cpu", "poolId", "workerId", "expireMs")) \ + PROBE(ActivationEnd, GROUPS(), \ + TYPES(ui32, ui32, ui32), \ + NAMES("cpu", "poolId", "workerId")) \ + PROBE(ExecutorThreadStats, GROUPS("ActorLibStats"), \ + TYPES(ui32, TString, ui64, ui64, ui64, double, double), \ + NAMES("poolId", "pool", "workerId", "execCount", "readyActivationCount", "execMs", "nonExecMs")) \ + PROBE(SlowICReadLoopAdjustSize, GROUPS("ActorLibSlowIC"), \ + TYPES(double), \ + NAMES("icReadLoopAdjustSizeMs")) \ + PROBE(SlowICReadFromSocket, GROUPS("ActorLibSlowIC"), \ + TYPES(double), \ + NAMES("icReadFromSocketMs")) \ + PROBE(SlowICReadLoopSend, GROUPS("ActorLibSlowIC"), \ + TYPES(double), \ + NAMES("icReadLoopSendMs")) \ + PROBE(SlowICAllocPacketBuffer, GROUPS("ActorLibSlowIC"), \ + TYPES(ui32, double), \ + NAMES("peerId", "icAllocPacketBufferMs")) \ + PROBE(SlowICFillSendingBuffer, GROUPS("ActorLibSlowIC"), \ + TYPES(ui32, double), \ + NAMES("peerId", "icFillSendingBufferMs")) \ + PROBE(SlowICPushSentPackets, GROUPS("ActorLibSlowIC"), \ + TYPES(ui32, double), \ + NAMES("peerId", "icPushSentPacketsMs")) \ + PROBE(SlowICPushSendQueue, GROUPS("ActorLibSlowIC"), \ + TYPES(ui32, double), \ + NAMES("peerId", "icPushSendQueueMs")) \ + PROBE(SlowICWriteData, GROUPS("ActorLibSlowIC"), \ + TYPES(ui32, double), \ + NAMES("peerId", "icWriteDataMs")) \ + PROBE(SlowICDropConfirmed, GROUPS("ActorLibSlowIC"), \ + TYPES(ui32, double), \ + NAMES("peerId", "icDropConfirmedMs")) \ + PROBE(ActorsystemScheduler, GROUPS("Durations"), \ + TYPES(ui64, ui64, ui32, ui32, ui64, ui64), \ + NAMES("timeUs", "timerfd_expirations", "eventsGottenFromQueues", "eventsSent", \ + "eventsInSendQueue", "eventSchedulingErrorUs")) \ + PROBE(ForwardEvent, GROUPS("Orbit", "InterconnectSessionTCP"), \ + TYPES(ui32, ui32, ui32, LWTYPE_ACTORID, LWTYPE_ACTORID, ui64, ui32), \ + NAMES("peerId", "type", "flags", LWNAME_ACTORID("r"), LWNAME_ACTORID("s"), \ + "cookie", "eventSerializedSize")) \ + PROBE(EnqueueEvent, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, ui64, TDuration, ui16, ui64, ui64), \ + NAMES("peerId", "numEventsInReadyChannels", 
"enqueueBlockedTotalMs", "channelId", "queueSizeInEvents", "queueSizeInBytes")) \ + PROBE(SerializeToPacketBegin, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, ui16, ui64), \ + NAMES("peerId", "channelId", "outputQueueSize")) \ + PROBE(SerializeToPacketEnd, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, ui16, ui64, ui64), \ + NAMES("peerId", "channelId", "outputQueueSize", "offsetInPacket")) \ + PROBE(FillSendingBuffer, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, ui32, ui64, TDuration), \ + NAMES("peerId", "taskBytesGenerated", "numEventsInReadyChannelsBehind", "fillBlockedTotalMs")) \ + PROBE(PacketGenerated, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, ui64, ui64, ui64, ui64), \ + NAMES("peerId", "bytesUnwritten", "inflightBytes", "packetsGenerated", "packetSize")) \ + PROBE(PacketWrittenToSocket, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, ui64, bool, ui64, ui64, TDuration, int), \ + NAMES("peerId", "packetsWrittenToSocket", "triedWriting", "packetDataSize", "bytesUnwritten", "writeBlockedTotalMs", "fd")) \ + PROBE(GenerateTraffic, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, double, ui64, ui32, ui64), \ + NAMES("peerId", "generateTrafficMs", "dataBytesSent", "generatedPackets", "generatedBytes")) \ + PROBE(WriteToSocket, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, ui64, ui64, ui64, ui64, TDuration, int), \ + NAMES("peerId", "bytesWritten", "packetsWritten", "packetsWrittenToSocket", "bytesUnwritten", "writeBlockedTotalMs", "fd")) \ + PROBE(UpdateFromInputSession, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, double), \ + NAMES("peerId", "pingMs")) \ + PROBE(UnblockByDropConfirmed, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, double), \ + NAMES("peerId", "updateDeliveryMs")) \ + PROBE(DropConfirmed, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, ui64, ui64), \ + NAMES("peerId", "droppedBytes", "inflightBytes")) \ + PROBE(StartRam, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32), \ + NAMES("peerId")) \ + PROBE(FinishRam, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, double), \ + NAMES("peerId", "ramMs")) \ + PROBE(SkipGenerateTraffic, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, double), \ + NAMES("peerId", "elapsedSinceRamMs")) \ + PROBE(StartBatching, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, double), \ + NAMES("peerId", "batchPeriodMs")) \ + PROBE(FinishBatching, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, double), \ + NAMES("peerId", "finishBatchDeliveryMs")) \ + PROBE(BlockedWrite, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, double, ui64), \ + NAMES("peerId", "sendQueueSize", "writtenBytes")) \ + PROBE(ReadyWrite, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, double, double), \ + NAMES("peerId", "readyWriteDeliveryMs", "blockMs")) \ + PROBE(EpollStartWaitIn, GROUPS("EpollThread"), \ + TYPES(), \ + NAMES()) \ + PROBE(EpollFinishWaitIn, GROUPS("EpollThread"), \ + TYPES(i32), \ + NAMES("eventsCount")) \ + PROBE(EpollWaitOut, GROUPS("EpollThread"), \ + TYPES(i32), \ + NAMES("eventsCount")) \ + PROBE(EpollSendReadyRead, GROUPS("EpollThread"), \ + TYPES(bool, bool, int), \ + NAMES("hangup", "event", "fd")) \ + PROBE(EpollSendReadyWrite, GROUPS("EpollThread"), \ + TYPES(bool, bool, int), \ + NAMES("hangup", "event", "fd")) \ + PROBE(HardPreemption, GROUPS("UnitedWorker"), \ + TYPES(ui32, ui32, ui32, ui32), \ + NAMES("cpu", "prevPoolId", "prevWorkerId", "nextWorkerId")) \ + PROBE(SetPreemptionTimer, GROUPS("UnitedWorker", "PreemptionTimer"), \ + TYPES(ui32, ui32, int, double, double), \ + NAMES("cpu", 
"workerId", "fd", "nowMs", "preemptMs")) \ + PROBE(ResetPreemptionTimer, GROUPS("UnitedWorker", "PreemptionTimer"), \ + TYPES(ui32, ui32, int, double, double), \ + NAMES("cpu", "workerId", "fd", "nowMs", "preemptMs")) \ + PROBE(SlowWorkerActionRace, GROUPS("UnitedWorker"), \ + TYPES(ui32, ui32, ui64), \ + NAMES("cpu", "poolId", "slowPoolsMask")) \ + PROBE(PoolStats, GROUPS("PoolCpuBalancer"), \ + TYPES(ui32, TString, ui64, ui8, ui8, double, double, double, ui64, ui64, ui64), \ + NAMES("poolId", "pool", "currentCpus", "loadClass", "priority", "scaleFactor", "cpuIdle", "cpuLoad", "importance", "addImportance", "subImportance")) \ + PROBE(MoveCpu, GROUPS("PoolCpuBalancer"), \ + TYPES(ui32, ui64, TString, TString, ui32), \ + NAMES("fromPoolId", "toPoolId", "fromPool", "toPool", "cpu")) \ + /**/ + +LWTRACE_DECLARE_PROVIDER(ACTORLIB_PROVIDER) + +namespace NActors { + struct TActorSystemSetup; + TVector<NLWTrace::TDashboard> LWTraceDashboards(TActorSystemSetup* setup); +} diff --git a/library/cpp/actors/core/process_stats.cpp b/library/cpp/actors/core/process_stats.cpp new file mode 100644 index 0000000000..0e1dbd0031 --- /dev/null +++ b/library/cpp/actors/core/process_stats.cpp @@ -0,0 +1,303 @@ +#include "actorsystem.h" +#include "actor_bootstrapped.h" +#include "hfunc.h" +#include "process_stats.h" + +#include <library/cpp/monlib/dynamic_counters/counters.h> +#include <library/cpp/monlib/metrics/metric_registry.h> + +#include <util/datetime/uptime.h> +#include <util/system/defaults.h> +#include <util/stream/file.h> +#include <util/string/vector.h> +#include <util/string/split.h> + +#ifndef _win_ +#include <sys/user.h> +#include <sys/sysctl.h> +#endif + +namespace NActors { +#ifdef _linux_ + + namespace { + template <typename TVal> + static bool ExtractVal(const TString& str, const TString& name, TVal& res) { + if (!str.StartsWith(name)) + return false; + size_t pos = name.size(); + while (pos < str.size() && (str[pos] == ' ' || str[pos] == '\t')) { + pos++; + } + res = atol(str.data() + pos); + return true; + } + + float TicksPerMillisec() { +#ifdef _SC_CLK_TCK + return sysconf(_SC_CLK_TCK) / 1000.0; +#else + return 1.f; +#endif + } + } + + bool TProcStat::Fill(pid_t pid) { + try { + TString strPid(ToString(pid)); + TFileInput proc("/proc/" + strPid + "/status"); + TString str; + while (proc.ReadLine(str)) { + if (ExtractVal(str, "VmRSS:", Rss)) + continue; + if (ExtractVal(str, "voluntary_ctxt_switches:", VolCtxSwtch)) + continue; + if (ExtractVal(str, "nonvoluntary_ctxt_switches:", NonvolCtxSwtch)) + continue; + } + // Convert from kB to bytes + Rss *= 1024; + + float tickPerMillisec = TicksPerMillisec(); + + TFileInput procStat("/proc/" + strPid + "/stat"); + procStat.ReadLine(str); + if (!str.empty()) { + sscanf(str.data(), + "%d %*s %c %d %d %d %d %d %u %lu %lu " + "%lu %lu %lu %lu %ld %ld %ld %ld %ld " + "%ld %llu %lu %ld %lu", + &Pid, &State, &Ppid, &Pgrp, &Session, &TtyNr, &TPgid, &Flags, &MinFlt, &CMinFlt, + &MajFlt, &CMajFlt, &Utime, &Stime, &CUtime, &CStime, &Priority, &Nice, &NumThreads, + &ItRealValue, &StartTime, &Vsize, &RssPages, &RssLim); + Utime /= tickPerMillisec; + Stime /= tickPerMillisec; + CUtime /= tickPerMillisec; + CStime /= tickPerMillisec; + SystemUptime = ::Uptime(); + Uptime = SystemUptime - TDuration::MilliSeconds(StartTime / TicksPerMillisec()); + } + + TFileInput statm("/proc/" + strPid + "/statm"); + statm.ReadLine(str); + TVector<TString> fields; + StringSplitter(str).Split(' ').SkipEmpty().Collect(&fields); + if (fields.size() >= 7) { + ui64 resident = 
FromString<ui64>(fields[1]); + ui64 shared = FromString<ui64>(fields[2]); + if (PageSize == 0) { + PageSize = ObtainPageSize(); + } + FileRss = shared * PageSize; + AnonRss = (resident - shared) * PageSize; + } + + TFileInput cgroup("/proc/" + strPid + "/cgroup"); + TString line; + TString memoryCGroup; + while (cgroup.ReadLine(line) > 0) { + StringSplitter(line).Split(':').Collect(&fields); + if (fields.size() > 2 && fields[1] == "memory") { + memoryCGroup = fields[2]; + break; + } + } + if (!memoryCGroup.empty()) { + TFileInput limit("/sys/fs/cgroup/memory" + memoryCGroup + "/memory.limit_in_bytes"); + if (limit.ReadLine(line) > 0) { + CGroupMemLim = FromString<ui64>(line); + if (CGroupMemLim > (1ULL << 40)) { + CGroupMemLim = 0; + } + } + } + + } catch (...) { + return false; + } + return true; + } + + long TProcStat::ObtainPageSize() { + long sz = sysconf(_SC_PAGESIZE); + return sz; + } + +#else + + bool TProcStat::Fill(pid_t pid) { + Y_UNUSED(pid); + return false; + } + + long TProcStat::ObtainPageSize() { + return 0; + } + +#endif + +namespace { + // Periodically collects process stats and exposes them as mon counters + template <typename TDerived> + class TProcStatCollectingActor: public TActorBootstrapped<TProcStatCollectingActor<TDerived>> { + public: + static constexpr IActor::EActivityType ActorActivityType() { + return IActor::ACTORLIB_STATS; + } + + TProcStatCollectingActor(TDuration interval) + : Interval(interval) + { + } + + void Bootstrap(const TActorContext& ctx) { + ctx.Schedule(Interval, new TEvents::TEvWakeup()); + Self()->Become(&TDerived::StateWork); + } + + STFUNC(StateWork) { + switch (ev->GetTypeRewrite()) { + CFunc(TEvents::TSystem::Wakeup, Wakeup); + } + } + + private: + void Wakeup(const TActorContext& ctx) { + Self()->UpdateCounters(ProcStat); + ctx.Schedule(Interval, new TEvents::TEvWakeup()); + } + + TDerived* Self() { + ProcStat.Fill(getpid()); + return static_cast<TDerived*>(this); + } + + private: + const TDuration Interval; + TProcStat ProcStat; + }; + + // Periodically collects process stats and exposes them as mon counters + class TDynamicCounterCollector: public TProcStatCollectingActor<TDynamicCounterCollector> { + using TBase = TProcStatCollectingActor<TDynamicCounterCollector>; + public: + TDynamicCounterCollector( + ui32 intervalSeconds, + NMonitoring::TDynamicCounterPtr counters) + : TBase{TDuration::Seconds(intervalSeconds)} + { + ProcStatGroup = counters->GetSubgroup("counters", "utils"); + + VmSize = ProcStatGroup->GetCounter("Process/VmSize", false); + AnonRssSize = ProcStatGroup->GetCounter("Process/AnonRssSize", false); + FileRssSize = ProcStatGroup->GetCounter("Process/FileRssSize", false); + CGroupMemLimit = ProcStatGroup->GetCounter("Process/CGroupMemLimit", false); + UserTime = ProcStatGroup->GetCounter("Process/UserTime", true); + SysTime = ProcStatGroup->GetCounter("Process/SystemTime", true); + MinorPageFaults = ProcStatGroup->GetCounter("Process/MinorPageFaults", true); + MajorPageFaults = ProcStatGroup->GetCounter("Process/MajorPageFaults", true); + UptimeSeconds = ProcStatGroup->GetCounter("Process/UptimeSeconds", false); + NumThreads = ProcStatGroup->GetCounter("Process/NumThreads", false); + SystemUptimeSeconds = ProcStatGroup->GetCounter("System/UptimeSeconds", false); + } + + void UpdateCounters(const TProcStat& procStat) { + *VmSize = procStat.Vsize; + *AnonRssSize = procStat.AnonRss; + *FileRssSize = procStat.FileRss; + if (procStat.CGroupMemLim) { + *CGroupMemLimit = procStat.CGroupMemLim; + } + *UserTime = procStat.Utime; 
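+            // Utime/Stime are in milliseconds here: TProcStat::Fill() divides the
+            // raw /proc/<pid>/stat tick counts by TicksPerMillisec().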
+            *SysTime = procStat.Stime;
+            *MinorPageFaults = procStat.MinFlt;
+            *MajorPageFaults = procStat.MajFlt;
+            *UptimeSeconds = procStat.Uptime.Seconds();
+            *NumThreads = procStat.NumThreads;
+            *SystemUptimeSeconds = procStat.SystemUptime.Seconds();
+        }
+
+    private:
+        NMonitoring::TDynamicCounterPtr ProcStatGroup;
+        NMonitoring::TDynamicCounters::TCounterPtr VmSize;
+        NMonitoring::TDynamicCounters::TCounterPtr AnonRssSize;
+        NMonitoring::TDynamicCounters::TCounterPtr FileRssSize;
+        NMonitoring::TDynamicCounters::TCounterPtr CGroupMemLimit;
+        NMonitoring::TDynamicCounters::TCounterPtr UserTime;
+        NMonitoring::TDynamicCounters::TCounterPtr SysTime;
+        NMonitoring::TDynamicCounters::TCounterPtr MinorPageFaults;
+        NMonitoring::TDynamicCounters::TCounterPtr MajorPageFaults;
+        NMonitoring::TDynamicCounters::TCounterPtr UptimeSeconds;
+        NMonitoring::TDynamicCounters::TCounterPtr NumThreads;
+        NMonitoring::TDynamicCounters::TCounterPtr SystemUptimeSeconds;
+    };
+
+
+    class TRegistryCollector: public TProcStatCollectingActor<TRegistryCollector> {
+        using TBase = TProcStatCollectingActor<TRegistryCollector>;
+    public:
+        TRegistryCollector(TDuration interval, NMonitoring::TMetricRegistry& registry)
+            : TBase{interval}
+        {
+            VmSize = registry.IntGauge({{"sensor", "process.VmSize"}});
+            AnonRssSize = registry.IntGauge({{"sensor", "process.AnonRssSize"}});
+            FileRssSize = registry.IntGauge({{"sensor", "process.FileRssSize"}});
+            CGroupMemLimit = registry.IntGauge({{"sensor", "process.CGroupMemLimit"}});
+            UptimeSeconds = registry.IntGauge({{"sensor", "process.UptimeSeconds"}});
+            NumThreads = registry.IntGauge({{"sensor", "process.NumThreads"}});
+            SystemUptimeSeconds = registry.IntGauge({{"sensor", "system.UptimeSeconds"}});
+
+            UserTime = registry.Rate({{"sensor", "process.UserTime"}});
+            SysTime = registry.Rate({{"sensor", "process.SystemTime"}});
+            MinorPageFaults = registry.Rate({{"sensor", "process.MinorPageFaults"}});
+            MajorPageFaults = registry.Rate({{"sensor", "process.MajorPageFaults"}});
+        }
+
+        void UpdateCounters(const TProcStat& procStat) {
+            VmSize->Set(procStat.Vsize);
+            AnonRssSize->Set(procStat.AnonRss);
+            FileRssSize->Set(procStat.FileRss);
+            CGroupMemLimit->Set(procStat.CGroupMemLim);
+            UptimeSeconds->Set(procStat.Uptime.Seconds());
+            NumThreads->Set(procStat.NumThreads);
+            SystemUptimeSeconds->Set(procStat.SystemUptime.Seconds());
+
+            // It is OK to reset and re-add the metric value here, because the
+            // mutation is performed in a single-threaded context
+
+            UserTime->Reset();
+            UserTime->Add(procStat.Utime);
+
+            SysTime->Reset();
+            SysTime->Add(procStat.Stime);
+
+            MinorPageFaults->Reset();
+            MinorPageFaults->Add(procStat.MinFlt);
+
+            MajorPageFaults->Reset();
+            MajorPageFaults->Add(procStat.MajFlt);
+        }
+
+    private:
+        NMonitoring::TIntGauge* VmSize;
+        NMonitoring::TIntGauge* AnonRssSize;
+        NMonitoring::TIntGauge* FileRssSize;
+        NMonitoring::TIntGauge* CGroupMemLimit;
+        NMonitoring::TRate* UserTime;
+        NMonitoring::TRate* SysTime;
+        NMonitoring::TRate* MinorPageFaults;
+        NMonitoring::TRate* MajorPageFaults;
+        NMonitoring::TIntGauge* UptimeSeconds;
+        NMonitoring::TIntGauge* NumThreads;
+        NMonitoring::TIntGauge* SystemUptimeSeconds;
+    };
+} // namespace
+
+    IActor* CreateProcStatCollector(ui32 intervalSec, NMonitoring::TDynamicCounterPtr counters) {
+        return new TDynamicCounterCollector(intervalSec, counters);
+    }
+
+    IActor* CreateProcStatCollector(TDuration interval, NMonitoring::TMetricRegistry& registry) {
+        return new TRegistryCollector(interval, registry);
+    }
+}
diff --git 
a/library/cpp/actors/core/process_stats.h b/library/cpp/actors/core/process_stats.h new file mode 100644 index 0000000000..66346d0b5a --- /dev/null +++ b/library/cpp/actors/core/process_stats.h @@ -0,0 +1,66 @@ +#pragma once + +#include "defs.h" +#include "actor.h" + +#include <library/cpp/monlib/dynamic_counters/counters.h> + +namespace NMonitoring { + class TMetricRegistry; +} + +namespace NActors { + struct TProcStat { + ui64 Rss; + ui64 VolCtxSwtch; + ui64 NonvolCtxSwtch; + + int Pid; + char State; + int Ppid; + int Pgrp; + int Session; + int TtyNr; + int TPgid; + unsigned Flags; + unsigned long MinFlt; + unsigned long CMinFlt; + unsigned long MajFlt; + unsigned long CMajFlt; + unsigned long Utime; + unsigned long Stime; + long CUtime; + long CStime; + long Priority; + long Nice; + long NumThreads; + long ItRealValue; + // StartTime is measured from system boot + unsigned long long StartTime; + unsigned long Vsize; + long RssPages; + unsigned long RssLim; + ui64 FileRss; + ui64 AnonRss; + ui64 CGroupMemLim = 0; + + TDuration Uptime; + TDuration SystemUptime; + // ... + + TProcStat() { + Zero(*this); + Y_UNUSED(PageSize); + } + + bool Fill(pid_t pid); + + private: + long PageSize = 0; + + long ObtainPageSize(); + }; + + IActor* CreateProcStatCollector(ui32 intervalSec, NMonitoring::TDynamicCounterPtr counters); + IActor* CreateProcStatCollector(TDuration interval, NMonitoring::TMetricRegistry& registry); +} diff --git a/library/cpp/actors/core/scheduler_actor.cpp b/library/cpp/actors/core/scheduler_actor.cpp new file mode 100644 index 0000000000..febc5e40dd --- /dev/null +++ b/library/cpp/actors/core/scheduler_actor.cpp @@ -0,0 +1,279 @@ +#include "actor_bootstrapped.h" +#include "hfunc.h" +#include "probes.h" +#include "scheduler_actor.h" +#include "scheduler_queue.h" + +#include <library/cpp/actors/interconnect/poller_actor.h> +#include <util/system/hp_timer.h> + +#ifdef __linux__ +#include <sys/timerfd.h> +#include <errno.h> + +LWTRACE_USING(ACTORLIB_PROVIDER); + +namespace NActors { + class TTimerDescriptor: public TSharedDescriptor { + const int Descriptor; + + public: + TTimerDescriptor() + : Descriptor(timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK)) + { + Y_VERIFY(Descriptor != -1, "timerfd_create() failed with %s", strerror(errno)); + } + + ~TTimerDescriptor() override { + close(Descriptor); + } + + int GetDescriptor() override { + return Descriptor; + } + }; + + class TSchedulerActor: public TActor<TSchedulerActor> { + const TSchedulerConfig Cfg; + TIntrusivePtr<TSharedDescriptor> TimerDescriptor; + + TVector<NSchedulerQueue::TReader*> Readers; + + TActorId PollerActor; + TPollerToken::TPtr PollerToken; + + ui64 RealTime; + ui64 MonotonicTime; + + ui64 ActiveTick; + typedef TMap<ui64, TAutoPtr<NSchedulerQueue::TQueueType>> TMomentMap; // intrasecond queues + typedef THashMap<ui64, TAutoPtr<TMomentMap>> TScheduleMap; // over-second schedule + + TScheduleMap ScheduleMap; + + THolder<NThreading::TLegacyFuture<void, false>> MainCycle; + + static const ui64 IntrasecondThreshold = 1048576; // ~second + TAutoPtr<TMomentMap> ActiveSec; + volatile ui64* CurrentTimestamp = nullptr; + volatile ui64* CurrentMonotonic = nullptr; + TDeque<TAutoPtr<IEventHandle>> EventsToBeSent; + + public: + static constexpr IActor::EActivityType ActorActivityType() { + return IActor::ACTOR_SYSTEM_SCHEDULER_ACTOR; + } + + TSchedulerActor(const TSchedulerConfig& cfg) + : TActor(&TSchedulerActor::StateFunc) + , Cfg(cfg) + , TimerDescriptor(new TTimerDescriptor()) + , PollerActor(MakePollerActorId()) + { + 
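+            // The scheduler actor is driven by a non-blocking CLOCK_MONOTONIC timerfd:
+            // it is registered with the poller actor during initialization, and each
+            // expiration makes HandleSchedule() drain the per-thread schedule queues.
+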
Y_ASSERT(Cfg.ResolutionMicroseconds != 0); + Y_ASSERT(Cfg.ProgressThreshold != 0); + Become(&TSchedulerActor::StateFunc); + } + + void Handle(TEvSchedulerInitialize::TPtr& ev, const TActorContext& ctx) { + const TEvSchedulerInitialize& evInitialize = *ev->Get(); + Y_ASSERT(evInitialize.ScheduleReaders.size() != 0); + Readers.resize(evInitialize.ScheduleReaders.size()); + Copy(evInitialize.ScheduleReaders.begin(), evInitialize.ScheduleReaders.end(), Readers.begin()); + + Y_ASSERT(evInitialize.CurrentTimestamp != nullptr); + CurrentTimestamp = evInitialize.CurrentTimestamp; + + Y_ASSERT(evInitialize.CurrentMonotonic != nullptr); + CurrentMonotonic = evInitialize.CurrentMonotonic; + + struct itimerspec new_time; + memset(&new_time, 0, sizeof(new_time)); + new_time.it_value.tv_nsec = Cfg.ResolutionMicroseconds * 1000; + new_time.it_interval.tv_nsec = Cfg.ResolutionMicroseconds * 1000; + int ret = timerfd_settime(TimerDescriptor->GetDescriptor(), 0, &new_time, NULL); + Y_VERIFY(ret != -1, "timerfd_settime() failed with %s", strerror(errno)); + const bool success = ctx.Send(PollerActor, new TEvPollerRegister(TimerDescriptor, SelfId(), {})); + Y_VERIFY(success); + + RealTime = RelaxedLoad(CurrentTimestamp); + MonotonicTime = RelaxedLoad(CurrentMonotonic); + + ActiveTick = AlignUp<ui64>(MonotonicTime, IntrasecondThreshold); + } + + void Handle(TEvPollerRegisterResult::TPtr ev, const TActorContext& ctx) { + PollerToken = ev->Get()->PollerToken; + HandleSchedule(ctx); + } + + void UpdateTime() { + RealTime = TInstant::Now().MicroSeconds(); + MonotonicTime = Max(MonotonicTime, GetMonotonicMicroSeconds()); + AtomicStore(CurrentTimestamp, RealTime); + AtomicStore(CurrentMonotonic, MonotonicTime); + } + + void TryUpdateTime(NHPTimer::STime* lastTimeUpdate) { + NHPTimer::STime hpnow; + GetTimeFast(&hpnow); + const ui64 elapsedCycles = hpnow > *lastTimeUpdate ? hpnow - *lastTimeUpdate : 0; + if (elapsedCycles > Cfg.ResolutionMicroseconds * (NHPTimer::GetCyclesPerSecond() / IntrasecondThreshold)) { + UpdateTime(); + GetTimeFast(lastTimeUpdate); + } + } + + void HandleSchedule(const TActorContext& ctx) { + for (;;) { + NHPTimer::STime schedulingStart; + GetTimeFast(&schedulingStart); + NHPTimer::STime lastTimeUpdate = schedulingStart; + + ui64 expired; + ssize_t bytesRead; + bytesRead = read(TimerDescriptor->GetDescriptor(), &expired, sizeof(expired)); + if (bytesRead == -1) { + if (errno == EAGAIN) { + PollerToken->Request(true, false); + break; + } else if (errno == EINTR) { + continue; + } + } + Y_VERIFY(bytesRead == sizeof(expired), "Error while reading from timerfd, strerror# %s", strerror(errno)); + UpdateTime(); + + ui32 eventsGottenFromQueues = 0; + // collect everything from queues + for (ui32 i = 0; i != Readers.size(); ++i) { + while (NSchedulerQueue::TEntry* x = Readers[i]->Pop()) { + const ui64 instant = AlignUp<ui64>(x->InstantMicroseconds, Cfg.ResolutionMicroseconds); + IEventHandle* const ev = x->Ev; + ISchedulerCookie* const cookie = x->Cookie; + + // check is cookie still valid? 
it looks like checking would hurt performance without significant memory savings
+
+                        if (instant <= ActiveTick) {
+                            if (!ActiveSec)
+                                ActiveSec.Reset(new TMomentMap());
+                            TAutoPtr<NSchedulerQueue::TQueueType>& queue = (*ActiveSec)[instant];
+                            if (!queue)
+                                queue.Reset(new NSchedulerQueue::TQueueType());
+                            queue->Writer.Push(instant, ev, cookie);
+                        } else {
+                            const ui64 intrasecond = AlignUp<ui64>(instant, IntrasecondThreshold);
+                            TAutoPtr<TMomentMap>& msec = ScheduleMap[intrasecond];
+                            if (!msec)
+                                msec.Reset(new TMomentMap());
+                            TAutoPtr<NSchedulerQueue::TQueueType>& queue = (*msec)[instant];
+                            if (!queue)
+                                queue.Reset(new NSchedulerQueue::TQueueType());
+                            queue->Writer.Push(instant, ev, cookie);
+                        }
+                        ++eventsGottenFromQueues;
+                        TryUpdateTime(&lastTimeUpdate);
+                    }
+                }
+
+                ui64 eventSchedulingErrorUs = 0;
+                // send everything triggered on schedule
+                for (;;) {
+                    while (!!ActiveSec && !ActiveSec->empty()) {
+                        TMomentMap::iterator it = ActiveSec->begin();
+                        if (it->first <= MonotonicTime) {
+                            if (NSchedulerQueue::TQueueType* q = it->second.Get()) {
+                                while (NSchedulerQueue::TEntry* x = q->Reader.Pop()) {
+                                    Y_VERIFY_DEBUG(x->InstantMicroseconds <= ActiveTick);
+                                    if (eventSchedulingErrorUs == 0 && MonotonicTime > x->InstantMicroseconds) {
+                                        eventSchedulingErrorUs = MonotonicTime - x->InstantMicroseconds;
+                                    }
+                                    IEventHandle* ev = x->Ev;
+                                    ISchedulerCookie* cookie = x->Cookie;
+                                    if (cookie) {
+                                        if (cookie->Detach()) {
+                                            EventsToBeSent.push_back(ev);
+                                        } else {
+                                            delete ev;
+                                        }
+                                    } else {
+                                        EventsToBeSent.push_back(ev);
+                                    }
+                                    TryUpdateTime(&lastTimeUpdate);
+                                }
+                            }
+                            ActiveSec->erase(it);
+                        } else {
+                            break;
+                        }
+                    }
+
+                    if (ActiveTick <= MonotonicTime) {
+                        Y_VERIFY_DEBUG(!ActiveSec || ActiveSec->empty());
+                        ActiveSec.Destroy();
+                        ActiveTick += IntrasecondThreshold;
+                        TScheduleMap::iterator it = ScheduleMap.find(ActiveTick);
+                        if (it != ScheduleMap.end()) {
+                            ActiveSec = it->second;
+                            ScheduleMap.erase(it);
+                        }
+                        continue;
+                    }
+
+                    // ok, if we are here, then nothing is ready, so the send step is complete
+                    break;
+                }
+
+                // Send all from the buffer queue
+                const ui64 eventsToBeSentSize = EventsToBeSent.size();
+                ui32 sentCount = 0;
+                if (eventsToBeSentSize > Cfg.RelaxedSendThresholdEventsPerCycle) {
+                    sentCount = Cfg.RelaxedSendPaceEventsPerCycle +
+                        (eventsToBeSentSize - Cfg.RelaxedSendThresholdEventsPerCycle) / 2;
+                } else {
+                    sentCount = Min(eventsToBeSentSize, Cfg.RelaxedSendPaceEventsPerCycle);
+                }
+                for (ui32 i = 0; i < sentCount; ++i) {
+                    ctx.Send(EventsToBeSent.front());
+                    EventsToBeSent.pop_front();
+                }
+
+                NHPTimer::STime hpnow;
+                GetTimeFast(&hpnow);
+                const ui64 processingTime = hpnow > schedulingStart ?
hpnow - schedulingStart : 0; + const ui64 elapsedTimeMicroseconds = processingTime / (NHPTimer::GetCyclesPerSecond() / IntrasecondThreshold); + LWPROBE(ActorsystemScheduler, elapsedTimeMicroseconds, expired, eventsGottenFromQueues, sentCount, + eventsToBeSentSize, eventSchedulingErrorUs); + TryUpdateTime(&lastTimeUpdate); + } + } + + STRICT_STFUNC(StateFunc, + HFunc(TEvSchedulerInitialize, Handle) + CFunc(TEvPollerReady::EventType, HandleSchedule) + CFunc(TEvents::TSystem::PoisonPill, Die) + HFunc(TEvPollerRegisterResult, Handle) + ) + }; + + IActor* CreateSchedulerActor(const TSchedulerConfig& cfg) { + if (cfg.UseSchedulerActor) { + return new TSchedulerActor(cfg); + } else { + return nullptr; + } + } + +} + +#else // linux + +namespace NActors { + IActor* CreateSchedulerActor(const TSchedulerConfig& cfg) { + Y_UNUSED(cfg); + return nullptr; + } + +} + +#endif // linux diff --git a/library/cpp/actors/core/scheduler_actor.h b/library/cpp/actors/core/scheduler_actor.h new file mode 100644 index 0000000000..c2c561b43d --- /dev/null +++ b/library/cpp/actors/core/scheduler_actor.h @@ -0,0 +1,29 @@ +#pragma once + +#include "actor.h" +#include "event_local.h" +#include "events.h" +#include "scheduler_basic.h" + +namespace NActors { + struct TEvSchedulerInitialize : TEventLocal<TEvSchedulerInitialize, TEvents::TSystem::Bootstrap> { + TVector<NSchedulerQueue::TReader*> ScheduleReaders; + volatile ui64* CurrentTimestamp; + volatile ui64* CurrentMonotonic; + + TEvSchedulerInitialize(const TVector<NSchedulerQueue::TReader*>& scheduleReaders, volatile ui64* currentTimestamp, volatile ui64* currentMonotonic) + : ScheduleReaders(scheduleReaders) + , CurrentTimestamp(currentTimestamp) + , CurrentMonotonic(currentMonotonic) + { + } + }; + + IActor* CreateSchedulerActor(const TSchedulerConfig& cfg); + + inline TActorId MakeSchedulerActorId() { + char x[12] = {'s', 'c', 'h', 'e', 'd', 'u', 'l', 'e', 'r', 's', 'e', 'r'}; + return TActorId(0, TStringBuf(x, 12)); + } + +} diff --git a/library/cpp/actors/core/scheduler_actor_ut.cpp b/library/cpp/actors/core/scheduler_actor_ut.cpp new file mode 100644 index 0000000000..09b7369d36 --- /dev/null +++ b/library/cpp/actors/core/scheduler_actor_ut.cpp @@ -0,0 +1,100 @@ +#include "actor_coroutine.h" +#include "actorsystem.h" +#include "executor_pool_basic.h" +#include "scheduler_actor.h" +#include "scheduler_basic.h" +#include "events.h" +#include "event_local.h" +#include "hfunc.h" +#include <library/cpp/actors/interconnect/poller_actor.h> +#include <library/cpp/testing/unittest/registar.h> + +#include <util/system/sanitizers.h> + +using namespace NActors; + +Y_UNIT_TEST_SUITE(SchedulerActor) { + class TTestActor: public TActorBootstrapped<TTestActor> { + TManualEvent& DoneEvent; + TAtomic& EventsProcessed; + TInstant LastWakeup; + const TAtomicBase EventsTotalCount; + const TDuration ScheduleDelta; + + public: + TTestActor(TManualEvent& doneEvent, TAtomic& eventsProcessed, TAtomicBase eventsTotalCount, ui32 scheduleDeltaMs) + : DoneEvent(doneEvent) + , EventsProcessed(eventsProcessed) + , EventsTotalCount(eventsTotalCount) + , ScheduleDelta(TDuration::MilliSeconds(scheduleDeltaMs)) + { + } + + void Bootstrap(const TActorContext& ctx) { + LastWakeup = ctx.Now(); + Become(&TThis::StateFunc); + ctx.Schedule(ScheduleDelta, new TEvents::TEvWakeup()); + } + + void Handle(TEvents::TEvWakeup::TPtr& /*ev*/, const TActorContext& ctx) { + const TInstant now = ctx.Now(); + UNIT_ASSERT(now - LastWakeup >= ScheduleDelta); + LastWakeup = now; + + if 
(AtomicIncrement(EventsProcessed) == EventsTotalCount) {
+                DoneEvent.Signal();
+            } else {
+                ctx.Schedule(ScheduleDelta, new TEvents::TEvWakeup());
+            }
+        }
+
+        STRICT_STFUNC(StateFunc, {HFunc(TEvents::TEvWakeup, Handle)})
+    };
+
+    void Test(TAtomicBase eventsTotalCount, ui32 scheduleDeltaMs) {
+        THolder<TActorSystemSetup> setup = MakeHolder<TActorSystemSetup>();
+        setup->NodeId = 0;
+        setup->ExecutorsCount = 1;
+        setup->Executors.Reset(new TAutoPtr<IExecutorPool>[setup->ExecutorsCount]);
+        for (ui32 i = 0; i < setup->ExecutorsCount; ++i) {
+            setup->Executors[i] = new TBasicExecutorPool(i, 5, 10, "basic");
+        }
+        // create the poller actor (if the platform supports it)
+        TActorId pollerActorId;
+        if (IActor* poller = CreatePollerActor()) {
+            pollerActorId = MakePollerActorId();
+            setup->LocalServices.emplace_back(pollerActorId, TActorSetupCmd(poller, TMailboxType::ReadAsFilled, 0));
+        }
+        TActorId schedulerActorId;
+        if (IActor* schedulerActor = CreateSchedulerActor(TSchedulerConfig())) {
+            schedulerActorId = MakeSchedulerActorId();
+            setup->LocalServices.emplace_back(schedulerActorId, TActorSetupCmd(schedulerActor, TMailboxType::ReadAsFilled, 0));
+        }
+        setup->Scheduler = CreateSchedulerThread(TSchedulerConfig());
+
+        TActorSystem actorSystem(setup);
+
+        actorSystem.Start();
+
+        TManualEvent doneEvent;
+        TAtomic eventsProcessed = 0;
+        actorSystem.Register(new TTestActor(doneEvent, eventsProcessed, eventsTotalCount, scheduleDeltaMs));
+        doneEvent.WaitI();
+
+        UNIT_ASSERT(AtomicGet(eventsProcessed) == eventsTotalCount);
+
+        actorSystem.Stop();
+    }
+
+    Y_UNIT_TEST(LongEvents) {
+        Test(10, 500);
+    }
+
+    Y_UNIT_TEST(MediumEvents) {
+        Test(100, 50);
+    }
+
+    Y_UNIT_TEST(QuickEvents) {
+        Test(1000, 5);
+    }
+}
diff --git a/library/cpp/actors/core/scheduler_basic.cpp b/library/cpp/actors/core/scheduler_basic.cpp
new file mode 100644
index 0000000000..fba200e16b
--- /dev/null
+++ b/library/cpp/actors/core/scheduler_basic.cpp
@@ -0,0 +1,274 @@
+#include "scheduler_basic.h"
+#include "scheduler_queue.h"
+
+#include <library/cpp/actors/util/datetime.h>
+#include <library/cpp/actors/util/thread.h>
+
+#ifdef BALLOC
+#include <library/cpp/balloc/optional/operators.h>
+#endif
+
+namespace NActors {
+
+    struct TBasicSchedulerThread::TMonCounters {
+        NMonitoring::TDynamicCounters::TCounterPtr TimeDelayMs;
+        NMonitoring::TDynamicCounters::TCounterPtr QueueSize;
+        NMonitoring::TDynamicCounters::TCounterPtr EventsSent;
+        NMonitoring::TDynamicCounters::TCounterPtr EventsDropped;
+        NMonitoring::TDynamicCounters::TCounterPtr EventsAdded;
+        NMonitoring::TDynamicCounters::TCounterPtr Iterations;
+        NMonitoring::TDynamicCounters::TCounterPtr Sleeps;
+        NMonitoring::TDynamicCounters::TCounterPtr ElapsedMicrosec;
+
+        TMonCounters(const NMonitoring::TDynamicCounterPtr& counters)
+            : TimeDelayMs(counters->GetCounter("Scheduler/TimeDelayMs", false))
+            , QueueSize(counters->GetCounter("Scheduler/QueueSize", false))
+            , EventsSent(counters->GetCounter("Scheduler/EventsSent", true))
+            , EventsDropped(counters->GetCounter("Scheduler/EventsDropped", true))
+            , EventsAdded(counters->GetCounter("Scheduler/EventsAdded", true))
+            , Iterations(counters->GetCounter("Scheduler/Iterations", true))
+            , Sleeps(counters->GetCounter("Scheduler/Sleeps", true))
+            , ElapsedMicrosec(counters->GetCounter("Scheduler/ElapsedMicrosec", true))
+        { }
+    };
+
+    TBasicSchedulerThread::TBasicSchedulerThread(const TSchedulerConfig& config)
+        : Config(config)
+        , MonCounters(Config.MonCounters ?
new TMonCounters(Config.MonCounters) : nullptr) + , ActorSystem(nullptr) + , CurrentTimestamp(nullptr) + , CurrentMonotonic(nullptr) + , TotalReaders(0) + , StopFlag(false) + , ScheduleMap(3600) + { + Y_VERIFY(!Config.UseSchedulerActor, "Cannot create scheduler thread because Config.UseSchedulerActor# true"); + } + + TBasicSchedulerThread::~TBasicSchedulerThread() { + Y_VERIFY(!MainCycle); + } + + void TBasicSchedulerThread::CycleFunc() { +#ifdef BALLOC + ThreadDisableBalloc(); +#endif + ::SetCurrentThreadName("Scheduler"); + + ui64 currentMonotonic = RelaxedLoad(CurrentMonotonic); + ui64 throttledMonotonic = currentMonotonic; + + ui64 activeTick = AlignUp<ui64>(throttledMonotonic, IntrasecondThreshold); + TAutoPtr<TMomentMap> activeSec; + + NHPTimer::STime hpprev = GetCycleCountFast(); + ui64 nextTimestamp = TInstant::Now().MicroSeconds(); + ui64 nextMonotonic = Max(currentMonotonic, GetMonotonicMicroSeconds()); + + while (!AtomicLoad(&StopFlag)) { + { + const ui64 delta = nextMonotonic - throttledMonotonic; + const ui64 elapsedDelta = nextMonotonic - currentMonotonic; + const ui64 threshold = Max(Min(Config.ProgressThreshold, 2 * elapsedDelta), ui64(1)); + + throttledMonotonic = (delta > threshold) ? throttledMonotonic + threshold : nextMonotonic; + + if (MonCounters) { + *MonCounters->TimeDelayMs = (nextMonotonic - throttledMonotonic) / 1000; + } + } + AtomicStore(CurrentTimestamp, nextTimestamp); + AtomicStore(CurrentMonotonic, nextMonotonic); + currentMonotonic = nextMonotonic; + + if (MonCounters) { + ++*MonCounters->Iterations; + } + + bool somethingDone = false; + + // first step - send everything triggered on schedule + ui64 eventsSent = 0; + ui64 eventsDropped = 0; + for (;;) { + while (!!activeSec && !activeSec->empty()) { + TMomentMap::iterator it = activeSec->begin(); + if (it->first <= throttledMonotonic) { + if (NSchedulerQueue::TQueueType* q = it->second.Get()) { + while (NSchedulerQueue::TEntry* x = q->Reader.Pop()) { + somethingDone = true; + Y_VERIFY_DEBUG(x->InstantMicroseconds <= activeTick); + IEventHandle* ev = x->Ev; + ISchedulerCookie* cookie = x->Cookie; + // TODO: lazy send with backoff queue to not hang over contended mailboxes + if (cookie) { + if (cookie->Detach()) { + ActorSystem->Send(ev); + ++eventsSent; + } else { + delete ev; + ++eventsDropped; + } + } else { + ActorSystem->Send(ev); + ++eventsSent; + } + } + } + activeSec->erase(it); + } else + break; + } + + if (activeTick <= throttledMonotonic) { + Y_VERIFY_DEBUG(!activeSec || activeSec->empty()); + activeSec.Destroy(); + activeTick += IntrasecondThreshold; + TScheduleMap::iterator it = ScheduleMap.find(activeTick); + if (it != ScheduleMap.end()) { + activeSec = it->second; + ScheduleMap.erase(it); + } + continue; + } + + // ok, if we are here - then nothing is ready, so send step complete + break; + } + + // second step - collect everything from queues + + ui64 eventsAdded = 0; + for (ui32 i = 0; i != TotalReaders; ++i) { + while (NSchedulerQueue::TEntry* x = Readers[i]->Pop()) { + somethingDone = true; + const ui64 instant = AlignUp<ui64>(x->InstantMicroseconds, Config.ResolutionMicroseconds); + IEventHandle* const ev = x->Ev; + ISchedulerCookie* const cookie = x->Cookie; + + // check is cookie still valid? 
it looks like checking would hurt performance without significant memory savings
+
+                    if (instant <= activeTick) {
+                        if (!activeSec)
+                            activeSec.Reset(new TMomentMap());
+                        TAutoPtr<NSchedulerQueue::TQueueType>& queue = (*activeSec)[instant];
+                        if (!queue)
+                            queue.Reset(new NSchedulerQueue::TQueueType());
+                        queue->Writer.Push(instant, ev, cookie);
+                    } else {
+                        const ui64 intrasecond = AlignUp<ui64>(instant, IntrasecondThreshold);
+                        TAutoPtr<TMomentMap>& msec = ScheduleMap[intrasecond];
+                        if (!msec)
+                            msec.Reset(new TMomentMap());
+                        TAutoPtr<NSchedulerQueue::TQueueType>& queue = (*msec)[instant];
+                        if (!queue)
+                            queue.Reset(new NSchedulerQueue::TQueueType());
+                        queue->Writer.Push(instant, ev, cookie);
+                    }
+
+                    ++eventsAdded;
+                }
+            }
+
+            NHPTimer::STime hpnow = GetCycleCountFast();
+
+            if (MonCounters) {
+                *MonCounters->QueueSize -= eventsSent + eventsDropped;
+                *MonCounters->QueueSize += eventsAdded;
+                *MonCounters->EventsSent += eventsSent;
+                *MonCounters->EventsDropped += eventsDropped;
+                *MonCounters->EventsAdded += eventsAdded;
+                *MonCounters->ElapsedMicrosec += NHPTimer::GetSeconds(hpnow - hpprev) * 1000000;
+            }
+
+            hpprev = hpnow;
+            nextTimestamp = TInstant::Now().MicroSeconds();
+            nextMonotonic = Max(currentMonotonic, GetMonotonicMicroSeconds());
+
+            // ok, the cycle is complete; if nothing is left - sleep
+            if (!somethingDone) {
+                const ui64 nextInstant = AlignDown<ui64>(throttledMonotonic + Config.ResolutionMicroseconds, Config.ResolutionMicroseconds);
+                if (nextMonotonic >= nextInstant) // already in the next time-slice
+                    continue;
+
+                const ui64 delta = nextInstant - nextMonotonic;
+                if (delta < Config.SpinThreshold) // not much time left, just spin
+                    continue;
+
+                if (MonCounters) {
+                    ++*MonCounters->Sleeps;
+                }
+
+                NanoSleep(delta * 1000); // ok, looks like we should sleep a bit
+
+                // Don't count sleep in elapsed microseconds
+                hpprev = GetCycleCountFast();
+                nextTimestamp = TInstant::Now().MicroSeconds();
+                nextMonotonic = Max(currentMonotonic, GetMonotonicMicroSeconds());
+            }
+        }
+        // ok, die!
+    }
+
+    void TBasicSchedulerThread::Prepare(TActorSystem* actorSystem, volatile ui64* currentTimestamp, volatile ui64* currentMonotonic) {
+        ActorSystem = actorSystem;
+        CurrentTimestamp = currentTimestamp;
+        CurrentMonotonic = currentMonotonic;
+        *CurrentTimestamp = TInstant::Now().MicroSeconds();
+        *CurrentMonotonic = GetMonotonicMicroSeconds();
+    }
+
+    void TBasicSchedulerThread::PrepareSchedules(NSchedulerQueue::TReader** readers, ui32 scheduleReadersCount) {
+        Y_VERIFY(scheduleReadersCount > 0);
+        TotalReaders = scheduleReadersCount;
+        Readers.Reset(new NSchedulerQueue::TReader*[scheduleReadersCount]);
+        Copy(readers, readers + scheduleReadersCount, Readers.Get());
+    }
+
+    void TBasicSchedulerThread::PrepareStart() {
+        // Called after the actor system is initialized, but before executor threads
+        // are started, giving us a chance to update the current timestamp with a
+        // more recent value, taking initialization time into account. This is
+        // safe to do, since the scheduler thread is not started yet, so no other
+        // threads are updating time concurrently.
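+        // CurrentMonotonic must never go backwards, hence Max() with the value
+        // already stored in Prepare().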
+
+    void TBasicSchedulerThread::PrepareStart() {
+        // Called after actor system is initialized, but before executor threads
+        // are started, giving us a chance to update current timestamp with a
+        // more recent value, taking initialization time into account. This is
+        // safe to do, since scheduler thread is not started yet, so no other
+        // threads are updating time concurrently.
+        AtomicStore(CurrentTimestamp, TInstant::Now().MicroSeconds());
+        AtomicStore(CurrentMonotonic, Max(RelaxedLoad(CurrentMonotonic), GetMonotonicMicroSeconds()));
+    }
+
+    void TBasicSchedulerThread::Start() {
+        MainCycle.Reset(new NThreading::TLegacyFuture<void, false>(std::bind(&TBasicSchedulerThread::CycleFunc, this)));
+    }
+
+    void TBasicSchedulerThread::PrepareStop() {
+        AtomicStore(&StopFlag, true);
+    }
+
+    void TBasicSchedulerThread::Stop() {
+        MainCycle->Get();
+        MainCycle.Destroy();
+    }
+
+}
+
+#ifdef __linux__
+
+namespace NActors {
+    ISchedulerThread* CreateSchedulerThread(const TSchedulerConfig& config) {
+        if (config.UseSchedulerActor) {
+            return new TMockSchedulerThread();
+        } else {
+            return new TBasicSchedulerThread(config);
+        }
+    }
+}
+
+#else // __linux__
+
+namespace NActors {
+    ISchedulerThread* CreateSchedulerThread(const TSchedulerConfig& config) {
+        return new TBasicSchedulerThread(config);
+    }
+}
+
+#endif // __linux__
diff --git a/library/cpp/actors/core/scheduler_basic.h b/library/cpp/actors/core/scheduler_basic.h
new file mode 100644
index 0000000000..2ccde39235
--- /dev/null
+++ b/library/cpp/actors/core/scheduler_basic.h
@@ -0,0 +1,81 @@
+#pragma once
+
+#include "actorsystem.h"
+#include "monotonic.h"
+#include "scheduler_queue.h"
+#include <library/cpp/actors/util/queue_chunk.h>
+#include <library/cpp/threading/future/legacy_future.h>
+#include <util/generic/hash.h>
+#include <util/generic/map.h>
+
+namespace NActors {
+
+    class TBasicSchedulerThread: public ISchedulerThread {
+        // TODO: replace with NUMA-local threads and per-thread schedules
+        const TSchedulerConfig Config;
+
+        struct TMonCounters;
+        const THolder<TMonCounters> MonCounters;
+
+        TActorSystem* ActorSystem;
+        volatile ui64* CurrentTimestamp;
+        volatile ui64* CurrentMonotonic;
+
+        ui32 TotalReaders;
+        TArrayHolder<NSchedulerQueue::TReader*> Readers;
+
+        volatile bool StopFlag;
+
+        typedef TMap<ui64, TAutoPtr<NSchedulerQueue::TQueueType>> TMomentMap; // intrasecond queues
+        typedef THashMap<ui64, TAutoPtr<TMomentMap>> TScheduleMap; // over-second schedule
+
+        TScheduleMap ScheduleMap;
+
+        THolder<NThreading::TLegacyFuture<void, false>> MainCycle;
+
+        static const ui64 IntrasecondThreshold = 1048576; // ~second
+
+        void CycleFunc();
+
+    public:
+        TBasicSchedulerThread(const TSchedulerConfig& config = TSchedulerConfig());
+        ~TBasicSchedulerThread();
+
+        void Prepare(TActorSystem* actorSystem, volatile ui64* currentTimestamp, volatile ui64* currentMonotonic) override;
+        void PrepareSchedules(NSchedulerQueue::TReader** readers, ui32 scheduleReadersCount) override;
+
+        void PrepareStart() override;
+        void Start() override;
+        void PrepareStop() override;
+        void Stop() override;
+    };
+
+    class TMockSchedulerThread: public ISchedulerThread {
+    public:
+        virtual ~TMockSchedulerThread() override {
+        }
+
+        void Prepare(TActorSystem* actorSystem, volatile ui64* currentTimestamp, volatile ui64* currentMonotonic) override {
+            Y_UNUSED(actorSystem);
+            *currentTimestamp = TInstant::Now().MicroSeconds();
+            *currentMonotonic = GetMonotonicMicroSeconds();
+        }
+
+        void PrepareSchedules(NSchedulerQueue::TReader** readers, ui32 scheduleReadersCount) override {
+            Y_UNUSED(readers);
+            Y_UNUSED(scheduleReadersCount);
+        }
+
+        void Start() override {
+        }
+
+        void PrepareStop() override {
+        }
+
+        void Stop() override {
+        }
+    };
+
+    ISchedulerThread* CreateSchedulerThread(const TSchedulerConfig& cfg);
+
+}
diff --git a/library/cpp/actors/core/scheduler_cookie.cpp b/library/cpp/actors/core/scheduler_cookie.cpp
new file mode 100644
index 0000000000..0fa6f543a7
--- /dev/null
+++ b/library/cpp/actors/core/scheduler_cookie.cpp
@@ -0,0 +1,84 @@
+#include "scheduler_cookie.h"
+
+namespace NActors {
+    class TSchedulerCookie2Way: public ISchedulerCookie {
+        TAtomic Value;
+
+    public:
+        TSchedulerCookie2Way()
+            : Value(2)
+        {
+        }
+
+        bool IsArmed() noexcept override {
+            return (AtomicGet(Value) == 2);
+        }
+
+        bool Detach() noexcept override {
+            const ui64 x = AtomicDecrement(Value);
+            if (x == 1)
+                return true;
+
+            if (x == 0) {
+                delete this;
+                return false;
+            }
+
+            Y_FAIL();
+        }
+
+        bool DetachEvent() noexcept override {
+            Y_FAIL();
+        }
+    };
+
+    ISchedulerCookie* ISchedulerCookie::Make2Way() {
+        return new TSchedulerCookie2Way();
+    }
+
+    class TSchedulerCookie3Way: public ISchedulerCookie {
+        TAtomic Value;
+
+    public:
+        TSchedulerCookie3Way()
+            : Value(3)
+        {
+        }
+
+        bool IsArmed() noexcept override {
+            return (AtomicGet(Value) == 3);
+        }
+
+        bool Detach() noexcept override {
+            const ui64 x = AtomicDecrement(Value);
+            if (x == 2)
+                return true;
+            if (x == 1)
+                return false;
+            if (x == 0) {
+                delete this;
+                return false;
+            }
+
+            Y_FAIL();
+        }
+
+        bool DetachEvent() noexcept override {
+            const ui64 x = AtomicDecrement(Value);
+            if (x == 2)
+                return false;
+            if (x == 1)
+                return true;
+            if (x == 0) {
+                delete this;
+                return false;
+            }
+
+            Y_FAIL();
+        }
+    };
+
+    ISchedulerCookie* ISchedulerCookie::Make3Way() {
+        return new TSchedulerCookie3Way();
+    }
+}
diff --git a/library/cpp/actors/core/scheduler_cookie.h b/library/cpp/actors/core/scheduler_cookie.h
new file mode 100644
index 0000000000..2c20ca67f3
--- /dev/null
+++ b/library/cpp/actors/core/scheduler_cookie.h
@@ -0,0 +1,78 @@
+#pragma once
+
+#include "defs.h"
+#include <util/generic/noncopyable.h>
+
+namespace NActors {
+    class ISchedulerCookie : TNonCopyable {
+    protected:
+        virtual ~ISchedulerCookie() {
+        }
+
+    public:
+        virtual bool Detach() noexcept = 0;
+        virtual bool DetachEvent() noexcept = 0;
+        virtual bool IsArmed() noexcept = 0;
+
+        static ISchedulerCookie* Make2Way();
+        static ISchedulerCookie* Make3Way();
+    };
+
+    class TSchedulerCookieHolder : TNonCopyable {
+        ISchedulerCookie* Cookie;
+
+    public:
+        TSchedulerCookieHolder()
+            : Cookie(nullptr)
+        {
+        }
+
+        TSchedulerCookieHolder(ISchedulerCookie* x)
+            : Cookie(x)
+        {
+        }
+
+        ~TSchedulerCookieHolder() {
+            Detach();
+        }
+
+        bool operator==(const TSchedulerCookieHolder& x) const noexcept {
+            return (Cookie == x.Cookie);
+        }
+
+        ISchedulerCookie* Get() const {
+            return Cookie;
+        }
+
+        ISchedulerCookie* Release() {
+            ISchedulerCookie* result = Cookie;
+            Cookie = nullptr;
+            return result;
+        }
+
+        void Reset(ISchedulerCookie* cookie) {
+            Detach();
+            Cookie = cookie;
+        }
+
+        bool Detach() noexcept {
+            if (Cookie) {
+                const bool res = Cookie->Detach();
+                Cookie = nullptr;
+                return res;
+            } else {
+                return false;
+            }
+        }
+
+        bool DetachEvent() noexcept {
+            if (Cookie) {
+                const bool res = Cookie->DetachEvent();
+                Cookie = nullptr;
+                return res;
+            } else {
+                return false;
+            }
+        }
+    };
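+
+    // Usage sketch (illustrative only): a 2-way cookie is shared between the
+    // scheduler and the owner. Whichever side calls Detach() first gets true,
+    // the second gets false and the cookie frees itself; the scheduler
+    // delivers a scheduled event only when its own Detach() returns true, so
+    // detaching early on the owner side cancels delivery.
+    //
+    //   TSchedulerCookieHolder cookie(ISchedulerCookie::Make2Way());
+    //   ctx.Schedule(delay, ev, cookie.Get()); // hypothetical call site
+    //   ...
+    //   if (cookie.Detach()) {
+    //       // we detached first - the scheduled event will not fire
+    //   }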
+}
diff --git a/library/cpp/actors/core/scheduler_queue.h b/library/cpp/actors/core/scheduler_queue.h
new file mode 100644
index 0000000000..3b8fac28f0
--- /dev/null
+++ b/library/cpp/actors/core/scheduler_queue.h
@@ -0,0 +1,120 @@
+#pragma once
+
+#include <library/cpp/actors/util/queue_chunk.h>
+
+namespace NActors {
+    class IEventHandle;
+    class ISchedulerCookie;
+
+    namespace NSchedulerQueue {
+        struct TEntry {
+            ui64 InstantMicroseconds;
+            IEventHandle* Ev;
+            ISchedulerCookie* Cookie;
+        };
+
+        struct TChunk : TQueueChunkDerived<TEntry, 512, TChunk> {};
+
+        class TReader;
+        class TWriter;
+        class TWriterWithPadding;
+
+        class TReader : ::TNonCopyable {
+            TChunk* ReadFrom;
+            ui32 ReadPosition;
+
+            friend class TWriter;
+
+        public:
+            TReader()
+                : ReadFrom(new TChunk())
+                , ReadPosition(0)
+            {
+            }
+
+            ~TReader() {
+                while (TEntry* x = Pop()) {
+                    if (x->Cookie)
+                        x->Cookie->Detach();
+                    delete x->Ev;
+                }
+                delete ReadFrom;
+            }
+
+            TEntry* Pop() {
+                TChunk* head = ReadFrom;
+                if (ReadPosition != TChunk::EntriesCount) {
+                    if (AtomicLoad(&head->Entries[ReadPosition].InstantMicroseconds) != 0)
+                        return const_cast<TEntry*>(&head->Entries[ReadPosition++]);
+                    else
+                        return nullptr;
+                } else if (TChunk* next = AtomicLoad(&head->Next)) {
+                    ReadFrom = next;
+                    delete head;
+                    ReadPosition = 0;
+                    return Pop();
+                }
+
+                return nullptr;
+            }
+        };
+
+        class TWriter : ::TNonCopyable {
+            TChunk* WriteTo;
+            ui32 WritePosition;
+
+        public:
+            TWriter()
+                : WriteTo(nullptr)
+                , WritePosition(0)
+            {
+            }
+
+            void Init(const TReader& reader) {
+                WriteTo = reader.ReadFrom;
+                WritePosition = 0;
+            }
+
+            void Push(ui64 instantMicroseconds, IEventHandle* ev, ISchedulerCookie* cookie) {
+                if (Y_UNLIKELY(instantMicroseconds == 0)) {
+                    // Protect against Pop() getting stuck forever
+                    instantMicroseconds = 1;
+                }
+                if (WritePosition != TChunk::EntriesCount) {
+                    volatile TEntry& entry = WriteTo->Entries[WritePosition];
+                    entry.Cookie = cookie;
+                    entry.Ev = ev;
+                    AtomicStore(&entry.InstantMicroseconds, instantMicroseconds);
+                    ++WritePosition;
+                } else {
+                    TChunk* next = new TChunk();
+                    volatile TEntry& entry = next->Entries[0];
+                    entry.Cookie = cookie;
+                    entry.Ev = ev;
+                    entry.InstantMicroseconds = instantMicroseconds;
+                    AtomicStore(&WriteTo->Next, next);
+                    WriteTo = next;
+                    WritePosition = 1;
+                }
+            }
+        };
+
+        class TWriterWithPadding: public TWriter {
+        private:
+            ui8 CacheLinePadding[64 - sizeof(TWriter)];
+
+            void UnusedCacheLinePadding() {
+                Y_UNUSED(CacheLinePadding);
+            }
+        };
+
+        struct TQueueType {
+            TReader Reader;
+            TWriter Writer;
+
+            TQueueType() {
+                Writer.Init(Reader);
+            }
+        };
+    }
+}
diff --git a/library/cpp/actors/core/servicemap.h b/library/cpp/actors/core/servicemap.h
new file mode 100644
index 0000000000..d72e50cae5
--- /dev/null
+++ b/library/cpp/actors/core/servicemap.h
@@ -0,0 +1,168 @@
+#pragma once
+
+#include "defs.h"
+
+namespace NActors {
+    // wait-free one-writer multi-reader hash-tree for service mapping purposes
+    // fast updates of the same key may yield false negatives; we don't care, since such cases are already broken by the service-map application logic
+    template <typename TKey, typename TValue, typename THash, ui64 BaseSize = 256 * 1024, ui64 ExtCount = 4, ui64 ExtBranching = 4>
+    class TServiceMap : TNonCopyable {
+        struct TEntry : TNonCopyable {
+            ui32 CounterIn;
+            ui32 CounterOut;
+            TKey Key;
+            TValue Value;
+
+            TEntry()
+                : CounterIn(0)
+                , CounterOut(0)
+                , Key()
+                , Value()
+            {
+            }
+        };
+
+        struct TBranch : TNonCopyable {
+            TEntry Entries[ExtCount];
+            TBranch* Branches[ExtBranching];
+
+            TBranch() {
+                Fill(Branches, Branches + ExtBranching, (TBranch*)nullptr);
+            }
+        };
+
+        ui32 Counter;
+        TBranch* Line[BaseSize];
+
+        bool ScanBranch(TBranch* branch, const TKey& key, ui64 hash, TValue& ret) {
+            for (ui32 i = 0; i != ExtCount; ++i) {
+                const TEntry& entry = branch->Entries[i];
+                const ui32 counterIn = AtomicLoad(&entry.CounterIn);
+                if (counterIn != 0 && entry.Key == key) {
+                    ret = entry.Value;
+                    const ui32 counterOut = AtomicLoad(&entry.CounterOut);
+                    if (counterOut == counterIn)
+                        return true;
+                }
+            }
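+
+            // The CounterIn/CounterOut pair acts like a seqlock: Update()
+            // publishes CounterOut before rewriting Key/Value and CounterIn
+            // after, so a reader that observes CounterIn == CounterOut != 0
+            // around a matching key knows the copied value was not torn by a
+            // concurrent rewrite.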
+            const ui64 hash0 = hash % ExtBranching;
+            if (TBranch* next = AtomicLoad(branch->Branches + hash0))
+                return ScanBranch(next, key, hash / ExtBranching, ret);
+
+            return false;
+        }
+
+        void ScanZeroOld(TBranch* branch, const TKey& key, ui64 hash, TEntry** zeroEntry, TEntry*& oldEntry) {
+            for (ui32 i = 0; i != ExtCount; ++i) {
+                TEntry& entry = branch->Entries[i];
+                if (entry.CounterIn == 0) {
+                    if (zeroEntry && !*zeroEntry) {
+                        *zeroEntry = &entry;
+                        if (oldEntry != nullptr)
+                            return;
+                    }
+                } else {
+                    if (entry.Key == key) {
+                        oldEntry = &entry;
+                        if (!zeroEntry || *zeroEntry)
+                            return;
+                    }
+                }
+            }
+
+            const ui64 hash0 = hash % ExtBranching;
+            if (TBranch* next = branch->Branches[hash0]) {
+                ScanZeroOld(next, key, hash / ExtBranching, zeroEntry, oldEntry);
+            } else { // found the tail; if zeroEntry was requested but not yet found - insert one
+                if (zeroEntry && !*zeroEntry) {
+                    TBranch* next = new TBranch();
+                    *zeroEntry = next->Entries;
+                    AtomicStore(branch->Branches + hash0, next);
+                }
+            }
+        }
+
+    public:
+        TServiceMap()
+            : Counter(0)
+        {
+            Fill(Line, Line + BaseSize, (TBranch*)nullptr);
+        }
+
+        ~TServiceMap() {
+            for (ui64 i = 0; i < BaseSize; ++i) {
+                delete Line[i];
+            }
+        }
+
+        TValue Find(const TKey& key) {
+            THash hashOp;
+            const ui64 hash = hashOp(key);
+            const ui64 hash0 = hash % BaseSize;
+
+            if (TBranch* branch = AtomicLoad(Line + hash0)) {
+                TValue ret;
+                if (ScanBranch(branch, key, hash / BaseSize, ret))
+                    return ret;
+            }
+
+            return TValue();
+        }
+
+        // returns the previous value on update, a default-constructed TValue on insert
+        TValue Update(const TKey& key, const TValue& value) {
+            THash hashOp;
+            const ui64 hash = hashOp(key);
+            const ui64 hash0 = hash % BaseSize;
+
+            TEntry* zeroEntry = nullptr;
+            TEntry* oldEntry = nullptr;
+
+            if (TBranch* branch = Line[hash0]) {
+                ScanZeroOld(branch, key, hash / BaseSize, &zeroEntry, oldEntry);
+            } else {
+                TBranch* next = new TBranch();
+                zeroEntry = next->Entries;
+                AtomicStore(Line + hash0, next);
+            }
+
+            // now we have both entries; first, publish the new one
+            const ui32 counter = AtomicUi32Increment(&Counter);
+            AtomicStore(&zeroEntry->CounterOut, counter);
+            zeroEntry->Key = key;
+            zeroEntry->Value = value;
+            AtomicStore(&zeroEntry->CounterIn, counter);
+
+            if (oldEntry != nullptr) {
+                const TValue ret = oldEntry->Value;
+                AtomicStore<ui32>(&oldEntry->CounterOut, 0);
+                AtomicStore<ui32>(&oldEntry->CounterIn, 0);
+                return ret;
+            } else {
+                return TValue();
+            }
+        }
+
+        bool Erase(const TKey& key) {
+            THash hashOp;
+            const ui64 hash = hashOp(key);
+            const ui64 hash0 = hash % BaseSize;
+
+            TEntry* oldEntry = nullptr;
+
+            if (TBranch* branch = Line[hash0]) {
+                ScanZeroOld(branch, key, hash / BaseSize, nullptr, oldEntry);
+            }
+
+            if (oldEntry != nullptr) {
+                AtomicStore<ui32>(&oldEntry->CounterOut, 0);
+                AtomicStore<ui32>(&oldEntry->CounterIn, 0);
+                return true;
+            } else {
+                return false;
+            }
+        }
+    };
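+
+    // Minimal usage sketch (the key/value types here are illustrative, not
+    // mandated by the library); all Update()/Erase() calls must come from the
+    // single writer thread, while Find() may be called from any thread:
+    //
+    //   TServiceMap<TString, TActorId, THash<TString>> services;
+    //   services.Update("named-service", serviceActorId);
+    //   TActorId resolved = services.Find("named-service");
+    //   services.Erase("named-service");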
+}
diff --git a/library/cpp/actors/core/ut/ya.make b/library/cpp/actors/core/ut/ya.make
new file mode 100644
index 0000000000..3ee28d5850
--- /dev/null
+++ b/library/cpp/actors/core/ut/ya.make
@@ -0,0 +1,46 @@
+UNITTEST_FOR(library/cpp/actors/core)
+
+OWNER(
+    alexvru
+    g:kikimr
+)
+
+FORK_SUBTESTS()
+IF (SANITIZER_TYPE)
+    SIZE(LARGE)
+    TIMEOUT(1200)
+    TAG(ya:fat)
+    SPLIT_FACTOR(20)
+    REQUIREMENTS(
+        ram:32
+    )
+ELSE()
+    SIZE(MEDIUM)
+    TIMEOUT(600)
+    REQUIREMENTS(
+        ram:16
+    )
+ENDIF()
+
+PEERDIR(
+    library/cpp/actors/interconnect
+    library/cpp/actors/testlib
+)
+
+SRCS(
+    actor_coroutine_ut.cpp
+    actor_ut.cpp
+    actorsystem_ut.cpp
+    ask_ut.cpp
+    balancer_ut.cpp
+    event_pb_payload_ut.cpp
+    event_pb_ut.cpp
+    executor_pool_basic_ut.cpp
+    executor_pool_united_ut.cpp
+    log_ut.cpp
+    memory_tracker_ut.cpp
+    scheduler_actor_ut.cpp
+)
+
+END()
diff --git a/library/cpp/actors/core/worker_context.cpp b/library/cpp/actors/core/worker_context.cpp
new file mode 100644
index 0000000000..ada6c997d4
--- /dev/null
+++ b/library/cpp/actors/core/worker_context.cpp
@@ -0,0 +1,7 @@
+#include "worker_context.h"
+#include "probes.h"
+
+namespace NActors {
+    LWTRACE_USING(ACTORLIB_PROVIDER);
+}
diff --git a/library/cpp/actors/core/worker_context.h b/library/cpp/actors/core/worker_context.h
new file mode 100644
index 0000000000..b4c37a7629
--- /dev/null
+++ b/library/cpp/actors/core/worker_context.h
@@ -0,0 +1,175 @@
+#pragma once
+
+#include "defs.h"
+
+#include "actorsystem.h"
+#include "event.h"
+#include "lease.h"
+#include "mailbox.h"
+#include "mon_stats.h"
+
+#include <library/cpp/actors/util/datetime.h>
+#include <library/cpp/actors/util/intrinsics.h>
+#include <library/cpp/actors/util/thread.h>
+
+#include <library/cpp/lwtrace/shuttle.h>
+
+namespace NActors {
+    struct TWorkerContext {
+        const TWorkerId WorkerId;
+        const TCpuId CpuId;
+        TLease Lease;
+        IExecutorPool* Executor = nullptr;
+        TMailboxTable* MailboxTable = nullptr;
+        ui64 TimePerMailboxTs = 0;
+        ui32 EventsPerMailbox = 0;
+        ui64 SoftDeadlineTs = ui64(-1);
+        TExecutorThreadStats* Stats = &WorkerStats; // pool stats
+        TExecutorThreadStats WorkerStats;
+        TPoolId PoolId = MaxPools;
+        mutable NLWTrace::TOrbit Orbit;
+
+        TWorkerContext(TWorkerId workerId, TCpuId cpuId, size_t activityVecSize)
+            : WorkerId(workerId)
+            , CpuId(cpuId)
+            , Lease(WorkerId, NeverExpire)
+            , WorkerStats(activityVecSize)
+        {}
+
+#ifdef ACTORSLIB_COLLECT_EXEC_STATS
+        void GetCurrentStats(TExecutorThreadStats& statsCopy) const {
+            statsCopy = TExecutorThreadStats();
+            statsCopy.Aggregate(*Stats);
+        }
+
+        void AddElapsedCycles(ui32 activityType, i64 elapsed) {
+            Y_VERIFY_DEBUG(activityType < Stats->MaxActivityType());
+            RelaxedStore(&Stats->ElapsedTicks, RelaxedLoad(&Stats->ElapsedTicks) + elapsed);
+            RelaxedStore(&Stats->ElapsedTicksByActivity[activityType], RelaxedLoad(&Stats->ElapsedTicksByActivity[activityType]) + elapsed);
+        }
+
+        void AddParkedCycles(i64 elapsed) {
+            RelaxedStore(&Stats->ParkedTicks, RelaxedLoad(&Stats->ParkedTicks) + elapsed);
+        }
+
+        void AddBlockedCycles(i64 elapsed) {
+            RelaxedStore(&Stats->BlockedTicks, RelaxedLoad(&Stats->BlockedTicks) + elapsed);
+        }
+
+        void IncrementSentEvents() {
+            RelaxedStore(&Stats->SentEvents, RelaxedLoad(&Stats->SentEvents) + 1);
+        }
+
+        void IncrementPreemptedEvents() {
+            RelaxedStore(&Stats->PreemptedEvents, RelaxedLoad(&Stats->PreemptedEvents) + 1);
+        }
+
+        void DecrementActorsAliveByActivity(ui32 activityType) {
+            if (activityType >= Stats->MaxActivityType()) {
+                activityType = 0;
+            }
+            RelaxedStore(&Stats->ActorsAliveByActivity[activityType], Stats->ActorsAliveByActivity[activityType] - 1);
+        }
+
+        inline void IncrementNonDeliveredEvents() {
+            RelaxedStore(&Stats->NonDeliveredEvents, RelaxedLoad(&Stats->NonDeliveredEvents) + 1);
+        }
+
+        inline void IncrementMailboxPushedOutBySoftPreemption() {
+            RelaxedStore(&Stats->MailboxPushedOutBySoftPreemption, RelaxedLoad(&Stats->MailboxPushedOutBySoftPreemption) + 1);
+        }
+
+        inline void IncrementMailboxPushedOutByTime() {
+            RelaxedStore(&Stats->MailboxPushedOutByTime, RelaxedLoad(&Stats->MailboxPushedOutByTime) + 1);
+        }
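+
+        // Stats fields are written with RelaxedStore/RelaxedLoad: each stats
+        // block has a single writer (the worker thread that owns this
+        // context), and readers that aggregate via GetCurrentStats() only
+        // need eventually-consistent, possibly slightly stale values.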
+
+        inline void IncrementMailboxPushedOutByEventCount() {
+            RelaxedStore(&Stats->MailboxPushedOutByEventCount, RelaxedLoad(&Stats->MailboxPushedOutByEventCount) + 1);
+        }
+
+        inline void IncrementEmptyMailboxActivation() {
+            RelaxedStore(&Stats->EmptyMailboxActivation, RelaxedLoad(&Stats->EmptyMailboxActivation) + 1);
+        }
+
+        double AddActivationStats(i64 scheduleTs, i64 deliveredTs) {
+            i64 ts = deliveredTs > scheduleTs ? deliveredTs - scheduleTs : 0;
+            double usec = NHPTimer::GetSeconds(ts) * 1000000.0;
+            Stats->ActivationTimeHistogram.Add(usec);
+            return usec;
+        }
+
+        ui64 AddEventDeliveryStats(i64 sentTs, i64 deliveredTs) {
+            ui64 usecDeliv = deliveredTs > sentTs ? NHPTimer::GetSeconds(deliveredTs - sentTs) * 1000000 : 0;
+            Stats->EventDeliveryTimeHistogram.Add(usecDeliv);
+            return usecDeliv;
+        }
+
+        i64 AddEventProcessingStats(i64 deliveredTs, i64 processedTs, ui32 activityType, ui64 scheduled) {
+            i64 elapsed = processedTs - deliveredTs;
+            ui64 usecElapsed = NHPTimer::GetSeconds(elapsed) * 1000000;
+            activityType = (activityType >= Stats->MaxActivityType()) ? 0 : activityType;
+            Stats->EventProcessingCountHistogram.Add(usecElapsed);
+            Stats->EventProcessingTimeHistogram.Add(usecElapsed, elapsed);
+            RelaxedStore(&Stats->ReceivedEvents, RelaxedLoad(&Stats->ReceivedEvents) + 1);
+            RelaxedStore(&Stats->ReceivedEventsByActivity[activityType], RelaxedLoad(&Stats->ReceivedEventsByActivity[activityType]) + 1);
+            RelaxedStore(&Stats->ScheduledEventsByActivity[activityType], RelaxedLoad(&Stats->ScheduledEventsByActivity[activityType]) + scheduled);
+            AddElapsedCycles(activityType, elapsed);
+            return elapsed;
+        }
+
+        void UpdateActorsStats(size_t dyingActorsCnt) {
+            if (dyingActorsCnt) {
+                AtomicAdd(Executor->DestroyedActors, dyingActorsCnt);
+            }
+            RelaxedStore(&Stats->PoolDestroyedActors, (ui64)RelaxedLoad(&Executor->DestroyedActors));
+            RelaxedStore(&Stats->PoolActorRegistrations, (ui64)RelaxedLoad(&Executor->ActorRegistrations));
+            RelaxedStore(&Stats->PoolAllocatedMailboxes, MailboxTable->GetAllocatedMailboxCount());
+        }
+
+        void UpdateThreadTime() {
+            RelaxedStore(&WorkerStats.CpuNs, ThreadCPUTime() * 1000);
+        }
+#else
+        void GetCurrentStats(TExecutorThreadStats&) const {}
+        inline void AddElapsedCycles(ui32, i64) {}
+        inline void AddParkedCycles(i64) {}
+        inline void AddBlockedCycles(i64) {}
+        inline void IncrementSentEvents() {}
+        inline void IncrementPreemptedEvents() {}
+        inline void IncrementMailboxPushedOutBySoftPreemption() {}
+        inline void IncrementMailboxPushedOutByTime() {}
+        inline void IncrementMailboxPushedOutByEventCount() {}
+        inline void IncrementEmptyMailboxActivation() {}
+        void DecrementActorsAliveByActivity(ui32) {}
+        void IncrementNonDeliveredEvents() {}
+        double AddActivationStats(i64, i64) { return 0; }
+        ui64 AddEventDeliveryStats(i64, i64) { return 0; }
+        i64 AddEventProcessingStats(i64, i64, ui32, ui64) { return 0; }
+        void UpdateActorsStats(size_t) {}
+        void UpdateThreadTime() {}
+#endif
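+
+        // Switch() rebinds this worker to an executor pool; a sketch of the
+        // expected call shape (argument names are illustrative, mirroring the
+        // parameter list below):
+        //
+        //   wctx.Switch(executor, mailboxTable, timePerMailboxTs,
+        //               eventsPerMailbox, softDeadlineTs, &threadStats);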
+
+        void Switch(IExecutorPool* executor,
+                    TMailboxTable* mailboxTable,
+                    ui64 timePerMailboxTs,
+                    ui32 eventsPerMailbox,
+                    ui64 softDeadlineTs,
+                    TExecutorThreadStats* stats)
+        {
+            Executor = executor;
+            MailboxTable = mailboxTable;
+            TimePerMailboxTs = timePerMailboxTs;
+            EventsPerMailbox = eventsPerMailbox;
+            SoftDeadlineTs = softDeadlineTs;
+            Stats = stats;
+            PoolId = Executor ? Executor->PoolId : MaxPools;
+        }
+
+        void SwitchToIdle() {
+            Executor = nullptr;
+            MailboxTable = nullptr;
+            //Stats = &WorkerStats; // TODO: in actorsystem 2.0 idle stats cannot be related to a specific pool
+            PoolId = MaxPools;
+        }
+    };
+}
diff --git a/library/cpp/actors/core/ya.make b/library/cpp/actors/core/ya.make
new file mode 100644
index 0000000000..880a9d00db
--- /dev/null
+++ b/library/cpp/actors/core/ya.make
@@ -0,0 +1,123 @@
+LIBRARY()
+
+OWNER(
+    ddoarn
+    g:kikimr
+)
+
+NO_WSHADOW()
+
+IF (PROFILE_MEMORY_ALLOCATIONS)
+    CFLAGS(-DPROFILE_MEMORY_ALLOCATIONS)
+ENDIF()
+
+IF (ALLOCATOR == "B" OR ALLOCATOR == "BS" OR ALLOCATOR == "C")
+    CXXFLAGS(-DBALLOC)
+    PEERDIR(
+        library/cpp/balloc/optional
+    )
+ENDIF()
+
+SRCS(
+    actor_bootstrapped.h
+    actor_coroutine.cpp
+    actor_coroutine.h
+    actor.cpp
+    actor.h
+    actorid.cpp
+    actorid.h
+    actorsystem.cpp
+    actorsystem.h
+    ask.cpp
+    ask.h
+    balancer.h
+    balancer.cpp
+    buffer.cpp
+    buffer.h
+    callstack.cpp
+    callstack.h
+    config.h
+    cpu_manager.cpp
+    cpu_manager.h
+    cpu_state.h
+    defs.h
+    event.cpp
+    event.h
+    event_load.h
+    event_local.h
+    event_pb.cpp
+    event_pb.h
+    events.h
+    events_undelivered.cpp
+    executelater.h
+    executor_pool_base.cpp
+    executor_pool_base.h
+    executor_pool_basic.cpp
+    executor_pool_basic.h
+    executor_pool_io.cpp
+    executor_pool_io.h
+    executor_pool_united.cpp
+    executor_pool_united.h
+    executor_thread.cpp
+    executor_thread.h
+    hfunc.h
+    interconnect.cpp
+    interconnect.h
+    invoke.h
+    io_dispatcher.cpp
+    io_dispatcher.h
+    lease.h
+    log.cpp
+    log.h
+    log_settings.cpp
+    log_settings.h
+    mailbox.cpp
+    mailbox.h
+    mailbox_queue_revolving.h
+    mailbox_queue_simple.h
+    memory_track.cpp
+    memory_track.h
+    memory_tracker.cpp
+    memory_tracker.h
+    mon.h
+    mon_stats.h
+    monotonic.cpp
+    monotonic.h
+    worker_context.cpp
+    worker_context.h
+    probes.cpp
+    probes.h
+    process_stats.cpp
+    process_stats.h
+    scheduler_actor.cpp
+    scheduler_actor.h
+    scheduler_basic.cpp
+    scheduler_basic.h
+    scheduler_cookie.cpp
+    scheduler_cookie.h
+    scheduler_queue.h
+    servicemap.h
+)
+
+GENERATE_ENUM_SERIALIZATION(defs.h)
+GENERATE_ENUM_SERIALIZATION(actor.h)
+
+PEERDIR(
+    library/cpp/actors/memory_log
+    library/cpp/actors/prof
+    library/cpp/actors/protos
+    library/cpp/actors/util
+    library/cpp/execprofile
+    library/cpp/json/writer
+    library/cpp/logger
+    library/cpp/lwtrace
+    library/cpp/monlib/dynamic_counters
+    library/cpp/svnversion
+    library/cpp/threading/future
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+    ut
+)