author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300
---|---|---
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/actors |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |

intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029

Diffstat (limited to 'library/cpp/actors')

271 files changed, 48185 insertions, 0 deletions
diff --git a/library/cpp/actors/README.md b/library/cpp/actors/README.md
new file mode 100644
index 0000000000..c39908f2f5
--- /dev/null
+++ b/library/cpp/actors/README.md
@@ -0,0 +1,107 @@
+## Actor library
+
+### Part one: introduction.
+Sometimes you have to develop programs that are asynchronous, substantially parallel, and in places distributed. Sometimes the internal logic is non-trivial and heterogeneous on top of that, and gets written by different teams over many years. Everything just the way we love it. Humanity has not invented that many ways to organize the internal structure and code of such programs. Most of them are bad (and it is precisely the bad approaches that earned asynchronous, multithreaded programming its grim reputation). Some are better. And, as usual, there is no silver bullet.
+
+When we started developing Yandex Database (still KiKiMR back then), it was clear right away that simple improvised contraptions would not do the job (at least not well enough to not be ashamed of). As the foundation we chose message passing and the actor model, and we have not regretted it. The approach has gradually spread to neighboring projects.
+
+### Basic concepts.
+Husk aside, we represent a service (or a program, in the case of a standalone binary) as an ensemble of independent agents interacting by sending asynchronous messages within a shared environment. Every word here matters:
+
+Independent – they share neither state nor a thread of execution.
+Message passing – we formalize protocols, not interfaces.
+
+Asynchronous – we do not block on sending a message.
+Shared environment – all agents share a common pool of resources, and any of them, knowing an address, can send a message to any other.
+
+In more hyped terms, this looks a lot like colocated microservices, only one level lower. And yes, we deliberately chose not to hide the asynchrony and parallelism from the developer – the raw meat is in plain view.
+
+### IActor.
+https://a.yandex-team.ru/arc/trunk/arcadia/library/actors/core/actor.h?rev=5315854#L105
+The base class of all agents; it is normally not used directly – either TActor or TActorBootstrapped is instantiated. Practically all useful code of a program lives in actors.
+(An important note: in the code you will see method variants with and without TActorContext, similar in name and purpose. The TActorContext variants are now deprecated; new code should be written without them.)
+Important methods:
+
+PassAway – the only correct way for a registered actor to die. May be called only from inside a message handler.
+Send – send a message to a known recipient address. The actor provides a helper that takes the message itself; the base call taking a complete event handle is available on the context.
+
+Become – install the handler function that will be used to process the next incoming message.
+
+Register – register a new actor in the actor system, allocating a fresh mailbox for it. Important: from the moment of the call, ownership of the actor passes to the actor system, i.e. by the time the call returns the actor may already be running on another thread; you must neither call it directly nor even assume it is still alive.
+
+Schedule – register a message that will be sent no earlier than after the requested delay. The actor provides a helper that decorates the message with a send-to-self handle; on the context a complete handle can be passed.
+
+SelfId – obtain your own address. The returned TActorIdentity object can be passed around when sending messages on the actor's behalf needs to be delegated (for example, when you write useful code using passive objects).
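+
+To make the method list concrete, here is a minimal sketch of an actor exercising these calls. It is an illustration written for this text, not code from the library; it assumes the TEvPing/TEvPong events from events.h and the STATEFN/hFunc helpers described below:
+
+```cpp
+#include <library/cpp/actors/core/actor_bootstrapped.h>
+#include <library/cpp/actors/core/events.h>
+#include <library/cpp/actors/core/hfunc.h>
+
+using namespace NActors;
+
+class TPinger: public TActorBootstrapped<TPinger> {
+    const TActorId Peer; // the actor we exchange messages with
+    ui32 Left;           // how many round trips remain
+
+public:
+    TPinger(const TActorId& peer, ui32 count)
+        : Peer(peer)
+        , Left(count)
+    {}
+
+    void Bootstrap() {
+        Become(&TPinger::StateWork);        // handler for all subsequent events
+        Send(Peer, new TEvents::TEvPing()); // kick off the exchange
+    }
+
+    STATEFN(StateWork) {
+        switch (ev->GetTypeRewrite()) {
+            hFunc(TEvents::TEvPong, Handle);
+        }
+    }
+
+    void Handle(TEvents::TEvPong::TPtr& ev) {
+        if (--Left == 0) {
+            PassAway();                     // the only correct way to terminate
+        } else {
+            Send(ev->Sender, new TEvents::TEvPing());
+        }
+    }
+};
+```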
+Sending a message is cheap; there is no need to economize on it excessively (but it is not free either, so sending messages purely for the sake of sending messages is not worth it).
+
+Instantiating actors is just as cheap; an actor per request or per request phase is perfectly normal practice, and multiplexing the processing of different requests in a single actor is perfectly normal too. Our code has plenty of examples of both. Use common sense and your own taste.
+Since an actor occupies a thread from the actor system's pool for the duration of message processing, long computations are better moved out into a separate, dedicated actor (and either relocated to a separate pool of the actor system, or have their parallelism controlled by a resource broker); making blocking calls is almost always a mistake. The urge to write a mutex is heresy and the work of the evil one.
+Actors are identified by their TActorID, which is unique; you must not conjure one out of thin air – you may only obtain it from registration (for actors you spawn), or be told it by someone who legitimately knows it.
+
+Sending to a non-existent (already dead) actor is safe; the message is simply dropped at processing time (how to handle non-delivery in protocols is covered below).
+
+Besides normal TActorIDs there are also service IDs (composed of a string and a node number). A real actor may be registered under one, and in effect, when a message arrives at a service address, the system tries to forward it to the current actual actor. This makes it possible to place well-known services at well-known addresses without building a parallel discovery machinery.
+
+Whether to build a finite state machine out of an actor by switching the handler function is a choice to make case by case: sometimes it is more convenient, sometimes it is better to lump everything into one state, and sometimes a hybrid works best (part of the life cycle – typically initialization and shutdown – is expressed as transitions, the rest is not).
+Fewer words, more action – this is already enough to read the simplest example: https://a.yandex-team.ru/arc/trunk/arcadia/library/actors/examples/01_ping_pong
+There you can see a sample of the simplest actor, one that bounces messages back and forth using all the basic calls. By turning the various knobs (the number of threads in the thread pool, the number of actor pairs exchanging messages) you can watch the system's behavior change (hint: in scenarios this simple, peak performance is reached with a single thread per pool).
+
+### Event and Event Handle.
+The payload of a message is wrapped in a descendant of IEventBase, which has two important methods: serialization and loading. Serialization is virtual, but loading is not: to parse a message out of a byte sequence, the receiving side must match the numeric event type identifier to a C++ type. That is exactly what the macros in hfunc.h do. In practice, events are created either as descendants of TEventLocal<> (for strictly local messages) or of TEventPB<> (for messages that may be sent over the network, typed by a protobuf message).
+
+Besides the event itself (as a structure or as a byte string), forwarding a message requires a set of additional fields:
+
+Recipient
+
+Sender
+
+Message type
+
+Cookie
+
+Flags
+
+Message + additional fields = IEventHandle. Handles are what the actor system actually operates on. <event-type>::TPtr in the example above is exactly that: a pointer to a typed handle.
+
+Technically, the message type can be any number that the recipient and the sender have agreed to interpret as the message identifier. Established practice is to allocate a range of identifiers with the EventSpaceBegin macro (effectively in blocks of 64K), starting from the ES_USERSPACE block.
+The cookie is an uninterpreted ui64 number carried along with the handle. It is good practice for a service to set the cookie of its reply to the cookie of the original message, especially for services that may be used concurrently.
+In the flags, several bits are reserved for flags declaring how special situations must be handled, and 12 bits for the number of the interconnect channel over which the message will be sent (for local messages the channel number does not matter in the existing implementations, although one can imagine an implementation with independent queues per channel).
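+
+As an illustration of the above (written for this text; the protocol and names are invented), declaring a small local protocol might look like this:
+
+```cpp
+#include <library/cpp/actors/core/event_local.h>
+#include <library/cpp/actors/core/events.h>
+#include <util/generic/string.h>
+
+using namespace NActors;
+
+// Reserve a 64K block of identifiers for this protocol.
+enum EEv {
+    EvRequest = EventSpaceBegin(TEvents::ES_USERSPACE),
+    EvResponse,
+    EvEnd
+};
+
+// Strictly local events: they never cross the process boundary.
+struct TEvRequest: TEventLocal<TEvRequest, EvRequest> {
+    const TString Query;
+    explicit TEvRequest(TString query): Query(std::move(query)) {}
+};
+
+struct TEvResponse: TEventLocal<TEvResponse, EvResponse> {
+    const TString Result;
+    explicit TEvResponse(TString result): Result(std::move(result)) {}
+};
+
+// A well-behaved service echoes the request cookie in its reply:
+//   Send(ev->Sender, new TEvResponse(result), 0 /*flags*/, ev->Cookie);
+```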
+### Thread pools and mailboxes.
+Several independent thread pools can coexist within one actor system; each actor is registered on a specific one and cannot migrate during its lifetime (though it can create new actors on an arbitrary pool). This is used for coarse-grained resource separation: either between different activities (here we process one class of requests, there another), or between different activity profiles (here we process fast requests, here slow ones, and over there outright batch ones). For example, YDB runs a system pool (running the actors YDB itself needs to function, where we make sure handlers never block for long), a user pool (where requests are processed and handlers may take longer without affecting the infrastructure), a batch pool (where long-running work is offloaded – disk compactions, table scans and the like) and, on fat nodes, an interconnect pool (interconnect being the most latency-sensitive part).
+Message passing between actors of different thread pools within one local actor system stays local; no forced serialization of the message takes place.
+
+Upon registration an actor is attached to a mailbox (typically its own, but when special need arises you can, while processing a message, attach a spawned actor to the currently active mailbox – see RegisterWithSameMailbox (formerly RegisterLocal); in that case the absence of concurrent message processing is guaranteed). Send is exactly this: wrapping an event into a handle, placing the handle into the mailbox queue, and adding the mailbox to the thread pool's activation queue. Within one mailbox processing is FIFO; between mailboxes there are no such guarantees, although we do try to activate mailboxes roughly in the order messages appeared in them.
+
+When registering an actor you can choose the mailbox type; they differ slightly in the cost of enqueueing: either cheap but worse under contention, or almost wait-free but more expensive. See the comments on TMailboxType for up-to-date hints on which is which.
+
+Useful helpers:
+
+STFUNC – declares a state function; I recommend always using exactly this form for declarations, as it makes them easier to search for later.
+
+hFunc – handler-selection macro that passes the event into the handler.
+
+cFunc – handler-selection macro that does not pass the event into the handler.
+
+### Failure handling.
+Within a local actor system message delivery is guaranteed. If for some reason a message is not delivered (important: precisely not delivered – whether a message was actually processed is on the conscience of the receiving actor), one of the following happens:
+
+If the FlagForwardOnNondelivery flag is set, the message is forwarded to the actor passed as forwardOnNondelivery when the handle was constructed. Useful, for example, when some services are created on demand and, for the not-yet-created ones, we want to route to the router. Works only within a local actor system.
+
+Otherwise, if the FlagTrackDelivery flag is set, a TEvUndelivered event is generated for the sender on behalf of the unreachable actor. Receiving such a message guarantees that the original event was not processed and no effects took place. Generation and delivery of this notification is guaranteed within a local actor system; in a distributed one it is a matter of luck and the notification may get lost.
+
+Otherwise, if no flags are set, the message is dropped.
+
+Since in a distributed system the delivery of non-delivery notifications is not guaranteed, reliable failure handling needs an additional mechanism: with the FlagSubscribeOnSession flag, when a message crosses a node boundary, the sender is subscribed to the notification about the teardown of the network session within which the message was sent. Now, when the network session breaks, the sender learns that the message may have gone undelivered (or may have been delivered – we do not know) and can react. Do not forget to unsubscribe from session-teardown notifications, otherwise they keep piling up until the next teardown (which may be a long time coming).
+
+To summarize: when you need to control delivery within a local actor system, set FlagTrackDelivery and handle TEvUndelivered. For a distributed one, add FlagSubscribeOnSession and additionally handle TEvNodeDisconnected, not forgetting to unsubscribe from subscriptions you no longer need.
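+
+A sketch of the local case (an illustration for this text; it builds on the TEvRequest/TEvResponse events from the earlier sketch and assumes FlagTrackDelivery lives on IEventHandle, as declared in event.h):
+
+```cpp
+#include <library/cpp/actors/core/actor_bootstrapped.h>
+#include <library/cpp/actors/core/events.h>
+#include <library/cpp/actors/core/hfunc.h>
+
+using namespace NActors;
+
+class TTrackingClient: public TActorBootstrapped<TTrackingClient> {
+    const TActorId Service; // hypothetical service actor
+
+public:
+    explicit TTrackingClient(const TActorId& service): Service(service) {}
+
+    void Bootstrap() {
+        Become(&TTrackingClient::StateWork);
+        // Ask the runtime to report non-delivery of this particular message.
+        Send(Service, new TEvRequest("query"), IEventHandle::FlagTrackDelivery, /*cookie=*/42);
+    }
+
+    STATEFN(StateWork) {
+        switch (ev->GetTypeRewrite()) {
+            hFunc(TEvResponse, Handle);
+            hFunc(TEvents::TEvUndelivered, Handle);
+        }
+    }
+
+    void Handle(TEvResponse::TPtr& ev) {
+        // A well-behaved service echoes the request cookie in ev->Cookie.
+        Y_UNUSED(ev);
+    }
+
+    void Handle(TEvents::TEvUndelivered::TPtr& ev) {
+        // Guaranteed: the request was not processed and had no effects;
+        // ev->Cookie tells which send was lost, so retry or fail here.
+        Y_UNUSED(ev);
+        PassAway();
+    }
+};
+```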
+### Interconnect.
+A local actor system is only half of the pie; the ability to join several of them into a distributed one is the other half. The interconnect implementation is available out of the box and can:
+Forward messages over a single tcp connection.
+Multiplex different streams (a.k.a. channels) within one connection, guaranteeing ordering within a channel.
+It tries to do all this well.
+Within a distributed system, each local actor system must be assigned a unique number (e.g. from a table, or by implementing dynamic node-number assignment), and a local name service must be started within each local actor system (e.g. from a table remapping node numbers to network addresses, or as a cache of an upstream name service).
+Now look at the second example: https://a.yandex-team.ru/arc/trunk/arcadia/library/actors/examples/02_discovery
+Here a distributed actor system is configured across five nodes (in the example all five run in one binary, but they can just as well be run in parts). Each node runs a replica for publishing strings and an endpoint actor (each with its own port). The endpoints use a publisher actor to publish their coordinates to the distributed storage (handling abnormal situations and keeping the data current). Next to it lives the implementation of a storage query listing what has been published, by majority. In essence this is the code used in YDB to publish and discover the current endpoints of a user database, simplified and cleaned of specifics.
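+
+Tying the failure-handling and interconnect sections together, the sending side of a cross-node exchange might look like the fragment below (an illustration only; TEvInterconnect::TEvNodeDisconnected is assumed to come from library/cpp/actors/interconnect, and TEvRequest is from the earlier sketch):
+
+```cpp
+// A cross-node send looks exactly like a local one: the node id is part of
+// the TActorId. Subscribe to the session so its teardown can be observed.
+Send(remoteServiceId, new TEvRequest("query"),
+     IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession,
+     /*cookie=*/42);
+
+// ...and in the state function, alongside the normal reply handler:
+//     hFunc(TEvents::TEvUndelivered, Handle);              // definitely not processed
+//     hFunc(TEvInterconnect::TEvNodeDisconnected, Handle); // outcome unknown
+
+void Handle(TEvInterconnect::TEvNodeDisconnected::TPtr& ev) {
+    // The session to node ev->Get()->NodeId broke; in-flight messages to it
+    // may or may not have been delivered, so the protocol must re-verify.
+    // Remember to unsubscribe (TEvents::TEvUnsubscribe to the interconnect
+    // proxy) once the notification is no longer needed.
+}
+```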
diff --git a/library/cpp/actors/core/README.md b/library/cpp/actors/core/README.md
new file mode 100644
index 0000000000..439a8dd459
--- /dev/null
+++ b/library/cpp/actors/core/README.md
@@ -0,0 +1,99 @@
+## Memory tracker
+
+https://a.yandex-team.ru/arc/trunk/arcadia/library/cpp/actors/core/memory_track.h
+
+Usage:
+
+* tracking allocations of instances of a specific class via new/delete and new[]/delete[]
+* tracking allocations in containers
+* manual tracking of allocation/deallocation points
+
+----
+
+### Tracking class allocations via new/delete
+
+Usage with an automatically generated label:
+
+```cpp
+#include <library/cpp/actors/core/memory_track.h>
+
+struct TTypeLabeled
+    : public NActors::NMemory::TTrack<TTypeLabeled>
+{
+    char payload[16];
+};
+```
+
+Usage with a user-defined label name:
+
+```cpp
+#include <library/cpp/actors/core/memory_track.h>
+
+static const char NamedLabel[] = "NamedLabel";
+
+struct TNameLabeled
+    : public NActors::NMemory::TTrack<TNameLabeled, NamedLabel>
+{
+    char payload[32];
+};
+```
+
+----
+
+### Tracking allocations in containers
+
+```cpp
+#include <library/cpp/actors/core/memory_track.h>
+
+static const char InContainerLabel[] = "InContainerLabel";
+
+struct TInContainer {
+    char payload[16];
+};
+
+std::vector<TInContainer, NActors::NMemory::TAlloc<TInContainer>> vecT;
+
+std::vector<TInContainer, NActors::NMemory::TAlloc<TInContainer, InContainerLabel>> vecN;
+
+using TKey = int;
+
+std::map<TKey, TInContainer, std::less<TKey>,
+    NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>>> mapT;
+
+std::map<TKey, TInContainer, std::less<TKey>,
+    NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>, InContainerLabel>> mapN;
+
+std::unordered_map<TKey, TInContainer, std::hash<TKey>, std::equal_to<TKey>,
+    NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>>> umapT;
+
+std::unordered_map<TKey, TInContainer, std::hash<TKey>, std::equal_to<TKey>,
+    NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>, InContainerLabel>> umapN;
+```
+
+----
+
+### Manual tracking of allocations/deallocations
+
+```cpp
+#include <library/cpp/actors/core/memory_track.h>
+
+static const char ManualLabel[] = "ManualLabel";
+
+...
+NActors::NMemory::TLabel<ManualLabel>::Add(size);
+
+...
+NActors::NMemory::TLabel<ManualLabel>::Sub(size);
+```
+
+----
+
+### Collected metrics
+
+Service **utils**, user label **label**, sensors:
+
+- MT/Count: number of allocations at the moment
+- MT/Memory: amount of memory allocated at the moment
+- MT/PeakCount: peak number of allocations (sampled at a fixed frequency)
+- MT/PeakMemory: peak amount of allocated memory
+
diff --git a/library/cpp/actors/core/actor.cpp b/library/cpp/actors/core/actor.cpp new file mode 100644 index 0000000000..6f9ba6a42b --- /dev/null +++ b/library/cpp/actors/core/actor.cpp @@ -0,0 +1,172 @@ +#include "actor.h" +#include "executor_thread.h" +#include "mailbox.h" +#include <library/cpp/actors/util/datetime.h> + +namespace NActors { + Y_POD_THREAD(TActivationContext*) + TlsActivationContext((TActivationContext*)nullptr); + + bool TActorContext::Send(const TActorId& recipient, IEventBase* ev, ui32 flags, ui64 cookie, NWilson::TTraceId traceId) const { + return Send(new IEventHandle(recipient, SelfID, ev, flags, cookie, nullptr, std::move(traceId))); + } + + bool TActorContext::Send(TAutoPtr<IEventHandle> ev) const { + return ExecutorThread.Send(ev); + } + + void IActor::Registered(TActorSystem* sys, const TActorId& owner) { + // fallback to legacy method, do not use it anymore + if (auto eh = AfterRegister(SelfId(), owner)) + sys->Send(eh); + } + + void IActor::Describe(IOutputStream &out) const noexcept { + SelfActorId.Out(out); + } + + bool IActor::Send(const TActorId& recipient, IEventBase* ev, ui32 flags, ui64 cookie, NWilson::TTraceId traceId) const noexcept { + return SelfActorId.Send(recipient, ev, flags, cookie, std::move(traceId)); + } + + bool TActivationContext::Send(TAutoPtr<IEventHandle> ev) { + return TlsActivationContext->ExecutorThread.Send(ev); + } + + void TActivationContext::Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) { + TlsActivationContext->ExecutorThread.Schedule(deadline, ev, cookie); + } + + void TActivationContext::Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) { + TlsActivationContext->ExecutorThread.Schedule(deadline, ev, cookie); + } + + void TActivationContext::Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) { + TlsActivationContext->ExecutorThread.Schedule(delta, ev, cookie); + } + + bool TActorIdentity::Send(const TActorId& recipient, IEventBase* ev, ui32 flags, ui64 cookie, NWilson::TTraceId traceId) const { + return TActivationContext::Send(new IEventHandle(recipient, *this, ev, flags, cookie, nullptr, std::move(traceId))); + } + + void TActorIdentity::Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie) const { + return TActivationContext::Schedule(deadline, new IEventHandle(*this, {}, ev), cookie); + } + + void TActorIdentity::Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie) const { + return TActivationContext::Schedule(deadline, new IEventHandle(*this, {}, ev), cookie); + } + + void TActorIdentity::Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie) const { + return TActivationContext::Schedule(delta, new IEventHandle(*this, {}, ev), cookie); + } + + TActorId TActivationContext::RegisterWithSameMailbox(IActor* actor, TActorId parentId) { + Y_VERIFY_DEBUG(parentId); + auto& ctx = *TlsActivationContext; + return ctx.ExecutorThread.RegisterActor(actor, &ctx.Mailbox, parentId.Hint(), parentId); + } + + TActorId TActorContext::RegisterWithSameMailbox(IActor* actor) const { + return
ExecutorThread.RegisterActor(actor, &Mailbox, SelfID.Hint(), SelfID); + } + + TActorId IActor::RegisterWithSameMailbox(IActor* actor) const noexcept { + return TlsActivationContext->ExecutorThread.RegisterActor(actor, &TlsActivationContext->Mailbox, SelfActorId.Hint(), SelfActorId); + } + + TActorId TActivationContext::Register(IActor* actor, TActorId parentId, TMailboxType::EType mailboxType, ui32 poolId) { + return TlsActivationContext->ExecutorThread.RegisterActor(actor, mailboxType, poolId, parentId); + } + + TActorId TActivationContext::InterconnectProxy(ui32 destinationNodeId) { + return TlsActivationContext->ExecutorThread.ActorSystem->InterconnectProxy(destinationNodeId); + } + + TActorSystem* TActivationContext::ActorSystem() { + return TlsActivationContext->ExecutorThread.ActorSystem; + } + + i64 TActivationContext::GetCurrentEventTicks() { + return GetCycleCountFast() - TlsActivationContext->EventStart; + } + + double TActivationContext::GetCurrentEventTicksAsSeconds() { + return NHPTimer::GetSeconds(GetCurrentEventTicks()); + } + + TActorId TActorContext::Register(IActor* actor, TMailboxType::EType mailboxType, ui32 poolId) const { + return ExecutorThread.RegisterActor(actor, mailboxType, poolId, SelfID); + } + + TActorId IActor::Register(IActor* actor, TMailboxType::EType mailboxType, ui32 poolId) const noexcept { + return TlsActivationContext->ExecutorThread.RegisterActor(actor, mailboxType, poolId, SelfActorId); + } + + void TActorContext::Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie) const { + ExecutorThread.Schedule(deadline, new IEventHandle(SelfID, TActorId(), ev), cookie); + } + + void TActorContext::Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie) const { + ExecutorThread.Schedule(deadline, new IEventHandle(SelfID, TActorId(), ev), cookie); + } + + void TActorContext::Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie) const { + ExecutorThread.Schedule(delta, new IEventHandle(SelfID, TActorId(), ev), cookie); + } + + void IActor::Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie) const noexcept { + TlsActivationContext->ExecutorThread.Schedule(deadline, new IEventHandle(SelfActorId, TActorId(), ev), cookie); + } + + void IActor::Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie) const noexcept { + TlsActivationContext->ExecutorThread.Schedule(deadline, new IEventHandle(SelfActorId, TActorId(), ev), cookie); + } + + void IActor::Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie) const noexcept { + TlsActivationContext->ExecutorThread.Schedule(delta, new IEventHandle(SelfActorId, TActorId(), ev), cookie); + } + + TInstant TActivationContext::Now() { + return TlsActivationContext->ExecutorThread.ActorSystem->Timestamp(); + } + + TMonotonic TActivationContext::Monotonic() { + return TlsActivationContext->ExecutorThread.ActorSystem->Monotonic(); + } + + TInstant TActorContext::Now() const { + return ExecutorThread.ActorSystem->Timestamp(); + } + + TMonotonic TActorContext::Monotonic() const { + return ExecutorThread.ActorSystem->Monotonic(); + } + + NLog::TSettings* TActivationContext::LoggerSettings() const { + return ExecutorThread.ActorSystem->LoggerSettings(); + } + + std::pair<ui32, ui32> TActorContext::CountMailboxEvents(ui32 maxTraverse) const { + return Mailbox.CountMailboxEvents(SelfID.LocalId(), maxTraverse); + } + + std::pair<ui32, ui32> IActor::CountMailboxEvents(ui32 maxTraverse) const { + return 
TlsActivationContext->Mailbox.CountMailboxEvents(SelfActorId.LocalId(), maxTraverse); + } + + void IActor::Die(const TActorContext& ctx) { + if (ctx.SelfID) + Y_VERIFY(ctx.SelfID == SelfActorId); + PassAway(); + } + + void IActor::PassAway() { + auto& cx = *TlsActivationContext; + cx.ExecutorThread.UnregisterActor(&cx.Mailbox, SelfActorId.LocalId()); + } + + double IActor::GetElapsedTicksAsSeconds() const { + return NHPTimer::GetSeconds(ElapsedTicks); + } +} diff --git a/library/cpp/actors/core/actor.h b/library/cpp/actors/core/actor.h new file mode 100644 index 0000000000..ed29bd14b9 --- /dev/null +++ b/library/cpp/actors/core/actor.h @@ -0,0 +1,530 @@ +#pragma once + +#include "event.h" +#include "monotonic.h" +#include <util/system/tls.h> +#include <library/cpp/actors/util/local_process_key.h> + +namespace NActors { + class TActorSystem; + class TMailboxTable; + struct TMailboxHeader; + + class TExecutorThread; + class IActor; + class ISchedulerCookie; + + namespace NLog { + struct TSettings; + } + + struct TActorContext; + + struct TActivationContext { + public: + TMailboxHeader& Mailbox; + TExecutorThread& ExecutorThread; + const NHPTimer::STime EventStart; + + protected: + explicit TActivationContext(TMailboxHeader& mailbox, TExecutorThread& executorThread, NHPTimer::STime eventStart) + : Mailbox(mailbox) + , ExecutorThread(executorThread) + , EventStart(eventStart) + { + } + + public: + static bool Send(TAutoPtr<IEventHandle> ev); + + /** + * Schedule one-shot event that will be send at given time point in the future. + * + * @param deadline the wallclock time point in future when event must be send + * @param ev the event to send + * @param cookie cookie that will be piggybacked with event + */ + static void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr); + + /** + * Schedule one-shot event that will be send at given time point in the future. + * + * @param deadline the monotonic time point in future when event must be send + * @param ev the event to send + * @param cookie cookie that will be piggybacked with event + */ + static void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr); + + /** + * Schedule one-shot event that will be send after given delay. + * + * @param delta the time from now to delay event sending + * @param ev the event to send + * @param cookie cookie that will be piggybacked with event + */ + static void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr); + + static TInstant Now(); + static TMonotonic Monotonic(); + NLog::TSettings* LoggerSettings() const; + + // register new actor in ActorSystem on new fresh mailbox. + static TActorId Register(IActor* actor, TActorId parentId = TActorId(), TMailboxType::EType mailboxType = TMailboxType::HTSwap, ui32 poolId = Max<ui32>()); + + // Register new actor in ActorSystem on same _mailbox_ as current actor. + // There is one thread per mailbox to execute actor, which mean + // no _cpu core scalability_ for such actors. + // This method of registration can be usefull if multiple actors share + // some memory. 
+ static TActorId RegisterWithSameMailbox(IActor* actor, TActorId parentId); + + static const TActorContext& AsActorContext(); + static TActorContext ActorContextFor(TActorId id); + + static TActorId InterconnectProxy(ui32 nodeid); + static TActorSystem* ActorSystem(); + + static i64 GetCurrentEventTicks(); + static double GetCurrentEventTicksAsSeconds(); + }; + + struct TActorContext: public TActivationContext { + const TActorId SelfID; + + explicit TActorContext(TMailboxHeader& mailbox, TExecutorThread& executorThread, NHPTimer::STime eventStart, const TActorId& selfID) + : TActivationContext(mailbox, executorThread, eventStart) + , SelfID(selfID) + { + } + + bool Send(const TActorId& recipient, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) const; + template <typename TEvent> + bool Send(const TActorId& recipient, THolder<TEvent> ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) const { + return Send(recipient, static_cast<IEventBase*>(ev.Release()), flags, cookie, std::move(traceId)); + } + bool Send(TAutoPtr<IEventHandle> ev) const; + + TInstant Now() const; + TMonotonic Monotonic() const; + + /** + * Schedule one-shot event that will be send at given time point in the future. + * + * @param deadline the wallclock time point in future when event must be send + * @param ev the event to send + * @param cookie cookie that will be piggybacked with event + */ + void Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const; + + /** + * Schedule one-shot event that will be send at given time point in the future. + * + * @param deadline the monotonic time point in future when event must be send + * @param ev the event to send + * @param cookie cookie that will be piggybacked with event + */ + void Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const; + + /** + * Schedule one-shot event that will be send after given delay. + * + * @param delta the time from now to delay event sending + * @param ev the event to send + * @param cookie cookie that will be piggybacked with event + */ + void Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const; + + TActorContext MakeFor(const TActorId& otherId) const { + return TActorContext(Mailbox, ExecutorThread, EventStart, otherId); + } + + // register new actor in ActorSystem on new fresh mailbox. + TActorId Register(IActor* actor, TMailboxType::EType mailboxType = TMailboxType::HTSwap, ui32 poolId = Max<ui32>()) const; + + // Register new actor in ActorSystem on same _mailbox_ as current actor. + // There is one thread per mailbox to execute actor, which mean + // no _cpu core scalability_ for such actors. + // This method of registration can be usefull if multiple actors share + // some memory. 
+ TActorId RegisterWithSameMailbox(IActor* actor) const; + + std::pair<ui32, ui32> CountMailboxEvents(ui32 maxTraverse = Max<ui32>()) const; + }; + + extern Y_POD_THREAD(TActivationContext*) TlsActivationContext; + + struct TActorIdentity: public TActorId { + explicit TActorIdentity(TActorId actorId) + : TActorId(actorId) + { + } + + void operator=(TActorId actorId) { + *this = TActorIdentity(actorId); + } + + bool Send(const TActorId& recipient, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) const; + void Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const; + void Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const; + void Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const; + }; + + class IActor; + + class IActorOps : TNonCopyable { + public: + virtual void Describe(IOutputStream&) const noexcept = 0; + virtual bool Send(const TActorId& recipient, IEventBase*, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) const noexcept = 0; + + /** + * Schedule one-shot event that will be send at given time point in the future. + * + * @param deadline the wallclock time point in future when event must be send + * @param ev the event to send + * @param cookie cookie that will be piggybacked with event + */ + virtual void Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const noexcept = 0; + + /** + * Schedule one-shot event that will be send at given time point in the future. + * + * @param deadline the monotonic time point in future when event must be send + * @param ev the event to send + * @param cookie cookie that will be piggybacked with event + */ + virtual void Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const noexcept = 0; + + /** + * Schedule one-shot event that will be send after given delay. 
+ * + * @param delta the time from now to delay event sending + * @param ev the event to send + * @param cookie cookie that will be piggybacked with event + */ + virtual void Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const noexcept = 0; + + virtual TActorId Register(IActor*, TMailboxType::EType mailboxType = TMailboxType::HTSwap, ui32 poolId = Max<ui32>()) const noexcept = 0; + virtual TActorId RegisterWithSameMailbox(IActor*) const noexcept = 0; + }; + + class TDecorator; + + class IActor : protected IActorOps { + public: + typedef void (IActor::*TReceiveFunc)(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx); + + private: + TReceiveFunc StateFunc; + TActorIdentity SelfActorId; + i64 ElapsedTicks; + ui64 HandledEvents; + + friend void DoActorInit(TActorSystem*, IActor*, const TActorId&, const TActorId&); + friend class TDecorator; + + public: + /// @sa services.proto NKikimrServices::TActivity::EType + enum EActorActivity { + OTHER = 0, + ACTOR_SYSTEM = 1, + ACTORLIB_COMMON = 2, + ACTORLIB_STATS = 3, + LOG_ACTOR = 4, + INTERCONNECT_PROXY_TCP = 12, + INTERCONNECT_SESSION_TCP = 13, + INTERCONNECT_COMMON = 171, + SELF_PING_ACTOR = 207, + TEST_ACTOR_RUNTIME = 283, + INTERCONNECT_HANDSHAKE = 284, + INTERCONNECT_POLLER = 285, + INTERCONNECT_SESSION_KILLER = 286, + ACTOR_SYSTEM_SCHEDULER_ACTOR = 312, + ACTOR_FUTURE_CALLBACK = 337, + INTERCONNECT_MONACTOR = 362, + INTERCONNECT_LOAD_ACTOR = 376, + INTERCONNECT_LOAD_RESPONDER = 377, + NAMESERVICE = 450, + DNS_RESOLVER = 481, + INTERCONNECT_PROXY_WRAPPER = 546, + }; + + using EActivityType = EActorActivity; + ui32 ActivityType; + + protected: + IActor(TReceiveFunc stateFunc, ui32 activityType = OTHER) + : StateFunc(stateFunc) + , SelfActorId(TActorId()) + , ElapsedTicks(0) + , HandledEvents(0) + , ActivityType(activityType) + { + } + + public: + virtual ~IActor() { + } // must not be called for registered actors, see Die method instead + + protected: + virtual void Die(const TActorContext& ctx); // would unregister actor so call exactly once and only from inside of message processing + virtual void PassAway(); + + public: + template <typename T> + void Become(T stateFunc) { + StateFunc = static_cast<TReceiveFunc>(stateFunc); + } + + template <typename T, typename... TArgs> + void Become(T stateFunc, const TActorContext& ctx, TArgs&&... args) { + StateFunc = static_cast<TReceiveFunc>(stateFunc); + ctx.Schedule(std::forward<TArgs>(args)...); + } + + template <typename T, typename... TArgs> + void Become(T stateFunc, TArgs&&... args) { + StateFunc = static_cast<TReceiveFunc>(stateFunc); + Schedule(std::forward<TArgs>(args)...); + } + + protected: + void SetActivityType(ui32 activityType) { + ActivityType = activityType; + } + + public: + TReceiveFunc CurrentStateFunc() const { + return StateFunc; + } + + // NOTE: exceptions must not escape state function but if an exception hasn't be caught + // by the actor then we want to crash an see the stack + void Receive(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx) { + (this->*StateFunc)(ev, ctx); + HandledEvents++; + } + + // must be called to wrap any call trasitions from one actor to another + template<typename TActor, typename TMethod, typename... TArgs> + static decltype((std::declval<TActor>().*std::declval<TMethod>())(std::declval<TArgs>()...)) + InvokeOtherActor(TActor& actor, TMethod&& method, TArgs&&... 
args) { + struct TRecurseContext : TActorContext { + TActivationContext *Prev; + TRecurseContext(const TActorId& actorId) + : TActorContext(TActivationContext::ActorContextFor(actorId)) + , Prev(TlsActivationContext) + { + TlsActivationContext = this; + } + ~TRecurseContext() { + TlsActivationContext = Prev; + } + } context(actor.SelfId()); + return (actor.*method)(std::forward<TArgs>(args)...); + } + + virtual void Registered(TActorSystem* sys, const TActorId& owner); + + virtual TAutoPtr<IEventHandle> AfterRegister(const TActorId& self, const TActorId& parentId) { + Y_UNUSED(self); + Y_UNUSED(parentId); + return TAutoPtr<IEventHandle>(); + } + + i64 GetElapsedTicks() const { + return ElapsedTicks; + } + double GetElapsedTicksAsSeconds() const; + void AddElapsedTicks(i64 ticks) { + ElapsedTicks += ticks; + } + auto GetActivityType() const { + return ActivityType; + } + ui64 GetHandledEvents() const { + return HandledEvents; + } + TActorIdentity SelfId() const { + return SelfActorId; + } + + protected: + void Describe(IOutputStream&) const noexcept override; + bool Send(const TActorId& recipient, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) const noexcept final; + template <typename TEvent> + bool Send(const TActorId& recipient, THolder<TEvent> ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) const{ + return Send(recipient, static_cast<IEventBase*>(ev.Release()), flags, cookie, std::move(traceId)); + } + + template <class TEvent, class ... TEventArgs> + bool Send(TActorId recipient, TEventArgs&& ... args) const { + return Send(recipient, MakeHolder<TEvent>(std::forward<TEventArgs>(args)...)); + } + + void Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const noexcept final; + void Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const noexcept final; + void Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie = nullptr) const noexcept final; + + // register new actor in ActorSystem on new fresh mailbox. + TActorId Register(IActor* actor, TMailboxType::EType mailboxType = TMailboxType::HTSwap, ui32 poolId = Max<ui32>()) const noexcept final; + + // Register new actor in ActorSystem on same _mailbox_ as current actor. + // There is one thread per mailbox to execute actor, which mean + // no _cpu core scalability_ for such actors. + // This method of registration can be usefull if multiple actors share + // some memory. 
+ TActorId RegisterWithSameMailbox(IActor* actor) const noexcept final; + + std::pair<ui32, ui32> CountMailboxEvents(ui32 maxTraverse = Max<ui32>()) const; + + private: + void ChangeSelfId(TActorId actorId) { + SelfActorId = actorId; + } + }; + + struct TActorActivityTag {}; + + inline size_t GetActivityTypeCount() { + return TLocalProcessKeyState<TActorActivityTag>::GetInstance().GetCount(); + } + + inline TStringBuf GetActivityTypeName(size_t index) { + return TLocalProcessKeyState<TActorActivityTag>::GetInstance().GetNameByIndex(index); + } + + template <typename TDerived> + class TActor: public IActor { + private: + template <typename T, typename = const char*> + struct HasActorName: std::false_type { }; + template <typename T> + struct HasActorName<T, decltype((void)T::ActorName, (const char*)nullptr)>: std::true_type { }; + + static ui32 GetActivityTypeIndex() { + if constexpr(HasActorName<TDerived>::value) { + return TLocalProcessKey<TActorActivityTag, TDerived::ActorName>::GetIndex(); + } else { + using TActorActivity = decltype(((TDerived*)nullptr)->ActorActivityType()); + // if constexpr(std::is_enum<TActorActivity>::value) { + return TEnumProcessKey<TActorActivityTag, TActorActivity>::GetIndex( + TDerived::ActorActivityType()); + //} else { + // for int, ui32, ... + // return TEnumProcessKey<TActorActivityTag, IActor::EActorActivity>::GetIndex( + // static_cast<IActor::EActorActivity>(TDerived::ActorActivityType())); + //} + } + } + + protected: + //* Comment this function to find unmarked activities + static constexpr IActor::EActivityType ActorActivityType() { + return EActorActivity::OTHER; + } //*/ + + // static constexpr char ActorName[] = "UNNAMED"; + + TActor(void (TDerived::*func)(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx), ui32 activityType = GetActivityTypeIndex()) + : IActor(static_cast<TReceiveFunc>(func), activityType) + { } + + public: + typedef TDerived TThis; + }; + + +#define STFUNC_SIG TAutoPtr< ::NActors::IEventHandle>&ev, const ::NActors::TActorContext &ctx +#define STATEFN_SIG TAutoPtr<::NActors::IEventHandle>& ev +#define STFUNC(funcName) void funcName(TAutoPtr< ::NActors::IEventHandle>& ev, const ::NActors::TActorContext& ctx) +#define STATEFN(funcName) void funcName(TAutoPtr< ::NActors::IEventHandle>& ev, const ::NActors::TActorContext& ) + +#define STRICT_STFUNC(NAME, HANDLERS) \ + void NAME(STFUNC_SIG) { \ + Y_UNUSED(ctx); \ + switch (const ui32 etype = ev->GetTypeRewrite()) { \ + HANDLERS \ + default: \ + Y_VERIFY_DEBUG(false, "%s: unexpected message type 0x%08" PRIx32, __func__, etype); \ + } \ + } + + inline const TActorContext& TActivationContext::AsActorContext() { + TActivationContext* tls = TlsActivationContext; + return *static_cast<TActorContext*>(tls); + } + + inline TActorContext TActivationContext::ActorContextFor(TActorId id) { + auto& tls = *TlsActivationContext; + return TActorContext(tls.Mailbox, tls.ExecutorThread, tls.EventStart, id); + } + + class TDecorator : public IActor { + protected: + THolder<IActor> Actor; + + public: + TDecorator(THolder<IActor>&& actor) + : IActor(static_cast<TReceiveFunc>(&TDecorator::State), actor->GetActivityType()) + , Actor(std::move(actor)) + { + } + + void Registered(TActorSystem* sys, const TActorId& owner) override { + Actor->ChangeSelfId(SelfId()); + Actor->Registered(sys, owner); + } + + virtual bool DoBeforeReceiving(TAutoPtr<IEventHandle>& /*ev*/, const TActorContext& /*ctx*/) { + return true; + } + + virtual void DoAfterReceiving(const TActorContext& /*ctx*/) + { + } + + STFUNC(State) 
{ + if (DoBeforeReceiving(ev, ctx)) { + Actor->Receive(ev, ctx); + DoAfterReceiving(ctx); + } + } + }; + + // TTestDecorator doesn't work with the real actor system + struct TTestDecorator : public TDecorator { + TTestDecorator(THolder<IActor>&& actor) + : TDecorator(std::move(actor)) + { + } + + virtual ~TTestDecorator() = default; + + // This method must be called in the test actor system + bool BeforeSending(TAutoPtr<IEventHandle>& ev) + { + bool send = true; + TTestDecorator *decorator = dynamic_cast<TTestDecorator*>(Actor.Get()); + if (decorator) { + send = decorator->BeforeSending(ev); + } + return send && ev && DoBeforeSending(ev); + } + + virtual bool DoBeforeSending(TAutoPtr<IEventHandle>& /*ev*/) { + return true; + } + }; +} + +template <> +inline void Out<NActors::TActorIdentity>(IOutputStream& o, const NActors::TActorIdentity& x) { + return x.Out(o); +} + +template <> +struct THash<NActors::TActorIdentity> { + inline ui64 operator()(const NActors::TActorIdentity& x) const { + return x.Hash(); + } +}; diff --git a/library/cpp/actors/core/actor_bootstrapped.h b/library/cpp/actors/core/actor_bootstrapped.h new file mode 100644 index 0000000000..a37887c939 --- /dev/null +++ b/library/cpp/actors/core/actor_bootstrapped.h @@ -0,0 +1,37 @@ +#pragma once + +#include "actor.h" +#include "events.h" + +namespace NActors { + template<typename T> struct dependent_false : std::false_type {}; + + template<typename TDerived> + class TActorBootstrapped : public TActor<TDerived> { + protected: + TAutoPtr<IEventHandle> AfterRegister(const TActorId& self, const TActorId& parentId) override { + return new IEventHandle(TEvents::TSystem::Bootstrap, 0, self, parentId, {}, 0); + } + + STFUNC(StateBootstrap) { + Y_VERIFY(ev->GetTypeRewrite() == TEvents::TSystem::Bootstrap, "Unexpected bootstrap message"); + using T = decltype(&TDerived::Bootstrap); + TDerived& self = static_cast<TDerived&>(*this); + if constexpr (std::is_invocable_v<T, TDerived, const TActorContext&>) { + self.Bootstrap(ctx); + } else if constexpr (std::is_invocable_v<T, TDerived, const TActorId&, const TActorContext&>) { + self.Bootstrap(ev->Sender, ctx); + } else if constexpr (std::is_invocable_v<T, TDerived>) { + self.Bootstrap(); + } else if constexpr (std::is_invocable_v<T, TDerived, const TActorId&>) { + self.Bootstrap(ev->Sender); + } else { + static_assert(dependent_false<TDerived>::value, "No correct Bootstrap() signature"); + } + } + + TActorBootstrapped() + : TActor<TDerived>(&TDerived::StateBootstrap) + {} + }; +} diff --git a/library/cpp/actors/core/actor_coroutine.cpp b/library/cpp/actors/core/actor_coroutine.cpp new file mode 100644 index 0000000000..0ab4d2b24d --- /dev/null +++ b/library/cpp/actors/core/actor_coroutine.cpp @@ -0,0 +1,165 @@ +#include "actor_coroutine.h" +#include "executor_thread.h" + +#include <util/system/sanitizers.h> +#include <util/system/type_name.h> + +namespace NActors { + static constexpr size_t StackOverflowGap = 4096; + static char GoodStack[StackOverflowGap]; + + static struct TInitGoodStack { + TInitGoodStack() { + // fill stack with some pseudo-random pattern + for (size_t k = 0; k < StackOverflowGap; ++k) { + GoodStack[k] = k + k * 91; + } + } + } initGoodStack; + + TActorCoroImpl::TActorCoroImpl(size_t stackSize, bool allowUnhandledPoisonPill, bool allowUnhandledDtor) + : Stack(stackSize) + , AllowUnhandledPoisonPill(allowUnhandledPoisonPill) + , AllowUnhandledDtor(allowUnhandledDtor) + , FiberClosure{this, TArrayRef(Stack.Begin(), Stack.End())} + , FiberContext(FiberClosure) + { +#ifndef 
NDEBUG + char* p; +#if STACK_GROW_DOWN + p = Stack.Begin(); +#else + p = Stack.End() - StackOverflowGap; +#endif + memcpy(p, GoodStack, StackOverflowGap); +#endif + } + + TActorCoroImpl::~TActorCoroImpl() { + if (!Finished && !NSan::TSanIsOn()) { // only resume when we have bootstrapped and Run() was entered and not yet finished; in other case simply terminate + Y_VERIFY(!PendingEvent); + Resume(); + } + } + + bool TActorCoroImpl::Send(TAutoPtr<IEventHandle> ev) { + return GetActorContext().ExecutorThread.Send(ev); + } + + THolder<IEventHandle> TActorCoroImpl::WaitForEvent(TInstant deadline) { + const ui64 cookie = ++WaitCookie; + if (deadline != TInstant::Max()) { + ActorContext->ExecutorThread.Schedule(deadline - Now(), new IEventHandle(SelfActorId, {}, new TEvCoroTimeout, + 0, cookie)); + } + + // ensure we have no unprocessed event and return back to actor system to receive one + Y_VERIFY(!PendingEvent); + ReturnToActorSystem(); + + // obtain pending event and ensure we've got one + while (THolder<IEventHandle> event = std::exchange(PendingEvent, {})) { + if (event->GetTypeRewrite() != TEvents::TSystem::CoroTimeout) { + // special handling for poison pill -- we throw exception + if (event->GetTypeRewrite() == TEvents::TEvPoisonPill::EventType) { + throw TPoisonPillException(); + } + + // otherwise just return received event + return event; + } else if (event->Cookie == cookie) { + return nullptr; // it is not a race -- we've got timeout exactly for our current wait + } else { + ReturnToActorSystem(); // drop this event and wait for the next one + } + } + Y_FAIL("no pending event"); + } + + const TActorContext& TActorCoroImpl::GetActorContext() const { + Y_VERIFY(ActorContext); + return *ActorContext; + } + + bool TActorCoroImpl::ProcessEvent(THolder<IEventHandle> ev) { + Y_VERIFY(!PendingEvent); + if (!SelfActorId) { // process bootstrap message, extract actor ids + Y_VERIFY(ev->GetTypeRewrite() == TEvents::TSystem::Bootstrap); + SelfActorId = ev->Recipient; + ParentActorId = ev->Sender; + } else { // process further messages + PendingEvent = std::move(ev); + } + + // prepare actor context for in-coroutine use + TActivationContext *ac = TlsActivationContext; + TlsActivationContext = nullptr; + TActorContext ctx(ac->Mailbox, ac->ExecutorThread, ac->EventStart, SelfActorId); + ActorContext = &ctx; + + Resume(); + + // drop actor context + TlsActivationContext = ac; + ActorContext = nullptr; + + return Finished; + } + + void TActorCoroImpl::Resume() { + // save caller context for a later return + Y_VERIFY(!ActorSystemContext); + TExceptionSafeContext actorSystemContext; + ActorSystemContext = &actorSystemContext; + + // go to actor coroutine + BeforeResume(); + ActorSystemContext->SwitchTo(&FiberContext); + + // check for stack overflow +#ifndef NDEBUG + const char* p; +#if STACK_GROW_DOWN + p = Stack.Begin(); +#else + p = Stack.End() - StackOverflowGap; +#endif + Y_VERIFY_DEBUG(memcmp(p, GoodStack, StackOverflowGap) == 0); +#endif + } + + void TActorCoroImpl::DoRun() { + try { + if (ActorContext) { // ActorContext may be nullptr here if the destructor was invoked before bootstrapping + Y_VERIFY(!PendingEvent); + Run(); + } + } catch (const TPoisonPillException& /*ex*/) { + if (!AllowUnhandledPoisonPill) { + Y_FAIL("unhandled TPoisonPillException"); + } + } catch (const TDtorException& /*ex*/) { + if (!AllowUnhandledDtor) { + Y_FAIL("unhandled TDtorException"); + } + } catch (const std::exception& ex) { + Y_FAIL("unhandled exception of type %s", TypeName(ex).data()); + } catch (...) 
{ + Y_FAIL("unhandled exception of type not derived from std::exception"); + } + Finished = true; + ReturnToActorSystem(); + } + + void TActorCoroImpl::ReturnToActorSystem() { + TExceptionSafeContext* returnContext = std::exchange(ActorSystemContext, nullptr); + Y_VERIFY(returnContext); + FiberContext.SwitchTo(returnContext); + if (!PendingEvent) { + // we have returned from the actor system and it kindly asks us to terminate the coroutine as it is being + // stopped + throw TDtorException(); + } + } + +} diff --git a/library/cpp/actors/core/actor_coroutine.h b/library/cpp/actors/core/actor_coroutine.h new file mode 100644 index 0000000000..6bcb768eaf --- /dev/null +++ b/library/cpp/actors/core/actor_coroutine.h @@ -0,0 +1,174 @@ +#pragma once + +#include <util/system/context.h> +#include <util/system/filemap.h> + +#include "actor_bootstrapped.h" +#include "executor_thread.h" +#include "event_local.h" + +namespace NActors { + + class TActorCoro; + + class TActorCoroImpl : public ITrampoLine { + TMappedAllocation Stack; + bool AllowUnhandledPoisonPill; + bool AllowUnhandledDtor; + TContClosure FiberClosure; + TExceptionSafeContext FiberContext; + TExceptionSafeContext* ActorSystemContext = nullptr; + THolder<IEventHandle> PendingEvent; + bool Finished = false; + ui64 WaitCookie = 0; + TActorContext *ActorContext = nullptr; + + protected: + TActorIdentity SelfActorId = TActorIdentity(TActorId()); + TActorId ParentActorId; + + private: + template <typename TFirstEvent, typename... TOtherEvents> + struct TIsOneOf: public TIsOneOf<TOtherEvents...> { + bool operator()(IEventHandle& ev) const { + return ev.GetTypeRewrite() == TFirstEvent::EventType || TIsOneOf<TOtherEvents...>()(ev); + } + }; + + template <typename TSingleEvent> + struct TIsOneOf<TSingleEvent> { + bool operator()(IEventHandle& ev) const { + return ev.GetTypeRewrite() == TSingleEvent::EventType; + } + }; + + struct TEvCoroTimeout : TEventLocal<TEvCoroTimeout, TEvents::TSystem::CoroTimeout> {}; + + protected: + struct TPoisonPillException : yexception {}; + struct TDtorException : yexception {}; + + public: + TActorCoroImpl(size_t stackSize, bool allowUnhandledPoisonPill = false, bool allowUnhandledDtor = false); + // specify stackSize explicitly for each actor; don't forget about overflow control gap + + virtual ~TActorCoroImpl(); + + virtual void Run() = 0; + + virtual void BeforeResume() {} + + // Handle all events that are not expected in wait loops. + virtual void ProcessUnexpectedEvent(TAutoPtr<IEventHandle> ev) = 0; + + // Release execution ownership and wait for some event to arrive. When PoisonPill event is received, then + // TPoisonPillException is thrown. + THolder<IEventHandle> WaitForEvent(TInstant deadline = TInstant::Max()); + + // Wait for specific event set by filter functor. Function returns first event that matches filter. On any other + // kind of event ProcessUnexpectedEvent() is called. + // + // Example: WaitForSpecificEvent([](IEventHandle& ev) { return ev.Cookie == 42; }); + template <typename TFunc> + THolder<IEventHandle> WaitForSpecificEvent(TFunc&& filter, TInstant deadline = TInstant::Max()) { + for (;;) { + if (THolder<IEventHandle> event = WaitForEvent(deadline); !event) { + return nullptr; + } else if (filter(*event)) { + return event; + } else { + ProcessUnexpectedEvent(event); + } + } + } + + // Wait for specific event or set of events. Function returns first event that matches enlisted type. On any other + // kind of event ProcessUnexpectedEvent() is called. 
+ // + // Example: WaitForSpecificEvent<TEvReadResult, TEvFinished>(); + template <typename TFirstEvent, typename TSecondEvent, typename... TOtherEvents> + THolder<IEventHandle> WaitForSpecificEvent(TInstant deadline = TInstant::Max()) { + TIsOneOf<TFirstEvent, TSecondEvent, TOtherEvents...> filter; + return WaitForSpecificEvent(filter, deadline); + } + + // Wait for single specific event. + template <typename TEventType> + THolder<typename TEventType::THandle> WaitForSpecificEvent(TInstant deadline = TInstant::Max()) { + auto filter = [](IEventHandle& ev) { + return ev.GetTypeRewrite() == TEventType::EventType; + }; + THolder<IEventHandle> event = WaitForSpecificEvent(filter, deadline); + return THolder<typename TEventType::THandle>(static_cast<typename TEventType::THandle*>(event ? event.Release() : nullptr)); + } + + protected: // Actor System compatibility section + const TActorContext& GetActorContext() const; + TActorSystem *GetActorSystem() const { return GetActorContext().ExecutorThread.ActorSystem; } + TInstant Now() const { return GetActorContext().Now(); } + + bool Send(const TActorId& recipient, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) { + return GetActorContext().Send(recipient, ev, flags, cookie, std::move(traceId)); + } + + template <typename TEvent> + bool Send(const TActorId& recipient, THolder<TEvent> ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) { + return GetActorContext().Send(recipient, ev.Release(), flags, cookie, std::move(traceId)); + } + + bool Send(TAutoPtr<IEventHandle> ev); + + void Schedule(TDuration delta, IEventBase* ev, ISchedulerCookie* cookie = nullptr) { + return GetActorContext().Schedule(delta, ev, cookie); + } + + void Schedule(TInstant deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) { + return GetActorContext().Schedule(deadline, ev, cookie); + } + + void Schedule(TMonotonic deadline, IEventBase* ev, ISchedulerCookie* cookie = nullptr) { + return GetActorContext().Schedule(deadline, ev, cookie); + } + + TActorId Register(IActor* actor, TMailboxType::EType mailboxType = TMailboxType::HTSwap, ui32 poolId = Max<ui32>()) { + return GetActorContext().Register(actor, mailboxType, poolId); + } + + TActorId RegisterWithSameMailbox(IActor* actor) { + return GetActorContext().RegisterWithSameMailbox(actor); + } + + private: + friend class TActorCoro; + bool ProcessEvent(THolder<IEventHandle> ev); + + private: + /* Resume() function goes to actor coroutine context and continues (or starts) to execute it until actor finishes + * his job or it is blocked on WaitForEvent. Then the function returns. 
*/ + void Resume(); + void ReturnToActorSystem(); + void DoRun() override final; + }; + + class TActorCoro : public IActor { + THolder<TActorCoroImpl> Impl; + + public: + TActorCoro(THolder<TActorCoroImpl> impl, ui32 activityType = IActor::ACTORLIB_COMMON) + : IActor(static_cast<TReceiveFunc>(&TActorCoro::StateFunc), activityType) + , Impl(std::move(impl)) + {} + + TAutoPtr<IEventHandle> AfterRegister(const TActorId& self, const TActorId& parent) override { + return new IEventHandle(TEvents::TSystem::Bootstrap, 0, self, parent, {}, 0); + } + + private: + STATEFN(StateFunc) { + if (Impl->ProcessEvent(ev)) { + PassAway(); + } + } + }; + +} diff --git a/library/cpp/actors/core/actor_coroutine_ut.cpp b/library/cpp/actors/core/actor_coroutine_ut.cpp new file mode 100644 index 0000000000..951512b877 --- /dev/null +++ b/library/cpp/actors/core/actor_coroutine_ut.cpp @@ -0,0 +1,141 @@ +#include "actor_coroutine.h" +#include "actorsystem.h" +#include "executor_pool_basic.h" +#include "scheduler_basic.h" +#include "events.h" +#include "event_local.h" +#include "hfunc.h" +#include <library/cpp/testing/unittest/registar.h> + +#include <util/system/sanitizers.h> + +using namespace NActors; + +Y_UNIT_TEST_SUITE(ActorCoro) { + enum { + Begin = EventSpaceBegin(TEvents::ES_USERSPACE), + Request, + Response, + Enough + }; + + struct TEvRequest: public TEventLocal<TEvRequest, Request> { + }; + + struct TEvResponse: public TEventLocal<TEvResponse, Response> { + }; + + struct TEvEnough: public TEventLocal<TEvEnough, Enough> { + }; + + class TBasicResponderActor: public TActorBootstrapped<TBasicResponderActor> { + TDeque<TActorId> RespondTo; + + public: + TBasicResponderActor() { + } + + void Bootstrap(const TActorContext& /*ctx*/) { + Become(&TBasicResponderActor::StateFunc); + } + + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + HFunc(TEvRequest, Handle); + HFunc(TEvents::TEvWakeup, Handle); + HFunc(TEvents::TEvPoisonPill, Handle); + } + } + + void Handle(TEvRequest::TPtr& ev, const TActorContext& ctx) { + RespondTo.push_back(ev->Sender); + ctx.Schedule(TDuration::Seconds(1), new TEvents::TEvWakeup); + } + + void Handle(TEvents::TEvWakeup::TPtr& /*ev*/, const TActorContext& ctx) { + ctx.Send(RespondTo.front(), new TEvResponse()); + RespondTo.pop_front(); + } + + void Handle(TEvents::TEvPoisonPill::TPtr& /*ev*/, const TActorContext& ctx) { + Die(ctx); + } + }; + + class TCoroActor: public TActorCoroImpl { + TManualEvent& DoneEvent; + TAtomic& ItemsProcessed; + bool Finish; + + public: + TCoroActor(TManualEvent& doneEvent, TAtomic& itemsProcessed) + : TActorCoroImpl(1 << 20) + , DoneEvent(doneEvent) + , ItemsProcessed(itemsProcessed) + , Finish(false) + { + } + + void Run() override { + TActorId child = GetActorContext().Register(new TBasicResponderActor); + ui32 itemsProcessed = 0; + try { + while (!Finish) { + GetActorContext().Send(child, new TEvRequest()); + THolder<IEventHandle> resp = WaitForSpecificEvent<TEvResponse>(); + UNIT_ASSERT_EQUAL(resp->GetTypeRewrite(), TEvResponse::EventType); + ++itemsProcessed; + } + } catch (const TPoisonPillException& /*ex*/) { + } + GetActorContext().Send(child, new TEvents::TEvPoisonPill); + + AtomicSet(ItemsProcessed, itemsProcessed); + DoneEvent.Signal(); + } + + void ProcessUnexpectedEvent(TAutoPtr<IEventHandle> event) override { + if (event->GetTypeRewrite() == Enough) { + Finish = true; + } + } + }; + + void Check(THolder<IEventBase> && message) { + THolder<TActorSystemSetup> setup = MakeHolder<TActorSystemSetup>(); + setup->NodeId = 0; + 
setup->ExecutorsCount = 1; + setup->Executors.Reset(new TAutoPtr<IExecutorPool>[setup->ExecutorsCount]); + for (ui32 i = 0; i < setup->ExecutorsCount; ++i) { + setup->Executors[i] = new TBasicExecutorPool(i, 5, 10, "basic"); + } + setup->Scheduler = new TBasicSchedulerThread; + + TActorSystem actorSystem(setup); + + actorSystem.Start(); + + TManualEvent doneEvent; + TAtomic itemsProcessed = 0; + TActorId actor = actorSystem.Register(new TActorCoro(MakeHolder<TCoroActor>(doneEvent, itemsProcessed))); + NanoSleep(3UL * 1000 * 1000 * 1000); + actorSystem.Send(actor, message.Release()); + doneEvent.WaitI(); + + UNIT_ASSERT(AtomicGet(itemsProcessed) >= 2); + + actorSystem.Stop(); + } + + Y_UNIT_TEST(Basic) { + if (NSan::TSanIsOn()) { + // TODO https://st.yandex-team.ru/DEVTOOLS-3154 + return; + } + Check(MakeHolder<TEvEnough>()); + } + + Y_UNIT_TEST(PoisonPill) { + Check(MakeHolder<TEvents::TEvPoisonPill>()); + } +} diff --git a/library/cpp/actors/core/actor_ut.cpp b/library/cpp/actors/core/actor_ut.cpp new file mode 100644 index 0000000000..e1b765ec72 --- /dev/null +++ b/library/cpp/actors/core/actor_ut.cpp @@ -0,0 +1,578 @@ +#include "actor.cpp" +#include "events.h" +#include "actorsystem.h" +#include "executor_pool_basic.h" +#include "scheduler_basic.h" +#include "actor_bootstrapped.h" + +#include <library/cpp/actors/util/threadparkpad.h> +#include <library/cpp/testing/unittest/registar.h> + +#include <util/generic/algorithm.h> +#include <util/system/atomic.h> +#include <util/system/rwlock.h> +#include <util/system/hp_timer.h> + +using namespace NActors; + +struct TTestEndDecorator : TDecorator { + TThreadParkPad* Pad; + TAtomic* ActorsAlive; + + TTestEndDecorator(THolder<IActor>&& actor, TThreadParkPad* pad, TAtomic* actorsAlive) + : TDecorator(std::move(actor)) + , Pad(pad) + , ActorsAlive(actorsAlive) + { + AtomicIncrement(*ActorsAlive); + } + + ~TTestEndDecorator() { + if (AtomicDecrement(*ActorsAlive) == 0) { + Pad->Unpark(); + } + } +}; + +Y_UNIT_TEST_SUITE(ActorBenchmark) { + static constexpr bool DefaultNoRealtime = true; + static constexpr ui32 DefaultSpinThreshold = 1000000; + static constexpr ui32 TotalEventsAmount = 1000; + + class TDummyActor : public TActor<TDummyActor> { + public: + TDummyActor() : TActor<TDummyActor>(&TDummyActor::StateFunc) {} + STFUNC(StateFunc) { + (void)ev; + (void)ctx; + } + }; + + enum ERole { + Leader, + Follower + }; + + class TSendReceiveActor : public TActorBootstrapped<TSendReceiveActor> { + public: + static constexpr auto ActorActivityType() { + return ACTORLIB_COMMON; + } + + TSendReceiveActor(double* elapsedTime, TActorId receiver, bool allocation, ERole role, ui32 neighbours = 0) + : EventsCounter(TotalEventsAmount) + , ElapsedTime(elapsedTime) + , Receiver(receiver) + , AllocatesMemory(allocation) + , Role(role) + , MailboxNeighboursCount(neighbours) + {} + + void Bootstrap(const TActorContext &ctx) { + if (!Receiver) { + this->Receiver = SelfId(); + } else { + EventsCounter /= 2; // We want to measure CPU requirement for one-way send + } + Timer.Reset(); + Become(&TThis::StateFunc); + for (ui32 i = 0; i < MailboxNeighboursCount; ++i) { + ctx.RegisterWithSameMailbox(new TDummyActor()); + } + if (Role == Leader) { + Send(Receiver, new TEvents::TEvPing()); + } + } + + STATEFN(StateFunc) { + if (EventsCounter == 0 && ElapsedTime != nullptr) { + *ElapsedTime = Timer.Passed() / TotalEventsAmount; + PassAway(); + } + + if (AllocatesMemory) { + Send(ev->Sender, new TEvents::TEvPing()); + } else { + std::swap(*const_cast<TActorId*>(&ev->Sender), 
*const_cast<TActorId*>(&ev->Recipient)); + ev->DropRewrite(); + TActivationContext::Send(ev.Release()); + } + EventsCounter--; + } + + private: + THPTimer Timer; + ui64 EventsCounter; + double* ElapsedTime; + TActorId Receiver; + bool AllocatesMemory; + ERole Role; + ui32 MailboxNeighboursCount; + }; + + void AddBasicPool(THolder<TActorSystemSetup>& setup, ui32 threads, bool activateEveryEvent) { + TBasicExecutorPoolConfig basic; + basic.PoolId = setup->GetExecutorsCount(); + basic.PoolName = TStringBuilder() << "b" << basic.PoolId; + basic.Threads = threads; + basic.SpinThreshold = DefaultSpinThreshold; + basic.TimePerMailbox = TDuration::Hours(1); + if (activateEveryEvent) { + basic.EventsPerMailbox = 1; + } else { + basic.EventsPerMailbox = Max<ui32>(); + } + setup->CpuManager.Basic.emplace_back(std::move(basic)); + } + + void AddUnitedPool(THolder<TActorSystemSetup>& setup, ui32 concurrency, bool activateEveryEvent) { + TUnitedExecutorPoolConfig united; + united.PoolId = setup->GetExecutorsCount(); + united.PoolName = TStringBuilder() << "u" << united.PoolId; + united.Concurrency = concurrency; + united.TimePerMailbox = TDuration::Hours(1); + if (activateEveryEvent) { + united.EventsPerMailbox = 1; + } else { + united.EventsPerMailbox = Max<ui32>(); + } + setup->CpuManager.United.emplace_back(std::move(united)); + } + + THolder<TActorSystemSetup> GetActorSystemSetup(ui32 unitedCpuCount, bool preemption) { + auto setup = MakeHolder<NActors::TActorSystemSetup>(); + setup->NodeId = 1; + setup->CpuManager.UnitedWorkers.CpuCount = unitedCpuCount; + setup->CpuManager.UnitedWorkers.SpinThresholdUs = DefaultSpinThreshold; + setup->CpuManager.UnitedWorkers.NoRealtime = DefaultNoRealtime; + if (preemption) { + setup->CpuManager.UnitedWorkers.PoolLimitUs = 500; + setup->CpuManager.UnitedWorkers.EventLimitUs = 100; + setup->CpuManager.UnitedWorkers.LimitPrecisionUs = 100; + } else { + setup->CpuManager.UnitedWorkers.PoolLimitUs = 100'000'000'000; + setup->CpuManager.UnitedWorkers.EventLimitUs = 10'000'000'000; + setup->CpuManager.UnitedWorkers.LimitPrecisionUs = 10'000'000'000; + } + setup->Scheduler = new TBasicSchedulerThread(NActors::TSchedulerConfig(512, 0)); + return setup; + } + + enum class EPoolType { + Basic, + United + }; + + THolder<TActorSystemSetup> InitActorSystemSetup(EPoolType poolType, ui32 poolsCount, ui32 threads, bool activateEveryEvent, bool preemption) { + if (poolType == EPoolType::Basic) { + THolder<TActorSystemSetup> setup = GetActorSystemSetup(0, false); + for (ui32 i = 0; i < poolsCount; ++i) { + AddBasicPool(setup, threads, activateEveryEvent); + } + return setup; + } else if (poolType == EPoolType::United) { + THolder<TActorSystemSetup> setup = GetActorSystemSetup(poolsCount * threads, preemption); + for (ui32 i = 0; i < poolsCount; ++i) { + AddUnitedPool(setup, threads, activateEveryEvent); + } + return setup; + } + Y_FAIL(); + } + + double BenchSendReceive(bool allocation, NActors::TMailboxType::EType mType, EPoolType poolType) { + THolder<TActorSystemSetup> setup = InitActorSystemSetup(poolType, 1, 1, false, false); + TActorSystem actorSystem(setup); + actorSystem.Start(); + + TThreadParkPad pad; + TAtomic actorsAlive = 0; + double elapsedTime = 0; + THolder<IActor> endActor{ + new TTestEndDecorator(THolder( + new TSendReceiveActor(&elapsedTime, {}, allocation, Leader)), &pad, &actorsAlive)}; + + actorSystem.Register(endActor.Release(), mType); + + pad.Park(); + actorSystem.Stop(); + + return 1e9 * elapsedTime; + } + + double BenchSendActivateReceive(ui32 
poolsCount, ui32 threads, bool allocation, EPoolType poolType) { + THolder<TActorSystemSetup> setup = InitActorSystemSetup(poolType, poolsCount, threads, true, false); + TActorSystem actorSystem(setup); + actorSystem.Start(); + + TThreadParkPad pad; + TAtomic actorsAlive = 0; + double elapsedTime = 0; + ui32 followerPoolId = 0; + + ui32 leaderPoolId = poolsCount == 1 ? 0 : 1; + TActorId followerId = actorSystem.Register( + new TSendReceiveActor(nullptr, {}, allocation, Follower), TMailboxType::HTSwap, followerPoolId); + THolder<IActor> leader{ + new TTestEndDecorator(THolder( + new TSendReceiveActor(&elapsedTime, followerId, allocation, Leader)), &pad, &actorsAlive)}; + actorSystem.Register(leader.Release(), TMailboxType::HTSwap, leaderPoolId); + + pad.Park(); + actorSystem.Stop(); + + return 1e9 * elapsedTime; + } + + double BenchSendActivateReceiveWithMailboxNeighbours(ui32 MailboxNeighbourActors, EPoolType poolType) { + THolder<TActorSystemSetup> setup = InitActorSystemSetup(poolType, 1, 1, false, false); + TActorSystem actorSystem(setup); + actorSystem.Start(); + + TThreadParkPad pad; + TAtomic actorsAlive = 0; + double elapsedTime = 0; + + TActorId followerId = actorSystem.Register( + new TSendReceiveActor(nullptr, {}, false, Follower, MailboxNeighbourActors), TMailboxType::HTSwap); + THolder<IActor> leader{ + new TTestEndDecorator(THolder( + new TSendReceiveActor(&elapsedTime, followerId, false, Leader, MailboxNeighbourActors)), &pad, &actorsAlive)}; + actorSystem.Register(leader.Release(), TMailboxType::HTSwap); + + pad.Park(); + actorSystem.Stop(); + + return 1e9 * elapsedTime; + } + + double BenchContentedThreads(ui32 threads, ui32 actorsPairsCount, EPoolType poolType) { + THolder<TActorSystemSetup> setup = InitActorSystemSetup(poolType, 1, threads, true, false); + TActorSystem actorSystem(setup); + actorSystem.Start(); + + TThreadParkPad pad; + TAtomic actorsAlive = 0; + THPTimer Timer; + + TVector<double> dummy(actorsPairsCount); + Timer.Reset(); + for (ui32 i = 0; i < actorsPairsCount; ++i) { + ui32 followerPoolId = 0; + ui32 leaderPoolId = 0; + TActorId followerId = actorSystem.Register( + new TSendReceiveActor(nullptr, {}, true, Follower), TMailboxType::HTSwap, followerPoolId); + THolder<IActor> leader{ + new TTestEndDecorator(THolder( + new TSendReceiveActor(&dummy[i], followerId, true, Leader)), &pad, &actorsAlive)}; + actorSystem.Register(leader.Release(), TMailboxType::HTSwap, leaderPoolId); + } + + pad.Park(); + auto elapsedTime = Timer.Passed() / TotalEventsAmount; + actorSystem.Stop(); + + return 1e9 * elapsedTime; + } + + auto Mean(const TVector<double>& data) { + return Accumulate(data.begin(), data.end(), 0.0) / data.size(); + } + + auto Deviation(const TVector<double>& data) { + auto mean = Mean(data); + double deviation = 0.0; + for (const auto& x : data) { + deviation += (x - mean) * (x - mean); + } + return std::sqrt(deviation / data.size()); + } + + struct TStats { + double Mean; + double Deviation; + TString ToString() { + return TStringBuilder() << Mean << " ± " << Deviation << " ns " << std::ceil(Deviation / Mean * 1000) / 10.0 << "%"; + } + }; + + template <typename Func> + TStats CountStats(Func func, ui32 itersCount = 5) { + TVector<double> elapsedTimes; + for (ui32 i = 0; i < itersCount; ++i) { + auto elapsedTime = func(); + elapsedTimes.push_back(elapsedTime); + } + return {Mean(elapsedTimes), Deviation(elapsedTimes)}; + } + + TVector<NActors::TMailboxType::EType> MailboxTypes = { + TMailboxType::Simple, + TMailboxType::Revolving, + 
TMailboxType::HTSwap, + TMailboxType::ReadAsFilled, + TMailboxType::TinyReadAsFilled + }; + + Y_UNIT_TEST(SendReceive1Pool1ThreadAlloc) { + for (const auto& mType : MailboxTypes) { + auto stats = CountStats([mType] { + return BenchSendReceive(true, mType, EPoolType::Basic); + }); + Cerr << stats.ToString() << " " << mType << Endl; + } + } + + Y_UNIT_TEST(SendReceive1Pool1ThreadAllocUnited) { + for (const auto& mType : MailboxTypes) { + auto stats = CountStats([mType] { + return BenchSendReceive(true, mType, EPoolType::United); + }); + Cerr << stats.ToString() << " " << mType << Endl; + } + } + + Y_UNIT_TEST(SendReceive1Pool1ThreadNoAlloc) { + for (const auto& mType : MailboxTypes) { + auto stats = CountStats([mType] { + return BenchSendReceive(false, mType, EPoolType::Basic); + }); + Cerr << stats.ToString() << " " << mType << Endl; + } + } + + Y_UNIT_TEST(SendReceive1Pool1ThreadNoAllocUnited) { + for (const auto& mType : MailboxTypes) { + auto stats = CountStats([mType] { + return BenchSendReceive(false, mType, EPoolType::United); + }); + Cerr << stats.ToString() << " " << mType << Endl; + } + } + + Y_UNIT_TEST(SendActivateReceive1Pool1ThreadAlloc) { + auto stats = CountStats([] { + return BenchSendActivateReceive(1, 1, true, EPoolType::Basic); + }); + Cerr << stats.ToString() << Endl; + } + + Y_UNIT_TEST(SendActivateReceive1Pool1ThreadAllocUnited) { + auto stats = CountStats([] { + return BenchSendActivateReceive(1, 1, true, EPoolType::United); + }); + Cerr << stats.ToString() << Endl; + } + + Y_UNIT_TEST(SendActivateReceive1Pool1ThreadNoAlloc) { + auto stats = CountStats([] { + return BenchSendActivateReceive(1, 1, false, EPoolType::Basic); + }); + Cerr << stats.ToString() << Endl; + } + + Y_UNIT_TEST(SendActivateReceive1Pool1ThreadNoAllocUnited) { + auto stats = CountStats([] { + return BenchSendActivateReceive(1, 1, false, EPoolType::United); + }); + Cerr << stats.ToString() << Endl; + } + + Y_UNIT_TEST(SendActivateReceive1Pool2ThreadsAlloc) { + auto stats = CountStats([] { + return BenchSendActivateReceive(1, 2, true, EPoolType::Basic); + }); + Cerr << stats.ToString() << Endl; + } + + Y_UNIT_TEST(SendActivateReceive1Pool2ThreadsAllocUnited) { + auto stats = CountStats([] { + return BenchSendActivateReceive(1, 2, true, EPoolType::United); + }); + Cerr << stats.ToString() << Endl; + } + + Y_UNIT_TEST(SendActivateReceive1Pool2ThreadsNoAlloc) { + auto stats = CountStats([] { + return BenchSendActivateReceive(1, 2, false, EPoolType::Basic); + }); + Cerr << stats.ToString() << Endl; + } + + Y_UNIT_TEST(SendActivateReceive1Pool2ThreadsNoAllocUnited) { + auto stats = CountStats([] { + return BenchSendActivateReceive(1, 2, false, EPoolType::United); + }); + Cerr << stats.ToString() << Endl; + } + + Y_UNIT_TEST(SendActivateReceive2Pool1ThreadAlloc) { + auto stats = CountStats([] { + return BenchSendActivateReceive(2, 1, true, EPoolType::Basic); + }); + Cerr << stats.ToString() << Endl; + } + + Y_UNIT_TEST(SendActivateReceive2Pool1ThreadAllocUnited) { + auto stats = CountStats([] { + return BenchSendActivateReceive(2, 1, true, EPoolType::United); + }); + Cerr << stats.ToString() << Endl; + } + + Y_UNIT_TEST(SendActivateReceive2Pool1ThreadNoAlloc) { + auto stats = CountStats([] { + return BenchSendActivateReceive(2, 1, false, EPoolType::Basic); + }); + Cerr << stats.ToString() << Endl; + } + + Y_UNIT_TEST(SendActivateReceive2Pool1ThreadNoAllocUnited) { + auto stats = CountStats([] { + return BenchSendActivateReceive(2, 1, false, EPoolType::United); + }); + Cerr << stats.ToString() << Endl; + } 
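+ // A note on reading the numbers (inferred from TSendReceiveActor above): the Alloc/NoAlloc variants differ only
+ // in the AllocatesMemory flag. Alloc constructs a fresh TEvPing on every hop, while NoAlloc reuses the incoming
+ // handle by swapping Sender and Recipient, so the gap between the two roughly isolates the cost of event
+ // allocation from the cost of mailbox activation.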
+ + void RunBenchContentedThreads(ui32 threads, EPoolType poolType) { + for (ui32 actorPairs = 1; actorPairs <= 2 * threads; actorPairs++) { + auto stats = CountStats([threads, actorPairs, poolType] { + return BenchContentedThreads(threads, actorPairs, poolType); + }); + Cerr << stats.ToString() << " actorPairs: " << actorPairs << Endl; + } + } + + Y_UNIT_TEST(SendActivateReceive1Pool1Threads) { RunBenchContentedThreads(1, EPoolType::Basic); } + Y_UNIT_TEST(SendActivateReceive1Pool1ThreadsUnited) { RunBenchContentedThreads(1, EPoolType::United); } + Y_UNIT_TEST(SendActivateReceive1Pool2Threads) { RunBenchContentedThreads(2, EPoolType::Basic); } + Y_UNIT_TEST(SendActivateReceive1Pool2ThreadsUnited) { RunBenchContentedThreads(2, EPoolType::United); } + Y_UNIT_TEST(SendActivateReceive1Pool3Threads) { RunBenchContentedThreads(3, EPoolType::Basic); } + Y_UNIT_TEST(SendActivateReceive1Pool3ThreadsUnited) { RunBenchContentedThreads(3, EPoolType::United); } + Y_UNIT_TEST(SendActivateReceive1Pool4Threads) { RunBenchContentedThreads(4, EPoolType::Basic); } + Y_UNIT_TEST(SendActivateReceive1Pool4ThreadsUnited) { RunBenchContentedThreads(4, EPoolType::United); } + Y_UNIT_TEST(SendActivateReceive1Pool5Threads) { RunBenchContentedThreads(5, EPoolType::Basic); } + Y_UNIT_TEST(SendActivateReceive1Pool5ThreadsUnited) { RunBenchContentedThreads(5, EPoolType::United); } + Y_UNIT_TEST(SendActivateReceive1Pool6Threads) { RunBenchContentedThreads(6, EPoolType::Basic); } + Y_UNIT_TEST(SendActivateReceive1Pool6ThreadsUnited) { RunBenchContentedThreads(6, EPoolType::United); } + Y_UNIT_TEST(SendActivateReceive1Pool7Threads) { RunBenchContentedThreads(7, EPoolType::Basic); } + Y_UNIT_TEST(SendActivateReceive1Pool7ThreadsUnited) { RunBenchContentedThreads(7, EPoolType::United); } + Y_UNIT_TEST(SendActivateReceive1Pool8Threads) { RunBenchContentedThreads(8, EPoolType::Basic); } + Y_UNIT_TEST(SendActivateReceive1Pool8ThreadsUnited) { RunBenchContentedThreads(8, EPoolType::United); } + + Y_UNIT_TEST(SendActivateReceiveWithMailboxNeighbours) { + TVector<ui32> NeighbourActors = {0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 32, 64, 128, 256}; + for (const auto& neighbour : NeighbourActors) { + auto stats = CountStats([neighbour] { + return BenchSendActivateReceiveWithMailboxNeighbours(neighbour, EPoolType::Basic); + }); + Cerr << stats.ToString() << " neighbourActors: " << neighbour << Endl; + } + } + + Y_UNIT_TEST(SendActivateReceiveWithMailboxNeighboursUnited) { + TVector<ui32> NeighbourActors = {0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 32, 64, 128, 256}; + for (const auto& neighbour : NeighbourActors) { + auto stats = CountStats([neighbour] { + return BenchSendActivateReceiveWithMailboxNeighbours(neighbour, EPoolType::United); + }); + Cerr << stats.ToString() << " neighbourActors: " << neighbour << Endl; + } + } +} + +Y_UNIT_TEST_SUITE(TestDecorator) { + struct TPingDecorator : TDecorator { + TAutoPtr<IEventHandle> SavedEvent = nullptr; + ui64* Counter; + + TPingDecorator(THolder<IActor>&& actor, ui64* counter) + : TDecorator(std::move(actor)) + , Counter(counter) + { + } + + bool DoBeforeReceiving(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx) override { + *Counter += 1; + if (ev->Type != TEvents::THelloWorld::Pong) { + TAutoPtr<IEventHandle> pingEv = new IEventHandle(SelfId(), SelfId(), new TEvents::TEvPing()); + SavedEvent = ev; + Actor->Receive(pingEv, ctx); + } else { + Actor->Receive(SavedEvent, ctx); + } + return false; + } + }; + + struct TPongDecorator : TDecorator { + ui64* Counter; + + TPongDecorator(THolder<IActor>&& actor, 
ui64* counter) + : TDecorator(std::move(actor)) + , Counter(counter) + { + } + + bool DoBeforeReceiving(TAutoPtr<IEventHandle>& ev, const TActorContext&) override { + *Counter += 1; + if (ev->Type == TEvents::THelloWorld::Ping) { + TAutoPtr<IEventHandle> pongEv = new IEventHandle(SelfId(), SelfId(), new TEvents::TEvPong()); + Send(SelfId(), new TEvents::TEvPong()); + return false; + } + return true; + } + }; + + struct TTestActor : TActorBootstrapped<TTestActor> { + static constexpr char ActorName[] = "TestActor"; + + void Bootstrap() + { + const auto& activityTypeIndex = GetActivityType(); + Y_ENSURE(activityTypeIndex < GetActivityTypeCount()); + Y_ENSURE(GetActivityTypeName(activityTypeIndex) == "TestActor"); + PassAway(); + } + }; + + Y_UNIT_TEST(Basic) { + THolder<TActorSystemSetup> setup = MakeHolder<TActorSystemSetup>(); + setup->NodeId = 0; + setup->ExecutorsCount = 1; + setup->Executors.Reset(new TAutoPtr<IExecutorPool>[setup->ExecutorsCount]); + for (ui32 i = 0; i < setup->ExecutorsCount; ++i) { + setup->Executors[i] = new TBasicExecutorPool(i, 1, 10, "basic"); + } + setup->Scheduler = new TBasicSchedulerThread; + + TActorSystem actorSystem(setup); + actorSystem.Start(); + + THolder<IActor> innerActor = MakeHolder<TTestActor>(); + ui64 pongCounter = 0; + THolder<IActor> pongActor = MakeHolder<TPongDecorator>(std::move(innerActor), &pongCounter); + ui64 pingCounter = 0; + THolder<IActor> pingActor = MakeHolder<TPingDecorator>(std::move(pongActor), &pingCounter); + + TThreadParkPad pad; + TAtomic actorsAlive = 0; + + THolder<IActor> endActor = MakeHolder<TTestEndDecorator>(std::move(pingActor), &pad, &actorsAlive); + actorSystem.Register(endActor.Release(), TMailboxType::HTSwap); + + pad.Park(); + actorSystem.Stop(); + UNIT_ASSERT(pongCounter == 2 && pingCounter == 2); + } + + Y_UNIT_TEST(LocalProcessKey) { + static constexpr char ActorName[] = "TestActor"; + + UNIT_ASSERT((TEnumProcessKey<TActorActivityTag, IActor::EActorActivity>::GetName(IActor::INTERCONNECT_PROXY_TCP) == "INTERCONNECT_PROXY_TCP")); + + UNIT_ASSERT((TLocalProcessKey<TActorActivityTag, ActorName>::GetName() == ActorName)); + UNIT_ASSERT((TEnumProcessKey<TActorActivityTag, IActor::EActorActivity>::GetIndex(IActor::INTERCONNECT_PROXY_TCP) == IActor::INTERCONNECT_PROXY_TCP)); + } +} diff --git a/library/cpp/actors/core/actorid.cpp b/library/cpp/actors/core/actorid.cpp new file mode 100644 index 0000000000..ccda035eac --- /dev/null +++ b/library/cpp/actors/core/actorid.cpp @@ -0,0 +1,34 @@ +#include "actorid.h" +#include <util/string/builder.h> +#include <util/string/cast.h> + +namespace NActors { + void TActorId::Out(IOutputStream& o) const { + o << "[" << NodeId() << ":" << LocalId() << ":" << Hint() << "]"; + } + + TString TActorId::ToString() const { + TString x; + TStringOutput o(x); + Out(o); + return x; + } + + bool TActorId::Parse(const char* buf, ui32 sz) { + if (sz < 4 || buf[0] != '[' || buf[sz - 1] != ']') + return false; + + size_t semicolons[2]; + TStringBuf str(buf, sz); + semicolons[0] = str.find(':', 1); + if (semicolons[0] == TStringBuf::npos) + return false; + semicolons[1] = str.find(':', semicolons[0] + 1); + if (semicolons[1] == TStringBuf::npos) + return false; + + bool success = TryFromString(buf + 1, semicolons[0] - 1, Raw.N.NodeId) && TryFromString(buf + semicolons[0] + 1, semicolons[1] - semicolons[0] - 1, Raw.N.LocalId) && TryFromString(buf + semicolons[1] + 1, sz - semicolons[1] - 2, Raw.N.Hint); + + return success; + } +} diff --git a/library/cpp/actors/core/actorid.h 
b/library/cpp/actors/core/actorid.h new file mode 100644 index 0000000000..d972b1a0ff --- /dev/null +++ b/library/cpp/actors/core/actorid.h @@ -0,0 +1,196 @@ +#pragma once + +#include "defs.h" +#include <util/stream/output.h> // for IOutputStream +#include <util/generic/hash.h> + +namespace NActors { + // used as global uniq address of actor + // also could be used to transport service id (12 byte strings placed in hint-localid) + // highest 1 bit of node - mark of service id + // next 11 bits of node-id - pool id + // next 20 bits - node id itself + + struct TActorId { + static constexpr ui32 MaxServiceIDLength = 12; + static constexpr ui32 MaxPoolID = 0x000007FF; + static constexpr ui32 MaxNodeId = 0x000FFFFF; + static constexpr ui32 PoolIndexShift = 20; + static constexpr ui32 PoolIndexMask = MaxPoolID << PoolIndexShift; + static constexpr ui32 ServiceMask = 0x80000000; + static constexpr ui32 NodeIdMask = MaxNodeId; + + private: + union { + struct { + ui64 LocalId; + ui32 Hint; + ui32 NodeId; + } N; + + struct { + ui64 X1; + ui64 X2; + } X; + + ui8 Buf[16]; + } Raw; + + public: + TActorId() noexcept { + Raw.X.X1 = 0; + Raw.X.X2 = 0; + } + + explicit TActorId(ui32 nodeId, ui32 poolId, ui64 localId, ui32 hint) noexcept { + Y_VERIFY_DEBUG(poolId <= MaxPoolID); + Raw.N.LocalId = localId; + Raw.N.Hint = hint; + Raw.N.NodeId = nodeId | (poolId << PoolIndexShift); + } + + explicit TActorId(ui32 nodeId, const TStringBuf& x) noexcept { + Y_VERIFY(x.size() <= MaxServiceIDLength, "service id is too long"); + Raw.N.LocalId = 0; + Raw.N.Hint = 0; + Raw.N.NodeId = nodeId | ServiceMask; + memcpy(Raw.Buf, x.data(), x.size()); + } + + explicit TActorId(ui64 x1, ui64 x2) noexcept { + Raw.X.X1 = x1; + Raw.X.X2 = x2; + } + + explicit operator bool() const noexcept { + return Raw.X.X1 != 0 || Raw.X.X2 != 0; + } + + ui64 LocalId() const noexcept { + return Raw.N.LocalId; + } + + ui32 Hint() const noexcept { + return Raw.N.Hint; + } + + ui32 NodeId() const noexcept { + return Raw.N.NodeId & NodeIdMask; + } + + bool IsService() const noexcept { + return (Raw.N.NodeId & ServiceMask); + } + + TStringBuf ServiceId() const noexcept { + Y_VERIFY_DEBUG(IsService()); + return TStringBuf((const char*)Raw.Buf, MaxServiceIDLength); + } + + static ui32 PoolIndex(ui32 nodeid) noexcept { + return ((nodeid & PoolIndexMask) >> PoolIndexShift); + } + + ui32 PoolID() const noexcept { + return PoolIndex(Raw.N.NodeId); + } + + ui64 RawX1() const noexcept { + return Raw.X.X1; + } + + ui64 RawX2() const noexcept { + return Raw.X.X2; + } + + bool operator<(const TActorId& x) const noexcept { + const ui64 s1 = Raw.X.X1; + const ui64 s2 = Raw.X.X2; + const ui64 x1 = x.Raw.X.X1; + const ui64 x2 = x.Raw.X.X2; + + return (s1 != x1) ? 
(s1 < x1) : (s2 < x2); + } + + bool operator!=(const TActorId& x) const noexcept { + return Raw.X.X1 != x.Raw.X.X1 || Raw.X.X2 != x.Raw.X.X2; + } + + bool operator==(const TActorId& x) const noexcept { + return !(x != *this); + } + + ui64 Hash() const noexcept { + const ui32* x = (const ui32*)Raw.Buf; + + const ui64 x1 = x[0] * 0x001DFF3D8DC48F5Dull; + const ui64 x2 = x[1] * 0x179CA10C9242235Dull; + const ui64 x3 = x[2] * 0x0F530CAD458B0FB1ull; + const ui64 x4 = x[3] * 0xB5026F5AA96619E9ull; + + const ui64 z1 = x1 + x2; + const ui64 z2 = x3 + x4; + + const ui64 sum = 0x5851F42D4C957F2D + z1 + z2; + + return (sum >> 32) | (sum << 32); + } + + ui32 Hash32() const noexcept { + const ui32* x = (const ui32*)Raw.Buf; + + const ui64 x1 = x[0] * 0x001DFF3D8DC48F5Dull; + const ui64 x2 = x[1] * 0x179CA10C9242235Dull; + const ui64 x3 = x[2] * 0x0F530CAD458B0FB1ull; + const ui64 x4 = x[3] * 0xB5026F5AA96619E9ull; + + const ui64 z1 = x1 + x2; + const ui64 z2 = x3 + x4; + + const ui64 sum = 0x5851F42D4C957F2D + z1 + z2; + + return sum >> 32; + } + + struct THash { + ui64 operator()(const TActorId& actorId) const noexcept { + return actorId.Hash(); + } + }; + + struct THash32 { + ui64 operator()(const TActorId& actorId) const noexcept { + return actorId.Hash(); + } + }; + + struct TOrderedCmp { + bool operator()(const TActorId &left, const TActorId &right) const noexcept { + Y_VERIFY_DEBUG(!left.IsService() && !right.IsService(), "ordered compare works for plain actorids only"); + const ui32 n1 = left.NodeId(); + const ui32 n2 = right.NodeId(); + + return (n1 != n2) ? (n1 < n2) : left.LocalId() < right.LocalId(); + } + }; + + TString ToString() const; + void Out(IOutputStream& o) const; + bool Parse(const char* buf, ui32 sz); + }; + + static_assert(sizeof(TActorId) == 16, "expect sizeof(TActorId) == 16"); + static_assert(MaxPools < TActorId::MaxPoolID); // current implementation of united pool has limit MaxPools on pool id +} + +template <> +inline void Out<NActors::TActorId>(IOutputStream& o, const NActors::TActorId& x) { + return x.Out(o); +} + +template <> +struct THash<NActors::TActorId> { + inline ui64 operator()(const NActors::TActorId& x) const { + return x.Hash(); + } +}; diff --git a/library/cpp/actors/core/actorsystem.cpp b/library/cpp/actors/core/actorsystem.cpp new file mode 100644 index 0000000000..c58698a206 --- /dev/null +++ b/library/cpp/actors/core/actorsystem.cpp @@ -0,0 +1,277 @@ +#include "defs.h" +#include "actorsystem.h" +#include "callstack.h" +#include "cpu_manager.h" +#include "mailbox.h" +#include "events.h" +#include "interconnect.h" +#include "servicemap.h" +#include "scheduler_queue.h" +#include "scheduler_actor.h" +#include "log.h" +#include "probes.h" +#include "ask.h" +#include <library/cpp/actors/util/affinity.h> +#include <library/cpp/actors/util/datetime.h> +#include <util/generic/hash.h> +#include <util/system/rwlock.h> +#include <util/random/random.h> + +namespace NActors { + LWTRACE_USING(ACTORLIB_PROVIDER); + + struct TActorSystem::TServiceMap : TNonCopyable { + NActors::TServiceMap<TActorId, TActorId, TActorId::THash> LocalMap; + TTicketLock Lock; + + TActorId RegisterLocalService(const TActorId& serviceId, const TActorId& actorId) { + TTicketLock::TGuard guard(&Lock); + const TActorId old = LocalMap.Update(serviceId, actorId); + return old; + } + + TActorId LookupLocal(const TActorId& x) { + return LocalMap.Find(x); + } + }; + + TActorSystem::TActorSystem(THolder<TActorSystemSetup>& setup, void* appData, + TIntrusivePtr<NLog::TSettings> loggerSettings) + : 
NodeId(setup->NodeId) + , CpuManager(new TCpuManager(setup)) + , ExecutorPoolCount(CpuManager->GetExecutorsCount()) + , Scheduler(setup->Scheduler) + , InterconnectCount((ui32)setup->Interconnect.ProxyActors.size()) + , CurrentTimestamp(0) + , CurrentMonotonic(0) + , CurrentIDCounter(RandomNumber<ui64>()) + , SystemSetup(setup.Release()) + , DefSelfID(NodeId, "actorsystem") + , AppData0(appData) + , LoggerSettings0(loggerSettings) + , StartExecuted(false) + , StopExecuted(false) + , CleanupExecuted(false) + { + ServiceMap.Reset(new TServiceMap()); + } + + TActorSystem::~TActorSystem() { + Cleanup(); + } + + bool TActorSystem::Send(TAutoPtr<IEventHandle> ev) const { + if (Y_UNLIKELY(!ev)) + return false; + +#ifdef USE_ACTOR_CALLSTACK + ev->Callstack.TraceIfEmpty(); +#endif + + TActorId recipient = ev->GetRecipientRewrite(); + const ui32 recpNodeId = recipient.NodeId(); + + if (recpNodeId != NodeId && recpNodeId != 0) { + // if recipient is not local one - rewrite with forward instruction + Y_VERIFY_DEBUG(!ev->HasEvent() || ev->GetBase()->IsSerializable()); + Y_VERIFY(ev->Recipient == recipient, + "Event rewrite from %s to %s would be lost via interconnect", + ev->Recipient.ToString().c_str(), + recipient.ToString().c_str()); + recipient = InterconnectProxy(recpNodeId); + ev->Rewrite(TEvInterconnect::EvForward, recipient); + } + if (recipient.IsService()) { + TActorId target = ServiceMap->LookupLocal(recipient); + if (!target && IsInterconnectProxyId(recipient) && ProxyWrapperFactory) { + const TActorId actorId = ProxyWrapperFactory(const_cast<TActorSystem*>(this), + GetInterconnectProxyNode(recipient)); + with_lock(ProxyCreationLock) { + target = ServiceMap->LookupLocal(recipient); + if (!target) { + target = actorId; + ServiceMap->RegisterLocalService(recipient, target); + } + } + if (target != actorId) { + // a race has occured, terminate newly created actor + Send(new IEventHandle(TEvents::TSystem::Poison, 0, actorId, {}, nullptr, 0)); + } + } + recipient = target; + ev->Rewrite(ev->GetTypeRewrite(), recipient); + } + + Y_VERIFY_DEBUG(recipient == ev->GetRecipientRewrite()); + const ui32 recpPool = recipient.PoolID(); + if (recipient && recpPool < ExecutorPoolCount) { + if (CpuManager->GetExecutorPool(recpPool)->Send(ev)) { + return true; + } + } + + Send(ev->ForwardOnNondelivery(TEvents::TEvUndelivered::ReasonActorUnknown)); + return false; + } + + bool TActorSystem::Send(const TActorId& recipient, IEventBase* ev, ui32 flags) const { + return this->Send(new IEventHandle(recipient, DefSelfID, ev, flags)); + } + + void TActorSystem::Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) const { + Schedule(deadline - Timestamp(), ev, cookie); + } + + void TActorSystem::Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) const { + const auto current = Monotonic(); + if (deadline < current) + deadline = current; + + TTicketLock::TGuard guard(&ScheduleLock); + ScheduleQueue->Writer.Push(deadline.MicroSeconds(), ev.Release(), cookie); + } + + void TActorSystem::Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) const { + const auto deadline = Monotonic() + delta; + + TTicketLock::TGuard guard(&ScheduleLock); + ScheduleQueue->Writer.Push(deadline.MicroSeconds(), ev.Release(), cookie); + } + + TActorId TActorSystem::Register(IActor* actor, TMailboxType::EType mailboxType, ui32 executorPool, ui64 revolvingCounter, + const TActorId& parentId) { + Y_VERIFY(executorPool < ExecutorPoolCount, "executorPool# %" PRIu32 
", ExecutorPoolCount# %" PRIu32, + (ui32)executorPool, (ui32)ExecutorPoolCount); + return CpuManager->GetExecutorPool(executorPool)->Register(actor, mailboxType, revolvingCounter, parentId); + } + + NThreading::TFuture<THolder<IEventBase>> TActorSystem::AskGeneric(TMaybe<ui32> expectedEventType, + TActorId recipient, THolder<IEventBase> event, + TDuration timeout) { + auto promise = NThreading::NewPromise<THolder<IEventBase>>(); + Register(MakeAskActor(expectedEventType, recipient, std::move(event), timeout, promise).Release()); + return promise.GetFuture(); + } + + ui64 TActorSystem::AllocateIDSpace(ui64 count) { + Y_VERIFY_DEBUG(count < Max<ui32>() / 65536); + + static_assert(sizeof(TAtomic) == sizeof(ui64), "expect sizeof(TAtomic) == sizeof(ui64)"); + + // get high 32 bits as seconds from epoch + // it could wrap every century, but we don't expect any actor-reference to live this long so such wrap will do no harm + const ui64 timeFromEpoch = TInstant::MicroSeconds(RelaxedLoad(&CurrentTimestamp)).Seconds(); + + // get low 32 bits as counter value + ui32 lowPartEnd = (ui32)(AtomicAdd(CurrentIDCounter, count)); + while (lowPartEnd < count) // if our request crosses 32bit boundary - retry + lowPartEnd = (ui32)(AtomicAdd(CurrentIDCounter, count)); + + const ui64 lowPart = lowPartEnd - count; + const ui64 ret = (timeFromEpoch << 32) | lowPart; + + return ret; + } + + TActorId TActorSystem::InterconnectProxy(ui32 destinationNode) const { + if (destinationNode < InterconnectCount) + return Interconnect[destinationNode]; + else if (destinationNode != NodeId) + return MakeInterconnectProxyId(destinationNode); + else + return TActorId(); + } + + ui32 TActorSystem::BroadcastToProxies(const std::function<IEventHandle*(const TActorId&)>& eventFabric) { + // TODO: get rid of this method + for (ui32 i = 0; i < InterconnectCount; ++i) { + Send(eventFabric(Interconnect[i])); + } + return InterconnectCount; + } + + TActorId TActorSystem::LookupLocalService(const TActorId& x) const { + return ServiceMap->LookupLocal(x); + } + + TActorId TActorSystem::RegisterLocalService(const TActorId& serviceId, const TActorId& actorId) { + // TODO: notify old actor about demotion + return ServiceMap->RegisterLocalService(serviceId, actorId); + } + + void TActorSystem::GetPoolStats(ui32 poolId, TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const { + CpuManager->GetPoolStats(poolId, poolStats, statsCopy); + } + + void TActorSystem::Start() { + Y_VERIFY(StartExecuted == false); + StartExecuted = true; + + ScheduleQueue.Reset(new NSchedulerQueue::TQueueType()); + TVector<NSchedulerQueue::TReader*> scheduleReaders; + scheduleReaders.push_back(&ScheduleQueue->Reader); + CpuManager->PrepareStart(scheduleReaders, this); + Scheduler->Prepare(this, &CurrentTimestamp, &CurrentMonotonic); + Scheduler->PrepareSchedules(&scheduleReaders.front(), (ui32)scheduleReaders.size()); + + // setup interconnect proxies + { + const TInterconnectSetup& setup = SystemSetup->Interconnect; + Interconnect.Reset(new TActorId[InterconnectCount + 1]); + for (ui32 i = 0, e = InterconnectCount; i != e; ++i) { + const TActorSetupCmd& x = setup.ProxyActors[i]; + if (x.Actor) { + Interconnect[i] = Register(x.Actor, x.MailboxType, x.PoolId, i); + Y_VERIFY(!!Interconnect[i]); + } + } + ProxyWrapperFactory = std::move(SystemSetup->Interconnect.ProxyWrapperFactory); + } + + // setup local services + { + for (ui32 i = 0, e = (ui32)SystemSetup->LocalServices.size(); i != e; ++i) { + const std::pair<TActorId, TActorSetupCmd>& x = 
SystemSetup->LocalServices[i]; + const TActorId xid = Register(x.second.Actor, x.second.MailboxType, x.second.PoolId, i); + Y_VERIFY(!!xid); + if (!!x.first) + RegisterLocalService(x.first, xid); + } + } + + // ok, setup is complete, we can destroy the setup config + SystemSetup.Destroy(); + + Scheduler->PrepareStart(); + CpuManager->Start(); + Send(MakeSchedulerActorId(), new TEvSchedulerInitialize(scheduleReaders, &CurrentTimestamp, &CurrentMonotonic)); + Scheduler->Start(); + } + + void TActorSystem::Stop() { + if (StopExecuted || !StartExecuted) + return; + + StopExecuted = true; + + for (auto&& fn : std::exchange(DeferredPreStop, {})) { + fn(); + } + + Scheduler->PrepareStop(); + CpuManager->PrepareStop(); + Scheduler->Stop(); + CpuManager->Shutdown(); + } + + void TActorSystem::Cleanup() { + Stop(); + if (CleanupExecuted || !StartExecuted) + return; + CleanupExecuted = true; + CpuManager->Cleanup(); + Scheduler.Destroy(); + } + + ui32 TActorSystem::MemProfActivityBase; +} diff --git a/library/cpp/actors/core/actorsystem.h b/library/cpp/actors/core/actorsystem.h new file mode 100644 index 0000000000..40499d7586 --- /dev/null +++ b/library/cpp/actors/core/actorsystem.h @@ -0,0 +1,367 @@ +#pragma once + +#include "defs.h" + +#include "actor.h" +#include "balancer.h" +#include "config.h" +#include "event.h" +#include "log_settings.h" +#include "scheduler_cookie.h" +#include "mon_stats.h" + +#include <library/cpp/threading/future/future.h> +#include <library/cpp/actors/util/ticket_lock.h> + +#include <util/generic/vector.h> +#include <util/datetime/base.h> +#include <util/system/mutex.h> + +namespace NActors { + class TActorSystem; + class TCpuManager; + class IExecutorPool; + struct TWorkerContext; + + inline TActorId MakeInterconnectProxyId(ui32 destNodeId) { + char data[12]; + memcpy(data, "ICProxy@", 8); + memcpy(data + 8, &destNodeId, sizeof(ui32)); + return TActorId(0, TStringBuf(data, 12)); + } + + inline bool IsInterconnectProxyId(const TActorId& actorId) { + return actorId.IsService() && !memcmp(actorId.ServiceId().data(), "ICProxy@", 8); + } + + inline ui32 GetInterconnectProxyNode(const TActorId& actorId) { + ui32 nodeId; + memcpy(&nodeId, actorId.ServiceId().data() + 8, sizeof(ui32)); + return nodeId; + } + + namespace NSchedulerQueue { + class TReader; + struct TQueueType; + } + + class IExecutorPool : TNonCopyable { + public: + const ui32 PoolId; + + TAtomic ActorRegistrations; + TAtomic DestroyedActors; + + IExecutorPool(ui32 poolId) + : PoolId(poolId) + , ActorRegistrations(0) + , DestroyedActors(0) + { + } + + virtual ~IExecutorPool() { + } + + // for workers + virtual ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) = 0; + virtual void ReclaimMailbox(TMailboxType::EType mailboxType, ui32 hint, TWorkerId workerId, ui64 revolvingCounter) = 0; + + /** + * Schedule a one-shot event that will be sent at the given time point in the future. + * + * @param deadline the wallclock time point in the future when the event must be sent + * @param ev the event to send + * @param cookie cookie that will be piggybacked with the event + * @param workerId index of the thread which will perform event dispatching + */ + virtual void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) = 0; + + /** + * Schedule a one-shot event that will be sent at the given time point in the future.
+ * + * @param deadline the monotonic time point in the future when the event must be sent + * @param ev the event to send + * @param cookie cookie that will be piggybacked with the event + * @param workerId index of the thread which will perform event dispatching + */ + virtual void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) = 0; + + /** + * Schedule a one-shot event that will be sent after the given delay. + * + * @param delta the time from now to delay event sending + * @param ev the event to send + * @param cookie cookie that will be piggybacked with the event + * @param workerId index of the thread which will perform event dispatching + */ + virtual void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) = 0; + + // for actorsystem + virtual bool Send(TAutoPtr<IEventHandle>& ev) = 0; + virtual void ScheduleActivation(ui32 activation) = 0; + virtual void ScheduleActivationEx(ui32 activation, ui64 revolvingCounter) = 0; + virtual TActorId Register(IActor* actor, TMailboxType::EType mailboxType, ui64 revolvingCounter, const TActorId& parentId) = 0; + virtual TActorId Register(IActor* actor, TMailboxHeader* mailbox, ui32 hint, const TActorId& parentId) = 0; + + // lifecycle stuff + virtual void Prepare(TActorSystem* actorSystem, NSchedulerQueue::TReader** scheduleReaders, ui32* scheduleSz) = 0; + virtual void Start() = 0; + virtual void PrepareStop() = 0; + virtual void Shutdown() = 0; + virtual bool Cleanup() = 0; + + virtual void GetCurrentStats(TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const { + // TODO: make pure virtual and override everywhere + Y_UNUSED(poolStats); + Y_UNUSED(statsCopy); + } + + virtual TString GetName() const { + return TString(); + } + + virtual ui32 GetThreads() const { + return 1; + } + + // generic + virtual TAffinity* Affinity() const = 0; + + virtual void SetRealTimeMode() const {} + }; + + // could be a proxy to in-pool schedulers (for NUMA-aware executors) + class ISchedulerThread : TNonCopyable { + public: + virtual ~ISchedulerThread() { + } + + virtual void Prepare(TActorSystem* actorSystem, volatile ui64* currentTimestamp, volatile ui64* currentMonotonic) = 0; + virtual void PrepareSchedules(NSchedulerQueue::TReader** readers, ui32 scheduleReadersCount) = 0; + virtual void PrepareStart() { /* empty */ } + virtual void Start() = 0; + virtual void PrepareStop() = 0; + virtual void Stop() = 0; + }; + + struct TActorSetupCmd { + TMailboxType::EType MailboxType; + ui32 PoolId; + IActor* Actor; + + TActorSetupCmd() + : MailboxType(TMailboxType::HTSwap) + , PoolId(0) + , Actor(nullptr) + { + } + + TActorSetupCmd(IActor* actor, TMailboxType::EType mailboxType, ui32 poolId) + : MailboxType(mailboxType) + , PoolId(poolId) + , Actor(actor) + { + } + + void Set(IActor* actor, TMailboxType::EType mailboxType, ui32 poolId) { + MailboxType = mailboxType; + PoolId = poolId; + Actor = actor; + } + }; + + using TProxyWrapperFactory = std::function<TActorId(TActorSystem*, ui32)>; + + struct TInterconnectSetup { + TVector<TActorSetupCmd> ProxyActors; + TProxyWrapperFactory ProxyWrapperFactory; + }; + + struct TActorSystemSetup { + ui32 NodeId = 0; + + // Either Executors or CpuManager must be initialized + ui32 ExecutorsCount = 0; + TArrayHolder<TAutoPtr<IExecutorPool>> Executors; + + TAutoPtr<IBalancer> Balancer; // main implementation will be implicitly created if not set + + TCpuManagerConfig CpuManager; + + TAutoPtr<ISchedulerThread> Scheduler; + ui32
MaxActivityType = 5; // for default entries + + TInterconnectSetup Interconnect; + + using TLocalServices = TVector<std::pair<TActorId, TActorSetupCmd>>; + TLocalServices LocalServices; + + ui32 GetExecutorsCount() const { + return Executors ? ExecutorsCount : CpuManager.GetExecutorsCount(); + } + + TString GetPoolName(ui32 poolId) const { + return Executors ? Executors[poolId]->GetName() : CpuManager.GetPoolName(poolId); + } + + ui32 GetThreads(ui32 poolId) const { + return Executors ? Executors[poolId]->GetThreads() : CpuManager.GetThreads(poolId); + } + }; + + class TActorSystem : TNonCopyable { + struct TServiceMap; + + public: + const ui32 NodeId; + + private: + THolder<TCpuManager> CpuManager; + const ui32 ExecutorPoolCount; + + TAutoPtr<ISchedulerThread> Scheduler; + THolder<TServiceMap> ServiceMap; + + const ui32 InterconnectCount; + TArrayHolder<TActorId> Interconnect; + + volatile ui64 CurrentTimestamp; + volatile ui64 CurrentMonotonic; + volatile ui64 CurrentIDCounter; + + THolder<NSchedulerQueue::TQueueType> ScheduleQueue; + mutable TTicketLock ScheduleLock; + + friend class TExecutorThread; + + THolder<TActorSystemSetup> SystemSetup; + TActorId DefSelfID; + void* AppData0; + TIntrusivePtr<NLog::TSettings> LoggerSettings0; + TProxyWrapperFactory ProxyWrapperFactory; + TMutex ProxyCreationLock; + + bool StartExecuted; + bool StopExecuted; + bool CleanupExecuted; + + std::deque<std::function<void()>> DeferredPreStop; + public: + TActorSystem(THolder<TActorSystemSetup>& setup, void* appData = nullptr, + TIntrusivePtr<NLog::TSettings> loggerSettings = TIntrusivePtr<NLog::TSettings>(nullptr)); + ~TActorSystem(); + + void Start(); + void Stop(); + void Cleanup(); + + TActorId Register(IActor* actor, TMailboxType::EType mailboxType = TMailboxType::HTSwap, ui32 executorPool = 0, + ui64 revolvingCounter = 0, const TActorId& parentId = TActorId()); + + bool Send(TAutoPtr<IEventHandle> ev) const; + bool Send(const TActorId& recipient, IEventBase* ev, ui32 flags = 0) const; + + /** + * Schedule a one-shot event that will be sent at the given time point in the future. + * + * @param deadline the wallclock time point in the future when the event must be sent + * @param ev the event to send + * @param cookie cookie that will be piggybacked with the event + */ + void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr) const; + + /** + * Schedule a one-shot event that will be sent at the given time point in the future. + * + * @param deadline the monotonic time point in the future when the event must be sent + * @param ev the event to send + * @param cookie cookie that will be piggybacked with the event + */ + void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr) const; + + /** + * Schedule a one-shot event that will be sent after the given delay. + * + * @param delta the time from now to delay event sending + * @param ev the event to send + * @param cookie cookie that will be piggybacked with the event + */ + void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr) const; + + /** + * A way to interact with actors from a non-actor context. + * + * This method will send the `event` to the `recipient` and then wait for a response. When the response arrives, + * it will be passed to the future. If the response is not of type `T`, the future will resolve into an exception. + * + * @tparam T expected response type. Must be derived from `TEventBase`, + * or use `IEventBase` to catch any response.
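+ *
+ * Usage sketch (cf. the `AskActor` unit tests in ask_ut.cpp further below):
+ *
+ * auto future = actorSystem->Ask<TEvents::TEvPong>(recipient, MakeHolder<TEvents::TEvPing>());
+ * THolder<TEvents::TEvPong> pong = future.ExtractValueSync();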
+ * @param actorSystem actor system that will be used to register an actor that'll wait for response. + * @param recipient who will get a request. + * @param event a request message. + * @return future that will be resolved when a message from `recipient` arrives. + */ + template <typename T> + [[nodiscard]] + NThreading::TFuture<THolder<T>> Ask(TActorId recipient, THolder<IEventBase> event, TDuration timeout = TDuration::Max()) { + if constexpr (std::is_same_v<T, IEventBase>) { + return AskGeneric(Nothing(), recipient, std::move(event), timeout); + } else { + return AskGeneric(T::EventType, recipient, std::move(event), timeout) + .Apply([](const NThreading::TFuture<THolder<IEventBase>>& ev) { + return THolder<T>(static_cast<T*>(const_cast<THolder<IEventBase>&>(ev.GetValueSync()).Release())); // =( + }); + } + } + + [[nodiscard]] + NThreading::TFuture<THolder<IEventBase>> AskGeneric( + TMaybe<ui32> expectedEventType, + TActorId recipient, + THolder<IEventBase> event, + TDuration timeout); + + ui64 AllocateIDSpace(ui64 count); + + TActorId InterconnectProxy(ui32 destinationNode) const; + ui32 BroadcastToProxies(const std::function<IEventHandle*(const TActorId&)>&); + + void UpdateLinkStatus(ui8 status, ui32 destinationNode); + ui8 LinkStatus(ui32 destinationNode); + + TActorId LookupLocalService(const TActorId& x) const; + TActorId RegisterLocalService(const TActorId& serviceId, const TActorId& actorId); + + ui32 GetMaxActivityType() const { + return SystemSetup ? SystemSetup->MaxActivityType : 1; + } + + TInstant Timestamp() const { + return TInstant::MicroSeconds(RelaxedLoad(&CurrentTimestamp)); + } + + TMonotonic Monotonic() const { + return TMonotonic::MicroSeconds(RelaxedLoad(&CurrentMonotonic)); + } + + template <typename T> + T* AppData() const { + return (T*)AppData0; + } + + NLog::TSettings* LoggerSettings() const { + return LoggerSettings0.Get(); + } + + void GetPoolStats(ui32 poolId, TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const; + + void DeferPreStop(std::function<void()> fn) { + DeferredPreStop.push_back(std::move(fn)); + } + + /* This is the base for memory profiling tags. + System sets memory profiling tag for debug version of lfalloc. + The tag is set as "base_tag + actor_activity_type". 
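+ For example, with MemProfActivityBase set to 100, an actor whose activity type is 5 would have its
+ allocations tagged 105 (the numbers here are illustrative only, nothing in the library fixes them).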
*/ + static ui32 MemProfActivityBase; + }; +} diff --git a/library/cpp/actors/core/actorsystem_ut.cpp b/library/cpp/actors/core/actorsystem_ut.cpp new file mode 100644 index 0000000000..231d6f0ca1 --- /dev/null +++ b/library/cpp/actors/core/actorsystem_ut.cpp @@ -0,0 +1,45 @@ +#include "actorsystem.h" + +#include <library/cpp/actors/testlib/test_runtime.h> +#include <library/cpp/testing/unittest/registar.h> + +using namespace NActors; + +Y_UNIT_TEST_SUITE(TActorSystemTest) { + + class TTestActor: public TActor<TTestActor> { + public: + TTestActor() + : TActor{&TThis::Main} + { + } + + STATEFN(Main) { + Y_UNUSED(ev); + } + }; + + THolder<TTestActorRuntimeBase> CreateRuntime() { + auto runtime = MakeHolder<TTestActorRuntimeBase>(); + runtime->SetScheduledEventFilter([](auto&&, auto&&, auto&&, auto&&) { return false; }); + runtime->Initialize(); + return runtime; + } + + Y_UNIT_TEST(LocalService) { + THolder<TTestActorRuntimeBase> runtime = CreateRuntime(); + auto actorA = runtime->Register(new TTestActor); + auto actorB = runtime->Register(new TTestActor); + + TActorId myServiceId{0, TStringBuf{"my-service"}}; + + auto prevActorId = runtime->RegisterService(myServiceId, actorA); + UNIT_ASSERT(!prevActorId); + UNIT_ASSERT_EQUAL(runtime->GetLocalServiceId(myServiceId), actorA); + + prevActorId = runtime->RegisterService(myServiceId, actorB); + UNIT_ASSERT(prevActorId); + UNIT_ASSERT_EQUAL(prevActorId, actorA); + UNIT_ASSERT_EQUAL(runtime->GetLocalServiceId(myServiceId), actorB); + } +} diff --git a/library/cpp/actors/core/ask.cpp b/library/cpp/actors/core/ask.cpp new file mode 100644 index 0000000000..0054c9a906 --- /dev/null +++ b/library/cpp/actors/core/ask.cpp @@ -0,0 +1,74 @@ +#include "ask.h" + +#include "actor_bootstrapped.h" +#include "actorid.h" +#include "event.h" +#include "hfunc.h" + +namespace NActors { + namespace { + class TAskActor: public TActorBootstrapped<TAskActor> { + enum { + Timeout = EventSpaceBegin(TEvents::ES_PRIVATE), + }; + + // We can't use the standard timeout event because recipient may send us one. 
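+ // (The usual pattern, as in actor_coroutine_ut.cpp above, is to Schedule() a TEvents::TEvWakeup; but a wakeup
+ // coming from the recipient would be indistinguishable from our own timer, hence a private event type.)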
+ struct TTimeout: public TEventLocal<TTimeout, Timeout> { + }; + + public: + TAskActor( + TMaybe<ui32> expectedEventType, + TActorId recipient, + THolder<IEventBase> event, + TDuration timeout, + const NThreading::TPromise<THolder<IEventBase>>& promise) + : ExpectedEventType_(expectedEventType) + , Recipient_(recipient) + , Event_(std::move(event)) + , Timeout_(timeout) + , Promise_(promise) + { + } + + public: + void Bootstrap() { + Send(Recipient_, std::move(Event_)); + Become(&TAskActor::Waiting); + + if (Timeout_ != TDuration::Max()) { + Schedule(Timeout_, new TTimeout); + } + } + + STATEFN(Waiting) { + if (ev->GetTypeRewrite() == TTimeout::EventType) { + Promise_.SetException(std::make_exception_ptr(yexception() << "ask timeout")); + } else if (!ExpectedEventType_ || ev->GetTypeRewrite() == ExpectedEventType_) { + Promise_.SetValue(ev->ReleaseBase()); + } else { + Promise_.SetException(std::make_exception_ptr(yexception() << "received unexpected response " << ev->GetBase()->ToString())); + } + + PassAway(); + } + + public: + TMaybe<ui32> ExpectedEventType_; + TActorId Recipient_; + THolder<IEventBase> Event_; + TDuration Timeout_; + NThreading::TPromise<THolder<IEventBase>> Promise_; + }; + } + + THolder<IActor> MakeAskActor( + TMaybe<ui32> expectedEventType, + TActorId recipient, + THolder<IEventBase> event, + TDuration timeout, + const NThreading::TPromise<THolder<IEventBase>>& promise) + { + return MakeHolder<TAskActor>(expectedEventType, std::move(recipient), std::move(event), timeout, promise); + } +} diff --git a/library/cpp/actors/core/ask.h b/library/cpp/actors/core/ask.h new file mode 100644 index 0000000000..036f1833a4 --- /dev/null +++ b/library/cpp/actors/core/ask.h @@ -0,0 +1,18 @@ +#pragma once + +#include "actor.h" +#include "event.h" + +#include <library/cpp/threading/future/future.h> + +namespace NActors { + /** + * See `TActorSystem::Ask`. 
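+ *
+ * Rough usage sketch, mirroring what `TActorSystem::AskGeneric` does in actorsystem.cpp:
+ *
+ * auto promise = NThreading::NewPromise<THolder<IEventBase>>();
+ * actorSystem->Register(MakeAskActor(expectedEventType, recipient, std::move(event), timeout, promise).Release());
+ * auto future = promise.GetFuture();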
+ */ + THolder<IActor> MakeAskActor( + TMaybe<ui32> expectedEventType, + TActorId recipient, + THolder<IEventBase> event, + TDuration timeout, + const NThreading::TPromise<THolder<IEventBase>>& promise); +} diff --git a/library/cpp/actors/core/ask_ut.cpp b/library/cpp/actors/core/ask_ut.cpp new file mode 100644 index 0000000000..e72ebdba9b --- /dev/null +++ b/library/cpp/actors/core/ask_ut.cpp @@ -0,0 +1,131 @@ +#include <library/cpp/testing/unittest/registar.h> + +#include "actorsystem.h" + +#include <library/cpp/actors/testlib/test_runtime.h> + +using namespace NActors; + +class TPingPong: public TActor<TPingPong> { +public: + TPingPong() + : TActor(&TPingPong::Main) + { + } + + STATEFN(Main) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvents::TEvPing, OnPing); + hFunc(TEvents::TEvBlob, OnBlob); + } + } + + void OnPing(const TEvents::TEvPing::TPtr& ev) { + Send(ev->Sender, new TEvents::TEvPong); + } + + void OnBlob(const TEvents::TEvBlob::TPtr& ev) { + Send(ev->Sender, ev->Release().Release()); + } +}; + +class TPing: public TActor<TPing> { +public: + TPing() + : TActor(&TPing::Main) + { + } + + STATEFN(Main) { + Y_UNUSED(ev); + } +}; + +THolder<TTestActorRuntimeBase> CreateRuntime() { + auto runtime = MakeHolder<TTestActorRuntimeBase>(); + runtime->SetScheduledEventFilter([](auto&&, auto&&, auto&&, auto&&) { return false; }); + runtime->Initialize(); + return runtime; +} + +Y_UNIT_TEST_SUITE(AskActor) { + Y_UNIT_TEST(Ok) { + auto runtime = CreateRuntime(); + auto pingpong = runtime->Register(new TPingPong); + + { + auto fut = runtime->GetAnyNodeActorSystem()->Ask<TEvents::TEvPong>( + pingpong, + THolder(new TEvents::TEvPing)); + runtime->DispatchEvents(); + fut.ExtractValueSync(); + } + + { + auto fut = runtime->GetAnyNodeActorSystem()->Ask<TEvents::TEvBlob>( + pingpong, + THolder(new TEvents::TEvBlob("hello!"))); + runtime->DispatchEvents(); + auto ev = fut.ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(ev->Blob, "hello!"); + } + + { + auto fut = runtime->GetAnyNodeActorSystem()->Ask<IEventBase>( + pingpong, + THolder(new TEvents::TEvPing)); + runtime->DispatchEvents(); + auto ev = fut.ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(ev->Type(), TEvents::TEvPong::EventType); + } + } + + Y_UNIT_TEST(Err) { + auto runtime = CreateRuntime(); + auto pingpong = runtime->Register(new TPingPong); + + { + auto fut = runtime->GetAnyNodeActorSystem()->Ask<TEvents::TEvBlob>( + pingpong, + THolder(new TEvents::TEvPing)); + runtime->DispatchEvents(); + UNIT_ASSERT_EXCEPTION_CONTAINS( + fut.ExtractValueSync(), + yexception, + "received unexpected response HelloWorld: Pong"); + } + } + + Y_UNIT_TEST(Timeout) { + auto runtime = CreateRuntime(); + auto ping = runtime->Register(new TPing); + + { + auto fut = runtime->GetAnyNodeActorSystem()->Ask<TEvents::TEvPong>( + ping, + THolder(new TEvents::TEvPing), + TDuration::Seconds(1)); + auto start = runtime->GetCurrentTime(); + runtime->DispatchEvents({}, TDuration::Seconds(5)); + UNIT_ASSERT_EXCEPTION_CONTAINS( + fut.ExtractValueSync(), + yexception, + "ask timeout"); + UNIT_ASSERT_VALUES_EQUAL(runtime->GetCurrentTime() - start, TDuration::Seconds(1)); + } + + { + auto fut = runtime->GetAnyNodeActorSystem()->Ask<IEventBase>( + ping, + THolder(new TEvents::TEvPing), + TDuration::Seconds(1)); + auto start = runtime->GetCurrentTime(); + runtime->DispatchEvents({}, TDuration::Seconds(5)); + UNIT_ASSERT_EXCEPTION_CONTAINS( + fut.ExtractValueSync(), + yexception, + "ask timeout"); + UNIT_ASSERT_VALUES_EQUAL(runtime->GetCurrentTime() - start, 
TDuration::Seconds(1)); + } + } +} diff --git a/library/cpp/actors/core/balancer.cpp b/library/cpp/actors/core/balancer.cpp new file mode 100644 index 0000000000..cc5417b0b5 --- /dev/null +++ b/library/cpp/actors/core/balancer.cpp @@ -0,0 +1,293 @@ +#include "balancer.h" + +#include "probes.h" + +#include <library/cpp/actors/util/intrinsics.h> +#include <library/cpp/actors/util/datetime.h> + +#include <util/system/spinlock.h> + +#include <algorithm> + +namespace NActors { + LWTRACE_USING(ACTORLIB_PROVIDER); + + // Describes the balancing-related state of a pool; the most notable field is `Importance`, used to decide which pool gets a new cpu + struct TLevel { + // Balancer will try to give more cpu to overloaded pools + enum ELoadClass { + Underloaded = 0, + Moderate = 1, + Overloaded = 2, + }; + + double ScaleFactor; + ELoadClass LoadClass; + ui64 Importance; // a pool with lower importance may pass a cpu to a pool with higher importance, but not the other way around + + TLevel() {} + + TLevel(const TBalancingConfig& cfg, TPoolId poolId, ui64 currentCpus, double cpuIdle) { + ScaleFactor = double(currentCpus) / cfg.Cpus; + if (cpuIdle > 1.3) { // TODO: add a better underload criterion, based on estimated latency w/o 1 cpu + LoadClass = Underloaded; + } else if (cpuIdle < 0.2) { // TODO: add a better overload criterion, based on latency + LoadClass = Overloaded; + } else { + LoadClass = Moderate; + } + Importance = MakeImportance(LoadClass, cfg.Priority, ScaleFactor, cpuIdle, poolId); + } + + private: + // Importance is a simple ui64 value (from highest to lowest): + // 2 Bits: LoadClass + // 8 Bits: Priority + // 10 Bits: -ScaleFactor (for max-min fairness with weights equal to TBalancingConfig::Cpus) + // 10 Bits: -CpuIdle + // 6 Bits: PoolId + static ui64 MakeImportance(ELoadClass load, ui8 priority, double scaleFactor, double cpuIdle, TPoolId poolId) { + ui64 idle = std::clamp<i64>(1024 - cpuIdle * 512, 0, 1023); + ui64 scale = std::clamp<i64>(1024 - scaleFactor * 32, 0, 1023); + + Y_VERIFY(ui64(load) < (1ull << 2ull)); + Y_VERIFY(ui64(priority) < (1ull << 8ull)); + Y_VERIFY(ui64(scale) < (1ull << 10ull)); + Y_VERIFY(ui64(idle) < (1ull << 10ull)); + Y_VERIFY(ui64(poolId) < (1ull << 6ull)); + + static_assert(ui64(MaxPools) <= (1ull << 6ull)); + + ui64 importance = + (ui64(load) << ui64(6 + 10 + 10 + 8)) | + (ui64(priority) << ui64(6 + 10 + 10)) | + (ui64(scale) << ui64(6 + 10)) | + (ui64(idle) << ui64(6)) | + ui64(poolId); + return importance; + } + }; + + // Main balancer implementation + class TBalancer: public IBalancer { + private: + struct TCpu; + struct TPool; + + bool Disabled = true; + TSpinLock Lock; + ui64 NextBalanceTs; + TVector<TCpu> Cpus; // Indexed by CpuId, can have gaps + TVector<TPool> Pools; // Indexed by PoolId, can have gaps + TBalancerConfig Config; + + public: + // Setup + TBalancer(const TBalancerConfig& config, const TVector<TUnitedExecutorPoolConfig>& unitedPools, ui64 ts); + bool AddCpu(const TCpuAllocation& cpuAlloc, TCpuState* cpu) override; + ~TBalancer(); + + // Balancing + bool TryLock(ui64 ts) override; + void SetPoolStats(TPoolId pool, const TBalancerStats& stats) override; + void Balance() override; + void Unlock() override; + + private: + void MoveCpu(TPool& from, TPool& to); + }; + + struct TBalancer::TPool { + TBalancingConfig Config; + TPoolId PoolId; + TString PoolName; + + // Input data for balancing + TBalancerStats Prev; + TBalancerStats Next; + + // Derived stats + double CpuLoad; + double CpuIdle; + + // Classification + // NOTE: We want to avoid passing cpu back and forth, so we must
consider not only current level, + // NOTE: but expected levels after movements also + TLevel CurLevel; // Level with current amount of cpu + TLevel AddLevel; // Level after one cpu acception + TLevel SubLevel; // Level after one cpu donation + + // Balancing state + ui64 CurrentCpus = 0; // Total number of cpus assigned for this pool (zero means pools is not balanced) + ui64 PrevCpus = 0; // Cpus in last period + + explicit TPool(const TBalancingConfig& cfg = {}) + : Config(cfg) + {} + + void Configure(const TBalancingConfig& cfg, const TString& poolName) { + Config = cfg; + // Enforce constraints + Config.MinCpus = std::clamp<ui32>(Config.MinCpus, 1, Config.Cpus); + Config.MaxCpus = Max<ui32>(Config.MaxCpus, Config.Cpus); + PoolName = poolName; + } + }; + + struct TBalancer::TCpu { + TCpuState* State = nullptr; // Cpu state, nullptr means cpu is not used (gap) + TCpuAllocation Alloc; + TPoolId Current; + TPoolId Assigned; + }; + + TBalancer::TBalancer(const TBalancerConfig& config, const TVector<TUnitedExecutorPoolConfig>& unitedPools, ui64 ts) + : NextBalanceTs(ts) + , Config(config) + { + for (TPoolId pool = 0; pool < MaxPools; pool++) { + Pools.emplace_back(); + Pools.back().PoolId = pool; + } + for (const TUnitedExecutorPoolConfig& united : unitedPools) { + Pools[united.PoolId].Configure(united.Balancing, united.PoolName); + } + } + + TBalancer::~TBalancer() { + } + + bool TBalancer::AddCpu(const TCpuAllocation& cpuAlloc, TCpuState* state) { + // Setup + TCpuId cpuId = cpuAlloc.CpuId; + if (Cpus.size() <= cpuId) { + Cpus.resize(cpuId + 1); + } + TCpu& cpu = Cpus[cpuId]; + cpu.State = state; + cpu.Alloc = cpuAlloc; + + // Fill every pool with cpus up to TBalancingConfig::Cpus + TPoolId pool = 0; + for (TPool& p : Pools) { + if (p.CurrentCpus < p.Config.Cpus) { + p.CurrentCpus++; + break; + } + pool++; + } + if (pool != MaxPools) { // cpu under balancer control + state->SwitchPool(pool); + state->AssignPool(pool); + Disabled = false; + return true; + } + return false; // non-balanced cpu + } + + bool TBalancer::TryLock(ui64 ts) { + if (!Disabled && NextBalanceTs < ts && Lock.TryAcquire()) { + NextBalanceTs = ts + Us2Ts(Config.PeriodUs); + return true; + } + return false; + } + + void TBalancer::SetPoolStats(TPoolId pool, const TBalancerStats& stats) { + Y_VERIFY(pool < MaxPools); + TPool& p = Pools[pool]; + p.Prev = p.Next; + p.Next = stats; + } + + void TBalancer::Balance() { + // Update every cpu state + for (TCpu& cpu : Cpus) { + if (cpu.State) { + cpu.State->Load(cpu.Assigned, cpu.Current); + if (cpu.Current < MaxPools && cpu.Current != cpu.Assigned) { + return; // previous movement has not been applied yet, wait + } + } + } + + // Process stats, classify and compute pool importance + TStackVec<TPool*, MaxPools> order; + for (TPool& pool : Pools) { + if (pool.Config.Cpus == 0) { + continue; // skip gaps (non-existent or non-united pools) + } + if (pool.Prev.Ts == 0 || pool.Prev.Ts >= pool.Next.Ts) { + return; // invalid stats + } + + // Compute derived stats + pool.CpuLoad = (pool.Next.CpuUs - pool.Prev.CpuUs) / Ts2Us(pool.Next.Ts - pool.Prev.Ts); + if (pool.Prev.IdleUs == ui64(-1) || pool.Next.IdleUs == ui64(-1)) { + pool.CpuIdle = pool.CurrentCpus - pool.CpuLoad; // for tests + } else { + pool.CpuIdle = (pool.Next.IdleUs - pool.Prev.IdleUs) / Ts2Us(pool.Next.Ts - pool.Prev.Ts); + } + + // Compute levels + pool.CurLevel = TLevel(pool.Config, pool.PoolId, pool.CurrentCpus, pool.CpuIdle); + pool.AddLevel = TLevel(pool.Config, pool.PoolId, pool.CurrentCpus + 1, pool.CpuIdle); // we 
expect taken cpu to became utilized + pool.SubLevel = TLevel(pool.Config, pool.PoolId, pool.CurrentCpus - 1, pool.CpuIdle - 1); + + // Prepare for balancing + pool.PrevCpus = pool.CurrentCpus; + order.push_back(&pool); + } + + // Sort pools by importance + std::sort(order.begin(), order.end(), [] (TPool* l, TPool* r) {return l->CurLevel.Importance < r->CurLevel.Importance; }); + for (TPool* pool : order) { + LWPROBE(PoolStats, pool->PoolId, pool->PoolName, pool->CurrentCpus, pool->CurLevel.LoadClass, pool->Config.Priority, pool->CurLevel.ScaleFactor, pool->CpuIdle, pool->CpuLoad, pool->CurLevel.Importance, pool->AddLevel.Importance, pool->SubLevel.Importance); + } + + // Move cpus from lower importance to higher importance pools + for (auto toIter = order.rbegin(); toIter != order.rend(); ++toIter) { + TPool& to = **toIter; + if (to.CurLevel.LoadClass == TLevel::Overloaded && // if pool is overloaded + to.CurrentCpus < to.Config.MaxCpus) // and constraints would not be violated + { + for (auto fromIter = order.begin(); (*fromIter)->CurLevel.Importance < to.CurLevel.Importance; ++fromIter) { + TPool& from = **fromIter; + if (from.CurrentCpus == from.PrevCpus && // if not balanced yet + from.CurrentCpus > from.Config.MinCpus && // and constraints would not be violated + from.SubLevel.Importance < to.AddLevel.Importance) // and which of two pools is more important would not change after cpu movement + { + MoveCpu(from, to); + from.CurrentCpus--; + to.CurrentCpus++; + break; + } + } + } + } + } + + void TBalancer::MoveCpu(TBalancer::TPool& from, TBalancer::TPool& to) { + for (auto ci = Cpus.rbegin(), ce = Cpus.rend(); ci != ce; ci++) { + TCpu& cpu = *ci; + if (!cpu.State) { + continue; + } + if (cpu.Assigned == from.PoolId) { + cpu.State->AssignPool(to.PoolId); + cpu.Assigned = to.PoolId; + LWPROBE(MoveCpu, from.PoolId, to.PoolId, from.PoolName, to.PoolName, cpu.Alloc.CpuId); + return; + } + } + Y_FAIL(); + } + + void TBalancer::Unlock() { + Lock.Release(); + } + + IBalancer* MakeBalancer(const TBalancerConfig& config, const TVector<TUnitedExecutorPoolConfig>& unitedPools, ui64 ts) { + return new TBalancer(config, unitedPools, ts); + } +} diff --git a/library/cpp/actors/core/balancer.h b/library/cpp/actors/core/balancer.h new file mode 100644 index 0000000000..9763ec79e1 --- /dev/null +++ b/library/cpp/actors/core/balancer.h @@ -0,0 +1,27 @@ +#pragma once + +#include "defs.h" +#include "config.h" +#include "cpu_state.h" + +namespace NActors { + // Per-pool statistics used by balancer + struct TBalancerStats { + ui64 Ts = 0; // Measurement timestamp + ui64 CpuUs = 0; // Total cpu microseconds consumed by pool on all cpus since start + ui64 IdleUs = ui64(-1); // Total cpu microseconds in spinning or waiting on futex + }; + + // Pool cpu balancer + struct IBalancer { + virtual ~IBalancer() {} + virtual bool AddCpu(const TCpuAllocation& cpuAlloc, TCpuState* cpu) = 0; + virtual bool TryLock(ui64 ts) = 0; + virtual void SetPoolStats(TPoolId pool, const TBalancerStats& stats) = 0; + virtual void Balance() = 0; + virtual void Unlock() = 0; + // TODO: add method for reconfiguration on fly + }; + + IBalancer* MakeBalancer(const TBalancerConfig& config, const TVector<TUnitedExecutorPoolConfig>& unitedPools, ui64 ts); +} diff --git a/library/cpp/actors/core/balancer_ut.cpp b/library/cpp/actors/core/balancer_ut.cpp new file mode 100644 index 0000000000..7e5e95f4b9 --- /dev/null +++ b/library/cpp/actors/core/balancer_ut.cpp @@ -0,0 +1,225 @@ +#include "balancer.h" + +#include 
<library/cpp/actors/util/datetime.h> +#include <library/cpp/lwtrace/all.h> +#include <library/cpp/testing/unittest/registar.h> + +#include <util/stream/str.h> + +using namespace NActors; + +//////////////////////////////////////////////////////////////////////////////// + +Y_UNIT_TEST_SUITE(PoolCpuBalancer) { + struct TTest { + TCpuManagerConfig Config; + TCpuMask Available; + THolder<IBalancer> Balancer; + TVector<TCpuState> CpuStates; + TVector<ui64> CpuUs; + ui64 Now = 0; + + void SetCpuCount(size_t count) { + Config.UnitedWorkers.CpuCount = count; + for (TCpuId cpuId = 0; cpuId < count; cpuId++) { + Available.Set(cpuId); + } + } + + void AddPool(ui32 minCpus, ui32 cpus, ui32 maxCpus, ui8 priority = 0) { + TUnitedExecutorPoolConfig u; + u.PoolId = TPoolId(Config.United.size()); + u.Balancing.Cpus = cpus; + u.Balancing.MinCpus = minCpus; + u.Balancing.MaxCpus = maxCpus; + u.Balancing.Priority = priority; + Config.United.push_back(u); + } + + void Start() { + TCpuAllocationConfig allocation(Available, Config); + Balancer.Reset(MakeBalancer(Config.UnitedWorkers.Balancer, Config.United, 0)); + CpuStates.resize(allocation.Items.size()); // do not resize it later to avoid dangling pointers + CpuUs.resize(CpuStates.size()); + for (const TCpuAllocation& cpuAlloc : allocation.Items) { + bool added = Balancer->AddCpu(cpuAlloc, &CpuStates[cpuAlloc.CpuId]); + UNIT_ASSERT(added); + } + } + + void Balance(ui64 deltaTs, const TVector<ui64>& cpuUs) { + Now += deltaTs; + ui64 ts = Now; + if (Balancer->TryLock(ts)) { + for (TPoolId pool = 0; pool < cpuUs.size(); pool++) { + CpuUs[pool] += cpuUs[pool]; + TBalancerStats stats; + stats.Ts = ts; + stats.CpuUs = CpuUs[pool]; + Balancer->SetPoolStats(pool, stats); + } + Balancer->Balance(); + Balancer->Unlock(); + } + } + + void ApplyMovements() { + for (TCpuState& state : CpuStates) { + TPoolId current; + TPoolId assigned; + state.Load(assigned, current); + state.SwitchPool(assigned); + } + } + + static TString ToStr(const TVector<ui64>& values) { + TStringStream ss; + ss << "{"; + for (auto v : values) { + ss << " " << v; + } + ss << " }"; + return ss.Str(); + } + + void AssertPoolsCurrentCpus(const TVector<ui64>& cpuRequired) { + TVector<ui64> cpuCurrent; + cpuCurrent.resize(cpuRequired.size()); + for (TCpuState& state : CpuStates) { + TPoolId current; + TPoolId assigned; + state.Load(assigned, current); + cpuCurrent[current]++; + } + for (TPoolId pool = 0; pool < cpuRequired.size(); pool++) { + UNIT_ASSERT_C(cpuCurrent[pool] == cpuRequired[pool], + "cpu distribution mismatch, required " << ToStr(cpuRequired) << " but got " << ToStr(cpuCurrent)); + } + } + }; + + Y_UNIT_TEST(StartLwtrace) { + NLWTrace::StartLwtraceFromEnv(); + } + + Y_UNIT_TEST(AllOverloaded) { + TTest t; + int cpus = 10; + t.SetCpuCount(cpus); + t.AddPool(1, 1, 10); // pool=0 + t.AddPool(1, 2, 10); // pool=1 + t.AddPool(1, 3, 10); // pool=2 + t.AddPool(1, 4, 10); // pool=2 + t.Start(); + ui64 dts = 1.01 * Us2Ts(t.Config.UnitedWorkers.Balancer.PeriodUs); + ui64 totalCpuUs = cpus * Ts2Us(dts); // pretend every pool has consumed as whole actorsystem, overload + for (int i = 0; i < cpus; i++) { + t.Balance(dts, {totalCpuUs, totalCpuUs, totalCpuUs, totalCpuUs}); + t.ApplyMovements(); + } + t.AssertPoolsCurrentCpus({1, 2, 3, 4}); + } + + Y_UNIT_TEST(OneOverloaded) { + TTest t; + int cpus = 10; + t.SetCpuCount(cpus); + t.AddPool(1, 1, 10); // pool=0 + t.AddPool(1, 2, 10); // pool=1 + t.AddPool(1, 3, 10); // pool=2 + t.AddPool(1, 4, 10); // pool=2 + t.Start(); + ui64 dts = 1.01 * 
Us2Ts(t.Config.UnitedWorkers.Balancer.PeriodUs); + ui64 totalCpuUs = cpus * Ts2Us(dts); + for (int i = 0; i < cpus; i++) { + t.Balance(dts, {totalCpuUs, 0, 0, 0}); + t.ApplyMovements(); + } + t.AssertPoolsCurrentCpus({7, 1, 1, 1}); + for (int i = 0; i < cpus; i++) { + t.Balance(dts, {0, totalCpuUs, 0, 0}); + t.ApplyMovements(); + } + t.AssertPoolsCurrentCpus({1, 7, 1, 1}); + for (int i = 0; i < cpus; i++) { + t.Balance(dts, {0, 0, totalCpuUs, 0}); + t.ApplyMovements(); + } + t.AssertPoolsCurrentCpus({1, 1, 7, 1}); + for (int i = 0; i < cpus; i++) { + t.Balance(dts, {0, 0, 0, totalCpuUs}); + t.ApplyMovements(); + } + t.AssertPoolsCurrentCpus({1, 1, 1, 7}); + } + + Y_UNIT_TEST(TwoOverloadedFairness) { + TTest t; + int cpus = 10; + t.SetCpuCount(cpus); + t.AddPool(1, 1, 10); // pool=0 + t.AddPool(1, 2, 10); // pool=1 + t.AddPool(1, 3, 10); // pool=2 + t.AddPool(1, 4, 10); // pool=2 + t.Start(); + ui64 dts = 1.01 * Us2Ts(t.Config.UnitedWorkers.Balancer.PeriodUs); + ui64 totalCpuUs = cpus * Ts2Us(dts); + for (int i = 0; i < cpus; i++) { + t.Balance(dts, {totalCpuUs, totalCpuUs, 0, 0}); + t.ApplyMovements(); + } + t.AssertPoolsCurrentCpus({3, 5, 1, 1}); + for (int i = 0; i < cpus; i++) { + t.Balance(dts, {totalCpuUs, 0, totalCpuUs, 0}); + t.ApplyMovements(); + } + t.AssertPoolsCurrentCpus({2, 1, 6, 1}); + for (int i = 0; i < cpus; i++) { + t.Balance(dts, {totalCpuUs, 0, 0, totalCpuUs}); + t.ApplyMovements(); + } + t.AssertPoolsCurrentCpus({2, 1, 1, 6}); + for (int i = 0; i < cpus; i++) { + t.Balance(dts, {0, totalCpuUs, totalCpuUs, 0}); + t.ApplyMovements(); + } + t.AssertPoolsCurrentCpus({1, 3, 5, 1}); + for (int i = 0; i < cpus; i++) { + t.Balance(dts, {0, totalCpuUs, 0, totalCpuUs}); + t.ApplyMovements(); + } + t.AssertPoolsCurrentCpus({1, 3, 1, 5}); + for (int i = 0; i < cpus; i++) { + t.Balance(dts, {0, 0, totalCpuUs, totalCpuUs}); + t.ApplyMovements(); + } + t.AssertPoolsCurrentCpus({1, 1, 3, 5}); + } + + Y_UNIT_TEST(TwoOverloadedPriority) { + TTest t; + int cpus = 20; + t.SetCpuCount(cpus); + t.AddPool(1, 5, 20, 0); // pool=0 + t.AddPool(1, 5, 20, 1); // pool=1 + t.AddPool(1, 5, 20, 2); // pool=2 + t.AddPool(1, 5, 20, 3); // pool=3 + t.Start(); + ui64 dts = 1.01 * Us2Ts(t.Config.UnitedWorkers.Balancer.PeriodUs); + ui64 mErlang = Ts2Us(dts) / 1000; + for (int i = 0; i < cpus; i++) { + t.Balance(dts, {20000 * mErlang, 2500 * mErlang, 4500 * mErlang, 9500 * mErlang}); + t.ApplyMovements(); + } + t.AssertPoolsCurrentCpus({2, 3, 5, 10}); + t.Balance(dts, {20000 * mErlang, 2500 * mErlang, 4500 * mErlang, 8500 * mErlang}); + t.ApplyMovements(); + t.AssertPoolsCurrentCpus({3, 3, 5, 9}); + // NOTE: this operation require one move, but we do not make global analysis, so multiple steps (1->2 & 0->1) are required (can be optimized later) + for (int i = 0; i < 3; i++) { + t.Balance(dts, {20000 * mErlang, 2500 * mErlang, 5500 * mErlang, 8500 * mErlang}); + t.ApplyMovements(); + } + t.AssertPoolsCurrentCpus({2, 3, 6, 9}); + } +} diff --git a/library/cpp/actors/core/buffer.cpp b/library/cpp/actors/core/buffer.cpp new file mode 100644 index 0000000000..48128d76ef --- /dev/null +++ b/library/cpp/actors/core/buffer.cpp @@ -0,0 +1,93 @@ +#include "buffer.h" + +#include <util/system/yassert.h> + +#include <algorithm> + +TBufferBase::TBufferBase(size_t size) noexcept + : Size(size) +{ +} + +size_t +TBufferBase::GetSize() const noexcept { + return Size; +} + +void TBufferBase::SetSize(size_t size) noexcept { + Size = size; +} + +///////////////////////////////////////////////////////////////////// + +template 
<typename PointerType> +TBufferBaseT<PointerType>::TBufferBaseT(PointerType data, size_t size) noexcept + : TBufferBase(size) + , Data(data) +{ +} + +template <typename PointerType> +PointerType +TBufferBaseT<PointerType>::GetPointer() const noexcept { + return Data; +} + +template <typename PointerType> +void TBufferBaseT<PointerType>::Assign(PointerType data, size_t size) noexcept { + Data = data; + Size = size; +} + +template <> +void TBufferBaseT<void*>::Cut(size_t offset) noexcept { + Y_VERIFY_DEBUG(offset <= Size); + Data = static_cast<char*>(Data) + offset; + TBufferBase::Size -= offset; +} + +template <> +void TBufferBaseT<const void*>::Cut(size_t offset) noexcept { + Y_VERIFY_DEBUG(offset <= Size); + Data = static_cast<const char*>(Data) + offset; + TBufferBase::Size -= offset; +} + +template class TBufferBaseT<void*>; +template class TBufferBaseT<const void*>; + +///////////////////////////////////////////////////////////////////// + +TConstBuffer::TConstBuffer(const void* data, size_t size) noexcept + : TBufferBaseT<const void*>(data, size) +{ +} + +TConstBuffer::TConstBuffer(const TMutableBuffer& buffer) noexcept + : TBufferBaseT<const void*>(buffer.GetPointer(), buffer.GetSize()) +{ +} + +TConstBuffer +TConstBuffer::Offset(ptrdiff_t offset, size_t size) const noexcept { + return TConstBuffer(static_cast<const char*>(Data) + offset, std::min(Size - offset, size)); +} + +//////////////////////////////////////////////////////////////////////////////// + +TMutableBuffer::TMutableBuffer(void* data, size_t size) noexcept + : TBufferBaseT<void*>(data, size) +{ +} + +TMutableBuffer +TMutableBuffer::Offset(ptrdiff_t offset, size_t size) const noexcept { + return TMutableBuffer(static_cast<char*>(Data) + offset, std::min(Size - offset, size)); +} + +size_t +TMutableBuffer::CopyFrom(const TConstBuffer& buffer) const noexcept { + const auto size = std::min(Size, buffer.Size); + std::memcpy(Data, buffer.Data, size); + return size; +} diff --git a/library/cpp/actors/core/buffer.h b/library/cpp/actors/core/buffer.h new file mode 100644 index 0000000000..95425046d6 --- /dev/null +++ b/library/cpp/actors/core/buffer.h @@ -0,0 +1,62 @@ +#pragma once + +#include <limits> + +class TConstBuffer; +class TMutableBuffer; + +class TBufferBase { +public: + size_t GetSize() const noexcept; + + void SetSize(size_t newSize) noexcept; + +protected: + TBufferBase(size_t size = 0) noexcept; + + size_t Size; +}; + +template <typename PointerType> +class TBufferBaseT: public TBufferBase { +public: + PointerType GetPointer() const noexcept; + + void Cut(size_t offset) noexcept; + + void Assign(PointerType data = nullptr, size_t size = 0U) noexcept; + +protected: + TBufferBaseT(PointerType data, size_t size) noexcept; + + PointerType Data; +}; + +/// Represents constant memory buffer, but do not owns it. +class TConstBuffer: public TBufferBaseT<const void*> { + friend class TMutableBuffer; + +public: + TConstBuffer(const TMutableBuffer& buffer) noexcept; + + TConstBuffer(const void* data = nullptr, size_t size = 0U) noexcept; + + TConstBuffer Offset(ptrdiff_t offset, size_t size = std::numeric_limits<size_t>::max()) const noexcept; +}; + +/// Represents mutable memory buffer, but do not owns it. 
+class TMutableBuffer: public TBufferBaseT<void*> { + friend class TConstBuffer; + +public: + TMutableBuffer(void* data = nullptr, size_t size = 0U) noexcept; + + TMutableBuffer(const TMutableBuffer& value) noexcept + : TBufferBaseT<void*>(value) + { + } + + TMutableBuffer Offset(ptrdiff_t offset, size_t size = std::numeric_limits<size_t>::max()) const noexcept; + + size_t CopyFrom(const TConstBuffer& buffer) const noexcept; +}; diff --git a/library/cpp/actors/core/callstack.cpp b/library/cpp/actors/core/callstack.cpp new file mode 100644 index 0000000000..9297c1a079 --- /dev/null +++ b/library/cpp/actors/core/callstack.cpp @@ -0,0 +1,93 @@ +#include "callstack.h" +#include <util/thread/singleton.h> + +#ifdef USE_ACTOR_CALLSTACK + +namespace NActors { + namespace { + void (*PreviousFormatBackTrace)(IOutputStream*) = 0; + ui32 ActorBackTraceEnableCounter = 0; + } + + void ActorFormatBackTrace(IOutputStream* out) { + TStringStream str; + PreviousFormatBackTrace(&str); + str << Endl; + TCallstack::DumpCallstack(str); + *out << str.Str(); + } + + void EnableActorCallstack() { + if (ActorBackTraceEnableCounter == 0) { + Y_VERIFY(PreviousFormatBackTrace == 0); + PreviousFormatBackTrace = SetFormatBackTraceFn(ActorFormatBackTrace); + } + + ++ActorBackTraceEnableCounter; + } + + void DisableActorCallstack() { + --ActorBackTraceEnableCounter; + + if (ActorBackTraceEnableCounter == 0) { + Y_VERIFY(PreviousFormatBackTrace); + SetFormatBackTraceFn(PreviousFormatBackTrace); + PreviousFormatBackTrace = 0; + } + } + + TCallstack::TCallstack() + : BeginIdx(0) + , Size(0) + , LinesToSkip(0) + { + } + + void TCallstack::SetLinesToSkip() { + TTrace record; + LinesToSkip = BackTrace(record.Data, TTrace::CAPACITY); + } + + void TCallstack::Trace() { + size_t currentIdx = (BeginIdx + Size) % RECORDS; + if (Size == RECORDS) { + ++BeginIdx; + } else { + ++Size; + } + TTrace& record = Record[currentIdx]; + record.Size = BackTrace(record.Data, TTrace::CAPACITY); + record.LinesToSkip = LinesToSkip; + } + + void TCallstack::TraceIfEmpty() { + if (Size == 0) { + LinesToSkip = 0; + Trace(); + } + } + + TCallstack& TCallstack::GetTlsCallstack() { + return *FastTlsSingleton<TCallstack>(); + } + + void TCallstack::DumpCallstack(TStringStream& str) { + TCallstack& callstack = GetTlsCallstack(); + for (int i = callstack.Size - 1; i >= 0; --i) { + TTrace& record = callstack.Record[(callstack.BeginIdx + i) % RECORDS]; + str << Endl << "Trace entry " << i << Endl << Endl; + size_t size = record.Size; + if (size > record.LinesToSkip && size < TTrace::CAPACITY) { + size -= record.LinesToSkip; + } + if (size > RECORDS_TO_SKIP) { + FormatBackTrace(&str, &record.Data[RECORDS_TO_SKIP], size - RECORDS_TO_SKIP); + } else { + FormatBackTrace(&str, record.Data, size); + } + str << Endl; + } + } +} + +#endif diff --git a/library/cpp/actors/core/callstack.h b/library/cpp/actors/core/callstack.h new file mode 100644 index 0000000000..176717d2ae --- /dev/null +++ b/library/cpp/actors/core/callstack.h @@ -0,0 +1,58 @@ +#pragma once + +#ifndef NDEBUG +//#define ENABLE_ACTOR_CALLSTACK +#endif + +#ifdef ENABLE_ACTOR_CALLSTACK +#include "defs.h" +#include <util/system/backtrace.h> +#include <util/stream/str.h> +#include <util/generic/deque.h> +#define USE_ACTOR_CALLSTACK + +namespace NActors { + struct TCallstack { + struct TTrace { + static const size_t CAPACITY = 50; + void* Data[CAPACITY]; + size_t Size; + size_t LinesToSkip; + + TTrace() + : Size(0) + , LinesToSkip(0) + { + } + }; + + static const size_t RECORDS = 8; + static const size_t 
RECORDS_TO_SKIP = 2; + TTrace Record[RECORDS]; + size_t BeginIdx; + size_t Size; + size_t LinesToSkip; + + TCallstack(); + void SetLinesToSkip(); + void Trace(); + void TraceIfEmpty(); + static TCallstack& GetTlsCallstack(); + static void DumpCallstack(TStringStream& str); + }; + + void EnableActorCallstack(); + void DisableActorCallstack(); + +} + +#else + +namespace NActors { + inline void EnableActorCallstack(){}; + + inline void DisableActorCallstack(){}; + +} + +#endif diff --git a/library/cpp/actors/core/config.h b/library/cpp/actors/core/config.h new file mode 100644 index 0000000000..2486bf4c43 --- /dev/null +++ b/library/cpp/actors/core/config.h @@ -0,0 +1,239 @@ +#pragma once + +#include "defs.h" +#include <library/cpp/actors/util/cpumask.h> +#include <library/cpp/monlib/dynamic_counters/counters.h> +#include <util/datetime/base.h> +#include <util/generic/ptr.h> +#include <util/generic/string.h> +#include <util/generic/vector.h> + +namespace NActors { + + struct TBalancingConfig { + // Default cpu count (used during overload). Zero value disables this pool balancing + // 1) Sum of `Cpus` on all pools cannot be changed without restart + // (changing cpu mode between Shared and Assigned is not implemented yet) + // 2) This sum must be equal to TUnitedWorkersConfig::CpuCount, + // otherwise `CpuCount - SUM(Cpus)` cpus will be in Shared mode (i.e. actorsystem 2.0) + ui32 Cpus = 0; + + ui32 MinCpus = 0; // Lower balancing bound, should be at least 1, and not greater than `Cpus` + ui32 MaxCpus = 0; // Higher balancing bound, should be not lower than `Cpus` + ui8 Priority = 0; // Priority of pool to obtain cpu due to balancing (higher is better) + ui64 ToleratedLatencyUs = 0; // p100-latency threshold indicating that more cpus are required by pool + }; + + struct TBalancerConfig { + ui64 PeriodUs = 15000000; // Time between balancer steps + }; + + struct TBasicExecutorPoolConfig { + static constexpr TDuration DEFAULT_TIME_PER_MAILBOX = TDuration::MilliSeconds(10); + static constexpr ui32 DEFAULT_EVENTS_PER_MAILBOX = 100; + + ui32 PoolId = 0; + TString PoolName; + ui32 Threads = 1; + ui64 SpinThreshold = 100; + TCpuMask Affinity; // Executor thread affinity + TDuration TimePerMailbox = DEFAULT_TIME_PER_MAILBOX; + ui32 EventsPerMailbox = DEFAULT_EVENTS_PER_MAILBOX; + int RealtimePriority = 0; + ui32 MaxActivityType = 1; + }; + + struct TIOExecutorPoolConfig { + ui32 PoolId = 0; + TString PoolName; + ui32 Threads = 1; + TCpuMask Affinity; // Executor thread affinity + ui32 MaxActivityType = 1; + }; + + struct TUnitedExecutorPoolConfig { + static constexpr TDuration DEFAULT_TIME_PER_MAILBOX = TDuration::MilliSeconds(10); + static constexpr ui32 DEFAULT_EVENTS_PER_MAILBOX = 100; + + ui32 PoolId = 0; + TString PoolName; + + // Resource sharing + ui32 Concurrency = 0; // Limits simultaneously running mailboxes count if set to non-zero value (do not set if Balancing.Cpus != 0) + TPoolWeight Weight = 0; // Weight in fair cpu-local pool scheduler + TCpuMask Allowed; // Allowed CPUs for workers to run this pool on (ignored if balancer works, i.e. actorsystem 1.5) + + // Single mailbox execution limits + TDuration TimePerMailbox = DEFAULT_TIME_PER_MAILBOX; + ui32 EventsPerMailbox = DEFAULT_EVENTS_PER_MAILBOX; + + // Introspection + ui32 MaxActivityType = 1; + + // Long-term balancing + TBalancingConfig Balancing; + }; + + struct TUnitedWorkersConfig { + ui32 CpuCount = 0; // Total CPUs running united workers (i.e. 
TBasicExecutorPoolConfig::Threads analog); set to zero to disable united workers + ui64 SpinThresholdUs = 100; // Limit for active spinning in case all pools became idle + ui64 PoolLimitUs = 500; // Soft limit on pool execution + ui64 EventLimitUs = 100; // Hard limit on last event execution exceeding pool limit + ui64 LimitPrecisionUs = 100; // Maximum delay of timer on limit excess (delay needed to avoid settimer syscall on every pool switch) + ui64 FastWorkerPriority = 10; // Real-time priority of workers not exceeding hard limits + ui64 IdleWorkerPriority = 20; // Real-time priority of standby workers waiting for hard preemption on timers (should be greater than FastWorkerPriority) + TCpuMask Allowed; // Allowed CPUs for workers to run on (every worker has affinity for exactly one cpu) + bool NoRealtime = false; // For environments w/o permissions for RT-threads + bool NoAffinity = false; // For environments w/o permissions for cpu affinity + TBalancerConfig Balancer; + }; + + struct TCpuManagerConfig { + TUnitedWorkersConfig UnitedWorkers; + TVector<TBasicExecutorPoolConfig> Basic; + TVector<TIOExecutorPoolConfig> IO; + TVector<TUnitedExecutorPoolConfig> United; + + ui32 GetExecutorsCount() const { + return Basic.size() + IO.size() + United.size(); + } + + TString GetPoolName(ui32 poolId) const { + for (const auto& p : Basic) { + if (p.PoolId == poolId) { + return p.PoolName; + } + } + for (const auto& p : IO) { + if (p.PoolId == poolId) { + return p.PoolName; + } + } + for (const auto& p : United) { + if (p.PoolId == poolId) { + return p.PoolName; + } + } + Y_FAIL("undefined pool id: %" PRIu32, (ui32)poolId); + } + + ui32 GetThreads(ui32 poolId) const { + for (const auto& p : Basic) { + if (p.PoolId == poolId) { + return p.Threads; + } + } + for (const auto& p : IO) { + if (p.PoolId == poolId) { + return p.Threads; + } + } + for (const auto& p : United) { + if (p.PoolId == poolId) { + return p.Concurrency ? 
p.Concurrency : UnitedWorkers.CpuCount; + } + } + Y_FAIL("undefined pool id: %" PRIu32, (ui32)poolId); + } + }; + + struct TSchedulerConfig { + TSchedulerConfig( + ui64 resolution = 1024, + ui64 spinThreshold = 100, + ui64 progress = 10000, + bool useSchedulerActor = false) + : ResolutionMicroseconds(resolution) + , SpinThreshold(spinThreshold) + , ProgressThreshold(progress) + , UseSchedulerActor(useSchedulerActor) + {} + + ui64 ResolutionMicroseconds = 1024; + ui64 SpinThreshold = 100; + ui64 ProgressThreshold = 10000; + bool UseSchedulerActor = false; // False is default because tests use scheduler thread + ui64 RelaxedSendPaceEventsPerSecond = 200000; + ui64 RelaxedSendPaceEventsPerCycle = RelaxedSendPaceEventsPerSecond * ResolutionMicroseconds / 1000000; + // For resolution >= 250000 microseconds threshold is SendPace + // For resolution <= 250 microseconds threshold is 20 * SendPace + ui64 RelaxedSendThresholdEventsPerSecond = RelaxedSendPaceEventsPerSecond * + (20 - ((20 - 1) * ClampVal(ResolutionMicroseconds, ui64(250), ui64(250000)) - 250) / (250000 - 250)); + ui64 RelaxedSendThresholdEventsPerCycle = RelaxedSendThresholdEventsPerSecond * ResolutionMicroseconds / 1000000; + + // Optional subsection for scheduler counters (usually subsystem=utils) + NMonitoring::TDynamicCounterPtr MonCounters = nullptr; + }; + + struct TCpuAllocation { + struct TPoolAllocation { + TPoolId PoolId; + TPoolWeight Weight; + + TPoolAllocation(TPoolId poolId = 0, TPoolWeight weight = 0) + : PoolId(poolId) + , Weight(weight) + {} + }; + + TCpuId CpuId; + TVector<TPoolAllocation> AllowedPools; + + TPoolsMask GetPoolsMask() const { + TPoolsMask mask = 0; + for (const auto& pa : AllowedPools) { + if (pa.PoolId < MaxPools) { + mask &= (1ull << pa.PoolId); + } + } + return mask; + } + + bool HasPool(TPoolId pool) const { + for (const auto& pa : AllowedPools) { + if (pa.PoolId == pool) { + return true; + } + } + return false; + } + }; + + struct TCpuAllocationConfig { + TVector<TCpuAllocation> Items; + + TCpuAllocationConfig(const TCpuMask& available, const TCpuManagerConfig& cfg) { + for (const TUnitedExecutorPoolConfig& pool : cfg.United) { + Y_VERIFY(pool.PoolId < MaxPools, "wrong PoolId of united executor pool: %s(%d)", + pool.PoolName.c_str(), (pool.PoolId)); + } + ui32 allocated[MaxPools] = {0}; + for (TCpuId cpu = 0; cpu < available.Size() && Items.size() < cfg.UnitedWorkers.CpuCount; cpu++) { + if (available.IsSet(cpu)) { + TCpuAllocation item; + item.CpuId = cpu; + for (const TUnitedExecutorPoolConfig& pool : cfg.United) { + if (cfg.UnitedWorkers.Allowed.IsEmpty() || cfg.UnitedWorkers.Allowed.IsSet(cpu)) { + if (pool.Allowed.IsEmpty() || pool.Allowed.IsSet(cpu)) { + item.AllowedPools.emplace_back(pool.PoolId, pool.Weight); + allocated[pool.PoolId]++; + } + } + } + if (!item.AllowedPools.empty()) { + Items.push_back(item); + } + } + } + for (const TUnitedExecutorPoolConfig& pool : cfg.United) { + Y_VERIFY(allocated[pool.PoolId] > 0, "unable to allocate cpu for united executor pool: %s(%d)", + pool.PoolName.c_str(), (pool.PoolId)); + } + } + + operator bool() const { + return !Items.empty(); + } + }; + +} diff --git a/library/cpp/actors/core/cpu_manager.cpp b/library/cpp/actors/core/cpu_manager.cpp new file mode 100644 index 0000000000..39089b5d83 --- /dev/null +++ b/library/cpp/actors/core/cpu_manager.cpp @@ -0,0 +1,108 @@ +#include "cpu_manager.h" +#include "probes.h" + +namespace NActors { + LWTRACE_USING(ACTORLIB_PROVIDER); + + void TCpuManager::Setup() { + TAffinity available; + available.Current(); 
+ TCpuAllocationConfig allocation(available, Config); + + if (allocation) { + if (!Balancer) { + Balancer.Reset(MakeBalancer(Config.UnitedWorkers.Balancer, Config.United, GetCycleCountFast())); + } + UnitedWorkers.Reset(new TUnitedWorkers(Config.UnitedWorkers, Config.United, allocation, Balancer.Get())); + } + + Executors.Reset(new TAutoPtr<IExecutorPool>[ExecutorPoolCount]); + + for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) { + Executors[excIdx].Reset(CreateExecutorPool(excIdx)); + } + } + + void TCpuManager::PrepareStart(TVector<NSchedulerQueue::TReader*>& scheduleReaders, TActorSystem* actorSystem) { + if (UnitedWorkers) { + UnitedWorkers->Prepare(actorSystem, scheduleReaders); + } + for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) { + NSchedulerQueue::TReader* readers; + ui32 readersCount = 0; + Executors[excIdx]->Prepare(actorSystem, &readers, &readersCount); + for (ui32 i = 0; i != readersCount; ++i, ++readers) { + scheduleReaders.push_back(readers); + } + } + } + + void TCpuManager::Start() { + if (UnitedWorkers) { + UnitedWorkers->Start(); + } + for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) { + Executors[excIdx]->Start(); + } + } + + void TCpuManager::PrepareStop() { + for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) { + Executors[excIdx]->PrepareStop(); + } + if (UnitedWorkers) { + UnitedWorkers->PrepareStop(); + } + } + + void TCpuManager::Shutdown() { + for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) { + Executors[excIdx]->Shutdown(); + } + if (UnitedWorkers) { + UnitedWorkers->Shutdown(); + } + for (ui32 round = 0, done = 0; done < ExecutorPoolCount && round < 3; ++round) { + done = 0; + for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) { + if (Executors[excIdx]->Cleanup()) { + ++done; + } + } + } + } + + void TCpuManager::Cleanup() { + for (ui32 round = 0, done = 0; done < ExecutorPoolCount; ++round) { + Y_VERIFY(round < 10, "actorsystem cleanup could not be completed in 10 rounds"); + done = 0; + for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) { + if (Executors[excIdx]->Cleanup()) { + ++done; + } + } + } + Executors.Destroy(); + UnitedWorkers.Destroy(); + } + + IExecutorPool* TCpuManager::CreateExecutorPool(ui32 poolId) { + for (TBasicExecutorPoolConfig& cfg : Config.Basic) { + if (cfg.PoolId == poolId) { + return new TBasicExecutorPool(cfg); + } + } + for (TIOExecutorPoolConfig& cfg : Config.IO) { + if (cfg.PoolId == poolId) { + return new TIOExecutorPool(cfg); + } + } + for (TUnitedExecutorPoolConfig& cfg : Config.United) { + if (cfg.PoolId == poolId) { + IExecutorPool* result = new TUnitedExecutorPool(cfg, UnitedWorkers.Get()); + return result; + } + } + Y_FAIL("missing PoolId: %d", int(poolId)); + } +} diff --git a/library/cpp/actors/core/cpu_manager.h b/library/cpp/actors/core/cpu_manager.h new file mode 100644 index 0000000000..454035477b --- /dev/null +++ b/library/cpp/actors/core/cpu_manager.h @@ -0,0 +1,57 @@ +#pragma once + +#include "actorsystem.h" +#include "executor_pool_basic.h" +#include "executor_pool_io.h" +#include "executor_pool_united.h" + +namespace NActors { + class TCpuManager : public TNonCopyable { + const ui32 ExecutorPoolCount; + TArrayHolder<TAutoPtr<IExecutorPool>> Executors; + THolder<TUnitedWorkers> UnitedWorkers; + THolder<IBalancer> Balancer; + TCpuManagerConfig Config; + public: + explicit TCpuManager(THolder<TActorSystemSetup>& setup) + : ExecutorPoolCount(setup->GetExecutorsCount()) + , Balancer(setup->Balancer) + , Config(setup->CpuManager) + { + if 
(setup->Executors) { // Explicit mode w/o united pools + Executors.Reset(setup->Executors.Release()); + for (ui32 excIdx = 0; excIdx != ExecutorPoolCount; ++excIdx) { + IExecutorPool* pool = Executors[excIdx].Get(); + Y_VERIFY(dynamic_cast<TUnitedExecutorPool*>(pool) == nullptr, + "united executor pool is prohibited in explicit mode of NActors::TCpuManager"); + } + } else { + Setup(); + } + } + + void Setup(); + void PrepareStart(TVector<NSchedulerQueue::TReader*>& scheduleReaders, TActorSystem* actorSystem); + void Start(); + void PrepareStop(); + void Shutdown(); + void Cleanup(); + + ui32 GetExecutorsCount() const { + return ExecutorPoolCount; + } + + IExecutorPool* GetExecutorPool(ui32 poolId) { + return Executors[poolId].Get(); + } + + void GetPoolStats(ui32 poolId, TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const { + if (poolId < ExecutorPoolCount) { + Executors[poolId]->GetCurrentStats(poolStats, statsCopy); + } + } + + private: + IExecutorPool* CreateExecutorPool(ui32 poolId); + }; +} diff --git a/library/cpp/actors/core/cpu_state.h b/library/cpp/actors/core/cpu_state.h new file mode 100644 index 0000000000..b8030149a7 --- /dev/null +++ b/library/cpp/actors/core/cpu_state.h @@ -0,0 +1,215 @@ +#pragma once + +#include "defs.h" + +#include <library/cpp/actors/util/futex.h> + +namespace NActors { + + class alignas(64) TCpuState { + // Atomic cachelign-aligned 64-bit state, see description below + TAtomic State = 0; + char Padding[64 - sizeof(TAtomic)]; + + // Bits 0-31: Currently executing pool + // - value less than MaxPools means cpu is executing corresponding pool (fast-worker is executing or waiting for slow-workers) + // - one of Cpu* values in case of idle cpu + // - used as futex by blocked fast-worker + static constexpr ui64 CurrentBits = 32; + static constexpr ui64 CurrentMask = ui64((1ull << CurrentBits) - 1); + + // Bits 32-63: Assigned pool + // - value is set by balancer + // - NOT used as futex + // - Not balanced + static constexpr ui64 AssignedOffs = 32; + static constexpr ui64 AssignedMask = ~CurrentMask; + + public: + TCpuState() { + Y_UNUSED(Padding); + } + + void Load(TPoolId& assigned, TPoolId& current) const { + TAtomicBase state = AtomicLoad(&State); + assigned = (state & AssignedMask) >> AssignedOffs; + current = state & CurrentMask; + } + + TPoolId CurrentPool() const { + return TPoolId(AtomicLoad(&State) & CurrentMask); + } + + void SwitchPool(TPoolId pool) { + while (true) { + TAtomicBase state = AtomicLoad(&State); + if (AtomicCas(&State, (state & ~CurrentMask) | pool, state)) { + return; + } + } + } + + TPoolId AssignedPool() const { + return TPoolId((AtomicLoad(&State) & AssignedMask) >> AssignedOffs); + } + + // Assigns new pool to cpu and wakes it up if cpu is idle + void AssignPool(TPoolId pool) { + while (true) { + TAtomicBase state = AtomicLoad(&State); + TPoolId current(state & CurrentMask); + if (Y_UNLIKELY(current == CpuStopped)) { + return; // it would be better to shutdown instead of balancing + } + // Idle cpu must be woken up after balancing to handle pending tokens (if any) in assigned/schedulable pool(s) + if (current == CpuSpinning) { + if (AtomicCas(&State, (ui64(pool) << AssignedOffs) | pool, state)) { + return; // successfully woken up + } + } else if (current == CpuBlocked) { + if (AtomicCas(&State, (ui64(pool) << AssignedOffs) | pool, state)) { + FutexWake(); + return; // successfully woken up + } + } else { + if (AtomicCas(&State, (ui64(pool) << AssignedOffs) | (state & ~AssignedMask), state)) { + return; 
// wakeup is not required + } + } + } + } + + void Stop() { + while (true) { + TAtomicBase state = AtomicLoad(&State); + if (AtomicCas(&State, (state & ~CurrentMask) | CpuStopped, state)) { + FutexWake(); + return; // successfully stopped + } + } + } + + // Start waiting, returns false in case of actorsystem shutdown + bool StartSpinning() { + while (true) { + TAtomicBase state = AtomicLoad(&State); + TPoolId current(state & CurrentMask); + if (Y_UNLIKELY(current == CpuStopped)) { + return false; + } + Y_VERIFY_DEBUG(current < MaxPools, "unexpected already waiting state of cpu (%d)", (int)current); + if (AtomicCas(&State, (state & ~CurrentMask) | CpuSpinning, state)) { // successfully marked as spinning + return true; + } + } + } + + bool StartBlocking() { + while (true) { + TAtomicBase state = AtomicLoad(&State); + TPoolId current(state & CurrentMask); + if (current == CpuSpinning) { + if (AtomicCas(&State, (state & ~CurrentMask) | CpuBlocked, state)) { + return false; // successful switch + } + } else { + return true; // wakeup + } + } + } + + bool Block(ui64 timeoutNs, TPoolId& result) { +#ifdef _linux_ + timespec timeout; + timeout.tv_sec = timeoutNs / 1'000'000'000; + timeout.tv_nsec = timeoutNs % 1'000'000'000; + SysFutex(Futex(), FUTEX_WAIT_PRIVATE, CpuBlocked, &timeout, nullptr, 0); +#else + NanoSleep(timeoutNs); // non-linux wake is not supported, cpu will go idle on wake after blocked state +#endif + TAtomicBase state = AtomicLoad(&State); + TPoolId current(state & CurrentMask); + if (current == CpuBlocked) { + return false; // timeout + } else { + result = current; + return true; // wakeup + } + } + + enum EWakeResult { + Woken, // successfully woken up + NotIdle, // cpu is already not idle + Forbidden, // cpu is assigned to another pool + Stopped, // cpu is shutdown + }; + + EWakeResult WakeWithoutToken(TPoolId pool) { + while (true) { + TAtomicBase state = RelaxedLoad(&State); + TPoolId current(state & CurrentMask); + TPoolId assigned((state & AssignedMask) >> AssignedOffs); + if (assigned == CpuShared || assigned == pool) { + if (current == CpuSpinning) { + if (AtomicCas(&State, (state & ~CurrentMask) | pool, state)) { + return Woken; + } + } else if (current == CpuBlocked) { + if (AtomicCas(&State, (state & ~CurrentMask) | pool, state)) { + FutexWake(); + return Woken; + } + } else if (current == CpuStopped) { + return Stopped; + } else { + return NotIdle; + } + } else { + return Forbidden; + } + } + } + + EWakeResult WakeWithTokenAcquired(TPoolId token) { + while (true) { + TAtomicBase state = RelaxedLoad(&State); + TPoolId current(state & CurrentMask); + // NOTE: We ignore assigned value because we already have token, so + // NOTE: not assigned pool may be run here. 
This will be fixed + // NOTE: after we finish with current activation + if (current == CpuSpinning) { + if (AtomicCas(&State, (state & ~CurrentMask) | token, state)) { + return Woken; + } + } else if (current == CpuBlocked) { + if (AtomicCas(&State, (state & ~CurrentMask) | token, state)) { + FutexWake(); + return Woken; + } + } else if (current == CpuStopped) { + return Stopped; + } else { + return NotIdle; + } + } + } + + bool IsPoolReassigned(TPoolId current) const { + TAtomicBase state = AtomicLoad(&State); + TPoolId assigned((state & AssignedMask) >> AssignedOffs); + return assigned != current; + } + + private: + void* Futex() { + return (void*)&State; // little endian assumed + } + + void FutexWake() { +#ifdef _linux_ + SysFutex(Futex(), FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0); +#endif + } + }; + +} diff --git a/library/cpp/actors/core/defs.h b/library/cpp/actors/core/defs.h new file mode 100644 index 0000000000..980b7d767b --- /dev/null +++ b/library/cpp/actors/core/defs.h @@ -0,0 +1,69 @@ +#pragma once + +// unique tag to fix pragma once gcc glueing: ./library/actorlib/core/defs.h + +#include <library/cpp/actors/util/defs.h> +#include <util/generic/hash.h> +#include <util/string/printf.h> + +// Enables collection of +// event send/receive counts +// activation time histograms +// event processing time histograms +#define ACTORSLIB_COLLECT_EXEC_STATS + +namespace NActors { + using TPoolId = ui8; + using TPoolsMask = ui64; + static constexpr TPoolId PoolBits = 6; + static constexpr TPoolId MaxPools = (1 << PoolBits) - 1; // maximum amount of pools (poolid=63 is reserved) + static constexpr TPoolsMask WaitPoolsFlag = (1ull << MaxPools); // wait-for-slow-workers flag bitmask + + // Special TPoolId values used by TCpuState + static constexpr TPoolId CpuSpinning = MaxPools; // fast-worker is actively spinning, no slow-workers + static constexpr TPoolId CpuBlocked = MaxPools + 1; // fast-worker is blocked, no slow-workers + static constexpr TPoolId CpuStopped = TPoolId(-1); // special value indicating worker should stop + static constexpr TPoolId CpuShared = MaxPools; // special value for `assigned` meaning balancer disabled, pool scheduler is used instead + + using TPoolWeight = ui16; + static constexpr TPoolWeight MinPoolWeight = 1; + static constexpr TPoolWeight DefPoolWeight = 32; + static constexpr TPoolWeight MaxPoolWeight = 1024; + + using TWorkerId = ui16; + static constexpr TWorkerId WorkerBits = 11; + static constexpr TWorkerId MaxWorkers = 1 << WorkerBits; + + using TThreadId = ui64; + static constexpr TThreadId UnknownThreadId = ui64(-1); + + struct TMailboxType { + enum EType { + Inherited = -1, // inherit mailbox from parent + Simple = 0, // simplest queue under producer lock. fastest in no-contention case + Revolving = 1, // somewhat outdated, tries to be wait-free. 
replaced by ReadAsFilled + HTSwap = 2, // other simple lf queue, suggested for low-contention case + ReadAsFilled = 3, // wait-free queue, suggested for high-contention or latency critical + TinyReadAsFilled = 4, // same as 3 but with lower overhead + //Inplace; + //Direct; + //Virtual + }; + }; + + struct TScopeId : std::pair<ui64, ui64> { + using TBase = std::pair<ui64, ui64>; + using TBase::TBase; + static const TScopeId LocallyGenerated; + }; + + static inline TString ScopeIdToString(const TScopeId& scopeId) { + return Sprintf("<%" PRIu64 ":%" PRIu64 ">", scopeId.first, scopeId.second); + } + +} + +template<> +struct hash<NActors::TScopeId> : hash<std::pair<ui64, ui64>> {}; + +class TAffinity; diff --git a/library/cpp/actors/core/event.cpp b/library/cpp/actors/core/event.cpp new file mode 100644 index 0000000000..33f8ce2aaf --- /dev/null +++ b/library/cpp/actors/core/event.cpp @@ -0,0 +1,38 @@ +#include "event.h" +#include "event_pb.h" + +namespace NActors { + + const TScopeId TScopeId::LocallyGenerated{ + Max<ui64>(), Max<ui64>() + }; + + TIntrusivePtr<TEventSerializedData> IEventHandle::ReleaseChainBuffer() { + if (Buffer) { + TIntrusivePtr<TEventSerializedData> result; + DoSwap(result, Buffer); + Event.Reset(); + return result; + } + if (Event) { + TAllocChunkSerializer serializer; + Event->SerializeToArcadiaStream(&serializer); + auto chainBuf = serializer.Release(Event->IsExtendedFormat()); + Event.Reset(); + return chainBuf; + } + return new TEventSerializedData; + } + + TIntrusivePtr<TEventSerializedData> IEventHandle::GetChainBuffer() { + if (Buffer) + return Buffer; + if (Event) { + TAllocChunkSerializer serializer; + Event->SerializeToArcadiaStream(&serializer); + Buffer = serializer.Release(Event->IsExtendedFormat()); + return Buffer; + } + return new TEventSerializedData; + } +} diff --git a/library/cpp/actors/core/event.h b/library/cpp/actors/core/event.h new file mode 100644 index 0000000000..6ff02aaf94 --- /dev/null +++ b/library/cpp/actors/core/event.h @@ -0,0 +1,344 @@ +#pragma once + +#include "defs.h" +#include "actorid.h" +#include "callstack.h" +#include "event_load.h" + +#include <library/cpp/actors/wilson/wilson_trace.h> + +#include <util/system/hp_timer.h> +#include <util/generic/maybe.h> + +namespace NActors { + class TChunkSerializer; + + class ISerializerToStream { + public: + virtual bool SerializeToArcadiaStream(TChunkSerializer*) const = 0; + }; + + class IEventBase + : TNonCopyable, + public ISerializerToStream { + public: + // actual typing is performed by IEventHandle + + virtual ~IEventBase() { + } + + virtual TString ToStringHeader() const = 0; + virtual TString ToString() const { + return ToStringHeader(); + } + virtual ui32 CalculateSerializedSize() const { + return 0; + } + virtual ui32 Type() const = 0; + virtual bool SerializeToArcadiaStream(TChunkSerializer*) const = 0; + virtual bool IsSerializable() const = 0; + virtual bool IsExtendedFormat() const { + return false; + } + virtual ui32 CalculateSerializedSizeCached() const { + return CalculateSerializedSize(); + } + }; + + // fat handle + class IEventHandle : TNonCopyable { + struct TOnNondelivery { + TActorId Recipient; + + TOnNondelivery(const TActorId& recipient) + : Recipient(recipient) + { + } + }; + + public: + template <typename TEv> + inline TEv* CastAsLocal() const noexcept { + auto fits = GetTypeRewrite() == TEv::EventType; + + return fits ? 
static_cast<TEv*>(Event.Get()) : nullptr; + } + + template <typename TEventType> + TEventType* Get() { + if (Type != TEventType::EventType) + Y_FAIL("Event type %" PRIu32 " doesn't match the expected type %" PRIu32, Type, TEventType::EventType); + + if (!Event) { + Event.Reset(TEventType::Load(Buffer.Get())); + } + + if (Event) { + return static_cast<TEventType*>(Event.Get()); + } + + Y_FAIL("Failed to Load() event type %" PRIu32 " class %s", Type, TypeName<TEventType>().data()); + } + + template <typename T> + TAutoPtr<T> Release() { + TAutoPtr<T> x = Get<T>(); + Y_UNUSED(Event.Release()); + Buffer.Reset(); + return x; + } + + enum EFlags { + FlagTrackDelivery = 1 << 0, + FlagForwardOnNondelivery = 1 << 1, + FlagSubscribeOnSession = 1 << 2, + FlagUseSubChannel = 1 << 3, + FlagGenerateUnsureUndelivered = 1 << 4, + FlagExtendedFormat = 1 << 5, + }; + + const ui32 Type; + const ui32 Flags; + const TActorId Recipient; + const TActorId Sender; + const ui64 Cookie; + const TScopeId OriginScopeId = TScopeId::LocallyGenerated; // filled in when the message is received from Interconnect + + // if set, used by ActorSystem/Interconnect to report tracepoints + NWilson::TTraceId TraceId; + + // filled if feeded by interconnect session + const TActorId InterconnectSession; + +#ifdef ACTORSLIB_COLLECT_EXEC_STATS + ::NHPTimer::STime SendTime; +#endif + + static const size_t ChannelBits = 12; + static const size_t ChannelShift = (sizeof(ui32) << 3) - ChannelBits; + +#ifdef USE_ACTOR_CALLSTACK + TCallstack Callstack; +#endif + ui16 GetChannel() const noexcept { + return Flags >> ChannelShift; + } + + ui64 GetSubChannel() const noexcept { + return Flags & FlagUseSubChannel ? Sender.LocalId() : 0ULL; + } + + static ui32 MakeFlags(ui32 channel, ui32 flags) { + Y_VERIFY(channel < (1 << ChannelBits)); + Y_VERIFY(flags < (1 << ChannelShift)); + return (flags | (channel << ChannelShift)); + } + + private: + THolder<IEventBase> Event; + TIntrusivePtr<TEventSerializedData> Buffer; + + TActorId RewriteRecipient; + ui32 RewriteType; + + THolder<TOnNondelivery> OnNondeliveryHolder; // only for local events + + public: + void Rewrite(ui32 typeRewrite, TActorId recipientRewrite) { + RewriteRecipient = recipientRewrite; + RewriteType = typeRewrite; + } + + void DropRewrite() { + RewriteRecipient = Recipient; + RewriteType = Type; + } + + const TActorId& GetRecipientRewrite() const { + return RewriteRecipient; + } + + ui32 GetTypeRewrite() const { + return RewriteType; + } + + TActorId GetForwardOnNondeliveryRecipient() const { + return OnNondeliveryHolder.Get() ? 
OnNondeliveryHolder->Recipient : TActorId(); + } + + IEventHandle(const TActorId& recipient, const TActorId& sender, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0, + const TActorId* forwardOnNondelivery = nullptr, NWilson::TTraceId traceId = {}) + : Type(ev->Type()) + , Flags(flags) + , Recipient(recipient) + , Sender(sender) + , Cookie(cookie) + , TraceId(std::move(traceId)) +#ifdef ACTORSLIB_COLLECT_EXEC_STATS + , SendTime(0) +#endif + , Event(ev) + , RewriteRecipient(Recipient) + , RewriteType(Type) + { + if (forwardOnNondelivery) + OnNondeliveryHolder.Reset(new TOnNondelivery(*forwardOnNondelivery)); + } + + IEventHandle(ui32 type, + ui32 flags, + const TActorId& recipient, + const TActorId& sender, + TIntrusivePtr<TEventSerializedData> buffer, + ui64 cookie, + const TActorId* forwardOnNondelivery = nullptr, + NWilson::TTraceId traceId = {}) + : Type(type) + , Flags(flags) + , Recipient(recipient) + , Sender(sender) + , Cookie(cookie) + , TraceId(std::move(traceId)) +#ifdef ACTORSLIB_COLLECT_EXEC_STATS + , SendTime(0) +#endif + , Buffer(std::move(buffer)) + , RewriteRecipient(Recipient) + , RewriteType(Type) + { + if (forwardOnNondelivery) + OnNondeliveryHolder.Reset(new TOnNondelivery(*forwardOnNondelivery)); + } + + // Special ctor for events from interconnect. + IEventHandle(const TActorId& session, + ui32 type, + ui32 flags, + const TActorId& recipient, + const TActorId& sender, + TIntrusivePtr<TEventSerializedData> buffer, + ui64 cookie, + TScopeId originScopeId, + NWilson::TTraceId traceId) noexcept + : Type(type) + , Flags(flags) + , Recipient(recipient) + , Sender(sender) + , Cookie(cookie) + , OriginScopeId(originScopeId) + , TraceId(std::move(traceId)) + , InterconnectSession(session) +#ifdef ACTORSLIB_COLLECT_EXEC_STATS + , SendTime(0) +#endif + , Buffer(std::move(buffer)) + , RewriteRecipient(Recipient) + , RewriteType(Type) + { + } + + TIntrusivePtr<TEventSerializedData> GetChainBuffer(); + TIntrusivePtr<TEventSerializedData> ReleaseChainBuffer(); + + ui32 GetSize() const { + if (Buffer) { + return Buffer->GetSize(); + } else if (Event) { + return Event->CalculateSerializedSize(); + } else { + return 0; + } + } + + bool HasBuffer() const { + return bool(Buffer); + } + + bool HasEvent() const { + return bool(Event); + } + + IEventBase* GetBase() { + if (!Event) { + if (!Buffer) + return nullptr; + else + ythrow TWithBackTrace<yexception>() << "don't know how to load the event from buffer"; + } + + return Event.Get(); + } + + TAutoPtr<IEventBase> ReleaseBase() { + TAutoPtr<IEventBase> x = GetBase(); + Y_UNUSED(Event.Release()); + Buffer.Reset(); + return x; + } + + TAutoPtr<IEventHandle> Forward(const TActorId& dest) { + if (Event) + return new IEventHandle(dest, Sender, Event.Release(), Flags, Cookie, nullptr, std::move(TraceId)); + else + return new IEventHandle(Type, Flags, dest, Sender, Buffer, Cookie, nullptr, std::move(TraceId)); + } + + TAutoPtr<IEventHandle> ForwardOnNondelivery(ui32 reason, bool unsure = false); + }; + + template <typename TEventType> + class TEventHandle: public IEventHandle { + TEventHandle(); // we never made instance of TEventHandle + public: + TEventType* Get() { + return IEventHandle::Get<TEventType>(); + } + + TAutoPtr<TEventType> Release() { + return IEventHandle::Release<TEventType>(); + } + }; + + static_assert(sizeof(TEventHandle<IEventBase>) == sizeof(IEventHandle), "expect sizeof(TEventHandle<IEventBase>) == sizeof(IEventHandle)"); + + template <typename TEventType, ui32 EventType0> + class TEventBase: public IEventBase { + public: + 
static constexpr ui32 EventType = EventType0; + ui32 Type() const override { + return EventType0; + } + // still abstract + + typedef TEventHandle<TEventType> THandle; + typedef TAutoPtr<THandle> TPtr; + }; + +#define DEFINE_SIMPLE_LOCAL_EVENT(eventType, header) \ + TString ToStringHeader() const override { \ + return TString(header); \ + } \ + bool SerializeToArcadiaStream(NActors::TChunkSerializer*) const override { \ + Y_FAIL("Local event " #eventType " is not serializable"); \ + } \ + static IEventBase* Load(NActors::TEventSerializedData*) { \ + Y_FAIL("Local event " #eventType " has no load method"); \ + } \ + bool IsSerializable() const override { \ + return false; \ + } + +#define DEFINE_SIMPLE_NONLOCAL_EVENT(eventType, header) \ + TString ToStringHeader() const override { \ + return TString(header); \ + } \ + bool SerializeToArcadiaStream(NActors::TChunkSerializer*) const override { \ + return true; \ + } \ + static IEventBase* Load(NActors::TEventSerializedData*) { \ + return new eventType(); \ + } \ + bool IsSerializable() const override { \ + return true; \ + } +} diff --git a/library/cpp/actors/core/event_load.h b/library/cpp/actors/core/event_load.h new file mode 100644 index 0000000000..0dab1dd374 --- /dev/null +++ b/library/cpp/actors/core/event_load.h @@ -0,0 +1,112 @@ +#pragma once + +#include <util/stream/walk.h> +#include <util/system/types.h> +#include <util/generic/string.h> +#include <library/cpp/actors/util/rope.h> +#include <library/cpp/actors/wilson/wilson_trace.h> + +namespace NActors { + class IEventHandle; + + struct TConstIoVec { + const void* Data; + size_t Size; + }; + + struct TIoVec { + void* Data; + size_t Size; + }; + + class TEventSerializedData + : public TThrRefBase + { + TRope Rope; + bool ExtendedFormat = false; + + public: + TEventSerializedData() = default; + + TEventSerializedData(TRope&& rope, bool extendedFormat) + : Rope(std::move(rope)) + , ExtendedFormat(extendedFormat) + {} + + TEventSerializedData(const TEventSerializedData& original, TString extraBuffer) + : Rope(original.Rope) + , ExtendedFormat(original.ExtendedFormat) + { + Append(std::move(extraBuffer)); + } + + TEventSerializedData(TString buffer, bool extendedFormat) + : ExtendedFormat(extendedFormat) + { + Append(std::move(buffer)); + } + + void SetExtendedFormat() { + ExtendedFormat = true; + } + + bool IsExtendedFormat() const { + return ExtendedFormat; + } + + TRope::TConstIterator GetBeginIter() const { + return Rope.Begin(); + } + + size_t GetSize() const { + return Rope.GetSize(); + } + + TString GetString() const { + TString result; + result.reserve(GetSize()); + for (auto it = Rope.Begin(); it.Valid(); it.AdvanceToNextContiguousBlock()) { + result.append(it.ContiguousData(), it.ContiguousSize()); + } + return result; + } + + TRope EraseBack(size_t count) { + Y_VERIFY(count <= Rope.GetSize()); + TRope::TIterator iter = Rope.End(); + iter -= count; + return Rope.Extract(iter, Rope.End()); + } + + void Append(TRope&& from) { + Rope.Insert(Rope.End(), std::move(from)); + } + + void Append(TString buffer) { + if (buffer) { + Rope.Insert(Rope.End(), TRope(std::move(buffer))); + } + } + }; +} + +class TChainBufWalk : public IWalkInput { + TIntrusivePtr<NActors::TEventSerializedData> Buffer; + TRope::TConstIterator Iter; + +public: + TChainBufWalk(TIntrusivePtr<NActors::TEventSerializedData> buffer) + : Buffer(std::move(buffer)) + , Iter(Buffer->GetBeginIter()) + {} + +private: + size_t DoUnboundedNext(const void **ptr) override { + const size_t size = Iter.ContiguousSize(); + *ptr 
= Iter.ContiguousData(); + if (Iter.Valid()) { + Iter.AdvanceToNextContiguousBlock(); + } + return size; + } +}; diff --git a/library/cpp/actors/core/event_local.h b/library/cpp/actors/core/event_local.h new file mode 100644 index 0000000000..2845aa94dd --- /dev/null +++ b/library/cpp/actors/core/event_local.h @@ -0,0 +1,74 @@ +#pragma once + +#include "event.h" +#include "scheduler_cookie.h" +#include "event_load.h" +#include <util/system/type_name.h> + +namespace NActors { + template <typename TEv, ui32 TEventType> + class TEventLocal: public TEventBase<TEv, TEventType> { + public: + TString ToStringHeader() const override { + return TypeName<TEv>(); + } + + bool SerializeToArcadiaStream(TChunkSerializer* /*serializer*/) const override { + Y_FAIL("Serialization of local event %s type %" PRIu32, TypeName<TEv>().data(), TEventType); + } + + bool IsSerializable() const override { + return false; + } + + static IEventBase* Load(TEventSerializedData*) { + Y_FAIL("Loading of local event %s type %" PRIu32, TypeName<TEv>().data(), TEventType); + } + }; + + template <typename TEv, ui32 TEventType> + class TEventScheduler: public TEventLocal<TEv, TEventType> { + public: + TSchedulerCookieHolder Cookie; + + TEventScheduler(ISchedulerCookie* cookie) + : Cookie(cookie) + { + } + }; + + template <ui32 TEventType> + class TEventSchedulerEv: public TEventScheduler<TEventSchedulerEv<TEventType>, TEventType> { + public: + TEventSchedulerEv(ISchedulerCookie* cookie) + : TEventScheduler<TEventSchedulerEv<TEventType>, TEventType>(cookie) + { + } + }; + + template <typename TEv, ui32 TEventType> + class TEventSimple: public TEventBase<TEv, TEventType> { + public: + TString ToStringHeader() const override { + static TString header(TypeName<TEv>()); + return header; + } + + bool SerializeToArcadiaStream(TChunkSerializer* /*serializer*/) const override { + static_assert(sizeof(TEv) == sizeof(TEventSimple<TEv, TEventType>), "Descendant should be an empty class"); + return true; + } + + bool IsSerializable() const override { + return true; + } + + static IEventBase* Load(NActors::TEventSerializedData*) { + return new TEv(); + } + + static IEventBase* Load(const TString&) { + return new TEv(); + } + }; +} diff --git a/library/cpp/actors/core/event_pb.cpp b/library/cpp/actors/core/event_pb.cpp new file mode 100644 index 0000000000..018ff9ac34 --- /dev/null +++ b/library/cpp/actors/core/event_pb.cpp @@ -0,0 +1,223 @@ +#include "event_pb.h" + +namespace NActors { + bool TRopeStream::Next(const void** data, int* size) { + *data = Iter.ContiguousData(); + *size = Iter.ContiguousSize(); + if (size_t(*size + TotalByteCount) > Size) { + *size = Size - TotalByteCount; + Iter += *size; + } else if (Iter.Valid()) { + Iter.AdvanceToNextContiguousBlock(); + } + TotalByteCount += *size; + return *size != 0; + } + + void TRopeStream::BackUp(int count) { + Y_VERIFY(count <= TotalByteCount); + Iter -= count; + TotalByteCount -= count; + } + + bool TRopeStream::Skip(int count) { + if (static_cast<size_t>(TotalByteCount + count) > Size) { + count = Size - TotalByteCount; + } + Iter += count; + TotalByteCount += count; + return static_cast<size_t>(TotalByteCount) != Size; + } + + TCoroutineChunkSerializer::TCoroutineChunkSerializer() + : TotalSerializedDataSize(0) + , Stack(64 * 1024) + , SelfClosure{this, TArrayRef(Stack.Begin(), Stack.End())} + , InnerContext(SelfClosure) + {} + + TCoroutineChunkSerializer::~TCoroutineChunkSerializer() { + CancelFlag = true; + Resume(); + Y_VERIFY(Finished); + } + + bool 
TCoroutineChunkSerializer::AllowsAliasing() const { + return true; + } + + bool TCoroutineChunkSerializer::Produce(const void *data, size_t size) { + Y_VERIFY(size <= SizeRemain); + SizeRemain -= size; + TotalSerializedDataSize += size; + + if (NumChunks) { + auto& last = Chunks[NumChunks - 1]; + if (last.first + last.second == data) { + last.second += size; // just extend the last buffer + return true; + } + } + + if (NumChunks == MaxChunks) { + InnerContext.SwitchTo(BufFeedContext); + if (CancelFlag || AbortFlag) { + return false; + } + } + + Y_VERIFY(NumChunks < MaxChunks); + Chunks[NumChunks++] = {static_cast<const char*>(data), size}; + return true; + } + + bool TCoroutineChunkSerializer::WriteAliasedRaw(const void* data, int size) { + Y_VERIFY(size >= 0); + while (size) { + if (CancelFlag || AbortFlag) { + return false; + } else if (const size_t bytesToAppend = Min<size_t>(size, SizeRemain)) { + if (!Produce(data, bytesToAppend)) { + return false; + } + data = static_cast<const char*>(data) + bytesToAppend; + size -= bytesToAppend; + } else { + InnerContext.SwitchTo(BufFeedContext); + } + } + return true; + } + + bool TCoroutineChunkSerializer::Next(void** data, int* size) { + if (CancelFlag || AbortFlag) { + return false; + } + if (!SizeRemain) { + InnerContext.SwitchTo(BufFeedContext); + if (CancelFlag || AbortFlag) { + return false; + } + } + Y_VERIFY(SizeRemain); + *data = BufferPtr; + *size = SizeRemain; + BufferPtr += SizeRemain; + return Produce(*data, *size); + } + + void TCoroutineChunkSerializer::BackUp(int count) { + if (!count) { + return; + } + Y_VERIFY(count > 0); + Y_VERIFY(NumChunks); + TChunk& buf = Chunks[NumChunks - 1]; + Y_VERIFY((size_t)count <= buf.second); + Y_VERIFY(buf.first + buf.second == BufferPtr); + buf.second -= count; + if (!buf.second) { + --NumChunks; + } + BufferPtr -= count; + SizeRemain += count; + TotalSerializedDataSize -= count; + } + + void TCoroutineChunkSerializer::Resume() { + TContMachineContext feedContext; + BufFeedContext = &feedContext; + feedContext.SwitchTo(&InnerContext); + BufFeedContext = nullptr; + } + + bool TCoroutineChunkSerializer::WriteRope(const TRope *rope) { + for (auto iter = rope->Begin(); iter.Valid(); iter.AdvanceToNextContiguousBlock()) { + if (!WriteAliasedRaw(iter.ContiguousData(), iter.ContiguousSize())) { + return false; + } + } + return true; + } + + bool TCoroutineChunkSerializer::WriteString(const TString *s) { + return WriteAliasedRaw(s->data(), s->length()); + } + + std::pair<TCoroutineChunkSerializer::TChunk*, TCoroutineChunkSerializer::TChunk*> TCoroutineChunkSerializer::FeedBuf(void* data, size_t size) { + // fill in base params + BufferPtr = static_cast<char*>(data); + SizeRemain = size; + + // transfer control to the coroutine + Y_VERIFY(Event); + NumChunks = 0; + Resume(); + + return {Chunks, Chunks + NumChunks}; + } + + void TCoroutineChunkSerializer::SetSerializingEvent(const IEventBase *event) { + Y_VERIFY(Event == nullptr); + Event = event; + TotalSerializedDataSize = 0; + AbortFlag = false; + } + + void TCoroutineChunkSerializer::Abort() { + Y_VERIFY(Event); + AbortFlag = true; + Resume(); + } + + void TCoroutineChunkSerializer::DoRun() { + while (!CancelFlag) { + Y_VERIFY(Event); + SerializationSuccess = Event->SerializeToArcadiaStream(this); + Event = nullptr; + if (!CancelFlag) { // cancel flag may have been received during serialization + InnerContext.SwitchTo(BufFeedContext); + } + } + Finished = true; + InnerContext.SwitchTo(BufFeedContext); + } + + bool TAllocChunkSerializer::Next(void** 
pdata, int* psize) { + if (Backup) { + // we have some data in backup rope -- move the first chunk from the backup rope to the buffer and return + // pointer to the buffer; it is safe to remove 'const' here as we uniquely own this buffer + TRope::TIterator iter = Backup.Begin(); + *pdata = const_cast<char*>(iter.ContiguousData()); + *psize = iter.ContiguousSize(); + iter.AdvanceToNextContiguousBlock(); + Buffers->Append(Backup.Extract(Backup.Begin(), iter)); + } else { + // no backup buffer, so we have to create new one + auto item = TRopeAlignedBuffer::Allocate(4096); + *pdata = item->GetBuffer(); + *psize = item->GetCapacity(); + Buffers->Append(TRope(std::move(item))); + } + return true; + } + + void TAllocChunkSerializer::BackUp(int count) { + Backup.Insert(Backup.Begin(), Buffers->EraseBack(count)); + } + + bool TAllocChunkSerializer::WriteAliasedRaw(const void*, int) { + Y_VERIFY(false); + return false; + } + + bool TAllocChunkSerializer::WriteRope(const TRope *rope) { + Buffers->Append(TRope(*rope)); + return true; + } + + bool TAllocChunkSerializer::WriteString(const TString *s) { + Buffers->Append(*s); + return true; + } +} diff --git a/library/cpp/actors/core/event_pb.h b/library/cpp/actors/core/event_pb.h new file mode 100644 index 0000000000..d7546b901a --- /dev/null +++ b/library/cpp/actors/core/event_pb.h @@ -0,0 +1,500 @@ +#pragma once + +#include "event.h" +#include "event_load.h" + +#include <google/protobuf/io/zero_copy_stream.h> +#include <google/protobuf/arena.h> +#include <library/cpp/actors/protos/actors.pb.h> +#include <util/generic/deque.h> +#include <util/system/context.h> +#include <util/system/filemap.h> +#include <array> + +namespace NActors { + + class TRopeStream : public NProtoBuf::io::ZeroCopyInputStream { + TRope::TConstIterator Iter; + const size_t Size; + + public: + TRopeStream(TRope::TConstIterator iter, size_t size) + : Iter(iter) + , Size(size) + {} + + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override { + return TotalByteCount; + } + + private: + int64_t TotalByteCount = 0; + }; + + class TChunkSerializer : public NProtoBuf::io::ZeroCopyOutputStream { + public: + TChunkSerializer() = default; + virtual ~TChunkSerializer() = default; + + virtual bool WriteRope(const TRope *rope) = 0; + virtual bool WriteString(const TString *s) = 0; + }; + + class TAllocChunkSerializer final : public TChunkSerializer { + public: + bool Next(void** data, int* size) override; + void BackUp(int count) override; + int64_t ByteCount() const override { + return Buffers->GetSize(); + } + bool WriteAliasedRaw(const void* data, int size) override; + + // WARNING: these methods require owner to retain ownership and immutability of passed objects + bool WriteRope(const TRope *rope) override; + bool WriteString(const TString *s) override; + + inline TIntrusivePtr<TEventSerializedData> Release(bool extendedFormat) { + if (extendedFormat) { + Buffers->SetExtendedFormat(); + } + return std::move(Buffers); + } + + protected: + TIntrusivePtr<TEventSerializedData> Buffers = new TEventSerializedData; + TRope Backup; + }; + + class TCoroutineChunkSerializer final : public TChunkSerializer, protected ITrampoLine { + public: + using TChunk = std::pair<const char*, size_t>; + + TCoroutineChunkSerializer(); + ~TCoroutineChunkSerializer(); + + void SetSerializingEvent(const IEventBase *event); + void Abort(); + std::pair<TChunk*, TChunk*> FeedBuf(void* data, size_t size); + bool 
IsComplete() const { + return !Event; + } + bool IsSuccessfull() const { + return SerializationSuccess; + } + const IEventBase *GetCurrentEvent() const { + return Event; + } + + bool Next(void** data, int* size) override; + void BackUp(int count) override; + int64_t ByteCount() const override { + return TotalSerializedDataSize; + } + bool WriteAliasedRaw(const void* data, int size) override; + bool AllowsAliasing() const override; + + bool WriteRope(const TRope *rope) override; + bool WriteString(const TString *s) override; + + protected: + void DoRun() override; + void Resume(); + bool Produce(const void *data, size_t size); + + i64 TotalSerializedDataSize; + TMappedAllocation Stack; + TContClosure SelfClosure; + TContMachineContext InnerContext; + TContMachineContext *BufFeedContext = nullptr; + char *BufferPtr; + size_t SizeRemain; + static constexpr size_t MaxChunks = 16; + TChunk Chunks[MaxChunks]; + size_t NumChunks = 0; + const IEventBase *Event = nullptr; + bool CancelFlag = false; + bool AbortFlag; + bool SerializationSuccess; + bool Finished = false; + }; + +#ifdef ACTORLIB_HUGE_PB_SIZE + static const size_t EventMaxByteSize = 140 << 20; // (140MB) +#else + static const size_t EventMaxByteSize = 67108000; +#endif + + template <typename TEv, typename TRecord /*protobuf record*/, ui32 TEventType, typename TRecHolder> + class TEventPBBase: public TEventBase<TEv, TEventType> , public TRecHolder { + // a vector of data buffers referenced by record; if filled, then extended serialization mechanism applies + TVector<TRope> Payload; + + public: + using TRecHolder::Record; + + public: + using ProtoRecordType = TRecord; + + TEventPBBase() = default; + + explicit TEventPBBase(const TRecord& rec) + { + Record = rec; + } + + explicit TEventPBBase(TRecord&& rec) + { + Record = std::move(rec); + } + + TString ToStringHeader() const override { + return Record.GetTypeName(); + } + + TString ToString() const override { + return Record.ShortDebugString(); + } + + bool IsSerializable() const override { + return true; + } + + bool IsExtendedFormat() const override { + return static_cast<bool>(Payload); + } + + bool SerializeToArcadiaStream(TChunkSerializer* chunker) const override { + // serialize payload first + if (Payload) { + void *data; + int size = 0; + auto append = [&](const char *p, size_t len) { + while (len) { + if (size) { + const size_t numBytesToCopy = std::min<size_t>(size, len); + memcpy(data, p, numBytesToCopy); + data = static_cast<char*>(data) + numBytesToCopy; + size -= numBytesToCopy; + p += numBytesToCopy; + len -= numBytesToCopy; + } else if (!chunker->Next(&data, &size)) { + return false; + } + } + return true; + }; + auto appendNumber = [&](size_t number) { + char buf[MaxNumberBytes]; + return append(buf, SerializeNumber(number, buf)); + }; + char marker = PayloadMarker; + append(&marker, 1); + if (!appendNumber(Payload.size())) { + return false; + } + for (const TRope& rope : Payload) { + if (!appendNumber(rope.GetSize())) { + return false; + } + if (rope) { + if (size) { + chunker->BackUp(std::exchange(size, 0)); + } + if (!chunker->WriteRope(&rope)) { + return false; + } + } + } + if (size) { + chunker->BackUp(size); + } + } + + return Record.SerializeToZeroCopyStream(chunker); + } + + ui32 CalculateSerializedSize() const override { + ssize_t result = Record.ByteSize(); + if (result >= 0 && Payload) { + ++result; // marker + char buf[MaxNumberBytes]; + result += SerializeNumber(Payload.size(), buf); + for (const TRope& rope : Payload) { + result += 
SerializeNumber(rope.GetSize(), buf); + result += rope.GetSize(); + } + } + return result; + } + + static IEventBase* Load(TIntrusivePtr<TEventSerializedData> input) { + THolder<TEventPBBase> ev(new TEv()); + if (!input->GetSize()) { + Y_PROTOBUF_SUPPRESS_NODISCARD ev->Record.ParseFromString(TString()); + } else { + TRope::TConstIterator iter = input->GetBeginIter(); + ui64 size = input->GetSize(); + + if (input->IsExtendedFormat()) { + // check marker + if (!iter.Valid() || *iter.ContiguousData() != PayloadMarker) { + Y_FAIL("invalid event"); + } + // skip marker + iter += 1; + --size; + // parse number of payload ropes + size_t numRopes = DeserializeNumber(iter, size); + if (numRopes == Max<size_t>()) { + Y_FAIL("invalid event"); + } + while (numRopes--) { + // parse length of the rope + const size_t len = DeserializeNumber(iter, size); + if (len == Max<size_t>() || size < len) { + Y_FAIL("invalid event len# %zu size# %" PRIu64, len, size); + } + // extract the rope + TRope::TConstIterator begin = iter; + iter += len; + size -= len; + ev->Payload.emplace_back(begin, iter); + } + } + + // parse the protobuf + TRopeStream stream(iter, size); + if (!ev->Record.ParseFromZeroCopyStream(&stream)) { + Y_FAIL("Failed to parse protobuf event type %" PRIu32 " class %s", TEventType, TypeName(ev->Record).data()); + } + } + ev->CachedByteSize = input->GetSize(); + return ev.Release(); + } + + size_t GetCachedByteSize() const { + if (CachedByteSize == 0) { + CachedByteSize = CalculateSerializedSize(); + } + return CachedByteSize; + } + + ui32 CalculateSerializedSizeCached() const override { + return GetCachedByteSize(); + } + + void InvalidateCachedByteSize() { + CachedByteSize = 0; + } + + public: + void ReservePayload(size_t size) { + Payload.reserve(size); + } + + ui32 AddPayload(TRope&& rope) { + const ui32 id = Payload.size(); + Payload.push_back(std::move(rope)); + InvalidateCachedByteSize(); + return id; + } + + const TRope& GetPayload(ui32 id) const { + Y_VERIFY(id < Payload.size()); + return Payload[id]; + } + + ui32 GetPayloadCount() const { + return Payload.size(); + } + + void StripPayload() { + Payload.clear(); + } + + protected: + mutable size_t CachedByteSize = 0; + + static constexpr char PayloadMarker = 0x07; + static constexpr size_t MaxNumberBytes = (sizeof(size_t) * CHAR_BIT + 6) / 7; + + static size_t SerializeNumber(size_t num, char *buffer) { + char *begin = buffer; + do { + *buffer++ = (num & 0x7F) | (num >= 128 ? 
0x80 : 0x00); + num >>= 7; + } while (num); + return buffer - begin; + } + + static size_t DeserializeNumber(const char **ptr, const char *end) { + const char *p = *ptr; + size_t res = 0; + size_t offset = 0; + for (;;) { + if (p == end) { + return Max<size_t>(); + } + const char byte = *p++; + res |= (static_cast<size_t>(byte) & 0x7F) << offset; + offset += 7; + if (!(byte & 0x80)) { + break; + } + } + *ptr = p; + return res; + } + + static size_t DeserializeNumber(TRope::TConstIterator& iter, ui64& size) { + size_t res = 0; + size_t offset = 0; + for (;;) { + if (!iter.Valid()) { + return Max<size_t>(); + } + const char byte = *iter.ContiguousData(); + iter += 1; + --size; + res |= (static_cast<size_t>(byte) & 0x7F) << offset; + offset += 7; + if (!(byte & 0x80)) { + break; + } + } + return res; + } + }; + + // Protobuf record not using arena + template <typename TRecord> + struct TRecordHolder { + TRecord Record; + }; + + // Protobuf arena and a record allocated on it + template <typename TRecord, size_t InitialBlockSize, size_t MaxBlockSize> + struct TArenaRecordHolder { + google::protobuf::Arena PbArena; + TRecord& Record; + + static const google::protobuf::ArenaOptions GetArenaOptions() { + google::protobuf::ArenaOptions opts; + opts.initial_block_size = InitialBlockSize; + opts.max_block_size = MaxBlockSize; + return opts; + } + + TArenaRecordHolder() + : PbArena(GetArenaOptions()) + , Record(*google::protobuf::Arena::CreateMessage<TRecord>(&PbArena)) + {} + }; + + template <typename TEv, typename TRecord, ui32 TEventType> + class TEventPB : public TEventPBBase<TEv, TRecord, TEventType, TRecordHolder<TRecord> > { + typedef TEventPBBase<TEv, TRecord, TEventType, TRecordHolder<TRecord> > TPbBase; + // NOTE: No extra fields allowed: TEventPB must be a "template typedef" + public: + using TPbBase::TPbBase; + }; + + template <typename TEv, typename TRecord, ui32 TEventType, size_t InitialBlockSize = 512, size_t MaxBlockSize = 16*1024> + using TEventPBWithArena = TEventPBBase<TEv, TRecord, TEventType, TArenaRecordHolder<TRecord, InitialBlockSize, MaxBlockSize> >; + + template <typename TEv, typename TRecord, ui32 TEventType> + class TEventShortDebugPB: public TEventPB<TEv, TRecord, TEventType> { + public: + using TBase = TEventPB<TEv, TRecord, TEventType>; + TEventShortDebugPB() = default; + explicit TEventShortDebugPB(const TRecord& rec) + : TBase(rec) + { + } + explicit TEventShortDebugPB(TRecord&& rec) + : TBase(std::move(rec)) + { + } + TString ToString() const override { + return TypeName<TEv>() + " { " + TBase::Record.ShortDebugString() + " }"; + } + }; + + template <typename TEv, typename TRecord, ui32 TEventType> + class TEventPreSerializedPB: public TEventPB<TEv, TRecord, TEventType> { + protected: + using TBase = TEventPB<TEv, TRecord, TEventType>; + using TSelf = TEventPreSerializedPB<TEv, TRecord, TEventType>; + using TBase::Record; + + public: + TString PreSerializedData; // already serialized PB data (using message::SerializeToString) + + TEventPreSerializedPB() = default; + + explicit TEventPreSerializedPB(const TRecord& rec) + : TBase(rec) + { + } + + explicit TEventPreSerializedPB(TRecord&& rec) + : TBase(std::move(rec)) + { + } + + // when a remote event is received locally, this method merges in the preserialized data + const TRecord& GetRecord() { + TRecord& base(TBase::Record); + if (!PreSerializedData.empty()) { + TRecord copy; + Y_PROTOBUF_SUPPRESS_NODISCARD copy.ParseFromString(PreSerializedData); + copy.MergeFrom(base); + base.Swap(&copy); + PreSerializedData.clear(); + } + 
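// at this point Record holds the fully merged message and PreSerializedData is empty + 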
return TBase::Record; + } + + const TRecord& GetRecord() const { + return const_cast<TSelf*>(this)->GetRecord(); + } + + TRecord* MutableRecord() { + GetRecord(); // Make sure PreSerializedData is parsed + return &(TBase::Record); + } + + TString ToString() const override { + return GetRecord().ShortDebugString(); + } + + bool SerializeToArcadiaStream(TChunkSerializer* chunker) const override { + return chunker->WriteString(&PreSerializedData) && TBase::SerializeToArcadiaStream(chunker); + } + + ui32 CalculateSerializedSize() const override { + return PreSerializedData.size() + TBase::CalculateSerializedSize(); + } + + size_t GetCachedByteSize() const { + return PreSerializedData.size() + TBase::GetCachedByteSize(); + } + + ui32 CalculateSerializedSizeCached() const override { + return GetCachedByteSize(); + } + }; + + inline TActorId ActorIdFromProto(const NActorsProto::TActorId& actorId) { + return TActorId(actorId.GetRawX1(), actorId.GetRawX2()); + } + + inline void ActorIdToProto(const TActorId& src, NActorsProto::TActorId* dest) { + Y_VERIFY_DEBUG(dest); + dest->SetRawX1(src.RawX1()); + dest->SetRawX2(src.RawX2()); + } +} diff --git a/library/cpp/actors/core/event_pb_payload_ut.cpp b/library/cpp/actors/core/event_pb_payload_ut.cpp new file mode 100644 index 0000000000..eab007bc15 --- /dev/null +++ b/library/cpp/actors/core/event_pb_payload_ut.cpp @@ -0,0 +1,154 @@ +#include "event_pb.h" +#include "events.h" + +#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/actors/protos/unittests.pb.h> + +using namespace NActors; + +enum { + EvMessageWithPayload = EventSpaceBegin(TEvents::ES_PRIVATE), + EvArenaMessage, + EvArenaMessageBig, + EvMessageWithPayloadPreSerialized +}; + +struct TEvMessageWithPayload : TEventPB<TEvMessageWithPayload, TMessageWithPayload, EvMessageWithPayload> { + TEvMessageWithPayload() = default; + explicit TEvMessageWithPayload(const TMessageWithPayload& p) + : TEventPB<TEvMessageWithPayload, TMessageWithPayload, EvMessageWithPayload>(p) + {} +}; + +struct TEvMessageWithPayloadPreSerialized : TEventPreSerializedPB<TEvMessageWithPayloadPreSerialized, TMessageWithPayload, EvMessageWithPayloadPreSerialized> { +}; + + +TRope MakeStringRope(const TString& message) { + return message ? 
TRope(message) : TRope(); +} + +TString MakeString(size_t len) { + TString res; + for (size_t i = 0; i < len; ++i) { + res += RandomNumber<char>(); + } + return res; +} + +Y_UNIT_TEST_SUITE(TEventProtoWithPayload) { + + template <class TEventFrom, class TEventTo> + void TestSerializeDeserialize(size_t size1, size_t size2) { + static_assert(TEventFrom::EventType == TEventTo::EventType, "Must be same event type"); + + TEventFrom msg; + msg.Record.SetMeta("hello, world!"); + msg.Record.AddPayloadId(msg.AddPayload(MakeStringRope(MakeString(size1)))); + msg.Record.AddPayloadId(msg.AddPayload(MakeStringRope(MakeString(size2)))); + msg.Record.AddSomeData(MakeString((size1 + size2) % 50 + 11)); + + auto serializer = MakeHolder<TAllocChunkSerializer>(); + msg.SerializeToArcadiaStream(serializer.Get()); + auto buffers = serializer->Release(msg.IsExtendedFormat()); + UNIT_ASSERT_VALUES_EQUAL(buffers->GetSize(), msg.CalculateSerializedSize()); + TString ser = buffers->GetString(); + + TString chunkerRes; + TCoroutineChunkSerializer chunker; + chunker.SetSerializingEvent(&msg); + while (!chunker.IsComplete()) { + char buffer[4096]; + auto range = chunker.FeedBuf(buffer, sizeof(buffer)); + for (auto p = range.first; p != range.second; ++p) { + chunkerRes += TString(p->first, p->second); + } + } + UNIT_ASSERT_VALUES_EQUAL(chunkerRes, ser); + + THolder<IEventBase> ev2 = THolder(TEventTo::Load(buffers)); + TEventTo& msg2 = static_cast<TEventTo&>(*ev2); + UNIT_ASSERT_VALUES_EQUAL(msg2.Record.GetMeta(), msg.Record.GetMeta()); + UNIT_ASSERT_EQUAL(msg2.GetPayload(msg2.Record.GetPayloadId(0)), msg.GetPayload(msg.Record.GetPayloadId(0))); + UNIT_ASSERT_EQUAL(msg2.GetPayload(msg2.Record.GetPayloadId(1)), msg.GetPayload(msg.Record.GetPayloadId(1))); + } + + template <class TEvent> + void TestAllSizes(size_t step1 = 100, size_t step2 = 111) { + for (size_t size1 = 0; size1 < 10000; size1 += step1) { + for (size_t size2 = 0; size2 < 10000; size2 += step2) { + TestSerializeDeserialize<TEvent, TEvent>(size1, size2); + } + } + } + +#if (!defined(_tsan_enabled_)) + Y_UNIT_TEST(SerializeDeserialize) { + TestAllSizes<TEvMessageWithPayload>(); + } +#endif + + + struct TEvArenaMessage : TEventPBWithArena<TEvArenaMessage, TMessageWithPayload, EvArenaMessage> { + }; + + Y_UNIT_TEST(SerializeDeserializeArena) { + TestAllSizes<TEvArenaMessage>(500, 111); + } + + + struct TEvArenaMessageBig : TEventPBWithArena<TEvArenaMessageBig, TMessageWithPayload, EvArenaMessageBig, 4000, 32000> { + }; + + Y_UNIT_TEST(SerializeDeserializeArenaBig) { + TestAllSizes<TEvArenaMessageBig>(111, 500); + } + + + // Compatible with TEvArenaMessage but doesn't use arenas + struct TEvArenaMessageWithoutArena : TEventPB<TEvArenaMessageWithoutArena, TMessageWithPayload, EvArenaMessage> { + }; + + Y_UNIT_TEST(Compatibility) { + TestSerializeDeserialize<TEvArenaMessage, TEvArenaMessageWithoutArena>(200, 14010); + TestSerializeDeserialize<TEvArenaMessageWithoutArena, TEvArenaMessage>(2000, 4010); + } + + Y_UNIT_TEST(PreSerializedCompatibility) { + // ensure TEventPreSerializedPB and TEventPB are interchangeable with no compatibility issues + TMessageWithPayload msg; + msg.SetMeta("hello, world!"); + msg.AddPayloadId(123); + msg.AddPayloadId(999); + msg.AddSomeData("abc"); + msg.AddSomeData("xyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"); + + TEvMessageWithPayloadPreSerialized e1; + Y_PROTOBUF_SUPPRESS_NODISCARD msg.SerializeToString(&e1.PreSerializedData); + + auto serializer1 = MakeHolder<TAllocChunkSerializer>(); + 
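// serializing the pre-serialized event must produce exactly the same bytes as a plain + // TEventPB built from the same record; both byte strings are compared below + 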
e1.SerializeToArcadiaStream(serializer1.Get()); + auto buffers1 = serializer1->Release(e1.IsExtendedFormat()); + UNIT_ASSERT_VALUES_EQUAL(buffers1->GetSize(), e1.CalculateSerializedSize()); + TString ser1 = buffers1->GetString(); + + TEvMessageWithPayload e2(msg); + auto serializer2 = MakeHolder<TAllocChunkSerializer>(); + e2.SerializeToArcadiaStream(serializer2.Get()); + auto buffers2 = serializer2->Release(e2.IsExtendedFormat()); + UNIT_ASSERT_VALUES_EQUAL(buffers2->GetSize(), e2.CalculateSerializedSize()); + TString ser2 = buffers2->GetString(); + UNIT_ASSERT_VALUES_EQUAL(ser1, ser2); + + // deserialize + auto data = MakeIntrusive<TEventSerializedData>(ser1, false); + THolder<TEvMessageWithPayloadPreSerialized> parsedEvent(static_cast<TEvMessageWithPayloadPreSerialized*>(TEvMessageWithPayloadPreSerialized::Load(data))); + UNIT_ASSERT_VALUES_EQUAL(parsedEvent->PreSerializedData, ""); // this field is empty after deserialization + auto& record = parsedEvent->GetRecord(); + UNIT_ASSERT_VALUES_EQUAL(record.GetMeta(), msg.GetMeta()); + UNIT_ASSERT_VALUES_EQUAL(record.PayloadIdSize(), msg.PayloadIdSize()); + UNIT_ASSERT_VALUES_EQUAL(record.PayloadIdSize(), 2); + UNIT_ASSERT_VALUES_EQUAL(record.GetPayloadId(0), msg.GetPayloadId(0)); + UNIT_ASSERT_VALUES_EQUAL(record.GetPayloadId(1), msg.GetPayloadId(1)); + } +} diff --git a/library/cpp/actors/core/event_pb_ut.cpp b/library/cpp/actors/core/event_pb_ut.cpp new file mode 100644 index 0000000000..a16c3092b3 --- /dev/null +++ b/library/cpp/actors/core/event_pb_ut.cpp @@ -0,0 +1,71 @@ +#include "event_pb.h" + +#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/actors/protos/unittests.pb.h> + +Y_UNIT_TEST_SUITE(TEventSerialization) { + struct TMockEvent: public NActors::IEventBase { + TBigMessage* msg; + bool + SerializeToArcadiaStream(NActors::TChunkSerializer* chunker) const override { + return msg->SerializeToZeroCopyStream(chunker); + } + bool IsSerializable() const override { + return true; + } + TString ToStringHeader() const override { + return TString(); + } + virtual TString Serialize() const { + return TString(); + } + ui32 Type() const override { + return 0; + }; + }; + + Y_UNIT_TEST(Coroutine) { + TString strA(507, 'a'); + TString strB(814, 'b'); + TString strC(198, 'c'); + + TBigMessage bm; + + TSimple* simple0 = bm.AddSimples(); + simple0->SetStr1(strA); + simple0->SetStr2(strB); + simple0->SetNumber1(213431324); + + TSimple* simple1 = bm.AddSimples(); + simple1->SetStr1(strC); + simple1->SetStr2(strA); + simple1->SetNumber1(21039313); + + bm.AddManyStr(strA); + bm.AddManyStr(strC); + bm.AddManyStr(strB); + + bm.SetOneMoreStr(strB); + bm.SetYANumber(394143); + + TString bmSerialized; + Y_PROTOBUF_SUPPRESS_NODISCARD bm.SerializeToString(&bmSerialized); + UNIT_ASSERT_UNEQUAL(bmSerialized.size(), 0); + + NActors::TCoroutineChunkSerializer chunker; + for (int i = 0; i < 4; ++i) { + TMockEvent event; + event.msg = &bm; + chunker.SetSerializingEvent(&event); + char buf1[87]; + TString bmChunkedSerialized; + while (!chunker.IsComplete()) { + auto range = chunker.FeedBuf(&buf1[0], sizeof(buf1)); + for (auto p = range.first; p != range.second; ++p) { + bmChunkedSerialized.append(p->first, p->second); + } + } + UNIT_ASSERT_EQUAL(bmSerialized, bmChunkedSerialized); + } + } +} diff --git a/library/cpp/actors/core/events.h b/library/cpp/actors/core/events.h new file mode 100644 index 0000000000..702cf50fad --- /dev/null +++ b/library/cpp/actors/core/events.h @@ -0,0 +1,222 @@ +#pragma once + +#include "event.h" +#include 
"event_pb.h" + +#include <library/cpp/actors/protos/actors.pb.h> +#include <util/system/unaligned_mem.h> + +namespace NActors { + struct TEvents { + enum EEventSpace { + ES_HELLOWORLD = 0, + ES_SYSTEM = 1, + ES_INTERCONNECT = 2, + ES_INTERCONNECT_MSGBUS = 3, + ES_DNS = 4, + ES_SOCKET_POLLER = 5, + ES_LOGGER = 6, + ES_MON = 7, + ES_INTERCONNECT_TCP = 8, + ES_PROFILER = 9, + ES_YF = 10, + ES_HTTP = 11, + + ES_USERSPACE = 4096, + + ES_PRIVATE = (1 << 15) - 16, + ES_MAX = (1 << 15), + }; + +#define EventSpaceBegin(eventSpace) (eventSpace << 16u) +#define EventSpaceEnd(eventSpace) ((eventSpace << 16u) + (1u << 16u)) + + struct THelloWorld { + enum { + Start = EventSpaceBegin(ES_HELLOWORLD), + Ping, + Pong, + Blob, + End + }; + + static_assert(End < EventSpaceEnd(ES_HELLOWORLD), "expect End < EventSpaceEnd(ES_HELLOWORLD)"); + }; + + struct TEvPing: public TEventBase<TEvPing, THelloWorld::Ping> { + DEFINE_SIMPLE_NONLOCAL_EVENT(TEvPing, "HelloWorld: Ping"); + }; + + struct TEvPong: public TEventBase<TEvPong, THelloWorld::Pong> { + DEFINE_SIMPLE_NONLOCAL_EVENT(TEvPong, "HelloWorld: Pong"); + }; + + struct TEvBlob: public TEventBase<TEvBlob, THelloWorld::Blob> { + const TString Blob; + + TEvBlob(const TString& blob) noexcept + : Blob(blob) + { + } + + TString ToStringHeader() const noexcept override { + return "THelloWorld::Blob"; + } + + bool SerializeToArcadiaStream(TChunkSerializer *serializer) const override { + return serializer->WriteString(&Blob); + } + + static IEventBase* Load(TEventSerializedData* bufs) noexcept { + return new TEvBlob(bufs->GetString()); + } + + bool IsSerializable() const override { + return true; + } + }; + + struct TSystem { + enum { + Start = EventSpaceBegin(ES_SYSTEM), + Bootstrap, // generic bootstrap event + Wakeup, // generic timeout + Subscribe, // generic subscribe to something + Unsubscribe, // generic unsubscribe from something + Delivered, // event delivered + Undelivered, // event undelivered + Poison, // request actor to shutdown + Completed, // generic async job result event + PoisonTaken, // generic Poison taken (reply to PoisonPill event, i.e. 
died completely) + FlushLog, + CallbackCompletion, + CallbackException, + Gone, // Generic notification of actor death + TrackActor, + UntrackActor, + InvokeResult, + CoroTimeout, + InvokeQuery, + End, + + // Compatibility section + PoisonPill = Poison, + ActorDied = Gone, + }; + + static_assert(End < EventSpaceEnd(ES_SYSTEM), "expect End < EventSpaceEnd(ES_SYSTEM)"); + }; + + struct TEvBootstrap: public TEventBase<TEvBootstrap, TSystem::Bootstrap> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvBootstrap, "System: TEvBootstrap") + }; + + struct TEvPoison : public TEventBase<TEvPoison, TSystem::Poison> { + DEFINE_SIMPLE_NONLOCAL_EVENT(TEvPoison, "System: TEvPoison") + }; + + struct TEvWakeup: public TEventBase<TEvWakeup, TSystem::Wakeup> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvWakeup, "System: TEvWakeup") + + TEvWakeup(ui64 tag = 0) : Tag(tag) { } + + const ui64 Tag = 0; + }; + + struct TEvSubscribe: public TEventBase<TEvSubscribe, TSystem::Subscribe> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvSubscribe, "System: TEvSubscribe") + }; + + struct TEvUnsubscribe: public TEventBase<TEvUnsubscribe, TSystem::Unsubscribe> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvUnsubscribe, "System: TEvUnsubscribe") + }; + + struct TEvUndelivered: public TEventBase<TEvUndelivered, TSystem::Undelivered> { + enum EReason { + ReasonUnknown, + ReasonActorUnknown, + Disconnected + }; + const ui32 SourceType; + const EReason Reason; + const bool Unsure; + const TString Data; + + TEvUndelivered(ui32 sourceType, ui32 reason, bool unsure = false) + : SourceType(sourceType) + , Reason(static_cast<EReason>(reason)) + , Unsure(unsure) + , Data(MakeData(sourceType, reason)) + {} + + TString ToStringHeader() const override; + bool SerializeToArcadiaStream(TChunkSerializer *serializer) const override; + static IEventBase* Load(TEventSerializedData* bufs); + bool IsSerializable() const override; + + ui32 CalculateSerializedSize() const override { return 2 * sizeof(ui32); } + + static void Out(IOutputStream& o, EReason x); + + private: + static TString MakeData(ui32 sourceType, ui32 reason) { + TString s = TString::Uninitialized(sizeof(ui32) + sizeof(ui32)); + char *p = s.Detach(); + WriteUnaligned<ui32>(p + 0, sourceType); + WriteUnaligned<ui32>(p + 4, reason); + return s; + } + }; + + struct TEvCompleted: public TEventBase<TEvCompleted, TSystem::Completed> { + const ui32 Id; + const ui32 Status; + TEvCompleted(ui32 id = 0, ui32 status = 0) + : Id(id) + , Status(status) + { + } + + DEFINE_SIMPLE_LOCAL_EVENT(TEvCompleted, "System: TEvCompleted") + }; + + struct TEvPoisonTaken: public TEventBase<TEvPoisonTaken, TSystem::PoisonTaken> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvPoisonTaken, "System: TEvPoisonTaken") + }; + + struct TEvFlushLog: public TEventBase<TEvFlushLog, TSystem::FlushLog> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvFlushLog, "System: TEvFlushLog") + }; + + struct TEvCallbackException: public TEventPB<TEvCallbackException, + NActorsProto::TCallbackException, + TSystem::CallbackException> { + TEvCallbackException(const TActorId& id, const TString& msg) { + ActorIdToProto(id, Record.MutableActorId()); + Record.SetExceptionMessage(msg); + } + }; + + struct TEvCallbackCompletion: public TEventPB<TEvCallbackCompletion, + NActorsProto::TActorId, + TSystem::CallbackCompletion> { + TEvCallbackCompletion(const TActorId& id) { + ActorIdToProto(id, &Record); + } + }; + + struct TEvGone: public TEventBase<TEvGone, TSystem::Gone> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvGone, "System: TEvGone") + }; + + struct TEvInvokeResult; + + using TEvPoisonPill = TEvPoison; // Legacy name, deprecated + 
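// hypothetical usage sketch (not part of the original header): a minimal poison handler inside an actor + //   void Handle(TEvents::TEvPoison::TPtr& ev) { + //       Send(ev->Sender, new TEvents::TEvPoisonTaken()); + //       PassAway(); + //   } + 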
using TEvActorDied = TEvGone; + }; +} + +template <> +inline void Out<NActors::TEvents::TEvUndelivered::EReason>(IOutputStream& o, NActors::TEvents::TEvUndelivered::EReason x) { + NActors::TEvents::TEvUndelivered::Out(o, x); +} diff --git a/library/cpp/actors/core/events_undelivered.cpp b/library/cpp/actors/core/events_undelivered.cpp new file mode 100644 index 0000000000..23deaffd10 --- /dev/null +++ b/library/cpp/actors/core/events_undelivered.cpp @@ -0,0 +1,60 @@ +#include "events.h" +#include "actorsystem.h" + +namespace NActors { + TString TEvents::TEvUndelivered::ToStringHeader() const { + return "TSystem::Undelivered"; + } + + bool TEvents::TEvUndelivered::SerializeToArcadiaStream(TChunkSerializer *serializer) const { + Y_VERIFY(!Unsure); // these are local-only events generated by Interconnect + return serializer->WriteString(&Data); + } + + void TEvents::TEvUndelivered::Out(IOutputStream& o, EReason x) { + switch (x) { + case ReasonActorUnknown: + o << "ActorUnknown"; + break; + case Disconnected: + o << "Disconnected"; + break; + default: + o << "Undefined"; + break; + } + } + + bool TEvents::TEvUndelivered::IsSerializable() const { + return true; + } + + IEventBase* TEvents::TEvUndelivered::Load(TEventSerializedData* bufs) { + TString str = bufs->GetString(); + Y_VERIFY(str.size() == (sizeof(ui32) + sizeof(ui32))); + const char* p = str.data(); + const ui64 sourceType = ReadUnaligned<ui32>(p + 0); + const ui64 reason = ReadUnaligned<ui32>(p + 4); + return new TEvUndelivered(sourceType, reason); + } + + TAutoPtr<IEventHandle> IEventHandle::ForwardOnNondelivery(ui32 reason, bool unsure) { + if (Flags & FlagForwardOnNondelivery) { + const ui32 updatedFlags = Flags & ~(FlagForwardOnNondelivery | FlagSubscribeOnSession); + const TActorId recp = OnNondeliveryHolder ? 
OnNondeliveryHolder->Recipient : TActorId(); + + if (Event) + return new IEventHandle(recp, Sender, Event.Release(), updatedFlags, Cookie, &Recipient, TraceId.Clone()); + else + return new IEventHandle(Type, updatedFlags, recp, Sender, Buffer, Cookie, &Recipient, TraceId.Clone()); + } + + if (Flags & FlagTrackDelivery) { + const ui32 updatedFlags = Flags & ~(FlagTrackDelivery | FlagSubscribeOnSession | FlagGenerateUnsureUndelivered); + return new IEventHandle(Sender, Recipient, new TEvents::TEvUndelivered(Type, reason, unsure), updatedFlags, + Cookie, nullptr, TraceId.Clone()); + } + + return nullptr; + } +} diff --git a/library/cpp/actors/core/executelater.h b/library/cpp/actors/core/executelater.h new file mode 100644 index 0000000000..e7a13c1005 --- /dev/null +++ b/library/cpp/actors/core/executelater.h @@ -0,0 +1,87 @@ +#pragma once + +#include "actor_bootstrapped.h" + +#include <utility> + +namespace NActors { + template <typename TCallback> + class TExecuteLater: public TActorBootstrapped<TExecuteLater<TCallback>> { + public: + static constexpr IActor::EActivityType ActorActivityType() { + return IActor::ACTORLIB_COMMON; + } + + TExecuteLater( + TCallback&& callback, + IActor::EActivityType activityType, + ui32 channel = 0, + ui64 cookie = 0, + const TActorId& reportCompletionTo = TActorId(), + const TActorId& reportExceptionTo = TActorId()) noexcept + : Callback(std::move(callback)) + , Channel(channel) + , Cookie(cookie) + , ReportCompletionTo(reportCompletionTo) + , ReportExceptionTo(reportExceptionTo) + { + this->SetActivityType(activityType); + } + + void Bootstrap(const TActorContext& ctx) noexcept { + try { + { + /* RAII, Callback should be destroyed right before sending + TEvCallbackCompletion */ + + auto local = std::move(Callback); + using T = decltype(local); + + if constexpr (std::is_invocable_v<T, const TActorContext&>) { + local(ctx); + } else { + local(); + } + } + + if (ReportCompletionTo) { + ctx.Send(ReportCompletionTo, + new TEvents::TEvCallbackCompletion(ctx.SelfID), + Channel, Cookie); + } + } catch (...) 
{ + if (ReportExceptionTo) { + const TString msg = CurrentExceptionMessage(); + ctx.Send(ReportExceptionTo, + new TEvents::TEvCallbackException(ctx.SelfID, msg), + Channel, Cookie); + } + } + + this->Die(ctx); + } + + private: + TCallback Callback; + const ui32 Channel; + const ui64 Cookie; + const TActorId ReportCompletionTo; + const TActorId ReportExceptionTo; + }; + + template <typename T> + IActor* CreateExecuteLaterActor( + T&& func, + IActor::EActivityType activityType, + ui32 channel = 0, + ui64 cookie = 0, + const TActorId& reportCompletionTo = TActorId(), + const TActorId& reportExceptionTo = TActorId()) noexcept { + return new TExecuteLater<T>(std::forward<T>(func), + activityType, + channel, + cookie, + reportCompletionTo, + reportExceptionTo); + } +} diff --git a/library/cpp/actors/core/executor_pool_base.cpp b/library/cpp/actors/core/executor_pool_base.cpp new file mode 100644 index 0000000000..c3b9999168 --- /dev/null +++ b/library/cpp/actors/core/executor_pool_base.cpp @@ -0,0 +1,168 @@ +#include "executor_pool_base.h" +#include "executor_thread.h" +#include "mailbox.h" +#include "probes.h" +#include <library/cpp/actors/util/datetime.h> + +namespace NActors { + LWTRACE_USING(ACTORLIB_PROVIDER); + + void DoActorInit(TActorSystem* sys, IActor* actor, const TActorId& self, const TActorId& owner) { + actor->SelfActorId = self; + actor->Registered(sys, owner); + } + + TExecutorPoolBaseMailboxed::TExecutorPoolBaseMailboxed(ui32 poolId, ui32 maxActivityType) + : IExecutorPool(poolId) + , ActorSystem(nullptr) + , MailboxTable(new TMailboxTable) +#ifdef ACTORSLIB_COLLECT_EXEC_STATS + , Stats(maxActivityType) +#endif + {} + + TExecutorPoolBaseMailboxed::~TExecutorPoolBaseMailboxed() { + MailboxTable.Destroy(); + } + + TExecutorPoolBase::TExecutorPoolBase(ui32 poolId, ui32 threads, TAffinity* affinity, ui32 maxActivityType) + : TExecutorPoolBaseMailboxed(poolId, maxActivityType) + , PoolThreads(threads) + , ThreadsAffinity(affinity) + {} + + TExecutorPoolBase::~TExecutorPoolBase() { + while (Activations.Pop(0)) + ; + } + + void TExecutorPoolBaseMailboxed::ReclaimMailbox(TMailboxType::EType mailboxType, ui32 hint, TWorkerId workerId, ui64 revolvingWriteCounter) { + Y_UNUSED(workerId); + MailboxTable->ReclaimMailbox(mailboxType, hint, revolvingWriteCounter); + } + + ui64 TExecutorPoolBaseMailboxed::AllocateID() { + return ActorSystem->AllocateIDSpace(1); + } + + bool TExecutorPoolBaseMailboxed::Send(TAutoPtr<IEventHandle>& ev) { + Y_VERIFY_DEBUG(ev->GetRecipientRewrite().PoolID() == PoolId); +#ifdef ACTORSLIB_COLLECT_EXEC_STATS + RelaxedStore(&ev->SendTime, (::NHPTimer::STime)GetCycleCountFast()); +#endif + return MailboxTable->SendTo(ev, this); + } + + void TExecutorPoolBase::ScheduleActivation(ui32 activation) { + ScheduleActivationEx(activation, AtomicIncrement(ActivationsRevolvingCounter)); + } + + TActorId TExecutorPoolBaseMailboxed::Register(IActor* actor, TMailboxType::EType mailboxType, ui64 revolvingWriteCounter, const TActorId& parentId) { + NHPTimer::STime hpstart = GetCycleCountFast(); +#ifdef ACTORSLIB_COLLECT_EXEC_STATS + ui32 at = actor->GetActivityType(); + if (at >= Stats.MaxActivityType()) + at = 0; + AtomicIncrement(Stats.ActorsAliveByActivity[at]); +#endif + AtomicIncrement(ActorRegistrations); + + // first step - find good enough mailbox + ui32 hint = 0; + TMailboxHeader* mailbox = nullptr; + + if (revolvingWriteCounter == 0) + revolvingWriteCounter = AtomicIncrement(RegisterRevolvingCounter); + + { + ui32 hintBackoff = 0; + + while (hint == 0) { + hint = 
MailboxTable->AllocateMailbox(mailboxType, ++revolvingWriteCounter); + mailbox = MailboxTable->Get(hint); + + if (!mailbox->LockFromFree()) { + MailboxTable->ReclaimMailbox(mailboxType, hintBackoff, ++revolvingWriteCounter); + hintBackoff = hint; + hint = 0; + } + } + + MailboxTable->ReclaimMailbox(mailboxType, hintBackoff, ++revolvingWriteCounter); + } + + const ui64 localActorId = AllocateID(); + + // ok, got mailbox + mailbox->AttachActor(localActorId, actor); + + // do init + const TActorId actorId(ActorSystem->NodeId, PoolId, localActorId, hint); + DoActorInit(ActorSystem, actor, actorId, parentId); + + // Once we unlock the mailbox the actor starts running and we cannot use the pointer any more + actor = nullptr; + + switch (mailboxType) { + case TMailboxType::Simple: + UnlockFromExecution((TMailboxTable::TSimpleMailbox*)mailbox, this, false, hint, MaxWorkers, ++revolvingWriteCounter); + break; + case TMailboxType::Revolving: + UnlockFromExecution((TMailboxTable::TRevolvingMailbox*)mailbox, this, false, hint, MaxWorkers, ++revolvingWriteCounter); + break; + case TMailboxType::HTSwap: + UnlockFromExecution((TMailboxTable::THTSwapMailbox*)mailbox, this, false, hint, MaxWorkers, ++revolvingWriteCounter); + break; + case TMailboxType::ReadAsFilled: + UnlockFromExecution((TMailboxTable::TReadAsFilledMailbox*)mailbox, this, false, hint, MaxWorkers, ++revolvingWriteCounter); + break; + case TMailboxType::TinyReadAsFilled: + UnlockFromExecution((TMailboxTable::TTinyReadAsFilledMailbox*)mailbox, this, false, hint, MaxWorkers, ++revolvingWriteCounter); + break; + default: + Y_FAIL(); + } + + NHPTimer::STime elapsed = GetCycleCountFast() - hpstart; + if (elapsed > 1000000) { + LWPROBE(SlowRegisterNew, PoolId, NHPTimer::GetSeconds(elapsed) * 1000.0); + } + + return actorId; + } + + TActorId TExecutorPoolBaseMailboxed::Register(IActor* actor, TMailboxHeader* mailbox, ui32 hint, const TActorId& parentId) { + NHPTimer::STime hpstart = GetCycleCountFast(); +#ifdef ACTORSLIB_COLLECT_EXEC_STATS + ui32 at = actor->GetActivityType(); + if (at >= Stats.MaxActivityType()) + at = 0; + AtomicIncrement(Stats.ActorsAliveByActivity[at]); +#endif + AtomicIncrement(ActorRegistrations); + + const ui64 localActorId = AllocateID(); + mailbox->AttachActor(localActorId, actor); + + const TActorId actorId(ActorSystem->NodeId, PoolId, localActorId, hint); + DoActorInit(ActorSystem, actor, actorId, parentId); + NHPTimer::STime elapsed = GetCycleCountFast() - hpstart; + if (elapsed > 1000000) { + LWPROBE(SlowRegisterAdd, PoolId, NHPTimer::GetSeconds(elapsed) * 1000.0); + } + + return actorId; + } + + TAffinity* TExecutorPoolBase::Affinity() const { + return ThreadsAffinity.Get(); + } + + bool TExecutorPoolBaseMailboxed::Cleanup() { + return MailboxTable->Cleanup(); + } + + ui32 TExecutorPoolBase::GetThreads() const { + return PoolThreads; + } +} diff --git a/library/cpp/actors/core/executor_pool_base.h b/library/cpp/actors/core/executor_pool_base.h new file mode 100644 index 0000000000..c84ce1af77 --- /dev/null +++ b/library/cpp/actors/core/executor_pool_base.h @@ -0,0 +1,49 @@ +#pragma once + +#include "actorsystem.h" +#include "executor_thread.h" +#include "scheduler_queue.h" +#include <library/cpp/actors/util/affinity.h> +#include <library/cpp/actors/util/unordered_cache.h> +#include <library/cpp/actors/util/threadparkpad.h> + +namespace NActors { + class TExecutorPoolBaseMailboxed: public IExecutorPool { + protected: + TActorSystem* ActorSystem; + THolder<TMailboxTable> MailboxTable; +#ifdef 
ACTORSLIB_COLLECT_EXEC_STATS + // Need to have per pool object to collect stats like actor registrations (because + // registrations might be done in threads from other pools) + TExecutorThreadStats Stats; +#endif + TAtomic RegisterRevolvingCounter = 0; + ui64 AllocateID(); + public: + TExecutorPoolBaseMailboxed(ui32 poolId, ui32 maxActivityType); + ~TExecutorPoolBaseMailboxed(); + void ReclaimMailbox(TMailboxType::EType mailboxType, ui32 hint, TWorkerId workerId, ui64 revolvingWriteCounter) override; + bool Send(TAutoPtr<IEventHandle>& ev) override; + TActorId Register(IActor* actor, TMailboxType::EType mailboxType, ui64 revolvingWriteCounter, const TActorId& parentId) override; + TActorId Register(IActor* actor, TMailboxHeader* mailbox, ui32 hint, const TActorId& parentId) override; + bool Cleanup() override; + }; + + class TExecutorPoolBase: public TExecutorPoolBaseMailboxed { + protected: + const ui32 PoolThreads; + TIntrusivePtr<TAffinity> ThreadsAffinity; + TAtomic Semaphore = 0; + TUnorderedCache<ui32, 512, 4> Activations; + TAtomic ActivationsRevolvingCounter = 0; + volatile bool StopFlag = false; + public: + TExecutorPoolBase(ui32 poolId, ui32 threads, TAffinity* affinity, ui32 maxActivityType); + ~TExecutorPoolBase(); + void ScheduleActivation(ui32 activation) override; + TAffinity* Affinity() const override; + ui32 GetThreads() const override; + }; + + void DoActorInit(TActorSystem*, IActor*, const TActorId&, const TActorId&); +} diff --git a/library/cpp/actors/core/executor_pool_basic.cpp b/library/cpp/actors/core/executor_pool_basic.cpp new file mode 100644 index 0000000000..4dce16939a --- /dev/null +++ b/library/cpp/actors/core/executor_pool_basic.cpp @@ -0,0 +1,431 @@ +#include "executor_pool_basic.h" +#include "probes.h" +#include "mailbox.h" +#include <library/cpp/actors/util/affinity.h> +#include <library/cpp/actors/util/datetime.h> + +#ifdef _linux_ +#include <pthread.h> +#endif + +namespace NActors { + LWTRACE_USING(ACTORLIB_PROVIDER); + + constexpr TDuration TBasicExecutorPool::DEFAULT_TIME_PER_MAILBOX; + + TBasicExecutorPool::TBasicExecutorPool( + ui32 poolId, + ui32 threads, + ui64 spinThreshold, + const TString& poolName, + TAffinity* affinity, + TDuration timePerMailbox, + ui32 eventsPerMailbox, + int realtimePriority, + ui32 maxActivityType) + : TExecutorPoolBase(poolId, threads, affinity, maxActivityType) + , SpinThreshold(spinThreshold) + , SpinThresholdCycles(spinThreshold * NHPTimer::GetCyclesPerSecond() * 0.000001) // convert microseconds to cycles + , Threads(new TThreadCtx[threads]) + , PoolName(poolName) + , TimePerMailbox(timePerMailbox) + , EventsPerMailbox(eventsPerMailbox) + , RealtimePriority(realtimePriority) + , ThreadUtilization(0) + , MaxUtilizationCounter(0) + , MaxUtilizationAccumulator(0) + , ThreadCount(threads) + { + } + + TBasicExecutorPool::TBasicExecutorPool(const TBasicExecutorPoolConfig& cfg) + : TBasicExecutorPool( + cfg.PoolId, + cfg.Threads, + cfg.SpinThreshold, + cfg.PoolName, + new TAffinity(cfg.Affinity), + cfg.TimePerMailbox, + cfg.EventsPerMailbox, + cfg.RealtimePriority, + cfg.MaxActivityType + ) + {} + + TBasicExecutorPool::~TBasicExecutorPool() { + Threads.Destroy(); + } + + ui32 TBasicExecutorPool::GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) { + ui32 workerId = wctx.WorkerId; + Y_VERIFY_DEBUG(workerId < PoolThreads); + + NHPTimer::STime elapsed = 0; + NHPTimer::STime parked = 0; + NHPTimer::STime blocked = 0; + NHPTimer::STime hpstart = GetCycleCountFast(); + NHPTimer::STime hpnow; + + TThreadCtx& 
threadCtx = Threads[workerId]; + AtomicSet(threadCtx.WaitingFlag, TThreadCtx::WS_NONE); + + if (Y_UNLIKELY(AtomicGet(threadCtx.BlockedFlag) != TThreadCtx::BS_NONE)) { + do { + if (AtomicCas(&threadCtx.BlockedFlag, TThreadCtx::BS_BLOCKED, TThreadCtx::BS_BLOCKING)) { + hpnow = GetCycleCountFast(); + elapsed += hpnow - hpstart; + if (threadCtx.BlockedPad.Park()) // interrupted + return 0; + hpstart = GetCycleCountFast(); + blocked += hpstart - hpnow; + } + } while (AtomicGet(threadCtx.BlockedFlag) != TThreadCtx::BS_NONE && !AtomicLoad(&StopFlag)); + } + + const TAtomic x = AtomicDecrement(Semaphore); + + if (x < 0) { +#if defined ACTORSLIB_COLLECT_EXEC_STATS + if (AtomicGetAndIncrement(ThreadUtilization) == 0) { + // Initially counter contains -t0, the pool start timestamp + // When the first thread goes to sleep we add t1, so the counter + // becomes t1-t0 >= 0, or the duration of max utilization so far. + // If the counter was negative and becomes positive, that means + // counter just turned into a duration and we should store that + // duration. Otherwise another thread raced with us and + // subtracted some other timestamp t2. + const i64 t = GetCycleCountFast(); + const i64 x = AtomicGetAndAdd(MaxUtilizationCounter, t); + if (x < 0 && x + t > 0) + AtomicStore(&MaxUtilizationAccumulator, x + t); + } +#endif + + Y_VERIFY(AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_NONE); + + if (SpinThreshold > 0) { + // spin configured period + AtomicSet(threadCtx.WaitingFlag, TThreadCtx::WS_ACTIVE); + ui64 start = GetCycleCountFast(); + bool doSpin = true; + while (true) { + for (ui32 j = 0; doSpin && j < 12; ++j) { + if (GetCycleCountFast() >= (start + SpinThresholdCycles)) { + doSpin = false; + break; + } + for (ui32 i = 0; i < 12; ++i) { + if (AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_ACTIVE) { + SpinLockPause(); + } else { + doSpin = false; + break; + } + } + } + if (!doSpin) { + break; + } + if (RelaxedLoad(&StopFlag)) { + break; + } + } + // then - sleep + if (AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_ACTIVE) { + if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_BLOCKED, TThreadCtx::WS_ACTIVE)) { + do { + hpnow = GetCycleCountFast(); + elapsed += hpnow - hpstart; + if (threadCtx.Pad.Park()) // interrupted + return 0; + hpstart = GetCycleCountFast(); + parked += hpstart - hpnow; + } while (AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_BLOCKED); + } + } + } else { + AtomicSet(threadCtx.WaitingFlag, TThreadCtx::WS_BLOCKED); + do { + hpnow = GetCycleCountFast(); + elapsed += hpnow - hpstart; + if (threadCtx.Pad.Park()) // interrupted + return 0; + hpstart = GetCycleCountFast(); + parked += hpstart - hpnow; + } while (AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_BLOCKED); + } + + Y_VERIFY_DEBUG(AtomicLoad(&StopFlag) || AtomicLoad(&threadCtx.WaitingFlag) == TThreadCtx::WS_RUNNING); + +#if defined ACTORSLIB_COLLECT_EXEC_STATS + if (AtomicDecrement(ThreadUtilization) == 0) { + // When we started sleeping counter contained t1-t0, or the + // last duration of max utilization. Now we subtract t2 >= t1, + // which turns counter negative again, and the next sleep cycle + // at timestamp t3 would be adding some new duration t3-t2. + // If the counter was positive and becomes negative that means + // there are no current races with other threads and we should + // store the last positive duration we observed. Multiple + // threads may be adding and subtracting values in potentially + // arbitrary order, which would cause counter to oscillate + // around zero. 
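For example, in arbitrary cycle units: Start() at t0=1 sets the counter to -1; a thread going to sleep + // at t1=5 flips it to +4 and stores 4; waking at t2=7 flips it to -3 (4 is stored again); a second sleep + // at t3=9 flips it to +6 and stores 6. + // 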
The moment it crosses zero is a good indication of a + // correct value. + const i64 t = GetCycleCountFast(); + const i64 x = AtomicGetAndAdd(MaxUtilizationCounter, -t); + if (x > 0 && x - t < 0) + AtomicStore(&MaxUtilizationAccumulator, x); + } +#endif + } else { + AtomicSet(threadCtx.WaitingFlag, TThreadCtx::WS_RUNNING); + } + + // ok, work has been suggested, must dequeue it + while (!RelaxedLoad(&StopFlag)) { + if (const ui32 activation = Activations.Pop(++revolvingCounter)) { + hpnow = GetCycleCountFast(); + elapsed += hpnow - hpstart; + wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, elapsed); + if (parked > 0) { + wctx.AddParkedCycles(parked); + } + if (blocked > 0) { + wctx.AddBlockedCycles(blocked); + } + return activation; + } + SpinLockPause(); + } + + // stopping, die! + return 0; + } + + inline void TBasicExecutorPool::WakeUpLoop() { + for (ui32 i = 0;;) { + TThreadCtx& threadCtx = Threads[i % PoolThreads]; + switch (AtomicLoad(&threadCtx.WaitingFlag)) { + case TThreadCtx::WS_NONE: + case TThreadCtx::WS_RUNNING: + ++i; + break; + case TThreadCtx::WS_ACTIVE: // in active spin-lock, just set flag + if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_RUNNING, TThreadCtx::WS_ACTIVE)) { + return; + } + break; + case TThreadCtx::WS_BLOCKED: + if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_RUNNING, TThreadCtx::WS_BLOCKED)) { + threadCtx.Pad.Unpark(); + return; + } + break; + default: + Y_FAIL(); + } + } + } + + void TBasicExecutorPool::ScheduleActivationEx(ui32 activation, ui64 revolvingCounter) { + Activations.Push(activation, revolvingCounter); + const TAtomic x = AtomicIncrement(Semaphore); + if (x <= 0) { // we must find someone to wake-up + WakeUpLoop(); + } + } + + void TBasicExecutorPool::GetCurrentStats(TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const { + poolStats.MaxUtilizationTime = RelaxedLoad(&MaxUtilizationAccumulator) / (i64)(NHPTimer::GetCyclesPerSecond() / 1000); + + statsCopy.resize(PoolThreads + 1); + // Save counters from the pool object + statsCopy[0] = TExecutorThreadStats(); + statsCopy[0].Aggregate(Stats); + // Per-thread stats + for (size_t i = 0; i < PoolThreads; ++i) { + Threads[i].Thread->GetCurrentStats(statsCopy[i + 1]); + } + } + + void TBasicExecutorPool::Prepare(TActorSystem* actorSystem, NSchedulerQueue::TReader** scheduleReaders, ui32* scheduleSz) { + TAffinityGuard affinityGuard(Affinity()); + + ActorSystem = actorSystem; + + ScheduleReaders.Reset(new NSchedulerQueue::TReader[PoolThreads]); + ScheduleWriters.Reset(new NSchedulerQueue::TWriter[PoolThreads]); + + for (ui32 i = 0; i != PoolThreads; ++i) { + Threads[i].Thread.Reset( + new TExecutorThread( + i, + 0, // CpuId is not used in BASIC pool + actorSystem, + this, + MailboxTable.Get(), + PoolName, + TimePerMailbox, + EventsPerMailbox)); + ScheduleWriters[i].Init(ScheduleReaders[i]); + } + + *scheduleReaders = ScheduleReaders.Get(); + *scheduleSz = PoolThreads; + } + + void TBasicExecutorPool::Start() { + TAffinityGuard affinityGuard(Affinity()); + + ThreadUtilization = 0; + AtomicAdd(MaxUtilizationCounter, -(i64)GetCycleCountFast()); + + for (ui32 i = 0; i != PoolThreads; ++i) { + Threads[i].Thread->Start(); + } + } + + void TBasicExecutorPool::PrepareStop() { + AtomicStore(&StopFlag, true); + for (ui32 i = 0; i != PoolThreads; ++i) { + Threads[i].Pad.Interrupt(); + Threads[i].BlockedPad.Interrupt(); + } + } + + void TBasicExecutorPool::Shutdown() { + for (ui32 i = 0; i != PoolThreads; ++i) + Threads[i].Thread->Join(); + } + + void TBasicExecutorPool::Schedule(TInstant 
deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) { + Y_VERIFY_DEBUG(workerId < PoolThreads); + + Schedule(deadline - ActorSystem->Timestamp(), ev, cookie, workerId); + } + + void TBasicExecutorPool::Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) { + Y_VERIFY_DEBUG(workerId < PoolThreads); + + const auto current = ActorSystem->Monotonic(); + if (deadline < current) + deadline = current; + + ScheduleWriters[workerId].Push(deadline.MicroSeconds(), ev.Release(), cookie); + } + + void TBasicExecutorPool::Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) { + Y_VERIFY_DEBUG(workerId < PoolThreads); + + const auto deadline = ActorSystem->Monotonic() + delta; + ScheduleWriters[workerId].Push(deadline.MicroSeconds(), ev.Release(), cookie); + } + + void TBasicExecutorPool::SetRealTimeMode() const { +// TODO: musl-libc version of `sched_param` struct is for some reason different from pthread +// version in Ubuntu 12.04 +#if defined(_linux_) && !defined(_musl_) + if (RealtimePriority != 0) { + pthread_t threadSelf = pthread_self(); + sched_param param = {RealtimePriority}; + if (pthread_setschedparam(threadSelf, SCHED_FIFO, &param)) { + Y_FAIL("Cannot set realtime priority"); + } + } +#else + Y_UNUSED(RealtimePriority); +#endif + } + + ui32 TBasicExecutorPool::GetThreadCount() const { + return AtomicGet(ThreadCount); + } + + void TBasicExecutorPool::SetThreadCount(ui32 threads) { + threads = Max(1u, Min(PoolThreads, threads)); + with_lock (ChangeThreadsLock) { + size_t prevCount = GetThreadCount(); + AtomicSet(ThreadCount, threads); + if (prevCount < threads) { + for (size_t i = prevCount; i < threads; ++i) { + bool repeat = true; + while (repeat) { + switch (AtomicGet(Threads[i].BlockedFlag)) { + case TThreadCtx::BS_BLOCKING: + if (AtomicCas(&Threads[i].BlockedFlag, TThreadCtx::BS_NONE, TThreadCtx::BS_BLOCKING)) { + // the thread has not entered the blocked loop yet + repeat = false; + } + break; + case TThreadCtx::BS_BLOCKED: + // the thread has entered the blocked loop, so wake it + AtomicSet(Threads[i].BlockedFlag, TThreadCtx::BS_NONE); + Threads[i].BlockedPad.Unpark(); + repeat = false; + break; + default: + // the thread cannot be in TThreadCtx::BS_NONE here because it was previously asked to block + Y_FAIL("BlockedFlag is neither TThreadCtx::BS_BLOCKING nor TThreadCtx::BS_BLOCKED when thread was woken up"); + } + } + } + } else if (prevCount > threads) { + // first, ask the extra threads to block + for (size_t i = threads; i < prevCount; ++i) { + Y_VERIFY(AtomicGet(Threads[i].BlockedFlag) == TThreadCtx::BS_NONE); + AtomicSet(Threads[i].BlockedFlag, TThreadCtx::BS_BLOCKING); + } + // then wake up the threads that need it + for (size_t idx = threads; idx < prevCount; ++idx) { + TThreadCtx& threadCtx = Threads[idx]; + auto waitingFlag = AtomicGet(threadCtx.WaitingFlag); + auto blockedFlag = AtomicGet(threadCtx.BlockedFlag); + // while the thread is in this transient state pair (WS_NONE and BS_BLOCKING) we cannot tell which way it will go: + // either it goes to sleep (and will have to be woken up), + // or it picks up a task and will block after completing it. 
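The loop just below + // spin-waits until the thread leaves this transient window.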
+ while (waitingFlag == TThreadCtx::WS_NONE && blockedFlag == TThreadCtx::BS_BLOCKING) { + waitingFlag = AtomicGet(threadCtx.WaitingFlag); + blockedFlag = AtomicGet(threadCtx.BlockedFlag); + } + // next states: + // 1) WS_ACTIVE BS_BLOCKING - waiting and starts spinning | needs a wake-up to block + // 2) WS_BLOCKED BS_BLOCKING - waiting and starts sleeping | needs a wake-up to block + // 3) WS_RUNNING BS_BLOCKING - starts executing | no wake-up needed, will block after executing + // 4) WS_NONE BS_BLOCKED - blocked | no wake-up needed, already blocked + + if (waitingFlag == TThreadCtx::WS_ACTIVE || waitingFlag == TThreadCtx::WS_BLOCKED) { + // needs a wake-up + Y_VERIFY(blockedFlag == TThreadCtx::BS_BLOCKING); + + // create an empty mailbox hint, where LineIndex == 1 and LineHint == 0; such activations are ignored + constexpr auto emptyMailBoxHint = TMailboxTable::LineIndexMask & -TMailboxTable::LineIndexMask; + ui64 revolvingCounter = AtomicGet(ActivationsRevolvingCounter); + + Activations.Push(emptyMailBoxHint, revolvingCounter); + + auto x = AtomicIncrement(Semaphore); + if (x <= 0) { + // try to wake it up; on success move on to the next thread + switch (waitingFlag){ + case TThreadCtx::WS_ACTIVE: // in active spin-lock, just set flag + if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_RUNNING, TThreadCtx::WS_ACTIVE)) { + continue; + } + break; + case TThreadCtx::WS_BLOCKED: + if (AtomicCas(&threadCtx.WaitingFlag, TThreadCtx::WS_RUNNING, TThreadCtx::WS_BLOCKED)) { + threadCtx.Pad.Unpark(); + continue; + } + break; + default: + ; // other thread woke this sleeping thread + } + // if the thread has already been woken by someone else, we must wake another one + WakeUpLoop(); + } + } + } + } + } + } +} diff --git a/library/cpp/actors/core/executor_pool_basic.h b/library/cpp/actors/core/executor_pool_basic.h new file mode 100644 index 0000000000..023190f7fe --- /dev/null +++ b/library/cpp/actors/core/executor_pool_basic.h @@ -0,0 +1,111 @@ +#pragma once + +#include "actorsystem.h" +#include "executor_thread.h" +#include "scheduler_queue.h" +#include "executor_pool_base.h" +#include <library/cpp/actors/util/unordered_cache.h> +#include <library/cpp/actors/util/threadparkpad.h> +#include <library/cpp/monlib/dynamic_counters/counters.h> + +#include <util/system/mutex.h> + +namespace NActors { + class TBasicExecutorPool: public TExecutorPoolBase { + struct TThreadCtx { + TAutoPtr<TExecutorThread> Thread; + TThreadParkPad Pad; + TThreadParkPad BlockedPad; + TAtomic WaitingFlag; + TAtomic BlockedFlag; + + // different threads must spin/block on different cache-lines. 
+ // we add some padding bytes to enforce this rule + static const size_t SizeWithoutPadding = sizeof(TAutoPtr<TExecutorThread>) + 2 * sizeof(TThreadParkPad) + 2 * sizeof(TAtomic); + ui8 Padding[64 - SizeWithoutPadding]; + static_assert(64 >= SizeWithoutPadding); + + enum EWaitState { + WS_NONE, + WS_ACTIVE, + WS_BLOCKED, + WS_RUNNING + }; + + enum EBlockedState { + BS_NONE, + BS_BLOCKING, + BS_BLOCKED + }; + + TThreadCtx() + : WaitingFlag(WS_NONE) + , BlockedFlag(BS_NONE) + { + } + }; + + const ui64 SpinThreshold; + const ui64 SpinThresholdCycles; + + TArrayHolder<TThreadCtx> Threads; + + TArrayHolder<NSchedulerQueue::TReader> ScheduleReaders; + TArrayHolder<NSchedulerQueue::TWriter> ScheduleWriters; + + const TString PoolName; + const TDuration TimePerMailbox; + const ui32 EventsPerMailbox; + + const int RealtimePriority; + + TAtomic ThreadUtilization; + TAtomic MaxUtilizationCounter; + TAtomic MaxUtilizationAccumulator; + + TAtomic ThreadCount; + TMutex ChangeThreadsLock; + + public: + static constexpr TDuration DEFAULT_TIME_PER_MAILBOX = TBasicExecutorPoolConfig::DEFAULT_TIME_PER_MAILBOX; + static constexpr ui32 DEFAULT_EVENTS_PER_MAILBOX = TBasicExecutorPoolConfig::DEFAULT_EVENTS_PER_MAILBOX; + + TBasicExecutorPool(ui32 poolId, + ui32 threads, + ui64 spinThreshold, + const TString& poolName = "", + TAffinity* affinity = nullptr, + TDuration timePerMailbox = DEFAULT_TIME_PER_MAILBOX, + ui32 eventsPerMailbox = DEFAULT_EVENTS_PER_MAILBOX, + int realtimePriority = 0, + ui32 maxActivityType = 1); + explicit TBasicExecutorPool(const TBasicExecutorPoolConfig& cfg); + ~TBasicExecutorPool(); + + ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingReadCounter) override; + + void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override; + void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override; + void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override; + + void ScheduleActivationEx(ui32 activation, ui64 revolvingWriteCounter) override; + + void Prepare(TActorSystem* actorSystem, NSchedulerQueue::TReader** scheduleReaders, ui32* scheduleSz) override; + void Start() override; + void PrepareStop() override; + void Shutdown() override; + + void GetCurrentStats(TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const override; + TString GetName() const override { + return PoolName; + } + + void SetRealTimeMode() const override; + + ui32 GetThreadCount() const; + void SetThreadCount(ui32 threads); + + private: + void WakeUpLoop(); + }; +} diff --git a/library/cpp/actors/core/executor_pool_basic_ut.cpp b/library/cpp/actors/core/executor_pool_basic_ut.cpp new file mode 100644 index 0000000000..76dff693af --- /dev/null +++ b/library/cpp/actors/core/executor_pool_basic_ut.cpp @@ -0,0 +1,435 @@ +#include "actorsystem.h" +#include "executor_pool_basic.h" +#include "hfunc.h" +#include "scheduler_basic.h" + +#include <library/cpp/actors/util/should_continue.h> + +#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/actors/protos/unittests.pb.h> + +using namespace NActors; + +//////////////////////////////////////////////////////////////////////////////// + +struct TEvMsg : public NActors::TEventBase<TEvMsg, 10347> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvMsg, "ExecutorPoolTest: Msg"); +}; + +//////////////////////////////////////////////////////////////////////////////// + +class TTestSenderActor : 
public IActor { +private: + using EActivityType = IActor::EActivityType ; + using EActorActivity = IActor::EActorActivity; + +private: + TAtomic Counter; + TActorId Receiver; + + std::function<void(void)> Action; + +public: + TTestSenderActor(std::function<void(void)> action = [](){}, + EActivityType activityType = EActorActivity::OTHER) + : IActor(static_cast<TReceiveFunc>(&TTestSenderActor::Execute), activityType) + , Action(action) + {} + + void Start(TActorId receiver, size_t count) + { + AtomicSet(Counter, count); + Receiver = receiver; + } + + void Stop() { + while (true) { + if (GetCounter() == 0) { + break; + } + + Sleep(TDuration::MilliSeconds(1)); + } + } + + size_t GetCounter() const { + return AtomicGet(Counter); + } + +private: + STFUNC(Execute) + { + Y_UNUSED(ctx); + switch (ev->GetTypeRewrite()) { + hFunc(TEvMsg, Handle); + } + } + + void Handle(TEvMsg::TPtr &ev) + { + Y_UNUSED(ev); + Action(); + TAtomicBase count = AtomicDecrement(Counter); + Y_VERIFY(count != Max<TAtomicBase>()); + if (count) { + Send(Receiver, new TEvMsg()); + } + } +}; + +THolder<TActorSystemSetup> GetActorSystemSetup(TBasicExecutorPool* pool) +{ + auto setup = MakeHolder<NActors::TActorSystemSetup>(); + setup->NodeId = 1; + setup->ExecutorsCount = 1; + setup->Executors.Reset(new TAutoPtr<NActors::IExecutorPool>[1]); + setup->Executors[0] = pool; + setup->Scheduler = new TBasicSchedulerThread(NActors::TSchedulerConfig(512, 0)); + return setup; +} + +Y_UNIT_TEST_SUITE(BasicExecutorPool) { + + Y_UNIT_TEST(DecreaseIncreaseThreadsCount) { + const size_t msgCount = 1e4; + const size_t size = 4; + const size_t halfSize = size / 2; + TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50); + + auto setup = GetActorSystemSetup(executorPool); + TActorSystem actorSystem(setup); + actorSystem.Start(); + + executorPool->SetThreadCount(halfSize); + TTestSenderActor* actors[size]; + TActorId actorIds[size]; + for (size_t i = 0; i < size; ++i) { + actors[i] = new TTestSenderActor(); + actorIds[i] = actorSystem.Register(actors[i]); + } + + const int testCount = 2; + + TExecutorPoolStats poolStats[testCount]; + TVector<TExecutorThreadStats> statsCopy[testCount]; + + for (size_t testIdx = 0; testIdx < testCount; ++testIdx) { + for (size_t i = 0; i < size; ++i) { + actors[i]->Start(actors[i]->SelfId(), msgCount); + } + for (size_t i = 0; i < size; ++i) { + actorSystem.Send(actorIds[i], new TEvMsg()); + } + + Sleep(TDuration::MilliSeconds(100)); + + for (size_t i = 0; i < size; ++i) { + actors[i]->Stop(); + } + + executorPool->GetCurrentStats(poolStats[testIdx], statsCopy[testIdx]); + } + + for (size_t i = 1; i <= halfSize; ++i) { + UNIT_ASSERT_UNEQUAL(statsCopy[0][i].ReceivedEvents, statsCopy[1][i].ReceivedEvents); + } + + for (size_t i = halfSize + 1; i <= size; ++i) { + UNIT_ASSERT_EQUAL(statsCopy[0][i].ReceivedEvents, statsCopy[1][i].ReceivedEvents); + } + + executorPool->SetThreadCount(size); + + for (size_t testIdx = 0; testIdx < testCount; ++testIdx) { + for (size_t i = 0; i < size; ++i) { + actors[i]->Start(actors[i]->SelfId(), msgCount); + } + for (size_t i = 0; i < size; ++i) { + actorSystem.Send(actorIds[i], new TEvMsg()); + } + + Sleep(TDuration::MilliSeconds(100)); + + for (size_t i = 0; i < size; ++i) { + actors[i]->Stop(); + } + + executorPool->GetCurrentStats(poolStats[testIdx], statsCopy[testIdx]); + } + + for (size_t i = 1; i <= size; ++i) { + UNIT_ASSERT_UNEQUAL(statsCopy[0][i].ReceivedEvents, statsCopy[1][i].ReceivedEvents); + } + } + + Y_UNIT_TEST(ChangeCount) { + const size_t msgCount = 
1e3; + const size_t size = 4; + const size_t halfSize = size / 2; + TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50); + + auto begin = TInstant::Now(); + + auto setup = GetActorSystemSetup(executorPool); + TActorSystem actorSystem(setup); + actorSystem.Start(); + executorPool->SetThreadCount(halfSize); + + TTestSenderActor* actors[size]; + TActorId actorIds[size]; + for (size_t i = 0; i < size; ++i) { + actors[i] = new TTestSenderActor(); + actorIds[i] = actorSystem.Register(actors[i]); + } + + for (size_t i = 0; i < size; ++i) { + actors[i]->Start(actorIds[i], msgCount); + } + for (size_t i = 0; i < size; ++i) { + actorSystem.Send(actorIds[i], new TEvMsg()); + } + + const i32 N = 6; + const i32 threadsCouns[N] = { 1, 3, 2, 3, 1, 4 }; + + ui64 counter = 0; + + TTestSenderActor* changerActor = new TTestSenderActor([&]{ + executorPool->SetThreadCount(threadsCouns[counter]); + counter++; + if (counter == N) { + counter = 0; + } + }); + TActorId changerActorId = actorSystem.Register(changerActor); + changerActor->Start(changerActorId, msgCount); + actorSystem.Send(changerActorId, new TEvMsg()); + + while (true) { + size_t maxCounter = 0; + for (size_t i = 0; i < size; ++i) { + maxCounter = Max(maxCounter, actors[i]->GetCounter()); + } + + if (maxCounter == 0) { + break; + } + + auto now = TInstant::Now(); + UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Max counter is " << maxCounter); + + Sleep(TDuration::MilliSeconds(1)); + } + + changerActor->Stop(); + } + + Y_UNIT_TEST(CheckCompleteOne) { + const size_t size = 4; + const size_t msgCount = 1e4; + TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50); + + auto setup = GetActorSystemSetup(executorPool); + TActorSystem actorSystem(setup); + actorSystem.Start(); + + auto begin = TInstant::Now(); + + auto actor = new TTestSenderActor(); + auto actorId = actorSystem.Register(actor); + actor->Start(actor->SelfId(), msgCount); + actorSystem.Send(actorId, new TEvMsg()); + + while (actor->GetCounter()) { + auto now = TInstant::Now(); + UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Counter is " << actor->GetCounter()); + + Sleep(TDuration::MilliSeconds(1)); + } + } + + Y_UNIT_TEST(CheckCompleteAll) { + const size_t size = 4; + const size_t msgCount = 1e4; + TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50); + + auto setup = GetActorSystemSetup(executorPool); + TActorSystem actorSystem(setup); + actorSystem.Start(); + + auto begin = TInstant::Now(); + + TTestSenderActor* actors[size]; + TActorId actorIds[size]; + + for (size_t i = 0; i < size; ++i) { + actors[i] = new TTestSenderActor(); + actorIds[i] = actorSystem.Register(actors[i]); + } + for (size_t i = 0; i < size; ++i) { + actors[i]->Start(actors[i]->SelfId(), msgCount); + } + for (size_t i = 0; i < size; ++i) { + actorSystem.Send(actorIds[i], new TEvMsg()); + } + + + while (true) { + size_t maxCounter = 0; + for (size_t i = 0; i < size; ++i) { + maxCounter = Max(maxCounter, actors[i]->GetCounter()); + } + + if (maxCounter == 0) { + break; + } + + auto now = TInstant::Now(); + UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Max counter is " << maxCounter); + + Sleep(TDuration::MilliSeconds(1)); + } + } + + Y_UNIT_TEST(CheckCompleteOver) { + const size_t size = 4; + const size_t actorsCount = size * 2; + const size_t msgCount = 1e4; + TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50); + + auto setup = GetActorSystemSetup(executorPool); + TActorSystem actorSystem(setup); + actorSystem.Start(); + + auto 
begin = TInstant::Now(); + + TTestSenderActor* actors[actorsCount]; + TActorId actorIds[actorsCount]; + + for (size_t i = 0; i < actorsCount; ++i) { + actors[i] = new TTestSenderActor(); + actorIds[i] = actorSystem.Register(actors[i]); + } + for (size_t i = 0; i < actorsCount; ++i) { + actors[i]->Start(actors[i]->SelfId(), msgCount); + } + for (size_t i = 0; i < actorsCount; ++i) { + actorSystem.Send(actorIds[i], new TEvMsg()); + } + + + while (true) { + size_t maxCounter = 0; + for (size_t i = 0; i < actorsCount; ++i) { + maxCounter = Max(maxCounter, actors[i]->GetCounter()); + } + + if (maxCounter == 0) { + break; + } + + auto now = TInstant::Now(); + UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Max counter is " << maxCounter); + + Sleep(TDuration::MilliSeconds(1)); + } + } + + Y_UNIT_TEST(CheckCompleteRoundRobinOver) { + const size_t size = 4; + const size_t actorsCount = size * 2; + const size_t msgCount = 1e2; + TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50); + + auto setup = GetActorSystemSetup(executorPool); + TActorSystem actorSystem(setup); + actorSystem.Start(); + + auto begin = TInstant::Now(); + + TTestSenderActor* actors[actorsCount]; + TActorId actorIds[actorsCount]; + + for (size_t i = 0; i < actorsCount; ++i) { + actors[i] = new TTestSenderActor(); + actorIds[i] = actorSystem.Register(actors[i]); + } + for (size_t i = 0; i < actorsCount; ++i) { + actors[i]->Start(actorIds[(i + 1) % actorsCount], msgCount); + } + for (size_t i = 0; i < actorsCount; ++i) { + actorSystem.Send(actorIds[i], new TEvMsg()); + } + + while (true) { + size_t maxCounter = 0; + for (size_t i = 0; i < actorsCount; ++i) { + maxCounter = Max(maxCounter, actors[i]->GetCounter()); + } + + if (maxCounter == 0) { + break; + } + + auto now = TInstant::Now(); + UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Max counter is " << maxCounter); + + Sleep(TDuration::MilliSeconds(1)); + } + } + + Y_UNIT_TEST(CheckStats) { + const size_t size = 4; + const size_t msgCount = 1e4; + TBasicExecutorPool* executorPool = new TBasicExecutorPool(0, size, 50); + + auto setup = GetActorSystemSetup(executorPool); + TActorSystem actorSystem(setup); + actorSystem.Start(); + + auto begin = TInstant::Now(); + + auto actor = new TTestSenderActor(); + auto actorId = actorSystem.Register(actor); + actor->Start(actor->SelfId(), msgCount); + actorSystem.Send(actorId, new TEvMsg()); + + while (actor->GetCounter()) { + auto now = TInstant::Now(); + UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Counter is " << actor->GetCounter()); + + Sleep(TDuration::MilliSeconds(1)); + } + + TVector<TExecutorThreadStats> stats; + TExecutorPoolStats poolStats; + actorSystem.GetPoolStats(0, poolStats, stats); + // Sum all per-thread counters into the 0th element + for (ui32 idx = 1; idx < stats.size(); ++idx) { + stats[0].Aggregate(stats[idx]); + } + + UNIT_ASSERT_VALUES_EQUAL(stats[0].SentEvents, msgCount - 1); + UNIT_ASSERT_VALUES_EQUAL(stats[0].ReceivedEvents, msgCount); + UNIT_ASSERT_VALUES_EQUAL(stats[0].PreemptedEvents, 0); + UNIT_ASSERT_VALUES_EQUAL(stats[0].NonDeliveredEvents, 0); + UNIT_ASSERT_VALUES_EQUAL(stats[0].EmptyMailboxActivation, 0); + //UNIT_ASSERT_VALUES_EQUAL(stats[0].CpuNs, 0); // depends on total duration of test, so undefined + UNIT_ASSERT(stats[0].ElapsedTicks > 0); + UNIT_ASSERT(stats[0].ParkedTicks > 0); + UNIT_ASSERT_VALUES_EQUAL(stats[0].BlockedTicks, 0); + UNIT_ASSERT(stats[0].ActivationTimeHistogram.TotalSamples >= msgCount / TBasicExecutorPoolConfig::DEFAULT_EVENTS_PER_MAILBOX); + 
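+        // The test is one actor re-sending a message to itself msgCount times, so every
+        // event is delivered and processed exactly once; that is why the delivery and
+        // processing-count histograms below must each contain exactly msgCount samples.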
UNIT_ASSERT_VALUES_EQUAL(stats[0].EventDeliveryTimeHistogram.TotalSamples, msgCount); + UNIT_ASSERT_VALUES_EQUAL(stats[0].EventProcessingCountHistogram.TotalSamples, msgCount); + UNIT_ASSERT(stats[0].EventProcessingTimeHistogram.TotalSamples > 0); + UNIT_ASSERT(stats[0].ElapsedTicksByActivity[0] > 0); + UNIT_ASSERT_VALUES_EQUAL(stats[0].ReceivedEventsByActivity[0], msgCount); + UNIT_ASSERT_VALUES_EQUAL(stats[0].ActorsAliveByActivity[0], 1); + UNIT_ASSERT_VALUES_EQUAL(stats[0].ScheduledEventsByActivity[0], 0); + UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolActorRegistrations, 1); + UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolDestroyedActors, 0); + UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolAllocatedMailboxes, 4095); // one line + UNIT_ASSERT(stats[0].MailboxPushedOutByTime + stats[0].MailboxPushedOutByEventCount >= msgCount / TBasicExecutorPoolConfig::DEFAULT_EVENTS_PER_MAILBOX); + UNIT_ASSERT_VALUES_EQUAL(stats[0].MailboxPushedOutBySoftPreemption, 0); + } +} diff --git a/library/cpp/actors/core/executor_pool_io.cpp b/library/cpp/actors/core/executor_pool_io.cpp new file mode 100644 index 0000000000..fb557ae6b0 --- /dev/null +++ b/library/cpp/actors/core/executor_pool_io.cpp @@ -0,0 +1,151 @@ +#include "executor_pool_io.h" +#include "mailbox.h" +#include <library/cpp/actors/util/affinity.h> +#include <library/cpp/actors/util/datetime.h> + +namespace NActors { + TIOExecutorPool::TIOExecutorPool(ui32 poolId, ui32 threads, const TString& poolName, TAffinity* affinity, ui32 maxActivityType) + : TExecutorPoolBase(poolId, threads, affinity, maxActivityType) + , Threads(new TThreadCtx[threads]) + , PoolName(poolName) + {} + + TIOExecutorPool::TIOExecutorPool(const TIOExecutorPoolConfig& cfg) + : TIOExecutorPool( + cfg.PoolId, + cfg.Threads, + cfg.PoolName, + new TAffinity(cfg.Affinity), + cfg.MaxActivityType + ) + {} + + TIOExecutorPool::~TIOExecutorPool() { + Threads.Destroy(); + while (ThreadQueue.Pop(0)) + ; + } + + ui32 TIOExecutorPool::GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) { + ui32 workerId = wctx.WorkerId; + Y_VERIFY_DEBUG(workerId < PoolThreads); + + NHPTimer::STime elapsed = 0; + NHPTimer::STime parked = 0; + NHPTimer::STime hpstart = GetCycleCountFast(); + NHPTimer::STime hpnow; + + const TAtomic x = AtomicDecrement(Semaphore); + if (x < 0) { + TThreadCtx& threadCtx = Threads[workerId]; + ThreadQueue.Push(workerId + 1, revolvingCounter); + hpnow = GetCycleCountFast(); + elapsed += hpnow - hpstart; + if (threadCtx.Pad.Park()) + return 0; + hpstart = GetCycleCountFast(); + parked += hpstart - hpnow; + } + + while (!RelaxedLoad(&StopFlag)) { + if (const ui32 activation = Activations.Pop(++revolvingCounter)) { + hpnow = GetCycleCountFast(); + elapsed += hpnow - hpstart; + wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, elapsed); + if (parked > 0) { + wctx.AddParkedCycles(parked); + } + return activation; + } + SpinLockPause(); + } + + return 0; + } + + void TIOExecutorPool::Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) { + Schedule(deadline - ActorSystem->Timestamp(), ev, cookie, workerId); + } + + void TIOExecutorPool::Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) { + Y_UNUSED(workerId); + + const auto current = ActorSystem->Monotonic(); + if (deadline < current) + deadline = current; + + TTicketLock::TGuard guard(&ScheduleLock); + ScheduleQueue->Writer.Push(deadline.MicroSeconds(), ev.Release(), cookie); + } + + void TIOExecutorPool::Schedule(TDuration delta, 
TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) { + Y_UNUSED(workerId); + const auto deadline = ActorSystem->Monotonic() + delta; + + TTicketLock::TGuard guard(&ScheduleLock); + ScheduleQueue->Writer.Push(deadline.MicroSeconds(), ev.Release(), cookie); + } + + void TIOExecutorPool::ScheduleActivationEx(ui32 activation, ui64 revolvingWriteCounter) { + Activations.Push(activation, revolvingWriteCounter); + const TAtomic x = AtomicIncrement(Semaphore); + if (x <= 0) { + for (;; ++revolvingWriteCounter) { + if (const ui32 x = ThreadQueue.Pop(revolvingWriteCounter)) { + const ui32 threadIdx = x - 1; + Threads[threadIdx].Pad.Unpark(); + return; + } + SpinLockPause(); + } + } + } + + void TIOExecutorPool::Prepare(TActorSystem* actorSystem, NSchedulerQueue::TReader** scheduleReaders, ui32* scheduleSz) { + TAffinityGuard affinityGuard(Affinity()); + + ActorSystem = actorSystem; + + ScheduleQueue.Reset(new NSchedulerQueue::TQueueType()); + + for (ui32 i = 0; i != PoolThreads; ++i) { + Threads[i].Thread.Reset(new TExecutorThread(i, 0, actorSystem, this, MailboxTable.Get(), PoolName)); + } + + *scheduleReaders = &ScheduleQueue->Reader; + *scheduleSz = 1; + } + + void TIOExecutorPool::Start() { + TAffinityGuard affinityGuard(Affinity()); + + for (ui32 i = 0; i != PoolThreads; ++i) + Threads[i].Thread->Start(); + } + + void TIOExecutorPool::PrepareStop() { + AtomicStore(&StopFlag, true); + for (ui32 i = 0; i != PoolThreads; ++i) + Threads[i].Pad.Interrupt(); + } + + void TIOExecutorPool::Shutdown() { + for (ui32 i = 0; i != PoolThreads; ++i) + Threads[i].Thread->Join(); + } + + void TIOExecutorPool::GetCurrentStats(TExecutorPoolStats& /*poolStats*/, TVector<TExecutorThreadStats>& statsCopy) const { + statsCopy.resize(PoolThreads + 1); + // Save counters from the pool object + statsCopy[0] = TExecutorThreadStats(); + statsCopy[0].Aggregate(Stats); + // Per-thread stats + for (size_t i = 0; i < PoolThreads; ++i) { + Threads[i].Thread->GetCurrentStats(statsCopy[i + 1]); + } + } + + TString TIOExecutorPool::GetName() const { + return PoolName; + } +} diff --git a/library/cpp/actors/core/executor_pool_io.h b/library/cpp/actors/core/executor_pool_io.h new file mode 100644 index 0000000000..e576d642a1 --- /dev/null +++ b/library/cpp/actors/core/executor_pool_io.h @@ -0,0 +1,49 @@ +#pragma once + +#include "actorsystem.h" +#include "executor_thread.h" +#include "scheduler_queue.h" +#include "executor_pool_base.h" +#include <library/cpp/actors/util/ticket_lock.h> +#include <library/cpp/actors/util/unordered_cache.h> +#include <library/cpp/actors/util/threadparkpad.h> +#include <util/system/condvar.h> + +namespace NActors { + class TIOExecutorPool: public TExecutorPoolBase { + struct TThreadCtx { + TAutoPtr<TExecutorThread> Thread; + TThreadParkPad Pad; + }; + + TArrayHolder<TThreadCtx> Threads; + TUnorderedCache<ui32, 512, 4> ThreadQueue; + + THolder<NSchedulerQueue::TQueueType> ScheduleQueue; + TTicketLock ScheduleLock; + + const TString PoolName; + + public: + TIOExecutorPool(ui32 poolId, ui32 threads, const TString& poolName = "", TAffinity* affinity = nullptr, + ui32 maxActivityType = 1); + explicit TIOExecutorPool(const TIOExecutorPoolConfig& cfg); + ~TIOExecutorPool(); + + ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) override; + + void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override; + void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) 
override; + void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override; + + void ScheduleActivationEx(ui32 activation, ui64 revolvingWriteCounter) override; + + void Prepare(TActorSystem* actorSystem, NSchedulerQueue::TReader** scheduleReaders, ui32* scheduleSz) override; + void Start() override; + void PrepareStop() override; + void Shutdown() override; + + void GetCurrentStats(TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const override; + TString GetName() const override; + }; +} diff --git a/library/cpp/actors/core/executor_pool_united.cpp b/library/cpp/actors/core/executor_pool_united.cpp new file mode 100644 index 0000000000..dac6245635 --- /dev/null +++ b/library/cpp/actors/core/executor_pool_united.cpp @@ -0,0 +1,1428 @@ +#include "executor_pool_united.h" + +#include "balancer.h" +#include "cpu_state.h" +#include "executor_thread.h" +#include "probes.h" +#include "mailbox.h" +#include "scheduler_queue.h" +#include <library/cpp/actors/util/affinity.h> +#include <library/cpp/actors/util/datetime.h> +#include <library/cpp/actors/util/futex.h> +#include <library/cpp/actors/util/intrinsics.h> +#include <library/cpp/actors/util/timerfd.h> + +#include <util/system/datetime.h> +#include <util/system/hp_timer.h> + +#include <algorithm> + +namespace NActors { + LWTRACE_USING(ACTORLIB_PROVIDER); + + struct TUnitedWorkers::TWorker: public TNonCopyable { + TAutoPtr<TExecutorThread> Thread; + volatile TThreadId ThreadId = UnknownThreadId; + NSchedulerQueue::TQueueType SchedulerQueue; + }; + + struct TUnitedWorkers::TPool: public TNonCopyable { + TAtomic Waiters = 0; // Number of idle cpus, waiting for activations in this pool + char Padding[64 - sizeof(TAtomic)]; + + TUnorderedCache<ui32, 512, 4> Activations; // MPMC-queue for mailbox activations + TAtomic Active = 0; // Number of mailboxes ready for execution or currently executing + TAtomic Tokens = 0; // Pending tokens (token is required for worker to start execution, guarantees concurrency limit and activation availability) + volatile bool StopFlag = false; + + // Configuration + TPoolId PoolId; + TAtomicBase Concurrency; // Max concurrent workers running this pool + IExecutorPool* ExecutorPool; + TMailboxTable* MailboxTable; + ui64 TimePerMailboxTs; + ui32 EventsPerMailbox; + + // Cpus this pool is allowed to run on + // Cpus are specified in wake order + TStackVec<TCpu*, 15> WakeOrderCpus; + + ~TPool() { + while (Activations.Pop(0)) {} + } + + void Stop() { + AtomicStore(&StopFlag, true); + } + + bool IsUnited() const { + return WakeOrderCpus.size(); + } + + // Add activation of newly scheduled mailbox. Returns generated token (unless concurrency is exceeded) + bool PushActivation(ui32 activation, ui64 revolvingCounter) { + Activations.Push(activation, revolvingCounter); + TAtomicBase active = AtomicIncrement(Active); + if (active <= Concurrency) { // token generated + AtomicIncrement(Tokens); + return true; + } + return false; + } + + template <bool Relaxed> + static bool TryAcquireTokenImpl(TAtomic* tokens) { + while (true) { + TAtomicBase value; + if constexpr (Relaxed) { + value = RelaxedLoad(tokens); + } else { + value = AtomicLoad(tokens); + } + if (value > 0) { + if (AtomicCas(tokens, value - 1, value)) { + return true; // token acquired + } + } else { + return false; // no more tokens + } + } + } + + // Try acquire pending token. 
Must be done before execution
+        bool TryAcquireToken() {
+            return TryAcquireTokenImpl<false>(&Tokens);
+        }
+
+        // Try to acquire a pending token. Must be done before execution
+        bool TryAcquireTokenRelaxed() {
+            return TryAcquireTokenImpl<true>(&Tokens);
+        }
+
+        // Get activation. Requires an acquired token.
+        void BeginExecution(ui32& activation, ui64 revolvingCounter) {
+            while (!RelaxedLoad(&StopFlag)) {
+                if (activation = Activations.Pop(++revolvingCounter)) {
+                    return;
+                }
+                SpinLockPause();
+            }
+            activation = 0; // should stop
+        }
+
+        // End the currently active execution and start a new one if a token is available.
+        // Reuses the token if it is not destroyed.
+        // Returned `true` means successful switch, `activation` is filled.
+        // Returned `false` means execution has ended, no need to call StopExecution()
+        bool NextExecution(ui32& activation, ui64 revolvingCounter) {
+            if (AtomicDecrement(Active) >= Concurrency) { // reuse just released token
+                BeginExecution(activation, revolvingCounter);
+                return true;
+            } else if (TryAcquireToken()) { // another token acquired
+                BeginExecution(activation, revolvingCounter);
+                return true;
+            }
+            return false; // no more tokens available
+        }
+
+        // Stop active execution. Returns released token (unless it is destroyed)
+        bool StopExecution() {
+            TAtomicBase active = AtomicDecrement(Active);
+            if (active >= Concurrency) { // token released
+                AtomicIncrement(Tokens);
+                return true;
+            }
+            return false; // token destroyed
+        }
+
+        // Switch worker context into this pool
+        void Switch(TWorkerContext& wctx, ui64 softDeadlineTs, TExecutorThreadStats& stats) {
+            wctx.Switch(ExecutorPool, MailboxTable, TimePerMailboxTs, EventsPerMailbox, softDeadlineTs, &stats);
+        }
+    };
+
+    class TPoolScheduler {
+        class TSchedulable {
+            // Lower PoolBits store PoolId
+            // All other higher bits store virtual runtime in cycles
+            using TValue = ui64;
+            TValue Value;
+
+            static constexpr ui64 PoolIdMask = ui64((1ull << PoolBits) - 1);
+            static constexpr ui64 VRunTsMask = ~PoolIdMask;
+
+        public:
+            explicit TSchedulable(TPoolId poolId = MaxPools, ui64 vrunts = 0)
+                : Value((poolId & PoolIdMask) | (vrunts & VRunTsMask))
+            {}
+
+            TPoolId GetPoolId() const {
+                return Value & PoolIdMask;
+            }
+
+            ui64 GetVRunTs() const {
+                // Do not truncate the pool id
+                // NOTE: this decreases accuracy, but improves performance
+                return Value;
+            }
+
+            ui64 GetPreciseVRunTs() const {
+                return Value & VRunTsMask;
+            }
+
+            void SetVRunTs(ui64 vrunts) {
+                Value = (Value & PoolIdMask) | (vrunts & VRunTsMask);
+            }
+
+            void Account(ui64 base, ui64 ts) {
+                // Add at least the minimum amount needed to change Value
+                SetVRunTs(base + Max(ts, PoolIdMask + 1));
+            }
+        };
+
+        // For min-heap of Items
+        struct TCmp {
+            bool operator()(TSchedulable lhs, TSchedulable rhs) const {
+                return lhs.GetVRunTs() > rhs.GetVRunTs();
+            }
+        };
+
+        TPoolId Size = 0; // total number of pools on this cpu
+        TPoolId Current = 0; // index of the current pool in `Items`
+
+        // The first `Current` items are organized as a binary min-heap -- ready to be scheduled
+        // The remaining `Size - Current` items are unordered (required to keep track of the last vrunts)
+        TSchedulable Items[MaxPools]; // virtual runtime in cycles for each pool
+        ui64 MinVRunTs = 0; // virtual runtime used by waking pools (system's vrunts)
+        ui64 Ts = 0; // real timestamp of current execution start (for accounting)
+
+        // Maps PoolId to its inverse weight
+        ui64 InvWeights[MaxPools];
+        static constexpr ui64 VRunTsOverflow = ui64(1ull << 62ull) / MaxPoolWeight;
+
+    public:
+        void AddPool(TPoolId pool,
TPoolWeight weight) { + Items[Size] = TSchedulable(pool, MinVRunTs); + Size++; + InvWeights[pool] = MaxPoolWeight / std::clamp(weight ? weight : DefPoolWeight, MinPoolWeight, MaxPoolWeight); + } + + // Iterate over pools in scheduling order + // should be used in construction: + // for (TPoolId pool = Begin(); pool != End(); pool = Next()) + TPoolId Begin() { + // Wrap vruntime around to avoid overflow, if required + if (Y_UNLIKELY(MinVRunTs >= VRunTsOverflow)) { + for (TPoolId i = 0; i < Size; i++) { + ui64 ts = Items[i].GetPreciseVRunTs(); + Items[i].SetVRunTs(ts >= VRunTsOverflow ? ts - VRunTsOverflow : 0); + } + MinVRunTs -= VRunTsOverflow; + } + Current = Size; + std::make_heap(Items, Items + Current, TCmp()); + return Next(); + } + + constexpr TPoolId End() const { + return MaxPools; + } + + TPoolId Next() { + if (Current > 0) { + std::pop_heap(Items, Items + Current, TCmp()); + Current--; + return CurrentPool(); + } else { + return End(); + } + } + + // Scheduling was successful, we are going to run CurrentPool() + void Scheduled() { + MinVRunTs = Max(MinVRunTs, Items[Current].GetPreciseVRunTs()); + // NOTE: Ts is propagated on Account() to avoid gaps + } + + // Schedule specific pool that woke up cpu after idle + void ScheduledAfterIdle(TPoolId pool, ui64 ts) { + if (Y_UNLIKELY(ts < Ts)) { // anomaly: time goes backwards (e.g. rdtsc is reset to zero on cpu reset) + Ts = ts; // just skip anomalous time slice + return; + } + MinVRunTs += (ts - Ts) * (MaxPoolWeight / DefPoolWeight); // propagate system's vrunts to blur difference between pools + Ts = ts; // propagate time w/o accounting to any pool + + // Set specified pool as current, it requires scan + for (Current = 0; Current < Size && pool != Items[Current].GetPoolId(); Current++) {} + Y_VERIFY(Current < Size); + } + + // Account currently running pool till now (ts) + void Account(ui64 ts) { + // Skip time slice for the first run and when time goes backwards (e.g. rdtsc is reset to zero on cpu reset) + if (Y_LIKELY(Ts > 0 && Ts <= ts)) { + TPoolId pool = CurrentPool(); + Y_VERIFY(pool < MaxPools); + Items[Current].Account(MinVRunTs, (ts - Ts) * InvWeights[pool]); + } + Ts = ts; // propagate time + } + + TPoolId CurrentPool() const { + return Items[Current].GetPoolId(); + } + }; + + // Cyclic array of timers for idle workers to wait for hard preemption on + struct TIdleQueue: public TNonCopyable { + TArrayHolder<TTimerFd> Timers; + size_t Size; + TAtomic EnqueueCounter = 0; + TAtomic DequeueCounter = 0; + + explicit TIdleQueue(size_t size) + : Timers(new TTimerFd[size]) + , Size(size) + {} + + void Stop() { + for (size_t i = 0; i < Size; i++) { + Timers[i].Wake(); + } + } + + // Returns timer which new idle-worker should wait for + TTimerFd* Enqueue() { + return &Timers[AtomicGetAndIncrement(EnqueueCounter) % Size]; + } + + // Returns timer that hard preemption should trigger to wake idle-worker + TTimerFd* Dequeue() { + return &Timers[AtomicGetAndIncrement(DequeueCounter) % Size]; + } + }; + + // Base class for cpu-local managers that help workers on single cpu to cooperate + struct TCpuLocalManager: public TThrRefBase { + TUnitedWorkers* United; + + explicit TCpuLocalManager(TUnitedWorkers* united) + : United(united) + {} + + virtual TWorkerId WorkerCount() const = 0; + virtual void AddWorker(TWorkerId workerId) = 0; + virtual void Stop() = 0; + }; + + // Represents cpu with single associated worker that is able to execute any pool. 
+ // It always executes pool assigned by balancer and switch pool only if assigned pool has changed + struct TAssignedCpu: public TCpuLocalManager { + bool Started = false; + + TAssignedCpu(TUnitedWorkers* united) + : TCpuLocalManager(united) + {} + + TWorkerId WorkerCount() const override { + return 1; + } + + void AddWorker(TWorkerId workerId) override { + Y_UNUSED(workerId); + } + + ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) { + ui32 activation; + if (Y_UNLIKELY(!Started)) { + Started = true; + } else if (Y_UNLIKELY(United->IsPoolReassigned(wctx))) { + United->StopExecution(wctx.PoolId); // stop current execution and switch pool if reassigned + } else if (United->NextExecution(wctx.PoolId, activation, revolvingCounter)) { + return activation; // another activation from currently executing pool (or 0 if stopped) + } + + // Switch to another pool, it blocks until token is acquired + if (Y_UNLIKELY(!SwitchPool(wctx))) { + return 0; // stopped + } + United->SwitchPool(wctx, 0); + United->BeginExecution(wctx.PoolId, activation, revolvingCounter); + return activation; + } + + void Stop() override { + } + + private: + // Sets next pool to run, and acquires token, blocks if there are no tokens + bool SwitchPool(TWorkerContext& wctx) { + if (Y_UNLIKELY(United->IsStopped())) { + return false; + } + + // Run balancer (if it's time to) + United->Balance(); + + // Select pool to execute + wctx.PoolId = United->AssignedPool(wctx); + Y_VERIFY(wctx.PoolId != CpuShared); + if (United->TryAcquireToken(wctx.PoolId)) { + return true; + } + + // No more work -- wait for activations (spinning, then blocked) + wctx.PoolId = United->Idle(wctx.PoolId, wctx); + + // Wakeup or stop occured + if (Y_UNLIKELY(wctx.PoolId == CpuStopped)) { + return false; + } + return true; // United->Idle() has already acquired token + } + }; + + // Lock-free data structure that help workers on single cpu to discover their state and do hard preemptions + struct TSharedCpu: public TCpuLocalManager { + // Current lease + volatile TLease::TValue CurrentLease; + char Padding1[64 - sizeof(TLease)]; + + // Slow pools + // the highest bit: 1=wait-for-slow-workers mode 0=else + // any lower bit (poolId is bit position): 1=pool-is-slow 0=pool-is-fast + volatile TPoolsMask SlowPoolsMask = 0; + char Padding2[64 - sizeof(TPoolsMask)]; + + // Must be accessed under never expiring lease to avoid races + TPoolScheduler PoolSched; + TWorkerId FastWorker = MaxWorkers; + TTimerFd* PreemptionTimer = nullptr; + ui64 HardPreemptionTs = 0; + bool Started = false; + + TIdleQueue IdleQueue; + + struct TConfig { + const TCpuId CpuId; + const TWorkerId Workers; + ui64 SoftLimitTs; + ui64 HardLimitTs; + ui64 EventLimitTs; + ui64 LimitPrecisionTs; + const int IdleWorkerPriority; + const int FastWorkerPriority; + const bool NoRealtime; + const bool NoAffinity; + const TCpuAllocation CpuAlloc; + + TConfig(const TCpuAllocation& allocation, const TUnitedWorkersConfig& united) + : CpuId(allocation.CpuId) + , Workers(allocation.AllowedPools.size() + 1) + , SoftLimitTs(Us2Ts(united.PoolLimitUs)) + , HardLimitTs(Us2Ts(united.PoolLimitUs + united.EventLimitUs)) + , EventLimitTs(Us2Ts(united.EventLimitUs)) + , LimitPrecisionTs(Us2Ts(united.LimitPrecisionUs)) + , IdleWorkerPriority(std::clamp<ui64>(united.IdleWorkerPriority ? united.IdleWorkerPriority : 20, 1, 99)) + , FastWorkerPriority(std::clamp<ui64>(united.FastWorkerPriority ? 
united.FastWorkerPriority : 10, 1, IdleWorkerPriority - 1)) + , NoRealtime(united.NoRealtime) + , NoAffinity(united.NoAffinity) + , CpuAlloc(allocation) + {} + }; + + TConfig Config; + TVector<TWorkerId> Workers; + + TSharedCpu(const TConfig& cfg, TUnitedWorkers* united) + : TCpuLocalManager(united) + , IdleQueue(cfg.Workers) + , Config(cfg) + { + for (const auto& pa : Config.CpuAlloc.AllowedPools) { + PoolSched.AddPool(pa.PoolId, pa.Weight); + } + } + + TWorkerId WorkerCount() const override { + return Config.Workers; + } + + void AddWorker(TWorkerId workerId) override { + if (Workers.empty()) { + // Grant lease to the first worker + AtomicStore(&CurrentLease, TLease(workerId, NeverExpire).Value); + } + Workers.push_back(workerId); + } + + ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) { + ui32 activation; + if (!wctx.Lease.IsNeverExpiring()) { + if (wctx.SoftDeadlineTs < GetCycleCountFast()) { // stop if lease has expired or is near to be expired + United->StopExecution(wctx.PoolId); + } else if (United->NextExecution(wctx.PoolId, activation, revolvingCounter)) { + return activation; // another activation from currently executing pool (or 0 if stopped) + } + } + + // Switch to another pool, it blocks until token is acquired + if (Y_UNLIKELY(!SwitchPool(wctx))) { + return 0; // stopped + } + United->BeginExecution(wctx.PoolId, activation, revolvingCounter); + return activation; + } + + void Stop() override { + IdleQueue.Stop(); + } + + private: + enum EPriority { + IdlePriority, // highest (real-time, Config.IdleWorkerPriority) + FastPriority, // normal (real-time, Config.FastWorkerPriority) + SlowPriority, // lowest (not real-time) + }; + + enum EWorkerAction { + // Fast-worker + ExecuteFast, + WaitForSlow, + + // Slow-worker + BecameIdle, + WakeFast, + + // Idle-worker + BecameFast, + Standby, + + // Common + Stopped, + }; + + // Thread-safe; should be called from worker + // Blocks for idle-workers; sets lease and next pool to run + bool SwitchPool(TWorkerContext& wctx) { + TTimerFd* idleTimer = nullptr; + while (true) { + if (DisablePreemptionAndTryExtend(wctx.Lease)) { // if fast-worker + if (Y_UNLIKELY(!Started)) { + SetPriority(0, FastPriority); + Started = true; + } + while (true) { + switch (FastWorkerAction(wctx)) { + case ExecuteFast: + United->SwitchPool(wctx, wctx.Lease.GetPreciseExpireTs() - Config.EventLimitTs); + EnablePreemptionAndGrant(wctx.Lease); + return true; + case WaitForSlow: + FastWorkerSleep(GetCycleCountFast() + Config.SoftLimitTs); + break; + case Stopped: return false; + default: Y_FAIL(); + } + } + } else if (wctx.Lease.IsNeverExpiring()) { // if idle-worker + switch (IdleWorkerAction(idleTimer, wctx.Lease.GetWorkerId())) { + case BecameFast: + SetPriority(0, FastPriority); + break; // try acquire new lease + case Standby: + if (!idleTimer) { + idleTimer = IdleQueue.Enqueue(); + } + SetPriority(0, IdlePriority); + idleTimer->Wait(); + break; + case Stopped: return false; + default: Y_FAIL(); + } + } else { // lease has expired and hard preemption occured, so we are executing in a slow-worker + wctx.IncrementPreemptedEvents(); + switch (SlowWorkerAction(wctx.PoolId)) { + case WakeFast: + WakeFastWorker(); + [[fallthrough]]; // no break; pass through + case BecameIdle: + wctx.Lease = wctx.Lease.NeverExpire(); + wctx.PoolId = MaxPools; + idleTimer = nullptr; + break; + case Stopped: return false; + default: Y_FAIL(); + } + } + } + } + + enum ETryRunPool { + RunFastPool, + RunSlowPool, + NoTokens, + }; + + ETryRunPool TryRun(TPoolId pool) { 
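+            // A sketch of the decision made below: SlowPoolsMask is consulted first.
+            // If the scheduled pool is currently marked slow, the fast-worker will only
+            // wait for it, so no token is needed and WaitPoolsFlag is raised (RunSlowPool).
+            // If the pool is fast, WaitPoolsFlag is lowered when set, and a concurrency
+            // token must be acquired before execution (RunFastPool, or NoTokens on failure).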
+ while (true) { + // updates WaitPoolsFlag in SlowPoolsMask according to scheduled pool slowness + TPoolsMask slow = AtomicLoad(&SlowPoolsMask); + if ((1ull << pool) & slow) { // we are about to execute slow pool (fast-worker will just wait, token is NOT required) + if (slow & WaitPoolsFlag) { + return RunSlowPool; // wait flag is already set + } else { + if (AtomicCas(&SlowPoolsMask, slow | WaitPoolsFlag, slow)) { // try set wait flag + return RunSlowPool; // wait flag has been successfully set + } + } + } else { // we are about to execute fast pool, token required + if (slow & WaitPoolsFlag) { // reset wait flag if required + if (AtomicCas(&SlowPoolsMask, slow & ~WaitPoolsFlag, slow)) { // try reset wait flag + return United->TryAcquireToken(pool) ? RunFastPool : NoTokens; // wait flag has been successfully reset + } + } else { + return United->TryAcquireToken(pool) ? RunFastPool : NoTokens; // wait flag is already reset + } + } + } + } + + EWorkerAction FastWorkerAction(TWorkerContext& wctx) { + if (Y_UNLIKELY(United->IsStopped())) { + return Stopped; + } + + // Account current pool + ui64 ts = GetCycleCountFast(); + PoolSched.Account(ts); + + // Select next pool to execute + for (wctx.PoolId = PoolSched.Begin(); wctx.PoolId != PoolSched.End(); wctx.PoolId = PoolSched.Next()) { + switch (TryRun(wctx.PoolId)) { + case RunFastPool: + PoolSched.Scheduled(); + wctx.Lease = PostponePreemption(wctx.Lease.GetWorkerId(), ts); + return ExecuteFast; + case RunSlowPool: + PoolSched.Scheduled(); + ResetPreemption(wctx.Lease.GetWorkerId(), ts); // there is no point in preemption during wait + return WaitForSlow; + case NoTokens: // concurrency limit reached, or no more work in pool + break; // just try next pool (if any) + } + } + + // No more work, no slow-workers -- wait for activations (active, then blocked) + wctx.PoolId = United->Idle(CpuShared, wctx); + + // Wakeup or stop occured + if (Y_UNLIKELY(wctx.PoolId == CpuStopped)) { + return Stopped; + } + ts = GetCycleCountFast(); + PoolSched.ScheduledAfterIdle(wctx.PoolId, ts); + wctx.Lease = PostponePreemption(wctx.Lease.GetWorkerId(), ts); + return ExecuteFast; // United->Idle() has already acquired token + } + + EWorkerAction IdleWorkerAction(TTimerFd* idleTimer, TWorkerId workerId) { + if (Y_UNLIKELY(United->IsStopped())) { + return Stopped; + } + if (!idleTimer) { // either worker start or became idle -- hard preemption is not required + return Standby; + } + + TLease lease = TLease(AtomicLoad(&CurrentLease)); + ui64 ts = GetCycleCountFast(); + if (lease.GetExpireTs() < ts) { // current lease has expired + if (TryBeginHardPreemption(lease)) { + SetPoolIsSlowFlag(PoolSched.CurrentPool()); + TWorkerId preempted = lease.GetWorkerId(); + SetPriority(United->GetWorkerThreadId(preempted), SlowPriority); + LWPROBE(HardPreemption, Config.CpuId, PoolSched.CurrentPool(), preempted, workerId); + EndHardPreemption(workerId); + return BecameFast; + } else { + // Lease has been changed just now, no way we need preemption right now, so no retry needed + return Standby; + } + } else { + // Lease has not expired yet (maybe never expiring lease) + return Standby; + } + } + + EWorkerAction SlowWorkerAction(TPoolId pool) { + if (Y_UNLIKELY(United->IsStopped())) { + return Stopped; + } + while (true) { + TPoolsMask slow = AtomicLoad(&SlowPoolsMask); + if (slow & (1ull << pool)) { + if (slow == (1ull << pool) & WaitPoolsFlag) { // the last slow pool is going to became fast + if (AtomicCas(&SlowPoolsMask, 0, slow)) { // reset both pool-is-slow flag and 
WaitPoolsFlag + return WakeFast; + } + } else { // there are (a) several slow-worker or (b) one slow-worker w/o waiting fast-worker + if (AtomicCas(&SlowPoolsMask, slow & ~(1ull << pool), slow)) { // reset pool-is-slow flag + return BecameIdle; + } + } + } else { + // SlowWorkerAction has been called between TryBeginHardPreemption and SetPoolIsSlowFlag + // flag for this pool is not set yet, but we can be sure pool is slow: + // - because SlowWorkerAction has been called; + // - this mean lease has expired and hard preemption occured. + // So just wait other worker to call SetPoolIsSlowFlag + LWPROBE(SlowWorkerActionRace, Config.CpuId, pool, slow); + } + } + } + + void SetPoolIsSlowFlag(TPoolId pool) { + while (true) { + TPoolsMask slow = AtomicLoad(&SlowPoolsMask); + if ((slow & (1ull << pool)) == 0) { // if pool is fast + if (AtomicCas(&SlowPoolsMask, slow | (1ull << pool), slow)) { // set pool-is-slow flag + return; + } + } else { + Y_FAIL("two slow-workers executing the same pool on the same core"); + return; // pool is already slow + } + } + } + + bool TryBeginHardPreemption(TLease lease) { + return AtomicCas(&CurrentLease, HardPreemptionLease, lease); + } + + void EndHardPreemption(TWorkerId to) { + ATOMIC_COMPILER_BARRIER(); + if (!AtomicCas(&CurrentLease, TLease(to, NeverExpire), HardPreemptionLease)) { + Y_FAIL("hard preemption failed"); + } + } + + bool DisablePreemptionAndTryExtend(TLease lease) { + return AtomicCas(&CurrentLease, lease.NeverExpire(), lease); + } + + void EnablePreemptionAndGrant(TLease lease) { + ATOMIC_COMPILER_BARRIER(); + if (!AtomicCas(&CurrentLease, lease, lease.NeverExpire())) { + Y_FAIL("lease grant failed"); + } + } + + void FastWorkerSleep(ui64 deadlineTs) { + while (true) { + TPoolsMask slow = AtomicLoad(&SlowPoolsMask); + if ((slow & WaitPoolsFlag) == 0) { + return; // woken by WakeFast action + } + ui64 ts = GetCycleCountFast(); + if (deadlineTs <= ts) { + if (AtomicCas(&SlowPoolsMask, slow & ~WaitPoolsFlag, slow)) { // try reset wait flag + return; // wait flag has been successfully reset after timeout + } + } else { // should wait + ui64 timeoutNs = Ts2Ns(deadlineTs - ts); +#ifdef _linux_ + timespec timeout; + timeout.tv_sec = timeoutNs / 1'000'000'000; + timeout.tv_nsec = timeoutNs % 1'000'000'000; + SysFutex(FastWorkerFutex(), FUTEX_WAIT_PRIVATE, FastWorkerFutexValue(slow), &timeout, nullptr, 0); +#else + NanoSleep(timeoutNs); // non-linux wake is not supported, cpu will go idle on slow -> fast switch +#endif + } + } + } + + void WakeFastWorker() { +#ifdef _linux_ + SysFutex(FastWorkerFutex(), FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0); +#endif + } + +#ifdef _linux_ + ui32* FastWorkerFutex() { + // Actually we wait on one highest bit, but futex value size is 4 bytes on all platforms + static_assert(sizeof(TPoolsMask) >= 4, "cannot be used as futex value on linux"); + return (ui32*)&SlowPoolsMask + 1; // higher 32 bits (little endian assumed) + } + + ui32 FastWorkerFutexValue(TPoolsMask slow) { + return ui32(slow >> 32); // higher 32 bits + } +#endif + + void SetPriority(TThreadId tid, EPriority priority) { + if (Config.NoRealtime) { + return; + } +#ifdef _linux_ + int policy; + struct sched_param param; + switch (priority) { + case IdlePriority: + policy = SCHED_FIFO; + param.sched_priority = Config.IdleWorkerPriority; + break; + case FastPriority: + policy = SCHED_FIFO; + param.sched_priority = Config.FastWorkerPriority; + break; + case SlowPriority: + policy = SCHED_OTHER; + param.sched_priority = 0; + break; + } + int ret = 
sched_setscheduler(tid, policy, &param);
+            switch (ret) {
+                case 0: return;
+                case EINVAL:
+                    Y_FAIL("sched_setscheduler(%" PRIu64 ", %d, %d) -> EINVAL", tid, policy, param.sched_priority);
+                case EPERM:
+                    // Requirements:
+                    // * CAP_SYS_NICE capability to run real-time processes and set cpu affinity.
+                    //   Either run under root or set application capabilities:
+                    //     sudo setcap cap_sys_nice=eip BINARY
+                    // * Non-zero rt-runtime (in case cgroups are used).
+                    //   Either (a) disable the global limit on RT processes bandwidth:
+                    //     sudo sysctl -w kernel.sched_rt_runtime_us=-1
+                    //   Or (b) set a non-zero rt-runtime for your cgroup:
+                    //     echo -1 > /sys/fs/cgroup/cpu/[cgroup]/cpu.rt_runtime_us
+                    //   (also set the same value for every parent cgroup)
+                    //   https://www.kernel.org/doc/Documentation/scheduler/sched-rt-group.txt
+                    Y_FAIL("sched_setscheduler(%" PRIu64 ", %d, %d) -> EPERM", tid, policy, param.sched_priority);
+                case ESRCH:
+                    Y_FAIL("sched_setscheduler(%" PRIu64 ", %d, %d) -> ESRCH", tid, policy, param.sched_priority);
+                default:
+                    Y_FAIL("sched_setscheduler(%" PRIu64 ", %d, %d) -> %d", tid, policy, param.sched_priority, ret);
+            }
+#else
+            Y_UNUSED(tid);
+            Y_UNUSED(priority);
+#endif
+        }
+
+        void ResetPreemption(TWorkerId fastWorkerId, ui64 ts) {
+            if (Y_UNLIKELY(!PreemptionTimer)) {
+                return;
+            }
+            if (FastWorker == fastWorkerId && HardPreemptionTs > 0) {
+                PreemptionTimer->Reset();
+                LWPROBE(ResetPreemptionTimer, Config.CpuId, FastWorker, PreemptionTimer->Fd, Ts2Ms(ts), Ts2Ms(HardPreemptionTs));
+                HardPreemptionTs = 0;
+            }
+        }
+
+        TLease PostponePreemption(TWorkerId fastWorkerId, ui64 ts) {
+            // Select a new timer after hard preemption
+            if (FastWorker != fastWorkerId) {
+                FastWorker = fastWorkerId;
+                PreemptionTimer = IdleQueue.Dequeue();
+                HardPreemptionTs = 0;
+            }
+
+            ui64 hardPreemptionTs = ts + Config.HardLimitTs;
+            if (hardPreemptionTs > HardPreemptionTs) {
+                // Reset the timer (at most once in TickIntervalTs, sacrificing precision)
+                HardPreemptionTs = hardPreemptionTs + Config.LimitPrecisionTs;
+                PreemptionTimer->Set(HardPreemptionTs);
+                LWPROBE(SetPreemptionTimer, Config.CpuId, FastWorker, PreemptionTimer->Fd, Ts2Ms(ts), Ts2Ms(HardPreemptionTs));
+            }
+
+            return TLease(fastWorkerId, hardPreemptionTs);
+        }
+    };
+
+    // Proxy for starting and switching TUnitedExecutorPool-s on a single cpu via GetReadyActivation()
+    // (does not implement any other method of IExecutorPool)
+    class TCpuExecutorPool: public IExecutorPool {
+        const TString Name;
+
+    public:
+        explicit TCpuExecutorPool(const TString& name)
+            : IExecutorPool(MaxPools)
+            , Name(name)
+        {}
+
+        TString GetName() const override {
+            return Name;
+        }
+
+        void SetRealTimeMode() const override {
+            // derived classes control rt-priority - do nothing
+        }
+
+        // Should never be called
+        void ReclaimMailbox(TMailboxType::EType, ui32, TWorkerId, ui64) override { Y_FAIL(); }
+        void Schedule(TInstant, TAutoPtr<IEventHandle>, ISchedulerCookie*, TWorkerId) override { Y_FAIL(); }
+        void Schedule(TMonotonic, TAutoPtr<IEventHandle>, ISchedulerCookie*, TWorkerId) override { Y_FAIL(); }
+        void Schedule(TDuration, TAutoPtr<IEventHandle>, ISchedulerCookie*, TWorkerId) override { Y_FAIL(); }
+        bool Send(TAutoPtr<IEventHandle>&) override { Y_FAIL(); }
+        void ScheduleActivation(ui32) override { Y_FAIL(); }
+        void ScheduleActivationEx(ui32, ui64) override { Y_FAIL(); }
+        TActorId Register(IActor*, TMailboxType::EType, ui64, const TActorId&) override { Y_FAIL(); }
+        TActorId Register(IActor*, TMailboxHeader*, ui32, const TActorId&) override { Y_FAIL(); }
+        void
Prepare(TActorSystem*, NSchedulerQueue::TReader**, ui32*) override { Y_FAIL(); } + void Start() override { Y_FAIL(); } + void PrepareStop() override { Y_FAIL(); } + void Shutdown() override { Y_FAIL(); } + bool Cleanup() override { Y_FAIL(); } + }; + + // Proxy executor pool working with cpu-local scheduler (aka actorsystem 2.0) + class TSharedCpuExecutorPool: public TCpuExecutorPool { + TSharedCpu* Local; + TIntrusivePtr<TAffinity> SingleCpuAffinity; // no migration support yet + public: + explicit TSharedCpuExecutorPool(TSharedCpu* local, const TUnitedWorkersConfig& config) + : TCpuExecutorPool("u-" + ToString(local->Config.CpuId)) + , Local(local) + , SingleCpuAffinity(config.NoAffinity ? nullptr : new TAffinity(TCpuMask(local->Config.CpuId))) + {} + + TAffinity* Affinity() const override { + return SingleCpuAffinity.Get(); + } + + ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) override { + return Local->GetReadyActivation(wctx, revolvingCounter); + } + }; + + // Proxy executor pool working with balancer and assigned pools (aka actorsystem 1.5) + class TAssignedCpuExecutorPool: public TCpuExecutorPool { + TAssignedCpu* Local; + TIntrusivePtr<TAffinity> CpuAffinity; + public: + explicit TAssignedCpuExecutorPool(TAssignedCpu* local, const TUnitedWorkersConfig& config) + : TCpuExecutorPool("United") + , Local(local) + , CpuAffinity(config.NoAffinity ? nullptr : new TAffinity(config.Allowed)) + {} + + TAffinity* Affinity() const override { + return CpuAffinity.Get(); + } + + ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) override { + return Local->GetReadyActivation(wctx, revolvingCounter); + } + }; + + // Representation of a single cpu and it's state visible to other cpus and pools + struct TUnitedWorkers::TCpu: public TNonCopyable { + struct TScopedWaiters { + TCpu& Cpu; + TPool* AssignedPool; // nullptr if CpuShared + + // Subscribe on wakeups from allowed pools + TScopedWaiters(TCpu& cpu, TPool* assignedPool) : Cpu(cpu), AssignedPool(assignedPool) { + if (!AssignedPool) { + for (TPool* pool : Cpu.AllowedPools) { + AtomicIncrement(pool->Waiters); + } + } else { + AtomicIncrement(AssignedPool->Waiters); + } + } + + // Unsubscribe from pools we've subscribed on + ~TScopedWaiters() { + if (!AssignedPool) { + for (TPool* pool : Cpu.AllowedPools) { + AtomicDecrement(pool->Waiters); + } + } else { + AtomicDecrement(AssignedPool->Waiters); + } + } + }; + + // Current cpu state important for other cpus and balancer + TCpuState State; + + // Thread-safe per pool stats + // NOTE: It's guaranteed that cpu never executes two instance of the same pool + TVector<TExecutorThreadStats> PoolStats; + + // Configuration + TCpuId CpuId; + THolder<TCpuLocalManager> LocalManager; + THolder<TCpuExecutorPool> ExecutorPool; + + // Pools allowed to run on this cpu + TStackVec<TPool*, 15> AllowedPools; + + void Stop() { + if (LocalManager) { + State.Stop(); + LocalManager->Stop(); + } + } + + bool StartSpinning(TUnitedWorkers* united, TPool* assignedPool, TPoolId& result) { + // Mark cpu as idle + if (Y_UNLIKELY(!State.StartSpinning())) { + result = CpuStopped; + return true; + } + + // Avoid using multiple atomic seq_cst loads in cycle, use barrier once and relaxed ops + AtomicBarrier(); + + // Check there is no pending tokens (can be released before Waiters increment) + if (!assignedPool) { + for (TPool* pool : AllowedPools) { + if (pool->TryAcquireTokenRelaxed()) { + result = WakeWithTokenAcquired(united, pool->PoolId); + return true; // token acquired or stop + } + } 
+ } else { + if (assignedPool->TryAcquireTokenRelaxed()) { + result = WakeWithTokenAcquired(united, assignedPool->PoolId); + return true; // token acquired or stop + } + } + + // At this point we can be sure wakeup won't be lost + // So we can actively spin or block w/o checking for pending tokens + return false; + } + + bool ActiveWait(ui64 spinThresholdTs, TPoolId& result) { + ui64 deadline = GetCycleCountFast() + spinThresholdTs; + while (GetCycleCountFast() < deadline) { + for (ui32 i = 0; i < 12; ++i) { + TPoolId current = State.CurrentPool(); + if (current == CpuSpinning) { + SpinLockPause(); + } else { + result = current; + return true; // wakeup + } + } + } + return false; // spin threshold exceeded, no wakeups + } + + bool StartBlocking(TPoolId& result) { + // Switch into blocked state + if (State.StartBlocking()) { + result = State.CurrentPool(); + return true; + } else { + return false; + } + } + + bool BlockedWait(TPoolId& result, ui64 timeoutNs) { + return State.Block(timeoutNs, result); + } + + void SwitchPool(TPoolId pool) { + return State.SwitchPool(pool); + } + + private: + TPoolId WakeWithTokenAcquired(TUnitedWorkers* united, TPoolId token) { + switch (State.WakeWithTokenAcquired(token)) { + case TCpuState::Woken: // we've got token and successfully woken up this cpu + // NOTE: sending thread may also wakeup another worker, which wont be able to acquire token and will go idle (it's ok) + return token; + case TCpuState::NotIdle: { // wakeup event has also occured + TPoolId wakeup = State.CurrentPool(); + if (wakeup != token) { // token and wakeup for different pools + united->TryWake(wakeup); // rewake another cpu to avoid losing wakeup + } + return token; + } + case TCpuState::Forbidden: + Y_FAIL(); + case TCpuState::Stopped: + return CpuStopped; + } + } + }; + + TUnitedWorkers::TUnitedWorkers( + const TUnitedWorkersConfig& config, + const TVector<TUnitedExecutorPoolConfig>& unitedPools, + const TCpuAllocationConfig& allocation, + IBalancer* balancer) + : Balancer(balancer) + , Config(config) + , Allocation(allocation) + { + // Find max pool id and initialize pools + PoolCount = 0; + for (const TCpuAllocation& cpuAlloc : allocation.Items) { + for (const auto& pa : cpuAlloc.AllowedPools) { + PoolCount = Max<size_t>(PoolCount, pa.PoolId + 1); + } + } + Pools.Reset(new TPool[PoolCount]); + + // Find max cpu id and initialize cpus + CpuCount = 0; + for (const TCpuAllocation& cpuAlloc : allocation.Items) { + CpuCount = Max<size_t>(CpuCount, cpuAlloc.CpuId + 1); + } + Cpus.Reset(new TCpu[CpuCount]); + + // Setup allocated cpus + // NOTE: leave gaps for not allocated cpus (default-initialized) + WorkerCount = 0; + for (const TCpuAllocation& cpuAlloc : allocation.Items) { + TCpu& cpu = Cpus[cpuAlloc.CpuId]; + cpu.CpuId = cpuAlloc.CpuId; + cpu.PoolStats.resize(PoolCount); // NOTE: also may have gaps + for (const auto& pa : cpuAlloc.AllowedPools) { + cpu.AllowedPools.emplace_back(&Pools[pa.PoolId]); + } + + // Setup balancing and cpu-local manager + if (!Balancer->AddCpu(cpuAlloc, &cpu.State)) { + cpu.State.SwitchPool(0); // set initial state to non-idle to avoid losing wakeups on start + cpu.State.AssignPool(CpuShared); + TSharedCpu* local = new TSharedCpu(TSharedCpu::TConfig(cpuAlloc, Config), this); + cpu.LocalManager.Reset(local); + cpu.ExecutorPool.Reset(new TSharedCpuExecutorPool(local, Config)); + } else { + TAssignedCpu* local = new TAssignedCpu(this); + cpu.LocalManager.Reset(local); + cpu.ExecutorPool.Reset(new TAssignedCpuExecutorPool(local, Config)); + } + WorkerCount 
+= cpu.LocalManager->WorkerCount(); + } + + // Initialize workers + Workers.Reset(new TWorker[WorkerCount]); + + // Setup pools + // NOTE: leave gaps for not united pools (default-initialized) + for (const TUnitedExecutorPoolConfig& cfg : unitedPools) { + TPool& pool = Pools[cfg.PoolId]; + Y_VERIFY(cfg.PoolId < MaxPools); + pool.PoolId = cfg.PoolId; + pool.Concurrency = cfg.Concurrency ? cfg.Concurrency : Config.CpuCount; + pool.ExecutorPool = nullptr; // should be set later using SetupPool() + pool.MailboxTable = nullptr; // should be set later using SetupPool() + pool.TimePerMailboxTs = DurationToCycles(cfg.TimePerMailbox); + pool.EventsPerMailbox = cfg.EventsPerMailbox; + + // Reinitialize per cpu pool stats with right MaxActivityType + for (const TCpuAllocation& cpuAlloc : allocation.Items) { + TCpu& cpu = Cpus[cpuAlloc.CpuId]; + cpu.PoolStats[cfg.PoolId] = TExecutorThreadStats(cfg.MaxActivityType); + } + + // Setup WakeOrderCpus: left to right exclusive cpus, then left to right shared cpus. + // Waking exclusive cpus first reduce load on shared cpu and improve latency isolation, which is + // the point of using exclusive cpu. But note that number of actively spinning idle cpus may increase, + // so cpu consumption on light load is higher. + for (const TCpuAllocation& cpuAlloc : allocation.Items) { + TCpu& cpu = Cpus[cpuAlloc.CpuId]; + if (cpu.AllowedPools.size() == 1 && cpu.AllowedPools[0] == &pool) { + pool.WakeOrderCpus.emplace_back(&cpu); + } + } + for (const TCpuAllocation& cpuAlloc : allocation.Items) { + TCpu& cpu = Cpus[cpuAlloc.CpuId]; + if (cpu.AllowedPools.size() > 1 && cpuAlloc.HasPool(pool.PoolId)) { + pool.WakeOrderCpus.emplace_back(&cpu); + } + } + } + } + + TUnitedWorkers::~TUnitedWorkers() { + } + + void TUnitedWorkers::Prepare(TActorSystem* actorSystem, TVector<NSchedulerQueue::TReader*>& scheduleReaders) { + // Setup allocated cpus + // NOTE: leave gaps for not allocated cpus (default-initialized) + TWorkerId workers = 0; + for (TCpuId cpuId = 0; cpuId < CpuCount; cpuId++) { + TCpu& cpu = Cpus[cpuId]; + + // Setup cpu-local workers + if (cpu.LocalManager) { + for (size_t i = 0; i < cpu.LocalManager->WorkerCount(); i++) { + TWorkerId workerId = workers++; + cpu.LocalManager->AddWorker(workerId); + + // Setup worker + Y_VERIFY(workerId < WorkerCount); + Workers[workerId].Thread.Reset(new TExecutorThread( + workerId, + cpu.CpuId, + actorSystem, + cpu.ExecutorPool.Get(), // use cpu-local manager as proxy executor for all workers on cpu + nullptr, // MailboxTable is pool-specific, will be set on pool switch + cpu.ExecutorPool->GetName())); + // NOTE: TWorker::ThreadId will be initialized after in Start() + + scheduleReaders.push_back(&Workers[workerId].SchedulerQueue.Reader); + } + } + } + } + + void TUnitedWorkers::Start() { + for (TWorkerId workerId = 0; workerId < WorkerCount; workerId++) { + Workers[workerId].Thread->Start(); + } + for (TWorkerId workerId = 0; workerId < WorkerCount; workerId++) { + AtomicStore(&Workers[workerId].ThreadId, Workers[workerId].Thread->GetThreadId()); + } + } + + inline TThreadId TUnitedWorkers::GetWorkerThreadId(TWorkerId workerId) const { + volatile TThreadId* threadId = &Workers[workerId].ThreadId; +#ifdef _linux_ + while (AtomicLoad(threadId) == UnknownThreadId) { + NanoSleep(1000); + } +#endif + return AtomicLoad(threadId); + } + + inline NSchedulerQueue::TWriter* TUnitedWorkers::GetScheduleWriter(TWorkerId workerId) const { + return &Workers[workerId].SchedulerQueue.Writer; + } + + void TUnitedWorkers::SetupPool(TPoolId pool, 
+        Pools[pool].ExecutorPool = executorPool;
+        Pools[pool].MailboxTable = mailboxTable;
+    }
+
+    void TUnitedWorkers::PrepareStop() {
+        AtomicStore(&StopFlag, true);
+        for (TPoolId pool = 0; pool < PoolCount; pool++) {
+            Pools[pool].Stop();
+        }
+        for (TCpuId cpuId = 0; cpuId < CpuCount; cpuId++) {
+            Cpus[cpuId].Stop();
+        }
+    }
+
+    void TUnitedWorkers::Shutdown() {
+        for (TWorkerId workerId = 0; workerId < WorkerCount; workerId++) {
+            Workers[workerId].Thread->Join();
+        }
+    }
+
+    inline void TUnitedWorkers::PushActivation(TPoolId pool, ui32 activation, ui64 revolvingCounter) {
+        if (Pools[pool].PushActivation(activation, revolvingCounter)) { // token generated
+            TryWake(pool);
+        }
+    }
+
+    inline bool TUnitedWorkers::TryAcquireToken(TPoolId pool) {
+        return Pools[pool].TryAcquireToken();
+    }
+
+    inline void TUnitedWorkers::TryWake(TPoolId pool) {
+        // Avoid using multiple atomic seq_cst loads in a loop, use a single barrier instead
+        AtomicBarrier();
+
+        // Scan every allowed cpu in the pool's wakeup order and try to wake the first idle cpu
+        if (RelaxedLoad(&Pools[pool].Waiters) > 0) {
+            for (TCpu* cpu : Pools[pool].WakeOrderCpus) {
+                if (cpu->State.WakeWithoutToken(pool) == TCpuState::Woken) {
+                    return; // successful wake up
+                }
+            }
+        }
+
+        // Cpu has not been woken up
+    }
+
+    inline void TUnitedWorkers::BeginExecution(TPoolId pool, ui32& activation, ui64 revolvingCounter) {
+        Pools[pool].BeginExecution(activation, revolvingCounter);
+    }
+
+    inline bool TUnitedWorkers::NextExecution(TPoolId pool, ui32& activation, ui64 revolvingCounter) {
+        return Pools[pool].NextExecution(activation, revolvingCounter);
+    }
+
+    inline void TUnitedWorkers::StopExecution(TPoolId pool) {
+        if (Pools[pool].StopExecution()) { // pending token
+            TryWake(pool);
+        }
+    }
+
+    inline void TUnitedWorkers::Balance() {
+        ui64 ts = GetCycleCountFast();
+        if (Balancer->TryLock(ts)) {
+            for (TPoolId pool = 0; pool < PoolCount; pool++) {
+                if (Pools[pool].IsUnited()) {
+                    ui64 ElapsedTs = 0;
+                    ui64 ParkedTs = 0;
+                    for (TCpu* cpu : Pools[pool].WakeOrderCpus) {
+                        const TExecutorThreadStats& cpuStats = cpu->PoolStats[pool];
+                        ElapsedTs += cpuStats.ElapsedTicks;
+                        ParkedTs += cpuStats.ParkedTicks;
+                    }
+                    TBalancerStats stats;
+                    stats.Ts = ts;
+                    stats.CpuUs = Ts2Us(ElapsedTs);
+                    stats.IdleUs = Ts2Us(ParkedTs);
+                    Balancer->SetPoolStats(pool, stats);
+                }
+            }
+            Balancer->Balance();
+            Balancer->Unlock();
+        }
+    }
+
+    inline TPoolId TUnitedWorkers::AssignedPool(TWorkerContext& wctx) {
+        return Cpus[wctx.CpuId].State.AssignedPool();
+    }
+
+    inline bool TUnitedWorkers::IsPoolReassigned(TWorkerContext& wctx) {
+        return Cpus[wctx.CpuId].State.IsPoolReassigned(wctx.PoolId);
+    }
+
+    inline void TUnitedWorkers::SwitchPool(TWorkerContext& wctx, ui64 softDeadlineTs) {
+        Pools[wctx.PoolId].Switch(wctx, softDeadlineTs, Cpus[wctx.CpuId].PoolStats[wctx.PoolId]);
+        Cpus[wctx.CpuId].SwitchPool(wctx.PoolId);
+    }
+
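+    // Summary of the idle protocol below (derived from StartSpinning/ActiveWait/
+    // StartBlocking above): the cpu first tries to grab a pending token while
+    // switching into the spinning state, then spins actively for
+    // Config.SpinThresholdUs waiting for a pool switch, and finally blocks,
+    // waking up at least once per balancer period. Returns the pool whose token
+    // has been acquired, or CpuStopped on shutdown; spinning time is accounted
+    // as elapsed and blocked time as parked.
+    TPoolId TUnitedWorkers::Idle(TPoolId assigned, TWorkerContext& wctx) {
+        wctx.SwitchToIdle();
+
+        TPoolId result;
+        TTimeTracker timeTracker;
+        TCpu& cpu = Cpus[wctx.CpuId];
+        TPool* assignedPool = assigned == CpuShared ?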
nullptr : &Pools[assigned]; + TCpu::TScopedWaiters scopedWaiters(cpu, assignedPool); + while (true) { + if (cpu.StartSpinning(this, assignedPool, result)) { + break; // token already acquired (or stop) + } + result = WaitSequence(cpu, wctx, timeTracker); + if (Y_UNLIKELY(result == CpuStopped) || TryAcquireToken(result)) { + break; // token acquired (or stop) + } + } + + wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, timeTracker.Elapsed()); + return result; + } + + TPoolId TUnitedWorkers::WaitSequence(TCpu& cpu, TWorkerContext& wctx, TTimeTracker& timeTracker) { + TPoolId result; + if (cpu.ActiveWait(Us2Ts(Config.SpinThresholdUs), result)) { + wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, timeTracker.Elapsed()); + return result; + } + if (cpu.StartBlocking(result)) { + wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, timeTracker.Elapsed()); + return result; + } + wctx.AddElapsedCycles(IActor::ACTOR_SYSTEM, timeTracker.Elapsed()); + bool wakeup; + do { + wakeup = cpu.BlockedWait(result, Config.Balancer.PeriodUs * 1000); + wctx.AddParkedCycles(timeTracker.Elapsed()); + } while (!wakeup); + return result; + } + + void TUnitedWorkers::GetCurrentStats(TPoolId pool, TVector<TExecutorThreadStats>& statsCopy) const { + size_t idx = 1; + statsCopy.resize(idx + Pools[pool].WakeOrderCpus.size()); + for (TCpu* cpu : Pools[pool].WakeOrderCpus) { + TExecutorThreadStats& s = statsCopy[idx++]; + s = TExecutorThreadStats(); + s.Aggregate(cpu->PoolStats[pool]); + } + } + + TUnitedExecutorPool::TUnitedExecutorPool(const TUnitedExecutorPoolConfig& cfg, TUnitedWorkers* united) + : TExecutorPoolBaseMailboxed(cfg.PoolId, cfg.MaxActivityType) + , United(united) + , PoolName(cfg.PoolName) + { + United->SetupPool(TPoolId(cfg.PoolId), this, MailboxTable.Get()); + } + + void TUnitedExecutorPool::Prepare(TActorSystem* actorSystem, NSchedulerQueue::TReader** scheduleReaders, ui32* scheduleSz) { + ActorSystem = actorSystem; + + // Schedule readers are initialized through TUnitedWorkers::Prepare + *scheduleReaders = nullptr; + *scheduleSz = 0; + } + + void TUnitedExecutorPool::Start() { + // workers are actually started in TUnitedWorkers::Start() + } + + void TUnitedExecutorPool::PrepareStop() { + } + + void TUnitedExecutorPool::Shutdown() { + // workers are actually joined in TUnitedWorkers::Shutdown() + } + + TAffinity* TUnitedExecutorPool::Affinity() const { + Y_FAIL(); // should never be called, TCpuExecutorPool is used instead + } + + ui32 TUnitedExecutorPool::GetThreads() const { + return 0; + } + + ui32 TUnitedExecutorPool::GetReadyActivation(TWorkerContext&, ui64) { + Y_FAIL(); // should never be called, TCpu*ExecutorPool is used instead + } + + inline void TUnitedExecutorPool::ScheduleActivation(ui32 activation) { + TUnitedExecutorPool::ScheduleActivationEx(activation, AtomicIncrement(ActivationsRevolvingCounter)); + } + + inline void TUnitedExecutorPool::ScheduleActivationEx(ui32 activation, ui64 revolvingCounter) { + United->PushActivation(PoolId, activation, revolvingCounter); + } + + void TUnitedExecutorPool::Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) { + TUnitedExecutorPool::Schedule(deadline - ActorSystem->Timestamp(), ev, cookie, workerId); + } + + void TUnitedExecutorPool::Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) { + Y_VERIFY_DEBUG(workerId < United->GetWorkerCount()); + const auto current = ActorSystem->Monotonic(); + if (deadline < current) { + deadline = current; + } + 
United->GetScheduleWriter(workerId)->Push(deadline.MicroSeconds(), ev.Release(), cookie); + } + + void TUnitedExecutorPool::Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) { + Y_VERIFY_DEBUG(workerId < United->GetWorkerCount()); + const auto deadline = ActorSystem->Monotonic() + delta; + United->GetScheduleWriter(workerId)->Push(deadline.MicroSeconds(), ev.Release(), cookie); + } + + void TUnitedExecutorPool::GetCurrentStats(TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const { + Y_UNUSED(poolStats); + if (statsCopy.empty()) { + statsCopy.resize(1); + } + statsCopy[0] = TExecutorThreadStats(); + statsCopy[0].Aggregate(Stats); + United->GetCurrentStats(PoolId, statsCopy); + } +} diff --git a/library/cpp/actors/core/executor_pool_united.h b/library/cpp/actors/core/executor_pool_united.h new file mode 100644 index 0000000000..a090ba2466 --- /dev/null +++ b/library/cpp/actors/core/executor_pool_united.h @@ -0,0 +1,135 @@ +#pragma once + +#include "actorsystem.h" +#include "balancer.h" +#include "scheduler_queue.h" +#include "executor_pool_base.h" + +#include <library/cpp/actors/util/unordered_cache.h> + +#include <library/cpp/monlib/dynamic_counters/counters.h> +#include <library/cpp/actors/util/unordered_cache.h> +#include <library/cpp/containers/stack_vector/stack_vec.h> + +#include <util/generic/noncopyable.h> + +namespace NActors { + class TMailboxTable; + + class TUnitedWorkers: public TNonCopyable { + struct TWorker; + struct TPool; + struct TCpu; + + size_t WorkerCount; + TArrayHolder<TWorker> Workers; // indexed by WorkerId + size_t PoolCount; + TArrayHolder<TPool> Pools; // indexed by PoolId, so may include not used (not united) pools + size_t CpuCount; + TArrayHolder<TCpu> Cpus; // indexed by CpuId, so may include not allocated CPUs + + IBalancer* Balancer; // external pool cpu balancer + + TUnitedWorkersConfig Config; + TCpuAllocationConfig Allocation; + + volatile bool StopFlag = false; + + public: + TUnitedWorkers( + const TUnitedWorkersConfig& config, + const TVector<TUnitedExecutorPoolConfig>& unitedPools, + const TCpuAllocationConfig& allocation, + IBalancer* balancer); + ~TUnitedWorkers(); + void Prepare(TActorSystem* actorSystem, TVector<NSchedulerQueue::TReader*>& scheduleReaders); + void Start(); + void PrepareStop(); + void Shutdown(); + + bool IsStopped() const { + return RelaxedLoad(&StopFlag); + } + + TWorkerId GetWorkerCount() const { + return WorkerCount; + } + + // Returns thread id of a worker + TThreadId GetWorkerThreadId(TWorkerId workerId) const; + + // Returns per worker schedule writers + NSchedulerQueue::TWriter* GetScheduleWriter(TWorkerId workerId) const; + + // Sets executor for specified pool + void SetupPool(TPoolId pool, IExecutorPool* executorPool, TMailboxTable* mailboxTable); + + // Add activation of newly scheduled mailbox and wake cpu to execute it if required + void PushActivation(TPoolId pool, ui32 activation, ui64 revolvingCounter); + + // Try acquire pending token. 
Must be done before execution + bool TryAcquireToken(TPoolId pool); + + // Try to wake idle cpu waiting for tokens on specified pool + void TryWake(TPoolId pool); + + // Get activation from pool; requires pool's token + void BeginExecution(TPoolId pool, ui32& activation, ui64 revolvingCounter); + + // Stop currently active execution and start new one if token is available + // NOTE: Reuses token if it's not destroyed + bool NextExecution(TPoolId pool, ui32& activation, ui64 revolvingCounter); + + // Stop active execution + void StopExecution(TPoolId pool); + + // Runs balancer to assign pools to cpus + void Balance(); + + // Returns pool to be executed by worker or `CpuShared` + TPoolId AssignedPool(TWorkerContext& wctx); + + // Checks if balancer has assigned another pool for worker's cpu + bool IsPoolReassigned(TWorkerContext& wctx); + + // Switch worker context into specified pool + void SwitchPool(TWorkerContext& wctx, ui64 softDeadlineTs); + + // Wait for tokens from any pool allowed on specified cpu + TPoolId Idle(TPoolId assigned, TWorkerContext& wctx); + + // Fill stats for specified pool + void GetCurrentStats(TPoolId pool, TVector<TExecutorThreadStats>& statsCopy) const; + + private: + TPoolId WaitSequence(TCpu& cpu, TWorkerContext& wctx, TTimeTracker& timeTracker); + }; + + class TUnitedExecutorPool: public TExecutorPoolBaseMailboxed { + TUnitedWorkers* United; + const TString PoolName; + TAtomic ActivationsRevolvingCounter = 0; + public: + TUnitedExecutorPool(const TUnitedExecutorPoolConfig& cfg, TUnitedWorkers* united); + + void Prepare(TActorSystem* actorSystem, NSchedulerQueue::TReader** scheduleReaders, ui32* scheduleSz) override; + void Start() override; + void PrepareStop() override; + void Shutdown() override; + + TAffinity* Affinity() const override; + ui32 GetThreads() const override; + ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingReadCounter) override; + void ScheduleActivation(ui32 activation) override; + void ScheduleActivationEx(ui32 activation, ui64 revolvingWriteCounter) override; + void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override; + void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override; + void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie, TWorkerId workerId) override; + + void GetCurrentStats(TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& statsCopy) const override; + + TString GetName() const override { + return PoolName; + } + }; +} diff --git a/library/cpp/actors/core/executor_pool_united_ut.cpp b/library/cpp/actors/core/executor_pool_united_ut.cpp new file mode 100644 index 0000000000..d4df17f1b8 --- /dev/null +++ b/library/cpp/actors/core/executor_pool_united_ut.cpp @@ -0,0 +1,338 @@ +#include "actorsystem.h" +#include "executor_pool_basic.h" +#include "hfunc.h" +#include "scheduler_basic.h" + +#include <library/cpp/actors/util/should_continue.h> + +#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/actors/protos/unittests.pb.h> + +using namespace NActors; + +//////////////////////////////////////////////////////////////////////////////// + +struct TEvMsg : public NActors::TEventBase<TEvMsg, 10347> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvMsg, "ExecutorPoolTest: Msg"); +}; + +//////////////////////////////////////////////////////////////////////////////// + +inline ui64 DoTimedWork(ui64 workUs) { + ui64 startUs = ThreadCPUTime(); + ui64 endUs = startUs + 
workUs; + ui64 nowUs = startUs; + do { + ui64 endTs = GetCycleCountFast() + Us2Ts(endUs - nowUs); + while (GetCycleCountFast() <= endTs) {} + nowUs = ThreadCPUTime(); + } while (nowUs <= endUs); + return nowUs - startUs; +} + +class TTestSenderActor : public IActor { +private: + using EActivityType = IActor::EActivityType ; + using EActorActivity = IActor::EActorActivity; + +private: + TAtomic Counter; + TActorId Receiver; + + std::function<void(void)> Action; + +public: + TTestSenderActor(std::function<void(void)> action = [](){}, + EActivityType activityType = EActorActivity::OTHER) + : IActor(static_cast<TReceiveFunc>(&TTestSenderActor::Execute), activityType) + , Action(action) + {} + + void Start(TActorId receiver, size_t count) { + AtomicSet(Counter, count); + Receiver = receiver; + } + + void Stop() { + while (true) { + if (GetCounter() == 0) { + break; + } + + Sleep(TDuration::MilliSeconds(1)); + } + } + + size_t GetCounter() const { + return AtomicGet(Counter); + } + +private: + STFUNC(Execute) { + Y_UNUSED(ctx); + switch (ev->GetTypeRewrite()) { + hFunc(TEvMsg, Handle); + } + } + + void Handle(TEvMsg::TPtr &ev) { + Y_UNUSED(ev); + Action(); + TAtomicBase count = AtomicDecrement(Counter); + Y_VERIFY(count != Max<TAtomicBase>()); + if (count) { + Send(Receiver, new TEvMsg()); + } + } +}; + +// Single cpu balancer that switches pool on every activation; not thread-safe +struct TRoundRobinBalancer: public IBalancer { + TCpuState* State; + TMap<TPoolId, TPoolId> NextPool; + + bool AddCpu(const TCpuAllocation& cpuAlloc, TCpuState* cpu) override { + State = cpu; + TPoolId prev = cpuAlloc.AllowedPools.rbegin()->PoolId; + for (auto& p : cpuAlloc.AllowedPools) { + NextPool[prev] = p.PoolId; + prev = p.PoolId; + } + return true; + } + + bool TryLock(ui64) override { return true; } + void SetPoolStats(TPoolId, const TBalancerStats&) override {} + void Unlock() override {} + + void Balance() override { + TPoolId assigned; + TPoolId current; + State->Load(assigned, current); + State->AssignPool(NextPool[assigned]); + } +}; + +void AddUnitedPool(THolder<TActorSystemSetup>& setup, ui32 concurrency = 0) { + TUnitedExecutorPoolConfig united; + united.PoolId = setup->GetExecutorsCount(); + united.Concurrency = concurrency; + setup->CpuManager.United.emplace_back(std::move(united)); +} + +THolder<TActorSystemSetup> GetActorSystemSetup(ui32 cpuCount) { + auto setup = MakeHolder<NActors::TActorSystemSetup>(); + setup->NodeId = 1; + setup->CpuManager.UnitedWorkers.CpuCount = cpuCount; + setup->CpuManager.UnitedWorkers.NoRealtime = true; // unavailable in test environment + setup->Scheduler = new TBasicSchedulerThread(NActors::TSchedulerConfig(512, 0)); + return setup; +} + +Y_UNIT_TEST_SUITE(UnitedExecutorPool) { + +#ifdef _linux_ + + Y_UNIT_TEST(OnePoolManyCpus) { + const size_t msgCount = 1e4; + auto setup = GetActorSystemSetup(4); + AddUnitedPool(setup); + TActorSystem actorSystem(setup); + actorSystem.Start(); + + auto begin = TInstant::Now(); + + auto actor = new TTestSenderActor(); + auto actorId = actorSystem.Register(actor); + actor->Start(actor->SelfId(), msgCount); + actorSystem.Send(actorId, new TEvMsg()); + + while (actor->GetCounter()) { + auto now = TInstant::Now(); + UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "Counter is " << actor->GetCounter()); + + Sleep(TDuration::MilliSeconds(1)); + } + + TVector<TExecutorThreadStats> stats; + TExecutorPoolStats poolStats; + actorSystem.GetPoolStats(0, poolStats, stats); + // Sum all per-thread counters into the 0th element + for (ui32 idx = 
1; idx < stats.size(); ++idx) { + stats[0].Aggregate(stats[idx]); + } + + UNIT_ASSERT_VALUES_EQUAL(stats[0].SentEvents, msgCount - 1); + UNIT_ASSERT_VALUES_EQUAL(stats[0].ReceivedEvents, msgCount); + //UNIT_ASSERT_VALUES_EQUAL(stats[0].PreemptedEvents, 0); // depends on execution time and system load, so may be non-zero + UNIT_ASSERT_VALUES_EQUAL(stats[0].NonDeliveredEvents, 0); + UNIT_ASSERT_VALUES_EQUAL(stats[0].EmptyMailboxActivation, 0); + //UNIT_ASSERT_VALUES_EQUAL(stats[0].CpuNs, 0); // depends on total duration of test, so undefined + UNIT_ASSERT(stats[0].ElapsedTicks > 0); + UNIT_ASSERT(stats[0].ParkedTicks == 0); // per-pool parked time does not make sense for united pools + UNIT_ASSERT_VALUES_EQUAL(stats[0].BlockedTicks, 0); + UNIT_ASSERT(stats[0].ActivationTimeHistogram.TotalSamples >= msgCount / TBasicExecutorPoolConfig::DEFAULT_EVENTS_PER_MAILBOX); + UNIT_ASSERT_VALUES_EQUAL(stats[0].EventDeliveryTimeHistogram.TotalSamples, msgCount); + UNIT_ASSERT_VALUES_EQUAL(stats[0].EventProcessingCountHistogram.TotalSamples, msgCount); + UNIT_ASSERT(stats[0].EventProcessingTimeHistogram.TotalSamples > 0); + UNIT_ASSERT(stats[0].ElapsedTicksByActivity[0] > 0); + UNIT_ASSERT_VALUES_EQUAL(stats[0].ReceivedEventsByActivity[0], msgCount); + UNIT_ASSERT_VALUES_EQUAL(stats[0].ActorsAliveByActivity[0], 1); + UNIT_ASSERT_VALUES_EQUAL(stats[0].ScheduledEventsByActivity[0], 0); + UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolActorRegistrations, 1); + UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolDestroyedActors, 0); + UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolAllocatedMailboxes, 4095); // one line + UNIT_ASSERT(stats[0].MailboxPushedOutByTime + stats[0].MailboxPushedOutByEventCount + stats[0].MailboxPushedOutBySoftPreemption >= msgCount / TBasicExecutorPoolConfig::DEFAULT_EVENTS_PER_MAILBOX); + } + + Y_UNIT_TEST(ManyPoolsOneSharedCpu) { + const size_t msgCount = 1e4; + const size_t pools = 4; + auto setup = GetActorSystemSetup(1); + for (size_t pool = 0; pool < pools; pool++) { + AddUnitedPool(setup); + } + TActorSystem actorSystem(setup); + actorSystem.Start(); + + auto begin = TInstant::Now(); + + TVector<TTestSenderActor*> actors; + for (size_t pool = 0; pool < pools; pool++) { + auto actor = new TTestSenderActor(); + auto actorId = actorSystem.Register(actor, TMailboxType::HTSwap, pool); + actor->Start(actor->SelfId(), msgCount); + actorSystem.Send(actorId, new TEvMsg()); + actors.push_back(actor); + } + + while (true) { + size_t left = 0; + for (auto actor : actors) { + left += actor->GetCounter(); + } + if (left == 0) { + break; + } + auto now = TInstant::Now(); + UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "left " << left); + Sleep(TDuration::MilliSeconds(1)); + } + + for (size_t pool = 0; pool < pools; pool++) { + TVector<TExecutorThreadStats> stats; + TExecutorPoolStats poolStats; + actorSystem.GetPoolStats(pool, poolStats, stats); + // Sum all per-thread counters into the 0th element + for (ui32 idx = 1; idx < stats.size(); ++idx) { + stats[0].Aggregate(stats[idx]); + } + + UNIT_ASSERT_VALUES_EQUAL(stats[0].ReceivedEvents, msgCount); + UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolActorRegistrations, 1); + } + } + + Y_UNIT_TEST(ManyPoolsOneAssignedCpu) { + const size_t msgCount = 1e4; + const size_t pools = 4; + auto setup = GetActorSystemSetup(1); + setup->Balancer.Reset(new TRoundRobinBalancer()); + for (size_t pool = 0; pool < pools; pool++) { + AddUnitedPool(setup); + } + TActorSystem actorSystem(setup); + actorSystem.Start(); + + auto begin = TInstant::Now(); + + TVector<TTestSenderActor*> actors; + for 
(size_t pool = 0; pool < pools; pool++) { + auto actor = new TTestSenderActor(); + auto actorId = actorSystem.Register(actor, TMailboxType::HTSwap, pool); + actor->Start(actor->SelfId(), msgCount); + actorSystem.Send(actorId, new TEvMsg()); + actors.push_back(actor); + } + + while (true) { + size_t left = 0; + for (auto actor : actors) { + left += actor->GetCounter(); + } + if (left == 0) { + break; + } + auto now = TInstant::Now(); + UNIT_ASSERT_C(now - begin < TDuration::Seconds(5), "left " << left); + Sleep(TDuration::MilliSeconds(1)); + } + + for (size_t pool = 0; pool < pools; pool++) { + TVector<TExecutorThreadStats> stats; + TExecutorPoolStats poolStats; + actorSystem.GetPoolStats(pool, poolStats, stats); + // Sum all per-thread counters into the 0th element + for (ui32 idx = 1; idx < stats.size(); ++idx) { + stats[0].Aggregate(stats[idx]); + } + + UNIT_ASSERT_VALUES_EQUAL(stats[0].ReceivedEvents, msgCount); + UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolActorRegistrations, 1); + } + } + + Y_UNIT_TEST(ManyPoolsOneCpuSlowEvents) { + const size_t msgCount = 3; + const size_t pools = 4; + auto setup = GetActorSystemSetup(1); + for (size_t pool = 0; pool < pools; pool++) { + AddUnitedPool(setup); + } + TActorSystem actorSystem(setup); + actorSystem.Start(); + + auto begin = TInstant::Now(); + + TVector<TTestSenderActor*> actors; + for (size_t pool = 0; pool < pools; pool++) { + auto actor = new TTestSenderActor([]() { + DoTimedWork(100'000); + }); + auto actorId = actorSystem.Register(actor, TMailboxType::HTSwap, pool); + actor->Start(actor->SelfId(), msgCount); + actorSystem.Send(actorId, new TEvMsg()); + actors.push_back(actor); + } + + while (true) { + size_t left = 0; + for (auto actor : actors) { + left += actor->GetCounter(); + } + if (left == 0) { + break; + } + auto now = TInstant::Now(); + UNIT_ASSERT_C(now - begin < TDuration::Seconds(15), "left " << left); + Sleep(TDuration::MilliSeconds(1)); + } + + for (size_t pool = 0; pool < pools; pool++) { + TVector<TExecutorThreadStats> stats; + TExecutorPoolStats poolStats; + actorSystem.GetPoolStats(pool, poolStats, stats); + // Sum all per-thread counters into the 0th element + for (ui32 idx = 1; idx < stats.size(); ++idx) { + stats[0].Aggregate(stats[idx]); + } + + UNIT_ASSERT_VALUES_EQUAL(stats[0].ReceivedEvents, msgCount); + UNIT_ASSERT_VALUES_EQUAL(stats[0].PreemptedEvents, msgCount); // every 100ms event should be preempted + UNIT_ASSERT_VALUES_EQUAL(stats[0].PoolActorRegistrations, 1); + } + } + +#endif + +} diff --git a/library/cpp/actors/core/executor_thread.cpp b/library/cpp/actors/core/executor_thread.cpp new file mode 100644 index 0000000000..446b651efd --- /dev/null +++ b/library/cpp/actors/core/executor_thread.cpp @@ -0,0 +1,563 @@ +#include "executor_thread.h" +#include "actorsystem.h" +#include "callstack.h" +#include "mailbox.h" +#include "event.h" +#include "events.h" + +#include <library/cpp/actors/prof/tag.h> +#include <library/cpp/actors/util/affinity.h> +#include <library/cpp/actors/util/datetime.h> +#include <library/cpp/actors/util/thread.h> + +#ifdef BALLOC +#include <library/cpp/balloc/optional/operators.h> +#endif + +#ifdef _linux_ +#include <sys/syscall.h> +#include <unistd.h> +#endif + +#include <util/system/type_name.h> +#include <util/system/datetime.h> + +LWTRACE_USING(ACTORLIB_PROVIDER) + +namespace NActors { + constexpr TDuration TExecutorThread::DEFAULT_TIME_PER_MAILBOX; + + TExecutorThread::TExecutorThread( + TWorkerId workerId, + TWorkerId cpuId, + TActorSystem* actorSystem, + IExecutorPool* 
executorPool, + TMailboxTable* mailboxTable, + const TString& threadName, + TDuration timePerMailbox, + ui32 eventsPerMailbox) + : ActorSystem(actorSystem) + , ExecutorPool(executorPool) + , Ctx(workerId, cpuId, actorSystem ? actorSystem->GetMaxActivityType() : 1) + , ThreadName(threadName) + { + Ctx.Switch( + ExecutorPool, + mailboxTable, + NHPTimer::GetClockRate() * timePerMailbox.SecondsFloat(), + eventsPerMailbox, + ui64(-1), // infinite soft deadline + &Ctx.WorkerStats); + } + + TActorId TExecutorThread::RegisterActor(IActor* actor, TMailboxType::EType mailboxType, ui32 poolId, const TActorId& parentId) { + if (poolId == Max<ui32>()) + return Ctx.Executor->Register(actor, mailboxType, ++RevolvingWriteCounter, parentId ? parentId : CurrentRecipient); + else + return ActorSystem->Register(actor, mailboxType, poolId, ++RevolvingWriteCounter, parentId ? parentId : CurrentRecipient); + } + + TActorId TExecutorThread::RegisterActor(IActor* actor, TMailboxHeader* mailbox, ui32 hint, const TActorId& parentId) { + return Ctx.Executor->Register(actor, mailbox, hint, parentId ? parentId : CurrentRecipient); + } + + void TExecutorThread::UnregisterActor(TMailboxHeader* mailbox, ui64 localActorId) { + IActor* actor = mailbox->DetachActor(localActorId); + Ctx.DecrementActorsAliveByActivity(actor->GetActivityType()); + DyingActors.push_back(THolder(actor)); + } + + void TExecutorThread::DropUnregistered() { + DyingActors.clear(); // here is actual destruction of actors + } + + void TExecutorThread::Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) { + ++CurrentActorScheduledEventsCounter; + Ctx.Executor->Schedule(deadline, ev, cookie, Ctx.WorkerId); + } + + void TExecutorThread::Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) { + ++CurrentActorScheduledEventsCounter; + Ctx.Executor->Schedule(deadline, ev, cookie, Ctx.WorkerId); + } + + void TExecutorThread::Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie) { + ++CurrentActorScheduledEventsCounter; + Ctx.Executor->Schedule(delta, ev, cookie, Ctx.WorkerId); + } + + template <class T> + inline TString SafeTypeName(T* t) { + if (t == nullptr) { + return "nullptr"; + } + try { + return TypeName(*t); + } catch (...) { + return "unknown-type"; + } + } + + inline TString ActorTypeName(const IActor* actor, ui32 activityType) { + return actor ? SafeTypeName(actor) : ("activityType_" + ToString(activityType) + " (destroyed)"); + } + + inline void LwTraceSlowDelivery(IEventHandle* ev, const IActor* actor, ui32 poolId, const TActorId& currentRecipient, + double delivMs, double sinceActivationMs, ui32 eventsExecutedBefore) { + const auto baseEv = (ev && ev->HasEvent()) ? ev->GetBase() : nullptr; + LWPROBE(EventSlowDelivery, + poolId, + delivMs, + sinceActivationMs, + eventsExecutedBefore, + baseEv ? SafeTypeName(baseEv) : (ev ? ToString(ev->Type) : TString("nullptr")), + currentRecipient.ToString(), + SafeTypeName(actor)); + } + + inline void LwTraceSlowEvent(IEventHandle* ev, ui32 evTypeForTracing, const IActor* actor, ui32 poolId, ui32 activityType, + const TActorId& currentRecipient, double eventMs) { + // Event could have been destroyed by actor->Receive(); + const auto baseEv = (ev && ev->HasEvent()) ? ev->GetBase() : nullptr; + LWPROBE(SlowEvent, + poolId, + eventMs, + baseEv ? 
SafeTypeName(baseEv) : ToString(evTypeForTracing),
+            currentRecipient.ToString(),
+            ActorTypeName(actor, activityType));
+    }
+
+    template <typename TMailbox>
+    void TExecutorThread::Execute(TMailbox* mailbox, ui32 hint) {
+        Y_VERIFY_DEBUG(DyingActors.empty());
+
+        bool reclaimAsFree = false;
+
+        NHPTimer::STime hpstart = GetCycleCountFast();
+        NHPTimer::STime hpprev = hpstart;
+
+        IActor* actor = nullptr;
+        ui32 prevActivityType = std::numeric_limits<ui32>::max();
+        TActorId recipient;
+        for (ui32 executed = 0; executed < Ctx.EventsPerMailbox; ++executed) {
+            TAutoPtr<IEventHandle> ev(mailbox->Pop());
+            if (!!ev) {
+                NHPTimer::STime hpnow;
+                recipient = ev->GetRecipientRewrite();
+                if (actor = mailbox->FindActor(recipient.LocalId())) {
+                    TActorContext ctx(*mailbox, *this, hpprev, recipient);
+                    TlsActivationContext = &ctx;
+
+#ifdef USE_ACTOR_CALLSTACK
+                    TCallstack::GetTlsCallstack() = ev->Callstack;
+                    TCallstack::GetTlsCallstack().SetLinesToSkip();
+#endif
+                    CurrentRecipient = recipient;
+                    CurrentActorScheduledEventsCounter = 0;
+
+                    if (executed == 0) {
+                        double usec = Ctx.AddActivationStats(AtomicLoad(&mailbox->ScheduleMoment), hpprev);
+                        if (usec > 500) {
+                            GLOBAL_LWPROBE(ACTORLIB_PROVIDER, SlowActivation, Ctx.PoolId, usec / 1000.0);
+                        }
+                    }
+
+                    i64 usecDeliv = Ctx.AddEventDeliveryStats(ev->SendTime, hpprev);
+                    if (usecDeliv > 5000) {
+                        double sinceActivationMs = NHPTimer::GetSeconds(hpprev - hpstart) * 1000.0;
+                        LwTraceSlowDelivery(ev.Get(), actor, Ctx.PoolId, CurrentRecipient, NHPTimer::GetSeconds(hpprev - ev->SendTime) * 1000.0, sinceActivationMs, executed);
+                    }
+
+                    ui32 evTypeForTracing = ev->Type;
+
+                    ui32 activityType = actor->GetActivityType();
+                    if (activityType != prevActivityType) {
+                        prevActivityType = activityType;
+                        NProfiling::TMemoryTagScope::Reset(ActorSystem->MemProfActivityBase + activityType);
+                    }
+
+                    actor->Receive(ev, ctx);
+
+                    size_t dyingActorsCnt = DyingActors.size();
+                    Ctx.UpdateActorsStats(dyingActorsCnt);
+                    if (dyingActorsCnt) {
+                        DropUnregistered();
+                        actor = nullptr;
+                    }
+
+                    if (mailbox->IsEmpty()) // the mailbox was not free and became free, so we must reclaim it
+                        reclaimAsFree = true;
+
+                    hpnow = GetCycleCountFast();
+                    NHPTimer::STime elapsed = Ctx.AddEventProcessingStats(hpprev, hpnow, activityType, CurrentActorScheduledEventsCounter);
+                    if (elapsed > 1000000) {
+                        LwTraceSlowEvent(ev.Get(), evTypeForTracing, actor, Ctx.PoolId, activityType, CurrentRecipient, NHPTimer::GetSeconds(elapsed) * 1000.0);
+                    }
+
+                    // The actor might have been destroyed
+                    if (actor)
+                        actor->AddElapsedTicks(elapsed);
+
+                    CurrentRecipient = TActorId();
+                } else {
+                    TAutoPtr<IEventHandle> nonDelivered = ev->ForwardOnNondelivery(TEvents::TEvUndelivered::ReasonActorUnknown);
+                    if (nonDelivered.Get()) {
+                        ActorSystem->Send(nonDelivered);
+                    } else {
+                        Ctx.IncrementNonDeliveredEvents();
+                    }
+                    hpnow = GetCycleCountFast();
+                }
+
+                hpprev = hpnow;
+
+                // Soft preemption in united pool
+                if (Ctx.SoftDeadlineTs < (ui64)hpnow) {
+                    AtomicStore(&mailbox->ScheduleMoment, hpnow);
+                    Ctx.IncrementMailboxPushedOutBySoftPreemption();
+                    LWTRACK(MailboxPushedOutBySoftPreemption,
+                        Ctx.Orbit,
+                        Ctx.PoolId,
+                        Ctx.Executor->GetName(),
+                        executed + 1,
+                        CyclesToDuration(hpnow - hpstart),
+                        Ctx.WorkerId,
+                        recipient.ToString(),
+                        SafeTypeName(actor));
+                    break;
+                }
+
+                // time limit inside one mailbox has passed, let others do some work
+                if (hpnow - hpstart > (i64)Ctx.TimePerMailboxTs) {
+                    AtomicStore(&mailbox->ScheduleMoment, hpnow);
+                    Ctx.IncrementMailboxPushedOutByTime();
+                    LWTRACK(MailboxPushedOutByTime,
+                        Ctx.Orbit,
+ Ctx.PoolId, + Ctx.Executor->GetName(), + executed + 1, + CyclesToDuration(hpnow - hpstart), + Ctx.WorkerId, + recipient.ToString(), + SafeTypeName(actor)); + break; + } + + if (executed + 1 == Ctx.EventsPerMailbox) { + AtomicStore(&mailbox->ScheduleMoment, hpnow); + Ctx.IncrementMailboxPushedOutByEventCount(); + LWTRACK(MailboxPushedOutByEventCount, + Ctx.Orbit, + Ctx.PoolId, + Ctx.Executor->GetName(), + executed + 1, + CyclesToDuration(hpnow - hpstart), + Ctx.WorkerId, + recipient.ToString(), + SafeTypeName(actor)); + break; + } + } else { + if (executed == 0) + Ctx.IncrementEmptyMailboxActivation(); + LWTRACK(MailboxEmpty, + Ctx.Orbit, + Ctx.PoolId, + Ctx.Executor->GetName(), + executed, + CyclesToDuration(GetCycleCountFast() - hpstart), + Ctx.WorkerId, + recipient.ToString(), + SafeTypeName(actor)); + break; // empty queue, leave + } + } + + NProfiling::TMemoryTagScope::Reset(0); + TlsActivationContext = nullptr; + UnlockFromExecution(mailbox, Ctx.Executor, reclaimAsFree, hint, Ctx.WorkerId, RevolvingWriteCounter); + } + + TThreadId TExecutorThread::GetThreadId() const { +#ifdef _linux_ + while (AtomicLoad(&ThreadId) == UnknownThreadId) { + NanoSleep(1000); + } +#endif + return ThreadId; + } + + void* TExecutorThread::ThreadProc() { +#ifdef _linux_ + pid_t tid = syscall(SYS_gettid); + AtomicSet(ThreadId, (ui64)tid); +#endif + +#ifdef BALLOC + ThreadDisableBalloc(); +#endif + + if (ThreadName) { + ::SetCurrentThreadName(ThreadName); + } + + ExecutorPool->SetRealTimeMode(); + TAffinityGuard affinity(ExecutorPool->Affinity()); + + NHPTimer::STime hpnow = GetCycleCountFast(); + NHPTimer::STime hpprev = hpnow; + ui64 execCount = 0; + ui64 readyActivationCount = 0; + i64 execCycles = 0; + i64 nonExecCycles = 0; + + for (;;) { + if (ui32 activation = ExecutorPool->GetReadyActivation(Ctx, ++RevolvingReadCounter)) { + LWTRACK(ActivationBegin, Ctx.Orbit, Ctx.CpuId, Ctx.PoolId, Ctx.WorkerId, NHPTimer::GetSeconds(Ctx.Lease.GetPreciseExpireTs()) * 1e3); + readyActivationCount++; + if (TMailboxHeader* header = Ctx.MailboxTable->Get(activation)) { + if (header->LockForExecution()) { + hpnow = GetCycleCountFast(); + nonExecCycles += hpnow - hpprev; + hpprev = hpnow; + switch (header->Type) { + case TMailboxType::Simple: + Execute(static_cast<TMailboxTable::TSimpleMailbox*>(header), activation); + break; + case TMailboxType::Revolving: + Execute(static_cast<TMailboxTable::TRevolvingMailbox*>(header), activation); + break; + case TMailboxType::HTSwap: + Execute(static_cast<TMailboxTable::THTSwapMailbox*>(header), activation); + break; + case TMailboxType::ReadAsFilled: + Execute(static_cast<TMailboxTable::TReadAsFilledMailbox*>(header), activation); + break; + case TMailboxType::TinyReadAsFilled: + Execute(static_cast<TMailboxTable::TTinyReadAsFilledMailbox*>(header), activation); + break; + } + hpnow = GetCycleCountFast(); + execCycles += hpnow - hpprev; + hpprev = hpnow; + execCount++; + if (execCycles + nonExecCycles > 39000000) { // every 15 ms at 2.6GHz, so 1000 items is 15 sec (solomon interval) + LWPROBE(ExecutorThreadStats, ExecutorPool->PoolId, ExecutorPool->GetName(), Ctx.WorkerId, + execCount, readyActivationCount, + NHPTimer::GetSeconds(execCycles) * 1000.0, NHPTimer::GetSeconds(nonExecCycles) * 1000.0); + execCount = 0; + readyActivationCount = 0; + execCycles = 0; + nonExecCycles = 0; + Ctx.UpdateThreadTime(); + } + } + } + LWTRACK(ActivationEnd, Ctx.Orbit, Ctx.CpuId, Ctx.PoolId, Ctx.WorkerId); + Ctx.Orbit.Reset(); + } else { // no activation means PrepareStop was called so thread must 
terminate
+                break;
+            }
+        }
+        return nullptr;
+    }
+
+    // there must be a barrier and a check-read followed by a cas, or just a cas without the read;
+    // alternatively, queue unlocks must be performed with an exchange and not a generic write.
+    // TODO: check performance of those options under contention
+
+    // placed here in hope of better compiler optimization
+
+    bool TMailboxHeader::MarkForSchedule() {
+        AtomicBarrier();
+        for (;;) {
+            const ui32 state = AtomicLoad(&ExecutionState);
+            switch (state) {
+            case TExecutionState::Inactive:
+                if (AtomicUi32Cas(&ExecutionState, TExecutionState::Scheduled, TExecutionState::Inactive))
+                    return true;
+                break;
+            case TExecutionState::Scheduled:
+                return false;
+            case TExecutionState::Leaving:
+                if (AtomicUi32Cas(&ExecutionState, TExecutionState::LeavingMarked, TExecutionState::Leaving))
+                    return true;
+                break;
+            case TExecutionState::Executing:
+            case TExecutionState::LeavingMarked:
+                return false;
+            case TExecutionState::Free:
+                if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeScheduled, TExecutionState::Free))
+                    return true;
+                break;
+            case TExecutionState::FreeScheduled:
+                return false;
+            case TExecutionState::FreeLeaving:
+                if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeLeavingMarked, TExecutionState::FreeLeaving))
+                    return true;
+                break;
+            case TExecutionState::FreeExecuting:
+            case TExecutionState::FreeLeavingMarked:
+                return false;
+            default:
+                Y_FAIL();
+            }
+        }
+    }
+
+    bool TMailboxHeader::LockForExecution() {
+        AtomicBarrier(); // strictly speaking, a barrier is needed here, but since we got the mailbox from the queue, the barrier is already implied and could be removed
+        for (;;) {
+            const ui32 state = AtomicLoad(&ExecutionState);
+            switch (state) {
+            case TExecutionState::Inactive:
+                return false;
+            case TExecutionState::Scheduled:
+                if (AtomicUi32Cas(&ExecutionState, TExecutionState::Executing, TExecutionState::Scheduled))
+                    return true;
+                break;
+            case TExecutionState::Leaving:
+            case TExecutionState::Executing:
+            case TExecutionState::LeavingMarked:
+                return false;
+            case TExecutionState::Free:
+                if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeExecuting, TExecutionState::Free))
+                    return true;
+                break;
+            case TExecutionState::FreeScheduled:
+                if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeExecuting, TExecutionState::FreeScheduled))
+                    return true;
+                break;
+            case TExecutionState::FreeLeaving:
+            case TExecutionState::FreeExecuting:
+            case TExecutionState::FreeLeavingMarked:
+                return false;
+            default:
+                Y_FAIL();
+            }
+        }
+    }
+
+    bool TMailboxHeader::LockFromFree() {
+        AtomicBarrier();
+        for (;;) {
+            const ui32 state = AtomicLoad(&ExecutionState);
+            switch (state) {
+            case TExecutionState::Inactive:
+            case TExecutionState::Scheduled:
+            case TExecutionState::Leaving:
+            case TExecutionState::Executing:
+            case TExecutionState::LeavingMarked:
+                Y_FAIL();
+            case TExecutionState::Free:
+                if (AtomicUi32Cas(&ExecutionState, TExecutionState::Executing, TExecutionState::Free))
+                    return true;
+                break;
+            case TExecutionState::FreeScheduled:
+                if (AtomicUi32Cas(&ExecutionState, TExecutionState::Executing, TExecutionState::FreeScheduled))
+                    return true;
+                break;
+            case TExecutionState::FreeLeaving:
+            case TExecutionState::FreeExecuting:
+            case TExecutionState::FreeLeavingMarked:
+                return false;
+            default:
+                Y_FAIL();
+            }
+        }
+    }
+
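+    // Unlocking is a two-phase protocol (see UnlockFromExecution() in
+    // executor_thread.h for the caller side): UnlockFromExecution1() publishes
+    // the intent to leave (Executing -> Leaving, FreeExecuting -> FreeLeaving),
+    // the caller then checks whether the mailbox queue is still non-empty, and
+    // UnlockFromExecution2() either parks the mailbox (back to Inactive/Free)
+    // or re-enters Scheduled/FreeScheduled. A concurrent MarkForSchedule() that
+    // hits the Leaving window moves the state to LeavingMarked, which forces
+    // UnlockFromExecution2() to reschedule, so no activation is lost.
+    void TMailboxHeader::UnlockFromExecution1() {
+        const ui32 state = AtomicLoad(&ExecutionState);
+        if (state == TExecutionState::Executing)
+            AtomicStore(&ExecutionState, (ui32)TExecutionState::Leaving);
+        else if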
(state == TExecutionState::FreeExecuting) + AtomicStore(&ExecutionState, (ui32)TExecutionState::FreeLeaving); + else + Y_FAIL(); + AtomicBarrier(); + } + + bool TMailboxHeader::UnlockFromExecution2(bool wouldReschedule) { + AtomicBarrier(); + for (;;) { + const ui32 state = AtomicLoad(&ExecutionState); + switch (state) { + case TExecutionState::Inactive: + case TExecutionState::Scheduled: + Y_FAIL(); + case TExecutionState::Leaving: + if (!wouldReschedule) { + if (AtomicUi32Cas(&ExecutionState, TExecutionState::Inactive, TExecutionState::Leaving)) + return false; + } else { + if (AtomicUi32Cas(&ExecutionState, TExecutionState::Scheduled, TExecutionState::Leaving)) + return true; + } + break; + case TExecutionState::Executing: + Y_FAIL(); + case TExecutionState::LeavingMarked: + if (AtomicUi32Cas(&ExecutionState, TExecutionState::Scheduled, TExecutionState::LeavingMarked)) + return true; + break; + case TExecutionState::Free: + case TExecutionState::FreeScheduled: + Y_FAIL(); + case TExecutionState::FreeLeaving: + if (!wouldReschedule) { + if (AtomicUi32Cas(&ExecutionState, TExecutionState::Free, TExecutionState::FreeLeaving)) + return false; + } else { + if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeScheduled, TExecutionState::FreeLeaving)) + return true; + } + break; + case TExecutionState::FreeExecuting: + Y_FAIL(); + case TExecutionState::FreeLeavingMarked: + if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeScheduled, TExecutionState::FreeLeavingMarked)) + return true; + break; + default: + Y_FAIL(); + } + } + } + + bool TMailboxHeader::UnlockAsFree(bool wouldReschedule) { + AtomicBarrier(); + for (;;) { + const ui32 state = AtomicLoad(&ExecutionState); + switch (state) { + case TExecutionState::Inactive: + case TExecutionState::Scheduled: + Y_FAIL(); + case TExecutionState::Leaving: + if (!wouldReschedule) { + if (AtomicUi32Cas(&ExecutionState, TExecutionState::Free, TExecutionState::Leaving)) + return false; + } else { + if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeScheduled, TExecutionState::Leaving)) + return true; + } + break; + case TExecutionState::Executing: + Y_FAIL(); + case TExecutionState::LeavingMarked: + if (AtomicUi32Cas(&ExecutionState, TExecutionState::FreeScheduled, TExecutionState::LeavingMarked)) + return true; + break; + case TExecutionState::Free: + case TExecutionState::FreeScheduled: + case TExecutionState::FreeLeaving: + case TExecutionState::FreeExecuting: + case TExecutionState::FreeLeavingMarked: + Y_FAIL(); + default: + Y_FAIL(); + } + } + } +} diff --git a/library/cpp/actors/core/executor_thread.h b/library/cpp/actors/core/executor_thread.h new file mode 100644 index 0000000000..9d3c573f0d --- /dev/null +++ b/library/cpp/actors/core/executor_thread.h @@ -0,0 +1,112 @@ +#pragma once + +#include "defs.h" +#include "event.h" +#include "actor.h" +#include "actorsystem.h" +#include "callstack.h" +#include "probes.h" +#include "worker_context.h" + +#include <library/cpp/actors/util/datetime.h> + +#include <util/system/thread.h> + +namespace NActors { + + class TExecutorThread: public ISimpleThread { + public: + static constexpr TDuration DEFAULT_TIME_PER_MAILBOX = + TDuration::MilliSeconds(10); + static constexpr ui32 DEFAULT_EVENTS_PER_MAILBOX = 100; + + TExecutorThread(TWorkerId workerId, + TWorkerId cpuId, + TActorSystem* actorSystem, + IExecutorPool* executorPool, + TMailboxTable* mailboxTable, + const TString& threadName, + TDuration timePerMailbox = DEFAULT_TIME_PER_MAILBOX, + ui32 eventsPerMailbox = DEFAULT_EVENTS_PER_MAILBOX); 
+ + TExecutorThread(TWorkerId workerId, + TActorSystem* actorSystem, + IExecutorPool* executorPool, + TMailboxTable* mailboxTable, + const TString& threadName, + TDuration timePerMailbox = DEFAULT_TIME_PER_MAILBOX, + ui32 eventsPerMailbox = DEFAULT_EVENTS_PER_MAILBOX) + : TExecutorThread(workerId, 0, actorSystem, executorPool, mailboxTable, threadName, timePerMailbox, eventsPerMailbox) + {} + + TActorId RegisterActor(IActor* actor, TMailboxType::EType mailboxType = TMailboxType::HTSwap, ui32 poolId = Max<ui32>(), + const TActorId& parentId = TActorId()); + TActorId RegisterActor(IActor* actor, TMailboxHeader* mailbox, ui32 hint, const TActorId& parentId = TActorId()); + void UnregisterActor(TMailboxHeader* mailbox, ui64 localActorId); + void DropUnregistered(); + const std::vector<THolder<IActor>>& GetUnregistered() const { return DyingActors; } + + void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr); + void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr); + void Schedule(TDuration delta, TAutoPtr<IEventHandle> ev, ISchedulerCookie* cookie = nullptr); + + bool Send(TAutoPtr<IEventHandle> ev) { +#ifdef USE_ACTOR_CALLSTACK + ev->Callstack = TCallstack::GetTlsCallstack(); + ev->Callstack.Trace(); +#endif + Ctx.IncrementSentEvents(); + return ActorSystem->Send(ev); + } + + void GetCurrentStats(TExecutorThreadStats& statsCopy) const { + Ctx.GetCurrentStats(statsCopy); + } + + TThreadId GetThreadId() const; // blocks, must be called after Start() + TWorkerId GetWorkerId() const { return Ctx.WorkerId; } + + private: + void* ThreadProc(); + + template <typename TMailbox> + void Execute(TMailbox* mailbox, ui32 hint); + + public: + TActorSystem* const ActorSystem; + + private: + // Pool-specific + IExecutorPool* const ExecutorPool; + + // Event-specific (currently executing) + TVector<THolder<IActor>> DyingActors; + TActorId CurrentRecipient; + ui64 CurrentActorScheduledEventsCounter = 0; + + // Thread-specific + TWorkerContext Ctx; + ui64 RevolvingReadCounter = 0; + ui64 RevolvingWriteCounter = 0; + const TString ThreadName; + volatile TThreadId ThreadId = UnknownThreadId; + }; + + template <typename TMailbox> + void UnlockFromExecution(TMailbox* mailbox, IExecutorPool* executorPool, bool asFree, ui32 hint, TWorkerId workerId, ui64& revolvingWriteCounter) { + mailbox->UnlockFromExecution1(); + const bool needReschedule1 = (nullptr != mailbox->Head()); + if (!asFree) { + if (mailbox->UnlockFromExecution2(needReschedule1)) { + RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast()); + executorPool->ScheduleActivationEx(hint, ++revolvingWriteCounter); + } + } else { + if (mailbox->UnlockAsFree(needReschedule1)) { + RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast()); + executorPool->ScheduleActivationEx(hint, ++revolvingWriteCounter); + } + executorPool->ReclaimMailbox(TMailbox::MailboxType, hint, workerId, ++revolvingWriteCounter); + } + } +} diff --git a/library/cpp/actors/core/hfunc.h b/library/cpp/actors/core/hfunc.h new file mode 100644 index 0000000000..26f3c65013 --- /dev/null +++ b/library/cpp/actors/core/hfunc.h @@ -0,0 +1,84 @@ +#pragma once + +#include "actor.h" +#include "executor_thread.h" + +#include <util/system/defaults.h> + +#define HFunc(TEvType, HandleFunc) \ + case TEvType::EventType: { \ + typename TEvType::TPtr* x = reinterpret_cast<typename TEvType::TPtr*>(&ev); \ + HandleFunc(*x, ctx); \ + break; \ + } + +#define hFunc(TEvType, HandleFunc) \ + 
case TEvType::EventType: { \ + typename TEvType::TPtr* x = reinterpret_cast<typename TEvType::TPtr*>(&ev); \ + HandleFunc(*x); \ + break; \ + } + +#define HFuncTraced(TEvType, HandleFunc) \ + case TEvType::EventType: { \ + TRACE_EVENT_TYPE(Y_STRINGIZE(TEvType)); \ + TEvType::TPtr* x = reinterpret_cast<TEvType::TPtr*>(&ev); \ + HandleFunc(*x, ctx); \ + break; \ + } + +#define hFuncTraced(TEvType, HandleFunc) \ + case TEvType::EventType: { \ + TRACE_EVENT_TYPE(Y_STRINGIZE(TEvType)); \ + typename TEvType::TPtr* x = reinterpret_cast<typename TEvType::TPtr*>(&ev); \ + HandleFunc(*x); \ + break; \ + } + +#define HTemplFunc(TEvType, HandleFunc) \ + case TEvType::EventType: { \ + typename TEvType::TPtr* x = reinterpret_cast<typename TEvType::TPtr*>(&ev); \ + HandleFunc(*x, ctx); \ + break; \ + } + +#define hTemplFunc(TEvType, HandleFunc) \ + case TEvType::EventType: { \ + typename TEvType::TPtr* x = reinterpret_cast<typename TEvType::TPtr*>(&ev); \ + HandleFunc(*x); \ + break; \ + } + +#define SFunc(TEvType, HandleFunc) \ + case TEvType::EventType: \ + HandleFunc(ctx); \ + break; + +#define sFunc(TEvType, HandleFunc) \ + case TEvType::EventType: \ + HandleFunc(); \ + break; + +#define CFunc(TEventType, HandleFunc) \ + case TEventType: \ + HandleFunc(ctx); \ + break; + +#define cFunc(TEventType, HandleFunc) \ + case TEventType: \ + HandleFunc(); \ + break; + +#define FFunc(TEventType, HandleFunc) \ + case TEventType: \ + HandleFunc(ev, ctx); \ + break; + +#define fFunc(TEventType, HandleFunc) \ + case TEventType: \ + HandleFunc(ev); \ + break; + +#define IgnoreFunc(TEvType) \ + case TEvType::EventType: \ + break; diff --git a/library/cpp/actors/core/interconnect.cpp b/library/cpp/actors/core/interconnect.cpp new file mode 100644 index 0000000000..9fb33413b2 --- /dev/null +++ b/library/cpp/actors/core/interconnect.cpp @@ -0,0 +1,170 @@ +#include "interconnect.h" +#include <util/digest/murmur.h> +#include <google/protobuf/text_format.h> + +namespace NActors { + + TNodeLocation::TNodeLocation(const NActorsInterconnect::TNodeLocation& location) { + const NProtoBuf::Descriptor *descriptor = NActorsInterconnect::TNodeLocation::descriptor(); + const NActorsInterconnect::TNodeLocation *locp = &location; + NActorsInterconnect::TNodeLocation temp; // for legacy location case + + // WalleConfig compatibility section + if (locp->HasBody()) { + if (locp == &location) { + temp.CopyFrom(*locp); + locp = &temp; + } + temp.SetUnit(::ToString(temp.GetBody())); + temp.ClearBody(); + } + + // legacy value processing + if (locp->HasDataCenterNum() || locp->HasRoomNum() || locp->HasRackNum() || locp->HasBodyNum()) { + if (locp == &location) { + temp.CopyFrom(*locp); + locp = &temp; + } + LegacyValue = TLegacyValue{temp.GetDataCenterNum(), temp.GetRoomNum(), temp.GetRackNum(), temp.GetBodyNum()}; + temp.ClearDataCenterNum(); + temp.ClearRoomNum(); + temp.ClearRackNum(); + temp.ClearBodyNum(); + + // legacy format must not interfere with new one + const NProtoBuf::Reflection *reflection = temp.GetReflection(); + for (int i = 0, count = descriptor->field_count(); i < count; ++i) { + Y_VERIFY(!reflection->HasField(temp, descriptor->field(i))); + } + + const auto& v = LegacyValue->DataCenter; + const char *p = reinterpret_cast<const char*>(&v); + temp.SetDataCenter(TString(p, strnlen(p, sizeof(ui32)))); + temp.SetModule(::ToString(LegacyValue->Room)); + temp.SetRack(::ToString(LegacyValue->Rack)); + temp.SetUnit(::ToString(LegacyValue->Body)); + } + + auto makeString = [&] { + NProtoBuf::TextFormat::Printer p; + 
p.SetSingleLineMode(true); + TString s; + p.PrintToString(*locp, &s); + return s; + }; + + // modern format parsing + const NProtoBuf::Reflection *reflection = locp->GetReflection(); + for (int i = 0, count = descriptor->field_count(); i < count; ++i) { + const NProtoBuf::FieldDescriptor *field = descriptor->field(i); + if (reflection->HasField(*locp, field)) { + Y_VERIFY(field->type() == NProtoBuf::FieldDescriptor::TYPE_STRING, "Location# %s", makeString().data()); + Items.emplace_back(TKeys::E(field->number()), reflection->GetString(*locp, field)); + } + } + const NProtoBuf::UnknownFieldSet& unknown = locp->unknown_fields(); + for (int i = 0, count = unknown.field_count(); i < count; ++i) { + const NProtoBuf::UnknownField& field = unknown.field(i); + Y_VERIFY(field.type() == NProtoBuf::UnknownField::TYPE_LENGTH_DELIMITED, "Location# %s", makeString().data()); + Items.emplace_back(TKeys::E(field.number()), field.length_delimited()); + } + std::sort(Items.begin(), Items.end()); + } + + TNodeLocation::TNodeLocation(TFromSerialized, const TString& s) + : TNodeLocation(ParseLocation(s)) + {} + + NActorsInterconnect::TNodeLocation TNodeLocation::ParseLocation(const TString& s) { + NActorsInterconnect::TNodeLocation res; + const bool success = res.ParseFromString(s); + Y_VERIFY(success); + return res; + } + + TString TNodeLocation::ToStringUpTo(TKeys::E upToKey) const { + const NProtoBuf::Descriptor *descriptor = NActorsInterconnect::TNodeLocation::descriptor(); + + TStringBuilder res; + for (const auto& [key, value] : Items) { + if (upToKey < key) { + break; + } + TString name; + if (const NProtoBuf::FieldDescriptor *field = descriptor->FindFieldByNumber(key)) { + name = field->options().GetExtension(NActorsInterconnect::PrintName); + } else { + name = ::ToString(int(key)); + } + if (key != upToKey) { + res << name << "=" << value << "/"; + } else { + res << value; + } + } + return res; + } + + void TNodeLocation::Serialize(NActorsInterconnect::TNodeLocation *pb) const { + const NProtoBuf::Descriptor *descriptor = NActorsInterconnect::TNodeLocation::descriptor(); + const NProtoBuf::Reflection *reflection = pb->GetReflection(); + NProtoBuf::UnknownFieldSet *unknown = pb->mutable_unknown_fields(); + for (const auto& [key, value] : Items) { + if (const NProtoBuf::FieldDescriptor *field = descriptor->FindFieldByNumber(key)) { + reflection->SetString(pb, field, value); + } else { + unknown->AddLengthDelimited(key)->assign(value); + } + } + } + + TString TNodeLocation::GetSerializedLocation() const { + NActorsInterconnect::TNodeLocation pb; + Serialize(&pb); + TString s; + const bool success = pb.SerializeToString(&s); + Y_VERIFY(success); + return s; + } + + TNodeLocation::TLegacyValue TNodeLocation::GetLegacyValue() const { + if (LegacyValue) { + return *LegacyValue; + } + + ui32 dataCenterId = 0, moduleId = 0, rackId = 0, unitId = 0; + + for (const auto& [key, value] : Items) { + switch (key) { + case TKeys::DataCenter: + memcpy(&dataCenterId, value.data(), Min<size_t>(sizeof(dataCenterId), value.length())); + break; + + case TKeys::Module: { + const bool success = TryFromString(value, moduleId); + Y_VERIFY(success); + break; + } + + case TKeys::Rack: + // hacky way to obtain numeric id by a rack name + if (!TryFromString(value, rackId)) { + rackId = MurmurHash<ui32>(value.data(), value.length()); + } + break; + + case TKeys::Unit: { + const bool success = TryFromString(value, unitId); + Y_VERIFY(success); + break; + } + + default: + Y_FAIL("unexpected legacy key# %d", key); + } + } + + return 
{dataCenterId, moduleId, rackId, unitId}; + } + +} // NActors diff --git a/library/cpp/actors/core/interconnect.h b/library/cpp/actors/core/interconnect.h new file mode 100644 index 0000000000..8d1cbd1e77 --- /dev/null +++ b/library/cpp/actors/core/interconnect.h @@ -0,0 +1,248 @@ +#pragma once + +#include "events.h" +#include "event_local.h" +#include <library/cpp/actors/protos/interconnect.pb.h> +#include <util/string/cast.h> +#include <util/string/builder.h> + +namespace NActors { + class TNodeLocation { + public: + struct TKeys { + enum E : int { + DataCenter = 10, + Module = 20, + Rack = 30, + Unit = 40, + }; + }; + + struct TLegacyValue { + ui32 DataCenter; + ui32 Room; + ui32 Rack; + ui32 Body; + + auto ConvertToTuple() const { return std::make_tuple(DataCenter, Room, Rack, Body); } + + int Compare(const TLegacyValue& other) const { + const auto x = ConvertToTuple(); + const auto y = other.ConvertToTuple(); + if (x < y) { + return -1; + } else if (y < x) { + return 1; + } else { + return 0; + } + } + + friend bool operator ==(const TLegacyValue& x, const TLegacyValue& y) { return x.Compare(y) == 0; } + }; + + private: + std::optional<TLegacyValue> LegacyValue; + std::vector<std::pair<TKeys::E, TString>> Items; + + public: + // generic ctors + TNodeLocation() = default; + TNodeLocation(const TNodeLocation&) = default; + TNodeLocation(TNodeLocation&&) = default; + + // protobuf-parser ctor + explicit TNodeLocation(const NActorsInterconnect::TNodeLocation& location); + + // serialized protobuf ctor + static constexpr struct TFromSerialized {} FromSerialized {}; + TNodeLocation(TFromSerialized, const TString& s); + + // parser helper function + static NActorsInterconnect::TNodeLocation ParseLocation(const TString& s); + + // assignment operators + TNodeLocation& operator =(const TNodeLocation&) = default; + TNodeLocation& operator =(TNodeLocation&&) = default; + + void Serialize(NActorsInterconnect::TNodeLocation *pb) const; + TString GetSerializedLocation() const; + + TString GetDataCenterId() const { return ToStringUpTo(TKeys::DataCenter); } + TString GetModuleId() const { return ToStringUpTo(TKeys::Module); } + TString GetRackId() const { return ToStringUpTo(TKeys::Rack); } + TString ToString() const { return ToStringUpTo(TKeys::E(Max<int>())); } + TString ToStringUpTo(TKeys::E upToKey) const; + + TLegacyValue GetLegacyValue() const; + + const std::vector<std::pair<TKeys::E, TString>>& GetItems() const { return Items; } + + bool HasKey(TKeys::E key) const { + auto comp = [](const auto& p, TKeys::E value) { return p.first < value; }; + const auto it = std::lower_bound(Items.begin(), Items.end(), key, comp); + return it != Items.end() && it->first == key; + } + + int Compare(const TNodeLocation& other) const { + if (LegacyValue || other.LegacyValue) { + return GetLegacyValue().Compare(other.GetLegacyValue()); + } else if (Items < other.Items) { + return -1; + } else if (other.Items < Items) { + return 1; + } else { + return 0; + } + } + + void InheritLegacyValue(const TNodeLocation& other) { + LegacyValue = other.GetLegacyValue(); + } + + friend bool operator ==(const TNodeLocation& x, const TNodeLocation& y) { return x.Compare(y) == 0; } + friend bool operator !=(const TNodeLocation& x, const TNodeLocation& y) { return x.Compare(y) != 0; } + friend bool operator < (const TNodeLocation& x, const TNodeLocation& y) { return x.Compare(y) < 0; } + friend bool operator <=(const TNodeLocation& x, const TNodeLocation& y) { return x.Compare(y) <= 0; } + friend bool operator > (const 
TNodeLocation& x, const TNodeLocation& y) { return x.Compare(y) > 0; } + friend bool operator >=(const TNodeLocation& x, const TNodeLocation& y) { return x.Compare(y) >= 0; } + }; + + struct TEvInterconnect { + enum EEv { + EvForward = EventSpaceBegin(TEvents::ES_INTERCONNECT), + EvResolveNode, // resolve info about node (internal) + EvNodeAddress, // node info (internal) + EvConnectNode, // request proxy to establish connection (like: we would send something there soon) + EvAcceptIncoming, + EvNodeConnected, // node connected notify + EvNodeDisconnected, // node disconnected notify + EvRegisterNode, + EvRegisterNodeResult, + EvListNodes, + EvNodesInfo, + EvDisconnect, + EvGetNode, + EvNodeInfo, + EvClosePeerSocket, + EvCloseInputSession, + EvPoisonSession, + EvTerminate, + EvEnd + }; + + enum ESubscribes { + SubConnected, + SubDisconnected, + }; + + static_assert(EvEnd < EventSpaceEnd(TEvents::ES_INTERCONNECT), "expect EvEnd < EventSpaceEnd(TEvents::ES_INTERCONNECT)"); + + struct TEvResolveNode; + struct TEvNodeAddress; + + struct TEvConnectNode: public TEventBase<TEvConnectNode, EvConnectNode> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvConnectNode, "TEvInterconnect::TEvConnectNode") + }; + + struct TEvAcceptIncoming; + + struct TEvNodeConnected: public TEventLocal<TEvNodeConnected, EvNodeConnected> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvNodeConnected, "TEvInterconnect::TEvNodeConnected") + TEvNodeConnected(ui32 node) noexcept + : NodeId(node) + { + } + const ui32 NodeId; + }; + + struct TEvNodeDisconnected: public TEventLocal<TEvNodeDisconnected, EvNodeDisconnected> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvNodeDisconnected, "TEvInterconnect::TEvNodeDisconnected") + TEvNodeDisconnected(ui32 node) noexcept + : NodeId(node) + { + } + const ui32 NodeId; + }; + + struct TEvRegisterNode; + struct TEvRegisterNodeResult; + + struct TEvListNodes: public TEventLocal<TEvListNodes, EvListNodes> { + }; + + struct TNodeInfo { + ui32 NodeId; + TString Address; + TString Host; + TString ResolveHost; + ui16 Port; + TNodeLocation Location; + + TNodeInfo() = default; + TNodeInfo(const TNodeInfo&) = default; + TNodeInfo& operator =(const TNodeInfo&) = default; + TNodeInfo(ui32 nodeId, + const TString& address, + const TString& host, + const TString& resolveHost, + ui16 port, + const TNodeLocation& location) + : NodeId(nodeId) + , Address(address) + , Host(host) + , ResolveHost(resolveHost) + , Port(port) + , Location(location) + { + } + + operator ui32() const { + return NodeId; + } + }; + + struct TEvNodesInfo: public TEventLocal<TEvNodesInfo, EvNodesInfo> { + TVector<TNodeInfo> Nodes; + + const TNodeInfo* GetNodeInfo(ui32 nodeId) const { + for (const auto& x : Nodes) { + if (x.NodeId == nodeId) + return &x; + } + return nullptr; + } + }; + + struct TEvDisconnect; + + struct TEvGetNode: public TEventLocal<TEvGetNode, EvGetNode> { + ui32 NodeId; + TInstant Deadline; + + TEvGetNode(ui32 nodeId, TInstant deadline = TInstant::Max()) + : NodeId(nodeId) + , Deadline(deadline) + { + } + }; + + struct TEvNodeInfo: public TEventLocal<TEvNodeInfo, EvNodeInfo> { + TEvNodeInfo(ui32 nodeId) + : NodeId(nodeId) + { + } + + ui32 NodeId; + THolder<TNodeInfo> Node; + }; + + struct TEvClosePeerSocket : TEventLocal<TEvClosePeerSocket, EvClosePeerSocket> {}; + + struct TEvCloseInputSession : TEventLocal<TEvCloseInputSession, EvCloseInputSession> {}; + + struct TEvPoisonSession : TEventLocal<TEvPoisonSession, EvPoisonSession> {}; + + struct TEvTerminate : TEventLocal<TEvTerminate, EvTerminate> {}; + }; +} diff --git a/library/cpp/actors/core/invoke.h 
b/library/cpp/actors/core/invoke.h
new file mode 100644
index 0000000000..931a9767dd
--- /dev/null
+++ b/library/cpp/actors/core/invoke.h
@@ -0,0 +1,110 @@
+#pragma once
+
+#include "actor_bootstrapped.h"
+#include "events.h"
+#include "event_local.h"
+
+#include <any>
+#include <type_traits>
+#include <utility>
+#include <variant>
+
+#include <util/system/type_name.h>
+
+namespace NActors {
+
+    struct TEvents::TEvInvokeResult
+        : TEventLocal<TEvInvokeResult, TSystem::InvokeResult>
+    {
+        using TProcessCallback = std::function<void(TEvInvokeResult&, const TActorContext&)>;
+        TProcessCallback ProcessCallback;
+        std::variant<std::any /* the value */, std::exception_ptr> Result;
+
+        // This constructor creates TEvInvokeResult holding either the result of calling callback(args...) or an
+        // exception_ptr, if an exception occurs during evaluation.
+        template<typename TCallback, typename... TArgs>
+        TEvInvokeResult(TProcessCallback&& process, TCallback&& callback, TArgs&&... args)
+            : ProcessCallback(std::move(process))
+        {
+            try {
+                if constexpr (std::is_void_v<std::invoke_result_t<TCallback, TArgs...>>) {
+                    // just invoke callback without saving any value
+                    std::invoke(std::forward<TCallback>(callback), std::forward<TArgs>(args)...);
+                } else {
+                    Result.emplace<std::any>(std::invoke(std::forward<TCallback>(callback), std::forward<TArgs>(args)...));
+                }
+            } catch (...) {
+                Result.emplace<std::exception_ptr>(std::current_exception());
+            }
+        }
+
+        void Process(const TActorContext& ctx) {
+            ProcessCallback(*this, ctx);
+        }
+
+        template<typename TCallback>
+        std::invoke_result_t<TCallback, const TActorContext&> GetResult() {
+            using T = std::invoke_result_t<TCallback, const TActorContext&>;
+            return std::visit([](auto& arg) -> T {
+                using TArg = std::decay_t<decltype(arg)>;
+                if constexpr (std::is_same_v<TArg, std::exception_ptr>) {
+                    std::rethrow_exception(arg);
+                } else if constexpr (std::is_void_v<T>) {
+                    Y_VERIFY(!arg.has_value());
+                } else if (auto *value = std::any_cast<T>(&arg)) {
+                    return std::move(*value);
+                } else {
+                    Y_FAIL("unsupported return type for TEvInvokeResult: actual# %s != expected# %s",
+                        TypeName(arg.type()).data(), TypeName<T>().data());
+                }
+            }, Result);
+        }
+    };
+
+    // The invoke actor is used to make procedure calls in specific thread pools.
+    //
+    // The actor is created by CreateInvokeActor(callback, complete), where `callback` is the function invoked upon
+    // actor registration; the actor then issues TEvInvokeResult to the parent actor with the result of the called
+    // function. If the called function throws an exception, the exception arrives in the result. The receiver of
+    // this message can either handle it by its own means, calling ev.GetResult() (which rethrows the exception if
+    // one occurred in the called function, or returns its return value; note that even when there is no return
+    // value, GetResult() should still be called to avoid losing an exception), or invoke ev.Process(), which calls
+    // the handler provided as the `complete` parameter to CreateInvokeActor. The complete handler is invoked with
+    // the result-getter lambda as the first argument and the actor system context as the second one. The
+    // result-getter should be called to obtain the resulting value or exception, just like the GetResult() method
+    // of the TEvInvokeResult event.
+    //
+    // Notice that `callback` execution usually occurs in a separate actor on a separate mailbox, so it should not
+    // use the parent actor's class. The `complete` handler, however, is invoked in the parent's context and can
+    // use its contents.
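+    //
+    // A minimal usage sketch from inside a parent actor (the activity value and ComputeAnswer below are
+    // illustrative placeholders, not part of this library):
+    //
+    //     auto invoker = CreateInvokeActor<IActor::OTHER>(
+    //         [](const TActorContext&) { return ComputeAnswer(); },     // runs in the invoke actor
+    //         [](auto getResult, const TActorContext&) {                // runs back in the parent actor
+    //             const int answer = getResult();                       // rethrows if the callback threw
+    //         });
+    //     Register(invoker.release());
+    //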
Do not forget to + // handle TEvInvokeResult event by calling Process/GetResult method, whichever is necessary. + + template<typename TCallback, typename TCompletion, ui32 Activity> + class TInvokeActor : public TActorBootstrapped<TInvokeActor<TCallback, TCompletion, Activity>> { + TCallback Callback; + TCompletion Complete; + + public: + static constexpr auto ActorActivityType() { + return static_cast<IActor::EActorActivity>(Activity); + } + + TInvokeActor(TCallback&& callback, TCompletion&& complete) + : Callback(std::move(callback)) + , Complete(std::move(complete)) + {} + + void Bootstrap(const TActorId& parentId, const TActorContext& ctx) { + auto process = [complete = std::move(Complete)](TEvents::TEvInvokeResult& res, const TActorContext& ctx) { + complete([&] { return res.GetResult<TCallback>(); }, ctx); + }; + ctx.Send(parentId, new TEvents::TEvInvokeResult(std::move(process), std::move(Callback), ctx)); + TActorBootstrapped<TInvokeActor>::Die(ctx); + } + }; + + template<ui32 Activity, typename TCallback, typename TCompletion> + std::unique_ptr<IActor> CreateInvokeActor(TCallback&& callback, TCompletion&& complete) { + return std::make_unique<TInvokeActor<std::decay_t<TCallback>, std::decay_t<TCompletion>, Activity>>( + std::forward<TCallback>(callback), std::forward<TCompletion>(complete)); + } + +} // NActors diff --git a/library/cpp/actors/core/io_dispatcher.cpp b/library/cpp/actors/core/io_dispatcher.cpp new file mode 100644 index 0000000000..90699ff16c --- /dev/null +++ b/library/cpp/actors/core/io_dispatcher.cpp @@ -0,0 +1,234 @@ +#include "io_dispatcher.h" +#include "actor_bootstrapped.h" +#include "hfunc.h" +#include <util/system/mutex.h> +#include <util/system/condvar.h> +#include <util/system/thread.h> +#include <map> +#include <list> + +namespace NActors { + + class TIoDispatcherActor : public TActorBootstrapped<TIoDispatcherActor> { + enum { + EvNotifyThreadStopped = EventSpaceBegin(TEvents::ES_PRIVATE), + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // IO task queue + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + class TTask { + TInstant Timestamp; + std::function<void()> Callback; + + public: + TTask(TInstant timestamp, TEvInvokeQuery *ev) + : Timestamp(timestamp) + , Callback(std::move(ev->Callback)) + {} + + void Execute() { + Callback(); + } + + TInstant GetTimestamp() const { + return Timestamp; + } + }; + + class TTaskQueue { + std::list<TTask> Tasks; + TMutex Mutex; + TCondVar CondVar; + size_t NumThreadsToStop = 0; + + public: + void Enqueue(TInstant timestamp, TEvInvokeQuery *ev) { + std::list<TTask> list; + list.emplace_back(timestamp, ev); + with_lock (Mutex) { + Tasks.splice(Tasks.end(), std::move(list)); + } + CondVar.Signal(); + } + + bool Dequeue(std::list<TTask>& list, bool *sendNotify) { + with_lock (Mutex) { + CondVar.Wait(Mutex, [&] { return NumThreadsToStop || !Tasks.empty(); }); + if (NumThreadsToStop) { + *sendNotify = NumThreadsToStop != Max<size_t>(); + if (*sendNotify) { + --NumThreadsToStop; + } + return false; + } else { + list.splice(list.end(), Tasks, Tasks.begin()); + return true; + } + } + } + + void Stop() { + with_lock (Mutex) { + NumThreadsToStop = Max<size_t>(); + } + CondVar.BroadCast(); + } + + void StopOne() { + with_lock (Mutex) { + ++NumThreadsToStop; + Y_VERIFY(NumThreadsToStop); + } + CondVar.Signal(); + } + + std::optional<TInstant> GetEarliestTaskTimestamp() { + with_lock (Mutex) 
{ + return Tasks.empty() ? std::nullopt : std::make_optional(Tasks.front().GetTimestamp()); + } + } + }; + + TTaskQueue TaskQueue; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // IO dispatcher threads + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + class TThread : public ISimpleThread { + TIoDispatcherActor& Actor; + TActorSystem* const ActorSystem; + + public: + TThread(TIoDispatcherActor& actor, TActorSystem *actorSystem) + : Actor(actor) + , ActorSystem(actorSystem) + { + Start(); + } + + void *ThreadProc() override { + SetCurrentThreadName("kikimr IO"); + for (;;) { + std::list<TTask> tasks; + bool sendNotify; + if (!Actor.TaskQueue.Dequeue(tasks, &sendNotify)) { + if (sendNotify) { + ActorSystem->Send(new IEventHandle(EvNotifyThreadStopped, 0, Actor.SelfId(), TActorId(), + nullptr, TThread::CurrentThreadId())); + } + break; + } + for (TTask& task : tasks) { + task.Execute(); + ++*Actor.TasksCompleted; + } + } + return nullptr; + } + }; + + static constexpr size_t MinThreadCount = 4; + static constexpr size_t MaxThreadCount = 64; + std::map<TThread::TId, std::unique_ptr<TThread>> Threads; + size_t NumRunningThreads = 0; + + void StartThread() { + auto thread = std::make_unique<TThread>(*this, TlsActivationContext->ExecutorThread.ActorSystem); + const TThread::TId id = thread->Id(); + Threads.emplace(id, std::move(thread)); + *NumThreads = ++NumRunningThreads; + ++*ThreadsStarted; + } + + void StopThread() { + Y_VERIFY(Threads.size()); + TaskQueue.StopOne(); + *NumThreads = --NumRunningThreads; + ++*ThreadsStopped; + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Counters + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + NMonitoring::TDynamicCounters::TCounterPtr NumThreads; + NMonitoring::TDynamicCounters::TCounterPtr TasksAdded; + NMonitoring::TDynamicCounters::TCounterPtr TasksCompleted; + NMonitoring::TDynamicCounters::TCounterPtr ThreadsStarted; + NMonitoring::TDynamicCounters::TCounterPtr ThreadsStopped; + + public: + TIoDispatcherActor(const NMonitoring::TDynamicCounterPtr& counters) + : NumThreads(counters->GetCounter("NumThreads")) + , TasksAdded(counters->GetCounter("TasksAdded", true)) + , TasksCompleted(counters->GetCounter("TasksCompleted", true)) + , ThreadsStarted(counters->GetCounter("ThreadsStarted", true)) + , ThreadsStopped(counters->GetCounter("ThreadsStopped", true)) + {} + + ~TIoDispatcherActor() override { + TaskQueue.Stop(); + } + + void Bootstrap() { + while (NumRunningThreads < MinThreadCount) { + StartThread(); + } + HandleWakeup(); + Become(&TThis::StateFunc); + } + + void HandleThreadStopped(TAutoPtr<IEventHandle> ev) { + auto it = Threads.find(ev->Cookie); + Y_VERIFY(it != Threads.end()); + it->second->Join(); + Threads.erase(it); + } + + void Handle(TEvInvokeQuery::TPtr ev) { + ++*TasksAdded; + TaskQueue.Enqueue(TActivationContext::Now(), ev->Get()); + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Thread usage counter logic + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + std::optional<TInstant> IdleTimestamp; + static constexpr TDuration ThreadStartTime = TDuration::MilliSeconds(500); + static constexpr TDuration 
ThreadStopTime = TDuration::MilliSeconds(500);
+
+        void HandleWakeup() {
+            const TInstant now = TActivationContext::Now();
+            std::optional<TInstant> earliest = TaskQueue.GetEarliestTaskTimestamp();
+            if (earliest) {
+                if (now >= *earliest + ThreadStartTime && NumRunningThreads < MaxThreadCount) {
+                    StartThread();
+                }
+                IdleTimestamp.reset();
+            } else if (!IdleTimestamp) {
+                IdleTimestamp = now;
+            } else if (now >= *IdleTimestamp + ThreadStopTime) {
+                IdleTimestamp.reset();
+                if (NumRunningThreads > MinThreadCount) {
+                    StopThread();
+                }
+            }
+            Schedule(TDuration::MilliSeconds(100), new TEvents::TEvWakeup);
+        }
+
+        STRICT_STFUNC(StateFunc, {
+            fFunc(EvNotifyThreadStopped, HandleThreadStopped);
+            hFunc(TEvInvokeQuery, Handle);
+            cFunc(TEvents::TSystem::Wakeup, HandleWakeup);
+            cFunc(TEvents::TSystem::Poison, PassAway);
+        })
+    };
+
+    IActor *CreateIoDispatcherActor(const NMonitoring::TDynamicCounterPtr& counters) {
+        return new TIoDispatcherActor(counters);
+    }
+
+} // NActors
diff --git a/library/cpp/actors/core/io_dispatcher.h b/library/cpp/actors/core/io_dispatcher.h
new file mode 100644
index 0000000000..b0e4e60d1a
--- /dev/null
+++ b/library/cpp/actors/core/io_dispatcher.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include "actor.h"
+#include "event_local.h"
+#include "events.h"
+#include "actorsystem.h"
+#include "executor_thread.h"
+#include "executelater.h"
+
+namespace NActors {
+
+    struct TEvInvokeQuery : TEventLocal<TEvInvokeQuery, TEvents::TSystem::InvokeQuery> {
+        std::function<void()> Callback;
+
+        TEvInvokeQuery(std::function<void()>&& callback)
+            : Callback(std::move(callback))
+        {}
+    };
+
+    inline TActorId MakeIoDispatcherActorId() {
+        return TActorId(0, TStringBuf("IoDispatcher", 12));
+    }
+
+    extern IActor *CreateIoDispatcherActor(const NMonitoring::TDynamicCounterPtr& counters);
+
+    /* InvokeIoCallback enqueues callback() to be executed in the IO thread pool and then returns the result in a
+     * TEvInvokeResult message to the parent actor.
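+     *
+     * A minimal usage sketch (ioPoolId and DoBlockingWork are illustrative placeholders; the activity type is an
+     * assumption):
+     *
+     *     InvokeIoCallback([] { DoBlockingWork(); }, ioPoolId, IActor::ACTORLIB_COMMON);
+     *
+     * As the code below shows, if sending to the IO dispatcher actor fails, the callback is executed via an
+     * execute-later actor registered in the given pool instead.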
+     */
+    template<typename TCallback>
+    static void InvokeIoCallback(TCallback&& callback, ui32 poolId, IActor::EActivityType activityType) {
+        if (!TActivationContext::Send(new IEventHandle(MakeIoDispatcherActorId(), TActorId(),
+                new TEvInvokeQuery(callback)))) {
+            TActivationContext::Register(CreateExecuteLaterActor(std::move(callback), activityType), TActorId(),
+                TMailboxType::HTSwap, poolId);
+        }
+    }
+
+} // NActors
diff --git a/library/cpp/actors/core/lease.h b/library/cpp/actors/core/lease.h
new file mode 100644
index 0000000000..650ae7b122
--- /dev/null
+++ b/library/cpp/actors/core/lease.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include "defs.h"
+
+namespace NActors {
+    // Value representing a specific worker's permission for exclusive use of a CPU till a specific deadline
+    struct TLease {
+        // Lower WorkerBits store the current fast worker id
+        // All other higher bits store the expiration (hard preemption) timestamp
+        using TValue = ui64;
+        TValue Value;
+
+        static constexpr ui64 WorkerIdMask = ui64((1ull << WorkerBits) - 1);
+        static constexpr ui64 ExpireTsMask = ~WorkerIdMask;
+
+        explicit constexpr TLease(ui64 value)
+            : Value(value)
+        {}
+
+        constexpr TLease(TWorkerId workerId, ui64 expireTs)
+            : Value((workerId & WorkerIdMask) | (expireTs & ExpireTsMask))
+        {}
+
+        TWorkerId GetWorkerId() const {
+            return Value & WorkerIdMask;
+        }
+
+        TLease NeverExpire() const {
+            return TLease(Value | ExpireTsMask);
+        }
+
+        bool IsNeverExpiring() const {
+            return (Value & ExpireTsMask) == ExpireTsMask;
+        }
+
+        ui64 GetExpireTs() const {
+            // Do not truncate the worker id
+            // NOTE: it decreases accuracy, but improves performance
+            return Value;
+        }
+
+        ui64 GetPreciseExpireTs() const {
+            return Value & ExpireTsMask;
+        }
+
+        operator TValue() const {
+            return Value;
+        }
+    };
+
+    // Special expire timestamp values
+    static constexpr ui64 NeverExpire = ui64(-1);
+
+    // Special hard-preemption-in-progress lease
+    static constexpr TLease HardPreemptionLease = TLease(TLease::WorkerIdMask, NeverExpire);
+}
diff --git a/library/cpp/actors/core/log.cpp b/library/cpp/actors/core/log.cpp
new file mode 100644
index 0000000000..5f63b5af58
--- /dev/null
+++ b/library/cpp/actors/core/log.cpp
@@ -0,0 +1,753 @@
+#include "log.h"
+#include "log_settings.h"
+
+#include <library/cpp/monlib/service/pages/templates.h>
+
+static_assert(int(NActors::NLog::PRI_EMERG) == int(::TLOG_EMERG), "expect int(NActors::NLog::PRI_EMERG) == int(::TLOG_EMERG)");
+static_assert(int(NActors::NLog::PRI_ALERT) == int(::TLOG_ALERT), "expect int(NActors::NLog::PRI_ALERT) == int(::TLOG_ALERT)");
+static_assert(int(NActors::NLog::PRI_CRIT) == int(::TLOG_CRIT), "expect int(NActors::NLog::PRI_CRIT) == int(::TLOG_CRIT)");
+static_assert(int(NActors::NLog::PRI_ERROR) == int(::TLOG_ERR), "expect int(NActors::NLog::PRI_ERROR) == int(::TLOG_ERR)");
+static_assert(int(NActors::NLog::PRI_WARN) == int(::TLOG_WARNING), "expect int(NActors::NLog::PRI_WARN) == int(::TLOG_WARNING)");
+static_assert(int(NActors::NLog::PRI_NOTICE) == int(::TLOG_NOTICE), "expect int(NActors::NLog::PRI_NOTICE) == int(::TLOG_NOTICE)");
+static_assert(int(NActors::NLog::PRI_INFO) == int(::TLOG_INFO), "expect int(NActors::NLog::PRI_INFO) == int(::TLOG_INFO)");
+static_assert(int(NActors::NLog::PRI_DEBUG) == int(::TLOG_DEBUG), "expect int(NActors::NLog::PRI_DEBUG) == int(::TLOG_DEBUG)");
+static_assert(int(NActors::NLog::PRI_TRACE) == int(::TLOG_RESOURCES), "expect int(NActors::NLog::PRI_TRACE) == int(::TLOG_RESOURCES)");
+
+namespace {
+    struct TRecordWithNewline {
+        ELogPriority Priority;
TTempBuf Buf; + + TRecordWithNewline(const TLogRecord& rec) + : Priority(rec.Priority) + , Buf(rec.Len + 1) + { + Buf.Append(rec.Data, rec.Len); + *Buf.Proceed(1) = '\n'; + } + + operator TLogRecord() const { + return TLogRecord(Priority, Buf.Data(), Buf.Filled()); + } + }; +} + +namespace NActors { + + class TLoggerCounters : public ILoggerMetrics { + public: + TLoggerCounters(TIntrusivePtr<NMonitoring::TDynamicCounters> counters) + : DynamicCounters(counters) + { + ActorMsgs_ = DynamicCounters->GetCounter("ActorMsgs", true); + DirectMsgs_ = DynamicCounters->GetCounter("DirectMsgs", true); + LevelRequests_ = DynamicCounters->GetCounter("LevelRequests", true); + IgnoredMsgs_ = DynamicCounters->GetCounter("IgnoredMsgs", true); + DroppedMsgs_ = DynamicCounters->GetCounter("DroppedMsgs", true); + + AlertMsgs_ = DynamicCounters->GetCounter("AlertMsgs", true); + EmergMsgs_ = DynamicCounters->GetCounter("EmergMsgs", true); + } + + ~TLoggerCounters() = default; + + void IncActorMsgs() override { + ++*ActorMsgs_; + } + void IncDirectMsgs() override { + ++*DirectMsgs_; + } + void IncLevelRequests() override { + ++*LevelRequests_; + } + void IncIgnoredMsgs() override { + ++*IgnoredMsgs_; + } + void IncAlertMsgs() override { + ++*AlertMsgs_; + } + void IncEmergMsgs() override { + ++*EmergMsgs_; + } + void IncDroppedMsgs() override { + DroppedMsgs_->Inc(); + }; + + void GetOutputHtml(IOutputStream& str) override { + HTML(str) { + DIV_CLASS("row") { + DIV_CLASS("col-md-12") { + H4() { + str << "Counters" << Endl; + } + DynamicCounters->OutputHtml(str); + } + } + } + } + + private: + NMonitoring::TDynamicCounters::TCounterPtr ActorMsgs_; + NMonitoring::TDynamicCounters::TCounterPtr DirectMsgs_; + NMonitoring::TDynamicCounters::TCounterPtr LevelRequests_; + NMonitoring::TDynamicCounters::TCounterPtr IgnoredMsgs_; + NMonitoring::TDynamicCounters::TCounterPtr AlertMsgs_; + NMonitoring::TDynamicCounters::TCounterPtr EmergMsgs_; + // Dropped while the logger backend was unavailable + NMonitoring::TDynamicCounters::TCounterPtr DroppedMsgs_; + + TIntrusivePtr<NMonitoring::TDynamicCounters> DynamicCounters; + }; + + class TLoggerMetrics : public ILoggerMetrics { + public: + TLoggerMetrics(std::shared_ptr<NMonitoring::TMetricRegistry> metrics) + : Metrics(metrics) + { + ActorMsgs_ = Metrics->Rate(NMonitoring::TLabels{{"sensor", "logger.actor_msgs"}}); + DirectMsgs_ = Metrics->Rate(NMonitoring::TLabels{{"sensor", "logger.direct_msgs"}}); + LevelRequests_ = Metrics->Rate(NMonitoring::TLabels{{"sensor", "logger.level_requests"}}); + IgnoredMsgs_ = Metrics->Rate(NMonitoring::TLabels{{"sensor", "logger.ignored_msgs"}}); + DroppedMsgs_ = Metrics->Rate(NMonitoring::TLabels{{"sensor", "logger.dropped_msgs"}}); + + AlertMsgs_ = Metrics->Rate(NMonitoring::TLabels{{"sensor", "logger.alert_msgs"}}); + EmergMsgs_ = Metrics->Rate(NMonitoring::TLabels{{"sensor", "logger.emerg_msgs"}}); + } + + ~TLoggerMetrics() = default; + + void IncActorMsgs() override { + ActorMsgs_->Inc(); + } + void IncDirectMsgs() override { + DirectMsgs_->Inc(); + } + void IncLevelRequests() override { + LevelRequests_->Inc(); + } + void IncIgnoredMsgs() override { + IgnoredMsgs_->Inc(); + } + void IncAlertMsgs() override { + AlertMsgs_->Inc(); + } + void IncEmergMsgs() override { + EmergMsgs_->Inc(); + } + void IncDroppedMsgs() override { + DroppedMsgs_->Inc(); + }; + + void GetOutputHtml(IOutputStream& str) override { + HTML(str) { + DIV_CLASS("row") { + DIV_CLASS("col-md-12") { + H4() { + str << "Metrics" << Endl; + } + // TODO: Now, TMetricRegistry 
does not have the GetOutputHtml function + } + } + } + } + + private: + NMonitoring::TRate* ActorMsgs_; + NMonitoring::TRate* DirectMsgs_; + NMonitoring::TRate* LevelRequests_; + NMonitoring::TRate* IgnoredMsgs_; + NMonitoring::TRate* AlertMsgs_; + NMonitoring::TRate* EmergMsgs_; + // Dropped while the logger backend was unavailable + NMonitoring::TRate* DroppedMsgs_; + + std::shared_ptr<NMonitoring::TMetricRegistry> Metrics; + }; + + TAtomic TLoggerActor::IsOverflow = 0; + + TLoggerActor::TLoggerActor(TIntrusivePtr<NLog::TSettings> settings, + TAutoPtr<TLogBackend> logBackend, + TIntrusivePtr<NMonitoring::TDynamicCounters> counters) + : TActor(&TLoggerActor::StateFunc) + , Settings(settings) + , LogBackend(logBackend.Release()) + , Metrics(std::make_unique<TLoggerCounters>(counters)) + { + } + + TLoggerActor::TLoggerActor(TIntrusivePtr<NLog::TSettings> settings, + std::shared_ptr<TLogBackend> logBackend, + TIntrusivePtr<NMonitoring::TDynamicCounters> counters) + : TActor(&TLoggerActor::StateFunc) + , Settings(settings) + , LogBackend(logBackend) + , Metrics(std::make_unique<TLoggerCounters>(counters)) + { + } + + TLoggerActor::TLoggerActor(TIntrusivePtr<NLog::TSettings> settings, + TAutoPtr<TLogBackend> logBackend, + std::shared_ptr<NMonitoring::TMetricRegistry> metrics) + : TActor(&TLoggerActor::StateFunc) + , Settings(settings) + , LogBackend(logBackend.Release()) + , Metrics(std::make_unique<TLoggerMetrics>(metrics)) + { + } + + TLoggerActor::TLoggerActor(TIntrusivePtr<NLog::TSettings> settings, + std::shared_ptr<TLogBackend> logBackend, + std::shared_ptr<NMonitoring::TMetricRegistry> metrics) + : TActor(&TLoggerActor::StateFunc) + , Settings(settings) + , LogBackend(logBackend) + , Metrics(std::make_unique<TLoggerMetrics>(metrics)) + { + } + + TLoggerActor::~TLoggerActor() { + } + + void TLoggerActor::Log(TInstant time, NLog::EPriority priority, NLog::EComponent component, const char* c, ...) 
{
+        Metrics->IncDirectMsgs();
+        if (Settings && Settings->Satisfies(priority, component, 0ull)) {
+            va_list params;
+            va_start(params, c);
+            TString formatted;
+            vsprintf(formatted, c, params);
+
+            auto ok = OutputRecord(time, NLog::EPrio(priority), component, formatted);
+            Y_UNUSED(ok);
+            va_end(params);
+        }
+    }
+
+    void TLoggerActor::Throttle(const NLog::TSettings& settings) {
+        if (AtomicGet(IsOverflow))
+            Sleep(settings.ThrottleDelay);
+    }
+
+    void TLoggerActor::LogIgnoredCount(TInstant now) {
+        TString message = Sprintf("Ignored IgnoredCount# %" PRIu64 " log records due to logger overflow!", IgnoredCount);
+        if (!OutputRecord(now, NActors::NLog::EPrio::Error, Settings->LoggerComponent, message)) {
+            BecomeDefunct();
+        }
+    }
+
+    void TLoggerActor::HandleIgnoredEvent(TLogIgnored::TPtr& ev, const NActors::TActorContext& ctx) {
+        Y_UNUSED(ev);
+        LogIgnoredCount(ctx.Now());
+        IgnoredCount = 0;
+        PassedCount = 0;
+    }
+
+    void TLoggerActor::HandleIgnoredEventDrop() {
+        // logger backend is unavailable, just ignore
+    }
+
+    void TLoggerActor::WriteMessageStat(const NLog::TEvLog& ev) {
+        Metrics->IncActorMsgs();
+
+        const auto prio = ev.Level.ToPrio();
+
+        switch (prio) {
+            case ::NActors::NLog::EPrio::Alert:
+                Metrics->IncAlertMsgs();
+                break;
+            case ::NActors::NLog::EPrio::Emerg:
+                Metrics->IncEmergMsgs();
+                break;
+            default:
+                break;
+        }
+
+    }
+
+    void TLoggerActor::HandleLogEvent(NLog::TEvLog::TPtr& ev, const NActors::TActorContext& ctx) {
+        i64 delayMillisec = (ctx.Now() - ev->Get()->Stamp).MilliSeconds();
+        WriteMessageStat(*ev->Get());
+        if (Settings->AllowDrop) {
+            // Disable throttling if it was enabled previously
+            if (AtomicGet(IsOverflow))
+                AtomicSet(IsOverflow, 0);
+
+            // Check if some records have to be dropped
+            if ((PassedCount > 10 && delayMillisec > (i64)Settings->TimeThresholdMs) || IgnoredCount > 0) {
+                Metrics->IncIgnoredMsgs();
+                if (IgnoredCount == 0) {
+                    ctx.Send(ctx.SelfID, new TLogIgnored());
+                }
+                ++IgnoredCount;
+                PassedCount = 0;
+                return;
+            }
+            PassedCount++;
+        } else {
+            // Enable or disable throttling depending on the load
+            if (delayMillisec > (i64)Settings->TimeThresholdMs && !AtomicGet(IsOverflow))
+                AtomicSet(IsOverflow, 1);
+            else if (delayMillisec <= (i64)Settings->TimeThresholdMs && AtomicGet(IsOverflow))
+                AtomicSet(IsOverflow, 0);
+        }
+
+        const auto prio = ev->Get()->Level.ToPrio();
+        if (!OutputRecord(ev->Get()->Stamp, prio, ev->Get()->Component, ev->Get()->Line)) {
+            BecomeDefunct();
+        }
+    }
+
+    void TLoggerActor::BecomeDefunct() {
+        Become(&TThis::StateDefunct);
+        Schedule(WakeupInterval, new TEvents::TEvWakeup);
+    }
+
+    void TLoggerActor::HandleLogComponentLevelRequest(TLogComponentLevelRequest::TPtr& ev, const NActors::TActorContext& ctx) {
+        Metrics->IncLevelRequests();
+        TString explanation;
+        int code = Settings->SetLevel(ev->Get()->Priority, ev->Get()->Component, explanation);
+        ctx.Send(ev->Sender, new TLogComponentLevelResponse(code, explanation));
+    }
+
+    void TLoggerActor::RenderComponentPriorities(IOutputStream& str) {
+        using namespace NLog;
+        HTML(str) {
+            H4() {
+                str << "Priority Settings for the Components";
+            }
+            TABLE_SORTABLE_CLASS("table") {
+                TABLEHEAD() {
+                    TABLER() {
+                        TABLEH() {
+                            str << "Component";
+                        }
+                        TABLEH() {
+                            str << "Level";
+                        }
+                        TABLEH() {
+                            str << "Sampling Level";
+                        }
+                        TABLEH() {
+                            str << "Sampling Rate";
+                        }
+                    }
+                }
+                TABLEBODY() {
+                    for (EComponent i = Settings->MinVal; i < Settings->MaxVal; i++) {
+                        auto name = Settings->ComponentName(i);
+                        if (!*name)
+                            continue;
+                        NLog::TComponentSettings componentSettings
= Settings->GetComponentSettings(i); + + TABLER() { + TABLED() { + str << "<a href='logger?c=" << i << "'>" << name << "</a>"; + } + TABLED() { + str << PriorityToString(EPrio(componentSettings.Raw.X.Level)); + } + TABLED() { + str << PriorityToString(EPrio(componentSettings.Raw.X.SamplingLevel)); + } + TABLED() { + str << componentSettings.Raw.X.SamplingRate; + } + } + } + } + } + } + } + + /* + * Logger INFO: + * 1. Current priority settings from components + * 2. Number of log messages (via actors events, directly) + * 3. Number of messages per components, per priority + * 4. Log level changes (last N changes) + */ + void TLoggerActor::HandleMonInfo(NMon::TEvHttpInfo::TPtr& ev, const TActorContext& ctx) { + const auto& params = ev->Get()->Request.GetParams(); + NLog::EComponent component = NLog::InvalidComponent; + NLog::EPriority priority = NLog::PRI_DEBUG; + NLog::EPriority samplingPriority = NLog::PRI_DEBUG; + ui32 samplingRate = 0; + bool hasComponent = false; + bool hasPriority = false; + bool hasSamplingPriority = false; + bool hasSamplingRate = false; + bool hasAllowDrop = false; + int allowDrop = 0; + if (params.Has("c")) { + if (TryFromString(params.Get("c"), component) && (component == NLog::InvalidComponent || Settings->IsValidComponent(component))) { + hasComponent = true; + if (params.Has("p")) { + int rawPriority; + if (TryFromString(params.Get("p"), rawPriority) && NLog::TSettings::IsValidPriority((NLog::EPriority)rawPriority)) { + priority = (NLog::EPriority)rawPriority; + hasPriority = true; + } + } + if (params.Has("sp")) { + int rawPriority; + if (TryFromString(params.Get("sp"), rawPriority) && NLog::TSettings::IsValidPriority((NLog::EPriority)rawPriority)) { + samplingPriority = (NLog::EPriority)rawPriority; + hasSamplingPriority = true; + } + } + if (params.Has("sr")) { + if (TryFromString(params.Get("sr"), samplingRate)) { + hasSamplingRate = true; + } + } + } + } + if (params.Has("allowdrop")) { + if (TryFromString(params.Get("allowdrop"), allowDrop)) { + hasAllowDrop = true; + } + } + + TStringStream str; + if (hasComponent && !hasPriority && !hasSamplingPriority && !hasSamplingRate) { + NLog::TComponentSettings componentSettings = Settings->GetComponentSettings(component); + ui32 samplingRate = componentSettings.Raw.X.SamplingRate; + HTML(str) { + DIV_CLASS("row") { + DIV_CLASS("col-md-12") { + H4() { + str << "Current log settings for " << Settings->ComponentName(component) << Endl; + } + UL() { + LI() { + str << "Priority: " + << NLog::PriorityToString(NLog::EPrio(componentSettings.Raw.X.Level)); + } + LI() { + str << "Sampling priority: " + << NLog::PriorityToString(NLog::EPrio(componentSettings.Raw.X.SamplingLevel)); + } + LI() { + str << "Sampling rate: " + << samplingRate; + } + } + } + } + + DIV_CLASS("row") { + DIV_CLASS("col-md-12") { + H4() { + str << "Change priority" << Endl; + } + UL() { + for (int p = NLog::PRI_EMERG; p <= NLog::PRI_TRACE; ++p) { + LI() { + str << "<a href='logger?c=" << component << "&p=" << p << "'>" + << NLog::PriorityToString(NLog::EPrio(p)) << "</a>"; + } + } + } + H4() { + str << "Change sampling priority" << Endl; + } + UL() { + for (int p = NLog::PRI_EMERG; p <= NLog::PRI_TRACE; ++p) { + LI() { + str << "<a href='logger?c=" << component << "&sp=" << p << "'>" + << NLog::PriorityToString(NLog::EPrio(p)) << "</a>"; + } + } + } + H4() { + str << "Change sampling rate" << Endl; + } + str << "<form method=\"GET\">" << Endl; + str << "Rate: <input type=\"number\" name=\"sr\" value=\"" << samplingRate << "\"/>" << Endl; + str << 
"<input type=\"hidden\" name=\"c\" value=\"" << component << "\">" << Endl; + str << "<input class=\"btn btn-primary\" type=\"submit\" value=\"Change\"/>" << Endl; + str << "</form>" << Endl; + H4() { + str << "<a href='logger'>Cancel</a>" << Endl; + } + } + } + } + + } else { + TString explanation; + if (hasComponent && hasPriority) { + Settings->SetLevel(priority, component, explanation); + } + if (hasComponent && hasSamplingPriority) { + Settings->SetSamplingLevel(samplingPriority, component, explanation); + } + if (hasComponent && hasSamplingRate) { + Settings->SetSamplingRate(samplingRate, component, explanation); + } + if (hasAllowDrop) { + Settings->SetAllowDrop(allowDrop); + } + + HTML(str) { + if (!explanation.empty()) { + DIV_CLASS("row") { + DIV_CLASS("col-md-12 alert alert-info") { + str << explanation; + } + } + } + + DIV_CLASS("row") { + DIV_CLASS("col-md-6") { + RenderComponentPriorities(str); + } + DIV_CLASS("col-md-6") { + H4() { + str << "Change priority for all components"; + } + TABLE_CLASS("table table-condensed") { + TABLEHEAD() { + TABLER() { + TABLEH() { + str << "Priority"; + } + } + } + TABLEBODY() { + for (int p = NLog::PRI_EMERG; p <= NLog::PRI_TRACE; ++p) { + TABLER() { + TABLED() { + str << "<a href = 'logger?c=-1&p=" << p << "'>" + << NLog::PriorityToString(NLog::EPrio(p)) << "</a>"; + } + } + } + } + } + H4() { + str << "Change sampling priority for all components"; + } + TABLE_CLASS("table table-condensed") { + TABLEHEAD() { + TABLER() { + TABLEH() { + str << "Priority"; + } + } + } + TABLEBODY() { + for (int p = NLog::PRI_EMERG; p <= NLog::PRI_TRACE; ++p) { + TABLER() { + TABLED() { + str << "<a href = 'logger?c=-1&sp=" << p << "'>" + << NLog::PriorityToString(NLog::EPrio(p)) << "</a>"; + } + } + } + } + } + H4() { + str << "Change sampling rate for all components"; + } + str << "<form method=\"GET\">" << Endl; + str << "Rate: <input type=\"number\" name=\"sr\" value=\"0\"/>" << Endl; + str << "<input type=\"hidden\" name=\"c\" value=\"-1\">" << Endl; + str << "<input class=\"btn btn-primary\" type=\"submit\" value=\"Change\"/>" << Endl; + str << "</form>" << Endl; + H4() { + str << "Drop log entries in case of overflow: " + << (Settings->AllowDrop ? "Enabled" : "Disabled"); + } + str << "<form method=\"GET\">" << Endl; + str << "<input type=\"hidden\" name=\"allowdrop\" value=\"" << (Settings->AllowDrop ? "0" : "1") << "\"/>" << Endl; + str << "<input class=\"btn btn-primary\" type=\"submit\" value=\"" << (Settings->AllowDrop ? 
"Disable" : "Enable") << "\"/>" << Endl; + str << "</form>" << Endl; + } + } + Metrics->GetOutputHtml(str); + } + } + + ctx.Send(ev->Sender, new NMon::TEvHttpInfoRes(str.Str())); + } + + constexpr size_t TimeBufSize = 512; + + bool TLoggerActor::OutputRecord(TInstant time, NLog::EPrio priority, NLog::EComponent component, + const TString& formatted) noexcept try { + const auto logPrio = ::ELogPriority(ui16(priority)); + + char buf[TimeBufSize]; + switch (Settings->Format) { + case NActors::NLog::TSettings::PLAIN_FULL_FORMAT: { + TStringBuilder logRecord; + if (Settings->UseLocalTimestamps) { + logRecord << FormatLocalTimestamp(time, buf); + } else { + logRecord << time; + } + logRecord + << Settings->MessagePrefix + << " :" << Settings->ComponentName(component) + << " " << PriorityToString(priority) + << ": " << formatted; + LogBackend->WriteData( + TLogRecord(logPrio, logRecord.data(), logRecord.size())); + } break; + + case NActors::NLog::TSettings::PLAIN_SHORT_FORMAT: { + TStringBuilder logRecord; + logRecord + << Settings->ComponentName(component) + << ": " << formatted; + LogBackend->WriteData( + TLogRecord(logPrio, logRecord.data(), logRecord.size())); + } break; + + case NActors::NLog::TSettings::JSON_FORMAT: { + NJsonWriter::TBuf json; + json.BeginObject() + .WriteKey("@timestamp") + .WriteString(Settings->UseLocalTimestamps ? FormatLocalTimestamp(time, buf) : time.ToString().data()) + .WriteKey("microseconds") + .WriteULongLong(time.MicroSeconds()) + .WriteKey("host") + .WriteString(Settings->ShortHostName) + .WriteKey("cluster") + .WriteString(Settings->ClusterName) + .WriteKey("priority") + .WriteString(PriorityToString(priority)) + .WriteKey("npriority") + .WriteInt((int)priority) + .WriteKey("component") + .WriteString(Settings->ComponentName(component)) + .WriteKey("tag") + .WriteString("KIKIMR") + .WriteKey("revision") + .WriteInt(GetProgramSvnRevision()) + .WriteKey("message") + .WriteString(formatted) + .EndObject(); + auto logRecord = json.Str(); + LogBackend->WriteData( + TLogRecord(logPrio, logRecord.data(), logRecord.size())); + } break; + } + + return true; + } catch (...) 
{ + return false; + } + + void TLoggerActor::HandleLogEventDrop(const NLog::TEvLog::TPtr& ev) { + WriteMessageStat(*ev->Get()); + Metrics->IncDroppedMsgs(); + } + + void TLoggerActor::HandleWakeup() { + Become(&TThis::StateFunc); + } + + const char* TLoggerActor::FormatLocalTimestamp(TInstant time, char* buf) { + struct tm localTime; + time.LocalTime(&localTime); + int r = strftime(buf, TimeBufSize, "%Y-%m-%d-%H-%M-%S", &localTime); + Y_VERIFY(r != 0); + return buf; + } + + TAutoPtr<TLogBackend> CreateSysLogBackend(const TString& ident, + bool logPError, bool logCons) { + int flags = 0; + if (logPError) + flags |= TSysLogBackend::LogPerror; + if (logCons) + flags |= TSysLogBackend::LogCons; + + return new TSysLogBackend(ident.data(), TSysLogBackend::TSYSLOG_LOCAL1, flags); + } + + class TStderrBackend: public TLogBackend { + public: + TStderrBackend() { + } + void WriteData(const TLogRecord& rec) override { +#ifdef _MSC_VER + if (IsDebuggerPresent()) { + TString x; + x.reserve(rec.Len + 2); + x.append(rec.Data, rec.Len); + x.append('\n'); + OutputDebugString(x.c_str()); + } +#endif + bool isOk = false; + do { + try { + TRecordWithNewline r(rec); + Cerr.Write(r.Buf.Data(), r.Buf.Filled()); + isOk = true; + } catch (TSystemError err) { + // Interrupted system call + Y_UNUSED(err); + } + } while (!isOk); + } + + void ReopenLog() override { + } + + private: + const TString Indent; + }; + + class TLineFileLogBackend: public TFileLogBackend { + public: + TLineFileLogBackend(const TString& path) + : TFileLogBackend(path) + { + } + + // Append newline after every record + void WriteData(const TLogRecord& rec) override { + TFileLogBackend::WriteData(TRecordWithNewline(rec)); + } + }; + + class TCompositeLogBackend: public TLogBackend { + public: + TCompositeLogBackend(TVector<TAutoPtr<TLogBackend>>&& underlyingBackends) + : UnderlyingBackends(std::move(underlyingBackends)) + { + } + + void WriteData(const TLogRecord& rec) override { + for (auto& b: UnderlyingBackends) { + b->WriteData(rec); + } + } + + void ReopenLog() override { + } + + private: + TVector<TAutoPtr<TLogBackend>> UnderlyingBackends; + }; + + TAutoPtr<TLogBackend> CreateStderrBackend() { + return new TStderrBackend(); + } + + TAutoPtr<TLogBackend> CreateFileBackend(const TString& fileName) { + return new TLineFileLogBackend(fileName); + } + + TAutoPtr<TLogBackend> CreateNullBackend() { + return new TNullLogBackend(); + } + + TAutoPtr<TLogBackend> CreateCompositeLogBackend(TVector<TAutoPtr<TLogBackend>>&& underlyingBackends) { + return new TCompositeLogBackend(std::move(underlyingBackends)); + } +} diff --git a/library/cpp/actors/core/log.h b/library/cpp/actors/core/log.h new file mode 100644 index 0000000000..c11a7cf3c1 --- /dev/null +++ b/library/cpp/actors/core/log.h @@ -0,0 +1,369 @@ +#pragma once + +#include "defs.h" + +#include "log_iface.h" +#include "log_settings.h" +#include "actorsystem.h" +#include "events.h" +#include "event_local.h" +#include "hfunc.h" +#include "mon.h" + +#include <util/generic/vector.h> +#include <util/string/printf.h> +#include <util/string/builder.h> +#include <library/cpp/logger/all.h> +#include <library/cpp/monlib/dynamic_counters/counters.h> +#include <library/cpp/monlib/metrics/metric_registry.h> +#include <library/cpp/json/writer/json.h> +#include <library/cpp/svnversion/svnversion.h> + +#include <library/cpp/actors/memory_log/memlog.h> + +// TODO: limit number of messages per second +// TODO: make TLogComponentLevelRequest/Response network messages + +#define 
IS_LOG_PRIORITY_ENABLED(actorCtxOrSystem, priority, component) \ + (static_cast<::NActors::NLog::TSettings*>((actorCtxOrSystem).LoggerSettings()) && \ + static_cast<::NActors::NLog::TSettings*>((actorCtxOrSystem).LoggerSettings())->Satisfies( \ + static_cast<::NActors::NLog::EPriority>(priority), \ + static_cast<::NActors::NLog::EComponent>(component), \ + 0ull) \ + ) + +#define LOG_LOG_SAMPLED_BY(actorCtxOrSystem, priority, component, sampleBy, ...) \ + do { \ + ::NActors::NLog::TSettings* mSettings = static_cast<::NActors::NLog::TSettings*>((actorCtxOrSystem).LoggerSettings()); \ + ::NActors::NLog::EPriority mPriority = static_cast<::NActors::NLog::EPriority>(priority); \ + ::NActors::NLog::EComponent mComponent = static_cast<::NActors::NLog::EComponent>(component); \ + if (mSettings && mSettings->Satisfies(mPriority, mComponent, sampleBy)) { \ + ::NActors::MemLogAdapter( \ + actorCtxOrSystem, priority, component, __VA_ARGS__); \ + } \ + } while (0) /**/ + +#define LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, priority, component, sampleBy, stream) \ + LOG_LOG_SAMPLED_BY(actorCtxOrSystem, priority, component, sampleBy, "%s", [&]() { \ + TStringBuilder logStringBuilder; \ + logStringBuilder << stream; \ + return static_cast<TString>(logStringBuilder); \ + }().data()) + +#define LOG_LOG(actorCtxOrSystem, priority, component, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, priority, component, 0ull, __VA_ARGS__) +#define LOG_LOG_S(actorCtxOrSystem, priority, component, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, priority, component, 0ull, stream) + +// use these macros for logging via actor system or actor context +#define LOG_EMERG(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_EMERG, component, __VA_ARGS__) +#define LOG_ALERT(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_ALERT, component, __VA_ARGS__) +#define LOG_CRIT(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_CRIT, component, __VA_ARGS__) +#define LOG_ERROR(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_ERROR, component, __VA_ARGS__) +#define LOG_WARN(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_WARN, component, __VA_ARGS__) +#define LOG_NOTICE(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_NOTICE, component, __VA_ARGS__) +#define LOG_INFO(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_INFO, component, __VA_ARGS__) +#define LOG_DEBUG(actorCtxOrSystem, component, ...) LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_DEBUG, component, __VA_ARGS__) +#define LOG_TRACE(actorCtxOrSystem, component, ...) 
LOG_LOG(actorCtxOrSystem, NActors::NLog::PRI_TRACE, component, __VA_ARGS__) + +#define LOG_EMERG_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_EMERG, component, stream) +#define LOG_ALERT_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_ALERT, component, stream) +#define LOG_CRIT_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_CRIT, component, stream) +#define LOG_ERROR_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_ERROR, component, stream) +#define LOG_WARN_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_WARN, component, stream) +#define LOG_NOTICE_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_NOTICE, component, stream) +#define LOG_INFO_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_INFO, component, stream) +#define LOG_DEBUG_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_DEBUG, component, stream) +#define LOG_TRACE_S(actorCtxOrSystem, component, stream) LOG_LOG_S(actorCtxOrSystem, NActors::NLog::PRI_TRACE, component, stream) + +#define LOG_EMERG_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_EMERG, component, sampleBy, __VA_ARGS__) +#define LOG_ALERT_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_ALERT, component, sampleBy, __VA_ARGS__) +#define LOG_CRIT_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_CRIT, component, sampleBy, __VA_ARGS__) +#define LOG_ERROR_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_ERROR, component, sampleBy, __VA_ARGS__) +#define LOG_WARN_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_WARN, component, sampleBy, __VA_ARGS__) +#define LOG_NOTICE_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_NOTICE, component, sampleBy, __VA_ARGS__) +#define LOG_INFO_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_INFO, component, sampleBy, __VA_ARGS__) +#define LOG_DEBUG_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_DEBUG, component, sampleBy, __VA_ARGS__) +#define LOG_TRACE_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, ...) 
LOG_LOG_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_TRACE, component, sampleBy, __VA_ARGS__) + +#define LOG_EMERG_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_EMERG, component, sampleBy, stream) +#define LOG_ALERT_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_ALERT, component, sampleBy, stream) +#define LOG_CRIT_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_CRIT, component, sampleBy, stream) +#define LOG_ERROR_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_ERROR, component, sampleBy, stream) +#define LOG_WARN_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_WARN, component, sampleBy, stream) +#define LOG_NOTICE_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_NOTICE, component, sampleBy, stream) +#define LOG_INFO_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_INFO, component, sampleBy, stream) +#define LOG_DEBUG_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_DEBUG, component, sampleBy, stream) +#define LOG_TRACE_S_SAMPLED_BY(actorCtxOrSystem, component, sampleBy, stream) LOG_LOG_S_SAMPLED_BY(actorCtxOrSystem, NActors::NLog::PRI_TRACE, component, sampleBy, stream) + +// Log Throttling +#define LOG_LOG_THROTTLE(throttler, actorCtxOrSystem, priority, component, ...) \ + do { \ + if ((throttler).Kick()) { \ + LOG_LOG(actorCtxOrSystem, priority, component, __VA_ARGS__); \ + } \ + } while (0) /**/ + +#define TRACE_EVENT(component) \ + const auto& currentTracer = component; \ + if (ev->HasEvent()) { \ + LOG_TRACE(*TlsActivationContext, currentTracer, "%s, received event# %" PRIu32 ", Sender %s, Recipient %s: %s", \ + __FUNCTION__, ev->Type, ev->Sender.ToString().data(), SelfId().ToString().data(), ev->GetBase()->ToString().substr(0, 1000).data()); \ + } else { \ + LOG_TRACE(*TlsActivationContext, currentTracer, "%s, received event# %" PRIu32 ", Sender %s, Recipient %s", \ + __FUNCTION__, ev->Type, ev->Sender.ToString().data(), ev->Recipient.ToString().data()); \ + } +#define TRACE_EVENT_TYPE(eventType) LOG_TRACE(*TlsActivationContext, currentTracer, "%s, processing event %s", __FUNCTION__, eventType) + +class TLog; +class TLogBackend; + +namespace NActors { + class TLoggerActor; + + //////////////////////////////////////////////////////////////////////////////// + // SET LOG LEVEL FOR A COMPONENT + //////////////////////////////////////////////////////////////////////////////// + class TLogComponentLevelRequest: public TEventLocal<TLogComponentLevelRequest, int(NLog::EEv::LevelReq)> { + public: + // set given priority for the component + TLogComponentLevelRequest(NLog::EPriority priority, NLog::EComponent component) + : Priority(priority) + , Component(component) + { + } + + // set given priority for all components + TLogComponentLevelRequest(NLog::EPriority priority) + : Priority(priority) + , Component(NLog::InvalidComponent) + { + } + + protected: + NLog::EPriority Priority; + NLog::EComponent Component; + + friend class TLoggerActor; + }; + + class TLogComponentLevelResponse: public TEventLocal<TLogComponentLevelResponse, 
int(NLog::EEv::LevelResp)> { + public: + TLogComponentLevelResponse(int code, const TString& explanation) + : Code(code) + , Explanation(explanation) + { + } + + int GetCode() const { + return Code; + } + + const TString& GetExplanation() const { + return Explanation; + } + + protected: + int Code; + TString Explanation; + }; + + class TLogIgnored: public TEventLocal<TLogIgnored, int(NLog::EEv::Ignored)> { + public: + TLogIgnored() { + } + }; + + //////////////////////////////////////////////////////////////////////////////// + // LOGGER ACTOR + //////////////////////////////////////////////////////////////////////////////// + class ILoggerMetrics { + public: + virtual ~ILoggerMetrics() = default; + + virtual void IncActorMsgs() = 0; + virtual void IncDirectMsgs() = 0; + virtual void IncLevelRequests() = 0; + virtual void IncIgnoredMsgs() = 0; + virtual void IncAlertMsgs() = 0; + virtual void IncEmergMsgs() = 0; + virtual void IncDroppedMsgs() = 0; + + virtual void GetOutputHtml(IOutputStream&) = 0; + }; + + class TLoggerActor: public TActor<TLoggerActor> { + public: + static constexpr IActor::EActivityType ActorActivityType() { + return IActor::LOG_ACTOR; + } + + TLoggerActor(TIntrusivePtr<NLog::TSettings> settings, + TAutoPtr<TLogBackend> logBackend, + TIntrusivePtr<NMonitoring::TDynamicCounters> counters); + TLoggerActor(TIntrusivePtr<NLog::TSettings> settings, + std::shared_ptr<TLogBackend> logBackend, + TIntrusivePtr<NMonitoring::TDynamicCounters> counters); + TLoggerActor(TIntrusivePtr<NLog::TSettings> settings, + TAutoPtr<TLogBackend> logBackend, + std::shared_ptr<NMonitoring::TMetricRegistry> metrics); + TLoggerActor(TIntrusivePtr<NLog::TSettings> settings, + std::shared_ptr<TLogBackend> logBackend, + std::shared_ptr<NMonitoring::TMetricRegistry> metrics); + ~TLoggerActor(); + + void StateFunc(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx) { + switch (ev->GetTypeRewrite()) { + HFunc(TLogIgnored, HandleIgnoredEvent); + HFunc(NLog::TEvLog, HandleLogEvent); + HFunc(TLogComponentLevelRequest, HandleLogComponentLevelRequest); + HFunc(NMon::TEvHttpInfo, HandleMonInfo); + } + } + + STFUNC(StateDefunct) { + switch (ev->GetTypeRewrite()) { + cFunc(TLogIgnored::EventType, HandleIgnoredEventDrop); + hFunc(NLog::TEvLog, HandleLogEventDrop); + HFunc(TLogComponentLevelRequest, HandleLogComponentLevelRequest); + HFunc(NMon::TEvHttpInfo, HandleMonInfo); + cFunc(TEvents::TEvWakeup::EventType, HandleWakeup); + } + } + + // Directly call logger instead of sending a message + void Log(TInstant time, NLog::EPriority priority, NLog::EComponent component, const char* c, ...); + + static void Throttle(const NLog::TSettings& settings); + + private: + TIntrusivePtr<NLog::TSettings> Settings; + std::shared_ptr<TLogBackend> LogBackend; + ui64 IgnoredCount = 0; + ui64 PassedCount = 0; + static TAtomic IsOverflow; + TDuration WakeupInterval{TDuration::Seconds(5)}; + std::unique_ptr<ILoggerMetrics> Metrics; + + void BecomeDefunct(); + void HandleIgnoredEvent(TLogIgnored::TPtr& ev, const NActors::TActorContext& ctx); + void HandleIgnoredEventDrop(); + void HandleLogEvent(NLog::TEvLog::TPtr& ev, const TActorContext& ctx); + void HandleLogEventDrop(const NLog::TEvLog::TPtr& ev); + void HandleLogComponentLevelRequest(TLogComponentLevelRequest::TPtr& ev, const TActorContext& ctx); + void HandleMonInfo(NMon::TEvHttpInfo::TPtr& ev, const TActorContext& ctx); + void HandleWakeup(); + [[nodiscard]] bool OutputRecord(TInstant time, NLog::EPrio priority, NLog::EComponent component, const TString& formatted) 
noexcept; + void RenderComponentPriorities(IOutputStream& str); + void LogIgnoredCount(TInstant now); + void WriteMessageStat(const NLog::TEvLog& ev); + static const char* FormatLocalTimestamp(TInstant time, char* buf); + }; + + //////////////////////////////////////////////////////////////////////////////// + // LOG THROTTLING + // TTrivialLogThrottler -- log a message every 'period' duration + // Use case: + // TTrivialLogThrottler throttler(TDuration::Minutes(1)); + // .... + // LOG_LOG_THROTTLE(throttler, ctx, NActors::NLog::PRI_ERROR, SOME, "Error"); + //////////////////////////////////////////////////////////////////////////////// + class TTrivialLogThrottler { + public: + TTrivialLogThrottler(TDuration period) + : Period(period) + { + } + + // return value: + // true -- write to log + // false -- don't write to log, throttle + bool Kick() { + auto now = TInstant::Now(); + if (now >= (LastWrite + Period)) { + LastWrite = now; + return true; + } else { + return false; + } + } + + private: + TInstant LastWrite; + TDuration Period; + }; + + //////////////////////////////////////////////////////////////////////////////// + // SYSLOG BACKEND + //////////////////////////////////////////////////////////////////////////////// + TAutoPtr<TLogBackend> CreateSysLogBackend(const TString& ident, + bool logPError, bool logCons); + TAutoPtr<TLogBackend> CreateStderrBackend(); + TAutoPtr<TLogBackend> CreateFileBackend(const TString& fileName); + TAutoPtr<TLogBackend> CreateNullBackend(); + TAutoPtr<TLogBackend> CreateCompositeLogBackend(TVector<TAutoPtr<TLogBackend>>&& underlyingBackends); + + ///////////////////////////////////////////////////////////////////// + // Logging adaptors for memory log and logging into filesystem + ///////////////////////////////////////////////////////////////////// + + namespace NDetail { + inline void Y_PRINTF_FORMAT(2, 3) PrintfV(TString& dst, const char* format, ...) { + va_list params; + va_start(params, format); + vsprintf(dst, format, params); + va_end(params); + } + + inline void PrintfV(TString& dst, const char* format, va_list params) { + vsprintf(dst, format, params); + } + } // namespace NDetail + + template <typename TCtx> + inline void DeliverLogMessage(TCtx& ctx, NLog::EPriority mPriority, NLog::EComponent mComponent, TString &&str) + { + const NLog::TSettings *mSettings = ctx.LoggerSettings(); + TLoggerActor::Throttle(*mSettings); + ctx.Send(new IEventHandle(mSettings->LoggerActorId, TActorId(), new NLog::TEvLog(mPriority, mComponent, std::move(str)))); + } + + template <typename TCtx, typename... TArgs> + inline void MemLogAdapter( + TCtx& actorCtxOrSystem, + NLog::EPriority mPriority, + NLog::EComponent mComponent, + const char* format, TArgs&&... params) { + TString Formatted; + + + if constexpr (sizeof... 
(params) > 0) {
+            NDetail::PrintfV(Formatted, format, std::forward<TArgs>(params)...);
+        } else {
+            NDetail::PrintfV(Formatted, "%s", format);
+        }
+
+        MemLogWrite(Formatted.data(), Formatted.size(), true);
+        DeliverLogMessage(actorCtxOrSystem, mPriority, mComponent, std::move(Formatted));
+    }
+
+    template <typename TCtx>
+    Y_WRAPPER inline void MemLogAdapter(
+        TCtx& actorCtxOrSystem,
+        NLog::EPriority mPriority,
+        NLog::EComponent mComponent,
+        const TString& str) {
+
+        MemLogWrite(str.data(), str.size(), true);
+        DeliverLogMessage(actorCtxOrSystem, mPriority, mComponent, TString(str));
+    }
+
+    template <typename TCtx>
+    Y_WRAPPER inline void MemLogAdapter(
+        TCtx& actorCtxOrSystem,
+        NLog::EPriority mPriority,
+        NLog::EComponent mComponent,
+        TString&& str) {
+
+        MemLogWrite(str.data(), str.size(), true);
+        DeliverLogMessage(actorCtxOrSystem, mPriority, mComponent, std::move(str));
+    }
+}
diff --git a/library/cpp/actors/core/log_iface.h b/library/cpp/actors/core/log_iface.h
new file mode 100644
index 0000000000..b331db9ca8
--- /dev/null
+++ b/library/cpp/actors/core/log_iface.h
@@ -0,0 +1,109 @@
+#pragma once
+
+#include "events.h"
+#include "event_local.h"
+
+namespace NActors {
+    namespace NLog {
+        using EComponent = int;
+
+        enum EPriority : ui16 { // migrate it to EPrio whenever possible
+            PRI_EMERG,
+            PRI_ALERT,
+            PRI_CRIT,
+            PRI_ERROR,
+            PRI_WARN,
+            PRI_NOTICE,
+            PRI_INFO,
+            PRI_DEBUG,
+            PRI_TRACE
+        };
+
+        enum class EPrio : ui16 {
+            Emerg = 0,
+            Alert = 1,
+            Crit = 2,
+            Error = 3,
+            Warn = 4,
+            Notice = 5,
+            Info = 6,
+            Debug = 7,
+            Trace = 8,
+        };
+
+        struct TLevel {
+            TLevel(ui32 raw)
+                : Raw(raw)
+            {
+            }
+
+            TLevel(EPrio prio)
+                : Raw((ui16(prio) + 1) << 8)
+            {
+            }
+
+            EPrio ToPrio() const noexcept {
+                const auto major = Raw >> 8;
+
+                return major > 0 ? EPrio(major - 1) : EPrio::Emerg;
+            }
+
+            bool IsUrgentAbortion() const noexcept {
+                return (Raw >> 8) == 0;
+            }
+
+            /* Generalized monotonic level value composed of major and minor
+               levels. Minor is used for verbosity within a major level; basic
+               EPrio values are mapped to (EPrio + 1, 0), and Major = 0 is
+               reserved as a special space with a meaning like EPrio::Emerg but
+               with extended actions. Thus a logger should map Major = 0 to
+               EPrio::Emerg if it has no idea how to handle the special
+               emergency actions.
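+
+               A worked example of the packing (derived from the code above):
+               TLevel(EPrio::Error) stores Raw = (3 + 1) << 8 = 0x400, and
+               ToPrio() maps any Raw in [0x400, 0x4FF] back to EPrio::Error;
+               the minor bits only add verbosity within that level.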
+ */ + + ui32 Raw = 0; // ((ui16(EPrio) + 1) << 8) | ui8(minor) + }; + + enum class EEv { + Log = EventSpaceBegin(TEvents::ES_LOGGER), + LevelReq, + LevelResp, + Ignored, + End + }; + + static_assert(int(EEv::End) < EventSpaceEnd(TEvents::ES_LOGGER), ""); + + class TEvLog: public TEventLocal<TEvLog, int(EEv::Log)> { + public: + TEvLog(TInstant stamp, TLevel level, EComponent comp, const TString &line) + : Stamp(stamp) + , Level(level) + , Component(comp) + , Line(line) + { + } + + TEvLog(TInstant stamp, TLevel level, EComponent comp, TString &&line) + : Stamp(stamp) + , Level(level) + , Component(comp) + , Line(std::move(line)) + { + } + + TEvLog(EPriority prio, EComponent comp, TString line, TInstant time = TInstant::Now()) + : Stamp(time) + , Level(EPrio(prio)) + , Component(comp) + , Line(std::move(line)) + { + } + + const TInstant Stamp = TInstant::Max(); + const TLevel Level; + const EComponent Component = 0; + TString Line; + }; + + } +} diff --git a/library/cpp/actors/core/log_settings.cpp b/library/cpp/actors/core/log_settings.cpp new file mode 100644 index 0000000000..f52f2fc5d2 --- /dev/null +++ b/library/cpp/actors/core/log_settings.cpp @@ -0,0 +1,230 @@ +#include "log_settings.h" + +#include <util/stream/str.h> + +namespace NActors { + namespace NLog { + TSettings::TSettings(const TActorId& loggerActorId, const EComponent loggerComponent, + EComponent minVal, EComponent maxVal, EComponentToStringFunc func, + EPriority defPriority, EPriority defSamplingPriority, + ui32 defSamplingRate, ui64 timeThresholdMs) + : LoggerActorId(loggerActorId) + , LoggerComponent(loggerComponent) + , TimeThresholdMs(timeThresholdMs) + , AllowDrop(true) + , ThrottleDelay(TDuration::MilliSeconds(100)) + , MinVal(0) + , MaxVal(0) + , Mask(0) + , DefPriority(defPriority) + , DefSamplingPriority(defSamplingPriority) + , DefSamplingRate(defSamplingRate) + , UseLocalTimestamps(false) + , Format(PLAIN_FULL_FORMAT) + , ShortHostName("") + , ClusterName("") + { + Append(minVal, maxVal, func); + } + + TSettings::TSettings(const TActorId& loggerActorId, const EComponent loggerComponent, + EPriority defPriority, EPriority defSamplingPriority, + ui32 defSamplingRate, ui64 timeThresholdMs) + : LoggerActorId(loggerActorId) + , LoggerComponent(loggerComponent) + , TimeThresholdMs(timeThresholdMs) + , AllowDrop(true) + , ThrottleDelay(TDuration::MilliSeconds(100)) + , MinVal(0) + , MaxVal(0) + , Mask(0) + , DefPriority(defPriority) + , DefSamplingPriority(defSamplingPriority) + , DefSamplingRate(defSamplingRate) + , UseLocalTimestamps(false) + , Format(PLAIN_FULL_FORMAT) + , ShortHostName("") + , ClusterName("") + { + } + + void TSettings::Append(EComponent minVal, EComponent maxVal, EComponentToStringFunc func) { + Y_VERIFY(minVal >= 0, "NLog::TSettings: minVal must be non-negative"); + Y_VERIFY(maxVal > minVal, "NLog::TSettings: maxVal must be greater than minVal"); + + // update bounds + if (!MaxVal || minVal < MinVal) { + MinVal = minVal; + } + + if (!MaxVal || maxVal > MaxVal) { + MaxVal = maxVal; + + // expand ComponentNames to the new bounds + auto oldMask = Mask; + Mask = PowerOf2Mask(MaxVal); + + TArrayHolder<TAtomic> oldComponentInfo(new TAtomic[Mask + 1]); + ComponentInfo.Swap(oldComponentInfo); + int startVal = oldMask ? 
oldMask + 1 : 0; + for (int i = 0; i < startVal; i++) { + AtomicSet(ComponentInfo[i], AtomicGet(oldComponentInfo[i])); + } + + TComponentSettings defSetting(DefPriority, DefSamplingPriority, DefSamplingRate); + for (int i = startVal; i < Mask + 1; i++) { + AtomicSet(ComponentInfo[i], defSetting.Raw.Data); + } + + ComponentNames.resize(Mask + 1); + } + + // assign new names, but validate that the newly added components were not used before + for (int i = minVal; i <= maxVal; i++) { + Y_VERIFY(!ComponentNames[i], "component name at %d already set: %s", + i, ComponentNames[i].data()); + ComponentNames[i] = func(i); + } + } + + int TSettings::SetLevelImpl( + const TString& name, bool isSampling, + EPriority priority, EComponent component, TString& explanation) { + TString titleName(name); + titleName.to_title(); + + // check priority + if (!IsValidPriority(priority)) { + TStringStream str; + str << "Invalid " << name; + explanation = str.Str(); + return 1; + } + + if (component == InvalidComponent) { + for (int i = 0; i < Mask + 1; i++) { + TComponentSettings settings = AtomicGet(ComponentInfo[i]); + if (isSampling) { + settings.Raw.X.SamplingLevel = priority; + } else { + settings.Raw.X.Level = priority; + } + AtomicSet(ComponentInfo[i], settings.Raw.Data); + } + + TStringStream str; + + str << titleName + << " for all components has been changed to " + << PriorityToString(EPrio(priority)); + explanation = str.Str(); + return 0; + } else { + if (!IsValidComponent(component)) { + explanation = "Invalid component"; + return 1; + } + TComponentSettings settings = AtomicGet(ComponentInfo[component]); + EPriority oldPriority; + if (isSampling) { + oldPriority = (EPriority)settings.Raw.X.SamplingLevel; + settings.Raw.X.SamplingLevel = priority; + } else { + oldPriority = (EPriority)settings.Raw.X.Level; + settings.Raw.X.Level = priority; + } + AtomicSet(ComponentInfo[component], settings.Raw.Data); + TStringStream str; + str << titleName << " for the component " << ComponentNames[component] + << " has been changed from " << PriorityToString(EPrio(oldPriority)) + << " to " << PriorityToString(EPrio(priority)); + explanation = str.Str(); + return 0; + } + } + + int TSettings::SetLevel(EPriority priority, EComponent component, TString& explanation) { + return SetLevelImpl("priority", false, + priority, component, explanation); + } + + int TSettings::SetSamplingLevel(EPriority priority, EComponent component, TString& explanation) { + return SetLevelImpl("sampling priority", true, + priority, component, explanation); + } + + int TSettings::SetSamplingRate(ui32 sampling, EComponent component, TString& explanation) { + if (component == InvalidComponent) { + for (int i = 0; i < Mask + 1; i++) { + TComponentSettings settings = AtomicGet(ComponentInfo[i]); + settings.Raw.X.SamplingRate = sampling; + AtomicSet(ComponentInfo[i], settings.Raw.Data); + } + TStringStream str; + str << "Sampling rate for all components has been changed to " << sampling; + explanation = str.Str(); + } else { + if (!IsValidComponent(component)) { + explanation = "Invalid component"; + return 1; + } + TComponentSettings settings = AtomicGet(ComponentInfo[component]); + ui32 oldSampling = settings.Raw.X.SamplingRate; + settings.Raw.X.SamplingRate = sampling; + AtomicSet(ComponentInfo[component], settings.Raw.Data); + TStringStream str; + str << "Sampling rate for the component " << ComponentNames[component] + << " has been changed from " << oldSampling + << " to " << sampling; + explanation = str.Str(); + } + return 0; + } + + int
TSettings::PowerOf2Mask(int val) { + int mask = 1; + while ((val & mask) != val) { + mask <<= 1; + mask |= 1; + } + return mask; + } + + bool TSettings::IsValidPriority(EPriority priority) { + return priority == PRI_EMERG || priority == PRI_ALERT || + priority == PRI_CRIT || priority == PRI_ERROR || + priority == PRI_WARN || priority == PRI_NOTICE || + priority == PRI_INFO || priority == PRI_DEBUG || priority == PRI_TRACE; + } + + bool TSettings::IsValidComponent(EComponent component) { + return (MinVal <= component) && (component <= MaxVal) && !ComponentNames[component].empty(); + } + + void TSettings::SetAllowDrop(bool val) { + AllowDrop = val; + } + + void TSettings::SetThrottleDelay(TDuration value) { + ThrottleDelay = value; + } + + void TSettings::SetUseLocalTimestamps(bool value) { + UseLocalTimestamps = value; + } + + EComponent TSettings::FindComponent(const TStringBuf& componentName) const { + if (componentName.empty()) + return InvalidComponent; + + for (EComponent component = MinVal; component <= MaxVal; ++component) { + if (ComponentNames[component] == componentName) + return component; + } + + return InvalidComponent; + } + + } + +} diff --git a/library/cpp/actors/core/log_settings.h b/library/cpp/actors/core/log_settings.h new file mode 100644 index 0000000000..7fe4504edd --- /dev/null +++ b/library/cpp/actors/core/log_settings.h @@ -0,0 +1,176 @@ +#pragma once + +#include "actor.h" +#include "log_iface.h" +#include <util/generic/vector.h> +#include <util/digest/murmur.h> +#include <util/random/easy.h> + +namespace NActors { + namespace NLog { + inline const char* PriorityToString(EPrio priority) { + switch (priority) { + case EPrio::Emerg: + return "EMERG"; + case EPrio::Alert: + return "ALERT"; + case EPrio::Crit: + return "CRIT"; + case EPrio::Error: + return "ERROR"; + case EPrio::Warn: + return "WARN"; + case EPrio::Notice: + return "NOTICE"; + case EPrio::Info: + return "INFO"; + case EPrio::Debug: + return "DEBUG"; + case EPrio::Trace: + return "TRACE"; + default: + return "UNKNOWN"; + } + } + + // You can structure your program to have multiple logical components. + // In this case you can set different log priorities for different + // components, and you can change a component's priority while the system + // is running. Suspect a component has a bug? Turn the DEBUG priority + // level on for that component.
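+ // + // A minimal sketch of that flow (hypothetical names: EMyService_MIN/MAX/Name and + // EMyService_SUSPECT would come from your own protobuf component enum, not from this library): + // + // auto settings = MakeIntrusive<TSettings>(loggerActorId, loggerComponent, + // EMyService_MIN, EMyService_MAX, EMyService_Name, NLog::PRI_NOTICE); + // TString explanation; + // settings->SetLevel(NLog::PRI_DEBUG, EMyService_SUSPECT, explanation); // takes effect at runtime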
+ static const int InvalidComponent = -1; + + // Converts an EComponent id to its string name + using EComponentToStringFunc = std::function<const TString&(EComponent)>; + + // Per-component log settings, packed into a single 64-bit word for atomic access + struct TComponentSettings { + union { + struct { + ui32 SamplingRate; + ui8 SamplingLevel; + ui8 Level; + } X; + + ui64 Data; + } Raw; + + TComponentSettings(TAtomicBase data) { + Raw.Data = (ui64)data; + } + + TComponentSettings(ui8 level, ui8 samplingLevel, ui32 samplingRate) { + Raw.X.Level = level; + Raw.X.SamplingLevel = samplingLevel; + Raw.X.SamplingRate = samplingRate; + } + }; + + struct TSettings: public TThrRefBase { + public: + TActorId LoggerActorId; + EComponent LoggerComponent; + ui64 TimeThresholdMs; + bool AllowDrop; + TDuration ThrottleDelay; + TArrayHolder<TAtomic> ComponentInfo; + TVector<TString> ComponentNames; + EComponent MinVal; + EComponent MaxVal; + EComponent Mask; + EPriority DefPriority; + EPriority DefSamplingPriority; + ui32 DefSamplingRate; + bool UseLocalTimestamps; + + enum ELogFormat { + PLAIN_FULL_FORMAT, + PLAIN_SHORT_FORMAT, + JSON_FORMAT + }; + ELogFormat Format; + TString ShortHostName; + TString ClusterName; + TString MessagePrefix; + + // The best way to provide minVal, maxVal and func is to have + // a protobuf enumeration of components. In this case protoc + // automatically generates YOURTYPE_MIN, YOURTYPE_MAX and + // YOURTYPE_Name for you. + TSettings(const TActorId& loggerActorId, const EComponent loggerComponent, + EComponent minVal, EComponent maxVal, EComponentToStringFunc func, + EPriority defPriority, EPriority defSamplingPriority = PRI_DEBUG, + ui32 defSamplingRate = 0, ui64 timeThresholdMs = 1000); + + TSettings(const TActorId& loggerActorId, const EComponent loggerComponent, + EPriority defPriority, EPriority defSamplingPriority = PRI_DEBUG, + ui32 defSamplingRate = 0, ui64 timeThresholdMs = 1000); + + void Append(EComponent minVal, EComponent maxVal, EComponentToStringFunc func); + + template <typename T> + void Append(T minVal, T maxVal, const TString& (*func)(T)) { + Append( + static_cast<EComponent>(minVal), + static_cast<EComponent>(maxVal), + [func](EComponent c) -> const TString& { + return func(static_cast<T>(c)); + } + ); + } + + inline bool Satisfies(EPriority priority, EComponent component, ui64 sampleBy = 0) const { + // by using Mask we don't get outside of array boundaries + TComponentSettings settings = GetComponentSettings(component); + if (priority > settings.Raw.X.Level) { + if (priority > settings.Raw.X.SamplingLevel) { + return false; // priority > both levels ==> do not log + } + // priority <= sampling level ==> apply sampling + ui32 samplingRate = settings.Raw.X.SamplingRate; + if (samplingRate) { + ui32 samplingValue = sampleBy ? MurmurHash<ui32>((const char*)&sampleBy, sizeof(sampleBy)) + : samplingRate != 1 ?
RandomNumber<ui32>() : 0; + return (samplingValue % samplingRate == 0); + } else { + // sampling rate not set ==> do not log + return false; + } + } else { + // priority <= log level ==> log + return true; + } + } + + inline TComponentSettings GetComponentSettings(EComponent component) const { + Y_VERIFY_DEBUG((component & Mask) == component); + // by using Mask we don't get outside of array boundaries + return TComponentSettings(AtomicGet(ComponentInfo[component & Mask])); + } + + const char* ComponentName(EComponent component) const { + Y_VERIFY_DEBUG((component & Mask) == component); + return ComponentNames[component & Mask].data(); + } + + int SetLevel(EPriority priority, EComponent component, TString& explanation); + int SetSamplingLevel(EPriority priority, EComponent component, TString& explanation); + int SetSamplingRate(ui32 sampling, EComponent component, TString& explanation); + EComponent FindComponent(const TStringBuf& componentName) const; + static int PowerOf2Mask(int val); + static bool IsValidPriority(EPriority priority); + bool IsValidComponent(EComponent component); + void SetAllowDrop(bool val); + void SetThrottleDelay(TDuration value); + void SetUseLocalTimestamps(bool value); + + private: + int SetLevelImpl( + const TString& name, bool isSampling, + EPriority priority, EComponent component, TString& explanation); + }; + + } + +} diff --git a/library/cpp/actors/core/log_ut.cpp b/library/cpp/actors/core/log_ut.cpp new file mode 100644 index 0000000000..09b5f88ea2 --- /dev/null +++ b/library/cpp/actors/core/log_ut.cpp @@ -0,0 +1,185 @@ +#include "log.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <library/cpp/actors/testlib/test_runtime.h> + +using namespace NMonitoring; +using namespace NActors; +using namespace NActors::NLog; + +namespace { + const TString& ServiceToString(int) { + static const TString FAKE{"FAKE"}; + return FAKE; + } + + TIntrusivePtr<TSettings> DefaultSettings() { + auto loggerId = TActorId{0, "Logger"}; + auto s = MakeIntrusive<TSettings>(loggerId, 0, EPriority::PRI_TRACE); + s->SetAllowDrop(false); + s->Append(0, 1, ServiceToString); + return s; + } + + TIntrusivePtr<TSettings> DroppingSettings(ui64 timeThresholdMs) { + auto loggerId = TActorId{0, "Logger"}; + auto s = MakeIntrusive<TSettings>( + loggerId, + 0, + EPriority::PRI_TRACE, + EPriority::PRI_DEBUG, + (ui32)0, + timeThresholdMs); + s->Append(0, 1, ServiceToString); + return s; + } + + class TMockBackend: public TLogBackend { + public: + using TWriteImpl = std::function<void(const TLogRecord&)>; + using TReopenImpl = std::function<void()>; + + static void REOPEN_NOP() { } + + TMockBackend(TWriteImpl writeImpl, TReopenImpl reopenImpl = REOPEN_NOP) + : WriteImpl_{writeImpl} + , ReopenImpl_{reopenImpl} + { + } + + void WriteData(const TLogRecord& r) override { + WriteImpl_(r); + } + + void ReopenLog() override { } + + void SetWriteImpl(TWriteImpl writeImpl) { + WriteImpl_ = writeImpl; + } + + private: + TWriteImpl WriteImpl_; + TReopenImpl ReopenImpl_; + }; + + void ThrowAlways(const TLogRecord&) { + ythrow yexception(); + }; + + struct TFixture { + TFixture( + TIntrusivePtr<TSettings> settings, + TMockBackend::TWriteImpl writeImpl = ThrowAlways) + { + Runtime.Initialize(); + LogBackend.reset(new TMockBackend{writeImpl}); + LoggerActor = Runtime.Register(new TLoggerActor{settings, LogBackend, Counters}); + Runtime.SetScheduledEventFilter([] (auto&&, auto&&, auto&&, auto) { + return false; + }); + } + + TFixture(TMockBackend::TWriteImpl writeImpl = ThrowAlways) + : 
TFixture(DefaultSettings(), writeImpl) + {} + + void WriteLog() { + Runtime.Send(new IEventHandle{LoggerActor, {}, new TEvLog(TInstant::Zero(), TLevel{EPrio::Emerg}, 0, "foo")}); + } + + void WriteLog(TInstant ts) { + Runtime.Send(new IEventHandle{LoggerActor, {}, new TEvLog(ts, TLevel{EPrio::Emerg}, 0, "foo")}); + } + + void Wakeup() { + Runtime.Send(new IEventHandle{LoggerActor, {}, new TEvents::TEvWakeup}); + } + + TIntrusivePtr<TDynamicCounters> Counters{MakeIntrusive<TDynamicCounters>()}; + std::shared_ptr<TMockBackend> LogBackend; + TActorId LoggerActor; + TTestActorRuntimeBase Runtime; + }; +} + + +Y_UNIT_TEST_SUITE(TLoggerActorTest) { + Y_UNIT_TEST(NoCrashOnWriteFailure) { + TFixture test; + test.WriteLog(); + // everything is okay as long as we get here + } + + Y_UNIT_TEST(SubsequentWritesAreIgnored) { + size_t count{0}; + auto countWrites = [&count] (auto&& r) { + count++; + ThrowAlways(r); + }; + + TFixture test{countWrites}; + test.WriteLog(); + UNIT_ASSERT_VALUES_EQUAL(count, 1); + + // at this point we should have started dropping messages + for (auto i = 0; i < 5; ++i) { + test.WriteLog(); + } + + UNIT_ASSERT_VALUES_EQUAL(count, 1); + } + + Y_UNIT_TEST(LoggerCanRecover) { + TFixture test; + test.WriteLog(); + + TVector<TString> messages; + auto acceptWrites = [&] (const TLogRecord& r) { + messages.emplace_back(r.Data, r.Len); + }; + + auto scheduled = test.Runtime.CaptureScheduledEvents(); + UNIT_ASSERT_VALUES_EQUAL(scheduled.size(), 1); + + test.LogBackend->SetWriteImpl(acceptWrites); + test.Wakeup(); + + const auto COUNT = 10; + for (auto i = 0; i < COUNT; ++i) { + test.WriteLog(); + } + + UNIT_ASSERT_VALUES_EQUAL(messages.size(), COUNT); + } + + Y_UNIT_TEST(ShouldObeyTimeThresholdMsWhenOverloaded) { + TFixture test{DroppingSettings(5000)}; + + TVector<TString> messages; + auto acceptWrites = [&] (const TLogRecord& r) { + messages.emplace_back(r.Data, r.Len); + }; + + test.LogBackend->SetWriteImpl(acceptWrites); + test.Wakeup(); + + const auto COUNT = 11; + for (auto i = 0; i < COUNT; ++i) { + test.WriteLog(); + } + + UNIT_ASSERT_VALUES_EQUAL(messages.size(), COUNT); + + test.Runtime.AdvanceCurrentTime(TDuration::Seconds(20)); + auto now = test.Runtime.GetCurrentTime(); + + test.WriteLog(now - TDuration::Seconds(5)); + + UNIT_ASSERT_VALUES_EQUAL(messages.size(), COUNT + 1); + + test.WriteLog(now - TDuration::Seconds(6)); + + UNIT_ASSERT_VALUES_EQUAL(messages.size(), COUNT + 1); + } +} diff --git a/library/cpp/actors/core/mailbox.cpp b/library/cpp/actors/core/mailbox.cpp new file mode 100644 index 0000000000..d84b4f9e46 --- /dev/null +++ b/library/cpp/actors/core/mailbox.cpp @@ -0,0 +1,551 @@ +#include "mailbox.h" +#include "actorsystem.h" + +#include <library/cpp/actors/util/datetime.h> + +#include <util/system/sanitizers.h> + +namespace NActors { + TMailboxTable::TMailboxTable() + : LastAllocatedLine(0) + , AllocatedMailboxCount(0) + , CachedSimpleMailboxes(0) + , CachedRevolvingMailboxes(0) + , CachedHTSwapMailboxes(0) + , CachedReadAsFilledMailboxes(0) + , CachedTinyReadAsFilledMailboxes(0) + { + memset((void*)Lines, 0, sizeof(Lines)); + } + + bool IsGoodForCleanup(const TMailboxHeader* header) { + switch (AtomicLoad(&header->ExecutionState)) { + case TMailboxHeader::TExecutionState::Inactive: + case TMailboxHeader::TExecutionState::Scheduled: + return true; + case TMailboxHeader::TExecutionState::Leaving: + case TMailboxHeader::TExecutionState::Executing: + case TMailboxHeader::TExecutionState::LeavingMarked: + return false; + case 
TMailboxHeader::TExecutionState::Free: + case TMailboxHeader::TExecutionState::FreeScheduled: + return true; + case TMailboxHeader::TExecutionState::FreeLeaving: + case TMailboxHeader::TExecutionState::FreeExecuting: + case TMailboxHeader::TExecutionState::FreeLeavingMarked: + return false; + default: + Y_FAIL(); + } + } + + template <typename TMailbox> + void DestructMailboxLine(ui8* begin, ui8* end) { + const ui32 sx = TMailbox::AlignedSize(); + for (ui8* x = begin; x + sx <= end; x += sx) { + TMailbox* mailbox = reinterpret_cast<TMailbox*>(x); + Y_VERIFY(IsGoodForCleanup(mailbox)); + mailbox->ExecutionState = Max<ui32>(); + mailbox->~TMailbox(); + } + } + + template <typename TMailbox> + bool CleanupMailboxLine(ui8* begin, ui8* end) { + const ui32 sx = TMailbox::AlignedSize(); + bool done = true; + for (ui8* x = begin; x + sx <= end; x += sx) { + TMailbox* mailbox = reinterpret_cast<TMailbox*>(x); + Y_VERIFY(IsGoodForCleanup(mailbox)); + done &= mailbox->CleanupActors() && mailbox->CleanupEvents(); + } + return done; + } + + TMailboxTable::~TMailboxTable() { + // on cleanup we must traverse everything and free stuff + for (ui32 i = 0; i < LastAllocatedLine; ++i) { + if (TMailboxLineHeader* lineHeader = Lines[i]) { + switch (lineHeader->MailboxType) { + case TMailboxType::Simple: + DestructMailboxLine<TSimpleMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize); + break; + case TMailboxType::Revolving: + DestructMailboxLine<TRevolvingMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize); + break; + case TMailboxType::HTSwap: + DestructMailboxLine<THTSwapMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize); + break; + case TMailboxType::ReadAsFilled: + DestructMailboxLine<TReadAsFilledMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize); + break; + case TMailboxType::TinyReadAsFilled: + DestructMailboxLine<TTinyReadAsFilledMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize); + break; + default: + Y_FAIL(); + } + + lineHeader->~TMailboxLineHeader(); + free(lineHeader); + Lines[i] = nullptr; + } + } + + while (MailboxCacheSimple.Pop(0)) + ; + while (MailboxCacheRevolving.Pop(0)) + ; + while (MailboxCacheHTSwap.Pop(0)) + ; + while (MailboxCacheReadAsFilled.Pop(0)) + ; + while (MailboxCacheTinyReadAsFilled.Pop(0)) + ; + } + + bool TMailboxTable::Cleanup() { + bool done = true; + for (ui32 i = 0; i < LastAllocatedLine; ++i) { + if (TMailboxLineHeader* lineHeader = Lines[i]) { + switch (lineHeader->MailboxType) { + case TMailboxType::Simple: + done &= CleanupMailboxLine<TSimpleMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize); + break; + case TMailboxType::Revolving: + done &= CleanupMailboxLine<TRevolvingMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize); + break; + case TMailboxType::HTSwap: + done &= CleanupMailboxLine<THTSwapMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize); + break; + case TMailboxType::ReadAsFilled: + done &= CleanupMailboxLine<TReadAsFilledMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize); + break; + case TMailboxType::TinyReadAsFilled: + done &= CleanupMailboxLine<TTinyReadAsFilledMailbox>((ui8*)lineHeader + 64, (ui8*)lineHeader + LineSize); + break; + default: + Y_FAIL(); + } + } + } + return done; + } + + TMailboxHeader* TMailboxTable::Get(ui32 hint) { + // get line + const ui32 lineIndex = (hint & LineIndexMask) >> LineIndexShift; + const ui32 lineHint = hint & LineHintMask; + + Y_VERIFY((lineIndex < MaxLines) && (lineHint < LineSize / 64)); + if (lineHint == 0) + return nullptr; + + if 
(TMailboxLineHeader* const x = AtomicLoad(Lines + lineIndex)) { + switch (x->MailboxType) { + case TMailboxType::Simple: + return TSimpleMailbox::Get(lineHint, x); + case TMailboxType::Revolving: + return TRevolvingMailbox::Get(lineHint, x); + case TMailboxType::HTSwap: + return THTSwapMailbox::Get(lineHint, x); + case TMailboxType::ReadAsFilled: + return TReadAsFilledMailbox::Get(lineHint, x); + case TMailboxType::TinyReadAsFilled: + return TTinyReadAsFilledMailbox::Get(lineHint, x); + default: + Y_VERIFY_DEBUG(false); + break; + } + } + + return nullptr; + } + + bool TMailboxTable::SendTo(TAutoPtr<IEventHandle>& ev, IExecutorPool* executorPool) { + const TActorId& recipient = ev->GetRecipientRewrite(); + const ui32 hint = recipient.Hint(); + + // copy-paste from Get to avoid duplicated type-switches + const ui32 lineIndex = (hint & LineIndexMask) >> LineIndexShift; + const ui32 lineHint = hint & LineHintMask; + + Y_VERIFY((lineIndex < MaxLines) && (lineHint < LineSize / 64)); + if (lineHint == 0) + return false; + + if (TMailboxLineHeader* const x = AtomicLoad(Lines + lineIndex)) { + switch (x->MailboxType) { + case TMailboxType::Simple: { + TSimpleMailbox* const mailbox = TSimpleMailbox::Get(lineHint, x); +#if (!defined(_tsan_enabled_)) + Y_VERIFY_DEBUG(mailbox->Type == (ui32)x->MailboxType); +#endif + mailbox->Queue.Push(ev.Release()); + if (mailbox->MarkForSchedule()) { + RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast()); + executorPool->ScheduleActivation(hint); + } + } + return true; + case TMailboxType::Revolving: { + // The actorid could be stale and coming from a different machine. If the local process has restarted, then + // the stale actorid coming from a remote machine might be referencing an actor with a simple mailbox, + // which is smaller than a revolving mailbox. In that case the 'lineHint' index might be greater than the actual + // array size. Normally it is ok to store a stale event into another actor's valid mailbox, because Receive will + // compare the receiver actor id and discard the stale event. But in this case we should discard the event right away + // instead of trying to enqueue it to a mailbox at an invalid address.
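+ // For example (a worked illustration of the constants above, not extra logic): a 262144-byte line + // holds up to 4095 64-byte simple mailboxes after its 64-byte header, but only 2047 128-byte + // revolving mailboxes, so a stale simple-mailbox hint above 2047 would point past the end of the line.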
+ // NOTE: lineHint is 1-based + static_assert(TSimpleMailbox::AlignedSize() <= TRevolvingMailbox::AlignedSize(), + "We expect that one line can store more simple mailboxes than revolving mailboxes"); + if (lineHint > TRevolvingMailbox::MaxMailboxesInLine()) + return false; + + TRevolvingMailbox* const mailbox = TRevolvingMailbox::Get(lineHint, x); +#if (!defined(_tsan_enabled_)) + Y_VERIFY_DEBUG(mailbox->Type == (ui32)x->MailboxType); +#endif + mailbox->QueueWriter.Push(ev.Release()); + if (mailbox->MarkForSchedule()) { + RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast()); + executorPool->ScheduleActivation(hint); + } + } + return true; + case TMailboxType::HTSwap: { + THTSwapMailbox* const mailbox = THTSwapMailbox::Get(lineHint, x); +#if (!defined(_tsan_enabled_)) + Y_VERIFY_DEBUG(mailbox->Type == (ui32)x->MailboxType); +#endif + mailbox->Queue.Push(ev.Release()); + if (mailbox->MarkForSchedule()) { + RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast()); + executorPool->ScheduleActivation(hint); + } + } + return true; + case TMailboxType::ReadAsFilled: { + if (lineHint > TReadAsFilledMailbox::MaxMailboxesInLine()) + return false; + + TReadAsFilledMailbox* const mailbox = TReadAsFilledMailbox::Get(lineHint, x); +#if (!defined(_tsan_enabled_)) + Y_VERIFY_DEBUG(mailbox->Type == (ui32)x->MailboxType); +#endif + mailbox->Queue.Push(ev.Release()); + if (mailbox->MarkForSchedule()) { + RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast()); + executorPool->ScheduleActivation(hint); + } + } + return true; + case TMailboxType::TinyReadAsFilled: { + if (lineHint > TTinyReadAsFilledMailbox::MaxMailboxesInLine()) + return false; + + TTinyReadAsFilledMailbox* const mailbox = TTinyReadAsFilledMailbox::Get(lineHint, x); +#if (!defined(_tsan_enabled_)) + Y_VERIFY_DEBUG(mailbox->Type == (ui32)x->MailboxType); +#endif + mailbox->Queue.Push(ev.Release()); + if (mailbox->MarkForSchedule()) { + RelaxedStore<NHPTimer::STime>(&mailbox->ScheduleMoment, GetCycleCountFast()); + executorPool->ScheduleActivation(hint); + } + } + return true; + default: + Y_FAIL("unknown mailbox type"); + } + } + + return false; + } + + ui32 TMailboxTable::AllocateMailbox(TMailboxType::EType type, ui64 revolvingCounter) { + ui32 x = TryAllocateMailbox(type, revolvingCounter); + if (x == 0) + x = AllocateNewLine(type); + return x; + } + + ui32 TMailboxTable::TryAllocateMailbox(TMailboxType::EType type, ui64 revolvingCounter) { + switch (type) { + case TMailboxType::Simple: + do { + if (ui32 ret = MailboxCacheSimple.Pop(revolvingCounter)) { + AtomicDecrement(CachedSimpleMailboxes); + return ret; + } + } while (AtomicGet(CachedSimpleMailboxes) > (MailboxCacheSimple.Concurrency * 512)); + return 0; + case TMailboxType::Revolving: + do { + if (ui32 ret = MailboxCacheRevolving.Pop(revolvingCounter)) { + AtomicDecrement(CachedRevolvingMailboxes); + return ret; + } + } while (AtomicGet(CachedRevolvingMailboxes) > (MailboxCacheRevolving.Concurrency * 512)); + return 0; + case TMailboxType::HTSwap: + do { + if (ui32 ret = MailboxCacheHTSwap.Pop(revolvingCounter)) { + AtomicDecrement(CachedHTSwapMailboxes); + return ret; + } + } while (AtomicGet(CachedHTSwapMailboxes) > (MailboxCacheHTSwap.Concurrency * 512)); + return 0; + case TMailboxType::ReadAsFilled: + do { + if (ui32 ret = MailboxCacheReadAsFilled.Pop(revolvingCounter)) { + AtomicDecrement(CachedReadAsFilledMailboxes); + return ret; + } + } while (AtomicGet(CachedReadAsFilledMailboxes) > 
(MailboxCacheReadAsFilled.Concurrency * 512)); + return 0; + case TMailboxType::TinyReadAsFilled: + do { + if (ui32 ret = MailboxCacheTinyReadAsFilled.Pop(revolvingCounter)) { + AtomicDecrement(CachedTinyReadAsFilledMailboxes); + return ret; + } + } while (AtomicGet(CachedTinyReadAsFilledMailboxes) > (MailboxCacheTinyReadAsFilled.Concurrency * 512)); + return 0; + default: + Y_FAIL("Unknown mailbox type"); + } + } + + void TMailboxTable::ReclaimMailbox(TMailboxType::EType type, ui32 hint, ui64 revolvingCounter) { + if (hint != 0) { + switch (type) { + case TMailboxType::Simple: + MailboxCacheSimple.Push(hint, revolvingCounter); + AtomicIncrement(CachedSimpleMailboxes); + break; + case TMailboxType::Revolving: + MailboxCacheRevolving.Push(hint, revolvingCounter); + AtomicIncrement(CachedRevolvingMailboxes); + break; + case TMailboxType::HTSwap: + MailboxCacheHTSwap.Push(hint, revolvingCounter); + AtomicIncrement(CachedHTSwapMailboxes); + break; + case TMailboxType::ReadAsFilled: + MailboxCacheReadAsFilled.Push(hint, revolvingCounter); + AtomicIncrement(CachedReadAsFilledMailboxes); + break; + case TMailboxType::TinyReadAsFilled: + MailboxCacheTinyReadAsFilled.Push(hint, revolvingCounter); + AtomicIncrement(CachedTinyReadAsFilledMailboxes); + break; + default: + Y_FAIL(); + } + } + } + + TMailboxHeader::TMailboxHeader(TMailboxType::EType type) + : ExecutionState(TExecutionState::Free) + , Reserved(0) + , Type(type) + , ActorPack(TMailboxActorPack::Simple) + , Knobs(0) + { + ActorsInfo.Simple.ActorId = 0; + ActorsInfo.Simple.Actor = nullptr; + } + + TMailboxHeader::~TMailboxHeader() { + CleanupActors(); + } + + bool TMailboxHeader::CleanupActors() { + bool done = true; + switch (ActorPack) { + case TMailboxActorPack::Simple: { + if (ActorsInfo.Simple.ActorId != 0) { + delete ActorsInfo.Simple.Actor; + done = false; + } + break; + } + case TMailboxActorPack::Map: { + for (auto& [actorId, actor] : *ActorsInfo.Map.ActorsMap) { + delete actor; + } + delete ActorsInfo.Map.ActorsMap; + done = false; + break; + } + case TMailboxActorPack::Array: { + for (ui64 i = 0; i < ActorsInfo.Array.ActorsCount; ++i) { + delete ActorsInfo.Array.ActorsArray->Actors[i].Actor; + } + delete ActorsInfo.Array.ActorsArray; + done = false; + break; + } + } + ActorPack = TMailboxActorPack::Simple; + ActorsInfo.Simple.ActorId = 0; + ActorsInfo.Simple.Actor = nullptr; + return done; + } + + std::pair<ui32, ui32> TMailboxHeader::CountMailboxEvents(ui64 localActorId, ui32 maxTraverse) { + switch (Type) { + case TMailboxType::Simple: + return static_cast<TMailboxTable::TSimpleMailbox*>(this)->CountSimpleMailboxEvents(localActorId, maxTraverse); + case TMailboxType::Revolving: + return static_cast<TMailboxTable::TRevolvingMailbox*>(this)->CountRevolvingMailboxEvents(localActorId, maxTraverse); + default: + return {0, 0}; + } + } + + TMailboxTable::TSimpleMailbox::TSimpleMailbox() + : TMailboxHeader(TMailboxType::Simple) + , ScheduleMoment(0) + { + } + + TMailboxTable::TSimpleMailbox::~TSimpleMailbox() { + CleanupEvents(); + } + + bool TMailboxTable::TSimpleMailbox::CleanupEvents() { + const bool done = (Queue.Head() == nullptr); + while (IEventHandle* ev = Queue.Pop()) + delete ev; + return done; + } + + std::pair<ui32, ui32> TMailboxTable::TSimpleMailbox::CountSimpleMailboxEvents(ui64 localActorId, ui32 maxTraverse) { + ui32 local = 0; + ui32 total = 0; + + auto it = Queue.ReadIterator(); + while (IEventHandle* x = it.Next()) { + ++total; + if (x->GetRecipientRewrite().LocalId() == localActorId) + ++local; + if (total >= 
maxTraverse) + break; + } + + return std::make_pair(local, total); + } + + TMailboxTable::TRevolvingMailbox::TRevolvingMailbox() + : TMailboxHeader(TMailboxType::Revolving) + , QueueWriter(QueueReader) + , Reserved1(0) + , Reserved2(0) + , ScheduleMoment(0) + { + } + + TMailboxTable::TRevolvingMailbox::~TRevolvingMailbox() { + CleanupEvents(); + } + + bool TMailboxTable::TRevolvingMailbox::CleanupEvents() { + const bool done = (QueueReader.Head() == nullptr); + while (IEventHandle* ev = QueueReader.Pop()) + delete ev; + return done; + } + + std::pair<ui32, ui32> TMailboxTable::TRevolvingMailbox::CountRevolvingMailboxEvents(ui64 localActorId, ui32 maxTraverse) { + ui32 local = 0; + ui32 total = 0; + + auto it = QueueReader.Iterator(); + + while (IEventHandle* x = it.Next()) { + ++total; + if (x->GetRecipientRewrite().LocalId() == localActorId) + ++local; + if (total >= maxTraverse) + break; + } + + return std::make_pair(local, total); + } + + template <typename T> + static ui32 InitNewLine(ui8* x, ui8* end) { + const ui32 sx = T::AlignedSize(); + + for (ui32 index = 1; x + sx <= end; x += sx, ++index) + ::new (x) T(); + + return sx; + } + + ui32 TMailboxTable::AllocateNewLine(TMailboxType::EType type) { + ui8* ptr = (ui8*)malloc(LineSize); + ui8* end = ptr + LineSize; + + const ui32 lineIndex = (ui32)AtomicIncrement(LastAllocatedLine) - 1; + const ui32 lineIndexMask = (lineIndex << LineIndexShift) & LineIndexMask; + + // first 64 bytes is TMailboxLineHeader + TMailboxLineHeader* header = ::new (ptr) TMailboxLineHeader(type, lineIndex); + + ui8* x = ptr + 64; + ui32 sx = 0; + TMailboxCache* cache = nullptr; + TAtomic* counter = nullptr; + + switch (type) { + case TMailboxType::Simple: + sx = InitNewLine<TSimpleMailbox>(x, end); + cache = &MailboxCacheSimple; + counter = &CachedSimpleMailboxes; + break; + case TMailboxType::Revolving: + sx = InitNewLine<TRevolvingMailbox>(x, end); + cache = &MailboxCacheRevolving; + counter = &CachedRevolvingMailboxes; + break; + case TMailboxType::HTSwap: + sx = InitNewLine<THTSwapMailbox>(x, end); + cache = &MailboxCacheHTSwap; + counter = &CachedHTSwapMailboxes; + break; + case TMailboxType::ReadAsFilled: + sx = InitNewLine<TReadAsFilledMailbox>(x, end); + cache = &MailboxCacheReadAsFilled; + counter = &CachedReadAsFilledMailboxes; + break; + case TMailboxType::TinyReadAsFilled: + sx = InitNewLine<TTinyReadAsFilledMailbox>(x, end); + cache = &MailboxCacheTinyReadAsFilled; + counter = &CachedTinyReadAsFilledMailboxes; + break; + default: + Y_FAIL(); + } + + AtomicStore(Lines + lineIndex, header); + + ui32 ret = lineIndexMask | 1; + + ui32 index = 2; + for (ui32 endIndex = LineSize / sx; index != endIndex;) { + const ui32 bufSize = 8; + ui32 buf[bufSize]; + ui32 bufIndex; + for (bufIndex = 0; index != endIndex && bufIndex != bufSize; ++bufIndex, ++index) + buf[bufIndex] = lineIndexMask | index; + cache->PushBulk(buf, bufIndex, index); + AtomicAdd(*counter, bufIndex); + } + + AtomicAdd(AllocatedMailboxCount, index - 1); + + return ret; + } +} diff --git a/library/cpp/actors/core/mailbox.h b/library/cpp/actors/core/mailbox.h new file mode 100644 index 0000000000..0bd9c4d314 --- /dev/null +++ b/library/cpp/actors/core/mailbox.h @@ -0,0 +1,553 @@ +#pragma once + +#include "defs.h" +#include "event.h" +#include "actor.h" +#include "mailbox_queue_simple.h" +#include "mailbox_queue_revolving.h" +#include <library/cpp/actors/util/unordered_cache.h> +#include <library/cpp/threading/queue/mpsc_htswap.h> +#include <library/cpp/threading/queue/mpsc_read_as_filled.h> 
+#include <util/generic/hash.h> +#include <util/system/hp_timer.h> +#include <util/generic/ptr.h> +// TODO: clean all broken arcadia atomic stuff and replace with intrinsics + +namespace NActors { + class IActor; + class IExecutorPool; + + const ui64 ARRAY_CAPACITY = 8; + + // structure of hint: + // 1 bit: is service or direct hint + // 2 bits: pool index + // 17 bits: line + // 12 bits: index of mailbox inside of line + + struct TMailboxHeader { + struct TMailboxActorPack { + enum EType { + Simple = 0, + Array = 1, + Map = 2 + }; + }; + + using TActorMap = THashMap<ui64, IActor*>; + + struct TExecutionState { + enum EState { + // normal states + Inactive = 0, + Scheduled = 1, + Leaving = 2, + Executing = 3, + LeavingMarked = 4, + // states for free mailboxes (they can still be scheduled so we need duplicates) + Free = 5, + FreeScheduled = 6, + FreeLeaving = 7, + FreeExecuting = 8, + FreeLeavingMarked = 9, + }; + }; + + volatile ui32 ExecutionState; + ui32 Reserved : 4; // never changes, always zero + ui32 Type : 4; // never changes + ui32 ActorPack : 2; + ui32 Knobs : 22; + + struct TActorPair { + IActor *Actor; + ui64 ActorId; + }; + + struct alignas(64) TActorArray { + TActorPair Actors[ARRAY_CAPACITY]; + }; + + union TActorsInfo { + TActorPair Simple; + struct { + TActorArray* ActorsArray; + ui64 ActorsCount; + } Array; + struct { + TActorMap* ActorsMap; + } Map; + } ActorsInfo; + + TMailboxHeader(TMailboxType::EType type); + ~TMailboxHeader(); + + bool CleanupActors(); + + // this interface is used exclusively by the executor thread, so the implementation lives there + + bool MarkForSchedule(); // we put something in the queue; check whether we should schedule + + bool LockForExecution(); // we got an activation, try to lock the mailbox + bool LockFromFree(); // try to claim the mailbox from recycled (could fail if another thread is processing garbage) + + void UnlockFromExecution1(); // prepare for releasing the lock + bool UnlockFromExecution2(bool wouldReschedule); // proceed with releasing the lock + bool UnlockAsFree(bool wouldReschedule); // proceed with releasing the lock, but mark the mailbox as free + + bool IsEmpty() const noexcept { + return (ActorPack == TMailboxActorPack::Simple && ActorsInfo.Simple.ActorId == 0); + } + + template<typename T> + void ForEach(T&& callback) noexcept { + switch (ActorPack) { + case TMailboxActorPack::Simple: + if (ActorsInfo.Simple.ActorId) { + callback(ActorsInfo.Simple.ActorId, ActorsInfo.Simple.Actor); + } + break; + + case TMailboxActorPack::Map: + for (const auto& [actorId, actor] : *ActorsInfo.Map.ActorsMap) { + callback(actorId, actor); + } + break; + + case TMailboxActorPack::Array: + for (ui64 i = 0; i < ActorsInfo.Array.ActorsCount; ++i) { + auto& row = ActorsInfo.Array.ActorsArray->Actors[i]; + callback(row.ActorId, row.Actor); + } + break; + } + } + + IActor* FindActor(ui64 localActorId) noexcept { + switch (ActorPack) { + case TMailboxActorPack::Simple: { + if (ActorsInfo.Simple.ActorId == localActorId) + return ActorsInfo.Simple.Actor; + break; + } + case TMailboxActorPack::Map: { + TActorMap::iterator it = ActorsInfo.Map.ActorsMap->find(localActorId); + if (it != ActorsInfo.Map.ActorsMap->end()) + return it->second; + break; + } + case TMailboxActorPack::Array: { + for (ui64 i = 0; i < ActorsInfo.Array.ActorsCount; ++i) { + if (ActorsInfo.Array.ActorsArray->Actors[i].ActorId == localActorId) { + return ActorsInfo.Array.ActorsArray->Actors[i].Actor; + } + } + break; + } + default: + Y_FAIL(); + } + return nullptr; + } + + void AttachActor(ui64 localActorId, IActor* actor) noexcept { +
switch (ActorPack) { + case TMailboxActorPack::Simple: { + if (ActorsInfo.Simple.ActorId == 0) { + ActorsInfo.Simple.ActorId = localActorId; + ActorsInfo.Simple.Actor = actor; + return; + } else { + auto ar = new TActorArray; + ar->Actors[0] = ActorsInfo.Simple; + ar->Actors[1] = TActorPair{actor, localActorId}; + ActorsInfo.Array.ActorsCount = 2; + ActorPack = TMailboxActorPack::Array; + ActorsInfo.Array.ActorsArray = ar; + } + break; + } + case TMailboxActorPack::Map: { + ActorsInfo.Map.ActorsMap->insert(TActorMap::value_type(localActorId, actor)); + break; + } + case TMailboxActorPack::Array: { + if (ActorsInfo.Array.ActorsCount == ARRAY_CAPACITY) { + TActorMap* mp = new TActorMap(); + for (ui64 i = 0; i < ARRAY_CAPACITY; ++i) { + mp->emplace(ActorsInfo.Array.ActorsArray->Actors[i].ActorId, ActorsInfo.Array.ActorsArray->Actors[i].Actor); + } + mp->emplace(localActorId, actor); + ActorPack = TMailboxActorPack::Map; + ActorsInfo.Array.ActorsCount = 0; + delete ActorsInfo.Array.ActorsArray; + ActorsInfo.Map.ActorsMap = mp; + } else { + ActorsInfo.Array.ActorsArray->Actors[ActorsInfo.Array.ActorsCount++] = TActorPair{actor, localActorId}; + } + break; + } + default: + Y_FAIL(); + } + } + + IActor* DetachActor(ui64 localActorId) noexcept { + Y_VERIFY_DEBUG(FindActor(localActorId) != nullptr); + + IActor* actorToDestruct = nullptr; + + switch (ActorPack) { + case TMailboxActorPack::Simple: { + Y_VERIFY(ActorsInfo.Simple.ActorId == localActorId); + actorToDestruct = ActorsInfo.Simple.Actor; + + ActorsInfo.Simple.ActorId = 0; + ActorsInfo.Simple.Actor = nullptr; + break; + } + case TMailboxActorPack::Map: { + TActorMap::iterator it = ActorsInfo.Map.ActorsMap->find(localActorId); + Y_VERIFY(it != ActorsInfo.Map.ActorsMap->end()); + + actorToDestruct = it->second; + ActorsInfo.Map.ActorsMap->erase(it); + + if (ActorsInfo.Map.ActorsMap->size() == ARRAY_CAPACITY) { + auto ar = new TActorArray; + ui64 i = 0; + for (auto& [actorId, actor] : *ActorsInfo.Map.ActorsMap) { + ar->Actors[i++] = TActorPair{actor, actorId}; + } + delete ActorsInfo.Map.ActorsMap; + ActorPack = TMailboxActorPack::Array; + ActorsInfo.Array.ActorsArray = ar; + ActorsInfo.Array.ActorsCount = ARRAY_CAPACITY; + } + break; + } + case TMailboxActorPack::Array: { + bool found = false; + for (ui64 i = 0; i < ActorsInfo.Array.ActorsCount; ++i) { + if (ActorsInfo.Array.ActorsArray->Actors[i].ActorId == localActorId) { + found = true; + actorToDestruct = ActorsInfo.Array.ActorsArray->Actors[i].Actor; + ActorsInfo.Array.ActorsArray->Actors[i] = ActorsInfo.Array.ActorsArray->Actors[ActorsInfo.Array.ActorsCount - 1]; + ActorsInfo.Array.ActorsCount -= 1; + break; + } + } + Y_VERIFY(found); + + if (ActorsInfo.Array.ActorsCount == 1) { + const TActorPair Actor = ActorsInfo.Array.ActorsArray->Actors[0]; + delete ActorsInfo.Array.ActorsArray; + ActorPack = TMailboxActorPack::Simple; + ActorsInfo.Simple = Actor; + } + break; + } + } + + return actorToDestruct; + } + + std::pair<ui32, ui32> CountMailboxEvents(ui64 localActorId, ui32 maxTraverse); + }; + + class TMailboxTable : TNonCopyable { + private: + struct TMailboxLineHeader { + const TMailboxType::EType MailboxType; + const ui32 Index; + // some more stuff in the first cache line, then go the mailboxes + ui8 Padding[52]; + + TMailboxLineHeader(TMailboxType::EType type, ui32 index) + : MailboxType(type) + , Index(index) + { + } + }; + static_assert(sizeof(TMailboxLineHeader) <= 64, "expect sizeof(TMailboxLineHeader) <= 64"); + + constexpr static ui64 MaxLines = 131000; // somewhat less than 2^17.
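+ // A line must cover everything the 12-bit in-line part of a hint can address: + // 4096 slots of 64 bytes each (see the hint layout above), hence 64 * 2^12 bytes.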
+ constexpr static ui64 LineSize = 262144; // 64 * 2^12. + + TAtomic LastAllocatedLine; + TAtomic AllocatedMailboxCount; + + typedef TUnorderedCache<ui32, 512, 4> TMailboxCache; + TMailboxCache MailboxCacheSimple; + TAtomic CachedSimpleMailboxes; + TMailboxCache MailboxCacheRevolving; + TAtomic CachedRevolvingMailboxes; + TMailboxCache MailboxCacheHTSwap; + TAtomic CachedHTSwapMailboxes; + TMailboxCache MailboxCacheReadAsFilled; + TAtomic CachedReadAsFilledMailboxes; + TMailboxCache MailboxCacheTinyReadAsFilled; + TAtomic CachedTinyReadAsFilledMailboxes; + + // and here goes large chunk of lines + // presented as array of static size to avoid sync on access + TMailboxLineHeader* volatile Lines[MaxLines]; + + ui32 AllocateNewLine(TMailboxType::EType type); + ui32 TryAllocateMailbox(TMailboxType::EType type, ui64 revolvingCounter); + + public: + TMailboxTable(); + ~TMailboxTable(); + + bool Cleanup(); // returns true if nothing found to destruct (so nothing new is possible to be created) + + static const ui32 LineIndexShift = 12; + static const ui32 LineIndexMask = 0x1FFFFu << LineIndexShift; + static const ui32 LineHintMask = 0xFFFu; + static const ui32 PoolIndexShift = TActorId::PoolIndexShift; + static const ui32 PoolIndexMask = TActorId::PoolIndexMask; + + static ui32 LineIndex(ui32 hint) { + return ((hint & LineIndexMask) >> LineIndexShift); + } + static ui32 PoolIndex(ui32 hint) { + return TActorId::PoolIndex(hint); + } + + TMailboxHeader* Get(ui32 hint); + ui32 AllocateMailbox(TMailboxType::EType type, ui64 revolvingCounter); + void ReclaimMailbox(TMailboxType::EType type, ui32 hint, ui64 revolvingCounter); + ui64 GetAllocatedMailboxCount() const { + return RelaxedLoad(&AllocatedMailboxCount); + } + + bool SendTo(TAutoPtr<IEventHandle>& ev, IExecutorPool* executorPool); + + struct TSimpleMailbox: public TMailboxHeader { + // 4 bytes - state + // 4 bytes - knobs + // 8 bytes - actorid + // 8 bytes - actor* + TSimpleMailboxQueue<IEventHandle*, 64> Queue; // 24 + 8 bytes (body, lock) + NHPTimer::STime ScheduleMoment; + + TSimpleMailbox(); + ~TSimpleMailbox(); + + IEventHandle* Pop() { + return Queue.Pop(); + } + IEventHandle* Head() { + return Queue.Head(); + } + + static TSimpleMailbox* Get(ui32 hint, void* line) { + return (TSimpleMailbox*)((ui8*)line + hint * 64); // + } + static const TMailboxType::EType MailboxType = TMailboxType::Simple; + constexpr static ui32 AlignedSize() { + return ((sizeof(TSimpleMailbox) + 63) / 64) * 64; + } + + std::pair<ui32, ui32> CountSimpleMailboxEvents(ui64 localActorId, ui32 maxTraverse); + bool CleanupEvents(); + }; + + static_assert(sizeof(TSimpleMailbox) == 64, "expect sizeof(TSimpleMailbox) == 64"); + + struct TRevolvingMailbox: public TMailboxHeader { + // 4 bytes - state + // 4 bytes - knobs + // 8 bytes - actorid + // 8 bytes - actor* + TRevolvingMailboxQueue<IEventHandle*, 3, 128>::TReader QueueReader; // 8 * 3 + 4 * 3 + (padding): 40 bytes + // here goes next cache-line, so less writers<-> reader interference + TRevolvingMailboxQueue<IEventHandle*, 3, 128>::TWriter QueueWriter; // 8 * 3 + 4 * 3 + 8 : 48 bytes + ui32 Reserved1; + ui32 Reserved2; + NHPTimer::STime ScheduleMoment; + + TRevolvingMailbox(); + ~TRevolvingMailbox(); + + IEventHandle* Pop() { + return QueueReader.Pop(); + } + IEventHandle* Head() { + return QueueReader.Head(); + } + + static TRevolvingMailbox* Get(ui32 hint, void* line) { + return (TRevolvingMailbox*)((ui8*)line + 64 + (hint - 1) * 128); + } + + constexpr static ui64 MaxMailboxesInLine() { + return (LineSize - 64) / 
AlignedSize(); + } + static const TMailboxType::EType MailboxType = TMailboxType::Revolving; + constexpr static ui32 AlignedSize() { + return ((sizeof(TRevolvingMailbox) + 63) / 64) * 64; + } + + std::pair<ui32, ui32> CountRevolvingMailboxEvents(ui64 localActorId, ui32 maxTraverse); + bool CleanupEvents(); + }; + + static_assert(sizeof(TRevolvingMailbox) == 128, "expect sizeof(TRevolvingMailbox) == 128"); + + struct THTSwapMailbox: public TMailboxHeader { + using TQueueType = NThreading::THTSwapQueue<IEventHandle*>; + + TQueueType Queue; + NHPTimer::STime ScheduleMoment; + char Padding_[16]; + + THTSwapMailbox() + : TMailboxHeader(TMailboxType::HTSwap) + , ScheduleMoment(0) + { + } + + ~THTSwapMailbox() { + CleanupEvents(); + } + + IEventHandle* Pop() { + return Queue.Pop(); + } + + IEventHandle* Head() { + return Queue.Peek(); + } + + static THTSwapMailbox* Get(ui32 hint, void* line) { + return (THTSwapMailbox*)((ui8*)line + 64 + (hint - 1) * 64); + } + + constexpr static ui64 MaxMailboxesInLine() { + return (LineSize - 64) / AlignedSize(); + } + + static const TMailboxType::EType MailboxType = TMailboxType::HTSwap; + + constexpr static ui32 AlignedSize() { + return ((sizeof(THTSwapMailbox) + 63) / 64) * 64; + } + + bool CleanupEvents() { + const bool done = (Queue.Peek() == nullptr); + while (IEventHandle* ev = Queue.Pop()) + delete ev; + return done; + } + }; + + static_assert(sizeof(THTSwapMailbox) == 64, + "expect sizeof(THTSwapMailbox) == 64"); + + struct TReadAsFilledMailbox: public TMailboxHeader { + using TQueueType = NThreading::TReadAsFilledQueue<IEventHandle>; + + TQueueType Queue; + NHPTimer::STime ScheduleMoment; + char Padding_[8]; + + TReadAsFilledMailbox() + : TMailboxHeader(TMailboxType::ReadAsFilled) + , ScheduleMoment(0) + { + } + + ~TReadAsFilledMailbox() { + CleanupEvents(); + } + + IEventHandle* Pop() { + return Queue.Pop(); + } + + IEventHandle* Head() { + return Queue.Peek(); + } + + static TReadAsFilledMailbox* Get(ui32 hint, void* line) { + return (TReadAsFilledMailbox*)((ui8*)line + 64 + (hint - 1) * 192); + } + + constexpr static ui64 MaxMailboxesInLine() { + return (LineSize - 64) / AlignedSize(); + } + + static const TMailboxType::EType MailboxType = + TMailboxType::ReadAsFilled; + + constexpr static ui32 AlignedSize() { + return ((sizeof(TReadAsFilledMailbox) + 63) / 64) * 64; + } + + bool CleanupEvents() { + const bool done = (Queue.Peek() == nullptr); + while (IEventHandle* ev = Queue.Pop()) + delete ev; + return done; + } + }; + + static_assert(sizeof(TReadAsFilledMailbox) == 192, + "expect sizeof(TReadAsFilledMailbox) == 192"); + + struct TTinyReadAsFilledMailbox: public TMailboxHeader { + using TQueueType = NThreading::TReadAsFilledQueue< + IEventHandle, + NThreading::TRaFQueueBunchSize<4>>; + + TQueueType Queue; + NHPTimer::STime ScheduleMoment; + char Padding_[8]; + + TTinyReadAsFilledMailbox() + : TMailboxHeader(TMailboxType::TinyReadAsFilled) + , ScheduleMoment(0) + { + } + + ~TTinyReadAsFilledMailbox() { + CleanupEvents(); + } + + IEventHandle* Pop() { + return Queue.Pop(); + } + + IEventHandle* Head() { + return Queue.Peek(); + } + + static TTinyReadAsFilledMailbox* Get(ui32 hint, void* line) { + return (TTinyReadAsFilledMailbox*)((ui8*)line + 64 + (hint - 1) * 192); + } + + constexpr static ui64 MaxMailboxesInLine() { + return (LineSize - 64) / AlignedSize(); + } + + static const TMailboxType::EType MailboxType = + TMailboxType::TinyReadAsFilled; + + constexpr static ui32 AlignedSize() { + return ((sizeof(TTinyReadAsFilledMailbox) + 63) / 64) * 
64; + } + + bool CleanupEvents() { + const bool done = (Queue.Peek() == nullptr); + while (IEventHandle* ev = Queue.Pop()) + delete ev; + return done; + } + }; + + static_assert(sizeof(TTinyReadAsFilledMailbox) == 192, + "expect sizeof(TTinyReadAsFilledMailbox) == 192"); + }; +} diff --git a/library/cpp/actors/core/mailbox_queue_revolving.h b/library/cpp/actors/core/mailbox_queue_revolving.h new file mode 100644 index 0000000000..b0e78a18db --- /dev/null +++ b/library/cpp/actors/core/mailbox_queue_revolving.h @@ -0,0 +1,214 @@ +#pragma once + +#include "defs.h" +#include <library/cpp/actors/util/queue_chunk.h> + +namespace NActors { + // add some concurrency to the basic queue to avoid hangs under contention (we pay with memory, so use it only when you really expect contention) + // ordering: every completed push is guaranteed to be seen before any not-yet-initiated push. Parallel pushes could reorder (and that is natural for concurrent queues). + // try to place the reader and writers on different cache lines to avoid congestion between them. + // if strict ordering does not matter, look at TManyOneQueue. + + template <typename T, ui32 TWriteConcurrency = 3, ui32 TSize = 128> + class TRevolvingMailboxQueue { + static_assert(std::is_integral<T>::value || std::is_pointer<T>::value, "expect std::is_integral<T>::value || std::is_pointer<T>::value"); + + struct TValTagPair { + volatile T Value; + volatile ui64 Tag; + }; + + typedef TQueueChunk<TValTagPair, TSize> TChunk; + + static_assert(sizeof(TAtomic) == sizeof(TChunk*), "expect sizeof(TAtomic) == sizeof(TChunk*)"); + static_assert(sizeof(TAtomic) == sizeof(ui64), "expect sizeof(TAtomic) == sizeof(ui64)"); + + public: + class TWriter; + + class TReader { + TChunk* ReadFrom[TWriteConcurrency]; + ui32 ReadPosition[TWriteConcurrency]; + + friend class TRevolvingMailboxQueue<T, TWriteConcurrency, TSize>::TWriter; // for access to ReadFrom in constructor + + bool ChunkHead(ui32 idx, ui64* tag, T* value) { + TChunk* head = ReadFrom[idx]; + const ui32 pos = ReadPosition[idx]; + if (pos != TChunk::EntriesCount) { + if (const T xval = AtomicLoad(&head->Entries[pos].Value)) { + const ui64 xtag = head->Entries[pos].Tag; + if (xtag < *tag) { + *value = xval; + *tag = xtag; + return true; + } + } + } else if (TChunk* next = AtomicLoad(&head->Next)) { + ReadFrom[idx] = next; + delete head; + ReadPosition[idx] = 0; + return ChunkHead(idx, tag, value); + } + + return false; + } + + T Head(bool pop) { + ui64 tag = Max<ui64>(); + T ret = T{}; + ui32 idx = 0; + + for (ui32 i = 0; i < TWriteConcurrency; ++i) + if (ChunkHead(i, &tag, &ret)) + idx = i; + + // w/o second pass we could reorder updates with 'already scanned' range + if (ret) { + for (ui32 i = 0; i < TWriteConcurrency; ++i) + if (ChunkHead(i, &tag, &ret)) + idx = i; + } + + if (pop && ret) + ++ReadPosition[idx]; + + return ret; + } + + public: + TReader() { + for (ui32 i = 0; i != TWriteConcurrency; ++i) { + ReadFrom[i] = new TChunk(); + ReadPosition[i] = 0; + } + } + + ~TReader() { + Y_VERIFY_DEBUG(Head() == 0); + for (ui32 i = 0; i < TWriteConcurrency; ++i) + delete ReadFrom[i]; + } + + T Pop() { + return Head(true); + } + + T Head() { + return Head(false); + } + + class TReadIterator { + TChunk* ReadFrom[TWriteConcurrency]; + ui32 ReadPosition[TWriteConcurrency]; + + bool ChunkHead(ui32 idx, ui64* tag, T* value) { + TChunk* head = ReadFrom[idx]; + const ui32 pos = ReadPosition[idx]; + if (pos != TChunk::EntriesCount) { + if (const T xval = AtomicLoad(&head->Entries[pos].Value)) { + const ui64 xtag =
head->Entries[pos].Tag; + if (xtag < *tag) { + *value = xval; + *tag = xtag; + return true; + } + } + } else if (TChunk* next = AtomicLoad(&head->Next)) { + ReadFrom[idx] = next; + ReadPosition[idx] = 0; + return ChunkHead(idx, tag, value); + } + + return false; + } + + public: + TReadIterator(TChunk* const* readFrom, const ui32* readPosition) { + memcpy(ReadFrom, readFrom, TWriteConcurrency * sizeof(TChunk*)); + memcpy(ReadPosition, readPosition, TWriteConcurrency * sizeof(ui32)); + } + + T Next() { + ui64 tag = Max<ui64>(); + T ret = T{}; + ui32 idx = 0; + + for (ui32 i = 0; i < TWriteConcurrency; ++i) + if (ChunkHead(i, &tag, &ret)) + idx = i; + + // w/o second pass we could reorder updates with 'already scanned' range + if (ret) { + for (ui32 i = 0; i < TWriteConcurrency; ++i) + if (ChunkHead(i, &tag, &ret)) + idx = i; + } + + if (ret) + ++ReadPosition[idx]; + + return ret; + } + }; + + TReadIterator Iterator() const { + return TReadIterator(ReadFrom, ReadPosition); + } + }; + + class TWriter { + TChunk* volatile WriteTo[TWriteConcurrency]; + volatile ui64 Tag; + ui32 WritePosition[TWriteConcurrency]; + + public: + TWriter(const TReader& reader) + : Tag(0) + { + for (ui32 i = 0; i != TWriteConcurrency; ++i) { + WriteTo[i] = reader.ReadFrom[i]; + WritePosition[i] = 0; + } + } + + bool TryPush(T x) { + Y_VERIFY(x != 0); + + for (ui32 i = 0; i != TWriteConcurrency; ++i) { + if (RelaxedLoad(&WriteTo[i]) != nullptr) { + if (TChunk* writeTo = AtomicSwap(&WriteTo[i], nullptr)) { + const ui64 nextTag = AtomicIncrement(Tag); + Y_VERIFY_DEBUG(nextTag < Max<ui64>()); + const ui32 writePosition = WritePosition[i]; + if (writePosition != TChunk::EntriesCount) { + writeTo->Entries[writePosition].Tag = nextTag; + AtomicStore(&writeTo->Entries[writePosition].Value, x); + ++WritePosition[i]; + } else { + TChunk* next = new TChunk(); + next->Entries[0].Tag = nextTag; + next->Entries[0].Value = x; + AtomicStore(&writeTo->Next, next); + writeTo = next; + WritePosition[i] = 1; + } + AtomicStore(WriteTo + i, writeTo); + return true; + } + } + } + return false; + } + + ui32 Push(T x) { + ui32 spins = 0; + while (!TryPush(x)) { + ++spins; + SpinLockPause(); + } + return spins; + } + }; + }; +} diff --git a/library/cpp/actors/core/mailbox_queue_simple.h b/library/cpp/actors/core/mailbox_queue_simple.h new file mode 100644 index 0000000000..2e44c21adb --- /dev/null +++ b/library/cpp/actors/core/mailbox_queue_simple.h @@ -0,0 +1,34 @@ +#pragma once + +#include "defs.h" +#include <library/cpp/actors/util/ticket_lock.h> +#include <library/cpp/actors/util/queue_oneone_inplace.h> + +namespace NActors { + // a dead-simple one-one queue, based on the serializability guarantees of x64 and a ticket lock to ensure writer unicity.
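+ // Writers serialize on the ticket lock (Push returns the spin count, a cheap contention signal), + // while the single reader calls Head/Pop without taking the lock at all.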
+ template <typename T, ui32 TSize> + class TSimpleMailboxQueue { + TOneOneQueueInplace<T, TSize> Queue; + TTicketLock Lock; + + public: + ui32 Push(T x) noexcept { + const ui32 spins = Lock.Acquire(); + Queue.Push(x); + Lock.Release(); + return spins; + } + + T Head() { + return Queue.Head(); + } + + T Pop() { + return Queue.Pop(); + } + + typename TOneOneQueueInplace<T, TSize>::TReadIterator ReadIterator() { + return Queue.Iterator(); + } + }; +} diff --git a/library/cpp/actors/core/memory_track.cpp b/library/cpp/actors/core/memory_track.cpp new file mode 100644 index 0000000000..5f422116be --- /dev/null +++ b/library/cpp/actors/core/memory_track.cpp @@ -0,0 +1,38 @@ +#include "memory_track.h" +#include "memory_tracker.h" + +namespace NActors { +namespace NMemory { + +namespace NPrivate { + +TThreadLocalInfo::TThreadLocalInfo() + : Metrics(TMemoryTracker::Instance()->GetCount()) +{ + TMemoryTracker::Instance()->OnCreateThread(this); +} + +TThreadLocalInfo::~TThreadLocalInfo() { + TMemoryTracker::Instance()->OnDestroyThread(this); +} + +TMetric* TThreadLocalInfo::GetMetric(size_t index) { + if (Y_UNLIKELY(index >= Metrics.size())) { + return &Null; + } + return &Metrics[index]; +} + +const std::vector<TMetric>& TThreadLocalInfo::GetMetrics() const { + return Metrics; +} + +size_t TBaseLabel::RegisterStaticMemoryLabel(const char* name, bool hasSensor) { + return TMemoryTracker::Instance()->RegisterStaticMemoryLabel(name, hasSensor); +} + +} + +} +} + diff --git a/library/cpp/actors/core/memory_track.h b/library/cpp/actors/core/memory_track.h new file mode 100644 index 0000000000..6035333eeb --- /dev/null +++ b/library/cpp/actors/core/memory_track.h @@ -0,0 +1,293 @@ +#pragma once + +#include <vector> + +#include <util/system/type_name.h> +#include <util/thread/singleton.h> + +#define ENABLE_MEMORY_TRACKING + +namespace NActors { +namespace NMemory { + +namespace NPrivate { + +class TMetric { + std::atomic<ssize_t> Memory; + std::atomic<ssize_t> Count; + + void Copy(const TMetric& other) { + Memory.store(other.GetMemory(), std::memory_order_relaxed); + Count.store(other.GetCount(), std::memory_order_relaxed); + } + +public: + TMetric() + : Memory(0) + , Count(0) + {} + + inline TMetric(const TMetric& other) { + Copy(other); + } + + inline TMetric(TMetric&& other) { + Copy(other); + } + + inline TMetric& operator=(const TMetric& other) { + Copy(other); + return *this; + } + + inline TMetric& operator=(TMetric&& other) { + Copy(other); + return *this; + } + + inline ssize_t GetMemory() const { + return Memory.load(std::memory_order_relaxed); + } + inline void SetMemory(ssize_t value) { + Memory.store(value, std::memory_order_relaxed); + } + + inline ssize_t GetCount() const { + return Count.load(std::memory_order_relaxed); + } + inline void SetCount(ssize_t value) { + Count.store(value, std::memory_order_relaxed); + } + + inline void operator+=(const TMetric& other) { + SetMemory(GetMemory() + other.GetMemory()); + SetCount(GetCount() + other.GetCount()); + } + + inline void CalculatePeak(const TMetric& other) { + SetMemory(Max(GetMemory(), other.GetMemory())); + SetCount(Max(GetCount(), other.GetCount())); + } + + inline void Add(size_t size) { + SetMemory(GetMemory() + size); + SetCount(GetCount() + 1); + } + + inline void Sub(size_t size) { + SetMemory(GetMemory() - size); + SetCount(GetCount() - 1); + } +}; + + +class TThreadLocalInfo { +public: + TThreadLocalInfo(); + ~TThreadLocalInfo(); + + TMetric* GetMetric(size_t index); + const std::vector<TMetric>& GetMetrics() const; + +private: 
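+ // Per-thread metric slots, one per registered label index; sized from + // TMemoryTracker::GetCount() in the constructor. Out-of-range lookups in + // GetMetric fall back to the shared Null metric below.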
+ std::vector<TMetric> Metrics; + + inline static TMetric Null = {}; +}; + + +class TBaseLabel { +protected: + static size_t RegisterStaticMemoryLabel(const char* name, bool hasSensor); + + inline static TMetric* GetLocalMetric(size_t index) { + return FastTlsSingleton<TThreadLocalInfo>()->GetMetric(index); + } +}; + + +template <const char* Name> +class TNameLabel + : TBaseLabel +{ +public: + static void Add(size_t size) { +#if defined(ENABLE_MEMORY_TRACKING) + Y_UNUSED(MetricInit); + + if (Y_UNLIKELY(!Metric)) { + Metric = GetLocalMetric(Index); + } + + Metric->Add(size); +#else + Y_UNUSED(size); +#endif + } + + static void Sub(size_t size) { +#if defined(ENABLE_MEMORY_TRACKING) + Y_UNUSED(MetricInit); + + if (Y_UNLIKELY(!Metric)) { + Metric = GetLocalMetric(Index); + } + + Metric->Sub(size); +#else + Y_UNUSED(size); +#endif + } + +private: +#if defined(ENABLE_MEMORY_TRACKING) + inline static size_t Index = Max<size_t>(); + inline static struct TMetricInit { + TMetricInit() { + Index = RegisterStaticMemoryLabel(Name, true); + } + } MetricInit; + + inline static thread_local TMetric* Metric = nullptr; +#endif +}; + + +template <typename TType> +class TTypeLabel + : TBaseLabel +{ +public: + static void Add(size_t size) { +#if defined(ENABLE_MEMORY_TRACKING) + Y_UNUSED(MetricInit); + + if (Y_UNLIKELY(!Metric)) { + Metric = GetLocalMetric(Index); + } + + Metric->Add(size); +#else + Y_UNUSED(size); +#endif + } + + static void Sub(size_t size) { +#if defined(ENABLE_MEMORY_TRACKING) + Y_UNUSED(MetricInit); + + if (Y_UNLIKELY(!Metric)) { + Metric = GetLocalMetric(Index); + } + + Metric->Sub(size); +#else + Y_UNUSED(size); +#endif + } + +private: +#if defined(ENABLE_MEMORY_TRACKING) + inline static size_t Index = Max<size_t>(); + inline static struct TMetricInit { + TMetricInit() { + Index = RegisterStaticMemoryLabel(TypeName<TType>().c_str(), false); + } + } MetricInit; + + inline static thread_local TMetric* Metric = nullptr; +#endif +}; + + +template <typename T> +struct TTrackHelper { +#if defined(ENABLE_MEMORY_TRACKING) + void* operator new(size_t size) { + T::Add(size); + return malloc(size); + } + + void* operator new[](size_t size) { + T::Add(size); + return malloc(size); + } + + void operator delete(void* ptr, size_t size) { + T::Sub(size); + free(ptr); + } + + void operator delete[](void* ptr, size_t size) { + T::Sub(size); + free(ptr); + } +#endif +}; + +template <typename TType, typename T> +struct TAllocHelper { + typedef size_t size_type; + typedef TType value_type; + typedef TType* pointer; + typedef const TType* const_pointer; + + struct propagate_on_container_copy_assignment : public std::false_type {}; + struct propagate_on_container_move_assignment : public std::false_type {}; + struct propagate_on_container_swap : public std::false_type {}; + + pointer allocate(size_type n, const void* hint = nullptr) { + Y_UNUSED(hint); + auto size = n * sizeof(TType); + T::Add(size); + return (pointer)malloc(size); + } + + void deallocate(pointer ptr, size_t n) { + auto size = n * sizeof(TType); + T::Sub(size); + free((void*)ptr); + } +}; + +} // NPrivate + + +template <const char* Name> +using TLabel = NPrivate::TNameLabel<Name>; + +template <typename TType, const char* Name = nullptr> +struct TTrack + : public NPrivate::TTrackHelper<NPrivate::TNameLabel<Name>> +{ +}; + +template <typename TType> +struct TTrack<TType, nullptr> + : public NPrivate::TTrackHelper<NPrivate::TTypeLabel<TType>> +{ +}; + +template <typename TType, const char* Name = nullptr> +struct TAlloc + : public 
NPrivate::TAllocHelper<TType, NPrivate::TNameLabel<Name>> +{ + template<typename U> + struct rebind { + typedef TAlloc<U, Name> other; + }; +}; + +template <typename TType> +struct TAlloc<TType, nullptr> + : public NPrivate::TAllocHelper<TType, NPrivate::TTypeLabel<TType>> +{ + template<typename U> + struct rebind { + typedef TAlloc<U> other; + }; +}; + +} +} + diff --git a/library/cpp/actors/core/memory_tracker.cpp b/library/cpp/actors/core/memory_tracker.cpp new file mode 100644 index 0000000000..8a12452c71 --- /dev/null +++ b/library/cpp/actors/core/memory_tracker.cpp @@ -0,0 +1,103 @@ +#include "memory_tracker.h" + +#include <util/generic/xrange.h> + +namespace NActors { +namespace NMemory { + +namespace NPrivate { + +TMemoryTracker* TMemoryTracker::Instance() { + return SingletonWithPriority<TMemoryTracker, 0>(); +} + +void TMemoryTracker::Initialize() { + GlobalMetrics.resize(Indices.size()); +} + +const std::map<TString, size_t>& TMemoryTracker::GetMetricIndices() const { + return Indices; +} + +const std::unordered_set<size_t>& TMemoryTracker::GetSensors() const { + return Sensors; +} + +TString TMemoryTracker::GetName(size_t index) const { + return Names[index]; +} + +size_t TMemoryTracker::GetCount() const { + return Indices.size(); +} + +void TMemoryTracker::GatherMetrics(std::vector<TMetric>& metrics) const { + metrics.resize(0); + auto count = GetCount(); + + if (!count || GlobalMetrics.size() != count) { + return; + } + + TReadGuard guard(LockThreadInfo); + + metrics.resize(count); + for (size_t i : xrange(count)) { + metrics[i] += GlobalMetrics[i]; + } + + for (auto info : ThreadInfo) { + auto& localMetrics = info->GetMetrics(); + if (localMetrics.size() == count) { + for (size_t i : xrange(count)) { + metrics[i] += localMetrics[i]; + } + } + } +} + +size_t TMemoryTracker::RegisterStaticMemoryLabel(const char* name, bool hasSensor) { + size_t index = 0; + auto found = Indices.find(name); + if (found == Indices.end()) { + TString str(name); + auto next = Names.size(); + Indices.emplace(str, next); + Names.push_back(str); + index = next; + } else { + index = found->second; + } + + if (hasSensor) { + Sensors.emplace(index); + } + return index; +} + +void TMemoryTracker::OnCreateThread(TThreadLocalInfo* info) { + TWriteGuard guard(LockThreadInfo); + ThreadInfo.insert(info); +} + +void TMemoryTracker::OnDestroyThread(TThreadLocalInfo* info) { + TWriteGuard guard(LockThreadInfo); + + auto count = GetCount(); + if (count && GlobalMetrics.size() == count) { + const auto& localMetrics = info->GetMetrics(); + if (localMetrics.size() == count) { + for (size_t i : xrange(count)) { + GlobalMetrics[i] += localMetrics[i]; + } + } + } + + ThreadInfo.erase(info); +} + +} + +} +} + diff --git a/library/cpp/actors/core/memory_tracker.h b/library/cpp/actors/core/memory_tracker.h new file mode 100644 index 0000000000..e74508191b --- /dev/null +++ b/library/cpp/actors/core/memory_tracker.h @@ -0,0 +1,53 @@ +#pragma once + +#include "memory_track.h" + +#include <map> +#include <unordered_map> +#include <unordered_set> + +#include <util/system/rwlock.h> + +namespace NActors { +namespace NMemory { + +namespace NPrivate { + +class TMemoryTracker { +public: + static TMemoryTracker* Instance(); + + void Initialize(); + + const std::map<TString, size_t>& GetMetricIndices() const; + const std::unordered_set<size_t>& GetSensors() const; + TString GetName(size_t index) const; + size_t GetCount() const; + + void GatherMetrics(std::vector<TMetric>& metrics) const; + +private: + size_t 
RegisterStaticMemoryLabel(const char* name, bool hasSensor); + + void OnCreateThread(TThreadLocalInfo* info); + void OnDestroyThread(TThreadLocalInfo* info); + +private: + std::map<TString, size_t> Indices; + std::vector<TString> Names; + + std::vector<TMetric> GlobalMetrics; + + std::unordered_set<size_t> Sensors; + + std::unordered_set<TThreadLocalInfo*> ThreadInfo; + TRWMutex LockThreadInfo; + + friend class TThreadLocalInfo; + friend class TBaseLabel; +}; + +} + +} +} diff --git a/library/cpp/actors/core/memory_tracker_ut.cpp b/library/cpp/actors/core/memory_tracker_ut.cpp new file mode 100644 index 0000000000..d168214da6 --- /dev/null +++ b/library/cpp/actors/core/memory_tracker_ut.cpp @@ -0,0 +1,262 @@ +#include "memory_tracker.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/system/hp_timer.h> +#include <util/system/thread.h> + +namespace NActors { +namespace NMemory { + +Y_UNIT_TEST_SUITE(TMemoryTrackerTest) { + +#if defined(ENABLE_MEMORY_TRACKING) + +using namespace NPrivate; + +size_t FindLabelIndex(const char* label) { + auto indices = TMemoryTracker::Instance()->GetMetricIndices(); + auto it = indices.find(label); + UNIT_ASSERT(it != indices.end()); + return it->second; +} + + +struct TTypeLabeled + : public NActors::NMemory::TTrack<TTypeLabeled> +{ + char payload[16]; +}; + +static constexpr char NamedLabel[] = "NamedLabel"; + +struct TNameLabeled + : public NActors::NMemory::TTrack<TNameLabeled, NamedLabel> +{ + char payload[32]; +}; + +Y_UNIT_TEST(Gathering) +{ + TMemoryTracker::Instance()->Initialize(); + + auto* typed = new TTypeLabeled; + auto* typedArray = new TTypeLabeled[3]; + + auto* named = new TNameLabeled; + auto* namedArray = new TNameLabeled[5]; + NActors::NMemory::TLabel<NamedLabel>::Add(100); + + std::vector<TMetric> metrics; + TMemoryTracker::Instance()->GatherMetrics(metrics); + + auto typeIndex = FindLabelIndex(TypeName<TTypeLabeled>().c_str()); + UNIT_ASSERT(typeIndex < metrics.size()); + UNIT_ASSERT(metrics[typeIndex].GetMemory() == sizeof(TTypeLabeled) * 4 + sizeof(size_t)); + UNIT_ASSERT(metrics[typeIndex].GetCount() == 2); + + auto nameIndex = FindLabelIndex(NamedLabel); + UNIT_ASSERT(nameIndex < metrics.size()); + UNIT_ASSERT(metrics[nameIndex].GetMemory() == sizeof(TNameLabeled) * 6 + sizeof(size_t) + 100); + UNIT_ASSERT(metrics[nameIndex].GetCount() == 3); + + NActors::NMemory::TLabel<NamedLabel>::Sub(100); + delete [] namedArray; + delete named; + + delete [] typedArray; + delete typed; + + TMemoryTracker::Instance()->GatherMetrics(metrics); + + UNIT_ASSERT(metrics[typeIndex].GetMemory() == 0); + UNIT_ASSERT(metrics[typeIndex].GetCount() == 0); + + UNIT_ASSERT(metrics[nameIndex].GetMemory() == 0); + UNIT_ASSERT(metrics[nameIndex].GetCount() == 0); +} + + +static constexpr char InContainerLabel[] = "InContainerLabel"; + +struct TInContainer { + char payload[16]; +}; + +Y_UNIT_TEST(Containers) { + TMemoryTracker::Instance()->Initialize(); + + std::vector<TInContainer, NActors::NMemory::TAlloc<TInContainer>> vecT; + vecT.resize(5); + + std::vector<TInContainer, NActors::NMemory::TAlloc<TInContainer, InContainerLabel>> vecN; + vecN.resize(7); + + using TKey = int; + + std::map<TKey, TInContainer, std::less<TKey>, + NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>>> mapT; + mapT.emplace(0, TInContainer()); + mapT.emplace(1, TInContainer()); + + std::map<TKey, TInContainer, std::less<TKey>, + NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>, InContainerLabel>> mapN; + mapN.emplace(0, TInContainer()); + + 
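As an aside before the container tests continue — a minimal sketch (not part of this commit; the helper name is made up) of how the TMemoryTracker API declared above can be polled from monitoring code:

```cpp
#include <library/cpp/actors/core/memory_tracker.h>

#include <util/stream/output.h>

// Hypothetical helper: dump current totals for every label registered with
// hasSensor == true. Assumes TMemoryTracker::Instance()->Initialize() was
// called once at startup, after all static labels were registered.
void DumpSensoredLabels(IOutputStream& out) {
    using namespace NActors::NMemory::NPrivate;
    auto* tracker = TMemoryTracker::Instance();
    std::vector<TMetric> metrics;
    tracker->GatherMetrics(metrics); // global totals plus all live threads
    for (size_t index : tracker->GetSensors()) {
        if (index < metrics.size()) {
            out << tracker->GetName(index)
                << ": memory=" << metrics[index].GetMemory()
                << ", count=" << metrics[index].GetCount() << "\n";
        }
    }
}
```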
std::unordered_map<TKey, TInContainer, std::hash<TKey>, std::equal_to<TKey>, + NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>>> umapT; + umapT.emplace(0, TInContainer()); + + std::unordered_map<TKey, TInContainer, std::hash<TKey>, std::equal_to<TKey>, + NActors::NMemory::TAlloc<std::pair<const TKey, TInContainer>, InContainerLabel>> umapN; + umapN.emplace(0, TInContainer()); + umapN.emplace(1, TInContainer()); + + std::vector<TMetric> metrics; + TMemoryTracker::Instance()->GatherMetrics(metrics); + + auto indices = TMemoryTracker::Instance()->GetMetricIndices(); + for (auto& [name, index] : indices) { + Cerr << "---- " << name + << ": memory = " << metrics[index].GetMemory() + << ", count = " << metrics[index].GetCount() << Endl; + } + + auto vecTIndex = FindLabelIndex(TypeName<TInContainer>().c_str()); + UNIT_ASSERT(metrics[vecTIndex].GetMemory() >= ssize_t(sizeof(TInContainer) * 5)); + UNIT_ASSERT(metrics[vecTIndex].GetCount() == 1); + + auto labelIndex = FindLabelIndex(InContainerLabel); + UNIT_ASSERT(metrics[labelIndex].GetCount() == 5); + UNIT_ASSERT(metrics[labelIndex].GetMemory() >= ssize_t( + sizeof(TInContainer) * 7 + + sizeof(decltype(mapN)::value_type) + + sizeof(decltype(umapN)::value_type) * 2)); +} + + +static constexpr char InThreadLabel[] = "InThreadLabel"; + +struct TInThread + : public NActors::NMemory::TTrack<TInThread, InThreadLabel> +{ + char payload[16]; +}; + +void* ThreadProc(void*) { + return new TInThread; +} + +Y_UNIT_TEST(Threads) { + TMemoryTracker::Instance()->Initialize(); + + auto index = FindLabelIndex(InThreadLabel); + + auto* object1 = new TInThread; + + std::vector<TMetric> metrics; + TMemoryTracker::Instance()->GatherMetrics(metrics); + UNIT_ASSERT(metrics[index].GetMemory() == sizeof(TInThread)); + UNIT_ASSERT(metrics[index].GetCount() == 1); + + TThread thread(&ThreadProc, nullptr); + thread.Start(); + auto* object2 = static_cast<TInThread*>(thread.Join()); + + TMemoryTracker::Instance()->GatherMetrics(metrics); + UNIT_ASSERT(metrics[index].GetMemory() == sizeof(TInThread) * 2); + UNIT_ASSERT(metrics[index].GetCount() == 2); + + delete object2; + + TMemoryTracker::Instance()->GatherMetrics(metrics); + UNIT_ASSERT(metrics[index].GetMemory() == sizeof(TInThread)); + UNIT_ASSERT(metrics[index].GetCount() == 1); + + delete object1; +} + + +struct TNotTracked { + char payload[16]; +}; + +struct TTracked + : public NActors::NMemory::TTrack<TTracked> +{ + char payload[16]; +}; + +template <typename T> +double MeasureAllocations() { + constexpr size_t objectsCount = 4 << 20; + + std::vector<T*> objects; + objects.resize(objectsCount); + + THPTimer timer; + + for (size_t i = 0; i < objectsCount; ++i) { + objects[i] = new T; + } + + for (size_t i = 0; i < objectsCount; ++i) { + delete objects[i]; + } + + auto seconds = timer.Passed(); + Cerr << "---- objects: " << objectsCount << ", time: " << seconds << Endl; + return seconds; +} + +Y_UNIT_TEST(Performance) { + TMemoryTracker::Instance()->Initialize(); + + constexpr size_t Runs = 16; + + Cerr << "---- warmup" << Endl; + MeasureAllocations<TNotTracked>(); + MeasureAllocations<TTracked>(); + + std::vector<double> noTrack; + std::vector<double> track; + + for (size_t run = 0; run < Runs; ++run) { + Cerr << "---- no track" << Endl; + auto time = MeasureAllocations<TNotTracked>(); + noTrack.push_back(time); + + Cerr << "---- track" << Endl; + time = MeasureAllocations<TTracked>(); + track.push_back(time); + } + + double meanNoTrack = 0, stddevNoTrack = 0; + double meanTrack = 0, stddevTrack = 0; + for 
(size_t i = 0; i < Runs; ++i) { + meanNoTrack += noTrack[i]; + meanTrack += track[i]; + } + meanNoTrack /= Runs; + meanTrack /= Runs; + + auto sqr = [](double val) { return val * val; }; + + for (size_t i = 0; i < Runs; ++i) { + stddevNoTrack += sqr(noTrack[i] - meanNoTrack); + stddevTrack += sqr(track[i] - meanTrack); + } + stddevNoTrack = sqrt(stddevNoTrack / (Runs - 1)); + stddevTrack = sqrt(stddevTrack / (Runs - 1)); + + Cerr << "---- no track - mean: " << meanNoTrack << ", stddev: " << stddevNoTrack << Endl; + Cerr << "---- track - mean: " << meanTrack << ", stddev: " << stddevTrack << Endl; + Cerr << "---- tracking is slower by " << int((meanTrack / meanNoTrack - 1.0) * 100) << "%" << Endl; +} + +#endif + +} + +} +} diff --git a/library/cpp/actors/core/mon.h b/library/cpp/actors/core/mon.h new file mode 100644 index 0000000000..c450f2338e --- /dev/null +++ b/library/cpp/actors/core/mon.h @@ -0,0 +1,234 @@ +#pragma once + +#include "events.h" +#include "event_local.h" +#include <library/cpp/monlib/service/monservice.h> +#include <library/cpp/monlib/service/pages/mon_page.h> + +namespace NActors { + namespace NMon { + enum { + HttpInfo = EventSpaceBegin(NActors::TEvents::ES_MON), + HttpInfoRes, + RemoteHttpInfo, + RemoteHttpInfoRes, + RemoteJsonInfoRes, + RemoteBinaryInfoRes, + End + }; + + static_assert(End < EventSpaceEnd(NActors::TEvents::ES_MON), "expect End < EventSpaceEnd(NActors::TEvents::ES_MON)"); + + // request info from an actor in HTML format + struct TEvHttpInfo: public NActors::TEventLocal<TEvHttpInfo, HttpInfo> { + TEvHttpInfo(const NMonitoring::IMonHttpRequest& request, int subReqId = 0) + : Request(request) + , SubRequestId(subReqId) + { + } + + TEvHttpInfo(const NMonitoring::IMonHttpRequest& request, const TString& userToken) + : Request(request) + , UserToken(userToken) + , SubRequestId(0) + { + } + + const NMonitoring::IMonHttpRequest& Request; + TString UserToken; // built and serialized + // SubRequestId != 0 means that we assemble reply from multiple parts and SubRequestId contains this part id + int SubRequestId; + }; + + // base class for HTTP info response + struct IEvHttpInfoRes: public NActors::TEventLocal<IEvHttpInfoRes, HttpInfoRes> { + enum EContentType { + Html, + Custom, + }; + + IEvHttpInfoRes() { + } + + virtual ~IEvHttpInfoRes() { + } + + virtual void Output(IOutputStream& out) const = 0; + virtual EContentType GetContentType() const = 0; + }; + + // Ready to output HTML in TString + struct TEvHttpInfoRes: public IEvHttpInfoRes { + TEvHttpInfoRes(const TString& answer, int subReqId = 0, EContentType contentType = Html) + : Answer(answer) + , SubRequestId(subReqId) + , ContentType(contentType) + { + } + + void Output(IOutputStream& out) const override { + out << Answer; + } + + EContentType GetContentType() const override { + return ContentType; + } + + const TString Answer; + const int SubRequestId; + const EContentType ContentType; + }; + + struct TEvRemoteHttpInfo: public NActors::TEventBase<TEvRemoteHttpInfo, RemoteHttpInfo> { + TEvRemoteHttpInfo() { + } + + TEvRemoteHttpInfo(const TString& query) + : Query(query) + { + } + + TEvRemoteHttpInfo(const TString& query, HTTP_METHOD method) + : Query(query) + , Method(method) + { + } + + TString Query; + HTTP_METHOD Method; + + TString PathInfo() const { + const size_t pos = Query.find('?'); + return (pos == TString::npos) ? TString() : Query.substr(0, pos); + } + + TCgiParameters Cgi() const { + const size_t pos = Query.find('?'); + return TCgiParameters((pos == TString::npos) ? 
TString() : Query.substr(pos + 1)); + } + + TString ToStringHeader() const override { + return "TEvRemoteHttpInfo"; + } + + bool SerializeToArcadiaStream(TChunkSerializer *serializer) const override { + return serializer->WriteString(&Query); + } + + ui32 CalculateSerializedSize() const override { + return Query.size(); + } + + bool IsSerializable() const override { + return true; + } + + static IEventBase* Load(TEventSerializedData* bufs) { + return new TEvRemoteHttpInfo(bufs->GetString()); + } + + HTTP_METHOD GetMethod() const + { + return Method; + } + }; + + struct TEvRemoteHttpInfoRes: public NActors::TEventBase<TEvRemoteHttpInfoRes, RemoteHttpInfoRes> { + TEvRemoteHttpInfoRes() { + } + + TEvRemoteHttpInfoRes(const TString& html) + : Html(html) + { + } + + TString Html; + + TString ToStringHeader() const override { + return "TEvRemoteHttpInfoRes"; + } + + bool SerializeToArcadiaStream(TChunkSerializer *serializer) const override { + return serializer->WriteString(&Html); + } + + ui32 CalculateSerializedSize() const override { + return Html.size(); + } + + bool IsSerializable() const override { + return true; + } + + static IEventBase* Load(TEventSerializedData* bufs) { + return new TEvRemoteHttpInfoRes(bufs->GetString()); + } + }; + + struct TEvRemoteJsonInfoRes: public NActors::TEventBase<TEvRemoteJsonInfoRes, RemoteJsonInfoRes> { + TEvRemoteJsonInfoRes() { + } + + TEvRemoteJsonInfoRes(const TString& json) + : Json(json) + { + } + + TString Json; + + TString ToStringHeader() const override { + return "TEvRemoteJsonInfoRes"; + } + + bool SerializeToArcadiaStream(TChunkSerializer *serializer) const override { + return serializer->WriteString(&Json); + } + + ui32 CalculateSerializedSize() const override { + return Json.size(); + } + + bool IsSerializable() const override { + return true; + } + + static IEventBase* Load(TEventSerializedData* bufs) { + return new TEvRemoteJsonInfoRes(bufs->GetString()); + } + }; + + struct TEvRemoteBinaryInfoRes: public NActors::TEventBase<TEvRemoteBinaryInfoRes, RemoteBinaryInfoRes> { + TEvRemoteBinaryInfoRes() { + } + + TEvRemoteBinaryInfoRes(const TString& blob) + : Blob(blob) + { + } + + TString Blob; + + TString ToStringHeader() const override { + return "TEvRemoteBinaryInfoRes"; + } + + bool SerializeToArcadiaStream(TChunkSerializer *serializer) const override { + return serializer->WriteString(&Blob); + } + + ui32 CalculateSerializedSize() const override { + return Blob.size(); + } + + bool IsSerializable() const override { + return true; + } + + static IEventBase* Load(TEventSerializedData* bufs) { + return new TEvRemoteBinaryInfoRes(bufs->GetString()); + } + }; + + } + +} diff --git a/library/cpp/actors/core/mon_stats.h b/library/cpp/actors/core/mon_stats.h new file mode 100644 index 0000000000..d55552af0c --- /dev/null +++ b/library/cpp/actors/core/mon_stats.h @@ -0,0 +1,147 @@ +#pragma once + +#include "defs.h" +#include "actor.h" +#include <library/cpp/monlib/metrics/histogram_snapshot.h> +#include <util/system/hp_timer.h> + +namespace NActors { + struct TLogHistogram : public NMonitoring::IHistogramSnapshot { + TLogHistogram() { + memset(Buckets, 0, sizeof(Buckets)); + } + + inline void Add(ui64 val, ui64 inc = 1) { + size_t ind = 0; +#if defined(__clang__) && __clang_major__ == 3 && __clang_minor__ == 7 + asm volatile("" :: + : "memory"); +#endif + if (val > 1) { + ind = GetValueBitCount(val - 1); + } +#if defined(__clang__) && __clang_major__ == 3 && __clang_minor__ == 7 + asm volatile("" :: + : "memory"); +#endif + 
RelaxedStore(&TotalSamples, RelaxedLoad(&TotalSamples) + inc);
+            RelaxedStore(&Buckets[ind], RelaxedLoad(&Buckets[ind]) + inc);
+        }
+
+        void Aggregate(const TLogHistogram& other) {
+            const ui64 inc = RelaxedLoad(&other.TotalSamples);
+            RelaxedStore(&TotalSamples, RelaxedLoad(&TotalSamples) + inc);
+            for (size_t i = 0; i < Y_ARRAY_SIZE(Buckets); ++i) {
+                Buckets[i] += RelaxedLoad(&other.Buckets[i]);
+            }
+        }
+
+        // IHistogramSnapshot
+        ui32 Count() const override {
+            return Y_ARRAY_SIZE(Buckets);
+        }
+
+        NMonitoring::TBucketBound UpperBound(ui32 index) const override {
+            Y_ASSERT(index < Y_ARRAY_SIZE(Buckets));
+            if (index == 0) {
+                return 1;
+            }
+            return NMonitoring::TBucketBound(1ull << (index - 1)) * 2.0;
+        }
+
+        NMonitoring::TBucketValue Value(ui32 index) const override {
+            Y_ASSERT(index < Y_ARRAY_SIZE(Buckets));
+            return Buckets[index];
+        }
+
+        ui64 TotalSamples = 0;
+        ui64 Buckets[65];
+    };
+
+    struct TExecutorPoolStats {
+        ui64 MaxUtilizationTime = 0;
+    };
+
+    struct TExecutorThreadStats {
+        ui64 SentEvents = 0;
+        ui64 ReceivedEvents = 0;
+        ui64 PreemptedEvents = 0; // Number of events that experienced hard preemption
+        ui64 NonDeliveredEvents = 0;
+        ui64 EmptyMailboxActivation = 0;
+        ui64 CpuNs = 0; // nanoseconds the thread was executing on CPU (accounts for preemption)
+        NHPTimer::STime ElapsedTicks = 0;
+        NHPTimer::STime ParkedTicks = 0;
+        NHPTimer::STime BlockedTicks = 0;
+        TLogHistogram ActivationTimeHistogram;
+        TLogHistogram EventDeliveryTimeHistogram;
+        TLogHistogram EventProcessingCountHistogram;
+        TLogHistogram EventProcessingTimeHistogram;
+        TVector<NHPTimer::STime> ElapsedTicksByActivity;
+        TVector<ui64> ReceivedEventsByActivity;
+        TVector<i64> ActorsAliveByActivity; // the sum should be positive, but per-thread values might be negative
+        TVector<ui64> ScheduledEventsByActivity;
+        ui64 PoolActorRegistrations = 0;
+        ui64 PoolDestroyedActors = 0;
+        ui64 PoolAllocatedMailboxes = 0;
+        ui64 MailboxPushedOutBySoftPreemption = 0;
+        ui64 MailboxPushedOutByTime = 0;
+        ui64 MailboxPushedOutByEventCount = 0;
+
+        TExecutorThreadStats(size_t activityVecSize = 1) // must not be empty, as 0 is used as the default
+            : ElapsedTicksByActivity(activityVecSize)
+            , ReceivedEventsByActivity(activityVecSize)
+            , ActorsAliveByActivity(activityVecSize)
+            , ScheduledEventsByActivity(activityVecSize)
+        {}
+
+        template <typename T>
+        static void AggregateOne(TVector<T>& self, const TVector<T>& other) {
+            const size_t selfSize = self.size();
+            const size_t otherSize = other.size();
+            if (selfSize < otherSize)
+                self.resize(otherSize);
+            for (size_t at = 0; at < otherSize; ++at)
+                self[at] += RelaxedLoad(&other[at]);
+        }
+
+        void Aggregate(const TExecutorThreadStats& other) {
+            SentEvents += RelaxedLoad(&other.SentEvents);
+            ReceivedEvents += RelaxedLoad(&other.ReceivedEvents);
+            PreemptedEvents += RelaxedLoad(&other.PreemptedEvents);
+            NonDeliveredEvents += RelaxedLoad(&other.NonDeliveredEvents);
+            EmptyMailboxActivation += RelaxedLoad(&other.EmptyMailboxActivation);
+            CpuNs += RelaxedLoad(&other.CpuNs);
+            ElapsedTicks += RelaxedLoad(&other.ElapsedTicks);
+            ParkedTicks += RelaxedLoad(&other.ParkedTicks);
+            BlockedTicks += RelaxedLoad(&other.BlockedTicks);
+            MailboxPushedOutBySoftPreemption += RelaxedLoad(&other.MailboxPushedOutBySoftPreemption);
+            MailboxPushedOutByTime += RelaxedLoad(&other.MailboxPushedOutByTime);
+            MailboxPushedOutByEventCount += RelaxedLoad(&other.MailboxPushedOutByEventCount);
+
+            ActivationTimeHistogram.Aggregate(other.ActivationTimeHistogram);
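A quick note on the bucket layout above (an illustrative sketch, not part of this commit): TLogHistogram keeps 65 power-of-two buckets, and Add() places a sample v > 1 into bucket GetValueBitCount(v - 1), i.e. under the smallest power of two that is >= v:

```cpp
TLogHistogram h;
h.Add(1); // val <= 1 -> bucket 0, UpperBound(0) == 1
h.Add(5); // GetValueBitCount(4) == 3 -> bucket 3, UpperBound(3) == 8
h.Add(8); // GetValueBitCount(7) == 3 -> bucket 3 as well (8 <= 8)
h.Add(9); // GetValueBitCount(8) == 4 -> bucket 4, UpperBound(4) == 16
// In general UpperBound(i) == 2^i for i > 0, so the buckets cover [0;1], (1;2], (2;4], (4;8], ...
```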
EventDeliveryTimeHistogram.Aggregate(other.EventDeliveryTimeHistogram); + EventProcessingCountHistogram.Aggregate(other.EventProcessingCountHistogram); + EventProcessingTimeHistogram.Aggregate(other.EventProcessingTimeHistogram); + + AggregateOne(ElapsedTicksByActivity, other.ElapsedTicksByActivity); + AggregateOne(ReceivedEventsByActivity, other.ReceivedEventsByActivity); + AggregateOne(ActorsAliveByActivity, other.ActorsAliveByActivity); + AggregateOne(ScheduledEventsByActivity, other.ScheduledEventsByActivity); + + RelaxedStore( + &PoolActorRegistrations, + std::max(RelaxedLoad(&PoolActorRegistrations), RelaxedLoad(&other.PoolActorRegistrations))); + RelaxedStore( + &PoolDestroyedActors, + std::max(RelaxedLoad(&PoolDestroyedActors), RelaxedLoad(&other.PoolDestroyedActors))); + RelaxedStore( + &PoolAllocatedMailboxes, + std::max(RelaxedLoad(&PoolAllocatedMailboxes), RelaxedLoad(&other.PoolAllocatedMailboxes))); + } + + size_t MaxActivityType() const { + return ActorsAliveByActivity.size(); + } + }; + +} diff --git a/library/cpp/actors/core/monotonic.cpp b/library/cpp/actors/core/monotonic.cpp new file mode 100644 index 0000000000..3465149dbe --- /dev/null +++ b/library/cpp/actors/core/monotonic.cpp @@ -0,0 +1,23 @@ +#include "monotonic.h" + +#include <chrono> + +namespace NActors { + + namespace { + // Unfortunately time_since_epoch() is sometimes negative on wine + // Remember initial time point at program start and use offsets from that + std::chrono::steady_clock::time_point MonotonicOffset = std::chrono::steady_clock::now(); + } + + ui64 GetMonotonicMicroSeconds() { + auto microseconds = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - MonotonicOffset).count(); + // Steady clock is supposed to never jump backwards, but it's better to be safe in case of buggy implementations + if (Y_UNLIKELY(microseconds < 0)) { + microseconds = 0; + } + // Add one so we never return zero + return microseconds + 1; + } + +} // namespace NActors diff --git a/library/cpp/actors/core/monotonic.h b/library/cpp/actors/core/monotonic.h new file mode 100644 index 0000000000..6fceb91dbe --- /dev/null +++ b/library/cpp/actors/core/monotonic.h @@ -0,0 +1,111 @@ +#pragma once + +#include <util/datetime/base.h> + +namespace NActors { + + /** + * Returns current monotonic time in microseconds + */ + ui64 GetMonotonicMicroSeconds(); + + /** + * Similar to TInstant, but measuring monotonic time + */ + class TMonotonic : public TTimeBase<TMonotonic> { + using TBase = TTimeBase<TMonotonic>; + + private: + constexpr explicit TMonotonic(TValue value) noexcept + : TBase(value) + { } + + public: + constexpr TMonotonic() noexcept { + } + + static constexpr TMonotonic FromValue(TValue value) noexcept { + return TMonotonic(value); + } + + static inline TMonotonic Now() { + return TMonotonic::MicroSeconds(GetMonotonicMicroSeconds()); + } + + using TBase::Days; + using TBase::Hours; + using TBase::MicroSeconds; + using TBase::MilliSeconds; + using TBase::Minutes; + using TBase::Seconds; + + static constexpr TMonotonic Max() noexcept { + return TMonotonic(::Max<ui64>()); + } + + static constexpr TMonotonic Zero() noexcept { + return TMonotonic(); + } + + static constexpr TMonotonic MicroSeconds(ui64 us) noexcept { + return TMonotonic(TInstant::MicroSeconds(us).GetValue()); + } + + static constexpr TMonotonic MilliSeconds(ui64 ms) noexcept { + return TMonotonic(TInstant::MilliSeconds(ms).GetValue()); + } + + static constexpr TMonotonic Seconds(ui64 s) noexcept { + return 
TMonotonic(TInstant::Seconds(s).GetValue()); + } + + static constexpr TMonotonic Minutes(ui64 m) noexcept { + return TMonotonic(TInstant::Minutes(m).GetValue()); + } + + static constexpr TMonotonic Hours(ui64 h) noexcept { + return TMonotonic(TInstant::Hours(h).GetValue()); + } + + static constexpr TMonotonic Days(ui64 d) noexcept { + return TMonotonic(TInstant::Days(d).GetValue()); + } + + template<class T> + inline TMonotonic& operator+=(const T& t) noexcept { + return (*this = (*this + t)); + } + + template<class T> + inline TMonotonic& operator-=(const T& t) noexcept { + return (*this = (*this - t)); + } + }; +} // namespace NActors + +Y_DECLARE_PODTYPE(NActors::TMonotonic); + +template<> +struct THash<NActors::TMonotonic> { + size_t operator()(const NActors::TMonotonic& key) const { + return THash<NActors::TMonotonic::TValue>()(key.GetValue()); + } +}; + +namespace NActors { + + constexpr TDuration operator-(const TMonotonic& l, const TMonotonic& r) { + return TInstant::FromValue(l.GetValue()) - TInstant::FromValue(r.GetValue()); + } + + constexpr TMonotonic operator+(const TMonotonic& l, const TDuration& r) { + TInstant result = TInstant::FromValue(l.GetValue()) + r; + return TMonotonic::FromValue(result.GetValue()); + } + + constexpr TMonotonic operator-(const TMonotonic& l, const TDuration& r) { + TInstant result = TInstant::FromValue(l.GetValue()) - r; + return TMonotonic::FromValue(result.GetValue()); + } + +} // namespace NActors diff --git a/library/cpp/actors/core/probes.cpp b/library/cpp/actors/core/probes.cpp new file mode 100644 index 0000000000..7ace83e102 --- /dev/null +++ b/library/cpp/actors/core/probes.cpp @@ -0,0 +1,28 @@ +#include "probes.h" + +#include "actorsystem.h" + +#include <util/string/builder.h> + +LWTRACE_DEFINE_PROVIDER(ACTORLIB_PROVIDER); + +namespace NActors { + TVector<NLWTrace::TDashboard> LWTraceDashboards(TActorSystemSetup* setup) { + TVector<NLWTrace::TDashboard> result; + + NLWTrace::TDashboard slowDash; + ui32 pools = setup->GetExecutorsCount(); + size_t top = 30; + slowDash.SetName("ActorSystem slow events"); + slowDash.SetDescription(TStringBuilder() << "TOP" << top << " slow event executions >1M cycles for every pool (refresh page to update)"); + for (ui32 pool = 0; pool < pools; pool++) { + auto* row = slowDash.AddRows(); + auto* cell = row->AddCells(); + cell->SetTitle(TStringBuilder() << pool << ":" << setup->GetPoolName(pool)); + cell->SetUrl(TStringBuilder() << "?mode=log&id=.ACTORLIB_PROVIDER.SlowEvent.ppoolId=" << pool << "&s=eventMs&reverse=y&head=30"); + } + result.push_back(slowDash); + + return result; + } +} diff --git a/library/cpp/actors/core/probes.h b/library/cpp/actors/core/probes.h new file mode 100644 index 0000000000..4912d6dd26 --- /dev/null +++ b/library/cpp/actors/core/probes.h @@ -0,0 +1,176 @@ +#pragma once + +#include <library/cpp/lwtrace/all.h> +#include <util/generic/vector.h> + +#define LWACTORID(x) (x).RawX1(), (x).RawX2(), (x).NodeId(), (x).PoolID() +#define LWTYPE_ACTORID ui64, ui64, ui32, ui32 +#define LWNAME_ACTORID(n) n "Raw1", n "Raw2", n "NodeId", n "PoolId" + +#define ACTORLIB_PROVIDER(PROBE, EVENT, GROUPS, TYPES, NAMES) \ + PROBE(SlowEvent, GROUPS("ActorLibSlow"), \ + TYPES(ui32, double, TString, TString, TString), \ + NAMES("poolId", "eventMs", "eventType", "actorId", "actorType")) \ + PROBE(EventSlowDelivery, GROUPS("ActorLibSlow"), \ + TYPES(ui32, double, double, ui64, TString, TString, TString), \ + NAMES("poolId", "deliveryMs", "sinceActivationMs", "eventProcessedBefore", "eventType", "actorId", 
"actorType")) \ + PROBE(SlowActivation, GROUPS("ActorLibSlow"), \ + TYPES(ui32, double), \ + NAMES("poolId", "activationMs")) \ + PROBE(SlowRegisterNew, GROUPS("ActorLibSlow"), \ + TYPES(ui32, double), \ + NAMES("poolId", "registerNewMs")) \ + PROBE(SlowRegisterAdd, GROUPS("ActorLibSlow"), \ + TYPES(ui32, double), \ + NAMES("poolId", "registerAddMs")) \ + PROBE(MailboxPushedOutBySoftPreemption, GROUPS("ActorLibMailbox", "ActorLibMailboxPushedOut"), \ + TYPES(ui32, TString, ui32, TDuration, ui64, TString, TString), \ + NAMES("poolId", "pool", "eventsProcessed", "procTimeMs", "workerId", "actorId", "actorType")) \ + PROBE(MailboxPushedOutByTime, GROUPS("ActorLibMailbox", "ActorLibMailboxPushedOut"), \ + TYPES(ui32, TString, ui32, TDuration, ui64, TString, TString), \ + NAMES("poolId", "pool", "eventsProcessed", "procTimeMs", "workerId", "actorId", "actorType")) \ + PROBE(MailboxPushedOutByEventCount, GROUPS("ActorLibMailbox", "ActorLibMailboxPushedOut"), \ + TYPES(ui32, TString, ui32, TDuration, ui64, TString, TString), \ + NAMES("poolId", "pool", "eventsProcessed", "procTimeMs", "workerId", "actorId", "actorType")) \ + PROBE(MailboxEmpty, GROUPS("ActorLibMailbox"), \ + TYPES(ui32, TString, ui32, TDuration, ui64, TString, TString), \ + NAMES("poolId", "pool", "eventsProcessed", "procTimeMs", "workerId", "actorId", "actorType")) \ + PROBE(ActivationBegin, GROUPS(), \ + TYPES(ui32, ui32, ui32, double), \ + NAMES("cpu", "poolId", "workerId", "expireMs")) \ + PROBE(ActivationEnd, GROUPS(), \ + TYPES(ui32, ui32, ui32), \ + NAMES("cpu", "poolId", "workerId")) \ + PROBE(ExecutorThreadStats, GROUPS("ActorLibStats"), \ + TYPES(ui32, TString, ui64, ui64, ui64, double, double), \ + NAMES("poolId", "pool", "workerId", "execCount", "readyActivationCount", "execMs", "nonExecMs")) \ + PROBE(SlowICReadLoopAdjustSize, GROUPS("ActorLibSlowIC"), \ + TYPES(double), \ + NAMES("icReadLoopAdjustSizeMs")) \ + PROBE(SlowICReadFromSocket, GROUPS("ActorLibSlowIC"), \ + TYPES(double), \ + NAMES("icReadFromSocketMs")) \ + PROBE(SlowICReadLoopSend, GROUPS("ActorLibSlowIC"), \ + TYPES(double), \ + NAMES("icReadLoopSendMs")) \ + PROBE(SlowICAllocPacketBuffer, GROUPS("ActorLibSlowIC"), \ + TYPES(ui32, double), \ + NAMES("peerId", "icAllocPacketBufferMs")) \ + PROBE(SlowICFillSendingBuffer, GROUPS("ActorLibSlowIC"), \ + TYPES(ui32, double), \ + NAMES("peerId", "icFillSendingBufferMs")) \ + PROBE(SlowICPushSentPackets, GROUPS("ActorLibSlowIC"), \ + TYPES(ui32, double), \ + NAMES("peerId", "icPushSentPacketsMs")) \ + PROBE(SlowICPushSendQueue, GROUPS("ActorLibSlowIC"), \ + TYPES(ui32, double), \ + NAMES("peerId", "icPushSendQueueMs")) \ + PROBE(SlowICWriteData, GROUPS("ActorLibSlowIC"), \ + TYPES(ui32, double), \ + NAMES("peerId", "icWriteDataMs")) \ + PROBE(SlowICDropConfirmed, GROUPS("ActorLibSlowIC"), \ + TYPES(ui32, double), \ + NAMES("peerId", "icDropConfirmedMs")) \ + PROBE(ActorsystemScheduler, GROUPS("Durations"), \ + TYPES(ui64, ui64, ui32, ui32, ui64, ui64), \ + NAMES("timeUs", "timerfd_expirations", "eventsGottenFromQueues", "eventsSent", \ + "eventsInSendQueue", "eventSchedulingErrorUs")) \ + PROBE(ForwardEvent, GROUPS("Orbit", "InterconnectSessionTCP"), \ + TYPES(ui32, ui32, ui32, LWTYPE_ACTORID, LWTYPE_ACTORID, ui64, ui32), \ + NAMES("peerId", "type", "flags", LWNAME_ACTORID("r"), LWNAME_ACTORID("s"), \ + "cookie", "eventSerializedSize")) \ + PROBE(EnqueueEvent, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, ui64, TDuration, ui16, ui64, ui64), \ + NAMES("peerId", "numEventsInReadyChannels", 
"enqueueBlockedTotalMs", "channelId", "queueSizeInEvents", "queueSizeInBytes")) \ + PROBE(SerializeToPacketBegin, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, ui16, ui64), \ + NAMES("peerId", "channelId", "outputQueueSize")) \ + PROBE(SerializeToPacketEnd, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, ui16, ui64, ui64), \ + NAMES("peerId", "channelId", "outputQueueSize", "offsetInPacket")) \ + PROBE(FillSendingBuffer, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, ui32, ui64, TDuration), \ + NAMES("peerId", "taskBytesGenerated", "numEventsInReadyChannelsBehind", "fillBlockedTotalMs")) \ + PROBE(PacketGenerated, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, ui64, ui64, ui64, ui64), \ + NAMES("peerId", "bytesUnwritten", "inflightBytes", "packetsGenerated", "packetSize")) \ + PROBE(PacketWrittenToSocket, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, ui64, bool, ui64, ui64, TDuration, int), \ + NAMES("peerId", "packetsWrittenToSocket", "triedWriting", "packetDataSize", "bytesUnwritten", "writeBlockedTotalMs", "fd")) \ + PROBE(GenerateTraffic, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, double, ui64, ui32, ui64), \ + NAMES("peerId", "generateTrafficMs", "dataBytesSent", "generatedPackets", "generatedBytes")) \ + PROBE(WriteToSocket, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, ui64, ui64, ui64, ui64, TDuration, int), \ + NAMES("peerId", "bytesWritten", "packetsWritten", "packetsWrittenToSocket", "bytesUnwritten", "writeBlockedTotalMs", "fd")) \ + PROBE(UpdateFromInputSession, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, double), \ + NAMES("peerId", "pingMs")) \ + PROBE(UnblockByDropConfirmed, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, double), \ + NAMES("peerId", "updateDeliveryMs")) \ + PROBE(DropConfirmed, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, ui64, ui64), \ + NAMES("peerId", "droppedBytes", "inflightBytes")) \ + PROBE(StartRam, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32), \ + NAMES("peerId")) \ + PROBE(FinishRam, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, double), \ + NAMES("peerId", "ramMs")) \ + PROBE(SkipGenerateTraffic, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, double), \ + NAMES("peerId", "elapsedSinceRamMs")) \ + PROBE(StartBatching, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, double), \ + NAMES("peerId", "batchPeriodMs")) \ + PROBE(FinishBatching, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, double), \ + NAMES("peerId", "finishBatchDeliveryMs")) \ + PROBE(BlockedWrite, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, double, ui64), \ + NAMES("peerId", "sendQueueSize", "writtenBytes")) \ + PROBE(ReadyWrite, GROUPS("InterconnectSessionTCP"), \ + TYPES(ui32, double, double), \ + NAMES("peerId", "readyWriteDeliveryMs", "blockMs")) \ + PROBE(EpollStartWaitIn, GROUPS("EpollThread"), \ + TYPES(), \ + NAMES()) \ + PROBE(EpollFinishWaitIn, GROUPS("EpollThread"), \ + TYPES(i32), \ + NAMES("eventsCount")) \ + PROBE(EpollWaitOut, GROUPS("EpollThread"), \ + TYPES(i32), \ + NAMES("eventsCount")) \ + PROBE(EpollSendReadyRead, GROUPS("EpollThread"), \ + TYPES(bool, bool, int), \ + NAMES("hangup", "event", "fd")) \ + PROBE(EpollSendReadyWrite, GROUPS("EpollThread"), \ + TYPES(bool, bool, int), \ + NAMES("hangup", "event", "fd")) \ + PROBE(HardPreemption, GROUPS("UnitedWorker"), \ + TYPES(ui32, ui32, ui32, ui32), \ + NAMES("cpu", "prevPoolId", "prevWorkerId", "nextWorkerId")) \ + PROBE(SetPreemptionTimer, GROUPS("UnitedWorker", "PreemptionTimer"), \ + TYPES(ui32, ui32, int, double, double), \ + NAMES("cpu", 
"workerId", "fd", "nowMs", "preemptMs")) \ + PROBE(ResetPreemptionTimer, GROUPS("UnitedWorker", "PreemptionTimer"), \ + TYPES(ui32, ui32, int, double, double), \ + NAMES("cpu", "workerId", "fd", "nowMs", "preemptMs")) \ + PROBE(SlowWorkerActionRace, GROUPS("UnitedWorker"), \ + TYPES(ui32, ui32, ui64), \ + NAMES("cpu", "poolId", "slowPoolsMask")) \ + PROBE(PoolStats, GROUPS("PoolCpuBalancer"), \ + TYPES(ui32, TString, ui64, ui8, ui8, double, double, double, ui64, ui64, ui64), \ + NAMES("poolId", "pool", "currentCpus", "loadClass", "priority", "scaleFactor", "cpuIdle", "cpuLoad", "importance", "addImportance", "subImportance")) \ + PROBE(MoveCpu, GROUPS("PoolCpuBalancer"), \ + TYPES(ui32, ui64, TString, TString, ui32), \ + NAMES("fromPoolId", "toPoolId", "fromPool", "toPool", "cpu")) \ + /**/ + +LWTRACE_DECLARE_PROVIDER(ACTORLIB_PROVIDER) + +namespace NActors { + struct TActorSystemSetup; + TVector<NLWTrace::TDashboard> LWTraceDashboards(TActorSystemSetup* setup); +} diff --git a/library/cpp/actors/core/process_stats.cpp b/library/cpp/actors/core/process_stats.cpp new file mode 100644 index 0000000000..0e1dbd0031 --- /dev/null +++ b/library/cpp/actors/core/process_stats.cpp @@ -0,0 +1,303 @@ +#include "actorsystem.h" +#include "actor_bootstrapped.h" +#include "hfunc.h" +#include "process_stats.h" + +#include <library/cpp/monlib/dynamic_counters/counters.h> +#include <library/cpp/monlib/metrics/metric_registry.h> + +#include <util/datetime/uptime.h> +#include <util/system/defaults.h> +#include <util/stream/file.h> +#include <util/string/vector.h> +#include <util/string/split.h> + +#ifndef _win_ +#include <sys/user.h> +#include <sys/sysctl.h> +#endif + +namespace NActors { +#ifdef _linux_ + + namespace { + template <typename TVal> + static bool ExtractVal(const TString& str, const TString& name, TVal& res) { + if (!str.StartsWith(name)) + return false; + size_t pos = name.size(); + while (pos < str.size() && (str[pos] == ' ' || str[pos] == '\t')) { + pos++; + } + res = atol(str.data() + pos); + return true; + } + + float TicksPerMillisec() { +#ifdef _SC_CLK_TCK + return sysconf(_SC_CLK_TCK) / 1000.0; +#else + return 1.f; +#endif + } + } + + bool TProcStat::Fill(pid_t pid) { + try { + TString strPid(ToString(pid)); + TFileInput proc("/proc/" + strPid + "/status"); + TString str; + while (proc.ReadLine(str)) { + if (ExtractVal(str, "VmRSS:", Rss)) + continue; + if (ExtractVal(str, "voluntary_ctxt_switches:", VolCtxSwtch)) + continue; + if (ExtractVal(str, "nonvoluntary_ctxt_switches:", NonvolCtxSwtch)) + continue; + } + // Convert from kB to bytes + Rss *= 1024; + + float tickPerMillisec = TicksPerMillisec(); + + TFileInput procStat("/proc/" + strPid + "/stat"); + procStat.ReadLine(str); + if (!str.empty()) { + sscanf(str.data(), + "%d %*s %c %d %d %d %d %d %u %lu %lu " + "%lu %lu %lu %lu %ld %ld %ld %ld %ld " + "%ld %llu %lu %ld %lu", + &Pid, &State, &Ppid, &Pgrp, &Session, &TtyNr, &TPgid, &Flags, &MinFlt, &CMinFlt, + &MajFlt, &CMajFlt, &Utime, &Stime, &CUtime, &CStime, &Priority, &Nice, &NumThreads, + &ItRealValue, &StartTime, &Vsize, &RssPages, &RssLim); + Utime /= tickPerMillisec; + Stime /= tickPerMillisec; + CUtime /= tickPerMillisec; + CStime /= tickPerMillisec; + SystemUptime = ::Uptime(); + Uptime = SystemUptime - TDuration::MilliSeconds(StartTime / TicksPerMillisec()); + } + + TFileInput statm("/proc/" + strPid + "/statm"); + statm.ReadLine(str); + TVector<TString> fields; + StringSplitter(str).Split(' ').SkipEmpty().Collect(&fields); + if (fields.size() >= 7) { + ui64 resident = 
FromString<ui64>(fields[1]); + ui64 shared = FromString<ui64>(fields[2]); + if (PageSize == 0) { + PageSize = ObtainPageSize(); + } + FileRss = shared * PageSize; + AnonRss = (resident - shared) * PageSize; + } + + TFileInput cgroup("/proc/" + strPid + "/cgroup"); + TString line; + TString memoryCGroup; + while (cgroup.ReadLine(line) > 0) { + StringSplitter(line).Split(':').Collect(&fields); + if (fields.size() > 2 && fields[1] == "memory") { + memoryCGroup = fields[2]; + break; + } + } + if (!memoryCGroup.empty()) { + TFileInput limit("/sys/fs/cgroup/memory" + memoryCGroup + "/memory.limit_in_bytes"); + if (limit.ReadLine(line) > 0) { + CGroupMemLim = FromString<ui64>(line); + if (CGroupMemLim > (1ULL << 40)) { + CGroupMemLim = 0; + } + } + } + + } catch (...) { + return false; + } + return true; + } + + long TProcStat::ObtainPageSize() { + long sz = sysconf(_SC_PAGESIZE); + return sz; + } + +#else + + bool TProcStat::Fill(pid_t pid) { + Y_UNUSED(pid); + return false; + } + + long TProcStat::ObtainPageSize() { + return 0; + } + +#endif + +namespace { + // Periodically collects process stats and exposes them as mon counters + template <typename TDerived> + class TProcStatCollectingActor: public TActorBootstrapped<TProcStatCollectingActor<TDerived>> { + public: + static constexpr IActor::EActivityType ActorActivityType() { + return IActor::ACTORLIB_STATS; + } + + TProcStatCollectingActor(TDuration interval) + : Interval(interval) + { + } + + void Bootstrap(const TActorContext& ctx) { + ctx.Schedule(Interval, new TEvents::TEvWakeup()); + Self()->Become(&TDerived::StateWork); + } + + STFUNC(StateWork) { + switch (ev->GetTypeRewrite()) { + CFunc(TEvents::TSystem::Wakeup, Wakeup); + } + } + + private: + void Wakeup(const TActorContext& ctx) { + Self()->UpdateCounters(ProcStat); + ctx.Schedule(Interval, new TEvents::TEvWakeup()); + } + + TDerived* Self() { + ProcStat.Fill(getpid()); + return static_cast<TDerived*>(this); + } + + private: + const TDuration Interval; + TProcStat ProcStat; + }; + + // Periodically collects process stats and exposes them as mon counters + class TDynamicCounterCollector: public TProcStatCollectingActor<TDynamicCounterCollector> { + using TBase = TProcStatCollectingActor<TDynamicCounterCollector>; + public: + TDynamicCounterCollector( + ui32 intervalSeconds, + NMonitoring::TDynamicCounterPtr counters) + : TBase{TDuration::Seconds(intervalSeconds)} + { + ProcStatGroup = counters->GetSubgroup("counters", "utils"); + + VmSize = ProcStatGroup->GetCounter("Process/VmSize", false); + AnonRssSize = ProcStatGroup->GetCounter("Process/AnonRssSize", false); + FileRssSize = ProcStatGroup->GetCounter("Process/FileRssSize", false); + CGroupMemLimit = ProcStatGroup->GetCounter("Process/CGroupMemLimit", false); + UserTime = ProcStatGroup->GetCounter("Process/UserTime", true); + SysTime = ProcStatGroup->GetCounter("Process/SystemTime", true); + MinorPageFaults = ProcStatGroup->GetCounter("Process/MinorPageFaults", true); + MajorPageFaults = ProcStatGroup->GetCounter("Process/MajorPageFaults", true); + UptimeSeconds = ProcStatGroup->GetCounter("Process/UptimeSeconds", false); + NumThreads = ProcStatGroup->GetCounter("Process/NumThreads", false); + SystemUptimeSeconds = ProcStatGroup->GetCounter("System/UptimeSeconds", false); + } + + void UpdateCounters(const TProcStat& procStat) { + *VmSize = procStat.Vsize; + *AnonRssSize = procStat.AnonRss; + *FileRssSize = procStat.FileRss; + if (procStat.CGroupMemLim) { + *CGroupMemLimit = procStat.CGroupMemLim; + } + *UserTime = procStat.Utime; 
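As an aside (a hedged sketch, not from this commit; the helper is hypothetical): TProcStat can also be used directly, outside the collector actors; on non-Linux platforms Fill() simply returns false:

```cpp
#include <library/cpp/actors/core/process_stats.h>

#include <util/stream/output.h>

#include <unistd.h> // getpid

void PrintSelfStats() {
    NActors::TProcStat stat;
    if (stat.Fill(getpid())) { // parses /proc/<pid>/{status,stat,statm,cgroup} on Linux
        Cerr << "rss=" << stat.Rss
             << " anonRss=" << stat.AnonRss
             << " threads=" << stat.NumThreads
             << " uptime=" << stat.Uptime.Seconds() << "s" << Endl;
    }
}
```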
+                *SysTime = procStat.Stime;
+                *MinorPageFaults = procStat.MinFlt;
+                *MajorPageFaults = procStat.MajFlt;
+                *UptimeSeconds = procStat.Uptime.Seconds();
+                *NumThreads = procStat.NumThreads;
+                *SystemUptimeSeconds = procStat.SystemUptime.Seconds();
+            }
+
+        private:
+            NMonitoring::TDynamicCounterPtr ProcStatGroup;
+            NMonitoring::TDynamicCounters::TCounterPtr VmSize;
+            NMonitoring::TDynamicCounters::TCounterPtr AnonRssSize;
+            NMonitoring::TDynamicCounters::TCounterPtr FileRssSize;
+            NMonitoring::TDynamicCounters::TCounterPtr CGroupMemLimit;
+            NMonitoring::TDynamicCounters::TCounterPtr UserTime;
+            NMonitoring::TDynamicCounters::TCounterPtr SysTime;
+            NMonitoring::TDynamicCounters::TCounterPtr MinorPageFaults;
+            NMonitoring::TDynamicCounters::TCounterPtr MajorPageFaults;
+            NMonitoring::TDynamicCounters::TCounterPtr UptimeSeconds;
+            NMonitoring::TDynamicCounters::TCounterPtr NumThreads;
+            NMonitoring::TDynamicCounters::TCounterPtr SystemUptimeSeconds;
+        };
+
+
+        class TRegistryCollector: public TProcStatCollectingActor<TRegistryCollector> {
+            using TBase = TProcStatCollectingActor<TRegistryCollector>;
+        public:
+            TRegistryCollector(TDuration interval, NMonitoring::TMetricRegistry& registry)
+                : TBase{interval}
+            {
+                VmSize = registry.IntGauge({{"sensor", "process.VmSize"}});
+                AnonRssSize = registry.IntGauge({{"sensor", "process.AnonRssSize"}});
+                FileRssSize = registry.IntGauge({{"sensor", "process.FileRssSize"}});
+                CGroupMemLimit = registry.IntGauge({{"sensor", "process.CGroupMemLimit"}});
+                UptimeSeconds = registry.IntGauge({{"sensor", "process.UptimeSeconds"}});
+                NumThreads = registry.IntGauge({{"sensor", "process.NumThreads"}});
+                SystemUptimeSeconds = registry.IntGauge({{"sensor", "system.UptimeSeconds"}});
+
+                UserTime = registry.Rate({{"sensor", "process.UserTime"}});
+                SysTime = registry.Rate({{"sensor", "process.SystemTime"}});
+                MinorPageFaults = registry.Rate({{"sensor", "process.MinorPageFaults"}});
+                MajorPageFaults = registry.Rate({{"sensor", "process.MajorPageFaults"}});
+            }
+
+            void UpdateCounters(const TProcStat& procStat) {
+                VmSize->Set(procStat.Vsize);
+                AnonRssSize->Set(procStat.AnonRss);
+                FileRssSize->Set(procStat.FileRss);
+                CGroupMemLimit->Set(procStat.CGroupMemLim);
+                UptimeSeconds->Set(procStat.Uptime.Seconds());
+                NumThreads->Set(procStat.NumThreads);
+                SystemUptimeSeconds->Set(procStat.SystemUptime.Seconds());
+
+                // it is ok to reset and re-add the metric value here, because the mutation
+                // is performed in a single-threaded context
+
+                UserTime->Reset();
+                UserTime->Add(procStat.Utime);
+
+                SysTime->Reset();
+                SysTime->Add(procStat.Stime);
+
+                MinorPageFaults->Reset();
+                MinorPageFaults->Add(procStat.MinFlt);
+
+                MajorPageFaults->Reset();
+                MajorPageFaults->Add(procStat.MajFlt);
+            }
+
+        private:
+            NMonitoring::TIntGauge* VmSize;
+            NMonitoring::TIntGauge* AnonRssSize;
+            NMonitoring::TIntGauge* FileRssSize;
+            NMonitoring::TIntGauge* CGroupMemLimit;
+            NMonitoring::TRate* UserTime;
+            NMonitoring::TRate* SysTime;
+            NMonitoring::TRate* MinorPageFaults;
+            NMonitoring::TRate* MajorPageFaults;
+            NMonitoring::TIntGauge* UptimeSeconds;
+            NMonitoring::TIntGauge* NumThreads;
+            NMonitoring::TIntGauge* SystemUptimeSeconds;
+        };
+} // namespace
+
+    IActor* CreateProcStatCollector(ui32 intervalSec, NMonitoring::TDynamicCounterPtr counters) {
+        return new TDynamicCounterCollector(intervalSec, counters);
+    }
+
+    IActor* CreateProcStatCollector(TDuration interval, NMonitoring::TMetricRegistry& registry) {
+        return new TRegistryCollector(interval, registry);
+    }
+}
 diff --git
a/library/cpp/actors/core/process_stats.h b/library/cpp/actors/core/process_stats.h new file mode 100644 index 0000000000..66346d0b5a --- /dev/null +++ b/library/cpp/actors/core/process_stats.h @@ -0,0 +1,66 @@ +#pragma once + +#include "defs.h" +#include "actor.h" + +#include <library/cpp/monlib/dynamic_counters/counters.h> + +namespace NMonitoring { + class TMetricRegistry; +} + +namespace NActors { + struct TProcStat { + ui64 Rss; + ui64 VolCtxSwtch; + ui64 NonvolCtxSwtch; + + int Pid; + char State; + int Ppid; + int Pgrp; + int Session; + int TtyNr; + int TPgid; + unsigned Flags; + unsigned long MinFlt; + unsigned long CMinFlt; + unsigned long MajFlt; + unsigned long CMajFlt; + unsigned long Utime; + unsigned long Stime; + long CUtime; + long CStime; + long Priority; + long Nice; + long NumThreads; + long ItRealValue; + // StartTime is measured from system boot + unsigned long long StartTime; + unsigned long Vsize; + long RssPages; + unsigned long RssLim; + ui64 FileRss; + ui64 AnonRss; + ui64 CGroupMemLim = 0; + + TDuration Uptime; + TDuration SystemUptime; + // ... + + TProcStat() { + Zero(*this); + Y_UNUSED(PageSize); + } + + bool Fill(pid_t pid); + + private: + long PageSize = 0; + + long ObtainPageSize(); + }; + + IActor* CreateProcStatCollector(ui32 intervalSec, NMonitoring::TDynamicCounterPtr counters); + IActor* CreateProcStatCollector(TDuration interval, NMonitoring::TMetricRegistry& registry); +} diff --git a/library/cpp/actors/core/scheduler_actor.cpp b/library/cpp/actors/core/scheduler_actor.cpp new file mode 100644 index 0000000000..febc5e40dd --- /dev/null +++ b/library/cpp/actors/core/scheduler_actor.cpp @@ -0,0 +1,279 @@ +#include "actor_bootstrapped.h" +#include "hfunc.h" +#include "probes.h" +#include "scheduler_actor.h" +#include "scheduler_queue.h" + +#include <library/cpp/actors/interconnect/poller_actor.h> +#include <util/system/hp_timer.h> + +#ifdef __linux__ +#include <sys/timerfd.h> +#include <errno.h> + +LWTRACE_USING(ACTORLIB_PROVIDER); + +namespace NActors { + class TTimerDescriptor: public TSharedDescriptor { + const int Descriptor; + + public: + TTimerDescriptor() + : Descriptor(timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK)) + { + Y_VERIFY(Descriptor != -1, "timerfd_create() failed with %s", strerror(errno)); + } + + ~TTimerDescriptor() override { + close(Descriptor); + } + + int GetDescriptor() override { + return Descriptor; + } + }; + + class TSchedulerActor: public TActor<TSchedulerActor> { + const TSchedulerConfig Cfg; + TIntrusivePtr<TSharedDescriptor> TimerDescriptor; + + TVector<NSchedulerQueue::TReader*> Readers; + + TActorId PollerActor; + TPollerToken::TPtr PollerToken; + + ui64 RealTime; + ui64 MonotonicTime; + + ui64 ActiveTick; + typedef TMap<ui64, TAutoPtr<NSchedulerQueue::TQueueType>> TMomentMap; // intrasecond queues + typedef THashMap<ui64, TAutoPtr<TMomentMap>> TScheduleMap; // over-second schedule + + TScheduleMap ScheduleMap; + + THolder<NThreading::TLegacyFuture<void, false>> MainCycle; + + static const ui64 IntrasecondThreshold = 1048576; // ~second + TAutoPtr<TMomentMap> ActiveSec; + volatile ui64* CurrentTimestamp = nullptr; + volatile ui64* CurrentMonotonic = nullptr; + TDeque<TAutoPtr<IEventHandle>> EventsToBeSent; + + public: + static constexpr IActor::EActivityType ActorActivityType() { + return IActor::ACTOR_SYSTEM_SCHEDULER_ACTOR; + } + + TSchedulerActor(const TSchedulerConfig& cfg) + : TActor(&TSchedulerActor::StateFunc) + , Cfg(cfg) + , TimerDescriptor(new TTimerDescriptor()) + , PollerActor(MakePollerActorId()) + { + 
Y_ASSERT(Cfg.ResolutionMicroseconds != 0); + Y_ASSERT(Cfg.ProgressThreshold != 0); + Become(&TSchedulerActor::StateFunc); + } + + void Handle(TEvSchedulerInitialize::TPtr& ev, const TActorContext& ctx) { + const TEvSchedulerInitialize& evInitialize = *ev->Get(); + Y_ASSERT(evInitialize.ScheduleReaders.size() != 0); + Readers.resize(evInitialize.ScheduleReaders.size()); + Copy(evInitialize.ScheduleReaders.begin(), evInitialize.ScheduleReaders.end(), Readers.begin()); + + Y_ASSERT(evInitialize.CurrentTimestamp != nullptr); + CurrentTimestamp = evInitialize.CurrentTimestamp; + + Y_ASSERT(evInitialize.CurrentMonotonic != nullptr); + CurrentMonotonic = evInitialize.CurrentMonotonic; + + struct itimerspec new_time; + memset(&new_time, 0, sizeof(new_time)); + new_time.it_value.tv_nsec = Cfg.ResolutionMicroseconds * 1000; + new_time.it_interval.tv_nsec = Cfg.ResolutionMicroseconds * 1000; + int ret = timerfd_settime(TimerDescriptor->GetDescriptor(), 0, &new_time, NULL); + Y_VERIFY(ret != -1, "timerfd_settime() failed with %s", strerror(errno)); + const bool success = ctx.Send(PollerActor, new TEvPollerRegister(TimerDescriptor, SelfId(), {})); + Y_VERIFY(success); + + RealTime = RelaxedLoad(CurrentTimestamp); + MonotonicTime = RelaxedLoad(CurrentMonotonic); + + ActiveTick = AlignUp<ui64>(MonotonicTime, IntrasecondThreshold); + } + + void Handle(TEvPollerRegisterResult::TPtr ev, const TActorContext& ctx) { + PollerToken = ev->Get()->PollerToken; + HandleSchedule(ctx); + } + + void UpdateTime() { + RealTime = TInstant::Now().MicroSeconds(); + MonotonicTime = Max(MonotonicTime, GetMonotonicMicroSeconds()); + AtomicStore(CurrentTimestamp, RealTime); + AtomicStore(CurrentMonotonic, MonotonicTime); + } + + void TryUpdateTime(NHPTimer::STime* lastTimeUpdate) { + NHPTimer::STime hpnow; + GetTimeFast(&hpnow); + const ui64 elapsedCycles = hpnow > *lastTimeUpdate ? hpnow - *lastTimeUpdate : 0; + if (elapsedCycles > Cfg.ResolutionMicroseconds * (NHPTimer::GetCyclesPerSecond() / IntrasecondThreshold)) { + UpdateTime(); + GetTimeFast(lastTimeUpdate); + } + } + + void HandleSchedule(const TActorContext& ctx) { + for (;;) { + NHPTimer::STime schedulingStart; + GetTimeFast(&schedulingStart); + NHPTimer::STime lastTimeUpdate = schedulingStart; + + ui64 expired; + ssize_t bytesRead; + bytesRead = read(TimerDescriptor->GetDescriptor(), &expired, sizeof(expired)); + if (bytesRead == -1) { + if (errno == EAGAIN) { + PollerToken->Request(true, false); + break; + } else if (errno == EINTR) { + continue; + } + } + Y_VERIFY(bytesRead == sizeof(expired), "Error while reading from timerfd, strerror# %s", strerror(errno)); + UpdateTime(); + + ui32 eventsGottenFromQueues = 0; + // collect everything from queues + for (ui32 i = 0; i != Readers.size(); ++i) { + while (NSchedulerQueue::TEntry* x = Readers[i]->Pop()) { + const ui64 instant = AlignUp<ui64>(x->InstantMicroseconds, Cfg.ResolutionMicroseconds); + IEventHandle* const ev = x->Ev; + ISchedulerCookie* const cookie = x->Cookie; + + // check is cookie still valid? 
looks like it will hurt performance w/o significant memory savings
+
+                    if (instant <= ActiveTick) {
+                        if (!ActiveSec)
+                            ActiveSec.Reset(new TMomentMap());
+                        TAutoPtr<NSchedulerQueue::TQueueType>& queue = (*ActiveSec)[instant];
+                        if (!queue)
+                            queue.Reset(new NSchedulerQueue::TQueueType());
+                        queue->Writer.Push(instant, ev, cookie);
+                    } else {
+                        const ui64 intrasecond = AlignUp<ui64>(instant, IntrasecondThreshold);
+                        TAutoPtr<TMomentMap>& msec = ScheduleMap[intrasecond];
+                        if (!msec)
+                            msec.Reset(new TMomentMap());
+                        TAutoPtr<NSchedulerQueue::TQueueType>& queue = (*msec)[instant];
+                        if (!queue)
+                            queue.Reset(new NSchedulerQueue::TQueueType());
+                        queue->Writer.Push(instant, ev, cookie);
+                    }
+                    ++eventsGottenFromQueues;
+                    TryUpdateTime(&lastTimeUpdate);
+                }
+            }
+
+            ui64 eventSchedulingErrorUs = 0;
+            // send everything triggered on schedule
+            for (;;) {
+                while (!!ActiveSec && !ActiveSec->empty()) {
+                    TMomentMap::iterator it = ActiveSec->begin();
+                    if (it->first <= MonotonicTime) {
+                        if (NSchedulerQueue::TQueueType* q = it->second.Get()) {
+                            while (NSchedulerQueue::TEntry* x = q->Reader.Pop()) {
+                                Y_VERIFY_DEBUG(x->InstantMicroseconds <= ActiveTick);
+                                if (eventSchedulingErrorUs == 0 && MonotonicTime > x->InstantMicroseconds) {
+                                    eventSchedulingErrorUs = MonotonicTime - x->InstantMicroseconds;
+                                }
+                                IEventHandle* ev = x->Ev;
+                                ISchedulerCookie* cookie = x->Cookie;
+                                if (cookie) {
+                                    if (cookie->Detach()) {
+                                        EventsToBeSent.push_back(ev);
+                                    } else {
+                                        delete ev;
+                                    }
+                                } else {
+                                    EventsToBeSent.push_back(ev);
+                                }
+                                TryUpdateTime(&lastTimeUpdate);
+                            }
+                        }
+                        ActiveSec->erase(it);
+                    } else {
+                        break;
+                    }
+                }
+
+                if (ActiveTick <= MonotonicTime) {
+                    Y_VERIFY_DEBUG(!ActiveSec || ActiveSec->empty());
+                    ActiveSec.Destroy();
+                    ActiveTick += IntrasecondThreshold;
+                    TScheduleMap::iterator it = ScheduleMap.find(ActiveTick);
+                    if (it != ScheduleMap.end()) {
+                        ActiveSec = it->second;
+                        ScheduleMap.erase(it);
+                    }
+                    continue;
+                }
+
+                // ok, if we are here - then nothing is ready, so send step complete
+                break;
+            }
+
+            // Send all from buffer queue
+            const ui64 eventsToBeSentSize = EventsToBeSent.size();
+            ui32 sentCount = 0;
+            if (eventsToBeSentSize > Cfg.RelaxedSendThresholdEventsPerCycle) {
+                sentCount = Cfg.RelaxedSendPaceEventsPerCycle +
+                    (eventsToBeSentSize - Cfg.RelaxedSendThresholdEventsPerCycle) / 2;
+            } else {
+                sentCount = Min(eventsToBeSentSize, Cfg.RelaxedSendPaceEventsPerCycle);
+            }
+            for (ui32 i = 0; i < sentCount; ++i) {
+                ctx.Send(EventsToBeSent.front());
+                EventsToBeSent.pop_front();
+            }
+
+            NHPTimer::STime hpnow;
+            GetTimeFast(&hpnow);
+            const ui64 processingTime = hpnow > schedulingStart ?
hpnow - schedulingStart : 0; + const ui64 elapsedTimeMicroseconds = processingTime / (NHPTimer::GetCyclesPerSecond() / IntrasecondThreshold); + LWPROBE(ActorsystemScheduler, elapsedTimeMicroseconds, expired, eventsGottenFromQueues, sentCount, + eventsToBeSentSize, eventSchedulingErrorUs); + TryUpdateTime(&lastTimeUpdate); + } + } + + STRICT_STFUNC(StateFunc, + HFunc(TEvSchedulerInitialize, Handle) + CFunc(TEvPollerReady::EventType, HandleSchedule) + CFunc(TEvents::TSystem::PoisonPill, Die) + HFunc(TEvPollerRegisterResult, Handle) + ) + }; + + IActor* CreateSchedulerActor(const TSchedulerConfig& cfg) { + if (cfg.UseSchedulerActor) { + return new TSchedulerActor(cfg); + } else { + return nullptr; + } + } + +} + +#else // linux + +namespace NActors { + IActor* CreateSchedulerActor(const TSchedulerConfig& cfg) { + Y_UNUSED(cfg); + return nullptr; + } + +} + +#endif // linux diff --git a/library/cpp/actors/core/scheduler_actor.h b/library/cpp/actors/core/scheduler_actor.h new file mode 100644 index 0000000000..c2c561b43d --- /dev/null +++ b/library/cpp/actors/core/scheduler_actor.h @@ -0,0 +1,29 @@ +#pragma once + +#include "actor.h" +#include "event_local.h" +#include "events.h" +#include "scheduler_basic.h" + +namespace NActors { + struct TEvSchedulerInitialize : TEventLocal<TEvSchedulerInitialize, TEvents::TSystem::Bootstrap> { + TVector<NSchedulerQueue::TReader*> ScheduleReaders; + volatile ui64* CurrentTimestamp; + volatile ui64* CurrentMonotonic; + + TEvSchedulerInitialize(const TVector<NSchedulerQueue::TReader*>& scheduleReaders, volatile ui64* currentTimestamp, volatile ui64* currentMonotonic) + : ScheduleReaders(scheduleReaders) + , CurrentTimestamp(currentTimestamp) + , CurrentMonotonic(currentMonotonic) + { + } + }; + + IActor* CreateSchedulerActor(const TSchedulerConfig& cfg); + + inline TActorId MakeSchedulerActorId() { + char x[12] = {'s', 'c', 'h', 'e', 'd', 'u', 'l', 'e', 'r', 's', 'e', 'r'}; + return TActorId(0, TStringBuf(x, 12)); + } + +} diff --git a/library/cpp/actors/core/scheduler_actor_ut.cpp b/library/cpp/actors/core/scheduler_actor_ut.cpp new file mode 100644 index 0000000000..09b7369d36 --- /dev/null +++ b/library/cpp/actors/core/scheduler_actor_ut.cpp @@ -0,0 +1,100 @@ +#include "actor_coroutine.h" +#include "actorsystem.h" +#include "executor_pool_basic.h" +#include "scheduler_actor.h" +#include "scheduler_basic.h" +#include "events.h" +#include "event_local.h" +#include "hfunc.h" +#include <library/cpp/actors/interconnect/poller_actor.h> +#include <library/cpp/testing/unittest/registar.h> + +#include <util/system/sanitizers.h> + +using namespace NActors; + +Y_UNIT_TEST_SUITE(SchedulerActor) { + class TTestActor: public TActorBootstrapped<TTestActor> { + TManualEvent& DoneEvent; + TAtomic& EventsProcessed; + TInstant LastWakeup; + const TAtomicBase EventsTotalCount; + const TDuration ScheduleDelta; + + public: + TTestActor(TManualEvent& doneEvent, TAtomic& eventsProcessed, TAtomicBase eventsTotalCount, ui32 scheduleDeltaMs) + : DoneEvent(doneEvent) + , EventsProcessed(eventsProcessed) + , EventsTotalCount(eventsTotalCount) + , ScheduleDelta(TDuration::MilliSeconds(scheduleDeltaMs)) + { + } + + void Bootstrap(const TActorContext& ctx) { + LastWakeup = ctx.Now(); + Become(&TThis::StateFunc); + ctx.Schedule(ScheduleDelta, new TEvents::TEvWakeup()); + } + + void Handle(TEvents::TEvWakeup::TPtr& /*ev*/, const TActorContext& ctx) { + const TInstant now = ctx.Now(); + UNIT_ASSERT(now - LastWakeup >= ScheduleDelta); + LastWakeup = now; + + if 
(AtomicIncrement(EventsProcessed) == EventsTotalCount) { + DoneEvent.Signal(); + } else { + ctx.Schedule(ScheduleDelta, new TEvents::TEvWakeup()); + } + } + + STRICT_STFUNC(StateFunc, {HFunc(TEvents::TEvWakeup, Handle)}) + }; + + void Test(TAtomicBase eventsTotalCount, ui32 scheduleDeltaMs) { + THolder<TActorSystemSetup> setup = MakeHolder<TActorSystemSetup>(); + setup->NodeId = 0; + setup->ExecutorsCount = 1; + setup->Executors.Reset(new TAutoPtr<IExecutorPool>[setup->ExecutorsCount]); + for (ui32 i = 0; i < setup->ExecutorsCount; ++i) { + setup->Executors[i] = new TBasicExecutorPool(i, 5, 10, "basic"); + } + // create poller actor (if the platform supports it) + TActorId pollerActorId; + if (IActor* poller = CreatePollerActor()) { + pollerActorId = MakePollerActorId(); + setup->LocalServices.emplace_back(pollerActorId, TActorSetupCmd(poller, TMailboxType::ReadAsFilled, 0)); + } + TActorId schedulerActorId; + if (IActor* schedulerActor = CreateSchedulerActor(TSchedulerConfig())) { + schedulerActorId = MakeSchedulerActorId(); + setup->LocalServices.emplace_back(schedulerActorId, TActorSetupCmd(schedulerActor, TMailboxType::ReadAsFilled, 0)); + } + setup->Scheduler = CreateSchedulerThread(TSchedulerConfig()); + + TActorSystem actorSystem(setup); + + actorSystem.Start(); + + TManualEvent doneEvent; + TAtomic eventsProcessed = 0; + actorSystem.Register(new TTestActor(doneEvent, eventsProcessed, eventsTotalCount, scheduleDeltaMs)); + doneEvent.WaitI(); + + UNIT_ASSERT(AtomicGet(eventsProcessed) == eventsTotalCount); + + actorSystem.Stop(); + } + + Y_UNIT_TEST(LongEvents) { + Test(10, 500); + } + + Y_UNIT_TEST(MediumEvents) { + Test(100, 50); + } + + Y_UNIT_TEST(QuickEvents) { + Test(1000, 5); + } +} diff --git a/library/cpp/actors/core/scheduler_basic.cpp b/library/cpp/actors/core/scheduler_basic.cpp new file mode 100644 index 0000000000..fba200e16b --- /dev/null +++ b/library/cpp/actors/core/scheduler_basic.cpp @@ -0,0 +1,274 @@ +#include "scheduler_basic.h" +#include "scheduler_queue.h" + +#include <library/cpp/actors/util/datetime.h> +#include <library/cpp/actors/util/thread.h> + +#ifdef BALLOC +#include <library/cpp/balloc/optional/operators.h> +#endif + +namespace NActors { + + struct TBasicSchedulerThread::TMonCounters { + NMonitoring::TDynamicCounters::TCounterPtr TimeDelayMs; + NMonitoring::TDynamicCounters::TCounterPtr QueueSize; + NMonitoring::TDynamicCounters::TCounterPtr EventsSent; + NMonitoring::TDynamicCounters::TCounterPtr EventsDropped; + NMonitoring::TDynamicCounters::TCounterPtr EventsAdded; + NMonitoring::TDynamicCounters::TCounterPtr Iterations; + NMonitoring::TDynamicCounters::TCounterPtr Sleeps; + NMonitoring::TDynamicCounters::TCounterPtr ElapsedMicrosec; + + TMonCounters(const NMonitoring::TDynamicCounterPtr& counters) + : TimeDelayMs(counters->GetCounter("Scheduler/TimeDelayMs", false)) + , QueueSize(counters->GetCounter("Scheduler/QueueSize", false)) + , EventsSent(counters->GetCounter("Scheduler/EventsSent", true)) + , EventsDropped(counters->GetCounter("Scheduler/EventsDropped", true)) + , EventsAdded(counters->GetCounter("Scheduler/EventsAdded", true)) + , Iterations(counters->GetCounter("Scheduler/Iterations", true)) + , Sleeps(counters->GetCounter("Scheduler/Sleeps", true)) + , ElapsedMicrosec(counters->GetCounter("Scheduler/ElapsedMicrosec", true)) + { } + }; + + TBasicSchedulerThread::TBasicSchedulerThread(const TSchedulerConfig& config) + : Config(config) + , MonCounters(Config.MonCounters ?
new TMonCounters(Config.MonCounters) : nullptr) + , ActorSystem(nullptr) + , CurrentTimestamp(nullptr) + , CurrentMonotonic(nullptr) + , TotalReaders(0) + , StopFlag(false) + , ScheduleMap(3600) + { + Y_VERIFY(!Config.UseSchedulerActor, "Cannot create scheduler thread because Config.UseSchedulerActor# true"); + } + + TBasicSchedulerThread::~TBasicSchedulerThread() { + Y_VERIFY(!MainCycle); + } + + void TBasicSchedulerThread::CycleFunc() { +#ifdef BALLOC + ThreadDisableBalloc(); +#endif + ::SetCurrentThreadName("Scheduler"); + + ui64 currentMonotonic = RelaxedLoad(CurrentMonotonic); + ui64 throttledMonotonic = currentMonotonic; + + ui64 activeTick = AlignUp<ui64>(throttledMonotonic, IntrasecondThreshold); + TAutoPtr<TMomentMap> activeSec; + + NHPTimer::STime hpprev = GetCycleCountFast(); + ui64 nextTimestamp = TInstant::Now().MicroSeconds(); + ui64 nextMonotonic = Max(currentMonotonic, GetMonotonicMicroSeconds()); + + while (!AtomicLoad(&StopFlag)) { + { + const ui64 delta = nextMonotonic - throttledMonotonic; + const ui64 elapsedDelta = nextMonotonic - currentMonotonic; + const ui64 threshold = Max(Min(Config.ProgressThreshold, 2 * elapsedDelta), ui64(1)); + + throttledMonotonic = (delta > threshold) ? throttledMonotonic + threshold : nextMonotonic; + + if (MonCounters) { + *MonCounters->TimeDelayMs = (nextMonotonic - throttledMonotonic) / 1000; + } + } + AtomicStore(CurrentTimestamp, nextTimestamp); + AtomicStore(CurrentMonotonic, nextMonotonic); + currentMonotonic = nextMonotonic; + + if (MonCounters) { + ++*MonCounters->Iterations; + } + + bool somethingDone = false; + + // first step - send everything triggered on schedule + ui64 eventsSent = 0; + ui64 eventsDropped = 0; + for (;;) { + while (!!activeSec && !activeSec->empty()) { + TMomentMap::iterator it = activeSec->begin(); + if (it->first <= throttledMonotonic) { + if (NSchedulerQueue::TQueueType* q = it->second.Get()) { + while (NSchedulerQueue::TEntry* x = q->Reader.Pop()) { + somethingDone = true; + Y_VERIFY_DEBUG(x->InstantMicroseconds <= activeTick); + IEventHandle* ev = x->Ev; + ISchedulerCookie* cookie = x->Cookie; + // TODO: lazy send with backoff queue to not hang over contended mailboxes + if (cookie) { + if (cookie->Detach()) { + ActorSystem->Send(ev); + ++eventsSent; + } else { + delete ev; + ++eventsDropped; + } + } else { + ActorSystem->Send(ev); + ++eventsSent; + } + } + } + activeSec->erase(it); + } else + break; + } + + if (activeTick <= throttledMonotonic) { + Y_VERIFY_DEBUG(!activeSec || activeSec->empty()); + activeSec.Destroy(); + activeTick += IntrasecondThreshold; + TScheduleMap::iterator it = ScheduleMap.find(activeTick); + if (it != ScheduleMap.end()) { + activeSec = it->second; + ScheduleMap.erase(it); + } + continue; + } + + // ok, if we are here then nothing is ready, so the send step is complete + break; + } + + // second step - collect everything from queues + + ui64 eventsAdded = 0; + for (ui32 i = 0; i != TotalReaders; ++i) { + while (NSchedulerQueue::TEntry* x = Readers[i]->Pop()) { + somethingDone = true; + const ui64 instant = AlignUp<ui64>(x->InstantMicroseconds, Config.ResolutionMicroseconds); + IEventHandle* const ev = x->Ev; + ISchedulerCookie* const cookie = x->Cookie; + + // check here if the cookie is still valid?
looks like it would hurt performance without a significant memory saving + + if (instant <= activeTick) { + if (!activeSec) + activeSec.Reset(new TMomentMap()); + TAutoPtr<NSchedulerQueue::TQueueType>& queue = (*activeSec)[instant]; + if (!queue) + queue.Reset(new NSchedulerQueue::TQueueType()); + queue->Writer.Push(instant, ev, cookie); + } else { + const ui64 intrasecond = AlignUp<ui64>(instant, IntrasecondThreshold); + TAutoPtr<TMomentMap>& msec = ScheduleMap[intrasecond]; + if (!msec) + msec.Reset(new TMomentMap()); + TAutoPtr<NSchedulerQueue::TQueueType>& queue = (*msec)[instant]; + if (!queue) + queue.Reset(new NSchedulerQueue::TQueueType()); + queue->Writer.Push(instant, ev, cookie); + } + + ++eventsAdded; + } + } + + NHPTimer::STime hpnow = GetCycleCountFast(); + + if (MonCounters) { + *MonCounters->QueueSize -= eventsSent + eventsDropped; + *MonCounters->QueueSize += eventsAdded; + *MonCounters->EventsSent += eventsSent; + *MonCounters->EventsDropped += eventsDropped; + *MonCounters->EventsAdded += eventsAdded; + *MonCounters->ElapsedMicrosec += NHPTimer::GetSeconds(hpnow - hpprev) * 1000000; + } + + hpprev = hpnow; + nextTimestamp = TInstant::Now().MicroSeconds(); + nextMonotonic = Max(currentMonotonic, GetMonotonicMicroSeconds()); + + // ok complete, if nothing left - sleep + if (!somethingDone) { + const ui64 nextInstant = AlignDown<ui64>(throttledMonotonic + Config.ResolutionMicroseconds, Config.ResolutionMicroseconds); + if (nextMonotonic >= nextInstant) // already in next time-slice + continue; + + const ui64 delta = nextInstant - nextMonotonic; + if (delta < Config.SpinThreshold) // not so much time left, just spin + continue; + + if (MonCounters) { + ++*MonCounters->Sleeps; + } + + NanoSleep(delta * 1000); // ok, looks like we should sleep a bit. + + // Don't count sleep in elapsed microseconds + hpprev = GetCycleCountFast(); + nextTimestamp = TInstant::Now().MicroSeconds(); + nextMonotonic = Max(currentMonotonic, GetMonotonicMicroSeconds()); + } + } + // ok, die! + } + + void TBasicSchedulerThread::Prepare(TActorSystem* actorSystem, volatile ui64* currentTimestamp, volatile ui64* currentMonotonic) { + ActorSystem = actorSystem; + CurrentTimestamp = currentTimestamp; + CurrentMonotonic = currentMonotonic; + *CurrentTimestamp = TInstant::Now().MicroSeconds(); + *CurrentMonotonic = GetMonotonicMicroSeconds(); + } + + void TBasicSchedulerThread::PrepareSchedules(NSchedulerQueue::TReader** readers, ui32 scheduleReadersCount) { + Y_VERIFY(scheduleReadersCount > 0); + TotalReaders = scheduleReadersCount; + Readers.Reset(new NSchedulerQueue::TReader*[scheduleReadersCount]); + Copy(readers, readers + scheduleReadersCount, Readers.Get()); + } + + void TBasicSchedulerThread::PrepareStart() { + // Called after actor system is initialized, but before executor threads + // are started, giving us a chance to update current timestamp with a + // more recent value, taking initialization time into account. This is + // safe to do, since scheduler thread is not started yet, so no other + // threads are updating time concurrently.
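+ // Note also that CurrentMonotonic is clamped with Max() below, so the
+ // monotonic value can never step backwards from what Prepare() stored.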
+ AtomicStore(CurrentTimestamp, TInstant::Now().MicroSeconds()); + AtomicStore(CurrentMonotonic, Max(RelaxedLoad(CurrentMonotonic), GetMonotonicMicroSeconds())); + } + + void TBasicSchedulerThread::Start() { + MainCycle.Reset(new NThreading::TLegacyFuture<void, false>(std::bind(&TBasicSchedulerThread::CycleFunc, this))); + } + + void TBasicSchedulerThread::PrepareStop() { + AtomicStore(&StopFlag, true); + } + + void TBasicSchedulerThread::Stop() { + MainCycle->Get(); + MainCycle.Destroy(); + } + +} + +#ifdef __linux__ + +namespace NActors { + ISchedulerThread* CreateSchedulerThread(const TSchedulerConfig& config) { + if (config.UseSchedulerActor) { + return new TMockSchedulerThread(); + } else { + return new TBasicSchedulerThread(config); + } + } + +} + +#else // __linux__ + +namespace NActors { + ISchedulerThread* CreateSchedulerThread(const TSchedulerConfig& config) { + return new TBasicSchedulerThread(config); + } +} + +#endif // __linux__ diff --git a/library/cpp/actors/core/scheduler_basic.h b/library/cpp/actors/core/scheduler_basic.h new file mode 100644 index 0000000000..2ccde39235 --- /dev/null +++ b/library/cpp/actors/core/scheduler_basic.h @@ -0,0 +1,81 @@ +#pragma once + +#include "actorsystem.h" +#include "monotonic.h" +#include "scheduler_queue.h" +#include <library/cpp/actors/util/queue_chunk.h> +#include <library/cpp/threading/future/legacy_future.h> +#include <util/generic/hash.h> +#include <util/generic/map.h> + +namespace NActors { + + class TBasicSchedulerThread: public ISchedulerThread { + // TODO: replace with NUMA-local threads and per-thread schedules + const TSchedulerConfig Config; + + struct TMonCounters; + const THolder<TMonCounters> MonCounters; + + TActorSystem* ActorSystem; + volatile ui64* CurrentTimestamp; + volatile ui64* CurrentMonotonic; + + ui32 TotalReaders; + TArrayHolder<NSchedulerQueue::TReader*> Readers; + + volatile bool StopFlag; + + typedef TMap<ui64, TAutoPtr<NSchedulerQueue::TQueueType>> TMomentMap; // intrasecond queues + typedef THashMap<ui64, TAutoPtr<TMomentMap>> TScheduleMap; // over-second schedule + + TScheduleMap ScheduleMap; + + THolder<NThreading::TLegacyFuture<void, false>> MainCycle; + + static const ui64 IntrasecondThreshold = 1048576; // ~second + + void CycleFunc(); + + public: + TBasicSchedulerThread(const TSchedulerConfig& config = TSchedulerConfig()); + ~TBasicSchedulerThread(); + + void Prepare(TActorSystem* actorSystem, volatile ui64* currentTimestamp, volatile ui64* currentMonotonic) override; + void PrepareSchedules(NSchedulerQueue::TReader** readers, ui32 scheduleReadersCount) override; + + void PrepareStart() override; + void Start() override; + void PrepareStop() override; + void Stop() override; + }; + + class TMockSchedulerThread: public ISchedulerThread { + public: + virtual ~TMockSchedulerThread() override { + } + + void Prepare(TActorSystem* actorSystem, volatile ui64* currentTimestamp, volatile ui64* currentMonotonic) override { + Y_UNUSED(actorSystem); + *currentTimestamp = TInstant::Now().MicroSeconds(); + *currentMonotonic = GetMonotonicMicroSeconds(); + } + + void PrepareSchedules(NSchedulerQueue::TReader** readers, ui32 scheduleReadersCount) override { + Y_UNUSED(readers); + Y_UNUSED(scheduleReadersCount); + } + + void Start() override { + } + + void PrepareStop() override { + } + + void Stop() override { + } + }; + + ISchedulerThread* CreateSchedulerThread(const TSchedulerConfig& cfg); + +} diff --git a/library/cpp/actors/core/scheduler_cookie.cpp b/library/cpp/actors/core/scheduler_cookie.cpp new file mode 
100644 index 0000000000..0fa6f543a7 --- /dev/null +++ b/library/cpp/actors/core/scheduler_cookie.cpp @@ -0,0 +1,84 @@ +#include "scheduler_cookie.h" + +namespace NActors { + class TSchedulerCookie2Way: public ISchedulerCookie { + TAtomic Value; + + public: + TSchedulerCookie2Way() + : Value(2) + { + } + + bool IsArmed() noexcept override { + return (AtomicGet(Value) == 2); + } + + bool Detach() noexcept override { + const ui64 x = AtomicDecrement(Value); + if (x == 1) + return true; + + if (x == 0) { + delete this; + return false; + } + + Y_FAIL(); + } + + bool DetachEvent() noexcept override { + Y_FAIL(); + } + }; + + ISchedulerCookie* ISchedulerCookie::Make2Way() { + return new TSchedulerCookie2Way(); + } + + class TSchedulerCookie3Way: public ISchedulerCookie { + TAtomic Value; + + public: + TSchedulerCookie3Way() + : Value(3) + { + } + + bool IsArmed() noexcept override { + return (AtomicGet(Value) == 3); + } + + bool Detach() noexcept override { + const ui64 x = AtomicDecrement(Value); + if (x == 2) + return true; + if (x == 1) + return false; + if (x == 0) { + delete this; + return false; + } + + Y_FAIL(); + } + + bool DetachEvent() noexcept override { + const ui64 x = AtomicDecrement(Value); + if (x == 2) + return false; + if (x == 1) + return true; + if (x == 0) { + delete this; + return false; + } + + Y_FAIL(); + } + }; + + ISchedulerCookie* ISchedulerCookie::Make3Way() { + return new TSchedulerCookie3Way(); + } +} diff --git a/library/cpp/actors/core/scheduler_cookie.h b/library/cpp/actors/core/scheduler_cookie.h new file mode 100644 index 0000000000..2c20ca67f3 --- /dev/null +++ b/library/cpp/actors/core/scheduler_cookie.h @@ -0,0 +1,78 @@ +#pragma once + +#include "defs.h" +#include <util/generic/noncopyable.h> + +namespace NActors { + class ISchedulerCookie : TNonCopyable { + protected: + virtual ~ISchedulerCookie() { + } + + public: + virtual bool Detach() noexcept = 0; + virtual bool DetachEvent() noexcept = 0; + virtual bool IsArmed() noexcept = 0; + + static ISchedulerCookie* Make2Way(); + static ISchedulerCookie* Make3Way(); + }; + + class TSchedulerCookieHolder : TNonCopyable { + ISchedulerCookie* Cookie; + + public: + TSchedulerCookieHolder() + : Cookie(nullptr) + { + } + + TSchedulerCookieHolder(ISchedulerCookie* x) + : Cookie(x) + { + } + + ~TSchedulerCookieHolder() { + Detach(); + } + + bool operator==(const TSchedulerCookieHolder& x) const noexcept { + return (Cookie == x.Cookie); + } + + ISchedulerCookie* Get() const { + return Cookie; + } + + ISchedulerCookie* Release() { + ISchedulerCookie* result = Cookie; + Cookie = nullptr; + return result; + } + + void Reset(ISchedulerCookie* cookie) { + Detach(); + Cookie = cookie; + } + + bool Detach() noexcept { + if (Cookie) { + const bool res = Cookie->Detach(); + Cookie = nullptr; + return res; + } else { + return false; + } + } + + bool DetachEvent() noexcept { + if (Cookie) { + const bool res = Cookie->DetachEvent(); + Cookie = nullptr; + return res; + } else { + return false; + } + } + }; +} diff --git a/library/cpp/actors/core/scheduler_queue.h b/library/cpp/actors/core/scheduler_queue.h new file mode 100644 index 0000000000..3b8fac28f0 --- /dev/null +++ b/library/cpp/actors/core/scheduler_queue.h @@ -0,0 +1,120 @@ +#pragma once + +#include <library/cpp/actors/util/queue_chunk.h> + +namespace NActors { + class IEventHandle; + class ISchedulerCookie; + + namespace NSchedulerQueue { + struct TEntry { + ui64 InstantMicroseconds; + IEventHandle* Ev; + ISchedulerCookie* Cookie; + }; + + struct TChunk : 
TQueueChunkDerived<TEntry, 512, TChunk> {}; + + class TReader; + class TWriter; + class TWriterWithPadding; + + class TReader : ::TNonCopyable { + TChunk* ReadFrom; + ui32 ReadPosition; + + friend class TWriter; + + public: + TReader() + : ReadFrom(new TChunk()) + , ReadPosition(0) + { + } + + ~TReader() { + while (TEntry* x = Pop()) { + if (x->Cookie) + x->Cookie->Detach(); + delete x->Ev; + } + delete ReadFrom; + } + + TEntry* Pop() { + TChunk* head = ReadFrom; + if (ReadPosition != TChunk::EntriesCount) { + if (AtomicLoad(&head->Entries[ReadPosition].InstantMicroseconds) != 0) + return const_cast<TEntry*>(&head->Entries[ReadPosition++]); + else + return nullptr; + } else if (TChunk* next = AtomicLoad(&head->Next)) { + ReadFrom = next; + delete head; + ReadPosition = 0; + return Pop(); + } + + return nullptr; + } + }; + + class TWriter : ::TNonCopyable { + TChunk* WriteTo; + ui32 WritePosition; + + public: + TWriter() + : WriteTo(nullptr) + , WritePosition(0) + { + } + + void Init(const TReader& reader) { + WriteTo = reader.ReadFrom; + WritePosition = 0; + } + + void Push(ui64 instantMicroseconds, IEventHandle* ev, ISchedulerCookie* cookie) { + if (Y_UNLIKELY(instantMicroseconds == 0)) { + // Protect against Pop() getting stuck forever + instantMicroseconds = 1; + } + if (WritePosition != TChunk::EntriesCount) { + volatile TEntry& entry = WriteTo->Entries[WritePosition]; + entry.Cookie = cookie; + entry.Ev = ev; + AtomicStore(&entry.InstantMicroseconds, instantMicroseconds); + ++WritePosition; + } else { + TChunk* next = new TChunk(); + volatile TEntry& entry = next->Entries[0]; + entry.Cookie = cookie; + entry.Ev = ev; + entry.InstantMicroseconds = instantMicroseconds; + AtomicStore(&WriteTo->Next, next); + WriteTo = next; + WritePosition = 1; + } + } + }; + + class TWriterWithPadding: public TWriter { + private: + ui8 CacheLinePadding[64 - sizeof(TWriter)]; + + void UnusedCacheLinePadding() { + Y_UNUSED(CacheLinePadding); + } + }; + + struct TQueueType { + TReader Reader; + TWriter Writer; + + TQueueType() { + Writer.Init(Reader); + } + }; + } +} diff --git a/library/cpp/actors/core/servicemap.h b/library/cpp/actors/core/servicemap.h new file mode 100644 index 0000000000..d72e50cae5 --- /dev/null +++ b/library/cpp/actors/core/servicemap.h @@ -0,0 +1,168 @@ +#pragma once + +#include "defs.h" + +namespace NActors { + // wait-free one-writer multi-reader hash-tree for service mapping purposes + // fast updates of the same key may lead to false negatives; we don't care, as such cases are already broken at the service-map application logic level + + template <typename TKey, typename TValue, typename THash, ui64 BaseSize = 256 * 1024, ui64 ExtCount = 4, ui64 ExtBranching = 4> + class TServiceMap : TNonCopyable { + struct TEntry : TNonCopyable { + ui32 CounterIn; + ui32 CounterOut; + TKey Key; + TValue Value; + + TEntry() + : CounterIn(0) + , CounterOut(0) + , Key() + , Value() + { + } + }; + + struct TBranch : TNonCopyable { + TEntry Entries[ExtCount]; + TBranch* Branches[ExtBranching]; + + TBranch() { + Fill(Branches, Branches + ExtBranching, (TBranch*)nullptr); + } + }; + + ui32 Counter; + TBranch* Line[BaseSize]; + + bool ScanBranch(TBranch* branch, const TKey& key, ui64 hash, TValue& ret) { + for (ui32 i = 0; i != ExtCount; ++i) { + const TEntry& entry = branch->Entries[i]; + const ui32 counterIn = AtomicLoad(&entry.CounterIn); + if (counterIn != 0 && entry.Key == key) { + ret = entry.Value; + const ui32 counterOut = AtomicLoad(&entry.CounterOut); + if (counterOut == counterIn) + return true; + } + } + + const ui64
hash0 = hash % ExtBranching; + if (TBranch* next = AtomicLoad(branch->Branches + hash0)) + return ScanBranch(next, key, hash / ExtBranching, ret); + + return false; + } + + void ScanZeroOld(TBranch* branch, const TKey& key, ui64 hash, TEntry** zeroEntry, TEntry*& oldEntry) { + for (ui32 i = 0; i != ExtCount; ++i) { + TEntry& entry = branch->Entries[i]; + if (entry.CounterIn == 0) { + if (zeroEntry && !*zeroEntry) { + *zeroEntry = &entry; + if (oldEntry != nullptr) + return; + } + } else { + if (entry.Key == key) { + oldEntry = &entry; + if (!zeroEntry || *zeroEntry) + return; + } + } + } + + const ui64 hash0 = hash % ExtBranching; + if (TBranch* next = branch->Branches[hash0]) { + ScanZeroOld(next, key, hash / ExtBranching, zeroEntry, oldEntry); + } else { // found tail, if zeroEntry requested, but not yet found - insert one + if (zeroEntry && !*zeroEntry) { + TBranch* next = new TBranch(); + *zeroEntry = next->Entries; + AtomicStore(branch->Branches + hash0, next); + } + } + } + + public: + TServiceMap() + : Counter(0) + { + Fill(Line, Line + BaseSize, (TBranch*)nullptr); + } + + ~TServiceMap() { + for (ui64 i = 0; i < BaseSize; ++i) { + delete Line[i]; + } + } + + TValue Find(const TKey& key) { + THash hashOp; + const ui64 hash = hashOp(key); + const ui64 hash0 = hash % BaseSize; + + if (TBranch* branch = AtomicLoad(Line + hash0)) { + TValue ret; + if (ScanBranch(branch, key, hash / BaseSize, ret)) + return ret; + } + + return TValue(); + } + + // returns the previous value on update, or a default-constructed TValue on insert + TValue Update(const TKey& key, const TValue& value) { + THash hashOp; + const ui64 hash = hashOp(key); + const ui64 hash0 = hash % BaseSize; + + TEntry* zeroEntry = nullptr; + TEntry* oldEntry = nullptr; + + if (TBranch* branch = Line[hash0]) { + ScanZeroOld(branch, key, hash / BaseSize, &zeroEntry, oldEntry); + } else { + TBranch* next = new TBranch(); + zeroEntry = next->Entries; + AtomicStore(Line + hash0, next); + } + + // now we got both entries, first - push new one + const ui32 counter = AtomicUi32Increment(&Counter); + AtomicStore(&zeroEntry->CounterOut, counter); + zeroEntry->Key = key; + zeroEntry->Value = value; + AtomicStore(&zeroEntry->CounterIn, counter); + + if (oldEntry != nullptr) { + const TValue ret = oldEntry->Value; + AtomicStore<ui32>(&oldEntry->CounterOut, 0); + AtomicStore<ui32>(&oldEntry->CounterIn, 0); + return ret; + } else { + return TValue(); + } + } + + bool Erase(const TKey& key) { + THash hashOp; + const ui64 hash = hashOp(key); + const ui64 hash0 = hash % BaseSize; + + TEntry* oldEntry = 0; + + if (TBranch* branch = Line[hash0]) { + ScanZeroOld(branch, key, hash / BaseSize, 0, oldEntry); + } + + if (oldEntry != 0) { + AtomicStore<ui32>(&oldEntry->CounterOut, 0); + AtomicStore<ui32>(&oldEntry->CounterIn, 0); + return true; + } else { + return false; + } + } + }; +} diff --git a/library/cpp/actors/core/ut/ya.make b/library/cpp/actors/core/ut/ya.make new file mode 100644 index 0000000000..3ee28d5850 --- /dev/null +++ b/library/cpp/actors/core/ut/ya.make @@ -0,0 +1,46 @@ +UNITTEST_FOR(library/cpp/actors/core) + +OWNER( + alexvru + g:kikimr +) + +FORK_SUBTESTS() +IF (SANITIZER_TYPE) + SIZE(LARGE) + TIMEOUT(1200) + TAG(ya:fat) + SPLIT_FACTOR(20) + REQUIREMENTS( + ram:32 + ) +ELSE() + SIZE(MEDIUM) + TIMEOUT(600) + REQUIREMENTS( + ram:16 + ) +ENDIF() + + +PEERDIR( + library/cpp/actors/interconnect + library/cpp/actors/testlib +) + +SRCS( + actor_coroutine_ut.cpp + actor_ut.cpp + actorsystem_ut.cpp + ask_ut.cpp + balancer_ut.cpp + event_pb_payload_ut.cpp + event_pb_ut.cpp + 
executor_pool_basic_ut.cpp + executor_pool_united_ut.cpp + log_ut.cpp + memory_tracker_ut.cpp + scheduler_actor_ut.cpp +) + +END() diff --git a/library/cpp/actors/core/worker_context.cpp b/library/cpp/actors/core/worker_context.cpp new file mode 100644 index 0000000000..ada6c997d4 --- /dev/null +++ b/library/cpp/actors/core/worker_context.cpp @@ -0,0 +1,7 @@ +#include "worker_context.h" +#include "probes.h" + +namespace NActors { + LWTRACE_USING(ACTORLIB_PROVIDER); + +} diff --git a/library/cpp/actors/core/worker_context.h b/library/cpp/actors/core/worker_context.h new file mode 100644 index 0000000000..b4c37a7629 --- /dev/null +++ b/library/cpp/actors/core/worker_context.h @@ -0,0 +1,175 @@ +#pragma once + +#include "defs.h" + +#include "actorsystem.h" +#include "event.h" +#include "lease.h" +#include "mailbox.h" +#include "mon_stats.h" + +#include <library/cpp/actors/util/datetime.h> +#include <library/cpp/actors/util/intrinsics.h> +#include <library/cpp/actors/util/thread.h> + +#include <library/cpp/lwtrace/shuttle.h> + +namespace NActors { + struct TWorkerContext { + const TWorkerId WorkerId; + const TCpuId CpuId; + TLease Lease; + IExecutorPool* Executor = nullptr; + TMailboxTable* MailboxTable = nullptr; + ui64 TimePerMailboxTs = 0; + ui32 EventsPerMailbox = 0; + ui64 SoftDeadlineTs = ui64(-1); + TExecutorThreadStats* Stats = &WorkerStats; // pool stats + TExecutorThreadStats WorkerStats; + TPoolId PoolId = MaxPools; + mutable NLWTrace::TOrbit Orbit; + + TWorkerContext(TWorkerId workerId, TCpuId cpuId, size_t activityVecSize) + : WorkerId(workerId) + , CpuId(cpuId) + , Lease(WorkerId, NeverExpire) + , WorkerStats(activityVecSize) + {} + +#ifdef ACTORSLIB_COLLECT_EXEC_STATS + void GetCurrentStats(TExecutorThreadStats& statsCopy) const { + statsCopy = TExecutorThreadStats(); + statsCopy.Aggregate(*Stats); + } + + void AddElapsedCycles(ui32 activityType, i64 elapsed) { + Y_VERIFY_DEBUG(activityType < Stats->MaxActivityType()); + RelaxedStore(&Stats->ElapsedTicks, RelaxedLoad(&Stats->ElapsedTicks) + elapsed); + RelaxedStore(&Stats->ElapsedTicksByActivity[activityType], RelaxedLoad(&Stats->ElapsedTicksByActivity[activityType]) + elapsed); + } + + void AddParkedCycles(i64 elapsed) { + RelaxedStore(&Stats->ParkedTicks, RelaxedLoad(&Stats->ParkedTicks) + elapsed); + } + + void AddBlockedCycles(i64 elapsed) { + RelaxedStore(&Stats->BlockedTicks, RelaxedLoad(&Stats->BlockedTicks) + elapsed); + } + + void IncrementSentEvents() { + RelaxedStore(&Stats->SentEvents, RelaxedLoad(&Stats->SentEvents) + 1); + } + + void IncrementPreemptedEvents() { + RelaxedStore(&Stats->PreemptedEvents, RelaxedLoad(&Stats->PreemptedEvents) + 1); + } + + void DecrementActorsAliveByActivity(ui32 activityType) { + if (activityType >= Stats->MaxActivityType()) { + activityType = 0; + } + RelaxedStore(&Stats->ActorsAliveByActivity[activityType], Stats->ActorsAliveByActivity[activityType] - 1); + } + + inline void IncrementNonDeliveredEvents() { + RelaxedStore(&Stats->NonDeliveredEvents, RelaxedLoad(&Stats->NonDeliveredEvents) + 1); + } + + inline void IncrementMailboxPushedOutBySoftPreemption() { + RelaxedStore(&Stats->MailboxPushedOutBySoftPreemption, RelaxedLoad(&Stats->MailboxPushedOutBySoftPreemption) + 1); + } + + inline void IncrementMailboxPushedOutByTime() { + RelaxedStore(&Stats->MailboxPushedOutByTime, RelaxedLoad(&Stats->MailboxPushedOutByTime) + 1); + } + + inline void IncrementMailboxPushedOutByEventCount() { + RelaxedStore(&Stats->MailboxPushedOutByEventCount, 
RelaxedLoad(&Stats->MailboxPushedOutByEventCount) + 1); + } + + inline void IncrementEmptyMailboxActivation() { + RelaxedStore(&Stats->EmptyMailboxActivation, RelaxedLoad(&Stats->EmptyMailboxActivation) + 1); + } + + double AddActivationStats(i64 scheduleTs, i64 deliveredTs) { + i64 ts = deliveredTs > scheduleTs ? deliveredTs - scheduleTs : 0; + double usec = NHPTimer::GetSeconds(ts) * 1000000.0; + Stats->ActivationTimeHistogram.Add(usec); + return usec; + } + + ui64 AddEventDeliveryStats(i64 sentTs, i64 deliveredTs) { + ui64 usecDeliv = deliveredTs > sentTs ? NHPTimer::GetSeconds(deliveredTs - sentTs) * 1000000 : 0; + Stats->EventDeliveryTimeHistogram.Add(usecDeliv); + return usecDeliv; + } + + i64 AddEventProcessingStats(i64 deliveredTs, i64 processedTs, ui32 activityType, ui64 scheduled) { + i64 elapsed = processedTs - deliveredTs; + ui64 usecElapsed = NHPTimer::GetSeconds(elapsed) * 1000000; + activityType = (activityType >= Stats->MaxActivityType()) ? 0 : activityType; + Stats->EventProcessingCountHistogram.Add(usecElapsed); + Stats->EventProcessingTimeHistogram.Add(usecElapsed, elapsed); + RelaxedStore(&Stats->ReceivedEvents, RelaxedLoad(&Stats->ReceivedEvents) + 1); + RelaxedStore(&Stats->ReceivedEventsByActivity[activityType], RelaxedLoad(&Stats->ReceivedEventsByActivity[activityType]) + 1); + RelaxedStore(&Stats->ScheduledEventsByActivity[activityType], RelaxedLoad(&Stats->ScheduledEventsByActivity[activityType]) + scheduled); + AddElapsedCycles(activityType, elapsed); + return elapsed; + } + + void UpdateActorsStats(size_t dyingActorsCnt) { + if (dyingActorsCnt) { + AtomicAdd(Executor->DestroyedActors, dyingActorsCnt); + } + RelaxedStore(&Stats->PoolDestroyedActors, (ui64)RelaxedLoad(&Executor->DestroyedActors)); + RelaxedStore(&Stats->PoolActorRegistrations, (ui64)RelaxedLoad(&Executor->ActorRegistrations)); + RelaxedStore(&Stats->PoolAllocatedMailboxes, MailboxTable->GetAllocatedMailboxCount()); + } + + void UpdateThreadTime() { + RelaxedStore(&WorkerStats.CpuNs, ThreadCPUTime() * 1000); + } +#else + void GetCurrentStats(TExecutorThreadStats&) const {} + inline void AddElapsedCycles(ui32, i64) {} + inline void AddParkedCycles(i64) {} + inline void AddBlockedCycles(i64) {} + inline void IncrementSentEvents() {} + inline void IncrementPreemptedEvents() {} + inline void IncrementMailboxPushedOutBySoftPreemption() {} + inline void IncrementMailboxPushedOutByTime() {} + inline void IncrementMailboxPushedOutByEventCount() {} + inline void IncrementEmptyMailboxActivation() {} + void DecrementActorsAliveByActivity(ui32) {} + void IncrementNonDeliveredEvents() {} + double AddActivationStats(i64, i64) { return 0; } + ui64 AddEventDeliveryStats(i64, i64) { return 0; } + i64 AddEventProcessingStats(i64, i64, ui32, ui64) { return 0; } + void UpdateActorsStats(size_t) {} + void UpdateThreadTime() {} +#endif + + void Switch(IExecutorPool* executor, + TMailboxTable* mailboxTable, + ui64 timePerMailboxTs, + ui32 eventsPerMailbox, + ui64 softDeadlineTs, + TExecutorThreadStats* stats) + { + Executor = executor; + MailboxTable = mailboxTable; + TimePerMailboxTs = timePerMailboxTs; + EventsPerMailbox = eventsPerMailbox; + SoftDeadlineTs = softDeadlineTs; + Stats = stats; + PoolId = Executor ?
Executor->PoolId : MaxPools; + } + + void SwitchToIdle() { + Executor = nullptr; + MailboxTable = nullptr; + //Stats = &WorkerStats; // TODO: in actorsystem 2.0 idle stats cannot be related to specific pool + PoolId = MaxPools; + } + }; +} diff --git a/library/cpp/actors/core/ya.make b/library/cpp/actors/core/ya.make new file mode 100644 index 0000000000..880a9d00db --- /dev/null +++ b/library/cpp/actors/core/ya.make @@ -0,0 +1,123 @@ +LIBRARY() + +OWNER( + ddoarn + g:kikimr +) + +NO_WSHADOW() + +IF (PROFILE_MEMORY_ALLOCATIONS) + CFLAGS(-DPROFILE_MEMORY_ALLOCATIONS) +ENDIF() + +IF (ALLOCATOR == "B" OR ALLOCATOR == "BS" OR ALLOCATOR == "C") + CXXFLAGS(-DBALLOC) + PEERDIR( + library/cpp/balloc/optional + ) +ENDIF() + +SRCS( + actor_bootstrapped.h + actor_coroutine.cpp + actor_coroutine.h + actor.cpp + actor.h + actorid.cpp + actorid.h + actorsystem.cpp + actorsystem.h + ask.cpp + ask.h + balancer.h + balancer.cpp + buffer.cpp + buffer.h + callstack.cpp + callstack.h + config.h + cpu_manager.cpp + cpu_manager.h + cpu_state.h + defs.h + event.cpp + event.h + event_load.h + event_local.h + event_pb.cpp + event_pb.h + events.h + events_undelivered.cpp + executelater.h + executor_pool_base.cpp + executor_pool_base.h + executor_pool_basic.cpp + executor_pool_basic.h + executor_pool_io.cpp + executor_pool_io.h + executor_pool_united.cpp + executor_pool_united.h + executor_thread.cpp + executor_thread.h + hfunc.h + interconnect.cpp + interconnect.h + invoke.h + io_dispatcher.cpp + io_dispatcher.h + lease.h + log.cpp + log.h + log_settings.cpp + log_settings.h + mailbox.cpp + mailbox.h + mailbox_queue_revolving.h + mailbox_queue_simple.h + memory_track.cpp + memory_track.h + memory_tracker.cpp + memory_tracker.h + mon.h + mon_stats.h + monotonic.cpp + monotonic.h + worker_context.cpp + worker_context.h + probes.cpp + probes.h + process_stats.cpp + process_stats.h + scheduler_actor.cpp + scheduler_actor.h + scheduler_basic.cpp + scheduler_basic.h + scheduler_cookie.cpp + scheduler_cookie.h + scheduler_queue.h + servicemap.h +) + +GENERATE_ENUM_SERIALIZATION(defs.h) +GENERATE_ENUM_SERIALIZATION(actor.h) + +PEERDIR( + library/cpp/actors/memory_log + library/cpp/actors/prof + library/cpp/actors/protos + library/cpp/actors/util + library/cpp/execprofile + library/cpp/json/writer + library/cpp/logger + library/cpp/lwtrace + library/cpp/monlib/dynamic_counters + library/cpp/svnversion + library/cpp/threading/future +) + +END() + +RECURSE_FOR_TESTS( + ut +) diff --git a/library/cpp/actors/dnscachelib/dnscache.cpp b/library/cpp/actors/dnscachelib/dnscache.cpp new file mode 100644 index 0000000000..649339ddb2 --- /dev/null +++ b/library/cpp/actors/dnscachelib/dnscache.cpp @@ -0,0 +1,445 @@ +#include "dnscache.h" +#include "probes.h" +#include "timekeeper.h" + +#include <contrib/libs/c-ares/ares.h> +#include <util/system/guard.h> +#include <util/datetime/systime.h> + +const TDnsCache::THost TDnsCache::NullHost; + +LWTRACE_USING(DNSCACHELIB_PROVIDER); + +static_assert(sizeof(ares_channel) == sizeof(void*), "expect sizeof(ares_channel) == sizeof(void *)"); + +TDnsCache::TDnsCache(bool allowIpv4, bool allowIpv6, time_t lifetime, time_t neg, ui32 timeout) + : EntryLifetime(lifetime) + , NegativeLifetime(neg) + , Timeout(TDuration::MicroSeconds(timeout)) + , AllowIpV4(allowIpv4) + , AllowIpV6(allowIpv6) + , ACacheHits(0) + , ACacheMisses(0) + , PtrCacheHits(0) + , PtrCacheMisses(0) +{ +#ifdef _win_ + if (ares_library_init(ARES_LIB_INIT_WIN32) != ARES_SUCCESS) { + LWPROBE(AresInitFailed); + ythrow yexception() << 
"ares_init() failed"; + } +#endif + + ares_channel chan; + + if (ares_init(&chan) != ARES_SUCCESS) { + LWPROBE(AresInitFailed); + ythrow yexception() << "ares_init() failed"; + } + Channel = chan; + LWPROBE(Created); +} + +TDnsCache::~TDnsCache(void) { + ares_channel chan = static_cast<ares_channel>(Channel); + + ares_cancel(chan); + ares_destroy(chan); + LWPROBE(Destroyed); + +#ifdef _win_ + ares_library_cleanup(); +#endif +} + +TString TDnsCache::GetHostByAddr(const NAddr::IRemoteAddr& addr) { + in6_addr key; + + if (addr.Addr()->sa_family == AF_INET6) { + const struct sockaddr_in6* s6 = (const struct sockaddr_in6*)(addr.Addr()); + memcpy(&key, &s6->sin6_addr, sizeof(s6->sin6_addr)); + } else if (addr.Addr()->sa_family == AF_INET) { + const struct sockaddr_in* s4 = (const struct sockaddr_in*)(addr.Addr()); + memset(&key, 0, sizeof(key)); + memcpy(&key, &s4->sin_addr, sizeof(s4->sin_addr)); + } else { + return ""; + } + const TAddr& host = ResolveAddr(key, addr.Addr()->sa_family); + + return host.Hostname; +} + +TIpHost TDnsCache::Get(const TString& hostname) { + if (!AllowIpV4) + return TIpHost(-1); + + const THost& addr = Resolve(hostname, AF_INET); + + TGuard<TMutex> lock(CacheMtx); + if (addr.AddrsV4.empty()) { + return TIpHost(-1); + } + return addr.AddrsV4.front(); +} + +NAddr::IRemoteAddrPtr TDnsCache::GetAddr( + const TString& hostname, + int family, + TIpPort port, + bool cacheOnly) { + if (family != AF_INET && AllowIpV6) { + const THost& addr = Resolve(hostname, AF_INET6, cacheOnly); + + TGuard<TMutex> lock(CacheMtx); + if (!addr.AddrsV6.empty()) { + struct sockaddr_in6 sin6; + Zero(sin6); + sin6.sin6_family = AF_INET6; + sin6.sin6_addr = addr.AddrsV6.front(); + sin6.sin6_port = HostToInet(port); + + return MakeHolder<NAddr::TIPv6Addr>(sin6); + } + } + + if (family != AF_INET6 && AllowIpV4) { + const THost& addr = Resolve(hostname, AF_INET, cacheOnly); + + TGuard<TMutex> lock(CacheMtx); + if (!addr.AddrsV4.empty()) { + return MakeHolder<NAddr::TIPv4Addr>(TIpAddress(addr.AddrsV4.front(), port)); + } + } + + LWPROBE(FamilyMismatch, family, AllowIpV4, AllowIpV6); + return nullptr; +} + +void TDnsCache::GetAllAddresses( + const TString& hostname, + TVector<NAddr::IRemoteAddrPtr>& addrs) { + if (AllowIpV4) { + const THost& addr4 = Resolve(hostname, AF_INET); + + TGuard<TMutex> lock(CacheMtx); + for (size_t i = 0; i < addr4.AddrsV4.size(); i++) { + addrs.push_back(MakeHolder<NAddr::TIPv4Addr>(TIpAddress(addr4.AddrsV4[i], 0))); + } + } + + if (AllowIpV6) { + const THost& addr6 = Resolve(hostname, AF_INET6); + + struct sockaddr_in6 sin6; + Zero(sin6); + sin6.sin6_family = AF_INET6; + + TGuard<TMutex> lock(CacheMtx); + for (size_t i = 0; i < addr6.AddrsV6.size(); i++) { + sin6.sin6_addr = addr6.AddrsV6[i]; + + addrs.push_back(MakeHolder<NAddr::TIPv6Addr>(sin6)); + } + } +} + +void TDnsCache::GetStats(ui64& a_cache_hits, ui64& a_cache_misses, + ui64& ptr_cache_hits, ui64& ptr_cache_misses) { + TGuard<TMutex> lock(CacheMtx); + + a_cache_hits = ACacheHits; + a_cache_misses = ACacheMisses; + ptr_cache_hits = PtrCacheHits; + ptr_cache_misses = PtrCacheMisses; +} + +bool TDnsCache::THost::IsStale(int family, const TDnsCache* ctx) const noexcept { + time_t resolved = family == AF_INET ? ResolvedV4 : ResolvedV6; + time_t notfound = family == AF_INET ? 
NotFoundV4 : NotFoundV6; + + if (TTimeKeeper::GetTime() - resolved < ctx->EntryLifetime) + return false; + + if (TTimeKeeper::GetTime() - notfound < ctx->NegativeLifetime) + return false; + + return true; +} + +const TDnsCache::THost& +TDnsCache::Resolve(const TString& hostname, int family, bool cacheOnly) { + if (!ValidateHName(hostname)) { + LWPROBE(ResolveNullHost, hostname, family); + return NullHost; + } + + THostCache::iterator p; + + Y_ASSERT(family == AF_INET || family == AF_INET6); + + { + TGuard<TMutex> lock(CacheMtx); + p = HostCache.find(hostname); + if (p != HostCache.end()) { + if (!p->second.IsStale(family, this)) { + /* Recently resolved, just return cached value */ + ACacheHits += 1; + THost& host = p->second; + LWPROBE(ResolveFromCache, hostname, family, host.AddrsV4ToString(), host.AddrsV6ToString(), ACacheHits); + return host; + } else { + LWPROBE(ResolveCacheTimeout, hostname); + } + } else { + /* Never resolved, create cache entry */ + LWPROBE(ResolveCacheNew, hostname); + p = HostCache.insert(std::make_pair(hostname, THost())).first; + } + ACacheMisses += 1; + } + + if (cacheOnly) + return NullHost; + + TAtomic& inprogress = (family == AF_INET ? p->second.InProgressV4 : p->second.InProgressV6); + + { + /* This way only! CacheMtx should always be taken AFTER AresMtx, + * because later in ares_process it can only be done this way. + * Lock order reversal will cause deadlock in unfortunate moments. + */ + TGuard<TMutex> areslock(AresMtx); + TGuard<TMutex> cachelock(CacheMtx); + + if (!inprogress) { + ares_channel chan = static_cast<ares_channel>(Channel); + TGHBNContext* ctx = new TGHBNContext(); + ctx->Owner = this; + ctx->Hostname = hostname; + ctx->Family = family; + + AtomicSet(inprogress, 1); + ares_gethostbyname(chan, hostname.c_str(), family, + &TDnsCache::GHBNCallback, ctx); + } + } + + WaitTask(inprogress); + + LWPROBE(ResolveDone, hostname, family, p->second.AddrsV4ToString(), p->second.AddrsV6ToString()); + return p->second; +} + +bool TDnsCache::ValidateHName(const TString& name) const noexcept { + return name.size() > 0; +} + +const TDnsCache::TAddr& TDnsCache::ResolveAddr(const in6_addr& addr, int family) { + TAddrCache::iterator p; + + { + TGuard<TMutex> lock(CacheMtx); + p = AddrCache.find(addr); + if (p != AddrCache.end()) { + if (TTimeKeeper::GetTime() - p->second.Resolved < EntryLifetime || TTimeKeeper::GetTime() - p->second.NotFound < NegativeLifetime) { + /* Recently resolved, just return cached value */ + PtrCacheHits += 1; + return p->second; + } + } else { + /* Never resolved, create cache entry */ + + p = AddrCache.insert(std::make_pair(addr, TAddr())).first; + } + PtrCacheMisses += 1; + } + + { + /* This way only! CacheMtx should always be taken AFTER AresMtx, + * because later in ares_process it can only be done this way. + * Lock order reversal will cause deadlock in unfortunate moments. + */ + TGuard<TMutex> areslock(AresMtx); + TGuard<TMutex> cachelock(CacheMtx); + + if (!p->second.InProgress) { + ares_channel chan = static_cast<ares_channel>(Channel); + TGHBAContext* ctx = new TGHBAContext(); + ctx->Owner = this; + ctx->Addr = addr; + + AtomicSet(p->second.InProgress, 1); + ares_gethostbyaddr(chan, &addr, + family == AF_INET ?
sizeof(in_addr) : sizeof(in6_addr), + family, &TDnsCache::GHBACallback, ctx); + } + } + + WaitTask(p->second.InProgress); + + return p->second; +} + +void TDnsCache::WaitTask(TAtomic& flag) { + const TInstant start = TInstant(TTimeKeeper::GetTimeval()); + + while (AtomicGet(flag)) { + ares_channel chan = static_cast<ares_channel>(Channel); + + struct pollfd pfd[ARES_GETSOCK_MAXNUM]; + int nfds; + ares_socket_t socks[ARES_GETSOCK_MAXNUM]; + int bits; + + { + TGuard<TMutex> lock(AresMtx); + bits = ares_getsock(chan, socks, ARES_GETSOCK_MAXNUM); + if (bits == 0) { + /* other thread did our job */ + continue; + } + } + + for (nfds = 0; nfds < ARES_GETSOCK_MAXNUM; nfds++) { + pfd[nfds].events = 0; + pfd[nfds].revents = 0; + if (ARES_GETSOCK_READABLE(bits, nfds)) { + pfd[nfds].fd = socks[nfds]; + pfd[nfds].events |= POLLRDNORM | POLLIN; + } + if (ARES_GETSOCK_WRITABLE(bits, nfds)) { + pfd[nfds].fd = socks[nfds]; + pfd[nfds].events |= POLLWRNORM | POLLOUT; + } + if (pfd[nfds].events == 0) { + break; + } + } + + Y_ASSERT(nfds != 0); + + const TDuration left = TInstant(TTimeKeeper::GetTimeval()) - start; + const TDuration wait = Max(Timeout - left, TDuration::Zero()); + + int rv = poll(pfd, nfds, wait.MilliSeconds()); + + if (rv == -1) { + if (errno == EINTR) { + continue; + } + /* Unknown error in poll, can't recover. Just pretend there was no reply */ + rv = 0; + } + + if (rv == 0) { + /* poll() timed out */ + TGuard<TMutex> lock(AresMtx); + ares_process_fd(chan, ARES_SOCKET_BAD, ARES_SOCKET_BAD); + } else { + for (int i = 0; i < nfds; i++) { + if (pfd[i].revents == 0) { + continue; + } + TGuard<TMutex> lock(AresMtx); + ares_process_fd(chan, + pfd[i].revents & (POLLRDNORM | POLLIN) + ? pfd[i].fd + : ARES_SOCKET_BAD, + pfd[i].revents & (POLLWRNORM | POLLOUT) + ? pfd[i].fd + : ARES_SOCKET_BAD); + } + } + + if (start + Timeout <= TInstant(TTimeKeeper::GetTimeval())) { + break; + } + } +} + +void TDnsCache::GHBNCallback(void* arg, int status, int, struct hostent* info) { + THolder<TGHBNContext> ctx(static_cast<TGHBNContext*>(arg)); + TGuard<TMutex> lock(ctx->Owner->CacheMtx); + THostCache::iterator p = ctx->Owner->HostCache.find(ctx->Hostname); + + Y_ASSERT(p != ctx->Owner->HostCache.end()); + + time_t& resolved = (ctx->Family == AF_INET ? p->second.ResolvedV4 : p->second.ResolvedV6); + time_t& notfound = (ctx->Family == AF_INET ? p->second.NotFoundV4 : p->second.NotFoundV6); + TAtomic& inprogress = (ctx->Family == AF_INET ? p->second.InProgressV4 : p->second.InProgressV6); + + if (status == ARES_SUCCESS) { + if (info->h_addrtype == AF_INET) { + p->second.AddrsV4.clear(); + for (int i = 0; info->h_addr_list[i] != nullptr; i++) { + p->second.AddrsV4.push_back(*(TIpHost*)(info->h_addr_list[i])); + } + /* It is possible to ask ares for IPv6 and have IPv4 addrs instead, + so take care and set V4 timers anyway.
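+ (i.e. the V4 resolved/not-found stamps below are refreshed even when the
+ caller originally asked for AF_INET6)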
+ */ + p->second.ResolvedV4 = TTimeKeeper::GetTime(); + p->second.NotFoundV4 = 0; + AtomicSet(p->second.InProgressV4, 0); + } else if (info->h_addrtype == AF_INET6) { + p->second.AddrsV6.clear(); + for (int i = 0; info->h_addr_list[i] != nullptr; i++) { + p->second.AddrsV6.push_back(*(struct in6_addr*)(info->h_addr_list[i])); + } + } else { + Y_FAIL("unknown address type in ares callback"); + } + resolved = TTimeKeeper::GetTime(); + notfound = 0; + } else { + notfound = TTimeKeeper::GetTime(); + resolved = 0; + } + AtomicSet(inprogress, 0); +} + +void TDnsCache::GHBACallback(void* arg, int status, int, struct hostent* info) { + THolder<TGHBAContext> ctx(static_cast<TGHBAContext*>(arg)); + TGuard<TMutex> lock(ctx->Owner->CacheMtx); + TAddrCache::iterator p = ctx->Owner->AddrCache.find(ctx->Addr); + + Y_ASSERT(p != ctx->Owner->AddrCache.end()); + + if (status == ARES_SUCCESS) { + p->second.Hostname = info->h_name; + p->second.Resolved = TTimeKeeper::GetTime(); + p->second.NotFound = 0; + } else { + p->second.NotFound = TTimeKeeper::GetTime(); + p->second.Resolved = 0; + } + AtomicSet(p->second.InProgress, 0); +} + +TString TDnsCache::THost::AddrsV4ToString() const { + TStringStream ss; + bool first = true; + for (TIpHost addr : AddrsV4) { + ss << (first ? "" : " ") << IpToString(addr); + first = false; + } + return ss.Str(); +} + +TString TDnsCache::THost::AddrsV6ToString() const { + TStringStream ss; + bool first = true; + for (in6_addr addr : AddrsV6) { + struct sockaddr_in6 sin6; + Zero(sin6); + sin6.sin6_family = AF_INET6; + sin6.sin6_addr = addr; + + NAddr::TIPv6Addr addr6(sin6); + ss << (first ? "" : " ") << NAddr::PrintHost(addr6); + first = false; + } + return ss.Str(); +} + +TDnsCache::TAresLibInit TDnsCache::InitAresLib; diff --git a/library/cpp/actors/dnscachelib/dnscache.h b/library/cpp/actors/dnscachelib/dnscache.h new file mode 100644 index 0000000000..3313a251a1 --- /dev/null +++ b/library/cpp/actors/dnscachelib/dnscache.h @@ -0,0 +1,148 @@ +#pragma once + +#include <contrib/libs/c-ares/ares.h> +#include <util/generic/map.h> +#include <util/generic/vector.h> +#include <util/network/address.h> +#include <util/system/mutex.h> +#include <util/datetime/base.h> + +/** Asynchronous DNS resolver. + * + * This is NOT a general-purpose resolver!
It is designed with very specific assumptions: + * 1) there is a relatively small and rarely changing set of resolved names (like a server pool in a cluster) + * 2) these names are expected to have addresses; absence of an A record is treated as a DNS error + * 3) most of the time IP addresses do not change + * 4) it's OK to return an old IP address when the DNS server does not respond in time + */ + +class TDnsCache { +public: + TDnsCache(bool allowIpv4 = true, bool allowIpv6 = true, time_t entry_lifetime = 1800, time_t neg_lifetime = 1, ui32 request_timeout = 500000); + ~TDnsCache(); + + TString GetHostByAddr(const NAddr::IRemoteAddr&); + + // ip in network byte order + TIpHost Get(const TString& host); + + /* use with AF_INET, AF_INET6 or AF_UNSPEC */ + NAddr::IRemoteAddrPtr GetAddr(const TString& host, + int family, + TIpPort port = 0, + bool cacheOnly = false); + + void GetAllAddresses(const TString& host, TVector<NAddr::IRemoteAddrPtr>&); + + void GetStats(ui64& a_cache_hits, ui64& a_cache_misses, + ui64& ptr_cache_hits, ui64& ptr_cache_misses); + +protected: + bool ValidateHName(const TString& host) const noexcept; + +private: + struct TGHBNContext { + TDnsCache* Owner; + TString Hostname; + int Family; + }; + + struct TGHBAContext { + TDnsCache* Owner; + in6_addr Addr; + }; + + struct THost { + THost() noexcept { + } + + TVector<TIpHost> AddrsV4; + time_t ResolvedV4 = 0; + time_t NotFoundV4 = 0; + TAtomic InProgressV4 = 0; + + TVector<in6_addr> AddrsV6; + time_t ResolvedV6 = 0; + time_t NotFoundV6 = 0; + TAtomic InProgressV6 = 0; + + TString AddrsV4ToString() const; + TString AddrsV6ToString() const; + + bool IsStale(int family, const TDnsCache* ctx) const noexcept; + }; + + typedef TMap<TString, THost> THostCache; + + struct TAddr { + TString Hostname; + time_t Resolved = 0; + time_t NotFound = 0; + TAtomic InProgress = 0; + }; + /* IRemoteAddr is annoyingly hard to use, so I'll use in6_addr as the key + * and put v4 addrs in it.
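+ * (GetHostByAddr builds such a key by zero-filling an in6_addr and copying
+ * the v4 address into its first four bytes)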
+ */ + struct TAddrCmp { + bool operator()(const in6_addr& left, const in6_addr& right) const { + for (size_t i = 0; i < sizeof(left); i++) { + if (left.s6_addr[i] < right.s6_addr[i]) { + return true; + } else if (left.s6_addr[i] > right.s6_addr[i]) { + return false; + } + } + // equal + return false; + } + }; + typedef TMap<in6_addr, TAddr, TAddrCmp> TAddrCache; + + const THost& Resolve(const TString&, int family, bool cacheOnly = false); + + const TAddr& ResolveAddr(const in6_addr&, int family); + + void WaitTask(TAtomic&); + + static void GHBNCallback(void* arg, int status, int timeouts, + struct hostent* info); + + static void GHBACallback(void* arg, int status, int timeouts, + struct hostent* info); + + const time_t EntryLifetime; + const time_t NegativeLifetime; + const TDuration Timeout; + const bool AllowIpV4; + const bool AllowIpV6; + + TMutex CacheMtx; + THostCache HostCache; + TAddrCache AddrCache; + ui64 ACacheHits; + ui64 ACacheMisses; + ui64 PtrCacheHits; + ui64 PtrCacheMisses; + + const static THost NullHost; + + TMutex AresMtx; + void* Channel; + + struct TAresLibInit { + TAresLibInit() { +#ifdef _win_ + const auto res = ares_library_init(ARES_LIB_INIT_ALL); + Y_VERIFY(res == 0); +#endif + } + + ~TAresLibInit() { +#ifdef _win_ + ares_library_cleanup(); +#endif + } + }; + + static TAresLibInit InitAresLib; +}; diff --git a/library/cpp/actors/dnscachelib/probes.cpp b/library/cpp/actors/dnscachelib/probes.cpp new file mode 100644 index 0000000000..07734ab20f --- /dev/null +++ b/library/cpp/actors/dnscachelib/probes.cpp @@ -0,0 +1,3 @@ +#include "probes.h" + +LWTRACE_DEFINE_PROVIDER(DNSCACHELIB_PROVIDER) diff --git a/library/cpp/actors/dnscachelib/probes.h b/library/cpp/actors/dnscachelib/probes.h new file mode 100644 index 0000000000..313b7b8712 --- /dev/null +++ b/library/cpp/actors/dnscachelib/probes.h @@ -0,0 +1,35 @@ +#pragma once + +#include <library/cpp/lwtrace/all.h> + +#define DNSCACHELIB_PROVIDER(PROBE, EVENT, GROUPS, TYPES, NAMES) \ + PROBE(Created, GROUPS(), TYPES(), NAMES()) \ + PROBE(Destroyed, GROUPS(), TYPES(), NAMES()) \ + PROBE(AresInitFailed, GROUPS(), TYPES(), NAMES()) \ + PROBE(FamilyMismatch, \ + GROUPS(), \ + TYPES(int, bool, bool), \ + NAMES("family", "allowIpV4", "allowIpV6")) \ + PROBE(ResolveNullHost, \ + GROUPS(), \ + TYPES(TString, int), \ + NAMES("hostname", "family")) \ + PROBE(ResolveFromCache, \ + GROUPS(), \ + TYPES(TString, int, TString, TString, ui64), \ + NAMES("hostname", "family", "addrsV4", "addrsV6", "aCacheHits")) \ + PROBE(ResolveDone, \ + GROUPS(), \ + TYPES(TString, int, TString, TString), \ + NAMES("hostname", "family", "addrsV4", "addrsV6")) \ + PROBE(ResolveCacheTimeout, \ + GROUPS(), \ + TYPES(TString), \ + NAMES("hostname")) \ + PROBE(ResolveCacheNew, \ + GROUPS(), \ + TYPES(TString), \ + NAMES("hostname")) \ + /**/ + +LWTRACE_DECLARE_PROVIDER(DNSCACHELIB_PROVIDER) diff --git a/library/cpp/actors/dnscachelib/timekeeper.h b/library/cpp/actors/dnscachelib/timekeeper.h new file mode 100644 index 0000000000..0528d8549c --- /dev/null +++ b/library/cpp/actors/dnscachelib/timekeeper.h @@ -0,0 +1,70 @@ +#pragma once + +#include <util/datetime/base.h> +#include <util/generic/singleton.h> +#include <util/string/cast.h> +#include <util/system/thread.h> +#include <util/system/event.h> +#include <util/system/env.h> + +#include <cstdlib> + +/* Keeps current time accurate up to 1/10 second */ + +class TTimeKeeper { +public: + static TInstant GetNow(void) { + return TInstant::Seconds(GetTime()); + } + + static time_t GetTime(void) {
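+ // whole-second granularity; the timeval with sub-second precision is
+ // available via GetTimeval() below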
return Singleton<TTimeKeeper>()->CurrentTime.tv_sec; + } + + static const struct timeval& GetTimeval(void) { + return Singleton<TTimeKeeper>()->CurrentTime; + } + + TTimeKeeper() + : Thread(&TTimeKeeper::Worker, this) + { + ConstTime = !!GetEnv("TEST_TIME"); + if (ConstTime) { + try { + CurrentTime.tv_sec = FromString<ui32>(GetEnv("TEST_TIME")); + } catch (TFromStringException exc) { + ConstTime = false; + } + } + if (!ConstTime) { + gettimeofday(&CurrentTime, nullptr); + Thread.Start(); + } + } + + ~TTimeKeeper() { + if (!ConstTime) { + Exit.Signal(); + Thread.Join(); + } + } + +private: + static const ui32 UpdateInterval = 100000; + struct timeval CurrentTime; + bool ConstTime; + TSystemEvent Exit; + TThread Thread; + + static void* Worker(void* arg) { + TTimeKeeper* owner = static_cast<TTimeKeeper*>(arg); + + do { + /* Race condition may occur here but locking looks too expensive */ + + gettimeofday(&owner->CurrentTime, nullptr); + } while (!owner->Exit.WaitT(TDuration::MicroSeconds(UpdateInterval))); + + return nullptr; + } +}; diff --git a/library/cpp/actors/dnscachelib/ya.make b/library/cpp/actors/dnscachelib/ya.make new file mode 100644 index 0000000000..e3a6ad6202 --- /dev/null +++ b/library/cpp/actors/dnscachelib/ya.make @@ -0,0 +1,24 @@ +LIBRARY() + +OWNER( + davenger + fomichev + serxa + dimanne + single +) + +SRCS( + dnscache.cpp + dnscache.h + probes.cpp + probes.h + timekeeper.h +) + +PEERDIR( + contrib/libs/c-ares + library/cpp/lwtrace +) + +END() diff --git a/library/cpp/actors/dnsresolver/dnsresolver.cpp b/library/cpp/actors/dnsresolver/dnsresolver.cpp new file mode 100644 index 0000000000..6329bb0083 --- /dev/null +++ b/library/cpp/actors/dnsresolver/dnsresolver.cpp @@ -0,0 +1,475 @@ +#include "dnsresolver.h" + +#include <library/cpp/actors/core/hfunc.h> +#include <library/cpp/threading/queue/mpsc_htswap.h> +#include <util/network/pair.h> +#include <util/network/socket.h> +#include <util/string/builder.h> +#include <util/system/thread.h> + +#include <ares.h> + +#include <atomic> + +namespace NActors { +namespace NDnsResolver { + + class TAresLibraryInitBase { + protected: + TAresLibraryInitBase() noexcept { + int status = ares_library_init(ARES_LIB_INIT_ALL); + Y_VERIFY(status == ARES_SUCCESS, "Unexpected failure to initialize c-ares library"); + } + + ~TAresLibraryInitBase() noexcept { + ares_library_cleanup(); + } + }; + + class TCallbackQueueBase { + protected: + TCallbackQueueBase() noexcept { + int err = SocketPair(Sockets, false, true); + Y_VERIFY(err == 0, "Unexpected failure to create a socket pair"); + SetNonBlock(Sockets[0]); + SetNonBlock(Sockets[1]); + } + + ~TCallbackQueueBase() noexcept { + closesocket(Sockets[0]); + closesocket(Sockets[1]); + } + + protected: + using TCallback = std::function<void()>; + using TCallbackQueue = NThreading::THTSwapQueue<TCallback>; + + void PushCallback(TCallback callback) { + Y_VERIFY(callback, "Cannot push an empty callback"); + CallbackQueue.Push(std::move(callback)); // this is a lockfree queue + + // Wake up worker thread on the first activation + if (Activations.fetch_add(1, std::memory_order_acq_rel) == 0) { + char ch = 'x'; + ssize_t ret; +#ifdef _win_ + ret = send(SignalSock(), &ch, 1, 0); + if (ret == -1) { + Y_VERIFY(WSAGetLastError() == WSAEWOULDBLOCK, "Unexpected send error"); + return; + } +#else + do { + ret = send(SignalSock(), &ch, 1, 0); + } while (ret == -1 && errno == EINTR); + if (ret == -1) { + Y_VERIFY(errno == EAGAIN || errno == EWOULDBLOCK, "Unexpected send error"); + return; + } +#endif + Y_VERIFY(ret 
== 1, "Unexpected send result"); + } + } + + void RunCallbacks() noexcept { + char ch[32]; + ssize_t ret; + bool signalled = false; + for (;;) { + ret = recv(WaitSock(), ch, sizeof(ch), 0); + if (ret > 0) { + signalled = true; + } + if (ret == sizeof(ch)) { + continue; + } + if (ret != -1) { + break; + } +#ifdef _win_ + if (WSAGetLastError() == WSAEWOULDBLOCK) { + break; + } + Y_FAIL("Unexpected recv error"); +#else + if (errno == EAGAIN || errno == EWOULDBLOCK) { + break; + } + Y_VERIFY(errno == EINTR, "Unexpected recv error"); +#endif + } + + if (signalled) { + // There's exactly one write to SignalSock while Activations != 0 + // It's impossible to get signalled while Activations == 0 + // We must set Activations = 0 to receive new signals + size_t count = Activations.exchange(0, std::memory_order_acq_rel); + Y_VERIFY(count != 0); + + // N.B. due to the way HTSwap works we may not be able to pop + // all callbacks on this activation, however we expect a new + // delayed activation to happen at a later time. + while (auto callback = CallbackQueue.Pop()) { + callback(); + } + } + } + + SOCKET SignalSock() { + return Sockets[0]; + } + + SOCKET WaitSock() { + return Sockets[1]; + } + + private: + SOCKET Sockets[2]; + TCallbackQueue CallbackQueue; + std::atomic<size_t> Activations{ 0 }; + }; + + class TSimpleDnsResolver + : public TActor<TSimpleDnsResolver> + , private TAresLibraryInitBase + , private TCallbackQueueBase + { + public: + TSimpleDnsResolver(TSimpleDnsResolverOptions options) noexcept + : TActor(&TThis::StateWork) + , Options(std::move(options)) + , WorkerThread(&TThis::WorkerThreadStart, this) + { + InitAres(); + + WorkerThread.Start(); + } + + ~TSimpleDnsResolver() noexcept override { + if (!Stopped) { + PushCallback([this] { + // Mark as stopped first + Stopped = true; + + // Cancel all current ares requests (will not send replies) + ares_cancel(AresChannel); + }); + + WorkerThread.Join(); + } + + StopAres(); + } + + static constexpr EActivityType ActorActivityType() { + return DNS_RESOLVER; + } + + private: + void InitAres() noexcept { + struct ares_options options; + memset(&options, 0, sizeof(options)); + int optmask = 0; + + options.flags = ARES_FLAG_STAYOPEN; + optmask |= ARES_OPT_FLAGS; + + options.sock_state_cb = &TThis::SockStateCallback; + options.sock_state_cb_data = this; + optmask |= ARES_OPT_SOCK_STATE_CB; + + options.timeout = Options.Timeout.MilliSeconds(); + if (options.timeout > 0) { + optmask |= ARES_OPT_TIMEOUTMS; + } + + options.tries = Options.Attempts; + if (options.tries > 0) { + optmask |= ARES_OPT_TRIES; + } + + int err = ares_init_options(&AresChannel, &options, optmask); + Y_VERIFY(err == 0, "Unexpected failure to initialize c-ares channel"); + + if (Options.Servers) { + TStringBuilder csv; + for (const TString& server : Options.Servers) { + if (csv) { + csv << ','; + } + csv << server; + } + err = ares_set_servers_ports_csv(AresChannel, csv.c_str()); + Y_VERIFY(err == 0, "Unexpected failure to set a list of dns servers: %s", ares_strerror(err)); + } + } + + void StopAres() noexcept { + // Destroy the ares channel + ares_destroy(AresChannel); + AresChannel = nullptr; + } + + private: + STRICT_STFUNC(StateWork, { + hFunc(TEvents::TEvPoison, Handle); + hFunc(TEvDns::TEvGetHostByName, Handle); + hFunc(TEvDns::TEvGetAddr, Handle); + }) + + void Handle(TEvents::TEvPoison::TPtr&) { + Y_VERIFY(!Stopped); + + PushCallback([this] { + // Cancel all current ares requests (will send notifications) + ares_cancel(AresChannel); + + // Mark as stopped last + 
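+ // (callbacks fired by ares_cancel above still see Stopped == false,
+ // so pending requests receive their ARES_ECANCELLED replies before shutdown)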
Stopped = true; + }); + + WorkerThread.Join(); + PassAway(); + } + + private: + enum class ERequestType { + GetHostByName, + GetAddr, + }; + + struct TRequestContext : public TThrRefBase { + using TPtr = TIntrusivePtr<TRequestContext>; + + TThis* Self; + TActorSystem* ActorSystem; + TActorId SelfId; + TActorId Sender; + ui64 Cookie; + ERequestType Type; + + TRequestContext(TThis* self, TActorSystem* as, TActorId selfId, TActorId sender, ui64 cookie, ERequestType type) + : Self(self) + , ActorSystem(as) + , SelfId(selfId) + , Sender(sender) + , Cookie(cookie) + , Type(type) + { } + }; + + private: + void Handle(TEvDns::TEvGetHostByName::TPtr& ev) { + auto* msg = ev->Get(); + auto reqCtx = MakeIntrusive<TRequestContext>( + this, TActivationContext::ActorSystem(), SelfId(), ev->Sender, ev->Cookie, ERequestType::GetHostByName); + PushCallback([this, reqCtx = std::move(reqCtx), name = std::move(msg->Name), family = msg->Family] () mutable { + StartGetHostByName(std::move(reqCtx), std::move(name), family); + }); + } + + void Handle(TEvDns::TEvGetAddr::TPtr& ev) { + auto* msg = ev->Get(); + auto reqCtx = MakeIntrusive<TRequestContext>( + this, TActivationContext::ActorSystem(), SelfId(), ev->Sender, ev->Cookie, ERequestType::GetAddr); + PushCallback([this, reqCtx = std::move(reqCtx), name = std::move(msg->Name), family = msg->Family] () mutable { + StartGetHostByName(std::move(reqCtx), std::move(name), family); + }); + } + + void StartGetHostByName(TRequestContext::TPtr reqCtx, TString name, int family) noexcept { + reqCtx->Ref(); + ares_gethostbyname(AresChannel, name.c_str(), family, + &TThis::GetHostByNameAresCallback, reqCtx.Get()); + } + + private: + static void GetHostByNameAresCallback(void* arg, int status, int timeouts, struct hostent* info) { + Y_UNUSED(timeouts); + TRequestContext::TPtr reqCtx(static_cast<TRequestContext*>(arg)); + reqCtx->UnRef(); + + if (reqCtx->Self->Stopped) { + // Don't send any replies after destruction + return; + } + + switch (reqCtx->Type) { + case ERequestType::GetHostByName: { + auto result = MakeHolder<TEvDns::TEvGetHostByNameResult>(); + if (status == 0) { + switch (info->h_addrtype) { + case AF_INET: { + for (int i = 0; info->h_addr_list[i] != nullptr; ++i) { + result->AddrsV4.emplace_back(*(struct in_addr*)(info->h_addr_list[i])); + } + break; + } + case AF_INET6: { + for (int i = 0; info->h_addr_list[i] != nullptr; ++i) { + result->AddrsV6.emplace_back(*(struct in6_addr*)(info->h_addr_list[i])); + } + break; + } + default: + Y_FAIL("unknown address family in ares callback"); + } + } else { + result->ErrorText = ares_strerror(status); + } + result->Status = status; + + reqCtx->ActorSystem->Send(new IEventHandle(reqCtx->Sender, reqCtx->SelfId, result.Release(), 0, reqCtx->Cookie)); + break; + } + + case ERequestType::GetAddr: { + auto result = MakeHolder<TEvDns::TEvGetAddrResult>(); + if (status == 0 && Y_UNLIKELY(info->h_addr_list[0] == nullptr)) { + status = ARES_ENODATA; + } + if (status == 0) { + switch (info->h_addrtype) { + case AF_INET: { + result->Addr = *(struct in_addr*)(info->h_addr_list[0]); + break; + } + case AF_INET6: { + result->Addr = *(struct in6_addr*)(info->h_addr_list[0]); + break; + } + default: + Y_FAIL("unknown address family in ares callback"); + } + } else { + result->ErrorText = ares_strerror(status); + } + result->Status = status; + + reqCtx->ActorSystem->Send(new IEventHandle(reqCtx->Sender, reqCtx->SelfId, result.Release(), 0, reqCtx->Cookie)); + break; + } + } + } + + private: + static void SockStateCallback(void* data, 
ares_socket_t socket_fd, int readable, int writable) { + static_cast<TThis*>(data)->DoSockStateCallback(socket_fd, readable, writable); + } + + void DoSockStateCallback(ares_socket_t socket_fd, int readable, int writable) noexcept { + int events = (readable ? (POLLRDNORM | POLLIN) : 0) | (writable ? (POLLWRNORM | POLLOUT) : 0); + if (events == 0) { + AresSockStates.erase(socket_fd); + } else { + AresSockStates[socket_fd].NeededEvents = events; + } + } + + private: + static void* WorkerThreadStart(void* arg) noexcept { + static_cast<TSimpleDnsResolver*>(arg)->WorkerThreadLoop(); + return nullptr; + } + + void WorkerThreadLoop() noexcept { + TThread::SetCurrentThreadName("DnsResolver"); + + TVector<struct pollfd> fds; + while (!Stopped) { + fds.clear(); + fds.reserve(1 + AresSockStates.size()); + { + auto& entry = fds.emplace_back(); + entry.fd = WaitSock(); + entry.events = POLLRDNORM | POLLIN; + } + for (auto& kv : AresSockStates) { + auto& entry = fds.emplace_back(); + entry.fd = kv.first; + entry.events = kv.second.NeededEvents; + } + + int timeout = -1; + struct timeval tv; + if (ares_timeout(AresChannel, nullptr, &tv)) { + timeout = tv.tv_sec * 1000 + tv.tv_usec / 1000; + } + + int ret = poll(fds.data(), fds.size(), timeout); + if (ret == -1) { + if (errno == EINTR) { + continue; + } + // We cannot handle poll failures, so run callbacks and pretend everything is ok + RunCallbacks(); + if (Stopped) { + break; + } + ret = 0; + } + + bool ares_called = false; + if (ret > 0) { + for (size_t i = 0; i < fds.size(); ++i) { + auto& entry = fds[i]; + + // Handle WaitSock activation and run callbacks + if (i == 0) { + if (entry.revents & (POLLRDNORM | POLLIN)) { + RunCallbacks(); + if (Stopped) { + break; + } + } + continue; + } + + // All other sockets belong to ares + if (entry.revents == 0) { + continue; + } + // Previous invocation of ares_process_fd might have removed some sockets + if (Y_UNLIKELY(!AresSockStates.contains(entry.fd))) { + continue; + } + ares_process_fd( + AresChannel, + entry.revents & (POLLRDNORM | POLLIN) ? entry.fd : ARES_SOCKET_BAD, + entry.revents & (POLLWRNORM | POLLOUT) ? 
entry.fd : ARES_SOCKET_BAD); + ares_called = true; + } + + if (Stopped) { + break; + } + } + + if (!ares_called) { + // Let ares handle timeouts + ares_process_fd(AresChannel, ARES_SOCKET_BAD, ARES_SOCKET_BAD); + } + } + } + + private: + struct TSockState { + short NeededEvents = 0; // poll events + }; + + private: + TSimpleDnsResolverOptions Options; + TThread WorkerThread; + + ares_channel AresChannel; + THashMap<SOCKET, TSockState> AresSockStates; + + bool Stopped = false; + }; + + IActor* CreateSimpleDnsResolver(TSimpleDnsResolverOptions options) { + return new TSimpleDnsResolver(std::move(options)); + } + +} // namespace NDnsResolver +} // namespace NActors diff --git a/library/cpp/actors/dnsresolver/dnsresolver.h b/library/cpp/actors/dnsresolver/dnsresolver.h new file mode 100644 index 0000000000..88fc74df7d --- /dev/null +++ b/library/cpp/actors/dnsresolver/dnsresolver.h @@ -0,0 +1,128 @@ +#pragma once + +#include <library/cpp/actors/core/actor.h> +#include <library/cpp/actors/core/events.h> +#include <library/cpp/actors/core/event_local.h> +#include <library/cpp/monlib/dynamic_counters/counters.h> +#include <util/network/address.h> +#include <variant> + +namespace NActors { +namespace NDnsResolver { + + struct TEvDns { + enum EEv { + EvGetHostByName = EventSpaceBegin(TEvents::ES_DNS), + EvGetHostByNameResult, + EvGetAddr, + EvGetAddrResult, + }; + + /** + * TEvGetHostByName returns the result of ares_gethostbyname + */ + struct TEvGetHostByName : public TEventLocal<TEvGetHostByName, EvGetHostByName> { + TString Name; + int Family; + + explicit TEvGetHostByName(TString name, int family = AF_UNSPEC) + : Name(std::move(name)) + , Family(family) + { } + }; + + struct TEvGetHostByNameResult : public TEventLocal<TEvGetHostByNameResult, EvGetHostByNameResult> { + TVector<struct in_addr> AddrsV4; + TVector<struct in6_addr> AddrsV6; + TString ErrorText; + int Status = 0; + }; + + /** + * TEvGetAddr returns a single address for a given hostname + */ + struct TEvGetAddr : public TEventLocal<TEvGetAddr, EvGetAddr> { + TString Name; + int Family; + + explicit TEvGetAddr(TString name, int family = AF_UNSPEC) + : Name(std::move(name)) + , Family(family) + { } + }; + + struct TEvGetAddrResult : public TEventLocal<TEvGetAddrResult, EvGetAddrResult> { + // N.B. 
"using" here doesn't work with Visual Studio compiler + typedef struct in6_addr TIPv6Addr; + typedef struct in_addr TIPv4Addr; + + std::variant<std::monostate, TIPv6Addr, TIPv4Addr> Addr; + TString ErrorText; + int Status = 0; + + bool IsV6() const { + return std::holds_alternative<TIPv6Addr>(Addr); + } + + bool IsV4() const { + return std::holds_alternative<TIPv4Addr>(Addr); + } + + const TIPv6Addr& GetAddrV6() const { + const TIPv6Addr* p = std::get_if<TIPv6Addr>(&Addr); + Y_VERIFY(p, "Result is not an ipv6 address"); + return *p; + } + + const TIPv4Addr& GetAddrV4() const { + const TIPv4Addr* p = std::get_if<TIPv4Addr>(&Addr); + Y_VERIFY(p, "Result is not an ipv4 address"); + return *p; + } + }; + }; + + struct TSimpleDnsResolverOptions { + // Initial per-server timeout, grows exponentially with each retry + TDuration Timeout = TDuration::Seconds(1); + // Number of attempts per-server + int Attempts = 2; + // Optional list of custom dns servers (ip.v4[:port], ip::v6 or [ip::v6]:port format) + TVector<TString> Servers; + }; + + IActor* CreateSimpleDnsResolver(TSimpleDnsResolverOptions options = TSimpleDnsResolverOptions()); + + struct TCachingDnsResolverOptions { + // Soft expire time specifies delay before name is refreshed in background + TDuration SoftNegativeExpireTime = TDuration::Seconds(1); + TDuration SoftPositiveExpireTime = TDuration::Seconds(10); + // Hard expire time specifies delay before the last result is forgotten + TDuration HardNegativeExpireTime = TDuration::Seconds(10); + TDuration HardPositiveExpireTime = TDuration::Hours(2); + // Allow these request families + bool AllowIPv6 = true; + bool AllowIPv4 = true; + // Optional counters + NMonitoring::TDynamicCounterPtr MonCounters = nullptr; + }; + + IActor* CreateCachingDnsResolver(TActorId upstream, TCachingDnsResolverOptions options = TCachingDnsResolverOptions()); + + struct TOnDemandDnsResolverOptions + : public TSimpleDnsResolverOptions + , public TCachingDnsResolverOptions + { + }; + + IActor* CreateOnDemandDnsResolver(TOnDemandDnsResolverOptions options = TOnDemandDnsResolverOptions()); + + /** + * Returns actor id of a globally registered dns resolver + */ + inline TActorId MakeDnsResolverActorId() { + return TActorId(0, TStringBuf("dnsresolver")); + } + +} // namespace NDnsResolver +} // namespace NActors diff --git a/library/cpp/actors/dnsresolver/dnsresolver_caching.cpp b/library/cpp/actors/dnsresolver/dnsresolver_caching.cpp new file mode 100644 index 0000000000..02760f4c27 --- /dev/null +++ b/library/cpp/actors/dnsresolver/dnsresolver_caching.cpp @@ -0,0 +1,730 @@ +#include "dnsresolver.h" + +#include <library/cpp/actors/core/hfunc.h> +#include <util/generic/intrlist.h> + +#include <ares.h> + +#include <queue> + +namespace NActors { +namespace NDnsResolver { + + class TCachingDnsResolver : public TActor<TCachingDnsResolver> { + public: + struct TMonCounters { + NMonitoring::TDynamicCounters::TCounterPtr OutgoingInFlightV4; + NMonitoring::TDynamicCounters::TCounterPtr OutgoingInFlightV6; + NMonitoring::TDynamicCounters::TCounterPtr OutgoingErrorsV4; + NMonitoring::TDynamicCounters::TCounterPtr OutgoingErrorsV6; + NMonitoring::TDynamicCounters::TCounterPtr OutgoingTotalV4; + NMonitoring::TDynamicCounters::TCounterPtr OutgoingTotalV6; + + NMonitoring::TDynamicCounters::TCounterPtr IncomingInFlight; + NMonitoring::TDynamicCounters::TCounterPtr IncomingErrors; + NMonitoring::TDynamicCounters::TCounterPtr IncomingTotal; + + NMonitoring::TDynamicCounters::TCounterPtr CacheSize; + 
NMonitoring::TDynamicCounters::TCounterPtr CacheHits; + NMonitoring::TDynamicCounters::TCounterPtr CacheMisses; + + TMonCounters(const NMonitoring::TDynamicCounterPtr& counters) + : OutgoingInFlightV4(counters->GetCounter("DnsResolver/Outgoing/InFlight/V4", false)) + , OutgoingInFlightV6(counters->GetCounter("DnsResolver/Outgoing/InFlight/V6", false)) + , OutgoingErrorsV4(counters->GetCounter("DnsResolver/Outgoing/Errors/V4", true)) + , OutgoingErrorsV6(counters->GetCounter("DnsResolver/Outgoing/Errors/V6", true)) + , OutgoingTotalV4(counters->GetCounter("DnsResolver/Outgoing/Total/V4", true)) + , OutgoingTotalV6(counters->GetCounter("DnsResolver/Outgoing/Total/V6", true)) + , IncomingInFlight(counters->GetCounter("DnsResolver/Incoming/InFlight", false)) + , IncomingErrors(counters->GetCounter("DnsResolver/Incoming/Errors", true)) + , IncomingTotal(counters->GetCounter("DnsResolver/Incoming/Total", true)) + , CacheSize(counters->GetCounter("DnsResolver/Cache/Size", false)) + , CacheHits(counters->GetCounter("DnsResolver/Cache/Hits", true)) + , CacheMisses(counters->GetCounter("DnsResolver/Cache/Misses", true)) + { } + }; + + public: + TCachingDnsResolver(TActorId upstream, TCachingDnsResolverOptions options) + : TActor(&TThis::StateWork) + , Upstream(upstream) + , Options(std::move(options)) + , MonCounters(Options.MonCounters ? new TMonCounters(Options.MonCounters) : nullptr) + { } + + static constexpr EActivityType ActorActivityType() { + return DNS_RESOLVER; + } + + private: + STRICT_STFUNC(StateWork, { + hFunc(TEvents::TEvPoison, Handle); + hFunc(TEvDns::TEvGetHostByName, Handle); + hFunc(TEvDns::TEvGetAddr, Handle); + hFunc(TEvDns::TEvGetHostByNameResult, Handle); + hFunc(TEvents::TEvUndelivered, Handle); + }); + + void Handle(TEvents::TEvPoison::TPtr&) { + DropPending(ARES_ECANCELLED); + PassAway(); + } + + void Handle(TEvDns::TEvGetHostByName::TPtr& ev) { + auto req = MakeHolder<TIncomingRequest>(); + req->Type = EIncomingRequestType::GetHostByName; + req->Sender = ev->Sender; + req->Cookie = ev->Cookie; + req->Name = std::move(ev->Get()->Name); + req->Family = ev->Get()->Family; + EnqueueRequest(std::move(req)); + } + + void Handle(TEvDns::TEvGetAddr::TPtr& ev) { + auto req = MakeHolder<TIncomingRequest>(); + req->Type = EIncomingRequestType::GetAddr; + req->Sender = ev->Sender; + req->Cookie = ev->Cookie; + req->Name = std::move(ev->Get()->Name); + req->Family = ev->Get()->Family; + EnqueueRequest(std::move(req)); + } + + void Handle(TEvDns::TEvGetHostByNameResult::TPtr& ev) { + auto waitingIt = WaitingRequests.find(ev->Cookie); + Y_VERIFY(waitingIt != WaitingRequests.end(), "Unexpected reply, reqId=%" PRIu64, ev->Cookie); + auto waitingInfo = waitingIt->second; + WaitingRequests.erase(waitingIt); + + switch (waitingInfo.Family) { + case AF_INET6: + if (ev->Get()->Status) { + ProcessErrorV6(waitingInfo.Position, ev->Get()->Status, std::move(ev->Get()->ErrorText)); + } else { + ProcessAddrsV6(waitingInfo.Position, std::move(ev->Get()->AddrsV6)); + } + break; + + case AF_INET: + if (ev->Get()->Status) { + ProcessErrorV4(waitingInfo.Position, ev->Get()->Status, std::move(ev->Get()->ErrorText)); + } else { + ProcessAddrsV4(waitingInfo.Position, std::move(ev->Get()->AddrsV4)); + } + break; + + default: + Y_FAIL("Unexpected request family %d", waitingInfo.Family); + } + } + + void Handle(TEvents::TEvUndelivered::TPtr& ev) { + switch (ev->Get()->SourceType) { + case TEvDns::TEvGetHostByName::EventType: { + auto waitingIt = WaitingRequests.find(ev->Cookie); + Y_VERIFY(waitingIt != 
WaitingRequests.end(), "Unexpected TEvUndelivered, reqId=%" PRIu64, ev->Cookie); + auto waitingInfo = waitingIt->second; + WaitingRequests.erase(waitingIt); + + switch (waitingInfo.Family) { + case AF_INET6: + ProcessErrorV6(waitingInfo.Position, ARES_ENOTINITIALIZED, "Caching dns resolver cannot deliver to the underlying resolver"); + break; + case AF_INET: + ProcessErrorV4(waitingInfo.Position, ARES_ENOTINITIALIZED, "Caching dns resolver cannot deliver to the underlying resolver"); + break; + default: + Y_FAIL("Unexpected request family %d", waitingInfo.Family); + } + + break; + } + + default: + Y_FAIL("Unexpected TEvUndelivered, type=%" PRIu32, ev->Get()->SourceType); + } + } + + private: + enum EIncomingRequestType { + GetHostByName, + GetAddr, + }; + + struct TIncomingRequest : public TIntrusiveListItem<TIncomingRequest> { + EIncomingRequestType Type; + TActorId Sender; + ui64 Cookie; + TString Name; + int Family; + }; + + using TIncomingRequestList = TIntrusiveListWithAutoDelete<TIncomingRequest, TDelete>; + + void EnqueueRequest(THolder<TIncomingRequest> req) { + if (MonCounters) { + ++*MonCounters->IncomingTotal; + } + + CleanupExpired(TActivationContext::Now()); + + switch (req->Family) { + case AF_UNSPEC: + if (Options.AllowIPv6) { + EnqueueRequestIPv6(std::move(req)); + return; + } + if (Options.AllowIPv4) { + EnqueueRequestIPv4(std::move(req)); + return; + } + break; + + case AF_INET6: + if (Options.AllowIPv6) { + EnqueueRequestIPv6(std::move(req)); + return; + } + break; + + case AF_INET: + if (Options.AllowIPv4) { + EnqueueRequestIPv4(std::move(req)); + return; + } + break; + } + + ReplyWithError(std::move(req), ARES_EBADFAMILY); + } + + void EnqueueRequestIPv6(THolder<TIncomingRequest> req) { + auto now = TActivationContext::Now(); + + auto& fullState = NameToState[req->Name]; + if (MonCounters) { + *MonCounters->CacheSize = NameToState.size(); + } + + auto& state = fullState.StateIPv6; + EnsureRequest(state, req->Name, AF_INET6, now); + + if (state.IsHardExpired(now)) { + Y_VERIFY(state.Waiting); + if (MonCounters) { + ++*MonCounters->CacheMisses; + } + // We need to wait for ipv6 reply, schedule ipv4 request in parallel if needed + if (Options.AllowIPv4) { + EnsureRequest(fullState.StateIPv4, req->Name, AF_INET, now); + } + state.WaitingRequests.PushBack(req.Release()); + return; + } + + // We want to retry AF_UNSPEC with IPv4 in some cases + if (req->Family == AF_UNSPEC && Options.AllowIPv4 && state.RetryUnspec()) { + EnqueueRequestIPv4(std::move(req)); + return; + } + + if (MonCounters) { + ++*MonCounters->CacheHits; + } + + if (state.Status != 0) { + ReplyWithError(std::move(req), state.Status, state.ErrorText); + } else { + ReplyWithAddrs(std::move(req), fullState.AddrsIPv6); + } + } + + void EnqueueRequestIPv4(THolder<TIncomingRequest> req, bool isCacheMiss = false) { + auto now = TActivationContext::Now(); + + auto& fullState = NameToState[req->Name]; + if (MonCounters) { + *MonCounters->CacheSize = NameToState.size(); + } + + auto& state = fullState.StateIPv4; + EnsureRequest(state, req->Name, AF_INET, now); + + if (state.IsHardExpired(now)) { + Y_VERIFY(state.Waiting); + if (MonCounters && !isCacheMiss) { + ++*MonCounters->CacheMisses; + } + state.WaitingRequests.PushBack(req.Release()); + return; + } + + if (MonCounters && !isCacheMiss) { + ++*MonCounters->CacheHits; + } + + if (state.Status != 0) { + ReplyWithError(std::move(req), state.Status, state.ErrorText); + } else { + ReplyWithAddrs(std::move(req), fullState.AddrsIPv4); + } + } + + private: + struct 
TFamilyState { + TIncomingRequestList WaitingRequests; + TInstant SoftDeadline; + TInstant HardDeadline; + TInstant NextSoftDeadline; + TInstant NextHardDeadline; + TString ErrorText; + int Status = -1; // never requested before + bool InSoftHeap = false; + bool InHardHeap = false; + bool Waiting = false; + + bool Needed() const { + return InSoftHeap || InHardHeap || Waiting; + } + + bool RetryUnspec() const { + return ( + Status == ARES_ENODATA || + Status == ARES_EBADRESP || + Status == ARES_ETIMEOUT); + } + + bool ServerReplied() const { + return ServerReplied(Status); + } + + bool IsSoftExpired(TInstant now) const { + return !InSoftHeap || NextSoftDeadline < now; + } + + bool IsHardExpired(TInstant now) const { + return !InHardHeap || NextHardDeadline < now; + } + + static bool ServerReplied(int status) { + return ( + status == ARES_SUCCESS || + status == ARES_ENODATA || + status == ARES_ENOTFOUND); + } + }; + + struct TState { + TFamilyState StateIPv6; + TFamilyState StateIPv4; + TVector<struct in6_addr> AddrsIPv6; + TVector<struct in_addr> AddrsIPv4; + + bool Needed() const { + return StateIPv6.Needed() || StateIPv4.Needed(); + } + }; + + using TNameToState = THashMap<TString, TState>; + + template<const TFamilyState TState::* StateToFamily, + const TInstant TFamilyState::* FamilyToDeadline> + struct THeapCompare { + // returns true when b < a + bool operator()(TNameToState::iterator a, TNameToState::iterator b) const { + const TState& aState = a->second; + const TState& bState = b->second; + const TFamilyState& aFamily = aState.*StateToFamily; + const TFamilyState& bFamily = bState.*StateToFamily; + const TInstant& aDeadline = aFamily.*FamilyToDeadline; + const TInstant& bDeadline = bFamily.*FamilyToDeadline; + return bDeadline < aDeadline; + } + }; + + template<const TFamilyState TState::* StateToFamily, + const TInstant TFamilyState::* FamilyToDeadline> + using TStateHeap = std::priority_queue< + TNameToState::iterator, + std::vector<TNameToState::iterator>, + THeapCompare<StateToFamily, FamilyToDeadline> + >; + + struct TWaitingInfo { + TNameToState::iterator Position; + int Family; + }; + + private: + void EnsureRequest(TFamilyState& state, const TString& name, int family, TInstant now) { + if (state.Waiting) { + return; // request is already pending + } + + if (!state.IsSoftExpired(now) && !state.IsHardExpired(now)) { + return; // response is not expired yet + } + + if (MonCounters) { + switch (family) { + case AF_INET6: + ++*MonCounters->OutgoingInFlightV6; + ++*MonCounters->OutgoingTotalV6; + break; + case AF_INET: + ++*MonCounters->OutgoingInFlightV4; + ++*MonCounters->OutgoingTotalV4; + break; + } + } + + ui64 reqId = ++LastRequestId; + auto& req = WaitingRequests[reqId]; + req.Position = NameToState.find(name); + req.Family = family; + Y_VERIFY(req.Position != NameToState.end()); + + Send(Upstream, new TEvDns::TEvGetHostByName(name, family), IEventHandle::FlagTrackDelivery, reqId); + state.Waiting = true; + } + + template<TFamilyState TState::* StateToFamily, + TInstant TFamilyState::* FamilyToDeadline, + TInstant TFamilyState::* FamilyToNextDeadline, + bool TFamilyState::* FamilyToFlag, + class THeap> + void PushToHeap(THeap& heap, TNameToState::iterator it, TInstant newDeadline) { + auto& family = it->second.*StateToFamily; + TInstant& deadline = family.*FamilyToDeadline; + TInstant& nextDeadline = family.*FamilyToNextDeadline; + bool& flag = family.*FamilyToFlag; + nextDeadline = newDeadline; + if (!flag) { + deadline = newDeadline; + heap.push(it); + flag = true; + } + 
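// (If the entry is already queued we only advance NextDeadline here; + // DoCleanupExpired re-inserts it under the new deadline once the stale + // one pops off the heap, so refreshing an entry never needs a random + // heap removal.)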
} + + void PushSoftV6(TNameToState::iterator it, TInstant newDeadline) { + PushToHeap<&TState::StateIPv6, &TFamilyState::SoftDeadline, &TFamilyState::NextSoftDeadline, &TFamilyState::InSoftHeap>(SoftHeapIPv6, it, newDeadline); + } + + void PushHardV6(TNameToState::iterator it, TInstant newDeadline) { + PushToHeap<&TState::StateIPv6, &TFamilyState::HardDeadline, &TFamilyState::NextHardDeadline, &TFamilyState::InHardHeap>(HardHeapIPv6, it, newDeadline); + } + + void PushSoftV4(TNameToState::iterator it, TInstant newDeadline) { + PushToHeap<&TState::StateIPv4, &TFamilyState::SoftDeadline, &TFamilyState::NextSoftDeadline, &TFamilyState::InSoftHeap>(SoftHeapIPv4, it, newDeadline); + } + + void PushHardV4(TNameToState::iterator it, TInstant newDeadline) { + PushToHeap<&TState::StateIPv4, &TFamilyState::HardDeadline, &TFamilyState::NextHardDeadline, &TFamilyState::InHardHeap>(HardHeapIPv4, it, newDeadline); + } + + void ProcessErrorV6(TNameToState::iterator it, int status, TString errorText) { + auto now = TActivationContext::Now(); + if (MonCounters) { + --*MonCounters->OutgoingInFlightV6; + ++*MonCounters->OutgoingErrorsV6; + } + + auto& state = it->second.StateIPv6; + Y_VERIFY(state.Waiting, "Got error for a state we are not waiting"); + state.Waiting = false; + + // When we have a cached positive reply, don't overwrite it with spurious errors + const bool serverReplied = TFamilyState::ServerReplied(status); + if (!serverReplied && state.ServerReplied() && !state.IsHardExpired(now)) { + PushSoftV6(it, now + Options.SoftNegativeExpireTime); + if (state.Status == ARES_SUCCESS) { + SendAddrsV6(it); + } else { + SendErrorsV6(it, now); + } + return; + } + + state.Status = status; + state.ErrorText = std::move(errorText); + PushSoftV6(it, now + Options.SoftNegativeExpireTime); + if (serverReplied) { + // Server actually replied, so keep it cached for longer + PushHardV6(it, now + Options.HardPositiveExpireTime); + } else { + PushHardV6(it, now + Options.HardNegativeExpireTime); + } + + SendErrorsV6(it, now); + } + + void SendErrorsV6(TNameToState::iterator it, TInstant now) { + bool cleaned = false; + auto& state = it->second.StateIPv6; + while (state.WaitingRequests) { + THolder<TIncomingRequest> req(state.WaitingRequests.PopFront()); + if (req->Family == AF_UNSPEC && Options.AllowIPv4 && state.RetryUnspec()) { + if (!cleaned) { + CleanupExpired(now); + cleaned = true; + } + EnqueueRequestIPv4(std::move(req), /* isCacheMiss */ true); + } else { + ReplyWithError(std::move(req), state.Status, state.ErrorText); + } + } + } + + void ProcessErrorV4(TNameToState::iterator it, int status, TString errorText) { + auto now = TActivationContext::Now(); + if (MonCounters) { + --*MonCounters->OutgoingInFlightV4; + ++*MonCounters->OutgoingErrorsV4; + } + + auto& state = it->second.StateIPv4; + Y_VERIFY(state.Waiting, "Got error for a state we are not waiting"); + state.Waiting = false; + + // When we have a cached positive reply, don't overwrite it with spurious errors + const bool serverReplied = TFamilyState::ServerReplied(status); + if (!serverReplied && state.ServerReplied() && !state.IsHardExpired(now)) { + PushSoftV4(it, now + Options.SoftNegativeExpireTime); + if (state.Status == ARES_SUCCESS) { + SendAddrsV4(it); + } else { + SendErrorsV4(it); + } + return; + } + + state.Status = status; + state.ErrorText = std::move(errorText); + PushSoftV4(it, now + Options.SoftNegativeExpireTime); + if (serverReplied) { + // Server actually replied, so keep it cached for longer + PushHardV4(it, now + 
Options.HardPositiveExpireTime); + } else { + PushHardV4(it, now + Options.HardNegativeExpireTime); + } + + SendErrorsV4(it); + } + + void SendErrorsV4(TNameToState::iterator it) { + auto& state = it->second.StateIPv4; + while (state.WaitingRequests) { + THolder<TIncomingRequest> req(state.WaitingRequests.PopFront()); + ReplyWithError(std::move(req), state.Status, state.ErrorText); + } + } + + void ProcessAddrsV6(TNameToState::iterator it, TVector<struct in6_addr> addrs) { + if (Y_UNLIKELY(addrs.empty())) { + // Probably unnecessary: we don't want to deal with empty address lists + return ProcessErrorV6(it, ARES_ENODATA, ares_strerror(ARES_ENODATA)); + } + + auto now = TActivationContext::Now(); + if (MonCounters) { + --*MonCounters->OutgoingInFlightV6; + } + + auto& state = it->second.StateIPv6; + Y_VERIFY(state.Waiting, "Got reply for a state we are not waiting"); + state.Waiting = false; + + state.Status = ARES_SUCCESS; + it->second.AddrsIPv6 = std::move(addrs); + PushSoftV6(it, now + Options.SoftPositiveExpireTime); + PushHardV6(it, now + Options.HardPositiveExpireTime); + + SendAddrsV6(it); + } + + void SendAddrsV6(TNameToState::iterator it) { + auto& state = it->second.StateIPv6; + while (state.WaitingRequests) { + THolder<TIncomingRequest> req(state.WaitingRequests.PopFront()); + ReplyWithAddrs(std::move(req), it->second.AddrsIPv6); + } + } + + void ProcessAddrsV4(TNameToState::iterator it, TVector<struct in_addr> addrs) { + if (Y_UNLIKELY(addrs.empty())) { + // Probably unnecessary: we don't want to deal with empty address lists + return ProcessErrorV4(it, ARES_ENODATA, ares_strerror(ARES_ENODATA)); + } + + auto now = TActivationContext::Now(); + if (MonCounters) { + --*MonCounters->OutgoingInFlightV4; + } + + auto& state = it->second.StateIPv4; + Y_VERIFY(state.Waiting, "Got reply for a state we are not waiting"); + state.Waiting = false; + + state.Status = ARES_SUCCESS; + it->second.AddrsIPv4 = std::move(addrs); + PushSoftV4(it, now + Options.SoftPositiveExpireTime); + PushHardV4(it, now + Options.HardPositiveExpireTime); + + SendAddrsV4(it); + } + + void SendAddrsV4(TNameToState::iterator it) { + auto& state = it->second.StateIPv4; + while (state.WaitingRequests) { + THolder<TIncomingRequest> req(state.WaitingRequests.PopFront()); + ReplyWithAddrs(std::move(req), it->second.AddrsIPv4); + } + } + + private: + template<TFamilyState TState::*StateToFamily, + TInstant TFamilyState::* FamilyToDeadline, + TInstant TFamilyState::* FamilyToNextDeadline, + bool TFamilyState::* FamilyToFlag> + void DoCleanupExpired(TStateHeap<StateToFamily, FamilyToDeadline>& heap, TInstant now) { + while (!heap.empty()) { + auto it = heap.top(); + auto& family = it->second.*StateToFamily; + TInstant& deadline = family.*FamilyToDeadline; + if (now <= deadline) { + break; + } + + bool& flag = family.*FamilyToFlag; + Y_VERIFY(flag); + heap.pop(); + flag = false; + + TInstant& nextDeadline = family.*FamilyToNextDeadline; + if (now < nextDeadline) { + deadline = nextDeadline; + heap.push(it); + flag = true; + continue; + } + + // Remove unnecessary items + if (!it->second.Needed()) { + NameToState.erase(it); + if (MonCounters) { + *MonCounters->CacheSize = NameToState.size(); + } + } + } + } + + void CleanupExpired(TInstant now) { + DoCleanupExpired<&TState::StateIPv6, &TFamilyState::SoftDeadline, &TFamilyState::NextSoftDeadline, &TFamilyState::InSoftHeap>(SoftHeapIPv6, now); + DoCleanupExpired<&TState::StateIPv6, &TFamilyState::HardDeadline, &TFamilyState::NextHardDeadline, 
&TFamilyState::InHardHeap>(HardHeapIPv6, now); + DoCleanupExpired<&TState::StateIPv4, &TFamilyState::SoftDeadline, &TFamilyState::NextSoftDeadline, &TFamilyState::InSoftHeap>(SoftHeapIPv4, now); + DoCleanupExpired<&TState::StateIPv4, &TFamilyState::HardDeadline, &TFamilyState::NextHardDeadline, &TFamilyState::InHardHeap>(HardHeapIPv4, now); + } + + template<class TEvent> + void SendError(TActorId replyTo, ui64 cookie, int status, const TString& errorText) { + auto reply = MakeHolder<TEvent>(); + reply->Status = status; + reply->ErrorText = errorText; + this->Send(replyTo, reply.Release(), 0, cookie); + } + + void ReplyWithError(THolder<TIncomingRequest> req, int status, const TString& errorText) { + if (MonCounters) { + ++*MonCounters->IncomingErrors; + } + switch (req->Type) { + case EIncomingRequestType::GetHostByName: { + SendError<TEvDns::TEvGetHostByNameResult>(req->Sender, req->Cookie, status, errorText); + break; + } + case EIncomingRequestType::GetAddr: { + SendError<TEvDns::TEvGetAddrResult>(req->Sender, req->Cookie, status, errorText); + break; + } + } + } + + void ReplyWithAddrs(THolder<TIncomingRequest> req, const TVector<struct in6_addr>& addrs) { + switch (req->Type) { + case EIncomingRequestType::GetHostByName: { + auto reply = MakeHolder<TEvDns::TEvGetHostByNameResult>(); + reply->AddrsV6 = addrs; + Send(req->Sender, reply.Release(), 0, req->Cookie); + break; + } + case EIncomingRequestType::GetAddr: { + Y_VERIFY(!addrs.empty()); + auto reply = MakeHolder<TEvDns::TEvGetAddrResult>(); + reply->Addr = addrs.front(); + Send(req->Sender, reply.Release(), 0, req->Cookie); + break; + } + } + } + + void ReplyWithAddrs(THolder<TIncomingRequest> req, const TVector<struct in_addr>& addrs) { + switch (req->Type) { + case EIncomingRequestType::GetHostByName: { + auto reply = MakeHolder<TEvDns::TEvGetHostByNameResult>(); + reply->AddrsV4 = addrs; + Send(req->Sender, reply.Release(), 0, req->Cookie); + break; + } + case EIncomingRequestType::GetAddr: { + Y_VERIFY(!addrs.empty()); + auto reply = MakeHolder<TEvDns::TEvGetAddrResult>(); + reply->Addr = addrs.front(); + Send(req->Sender, reply.Release(), 0, req->Cookie); + break; + } + } + } + + void ReplyWithError(THolder<TIncomingRequest> req, int status) { + ReplyWithError(std::move(req), status, ares_strerror(status)); + } + + void DropPending(TIncomingRequestList& list, int status, const TString& errorText) { + while (list) { + THolder<TIncomingRequest> req(list.PopFront()); + ReplyWithError(std::move(req), status, errorText); + } + } + + void DropPending(int status, const TString& errorText) { + for (auto& [name, state] : NameToState) { + DropPending(state.StateIPv6.WaitingRequests, status, errorText); + DropPending(state.StateIPv4.WaitingRequests, status, errorText); + } + } + + void DropPending(int status) { + DropPending(status, ares_strerror(status)); + } + + private: + const TActorId Upstream; + const TCachingDnsResolverOptions Options; + const THolder<TMonCounters> MonCounters; + + TNameToState NameToState; + TStateHeap<&TState::StateIPv6, &TFamilyState::SoftDeadline> SoftHeapIPv6; + TStateHeap<&TState::StateIPv6, &TFamilyState::HardDeadline> HardHeapIPv6; + TStateHeap<&TState::StateIPv4, &TFamilyState::SoftDeadline> SoftHeapIPv4; + TStateHeap<&TState::StateIPv4, &TFamilyState::HardDeadline> HardHeapIPv4; + + THashMap<ui64, TWaitingInfo> WaitingRequests; + ui64 LastRequestId = 0; + }; + + IActor* CreateCachingDnsResolver(TActorId upstream, TCachingDnsResolverOptions options) { + return new TCachingDnsResolver(upstream, 
std::move(options)); + } + +} // namespace NDnsResolver +} // namespace NActors diff --git a/library/cpp/actors/dnsresolver/dnsresolver_caching_ut.cpp b/library/cpp/actors/dnsresolver/dnsresolver_caching_ut.cpp new file mode 100644 index 0000000000..c3b7cb3c77 --- /dev/null +++ b/library/cpp/actors/dnsresolver/dnsresolver_caching_ut.cpp @@ -0,0 +1,630 @@ +#include "dnsresolver.h" + +#include <library/cpp/actors/core/hfunc.h> +#include <library/cpp/actors/testlib/test_runtime.h> +#include <library/cpp/testing/unittest/registar.h> +#include <util/string/builder.h> + +#include <ares.h> + +using namespace NActors; +using namespace NActors::NDnsResolver; + +// FIXME: use a mock resolver +Y_UNIT_TEST_SUITE(CachingDnsResolver) { + + struct TAddrToString { + TString operator()(const std::monostate&) const { + return "<nothing>"; + } + + TString operator()(const struct in6_addr& addr) const { + char dst[INET6_ADDRSTRLEN]; + auto res = ares_inet_ntop(AF_INET6, &addr, dst, INET6_ADDRSTRLEN); + Y_VERIFY(res, "Cannot convert ipv6 address"); + return dst; + } + + TString operator()(const struct in_addr& addr) const { + char dst[INET_ADDRSTRLEN]; + auto res = ares_inet_ntop(AF_INET, &addr, dst, INET_ADDRSTRLEN); + Y_VERIFY(res, "Cannot convert ipv4 address"); + return dst; + } + }; + + TString AddrToString(const std::variant<std::monostate, struct in6_addr, struct in_addr>& v) { + return std::visit(TAddrToString(), v); + } + + struct TMockReply { + static constexpr TDuration DefaultDelay = TDuration::MilliSeconds(1); + + int Status = 0; + TDuration Delay; + TVector<struct in6_addr> AddrsV6; + TVector<struct in_addr> AddrsV4; + + static TMockReply Error(int status, TDuration delay = DefaultDelay) { + Y_VERIFY(status != 0); + TMockReply reply; + reply.Status = status; + reply.Delay = delay; + return reply; + } + + static TMockReply Empty(TDuration delay = DefaultDelay) { + TMockReply reply; + reply.Delay = delay; + return reply; + } + + static TMockReply ManyV6(const TVector<TString>& addrs, TDuration delay = DefaultDelay) { + TMockReply reply; + reply.Delay = delay; + for (const TString& addr : addrs) { + void* dst = &reply.AddrsV6.emplace_back(); + int status = ares_inet_pton(AF_INET6, addr.c_str(), dst); + Y_VERIFY(status == 1, "Invalid ipv6 address: %s", addr.c_str()); + } + return reply; + } + + static TMockReply ManyV4(const TVector<TString>& addrs, TDuration delay = DefaultDelay) { + TMockReply reply; + reply.Delay = delay; + for (const TString& addr : addrs) { + void* dst = &reply.AddrsV4.emplace_back(); + int status = ares_inet_pton(AF_INET, addr.c_str(), dst); + Y_VERIFY(status == 1, "Invalid ipv4 address: %s", addr.c_str()); + } + return reply; + } + + static TMockReply SingleV6(const TString& addr, TDuration delay = DefaultDelay) { + return ManyV6({ addr }, delay); + } + + static TMockReply SingleV4(const TString& addr, TDuration delay = DefaultDelay) { + return ManyV4({ addr }, delay); + } + }; + + using TMockDnsCallback = std::function<TMockReply (const TString&, int)>; + + class TMockDnsResolver : public TActor<TMockDnsResolver> { + public: + TMockDnsResolver(TMockDnsCallback callback) + : TActor(&TThis::StateWork) + , Callback(std::move(callback)) + { } + + private: + struct TEvPrivate { + enum EEv { + EvScheduled = EventSpaceBegin(TEvents::ES_PRIVATE), + }; + + struct TEvScheduled : public TEventLocal<TEvScheduled, EvScheduled> { + TActorId Sender; + ui64 Cookie; + TMockReply Reply; + + TEvScheduled(TActorId sender, ui64 cookie, TMockReply reply) + : Sender(sender) + , Cookie(cookie) + , 
Reply(std::move(reply)) + { } + }; + }; + + private: + STRICT_STFUNC(StateWork, { + hFunc(TEvents::TEvPoison, Handle); + hFunc(TEvDns::TEvGetHostByName, Handle); + hFunc(TEvPrivate::TEvScheduled, Handle); + }); + + void Handle(TEvents::TEvPoison::TPtr&) { + PassAway(); + } + + void Handle(TEvDns::TEvGetHostByName::TPtr& ev) { + auto reply = Callback(ev->Get()->Name, ev->Get()->Family); + if (reply.Delay) { + Schedule(reply.Delay, new TEvPrivate::TEvScheduled(ev->Sender, ev->Cookie, std::move(reply))); + } else { + SendReply(ev->Sender, ev->Cookie, std::move(reply)); + } + } + + void Handle(TEvPrivate::TEvScheduled::TPtr& ev) { + SendReply(ev->Get()->Sender, ev->Get()->Cookie, std::move(ev->Get()->Reply)); + } + + private: + void SendReply(const TActorId& sender, ui64 cookie, TMockReply&& reply) { + auto res = MakeHolder<TEvDns::TEvGetHostByNameResult>(); + res->Status = reply.Status; + if (res->Status != 0) { + res->ErrorText = ares_strerror(res->Status); + } else { + res->AddrsV6 = std::move(reply.AddrsV6); + res->AddrsV4 = std::move(reply.AddrsV4); + } + Send(sender, res.Release(), 0, cookie); + } + + private: + TMockDnsCallback Callback; + }; + + struct TCachingDnsRuntime : public TTestActorRuntimeBase { + TCachingDnsResolverOptions ResolverOptions; + TActorId MockResolver; + TActorId Resolver; + TActorId Sleeper; + TString Section_; + + NMonitoring::TDynamicCounters::TCounterPtr InFlight6; + NMonitoring::TDynamicCounters::TCounterPtr InFlight4; + NMonitoring::TDynamicCounters::TCounterPtr Total6; + NMonitoring::TDynamicCounters::TCounterPtr Total4; + NMonitoring::TDynamicCounters::TCounterPtr Misses; + NMonitoring::TDynamicCounters::TCounterPtr Hits; + + THashMap<TString, TMockReply> ReplyV6; + THashMap<TString, TMockReply> ReplyV4; + + TCachingDnsRuntime() { + SetScheduledEventFilter([](auto&&, auto&&, auto&&, auto&&) { return false; }); + ResolverOptions.MonCounters = new NMonitoring::TDynamicCounters; + + ReplyV6["localhost"] = TMockReply::SingleV6("::1"); + ReplyV4["localhost"] = TMockReply::SingleV4("127.0.0.1"); + ReplyV6["yandex.ru"] = TMockReply::SingleV6("2a02:6b8:a::a", TDuration::MilliSeconds(500)); + ReplyV4["yandex.ru"] = TMockReply::SingleV4("77.88.55.77", TDuration::MilliSeconds(250)); + ReplyV6["router.asus.com"] = TMockReply::Error(ARES_ENODATA); + ReplyV4["router.asus.com"] = TMockReply::SingleV4("192.168.0.1"); + } + + void Start(TMockDnsCallback callback) { + MockResolver = Register(new TMockDnsResolver(std::move(callback))); + EnableScheduleForActor(MockResolver); + Resolver = Register(CreateCachingDnsResolver(MockResolver, ResolverOptions)); + Sleeper = AllocateEdgeActor(); + + InFlight6 = ResolverOptions.MonCounters->GetCounter("DnsResolver/Outgoing/InFlight/V6", false); + InFlight4 = ResolverOptions.MonCounters->GetCounter("DnsResolver/Outgoing/InFlight/V4", false); + Total6 = ResolverOptions.MonCounters->GetCounter("DnsResolver/Outgoing/Total/V6", true); + Total4 = ResolverOptions.MonCounters->GetCounter("DnsResolver/Outgoing/Total/V4", true); + Misses = ResolverOptions.MonCounters->GetCounter("DnsResolver/Cache/Misses", true); + Hits = ResolverOptions.MonCounters->GetCounter("DnsResolver/Cache/Hits", true); + } + + void Start() { + Start([this](const TString& name, int family) { + switch (family) { + case AF_INET6: { + auto it = ReplyV6.find(name); + if (it != ReplyV6.end()) { + return it->second; + } + break; + } + case AF_INET: { + auto it = ReplyV4.find(name); + if (it != ReplyV4.end()) { + return it->second; + } + break; + } + } + return 
TMockReply::Error(ARES_ENOTFOUND); + }); + } + + void Section(const TString& section) { + Section_ = section; + } + + void Sleep(TDuration duration) { + Schedule(new IEventHandle(Sleeper, Sleeper, new TEvents::TEvWakeup), duration); + GrabEdgeEventRethrow<TEvents::TEvWakeup>(Sleeper); + } + + void WaitNoInFlight() { + if (*InFlight6 || *InFlight4) { + TDispatchOptions options; + options.CustomFinalCondition = [&]() { + return !*InFlight6 && !*InFlight4; + }; + DispatchEvents(options); + UNIT_ASSERT_C(!*InFlight6 && !*InFlight4, "Failed to wait for no inflight in " << Section_); + } + } + + void SendGetHostByName(const TActorId& sender, const TString& name, int family = AF_UNSPEC) { + Send(new IEventHandle(Resolver, sender, new TEvDns::TEvGetHostByName(name, family)), 0, true); + } + + void SendGetAddr(const TActorId& sender, const TString& name, int family = AF_UNSPEC) { + Send(new IEventHandle(Resolver, sender, new TEvDns::TEvGetAddr(name, family)), 0, true); + } + + TEvDns::TEvGetHostByNameResult::TPtr WaitGetHostByName(const TActorId& sender) { + return GrabEdgeEventRethrow<TEvDns::TEvGetHostByNameResult>(sender); + } + + TEvDns::TEvGetAddrResult::TPtr WaitGetAddr(const TActorId& sender) { + return GrabEdgeEventRethrow<TEvDns::TEvGetAddrResult>(sender); + } + + void ExpectInFlight6(i64 count) { + UNIT_ASSERT_VALUES_EQUAL_C(InFlight6->Val(), count, Section_); + } + + void ExpectInFlight4(i64 count) { + UNIT_ASSERT_VALUES_EQUAL_C(InFlight4->Val(), count, Section_); + } + + void ExpectTotal6(i64 count) { + UNIT_ASSERT_VALUES_EQUAL_C(Total6->Val(), count, Section_); + } + + void ExpectTotal4(i64 count) { + UNIT_ASSERT_VALUES_EQUAL_C(Total4->Val(), count, Section_); + } + + void Expect6(i64 total, i64 inflight) { + UNIT_ASSERT_C( + Total6->Val() == total && InFlight6->Val() == inflight, + Section_ << ": Expect6(" << total << ", " << inflight << ") " + << " but got (" << Total6->Val() << ", " << InFlight6->Val() << ")"); + } + + void Expect4(i64 total, i64 inflight) { + UNIT_ASSERT_C( + Total4->Val() == total && InFlight4->Val() == inflight, + Section_ << ": Expect4(" << total << ", " << inflight << ") " + << " got (" << Total4->Val() << ", " << InFlight4->Val() << ")"); + } + + void ExpectMisses(i64 count) { + UNIT_ASSERT_VALUES_EQUAL_C(Misses->Val(), count, Section_); + } + + void ExpectHits(i64 count) { + UNIT_ASSERT_VALUES_EQUAL_C(Hits->Val(), count, Section_); + } + + void ExpectGetHostByNameError(const TActorId& sender, int status) { + auto ev = WaitGetHostByName(sender); + UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, status, Section_ << ": " << ev->Get()->ErrorText); + } + + void ExpectGetAddrError(const TActorId& sender, int status) { + auto ev = WaitGetAddr(sender); + UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, status, Section_ << ": " << ev->Get()->ErrorText); + } + + void ExpectGetHostByNameSuccess(const TActorId& sender, const TString& expected) { + auto ev = WaitGetHostByName(sender); + UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, 0, Section_ << ": " << ev->Get()->ErrorText); + TStringBuilder result; + for (const auto& addr : ev->Get()->AddrsV6) { + if (result) { + result << ','; + } + result << TAddrToString()(addr); + } + for (const auto& addr : ev->Get()->AddrsV4) { + if (result) { + result << ','; + } + result << TAddrToString()(addr); + } + UNIT_ASSERT_VALUES_EQUAL_C(TString(result), expected, Section_); + } + + void ExpectGetAddrSuccess(const TActorId& sender, const TString& expected) { + auto ev = WaitGetAddr(sender); + UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, 0, 
Section_ << ": " << ev->Get()->ErrorText); + TString result = AddrToString(ev->Get()->Addr); + UNIT_ASSERT_VALUES_EQUAL_C(result, expected, Section_); + } + }; + + Y_UNIT_TEST(UnusableResolver) { + TCachingDnsRuntime runtime; + runtime.Initialize(); + runtime.Start(); + + auto sender = runtime.AllocateEdgeActor(); + + runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC); + runtime.ExpectGetAddrSuccess(sender, "2a02:6b8:a::a"); + + runtime.Send(new IEventHandle(runtime.MockResolver, { }, new TEvents::TEvPoison), 0, true); + runtime.SendGetAddr(sender, "foo.ru", AF_UNSPEC); + runtime.ExpectGetAddrError(sender, ARES_ENOTINITIALIZED); + } + + Y_UNIT_TEST(ResolveCaching) { + TCachingDnsRuntime runtime; + runtime.Initialize(); + runtime.Start(); + + auto sender = runtime.AllocateEdgeActor(); + + // First time resolve, ipv4 and ipv6 sent in parallel, we wait for ipv6 result + runtime.Section("First time resolve"); + runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC); + runtime.ExpectGetAddrSuccess(sender, "2a02:6b8:a::a"); + runtime.Expect6(1, 0); + runtime.Expect4(1, 0); + runtime.ExpectMisses(1); + runtime.ExpectHits(0); + + // Second resolve, ipv6 and ipv4 queries result in a cache hit + runtime.Section("Second resolve, ipv6"); + runtime.SendGetAddr(sender, "yandex.ru", AF_INET6); + runtime.ExpectGetAddrSuccess(sender, "2a02:6b8:a::a"); + runtime.Expect6(1, 0); + runtime.ExpectHits(1); + runtime.Section("Second resolve, ipv4"); + runtime.SendGetAddr(sender, "yandex.ru", AF_INET); + runtime.ExpectGetAddrSuccess(sender, "77.88.55.77"); + runtime.Expect4(1, 0); + runtime.ExpectHits(2); + + // Wait until soft expiration and try ipv4 again + // Will cause a cache hit, but will start a new ipv4 request in background + runtime.Section("Retry ipv4 after soft expiration"); + runtime.Sleep(TDuration::Seconds(15)); + runtime.SendGetAddr(sender, "yandex.ru", AF_INET); + runtime.ExpectGetAddrSuccess(sender, "77.88.55.77"); + runtime.Expect6(1, 0); + runtime.Expect4(2, 1); + runtime.ExpectMisses(1); + runtime.ExpectHits(3); + runtime.WaitNoInFlight(); + + // Wait until soft expiration and try both again + // Will cause a cache hit, but will start a new ipv6 request in background + runtime.Section("Retry both after soft expiration"); + runtime.Sleep(TDuration::Seconds(15)); + runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC); + runtime.ExpectGetAddrSuccess(sender, "2a02:6b8:a::a"); + runtime.Expect6(2, 1); + runtime.Expect4(2, 0); + runtime.ExpectMisses(1); + runtime.ExpectHits(4); + runtime.WaitNoInFlight(); + + // Wait until hard expiration and try both again + // Will cause a cache miss and new resolve requests + runtime.Section("Retry both after hard expiration"); + runtime.Sleep(TDuration::Hours(2)); + runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC); + runtime.ExpectGetAddrSuccess(sender, "2a02:6b8:a::a"); + runtime.Expect6(3, 0); + runtime.Expect4(3, 0); + runtime.ExpectMisses(2); + runtime.ExpectHits(4); + + // Wait half the hard expiration time, must always result in a cache hit + runtime.Section("Retry both after half hard expiration"); + for (ui64 i = 1; i <= 4; ++i) { + runtime.Sleep(TDuration::Hours(1)); + runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC); + runtime.ExpectGetAddrSuccess(sender, "2a02:6b8:a::a"); + runtime.Expect6(3 + i, 1); + runtime.ExpectHits(4 + i); + runtime.WaitNoInFlight(); + } + + // Change v6 result to a timeout, must keep using cached result until hard expiration + runtime.Section("Dns keeps timing out"); + runtime.ReplyV6["yandex.ru"] = 
TMockReply::Error(ARES_ETIMEOUT); + for (ui64 i = 1; i <= 4; ++i) { + runtime.Sleep(TDuration::Seconds(15)); + runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC); + runtime.ExpectGetAddrSuccess(sender, "2a02:6b8:a::a"); + runtime.Expect6(7 + i, 1); + runtime.ExpectHits(8 + i); + runtime.WaitNoInFlight(); + } + + // Change v6 result to nodata, must switch to a v4 result eventually + runtime.Section("Host changes to being ipv4 only"); + runtime.ReplyV6["yandex.ru"] = TMockReply::Error(ARES_ENODATA); + runtime.Sleep(TDuration::Seconds(2)); + runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC); + runtime.ExpectGetAddrSuccess(sender, "2a02:6b8:a::a"); + runtime.WaitNoInFlight(); + runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC); + runtime.ExpectGetAddrSuccess(sender, "77.88.55.77"); + runtime.Expect6(12, 0); + runtime.Expect4(4, 0); + runtime.ExpectMisses(3); + + // Change v6 result to nxdomain, must not fall back to a v4 result + runtime.Section("Host is removed from dns"); + runtime.ReplyV6["yandex.ru"] = TMockReply::Error(ARES_ENOTFOUND); + runtime.Sleep(TDuration::Seconds(15)); + runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC); + runtime.ExpectGetAddrSuccess(sender, "77.88.55.77"); + runtime.WaitNoInFlight(); + runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC); + runtime.ExpectGetAddrError(sender, ARES_ENOTFOUND); + } + + Y_UNIT_TEST(ResolveCachingV4) { + TCachingDnsRuntime runtime; + runtime.Initialize(); + runtime.Start(); + + auto sender = runtime.AllocateEdgeActor(); + + runtime.Section("First request"); + runtime.SendGetAddr(sender, "router.asus.com", AF_UNSPEC); + runtime.ExpectGetAddrSuccess(sender, "192.168.0.1"); + runtime.ExpectMisses(1); + + runtime.Section("Second request"); + runtime.SendGetAddr(sender, "router.asus.com", AF_UNSPEC); + runtime.ExpectGetAddrSuccess(sender, "192.168.0.1"); + runtime.ExpectHits(1); + + runtime.Section("Dns keeps timing out"); + runtime.ReplyV6["router.asus.com"] = TMockReply::Error(ARES_ETIMEOUT); + runtime.ReplyV4["router.asus.com"] = TMockReply::Error(ARES_ETIMEOUT); + for (ui64 i = 1; i <= 4; ++i) { + runtime.Sleep(TDuration::Seconds(15)); + runtime.SendGetAddr(sender, "router.asus.com", AF_UNSPEC); + runtime.ExpectGetAddrSuccess(sender, "192.168.0.1"); + runtime.Expect6(1 + i, 1); + runtime.Expect4(1 + i, 1); + runtime.ExpectHits(1 + i); + runtime.WaitNoInFlight(); + } + + runtime.Section("Host is removed from ipv4 dns"); + runtime.ReplyV4["router.asus.com"] = TMockReply::Error(ARES_ENOTFOUND); + runtime.Sleep(TDuration::Seconds(15)); + runtime.SendGetAddr(sender, "router.asus.com", AF_UNSPEC); + runtime.ExpectGetAddrSuccess(sender, "192.168.0.1"); + runtime.WaitNoInFlight(); + runtime.SendGetAddr(sender, "router.asus.com", AF_UNSPEC); + runtime.ExpectGetAddrError(sender, ARES_ENOTFOUND); + } + + Y_UNIT_TEST(EventualTimeout) { + TCachingDnsRuntime runtime; + runtime.Initialize(); + runtime.Start(); + + auto sender = runtime.AllocateEdgeActor(); + + runtime.ReplyV6["notfound.ru"] = TMockReply::Error(ARES_ENODATA); + runtime.ReplyV4["notfound.ru"] = TMockReply::Error(ARES_ENOTFOUND); + runtime.SendGetAddr(sender, "notfound.ru", AF_UNSPEC); + runtime.ExpectGetAddrError(sender, ARES_ENOTFOUND); + + runtime.ReplyV4["notfound.ru"] = TMockReply::Error(ARES_ETIMEOUT); + runtime.SendGetAddr(sender, "notfound.ru", AF_UNSPEC); + runtime.ExpectGetAddrError(sender, ARES_ENOTFOUND); + runtime.WaitNoInFlight(); + + bool timeout = false; + for (ui64 i = 1; i <= 8; ++i) { + runtime.Sleep(TDuration::Minutes(30)); + runtime.SendGetAddr(sender, 
"notfound.ru", AF_UNSPEC); + auto ev = runtime.WaitGetAddr(sender); + if (ev->Get()->Status == ARES_ETIMEOUT && i > 2) { + timeout = true; + break; + } + UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, ARES_ENOTFOUND, + "Iteration " << i << ": " << ev->Get()->ErrorText); + } + + UNIT_ASSERT_C(timeout, "DnsResolver did not reply with a timeout"); + } + + Y_UNIT_TEST(MultipleRequestsAndHosts) { + TCachingDnsRuntime runtime; + runtime.Initialize(); + runtime.Start(); + + auto sender = runtime.AllocateEdgeActor(); + + runtime.SendGetHostByName(sender, "router.asus.com", AF_UNSPEC); + runtime.SendGetAddr(sender, "router.asus.com", AF_UNSPEC); + runtime.SendGetHostByName(sender, "yandex.ru", AF_UNSPEC); + runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC); + runtime.ExpectGetHostByNameSuccess(sender, "192.168.0.1"); + runtime.ExpectGetAddrSuccess(sender, "192.168.0.1"); + runtime.ExpectGetHostByNameSuccess(sender, "2a02:6b8:a::a"); + runtime.ExpectGetAddrSuccess(sender, "2a02:6b8:a::a"); + + runtime.SendGetHostByName(sender, "notfound.ru", AF_UNSPEC); + runtime.SendGetAddr(sender, "notfound.ru", AF_UNSPEC); + runtime.ExpectGetHostByNameError(sender, ARES_ENOTFOUND); + runtime.ExpectGetAddrError(sender, ARES_ENOTFOUND); + } + + Y_UNIT_TEST(DisabledIPv6) { + TCachingDnsRuntime runtime; + runtime.ResolverOptions.AllowIPv6 = false; + runtime.Initialize(); + runtime.Start(); + + auto sender = runtime.AllocateEdgeActor(); + + runtime.SendGetHostByName(sender, "yandex.ru", AF_UNSPEC); + runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC); + runtime.ExpectGetHostByNameSuccess(sender, "77.88.55.77"); + runtime.ExpectGetAddrSuccess(sender, "77.88.55.77"); + + runtime.SendGetHostByName(sender, "yandex.ru", AF_INET6); + runtime.SendGetAddr(sender, "yandex.ru", AF_INET6); + runtime.ExpectGetHostByNameError(sender, ARES_EBADFAMILY); + runtime.ExpectGetAddrError(sender, ARES_EBADFAMILY); + + runtime.SendGetHostByName(sender, "yandex.ru", AF_UNSPEC); + runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC); + runtime.ExpectGetHostByNameSuccess(sender, "77.88.55.77"); + runtime.ExpectGetAddrSuccess(sender, "77.88.55.77"); + + runtime.SendGetHostByName(sender, "notfound.ru", AF_UNSPEC); + runtime.SendGetAddr(sender, "notfound.ru", AF_UNSPEC); + runtime.ExpectGetHostByNameError(sender, ARES_ENOTFOUND); + runtime.ExpectGetAddrError(sender, ARES_ENOTFOUND); + } + + Y_UNIT_TEST(DisabledIPv4) { + TCachingDnsRuntime runtime; + runtime.ResolverOptions.AllowIPv4 = false; + runtime.Initialize(); + runtime.Start(); + + auto sender = runtime.AllocateEdgeActor(); + + runtime.SendGetHostByName(sender, "router.asus.com", AF_UNSPEC); + runtime.SendGetAddr(sender, "router.asus.com", AF_UNSPEC); + runtime.ExpectGetHostByNameError(sender, ARES_ENODATA); + runtime.ExpectGetAddrError(sender, ARES_ENODATA); + + runtime.SendGetHostByName(sender, "router.asus.com", AF_INET); + runtime.SendGetAddr(sender, "router.asus.com", AF_INET); + runtime.ExpectGetHostByNameError(sender, ARES_EBADFAMILY); + runtime.ExpectGetAddrError(sender, ARES_EBADFAMILY); + + runtime.SendGetHostByName(sender, "router.asus.com", AF_UNSPEC); + runtime.SendGetAddr(sender, "router.asus.com", AF_UNSPEC); + runtime.ExpectGetHostByNameError(sender, ARES_ENODATA); + runtime.ExpectGetAddrError(sender, ARES_ENODATA); + + runtime.SendGetHostByName(sender, "notfound.ru", AF_UNSPEC); + runtime.SendGetAddr(sender, "notfound.ru", AF_UNSPEC); + runtime.ExpectGetHostByNameError(sender, ARES_ENOTFOUND); + runtime.ExpectGetAddrError(sender, ARES_ENOTFOUND); + } + + 
Y_UNIT_TEST(PoisonPill) { + TCachingDnsRuntime runtime; + runtime.Initialize(); + runtime.Start(); + + auto sender = runtime.AllocateEdgeActor(); + + runtime.SendGetHostByName(sender, "yandex.ru", AF_UNSPEC); + runtime.SendGetAddr(sender, "yandex.ru", AF_UNSPEC); + runtime.Send(new IEventHandle(runtime.Resolver, sender, new TEvents::TEvPoison), 0, true); + runtime.ExpectGetHostByNameError(sender, ARES_ECANCELLED); + runtime.ExpectGetAddrError(sender, ARES_ECANCELLED); + } + +} diff --git a/library/cpp/actors/dnsresolver/dnsresolver_ondemand.cpp b/library/cpp/actors/dnsresolver/dnsresolver_ondemand.cpp new file mode 100644 index 0000000000..2025162e95 --- /dev/null +++ b/library/cpp/actors/dnsresolver/dnsresolver_ondemand.cpp @@ -0,0 +1,64 @@ +#include "dnsresolver.h" + +#include <library/cpp/actors/core/hfunc.h> + +namespace NActors { +namespace NDnsResolver { + + class TOnDemandDnsResolver : public TActor<TOnDemandDnsResolver> { + public: + TOnDemandDnsResolver(TOnDemandDnsResolverOptions options) + : TActor(&TThis::StateWork) + , Options(std::move(options)) + { } + + static constexpr EActivityType ActorActivityType() { + return DNS_RESOLVER; + } + + private: + STRICT_STFUNC(StateWork, { + cFunc(TEvents::TEvPoison::EventType, PassAway); + fFunc(TEvDns::TEvGetHostByName::EventType, Forward); + fFunc(TEvDns::TEvGetAddr::EventType, Forward); + }); + + void Forward(STATEFN_SIG) { + ev->Rewrite(ev->GetTypeRewrite(), GetUpstream()); + TActivationContext::Send(std::move(ev)); + } + + private: + TActorId GetUpstream() { + if (Y_UNLIKELY(!CachingResolverId)) { + if (Y_LIKELY(!SimpleResolverId)) { + SimpleResolverId = RegisterWithSameMailbox(CreateSimpleDnsResolver(Options)); + } + CachingResolverId = RegisterWithSameMailbox(CreateCachingDnsResolver(SimpleResolverId, Options)); + } + return CachingResolverId; + } + + void PassAway() override { + if (CachingResolverId) { + Send(CachingResolverId, new TEvents::TEvPoison); + CachingResolverId = { }; + } + if (SimpleResolverId) { + Send(SimpleResolverId, new TEvents::TEvPoison); + SimpleResolverId = { }; + } + } + + private: + TOnDemandDnsResolverOptions Options; + TActorId SimpleResolverId; + TActorId CachingResolverId; + }; + + IActor* CreateOnDemandDnsResolver(TOnDemandDnsResolverOptions options) { + return new TOnDemandDnsResolver(std::move(options)); + } + +} // namespace NDnsResolver +} // namespace NActors diff --git a/library/cpp/actors/dnsresolver/dnsresolver_ondemand_ut.cpp b/library/cpp/actors/dnsresolver/dnsresolver_ondemand_ut.cpp new file mode 100644 index 0000000000..2758484552 --- /dev/null +++ b/library/cpp/actors/dnsresolver/dnsresolver_ondemand_ut.cpp @@ -0,0 +1,24 @@ +#include "dnsresolver.h" + +#include <library/cpp/actors/testlib/test_runtime.h> +#include <library/cpp/testing/unittest/registar.h> + +using namespace NActors; +using namespace NActors::NDnsResolver; + +Y_UNIT_TEST_SUITE(OnDemandDnsResolver) { + + Y_UNIT_TEST(ResolveLocalHost) { + TTestActorRuntimeBase runtime; + runtime.Initialize(); + auto sender = runtime.AllocateEdgeActor(); + auto resolver = runtime.Register(CreateOnDemandDnsResolver()); + runtime.Send(new IEventHandle(resolver, sender, new TEvDns::TEvGetHostByName("localhost", AF_UNSPEC)), + 0, true); + auto ev = runtime.GrabEdgeEventRethrow<TEvDns::TEvGetHostByNameResult>(sender); + UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, 0, ev->Get()->ErrorText); + size_t addrs = ev->Get()->AddrsV4.size() + ev->Get()->AddrsV6.size(); + UNIT_ASSERT_C(addrs > 0, "Got " << addrs << " addresses"); + } + +} diff --git 
a/library/cpp/actors/dnsresolver/dnsresolver_ut.cpp b/library/cpp/actors/dnsresolver/dnsresolver_ut.cpp new file mode 100644 index 0000000000..0c343a805c --- /dev/null +++ b/library/cpp/actors/dnsresolver/dnsresolver_ut.cpp @@ -0,0 +1,98 @@ +#include "dnsresolver.h" + +#include <library/cpp/actors/testlib/test_runtime.h> +#include <library/cpp/testing/unittest/registar.h> +#include <util/string/builder.h> + +#include <ares.h> + +using namespace NActors; +using namespace NActors::NDnsResolver; + +Y_UNIT_TEST_SUITE(DnsResolver) { + + struct TSilentUdpServer { + TInetDgramSocket Socket; + ui16 Port; + + TSilentUdpServer() { + TSockAddrInet addr("127.0.0.1", 0); + int err = Socket.Bind(&addr); + Y_VERIFY(err == 0, "Cannot bind a udp socket"); + Port = addr.GetPort(); + } + }; + + Y_UNIT_TEST(ResolveLocalHost) { + TTestActorRuntimeBase runtime; + runtime.Initialize(); + auto sender = runtime.AllocateEdgeActor(); + auto resolver = runtime.Register(CreateSimpleDnsResolver()); + runtime.Send(new IEventHandle(resolver, sender, new TEvDns::TEvGetHostByName("localhost", AF_UNSPEC)), + 0, true); + auto ev = runtime.GrabEdgeEventRethrow<TEvDns::TEvGetHostByNameResult>(sender); + UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, 0, ev->Get()->ErrorText); + size_t addrs = ev->Get()->AddrsV4.size() + ev->Get()->AddrsV6.size(); + UNIT_ASSERT_C(addrs > 0, "Got " << addrs << " addresses"); + } + + Y_UNIT_TEST(ResolveYandexRu) { + TTestActorRuntimeBase runtime; + runtime.Initialize(); + auto sender = runtime.AllocateEdgeActor(); + auto resolver = runtime.Register(CreateSimpleDnsResolver()); + runtime.Send(new IEventHandle(resolver, sender, new TEvDns::TEvGetHostByName("yandex.ru", AF_UNSPEC)), + 0, true); + auto ev = runtime.GrabEdgeEventRethrow<TEvDns::TEvGetHostByNameResult>(sender); + UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, 0, ev->Get()->ErrorText); + size_t addrs = ev->Get()->AddrsV4.size() + ev->Get()->AddrsV6.size(); + UNIT_ASSERT_C(addrs > 0, "Got " << addrs << " addresses"); + } + + Y_UNIT_TEST(GetAddrYandexRu) { + TTestActorRuntimeBase runtime; + runtime.Initialize(); + auto sender = runtime.AllocateEdgeActor(); + auto resolver = runtime.Register(CreateSimpleDnsResolver()); + + runtime.Send(new IEventHandle(resolver, sender, new TEvDns::TEvGetAddr("yandex.ru", AF_UNSPEC)), + 0, true); + auto ev = runtime.GrabEdgeEventRethrow<TEvDns::TEvGetAddrResult>(sender); + UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, 0, ev->Get()->ErrorText); + UNIT_ASSERT_C(ev->Get()->IsV4() || ev->Get()->IsV6(), "Expect v4 or v6 address"); + } + + Y_UNIT_TEST(ResolveTimeout) { + TSilentUdpServer server; + TTestActorRuntimeBase runtime; + runtime.Initialize(); + auto sender = runtime.AllocateEdgeActor(); + TSimpleDnsResolverOptions options; + options.Timeout = TDuration::MilliSeconds(250); + options.Attempts = 2; + options.Servers.emplace_back(TStringBuilder() << "127.0.0.1:" << server.Port); + auto resolver = runtime.Register(CreateSimpleDnsResolver(options)); + runtime.Send(new IEventHandle(resolver, sender, new TEvDns::TEvGetHostByName("timeout.yandex.ru", AF_INET)), + 0, true); + auto ev = runtime.GrabEdgeEventRethrow<TEvDns::TEvGetHostByNameResult>(sender); + UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, ARES_ETIMEOUT, ev->Get()->ErrorText); + } + + Y_UNIT_TEST(ResolveGracefulStop) { + TSilentUdpServer server; + TTestActorRuntimeBase runtime; + runtime.Initialize(); + auto sender = runtime.AllocateEdgeActor(); + TSimpleDnsResolverOptions options; + options.Timeout = TDuration::Seconds(5); + options.Attempts = 5; + 
options.Servers.emplace_back(TStringBuilder() << "127.0.0.1:" << server.Port); + auto resolver = runtime.Register(CreateSimpleDnsResolver(options)); + runtime.Send(new IEventHandle(resolver, sender, new TEvDns::TEvGetHostByName("timeout.yandex.ru", AF_INET)), + 0, true); + runtime.Send(new IEventHandle(resolver, sender, new TEvents::TEvPoison), 0, true); + auto ev = runtime.GrabEdgeEventRethrow<TEvDns::TEvGetHostByNameResult>(sender); + UNIT_ASSERT_VALUES_EQUAL_C(ev->Get()->Status, ARES_ECANCELLED, ev->Get()->ErrorText); + } + +} diff --git a/library/cpp/actors/dnsresolver/ut/ya.make b/library/cpp/actors/dnsresolver/ut/ya.make new file mode 100644 index 0000000000..ad936bdacd --- /dev/null +++ b/library/cpp/actors/dnsresolver/ut/ya.make @@ -0,0 +1,20 @@ +UNITTEST_FOR(library/cpp/actors/dnsresolver) + +OWNER(g:kikimr) + +PEERDIR( + library/cpp/actors/testlib +) + +SRCS( + dnsresolver_caching_ut.cpp + dnsresolver_ondemand_ut.cpp + dnsresolver_ut.cpp +) + +ADDINCL(contrib/libs/c-ares) + +TAG(ya:external) +REQUIREMENTS(network:full) + +END() diff --git a/library/cpp/actors/dnsresolver/ya.make b/library/cpp/actors/dnsresolver/ya.make new file mode 100644 index 0000000000..329c56c5b3 --- /dev/null +++ b/library/cpp/actors/dnsresolver/ya.make @@ -0,0 +1,20 @@ +LIBRARY() + +OWNER(g:kikimr) + +SRCS( + dnsresolver.cpp + dnsresolver_caching.cpp + dnsresolver_ondemand.cpp +) + +PEERDIR( + library/cpp/actors/core + contrib/libs/c-ares +) + +ADDINCL(contrib/libs/c-ares) + +END() + +RECURSE_FOR_TESTS(ut) diff --git a/library/cpp/actors/helpers/activeactors.cpp b/library/cpp/actors/helpers/activeactors.cpp new file mode 100644 index 0000000000..145e97dc57 --- /dev/null +++ b/library/cpp/actors/helpers/activeactors.cpp @@ -0,0 +1,2 @@ +#include "activeactors.h" + diff --git a/library/cpp/actors/helpers/activeactors.h b/library/cpp/actors/helpers/activeactors.h new file mode 100644 index 0000000000..0fdb0fab10 --- /dev/null +++ b/library/cpp/actors/helpers/activeactors.h @@ -0,0 +1,42 @@ +#pragma once + +#include <library/cpp/actors/core/actor.h> +#include <library/cpp/actors/core/events.h> +#include <util/generic/hash_set.h> + +namespace NActors { + + //////////////////////////////////////////////////////////////////////////// + // TActiveActors + // This class helps manage created actors and kill them all on PoisonPill. 
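+ // Typical lifecycle: Insert() right after Register(), Erase() when a child actor reports completion, KillAndClear() on shutdown.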
+ //////////////////////////////////////////////////////////////////////////// + class TActiveActors : public THashSet<TActorId> { + public: + void Insert(const TActorId &aid) { + bool inserted = insert(aid).second; + Y_VERIFY(inserted); + } + + void Insert(const TActiveActors &moreActors) { + for (const auto &aid : moreActors) { + Insert(aid); + } + } + + void Erase(const TActorId &aid) { + auto num = erase(aid); + Y_VERIFY(num == 1); + } + + size_t KillAndClear(const TActorContext &ctx) { + size_t s = size(); // number of actors managed + for (const auto &x: *this) { + ctx.Send(x, new TEvents::TEvPoisonPill()); + } + clear(); + return s; // how many actors we killed + } + }; + +} // namespace NActors + diff --git a/library/cpp/actors/helpers/flow_controlled_queue.cpp b/library/cpp/actors/helpers/flow_controlled_queue.cpp new file mode 100644 index 0000000000..d75cc54023 --- /dev/null +++ b/library/cpp/actors/helpers/flow_controlled_queue.cpp @@ -0,0 +1,215 @@ +#include "flow_controlled_queue.h" + +#include <library/cpp/actors/core/interconnect.h> +#include <library/cpp/actors/core/hfunc.h> +#include <library/cpp/actors/util/datetime.h> + +#include <util/generic/deque.h> + +#include <util/datetime/cputimer.h> +#include <util/generic/algorithm.h> + +namespace NActors { + +class TFlowControlledRequestQueue; + +class TFlowControlledRequestActor : public IActor { + TFlowControlledRequestQueue * const QueueActor; + + void HandleReply(TAutoPtr<IEventHandle> &ev); + void HandleUndelivered(TEvents::TEvUndelivered::TPtr &ev); +public: + const TActorId Source; + const ui64 Cookie; + const ui32 Flags; + const ui64 StartCounter; + + TFlowControlledRequestActor(ui32 activity, TFlowControlledRequestQueue *queue, TActorId source, ui64 cookie, ui32 flags) + : IActor(static_cast<TReceiveFunc>(&TFlowControlledRequestActor::StateWait), activity) + , QueueActor(queue) + , Source(source) + , Cookie(cookie) + , Flags(flags) + , StartCounter(GetCycleCountFast()) + {} + + STATEFN(StateWait) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvents::TEvUndelivered, HandleUndelivered); + default: + HandleReply(ev); + } + } + + TDuration AccumulatedLatency() const { + const ui64 cc = GetCycleCountFast() - StartCounter; + return CyclesToDuration(cc); + } + + using IActor::PassAway; +}; + +class TFlowControlledRequestQueue : public IActor { + const TActorId Target; + const TFlowControlledQueueConfig Config; + + TDeque<THolder<IEventHandle>> UnhandledRequests; + TDeque<TFlowControlledRequestActor *> RegisteredRequests; + + bool Subscribed = false; + + TDuration MinimalSeenLatency; + + bool CanRegister() { + const ui64 inFly = RegisteredRequests.size(); + if (inFly <= Config.MinAllowedInFly) // <= for handling minAllowed == 0 + return true; + + if (inFly >= Config.MaxAllowedInFly) + return false; + + if (Config.TargetDynamicRate) { + if (const ui64 dynMax = MinimalSeenLatency.MicroSeconds() * Config.TargetDynamicRate / 1000000) { + if (inFly >= dynMax) + return false; + } + } + + const TDuration currentLatency = RegisteredRequests.front()->AccumulatedLatency(); + if (currentLatency <= Config.MinTrackedLatency) + return true; + + if (currentLatency <= MinimalSeenLatency * Config.LatencyFactor) + return true; + + return false; + } + + void HandleForwardedEvent(TAutoPtr<IEventHandle> &ev) { + if (CanRegister()) { + RegisterReqActor(ev); + } else { + UnhandledRequests.emplace_back(ev.Release()); + } + } + + void RegisterReqActor(THolder<IEventHandle> ev) { + TFlowControlledRequestActor *reqActor = new 
TFlowControlledRequestActor(ActivityType, this, ev->Sender, ev->Cookie, ev->Flags); + const TActorId reqActorId = RegisterWithSameMailbox(reqActor); + RegisteredRequests.emplace_back(reqActor); + + if (!Subscribed && (Target.NodeId() != SelfId().NodeId())) { + Send(TActivationContext::InterconnectProxy(Target.NodeId()), new TEvents::TEvSubscribe(), IEventHandle::FlagTrackDelivery); + Subscribed = true; + } + + TActivationContext::Send(new IEventHandle(Target, reqActorId, ev->ReleaseBase().Release(), IEventHandle::FlagTrackDelivery, ev->Cookie)); + } + + void PumpQueue() { + while (RegisteredRequests && RegisteredRequests.front() == nullptr) + RegisteredRequests.pop_front(); + + while (UnhandledRequests && CanRegister()) { + RegisterReqActor(std::move(UnhandledRequests.front())); + UnhandledRequests.pop_front(); + } + } + + void HandleDisconnected() { + Subscribed = false; + + const ui32 nodeid = Target.NodeId(); + for (TFlowControlledRequestActor *reqActor : RegisteredRequests) { + if (reqActor) { + if (reqActor->Flags & IEventHandle::FlagSubscribeOnSession) { + TActivationContext::Send( + new IEventHandle(reqActor->Source, TActorId(), new TEvInterconnect::TEvNodeDisconnected(nodeid), 0, reqActor->Cookie) + ); + } + reqActor->PassAway(); + } + } + + RegisteredRequests.clear(); + + for (auto &ev : UnhandledRequests) { + const auto reason = TEvents::TEvUndelivered::Disconnected; + if (ev->Flags & IEventHandle::FlagTrackDelivery) { + TActivationContext::Send( + new IEventHandle(ev->Sender, ev->Recipient, new TEvents::TEvUndelivered(ev->GetTypeRewrite(), reason), 0, ev->Cookie) + ); + } + } + + UnhandledRequests.clear(); + } + + void HandlePoison() { + HandleDisconnected(); + + if (SelfId().NodeId() != Target.NodeId()) + Send(TActivationContext::InterconnectProxy(Target.NodeId()), new TEvents::TEvUnsubscribe()); + + PassAway(); + } +public: + TFlowControlledRequestQueue(TActorId target, ui32 activity, const TFlowControlledQueueConfig &config) + : IActor(static_cast<TReceiveFunc>(&TFlowControlledRequestQueue::StateWork), activity) + , Target(target) + , Config(config) + , MinimalSeenLatency(TDuration::Seconds(1)) + {} + + STATEFN(StateWork) { + switch (ev->GetTypeRewrite()) { + cFunc(TEvInterconnect::TEvNodeDisconnected::EventType, HandleDisconnected); + IgnoreFunc(TEvInterconnect::TEvNodeConnected); + cFunc(TEvents::TEvUndelivered::EventType, HandleDisconnected); + cFunc(TEvents::TEvPoison::EventType, HandlePoison); + default: + HandleForwardedEvent(ev); + } + } + + void HandleRequestReply(TAutoPtr<IEventHandle> &ev, TFlowControlledRequestActor *reqActor) { + auto it = Find(RegisteredRequests, reqActor); + if (it == RegisteredRequests.end()) + return; + + TActivationContext::Send(ev->Forward(reqActor->Source)); + const TDuration reqLatency = reqActor->AccumulatedLatency(); + if (reqLatency < MinimalSeenLatency) + MinimalSeenLatency = reqLatency; + + *it = nullptr; + PumpQueue(); + } + + void HandleRequestUndelivered(TEvents::TEvUndelivered::TPtr &ev, TFlowControlledRequestActor *reqActor) { + auto it = Find(RegisteredRequests, reqActor); + if (it == RegisteredRequests.end()) + return; + + TActivationContext::Send(ev->Forward(reqActor->Source)); + + *it = nullptr; + PumpQueue(); + } +}; + +void TFlowControlledRequestActor::HandleReply(TAutoPtr<IEventHandle> &ev) { + QueueActor->HandleRequestReply(ev, this); + PassAway(); +} + +void TFlowControlledRequestActor::HandleUndelivered(TEvents::TEvUndelivered::TPtr &ev) { + QueueActor->HandleRequestUndelivered(ev, this); + PassAway(); +} + + +IActor* 
CreateFlowControlledRequestQueue(TActorId targetId, ui32 activity, const TFlowControlledQueueConfig &config) { + return new TFlowControlledRequestQueue(targetId, activity, config); +} + +} diff --git a/library/cpp/actors/helpers/flow_controlled_queue.h b/library/cpp/actors/helpers/flow_controlled_queue.h new file mode 100644 index 0000000000..d250405304 --- /dev/null +++ b/library/cpp/actors/helpers/flow_controlled_queue.h @@ -0,0 +1,18 @@ +#pragma once + +#include <library/cpp/actors/core/actor.h> + +namespace NActors { + + struct TFlowControlledQueueConfig { + ui32 MinAllowedInFly = 20; + ui32 MaxAllowedInFly = 100; + ui32 TargetDynamicRate = 0; + + TDuration MinTrackedLatency = TDuration::MilliSeconds(20); + ui32 LatencyFactor = 4; + }; + + IActor* CreateFlowControlledRequestQueue(TActorId targetId, ui32 activity = IActor::ACTORLIB_COMMON, const TFlowControlledQueueConfig &config = TFlowControlledQueueConfig()); + +} diff --git a/library/cpp/actors/helpers/future_callback.h b/library/cpp/actors/helpers/future_callback.h new file mode 100644 index 0000000000..8ca0d99fda --- /dev/null +++ b/library/cpp/actors/helpers/future_callback.h @@ -0,0 +1,33 @@ +#pragma once + +#include <library/cpp/actors/core/actor.h> +#include <library/cpp/actors/core/hfunc.h> + +namespace NActors { + +template <typename EventType> +struct TActorFutureCallback : TActor<TActorFutureCallback<EventType>> { + using TCallback = std::function<void(TAutoPtr<TEventHandle<EventType>>&)>; + using TBase = TActor<TActorFutureCallback<EventType>>; + TCallback Callback; + + static constexpr IActor::EActivityType ActorActivityType() { + return IActor::ACTOR_FUTURE_CALLBACK; + } + + TActorFutureCallback(TCallback&& callback) + : TBase(&TActorFutureCallback::StateWaitForEvent) + , Callback(std::move(callback)) + {} + + STRICT_STFUNC(StateWaitForEvent, + HFunc(EventType, Handle) + ) + + void Handle(typename EventType::TPtr ev, const TActorContext& ctx) { + Callback(ev); + TBase::Die(ctx); + } +}; + +} // NActors diff --git a/library/cpp/actors/helpers/mon_histogram_helper.h b/library/cpp/actors/helpers/mon_histogram_helper.h new file mode 100644 index 0000000000..a9a57e3823 --- /dev/null +++ b/library/cpp/actors/helpers/mon_histogram_helper.h @@ -0,0 +1,86 @@ +#pragma once + +#include <library/cpp/monlib/dynamic_counters/counters.h> + +#include <util/string/cast.h> + +namespace NActors { + namespace NMon { + class THistogramCounterHelper { + public: + THistogramCounterHelper() + : FirstBucketVal(0) + , BucketCount(0) + { + } + + THistogramCounterHelper(const THistogramCounterHelper&) = default; + + void Init(NMonitoring::TDynamicCounters* group, const TString& baseName, const TString& unit, + ui64 firstBucket, ui64 bucketCnt, bool useSensorLabelName = true) + { + Y_ASSERT(FirstBucketVal == 0); + Y_ASSERT(BucketCount == 0); + + FirstBucketVal = firstBucket; + BucketCount = bucketCnt; + BucketsHolder.reserve(BucketCount); + Buckets.reserve(BucketCount); + for (size_t i = 0; i < BucketCount; ++i) { + TString bucketName = GetBucketName(i) + " " + unit; + auto labelName = useSensorLabelName ? 
"sensor" : "name"; + BucketsHolder.push_back(group->GetSubgroup(labelName, baseName)->GetNamedCounter("range", bucketName, true)); + Buckets.push_back(BucketsHolder.back().Get()); + } + } + + void Add(ui64 val) { + Y_ASSERT(FirstBucketVal != 0); + Y_ASSERT(BucketCount != 0); + Y_VERIFY(val <= (1ULL << 63ULL)); + size_t ind = 0; + if (val > FirstBucketVal) { + ind = GetValueBitCount((2 * val - 1) / FirstBucketVal) - 1; + if (ind >= BucketCount) { + ind = BucketCount - 1; + } + } + Buckets[ind]->Inc(); + } + + ui64 GetBucketCount() const { + return BucketCount; + } + + ui64 GetBucketValue(size_t index) const { + Y_ASSERT(index < BucketCount); + return Buckets[index]->Val(); + } + + void SetBucketValue(ui64 index, ui64 value) { + Y_ASSERT(index < BucketCount); + *Buckets[index] = value; + } + + private: + TString GetBucketName(size_t ind) const { + Y_ASSERT(FirstBucketVal != 0); + Y_ASSERT(BucketCount != 0); + Y_ASSERT(ind < BucketCount); + if (ind + 1 < BucketCount) { + return ToString<ui64>(FirstBucketVal << ind); + } else { + // Last slot is up to +INF + return "INF"; + } + } + + private: + ui64 FirstBucketVal; + ui64 BucketCount; + TVector<NMonitoring::TDynamicCounters::TCounterPtr> BucketsHolder; + TVector<NMonitoring::TDeprecatedCounter*> Buckets; + }; + + } +} diff --git a/library/cpp/actors/helpers/pool_stats_collector.h b/library/cpp/actors/helpers/pool_stats_collector.h new file mode 100644 index 0000000000..61d0b45780 --- /dev/null +++ b/library/cpp/actors/helpers/pool_stats_collector.h @@ -0,0 +1,314 @@ +#pragma once + +#include <library/cpp/actors/core/actor_bootstrapped.h> +#include <library/cpp/actors/core/actorsystem.h> +#include <library/cpp/actors/core/executor_thread.h> +#include <library/cpp/actors/core/hfunc.h> +#include <library/cpp/monlib/dynamic_counters/counters.h> + +#include <util/generic/vector.h> +#include <util/generic/xrange.h> +#include <util/string/printf.h> + +namespace NActors { + +// Periodically collects stats from executor threads and exposes them as mon counters +class TStatsCollectingActor : public TActorBootstrapped<TStatsCollectingActor> { +private: + struct THistogramCounters { + void Init(NMonitoring::TDynamicCounters* group, const TString& baseName, const TString& unit, ui64 maxVal) { + for (size_t i = 0; (1ull<<i) <= maxVal; ++i) { + TString bucketName = ToString(1ull<<i) + " " + unit; + Buckets.push_back(group->GetSubgroup("sensor", baseName)->GetNamedCounter("range", bucketName, true)); + } + Buckets.push_back(group->GetSubgroup("sensor", baseName)->GetNamedCounter("range", "INF", true)); + } + + void Set(const TLogHistogram& data) { + ui32 i = 0; + for (;i < Y_ARRAY_SIZE(data.Buckets) && i < Buckets.size()-1; ++i) + *Buckets[i] = data.Buckets[i]; + ui64 last = 0; + for (;i < Y_ARRAY_SIZE(data.Buckets); ++i) + last += data.Buckets[i]; + *Buckets.back() = last; + } + + void Set(const TLogHistogram& data, double factor) { + ui32 i = 0; + for (;i < Y_ARRAY_SIZE(data.Buckets) && i < Buckets.size()-1; ++i) + *Buckets[i] = data.Buckets[i]*factor; + ui64 last = 0; + for (;i < Y_ARRAY_SIZE(data.Buckets); ++i) + last += data.Buckets[i]; + *Buckets.back() = last*factor; + } + + private: + TVector<NMonitoring::TDynamicCounters::TCounterPtr> Buckets; + }; + + struct TActivityStats { + void Init(NMonitoring::TDynamicCounterPtr group) { + Group = group; + + ElapsedMicrosecByActivityBuckets.resize(GetActivityTypeCount()); + ReceivedEventsByActivityBuckets.resize(GetActivityTypeCount()); + ActorsAliveByActivityBuckets.resize(GetActivityTypeCount()); + 
ScheduledEventsByActivityBuckets.resize(GetActivityTypeCount()); + } + + void Set(const TExecutorThreadStats& stats) { + for (ui32 i : xrange(stats.MaxActivityType())) { + Y_VERIFY(i < GetActivityTypeCount()); + ui64 ticks = stats.ElapsedTicksByActivity[i]; + ui64 events = stats.ReceivedEventsByActivity[i]; + ui64 actors = stats.ActorsAliveByActivity[i]; + ui64 scheduled = stats.ScheduledEventsByActivity[i]; + + if (!ActorsAliveByActivityBuckets[i]) { + if (ticks || events || actors || scheduled) { + InitCountersForActivity(i); + } else { + continue; + } + } + + *ElapsedMicrosecByActivityBuckets[i] = ::NHPTimer::GetSeconds(ticks)*1000000; + *ReceivedEventsByActivityBuckets[i] = events; + *ActorsAliveByActivityBuckets[i] = actors; + *ScheduledEventsByActivityBuckets[i] = scheduled; + } + } + + private: + void InitCountersForActivity(ui32 activityType) { + Y_VERIFY(activityType < GetActivityTypeCount()); + + auto bucketName = TString(GetActivityTypeName(activityType)); + + ElapsedMicrosecByActivityBuckets[activityType] = + Group->GetSubgroup("sensor", "ElapsedMicrosecByActivity")->GetNamedCounter("activity", bucketName, true); + ReceivedEventsByActivityBuckets[activityType] = + Group->GetSubgroup("sensor", "ReceivedEventsByActivity")->GetNamedCounter("activity", bucketName, true); + ActorsAliveByActivityBuckets[activityType] = + Group->GetSubgroup("sensor", "ActorsAliveByActivity")->GetNamedCounter("activity", bucketName, false); + ScheduledEventsByActivityBuckets[activityType] = + Group->GetSubgroup("sensor", "ScheduledEventsByActivity")->GetNamedCounter("activity", bucketName, true); + } + + private: + NMonitoring::TDynamicCounterPtr Group; + + TVector<NMonitoring::TDynamicCounters::TCounterPtr> ElapsedMicrosecByActivityBuckets; + TVector<NMonitoring::TDynamicCounters::TCounterPtr> ReceivedEventsByActivityBuckets; + TVector<NMonitoring::TDynamicCounters::TCounterPtr> ActorsAliveByActivityBuckets; + TVector<NMonitoring::TDynamicCounters::TCounterPtr> ScheduledEventsByActivityBuckets; + }; + + struct TExecutorPoolCounters { + TIntrusivePtr<NMonitoring::TDynamicCounters> PoolGroup; + + NMonitoring::TDynamicCounters::TCounterPtr SentEvents; + NMonitoring::TDynamicCounters::TCounterPtr ReceivedEvents; + NMonitoring::TDynamicCounters::TCounterPtr PreemptedEvents; + NMonitoring::TDynamicCounters::TCounterPtr NonDeliveredEvents; + NMonitoring::TDynamicCounters::TCounterPtr DestroyedActors; + NMonitoring::TDynamicCounters::TCounterPtr EmptyMailboxActivation; + NMonitoring::TDynamicCounters::TCounterPtr CpuMicrosec; + NMonitoring::TDynamicCounters::TCounterPtr ElapsedMicrosec; + NMonitoring::TDynamicCounters::TCounterPtr ParkedMicrosec; + NMonitoring::TDynamicCounters::TCounterPtr ActorRegistrations; + NMonitoring::TDynamicCounters::TCounterPtr ActorsAlive; + NMonitoring::TDynamicCounters::TCounterPtr AllocatedMailboxes; + NMonitoring::TDynamicCounters::TCounterPtr MailboxPushedOutBySoftPreemption; + NMonitoring::TDynamicCounters::TCounterPtr MailboxPushedOutByTime; + NMonitoring::TDynamicCounters::TCounterPtr MailboxPushedOutByEventCount; + + THistogramCounters LegacyActivationTimeHistogram; + NMonitoring::THistogramPtr ActivationTimeHistogram; + THistogramCounters LegacyEventDeliveryTimeHistogram; + NMonitoring::THistogramPtr EventDeliveryTimeHistogram; + THistogramCounters LegacyEventProcessingCountHistogram; + NMonitoring::THistogramPtr EventProcessingCountHistogram; + THistogramCounters LegacyEventProcessingTimeHistogram; + NMonitoring::THistogramPtr EventProcessingTimeHistogram; + + 
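+ // Below: per-activity breakdown plus pool-level utilization; Usage is a smoothed share of elapsed thread time, refreshed on each collection tick.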
TActivityStats ActivityStats; + NMonitoring::TDynamicCounters::TCounterPtr MaxUtilizationTime; + + double Usage = 0; + double LastElapsedSeconds = 0; + THPTimer UsageTimer; + TString Name; + ui32 Threads; + + void Init(NMonitoring::TDynamicCounters* group, const TString& poolName, ui32 threads) { + LastElapsedSeconds = 0; + Usage = 0; + UsageTimer.Reset(); + Name = poolName; + Threads = threads; + + PoolGroup = group->GetSubgroup("execpool", poolName); + + SentEvents = PoolGroup->GetCounter("SentEvents", true); + ReceivedEvents = PoolGroup->GetCounter("ReceivedEvents", true); + PreemptedEvents = PoolGroup->GetCounter("PreemptedEvents", true); + NonDeliveredEvents = PoolGroup->GetCounter("NonDeliveredEvents", true); + DestroyedActors = PoolGroup->GetCounter("DestroyedActors", true); + CpuMicrosec = PoolGroup->GetCounter("CpuMicrosec", true); + ElapsedMicrosec = PoolGroup->GetCounter("ElapsedMicrosec", true); + ParkedMicrosec = PoolGroup->GetCounter("ParkedMicrosec", true); + EmptyMailboxActivation = PoolGroup->GetCounter("EmptyMailboxActivation", true); + ActorRegistrations = PoolGroup->GetCounter("ActorRegistrations", true); + ActorsAlive = PoolGroup->GetCounter("ActorsAlive", false); + AllocatedMailboxes = PoolGroup->GetCounter("AllocatedMailboxes", false); + MailboxPushedOutBySoftPreemption = PoolGroup->GetCounter("MailboxPushedOutBySoftPreemption", true); + MailboxPushedOutByTime = PoolGroup->GetCounter("MailboxPushedOutByTime", true); + MailboxPushedOutByEventCount = PoolGroup->GetCounter("MailboxPushedOutByEventCount", true); + + LegacyActivationTimeHistogram.Init(PoolGroup.Get(), "ActivationTime", "usec", 5*1000*1000); + ActivationTimeHistogram = PoolGroup->GetHistogram( + "ActivationTimeUs", NMonitoring::ExponentialHistogram(24, 2, 1)); + LegacyEventDeliveryTimeHistogram.Init(PoolGroup.Get(), "EventDeliveryTime", "usec", 5*1000*1000); + EventDeliveryTimeHistogram = PoolGroup->GetHistogram( + "EventDeliveryTimeUs", NMonitoring::ExponentialHistogram(24, 2, 1)); + LegacyEventProcessingCountHistogram.Init(PoolGroup.Get(), "EventProcessingCount", "usec", 5*1000*1000); + EventProcessingCountHistogram = PoolGroup->GetHistogram( + "EventProcessingCountUs", NMonitoring::ExponentialHistogram(24, 2, 1)); + LegacyEventProcessingTimeHistogram.Init(PoolGroup.Get(), "EventProcessingTime", "usec", 5*1000*1000); + EventProcessingTimeHistogram = PoolGroup->GetHistogram( + "EventProcessingTimeUs", NMonitoring::ExponentialHistogram(24, 2, 1)); + + ActivityStats.Init(PoolGroup.Get()); + + MaxUtilizationTime = PoolGroup->GetCounter("MaxUtilizationTime", true); + } + + void Set(const TExecutorPoolStats& poolStats, const TExecutorThreadStats& stats, ui32 numThreads) { +#ifdef ACTORSLIB_COLLECT_EXEC_STATS + *SentEvents = stats.SentEvents; + *ReceivedEvents = stats.ReceivedEvents; + *PreemptedEvents = stats.PreemptedEvents; + *NonDeliveredEvents = stats.NonDeliveredEvents; + *DestroyedActors = stats.PoolDestroyedActors; + *EmptyMailboxActivation = stats.EmptyMailboxActivation; + *CpuMicrosec = stats.CpuNs / 1000; + *ElapsedMicrosec = ::NHPTimer::GetSeconds(stats.ElapsedTicks)*1000000; + *ParkedMicrosec = ::NHPTimer::GetSeconds(stats.ParkedTicks)*1000000; + *ActorRegistrations = stats.PoolActorRegistrations; + *ActorsAlive = stats.PoolActorRegistrations - stats.PoolDestroyedActors; + *AllocatedMailboxes = stats.PoolAllocatedMailboxes; + *MailboxPushedOutBySoftPreemption = stats.MailboxPushedOutBySoftPreemption; + *MailboxPushedOutByTime = stats.MailboxPushedOutByTime; + *MailboxPushedOutByEventCount = 
stats.MailboxPushedOutByEventCount; + + LegacyActivationTimeHistogram.Set(stats.ActivationTimeHistogram); + ActivationTimeHistogram->Reset(); + ActivationTimeHistogram->Collect(stats.ActivationTimeHistogram); + + LegacyEventDeliveryTimeHistogram.Set(stats.EventDeliveryTimeHistogram); + EventDeliveryTimeHistogram->Reset(); + EventDeliveryTimeHistogram->Collect(stats.EventDeliveryTimeHistogram); + + LegacyEventProcessingCountHistogram.Set(stats.EventProcessingCountHistogram); + EventProcessingCountHistogram->Reset(); + EventProcessingCountHistogram->Collect(stats.EventProcessingCountHistogram); + + double toMicrosec = 1000000 / NHPTimer::GetClockRate(); + LegacyEventProcessingTimeHistogram.Set(stats.EventProcessingTimeHistogram, toMicrosec); + EventProcessingTimeHistogram->Reset(); + for (ui32 i = 0; i < stats.EventProcessingTimeHistogram.Count(); ++i) { + EventProcessingTimeHistogram->Collect( + stats.EventProcessingTimeHistogram.UpperBound(i), + stats.EventProcessingTimeHistogram.Value(i) * toMicrosec); + } + + ActivityStats.Set(stats); + + *MaxUtilizationTime = poolStats.MaxUtilizationTime; + + double seconds = UsageTimer.PassedReset(); + + // TODO[serxa]: It doesn't account for contention. Use 1 - parkedTicksDelta / seconds / numThreads KIKIMR-11916 + const double elapsed = NHPTimer::GetSeconds(stats.ElapsedTicks); + const double currentUsage = numThreads > 0 ? ((elapsed - LastElapsedSeconds) / seconds / numThreads) : 0; + LastElapsedSeconds = elapsed; + + // update usage factor according to smoothness + const double smoothness = 0.5; + Usage = currentUsage * smoothness + Usage * (1.0 - smoothness); +#else + Y_UNUSED(poolStats); + Y_UNUSED(stats); + Y_UNUSED(numThreads); +#endif + } + }; + +public: + static constexpr IActor::EActivityType ActorActivityType() { + return IActor::ACTORLIB_STATS; + } + + TStatsCollectingActor( + ui32 intervalSec, + const TActorSystemSetup& setup, + NMonitoring::TDynamicCounterPtr counters) + : IntervalSec(intervalSec) + , Counters(counters) + { + PoolCounters.resize(setup.GetExecutorsCount()); + for (size_t poolId = 0; poolId < PoolCounters.size(); ++poolId) { + PoolCounters[poolId].Init(Counters.Get(), setup.GetPoolName(poolId), setup.GetThreads(poolId)); + } + } + + void Bootstrap(const TActorContext& ctx) { + ctx.Schedule(TDuration::Seconds(IntervalSec), new TEvents::TEvWakeup()); + Become(&TThis::StateWork); + } + + STFUNC(StateWork) { + switch (ev->GetTypeRewrite()) { + CFunc(TEvents::TSystem::Wakeup, Wakeup); + } + } + +private: + virtual void OnWakeup(const TActorContext &ctx) { + Y_UNUSED(ctx); + } + + void Wakeup(const TActorContext &ctx) { + for (size_t poolId = 0; poolId < PoolCounters.size(); ++poolId) { + TVector<TExecutorThreadStats> stats; + TExecutorPoolStats poolStats; + ctx.ExecutorThread.ActorSystem->GetPoolStats(poolId, poolStats, stats); + SetAggregatedCounters(PoolCounters[poolId], poolStats, stats); + } + + OnWakeup(ctx); + + ctx.Schedule(TDuration::Seconds(IntervalSec), new TEvents::TEvWakeup()); + } + + void SetAggregatedCounters(TExecutorPoolCounters& poolCounters, TExecutorPoolStats& poolStats, TVector<TExecutorThreadStats>& stats) { + // Sum all per-thread counters into the 0th element + for (ui32 idx = 1; idx < stats.size(); ++idx) { + stats[0].Aggregate(stats[idx]); + } + if (stats.size()) { + poolCounters.Set(poolStats, stats[0], stats.size() - 1); + } + } + +protected: + const ui32 IntervalSec; + NMonitoring::TDynamicCounterPtr Counters; + + TVector<TExecutorPoolCounters> PoolCounters; +}; + +} // NActors diff --git 
a/library/cpp/actors/helpers/selfping_actor.cpp b/library/cpp/actors/helpers/selfping_actor.cpp new file mode 100644 index 0000000000..f9bfaf8dc0 --- /dev/null +++ b/library/cpp/actors/helpers/selfping_actor.cpp @@ -0,0 +1,183 @@ +#include "selfping_actor.h" + +#include <library/cpp/actors/core/actor_bootstrapped.h> +#include <library/cpp/actors/core/hfunc.h> + +#include <library/cpp/containers/stack_vector/stack_vec.h> +#include <library/cpp/sliding_window/sliding_window.h> + +namespace NActors { + +namespace { + +struct TEvPing: public TEventLocal<TEvPing, TEvents::THelloWorld::Ping> { + TEvPing(double timeStart) + : TimeStart(timeStart) + {} + + const double TimeStart; +}; + +template <class TValueType_> +struct TAvgOperation { + struct TValueType { + ui64 Count = 0; + TValueType_ Sum = TValueType_(); + }; + using TValueVector = TVector<TValueType>; + + static constexpr TValueType InitialValue() { + return TValueType(); // zero + } + + // Updates value in current bucket and returns window value + static TValueType UpdateBucket(TValueType windowValue, TValueVector& buckets, size_t index, TValueType newVal) { + Y_ASSERT(index < buckets.size()); + buckets[index].Sum += newVal.Sum; + buckets[index].Count += newVal.Count; + windowValue.Sum += newVal.Sum; + windowValue.Count += newVal.Count; + return windowValue; + } + + static TValueType ClearBuckets(TValueType windowValue, TValueVector& buckets, size_t firstElemIndex, size_t bucketsToClear) { + Y_ASSERT(!buckets.empty()); + Y_ASSERT(firstElemIndex < buckets.size()); + Y_ASSERT(bucketsToClear <= buckets.size()); + + const size_t arraySize = buckets.size(); + for (size_t i = 0; i < bucketsToClear; ++i) { + TValueType& curVal = buckets[firstElemIndex]; + windowValue.Sum -= curVal.Sum; + windowValue.Count -= curVal.Count; + curVal = InitialValue(); + firstElemIndex = (firstElemIndex + 1) % arraySize; + } + return windowValue; + } + +}; + +class TSelfPingActor : public TActorBootstrapped<TSelfPingActor> { +private: + const TDuration SendInterval; + const NMonitoring::TDynamicCounters::TCounterPtr Counter; + const NMonitoring::TDynamicCounters::TCounterPtr CalculationTimeCounter; + + NSlidingWindow::TSlidingWindow<NSlidingWindow::TMaxOperation<ui64>> SlidingWindow; + NSlidingWindow::TSlidingWindow<TAvgOperation<ui64>> CalculationSlidingWindow; + + THPTimer Timer; + +public: + static constexpr auto ActorActivityType() { + return SELF_PING_ACTOR; + } + + TSelfPingActor(TDuration sendInterval, const NMonitoring::TDynamicCounters::TCounterPtr& counter, + const NMonitoring::TDynamicCounters::TCounterPtr& calculationTimeCounter) + : SendInterval(sendInterval) + , Counter(counter) + , CalculationTimeCounter(calculationTimeCounter) + , SlidingWindow(TDuration::Seconds(15), 100) + , CalculationSlidingWindow(TDuration::Seconds(15), 100) + { + } + + void Bootstrap(const TActorContext& ctx) + { + Become(&TSelfPingActor::RunningState); + SchedulePing(ctx, Timer.Passed()); + } + + STFUNC(RunningState) + { + switch (ev->GetTypeRewrite()) { + HFunc(TEvPing, HandlePing); + default: + Y_FAIL("TSelfPingActor::RunningState: unexpected event 0x%08" PRIx32, ev->GetTypeRewrite()); + } + } + + ui64 MeasureTaskDurationNs() { + // Prepare worm test data + // 11 * 11 * 3 * 8 = 2904 bytes, fits in L1 cache + constexpr ui64 Size = 11; + // Align the data to reduce random alignment effects + alignas(64) TStackVec<ui64, Size * Size * 3> data; + ui64 s = 0; + NHPTimer::STime beginTime; + NHPTimer::STime endTime; + // Prepare the data + data.resize(Size * Size * 3); + for (ui64 
matrixIdx = 0; matrixIdx < 3; ++matrixIdx) { + for (ui64 y = 0; y < Size; ++y) { + for (ui64 x = 0; x < Size; ++x) { + data[matrixIdx * (Size * Size) + y * Size + x] = y * Size + x; + } + } + } + // Warm-up the cache + NHPTimer::GetTime(&beginTime); + for (ui64 idx = 0; idx < data.size(); ++idx) { + s += data[idx]; + } + NHPTimer::GetTime(&endTime); + s += (ui64)(1000000.0 * NHPTimer::GetSeconds(endTime - beginTime)); + + // Measure the CPU performance + // C = A * B with injected dependency to s + NHPTimer::GetTime(&beginTime); + for (ui64 y = 0; y < Size; ++y) { + for (ui64 x = 0; x < Size; ++x) { + for (ui64 i = 0; i < Size; ++i) { + s += data[y * Size + i] * data[Size * Size + i * Size + x]; + } + data[2 * Size * Size + y * Size + x] = s; + s = 0; + } + } + for (ui64 idx = 0; idx < data.size(); ++idx) { + s += data[idx]; + } + NHPTimer::GetTime(&endTime); + // Prepare the result + double d = 1000000000.0 * (NHPTimer::GetSeconds(endTime - beginTime) + 0.000000001 * (s & 1)); + return (ui64)d; + } + + void HandlePing(TEvPing::TPtr &ev, const TActorContext &ctx) + { + const auto now = ctx.Now(); + const double hpNow = Timer.Passed(); + const auto& e = *ev->Get(); + const double passedTime = hpNow - e.TimeStart; + const ui64 delayUs = passedTime > 0.0 ? static_cast<ui64>(passedTime * 1e6) : 0; + + *Counter = SlidingWindow.Update(delayUs, now); + + ui64 d = MeasureTaskDurationNs(); + auto res = CalculationSlidingWindow.Update({1, d}, now); + *CalculationTimeCounter = double(res.Sum) / double(res.Count + 1); + + SchedulePing(ctx, hpNow); + } + +private: + void SchedulePing(const TActorContext &ctx, double hpNow) const + { + ctx.Schedule(SendInterval, new TEvPing(hpNow)); + } +}; + +} // namespace + +IActor* CreateSelfPingActor( + TDuration sendInterval, + const NMonitoring::TDynamicCounters::TCounterPtr& counter, + const NMonitoring::TDynamicCounters::TCounterPtr& calculationTimeCounter) +{ + return new TSelfPingActor(sendInterval, counter, calculationTimeCounter); +} + +} // NActors diff --git a/library/cpp/actors/helpers/selfping_actor.h b/library/cpp/actors/helpers/selfping_actor.h new file mode 100644 index 0000000000..d7d07f9fa8 --- /dev/null +++ b/library/cpp/actors/helpers/selfping_actor.h @@ -0,0 +1,13 @@ +#pragma once + +#include <library/cpp/actors/core/actor.h> +#include <library/cpp/monlib/dynamic_counters/counters.h> + +namespace NActors { + +NActors::IActor* CreateSelfPingActor( + TDuration sendInterval, + const NMonitoring::TDynamicCounters::TCounterPtr& counter, + const NMonitoring::TDynamicCounters::TCounterPtr& calculationTimeCounter); + +} // NActors diff --git a/library/cpp/actors/helpers/selfping_actor_ut.cpp b/library/cpp/actors/helpers/selfping_actor_ut.cpp new file mode 100644 index 0000000000..459635fa24 --- /dev/null +++ b/library/cpp/actors/helpers/selfping_actor_ut.cpp @@ -0,0 +1,45 @@ +#include "selfping_actor.h" + +#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/actors/testlib/test_runtime.h> + +namespace NActors { +namespace Tests { + +THolder<TTestActorRuntimeBase> CreateRuntime() { + auto runtime = MakeHolder<TTestActorRuntimeBase>(); + runtime->SetScheduledEventFilter([](auto&&, auto&&, auto&&, auto&&) { return false; }); + runtime->Initialize(); + return runtime; +} + +Y_UNIT_TEST_SUITE(TSelfPingTest) { + Y_UNIT_TEST(Basic) + { + auto runtime = CreateRuntime(); + + //const TActorId sender = runtime.AllocateEdgeActor(); + + NMonitoring::TDynamicCounters::TCounterPtr counter(new NMonitoring::TCounterForPtr()); + 
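+ // counter will hold the max observed ping delay (usec), counter2 the average measured calculation time (ns); both must read zero before the actor runs.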
NMonitoring::TDynamicCounters::TCounterPtr counter2(new NMonitoring::TCounterForPtr()); + + auto actor = CreateSelfPingActor( + TDuration::MilliSeconds(100), // sendInterval (unused in test) + counter, counter2); + + UNIT_ASSERT_VALUES_EQUAL(counter->Val(), 0); + UNIT_ASSERT_VALUES_EQUAL(counter2->Val(), 0); + + const TActorId actorId = runtime->Register(actor); + Y_UNUSED(actorId); + + //runtime.Send(new IEventHandle(actorId, sender, new TEvSelfPing::TEvPing(0.0))); + + // TODO check after events are handled + //Sleep(TDuration::Seconds(1)); + //UNIT_ASSERT((intmax_t)counter->Val() >= (intmax_t)Delay.MicroSeconds()); + } +} + +} // namespace Tests +} // namespace NActors diff --git a/library/cpp/actors/helpers/ut/ya.make b/library/cpp/actors/helpers/ut/ya.make new file mode 100644 index 0000000000..cba4d6d1d9 --- /dev/null +++ b/library/cpp/actors/helpers/ut/ya.make @@ -0,0 +1,36 @@ +UNITTEST_FOR(library/cpp/actors/helpers) + +OWNER( + alexvru + g:kikimr +) + +FORK_SUBTESTS() +IF (SANITIZER_TYPE) + SIZE(LARGE) + TIMEOUT(1200) + TAG(ya:fat) + SPLIT_FACTOR(20) + REQUIREMENTS( + ram:32 + ) +ELSE() + SIZE(MEDIUM) + TIMEOUT(600) + REQUIREMENTS( + ram:16 + ) +ENDIF() + + +PEERDIR( + library/cpp/actors/interconnect + library/cpp/actors/testlib + library/cpp/actors/core +) + +SRCS( + selfping_actor_ut.cpp +) + +END() diff --git a/library/cpp/actors/helpers/ya.make b/library/cpp/actors/helpers/ya.make new file mode 100644 index 0000000000..d8771179de --- /dev/null +++ b/library/cpp/actors/helpers/ya.make @@ -0,0 +1,25 @@ +LIBRARY() + +OWNER(g:kikimr) + +SRCS( + activeactors.cpp + activeactors.h + flow_controlled_queue.cpp + flow_controlled_queue.h + future_callback.h + mon_histogram_helper.h + selfping_actor.cpp +) + +PEERDIR( + library/cpp/actors/core + library/cpp/monlib/dynamic_counters +) + +END() + +RECURSE_FOR_TESTS( + ut +) + diff --git a/library/cpp/actors/http/http.cpp b/library/cpp/actors/http/http.cpp new file mode 100644 index 0000000000..7125f9d8b0 --- /dev/null +++ b/library/cpp/actors/http/http.cpp @@ -0,0 +1,653 @@ +#include "http.h" +#include <library/cpp/string_utils/quote/quote.h> + +inline TStringBuf operator +(TStringBuf l, TStringBuf r) { + if (l.empty()) { + return r; + } + if (r.empty()) { + return l; + } + if (l.end() == r.begin()) { + return TStringBuf(l.data(), l.size() + r.size()); + } + if (r.end() == l.begin()) { + return TStringBuf(r.data(), l.size() + r.size()); + } + Y_FAIL("oops"); + return TStringBuf(); +} + +inline TStringBuf operator +=(TStringBuf& l, TStringBuf r) { + return l = l + r; +} + +namespace NHttp { + +template <> TStringBuf THttpRequest::GetName<&THttpRequest::Host>() { return "Host"; } +template <> TStringBuf THttpRequest::GetName<&THttpRequest::Accept>() { return "Accept"; } +template <> TStringBuf THttpRequest::GetName<&THttpRequest::Connection>() { return "Connection"; } +template <> TStringBuf THttpRequest::GetName<&THttpRequest::ContentType>() { return "Content-Type"; } +template <> TStringBuf THttpRequest::GetName<&THttpRequest::ContentLength>() { return "Content-Length"; } +template <> TStringBuf THttpRequest::GetName<&THttpRequest::TransferEncoding>() { return "Transfer-Encoding"; } + +const TMap<TStringBuf, TStringBuf THttpRequest::*, TLessNoCase> THttpRequest::HeadersLocation = { + { THttpRequest::GetName<&THttpRequest::Host>(), &THttpRequest::Host }, + { THttpRequest::GetName<&THttpRequest::Accept>(), &THttpRequest::Accept }, + { THttpRequest::GetName<&THttpRequest::Connection>(), &THttpRequest::Connection }, + { 
THttpRequest::GetName<&THttpRequest::ContentType>(), &THttpRequest::ContentType }, + { THttpRequest::GetName<&THttpRequest::ContentLength>(), &THttpRequest::ContentLength }, + { THttpRequest::GetName<&THttpRequest::TransferEncoding>(), &THttpRequest::TransferEncoding }, +}; + +template <> TStringBuf THttpResponse::GetName<&THttpResponse::Connection>() { return "Connection"; } +template <> TStringBuf THttpResponse::GetName<&THttpResponse::ContentType>() { return "Content-Type"; } +template <> TStringBuf THttpResponse::GetName<&THttpResponse::ContentLength>() { return "Content-Length"; } +template <> TStringBuf THttpResponse::GetName<&THttpResponse::TransferEncoding>() { return "Transfer-Encoding"; } +template <> TStringBuf THttpResponse::GetName<&THttpResponse::LastModified>() { return "Last-Modified"; } +template <> TStringBuf THttpResponse::GetName<&THttpResponse::ContentEncoding>() { return "Content-Encoding"; } + +const TMap<TStringBuf, TStringBuf THttpResponse::*, TLessNoCase> THttpResponse::HeadersLocation = { + { THttpResponse::GetName<&THttpResponse::Connection>(), &THttpResponse::Connection }, + { THttpResponse::GetName<&THttpResponse::ContentType>(), &THttpResponse::ContentType }, + { THttpResponse::GetName<&THttpResponse::ContentLength>(), &THttpResponse::ContentLength }, + { THttpResponse::GetName<&THttpResponse::TransferEncoding>(), &THttpResponse::TransferEncoding }, + { THttpResponse::GetName<&THttpResponse::LastModified>(), &THttpResponse::LastModified }, + { THttpResponse::GetName<&THttpResponse::ContentEncoding>(), &THttpResponse::ContentEncoding } +}; + +void THttpRequest::Clear() { + // a dirty little trick + this->~THttpRequest(); // basically, do nothing + new (this) THttpRequest(); // reset all fields +} + +template <> +void THttpParser<THttpRequest, TSocketBuffer>::Advance(size_t len) { + TStringBuf data(Pos(), len); + while (!data.empty()) { + if (Stage != EParseStage::Error) { + LastSuccessStage = Stage; + } + switch (Stage) { + case EParseStage::Method: { + if (ProcessData(Method, data, ' ', MaxMethodSize)) { + Stage = EParseStage::URL; + } + break; + } + case EParseStage::URL: { + if (ProcessData(URL, data, ' ', MaxURLSize)) { + Stage = EParseStage::Protocol; + } + break; + } + case EParseStage::Protocol: { + if (ProcessData(Protocol, data, '/', MaxProtocolSize)) { + Stage = EParseStage::Version; + } + break; + } + case EParseStage::Version: { + if (ProcessData(Version, data, "\r\n", MaxVersionSize)) { + Stage = EParseStage::Header; + Headers = data; + } + break; + } + case EParseStage::Header: { + if (ProcessData(Header, data, "\r\n", MaxHeaderSize)) { + if (Header.empty()) { + Headers = TStringBuf(Headers.data(), data.begin() - Headers.begin()); + if (HaveBody()) { + Stage = EParseStage::Body; + } else { + Stage = EParseStage::Done; + } + } else { + ProcessHeader(Header); + } + } + break; + } + case EParseStage::Body: { + if (!ContentLength.empty()) { + if (ProcessData(Content, data, FromString(ContentLength))) { + Body = Content; + Stage = EParseStage::Done; + } + } else if (TransferEncoding == "chunked") { + Stage = EParseStage::ChunkLength; + } else { + // Invalid body encoding + Stage = EParseStage::Error; + } + break; + } + case EParseStage::ChunkLength: { + if (ProcessData(Line, data, "\r\n", MaxChunkLengthSize)) { + if (!Line.empty()) { + ChunkLength = ParseHex(Line); + if (ChunkLength <= MaxChunkSize) { + ContentSize = Content.size() + ChunkLength; + if (ContentSize <= MaxChunkContentSize) { + Stage = EParseStage::ChunkData; + Line.Clear(); + } else { + 
// Invalid chunk content length + Stage = EParseStage::Error; + } + } else { + // Invalid chunk length + Stage = EParseStage::Error; + } + } else { + // Invalid body encoding + Stage = EParseStage::Error; + } + } + break; + } + case EParseStage::ChunkData: { + if (!IsError()) { + if (ProcessData(Content, data, ContentSize)) { + if (ProcessData(Line, data, 2)) { + if (Line == "\r\n") { + if (ChunkLength == 0) { + Body = Content; + Stage = EParseStage::Done; + } else { + Stage = EParseStage::ChunkLength; + } + Line.Clear(); + } else { + // Invalid body encoding + Stage = EParseStage::Error; + } + } + } + } + break; + } + + case EParseStage::Done: + case EParseStage::Error: { + data.Clear(); + break; + } + default: + Y_FAIL("Invalid processing sequence"); + break; + } + } + TSocketBuffer::Advance(len); +} + +template <> +THttpParser<THttpRequest, TSocketBuffer>::EParseStage THttpParser<THttpRequest, TSocketBuffer>::GetInitialStage() { + return EParseStage::Method; +} + +template <> +THttpParser<THttpResponse, TSocketBuffer>::EParseStage THttpParser<THttpResponse, TSocketBuffer>::GetInitialStage() { + return EParseStage::Protocol; +} + +void THttpResponse::Clear() { + // a dirty little trick + this->~THttpResponse(); // basically, do nothing + new (this) THttpResponse(); // reset all fields +} + +template <> +void THttpParser<THttpResponse, TSocketBuffer>::Advance(size_t len) { + TStringBuf data(Pos(), len); + while (!data.empty()) { + if (Stage != EParseStage::Error) { + LastSuccessStage = Stage; + } + switch (Stage) { + case EParseStage::Protocol: { + if (ProcessData(Protocol, data, '/', MaxProtocolSize)) { + Stage = EParseStage::Version; + } + break; + } + case EParseStage::Version: { + if (ProcessData(Version, data, ' ', MaxVersionSize)) { + Stage = EParseStage::Status; + } + break; + } + case EParseStage::Status: { + if (ProcessData(Status, data, ' ', MaxStatusSize)) { + Stage = EParseStage::Message; + } + break; + } + case EParseStage::Message: { + if (ProcessData(Message, data, "\r\n", MaxMessageSize)) { + Stage = EParseStage::Header; + Headers = TStringBuf(data.data(), size_t(0)); + } + break; + } + case EParseStage::Header: { + if (ProcessData(Header, data, "\r\n", MaxHeaderSize)) { + if (Header.empty()) { + if (HaveBody() && (ContentLength.empty() || ContentLength != "0")) { + Stage = EParseStage::Body; + } else { + Stage = EParseStage::Done; + } + } else { + ProcessHeader(Header); + } + Headers = TStringBuf(Headers.data(), data.data() - Headers.data()); + } + break; + } + case EParseStage::Body: { + if (!ContentLength.empty()) { + if (ProcessData(Body, data, FromString(ContentLength))) { + Stage = EParseStage::Done; + } + } else if (TransferEncoding == "chunked") { + Stage = EParseStage::ChunkLength; + } else { + // Invalid body encoding + Stage = EParseStage::Error; + } + break; + } + case EParseStage::ChunkLength: { + if (ProcessData(Line, data, "\r\n", MaxChunkLengthSize)) { + if (!Line.empty()) { + ChunkLength = ParseHex(Line); + if (ChunkLength <= MaxChunkSize) { + ContentSize = Content.size() + ChunkLength; + if (ContentSize <= MaxChunkContentSize) { + Stage = EParseStage::ChunkData; + Line.Clear(); + } else { + // Invalid chunk content length + Stage = EParseStage::Error; + } + } else { + // Invalid chunk length + Stage = EParseStage::Error; + } + } else { + // Invalid body encoding + Stage = EParseStage::Error; + } + } + break; + } + case EParseStage::ChunkData: { + if (!IsError()) { + if (ProcessData(Content, data, ContentSize)) { + if (ProcessData(Line, data, 2)) { + if 
(Line == "\r\n") { + if (ChunkLength == 0) { + Body = Content; + Stage = EParseStage::Done; + } else { + Stage = EParseStage::ChunkLength; + } + Line.Clear(); + } else { + // Invalid body encoding + Stage = EParseStage::Error; + } + } + } + } + break; + } + case EParseStage::Done: + case EParseStage::Error: + data.Clear(); + break; + default: + // Invalid processing sequence + Stage = EParseStage::Error; + break; + } + } + TSocketBuffer::Advance(len); +} + +template <> +void THttpParser<THttpResponse, TSocketBuffer>::ConnectionClosed() { + if (Stage == EParseStage::Done) { + return; + } + if (Stage == EParseStage::Body) { + // ? + Stage = EParseStage::Done; + } else { + LastSuccessStage = Stage; + Stage = EParseStage::Error; + } +} + +THttpOutgoingResponsePtr THttpIncomingRequest::CreateResponseString(TStringBuf data) { + THttpOutgoingResponsePtr response = new THttpOutgoingResponse(this); + response->Append(data); + response->Reparse(); + return response; +} + +THttpOutgoingResponsePtr THttpIncomingRequest::CreateResponseOK(TStringBuf body, TStringBuf contentType, TInstant lastModified) { + return CreateResponse("200", "OK", contentType, body, lastModified); +} + +THttpOutgoingResponsePtr THttpIncomingRequest::CreateResponseBadRequest(TStringBuf html, TStringBuf contentType) { + if (html.empty() && IsError()) { + contentType = "text/plain"; + html = GetErrorText(); + } + return CreateResponse("400", "Bad Request", contentType, html); +} + +THttpOutgoingResponsePtr THttpIncomingRequest::CreateResponseNotFound(TStringBuf html, TStringBuf contentType) { + return CreateResponse("404", "Not Found", contentType, html); +} + +THttpOutgoingResponsePtr THttpIncomingRequest::CreateResponseServiceUnavailable(TStringBuf html, TStringBuf contentType) { + return CreateResponse("503", "Service Unavailable", contentType, html); +} + +THttpOutgoingResponsePtr THttpIncomingRequest::CreateResponseGatewayTimeout(TStringBuf html, TStringBuf contentType) { + return CreateResponse("504", "Gateway Timeout", contentType, html); +} + +THttpIncomingResponse::THttpIncomingResponse(THttpOutgoingRequestPtr request) + : Request(request) +{} + +THttpOutgoingResponsePtr THttpIncomingRequest::CreateResponse(TStringBuf status, TStringBuf message, TStringBuf contentType, TStringBuf body, TInstant lastModified) { + TStringBuf version = Version; + if (version != "1.0" && version != "1.1") { + version = "1.1"; + } + THttpOutgoingResponsePtr response = new THttpOutgoingResponse(this, "HTTP", version, status, message); + response->Set<&THttpResponse::Connection>(GetConnection()); + if (!WorkerName.empty()) { + response->Set("X-Worker-Name", WorkerName); + } + if (!contentType.empty() && !body.empty()) { + response->Set<&THttpResponse::ContentType>(contentType); + } + if (lastModified) { + response->Set<&THttpResponse::LastModified>(lastModified.FormatGmTime("%a, %d %b %Y %H:%M:%S GMT")); + } + if (response->IsNeedBody() || !body.empty()) { + if (Method == "HEAD") { + response->Set<&THttpResponse::ContentLength>(ToString(body.size())); + } else { + response->Set<&THttpResponse::Body>(body); + } + } + return response; +} + +THttpIncomingRequestPtr THttpIncomingRequest::Duplicate() { + THttpIncomingRequestPtr request = new THttpIncomingRequest(*this); + request->Reparse(); + request->Timer.Reset(); + return request; +} + +THttpIncomingResponsePtr THttpIncomingResponse::Duplicate(THttpOutgoingRequestPtr request) { + THttpIncomingResponsePtr response = new THttpIncomingResponse(*this); + response->Reparse(); + response->Request = 
request; + return response; +} + +THttpOutgoingResponsePtr THttpOutgoingResponse::Duplicate(THttpIncomingRequestPtr request) { + THttpOutgoingResponsePtr response = new THttpOutgoingResponse(*this); + response->Reparse(); + response->Request = request; + return response; +} + + +THttpOutgoingResponsePtr THttpIncomingResponse::Reverse(THttpIncomingRequestPtr request) { + THttpOutgoingResponsePtr response = new THttpOutgoingResponse(request); + response->Assign(Data(), Size()); + response->Reparse(); + return response; +} + +THttpOutgoingRequest::THttpOutgoingRequest(TStringBuf method, TStringBuf scheme, TStringBuf host, TStringBuf uri, TStringBuf protocol, TStringBuf version) { + Secure = (scheme == "https"); + TString urie = UrlEscapeRet(uri); + InitRequest(method, urie, protocol, version); + if (host) { + Set<&THttpRequest::Host>(host); + } +} + +THttpOutgoingRequest::THttpOutgoingRequest(TStringBuf method, TStringBuf url, TStringBuf protocol, TStringBuf version) { + TStringBuf scheme, host, uri; + if (!CrackURL(url, scheme, host, uri)) { + Y_FAIL("Invalid URL specified"); + } + if (!scheme.empty() && scheme != "http" && scheme != "https") { + Y_FAIL("Invalid URL specified"); + } + Secure = (scheme == "https"); + TString urie = UrlEscapeRet(uri); + InitRequest(method, urie, protocol, version); + if (host) { + Set<&THttpRequest::Host>(host); + } +} + +THttpOutgoingRequestPtr THttpOutgoingRequest::CreateRequestString(const TString& data) { + THttpOutgoingRequestPtr request = new THttpOutgoingRequest(); + request->Assign(data.data(), data.size()); + request->Reparse(); + return request; +} + +THttpOutgoingRequestPtr THttpOutgoingRequest::CreateRequestGet(TStringBuf url) { + return CreateRequest("GET", url); +} + +THttpOutgoingRequestPtr THttpOutgoingRequest::CreateRequestGet(TStringBuf host, TStringBuf uri) { + return CreateHttpRequest("GET", host, uri); +} + +THttpOutgoingRequestPtr THttpOutgoingRequest::CreateRequestPost(TStringBuf url, TStringBuf contentType, TStringBuf body) { + return CreateRequest("POST", url, contentType, body); +} + +THttpOutgoingRequestPtr THttpOutgoingRequest::CreateRequestPost(TStringBuf host, TStringBuf uri, TStringBuf contentType, TStringBuf body) { + return CreateHttpRequest("POST", host, uri, contentType, body); +} + +THttpOutgoingRequestPtr THttpOutgoingRequest::CreateRequest(TStringBuf method, TStringBuf url, TStringBuf contentType, TStringBuf body) { + THttpOutgoingRequestPtr request = new THttpOutgoingRequest(method, url, "HTTP", "1.1"); + request->Set<&THttpRequest::Accept>("*/*"); + if (!contentType.empty()) { + request->Set<&THttpRequest::ContentType>(contentType); + request->Set<&THttpRequest::Body>(body); + } + return request; +} + +THttpOutgoingRequestPtr THttpOutgoingRequest::CreateHttpRequest(TStringBuf method, TStringBuf host, TStringBuf uri, TStringBuf contentType, TStringBuf body) { + THttpOutgoingRequestPtr request = new THttpOutgoingRequest(method, "http", host, uri, "HTTP", "1.1"); + request->Set<&THttpRequest::Accept>("*/*"); + if (!contentType.empty()) { + request->Set<&THttpRequest::ContentType>(contentType); + request->Set<&THttpRequest::Body>(body); + } + return request; +} + +THttpOutgoingRequestPtr THttpOutgoingRequest::Duplicate() { + THttpOutgoingRequestPtr request = new THttpOutgoingRequest(*this); + request->Reparse(); + return request; +} + +THttpOutgoingResponse::THttpOutgoingResponse(THttpIncomingRequestPtr request) + : Request(request) +{} + +THttpOutgoingResponse::THttpOutgoingResponse(THttpIncomingRequestPtr request, 
TStringBuf protocol, TStringBuf version, TStringBuf status, TStringBuf message) + : Request(request) +{ + InitResponse(protocol, version, status, message); +} + +const size_t THttpConfig::BUFFER_MIN_STEP; +const TDuration THttpConfig::CONNECTION_TIMEOUT; + +TUrlParameters::TUrlParameters(TStringBuf url) { + TStringBuf base; + TStringBuf params; + if (url.TrySplit('?', base, params)) { + for (TStringBuf param = params.NextTok('&'); !param.empty(); param = params.NextTok('&')) { + TStringBuf name = param.NextTok('='); + Parameters[name] = param; + } + } +} + +TString TUrlParameters::operator [](TStringBuf name) const { + TString value(Get(name)); + CGIUnescape(value); + return value; +} + +bool TUrlParameters::Has(TStringBuf name) const { + return Parameters.count(name) != 0; +} + +TStringBuf TUrlParameters::Get(TStringBuf name) const { + auto it = Parameters.find(name); + if (it != Parameters.end()) { + return it->second; + } + return TStringBuf(); +} + +TString TUrlParameters::Render() const { + TStringBuilder parameters; + for (const std::pair<TStringBuf, TStringBuf> parameter : Parameters) { + if (parameters.empty()) { + parameters << '?'; + } else { + parameters << '&'; + } + parameters << parameter.first; + parameters << '='; + parameters << parameter.second; + } + return parameters; +} + +TCookies::TCookies(TStringBuf cookie) { + for (TStringBuf param = cookie.NextTok(';'); !param.empty(); param = cookie.NextTok(';')) { + param.SkipPrefix(" "); + TStringBuf name = param.NextTok('='); + Cookies[name] = param; + } +} + +TStringBuf TCookies::operator [](TStringBuf name) const { + return Get(name); +} + +bool TCookies::Has(TStringBuf name) const { + return Cookies.count(name) != 0; +} + +TStringBuf TCookies::Get(TStringBuf name) const { + auto it = Cookies.find(name); + if (it != Cookies.end()) { + return it->second; + } + return TStringBuf(); +} + +TString TCookies::Render() const { + TStringBuilder cookies; + for (const std::pair<TStringBuf, TStringBuf> cookie : Cookies) { + if (!cookies.empty()) { + cookies << ' '; + } + cookies << cookie.first; + cookies << '='; + cookies << cookie.second; + cookies << ';'; + } + return cookies; +} + +TCookiesBuilder::TCookiesBuilder() + :TCookies(TStringBuf()) +{} + +void TCookiesBuilder::Set(TStringBuf name, TStringBuf data) { + Data.emplace_back(name, data); + Cookies[Data.back().first] = Data.back().second; +} + +THeaders::THeaders(TStringBuf headers) { + for (TStringBuf param = headers.NextTok("\r\n"); !param.empty(); param = headers.NextTok("\r\n")) { + TStringBuf name = param.NextTok(":"); + param.SkipPrefix(" "); + Headers[name] = param; + } +} + +TStringBuf THeaders::operator [](TStringBuf name) const { + return Get(name); +} + +bool THeaders::Has(TStringBuf name) const { + return Headers.count(name) != 0; +} + +TStringBuf THeaders::Get(TStringBuf name) const { + auto it = Headers.find(name); + if (it != Headers.end()) { + return it->second; + } + return TStringBuf(); +} + +TString THeaders::Render() const { + TStringBuilder headers; + for (const std::pair<TStringBuf, TStringBuf> header : Headers) { + headers << header.first; + headers << ": "; + headers << header.second; + headers << "\r\n"; + } + return headers; +} + +THeadersBuilder::THeadersBuilder() + :THeaders(TStringBuf()) +{} + +THeadersBuilder::THeadersBuilder(const THeadersBuilder& builder) { + for (const auto& pr : builder.Headers) { + Set(pr.first, pr.second); + } +} + +void THeadersBuilder::Set(TStringBuf name, TStringBuf data) { + Data.emplace_back(name, data); + 
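+ // Data is a TDeque, so emplace_back never invalidates references to earlier elements; the TStringBuf views stored in Headers stay valid.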
Headers[Data.back().first] = Data.back().second; +} + +} diff --git a/library/cpp/actors/http/http.h b/library/cpp/actors/http/http.h new file mode 100644 index 0000000000..96c5c1ec48 --- /dev/null +++ b/library/cpp/actors/http/http.h @@ -0,0 +1,703 @@ +#pragma once +#include <util/datetime/base.h> +#include <util/string/builder.h> +#include <util/system/thread.h> +#include <util/system/hp_timer.h> +#include <util/generic/hash_set.h> +#include <util/generic/buffer.h> +#include <util/generic/intrlist.h> +#include "http_config.h" + +// TODO(xenoxeno): hide in implementation +template <typename Type> +struct THash<TIntrusivePtr<Type>> { + size_t operator ()(const TIntrusivePtr<Type>& ptr) const { return reinterpret_cast<size_t>(ptr.Get()); } +}; + +template<> +inline void Out<TSockAddrInet6>(IOutputStream& o, const TSockAddrInet6& x) { + o << x.ToString(); +} + +namespace NHttp { + +bool IsIPv6(const TString& host); +bool CrackURL(TStringBuf url, TStringBuf& scheme, TStringBuf& host, TStringBuf& uri); +void CrackAddress(const TString& address, TString& hostname, TIpPort& port); +void TrimBegin(TStringBuf& target, char delim); +void TrimEnd(TStringBuf& target, char delim); +void Trim(TStringBuf& target, char delim); +void TrimEnd(TString& target, char delim); + +struct TLessNoCase { + bool operator()(TStringBuf l, TStringBuf r) const { + auto ll = l.length(); + auto rl = r.length(); + if (ll != rl) { + return ll < rl; + } + return strnicmp(l.data(), r.data(), ll) < 0; + } +}; + +struct TUrlParameters { + THashMap<TStringBuf, TStringBuf> Parameters; + + TUrlParameters(TStringBuf url); + TString operator [](TStringBuf name) const; + bool Has(TStringBuf name) const; + TStringBuf Get(TStringBuf name) const; // raw + TString Render() const; +}; + +struct TCookies { + THashMap<TStringBuf, TStringBuf> Cookies; + + TCookies(TStringBuf cookie); + TCookies(const TCookies&) = delete; + TStringBuf operator [](TStringBuf name) const; + bool Has(TStringBuf name) const; + TStringBuf Get(TStringBuf name) const; // raw + TString Render() const; +}; + +struct TCookiesBuilder : TCookies { + TDeque<std::pair<TString, TString>> Data; + + TCookiesBuilder(); + void Set(TStringBuf name, TStringBuf data); +}; + +struct THeaders { + TMap<TStringBuf, TStringBuf, TLessNoCase> Headers; + + THeaders() = default; + THeaders(TStringBuf headers); + THeaders(const THeaders&) = delete; + TStringBuf operator [](TStringBuf name) const; + bool Has(TStringBuf name) const; + TStringBuf Get(TStringBuf name) const; // raw + TString Render() const; +}; + +struct THeadersBuilder : THeaders { + TDeque<std::pair<TString, TString>> Data; + + THeadersBuilder(); + THeadersBuilder(const THeadersBuilder& builder); + void Set(TStringBuf name, TStringBuf data); +}; + +class TSocketBuffer : public TBuffer, public THttpConfig { +public: + TSocketBuffer() + : TBuffer(BUFFER_SIZE) + {} + + bool EnsureEnoughSpaceAvailable(size_t need) { + size_t avail = Avail(); + if (avail < need) { + Reserve(Capacity() + std::max(need, BUFFER_MIN_STEP)); + return false; + } + return true; + } +}; + +class THttpRequest { +public: + TStringBuf Method; + TStringBuf URL; + TStringBuf Protocol; + TStringBuf Version; + TStringBuf Headers; + + TStringBuf Host; + TStringBuf Accept; + TStringBuf Connection; + TStringBuf ContentType; + TStringBuf ContentLength; + TStringBuf TransferEncoding; + + TStringBuf Body; + + static const TMap<TStringBuf, TStringBuf THttpRequest::*, TLessNoCase> HeadersLocation; + + template <TStringBuf THttpRequest::* Header> + static TStringBuf 
GetName(); + void Clear(); +}; + +class THttpResponse { +public: + TStringBuf Protocol; + TStringBuf Version; + TStringBuf Status; + TStringBuf Message; + TStringBuf Headers; + + TStringBuf Connection; + TStringBuf ContentType; + TStringBuf ContentLength; + TStringBuf TransferEncoding; + TStringBuf LastModified; + TStringBuf ContentEncoding; + + TStringBuf Body; + + static const TMap<TStringBuf, TStringBuf THttpResponse::*, TLessNoCase> HeadersLocation; + + template <TStringBuf THttpResponse::* Header> + static TStringBuf GetName(); + void Clear(); +}; + +template <typename HeaderType, typename BufferType> +class THttpParser : public HeaderType, public BufferType { +public: + enum class EParseStage : ui8 { + Method, + URL, + Protocol, + Version, + Status, + Message, + Header, + Body, + ChunkLength, + ChunkData, + Done, + Error, + }; + + static constexpr size_t MaxMethodSize = 6; + static constexpr size_t MaxURLSize = 1024; + static constexpr size_t MaxProtocolSize = 4; + static constexpr size_t MaxVersionSize = 4; + static constexpr size_t MaxStatusSize = 3; + static constexpr size_t MaxMessageSize = 1024; + static constexpr size_t MaxHeaderSize = 8192; + static constexpr size_t MaxChunkLengthSize = 8; + static constexpr size_t MaxChunkSize = 256 * 1024 * 1024; + static constexpr size_t MaxChunkContentSize = 1 * 1024 * 1024 * 1024; + + EParseStage Stage; + EParseStage LastSuccessStage; + TStringBuf Line; + TStringBuf& Header = Line; + size_t ChunkLength = 0; + size_t ContentSize = 0; + TString Content; + + THttpParser(const THttpParser& src) + : HeaderType(src) + , BufferType(src) + , Stage(src.Stage) + , LastSuccessStage(src.LastSuccessStage) + , Line() + , Header(Line) + , ChunkLength(src.ChunkLength) + , ContentSize(src.ContentSize) + , Content(src.Content) + {} + + template <typename StringType> + bool ProcessData(StringType& target, TStringBuf& source, char delim, size_t maxLen) { + TStringBuf maxSource(source.substr(0, maxLen + 1 - target.size())); + size_t pos = maxSource.find(delim); + target += maxSource.substr(0, pos); + source.Skip(pos); + if (target.size() > maxLen) { + Stage = EParseStage::Error; + return false; + } + if (!source.empty() && *source.begin() == delim) { + source.Skip(1); + } + return pos != TStringBuf::npos; + } + + template <typename StringType> + bool ProcessData(StringType& target, TStringBuf& source, TStringBuf delim, size_t maxLen) { + if (delim.empty()) { + return false; + } + if (delim.size() == 1) { + return ProcessData(target, source, delim[0], maxLen); + } + if (ProcessData(target, source, delim.back(), maxLen + 1)) { + for (signed i = delim.size() - 2; i >= 0; --i) { + TrimEnd(target, delim[i]); + } + return true; + } + return false; + } + + template <typename StringType> + bool ProcessData(StringType& target, TStringBuf& source, size_t size) { + TStringBuf maxSource(source.substr(0, size - target.size())); + target += maxSource; + source.Skip(maxSource.size()); + if (target.size() > size && !source.empty()) { + Stage = EParseStage::Error; + return false; + } + return target.size() == size; + } + + void ProcessHeader(TStringBuf& header) { + TStringBuf name = header.NextTok(':'); + TrimBegin(name, ' '); + TStringBuf value = header; + Trim(value, ' '); + auto cit = HeaderType::HeadersLocation.find(name); + if (cit != HeaderType::HeadersLocation.end()) { + this->*cit->second = value; + } + header.Clear(); + } + + size_t ParseHex(TStringBuf value) { + size_t result = 0; + for (char ch : value) { + if (ch >= '0' && ch <= '9') { + result *= 16; + result += 
ch - '0'; + } else if (ch >= 'a' && ch <= 'f') { + result *= 16; + result += 10 + ch - 'a'; + } else if (ch >= 'A' && ch <= 'F') { + result *= 16; + result += 10 + ch - 'A'; + } else if (ch == ';') { + break; + } else if (isspace(ch)) { + continue; + } else { + Stage = EParseStage::Error; + return 0; + } + } + return result; + } + + void Advance(size_t len); + void ConnectionClosed(); + + void Clear() { + BufferType::Clear(); + HeaderType::Clear(); + Stage = GetInitialStage(); + Line.Clear(); + Content.clear(); + } + + bool IsReady() const { + return Stage == EParseStage::Done; + } + + bool IsError() const { + return Stage == EParseStage::Error; + } + + TStringBuf GetErrorText() const { + switch (LastSuccessStage) { + case EParseStage::Method: + return "Invalid http method"; + case EParseStage::URL: + return "Invalid url"; + case EParseStage::Protocol: + return "Invalid http protocol"; + case EParseStage::Version: + return "Invalid http version"; + case EParseStage::Status: + return "Invalid http status"; + case EParseStage::Message: + return "Invalid http message"; + case EParseStage::Header: + return "Invalid http header"; + case EParseStage::Body: + return "Invalid content body"; + case EParseStage::ChunkLength: + case EParseStage::ChunkData: + return "Broken chunked data"; + case EParseStage::Done: + return "Everything is fine"; + case EParseStage::Error: + return "Error on error"; // wat? ...because we don't want to include default label here + } + } + + bool IsDone() const { + return IsReady() || IsError(); + } + + bool HaveBody() const { + return !HeaderType::ContentType.empty() || !HeaderType::ContentLength.empty() || !HeaderType::TransferEncoding.empty(); + } + + bool EnsureEnoughSpaceAvailable(size_t need = BufferType::BUFFER_MIN_STEP) { + bool result = BufferType::EnsureEnoughSpaceAvailable(need); + if (!result && !BufferType::Empty()) { + Reparse(); + } + return true; + } + + void Reparse() { + size_t size = BufferType::Size(); + Clear(); + Advance(size); + } + + TStringBuf GetRawData() const { + return TStringBuf(BufferType::Data(), BufferType::Size()); + } + + TString GetObfuscatedData() const { + THeaders headers(HeaderType::Headers); + TStringBuf authorization(headers["Authorization"]); + TStringBuf cookie(headers["Cookie"]); + TStringBuf x_ydb_auth_ticket(headers["x-ydb-auth-ticket"]); + TStringBuf x_yacloud_subjecttoken(headers["x-yacloud-subjecttoken"]); + TString data(GetRawData()); + if (!authorization.empty()) { + auto pos = data.find(authorization); + if (pos != TString::npos) { + data.replace(pos, authorization.size(), TString("<obfuscated>")); + } + } + if (!cookie.empty()) { + auto pos = data.find(cookie); + if (pos != TString::npos) { + data.replace(pos, cookie.size(), TString("<obfuscated>")); + } + } + if (!x_ydb_auth_ticket.empty()) { + auto pos = data.find(x_ydb_auth_ticket); + if (pos != TString::npos) { + data.replace(pos, x_ydb_auth_ticket.size(), TString("<obfuscated>")); + } + } + if (!x_yacloud_subjecttoken.empty()) { + auto pos = data.find(x_yacloud_subjecttoken); + if (pos != TString::npos) { + data.replace(pos, x_yacloud_subjecttoken.size(), TString("<obfuscated>")); + } + } + return data; + } + + static EParseStage GetInitialStage(); + + THttpParser() + : Stage(GetInitialStage()) + , LastSuccessStage(Stage) + {} +}; + +template <typename HeaderType, typename BufferType> +class THttpRenderer : public HeaderType, public BufferType { +public: + enum class ERenderStage { + Init, + Header, + Body, + Done, + Error, + }; + + ERenderStage Stage = 
ERenderStage::Init; + + void Append(TStringBuf text) { + EnsureEnoughSpaceAvailable(text.size()); + BufferType::Append(text.data(), text.size()); + } + + void Append(char c) { + EnsureEnoughSpaceAvailable(sizeof(c)); + BufferType::Append(c); + } + + template <TStringBuf HeaderType::* string> + void AppendParsedValue(TStringBuf value) { + Append(value); + static_cast<HeaderType*>(this)->*string = TStringBuf(BufferType::Pos() - value.size(), value.size()); + } + + template <TStringBuf HeaderType::* name> + void Set(TStringBuf value) { + Y_VERIFY_DEBUG(Stage == ERenderStage::Header); + Append(HeaderType::template GetName<name>()); + Append(": "); + AppendParsedValue<name>(value); + Append("\r\n"); + HeaderType::Headers = TStringBuf(HeaderType::Headers.Data(), BufferType::Pos() - HeaderType::Headers.Data()); + } + + void Set(TStringBuf name, TStringBuf value) { + Y_VERIFY_DEBUG(Stage == ERenderStage::Header); + Append(name); + Append(": "); + Append(value); + Append("\r\n"); + HeaderType::Headers = TStringBuf(HeaderType::Headers.Data(), BufferType::Pos() - HeaderType::Headers.Data()); + } + + void Set(const THeaders& headers) { + Y_VERIFY_DEBUG(Stage == ERenderStage::Header); + Append(headers.Render()); + HeaderType::Headers = TStringBuf(HeaderType::Headers.Data(), BufferType::Pos() - HeaderType::Headers.Data()); + } + + //THttpRenderer(TStringBuf method, TStringBuf url, TStringBuf protocol, TStringBuf version); // request + void InitRequest(TStringBuf method, TStringBuf url, TStringBuf protocol, TStringBuf version) { + Y_VERIFY_DEBUG(Stage == ERenderStage::Init); + AppendParsedValue<&THttpRequest::Method>(method); + Append(' '); + AppendParsedValue<&THttpRequest::URL>(url); + Append(' '); + AppendParsedValue<&THttpRequest::Protocol>(protocol); + Append('/'); + AppendParsedValue<&THttpRequest::Version>(version); + Append("\r\n"); + Stage = ERenderStage::Header; + HeaderType::Headers = TStringBuf(BufferType::Pos(), size_t(0)); + } + + //THttpRenderer(TStringBuf protocol, TStringBuf version, TStringBuf status, TStringBuf message); // response + void InitResponse(TStringBuf protocol, TStringBuf version, TStringBuf status, TStringBuf message) { + Y_VERIFY_DEBUG(Stage == ERenderStage::Init); + AppendParsedValue<&THttpResponse::Protocol>(protocol); + Append('/'); + AppendParsedValue<&THttpResponse::Version>(version); + Append(' '); + AppendParsedValue<&THttpResponse::Status>(status); + Append(' '); + AppendParsedValue<&THttpResponse::Message>(message); + Append("\r\n"); + Stage = ERenderStage::Header; + HeaderType::Headers = TStringBuf(BufferType::Pos(), size_t(0)); + } + + void FinishHeader() { + Append("\r\n"); + HeaderType::Headers = TStringBuf(HeaderType::Headers.Data(), BufferType::Pos() - HeaderType::Headers.Data()); + Stage = ERenderStage::Body; + } + + void SetBody(TStringBuf body) { + Y_VERIFY_DEBUG(Stage == ERenderStage::Header); + if (HeaderType::ContentLength.empty()) { + Set<&HeaderType::ContentLength>(ToString(body.size())); + } + FinishHeader(); + AppendParsedValue<&HeaderType::Body>(body); + Stage = ERenderStage::Done; + } + + bool IsDone() const { + return Stage == ERenderStage::Done; + } + + void Finish() { + switch (Stage) { + case ERenderStage::Header: + FinishHeader(); + break; + default: + break; + } + } + + bool EnsureEnoughSpaceAvailable(size_t need = BufferType::BUFFER_MIN_STEP) { + bool result = BufferType::EnsureEnoughSpaceAvailable(need); + if (!result && !BufferType::Empty()) { + Reparse(); + } + return true; + } + + void Clear() { + BufferType::Clear(); + 
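// the parsed TStringBuf fields point into this buffer, so the raw bytes and the views must be reset together + 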
HeaderType::Clear(); + } + + void Reparse() { + // move-magic + size_t size = BufferType::Size(); + THttpParser<HeaderType, BufferType> parser; + // move the buffer to parser + static_cast<BufferType&>(parser) = std::move(static_cast<BufferType&>(*this)); + // reparse + parser.Clear(); + parser.Advance(size); + // move buffer and result back + static_cast<HeaderType&>(*this) = std::move(static_cast<HeaderType&>(parser)); + static_cast<BufferType&>(*this) = std::move(static_cast<BufferType&>(parser)); + switch (parser.Stage) { + case THttpParser<HeaderType, BufferType>::EParseStage::Method: + case THttpParser<HeaderType, BufferType>::EParseStage::URL: + case THttpParser<HeaderType, BufferType>::EParseStage::Protocol: + case THttpParser<HeaderType, BufferType>::EParseStage::Version: + case THttpParser<HeaderType, BufferType>::EParseStage::Status: + case THttpParser<HeaderType, BufferType>::EParseStage::Message: + Stage = ERenderStage::Init; + break; + case THttpParser<HeaderType, BufferType>::EParseStage::Header: + Stage = ERenderStage::Header; + break; + case THttpParser<HeaderType, BufferType>::EParseStage::Body: + case THttpParser<HeaderType, BufferType>::EParseStage::ChunkLength: + case THttpParser<HeaderType, BufferType>::EParseStage::ChunkData: + Stage = ERenderStage::Body; + break; + case THttpParser<HeaderType, BufferType>::EParseStage::Done: + Stage = ERenderStage::Done; + break; + case THttpParser<HeaderType, BufferType>::EParseStage::Error: + Stage = ERenderStage::Error; + break; + } + Y_VERIFY(size == BufferType::Size()); + } + + TStringBuf GetRawData() const { + return TStringBuf(BufferType::Data(), BufferType::Size()); + } +}; + +template <> +template <> +inline void THttpRenderer<THttpResponse, TSocketBuffer>::Set<&THttpResponse::Body>(TStringBuf value) { + SetBody(value); +} + +template <> +template <> +inline void THttpRenderer<THttpRequest, TSocketBuffer>::Set<&THttpRequest::Body>(TStringBuf value) { + SetBody(value); +} + +class THttpIncomingRequest; +using THttpIncomingRequestPtr = TIntrusivePtr<THttpIncomingRequest>; + +class THttpOutgoingResponse; +using THttpOutgoingResponsePtr = TIntrusivePtr<THttpOutgoingResponse>; + +class THttpIncomingRequest : + public THttpParser<THttpRequest, TSocketBuffer>, + public TRefCounted<THttpIncomingRequest, TAtomicCounter> { +public: + THttpConfig::SocketAddressType Address; + TString WorkerName; + THPTimer Timer; + bool Secure = false; + + bool IsConnectionClose() const { + if (Connection.empty()) { + return Version == "1.0"; + } else { + return Connection == "close"; + } + } + + TStringBuf GetConnection() const { + if (!Connection.empty()) { + return Connection; + } + return Version == "1.0" ? 
"close" : "keep-alive"; + } + + THttpOutgoingResponsePtr CreateResponseOK(TStringBuf body, TStringBuf contentType = "text/html", TInstant lastModified = TInstant()); + THttpOutgoingResponsePtr CreateResponseString(TStringBuf data); + THttpOutgoingResponsePtr CreateResponseBadRequest(TStringBuf html = TStringBuf(), TStringBuf contentType = "text/html"); // 400 + THttpOutgoingResponsePtr CreateResponseNotFound(TStringBuf html = TStringBuf(), TStringBuf contentType = "text/html"); // 404 + THttpOutgoingResponsePtr CreateResponseServiceUnavailable(TStringBuf html = TStringBuf(), TStringBuf contentType = "text/html"); // 503 + THttpOutgoingResponsePtr CreateResponseGatewayTimeout(TStringBuf html = TStringBuf(), TStringBuf contentType = "text/html"); // 504 + THttpOutgoingResponsePtr CreateResponse( + TStringBuf status, + TStringBuf message, + TStringBuf contentType = TStringBuf(), + TStringBuf body = TStringBuf(), + TInstant lastModified = TInstant()); + + THttpIncomingRequestPtr Duplicate(); +}; + +class THttpIncomingResponse; +using THttpIncomingResponsePtr = TIntrusivePtr<THttpIncomingResponse>; + +class THttpOutgoingRequest; +using THttpOutgoingRequestPtr = TIntrusivePtr<THttpOutgoingRequest>; + +class THttpIncomingResponse : + public THttpParser<THttpResponse, TSocketBuffer>, + public TRefCounted<THttpIncomingResponse, TAtomicCounter> { +public: + THttpIncomingResponse(THttpOutgoingRequestPtr request); + + THttpOutgoingRequestPtr GetRequest() const { + return Request; + } + + THttpIncomingResponsePtr Duplicate(THttpOutgoingRequestPtr request); + THttpOutgoingResponsePtr Reverse(THttpIncomingRequestPtr request); + +protected: + THttpOutgoingRequestPtr Request; +}; + +class THttpOutgoingRequest : + public THttpRenderer<THttpRequest, TSocketBuffer>, + public TRefCounted<THttpOutgoingRequest, TAtomicCounter> { +public: + THPTimer Timer; + bool Secure = false; + + THttpOutgoingRequest() = default; + THttpOutgoingRequest(TStringBuf method, TStringBuf url, TStringBuf protocol, TStringBuf version); + THttpOutgoingRequest(TStringBuf method, TStringBuf scheme, TStringBuf host, TStringBuf uri, TStringBuf protocol, TStringBuf version); + static THttpOutgoingRequestPtr CreateRequestString(TStringBuf data); + static THttpOutgoingRequestPtr CreateRequestString(const TString& data); + static THttpOutgoingRequestPtr CreateRequestGet(TStringBuf url); + static THttpOutgoingRequestPtr CreateRequestGet(TStringBuf host, TStringBuf uri); // http only + static THttpOutgoingRequestPtr CreateRequestPost(TStringBuf url, TStringBuf contentType = {}, TStringBuf body = {}); + static THttpOutgoingRequestPtr CreateRequestPost(TStringBuf host, TStringBuf uri, TStringBuf contentType, TStringBuf body); // http only + static THttpOutgoingRequestPtr CreateRequest(TStringBuf method, TStringBuf url, TStringBuf contentType = TStringBuf(), TStringBuf body = TStringBuf()); + static THttpOutgoingRequestPtr CreateHttpRequest(TStringBuf method, TStringBuf host, TStringBuf uri, TStringBuf contentType = TStringBuf(), TStringBuf body = TStringBuf()); + THttpOutgoingRequestPtr Duplicate(); +}; + +class THttpOutgoingResponse : + public THttpRenderer<THttpResponse, TSocketBuffer>, + public TRefCounted<THttpOutgoingResponse, TAtomicCounter> { +public: + THttpOutgoingResponse(THttpIncomingRequestPtr request); + THttpOutgoingResponse(THttpIncomingRequestPtr request, TStringBuf protocol, TStringBuf version, TStringBuf status, TStringBuf message); + + bool IsConnectionClose() const { + if (!Connection.empty()) { + return Connection == "close"; 
+ } else { + return Request->IsConnectionClose(); + } + } + + bool IsNeedBody() const { + return Status != "204"; + } + + THttpIncomingRequestPtr GetRequest() const { + return Request; + } + + THttpOutgoingResponsePtr Duplicate(THttpIncomingRequestPtr request); + +// it's temporarily accessible for cleanup +//protected: + THttpIncomingRequestPtr Request; +}; + +} diff --git a/library/cpp/actors/http/http_cache.cpp b/library/cpp/actors/http/http_cache.cpp new file mode 100644 index 0000000000..27c4eeb6f3 --- /dev/null +++ b/library/cpp/actors/http/http_cache.cpp @@ -0,0 +1,599 @@ +#include "http.h" +#include "http_proxy.h" +#include "http_cache.h" +#include <library/cpp/actors/core/actor_bootstrapped.h> +#include <library/cpp/actors/core/executor_pool_basic.h> +#include <library/cpp/actors/core/log.h> +#include <library/cpp/actors/core/scheduler_basic.h> +#include <library/cpp/actors/http/http.h> +#include <library/cpp/digest/md5/md5.h> +#include <util/digest/multi.h> +#include <util/generic/queue.h> +#include <util/string/cast.h> + +namespace NHttp { + +class THttpOutgoingCacheActor : public NActors::TActorBootstrapped<THttpOutgoingCacheActor>, THttpConfig { +public: + using TBase = NActors::TActorBootstrapped<THttpOutgoingCacheActor>; + NActors::TActorId HttpProxyId; + TGetCachePolicy GetCachePolicy; + static constexpr TDuration RefreshTimeout = TDuration::Seconds(1); + + struct TCacheKey { + TString Host; + TString URL; + TString Headers; + + operator size_t() const { + return MultiHash(Host, URL, Headers); + } + + TString GetId() const { + return MD5::Calc(Host + ':' + URL + ':' + Headers); + } + }; + + struct TCacheRecord { + TInstant RefreshTime; + TInstant DeathTime; + TCachePolicy CachePolicy; + NHttp::THttpOutgoingRequestPtr Request; + NHttp::THttpOutgoingRequestPtr OutgoingRequest; + TDuration Timeout; + NHttp::THttpIncomingResponsePtr Response; + TString Error; + TVector<NHttp::TEvHttpProxy::TEvHttpOutgoingRequest::TPtr> Waiters; + + TCacheRecord(const TCachePolicy cachePolicy) + : CachePolicy(cachePolicy) + {} + + bool IsValid() const { + return Response != nullptr || !Error.empty(); + } + + void UpdateResponse(NHttp::THttpIncomingResponsePtr response, const TString& error, TInstant now) { + if (error.empty() || Response == nullptr || !CachePolicy.KeepOnError) { + Response = response; + Error = error; + } + RefreshTime = now + CachePolicy.TimeToRefresh; + if (CachePolicy.PaceToRefresh) { + RefreshTime += TDuration::MilliSeconds(RandomNumber<ui64>() % CachePolicy.PaceToRefresh.MilliSeconds()); + } + } + + TString GetName() const { + return TStringBuilder() << (Request->Secure ? 
"https://" : "http://") << Request->Host << Request->URL; + } + }; + + struct TRefreshRecord { + TCacheKey Key; + TInstant RefreshTime; + + bool operator <(const TRefreshRecord& b) const { + return RefreshTime > b.RefreshTime; + } + }; + + THashMap<TCacheKey, TCacheRecord> Cache; + TPriorityQueue<TRefreshRecord> RefreshQueue; + THashMap<THttpOutgoingRequest*, TCacheKey> OutgoingRequests; + + THttpOutgoingCacheActor(const NActors::TActorId& httpProxyId, TGetCachePolicy getCachePolicy) + : HttpProxyId(httpProxyId) + , GetCachePolicy(std::move(getCachePolicy)) + {} + + void Bootstrap(const NActors::TActorContext&) { + // + Become(&THttpOutgoingCacheActor::StateWork, RefreshTimeout, new NActors::TEvents::TEvWakeup()); + } + + static TString GetCacheHeadersKey(const NHttp::THttpOutgoingRequest* request, const TCachePolicy& policy) { + TStringBuilder key; + if (!policy.HeadersToCacheKey.empty()) { + NHttp::THeaders headers(request->Headers); + for (const TString& header : policy.HeadersToCacheKey) { + key << headers[header]; + } + } + return key; + } + + static TCacheKey GetCacheKey(const NHttp::THttpOutgoingRequest* request, const TCachePolicy& policy) { + return { ToString(request->Host), ToString(request->URL), GetCacheHeadersKey(request, policy) }; + } + + void Handle(NHttp::TEvHttpProxy::TEvHttpOutgoingResponse::TPtr event, const NActors::TActorContext& ctx) { + ctx.Send(event->Forward(HttpProxyId)); + } + + void Handle(NHttp::TEvHttpProxy::TEvHttpIncomingRequest::TPtr event, const NActors::TActorContext& ctx) { + ctx.Send(event->Forward(HttpProxyId)); + } + + void Handle(NHttp::TEvHttpProxy::TEvAddListeningPort::TPtr event, const NActors::TActorContext& ctx) { + ctx.Send(event->Forward(HttpProxyId)); + } + + void Handle(NHttp::TEvHttpProxy::TEvRegisterHandler::TPtr event, const NActors::TActorContext& ctx) { + ctx.Send(event->Forward(HttpProxyId)); + } + + void Handle(NHttp::TEvHttpProxy::TEvHttpIncomingResponse::TPtr event, const NActors::TActorContext& ctx) { + NHttp::THttpOutgoingRequestPtr request(event->Get()->Request); + NHttp::THttpIncomingResponsePtr response(event->Get()->Response); + auto itRequests = OutgoingRequests.find(request.Get()); + if (itRequests == OutgoingRequests.end()) { + LOG_ERROR_S(ctx, HttpLog, "Cache received response to unknown request " << request->Host << request->URL); + return; + } + auto key = itRequests->second; + OutgoingRequests.erase(itRequests); + auto it = Cache.find(key); + if (it == Cache.end()) { + LOG_ERROR_S(ctx, HttpLog, "Cache received response to unknown cache key " << request->Host << request->URL); + return; + } + TCacheRecord& cacheRecord = it->second; + cacheRecord.OutgoingRequest.Reset(); + for (auto& waiter : cacheRecord.Waiters) { + NHttp::THttpIncomingResponsePtr response2; + TString error2; + if (response != nullptr) { + response2 = response->Duplicate(waiter->Get()->Request); + } + if (!event->Get()->Error.empty()) { + error2 = event->Get()->Error; + } + ctx.Send(waiter->Sender, new NHttp::TEvHttpProxy::TEvHttpIncomingResponse(waiter->Get()->Request, response2, error2)); + } + cacheRecord.Waiters.clear(); + TString error; + if (event->Get()->Error.empty()) { + if (event->Get()->Response != nullptr && event->Get()->Response->Status != "200") { + error = event->Get()->Response->Message; + } + } else { + error = event->Get()->Error; + } + if (!error.empty()) { + LOG_WARN_S(ctx, HttpLog, "Error from " << cacheRecord.GetName() << ": " << error); + } + LOG_DEBUG_S(ctx, HttpLog, "OutgoingUpdate " << cacheRecord.GetName()); + 
cacheRecord.UpdateResponse(response, event->Get()->Error, ctx.Now()); + RefreshQueue.push({it->first, it->second.RefreshTime}); + LOG_DEBUG_S(ctx, HttpLog, "OutgoingSchedule " << cacheRecord.GetName() << " at " << cacheRecord.RefreshTime << " until " << cacheRecord.DeathTime); + } + + void Handle(NHttp::TEvHttpProxy::TEvHttpOutgoingRequest::TPtr event, const NActors::TActorContext& ctx) { + const NHttp::THttpOutgoingRequest* request = event->Get()->Request.Get(); + auto policy = GetCachePolicy(request); + if (policy.TimeToExpire == TDuration()) { + ctx.Send(event->Forward(HttpProxyId)); + return; + } + auto key = GetCacheKey(request, policy); + auto it = Cache.find(key); + if (it != Cache.end()) { + if (it->second.IsValid()) { + LOG_DEBUG_S(ctx, HttpLog, "OutgoingRespond " + << it->second.GetName() + << " (" + << ((it->second.Response != nullptr) ? ToString(it->second.Response->Size()) : TString("error")) + << ")"); + NHttp::THttpIncomingResponsePtr response = it->second.Response; + if (response != nullptr) { + response = response->Duplicate(event->Get()->Request); + } + ctx.Send(event->Sender, + new NHttp::TEvHttpProxy::TEvHttpIncomingResponse(event->Get()->Request, + response, + it->second.Error)); + it->second.DeathTime = ctx.Now() + it->second.CachePolicy.TimeToExpire; // prolong active cache items + return; + } + } else { + it = Cache.emplace(key, policy).first; + it->second.Request = event->Get()->Request; + it->second.Timeout = event->Get()->Timeout; + it->second.OutgoingRequest = it->second.Request->Duplicate(); + OutgoingRequests[it->second.OutgoingRequest.Get()] = key; + LOG_DEBUG_S(ctx, HttpLog, "OutgoingInitiate " << it->second.GetName()); + ctx.Send(HttpProxyId, new NHttp::TEvHttpProxy::TEvHttpOutgoingRequest(it->second.OutgoingRequest, it->second.Timeout)); + } + it->second.DeathTime = ctx.Now() + it->second.CachePolicy.TimeToExpire; + it->second.Waiters.emplace_back(std::move(event)); + } + + void HandleRefresh(const NActors::TActorContext& ctx) { + while (!RefreshQueue.empty() && RefreshQueue.top().RefreshTime <= ctx.Now()) { + TRefreshRecord rrec = RefreshQueue.top(); + RefreshQueue.pop(); + auto it = Cache.find(rrec.Key); + if (it != Cache.end()) { + if (it->second.DeathTime > ctx.Now()) { + LOG_DEBUG_S(ctx, HttpLog, "OutgoingRefresh " << it->second.GetName()); + it->second.OutgoingRequest = it->second.Request->Duplicate(); + OutgoingRequests[it->second.OutgoingRequest.Get()] = it->first; + ctx.Send(HttpProxyId, new NHttp::TEvHttpProxy::TEvHttpOutgoingRequest(it->second.OutgoingRequest, it->second.Timeout)); + } else { + LOG_DEBUG_S(ctx, HttpLog, "OutgoingForget " << it->second.GetName()); + if (it->second.OutgoingRequest) { + OutgoingRequests.erase(it->second.OutgoingRequest.Get()); + } + Cache.erase(it); + } + } + } + ctx.Schedule(RefreshTimeout, new NActors::TEvents::TEvWakeup()); + } + + STFUNC(StateWork) { + switch (ev->GetTypeRewrite()) { + HFunc(NHttp::TEvHttpProxy::TEvHttpIncomingResponse, Handle); + HFunc(NHttp::TEvHttpProxy::TEvHttpOutgoingRequest, Handle); + HFunc(NHttp::TEvHttpProxy::TEvAddListeningPort, Handle); + HFunc(NHttp::TEvHttpProxy::TEvRegisterHandler, Handle); + HFunc(NHttp::TEvHttpProxy::TEvHttpIncomingRequest, Handle); + HFunc(NHttp::TEvHttpProxy::TEvHttpOutgoingResponse, Handle); + CFunc(NActors::TEvents::TSystem::Wakeup, HandleRefresh); + } + } +}; + +const TDuration THttpOutgoingCacheActor::RefreshTimeout; + +class THttpIncomingCacheActor : public NActors::TActorBootstrapped<THttpIncomingCacheActor>, THttpConfig { +public: + using TBase = 
NActors::TActorBootstrapped<THttpIncomingCacheActor>; + NActors::TActorId HttpProxyId; + TGetCachePolicy GetCachePolicy; + static constexpr TDuration RefreshTimeout = TDuration::Seconds(1); + THashMap<TString, TActorId> Handlers; + + struct TCacheKey { + TString Host; + TString URL; + TString Headers; + + operator size_t() const { + return MultiHash(Host, URL, Headers); + } + + TString GetId() const { + return MD5::Calc(Host + ':' + URL + ':' + Headers); + } + }; + + struct TCacheRecord { + TInstant RefreshTime; + TInstant DeathTime; + TCachePolicy CachePolicy; + TString CacheId; + NHttp::THttpIncomingRequestPtr Request; + TDuration Timeout; + NHttp::THttpOutgoingResponsePtr Response; + TVector<NHttp::TEvHttpProxy::TEvHttpIncomingRequest::TPtr> Waiters; + ui32 Retries = 0; + bool Enqueued = false; + + TCacheRecord(const TCachePolicy cachePolicy) + : CachePolicy(cachePolicy) + {} + + bool IsValid() const { + return Response != nullptr; + } + + void InitRequest(NHttp::THttpIncomingRequestPtr request) { + Request = request; + if (CachePolicy.TimeToExpire) { + DeathTime = NActors::TlsActivationContext->Now() + CachePolicy.TimeToExpire; + } + } + + void UpdateResponse(NHttp::THttpOutgoingResponsePtr response, const TString& error, TInstant now) { + if (error.empty() || !CachePolicy.KeepOnError) { + Response = response; + } + Retries = 0; + if (CachePolicy.TimeToRefresh) { + RefreshTime = now + CachePolicy.TimeToRefresh; + if (CachePolicy.PaceToRefresh) { + RefreshTime += TDuration::MilliSeconds(RandomNumber<ui64>() % CachePolicy.PaceToRefresh.MilliSeconds()); + } + } + } + + void UpdateExpireTime() { + if (CachePolicy.TimeToExpire) { + DeathTime = NActors::TlsActivationContext->Now() + CachePolicy.TimeToExpire; + } + } + + TString GetName() const { + return TStringBuilder() << (Request->Secure ? 
"https://" : "http://") << Request->Host << Request->URL + << " (" << CacheId << ")"; + } + }; + + struct TRefreshRecord { + TCacheKey Key; + TInstant RefreshTime; + + bool operator <(const TRefreshRecord& b) const { + return RefreshTime > b.RefreshTime; + } + }; + + THashMap<TCacheKey, TCacheRecord> Cache; + TPriorityQueue<TRefreshRecord> RefreshQueue; + THashMap<THttpIncomingRequest*, TCacheKey> IncomingRequests; + + THttpIncomingCacheActor(const NActors::TActorId& httpProxyId, TGetCachePolicy getCachePolicy) + : HttpProxyId(httpProxyId) + , GetCachePolicy(std::move(getCachePolicy)) + {} + + void Bootstrap(const NActors::TActorContext&) { + // + Become(&THttpIncomingCacheActor::StateWork, RefreshTimeout, new NActors::TEvents::TEvWakeup()); + } + + static TString GetCacheHeadersKey(const NHttp::THttpIncomingRequest* request, const TCachePolicy& policy) { + TStringBuilder key; + if (!policy.HeadersToCacheKey.empty()) { + NHttp::THeaders headers(request->Headers); + for (const TString& header : policy.HeadersToCacheKey) { + key << headers[header]; + } + } + return key; + } + + static TCacheKey GetCacheKey(const NHttp::THttpIncomingRequest* request, const TCachePolicy& policy) { + return { ToString(request->Host), ToString(request->URL), GetCacheHeadersKey(request, policy) }; + } + + TActorId GetRequestHandler(NHttp::THttpIncomingRequestPtr request) { + TStringBuf url = request->URL.Before('?'); + THashMap<TString, TActorId>::iterator it; + while (!url.empty()) { + it = Handlers.find(url); + if (it != Handlers.end()) { + return it->second; + } else { + if (url.EndsWith('/')) { + url.Trunc(url.size() - 1); + } + size_t pos = url.rfind('/'); + if (pos == TStringBuf::npos) { + break; + } else { + url = url.substr(0, pos + 1); + } + } + } + return {}; + } + + void SendCacheRequest(const TCacheKey& cacheKey, TCacheRecord& cacheRecord, const NActors::TActorContext& ctx) { + cacheRecord.Request = cacheRecord.Request->Duplicate(); + IncomingRequests[cacheRecord.Request.Get()] = cacheKey; + TActorId handler = GetRequestHandler(cacheRecord.Request); + if (handler) { + Send(handler, new NHttp::TEvHttpProxy::TEvHttpIncomingRequest(cacheRecord.Request)); + } else { + LOG_ERROR_S(ctx, HttpLog, "Can't find cache handler for " << cacheRecord.GetName()); + } + } + + void DropCacheRecord(THashMap<TCacheKey, TCacheRecord>::iterator it) { + if (it->second.Request) { + IncomingRequests.erase(it->second.Request.Get()); + } + for (auto& waiter : it->second.Waiters) { + NHttp::THttpOutgoingResponsePtr response; + response = waiter->Get()->Request->CreateResponseGatewayTimeout("Timeout", "text/plain"); + Send(waiter->Sender, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(response)); + } + Cache.erase(it); + } + + void Handle(NHttp::TEvHttpProxy::TEvHttpIncomingResponse::TPtr event, const NActors::TActorContext& ctx) { + ctx.Send(event->Forward(HttpProxyId)); + } + + void Handle(NHttp::TEvHttpProxy::TEvHttpOutgoingRequest::TPtr event, const NActors::TActorContext& ctx) { + ctx.Send(event->Forward(HttpProxyId)); + } + + void Handle(NHttp::TEvHttpProxy::TEvAddListeningPort::TPtr event, const NActors::TActorContext& ctx) { + ctx.Send(event->Forward(HttpProxyId)); + } + + void Handle(NHttp::TEvHttpProxy::TEvRegisterHandler::TPtr event, const NActors::TActorContext& ctx) { + Handlers[event->Get()->Path] = event->Get()->Handler; + ctx.Send(HttpProxyId, new NHttp::TEvHttpProxy::TEvRegisterHandler(event->Get()->Path, ctx.SelfID)); + } + + void Handle(NHttp::TEvHttpProxy::TEvHttpOutgoingResponse::TPtr event, const 
NActors::TActorContext& ctx) { + NHttp::THttpIncomingRequestPtr request(event->Get()->Response->GetRequest()); + NHttp::THttpOutgoingResponsePtr response(event->Get()->Response); + auto itRequests = IncomingRequests.find(request.Get()); + if (itRequests == IncomingRequests.end()) { + LOG_ERROR_S(ctx, HttpLog, "Cache received response to unknown request " << request->Host << request->URL); + return; + } + + TCacheKey key = itRequests->second; + auto it = Cache.find(key); + if (it == Cache.end()) { + LOG_ERROR_S(ctx, HttpLog, "Cache received response to unknown cache key " << request->Host << request->URL); + return; + } + + IncomingRequests.erase(itRequests); + TCacheRecord& cacheRecord = it->second; + TStringBuf status; + TString error; + + if (event->Get()->Response != nullptr) { + status = event->Get()->Response->Status; + if (!status.StartsWith("2")) { + error = event->Get()->Response->Message; + } + } + if (cacheRecord.CachePolicy.RetriesCount > 0) { + auto itStatusToRetry = std::find(cacheRecord.CachePolicy.StatusesToRetry.begin(), cacheRecord.CachePolicy.StatusesToRetry.end(), status); + if (itStatusToRetry != cacheRecord.CachePolicy.StatusesToRetry.end()) { + if (cacheRecord.Retries < cacheRecord.CachePolicy.RetriesCount) { + ++cacheRecord.Retries; + LOG_WARN_S(ctx, HttpLog, "IncomingRetry " << cacheRecord.GetName() << ": " << status << " " << error); + SendCacheRequest(key, cacheRecord, ctx); + return; + } + } + } + for (auto& waiter : cacheRecord.Waiters) { + NHttp::THttpOutgoingResponsePtr response2; + response2 = response->Duplicate(waiter->Get()->Request); + ctx.Send(waiter->Sender, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(response2)); + } + cacheRecord.Waiters.clear(); + if (!error.empty()) { + LOG_WARN_S(ctx, HttpLog, "Error from " << cacheRecord.GetName() << ": " << error); + if (!cacheRecord.Response) { + LOG_DEBUG_S(ctx, HttpLog, "IncomingDiscard " << cacheRecord.GetName()); + DropCacheRecord(it); + return; + } + } + if (cacheRecord.CachePolicy.TimeToRefresh) { + LOG_DEBUG_S(ctx, HttpLog, "IncomingUpdate " << cacheRecord.GetName()); + cacheRecord.UpdateResponse(response, error, ctx.Now()); + if (!cacheRecord.Enqueued) { + RefreshQueue.push({it->first, it->second.RefreshTime}); + cacheRecord.Enqueued = true; + } + LOG_DEBUG_S(ctx, HttpLog, "IncomingSchedule " << cacheRecord.GetName() << " at " << cacheRecord.RefreshTime << " until " << cacheRecord.DeathTime); + } else { + LOG_DEBUG_S(ctx, HttpLog, "IncomingDrop " << cacheRecord.GetName()); + DropCacheRecord(it); + } + } + + void Handle(NHttp::TEvHttpProxy::TEvHttpIncomingRequest::TPtr event, const NActors::TActorContext& ctx) { + const NHttp::THttpIncomingRequest* request = event->Get()->Request.Get(); + TCachePolicy policy = GetCachePolicy(request); + if (policy.TimeToExpire == TDuration() && policy.RetriesCount == 0) { + TActorId handler = GetRequestHandler(event->Get()->Request); + if (handler) { + ctx.Send(event->Forward(handler)); + } + return; + } + auto key = GetCacheKey(request, policy); + auto it = Cache.find(key); + if (it != Cache.end() && !policy.DiscardCache) { + it->second.UpdateExpireTime(); + if (it->second.IsValid()) { + LOG_DEBUG_S(ctx, HttpLog, "IncomingRespond " + << it->second.GetName() + << " (" + << ((it->second.Response != nullptr) ? 
ToString(it->second.Response->Size()) : TString("error")) + << ")"); + NHttp::THttpOutgoingResponsePtr response = it->second.Response; + if (response != nullptr) { + response = response->Duplicate(event->Get()->Request); + } + ctx.Send(event->Sender, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(response)); + return; + } + } else { + it = Cache.emplace(key, policy).first; + it->second.CacheId = key.GetId(); // for debugging + it->second.InitRequest(event->Get()->Request); + if (policy.DiscardCache) { + LOG_DEBUG_S(ctx, HttpLog, "IncomingDiscardCache " << it->second.GetName()); + } + LOG_DEBUG_S(ctx, HttpLog, "IncomingInitiate " << it->second.GetName()); + SendCacheRequest(key, it->second, ctx); + } + it->second.Waiters.emplace_back(std::move(event)); + } + + void HandleRefresh(const NActors::TActorContext& ctx) { + while (!RefreshQueue.empty() && RefreshQueue.top().RefreshTime <= ctx.Now()) { + TRefreshRecord rrec = RefreshQueue.top(); + RefreshQueue.pop(); + auto it = Cache.find(rrec.Key); + if (it != Cache.end()) { + it->second.Enqueued = false; + if (it->second.DeathTime > ctx.Now()) { + LOG_DEBUG_S(ctx, HttpLog, "IncomingRefresh " << it->second.GetName()); + SendCacheRequest(it->first, it->second, ctx); + } else { + LOG_DEBUG_S(ctx, HttpLog, "IncomingForget " << it->second.GetName()); + DropCacheRecord(it); + } + } + } + ctx.Schedule(RefreshTimeout, new NActors::TEvents::TEvWakeup()); + } + + STFUNC(StateWork) { + switch (ev->GetTypeRewrite()) { + HFunc(NHttp::TEvHttpProxy::TEvHttpIncomingResponse, Handle); + HFunc(NHttp::TEvHttpProxy::TEvHttpOutgoingRequest, Handle); + HFunc(NHttp::TEvHttpProxy::TEvAddListeningPort, Handle); + HFunc(NHttp::TEvHttpProxy::TEvRegisterHandler, Handle); + HFunc(NHttp::TEvHttpProxy::TEvHttpIncomingRequest, Handle); + HFunc(NHttp::TEvHttpProxy::TEvHttpOutgoingResponse, Handle); + CFunc(NActors::TEvents::TSystem::Wakeup, HandleRefresh); + } + } +}; + +TCachePolicy GetDefaultCachePolicy(const THttpRequest* request, const TCachePolicy& defaultPolicy) { + TCachePolicy policy = defaultPolicy; + THeaders headers(request->Headers); + TStringBuf cacheControl(headers["Cache-Control"]); + while (TStringBuf cacheItem = cacheControl.NextTok(',')) { + Trim(cacheItem, ' '); + if (cacheItem == "no-store" || cacheItem == "no-cache") { + policy.DiscardCache = true; + } + TStringBuf itemName = cacheItem.NextTok('='); + TrimEnd(itemName, ' '); + TrimBegin(cacheItem, ' '); + if (itemName == "max-age") { + policy.TimeToRefresh = policy.TimeToExpire = TDuration::Seconds(FromString(cacheItem)); + } + if (itemName == "min-fresh") { + policy.TimeToRefresh = policy.TimeToExpire = TDuration::Seconds(FromString(cacheItem)); + } + if (itemName == "stale-if-error") { + policy.KeepOnError = true; + } + } + return policy; +} + +NActors::IActor* CreateHttpCache(const NActors::TActorId& httpProxyId, TGetCachePolicy cachePolicy) { + return new THttpOutgoingCacheActor(httpProxyId, std::move(cachePolicy)); +} + +NActors::IActor* CreateOutgoingHttpCache(const NActors::TActorId& httpProxyId, TGetCachePolicy cachePolicy) { + return new THttpOutgoingCacheActor(httpProxyId, std::move(cachePolicy)); +} + +NActors::IActor* CreateIncomingHttpCache(const NActors::TActorId& httpProxyId, TGetCachePolicy cachePolicy) { + return new THttpIncomingCacheActor(httpProxyId, std::move(cachePolicy)); +} + +} diff --git a/library/cpp/actors/http/http_cache.h b/library/cpp/actors/http/http_cache.h new file mode 100644 index 0000000000..ac38bdcac8 --- /dev/null +++ b/library/cpp/actors/http/http_cache.h @@ -0,0 
+1,27 @@ +#pragma once +#include <library/cpp/actors/core/actor.h> +#include "http.h" + +namespace NHttp { + +struct TCachePolicy { + TDuration TimeToExpire; + TDuration TimeToRefresh; + TDuration PaceToRefresh; + bool KeepOnError = false; + bool DiscardCache = false; + TArrayRef<TString> HeadersToCacheKey; + TArrayRef<TString> StatusesToRetry; + ui32 RetriesCount = 0; + + TCachePolicy() = default; +}; + +using TGetCachePolicy = std::function<TCachePolicy(const THttpRequest*)>; + +NActors::IActor* CreateHttpCache(const NActors::TActorId& httpProxyId, TGetCachePolicy cachePolicy); +NActors::IActor* CreateOutgoingHttpCache(const NActors::TActorId& httpProxyId, TGetCachePolicy cachePolicy); +NActors::IActor* CreateIncomingHttpCache(const NActors::TActorId& httpProxyId, TGetCachePolicy cachePolicy); +TCachePolicy GetDefaultCachePolicy(const THttpRequest* request, const TCachePolicy& policy = TCachePolicy()); + +} diff --git a/library/cpp/actors/http/http_config.h b/library/cpp/actors/http/http_config.h new file mode 100644 index 0000000000..faeff79449 --- /dev/null +++ b/library/cpp/actors/http/http_config.h @@ -0,0 +1,19 @@ +#pragma once +#include <util/network/sock.h> +#include <library/cpp/actors/core/log.h> +#include <library/cpp/actors/protos/services_common.pb.h> + +namespace NHttp { + +struct THttpConfig { + static constexpr NActors::NLog::EComponent HttpLog = NActorsServices::EServiceCommon::HTTP; + static constexpr size_t BUFFER_SIZE = 64 * 1024; + static constexpr size_t BUFFER_MIN_STEP = 10 * 1024; + static constexpr int LISTEN_QUEUE = 10; + static constexpr TDuration SOCKET_TIMEOUT = TDuration::MilliSeconds(60000); + static constexpr TDuration CONNECTION_TIMEOUT = TDuration::MilliSeconds(60000); + using SocketType = TInet6StreamSocket; + using SocketAddressType = TSockAddrInet6; +}; + +} diff --git a/library/cpp/actors/http/http_proxy.cpp b/library/cpp/actors/http/http_proxy.cpp new file mode 100644 index 0000000000..36c6855d93 --- /dev/null +++ b/library/cpp/actors/http/http_proxy.cpp @@ -0,0 +1,314 @@ +#include <library/cpp/actors/core/events.h> +#include <library/cpp/monlib/metrics/metric_registry.h> +#include "http_proxy.h" + +namespace NHttp { + +class THttpProxy : public NActors::TActorBootstrapped<THttpProxy>, public THttpConfig { +public: + IActor* AddListeningPort(TEvHttpProxy::TEvAddListeningPort::TPtr event, const NActors::TActorContext& ctx) { + IActor* listeningSocket = CreateHttpAcceptorActor(ctx.SelfID, Poller); + TActorId acceptorId = ctx.Register(listeningSocket); + ctx.Send(event->Forward(acceptorId)); + Acceptors.emplace_back(acceptorId); + return listeningSocket; + } + + IActor* AddOutgoingConnection(const TString& address, bool secure, const NActors::TActorContext& ctx) { + IActor* connectionSocket = CreateOutgoingConnectionActor(ctx.SelfID, address, secure, Poller); + TActorId connectionId = ctx.Register(connectionSocket); + Connections.emplace(connectionId); + return connectionSocket; + } + + void Bootstrap(const NActors::TActorContext& ctx) { + Poller = ctx.Register(NActors::CreatePollerActor()); + Become(&THttpProxy::StateWork); + } + + THttpProxy(NMonitoring::TMetricRegistry& sensors) + : Sensors(sensors) + {} + +protected: + STFUNC(StateWork) { + switch (ev->GetTypeRewrite()) { + HFunc(TEvHttpProxy::TEvAddListeningPort, Handle); + HFunc(TEvHttpProxy::TEvRegisterHandler, Handle); + HFunc(TEvHttpProxy::TEvHttpIncomingRequest, Handle); + HFunc(TEvHttpProxy::TEvHttpOutgoingRequest, Handle); + HFunc(TEvHttpProxy::TEvHttpIncomingResponse, Handle); + 
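// both response events are routed here only by mistake: their Handle overloads Y_FAIL, since responses go directly to the connection actors + 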
HFunc(TEvHttpProxy::TEvHttpOutgoingResponse, Handle); + HFunc(TEvHttpProxy::TEvHttpAcceptorClosed, Handle); + HFunc(TEvHttpProxy::TEvHttpConnectionClosed, Handle); + HFunc(TEvHttpProxy::TEvResolveHostRequest, Handle); + HFunc(TEvHttpProxy::TEvReportSensors, Handle); + HFunc(NActors::TEvents::TEvPoison, Handle); + } + } + + void PassAway() override { + Send(Poller, new NActors::TEvents::TEvPoisonPill()); + for (const NActors::TActorId& connection : Connections) { + Send(connection, new NActors::TEvents::TEvPoisonPill()); + } + for (const NActors::TActorId& acceptor : Acceptors) { + Send(acceptor, new NActors::TEvents::TEvPoisonPill()); + } + NActors::TActorBootstrapped<THttpProxy>::PassAway(); + } + + void Handle(TEvHttpProxy::TEvHttpIncomingRequest::TPtr event, const NActors::TActorContext& ctx) { + TStringBuf url = event->Get()->Request->URL.Before('?'); + THashMap<TString, TActorId>::iterator it; + while (!url.empty()) { + it = Handlers.find(url); + if (it != Handlers.end()) { + ctx.Send(event->Forward(it->second)); + return; + } else { + if (url.EndsWith('/')) { + url.Trunc(url.size() - 1); + } + size_t pos = url.rfind('/'); + if (pos == TStringBuf::npos) { + break; + } else { + url = url.substr(0, pos + 1); + } + } + } + ctx.Send(event->Sender, new TEvHttpProxy::TEvHttpOutgoingResponse(event->Get()->Request->CreateResponseNotFound())); + } + + void Handle(TEvHttpProxy::TEvHttpIncomingResponse::TPtr event, const NActors::TActorContext& ctx) { + Y_UNUSED(event); + Y_UNUSED(ctx); + Y_FAIL("This event shouldn't be there, it should go to the http connection owner directly"); + } + + void Handle(TEvHttpProxy::TEvHttpOutgoingResponse::TPtr event, const NActors::TActorContext& ctx) { + Y_UNUSED(event); + Y_UNUSED(ctx); + Y_FAIL("This event shouldn't be there, it should go to the http connection directly"); + } + + void Handle(TEvHttpProxy::TEvHttpOutgoingRequest::TPtr event, const NActors::TActorContext& ctx) { + TStringBuf host(event->Get()->Request->Host); + bool secure(event->Get()->Request->Secure); + NActors::IActor* actor = AddOutgoingConnection(TString(host), secure, ctx); + ctx.Send(event->Forward(actor->SelfId())); + } + + void Handle(TEvHttpProxy::TEvAddListeningPort::TPtr event, const NActors::TActorContext& ctx) { + AddListeningPort(event, ctx); + } + + void Handle(TEvHttpProxy::TEvHttpAcceptorClosed::TPtr event, const NActors::TActorContext&) { + for (auto it = Acceptors.begin(); it != Acceptors.end(); ++it) { + if (*it == event->Get()->ConnectionID) { + Acceptors.erase(it); + break; + } + } + } + + void Handle(TEvHttpProxy::TEvHttpConnectionClosed::TPtr event, const NActors::TActorContext&) { + Connections.erase(event->Get()->ConnectionID); + } + + void Handle(TEvHttpProxy::TEvRegisterHandler::TPtr event, const NActors::TActorContext&) { + Handlers[event->Get()->Path] = event->Get()->Handler; + } + + void Handle(TEvHttpProxy::TEvResolveHostRequest::TPtr event, const NActors::TActorContext& ctx) { + const TString& host(event->Get()->Host); + auto it = Hosts.find(host); + if (it == Hosts.end() || it->second.DeadlineTime < ctx.Now()) { // re-resolve unknown hosts and entries whose TTL has expired + TString addressPart; + TIpPort portPart = 0; + CrackAddress(host, addressPart, portPart); + if (IsIPv6(addressPart)) { + TSockAddrInet6 address(addressPart.c_str(), portPart); + if (it == Hosts.end()) { + it = Hosts.emplace(host, THostEntry()).first; + } + it->second.Address = address; + it->second.DeadlineTime = ctx.Now() + HostsTimeToLive; + } else { + // TODO(xenoxeno): move to another, possibly blocking actor + try { + const NDns::TResolvedHost* 
result = NDns::CachedResolve(NDns::TResolveInfo(addressPart, portPart)); + if (result != nullptr) { + auto pAddr = result->Addr.Begin(); + while (pAddr != result->Addr.End() && pAddr->ai_family != AF_INET6) { + ++pAddr; + } + if (pAddr == result->Addr.End()) { + ctx.Send(event->Sender, new TEvHttpProxy::TEvResolveHostResponse("Invalid address family resolved")); + return; + } + TSockAddrInet6 address = {}; + static_cast<sockaddr_in6&>(address) = *reinterpret_cast<sockaddr_in6*>(pAddr->ai_addr); + LOG_DEBUG_S(ctx, HttpLog, "Host " << host << " resolved to " << address.ToString()); + if (it == Hosts.end()) { + it = Hosts.emplace(host, THostEntry()).first; + } + it->second.Address = address; + it->second.DeadlineTime = ctx.Now() + HostsTimeToLive; + } else { + ctx.Send(event->Sender, new TEvHttpProxy::TEvResolveHostResponse("Error resolving host")); + return; + } + } + catch (const yexception& e) { + ctx.Send(event->Sender, new TEvHttpProxy::TEvResolveHostResponse(e.what())); + return; + } + } + } + ctx.Send(event->Sender, new TEvHttpProxy::TEvResolveHostResponse(it->first, it->second.Address)); + } + + void Handle(TEvHttpProxy::TEvReportSensors::TPtr event, const NActors::TActorContext&) { + const TEvHttpProxy::TEvReportSensors& sensors(*event->Get()); + const static TString urlNotFound = "not-found"; + const TString& url = (sensors.Status == "404" ? urlNotFound : sensors.Url); + + Sensors.Rate({ + {"sensor", "count"}, + {"direction", sensors.Direction}, + {"peer", sensors.Host}, + {"url", url}, + {"status", sensors.Status} + })->Inc(); + Sensors.HistogramRate({ + {"sensor", "time_us"}, + {"direction", sensors.Direction}, + {"peer", sensors.Host}, + {"url", url}, + {"status", sensors.Status} + }, + NMonitoring::ExplicitHistogram({1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 30000, 60000}))->Record(sensors.Time.MicroSeconds()); + Sensors.HistogramRate({ + {"sensor", "time_ms"}, + {"direction", sensors.Direction}, + {"peer", sensors.Host}, + {"url", url}, + {"status", sensors.Status} + }, + NMonitoring::ExplicitHistogram({1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 30000, 60000}))->Record(sensors.Time.MilliSeconds()); + } + + void Handle(NActors::TEvents::TEvPoison::TPtr, const NActors::TActorContext&) { + PassAway(); + } + + NActors::TActorId Poller; + TVector<TActorId> Acceptors; + + struct THostEntry { + TSockAddrInet6 Address; + TInstant DeadlineTime; + }; + + static constexpr TDuration HostsTimeToLive = TDuration::Seconds(60); + + THashMap<TString, THostEntry> Hosts; + THashMap<TString, TActorId> Handlers; + THashSet<TActorId> Connections; // outgoing + NMonitoring::TMetricRegistry& Sensors; +}; + +TEvHttpProxy::TEvReportSensors* BuildOutgoingRequestSensors(const THttpOutgoingRequestPtr& request, const THttpIncomingResponsePtr& response) { + return new TEvHttpProxy::TEvReportSensors( + "out", + request->Host, + request->URL.Before('?'), + response ? 
response->Status : "504", + TDuration::Seconds(std::abs(request->Timer.Passed())) + ); +} + +TEvHttpProxy::TEvReportSensors* BuildIncomingRequestSensors(const THttpIncomingRequestPtr& request, const THttpOutgoingResponsePtr& response) { + return new TEvHttpProxy::TEvReportSensors( + "in", + request->Host, + request->URL.Before('?'), + response->Status, + TDuration::Seconds(std::abs(request->Timer.Passed())) + ); +} + +NActors::IActor* CreateHttpProxy(NMonitoring::TMetricRegistry& sensors) { + return new THttpProxy(sensors); +} + +bool IsIPv6(const TString& host) { + return host.find_first_not_of(":0123456789abcdef") == TString::npos; +} + +bool CrackURL(TStringBuf url, TStringBuf& scheme, TStringBuf& host, TStringBuf& uri) { + url.TrySplit("://", scheme, url); + auto pos = url.find('/'); + if (pos == TStringBuf::npos) { + host = url; + } else { + host = url.substr(0, pos); + uri = url.substr(pos); + } + return true; +} + +void CrackAddress(const TString& address, TString& hostname, TIpPort& port) { + size_t first_colon_pos = address.find(':'); + if (first_colon_pos != TString::npos) { + size_t last_colon_pos = address.rfind(':'); + if (last_colon_pos == first_colon_pos) { + // only one colon, simple case + port = FromStringWithDefault<TIpPort>(address.substr(first_colon_pos + 1), 0); + hostname = address.substr(0, first_colon_pos); + } else { + // ipv6? + size_t closing_bracket_pos = address.rfind(']'); + if (closing_bracket_pos == TString::npos || closing_bracket_pos > last_colon_pos) { + // whole address is ipv6 host + hostname = address; + } else { + port = FromStringWithDefault<TIpPort>(address.substr(last_colon_pos + 1), 0); + hostname = address.substr(0, last_colon_pos); + } + if (hostname.StartsWith('[') && hostname.EndsWith(']')) { + hostname = hostname.substr(1, hostname.size() - 2); + } + } + } else { + hostname = address; + } +} + + +void TrimBegin(TStringBuf& target, char delim) { + while (!target.empty() && *target.begin() == delim) { + target.Skip(1); + } +} + +void TrimEnd(TStringBuf& target, char delim) { + while (!target.empty() && target.back() == delim) { + target.Trunc(target.size() - 1); + } +} + +void Trim(TStringBuf& target, char delim) { + TrimBegin(target, delim); + TrimEnd(target, delim); +} + +void TrimEnd(TString& target, char delim) { + while (!target.empty() && target.back() == delim) { + target.resize(target.size() - 1); + } +} + +} diff --git a/library/cpp/actors/http/http_proxy.h b/library/cpp/actors/http/http_proxy.h new file mode 100644 index 0000000000..afd0170997 --- /dev/null +++ b/library/cpp/actors/http/http_proxy.h @@ -0,0 +1,239 @@ +#pragma once +#include <library/cpp/actors/core/actorsystem.h> +#include <library/cpp/actors/core/actor.h> +#include <library/cpp/actors/core/hfunc.h> +#include <library/cpp/actors/core/events.h> +#include <library/cpp/actors/core/event_local.h> +#include <library/cpp/actors/core/actor_bootstrapped.h> +#include <library/cpp/actors/core/log.h> +#include <library/cpp/actors/interconnect/poller_actor.h> +#include <library/cpp/dns/cache.h> +#include <library/cpp/monlib/metrics/metric_registry.h> +#include <util/generic/variant.h> +#include "http.h" +#include "http_proxy_ssl.h" + +namespace NHttp { + +struct TSocketDescriptor : NActors::TSharedDescriptor, THttpConfig { + SocketType Socket; + + int GetDescriptor() override { + return static_cast<SOCKET>(Socket); + } +}; + +struct TEvHttpProxy { + enum EEv { + EvAddListeningPort = EventSpaceBegin(NActors::TEvents::ES_HTTP), + EvConfirmListen, + EvRegisterHandler, + 
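// request/response traffic between the proxy, connection actors and request handlers + 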
EvHttpIncomingRequest, + EvHttpOutgoingRequest, + EvHttpIncomingResponse, + EvHttpOutgoingResponse, + EvHttpConnectionOpened, + EvHttpConnectionClosed, + EvHttpAcceptorClosed, + EvResolveHostRequest, + EvResolveHostResponse, + EvReportSensors, + EvEnd + }; + + static_assert(EvEnd < EventSpaceEnd(NActors::TEvents::ES_HTTP), "ES_HTTP event space is too small."); + + struct TEvAddListeningPort : NActors::TEventLocal<TEvAddListeningPort, EvAddListeningPort> { + TIpPort Port; + TString WorkerName; + bool Secure = false; + TString CertificateFile; + TString PrivateKeyFile; + TString SslCertificatePem; + + TEvAddListeningPort(TIpPort port) + : Port(port) + {} + + TEvAddListeningPort(TIpPort port, const TString& workerName) + : Port(port) + , WorkerName(workerName) + {} + }; + + struct TEvConfirmListen : NActors::TEventLocal<TEvConfirmListen, EvConfirmListen> { + THttpConfig::SocketAddressType Address; + + TEvConfirmListen(const THttpConfig::SocketAddressType& address) + : Address(address) + {} + }; + + struct TEvRegisterHandler : NActors::TEventLocal<TEvRegisterHandler, EvRegisterHandler> { + TString Path; + TActorId Handler; + + TEvRegisterHandler(const TString& path, const TActorId& handler) + : Path(path) + , Handler(handler) + {} + }; + + struct TEvHttpIncomingRequest : NActors::TEventLocal<TEvHttpIncomingRequest, EvHttpIncomingRequest> { + THttpIncomingRequestPtr Request; + + TEvHttpIncomingRequest(THttpIncomingRequestPtr request) + : Request(std::move(request)) + {} + }; + + struct TEvHttpOutgoingRequest : NActors::TEventLocal<TEvHttpOutgoingRequest, EvHttpOutgoingRequest> { + THttpOutgoingRequestPtr Request; + TDuration Timeout; + + TEvHttpOutgoingRequest(THttpOutgoingRequestPtr request) + : Request(std::move(request)) + {} + + TEvHttpOutgoingRequest(THttpOutgoingRequestPtr request, TDuration timeout) + : Request(std::move(request)) + , Timeout(timeout) + {} + }; + + struct TEvHttpIncomingResponse : NActors::TEventLocal<TEvHttpIncomingResponse, EvHttpIncomingResponse> { + THttpOutgoingRequestPtr Request; + THttpIncomingResponsePtr Response; + TString Error; + + TEvHttpIncomingResponse(THttpOutgoingRequestPtr request, THttpIncomingResponsePtr response, const TString& error) + : Request(std::move(request)) + , Response(std::move(response)) + , Error(error) + {} + + TEvHttpIncomingResponse(THttpOutgoingRequestPtr request, THttpIncomingResponsePtr response) + : Request(std::move(request)) + , Response(std::move(response)) + {} + + TString GetError() const { + TStringBuilder error; + if (Response != nullptr && !Response->Status.StartsWith('2')) { + error << Response->Status << ' ' << Response->Message; + } + if (!Error.empty()) { + if (!error.empty()) { + error << ';'; + } + error << Error; + } + return error; + } + }; + + struct TEvHttpOutgoingResponse : NActors::TEventLocal<TEvHttpOutgoingResponse, EvHttpOutgoingResponse> { + THttpOutgoingResponsePtr Response; + + TEvHttpOutgoingResponse(THttpOutgoingResponsePtr response) + : Response(std::move(response)) + {} + }; + + struct TEvHttpConnectionOpened : NActors::TEventLocal<TEvHttpConnectionOpened, EvHttpConnectionOpened> { + TString PeerAddress; + TActorId ConnectionID; + + TEvHttpConnectionOpened(const TString& peerAddress, const TActorId& connectionID) + : PeerAddress(peerAddress) + , ConnectionID(connectionID) + {} + }; + + struct TEvHttpConnectionClosed : NActors::TEventLocal<TEvHttpConnectionClosed, EvHttpConnectionClosed> { + TActorId ConnectionID; + TDeque<THttpIncomingRequestPtr> RecycledRequests; + + TEvHttpConnectionClosed(const 
TActorId& connectionID) + : ConnectionID(connectionID) + {} + + TEvHttpConnectionClosed(const TActorId& connectionID, TDeque<THttpIncomingRequestPtr> recycledRequests) + : ConnectionID(connectionID) + , RecycledRequests(std::move(recycledRequests)) + {} + }; + + struct TEvHttpAcceptorClosed : NActors::TEventLocal<TEvHttpAcceptorClosed, EvHttpAcceptorClosed> { + TActorId ConnectionID; + + TEvHttpAcceptorClosed(const TActorId& connectionID) + : ConnectionID(connectionID) + {} + }; + + struct TEvResolveHostRequest : NActors::TEventLocal<TEvResolveHostRequest, EvResolveHostRequest> { + TString Host; + + TEvResolveHostRequest(const TString& host) + : Host(host) + {} + }; + + struct TEvResolveHostResponse : NActors::TEventLocal<TEvResolveHostResponse, EvResolveHostResponse> { + TString Host; + TSockAddrInet6 Address; + TString Error; + + TEvResolveHostResponse(const TString& host, const TSockAddrInet6& address) + : Host(host) + , Address(address) + {} + + TEvResolveHostResponse(const TString& error) + : Error(error) + {} + }; + + struct TEvReportSensors : NActors::TEventLocal<TEvReportSensors, EvReportSensors> { + TString Direction; + TString Host; + TString Url; + TString Status; + TDuration Time; + + TEvReportSensors( + TStringBuf direction, + TStringBuf host, + TStringBuf url, + TStringBuf status, + TDuration time) + : Direction(direction) + , Host(host) + , Url(url) + , Status(status) + , Time(time) + {} + }; +}; + +struct TEndpointInfo { + TActorId Proxy; + TActorId Owner; + TString WorkerName; + bool Secure; + TSslHelpers::TSslHolder<SSL_CTX> SecureContext; +}; + +NActors::IActor* CreateHttpProxy(NMonitoring::TMetricRegistry& sensors); +NActors::IActor* CreateHttpAcceptorActor(const TActorId& owner, const TActorId& poller); +NActors::IActor* CreateOutgoingConnectionActor(const TActorId& owner, const TString& host, bool secure, const TActorId& poller); +NActors::IActor* CreateIncomingConnectionActor( + const TEndpointInfo& endpoint, + TIntrusivePtr<TSocketDescriptor> socket, + THttpConfig::SocketAddressType address, + THttpIncomingRequestPtr recycledRequest = nullptr); +TEvHttpProxy::TEvReportSensors* BuildOutgoingRequestSensors(const THttpOutgoingRequestPtr& request, const THttpIncomingResponsePtr& response); +TEvHttpProxy::TEvReportSensors* BuildIncomingRequestSensors(const THttpIncomingRequestPtr& request, const THttpOutgoingResponsePtr& response); + +} diff --git a/library/cpp/actors/http/http_proxy_acceptor.cpp b/library/cpp/actors/http/http_proxy_acceptor.cpp new file mode 100644 index 0000000000..9780541b71 --- /dev/null +++ b/library/cpp/actors/http/http_proxy_acceptor.cpp @@ -0,0 +1,135 @@ +#include <util/network/sock.h> +#include "http_proxy.h" +#include "http_proxy_ssl.h" + +namespace NHttp { + +class TAcceptorActor : public NActors::TActor<TAcceptorActor>, public THttpConfig { +public: + using TBase = NActors::TActor<TAcceptorActor>; + const TActorId Owner; + const TActorId Poller; + TIntrusivePtr<TSocketDescriptor> Socket; + NActors::TPollerToken::TPtr PollerToken; + THashSet<TActorId> Connections; + TDeque<THttpIncomingRequestPtr> RecycledRequests; + TEndpointInfo Endpoint; + + TAcceptorActor(const TActorId& owner, const TActorId& poller) + : NActors::TActor<TAcceptorActor>(&TAcceptorActor::StateInit) + , Owner(owner) + , Poller(poller) + , Socket(new TSocketDescriptor()) + { + // for unit tests :( + CheckedSetSockOpt(Socket->Socket, SOL_SOCKET, SO_REUSEADDR, (int)true, "reuse address"); +#ifdef SO_REUSEPORT + CheckedSetSockOpt(Socket->Socket, SOL_SOCKET, SO_REUSEPORT, 
(int)true, "reuse port"); +#endif + } + +protected: + STFUNC(StateListening) { + switch (ev->GetTypeRewrite()) { + HFunc(NActors::TEvPollerRegisterResult, Handle); + HFunc(NActors::TEvPollerReady, Handle); + HFunc(TEvHttpProxy::TEvHttpConnectionClosed, Handle); + HFunc(TEvHttpProxy::TEvReportSensors, Handle); + } + } + + STFUNC(StateInit) { + switch (ev->GetTypeRewrite()) { + HFunc(TEvHttpProxy::TEvAddListeningPort, HandleInit); + } + } + + void HandleInit(TEvHttpProxy::TEvAddListeningPort::TPtr event, const NActors::TActorContext& ctx) { + SocketAddressType bindAddress("::", event->Get()->Port); + Endpoint.Owner = ctx.SelfID; + Endpoint.Proxy = Owner; + Endpoint.WorkerName = event->Get()->WorkerName; + Endpoint.Secure = event->Get()->Secure; + int err = 0; + if (Endpoint.Secure) { + if (!event->Get()->SslCertificatePem.empty()) { + Endpoint.SecureContext = TSslHelpers::CreateServerContext(event->Get()->SslCertificatePem); + } else { + Endpoint.SecureContext = TSslHelpers::CreateServerContext(event->Get()->CertificateFile, event->Get()->PrivateKeyFile); + } + if (Endpoint.SecureContext == nullptr) { + err = -1; + LOG_WARN_S(ctx, HttpLog, "Failed to construct server security context"); + } + } + if (err == 0) { + err = Socket->Socket.Bind(&bindAddress); + } + if (err == 0) { + err = Socket->Socket.Listen(LISTEN_QUEUE); + if (err == 0) { + LOG_INFO_S(ctx, HttpLog, "Listening on " << bindAddress.ToString()); + SetNonBlock(Socket->Socket); + ctx.Send(Poller, new NActors::TEvPollerRegister(Socket, SelfId(), SelfId())); + TBase::Become(&TAcceptorActor::StateListening); + ctx.Send(event->Sender, new TEvHttpProxy::TEvConfirmListen(bindAddress), 0, event->Cookie); + return; + } + } + LOG_WARN_S(ctx, HttpLog, "Failed to listen on " << bindAddress.ToString() << " - retrying..."); + ctx.ExecutorThread.Schedule(TDuration::Seconds(1), event.Release()); + } + + void Die(const NActors::TActorContext& ctx) override { + ctx.Send(Owner, new TEvHttpProxy::TEvHttpAcceptorClosed(ctx.SelfID)); + for (const NActors::TActorId& connection : Connections) { + ctx.Send(connection, new NActors::TEvents::TEvPoisonPill()); + } + } + + void Handle(NActors::TEvPollerRegisterResult::TPtr ev, const NActors::TActorContext& /*ctx*/) { + PollerToken = std::move(ev->Get()->PollerToken); + PollerToken->Request(true, false); // request read polling + } + + void Handle(NActors::TEvPollerReady::TPtr, const NActors::TActorContext& ctx) { + TIntrusivePtr<TSocketDescriptor> socket = new TSocketDescriptor(); + SocketAddressType addr; + int err; + while ((err = Socket->Socket.Accept(&socket->Socket, &addr)) == 0) { + NActors::IActor* connectionSocket = nullptr; + if (RecycledRequests.empty()) { + connectionSocket = CreateIncomingConnectionActor(Endpoint, socket, addr); + } else { + connectionSocket = CreateIncomingConnectionActor(Endpoint, socket, addr, std::move(RecycledRequests.front())); + RecycledRequests.pop_front(); + } + NActors::TActorId connectionId = ctx.Register(connectionSocket); + ctx.Send(Poller, new NActors::TEvPollerRegister(socket, connectionId, connectionId)); + Connections.emplace(connectionId); + socket = new TSocketDescriptor(); + } + if (err == -EAGAIN || err == -EWOULDBLOCK) { // request poller for further connection polling + Y_VERIFY(PollerToken); + PollerToken->Request(true, false); + } + } + + void Handle(TEvHttpProxy::TEvHttpConnectionClosed::TPtr event, const NActors::TActorContext&) { + Connections.erase(event->Get()->ConnectionID); + for (auto& req : event->Get()->RecycledRequests) { + req->Clear(); + 
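+            // Recycling path: a cleared request keeps its already-grown buffers, so
+            // it can be handed to the next accepted connection (see the optional
+            // recycledRequest argument of CreateIncomingConnectionActor) instead of
+            // allocating a fresh request per connection.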
RecycledRequests.push_back(std::move(req)); + } + } + + void Handle(TEvHttpProxy::TEvReportSensors::TPtr event, const NActors::TActorContext& ctx) { + ctx.Send(event->Forward(Owner)); + } +}; + +NActors::IActor* CreateHttpAcceptorActor(const TActorId& owner, const TActorId& poller) { + return new TAcceptorActor(owner, poller); +} + +} diff --git a/library/cpp/actors/http/http_proxy_incoming.cpp b/library/cpp/actors/http/http_proxy_incoming.cpp new file mode 100644 index 0000000000..80fe2af53d --- /dev/null +++ b/library/cpp/actors/http/http_proxy_incoming.cpp @@ -0,0 +1,302 @@ +#include "http_proxy.h" +#include "http_proxy_sock_impl.h" + +namespace NHttp { + +using namespace NActors; + +template <typename TSocketImpl> +class TIncomingConnectionActor : public TActor<TIncomingConnectionActor<TSocketImpl>>, public TSocketImpl, virtual public THttpConfig { +public: + using TBase = TActor<TIncomingConnectionActor<TSocketImpl>>; + static constexpr bool RecycleRequests = true; + + const TEndpointInfo& Endpoint; + SocketAddressType Address; + TList<THttpIncomingRequestPtr> Requests; + THashMap<THttpIncomingRequestPtr, THttpOutgoingResponsePtr> Responses; + THttpIncomingRequestPtr CurrentRequest; + THttpOutgoingResponsePtr CurrentResponse; + TDeque<THttpIncomingRequestPtr> RecycledRequests; + + THPTimer InactivityTimer; + static constexpr TDuration InactivityTimeout = TDuration::Minutes(2); + TEvPollerReady* InactivityEvent = nullptr; + + TPollerToken::TPtr PollerToken; + + TIncomingConnectionActor( + const TEndpointInfo& endpoint, + TIntrusivePtr<TSocketDescriptor> socket, + SocketAddressType address, + THttpIncomingRequestPtr recycledRequest = nullptr) + : TBase(&TIncomingConnectionActor::StateAccepting) + , TSocketImpl(std::move(socket)) + , Endpoint(endpoint) + , Address(address) + { + if (recycledRequest != nullptr) { + RecycledRequests.emplace_back(std::move(recycledRequest)); + } + TSocketImpl::SetNonBlock(); + } + + void CleanupRequest(THttpIncomingRequestPtr& request) { + if (RecycleRequests) { + request->Clear(); + RecycledRequests.push_back(std::move(request)); + } else { + request = nullptr; + } + } + + void CleanupResponse(THttpOutgoingResponsePtr& response) { + CleanupRequest(response->Request); + // TODO: maybe recycle too? 
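+        // Note: only the request embedded in the response is recycled (via
+        // CleanupRequest above); the response object itself is dropped here.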
+ response = nullptr; + } + + TAutoPtr<IEventHandle> AfterRegister(const TActorId& self, const TActorId& parent) override { + return new IEventHandle(self, parent, new TEvents::TEvBootstrap()); + } + + void Die(const TActorContext& ctx) override { + ctx.Send(Endpoint.Owner, new TEvHttpProxy::TEvHttpConnectionClosed(ctx.SelfID, std::move(RecycledRequests))); + TSocketImpl::Shutdown(); + TBase::Die(ctx); + } + +protected: + void Bootstrap(const TActorContext& ctx) { + InactivityTimer.Reset(); + ctx.Schedule(InactivityTimeout, InactivityEvent = new TEvPollerReady(nullptr, false, false)); + LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") incoming connection opened"); + OnAccept(ctx); + } + + void OnAccept(const NActors::TActorContext& ctx) { + int res; + bool read = false, write = false; + if ((res = TSocketImpl::OnAccept(Endpoint, read, write)) != 1) { + if (-res == EAGAIN) { + if (PollerToken) { + PollerToken->Request(read, write); + } + return; // wait for further notifications + } else { + LOG_ERROR_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") connection closed - error in Accept: " << strerror(-res)); + return Die(ctx); + } + } + TBase::Become(&TIncomingConnectionActor::StateConnected); + ctx.Send(ctx.SelfID, new TEvPollerReady(nullptr, true, true)); + } + + void HandleAccepting(TEvPollerRegisterResult::TPtr ev, const NActors::TActorContext& ctx) { + PollerToken = std::move(ev->Get()->PollerToken); + OnAccept(ctx); + } + + void HandleAccepting(NActors::TEvPollerReady::TPtr, const NActors::TActorContext& ctx) { + OnAccept(ctx); + } + + void HandleConnected(TEvPollerReady::TPtr event, const TActorContext& ctx) { + if (event->Get()->Read) { + for (;;) { + if (CurrentRequest == nullptr) { + if (RecycleRequests && !RecycledRequests.empty()) { + CurrentRequest = std::move(RecycledRequests.front()); + RecycledRequests.pop_front(); + } else { + CurrentRequest = new THttpIncomingRequest(); + } + CurrentRequest->Address = Address; + CurrentRequest->WorkerName = Endpoint.WorkerName; + CurrentRequest->Secure = Endpoint.Secure; + } + if (!CurrentRequest->EnsureEnoughSpaceAvailable()) { + LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") connection closed - not enough space available"); + return Die(ctx); + } + ssize_t need = CurrentRequest->Avail(); + bool read = false, write = false; + ssize_t res = TSocketImpl::Recv(CurrentRequest->Pos(), need, read, write); + if (res > 0) { + InactivityTimer.Reset(); + CurrentRequest->Advance(res); + if (CurrentRequest->IsDone()) { + Requests.emplace_back(CurrentRequest); + CurrentRequest->Timer.Reset(); + if (CurrentRequest->IsReady()) { + LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") -> (" << CurrentRequest->Method << " " << CurrentRequest->URL << ")"); + ctx.Send(Endpoint.Proxy, new TEvHttpProxy::TEvHttpIncomingRequest(CurrentRequest)); + CurrentRequest = nullptr; + } else if (CurrentRequest->IsError()) { + LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") -! 
(" << CurrentRequest->Method << " " << CurrentRequest->URL << ")"); + bool success = Respond(CurrentRequest->CreateResponseBadRequest(), ctx); + if (!success) { + return; + } + CurrentRequest = nullptr; + } + } + } else if (-res == EAGAIN || -res == EWOULDBLOCK) { + if (PollerToken) { + if (!read && !write) { + read = true; + } + PollerToken->Request(read, write); + } + break; + } else if (-res == EINTR) { + continue; + } else if (!res) { + // connection closed + LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") connection closed"); + return Die(ctx); + } else { + LOG_ERROR_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") connection closed - error in Receive: " << strerror(-res)); + return Die(ctx); + } + } + if (event->Get() == InactivityEvent) { + const TDuration passed = TDuration::Seconds(std::abs(InactivityTimer.Passed())); + if (passed >= InactivityTimeout) { + LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") connection closed by inactivity timeout"); + return Die(ctx); // timeout + } else { + ctx.Schedule(InactivityTimeout - passed, InactivityEvent = new TEvPollerReady(nullptr, false, false)); + } + } + } + if (event->Get()->Write) { + FlushOutput(ctx); + } + } + + void HandleConnected(TEvPollerRegisterResult::TPtr ev, const TActorContext& /*ctx*/) { + PollerToken = std::move(ev->Get()->PollerToken); + PollerToken->Request(true, true); + } + + void HandleConnected(TEvHttpProxy::TEvHttpOutgoingResponse::TPtr event, const TActorContext& ctx) { + Respond(event->Get()->Response, ctx); + } + + bool Respond(THttpOutgoingResponsePtr response, const TActorContext& ctx) { + THttpIncomingRequestPtr request = response->GetRequest(); + response->Finish(); + LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") <- (" << response->Status << " " << response->Message << ")"); + if (response->Status != "200" && response->Status != "404") { + static constexpr size_t MAX_LOGGED_SIZE = 1024; + LOG_DEBUG_S(ctx, HttpLog, + "(#" + << TSocketImpl::GetRawSocket() + << "," + << Address + << ") Request: " + << request->GetObfuscatedData().substr(0, MAX_LOGGED_SIZE)); + LOG_DEBUG_S(ctx, HttpLog, + "(#" + << TSocketImpl::GetRawSocket() + << "," + << Address + << ") Response: " + << TString(response->GetRawData()).substr(0, MAX_LOGGED_SIZE)); + } + THolder<TEvHttpProxy::TEvReportSensors> sensors(BuildIncomingRequestSensors(request, response)); + ctx.Send(Endpoint.Owner, sensors.Release()); + if (request == Requests.front() && CurrentResponse == nullptr) { + CurrentResponse = response; + return FlushOutput(ctx); + } else { + // we are ahead of our pipeline + Responses.emplace(request, response); + return true; + } + } + + bool FlushOutput(const TActorContext& ctx) { + while (CurrentResponse != nullptr) { + size_t size = CurrentResponse->Size(); + if (size == 0) { + Y_VERIFY(Requests.front() == CurrentResponse->GetRequest()); + bool close = CurrentResponse->IsConnectionClose(); + Requests.pop_front(); + CleanupResponse(CurrentResponse); + if (!Requests.empty()) { + auto it = Responses.find(Requests.front()); + if (it != Responses.end()) { + CurrentResponse = it->second; + Responses.erase(it); + continue; + } else { + LOG_ERROR_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") connection closed - FlushOutput request not found"); + Die(ctx); + return false; + } + } else { + if (close) { + LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << 
Address << ") connection closed"); + Die(ctx); + return false; + } else { + continue; + } + } + } + bool read = false, write = false; + ssize_t res = TSocketImpl::Send(CurrentResponse->Data(), size, read, write); + if (res > 0) { + CurrentResponse->ChopHead(res); + } else if (-res == EINTR) { + continue; + } else if (-res == EAGAIN || -res == EWOULDBLOCK) { + if (PollerToken) { + if (!read && !write) { + write = true; + } + PollerToken->Request(read, write); + } + break; + } else { + CleanupResponse(CurrentResponse); + LOG_ERROR_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") connection closed - error in FlushOutput: " << strerror(-res)); + Die(ctx); + return false; + } + } + return true; + } + + STFUNC(StateAccepting) { + switch (ev->GetTypeRewrite()) { + CFunc(TEvents::TEvBootstrap::EventType, Bootstrap); + HFunc(TEvPollerReady, HandleAccepting); + HFunc(TEvPollerRegisterResult, HandleAccepting); + } + } + + STFUNC(StateConnected) { + switch (ev->GetTypeRewrite()) { + HFunc(TEvPollerReady, HandleConnected); + HFunc(TEvHttpProxy::TEvHttpOutgoingResponse, HandleConnected); + HFunc(TEvPollerRegisterResult, HandleConnected); + } + } +}; + +IActor* CreateIncomingConnectionActor( + const TEndpointInfo& endpoint, + TIntrusivePtr<TSocketDescriptor> socket, + THttpConfig::SocketAddressType address, + THttpIncomingRequestPtr recycledRequest) { + if (endpoint.Secure) { + return new TIncomingConnectionActor<TSecureSocketImpl>(endpoint, std::move(socket), address, std::move(recycledRequest)); + } else { + return new TIncomingConnectionActor<TPlainSocketImpl>(endpoint, std::move(socket), address, std::move(recycledRequest)); + } +} + +} diff --git a/library/cpp/actors/http/http_proxy_outgoing.cpp b/library/cpp/actors/http/http_proxy_outgoing.cpp new file mode 100644 index 0000000000..d9189dba8a --- /dev/null +++ b/library/cpp/actors/http/http_proxy_outgoing.cpp @@ -0,0 +1,298 @@ +#include "http_proxy.h" +#include "http_proxy_sock_impl.h" + +namespace NHttp { + +template <typename TSocketImpl> +class TOutgoingConnectionActor : public NActors::TActor<TOutgoingConnectionActor<TSocketImpl>>, public TSocketImpl, virtual public THttpConfig { +public: + using TBase = NActors::TActor<TOutgoingConnectionActor<TSocketImpl>>; + using TSelf = TOutgoingConnectionActor<TSocketImpl>; + const TActorId Owner; + const TActorId Poller; + SocketAddressType Address; + TString Host; + TActorId RequestOwner; + THttpOutgoingRequestPtr Request; + THttpIncomingResponsePtr Response; + TInstant LastActivity; + TDuration ConnectionTimeout = CONNECTION_TIMEOUT; + NActors::TPollerToken::TPtr PollerToken; + + TOutgoingConnectionActor(const TActorId& owner, const TString& host, const TActorId& poller) + : TBase(&TSelf::StateWaiting) + , Owner(owner) + , Poller(poller) + , Host(host) + { + TSocketImpl::SetNonBlock(); + TSocketImpl::SetTimeout(SOCKET_TIMEOUT); + } + + void Die(const NActors::TActorContext& ctx) override { + ctx.Send(Owner, new TEvHttpProxy::TEvHttpConnectionClosed(ctx.SelfID)); + TSocketImpl::Shutdown(); // to avoid errors when connection already closed + TBase::Die(ctx); + } + + void ReplyAndDie(const NActors::TActorContext& ctx) { + LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") -> (" << Response->Status << " " << Response->Message << ")"); + ctx.Send(RequestOwner, new TEvHttpProxy::TEvHttpIncomingResponse(Request, Response)); + RequestOwner = TActorId(); + THolder<TEvHttpProxy::TEvReportSensors> sensors(BuildOutgoingRequestSensors(Request, 
Response)); + ctx.Send(Owner, sensors.Release()); + LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") connection closed"); + Die(ctx); + } + + void ReplyErrorAndDie(const NActors::TActorContext& ctx, const TString& error) { + LOG_ERROR_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") connection closed with error: " << error); + if (RequestOwner) { + ctx.Send(RequestOwner, new TEvHttpProxy::TEvHttpIncomingResponse(Request, Response, error)); + RequestOwner = TActorId(); + THolder<TEvHttpProxy::TEvReportSensors> sensors(BuildOutgoingRequestSensors(Request, Response)); + ctx.Send(Owner, sensors.Release()); + Die(ctx); + } + } + +protected: + void FailConnection(const NActors::TActorContext& ctx, const TString& error) { + if (Request) { + return ReplyErrorAndDie(ctx, error); + } + return TBase::Become(&TOutgoingConnectionActor::StateFailed); + } + + void Connect(const NActors::TActorContext& ctx) { + LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") connecting"); + int res = TSocketImpl::Connect(Address); + RegisterPoller(ctx); + switch (-res) { + case 0: + return OnConnect(ctx); + case EINPROGRESS: + case EAGAIN: + return TBase::Become(&TOutgoingConnectionActor::StateConnecting); + default: + return ReplyErrorAndDie(ctx, strerror(-res)); + } + } + + void FlushOutput(const NActors::TActorContext& ctx) { + if (Request != nullptr) { + Request->Finish(); + while (auto size = Request->Size()) { + bool read = false, write = false; + ssize_t res = TSocketImpl::Send(Request->Data(), size, read, write); + if (res > 0) { + Request->ChopHead(res); + } else if (-res == EINTR) { + continue; + } else if (-res == EAGAIN || -res == EWOULDBLOCK) { + if (PollerToken) { + if (!read && !write) { + write = true; + } + PollerToken->Request(read, write); + } + break; + } else { + if (!res) { + ReplyAndDie(ctx); + } else { + ReplyErrorAndDie(ctx, strerror(-res)); + } + break; + } + } + } + } + + void PullInput(const NActors::TActorContext& ctx) { + for (;;) { + if (Response == nullptr) { + Response = new THttpIncomingResponse(Request); + } + if (!Response->EnsureEnoughSpaceAvailable()) { + return ReplyErrorAndDie(ctx, "Not enough space in socket buffer"); + } + bool read = false, write = false; + ssize_t res = TSocketImpl::Recv(Response->Pos(), Response->Avail(), read, write); + if (res > 0) { + Response->Advance(res); + if (Response->IsDone() && Response->IsReady()) { + return ReplyAndDie(ctx); + } + } else if (-res == EINTR) { + continue; + } else if (-res == EAGAIN || -res == EWOULDBLOCK) { + if (PollerToken) { + if (!read && !write) { + read = true; + } + PollerToken->Request(read, write); + } + return; + } else { + if (!res) { + Response->ConnectionClosed(); + } + if (Response->IsDone() && Response->IsReady()) { + return ReplyAndDie(ctx); + } + return ReplyErrorAndDie(ctx, strerror(-res)); + } + } + } + + void RegisterPoller(const NActors::TActorContext& ctx) { + ctx.Send(Poller, new NActors::TEvPollerRegister(TSocketImpl::Socket, ctx.SelfID, ctx.SelfID)); + } + + void OnConnect(const NActors::TActorContext& ctx) { + bool read = false, write = false; + if (int res = TSocketImpl::OnConnect(read, write); res != 1) { + if (-res == EAGAIN) { + if (PollerToken) { + PollerToken->Request(read, write); + } + return; + } else { + return ReplyErrorAndDie(ctx, strerror(-res)); + } + } + LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") outgoing connection opened"); + 
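+        // The connect (including the TLS handshake for TSecureSocketImpl) is done;
+        // switch to the connected state and self-send a synthetic TEvPollerReady
+        // with read = write = true so the pending request is flushed immediately
+        // rather than waiting for a real poller notification.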
TBase::Become(&TOutgoingConnectionActor::StateConnected);
+        LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << "," << Address << ") <- (" << Request->Method << " " << Request->URL << ")");
+        ctx.Send(ctx.SelfID, new NActors::TEvPollerReady(nullptr, true, true));
+    }
+
+    void HandleResolving(TEvHttpProxy::TEvResolveHostResponse::TPtr event, const NActors::TActorContext& ctx) {
+        LastActivity = ctx.Now();
+        if (!event->Get()->Error.empty()) {
+            return FailConnection(ctx, event->Get()->Error);
+        }
+        Address = event->Get()->Address;
+        if (Address.GetPort() == 0) {
+            Address.SetPort(Request->Secure ? 443 : 80);
+        }
+        Connect(ctx);
+    }
+
+    void HandleConnecting(NActors::TEvPollerReady::TPtr, const NActors::TActorContext& ctx) {
+        LastActivity = ctx.Now();
+        int res = TSocketImpl::GetError();
+        if (res == 0) {
+            OnConnect(ctx);
+        } else {
+            FailConnection(ctx, TStringBuilder() << strerror(res));
+        }
+    }
+
+    void HandleConnecting(NActors::TEvPollerRegisterResult::TPtr ev, const NActors::TActorContext& ctx) {
+        PollerToken = std::move(ev->Get()->PollerToken);
+        LastActivity = ctx.Now();
+        int res = TSocketImpl::GetError();
+        if (res == 0) {
+            OnConnect(ctx);
+        } else {
+            FailConnection(ctx, TStringBuilder() << strerror(res));
+        }
+    }
+
+    void HandleWaiting(TEvHttpProxy::TEvHttpOutgoingRequest::TPtr event, const NActors::TActorContext& ctx) {
+        LastActivity = ctx.Now();
+        Request = std::move(event->Get()->Request);
+        Host = Request->Host;
+        LOG_DEBUG_S(ctx, HttpLog, "(#" << TSocketImpl::GetRawSocket() << ") resolving " << Host);
+        Request->Timer.Reset();
+        RequestOwner = event->Sender;
+        ctx.Send(Owner, new TEvHttpProxy::TEvResolveHostRequest(Host));
+        if (event->Get()->Timeout) {
+            ConnectionTimeout = event->Get()->Timeout;
+            TSocketImpl::SetTimeout(ConnectionTimeout);
+        }
+        ctx.Schedule(ConnectionTimeout, new NActors::TEvents::TEvWakeup());
+        LastActivity = ctx.Now();
+        TBase::Become(&TOutgoingConnectionActor::StateResolving);
+    }
+
+    void HandleConnected(NActors::TEvPollerReady::TPtr event, const NActors::TActorContext& ctx) {
+        LastActivity = ctx.Now();
+        if (event->Get()->Read) {
+            PullInput(ctx);
+        }
+        if (event->Get()->Write) {
+            FlushOutput(ctx);
+        }
+    }
+
+    void HandleConnected(NActors::TEvPollerRegisterResult::TPtr ev, const NActors::TActorContext& ctx) {
+        PollerToken = std::move(ev->Get()->PollerToken);
+        LastActivity = ctx.Now();
+        PullInput(ctx);
+        FlushOutput(ctx);
+    }
+
+    void HandleFailed(TEvHttpProxy::TEvHttpOutgoingRequest::TPtr event, const NActors::TActorContext& ctx) {
+        Request = std::move(event->Get()->Request);
+        RequestOwner = event->Sender;
+        ReplyErrorAndDie(ctx, "Failed");
+    }
+
+    void HandleTimeout(const NActors::TActorContext& ctx) {
+        TDuration inactivityTime = ctx.Now() - LastActivity;
+        if (inactivityTime >= ConnectionTimeout) {
+            FailConnection(ctx, "Connection timed out");
+        } else {
+            ctx.Schedule(Min(ConnectionTimeout - inactivityTime, TDuration::MilliSeconds(100)), new NActors::TEvents::TEvWakeup());
+        }
+    }
+
+    STFUNC(StateWaiting) {
+        switch (ev->GetTypeRewrite()) {
+            HFunc(TEvHttpProxy::TEvHttpOutgoingRequest, HandleWaiting);
+            CFunc(NActors::TEvents::TEvWakeup::EventType, HandleTimeout);
+        }
+    }
+
+    STFUNC(StateResolving) {
+        switch (ev->GetTypeRewrite()) {
+            HFunc(TEvHttpProxy::TEvResolveHostResponse, HandleResolving);
+            CFunc(NActors::TEvents::TEvWakeup::EventType, HandleTimeout);
+        }
+    }
+
+    STFUNC(StateConnecting) {
+        switch (ev->GetTypeRewrite()) {
+            HFunc(NActors::TEvPollerReady, HandleConnecting);
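+            // Either the readiness notification or the registration result may
+            // arrive first while connecting; both HandleConnecting overloads
+            // re-check SO_ERROR via GetError() before finishing the connect.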
CFunc(NActors::TEvents::TEvWakeup::EventType, HandleTimeout); + HFunc(NActors::TEvPollerRegisterResult, HandleConnecting); + } + } + + STFUNC(StateConnected) { + switch (ev->GetTypeRewrite()) { + HFunc(NActors::TEvPollerReady, HandleConnected); + CFunc(NActors::TEvents::TEvWakeup::EventType, HandleTimeout); + HFunc(NActors::TEvPollerRegisterResult, HandleConnected); + } + } + + STFUNC(StateFailed) { + switch (ev->GetTypeRewrite()) { + HFunc(TEvHttpProxy::TEvHttpOutgoingRequest, HandleFailed); + } + } +}; + +NActors::IActor* CreateOutgoingConnectionActor(const TActorId& owner, const TString& host, bool secure, const TActorId& poller) { + if (secure) { + return new TOutgoingConnectionActor<TSecureSocketImpl>(owner, host, poller); + } else { + return new TOutgoingConnectionActor<TPlainSocketImpl>(owner, host, poller); + } +} + +} diff --git a/library/cpp/actors/http/http_proxy_sock_impl.h b/library/cpp/actors/http/http_proxy_sock_impl.h new file mode 100644 index 0000000000..bf8c71d05a --- /dev/null +++ b/library/cpp/actors/http/http_proxy_sock_impl.h @@ -0,0 +1,262 @@ +#pragma once + +#include "http.h" +#include "http_proxy.h" + +namespace NHttp { + +struct TPlainSocketImpl : virtual public THttpConfig { + TIntrusivePtr<TSocketDescriptor> Socket; + + TPlainSocketImpl() + : Socket(new TSocketDescriptor()) + {} + + TPlainSocketImpl(TIntrusivePtr<TSocketDescriptor> socket) + : Socket(std::move(socket)) + {} + + SOCKET GetRawSocket() const { + return static_cast<SOCKET>(Socket->Socket); + } + + void SetNonBlock(bool nonBlock = true) noexcept { + try { + ::SetNonBlock(Socket->Socket, nonBlock); + } + catch (const yexception&) { + } + } + + void SetTimeout(TDuration timeout) noexcept { + try { + ::SetSocketTimeout(Socket->Socket, timeout.Seconds(), timeout.MilliSecondsOfSecond()); + } + catch (const yexception&) { + } + } + + void Shutdown() { + //Socket->Socket.ShutDown(SHUT_RDWR); // KIKIMR-3895 + ::shutdown(Socket->Socket, SHUT_RDWR); + } + + int Connect(const SocketAddressType& address) { + return Socket->Socket.Connect(&address); + } + + static constexpr int OnConnect(bool&, bool&) { + return 1; + } + + static constexpr int OnAccept(const TEndpointInfo&, bool&, bool&) { + return 1; + } + + bool IsGood() { + int res; + GetSockOpt(Socket->Socket, SOL_SOCKET, SO_ERROR, res); + return res == 0; + } + + int GetError() { + int res; + GetSockOpt(Socket->Socket, SOL_SOCKET, SO_ERROR, res); + return res; + } + + ssize_t Send(const void* data, size_t size, bool&, bool&) { + return Socket->Socket.Send(data, size); + } + + ssize_t Recv(void* data, size_t size, bool&, bool&) { + return Socket->Socket.Recv(data, size); + } +}; + +struct TSecureSocketImpl : TPlainSocketImpl, TSslHelpers { + static TSecureSocketImpl* IO(BIO* bio) noexcept { + return static_cast<TSecureSocketImpl*>(BIO_get_data(bio)); + } + + static int IoWrite(BIO* bio, const char* data, int dlen) noexcept { + BIO_clear_retry_flags(bio); + int res = IO(bio)->Socket->Socket.Send(data, dlen); + if (-res == EAGAIN) { + BIO_set_retry_write(bio); + } + return res; + } + + static int IoRead(BIO* bio, char* data, int dlen) noexcept { + BIO_clear_retry_flags(bio); + int res = IO(bio)->Socket->Socket.Recv(data, dlen); + if (-res == EAGAIN) { + BIO_set_retry_read(bio); + } + return res; + } + + static int IoPuts(BIO* bio, const char* buf) noexcept { + Y_UNUSED(bio); + Y_UNUSED(buf); + return -2; + } + + static int IoGets(BIO* bio, char* buf, int size) noexcept { + Y_UNUSED(bio); + Y_UNUSED(buf); + Y_UNUSED(size); + return -2; + } + + static long 
IoCtrl(BIO* bio, int cmd, long larg, void* parg) noexcept { + Y_UNUSED(larg); + Y_UNUSED(parg); + + if (cmd == BIO_CTRL_FLUSH) { + IO(bio)->Flush(); + return 1; + } + + return -2; + } + + static int IoCreate(BIO* bio) noexcept { + BIO_set_data(bio, nullptr); + BIO_set_init(bio, 1); + return 1; + } + + static int IoDestroy(BIO* bio) noexcept { + BIO_set_data(bio, nullptr); + BIO_set_init(bio, 0); + return 1; + } + + static BIO_METHOD* CreateIoMethod() { + BIO_METHOD* method = BIO_meth_new(BIO_get_new_index() | BIO_TYPE_SOURCE_SINK, "SecureSocketImpl"); + BIO_meth_set_write(method, IoWrite); + BIO_meth_set_read(method, IoRead); + BIO_meth_set_puts(method, IoPuts); + BIO_meth_set_gets(method, IoGets); + BIO_meth_set_ctrl(method, IoCtrl); + BIO_meth_set_create(method, IoCreate); + BIO_meth_set_destroy(method, IoDestroy); + return method; + } + + static BIO_METHOD* IoMethod() { + static BIO_METHOD* method = CreateIoMethod(); + return method; + } + + TSslHolder<BIO> Bio; + TSslHolder<SSL_CTX> Ctx; + TSslHolder<SSL> Ssl; + + TSecureSocketImpl() = default; + + TSecureSocketImpl(TIntrusivePtr<TSocketDescriptor> socket) + : TPlainSocketImpl(std::move(socket)) + {} + + void InitClientSsl() { + Bio.Reset(BIO_new(IoMethod())); + BIO_set_data(Bio.Get(), this); + BIO_set_nbio(Bio.Get(), 1); + Ctx = CreateClientContext(); + Ssl = ConstructSsl(Ctx.Get(), Bio.Get()); + SSL_set_connect_state(Ssl.Get()); + } + + void InitServerSsl(SSL_CTX* ctx) { + Bio.Reset(BIO_new(IoMethod())); + BIO_set_data(Bio.Get(), this); + BIO_set_nbio(Bio.Get(), 1); + Ssl = ConstructSsl(ctx, Bio.Get()); + SSL_set_accept_state(Ssl.Get()); + } + + void Flush() {} + + ssize_t Send(const void* data, size_t size, bool& read, bool& write) { + ssize_t res = SSL_write(Ssl.Get(), data, size); + if (res < 0) { + res = SSL_get_error(Ssl.Get(), res); + switch(res) { + case SSL_ERROR_WANT_READ: + read = true; + return -EAGAIN; + case SSL_ERROR_WANT_WRITE: + write = true; + return -EAGAIN; + default: + return -EIO; + } + } + return res; + } + + ssize_t Recv(void* data, size_t size, bool& read, bool& write) { + ssize_t res = SSL_read(Ssl.Get(), data, size); + if (res < 0) { + res = SSL_get_error(Ssl.Get(), res); + switch(res) { + case SSL_ERROR_WANT_READ: + read = true; + return -EAGAIN; + case SSL_ERROR_WANT_WRITE: + write = true; + return -EAGAIN; + default: + return -EIO; + } + } + return res; + } + + int OnConnect(bool& read, bool& write) { + if (!Ssl) { + InitClientSsl(); + } + int res = SSL_connect(Ssl.Get()); + if (res <= 0) { + res = SSL_get_error(Ssl.Get(), res); + switch(res) { + case SSL_ERROR_WANT_READ: + read = true; + return -EAGAIN; + case SSL_ERROR_WANT_WRITE: + write = true; + return -EAGAIN; + default: + return -EIO; + } + } + return res; + } + + int OnAccept(const TEndpointInfo& endpoint, bool& read, bool& write) { + if (!Ssl) { + InitServerSsl(endpoint.SecureContext.Get()); + } + int res = SSL_accept(Ssl.Get()); + if (res <= 0) { + res = SSL_get_error(Ssl.Get(), res); + switch(res) { + case SSL_ERROR_WANT_READ: + read = true; + return -EAGAIN; + case SSL_ERROR_WANT_WRITE: + write = true; + return -EAGAIN; + default: + return -EIO; + } + } + return res; + } +}; + +} diff --git a/library/cpp/actors/http/http_proxy_ssl.h b/library/cpp/actors/http/http_proxy_ssl.h new file mode 100644 index 0000000000..ffce12997f --- /dev/null +++ b/library/cpp/actors/http/http_proxy_ssl.h @@ -0,0 +1,131 @@ +#pragma once + +#include <openssl/bio.h> +#include <openssl/ssl.h> +#include <openssl/err.h> +#include <openssl/tls1.h> + +namespace NHttp { + 
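+// A minimal usage sketch for the helpers below (assuming `pem` holds both the
+// certificate chain and the private key, and `bio` is a caller-created BIO*;
+// both names are illustrative, not part of this header):
+//
+//   auto ctx = TSslHelpers::CreateServerContext(pem);         // nullptr on failure
+//   if (ctx) {
+//       auto ssl = TSslHelpers::ConstructSsl(ctx.Get(), bio); // up-refs bio once,
+//   }                                                         // since rbio == wbio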
+struct TSslHelpers { + struct TSslDestroy { + static void Destroy(SSL_CTX* ctx) noexcept { + SSL_CTX_free(ctx); + } + + static void Destroy(SSL* ssl) noexcept { + SSL_free(ssl); + } + + static void Destroy(X509* cert) noexcept { + X509_free(cert); + } + + static void Destroy(EVP_PKEY* pkey) noexcept { + EVP_PKEY_free(pkey); + } + + static void Destroy(BIO* bio) noexcept { + BIO_free(bio); + } + }; + + template <typename T> + using TSslHolder = THolder<T, TSslDestroy>; + + static TSslHolder<SSL_CTX> CreateSslCtx(const SSL_METHOD* method) { + TSslHolder<SSL_CTX> ctx(SSL_CTX_new(method)); + + if (ctx) { + SSL_CTX_set_options(ctx.Get(), SSL_OP_NO_SSLv2); + SSL_CTX_set_options(ctx.Get(), SSL_OP_NO_SSLv3); + SSL_CTX_set_options(ctx.Get(), SSL_OP_MICROSOFT_SESS_ID_BUG); + SSL_CTX_set_options(ctx.Get(), SSL_OP_NETSCAPE_CHALLENGE_BUG); + } + + return ctx; + } + + static TSslHolder<SSL_CTX> CreateClientContext() { + return CreateSslCtx(SSLv23_client_method()); + } + + static TSslHolder<SSL_CTX> CreateServerContext(const TString& certificate, const TString& key) { + TSslHolder<SSL_CTX> ctx = CreateSslCtx(SSLv23_server_method()); + SSL_CTX_set_ecdh_auto(ctx.Get(), 1); + int res; + res = SSL_CTX_use_certificate_chain_file(ctx.Get(), certificate.c_str()); + if (res < 0) { + // TODO(xenoxeno): more diagnostics? + return nullptr; + } + res = SSL_CTX_use_PrivateKey_file(ctx.Get(), key.c_str(), SSL_FILETYPE_PEM); + if (res < 0) { + // TODO(xenoxeno): more diagnostics? + return nullptr; + } + return ctx; + } + + static bool LoadX509Chain(TSslHolder<SSL_CTX>& ctx, const TString& pem) { + TSslHolder<BIO> bio(BIO_new_mem_buf(pem.c_str(), pem.size())); + if (bio == nullptr) { + return false; + } + TSslHolder<X509> cert(PEM_read_bio_X509_AUX(bio.Get(), nullptr, nullptr, nullptr)); + if (cert == nullptr) { + return false; + } + if (SSL_CTX_use_certificate(ctx.Get(), cert.Release()) <= 0) { + return false; + } + SSL_CTX_clear_chain_certs(ctx.Get()); + while (true) { + TSslHolder<X509> ca(PEM_read_bio_X509(bio.Get(), nullptr, nullptr, nullptr)); + if (ca == nullptr) { + break; + } + if (!SSL_CTX_add0_chain_cert(ctx.Get(), ca.Release())) { + return false; + } + } + return true; + } + + static bool LoadPrivateKey(TSslHolder<SSL_CTX>& ctx, const TString& pem) { + TSslHolder<BIO> bio(BIO_new_mem_buf(pem.c_str(), pem.size())); + if (bio == nullptr) { + return false; + } + TSslHolder<EVP_PKEY> pkey(PEM_read_bio_PrivateKey(bio.Get(), nullptr, nullptr, nullptr)); + if (SSL_CTX_use_PrivateKey(ctx.Get(), pkey.Release()) <= 0) { + return false; + } + return true; + } + + static TSslHolder<SSL_CTX> CreateServerContext(const TString& pem) { + TSslHolder<SSL_CTX> ctx = CreateSslCtx(SSLv23_server_method()); + SSL_CTX_set_ecdh_auto(ctx.Get(), 1); + if (!LoadX509Chain(ctx, pem)) { + return nullptr; + } + if (!LoadPrivateKey(ctx, pem)) { + return nullptr; + } + return ctx; + } + + static TSslHolder<SSL> ConstructSsl(SSL_CTX* ctx, BIO* bio) { + TSslHolder<SSL> ssl(SSL_new(ctx)); + + if (ssl) { + BIO_up_ref(bio); // SSL_set_bio consumes only one reference if rbio and wbio are the same + SSL_set_bio(ssl.Get(), bio, bio); + } + + return ssl; + } +}; + +} diff --git a/library/cpp/actors/http/http_static.cpp b/library/cpp/actors/http/http_static.cpp new file mode 100644 index 0000000000..c075c5f693 --- /dev/null +++ b/library/cpp/actors/http/http_static.cpp @@ -0,0 +1,95 @@ +#include "http_proxy.h" +#include "http_static.h" +#include <library/cpp/actors/core/executor_pool_basic.h> +#include <library/cpp/actors/core/log.h> +#include 
<library/cpp/actors/core/scheduler_basic.h> +#include <library/cpp/actors/http/http.h> +#include <library/cpp/resource/resource.h> +#include <util/folder/path.h> +#include <util/stream/file.h> + +namespace NHttp { + +class THttpStaticContentHandler : public NActors::TActor<THttpStaticContentHandler> { +public: + using TBase = NActors::TActor<THttpStaticContentHandler>; + const TFsPath URL; + const TFsPath FilePath; + const TFsPath ResourcePath; + const TFsPath Index; + + THttpStaticContentHandler(const TString& url, const TString& filePath, const TString& resourcePath, const TString& index) + : TBase(&THttpStaticContentHandler::StateWork) + , URL(url) + , FilePath(filePath) + , ResourcePath(resourcePath) + , Index(index) + {} + + static TInstant GetCompileTime() { + tm compileTime; + strptime(__DATE__ " " __TIME__, "%B %d %Y %H:%M:%S", &compileTime); + return TInstant::Seconds(mktime(&compileTime)); + } + + void Handle(NHttp::TEvHttpProxy::TEvHttpIncomingRequest::TPtr event, const NActors::TActorContext& ctx) { + THttpOutgoingResponsePtr response; + if (event->Get()->Request->Method != "GET") { + response = event->Get()->Request->CreateResponseBadRequest("Wrong request"); + ctx.Send(event->Sender, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(response)); + return; + } + TFsPath url(event->Get()->Request->URL.Before('?')); + if (!url.IsAbsolute()) { + response = event->Get()->Request->CreateResponseBadRequest("Completely wrong URL"); + ctx.Send(event->Sender, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(response)); + return; + } + if (url.GetPath().EndsWith('/') && Index.IsDefined()) { + url /= Index; + } + url = url.RelativeTo(URL); + try { + // TODO: caching? + TString contentType = mimetypeByExt(url.GetExtension().c_str()); + TString data; + TFileStat filestat; + TFsPath resourcename(ResourcePath / url); + if (NResource::FindExact(resourcename.GetPath(), &data)) { + static TInstant compileTime(GetCompileTime()); + filestat.MTime = compileTime.Seconds(); + } else { + TFsPath filename(FilePath / url); + if (!filename.IsSubpathOf(FilePath) && filename != FilePath) { + response = event->Get()->Request->CreateResponseBadRequest("Wrong URL"); + ctx.Send(event->Sender, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(response)); + return; + } + if (filename.Stat(filestat) && filestat.IsFile()) { + data = TUnbufferedFileInput(filename).ReadAll(); + } + } + if (!filestat.IsNull()) { + response = event->Get()->Request->CreateResponseOK(data, contentType, TInstant::Seconds(filestat.MTime)); + } else { + response = event->Get()->Request->CreateResponseNotFound("File not found"); + } + } + catch (const yexception&) { + response = event->Get()->Request->CreateResponseServiceUnavailable("Not available"); + } + ctx.Send(event->Sender, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(response)); + } + + STFUNC(StateWork) { + switch (ev->GetTypeRewrite()) { + HFunc(NHttp::TEvHttpProxy::TEvHttpIncomingRequest, Handle); + } + } +}; + +NActors::IActor* CreateHttpStaticContentHandler(const TString& url, const TString& filePath, const TString& resourcePath, const TString& index) { + return new THttpStaticContentHandler(url, filePath, resourcePath, index); +} + +} diff --git a/library/cpp/actors/http/http_static.h b/library/cpp/actors/http/http_static.h new file mode 100644 index 0000000000..f91e15dfb1 --- /dev/null +++ b/library/cpp/actors/http/http_static.h @@ -0,0 +1,9 @@ +#pragma once +#include <library/cpp/actors/core/actor.h> +#include "http.h" + +namespace NHttp { + +NActors::IActor* 
CreateHttpStaticContentHandler(const TString& url, const TString& filePath, const TString& resourcePath, const TString& index = TString()); + +} diff --git a/library/cpp/actors/http/http_ut.cpp b/library/cpp/actors/http/http_ut.cpp new file mode 100644 index 0000000000..4c922f8d0f --- /dev/null +++ b/library/cpp/actors/http/http_ut.cpp @@ -0,0 +1,358 @@ +#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/testing/unittest/tests_data.h> +#include <library/cpp/actors/core/executor_pool_basic.h> +#include <library/cpp/actors/core/scheduler_basic.h> +#include <library/cpp/actors/testlib/test_runtime.h> +#include <util/system/tempfile.h> +#include "http.h" +#include "http_proxy.h" + + + +enum EService : NActors::NLog::EComponent { + MIN, + Logger, + MVP, + MAX +}; + +namespace { + +template <typename HttpType> +void EatWholeString(TIntrusivePtr<HttpType>& request, const TString& data) { + request->EnsureEnoughSpaceAvailable(data.size()); + auto size = std::min(request->Avail(), data.size()); + memcpy(request->Pos(), data.data(), size); + request->Advance(size); +} + +template <typename HttpType> +void EatPartialString(TIntrusivePtr<HttpType>& request, const TString& data) { + for (char c : data) { + request->EnsureEnoughSpaceAvailable(1); + memcpy(request->Pos(), &c, 1); + request->Advance(1); + } +} + +} + +Y_UNIT_TEST_SUITE(HttpProxy) { + Y_UNIT_TEST(BasicParsing) { + NHttp::THttpIncomingRequestPtr request = new NHttp::THttpIncomingRequest(); + EatWholeString(request, "GET /test HTTP/1.1\r\nHost: test\r\nSome-Header: 32344\r\n\r\n"); + UNIT_ASSERT_EQUAL(request->Stage, NHttp::THttpIncomingRequest::EParseStage::Done); + UNIT_ASSERT_EQUAL(request->Method, "GET"); + UNIT_ASSERT_EQUAL(request->URL, "/test"); + UNIT_ASSERT_EQUAL(request->Protocol, "HTTP"); + UNIT_ASSERT_EQUAL(request->Version, "1.1"); + UNIT_ASSERT_EQUAL(request->Host, "test"); + UNIT_ASSERT_EQUAL(request->Headers, "Host: test\r\nSome-Header: 32344\r\n\r\n"); + } + + Y_UNIT_TEST(BasicParsingChunkedBody) { + NHttp::THttpOutgoingRequestPtr request = nullptr; //new NHttp::THttpOutgoingRequest(); + NHttp::THttpIncomingResponsePtr response = new NHttp::THttpIncomingResponse(request); + EatWholeString(response, "HTTP/1.1 200 OK\r\nConnection: close\r\nTransfer-Encoding: chunked\r\n\r\n4\r\nthis\r\n4\r\n is \r\n5\r\ntest.\r\n0\r\n\r\n"); + UNIT_ASSERT_EQUAL(response->Stage, NHttp::THttpIncomingResponse::EParseStage::Done); + UNIT_ASSERT_EQUAL(response->Status, "200"); + UNIT_ASSERT_EQUAL(response->Connection, "close"); + UNIT_ASSERT_EQUAL(response->Protocol, "HTTP"); + UNIT_ASSERT_EQUAL(response->Version, "1.1"); + UNIT_ASSERT_EQUAL(response->TransferEncoding, "chunked"); + UNIT_ASSERT_EQUAL(response->Body, "this is test."); + } + + Y_UNIT_TEST(InvalidParsingChunkedBody) { + NHttp::THttpOutgoingRequestPtr request = nullptr; //new NHttp::THttpOutgoingRequest(); + NHttp::THttpIncomingResponsePtr response = new NHttp::THttpIncomingResponse(request); + EatWholeString(response, "HTTP/1.1 200 OK\r\nConnection: close\r\nTransfer-Encoding: chunked\r\n\r\n5\r\nthis\r\n4\r\n is \r\n5\r\ntest.\r\n0\r\n\r\n"); + UNIT_ASSERT(response->IsError()); + } + + Y_UNIT_TEST(AdvancedParsingChunkedBody) { + NHttp::THttpOutgoingRequestPtr request = nullptr; //new NHttp::THttpOutgoingRequest(); + NHttp::THttpIncomingResponsePtr response = new NHttp::THttpIncomingResponse(request); + EatWholeString(response, "HTTP/1.1 200 OK\r\nConnection: close\r\nTransfer-Encoding: chunked\r\n\r\n6\r\nthis\r\n\r\n4\r\n is \r\n5\r\ntest.\r\n0\r\n\r\n"); + 
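+        // The first chunk declares a length of 6, so its payload is "this\r\n":
+        // the CRLF is part of the body, which the Body assertion below verifies.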
UNIT_ASSERT_EQUAL(response->Stage, NHttp::THttpIncomingResponse::EParseStage::Done);
+        UNIT_ASSERT_EQUAL(response->Status, "200");
+        UNIT_ASSERT_EQUAL(response->Connection, "close");
+        UNIT_ASSERT_EQUAL(response->Protocol, "HTTP");
+        UNIT_ASSERT_EQUAL(response->Version, "1.1");
+        UNIT_ASSERT_EQUAL(response->TransferEncoding, "chunked");
+        UNIT_ASSERT_EQUAL(response->Body, "this\r\n is test.");
+    }
+
+    Y_UNIT_TEST(CreateResponseWithCompressedBody) {
+        NHttp::THttpIncomingRequestPtr request = nullptr;
+        NHttp::THttpOutgoingResponsePtr response = new NHttp::THttpOutgoingResponse(request, "HTTP", "1.1", "200", "OK");
+        response->Set<&NHttp::THttpResponse::ContentEncoding>("gzip");
+        TString compressedBody = "compressed body";
+        response->SetBody(compressedBody);
+        UNIT_ASSERT_VALUES_EQUAL("gzip", response->ContentEncoding);
+        UNIT_ASSERT_VALUES_EQUAL(ToString(compressedBody.size()), response->ContentLength);
+        UNIT_ASSERT_VALUES_EQUAL(compressedBody, response->Body);
+    }
+
+    Y_UNIT_TEST(BasicPartialParsing) {
+        NHttp::THttpIncomingRequestPtr request = new NHttp::THttpIncomingRequest();
+        EatPartialString(request, "GET /test HTTP/1.1\r\nHost: test\r\nSome-Header: 32344\r\n\r\n");
+        UNIT_ASSERT_EQUAL(request->Stage, NHttp::THttpIncomingRequest::EParseStage::Done);
+        UNIT_ASSERT_EQUAL(request->Method, "GET");
+        UNIT_ASSERT_EQUAL(request->URL, "/test");
+        UNIT_ASSERT_EQUAL(request->Protocol, "HTTP");
+        UNIT_ASSERT_EQUAL(request->Version, "1.1");
+        UNIT_ASSERT_EQUAL(request->Host, "test");
+        UNIT_ASSERT_EQUAL(request->Headers, "Host: test\r\nSome-Header: 32344\r\n\r\n");
+    }
+
+    Y_UNIT_TEST(BasicPartialParsingChunkedBody) {
+        NHttp::THttpOutgoingRequestPtr request = nullptr; //new NHttp::THttpOutgoingRequest();
+        NHttp::THttpIncomingResponsePtr response = new NHttp::THttpIncomingResponse(request);
+        EatPartialString(response, "HTTP/1.1 200 OK\r\nConnection: close\r\nTransfer-Encoding: chunked\r\n\r\n4\r\nthis\r\n4\r\n is \r\n5\r\ntest.\r\n0\r\n\r\n");
+        UNIT_ASSERT_EQUAL(response->Stage, NHttp::THttpIncomingResponse::EParseStage::Done);
+        UNIT_ASSERT_EQUAL(response->Status, "200");
+        UNIT_ASSERT_EQUAL(response->Connection, "close");
+        UNIT_ASSERT_EQUAL(response->Protocol, "HTTP");
+        UNIT_ASSERT_EQUAL(response->Version, "1.1");
+        UNIT_ASSERT_EQUAL(response->TransferEncoding, "chunked");
+        UNIT_ASSERT_EQUAL(response->Body, "this is test.");
+    }
+
+    Y_UNIT_TEST(AdvancedParsing) {
+        NHttp::THttpIncomingRequestPtr request = new NHttp::THttpIncomingRequest();
+        EatWholeString(request, "GE");
+        EatWholeString(request, "T");
+        EatWholeString(request, " ");
+        EatWholeString(request, "/test");
+        EatWholeString(request, " HTTP/1.1\r");
+        EatWholeString(request, "\nHo");
+        EatWholeString(request, "st: test");
+        EatWholeString(request, "\r\n");
+        EatWholeString(request, "Some-Header: 32344\r\n\r");
+        EatWholeString(request, "\n");
+        UNIT_ASSERT_EQUAL(request->Stage, NHttp::THttpIncomingRequest::EParseStage::Done);
+        UNIT_ASSERT_EQUAL(request->Method, "GET");
+        UNIT_ASSERT_EQUAL(request->URL, "/test");
+        UNIT_ASSERT_EQUAL(request->Protocol, "HTTP");
+        UNIT_ASSERT_EQUAL(request->Version, "1.1");
+        UNIT_ASSERT_EQUAL(request->Host, "test");
+        UNIT_ASSERT_EQUAL(request->Headers, "Host: test\r\nSome-Header: 32344\r\n\r\n");
+    }
+
+    Y_UNIT_TEST(AdvancedPartialParsing) {
+        NHttp::THttpIncomingRequestPtr request = new NHttp::THttpIncomingRequest();
+        EatPartialString(request, "GE");
+        EatPartialString(request, "T");
+        EatPartialString(request, " ");
+        EatPartialString(request, "/test");
+        EatPartialString(request, " 
HTTP/1.1\r"); + EatPartialString(request, "\nHo"); + EatPartialString(request, "st: test"); + EatPartialString(request, "\r\n"); + EatPartialString(request, "Some-Header: 32344\r\n\r"); + EatPartialString(request, "\n"); + UNIT_ASSERT_EQUAL(request->Stage, NHttp::THttpIncomingRequest::EParseStage::Done); + UNIT_ASSERT_EQUAL(request->Method, "GET"); + UNIT_ASSERT_EQUAL(request->URL, "/test"); + UNIT_ASSERT_EQUAL(request->Protocol, "HTTP"); + UNIT_ASSERT_EQUAL(request->Version, "1.1"); + UNIT_ASSERT_EQUAL(request->Host, "test"); + UNIT_ASSERT_EQUAL(request->Headers, "Host: test\r\nSome-Header: 32344\r\n\r\n"); + } + + Y_UNIT_TEST(BasicRenderBodyWithHeadersAndCookies) { + NHttp::THttpOutgoingRequestPtr request = NHttp::THttpOutgoingRequest::CreateRequestGet("http://www.yandex.ru/data/url"); + NHttp::THeadersBuilder headers; + NHttp::TCookiesBuilder cookies; + cookies.Set("cookie1", "123456"); + cookies.Set("cookie2", "45678"); + headers.Set("Cookie", cookies.Render()); + request->Set(headers); + TString requestData; + request->AsString(requestData); + UNIT_ASSERT_VALUES_EQUAL(requestData, "GET /data/url HTTP/1.1\r\nHost: www.yandex.ru\r\nAccept: */*\r\nCookie: cookie1=123456; cookie2=45678;\r\n"); + } + + Y_UNIT_TEST(BasicRunning) { + NActors::TTestActorRuntimeBase actorSystem; + TPortManager portManager; + TIpPort port = portManager.GetTcpPort(); + TAutoPtr<NActors::IEventHandle> handle; + actorSystem.Initialize(); + NMonitoring::TMetricRegistry sensors; + + NActors::IActor* proxy = NHttp::CreateHttpProxy(sensors); + NActors::TActorId proxyId = actorSystem.Register(proxy); + actorSystem.Send(new NActors::IEventHandle(proxyId, TActorId(), new NHttp::TEvHttpProxy::TEvAddListeningPort(port)), 0, true); + actorSystem.DispatchEvents(); + + NActors::TActorId serverId = actorSystem.AllocateEdgeActor(); + actorSystem.Send(new NActors::IEventHandle(proxyId, serverId, new NHttp::TEvHttpProxy::TEvRegisterHandler("/test", serverId)), 0, true); + + NActors::TActorId clientId = actorSystem.AllocateEdgeActor(); + NHttp::THttpOutgoingRequestPtr httpRequest = NHttp::THttpOutgoingRequest::CreateRequestGet("http://[::1]:" + ToString(port) + "/test"); + actorSystem.Send(new NActors::IEventHandle(proxyId, clientId, new NHttp::TEvHttpProxy::TEvHttpOutgoingRequest(httpRequest)), 0, true); + + NHttp::TEvHttpProxy::TEvHttpIncomingRequest* request = actorSystem.GrabEdgeEvent<NHttp::TEvHttpProxy::TEvHttpIncomingRequest>(handle); + + UNIT_ASSERT_EQUAL(request->Request->URL, "/test"); + + NHttp::THttpOutgoingResponsePtr httpResponse = request->Request->CreateResponseString("HTTP/1.1 200 Found\r\nConnection: Close\r\nTransfer-Encoding: chunked\r\n\r\n6\r\npassed\r\n0\r\n\r\n"); + actorSystem.Send(new NActors::IEventHandle(handle->Sender, serverId, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(httpResponse)), 0, true); + + NHttp::TEvHttpProxy::TEvHttpIncomingResponse* response = actorSystem.GrabEdgeEvent<NHttp::TEvHttpProxy::TEvHttpIncomingResponse>(handle); + + UNIT_ASSERT_EQUAL(response->Response->Status, "200"); + UNIT_ASSERT_EQUAL(response->Response->Body, "passed"); + } + + Y_UNIT_TEST(TlsRunning) { + NActors::TTestActorRuntimeBase actorSystem; + TPortManager portManager; + TIpPort port = portManager.GetTcpPort(); + TAutoPtr<NActors::IEventHandle> handle; + actorSystem.Initialize(); + NMonitoring::TMetricRegistry sensors; + + TString certificateContent = R"___(-----BEGIN PRIVATE KEY----- +MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQCzRZjodO7Aqe1w 
+RyOj6kG6g2nn8ZGAxfao4mLT0jDTbVksrhV/h2s3uldLkFo5WrNQ8WZe+iIbXeFL +s8tO6hslzreo9sih2IHoRcH5KnS/6YTqVhRTJb1jE2dM8NwYbwTi+T2Pe0FrBPjI +kgVO50gAtYl9C+fc715uZiSKW+rRlP5OoFTwxrOjiU27RPZjFYyWK9wTI1Es9uRr +lbZbLl5cY6dK2J1AViRraaYKCWO26VbOPWLsY4OD3e+ZXIc3OMCz6Yb0wmRPeJ60 +bbbkGfI8O27kDdv69MAWHIm0yYMzKEnom1dce7rNQNDEqJfocsYIsg+EvayT1yQ9 +KTBegw7LAgMBAAECggEBAKaOCrotqYQmXArsjRhFFDwMy+BKdzyEr93INrlFl0dX +WHpCYobRcbOc1G3H94tB0UdqgAnNqtJyLlb+++ydZAuEOu4oGc8EL+10ofq0jzOd +6Xct8kQt0/6wkFDTlii9PHUDy0X65ZRgUiNGRtg/2I2QG+SpowmI+trm2xwQueFs +VaWrjc3cVvXx0b8Lu7hqZUv08kgC38stzuRk/n2T5VWSAr7Z4ZWQbO918Dv35HUw +Wy/0jNUFP9CBCvFJ4l0OoH9nYhWFG+HXWzNdw6/Hca4jciRKo6esCiOZ9uWYv/ec +/NvX9rgFg8G8/SrTisX10+Bbeq+R1RKwq/IG409TH4ECgYEA14L+3QsgNIUMeYAx +jSCyk22R/tOHI1BM+GtKPUhnwHlAssrcPcxXMJovl6WL93VauYjym0wpCz9urSpA +I2CqTsG8GYciA6Dr3mHgD6cK0jj9UPAU6EnZ5S0mjhPqKZqutu9QegzD2uESvuN8 +36xezwQthzAf0nI/P3sJGjVXjikCgYEA1POm5xcV6SmM6HnIdadEebhzZIJ9TXQz +ry3Jj3a7CKyD5C7fAdkHUTCjgT/2ElxPi9ABkZnC+d/cW9GtJFa0II5qO/agm3KQ +ZXYiutu9A7xACHYFXRiJEjVUdGG9dKMVOHUEa8IHEgrrcUVM/suy/GgutywIfaXs +y58IFP24K9MCgYEAk6zjz7wL+XEiNy+sxLQfKf7vB9sSwxQHakK6wHuY/L8Zomp3 +uLEJHfjJm/SIkK0N2g0JkXkCtv5kbKyC/rsCeK0wo52BpVLjzaLr0k34kE0U6B1b +dkEE2pGx1bG3x4KDLj+Wuct9ecK5Aa0IqIyI+vo16GkFpUM8K9e3SQo8UOECgYEA +sCZYAkILYtJ293p9giz5rIISGasDAUXE1vxWBXEeJ3+kneTTnZCrx9Im/ewtnWR0 +fF90XL9HFDDD88POqAd8eo2zfKR2l/89SGBfPBg2EtfuU9FkgGyiPciVcqvC7q9U +B15saMKX3KnhtdGwbfeLt9RqCCTJZT4SUSDcq5hwdvcCgYAxY4Be8mNipj8Cgg22 +mVWSolA0TEzbtUcNk6iGodpi+Z0LKpsPC0YRqPRyh1K+rIltG1BVdmUBHcMlOYxl +lWWvbJH6PkJWy4n2MF7PO45kjN3pPZg4hgH63JjZeAineBwEArUGb9zHnvzcdRvF +wuQ2pZHL/HJ0laUSieHDJ5917w== +-----END PRIVATE KEY----- + + +-----BEGIN CERTIFICATE----- +MIIDjTCCAnWgAwIBAgIURt5IBx0J3xgEaQvmyrFH2A+NkpMwDQYJKoZIhvcNAQEL +BQAwVjELMAkGA1UEBhMCUlUxDzANBgNVBAgMBk1vc2NvdzEPMA0GA1UEBwwGTW9z +Y293MQ8wDQYDVQQKDAZZYW5kZXgxFDASBgNVBAMMC3Rlc3Qtc2VydmVyMB4XDTE5 +MDkyMDE3MTQ0MVoXDTQ3MDIwNDE3MTQ0MVowVjELMAkGA1UEBhMCUlUxDzANBgNV +BAgMBk1vc2NvdzEPMA0GA1UEBwwGTW9zY293MQ8wDQYDVQQKDAZZYW5kZXgxFDAS +BgNVBAMMC3Rlc3Qtc2VydmVyMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC +AQEAs0WY6HTuwKntcEcjo+pBuoNp5/GRgMX2qOJi09Iw021ZLK4Vf4drN7pXS5Ba +OVqzUPFmXvoiG13hS7PLTuobJc63qPbIodiB6EXB+Sp0v+mE6lYUUyW9YxNnTPDc +GG8E4vk9j3tBawT4yJIFTudIALWJfQvn3O9ebmYkilvq0ZT+TqBU8Mazo4lNu0T2 +YxWMlivcEyNRLPbka5W2Wy5eXGOnStidQFYka2mmCgljtulWzj1i7GODg93vmVyH +NzjAs+mG9MJkT3ietG225BnyPDtu5A3b+vTAFhyJtMmDMyhJ6JtXXHu6zUDQxKiX +6HLGCLIPhL2sk9ckPSkwXoMOywIDAQABo1MwUTAdBgNVHQ4EFgQUDv/xuJ4CvCgG +fPrZP3hRAt2+/LwwHwYDVR0jBBgwFoAUDv/xuJ4CvCgGfPrZP3hRAt2+/LwwDwYD +VR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAinKpMYaA2tjLpAnPVbjy +/ZxSBhhB26RiQp3Re8XOKyhTWqgYE6kldYT0aXgK9x9mPC5obQannDDYxDc7lX+/ +qP/u1X81ZcDRo/f+qQ3iHfT6Ftt/4O3qLnt45MFM6Q7WabRm82x3KjZTqpF3QUdy +tumWiuAP5DMd1IRDtnKjFHO721OsEsf6NLcqdX89bGeqXDvrkwg3/PNwTyW5E7cj +feY8L2eWtg6AJUnIBu11wvfzkLiH3QKzHvO/SIZTGf5ihDsJ3aKEE9UNauTL3bVc +CRA/5XcX13GJwHHj6LCoc3sL7mt8qV9HKY2AOZ88mpObzISZxgPpdKCfjsrdm63V +6g== +-----END CERTIFICATE-----)___"; + + TTempFileHandle certificateFile; + + certificateFile.Write(certificateContent.data(), certificateContent.size()); + + NActors::IActor* proxy = NHttp::CreateHttpProxy(sensors); + NActors::TActorId proxyId = actorSystem.Register(proxy); + + THolder<NHttp::TEvHttpProxy::TEvAddListeningPort> add = MakeHolder<NHttp::TEvHttpProxy::TEvAddListeningPort>(port); + ///////// https configuration + add->Secure = true; + add->CertificateFile = certificateFile.Name(); + add->PrivateKeyFile = certificateFile.Name(); + ///////// + actorSystem.Send(new NActors::IEventHandle(proxyId, TActorId(), 
add.Release()), 0, true); + actorSystem.DispatchEvents(); + + NActors::TActorId serverId = actorSystem.AllocateEdgeActor(); + actorSystem.Send(new NActors::IEventHandle(proxyId, serverId, new NHttp::TEvHttpProxy::TEvRegisterHandler("/test", serverId)), 0, true); + + NActors::TActorId clientId = actorSystem.AllocateEdgeActor(); + NHttp::THttpOutgoingRequestPtr httpRequest = NHttp::THttpOutgoingRequest::CreateRequestGet("https://[::1]:" + ToString(port) + "/test"); + actorSystem.Send(new NActors::IEventHandle(proxyId, clientId, new NHttp::TEvHttpProxy::TEvHttpOutgoingRequest(httpRequest)), 0, true); + + NHttp::TEvHttpProxy::TEvHttpIncomingRequest* request = actorSystem.GrabEdgeEvent<NHttp::TEvHttpProxy::TEvHttpIncomingRequest>(handle); + + UNIT_ASSERT_EQUAL(request->Request->URL, "/test"); + + NHttp::THttpOutgoingResponsePtr httpResponse = request->Request->CreateResponseString("HTTP/1.1 200 Found\r\nConnection: Close\r\nTransfer-Encoding: chunked\r\n\r\n6\r\npassed\r\n0\r\n\r\n"); + actorSystem.Send(new NActors::IEventHandle(handle->Sender, serverId, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(httpResponse)), 0, true); + + NHttp::TEvHttpProxy::TEvHttpIncomingResponse* response = actorSystem.GrabEdgeEvent<NHttp::TEvHttpProxy::TEvHttpIncomingResponse>(handle); + + UNIT_ASSERT_EQUAL(response->Response->Status, "200"); + UNIT_ASSERT_EQUAL(response->Response->Body, "passed"); + } + + /*Y_UNIT_TEST(AdvancedRunning) { + THolder<NActors::TActorSystemSetup> setup = MakeHolder<NActors::TActorSystemSetup>(); + setup->NodeId = 1; + setup->ExecutorsCount = 1; + setup->Executors = new TAutoPtr<NActors::IExecutorPool>[1]; + setup->Executors[0] = new NActors::TBasicExecutorPool(0, 2, 10); + setup->Scheduler = new NActors::TBasicSchedulerThread(NActors::TSchedulerConfig(512, 100)); + NActors::TActorSystem actorSystem(setup); + actorSystem.Start(); + NHttp::THttpProxy* incomingProxy = new NHttp::THttpProxy(); + NActors::TActorId incomingProxyId = actorSystem.Register(incomingProxy); + actorSystem.Send(incomingProxyId, new NHttp::TEvHttpProxy::TEvAddListeningPort(13337)); + + NHttp::THttpProxy* outgoingProxy = new NHttp::THttpProxy(); + NActors::TActorId outgoingProxyId = actorSystem.Register(outgoingProxy); + + THolder<NHttp::THttpStaticStringRequest> httpRequest = MakeHolder<NHttp::THttpStaticStringRequest>("GET /test HTTP/1.1\r\n\r\n"); + actorSystem.Send(outgoingProxyId, new NHttp::TEvHttpProxy::TEvHttpOutgoingRequest("[::]:13337", std::move(httpRequest))); + + Sleep(TDuration::Minutes(60)); + }*/ + + Y_UNIT_TEST(TooLongHeader) { + NActors::TTestActorRuntimeBase actorSystem; + TPortManager portManager; + TIpPort port = portManager.GetTcpPort(); + TAutoPtr<NActors::IEventHandle> handle; + actorSystem.Initialize(); + NMonitoring::TMetricRegistry sensors; + + NActors::IActor* proxy = NHttp::CreateHttpProxy(sensors); + NActors::TActorId proxyId = actorSystem.Register(proxy); + actorSystem.Send(new NActors::IEventHandle(proxyId, TActorId(), new NHttp::TEvHttpProxy::TEvAddListeningPort(port)), 0, true); + actorSystem.DispatchEvents(); + + NActors::TActorId serverId = actorSystem.AllocateEdgeActor(); + actorSystem.Send(new NActors::IEventHandle(proxyId, serverId, new NHttp::TEvHttpProxy::TEvRegisterHandler("/test", serverId)), 0, true); + + NActors::TActorId clientId = actorSystem.AllocateEdgeActor(); + NHttp::THttpOutgoingRequestPtr httpRequest = NHttp::THttpOutgoingRequest::CreateRequestGet("http://[::1]:" + ToString(port) + "/test"); + httpRequest->Set("Connection", "close"); + TString longHeader; + 
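+        // Build a roughly 9 KB header name; the parser is expected to reject the
+        // oversized header with 400 "Invalid http header" (asserted below) rather
+        // than buffer it without bound.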
longHeader.append(9000, 'X'); + httpRequest->Set(longHeader, "data"); + actorSystem.Send(new NActors::IEventHandle(proxyId, clientId, new NHttp::TEvHttpProxy::TEvHttpOutgoingRequest(httpRequest)), 0, true); + + NHttp::TEvHttpProxy::TEvHttpIncomingResponse* response = actorSystem.GrabEdgeEvent<NHttp::TEvHttpProxy::TEvHttpIncomingResponse>(handle); + + UNIT_ASSERT_EQUAL(response->Response->Status, "400"); + UNIT_ASSERT_EQUAL(response->Response->Body, "Invalid http header"); + } +} diff --git a/library/cpp/actors/http/ut/ya.make b/library/cpp/actors/http/ut/ya.make new file mode 100644 index 0000000000..8b4c04c4d3 --- /dev/null +++ b/library/cpp/actors/http/ut/ya.make @@ -0,0 +1,18 @@ +UNITTEST_FOR(library/cpp/actors/http) + +OWNER(xenoxeno) + +SIZE(SMALL) + +PEERDIR( + library/cpp/actors/testlib +) + +IF (NOT OS_WINDOWS) +SRCS( + http_ut.cpp +) +ELSE() +ENDIF() + +END() diff --git a/library/cpp/actors/http/ya.make b/library/cpp/actors/http/ya.make new file mode 100644 index 0000000000..7ce68b7a75 --- /dev/null +++ b/library/cpp/actors/http/ya.make @@ -0,0 +1,33 @@ +RECURSE_FOR_TESTS(ut) + +LIBRARY() + +OWNER(xenoxeno g:kikimr) + +SRCS( + http_cache.cpp + http_cache.h + http_config.h + http_proxy_acceptor.cpp + http_proxy_incoming.cpp + http_proxy_outgoing.cpp + http_proxy_sock_impl.h + http_proxy_ssl.h + http_proxy.cpp + http_proxy.h + http_static.cpp + http_static.h + http.cpp + http.h +) + +PEERDIR( + contrib/libs/openssl + library/cpp/actors/core + library/cpp/actors/interconnect + library/cpp/dns + library/cpp/monlib/metrics + library/cpp/string_utils/quote +) + +END() diff --git a/library/cpp/actors/interconnect/channel_scheduler.h b/library/cpp/actors/interconnect/channel_scheduler.h new file mode 100644 index 0000000000..551a4cb61a --- /dev/null +++ b/library/cpp/actors/interconnect/channel_scheduler.h @@ -0,0 +1,120 @@ +#pragma once + +#include "interconnect_channel.h" +#include "event_holder_pool.h" + +#include <memory> + +namespace NActors { + + class TChannelScheduler { + const ui32 PeerNodeId; + std::array<std::optional<TEventOutputChannel>, 16> ChannelArray; + THashMap<ui16, TEventOutputChannel> ChannelMap; + std::shared_ptr<IInterconnectMetrics> Metrics; + TEventHolderPool& Pool; + const ui32 MaxSerializedEventSize; + const TSessionParams Params; + + struct THeapItem { + TEventOutputChannel *Channel; + ui64 WeightConsumed = 0; + + friend bool operator <(const THeapItem& x, const THeapItem& y) { + return x.WeightConsumed > y.WeightConsumed; + } + }; + + std::vector<THeapItem> Heap; + + public: + TChannelScheduler(ui32 peerNodeId, const TChannelsConfig& predefinedChannels, + std::shared_ptr<IInterconnectMetrics> metrics, TEventHolderPool& pool, ui32 maxSerializedEventSize, + TSessionParams params) + : PeerNodeId(peerNodeId) + , Metrics(std::move(metrics)) + , Pool(pool) + , MaxSerializedEventSize(maxSerializedEventSize) + , Params(std::move(params)) + { + for (const auto& item : predefinedChannels) { + GetOutputChannel(item.first); + } + } + + TEventOutputChannel *PickChannelWithLeastConsumedWeight() { + Y_VERIFY(!Heap.empty()); + return Heap.front().Channel; + } + + void AddToHeap(TEventOutputChannel& channel, ui64 counter) { + if (channel.IsWorking()) { + ui64 weight = channel.WeightConsumedOnPause; + weight -= Min(weight, counter - channel.EqualizeCounterOnPause); + Heap.push_back(THeapItem{&channel, weight}); + std::push_heap(Heap.begin(), Heap.end()); + } + } + + void FinishPick(ui64 weightConsumed, ui64 counter) { + std::pop_heap(Heap.begin(), Heap.end()); + auto& item = 
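// std::pop_heap has just moved the picked channel to the back of Heap;
+            // charge the weight it consumed there before deciding whether to re-push it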
Heap.back(); + item.WeightConsumed += weightConsumed; + if (item.Channel->IsWorking()) { // reschedule + std::push_heap(Heap.begin(), Heap.end()); + } else { // remove from heap + item.Channel->EqualizeCounterOnPause = counter; + item.Channel->WeightConsumedOnPause = item.WeightConsumed; + Heap.pop_back(); + } + } + + TEventOutputChannel& GetOutputChannel(ui16 channel) { + if (channel < ChannelArray.size()) { + auto& res = ChannelArray[channel]; + if (Y_UNLIKELY(!res)) { + res.emplace(Pool, channel, PeerNodeId, MaxSerializedEventSize, Metrics, + Params); + } + return *res; + } else { + auto it = ChannelMap.find(channel); + if (Y_UNLIKELY(it == ChannelMap.end())) { + it = ChannelMap.emplace(std::piecewise_construct, std::forward_as_tuple(channel), + std::forward_as_tuple(Pool, channel, PeerNodeId, MaxSerializedEventSize, + Metrics, Params)).first; + } + return it->second; + } + } + + ui64 Equalize() { + if (Heap.empty()) { + return 0; // nothing to do here -- no working channels + } + + // find the minimum consumed weight among working channels and then adjust weights + ui64 min = Max<ui64>(); + for (THeapItem& item : Heap) { + min = Min(min, item.WeightConsumed); + } + for (THeapItem& item : Heap) { + item.WeightConsumed -= min; + } + return min; + } + + template<typename TCallback> + void ForEach(TCallback&& callback) { + for (auto& channel : ChannelArray) { + if (channel) { + callback(*channel); + } + } + for (auto& [id, channel] : ChannelMap) { + callback(channel); + } + } + }; + +} // NActors diff --git a/library/cpp/actors/interconnect/event_filter.h b/library/cpp/actors/interconnect/event_filter.h new file mode 100644 index 0000000000..47dabf5f16 --- /dev/null +++ b/library/cpp/actors/interconnect/event_filter.h @@ -0,0 +1,72 @@ +#pragma once + +#include <library/cpp/actors/core/event.h> + +namespace NActors { + + enum class ENodeClass { + SYSTEM, + LOCAL_TENANT, + PEER_TENANT, + COUNT + }; + + class TEventFilter : TNonCopyable { + using TRouteMask = ui16; + + TVector<TVector<TRouteMask>> ScopeRoutes; + + public: + TEventFilter() + : ScopeRoutes(65536) + {} + + void RegisterEvent(ui32 type, TRouteMask routes) { + auto& evSpaceIndex = ScopeRoutes[type >> 16]; + const ui16 subtype = type & 65535; + size_t size = (subtype + 512) & ~511; + if (evSpaceIndex.size() < size) { + evSpaceIndex.resize(size); + } + evSpaceIndex[subtype] = routes; + } + + bool CheckIncomingEvent(const IEventHandle& ev, const TScopeId& localScopeId) const { + TRouteMask routes = 0; + if (const auto& evSpaceIndex = ScopeRoutes[ev.Type >> 16]) { + const ui16 subtype = ev.Type & 65535; + routes = subtype < evSpaceIndex.size() ? evSpaceIndex[subtype] : 0; + } else { + routes = ~TRouteMask(); // allow unfilled event spaces by default + } + return routes & MakeRouteMask(GetNodeClass(ev.OriginScopeId, localScopeId), GetNodeClass(localScopeId, ev.OriginScopeId)); + } + + static ENodeClass GetNodeClass(const TScopeId& scopeId, const TScopeId& localScopeId) { + if (scopeId.first == 0) { + // system scope, or null scope + return scopeId.second ? 
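// (first == 0, second != 0) denotes the system scope; the null scope id
+                // (0, 0) maps to COUNT, for which MakeRouteMask yields an empty mask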
ENodeClass::SYSTEM : ENodeClass::COUNT; + } else if (scopeId == localScopeId) { + return ENodeClass::LOCAL_TENANT; + } else { + return ENodeClass::PEER_TENANT; + } + } + + static TRouteMask MakeRouteMask(ENodeClass from, ENodeClass to) { + if (from == ENodeClass::COUNT || to == ENodeClass::COUNT) { + return 0; + } + return 1U << (static_cast<unsigned>(from) * static_cast<unsigned>(ENodeClass::COUNT) + static_cast<unsigned>(to)); + } + + static TRouteMask MakeRouteMask(std::initializer_list<std::pair<ENodeClass, ENodeClass>> items) { + TRouteMask mask = 0; + for (const auto& p : items) { + mask |= MakeRouteMask(p.first, p.second); + } + return mask; + } + }; + +} // NActors diff --git a/library/cpp/actors/interconnect/event_holder_pool.h b/library/cpp/actors/interconnect/event_holder_pool.h new file mode 100644 index 0000000000..b6090a3bc8 --- /dev/null +++ b/library/cpp/actors/interconnect/event_holder_pool.h @@ -0,0 +1,128 @@ +#pragma once + +#include <library/cpp/containers/stack_vector/stack_vec.h> + +#include "packet.h" + +namespace NActors { + struct TEvFreeItems : TEventLocal<TEvFreeItems, EventSpaceBegin(TEvents::ES_PRIVATE)> { + static constexpr size_t MaxEvents = 256; + + TList<TTcpPacketOutTask> Items; + std::list<TEventHolder> FreeQueue; + TStackVec<THolder<IEventBase>, MaxEvents> Events; + TStackVec<THolder<TEventSerializedData>, MaxEvents> Buffers; + std::shared_ptr<std::atomic<TAtomicBase>> Counter; + ui64 NumBytes = 0; + + ~TEvFreeItems() { + if (Counter) { + TAtomicBase res = Counter->fetch_sub(NumBytes) - NumBytes; + Y_VERIFY(res >= 0); + } + } + + bool GetInLineForDestruction(const TIntrusivePtr<TInterconnectProxyCommon>& common) { + Y_VERIFY(!Counter); + const auto& counter = common->DestructorQueueSize; + const auto& max = common->MaxDestructorQueueSize; + if (counter && (TAtomicBase)(counter->fetch_add(NumBytes) + NumBytes) > max) { + counter->fetch_sub(NumBytes); + return false; + } + Counter = counter; + return true; + } + }; + + class TEventHolderPool { + using TDestroyCallback = std::function<void(THolder<IEventBase>)>; + + static constexpr size_t MaxFreeQueueItems = 32; + static constexpr size_t FreeQueueTrimThreshold = MaxFreeQueueItems * 2; + static constexpr ui64 MaxBytesPerMessage = 10 * 1024 * 1024; + + TIntrusivePtr<TInterconnectProxyCommon> Common; + std::list<TEventHolder> Cache; + THolder<TEvFreeItems> PendingFreeEvent; + TDestroyCallback DestroyCallback; + + public: + TEventHolderPool(TIntrusivePtr<TInterconnectProxyCommon> common, + TDestroyCallback destroyCallback) + : Common(std::move(common)) + , DestroyCallback(std::move(destroyCallback)) + {} + + TEventHolder& Allocate(std::list<TEventHolder>& queue) { + if (Cache.empty()) { + queue.emplace_back(); + } else { + queue.splice(queue.end(), Cache, Cache.begin()); + } + return queue.back(); + } + + void Release(std::list<TEventHolder>& queue) { + for (auto it = queue.begin(); it != queue.end(); ) { + Release(queue, it++); + } + } + + void Release(std::list<TEventHolder>& queue, std::list<TEventHolder>::iterator event) { + bool trim = false; + + // release held event, if any + if (THolder<IEventBase> ev = std::move(event->Event)) { + auto p = GetPendingEvent(); + p->NumBytes += event->EventSerializedSize; + auto& events = p->Events; + events.push_back(std::move(ev)); + trim = trim || events.size() >= TEvFreeItems::MaxEvents || p->NumBytes >= MaxBytesPerMessage; + } + + // release buffer, if any + if (event->Buffer && event->Buffer.RefCount() == 1) { + auto p = GetPendingEvent(); + p->NumBytes += 
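// the serialized size is charged against TEvFreeItems' byte budget, which
+                // bounds how much destruction work gets batched into a single event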
event->EventSerializedSize; + auto& buffers = p->Buffers; + buffers.emplace_back(event->Buffer.Release()); + trim = trim || buffers.size() >= TEvFreeItems::MaxEvents || p->NumBytes >= MaxBytesPerMessage; + } + + // free event and trim the cache if its size is exceeded + event->Clear(); + Cache.splice(Cache.end(), queue, event); + if (Cache.size() >= FreeQueueTrimThreshold) { + auto& freeQueue = GetPendingEvent()->FreeQueue; + auto it = Cache.begin(); + std::advance(it, Cache.size() - MaxFreeQueueItems); + freeQueue.splice(freeQueue.end(), Cache, Cache.begin(), it); + trim = true; + } + + // release items if we have hit the limit + if (trim) { + Trim(); + } + } + + void Trim() { + if (auto ev = std::move(PendingFreeEvent); ev && ev->GetInLineForDestruction(Common)) { + DestroyCallback(std::move(ev)); + } + + // ensure it is dropped + PendingFreeEvent.Reset(); + } + + private: + TEvFreeItems* GetPendingEvent() { + if (!PendingFreeEvent) { + PendingFreeEvent.Reset(new TEvFreeItems); + } + return PendingFreeEvent.Get(); + } + }; + +} diff --git a/library/cpp/actors/interconnect/events_local.h b/library/cpp/actors/interconnect/events_local.h new file mode 100644 index 0000000000..8a46ffd535 --- /dev/null +++ b/library/cpp/actors/interconnect/events_local.h @@ -0,0 +1,403 @@ +#pragma once + +#include <library/cpp/actors/core/events.h> +#include <library/cpp/actors/core/event_local.h> +#include <library/cpp/actors/protos/interconnect.pb.h> +#include <util/generic/deque.h> +#include <util/network/address.h> + +#include "interconnect_stream.h" +#include "packet.h" +#include "types.h" + +namespace NActors { + struct TProgramInfo { + ui64 PID = 0; + ui64 StartTime = 0; + ui64 Serial = 0; + }; + + enum class ENetwork : ui32 { + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // local messages + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + Start = EventSpaceBegin(TEvents::ES_INTERCONNECT_TCP), + + SocketReadyRead = Start, + SocketReadyWrite, + SocketError, + Connect, + Disconnect, + IncomingConnection, + HandshakeAsk, + HandshakeAck, + HandshakeNak, + HandshakeDone, + HandshakeFail, + Kick, + Flush, + NodeInfo, + BunchOfEventsToDestroy, + HandshakeRequest, + HandshakeReplyOK, + HandshakeReplyError, + ResolveAddress, + AddressInfo, + ResolveError, + HTTPStreamStatus, + HTTPSendContent, + ConnectProtocolWakeup, + HTTPProtocolRetry, + EvPollerRegister, + EvPollerRegisterResult, + EvPollerReady, + EvUpdateFromInputSession, + EvConfirmUpdate, + EvSessionBufferSizeRequest, + EvSessionBufferSizeResponse, + EvProcessPingRequest, + EvGetSecureSocket, + EvSecureSocket, + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // nonlocal messages; their indices must be preserved in order to work properly while doing rolling update + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + // interconnect load test message + EvLoadMessage = Start + 256, + }; + + struct TEvSocketReadyRead: public TEventLocal<TEvSocketReadyRead, ui32(ENetwork::SocketReadyRead)> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvSocketReadyRead, "Network: TEvSocketReadyRead") + }; + + struct TEvSocketReadyWrite: public TEventLocal<TEvSocketReadyWrite, ui32(ENetwork::SocketReadyWrite)> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvSocketReadyWrite, "Network: TEvSocketReadyWrite") + }; + + 
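// A subscriber typically reacts to these readiness notifications by draining
+    // or flushing the socket until it would block; a minimal sketch (the handler
+    // and helper names below are illustrative, not part of this header):
+    //
+    //   void TSession::Handle(TEvSocketReadyRead::TPtr&) {
+    //       while (ReadChunkFromSocket() > 0) {
+    //       }                   // drain until the socket would block
+    //       RearmPoller();      // request the next readiness notification
+    //   }
+
+    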
struct TEvSocketError: public TEventLocal<TEvSocketError, ui32(ENetwork::SocketError)> {
+        DEFINE_SIMPLE_LOCAL_EVENT(TEvSocketError, ::strerror(Error))
+        TString GetReason() const {
+            return ::strerror(Error);
+        }
+        const int Error;
+        TIntrusivePtr<NInterconnect::TStreamSocket> Socket;
+
+        TEvSocketError(int error, TIntrusivePtr<NInterconnect::TStreamSocket> sock)
+            : Error(error)
+            , Socket(std::move(sock))
+        {
+        }
+    };
+
+    struct TEvSocketConnect: public TEventLocal<TEvSocketConnect, ui32(ENetwork::Connect)> {
+        DEFINE_SIMPLE_LOCAL_EVENT(TEvSocketConnect, "Network: TEvSocketConnect")
+    };
+
+    struct TEvSocketDisconnect: public TEventLocal<TEvSocketDisconnect, ui32(ENetwork::Disconnect)> {
+        DEFINE_SIMPLE_LOCAL_EVENT(TEvSocketDisconnect, "Network: TEvSocketDisconnect")
+        TDisconnectReason Reason;
+
+        TEvSocketDisconnect(TDisconnectReason reason)
+            : Reason(std::move(reason))
+        {
+        }
+    };
+
+    struct TEvHandshakeAsk: public TEventLocal<TEvHandshakeAsk, ui32(ENetwork::HandshakeAsk)> {
+        DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeAsk, "Network: TEvHandshakeAsk")
+        TEvHandshakeAsk(const TActorId& self,
+                        const TActorId& peer,
+                        ui64 counter)
+            : Self(self)
+            , Peer(peer)
+            , Counter(counter)
+        {
+        }
+        const TActorId Self;
+        const TActorId Peer;
+        const ui64 Counter;
+    };
+
+    struct TEvHandshakeAck: public TEventLocal<TEvHandshakeAck, ui32(ENetwork::HandshakeAck)> {
+        DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeAck, "Network: TEvHandshakeAck")
+
+        TEvHandshakeAck(const TActorId& self, ui64 nextPacket, TSessionParams params)
+            : Self(self)
+            , NextPacket(nextPacket)
+            , Params(std::move(params))
+        {}
+
+        const TActorId Self;
+        const ui64 NextPacket;
+        const TSessionParams Params;
+    };
+
+    struct TEvHandshakeNak : TEventLocal<TEvHandshakeNak, ui32(ENetwork::HandshakeNak)> {
+        DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeNak, "Network: TEvHandshakeNak")
+    };
+
+    struct TEvHandshakeRequest
+        : public TEventLocal<TEvHandshakeRequest,
+                             ui32(ENetwork::HandshakeRequest)> {
+        DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeRequest,
+                                  "Network: TEvHandshakeRequest")
+
+        NActorsInterconnect::THandshakeRequest Record;
+    };
+
+    struct TEvHandshakeReplyOK
+        : public TEventLocal<TEvHandshakeReplyOK,
+                             ui32(ENetwork::HandshakeReplyOK)> {
+        DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeReplyOK,
+                                  "Network: TEvHandshakeReplyOK")
+
+        NActorsInterconnect::THandshakeReply Record;
+    };
+
+    struct TEvHandshakeReplyError
+        : public TEventLocal<TEvHandshakeReplyError,
+                             ui32(ENetwork::HandshakeReplyError)> {
+        DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeReplyError,
+                                  "Network: TEvHandshakeReplyError")
+
+        TEvHandshakeReplyError(TString error) {
+            Record.SetErrorExplaination(error);
+        }
+
+        NActorsInterconnect::THandshakeReply Record;
+    };
+
+    struct TEvIncomingConnection: public TEventLocal<TEvIncomingConnection, ui32(ENetwork::IncomingConnection)> {
+        DEFINE_SIMPLE_LOCAL_EVENT(TEvIncomingConnection, "Network: TEvIncomingConnection")
+        TIntrusivePtr<NInterconnect::TStreamSocket> Socket;
+        NInterconnect::TAddress Address;
+
+        TEvIncomingConnection(TIntrusivePtr<NInterconnect::TStreamSocket> socket, NInterconnect::TAddress address)
+            : Socket(std::move(socket))
+            , Address(std::move(address))
+        {}
+    };
+
+    struct TEvHandshakeDone: public TEventLocal<TEvHandshakeDone, ui32(ENetwork::HandshakeDone)> {
+        DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeDone, "Network: TEvHandshakeDone")
+
+        TEvHandshakeDone(
+            TIntrusivePtr<NInterconnect::TStreamSocket> socket,
+            const TActorId& peer,
+            const TActorId& self,
+            ui64 nextPacket,
+            TAutoPtr<TProgramInfo>&& programInfo,
+            
TSessionParams params) + : Socket(std::move(socket)) + , Peer(peer) + , Self(self) + , NextPacket(nextPacket) + , ProgramInfo(std::move(programInfo)) + , Params(std::move(params)) + { + } + + TIntrusivePtr<NInterconnect::TStreamSocket> Socket; + const TActorId Peer; + const TActorId Self; + const ui64 NextPacket; + TAutoPtr<TProgramInfo> ProgramInfo; + const TSessionParams Params; + }; + + struct TEvHandshakeFail: public TEventLocal<TEvHandshakeFail, ui32(ENetwork::HandshakeFail)> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeFail, "Network: TEvHandshakeFail") + + enum EnumHandshakeFail { + HANDSHAKE_FAIL_TRANSIENT, + HANDSHAKE_FAIL_PERMANENT, + HANDSHAKE_FAIL_SESSION_MISMATCH, + }; + + TEvHandshakeFail(EnumHandshakeFail temporary, TString explanation) + : Temporary(temporary) + , Explanation(std::move(explanation)) + { + } + + const EnumHandshakeFail Temporary; + const TString Explanation; + }; + + struct TEvKick: public TEventLocal<TEvKick, ui32(ENetwork::Kick)> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvKick, "Network: TEvKick") + }; + + struct TEvFlush: public TEventLocal<TEvFlush, ui32(ENetwork::Flush)> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvFlush, "Network: TEvFlush") + }; + + struct TEvLocalNodeInfo + : public TEventLocal<TEvLocalNodeInfo, ui32(ENetwork::NodeInfo)> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvLocalNodeInfo, "Network: TEvLocalNodeInfo") + + ui32 NodeId; + NAddr::IRemoteAddrPtr Address; + }; + + struct TEvBunchOfEventsToDestroy : TEventLocal<TEvBunchOfEventsToDestroy, ui32(ENetwork::BunchOfEventsToDestroy)> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvBunchOfEventsToDestroy, + "Network: TEvBunchOfEventsToDestroy") + + TEvBunchOfEventsToDestroy(TDeque<TAutoPtr<IEventBase>> events) + : Events(std::move(events)) + { + } + + TDeque<TAutoPtr<IEventBase>> Events; + }; + + struct TEvResolveAddress + : public TEventLocal<TEvResolveAddress, ui32(ENetwork::ResolveAddress)> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvResolveAddress, "Network: TEvResolveAddress") + + TString Address; + ui16 Port; + }; + + struct TEvAddressInfo + : public TEventLocal<TEvAddressInfo, ui32(ENetwork::AddressInfo)> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvAddressInfo, "Network: TEvAddressInfo") + + NAddr::IRemoteAddrPtr Address; + }; + + struct TEvResolveError + : public TEventLocal<TEvResolveError, ui32(ENetwork::ResolveError)> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvResolveError, "Network: TEvResolveError") + + TString Explain; + }; + + struct TEvHTTPStreamStatus + : public TEventLocal<TEvHTTPStreamStatus, ui32(ENetwork::HTTPStreamStatus)> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvHTTPStreamStatus, + "Network: TEvHTTPStreamStatus") + enum EStatus { + READY, + COMPLETE, + ERROR, + }; + + EStatus Status; + TString Error; + TString HttpHeaders; + }; + + struct TEvHTTPSendContent + : public TEventLocal<TEvHTTPSendContent, ui32(ENetwork::HTTPSendContent)> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvHTTPSendContent, "Network: TEvHTTPSendContent") + + const char* Data; + size_t Len; + bool Last; + }; + + struct TEvConnectWakeup + : public TEventLocal<TEvConnectWakeup, + ui32(ENetwork::ConnectProtocolWakeup)> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvConnectWakeup, "Protocols: TEvConnectWakeup") + }; + + struct TEvHTTPProtocolRetry + : public TEventLocal<TEvHTTPProtocolRetry, + ui32(ENetwork::HTTPProtocolRetry)> { + DEFINE_SIMPLE_LOCAL_EVENT(TEvHTTPProtocolRetry, + "Protocols: TEvHTTPProtocolRetry") + }; + + struct TEvLoadMessage + : TEventPB<TEvLoadMessage, NActorsInterconnect::TEvLoadMessage, static_cast<ui32>(ENetwork::EvLoadMessage)> { + TEvLoadMessage() = default; + + template <typename TContainer> 
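+        // presumably consumed by the interconnect load test (see EvLoadMessage
+        // above): each entry in Hops names the next hop; the payload rides along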
+ TEvLoadMessage(const TContainer& route, const TString& id, const TString* payload) { + for (const TActorId& actorId : route) { + auto* hop = Record.AddHops(); + if (actorId) { + ActorIdToProto(actorId, hop->MutableNextHop()); + } + } + Record.SetId(id); + if (payload) { + Record.SetPayload(*payload); + } + } + + template <typename TContainer> + TEvLoadMessage(const TContainer& route, const TString& id, TRope&& payload) { + for (const TActorId& actorId : route) { + auto* hop = Record.AddHops(); + if (actorId) { + ActorIdToProto(actorId, hop->MutableNextHop()); + } + } + Record.SetId(id); + AddPayload(std::move(payload)); + } + }; + + struct TEvUpdateFromInputSession : TEventLocal<TEvUpdateFromInputSession, static_cast<ui32>(ENetwork::EvUpdateFromInputSession)> { + ui64 ConfirmedByInput; // latest Confirm value from processed input packet + ui64 NumDataBytes; + TDuration Ping; + + TEvUpdateFromInputSession(ui64 confirmedByInput, ui64 numDataBytes, TDuration ping) + : ConfirmedByInput(confirmedByInput) + , NumDataBytes(numDataBytes) + , Ping(ping) + { + } + }; + + struct TEvConfirmUpdate : TEventLocal<TEvConfirmUpdate, static_cast<ui32>(ENetwork::EvConfirmUpdate)> + {}; + + struct TEvSessionBufferSizeRequest : TEventLocal<TEvSessionBufferSizeRequest, static_cast<ui32>(ENetwork::EvSessionBufferSizeRequest)> { + //DEFINE_SIMPLE_LOCAL_EVENT(TEvSessionBufferSizeRequest, "Session: TEvSessionBufferSizeRequest") + DEFINE_SIMPLE_LOCAL_EVENT(TEvSessionBufferSizeRequest, "Network: TEvSessionBufferSizeRequest"); + }; + + struct TEvSessionBufferSizeResponse : TEventLocal<TEvSessionBufferSizeResponse, static_cast<ui32>(ENetwork::EvSessionBufferSizeResponse)> { + TEvSessionBufferSizeResponse(const TActorId& sessionId, ui64 outputBufferSize) + : SessionID(sessionId) + , BufferSize(outputBufferSize) + { + } + + TActorId SessionID; + ui64 BufferSize; + }; + + struct TEvProcessPingRequest : TEventLocal<TEvProcessPingRequest, static_cast<ui32>(ENetwork::EvProcessPingRequest)> { + const ui64 Payload; + + TEvProcessPingRequest(ui64 payload) + : Payload(payload) + {} + }; + + struct TEvGetSecureSocket : TEventLocal<TEvGetSecureSocket, (ui32)ENetwork::EvGetSecureSocket> { + TIntrusivePtr<NInterconnect::TStreamSocket> Socket; + + TEvGetSecureSocket(TIntrusivePtr<NInterconnect::TStreamSocket> socket) + : Socket(std::move(socket)) + {} + }; + + struct TEvSecureSocket : TEventLocal<TEvSecureSocket, (ui32)ENetwork::EvSecureSocket> { + TIntrusivePtr<NInterconnect::TSecureSocket> Socket; + + TEvSecureSocket(TIntrusivePtr<NInterconnect::TSecureSocket> socket) + : Socket(std::move(socket)) + {} + }; + +} diff --git a/library/cpp/actors/interconnect/interconnect.h b/library/cpp/actors/interconnect/interconnect.h new file mode 100644 index 0000000000..225a5243fd --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect.h @@ -0,0 +1,179 @@ +#pragma once + +#include <library/cpp/actors/core/actorsystem.h> +#include <library/cpp/actors/core/interconnect.h> +#include <util/generic/map.h> +#include <util/network/address.h> + +namespace NActors { + struct TInterconnectGlobalState: public TThrRefBase { + TString SelfAddress; + ui32 SelfPort; + + TVector<TActorId> GlobalNameservers; // todo: add some info about (like expected reply time) + }; + + struct TInterconnectProxySetup: public TThrRefBase { + // synchronous (session -> proxy) + struct IProxy : TNonCopyable { + virtual ~IProxy() { + } + + virtual void ActivateSession(const TActorContext& ctx) = 0; // session activated + virtual void DetachSession(const TActorContext& 
ctx) = 0; // session is dead + }; + + // synchronous (proxy -> session) + struct ISession : TNonCopyable { + virtual ~ISession() { + } + + virtual void DetachSession(const TActorContext& ownerCtx, const TActorContext& sessionCtx) = 0; // kill yourself + virtual void ForwardPacket(TAutoPtr<IEventHandle>& ev, const TActorContext& ownerCtx, const TActorContext& sessionCtx) = 0; // receive packet for forward + virtual void Connect(const TActorContext& ownerCtx, const TActorContext& sessionCtx) = 0; // begin connection + virtual bool ReceiveIncomingSession(TAutoPtr<IEventHandle>& ev, const TActorContext& ownerCtx, const TActorContext& sessionCtx) = 0; // handle incoming session, if returns true - then session is dead and must be recreated with new one + }; + + ui32 DestinationNode; + + TString StaticAddress; // if set - would be used as main destination address + int StaticPort; + + TIntrusivePtr<TInterconnectGlobalState> GlobalState; + + virtual IActor* CreateSession(const TActorId& ownerId, IProxy* owner) = 0; // returned actor is session and would be attached to same mailbox as proxy to allow sync calls + virtual TActorSetupCmd CreateAcceptor() = 0; + }; + + struct TNameserverSetup { + TActorId ServiceID; + + TIntrusivePtr<TInterconnectGlobalState> GlobalState; + }; + + struct TTableNameserverSetup: public TThrRefBase { + struct TNodeInfo { + TString Address; + TString Host; + TString ResolveHost; + ui16 Port; + TNodeLocation Location; + TString& first; + ui16& second; + + TNodeInfo() + : first(Address) + , second(Port) + { + } + + TNodeInfo(const TNodeInfo&) = default; + + // for testing purposes only + TNodeInfo(const TString& address, const TString& host, ui16 port) + : TNodeInfo() + { + Address = address; + Host = host; + ResolveHost = host; + Port = port; + } + + TNodeInfo(const TString& address, + const TString& host, + const TString& resolveHost, + ui16 port, + const TNodeLocation& location) + : TNodeInfo() + { + Address = address; + Host = host; + ResolveHost = resolveHost; + Port = port; + Location = location; + } + + // for testing purposes only + TNodeInfo& operator=(const std::pair<TString, ui32>& pr) { + Address = pr.first; + Host = pr.first; + ResolveHost = pr.first; + Port = pr.second; + return *this; + } + + TNodeInfo& operator=(const TNodeInfo& ni) { + Address = ni.Address; + Host = ni.Host; + ResolveHost = ni.ResolveHost; + Port = ni.Port; + Location = ni.Location; + return *this; + } + }; + + TMap<ui32, TNodeInfo> StaticNodeTable; + + bool IsEntriesUnique() const; + }; + + struct TNodeRegistrarSetup { + TActorId ServiceID; + + TIntrusivePtr<TInterconnectGlobalState> GlobalState; + }; + + TActorId GetNameserviceActorId(); + + /** + * Const table-lookup based name service + */ + + IActor* CreateNameserverTable( + const TIntrusivePtr<TTableNameserverSetup>& setup, + ui32 poolId = 0); + + /** + * Name service which can be paired with external discovery service. + * Copies information from setup on the start (table may be empty). + * Handles TEvNodesInfo to change list of known nodes. + * + * If PendingPeriod is not zero, wait for unknown nodeId + */ + + IActor* CreateDynamicNameserver( + const TIntrusivePtr<TTableNameserverSetup>& setup, + const TDuration& pendingPeriod = TDuration::Zero(), + ui32 poolId = 0); + + /** + * Creates an actor that resolves host/port and replies with either: + * + * - TEvLocalNodeInfo on success + * - TEvResolveError on errors + * + * Optional defaultAddress may be used as fallback. 
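+     *
+     * A hedged usage sketch (host, port, node id and timeout are illustrative):
+     *
+     *   Register(CreateResolveActor("host.example.net", 19001, nodeId, "::1",
+     *                               SelfId(), SelfId(), TInstant::Now() + TDuration::Seconds(5)));
+     *
+     * The resolver replies with exactly one of the two events above, so the
+     * caller should be prepared to handle both.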
+ */ + IActor* CreateResolveActor( + const TString& host, ui16 port, ui32 nodeId, const TString& defaultAddress, + const TActorId& replyTo, const TActorId& replyFrom, TInstant deadline); + + inline IActor* CreateResolveActor( + ui32 nodeId, const TTableNameserverSetup::TNodeInfo& nodeInfo, + const TActorId& replyTo, const TActorId& replyFrom, TInstant deadline) + { + return CreateResolveActor(nodeInfo.ResolveHost, nodeInfo.Port, nodeId, nodeInfo.Address, + replyTo, replyFrom, deadline); + } + + /** + * Creates an actor that resolves host/port and replies with either: + * + * - TEvAddressInfo on success + * - TEvResolveError on errors + */ + IActor* CreateResolveActor( + const TString& host, ui16 port, + const TActorId& replyTo, const TActorId& replyFrom, TInstant deadline); + +} diff --git a/library/cpp/actors/interconnect/interconnect_address.cpp b/library/cpp/actors/interconnect/interconnect_address.cpp new file mode 100644 index 0000000000..8f474f5a39 --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_address.cpp @@ -0,0 +1,94 @@ +#include "interconnect_address.h" + +#include <util/string/cast.h> +#include <util/system/file.h> + +#if defined(_linux_) +#include <sys/un.h> +#include <sys/stat.h> +#endif + +namespace NInterconnect { + TAddress::TAddress() { + memset(&Addr, 0, sizeof(Addr)); + } + + TAddress::TAddress(NAddr::IRemoteAddr& addr) { + socklen_t len = addr.Len(); + Y_VERIFY(len <= sizeof(Addr)); + memcpy(&Addr.Generic, addr.Addr(), len); + } + + int TAddress::GetFamily() const { + return Addr.Generic.sa_family; + } + + socklen_t TAddress::Size() const { + switch (Addr.Generic.sa_family) { + case AF_INET6: + return sizeof(sockaddr_in6); + case AF_INET: + return sizeof(sockaddr_in); + default: + return 0; + } + } + + sockaddr* TAddress::SockAddr() { + return &Addr.Generic; + } + + const sockaddr* TAddress::SockAddr() const { + return &Addr.Generic; + } + + ui16 TAddress::GetPort() const { + switch (Addr.Generic.sa_family) { + case AF_INET6: + return ntohs(Addr.Ipv6.sin6_port); + case AF_INET: + return ntohs(Addr.Ipv4.sin_port); + default: + return 0; + } + } + + TString TAddress::ToString() const { + return GetAddress() + ":" + ::ToString(GetPort()); + } + + TAddress::TAddress(const char* addr, ui16 port) { + memset(&Addr, 0, sizeof(Addr)); + if (inet_pton(Addr.Ipv6.sin6_family = AF_INET6, addr, &Addr.Ipv6.sin6_addr)) { + Addr.Ipv6.sin6_port = htons(port); + } else if (inet_pton(Addr.Ipv4.sin_family = AF_INET, addr, &Addr.Ipv4.sin_addr)) { + Addr.Ipv4.sin_port = htons(port); + } + } + + TAddress::TAddress(const TString& addr, ui16 port) + : TAddress(addr.data(), port) + {} + + TString TAddress::GetAddress() const { + const void *src; + socklen_t size; + + switch (Addr.Generic.sa_family) { + case AF_INET6: + std::tie(src, size) = std::make_tuple(&Addr.Ipv6.sin6_addr, INET6_ADDRSTRLEN); + break; + + case AF_INET: + std::tie(src, size) = std::make_tuple(&Addr.Ipv4.sin_addr, INET_ADDRSTRLEN); + break; + + default: + return TString(); + } + + char *buffer = static_cast<char*>(alloca(size)); + const char *p = inet_ntop(Addr.Generic.sa_family, const_cast<void*>(src), buffer, size); + return p ? 
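// inet_ntop returns nullptr on failure; fall back to an empty string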
TString(p) : TString(); + } +} diff --git a/library/cpp/actors/interconnect/interconnect_address.h b/library/cpp/actors/interconnect/interconnect_address.h new file mode 100644 index 0000000000..e9e0faec81 --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_address.h @@ -0,0 +1,29 @@ +#pragma once + +#include <util/system/defaults.h> +#include <util/network/init.h> +#include <util/network/address.h> +#include <util/generic/string.h> + +namespace NInterconnect { + class TAddress { + union { + sockaddr Generic; + sockaddr_in Ipv4; + sockaddr_in6 Ipv6; + } Addr; + + public: + TAddress(); + TAddress(const char* addr, ui16 port); + TAddress(const TString& addr, ui16 port); + TAddress(NAddr::IRemoteAddr& addr); + int GetFamily() const; + socklen_t Size() const; + ::sockaddr* SockAddr(); + const ::sockaddr* SockAddr() const; + ui16 GetPort() const; + TString GetAddress() const; + TString ToString() const; + }; +} diff --git a/library/cpp/actors/interconnect/interconnect_channel.cpp b/library/cpp/actors/interconnect/interconnect_channel.cpp new file mode 100644 index 0000000000..a66ba2a154 --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_channel.cpp @@ -0,0 +1,176 @@ +#include "interconnect_channel.h" + +#include <library/cpp/actors/core/events.h> +#include <library/cpp/actors/core/executor_thread.h> +#include <library/cpp/actors/core/log.h> +#include <library/cpp/actors/core/probes.h> +#include <library/cpp/actors/protos/services_common.pb.h> +#include <library/cpp/actors/prof/tag.h> +#include <library/cpp/digest/crc32c/crc32c.h> + +LWTRACE_USING(ACTORLIB_PROVIDER); + +namespace NActors { + DECLARE_WILSON_EVENT(EventSentToSocket); + DECLARE_WILSON_EVENT(EventReceivedFromSocket); + + bool TEventOutputChannel::FeedDescriptor(TTcpPacketOutTask& task, TEventHolder& event, ui64 *weightConsumed) { + const size_t amount = sizeof(TChannelPart) + sizeof(TEventDescr); + if (task.GetVirtualFreeAmount() < amount) { + return false; + } + + NWilson::TTraceId traceId(event.Descr.TraceId); +// if (ctx) { +// WILSON_TRACE(*ctx, &traceId, EventSentToSocket); +// } + traceId.Serialize(&event.Descr.TraceId); + LWTRACK(SerializeToPacketEnd, event.Orbit, PeerNodeId, ChannelId, OutputQueueSize, task.GetDataSize()); + task.Orbit.Take(event.Orbit); + + event.Descr.Flags = (event.Descr.Flags & ~IEventHandle::FlagForwardOnNondelivery) | + (ExtendedFormat ? 
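// FlagForwardOnNondelivery was masked out above; the extended-format bit
+        // advertises how the event payload was serialized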
IEventHandle::FlagExtendedFormat : 0); + + TChannelPart *part = static_cast<TChannelPart*>(task.GetFreeArea()); + part->Channel = ChannelId | TChannelPart::LastPartFlag; + part->Size = sizeof(TEventDescr); + memcpy(part + 1, &event.Descr, sizeof(TEventDescr)); + task.AppendBuf(part, amount); + *weightConsumed += amount; + OutputQueueSize -= part->Size; + Metrics->UpdateOutputChannelEvents(ChannelId); + + return true; + } + + void TEventOutputChannel::DropConfirmed(ui64 confirm) { + LOG_DEBUG_IC_SESSION("ICOCH98", "Dropping confirmed messages"); + for (auto it = NotYetConfirmed.begin(); it != NotYetConfirmed.end() && it->Serial <= confirm; ) { + Pool.Release(NotYetConfirmed, it++); + } + } + + bool TEventOutputChannel::FeedBuf(TTcpPacketOutTask& task, ui64 serial, ui64 *weightConsumed) { + for (;;) { + Y_VERIFY(!Queue.empty()); + TEventHolder& event = Queue.front(); + + switch (State) { + case EState::INITIAL: + event.InitChecksum(); + LWTRACK(SerializeToPacketBegin, event.Orbit, PeerNodeId, ChannelId, OutputQueueSize); + if (event.Event) { + State = EState::CHUNKER; + IEventBase *base = event.Event.Get(); + Chunker.SetSerializingEvent(base); + ExtendedFormat = base->IsExtendedFormat(); + } else if (event.Buffer) { + State = EState::BUFFER; + Iter = event.Buffer->GetBeginIter(); + ExtendedFormat = event.Buffer->IsExtendedFormat(); + } else { + State = EState::DESCRIPTOR; + ExtendedFormat = false; + } + break; + + case EState::CHUNKER: + case EState::BUFFER: { + size_t maxBytes = task.GetVirtualFreeAmount(); + if (maxBytes <= sizeof(TChannelPart)) { + return false; + } + + TChannelPart *part = static_cast<TChannelPart*>(task.GetFreeArea()); + part->Channel = ChannelId; + part->Size = 0; + task.AppendBuf(part, sizeof(TChannelPart)); + maxBytes -= sizeof(TChannelPart); + Y_VERIFY(maxBytes); + + auto addChunk = [&](const void *data, size_t len) { + event.UpdateChecksum(Params, data, len); + task.AppendBuf(data, len); + part->Size += len; + Y_VERIFY_DEBUG(maxBytes >= len); + maxBytes -= len; + + event.EventActuallySerialized += len; + if (event.EventActuallySerialized > MaxSerializedEventSize) { + throw TExSerializedEventTooLarge(event.Descr.Type); + } + }; + + bool complete = false; + if (State == EState::CHUNKER) { + Y_VERIFY_DEBUG(task.GetFreeArea() == part + 1); + while (!complete && maxBytes) { + const auto [first, last] = Chunker.FeedBuf(task.GetFreeArea(), maxBytes); + for (auto p = first; p != last; ++p) { + addChunk(p->first, p->second); + } + complete = Chunker.IsComplete(); + } + Y_VERIFY(!complete || Chunker.IsSuccessfull()); + Y_VERIFY_DEBUG(complete || !maxBytes); + } else { // BUFFER + while (const size_t numb = Min(maxBytes, Iter.ContiguousSize())) { + const char *obuf = Iter.ContiguousData(); + addChunk(obuf, numb); + Iter += numb; + } + complete = !Iter.Valid(); + } + if (complete) { + Y_VERIFY(event.EventActuallySerialized == event.EventSerializedSize, + "EventActuallySerialized# %" PRIu32 " EventSerializedSize# %" PRIu32 " Type# 0x%08" PRIx32, + event.EventActuallySerialized, event.EventSerializedSize, event.Descr.Type); + } + + if (!part->Size) { + task.Undo(sizeof(TChannelPart)); + } else { + *weightConsumed += sizeof(TChannelPart) + part->Size; + OutputQueueSize -= part->Size; + } + if (complete) { + State = EState::DESCRIPTOR; + } + break; + } + + case EState::DESCRIPTOR: + if (!FeedDescriptor(task, event, weightConsumed)) { + return false; + } + event.Serial = serial; + NotYetConfirmed.splice(NotYetConfirmed.end(), Queue, Queue.begin()); // move event to 
not-yet-confirmed queue
+                    State = EState::INITIAL;
+                    return true; // we have processed whole event, signal to the caller
+            }
+        }
+    }
+
+    void TEventOutputChannel::NotifyUndelivered() {
+        LOG_DEBUG_IC_SESSION("ICOCH89", "Notifying about undelivered messages! NotYetConfirmed size: %zu, Queue size: %zu", NotYetConfirmed.size(), Queue.size());
+        if (State == EState::CHUNKER) {
+            Y_VERIFY(!Chunker.IsComplete()); // chunk must have an event being serialized
+            Y_VERIFY(!Queue.empty()); // this event must be the first event in queue
+            TEventHolder& event = Queue.front();
+            Y_VERIFY(Chunker.GetCurrentEvent() == event.Event.Get()); // ensure the event is valid
+            Chunker.Abort(); // stop serializing current event
+            Y_VERIFY(Chunker.IsComplete());
+        }
+        for (auto& item : NotYetConfirmed) {
+            if (item.Descr.Flags & IEventHandle::FlagGenerateUnsureUndelivered) { // notify only when unsure flag is set
+                item.ForwardOnNondelivery(true);
+            }
+        }
+        Pool.Release(NotYetConfirmed);
+        for (auto& item : Queue) {
+            item.ForwardOnNondelivery(false);
+        }
+        Pool.Release(Queue);
+    }
+
+}
diff --git a/library/cpp/actors/interconnect/interconnect_channel.h b/library/cpp/actors/interconnect/interconnect_channel.h
new file mode 100644
index 0000000000..e4a0ae3cda
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_channel.h
@@ -0,0 +1,127 @@
+#pragma once
+
+#include <library/cpp/monlib/dynamic_counters/counters.h>
+#include <library/cpp/actors/core/actorsystem.h>
+#include <library/cpp/actors/core/event_load.h>
+#include <library/cpp/actors/util/rope.h>
+#include <util/generic/deque.h>
+#include <util/generic/vector.h>
+#include <util/generic/map.h>
+#include <util/stream/walk.h>
+#include <library/cpp/actors/wilson/wilson_event.h>
+#include <library/cpp/actors/helpers/mon_histogram_helper.h>
+
+#include "interconnect_common.h"
+#include "interconnect_counters.h"
+#include "packet.h"
+#include "event_holder_pool.h"
+
+namespace NActors {
+#pragma pack(push, 1)
+    struct TChannelPart {
+        ui16 Channel;
+        ui16 Size;
+
+        static constexpr ui16 LastPartFlag = ui16(1) << 15;
+
+        TString ToString() const {
+            return TStringBuilder() << "{Channel# " << (Channel & ~LastPartFlag)
+                << " LastPartFlag# " << ((Channel & LastPartFlag) ? 
"true" : "false") + << " Size# " << Size << "}"; + } + }; +#pragma pack(pop) + + struct TExSerializedEventTooLarge : std::exception { + const ui32 Type; + + TExSerializedEventTooLarge(ui32 type) + : Type(type) + {} + }; + + class TEventOutputChannel : public TInterconnectLoggingBase { + public: + TEventOutputChannel(TEventHolderPool& pool, ui16 id, ui32 peerNodeId, ui32 maxSerializedEventSize, + std::shared_ptr<IInterconnectMetrics> metrics, TSessionParams params) + : TInterconnectLoggingBase(Sprintf("OutputChannel %" PRIu16 " [node %" PRIu32 "]", id, peerNodeId)) + , Pool(pool) + , PeerNodeId(peerNodeId) + , ChannelId(id) + , Metrics(std::move(metrics)) + , Params(std::move(params)) + , MaxSerializedEventSize(maxSerializedEventSize) + {} + + ~TEventOutputChannel() { + } + + std::pair<ui32, TEventHolder*> Push(IEventHandle& ev) { + TEventHolder& event = Pool.Allocate(Queue); + const ui32 bytes = event.Fill(ev) + sizeof(TEventDescr); + OutputQueueSize += bytes; + return std::make_pair(bytes, &event); + } + + void DropConfirmed(ui64 confirm); + + bool FeedBuf(TTcpPacketOutTask& task, ui64 serial, ui64 *weightConsumed); + + bool IsEmpty() const { + return Queue.empty(); + } + + bool IsWorking() const { + return !IsEmpty(); + } + + ui32 GetQueueSize() const { + return (ui32)Queue.size(); + } + + ui64 GetBufferedAmountOfData() const { + return OutputQueueSize; + } + + void NotifyUndelivered(); + + TEventHolderPool& Pool; + const ui32 PeerNodeId; + const ui16 ChannelId; + std::shared_ptr<IInterconnectMetrics> Metrics; + const TSessionParams Params; + const ui32 MaxSerializedEventSize; + ui64 UnaccountedTraffic = 0; + ui64 EqualizeCounterOnPause = 0; + ui64 WeightConsumedOnPause = 0; + + enum class EState { + INITIAL, + CHUNKER, + BUFFER, + DESCRIPTOR, + }; + EState State = EState::INITIAL; + + static constexpr ui16 MinimumFreeSpace = sizeof(TChannelPart) + sizeof(TEventDescr); + + protected: + ui64 OutputQueueSize = 0; + + std::list<TEventHolder> Queue; + std::list<TEventHolder> NotYetConfirmed; + TRope::TConstIterator Iter; + TCoroutineChunkSerializer Chunker; + bool ExtendedFormat = false; + + bool FeedDescriptor(TTcpPacketOutTask& task, TEventHolder& event, ui64 *weightConsumed); + + void AccountTraffic() { + if (const ui64 amount = std::exchange(UnaccountedTraffic, 0)) { + Metrics->UpdateOutputChannelTraffic(ChannelId, amount); + } + } + + friend class TInterconnectSessionTCP; + }; +} diff --git a/library/cpp/actors/interconnect/interconnect_common.h b/library/cpp/actors/interconnect/interconnect_common.h new file mode 100644 index 0000000000..285709a00c --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_common.h @@ -0,0 +1,106 @@ +#pragma once + +#include <library/cpp/actors/core/actorid.h> +#include <library/cpp/actors/core/actorsystem.h> +#include <library/cpp/actors/util/datetime.h> +#include <library/cpp/monlib/dynamic_counters/counters.h> +#include <library/cpp/monlib/metrics/metric_registry.h> +#include <util/generic/map.h> +#include <util/generic/set.h> +#include <util/system/datetime.h> + +#include "poller_tcp.h" +#include "logging.h" +#include "event_filter.h" + +#include <atomic> + +namespace NActors { + enum class EEncryptionMode { + DISABLED, // no encryption is required at all + OPTIONAL, // encryption is enabled when supported by both peers + REQUIRED, // encryption is mandatory + }; + + struct TInterconnectSettings { + TDuration Handshake; + TDuration DeadPeer; + TDuration CloseOnIdle; + ui32 SendBufferDieLimitInMB = 0; + ui64 OutputBuffersTotalSizeLimitInMB = 
0; + ui32 TotalInflightAmountOfData = 0; + bool MergePerPeerCounters = false; + bool MergePerDataCenterCounters = false; + ui32 TCPSocketBufferSize = 0; + TDuration PingPeriod = TDuration::Seconds(3); + TDuration ForceConfirmPeriod = TDuration::Seconds(1); + TDuration LostConnection; + TDuration BatchPeriod; + bool BindOnAllAddresses = true; + EEncryptionMode EncryptionMode = EEncryptionMode::DISABLED; + bool TlsAuthOnly = false; + TString Certificate; // certificate data in PEM format + TString PrivateKey; // private key for the certificate in PEM format + TString CaFilePath; // path to certificate authority file + TString CipherList; // encryption algorithms + TDuration MessagePendingTimeout = TDuration::Seconds(1); // timeout for which messages are queued while in PendingConnection state + ui64 MessagePendingSize = Max<ui64>(); // size of the queue + ui32 MaxSerializedEventSize = NActors::EventMaxByteSize; + + ui32 GetSendBufferSize() const { + ui32 res = 512 * 1024; // 512 kb is the default value for send buffer + if (TCPSocketBufferSize) { + res = TCPSocketBufferSize; + } + return res; + } + }; + + struct TChannelSettings { + ui16 Weight; + }; + + typedef TMap<ui16, TChannelSettings> TChannelsConfig; + + using TRegisterMonPageCallback = std::function<void(const TString& path, const TString& title, + TActorSystem* actorSystem, const TActorId& actorId)>; + + using TInitWhiteboardCallback = std::function<void(ui16 icPort, TActorSystem* actorSystem)>; + + using TUpdateWhiteboardCallback = std::function<void(const TString& peer, bool connected, bool green, bool yellow, + bool orange, bool red, TActorSystem* actorSystem)>; + + struct TInterconnectProxyCommon : TAtomicRefCount<TInterconnectProxyCommon> { + TActorId NameserviceId; + NMonitoring::TDynamicCounterPtr MonCounters; + std::shared_ptr<NMonitoring::IMetricRegistry> Metrics; + TChannelsConfig ChannelsConfig; + TInterconnectSettings Settings; + TRegisterMonPageCallback RegisterMonPage; + TActorId DestructorId; + std::shared_ptr<std::atomic<TAtomicBase>> DestructorQueueSize; + TAtomicBase MaxDestructorQueueSize = 1024 * 1024 * 1024; + TString ClusterUUID; + TVector<TString> AcceptUUID; + ui64 StartTime = GetCycleCountFast(); + TString TechnicalSelfHostName; + TInitWhiteboardCallback InitWhiteboard; + TUpdateWhiteboardCallback UpdateWhiteboard; + ui32 HandshakeBallastSize = 0; + TAtomic StartedSessionKiller = 0; + TScopeId LocalScopeId; + std::shared_ptr<TEventFilter> EventFilter; + TString Cookie; // unique random identifier of a node instance (generated randomly at every start) + std::unordered_map<ui16, TString> ChannelName; + + struct TVersionInfo { + TString Tag; // version tag for this node + TSet<TString> AcceptedTags; // we accept all enlisted version tags of peer nodes, but no others; empty = accept all + }; + + TMaybe<TVersionInfo> VersionInfo; + + using TPtr = TIntrusivePtr<TInterconnectProxyCommon>; + }; + +} diff --git a/library/cpp/actors/interconnect/interconnect_counters.cpp b/library/cpp/actors/interconnect/interconnect_counters.cpp new file mode 100644 index 0000000000..ba674f664b --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_counters.cpp @@ -0,0 +1,692 @@ +#include "interconnect_counters.h" + +#include <library/cpp/monlib/metrics/metric_registry.h> +#include <library/cpp/monlib/metrics/metric_sub_registry.h> + +#include <unordered_map> + +namespace NActors { + +namespace { + + class TInterconnectCounters: public IInterconnectMetrics { + public: + struct TOutputChannel { + 
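// Traffic/Events alias the session-wide aggregate sensors shared by every
+            // channel; OutgoingTraffic/OutgoingEvents are created inside this
+            // channel's own subgroup (see the constructor below)
+            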
NMonitoring::TDynamicCounters::TCounterPtr Traffic; + NMonitoring::TDynamicCounters::TCounterPtr Events; + NMonitoring::TDynamicCounters::TCounterPtr OutgoingTraffic; + NMonitoring::TDynamicCounters::TCounterPtr OutgoingEvents; + + TOutputChannel() = default; + + TOutputChannel(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, + NMonitoring::TDynamicCounters::TCounterPtr traffic, + NMonitoring::TDynamicCounters::TCounterPtr events) + : Traffic(std::move(traffic)) + , Events(std::move(events)) + , OutgoingTraffic(counters->GetCounter("OutgoingTraffic", true)) + , OutgoingEvents(counters->GetCounter("OutgoingEvents", true)) + {} + + TOutputChannel(const TOutputChannel&) = default; + }; + + struct TInputChannel { + NMonitoring::TDynamicCounters::TCounterPtr Traffic; + NMonitoring::TDynamicCounters::TCounterPtr Events; + NMonitoring::TDynamicCounters::TCounterPtr ScopeErrors; + NMonitoring::TDynamicCounters::TCounterPtr IncomingTraffic; + NMonitoring::TDynamicCounters::TCounterPtr IncomingEvents; + + TInputChannel() = default; + + TInputChannel(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, + NMonitoring::TDynamicCounters::TCounterPtr traffic, + NMonitoring::TDynamicCounters::TCounterPtr events, + NMonitoring::TDynamicCounters::TCounterPtr scopeErrors) + : Traffic(std::move(traffic)) + , Events(std::move(events)) + , ScopeErrors(std::move(scopeErrors)) + , IncomingTraffic(counters->GetCounter("IncomingTraffic", true)) + , IncomingEvents(counters->GetCounter("IncomingEvents", true)) + {} + + TInputChannel(const TInputChannel&) = default; + }; + + struct TInputChannels : std::unordered_map<ui16, TInputChannel> { + TInputChannel OtherInputChannel; + + TInputChannels() = default; + + TInputChannels(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, + const std::unordered_map<ui16, TString>& names, + NMonitoring::TDynamicCounters::TCounterPtr traffic, + NMonitoring::TDynamicCounters::TCounterPtr events, + NMonitoring::TDynamicCounters::TCounterPtr scopeErrors) + : OtherInputChannel(counters->GetSubgroup("channel", "other"), traffic, events, scopeErrors) + { + for (const auto& [id, name] : names) { + try_emplace(id, counters->GetSubgroup("channel", name), traffic, events, scopeErrors); + } + } + + TInputChannels(const TInputChannels&) = default; + + const TInputChannel& Get(ui16 id) const { + const auto it = find(id); + return it != end() ? it->second : OtherInputChannel; + } + }; + + private: + const TInterconnectProxyCommon::TPtr Common; + const bool MergePerDataCenterCounters; + const bool MergePerPeerCounters; + NMonitoring::TDynamicCounterPtr Counters; + NMonitoring::TDynamicCounterPtr PerSessionCounters; + NMonitoring::TDynamicCounterPtr PerDataCenterCounters; + NMonitoring::TDynamicCounterPtr& AdaptiveCounters; + + bool Initialized = false; + + NMonitoring::TDynamicCounters::TCounterPtr Traffic; + NMonitoring::TDynamicCounters::TCounterPtr Events; + NMonitoring::TDynamicCounters::TCounterPtr ScopeErrors; + + public: + TInterconnectCounters(const TInterconnectProxyCommon::TPtr& common) + : Common(common) + , MergePerDataCenterCounters(common->Settings.MergePerDataCenterCounters) + , MergePerPeerCounters(common->Settings.MergePerPeerCounters) + , Counters(common->MonCounters) + , AdaptiveCounters(MergePerDataCenterCounters + ? PerDataCenterCounters : + MergePerPeerCounters ? 
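// AdaptiveCounters is a reference bound once, at construction, to whichever
+              // registry level the Merge* settings select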
Counters : PerSessionCounters) + {} + + void AddInflightDataAmount(ui64 value) override { + *InflightDataAmount += value; + } + + void SubInflightDataAmount(ui64 value) override { + *InflightDataAmount -= value; + } + + void AddTotalBytesWritten(ui64 value) override { + *TotalBytesWritten += value; + } + + void SetClockSkewMicrosec(i64 value) override { + *ClockSkewMicrosec = value; + } + + void IncSessionDeaths() override { + ++*SessionDeaths; + } + + void IncHandshakeFails() override { + ++*HandshakeFails; + } + + void SetConnected(ui32 value) override { + *Connected = value; + } + + void IncSubscribersCount() override { + ++*SubscribersCount; + } + + void SubSubscribersCount(ui32 value) override { + *SubscribersCount -= value; + } + + void SubOutputBuffersTotalSize(ui64 value) override { + *OutputBuffersTotalSize -= value; + } + + void AddOutputBuffersTotalSize(ui64 value) override { + *OutputBuffersTotalSize += value; + } + + ui64 GetOutputBuffersTotalSize() const override { + return *OutputBuffersTotalSize; + } + + void IncDisconnections() override { + ++*Disconnections; + } + + void IncUsefulWriteWakeups() override { + ++*UsefulWriteWakeups; + } + + void IncSpuriousWriteWakeups() override { + ++*SpuriousWriteWakeups; + } + + void IncSendSyscalls() override { + ++*SendSyscalls; + } + + void IncInflyLimitReach() override { + ++*InflyLimitReach; + } + + void IncUsefulReadWakeups() override { + ++*UsefulReadWakeups; + } + + void IncSpuriousReadWakeups() override { + ++*SpuriousReadWakeups; + } + + void IncDisconnectByReason(const TString& s) override { + if (auto it = DisconnectByReason.find(s); it != DisconnectByReason.end()) { + it->second->Inc(); + } + } + + void AddInputChannelsIncomingTraffic(ui16 channel, ui64 incomingTraffic) override { + auto& ch = InputChannels.Get(channel); + *ch.IncomingTraffic += incomingTraffic; + } + + void IncInputChannelsIncomingEvents(ui16 channel) override { + auto& ch = InputChannels.Get(channel); + ++*ch.IncomingEvents; + } + + void IncRecvSyscalls() override { + ++*RecvSyscalls; + } + + void AddTotalBytesRead(ui64 value) override { + *TotalBytesRead += value; + } + + void UpdateLegacyPingTimeHist(ui64 value) override { + LegacyPingTimeHist.Add(value); + PingTimeHistogram->Collect(value); + } + + void UpdateOutputChannelTraffic(ui16 channel, ui64 value) override { + if (GetOutputChannel(channel).OutgoingTraffic) { + *(GetOutputChannel(channel).OutgoingTraffic) += value; + } + if (GetOutputChannel(channel).Traffic) { + *(GetOutputChannel(channel).Traffic) += value; + } + } + + void UpdateOutputChannelEvents(ui16 channel) override { + if (GetOutputChannel(channel).OutgoingEvents) { + ++*(GetOutputChannel(channel).OutgoingEvents); + } + if (GetOutputChannel(channel).Events) { + ++*(GetOutputChannel(channel).Events); + } + } + + void SetPeerInfo(const TString& name, const TString& dataCenterId) override { + if (name != std::exchange(HumanFriendlyPeerHostName, name)) { + PerSessionCounters.Reset(); + } + VALGRIND_MAKE_READABLE(&DataCenterId, sizeof(DataCenterId)); + if (dataCenterId != std::exchange(DataCenterId, dataCenterId)) { + PerDataCenterCounters.Reset(); + } + + const bool updatePerDataCenter = !PerDataCenterCounters && MergePerDataCenterCounters; + if (updatePerDataCenter) { + PerDataCenterCounters = Counters->GetSubgroup("dataCenterId", *DataCenterId); + } + + const bool updatePerSession = !PerSessionCounters || updatePerDataCenter; + if (updatePerSession) { + auto base = MergePerDataCenterCounters ? 
PerDataCenterCounters : Counters; + PerSessionCounters = base->GetSubgroup("peer", *HumanFriendlyPeerHostName); + } + + const bool updateGlobal = !Initialized; + + const bool updateAdaptive = + &AdaptiveCounters == &Counters ? updateGlobal : + &AdaptiveCounters == &PerSessionCounters ? updatePerSession : + &AdaptiveCounters == &PerDataCenterCounters ? updatePerDataCenter : + false; + + if (updatePerSession) { + Connected = PerSessionCounters->GetCounter("Connected"); + Disconnections = PerSessionCounters->GetCounter("Disconnections", true); + ClockSkewMicrosec = PerSessionCounters->GetCounter("ClockSkewMicrosec"); + Traffic = PerSessionCounters->GetCounter("Traffic", true); + Events = PerSessionCounters->GetCounter("Events", true); + ScopeErrors = PerSessionCounters->GetCounter("ScopeErrors", true); + + for (const auto& [id, name] : Common->ChannelName) { + OutputChannels.try_emplace(id, Counters->GetSubgroup("channel", name), Traffic, Events); + } + OtherOutputChannel = TOutputChannel(Counters->GetSubgroup("channel", "other"), Traffic, Events); + + InputChannels = TInputChannels(Counters, Common->ChannelName, Traffic, Events, ScopeErrors); + } + + if (updateAdaptive) { + SessionDeaths = AdaptiveCounters->GetCounter("Session_Deaths", true); + HandshakeFails = AdaptiveCounters->GetCounter("Handshake_Fails", true); + InflyLimitReach = AdaptiveCounters->GetCounter("InflyLimitReach", true); + InflightDataAmount = AdaptiveCounters->GetCounter("Inflight_Data"); + + LegacyPingTimeHist = {}; + LegacyPingTimeHist.Init(AdaptiveCounters.Get(), "PingTimeHist", "mks", 125, 18); + + PingTimeHistogram = AdaptiveCounters->GetHistogram( + "PingTimeUs", NMonitoring::ExponentialHistogram(18, 2, 125)); + } + + if (updateGlobal) { + OutputBuffersTotalSize = Counters->GetCounter("OutputBuffersTotalSize"); + SendSyscalls = Counters->GetCounter("SendSyscalls", true); + RecvSyscalls = Counters->GetCounter("RecvSyscalls", true); + SpuriousReadWakeups = Counters->GetCounter("SpuriousReadWakeups", true); + UsefulReadWakeups = Counters->GetCounter("UsefulReadWakeups", true); + SpuriousWriteWakeups = Counters->GetCounter("SpuriousWriteWakeups", true); + UsefulWriteWakeups = Counters->GetCounter("UsefulWriteWakeups", true); + SubscribersCount = AdaptiveCounters->GetCounter("SubscribersCount"); + TotalBytesWritten = Counters->GetCounter("TotalBytesWritten", true); + TotalBytesRead = Counters->GetCounter("TotalBytesRead", true); + + auto disconnectReasonGroup = Counters->GetSubgroup("subsystem", "disconnectReason"); + for (const char *reason : TDisconnectReason::Reasons) { + DisconnectByReason[reason] = disconnectReasonGroup->GetNamedCounter("reason", reason, true); + } + } + + Initialized = true; + } + + TOutputChannel GetOutputChannel(ui16 index) const { + Y_VERIFY(Initialized); + const auto it = OutputChannels.find(index); + return it != OutputChannels.end() ? 
it->second : OtherOutputChannel; + } + + private: + NMonitoring::TDynamicCounters::TCounterPtr SessionDeaths; + NMonitoring::TDynamicCounters::TCounterPtr HandshakeFails; + NMonitoring::TDynamicCounters::TCounterPtr Connected; + NMonitoring::TDynamicCounters::TCounterPtr Disconnections; + NMonitoring::TDynamicCounters::TCounterPtr InflightDataAmount; + NMonitoring::TDynamicCounters::TCounterPtr InflyLimitReach; + NMonitoring::TDynamicCounters::TCounterPtr OutputBuffersTotalSize; + NMonitoring::TDynamicCounters::TCounterPtr QueueUtilization; + NMonitoring::TDynamicCounters::TCounterPtr SubscribersCount; + NMonitoring::TDynamicCounters::TCounterPtr SendSyscalls; + NMonitoring::TDynamicCounters::TCounterPtr ClockSkewMicrosec; + NMonitoring::TDynamicCounters::TCounterPtr RecvSyscalls; + NMonitoring::TDynamicCounters::TCounterPtr UsefulReadWakeups; + NMonitoring::TDynamicCounters::TCounterPtr SpuriousReadWakeups; + NMonitoring::TDynamicCounters::TCounterPtr UsefulWriteWakeups; + NMonitoring::TDynamicCounters::TCounterPtr SpuriousWriteWakeups; + NMon::THistogramCounterHelper LegacyPingTimeHist; + NMonitoring::THistogramPtr PingTimeHistogram; + + std::unordered_map<ui16, TOutputChannel> OutputChannels; + TOutputChannel OtherOutputChannel; + TInputChannels InputChannels; + THashMap<TString, NMonitoring::TDynamicCounters::TCounterPtr> DisconnectByReason; + + NMonitoring::TDynamicCounters::TCounterPtr TotalBytesWritten, TotalBytesRead; + }; + + class TInterconnectMetrics: public IInterconnectMetrics { + public: + struct TOutputChannel { + NMonitoring::IRate* Traffic; + NMonitoring::IRate* Events; + NMonitoring::IRate* OutgoingTraffic; + NMonitoring::IRate* OutgoingEvents; + + TOutputChannel() = default; + + TOutputChannel(const std::shared_ptr<NMonitoring::IMetricRegistry>& metrics, + NMonitoring::IRate* traffic, + NMonitoring::IRate* events) + : Traffic(traffic) + , Events(events) + , OutgoingTraffic(metrics->Rate(NMonitoring::MakeLabels({{"sensor", "interconnect.outgoing_traffic"}}))) + , OutgoingEvents(metrics->Rate(NMonitoring::MakeLabels({{"sensor", "interconnect.outgoing_events"}}))) + {} + + TOutputChannel(const TOutputChannel&) = default; + }; + + struct TInputChannel { + NMonitoring::IRate* Traffic; + NMonitoring::IRate* Events; + NMonitoring::IRate* ScopeErrors; + NMonitoring::IRate* IncomingTraffic; + NMonitoring::IRate* IncomingEvents; + + TInputChannel() = default; + + TInputChannel(const std::shared_ptr<NMonitoring::IMetricRegistry>& metrics, + NMonitoring::IRate* traffic, NMonitoring::IRate* events, + NMonitoring::IRate* scopeErrors) + : Traffic(traffic) + , Events(events) + , ScopeErrors(scopeErrors) + , IncomingTraffic(metrics->Rate(NMonitoring::MakeLabels({{"sensor", "interconnect.incoming_traffic"}}))) + , IncomingEvents(metrics->Rate(NMonitoring::MakeLabels({{"sensor", "interconnect.incoming_events"}}))) + {} + + TInputChannel(const TInputChannel&) = default; + }; + + struct TInputChannels : std::unordered_map<ui16, TInputChannel> { + TInputChannel OtherInputChannel; + + TInputChannels() = default; + + TInputChannels(const std::shared_ptr<NMonitoring::IMetricRegistry>& metrics, + const std::unordered_map<ui16, TString>& names, + NMonitoring::IRate* traffic, NMonitoring::IRate* events, + NMonitoring::IRate* scopeErrors) + : OtherInputChannel(std::make_shared<NMonitoring::TMetricSubRegistry>( + NMonitoring::TLabels{{"channel", "other"}}, metrics), traffic, events, scopeErrors) + { + for (const auto& [id, name] : names) { + try_emplace(id, 
std::make_shared<NMonitoring::TMetricSubRegistry>(NMonitoring::TLabels{{"channel", name}}, metrics), + traffic, events, scopeErrors); + } + } + + TInputChannels(const TInputChannels&) = default; + + const TInputChannel& Get(ui16 id) const { + const auto it = find(id); + return it != end() ? it->second : OtherInputChannel; + } + }; + + TInterconnectMetrics(const TInterconnectProxyCommon::TPtr& common) + : Common(common) + , MergePerDataCenterMetrics_(common->Settings.MergePerDataCenterCounters) + , MergePerPeerMetrics_(common->Settings.MergePerPeerCounters) + , Metrics_(common->Metrics) + , AdaptiveMetrics_(MergePerDataCenterMetrics_ + ? PerDataCenterMetrics_ : + MergePerPeerMetrics_ ? Metrics_ : PerSessionMetrics_) + {} + + void AddInflightDataAmount(ui64 value) override { + InflightDataAmount_->Add(value); + } + + void SubInflightDataAmount(ui64 value) override { + InflightDataAmount_->Add(-value); + } + + void AddTotalBytesWritten(ui64 value) override { + TotalBytesWritten_->Add(value); + } + + void SetClockSkewMicrosec(i64 value) override { + ClockSkewMicrosec_->Set(value); + } + + void IncSessionDeaths() override { + SessionDeaths_->Inc(); + } + + void IncHandshakeFails() override { + HandshakeFails_->Inc(); + } + + void SetConnected(ui32 value) override { + Connected_->Set(value); + } + + void IncSubscribersCount() override { + SubscribersCount_->Inc(); + } + + void SubSubscribersCount(ui32 value) override { + SubscribersCount_->Add(-value); + } + + void SubOutputBuffersTotalSize(ui64 value) override { + OutputBuffersTotalSize_->Add(-value); + } + + void AddOutputBuffersTotalSize(ui64 value) override { + OutputBuffersTotalSize_->Add(value); + } + + ui64 GetOutputBuffersTotalSize() const override { + return OutputBuffersTotalSize_->Get(); + } + + void IncDisconnections() override { + Disconnections_->Inc(); + } + + void IncUsefulWriteWakeups() override { + UsefulWriteWakeups_->Inc(); + } + + void IncSpuriousWriteWakeups() override { + SpuriousWriteWakeups_->Inc(); + } + + void IncSendSyscalls() override { + SendSyscalls_->Inc(); + } + + void IncInflyLimitReach() override { + InflyLimitReach_->Inc(); + } + + void IncUsefulReadWakeups() override { + UsefulReadWakeups_->Inc(); + } + + void IncSpuriousReadWakeups() override { + SpuriousReadWakeups_->Inc(); + } + + void IncDisconnectByReason(const TString& s) override { + if (auto it = DisconnectByReason_.find(s); it != DisconnectByReason_.end()) { + it->second->Inc(); + } + } + + void AddInputChannelsIncomingTraffic(ui16 channel, ui64 incomingTraffic) override { + auto& ch = InputChannels_.Get(channel); + ch.IncomingTraffic->Add(incomingTraffic); + } + + void IncInputChannelsIncomingEvents(ui16 channel) override { + auto& ch = InputChannels_.Get(channel); + ch.IncomingEvents->Inc(); + } + + void IncRecvSyscalls() override { + RecvSyscalls_->Inc(); + } + + void AddTotalBytesRead(ui64 value) override { + TotalBytesRead_->Add(value); + } + + void UpdateLegacyPingTimeHist(ui64 value) override { + PingTimeHistogram_->Record(value); + } + + void UpdateOutputChannelTraffic(ui16 channel, ui64 value) override { + if (GetOutputChannel(channel).OutgoingTraffic) { + GetOutputChannel(channel).OutgoingTraffic->Add(value); + } + if (GetOutputChannel(channel).Traffic) { + GetOutputChannel(channel).Traffic->Add(value); + } + } + + void UpdateOutputChannelEvents(ui16 channel) override { + if (GetOutputChannel(channel).OutgoingEvents) { + GetOutputChannel(channel).OutgoingEvents->Inc(); + } + if (GetOutputChannel(channel).Events) { + 
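// Events is the session-wide aggregate shared by all channels; the per-channel
+                // interconnect.outgoing_events sensor was already incremented above. +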
GetOutputChannel(channel).Events->Inc(); + } + } + + void SetPeerInfo(const TString& name, const TString& dataCenterId) override { + if (name != std::exchange(HumanFriendlyPeerHostName, name)) { + PerSessionMetrics_.reset(); + } + VALGRIND_MAKE_READABLE(&DataCenterId, sizeof(DataCenterId)); + if (dataCenterId != std::exchange(DataCenterId, dataCenterId)) { + PerDataCenterMetrics_.reset(); + } + + const bool updatePerDataCenter = !PerDataCenterMetrics_ && MergePerDataCenterMetrics_; + if (updatePerDataCenter) { + PerDataCenterMetrics_ = std::make_shared<NMonitoring::TMetricSubRegistry>( + NMonitoring::TLabels{{"datacenter_id", *DataCenterId}}, Metrics_); + } + + const bool updatePerSession = !PerSessionMetrics_ || updatePerDataCenter; + if (updatePerSession) { + auto base = MergePerDataCenterMetrics_ ? PerDataCenterMetrics_ : Metrics_; + PerSessionMetrics_ = std::make_shared<NMonitoring::TMetricSubRegistry>( + NMonitoring::TLabels{{"peer", *HumanFriendlyPeerHostName}}, base); + } + + const bool updateGlobal = !Initialized_; + + const bool updateAdaptive = + &AdaptiveMetrics_ == &Metrics_ ? updateGlobal : + &AdaptiveMetrics_ == &PerSessionMetrics_ ? updatePerSession : + &AdaptiveMetrics_ == &PerDataCenterMetrics_ ? updatePerDataCenter : + false; + + auto createRate = [](std::shared_ptr<NMonitoring::IMetricRegistry> metrics, TStringBuf name) mutable { + return metrics->Rate(NMonitoring::MakeLabels(NMonitoring::TLabels{{"sensor", name}})); + }; + auto createIntGauge = [](std::shared_ptr<NMonitoring::IMetricRegistry> metrics, TStringBuf name) mutable { + return metrics->IntGauge(NMonitoring::MakeLabels(NMonitoring::TLabels{{"sensor", name}})); + }; + + if (updatePerSession) { + Connected_ = createIntGauge(PerSessionMetrics_, "interconnect.connected"); + Disconnections_ = createRate(PerSessionMetrics_, "interconnect.disconnections"); + ClockSkewMicrosec_ = createIntGauge(PerSessionMetrics_, "interconnect.clock_skew_microsec"); + Traffic_ = createRate(PerSessionMetrics_, "interconnect.traffic"); + Events_ = createRate(PerSessionMetrics_, "interconnect.events"); + ScopeErrors_ = createRate(PerSessionMetrics_, "interconnect.scope_errors"); + + for (const auto& [id, name] : Common->ChannelName) { + OutputChannels_.try_emplace(id, std::make_shared<NMonitoring::TMetricSubRegistry>( + NMonitoring::TLabels{{"channel", name}}, Metrics_), Traffic_, Events_); + } + OtherOutputChannel_ = TOutputChannel(std::make_shared<NMonitoring::TMetricSubRegistry>( + NMonitoring::TLabels{{"channel", "other"}}, Metrics_), Traffic_, Events_); + + InputChannels_ = TInputChannels(Metrics_, Common->ChannelName, Traffic_, Events_, ScopeErrors_); + } + + if (updateAdaptive) { + SessionDeaths_ = createRate(AdaptiveMetrics_, "interconnect.session_deaths"); + HandshakeFails_ = createRate(AdaptiveMetrics_, "interconnect.handshake_fails"); + InflyLimitReach_ = createRate(AdaptiveMetrics_, "interconnect.infly_limit_reach"); + InflightDataAmount_ = createRate(AdaptiveMetrics_, "interconnect.inflight_data"); + PingTimeHistogram_ = AdaptiveMetrics_->HistogramRate( + NMonitoring::MakeLabels({{"sensor", "interconnect.ping_time_us"}}), NMonitoring::ExponentialHistogram(18, 2, 125)); + } + + if (updateGlobal) { + OutputBuffersTotalSize_ = createRate(Metrics_, "interconnect.output_buffers_total_size"); + SendSyscalls_ = createRate(Metrics_, "interconnect.send_syscalls"); + RecvSyscalls_ = createRate(Metrics_, "interconnect.recv_syscalls"); + SpuriousReadWakeups_ = createRate(Metrics_, "interconnect.spurious_read_wakeups"); + 
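// Spurious wakeups are poller activations that found nothing to do, useful ones
+                // did real work; comparing the two shows how efficiently the poller is used. +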
UsefulReadWakeups_ = createRate(Metrics_, "interconnect.useful_read_wakeups"); + SpuriousWriteWakeups_ = createRate(Metrics_, "interconnect.spurious_write_wakeups"); + UsefulWriteWakeups_ = createRate(Metrics_, "interconnect.useful_write_wakeups"); + SubscribersCount_ = createIntGauge(AdaptiveMetrics_, "interconnect.subscribers_count"); + TotalBytesWritten_ = createRate(Metrics_, "interconnect.total_bytes_written"); + TotalBytesRead_ = createRate(Metrics_, "interconnect.total_bytes_read"); + + for (const char *reason : TDisconnectReason::Reasons) { + DisconnectByReason_[reason] = Metrics_->Rate( + NMonitoring::MakeLabels({ + {"sensor", "interconnect.disconnect_reason"}, + {"reason", reason}, + })); + } + } + + Initialized_ = true; + } + + TOutputChannel GetOutputChannel(ui16 index) const { + Y_VERIFY(Initialized_); + const auto it = OutputChannels_.find(index); + return it != OutputChannels_.end() ? it->second : OtherOutputChannel_; + } + + private: + const TInterconnectProxyCommon::TPtr Common; + const bool MergePerDataCenterMetrics_; + const bool MergePerPeerMetrics_; + std::shared_ptr<NMonitoring::IMetricRegistry> Metrics_; + std::shared_ptr<NMonitoring::IMetricRegistry> PerSessionMetrics_; + std::shared_ptr<NMonitoring::IMetricRegistry> PerDataCenterMetrics_; + std::shared_ptr<NMonitoring::IMetricRegistry>& AdaptiveMetrics_; + bool Initialized_ = false; + + NMonitoring::IRate* Traffic_; + + NMonitoring::IRate* Events_; + NMonitoring::IRate* ScopeErrors_; + NMonitoring::IRate* Disconnections_; + NMonitoring::IIntGauge* Connected_; + + NMonitoring::IRate* SessionDeaths_; + NMonitoring::IRate* HandshakeFails_; + NMonitoring::IRate* InflyLimitReach_; + NMonitoring::IRate* InflightDataAmount_; + NMonitoring::IRate* OutputBuffersTotalSize_; + NMonitoring::IIntGauge* SubscribersCount_; + NMonitoring::IRate* SendSyscalls_; + NMonitoring::IRate* RecvSyscalls_; + NMonitoring::IRate* SpuriousWriteWakeups_; + NMonitoring::IRate* UsefulWriteWakeups_; + NMonitoring::IRate* SpuriousReadWakeups_; + NMonitoring::IRate* UsefulReadWakeups_; + NMonitoring::IIntGauge* ClockSkewMicrosec_; + + NMonitoring::IHistogram* PingTimeHistogram_; + + std::unordered_map<ui16, TOutputChannel> OutputChannels_; + TOutputChannel OtherOutputChannel_; + TInputChannels InputChannels_; + + THashMap<TString, NMonitoring::IRate*> DisconnectByReason_; + + NMonitoring::IRate* TotalBytesWritten_; + NMonitoring::IRate* TotalBytesRead_; + }; + +} // namespace + +std::unique_ptr<IInterconnectMetrics> CreateInterconnectCounters(const TInterconnectProxyCommon::TPtr& common) { + return std::make_unique<TInterconnectCounters>(common); +} + +std::unique_ptr<IInterconnectMetrics> CreateInterconnectMetrics(const TInterconnectProxyCommon::TPtr& common) { + return std::make_unique<TInterconnectMetrics>(common); +} + +} // NActors diff --git a/library/cpp/actors/interconnect/interconnect_counters.h b/library/cpp/actors/interconnect/interconnect_counters.h new file mode 100644 index 0000000000..e30f03a0bc --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_counters.h @@ -0,0 +1,59 @@ +#pragma once + +#include <library/cpp/actors/helpers/mon_histogram_helper.h> + +#include <util/system/valgrind.h> + +#include "types.h" + +#include "interconnect_common.h" + +#include <memory> +#include <optional> + +namespace NActors { + +class IInterconnectMetrics { +public: + virtual ~IInterconnectMetrics() = default; + + virtual void AddInflightDataAmount(ui64 value) = 0; + virtual void SubInflightDataAmount(ui64 value) = 0; + virtual void 
AddTotalBytesWritten(ui64 value) = 0; + virtual void SetClockSkewMicrosec(i64 value) = 0; + virtual void IncSessionDeaths() = 0; + virtual void IncHandshakeFails() = 0; + virtual void SetConnected(ui32 value) = 0; + virtual void IncSubscribersCount() = 0; + virtual void SubSubscribersCount(ui32 value) = 0; + virtual void SubOutputBuffersTotalSize(ui64 value) = 0; + virtual void AddOutputBuffersTotalSize(ui64 value) = 0; + virtual ui64 GetOutputBuffersTotalSize() const = 0; + virtual void IncDisconnections() = 0; + virtual void IncUsefulWriteWakeups() = 0; + virtual void IncSpuriousWriteWakeups() = 0; + virtual void IncSendSyscalls() = 0; + virtual void IncInflyLimitReach() = 0; + virtual void IncDisconnectByReason(const TString& s) = 0; + virtual void IncUsefulReadWakeups() = 0; + virtual void IncSpuriousReadWakeups() = 0; + virtual void SetPeerInfo(const TString& name, const TString& dataCenterId) = 0; + virtual void AddInputChannelsIncomingTraffic(ui16 channel, ui64 incomingTraffic) = 0; + virtual void IncInputChannelsIncomingEvents(ui16 channel) = 0; + virtual void IncRecvSyscalls() = 0; + virtual void AddTotalBytesRead(ui64 value) = 0; + virtual void UpdateLegacyPingTimeHist(ui64 value) = 0; + virtual void UpdateOutputChannelTraffic(ui16 channel, ui64 value) = 0; + virtual void UpdateOutputChannelEvents(ui16 channel) = 0; + TString GetHumanFriendlyPeerHostName() const { + return HumanFriendlyPeerHostName.value_or(TString()); + } + +protected: + std::optional<TString> DataCenterId; + std::optional<TString> HumanFriendlyPeerHostName; +}; + +std::unique_ptr<IInterconnectMetrics> CreateInterconnectCounters(const NActors::TInterconnectProxyCommon::TPtr& common); +std::unique_ptr<IInterconnectMetrics> CreateInterconnectMetrics(const NActors::TInterconnectProxyCommon::TPtr& common); +} // NActors diff --git a/library/cpp/actors/interconnect/interconnect_handshake.cpp b/library/cpp/actors/interconnect/interconnect_handshake.cpp new file mode 100644 index 0000000000..9ede998d8e --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_handshake.cpp @@ -0,0 +1,995 @@ +#include "interconnect_handshake.h" +#include "interconnect_tcp_proxy.h" + +#include <library/cpp/actors/core/actor_coroutine.h> +#include <library/cpp/actors/core/log.h> +#include <library/cpp/actors/protos/services_common.pb.h> +#include <util/system/getpid.h> + +#include <google/protobuf/text_format.h> + +#include <variant> + +namespace NActors { + static constexpr size_t StackSize = 64 * 1024; // 64k should be enough + + class THandshakeActor + : public TActorCoroImpl + , public TInterconnectLoggingBase + { + struct TExHandshakeFailed : yexception {}; + + static constexpr TDuration ResolveTimeout = TDuration::Seconds(1); + +#pragma pack(push, 1) + + struct TInitialPacket { + struct { + TActorId SelfVirtualId; + TActorId PeerVirtualId; + ui64 NextPacket; + ui64 Version; + } Header; + ui32 Checksum; + + TInitialPacket() = default; + + TInitialPacket(const TActorId& self, const TActorId& peer, ui64 nextPacket, ui64 version) { + Header.SelfVirtualId = self; + Header.PeerVirtualId = peer; + Header.NextPacket = nextPacket; + Header.Version = version; + Checksum = Crc32cExtendMSanCompatible(0, &Header, sizeof(Header)); + } + + bool Check() const { + return Checksum == Crc32cExtendMSanCompatible(0, &Header, sizeof(Header)); + } + + TString ToString() const { + return TStringBuilder() + << "{SelfVirtualId# " << Header.SelfVirtualId.ToString() + << " PeerVirtualId# " << Header.PeerVirtualId.ToString() + << " NextPacket# " << 
Header.NextPacket + << " Version# " << Header.Version + << "}"; + } + }; + + struct TExHeader { + static constexpr ui32 MaxSize = 1024 * 1024; + + ui32 Checksum; + ui32 Size; + + ui32 CalculateChecksum(const void* data, size_t len) const { + return Crc32cExtendMSanCompatible(Crc32cExtendMSanCompatible(0, &Size, sizeof(Size)), data, len); + } + + void Sign(const void* data, size_t len) { + Checksum = CalculateChecksum(data, len); + } + + bool Check(const void* data, size_t len) const { + return Checksum == CalculateChecksum(data, len); + } + }; + +#pragma pack(pop) + + private: + TInterconnectProxyCommon::TPtr Common; + TActorId SelfVirtualId; + TActorId PeerVirtualId; + ui32 PeerNodeId = 0; + ui64 NextPacketToPeer = 0; + TMaybe<ui64> NextPacketFromPeer; // will be obtained from incoming initial packet + TString PeerHostName; + TString PeerAddr; + TSocketPtr Socket; + TPollerToken::TPtr PollerToken; + TString State; + TString HandshakeKind; + TMaybe<THolder<TProgramInfo>> ProgramInfo; // filled in in case of successful handshake; even if null + TSessionParams Params; + bool ResolveTimedOut = false; + THashMap<ui32, TInstant> LastLogNotice; + const TDuration MuteDuration = TDuration::Seconds(15); + TInstant Deadline; + + public: + static constexpr IActor::EActivityType ActorActivityType() { + return IActor::INTERCONNECT_HANDSHAKE; + } + + THandshakeActor(TInterconnectProxyCommon::TPtr common, const TActorId& self, const TActorId& peer, + ui32 nodeId, ui64 nextPacket, TString peerHostName, TSessionParams params) + : TActorCoroImpl(StackSize, true, true) // allow unhandled poison pills and dtors + , Common(std::move(common)) + , SelfVirtualId(self) + , PeerVirtualId(peer) + , PeerNodeId(nodeId) + , NextPacketToPeer(nextPacket) + , PeerHostName(std::move(peerHostName)) + , HandshakeKind("outgoing handshake") + , Params(std::move(params)) + { + Y_VERIFY(SelfVirtualId); + Y_VERIFY(SelfVirtualId.NodeId()); + Y_VERIFY(PeerNodeId); + } + + THandshakeActor(TInterconnectProxyCommon::TPtr common, TSocketPtr socket) + : TActorCoroImpl(StackSize, true, true) // allow unhandled poison pills and dtors + , Common(std::move(common)) + , Socket(std::move(socket)) + , HandshakeKind("incoming handshake") + { + Y_VERIFY(Socket); + PeerAddr = TString::Uninitialized(1024); + if (GetRemoteAddr(*Socket, PeerAddr.Detach(), PeerAddr.size())) { + PeerAddr.resize(strlen(PeerAddr.data())); + } else { + PeerAddr.clear(); + } + } + + void UpdatePrefix() { + SetPrefix(Sprintf("Handshake %s [node %" PRIu32 "]", SelfActorId.ToString().data(), PeerNodeId)); + } + + void Run() override { + UpdatePrefix(); + + // set up overall handshake process timer + TDuration timeout = Common->Settings.Handshake; + if (timeout == TDuration::Zero()) { + timeout = DEFAULT_HANDSHAKE_TIMEOUT; + } + timeout += ResolveTimeout * 2; + Deadline = Now() + timeout; + Schedule(Deadline, new TEvents::TEvWakeup); + + try { + if (Socket) { + PerformIncomingHandshake(); + } else { + PerformOutgoingHandshake(); + } + + // establish encrypted channel, or, in case when encryption is disabled, check if it matches settings + if (ProgramInfo) { + if (Params.Encryption) { + EstablishSecureConnection(); + } else if (Common->Settings.EncryptionMode == EEncryptionMode::REQUIRED && !Params.AuthOnly) { + Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "Peer doesn't support encryption, which is required"); + } + } + } catch (const TExHandshakeFailed&) { + ProgramInfo.Clear(); + } + + if (ProgramInfo) { + LOG_LOG_IC_X(NActorsServices::INTERCONNECT, "ICH04", 
NLog::PRI_INFO, "handshake succeeded");
+                Y_VERIFY(NextPacketFromPeer);
+                if (PollerToken) {
+                    Y_VERIFY(PollerToken->RefCount() == 1);
+                    PollerToken.Reset(); // ensure the poller token is destroyed here, as the socket will be re-registered within another actor
+                }
+                SendToProxy(MakeHolder<TEvHandshakeDone>(std::move(Socket), PeerVirtualId, SelfVirtualId,
+                    *NextPacketFromPeer, ProgramInfo->Release(), std::move(Params)));
+            }
+
+            Socket.Reset();
+        }
+
+        void EstablishSecureConnection() {
+            Y_VERIFY(PollerToken && PollerToken->RefCount() == 1);
+            PollerToken.Reset();
+            auto ev = AskProxy<TEvSecureSocket>(MakeHolder<TEvGetSecureSocket>(Socket), "AskProxy(TEvSecureContext)");
+            Socket = std::move(ev->Get()->Socket);
+            RegisterInPoller();
+            const ui32 myNodeId = GetActorSystem()->NodeId;
+            const bool server = myNodeId < PeerNodeId; // keep server/client role permanent to enable easy TLS session resuming
+            for (;;) {
+                TString err;
+                auto& secure = static_cast<NInterconnect::TSecureSocket&>(*Socket);
+                switch (secure.Establish(server, Params.AuthOnly, err)) {
+                    case NInterconnect::TSecureSocket::EStatus::SUCCESS:
+                        if (Params.AuthOnly) {
+                            Params.Encryption = false;
+                            Params.AuthCN = secure.GetPeerCommonName();
+                            Y_VERIFY(PollerToken && PollerToken->RefCount() == 1);
+                            PollerToken.Reset();
+                            Socket = secure.Detach();
+                        }
+                        return;
+
+                    case NInterconnect::TSecureSocket::EStatus::ERROR:
+                        Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, err, true);
+                        [[fallthrough]];
+
+                    case NInterconnect::TSecureSocket::EStatus::WANT_READ:
+                        WaitPoller(true, false, "ReadEstablish");
+                        break;
+
+                    case NInterconnect::TSecureSocket::EStatus::WANT_WRITE:
+                        WaitPoller(false, true, "WriteEstablish");
+                        break;
+                }
+            }
+        }
+
+        void ProcessUnexpectedEvent(TAutoPtr<IEventHandle> ev) override {
+            switch (const ui32 type = ev->GetTypeRewrite()) {
+                case TEvents::TSystem::Wakeup:
+                    Fail(TEvHandshakeFail::HANDSHAKE_FAIL_TRANSIENT, Sprintf("Handshake timed out, State# %s", State.data()), true);
+                    [[fallthrough]];
+
+                case ui32(ENetwork::NodeInfo):
+                case TEvInterconnect::EvNodeAddress:
+                case ui32(ENetwork::ResolveError):
+                    break; // most likely a race with resolve timeout
+
+                case TEvPollerReady::EventType:
+                    break;
+
+                default:
+                    Y_FAIL("unexpected event 0x%08" PRIx32, type);
+            }
+        }
+
+        template<typename T>
+        void SetupVersionTag(T& proto) {
+            if (Common->VersionInfo) {
+                proto.SetVersionTag(Common->VersionInfo->Tag);
+                for (const TString& accepted : Common->VersionInfo->AcceptedTags) {
+                    proto.AddAcceptedVersionTags(accepted);
+                }
+            }
+        }
+
+        template<typename T>
+        void SetupClusterUUID(T& proto) {
+            auto *pb = proto.MutableClusterUUIDs();
+            pb->SetClusterUUID(Common->ClusterUUID);
+            for (const TString& uuid : Common->AcceptUUID) {
+                pb->AddAcceptUUID(uuid);
+            }
+        }
+
+        template<typename T, typename TCallback>
+        void ValidateVersionTag(const T& proto, TCallback&& errorCallback) {
+            // check if we will accept the peer's version tag (if the peer provides one and our accepted list is non-empty)
+            if (Common->VersionInfo) {
+                if (!proto.HasVersionTag()) {
+                    LOG_LOG_IC_X(NActorsServices::INTERCONNECT, "ICH06", NLog::PRI_WARN,
+                        "peer did not report VersionTag, accepting by default");
+                } else if (!Common->VersionInfo->AcceptedTags.count(proto.GetVersionTag())) {
+                    // we will not accept peer's tag, so check if remote peer would accept our version tag
+                    size_t i;
+                    for (i = 0; i < proto.AcceptedVersionTagsSize() && Common->VersionInfo->Tag != proto.GetAcceptedVersionTags(i); ++i)
+                    {}
+                    if (i == proto.AcceptedVersionTagsSize()) {
+                        // the peer will not accept our version either -- this is a total failure
+                        TStringStream s("local/peer version tags did not match accepted ones");
+                        s << " local Tag# " << Common->VersionInfo->Tag << " accepted Tags# [";
+                        bool first = true;
+                        for (const auto& tag : Common->VersionInfo->AcceptedTags) {
+                            s << (std::exchange(first, false) ? "" : " ") << tag;
+                        }
+                        s << "] peer Tag# " << proto.GetVersionTag() << " accepted Tags# [";
+                        first = true;
+                        for (const auto& tag : proto.GetAcceptedVersionTags()) {
+                            s << (std::exchange(first, false) ? "" : " ") << tag;
+                        }
+                        s << "]";
+                        errorCallback(s.Str());
+                    }
+                }
+            }
+        }
+
+        template<typename T, typename TCallback>
+        void ValidateClusterUUID(const T& proto, TCallback&& errorCallback, const TMaybe<TString>& uuid = {}) {
+            auto formatList = [](const auto& list) {
+                TStringStream s;
+                s << "[";
+                for (auto it = list.begin(); it != list.end(); ++it) {
+                    if (it != list.begin()) {
+                        s << " ";
+                    }
+                    s << *it;
+                }
+                s << "]";
+                return s.Str();
+            };
+            if (!Common->AcceptUUID) {
+                return; // promiscuous mode -- we accept any peer
+            }
+            if (!proto.HasClusterUUIDs()) {
+                if (uuid) {
+                    // old-style checking, peer does not support symmetric protocol
+                    bool matching = false;
+                    for (const TString& accepted : Common->AcceptUUID) {
+                        if (*uuid == accepted) {
+                            matching = true;
+                            break;
+                        }
+                    }
+                    if (!matching) {
+                        errorCallback(Sprintf("Peer ClusterUUID# %s mismatch, AcceptUUID# %s", uuid->data(), formatList(Common->AcceptUUID).data()));
+                    }
+                }
+                return; // remote side did not fill in this field -- old version, symmetric protocol is not supported
+            }
+
+            const auto& uuids = proto.GetClusterUUIDs();
+
+            // check if our UUID matches remote accept list
+            for (const TString& item : uuids.GetAcceptUUID()) {
+                if (item == Common->ClusterUUID) {
+                    return; // match
+                }
+            }
+
+            // check if remote UUID matches our accept list
+            const TString& remoteUUID = uuids.GetClusterUUID();
+            for (const TString& item : Common->AcceptUUID) {
+                if (item == remoteUUID) {
+                    return; // match
+                }
+            }
+
+            // no match
+            errorCallback(Sprintf("Peer ClusterUUID# %s mismatch, AcceptUUID# %s", remoteUUID.data(), formatList(Common->AcceptUUID).data()));
+        }
+
+        void ParsePeerScopeId(const NActorsInterconnect::TScopeId& proto) {
+            Params.PeerScopeId = {proto.GetX1(), proto.GetX2()};
+        }
+
+        void FillInScopeId(NActorsInterconnect::TScopeId& proto) {
+            const TScopeId& scope = Common->LocalScopeId;
+            proto.SetX1(scope.first);
+            proto.SetX2(scope.second);
+        }
+
+        template<typename T>
+        void ReportProto(const T& protobuf, const char *msg) {
+            auto formatString = [&] {
+                google::protobuf::TextFormat::Printer p;
+                p.SetSingleLineMode(true);
+                TString s;
+                p.PrintToString(protobuf, &s);
+                return s;
+            };
+            LOG_LOG_IC_X(NActorsServices::INTERCONNECT, "ICH07", NLog::PRI_DEBUG, "%s %s", msg,
+                formatString().data());
+        }
+
+        bool CheckPeerCookie(const TString& cookie, TString *error) {
+            // create a temporary socket to connect to the peer
+            TSocketPtr tempSocket;
+            std::swap(tempSocket, Socket);
+            TPollerToken::TPtr tempPollerToken;
+            std::swap(tempPollerToken, PollerToken);
+
+            // set up virtual self id to ensure peer will not drop our connection
+            char buf[12] = {'c', 'o', 'o', 'k', 'i', 'e', ' ', 'c', 'h', 'e', 'c', 'k'};
+            SelfVirtualId = TActorId(SelfActorId.NodeId(), TStringBuf(buf, 12));
+
+            bool success = true;
+            try {
+                // issue connection and send initial packet
+                Connect(false);
+                SendInitialPacket();
+
+                // wait for basic response
+                TInitialPacket response;
+                ReceiveData(&response, 
sizeof(response), "ReceiveResponse"); + if (!response.Check()) { + Fail(TEvHandshakeFail::HANDSHAKE_FAIL_TRANSIENT, "Initial packet CRC error"); + } else if (response.Header.Version != INTERCONNECT_PROTOCOL_VERSION) { + Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, Sprintf("Incompatible protocol %" PRIu64, response.Header.Version)); + } + + // issue cookie check request + NActorsInterconnect::THandshakeRequest request; + request.SetProtocol(INTERCONNECT_PROTOCOL_VERSION); + request.SetProgramPID(0); + request.SetProgramStartTime(0); + request.SetSerial(0); + request.SetReceiverNodeId(0); + request.SetSenderActorId(TString()); + request.SetCookie(cookie); + request.SetDoCheckCookie(true); + SendExBlock(request, "SendExBlockDoCheckCookie"); + + // process cookie check reply + NActorsInterconnect::THandshakeReply reply; + if (!reply.ParseFromString(ReceiveExBlock("ReceiveExBlockDoCheckCookie"))) { + Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "Incorrect packet from peer"); + } else if (reply.HasCookieCheckResult() && !reply.GetCookieCheckResult()) { + Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "Cookie check error -- possible network problem"); + } + } catch (const TExHandshakeFailed& e) { + *error = e.what(); + success = false; + } + + // restore state + SelfVirtualId = TActorId(); + std::swap(tempSocket, Socket); + std::swap(tempPollerToken, PollerToken); + return success; + } + + void PerformOutgoingHandshake() { + LOG_LOG_IC_X(NActorsServices::INTERCONNECT, "ICH01", NLog::PRI_DEBUG, + "starting outgoing handshake"); + + // perform connection + Connect(true); + + // send initial request packet + SendInitialPacket(); + + TInitialPacket response; + ReceiveData(&response, sizeof(response), "ReceiveResponse"); + if (!response.Check()) { + Fail(TEvHandshakeFail::HANDSHAKE_FAIL_TRANSIENT, "Initial packet CRC error"); + } else if (response.Header.Version != INTERCONNECT_PROTOCOL_VERSION) { + Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, Sprintf("Incompatible protocol %" PRIu64, response.Header.Version)); + } + + // extract next packet + NextPacketFromPeer = response.Header.NextPacket; + + if (!PeerVirtualId) { + // creating new session -- we have to generate request + NActorsInterconnect::THandshakeRequest request; + + request.SetProtocol(INTERCONNECT_PROTOCOL_VERSION); + request.SetProgramPID(GetPID()); + request.SetProgramStartTime(Common->StartTime); + request.SetSerial(SelfVirtualId.LocalId()); + request.SetReceiverNodeId(PeerNodeId); + request.SetSenderActorId(SelfVirtualId.ToString()); + request.SetSenderHostName(Common->TechnicalSelfHostName); + request.SetReceiverHostName(PeerHostName); + + if (Common->LocalScopeId != TScopeId()) { + FillInScopeId(*request.MutableClientScopeId()); + } + + if (Common->Cookie) { + request.SetCookie(Common->Cookie); + } + if (Common->ClusterUUID) { + request.SetUUID(Common->ClusterUUID); + } + SetupClusterUUID(request); + SetupVersionTag(request); + + if (const ui32 size = Common->HandshakeBallastSize) { + TString ballast(size, 0); + char* data = ballast.Detach(); + for (ui32 i = 0; i < size; ++i) { + data[i] = i; + } + request.SetBallast(ballast); + } + + switch (Common->Settings.EncryptionMode) { + case EEncryptionMode::DISABLED: + break; + + case EEncryptionMode::OPTIONAL: + request.SetRequireEncryption(false); + break; + + case EEncryptionMode::REQUIRED: + request.SetRequireEncryption(true); + break; + } + + request.SetRequestModernFrame(true); + request.SetRequestAuthOnly(Common->Settings.TlsAuthOnly); + + SendExBlock(request, 
"ExRequest"); + + NActorsInterconnect::THandshakeReply reply; + if (!reply.ParseFromString(ReceiveExBlock("ExReply"))) { + Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "Incorrect THandshakeReply"); + } + ReportProto(reply, "ReceiveExBlock ExReply"); + + if (reply.HasErrorExplaination()) { + Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "error from peer: " + reply.GetErrorExplaination()); + } else if (!reply.HasSuccess()) { + Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "empty reply"); + } + + auto generateError = [this](TString msg) { + Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, msg); + }; + + const auto& success = reply.GetSuccess(); + ValidateClusterUUID(success, generateError); + ValidateVersionTag(success, generateError); + + const auto& s = success.GetSenderActorId(); + PeerVirtualId.Parse(s.data(), s.size()); + + // recover flags + Params.Encryption = success.GetStartEncryption(); + Params.UseModernFrame = success.GetUseModernFrame(); + Params.AuthOnly = Params.Encryption && success.GetAuthOnly(); + if (success.HasServerScopeId()) { + ParsePeerScopeId(success.GetServerScopeId()); + } + + // recover peer process info from peer's reply + ProgramInfo = GetProgramInfo(success); + } else if (!response.Header.SelfVirtualId) { + // peer reported error -- empty ack was generated by proxy for this request + Fail(TEvHandshakeFail::HANDSHAKE_FAIL_SESSION_MISMATCH, "Peer rejected session continuation handshake"); + } else if (response.Header.SelfVirtualId != PeerVirtualId || response.Header.PeerVirtualId != SelfVirtualId) { + // resuming existing session; check that virtual ids of peers match each other + Fail(TEvHandshakeFail::HANDSHAKE_FAIL_SESSION_MISMATCH, "Session virtual ID mismatch"); + } else { + ProgramInfo.ConstructInPlace(); // successful handshake + } + } + + void PerformIncomingHandshake() { + LOG_LOG_IC_X(NActorsServices::INTERCONNECT, "ICH02", NLog::PRI_DEBUG, + "starting incoming handshake"); + + // set up incoming socket + SetupSocket(); + + // wait for initial request packet + TInitialPacket request; + ReceiveData(&request, sizeof(request), "ReceiveRequest"); + if (!request.Check()) { + Fail(TEvHandshakeFail::HANDSHAKE_FAIL_TRANSIENT, "Initial packet CRC error"); + } else if (request.Header.Version != INTERCONNECT_PROTOCOL_VERSION) { + Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, Sprintf("Incompatible protocol %" PRIu64, request.Header.Version)); + } + + // extract peer node id from the peer + PeerNodeId = request.Header.SelfVirtualId.NodeId(); + if (!PeerNodeId) { + Y_VERIFY_DEBUG(false, "PeerNodeId is zero request# %s", request.ToString().data()); + Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "SelfVirtualId.NodeId is empty in initial packet"); + } + UpdatePrefix(); + + // extract next packet + NextPacketFromPeer = request.Header.NextPacket; + + if (request.Header.PeerVirtualId) { + // issue request to the proxy and wait for the response + auto reply = AskProxy<TEvHandshakeAck, TEvHandshakeNak>(MakeHolder<TEvHandshakeAsk>( + request.Header.SelfVirtualId, request.Header.PeerVirtualId, request.Header.NextPacket), + "TEvHandshakeAsk"); + if (auto *ack = reply->CastAsLocal<TEvHandshakeAck>()) { + // extract self/peer virtual ids + SelfVirtualId = ack->Self; + PeerVirtualId = request.Header.SelfVirtualId; + NextPacketToPeer = ack->NextPacket; + Params = ack->Params; + + // only succeed in case when proxy returned valid SelfVirtualId; otherwise it wants us to terminate + // the handshake process and it does not expect the handshake reply + 
ProgramInfo.ConstructInPlace();
+                } else {
+                    LOG_LOG_IC_X(NActorsServices::INTERCONNECT, "ICH08", NLog::PRI_NOTICE,
+                        "Continuation request rejected by proxy");
+
+                    // report continuation reject to peer
+                    SelfVirtualId = TActorId();
+                    PeerVirtualId = TActorId();
+                    NextPacketToPeer = 0;
+                }
+
+                // issue response to the peer
+                SendInitialPacket();
+            } else {
+                // peer wants a new session, clear fields and send initial packet
+                SelfVirtualId = TActorId();
+                PeerVirtualId = TActorId();
+                NextPacketToPeer = 0;
+                SendInitialPacket();
+
+                // wait for extended request
+                auto ev = MakeHolder<TEvHandshakeRequest>();
+                auto& request = ev->Record;
+                if (!request.ParseFromString(ReceiveExBlock("ExRequest"))) {
+                    Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "Incorrect THandshakeRequest");
+                }
+                ReportProto(request, "ReceiveExBlock ExRequest");
+
+                auto generateError = [this](TString msg) {
+                    // issue reply to the peer to prevent repeated connection retries
+                    NActorsInterconnect::THandshakeReply reply;
+                    reply.SetErrorExplaination(msg);
+                    SendExBlock(reply, "ExReply");
+
+                    // terminate the handshake
+                    Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, msg);
+                };
+
+                // check request cookie
+                TString error;
+                if (request.HasDoCheckCookie()) {
+                    NActorsInterconnect::THandshakeReply reply;
+                    reply.SetCookieCheckResult(request.GetCookie() == Common->Cookie);
+                    SendExBlock(reply, "ExReplyDoCheckCookie");
+                    throw TExHandshakeFailed();
+                } else if (request.HasCookie() && !CheckPeerCookie(request.GetCookie(), &error)) {
+                    generateError(TStringBuilder() << "Peer connectivity-checking failed, error# " << error);
+                }
+
+                // update log prefix with the reported peer host name
+                PeerHostName = request.GetSenderHostName();
+
+                // parse peer virtual id
+                const auto& str = request.GetSenderActorId();
+                PeerVirtualId.Parse(str.data(), str.size());
+
+                // validate request
+                ValidateClusterUUID(request, generateError, request.GetUUID());
+                if (request.GetReceiverNodeId() != SelfActorId.NodeId()) {
+                    generateError(Sprintf("Incorrect ReceiverNodeId# %" PRIu32 " from the peer, expected# %" PRIu32,
+                        request.GetReceiverNodeId(), SelfActorId.NodeId()));
+                } else if (request.GetReceiverHostName() != Common->TechnicalSelfHostName) {
+                    generateError(Sprintf("ReceiverHostName# %s mismatch, expected# %s", request.GetReceiverHostName().data(),
+                        Common->TechnicalSelfHostName.data()));
+                }
+                ValidateVersionTag(request, generateError);
+
+                // check peer node
+                auto peerNodeInfo = GetPeerNodeInfo();
+                if (!peerNodeInfo) {
+                    generateError("Peer node not registered in nameservice");
+                } else if (peerNodeInfo->Host != request.GetSenderHostName()) {
+                    generateError("SenderHostName mismatch");
+                }
+
+                // check request against encryption
+                switch (Common->Settings.EncryptionMode) {
+                    case EEncryptionMode::DISABLED:
+                        if (request.GetRequireEncryption()) {
+                            generateError("Peer requested encryption, but it is disabled locally");
+                        }
+                        break;
+
+                    case EEncryptionMode::OPTIONAL:
+                        Params.Encryption = request.HasRequireEncryption();
+                        break;
+
+                    case EEncryptionMode::REQUIRED:
+                        if (!request.HasRequireEncryption()) {
+                            generateError("Peer did not request encryption, but it is required locally");
+                        }
+                        Params.Encryption = true;
+                        break;
+                }
+
+                Params.UseModernFrame = request.GetRequestModernFrame();
+                Params.AuthOnly = Params.Encryption && request.GetRequestAuthOnly() && Common->Settings.TlsAuthOnly;
+
+                if (request.HasClientScopeId()) {
+                    ParsePeerScopeId(request.GetClientScopeId());
+                }
+
+                // remember program info (assuming successful 
handshake) + ProgramInfo = GetProgramInfo(request); + + // send to proxy + auto reply = AskProxy<TEvHandshakeReplyOK, TEvHandshakeReplyError>(std::move(ev), "TEvHandshakeRequest"); + + // parse it + if (auto ev = reply->CastAsLocal<TEvHandshakeReplyOK>()) { + // issue successful reply to the peer + auto& record = ev->Record; + Y_VERIFY(record.HasSuccess()); + auto& success = *record.MutableSuccess(); + SetupClusterUUID(success); + SetupVersionTag(success); + success.SetStartEncryption(Params.Encryption); + if (Common->LocalScopeId != TScopeId()) { + FillInScopeId(*success.MutableServerScopeId()); + } + success.SetUseModernFrame(Params.UseModernFrame); + success.SetAuthOnly(Params.AuthOnly); + SendExBlock(record, "ExReply"); + + // extract sender actor id (self virtual id) + const auto& str = success.GetSenderActorId(); + SelfVirtualId.Parse(str.data(), str.size()); + } else if (auto ev = reply->CastAsLocal<TEvHandshakeReplyError>()) { + // in case of error just send reply to the peer and terminate handshake + SendExBlock(ev->Record, "ExReply"); + ProgramInfo.Clear(); // do not issue reply to the proxy + } else { + Y_FAIL("unexpected event Type# 0x%08" PRIx32, reply->GetTypeRewrite()); + } + } + } + + template <typename T> + void SendExBlock(const T& proto, const char* what) { + TString data; + Y_PROTOBUF_SUPPRESS_NODISCARD proto.SerializeToString(&data); + Y_VERIFY(data.size() <= TExHeader::MaxSize); + + ReportProto(proto, Sprintf("SendExBlock %s", what).data()); + + TExHeader header; + header.Size = data.size(); + header.Sign(data.data(), data.size()); + SendData(&header, sizeof(header), Sprintf("Send%sHeader", what)); + SendData(data.data(), data.size(), Sprintf("Send%sData", what)); + } + + TString ReceiveExBlock(const char* what) { + TExHeader header; + ReceiveData(&header, sizeof(header), Sprintf("Receive%sHeader", what)); + if (header.Size > TExHeader::MaxSize) { + Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "Incorrect extended header size"); + } + + TString data; + data.resize(header.Size); + ReceiveData(data.Detach(), data.size(), Sprintf("Receive%sData", what)); + + if (!header.Check(data.data(), data.size())) { + Fail(TEvHandshakeFail::HANDSHAKE_FAIL_TRANSIENT, "Extended header CRC error"); + } + + return data; + } + + private: + void SendToProxy(THolder<IEventBase> ev) { + Y_VERIFY(PeerNodeId); + Send(GetActorSystem()->InterconnectProxy(PeerNodeId), ev.Release()); + } + + template <typename TEvent> + THolder<typename TEvent::THandle> WaitForSpecificEvent(TString state, TInstant deadline = TInstant::Max()) { + State = std::move(state); + return TActorCoroImpl::WaitForSpecificEvent<TEvent>(deadline); + } + + template <typename T1, typename T2, typename... TEvents> + THolder<IEventHandle> WaitForSpecificEvent(TString state, TInstant deadline = TInstant::Max()) { + State = std::move(state); + return TActorCoroImpl::WaitForSpecificEvent<T1, T2, TEvents...>(deadline); + } + + template <typename TEvent> + THolder<typename TEvent::THandle> AskProxy(THolder<IEventBase> ev, TString state) { + SendToProxy(std::move(ev)); + return WaitForSpecificEvent<TEvent>(std::move(state)); + } + + template <typename T1, typename T2, typename... 
TOther>
+        THolder<IEventHandle> AskProxy(THolder<IEventBase> ev, TString state) {
+            SendToProxy(std::move(ev));
+            return WaitForSpecificEvent<T1, T2, TOther...>(std::move(state));
+        }
+
+        void Fail(TEvHandshakeFail::EnumHandshakeFail reason, TString explanation, bool network = false) {
+            TString msg = Sprintf("%s Peer# %s(%s) %s%s", HandshakeKind.data(), PeerHostName ? PeerHostName.data() : "<unknown>",
+                PeerAddr.size() ? PeerAddr.data() : "<unknown>", ResolveTimedOut ? "[resolve timeout] " : "",
+                explanation.data());
+
+            if (network) {
+                TInstant now = Now();
+                TInstant prevLog = LastLogNotice[PeerNodeId];
+                NActors::NLog::EPriority logPriority = NActors::NLog::PRI_DEBUG;
+                if (now - prevLog > MuteDuration) {
+                    logPriority = NActors::NLog::PRI_NOTICE;
+                    LastLogNotice[PeerNodeId] = now;
+                }
+                LOG_LOG_NET_X(logPriority, PeerNodeId, "network-related error occurred on handshake: %s", msg.data());
+            } else {
+                // calculate log severity based on failure type; permanent failures are logged at NOTICE, transient ones at INFO
+                auto severity = reason == TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT
+                    ? NActors::NLog::PRI_NOTICE
+                    : NActors::NLog::PRI_INFO;
+
+                LOG_LOG_IC_X(NActorsServices::INTERCONNECT, "ICH03", severity, "handshake failed, explanation# %s", msg.data());
+            }
+
+            if (PeerNodeId) {
+                SendToProxy(MakeHolder<TEvHandshakeFail>(reason, std::move(msg)));
+            }
+
+            throw TExHandshakeFailed() << explanation;
+        }
+
+    private:
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+        // COMMUNICATION BLOCK
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+        void Connect(bool updatePeerAddr) {
+            // issue request to a nameservice to resolve peer node address
+            Send(Common->NameserviceId, new TEvInterconnect::TEvResolveNode(PeerNodeId, Deadline));
+
+            // wait for the result
+            auto ev = WaitForSpecificEvent<TEvResolveError, TEvLocalNodeInfo, TEvInterconnect::TEvNodeAddress>("ResolveNode",
+                Now() + ResolveTimeout);
+
+            // extract address from the result
+            NInterconnect::TAddress address;
+            if (!ev) {
+                ResolveTimedOut = true;
+                if (auto peerNodeInfo = GetPeerNodeInfo(); peerNodeInfo && peerNodeInfo->Address) {
+                    address = {peerNodeInfo->Address, peerNodeInfo->Port};
+                } else {
+                    Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "DNS resolve timed out and no static address defined", true);
+                }
+            } else if (auto *p = ev->CastAsLocal<TEvLocalNodeInfo>()) {
+                if (!p->Address) {
+                    Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "DNS resolve error: no address returned", true);
+                }
+                address = {*p->Address};
+            } else if (auto *p = ev->CastAsLocal<TEvInterconnect::TEvNodeAddress>()) {
+                const auto& r = p->Record;
+                if (!r.HasAddress() || !r.HasPort()) {
+                    Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "DNS resolve error: no address returned", true);
+                }
+                address = {r.GetAddress(), static_cast<ui16>(r.GetPort())};
+            } else {
+                Y_VERIFY(ev->GetTypeRewrite() == ui32(ENetwork::ResolveError));
+                Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "DNS resolve error: " + ev->Get<TEvResolveError>()->Explain, true);
+            }
+
+            // create the socket with matching address family
+            Socket = NInterconnect::TStreamSocket::Make(address.GetFamily());
+            if (*Socket == -1) {
+                Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "System error: failed to create socket");
+            }
+
+            // extract peer address
+            if (updatePeerAddr) {
+                PeerAddr = address.ToString();
+            }
+
+            // set up socket parameters
+            SetupSocket();
+
+            // start connecting
+            switch (int err = -Socket->Connect(address)) {
+                case 0: // successful connection
+                    break;
+
+                case EINPROGRESS: // connection in progress
+                    WaitPoller(false, true, "WaitConnect");
+                    err = Socket->GetConnectStatus();
+                    if (err) {
+                        Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, Sprintf("Connection failed: %s", strerror(err)), true);
+                    }
+                    break;
+
+                default: // immediate connection error -- report it instead of silently proceeding with a dead socket
+                    Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, Sprintf("Connection failed: %s", strerror(err)), true);
+                    break;
+            }
+
+            auto it = LastLogNotice.find(PeerNodeId);
+            NActors::NLog::EPriority logPriority = NActors::NLog::PRI_DEBUG;
+            if (it != LastLogNotice.end()) {
+                LastLogNotice.erase(it);
+                logPriority = NActors::NLog::PRI_NOTICE;
+            }
+            LOG_LOG_IC_X(NActorsServices::INTERCONNECT, "ICH05", logPriority, "connected to peer");
+        }
+
+        void SetupSocket() {
+            // switch to nonblocking mode
+            try {
+                SetNonBlock(*Socket);
+                SetNoDelay(*Socket, true);
+            } catch (...) {
+                Fail(TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT, "System error: can't set nonblocking mode for socket");
+            }
+
+            // set up send buffer size
+            Socket->SetSendBufferSize(Common->Settings.GetSendBufferSize());
+
+            // register in poller
+            RegisterInPoller();
+        }
+
+        void RegisterInPoller() {
+            const bool success = Send(MakePollerActorId(), new TEvPollerRegister(Socket, SelfActorId, SelfActorId));
+            Y_VERIFY(success);
+            auto result = WaitForSpecificEvent<TEvPollerRegisterResult>("RegisterPoller");
+            PollerToken = std::move(result->Get()->PollerToken);
+            Y_VERIFY(PollerToken);
+            Y_VERIFY(PollerToken->RefCount() == 1); // ensure exclusive ownership
+        }
+
+        void SendInitialPacket() {
+            TInitialPacket packet(SelfVirtualId, PeerVirtualId, NextPacketToPeer, INTERCONNECT_PROTOCOL_VERSION);
+            SendData(&packet, sizeof(packet), "SendInitialPacket");
+        }
+
+        void WaitPoller(bool read, bool write, TString state) {
+            PollerToken->Request(read, write);
+            WaitForSpecificEvent<TEvPollerReady>(std::move(state));
+        }
+
+        template <typename TDataPtr, typename TSendRecvFunc>
+        void Process(TDataPtr buffer, size_t len, TSendRecvFunc&& sendRecv, bool read, bool write, TString state) {
+            Y_VERIFY(Socket);
+            NInterconnect::TStreamSocket* sock = Socket.Get();
+            ssize_t (NInterconnect::TStreamSocket::*pfn)(TDataPtr, size_t, TString*) const = sendRecv;
+            size_t processed = 0;
+
+            auto error = [&](TString msg) {
+                Fail(TEvHandshakeFail::HANDSHAKE_FAIL_TRANSIENT, Sprintf("Socket error# %s state# %s processed# %zu remain# %zu",
+                    msg.data(), state.data(), processed, len), true);
+            };
+
+            while (len) {
+                TString err;
+                ssize_t nbytes = (sock->*pfn)(buffer, len, &err);
+                if (nbytes > 0) {
+                    buffer = (char*)buffer + nbytes;
+                    len -= nbytes;
+                    processed += nbytes;
+                } else if (-nbytes == EAGAIN || -nbytes == EWOULDBLOCK) {
+                    WaitPoller(read, write, state);
+                } else if (!nbytes) {
+                    error("connection unexpectedly closed");
+                } else if (-nbytes != EINTR) {
+                    error(err ? 
err : TString(strerror(-nbytes))); + } + } + } + + void SendData(const void* buffer, size_t len, TString state) { + Process(buffer, len, &NInterconnect::TStreamSocket::Send, false, true, std::move(state)); + } + + void ReceiveData(void* buffer, size_t len, TString state) { + Process(buffer, len, &NInterconnect::TStreamSocket::Recv, true, false, std::move(state)); + } + + THolder<TEvInterconnect::TNodeInfo> GetPeerNodeInfo() { + Y_VERIFY(PeerNodeId); + Send(Common->NameserviceId, new TEvInterconnect::TEvGetNode(PeerNodeId, Deadline)); + auto response = WaitForSpecificEvent<TEvInterconnect::TEvNodeInfo>("GetPeerNodeInfo"); + return std::move(response->Get()->Node); + } + + template <typename T> + static THolder<TProgramInfo> GetProgramInfo(const T& proto) { + auto programInfo = MakeHolder<TProgramInfo>(); + programInfo->PID = proto.GetProgramPID(); + programInfo->StartTime = proto.GetProgramStartTime(); + programInfo->Serial = proto.GetSerial(); + return programInfo; + } + }; + + IActor* CreateOutgoingHandshakeActor(TInterconnectProxyCommon::TPtr common, const TActorId& self, + const TActorId& peer, ui32 nodeId, ui64 nextPacket, TString peerHostName, + TSessionParams params) { + return new TActorCoro(MakeHolder<THandshakeActor>(std::move(common), self, peer, nodeId, nextPacket, + std::move(peerHostName), std::move(params))); + } + + IActor* CreateIncomingHandshakeActor(TInterconnectProxyCommon::TPtr common, TSocketPtr socket) { + return new TActorCoro(MakeHolder<THandshakeActor>(std::move(common), std::move(socket))); + } + +} diff --git a/library/cpp/actors/interconnect/interconnect_handshake.h b/library/cpp/actors/interconnect/interconnect_handshake.h new file mode 100644 index 0000000000..b3c0db6c5d --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_handshake.h @@ -0,0 +1,24 @@ +#pragma once + +#include <library/cpp/actors/core/hfunc.h> +#include <library/cpp/actors/core/event_pb.h> +#include <library/cpp/actors/core/events.h> + +#include "interconnect_common.h" +#include "interconnect_impl.h" +#include "poller_tcp.h" +#include "events_local.h" + +namespace NActors { + static constexpr TDuration DEFAULT_HANDSHAKE_TIMEOUT = TDuration::Seconds(1); + static constexpr ui64 INTERCONNECT_PROTOCOL_VERSION = 2; + + using TSocketPtr = TIntrusivePtr<NInterconnect::TStreamSocket>; + + IActor* CreateOutgoingHandshakeActor(TInterconnectProxyCommon::TPtr common, const TActorId& self, + const TActorId& peer, ui32 nodeId, ui64 nextPacket, TString peerHostName, + TSessionParams params); + + IActor* CreateIncomingHandshakeActor(TInterconnectProxyCommon::TPtr common, TSocketPtr socket); + +} diff --git a/library/cpp/actors/interconnect/interconnect_impl.h b/library/cpp/actors/interconnect/interconnect_impl.h new file mode 100644 index 0000000000..ee29e4d397 --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_impl.h @@ -0,0 +1,45 @@ +#pragma once + +#include "interconnect.h" +#include <library/cpp/actors/protos/interconnect.pb.h> +#include <library/cpp/actors/core/event_pb.h> +#include <library/cpp/actors/helpers/mon_histogram_helper.h> +#include <library/cpp/monlib/dynamic_counters/counters.h> + +namespace NActors { + // resolve node info + struct TEvInterconnect::TEvResolveNode: public TEventPB<TEvInterconnect::TEvResolveNode, NActorsInterconnect::TEvResolveNode, TEvInterconnect::EvResolveNode> { + TEvResolveNode() { + } + + TEvResolveNode(ui32 nodeId, TInstant deadline = TInstant::Max()) { + Record.SetNodeId(nodeId); + if (deadline != TInstant::Max()) { + 
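// TInstant::GetValue() yields microseconds since the epoch; TInstant::Max()
+                // stands for "no deadline" and leaves the field unset. +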
Record.SetDeadline(deadline.GetValue()); + } + } + }; + + // node info + struct TEvInterconnect::TEvNodeAddress: public TEventPB<TEvInterconnect::TEvNodeAddress, NActorsInterconnect::TEvNodeInfo, TEvInterconnect::EvNodeAddress> { + TEvNodeAddress() { + } + + TEvNodeAddress(ui32 nodeId) { + Record.SetNodeId(nodeId); + } + }; + + // register node + struct TEvInterconnect::TEvRegisterNode: public TEventBase<TEvInterconnect::TEvRegisterNode, TEvInterconnect::EvRegisterNode> { + }; + + // reply on register node + struct TEvInterconnect::TEvRegisterNodeResult: public TEventBase<TEvInterconnect::TEvRegisterNodeResult, TEvInterconnect::EvRegisterNodeResult> { + }; + + // disconnect + struct TEvInterconnect::TEvDisconnect: public TEventLocal<TEvInterconnect::TEvDisconnect, TEvInterconnect::EvDisconnect> { + }; + +} diff --git a/library/cpp/actors/interconnect/interconnect_mon.cpp b/library/cpp/actors/interconnect/interconnect_mon.cpp new file mode 100644 index 0000000000..cf924ccbf9 --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_mon.cpp @@ -0,0 +1,276 @@ +#include "interconnect_mon.h" +#include "interconnect_tcp_proxy.h" + +#include <library/cpp/json/json_value.h> +#include <library/cpp/json/json_writer.h> +#include <library/cpp/monlib/service/pages/templates.h> + +#include <openssl/ssl.h> +#include <openssl/pem.h> + +namespace NInterconnect { + + using namespace NActors; + + class TInterconnectMonActor : public TActor<TInterconnectMonActor> { + class TQueryProcessor : public TActorBootstrapped<TQueryProcessor> { + const TActorId Sender; + const bool Json; + TMap<ui32, TInterconnectProxyTCP::TProxyStats> Stats; + ui32 PendingReplies = 0; + + public: + static constexpr IActor::EActorActivity ActorActivityType() { + return INTERCONNECT_MONACTOR; + } + + TQueryProcessor(const TActorId& sender, bool json) + : Sender(sender) + , Json(json) + {} + + void Bootstrap(const TActorContext& ctx) { + Become(&TThis::StateFunc, ctx, TDuration::Seconds(5), new TEvents::TEvWakeup); + Send(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes); + } + + void Handle(TEvInterconnect::TEvNodesInfo::TPtr ev, const TActorContext& ctx) { + TActorSystem* const as = ctx.ExecutorThread.ActorSystem; + for (const auto& node : ev->Get()->Nodes) { + Send(as->InterconnectProxy(node.NodeId), new TInterconnectProxyTCP::TEvQueryStats, IEventHandle::FlagTrackDelivery); + ++PendingReplies; + } + GenerateResultWhenReady(ctx); + } + + STRICT_STFUNC(StateFunc, + HFunc(TEvInterconnect::TEvNodesInfo, Handle) + HFunc(TInterconnectProxyTCP::TEvStats, Handle) + CFunc(TEvents::TSystem::Undelivered, HandleUndelivered) + CFunc(TEvents::TSystem::Wakeup, HandleWakeup) + ) + + void Handle(TInterconnectProxyTCP::TEvStats::TPtr& ev, const TActorContext& ctx) { + auto *msg = ev->Get(); + Stats.emplace(msg->PeerNodeId, std::move(msg->ProxyStats)); + --PendingReplies; + GenerateResultWhenReady(ctx); + } + + void HandleUndelivered(const TActorContext& ctx) { + --PendingReplies; + GenerateResultWhenReady(ctx); + } + + void HandleWakeup(const TActorContext& ctx) { + PendingReplies = 0; + GenerateResultWhenReady(ctx); + } + + void GenerateResultWhenReady(const TActorContext& ctx) { + if (!PendingReplies) { + if (Json) { + ctx.Send(Sender, new NMon::TEvHttpInfoRes(GenerateJson(), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); + } else { + ctx.Send(Sender, new NMon::TEvHttpInfoRes(GenerateHtml())); + } + Die(ctx); + } + } + + TString GenerateHtml() { + TStringStream str; + HTML(str) { + TABLE_CLASS("table-sortable table") { + 
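// TABLE*/TABLER/TABLED below are RAII stream helpers from monlib's templates.h:
+                        // each macro writes an opening tag and closes it when its scope
+                        // ends, so the code nesting mirrors the HTML structure. +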
TABLEHEAD() { + TABLER() { + TABLEH() { str << "Peer node id"; } + TABLEH() { str << "State"; } + TABLEH() { str << "Ping"; } + TABLEH() { str << "Clock skew"; } + TABLEH() { str << "Scope id"; } + TABLEH() { str << "Encryption"; } + TABLEH() { str << "LastSessionDieTime"; } + TABLEH() { str << "TotalOutputQueueSize"; } + TABLEH() { str << "Connected"; } + TABLEH() { str << "Host"; } + TABLEH() { str << "Port"; } + TABLEH() { str << "LastErrorTimestamp"; } + TABLEH() { str << "LastErrorKind"; } + TABLEH() { str << "LastErrorExplanation"; } + } + } + TABLEBODY() { + for (const auto& kv : Stats) { + TABLER() { + TABLED() { str << "<a href='" << kv.second.Path << "'>" << kv.first << "</a>"; } + TABLED() { str << kv.second.State; } + TABLED() { + if (kv.second.Ping != TDuration::Zero()) { + str << kv.second.Ping; + } + } + TABLED() { + if (kv.second.ClockSkew < 0) { + str << "-" << TDuration::MicroSeconds(-kv.second.ClockSkew); + } else { + str << "+" << TDuration::MicroSeconds(kv.second.ClockSkew); + } + } + TABLED() { str << ScopeIdToString(kv.second.PeerScopeId); } + TABLED() { + const char *color = kv.second.Encryption != "none" ? "green" : "red"; + str << "<font color='" << color << "'>" << kv.second.Encryption << "</font>"; + } + TABLED() { + if (kv.second.LastSessionDieTime != TInstant::Zero()) { + str << kv.second.LastSessionDieTime; + } + } + TABLED() { str << kv.second.TotalOutputQueueSize; } + TABLED() { str << (kv.second.Connected ? "yes" : "<strong>no</strong>"); } + TABLED() { str << kv.second.Host; } + TABLED() { str << kv.second.Port; } + TABLED() { + str << "<strong>"; + if (kv.second.LastErrorTimestamp != TInstant::Zero()) { + str << kv.second.LastErrorTimestamp; + } + str << "</strong>"; + } + TABLED() { str << "<strong>" << kv.second.LastErrorKind << "</strong>"; } + TABLED() { str << "<strong>" << kv.second.LastErrorExplanation << "</strong>"; } + } + } + } + } + } + return str.Str(); + } + + TString GenerateJson() { + NJson::TJsonValue json; + for (const auto& [nodeId, info] : Stats) { + NJson::TJsonValue item; + item["NodeId"] = nodeId; + + auto id = [](const auto& x) { return x; }; + auto toString = [](const auto& x) { return x.ToString(); }; + +#define JSON(NAME, FUN) item[#NAME] = FUN(info.NAME); + JSON(Path, id) + JSON(State, id) + JSON(PeerScopeId, ScopeIdToString) + JSON(LastSessionDieTime, toString) + JSON(TotalOutputQueueSize, id) + JSON(Connected, id) + JSON(Host, id) + JSON(Port, id) + JSON(LastErrorTimestamp, toString) + JSON(LastErrorKind, id) + JSON(LastErrorExplanation, id) + JSON(Ping, toString) + JSON(ClockSkew, id) + JSON(Encryption, id) +#undef JSON + + json[ToString(nodeId)] = item; + } + TStringStream str(NMonitoring::HTTPOKJSON); + NJson::WriteJson(&str, &json); + return str.Str(); + } + }; + + private: + TIntrusivePtr<TInterconnectProxyCommon> Common; + + public: + static constexpr IActor::EActorActivity ActorActivityType() { + return INTERCONNECT_MONACTOR; + } + + TInterconnectMonActor(TIntrusivePtr<TInterconnectProxyCommon> common) + : TActor(&TThis::StateFunc) + , Common(std::move(common)) + {} + + STRICT_STFUNC(StateFunc, + HFunc(NMon::TEvHttpInfo, Handle) + ) + + void Handle(NMon::TEvHttpInfo::TPtr& ev, const TActorContext& ctx) { + const auto& params = ev->Get()->Request.GetParams(); + int certinfo = 0; + if (TryFromString(params.Get("certinfo"), certinfo) && certinfo) { + ctx.Send(ev->Sender, new NMon::TEvHttpInfoRes(GetCertInfoJson(), ev->Get()->SubRequestId, + NMon::TEvHttpInfoRes::Custom)); + } else { + const bool json = params.Has("fmt") 
&& params.Get("fmt") == "json"; + ctx.Register(new TQueryProcessor(ev->Sender, json)); + } + } + + TString GetCertInfoJson() const { + NJson::TJsonValue json(NJson::JSON_MAP); + if (const TString cert = Common ? Common->Settings.Certificate : TString()) { + struct TEx : yexception {}; + try { + const auto& cert = Common->Settings.Certificate; + std::unique_ptr<BIO, void(*)(BIO*)> bio(BIO_new_mem_buf(cert.data(), cert.size()), &BIO_vfree); + if (!bio) { + throw TEx() << "BIO_new_mem_buf failed"; + } + std::unique_ptr<X509, void(*)(X509*)> x509(PEM_read_bio_X509(bio.get(), nullptr, nullptr, nullptr), + &X509_free); + if (!x509) { + throw TEx() << "PEM_read_bio_X509 failed"; + } + X509_NAME *name = X509_get_subject_name(x509.get()); + if (!name) { + throw TEx() << "X509_get_subject_name failed"; + } + char buffer[4096]; + if (char *p = X509_NAME_oneline(name, buffer, sizeof(buffer))) { + json["Subject"] = p; + } + if (int loc = X509_NAME_get_index_by_NID(name, NID_commonName, -1); loc >= 0) { + if (X509_NAME_ENTRY *entry = X509_NAME_get_entry(name, loc)) { + if (ASN1_STRING *data = X509_NAME_ENTRY_get_data(entry)) { + unsigned char *cn; + if (const int len = ASN1_STRING_to_UTF8(&cn, data); len >= 0) { + json["CommonName"] = TString(reinterpret_cast<char*>(cn), len); + OPENSSL_free(cn); + } + } + } + } + auto time = [](const ASN1_TIME *t, const char *name) -> TString { + if (t) { + struct tm tm; + if (ASN1_TIME_to_tm(t, &tm)) { + return Strftime("%Y-%m-%dT%H:%M:%S%z", &tm); + } else { + throw TEx() << "ASN1_TIME_to_tm failed"; + } + } else { + throw TEx() << name << " failed"; + } + }; + json["NotBefore"] = time(X509_get0_notBefore(x509.get()), "X509_get0_notBefore"); + json["NotAfter"] = time(X509_get0_notAfter(x509.get()), "X509_get0_notAfter"); + } catch (const TEx& ex) { + json["Error"] = ex.what(); + } + } + TStringStream str(NMonitoring::HTTPOKJSON); + NJson::WriteJson(&str, &json); + return str.Str(); + } + }; + + IActor *CreateInterconnectMonActor(TIntrusivePtr<TInterconnectProxyCommon> common) { + return new TInterconnectMonActor(std::move(common)); + } + +} // NInterconnect diff --git a/library/cpp/actors/interconnect/interconnect_mon.h b/library/cpp/actors/interconnect/interconnect_mon.h new file mode 100644 index 0000000000..3fb26053fb --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_mon.h @@ -0,0 +1,15 @@ +#pragma once + +#include <library/cpp/actors/core/actor.h> +#include "interconnect_common.h" + +namespace NInterconnect { + + NActors::IActor *CreateInterconnectMonActor(TIntrusivePtr<NActors::TInterconnectProxyCommon> common = nullptr); + + static inline NActors::TActorId MakeInterconnectMonActorId(ui32 nodeId) { + char s[12] = {'I', 'C', 'O', 'v', 'e', 'r', 'v', 'i', 'e', 'w', 0, 0}; + return NActors::TActorId(nodeId, TStringBuf(s, 12)); + } + +} // NInterconnect diff --git a/library/cpp/actors/interconnect/interconnect_nameserver_base.h b/library/cpp/actors/interconnect/interconnect_nameserver_base.h new file mode 100644 index 0000000000..df614f6c2b --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_nameserver_base.h @@ -0,0 +1,83 @@ +#include "interconnect.h" +#include "interconnect_impl.h" +#include "interconnect_address.h" +#include "events_local.h" + +#include <library/cpp/actors/core/hfunc.h> +#include <library/cpp/actors/memory_log/memlog.h> + +namespace NActors { + + template<typename TDerived> + class TInterconnectNameserverBase : public TActor<TDerived> { + protected: + const TMap<ui32, TTableNameserverSetup::TNodeInfo>& NodeTable; + + 
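// CRTP base: TDerived passes its state function into the constructor below and
+        // may shadow HandleMissedNodeId() to defer resolution of unknown nodes (as the
+        // dynamic nameserver does). A minimal derived class could look like this --
+        // an illustrative sketch only, with a made-up name TMyNameserver:
+        //
+        //   class TMyNameserver: public TInterconnectNameserverBase<TMyNameserver> {
+        //       TMap<ui32, TTableNameserverSetup::TNodeInfo> NodeTable;
+        //   public:
+        //       TMyNameserver(const TIntrusivePtr<TTableNameserverSetup>& setup)
+        //           : TInterconnectNameserverBase<TMyNameserver>(&TMyNameserver::StateFunc, NodeTable)
+        //           , NodeTable(setup->StaticNodeTable)
+        //       {}
+        //
+        //       STFUNC(StateFunc) {
+        //           switch (ev->GetTypeRewrite()) {
+        //               HFunc(TEvInterconnect::TEvResolveNode, Handle);
+        //               HFunc(TEvResolveAddress, Handle);
+        //               HFunc(TEvInterconnect::TEvListNodes, Handle);
+        //               HFunc(TEvInterconnect::TEvGetNode, Handle);
+        //           }
+        //       }
+        //   }; +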
TInterconnectNameserverBase(void (TDerived::*func)(TAutoPtr<IEventHandle>& ev, const TActorContext& ctx) + , const TMap<ui32, TTableNameserverSetup::TNodeInfo>& nodeTable) + : TActor<TDerived>(func) + , NodeTable(nodeTable) + { + } + public: + + void HandleMissedNodeId(TEvInterconnect::TEvResolveNode::TPtr& ev, + const TActorContext& ctx, + const TInstant&) { + auto reply = new TEvLocalNodeInfo; + reply->NodeId = ev->Get()->Record.GetNodeId(); + ctx.Send(ev->Sender, reply); + } + + void Handle(TEvInterconnect::TEvResolveNode::TPtr& ev, + const TActorContext& ctx) { + const TEvInterconnect::TEvResolveNode* request = ev->Get(); + auto& record = request->Record; + const ui32 nodeId = record.GetNodeId(); + const TInstant deadline = record.HasDeadline() ? TInstant::FromValue(record.GetDeadline()) : TInstant::Max(); + auto it = NodeTable.find(nodeId); + + if (it == NodeTable.end()) { + static_cast<TDerived*>(this)->HandleMissedNodeId(ev, ctx, deadline); + } else { + IActor::RegisterWithSameMailbox( + CreateResolveActor(nodeId, it->second, ev->Sender, this->SelfId(), deadline)); + } + } + + void Handle(TEvResolveAddress::TPtr& ev, + const TActorContext&) { + const TEvResolveAddress* request = ev->Get(); + + IActor::RegisterWithSameMailbox( + CreateResolveActor(request->Address, request->Port, ev->Sender, this->SelfId(), TInstant::Max())); + } + + void Handle(TEvInterconnect::TEvListNodes::TPtr& ev, + const TActorContext& ctx) { + THolder<TEvInterconnect::TEvNodesInfo> + reply(new TEvInterconnect::TEvNodesInfo()); + reply->Nodes.reserve(NodeTable.size()); + for (const auto& pr : NodeTable) { + reply->Nodes.emplace_back(pr.first, + pr.second.Address, pr.second.Host, pr.second.ResolveHost, + pr.second.Port, pr.second.Location); + } + ctx.Send(ev->Sender, reply.Release()); + } + + void Handle(TEvInterconnect::TEvGetNode::TPtr& ev, + const TActorContext& ctx) { + ui32 nodeId = ev->Get()->NodeId; + THolder<TEvInterconnect::TEvNodeInfo> + reply(new TEvInterconnect::TEvNodeInfo(nodeId)); + auto it = NodeTable.find(nodeId); + if (it != NodeTable.end()) { + reply->Node = MakeHolder<TEvInterconnect::TNodeInfo>(it->first, it->second.Address, + it->second.Host, it->second.ResolveHost, + it->second.Port, it->second.Location); + } + ctx.Send(ev->Sender, reply.Release()); + } + }; +} diff --git a/library/cpp/actors/interconnect/interconnect_nameserver_dynamic.cpp b/library/cpp/actors/interconnect/interconnect_nameserver_dynamic.cpp new file mode 100644 index 0000000000..5e48401b14 --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_nameserver_dynamic.cpp @@ -0,0 +1,178 @@ +#include "interconnect.h" +#include "interconnect_impl.h" +#include "interconnect_address.h" +#include "interconnect_nameserver_base.h" +#include "events_local.h" +#include "logging.h" + +#include <library/cpp/actors/core/hfunc.h> +#include <library/cpp/actors/core/log.h> + +namespace NActors { + + class TInterconnectDynamicNameserver + : public TInterconnectNameserverBase<TInterconnectDynamicNameserver> + , public TInterconnectLoggingBase + { + struct TPendingRequest { + TEvInterconnect::TEvResolveNode::TPtr Request; + TInstant Deadline; + + TPendingRequest(TEvInterconnect::TEvResolveNode::TPtr request, const TInstant& deadline) + : Request(request), Deadline(deadline) + { + } + }; + + TMap<ui32, TTableNameserverSetup::TNodeInfo> NodeTable; + TVector<TPendingRequest> PendingRequests; + TDuration PendingPeriod; + + void PrintInfo() { + TString logMsg = TStringBuilder() << "Table size: " << NodeTable.size(); + for (const auto& 
[nodeId, node] : NodeTable) {
+                TString str = TStringBuilder() << "\n > Node " << nodeId << " `" << node.Address << "`:" << node.Port << ", host: " << node.Host << ", resolveHost: " << node.ResolveHost;
+                logMsg += str;
+            }
+            LOG_DEBUG_IC("ICN01", "%s", logMsg.c_str());
+        }
+
+        bool IsNodeUpdated(const ui32 nodeId, const TString& address, const ui32 port) {
+            bool printInfo = false;
+            auto it = NodeTable.find(nodeId);
+            if (it == NodeTable.end()) {
+                LOG_DEBUG_IC("ICN02", "New node %u `%s`: %u",
+                    nodeId, address.c_str(), port);
+                printInfo = true;
+            } else if (it->second.Address != address || it->second.Port != port) {
+                LOG_DEBUG_IC("ICN03", "Updated node %u `%s`: %u (from `%s`: %u)",
+                    nodeId, address.c_str(), port, it->second.Address.c_str(), it->second.Port);
+                printInfo = true;
+                Send(TActivationContext::InterconnectProxy(nodeId), new TEvInterconnect::TEvDisconnect);
+            }
+            return printInfo;
+        }
+
+        void DiscardTimedOutRequests(const TActorContext& ctx, ui32 compactionCount = 0) {
+            auto now = Now();
+
+            for (auto& pending : PendingRequests) {
+                // reply only to requests that are still unanswered and whose deadline has
+                // actually passed; slots already satisfied by HandleUpdate hold a null
+                // Request and are merely compacted away below
+                if (pending.Request && pending.Deadline <= now) {
+                    LOG_ERROR_IC("ICN06", "Unknown nodeId: %u", pending.Request->Get()->Record.GetNodeId());
+                    auto reply = new TEvLocalNodeInfo;
+                    reply->NodeId = pending.Request->Get()->Record.GetNodeId();
+                    ctx.Send(pending.Request->Sender, reply);
+                    pending.Request.Reset();
+                    compactionCount++;
+                }
+            }
+
+            if (compactionCount) {
+                TVector<TPendingRequest> requests;
+                if (compactionCount < PendingRequests.size()) { // sanity check
+                    requests.reserve(PendingRequests.size() - compactionCount);
+                }
+                for (auto& pending : PendingRequests) {
+                    if (pending.Request) {
+                        requests.emplace_back(pending.Request, pending.Deadline);
+                    }
+                }
+                PendingRequests.swap(requests);
+            }
+        }
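+        // Pending-request lifecycle, for reference: HandleMissedNodeId below parks a
+        // request with deadline min(caller deadline, now + PendingPeriod); HandleUpdate
+        // either satisfies it (spawning a resolve actor and resetting the slot) or
+        // leaves it to the sweep above, which answers expired entries with an empty
+        // TEvLocalNodeInfo and compacts the reset slots away.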
+        void SchedulePeriodic() {
+            Schedule(TDuration::MilliSeconds(200), new TEvents::TEvWakeup());
+        }
+
+    public:
+        static constexpr EActivityType ActorActivityType() {
+            return NAMESERVICE;
+        }
+
+        TInterconnectDynamicNameserver(const TIntrusivePtr<TTableNameserverSetup>& setup, const TDuration& pendingPeriod, ui32 /*resolvePoolId*/)
+            : TInterconnectNameserverBase<TInterconnectDynamicNameserver>(&TInterconnectDynamicNameserver::StateFunc, NodeTable)
+            , NodeTable(setup->StaticNodeTable)
+            , PendingPeriod(pendingPeriod)
+        {
+            Y_VERIFY(setup->IsEntriesUnique());
+        }
+
+        STFUNC(StateFunc) {
+            try {
+                switch (ev->GetTypeRewrite()) {
+                    HFunc(TEvInterconnect::TEvResolveNode, Handle);
+                    HFunc(TEvResolveAddress, Handle);
+                    HFunc(TEvInterconnect::TEvListNodes, Handle);
+                    HFunc(TEvInterconnect::TEvGetNode, Handle);
+                    HFunc(TEvInterconnect::TEvNodesInfo, HandleUpdate);
+                    CFunc(TEvents::TEvWakeup::EventType, HandlePeriodic);
+                }
+            } catch (...) {
+                LOG_ERROR_IC("ICN09", "%s", CurrentExceptionMessage().c_str());
+            }
+        }
+
+        void HandleMissedNodeId(TEvInterconnect::TEvResolveNode::TPtr& ev,
+                                const TActorContext& ctx,
+                                const TInstant& deadline) {
+            if (PendingPeriod) {
+                if (PendingRequests.size() == 0) {
+                    SchedulePeriodic();
+                }
+                PendingRequests.emplace_back(std::move(ev), Min(deadline, Now() + PendingPeriod));
+            } else {
+                LOG_ERROR_IC("ICN07", "Unknown nodeId: %u", ev->Get()->Record.GetNodeId());
+                TInterconnectNameserverBase::HandleMissedNodeId(ev, ctx, deadline);
+            }
+        }
+
+        void HandleUpdate(TEvInterconnect::TEvNodesInfo::TPtr& ev,
+                          const TActorContext& ctx) {
+            auto request = ev->Get();
+            LOG_DEBUG_IC("ICN04", "Update TEvNodesInfo with sz: %lu", request->Nodes.size());
+
+            bool printInfo = false;
+            ui32 compactionCount = 0;
+
+            for (const auto& node : request->Nodes) {
+                printInfo |= IsNodeUpdated(node.NodeId, node.Address, node.Port);
+
+                NodeTable[node.NodeId] = TTableNameserverSetup::TNodeInfo(
+                    node.Address, node.Host, node.ResolveHost, node.Port, node.Location);
+
+                for (auto& pending : PendingRequests) {
+                    // skip slots already satisfied by an earlier node of this update
+                    if (pending.Request && pending.Request->Get()->Record.GetNodeId() == node.NodeId) {
+                        LOG_DEBUG_IC("ICN05", "Pending nodeId: %u discovered", node.NodeId);
+                        RegisterWithSameMailbox(
+                            CreateResolveActor(node.NodeId, NodeTable[node.NodeId], pending.Request->Sender, SelfId(), pending.Deadline));
+                        pending.Request.Reset();
+                        compactionCount++;
+                    }
+                }
+            }
+
+            if (printInfo) {
+                PrintInfo();
+            }
+
+            DiscardTimedOutRequests(ctx, compactionCount);
+        }
+
+        void HandlePeriodic(const TActorContext& ctx) {
+            DiscardTimedOutRequests(ctx, 0);
+            if (PendingRequests.size()) {
+                SchedulePeriodic();
+            }
+        }
+    };
+
+    IActor* CreateDynamicNameserver(const TIntrusivePtr<TTableNameserverSetup>& setup,
+                                    const TDuration& pendingPeriod,
+                                    ui32 poolId) {
+        return new TInterconnectDynamicNameserver(setup, pendingPeriod, poolId);
+    }
+
+}
diff --git a/library/cpp/actors/interconnect/interconnect_nameserver_table.cpp b/library/cpp/actors/interconnect/interconnect_nameserver_table.cpp
new file mode 100644
index 0000000000..43419bf70d
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_nameserver_table.cpp
@@ -0,0 +1,86 @@
+#include "interconnect.h"
+#include "interconnect_impl.h"
+#include "interconnect_address.h"
+#include "interconnect_nameserver_base.h"
+#include "events_local.h"
+
+#include <library/cpp/actors/core/hfunc.h>
+#include <library/cpp/actors/memory_log/memlog.h>
+
+namespace NActors {
+
+    class TInterconnectNameserverTable: public TInterconnectNameserverBase<TInterconnectNameserverTable> {
+        TIntrusivePtr<TTableNameserverSetup> Config;
+
+    public:
+        static constexpr EActivityType ActorActivityType() {
+            return NAMESERVICE;
+        }
+
+        TInterconnectNameserverTable(const TIntrusivePtr<TTableNameserverSetup>& setup, ui32 /*resolvePoolId*/)
+            : TInterconnectNameserverBase<TInterconnectNameserverTable>(&TInterconnectNameserverTable::StateFunc, setup->StaticNodeTable)
+            , Config(setup)
+        {
+            Y_VERIFY(Config->IsEntriesUnique());
+        }
+
+        STFUNC(StateFunc) {
+            try {
+                switch (ev->GetTypeRewrite()) {
+                    HFunc(TEvInterconnect::TEvResolveNode, Handle);
+                    HFunc(TEvResolveAddress, Handle);
+                    HFunc(TEvInterconnect::TEvListNodes, Handle);
+                    HFunc(TEvInterconnect::TEvGetNode, Handle);
+                }
+            } catch (...)
{ + // on error - do nothing + } + } + }; + + IActor* CreateNameserverTable(const TIntrusivePtr<TTableNameserverSetup>& setup, ui32 poolId) { + return new TInterconnectNameserverTable(setup, poolId); + } + + bool TTableNameserverSetup::IsEntriesUnique() const { + TVector<const TNodeInfo*> infos; + infos.reserve(StaticNodeTable.size()); + for (const auto& x : StaticNodeTable) + infos.push_back(&x.second); + + auto CompareAddressLambda = + [](const TNodeInfo* left, const TNodeInfo* right) { + return left->Port == right->Port ? left->Address < right->Address : left->Port < right->Port; + }; + + Sort(infos, CompareAddressLambda); + + for (ui32 idx = 1, end = StaticNodeTable.size(); idx < end; ++idx) { + const TNodeInfo* left = infos[idx - 1]; + const TNodeInfo* right = infos[idx]; + if (left->Address && left->Address == right->Address && left->Port == right->Port) + return false; + } + + auto CompareHostLambda = + [](const TNodeInfo* left, const TNodeInfo* right) { + return left->Port == right->Port ? left->ResolveHost < right->ResolveHost : left->Port < right->Port; + }; + + Sort(infos, CompareHostLambda); + + for (ui32 idx = 1, end = StaticNodeTable.size(); idx < end; ++idx) { + const TNodeInfo* left = infos[idx - 1]; + const TNodeInfo* right = infos[idx]; + if (left->ResolveHost == right->ResolveHost && left->Port == right->Port) + return false; + } + + return true; + } + + TActorId GetNameserviceActorId() { + return TActorId(0, "namesvc"); + } + +} diff --git a/library/cpp/actors/interconnect/interconnect_proxy_wrapper.cpp b/library/cpp/actors/interconnect/interconnect_proxy_wrapper.cpp new file mode 100644 index 0000000000..1c44b4c59b --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_proxy_wrapper.cpp @@ -0,0 +1,47 @@ +#include "interconnect_proxy_wrapper.h" +#include "interconnect_tcp_proxy.h" +#include <library/cpp/actors/interconnect/mock/ic_mock.h> + +namespace NActors { + + class TInterconnectProxyWrapper : public IActor { + TIntrusivePtr<TInterconnectProxyCommon> Common; + const ui32 NodeId; + TInterconnectMock *Mock; + IActor *Proxy = nullptr; + + public: + TInterconnectProxyWrapper(TIntrusivePtr<TInterconnectProxyCommon> common, ui32 nodeId, TInterconnectMock *mock) + : IActor(static_cast<TReceiveFunc>(&TInterconnectProxyWrapper::StateFunc), INTERCONNECT_PROXY_WRAPPER) + , Common(std::move(common)) + , NodeId(nodeId) + , Mock(mock) + {} + + STFUNC(StateFunc) { + if (ev->GetTypeRewrite() == TEvents::TSystem::Poison && !Proxy) { + PassAway(); + } else { + if (!Proxy) { + IActor *actor = Mock + ? 
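+                    // test path: the TInterconnectMock factory (mock/ic_mock.h) presumably
+                    // builds an in-process proxy stub so sessions can run without sockets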
Mock->CreateProxyMock(TActivationContext::ActorSystem()->NodeId, NodeId, Common)
+                        : new TInterconnectProxyTCP(NodeId, Common, &Proxy);
+                    RegisterWithSameMailbox(actor);
+                    if (Mock) {
+                        Proxy = actor;
+                    }
+                    Y_VERIFY(Proxy);
+                }
+                InvokeOtherActor(*Proxy, &IActor::Receive, ev, ctx);
+            }
+        }
+    };
+
+    TProxyWrapperFactory CreateProxyWrapperFactory(TIntrusivePtr<TInterconnectProxyCommon> common, ui32 poolId,
+            TInterconnectMock *mock) {
+        return [=](TActorSystem *as, ui32 nodeId) -> TActorId {
+            return as->Register(new TInterconnectProxyWrapper(common, nodeId, mock), TMailboxType::HTSwap, poolId);
+        };
+    }
+
+} // NActors
diff --git a/library/cpp/actors/interconnect/interconnect_proxy_wrapper.h b/library/cpp/actors/interconnect/interconnect_proxy_wrapper.h
new file mode 100644
index 0000000000..e5942351a7
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_proxy_wrapper.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "interconnect_common.h"
+
+#include <library/cpp/actors/core/actorsystem.h>
+
+namespace NActors {
+
+    TProxyWrapperFactory CreateProxyWrapperFactory(TIntrusivePtr<TInterconnectProxyCommon> common, ui32 poolId,
+        class TInterconnectMock *mock = nullptr);
+
+}
diff --git a/library/cpp/actors/interconnect/interconnect_resolve.cpp b/library/cpp/actors/interconnect/interconnect_resolve.cpp
new file mode 100644
index 0000000000..14296194df
--- /dev/null
+++ b/library/cpp/actors/interconnect/interconnect_resolve.cpp
@@ -0,0 +1,174 @@
+#include "interconnect.h"
+#include "interconnect_address.h"
+#include "events_local.h"
+
+#include <library/cpp/actors/core/actor_bootstrapped.h>
+#include <library/cpp/actors/core/hfunc.h>
+#include <library/cpp/actors/dnsresolver/dnsresolver.h>
+
+namespace NActors {
+
+    using namespace NActors::NDnsResolver;
+
+    class TInterconnectResolveActor : public TActorBootstrapped<TInterconnectResolveActor> {
+    public:
+        TInterconnectResolveActor(
+                const TString& host, ui16 port, ui32 nodeId, const TString& defaultAddress,
+                const TActorId& replyTo, const TActorId& replyFrom, TInstant deadline)
+            : Host(host)
+            , NodeId(nodeId)
+            , Port(port)
+            , DefaultAddress(defaultAddress)
+            , ReplyTo(replyTo)
+            , ReplyFrom(replyFrom)
+            , Deadline(deadline)
+        { }
+
+        TInterconnectResolveActor(
+                const TString& host, ui16 port,
+                const TActorId& replyTo, const TActorId& replyFrom, TInstant deadline)
+            : Host(host)
+            , Port(port)
+            , ReplyTo(replyTo)
+            , ReplyFrom(replyFrom)
+            , Deadline(deadline)
+        { }
+
+        static constexpr EActivityType ActorActivityType() {
+            return NAMESERVICE;
+        }
+
+        void Bootstrap() {
+            TMaybe<TString> errorText;
+            if (auto addr = ExtractDefaultAddr(errorText)) {
+                return SendAddrAndDie(std::move(addr));
+            }
+
+            if (errorText) {
+                // return here: SendErrorAndDie() has already called PassAway(), so
+                // falling through to the DNS request would act on a dead actor
+                return SendErrorAndDie(*errorText);
+            }
+
+            auto now = TActivationContext::Now();
+            if (Deadline < now) {
+                SendErrorAndDie("Deadline");
+                return;
+            }
+
+            Send(MakeDnsResolverActorId(),
+                new TEvDns::TEvGetAddr(Host, AF_UNSPEC),
+                IEventHandle::FlagTrackDelivery);
+
+            if (Deadline != TInstant::Max()) {
+                Schedule(Deadline, new TEvents::TEvWakeup);
+            }
+
+            Become(&TThis::StateWork);
+        }
+
+        STRICT_STFUNC(StateWork, {
+            sFunc(TEvents::TEvWakeup, HandleTimeout);
+            sFunc(TEvents::TEvUndelivered, HandleUndelivered);
+            hFunc(TEvDns::TEvGetAddrResult, Handle);
+        });
+
+        void HandleTimeout() {
+            SendErrorAndDie("Deadline");
+        }
+
+        void HandleUndelivered() {
+            SendErrorAndDie("Dns resolver is unavailable");
+        }
+
+        void Handle(TEvDns::TEvGetAddrResult::TPtr& ev) {
+            if (auto addr = ExtractAddr(ev->Get())) {
+                return
SendAddrAndDie(std::move(addr)); + } + + SendErrorAndDie(ev->Get()->ErrorText); + } + + void SendAddrAndDie(NAddr::IRemoteAddrPtr addr) { + if (NodeId) { + auto reply = new TEvLocalNodeInfo; + reply->NodeId = *NodeId; + reply->Address = std::move(addr); + TActivationContext::Send(new IEventHandle(ReplyTo, ReplyFrom, reply)); + } else { + auto reply = new TEvAddressInfo; + reply->Address = std::move(addr); + TActivationContext::Send(new IEventHandle(ReplyTo, ReplyFrom, reply)); + } + PassAway(); + } + + void SendErrorAndDie(const TString& errorText) { + auto *event = new TEvResolveError; + event->Explain = errorText; + TActivationContext::Send(new IEventHandle(ReplyTo, ReplyFrom, event)); + PassAway(); + } + + NAddr::IRemoteAddrPtr ExtractAddr(TEvDns::TEvGetAddrResult* msg) { + if (msg->Status == 0) { + if (msg->IsV6()) { + struct sockaddr_in6 sin6; + Zero(sin6); + sin6.sin6_family = AF_INET6; + sin6.sin6_addr = msg->GetAddrV6(); + sin6.sin6_port = HostToInet(Port); + return MakeHolder<NAddr::TIPv6Addr>(sin6); + } + + if (msg->IsV4()) { + return MakeHolder<NAddr::TIPv4Addr>(TIpAddress(msg->GetAddrV4().s_addr, Port)); + } + + Y_FAIL("Unexpected result address family"); + } + + return nullptr; + } + + NAddr::IRemoteAddrPtr ExtractDefaultAddr(TMaybe<TString>& errorText) { + if (DefaultAddress) { + NInterconnect::TAddress address(DefaultAddress.data(), Port); + + switch (address.GetFamily()) { + case AF_INET: + return MakeHolder<NAddr::TIPv4Addr>(*(sockaddr_in*)address.SockAddr()); + case AF_INET6: + return MakeHolder<NAddr::TIPv6Addr>(*(sockaddr_in6*)address.SockAddr()); + default: + errorText = "Unsupported default address: " + DefaultAddress; + break; + } + } + + return nullptr; + } + + private: + const TString Host; + const std::optional<ui32> NodeId; + const ui16 Port; + const TString DefaultAddress; + const TActorId ReplyTo; + const TActorId ReplyFrom; + const TInstant Deadline; + }; + + IActor* CreateResolveActor( + const TString& host, ui16 port, ui32 nodeId, const TString& defaultAddress, + const TActorId& replyTo, const TActorId& replyFrom, TInstant deadline) + { + return new TInterconnectResolveActor(host, port, nodeId, defaultAddress, replyTo, replyFrom, deadline); + } + + IActor* CreateResolveActor( + const TString& host, ui16 port, + const TActorId& replyTo, const TActorId& replyFrom, TInstant deadline) + { + return new TInterconnectResolveActor(host, port, replyTo, replyFrom, deadline); + } + +} // namespace NActors diff --git a/library/cpp/actors/interconnect/interconnect_stream.cpp b/library/cpp/actors/interconnect/interconnect_stream.cpp new file mode 100644 index 0000000000..158ebc9e1d --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_stream.cpp @@ -0,0 +1,628 @@ +#include "interconnect_stream.h" +#include "logging.h" +#include <library/cpp/openssl/init/init.h> +#include <util/network/socket.h> +#include <openssl/ssl.h> +#include <openssl/err.h> +#include <openssl/pem.h> + +#if defined(_win_) +#include <util/system/file.h> +#define SOCK_NONBLOCK 0 +#elif defined(_darwin_) +#define SOCK_NONBLOCK 0 +#else +#include <sys/un.h> +#include <sys/stat.h> +#endif //_win_ + +#if !defined(_win_) +#include <sys/ioctl.h> +#endif + +#include <cerrno> + +namespace NInterconnect { + namespace { + inline int + LastSocketError() { +#if defined(_win_) + return WSAGetLastError(); +#else + return errno; +#endif + } + } + + TSocket::TSocket(SOCKET fd) + : Descriptor(fd) + { + } + + TSocket::~TSocket() { + if (Descriptor == INVALID_SOCKET) { + return; + } + + auto const result = 
::closesocket(Descriptor); + if (result == 0) + return; + switch (LastSocketError()) { + case EBADF: + Y_FAIL("Close bad descriptor"); + case EINTR: + break; + case EIO: + Y_FAIL("EIO"); + default: + Y_FAIL("It's something unexpected"); + } + } + + int TSocket::GetDescriptor() { + return Descriptor; + } + + int + TSocket::Bind(const TAddress& addr) const { + const auto ret = ::bind(Descriptor, addr.SockAddr(), addr.Size()); + if (ret < 0) + return -LastSocketError(); + + return 0; + } + + int + TSocket::Shutdown(int how) const { + const auto ret = ::shutdown(Descriptor, how); + if (ret < 0) + return -LastSocketError(); + + return 0; + } + + int TSocket::GetConnectStatus() const { + int err = 0; + socklen_t len = sizeof(err); + if (getsockopt(Descriptor, SOL_SOCKET, SO_ERROR, reinterpret_cast<char*>(&err), &len) == -1) { + err = LastSocketError(); + } + return err; + } + + ///////////////////////////////////////////////////////////////// + + TIntrusivePtr<TStreamSocket> TStreamSocket::Make(int domain) { + const SOCKET res = ::socket(domain, SOCK_STREAM | SOCK_NONBLOCK, 0); + if (res == -1) { + const int err = LastSocketError(); + Y_VERIFY(err != EMFILE && err != ENFILE); + } + return MakeIntrusive<TStreamSocket>(res); + } + + TStreamSocket::TStreamSocket(SOCKET fd) + : TSocket(fd) + { + } + + ssize_t + TStreamSocket::Send(const void* msg, size_t len, TString* /*err*/) const { + const auto ret = ::send(Descriptor, static_cast<const char*>(msg), int(len), 0); + if (ret < 0) + return -LastSocketError(); + + return ret; + } + + ssize_t + TStreamSocket::Recv(void* buf, size_t len, TString* /*err*/) const { + const auto ret = ::recv(Descriptor, static_cast<char*>(buf), int(len), 0); + if (ret < 0) + return -LastSocketError(); + + return ret; + } + + ssize_t + TStreamSocket::WriteV(const struct iovec* iov, int iovcnt) const { +#ifndef _win_ + const auto ret = ::writev(Descriptor, iov, iovcnt); + if (ret < 0) + return -LastSocketError(); + return ret; +#else + Y_FAIL("WriteV() unsupported on Windows"); +#endif + } + + ssize_t + TStreamSocket::ReadV(const struct iovec* iov, int iovcnt) const { +#ifndef _win_ + const auto ret = ::readv(Descriptor, iov, iovcnt); + if (ret < 0) + return -LastSocketError(); + return ret; +#else + Y_FAIL("ReadV() unsupported on Windows"); +#endif + } + + ssize_t TStreamSocket::GetUnsentQueueSize() const { + int num = -1; +#ifndef _win_ // we have no means to determine output queue size on Windows + if (ioctl(Descriptor, TIOCOUTQ, &num) == -1) { + num = -1; + } +#endif + return num; + } + + int + TStreamSocket::Connect(const TAddress& addr) const { + const auto ret = ::connect(Descriptor, addr.SockAddr(), addr.Size()); + if (ret < 0) + return -LastSocketError(); + + return ret; + } + + int + TStreamSocket::Connect(const NAddr::IRemoteAddr* addr) const { + const auto ret = ::connect(Descriptor, addr->Addr(), addr->Len()); + if (ret < 0) + return -LastSocketError(); + + return ret; + } + + int + TStreamSocket::Listen(int backlog) const { + const auto ret = ::listen(Descriptor, backlog); + if (ret < 0) + return -LastSocketError(); + + return ret; + } + + int + TStreamSocket::Accept(TAddress& acceptedAddr) const { + socklen_t acceptedSize = sizeof(::sockaddr_in6); + const auto ret = ::accept(Descriptor, acceptedAddr.SockAddr(), &acceptedSize); + if (ret == INVALID_SOCKET) + return -LastSocketError(); + + return ret; + } + + void + TStreamSocket::SetSendBufferSize(i32 len) const { + (void)SetSockOpt(Descriptor, SOL_SOCKET, SO_SNDBUF, len); + } + + ui32 
TStreamSocket::GetSendBufferSize() const { + ui32 res = 0; + CheckedGetSockOpt(Descriptor, SOL_SOCKET, SO_SNDBUF, res, "SO_SNDBUF"); + return res; + } + + ////////////////////////////////////////////////////// + + TDatagramSocket::TPtr TDatagramSocket::Make(int domain) { + const SOCKET res = ::socket(domain, SOCK_DGRAM, 0); + if (res == -1) { + const int err = LastSocketError(); + Y_VERIFY(err != EMFILE && err != ENFILE); + } + return std::make_shared<TDatagramSocket>(res); + } + + TDatagramSocket::TDatagramSocket(SOCKET fd) + : TSocket(fd) + { + } + + ssize_t + TDatagramSocket::SendTo(const void* msg, size_t len, const TAddress& toAddr) const { + const auto ret = ::sendto(Descriptor, static_cast<const char*>(msg), int(len), 0, toAddr.SockAddr(), toAddr.Size()); + if (ret < 0) + return -LastSocketError(); + + return ret; + } + + ssize_t + TDatagramSocket::RecvFrom(void* buf, size_t len, TAddress& fromAddr) const { + socklen_t fromSize = sizeof(::sockaddr_in6); + const auto ret = ::recvfrom(Descriptor, static_cast<char*>(buf), int(len), 0, fromAddr.SockAddr(), &fromSize); + if (ret < 0) + return -LastSocketError(); + + return ret; + } + + + // deleter for SSL objects + struct TDeleter { + void operator ()(BIO *bio) const { + BIO_free(bio); + } + + void operator ()(X509 *x509) const { + X509_free(x509); + } + + void operator ()(RSA *rsa) const { + RSA_free(rsa); + } + + void operator ()(SSL_CTX *ctx) const { + SSL_CTX_free(ctx); + } + }; + + class TSecureSocketContext::TImpl { + std::unique_ptr<SSL_CTX, TDeleter> Ctx; + + public: + TImpl(const TString& certificate, const TString& privateKey, const TString& caFilePath, + const TString& ciphers) { + int ret; + InitOpenSSL(); +#if OPENSSL_VERSION_NUMBER < 0x10100000L + Ctx.reset(SSL_CTX_new(TLSv1_2_method())); + Y_VERIFY(Ctx, "SSL_CTX_new() failed"); +#else + Ctx.reset(SSL_CTX_new(TLS_method())); + Y_VERIFY(Ctx, "SSL_CTX_new() failed"); + ret = SSL_CTX_set_min_proto_version(Ctx.get(), TLS1_2_VERSION); + Y_VERIFY(ret == 1, "failed to set min proto version"); + ret = SSL_CTX_set_max_proto_version(Ctx.get(), TLS1_2_VERSION); + Y_VERIFY(ret == 1, "failed to set max proto version"); +#endif + SSL_CTX_set_verify(Ctx.get(), SSL_VERIFY_PEER | SSL_VERIFY_FAIL_IF_NO_PEER_CERT, &Verify); + SSL_CTX_set_mode(*this, SSL_MODE_ENABLE_PARTIAL_WRITE | SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER); + + // apply certificates in SSL context + if (certificate) { + std::unique_ptr<BIO, TDeleter> bio(BIO_new_mem_buf(certificate.data(), certificate.size())); + Y_VERIFY(bio); + + // first certificate in the chain is expected to be a leaf + std::unique_ptr<X509, TDeleter> cert(PEM_read_bio_X509(bio.get(), nullptr, nullptr, nullptr)); + Y_VERIFY(cert, "failed to parse certificate"); + ret = SSL_CTX_use_certificate(Ctx.get(), cert.get()); + Y_VERIFY(ret == 1); + + // loading additional certificates in the chain, if any + while(true) { + X509 *ca = PEM_read_bio_X509(bio.get(), nullptr, nullptr, nullptr); + if (ca == nullptr) { + break; + } + ret = SSL_CTX_add0_chain_cert(Ctx.get(), ca); + Y_VERIFY(ret == 1); + // we must not free memory if certificate was added successfully by SSL_CTX_add0_chain_cert + } + } + if (privateKey) { + std::unique_ptr<BIO, TDeleter> bio(BIO_new_mem_buf(privateKey.data(), privateKey.size())); + Y_VERIFY(bio); + std::unique_ptr<RSA, TDeleter> pkey(PEM_read_bio_RSAPrivateKey(bio.get(), nullptr, nullptr, nullptr)); + Y_VERIFY(pkey); + ret = SSL_CTX_use_RSAPrivateKey(Ctx.get(), pkey.get()); + Y_VERIFY(ret == 1); + } + if (caFilePath) { + ret = 
SSL_CTX_load_verify_locations(Ctx.get(), caFilePath.data(), nullptr); + Y_VERIFY(ret == 1); + } + + int success = SSL_CTX_set_cipher_list(Ctx.get(), ciphers ? ciphers.data() : "AES128-GCM-SHA256"); + Y_VERIFY(success, "failed to set cipher list"); + } + + operator SSL_CTX*() const { + return Ctx.get(); + } + + static int GetExIndex() { + static int index = SSL_get_ex_new_index(0, nullptr, nullptr, nullptr, nullptr); + return index; + } + + private: + static int Verify(int preverify, X509_STORE_CTX *ctx) { + if (!preverify) { + X509 *badCert = X509_STORE_CTX_get_current_cert(ctx); + int err = X509_STORE_CTX_get_error(ctx); + int depth = X509_STORE_CTX_get_error_depth(ctx); + SSL *ssl = static_cast<SSL*>(X509_STORE_CTX_get_ex_data(ctx, SSL_get_ex_data_X509_STORE_CTX_idx())); + TString *errp = static_cast<TString*>(SSL_get_ex_data(ssl, GetExIndex())); + char buffer[1024]; + X509_NAME_oneline(X509_get_subject_name(badCert), buffer, sizeof(buffer)); + TStringBuilder s; + s << "Error during certificate validation" + << " error# " << X509_verify_cert_error_string(err) + << " depth# " << depth + << " cert# " << buffer; + if (err == X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT) { + X509_NAME_oneline(X509_get_issuer_name(badCert), buffer, sizeof(buffer)); + s << " issuer# " << buffer; + } + *errp = s; + } + return preverify; + } + }; + + TSecureSocketContext::TSecureSocketContext(const TString& certificate, const TString& privateKey, + const TString& caFilePath, const TString& ciphers) + : Impl(new TImpl(certificate, privateKey, caFilePath, ciphers)) + {} + + TSecureSocketContext::~TSecureSocketContext() + {} + + class TSecureSocket::TImpl { + SSL *Ssl; + TString ErrorDescription; + bool WantRead_ = false; + bool WantWrite_ = false; + + public: + TImpl(SSL_CTX *ctx, int fd) + : Ssl(SSL_new(ctx)) + { + Y_VERIFY(Ssl, "SSL_new() failed"); + SSL_set_fd(Ssl, fd); + SSL_set_ex_data(Ssl, TSecureSocketContext::TImpl::GetExIndex(), &ErrorDescription); + } + + ~TImpl() { + SSL_free(Ssl); + } + + TString GetErrorStack() { + if (ErrorDescription) { + return ErrorDescription; + } + std::unique_ptr<BIO, int(*)(BIO*)> mem(BIO_new(BIO_s_mem()), BIO_free); + ERR_print_errors(mem.get()); + char *p = nullptr; + auto len = BIO_get_mem_data(mem.get(), &p); + return TString(p, len); + } + + EStatus ConvertResult(int res, TString& err) { + switch (res) { + case SSL_ERROR_NONE: + return EStatus::SUCCESS; + + case SSL_ERROR_WANT_READ: + return EStatus::WANT_READ; + + case SSL_ERROR_WANT_WRITE: + return EStatus::WANT_WRITE; + + case SSL_ERROR_SYSCALL: + err = TStringBuilder() << "syscall error: " << strerror(LastSocketError()) << ": " << GetErrorStack(); + break; + + case SSL_ERROR_ZERO_RETURN: + err = "TLS negotiation failed"; + break; + + case SSL_ERROR_SSL: + err = "SSL error: " + GetErrorStack(); + break; + + default: + err = "unknown OpenSSL error"; + break; + } + return EStatus::ERROR; + } + + enum EConnectState { + CONNECT, + SHUTDOWN, + READ, + } ConnectState = EConnectState::CONNECT; + + EStatus Establish(bool server, bool authOnly, TString& err) { + switch (ConnectState) { + case EConnectState::CONNECT: { + auto callback = server ? 
SSL_accept : SSL_connect; + const EStatus status = ConvertResult(SSL_get_error(Ssl, callback(Ssl)), err); + if (status != EStatus::SUCCESS || !authOnly) { + return status; + } + ConnectState = EConnectState::SHUTDOWN; + [[fallthrough]]; + } + + case EConnectState::SHUTDOWN: { + const int res = SSL_shutdown(Ssl); + if (res == 1) { + return EStatus::SUCCESS; + } else if (res != 0) { + return ConvertResult(SSL_get_error(Ssl, res), err); + } + ConnectState = EConnectState::READ; + [[fallthrough]]; + } + + case EConnectState::READ: { + char data[256]; + size_t numRead = 0; + const int res = SSL_get_error(Ssl, SSL_read_ex(Ssl, data, sizeof(data), &numRead)); + if (res == SSL_ERROR_ZERO_RETURN) { + return EStatus::SUCCESS; + } else if (res != SSL_ERROR_NONE) { + return ConvertResult(res, err); + } else if (numRead) { + err = "non-zero return from SSL_read_ex: " + ToString(numRead); + return EStatus::ERROR; + } else { + return EStatus::SUCCESS; + } + } + } + Y_FAIL(); + } + + std::optional<std::pair<const void*, size_t>> BlockedSend; + + ssize_t Send(const void* msg, size_t len, TString *err) { + Y_VERIFY(!BlockedSend || *BlockedSend == std::make_pair(msg, len)); + const ssize_t res = Operate(msg, len, &SSL_write_ex, err); + if (res == -EAGAIN) { + BlockedSend.emplace(msg, len); + } else { + BlockedSend.reset(); + } + return res; + } + + std::optional<std::pair<void*, size_t>> BlockedReceive; + + ssize_t Recv(void* msg, size_t len, TString *err) { + Y_VERIFY(!BlockedReceive || *BlockedReceive == std::make_pair(msg, len)); + const ssize_t res = Operate(msg, len, &SSL_read_ex, err); + if (res == -EAGAIN) { + BlockedReceive.emplace(msg, len); + } else { + BlockedReceive.reset(); + } + return res; + } + + TString GetCipherName() const { + return SSL_get_cipher_name(Ssl); + } + + int GetCipherBits() const { + return SSL_get_cipher_bits(Ssl, nullptr); + } + + TString GetProtocolName() const { + return SSL_get_cipher_version(Ssl); + } + + TString GetPeerCommonName() const { + TString res; + if (X509 *cert = SSL_get_peer_certificate(Ssl)) { + char buffer[256]; + memset(buffer, 0, sizeof(buffer)); + if (X509_NAME *name = X509_get_subject_name(cert)) { + X509_NAME_get_text_by_NID(name, NID_commonName, buffer, sizeof(buffer)); + } + X509_free(cert); + res = TString(buffer, strnlen(buffer, sizeof(buffer))); + } + return res; + } + + bool WantRead() const { + return WantRead_; + } + + bool WantWrite() const { + return WantWrite_; + } + + private: + template<typename TBuffer, typename TOp> + ssize_t Operate(TBuffer* buffer, size_t len, TOp&& op, TString *err) { + WantRead_ = WantWrite_ = false; + size_t processed = 0; + int ret = op(Ssl, buffer, len, &processed); + if (ret == 1) { + return processed; + } + switch (const int status = SSL_get_error(Ssl, ret)) { + case SSL_ERROR_ZERO_RETURN: + return 0; + + case SSL_ERROR_WANT_READ: + WantRead_ = true; + return -EAGAIN; + + case SSL_ERROR_WANT_WRITE: + WantWrite_ = true; + return -EAGAIN; + + case SSL_ERROR_SYSCALL: + return -LastSocketError(); + + case SSL_ERROR_SSL: + if (err) { + *err = GetErrorStack(); + } + return -EPROTO; + + default: + Y_FAIL("unexpected SSL_get_error() status# %d", status); + } + } + }; + + TSecureSocket::TSecureSocket(TStreamSocket& socket, TSecureSocketContext::TPtr context) + : TStreamSocket(socket.ReleaseDescriptor()) + , Context(std::move(context)) + , Impl(new TImpl(*Context->Impl, Descriptor)) + {} + + TSecureSocket::~TSecureSocket() + {} + + TSecureSocket::EStatus TSecureSocket::Establish(bool server, bool authOnly, TString& err) 
const { + return Impl->Establish(server, authOnly, err); + } + + TIntrusivePtr<TStreamSocket> TSecureSocket::Detach() { + return MakeIntrusive<TStreamSocket>(ReleaseDescriptor()); + } + + ssize_t TSecureSocket::Send(const void* msg, size_t len, TString *err) const { + return Impl->Send(msg, len, err); + } + + ssize_t TSecureSocket::Recv(void* msg, size_t len, TString *err) const { + return Impl->Recv(msg, len, err); + } + + ssize_t TSecureSocket::WriteV(const struct iovec* /*iov*/, int /*iovcnt*/) const { + Y_FAIL("unsupported on SSL sockets"); + } + + ssize_t TSecureSocket::ReadV(const struct iovec* /*iov*/, int /*iovcnt*/) const { + Y_FAIL("unsupported on SSL sockets"); + } + + TString TSecureSocket::GetCipherName() const { + return Impl->GetCipherName(); + } + + int TSecureSocket::GetCipherBits() const { + return Impl->GetCipherBits(); + } + + TString TSecureSocket::GetProtocolName() const { + return Impl->GetProtocolName(); + } + + TString TSecureSocket::GetPeerCommonName() const { + return Impl->GetPeerCommonName(); + } + + bool TSecureSocket::WantRead() const { + return Impl->WantRead(); + } + + bool TSecureSocket::WantWrite() const { + return Impl->WantWrite(); + } + +} diff --git a/library/cpp/actors/interconnect/interconnect_stream.h b/library/cpp/actors/interconnect/interconnect_stream.h new file mode 100644 index 0000000000..074adc6e74 --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_stream.h @@ -0,0 +1,131 @@ +#pragma once + +#include <util/generic/string.h> +#include <util/generic/noncopyable.h> +#include <util/network/address.h> +#include <util/network/init.h> +#include <util/system/defaults.h> + +#include "poller.h" + +#include "interconnect_address.h" + +#include <memory> + +#include <sys/uio.h> + +namespace NInterconnect { + class TSocket: public NActors::TSharedDescriptor, public TNonCopyable { + protected: + TSocket(SOCKET fd); + + virtual ~TSocket() override; + + SOCKET Descriptor; + + virtual int GetDescriptor() override; + + private: + friend class TSecureSocket; + + SOCKET ReleaseDescriptor() { + return std::exchange(Descriptor, INVALID_SOCKET); + } + + public: + operator SOCKET() const { + return Descriptor; + } + + int Bind(const TAddress& addr) const; + int Shutdown(int how) const; + int GetConnectStatus() const; + }; + + class TStreamSocket: public TSocket { + public: + TStreamSocket(SOCKET fd); + + static TIntrusivePtr<TStreamSocket> Make(int domain); + + virtual ssize_t Send(const void* msg, size_t len, TString *err = nullptr) const; + virtual ssize_t Recv(void* buf, size_t len, TString *err = nullptr) const; + + virtual ssize_t WriteV(const struct iovec* iov, int iovcnt) const; + virtual ssize_t ReadV(const struct iovec* iov, int iovcnt) const; + + int Connect(const TAddress& addr) const; + int Connect(const NAddr::IRemoteAddr* addr) const; + int Listen(int backlog) const; + int Accept(TAddress& acceptedAddr) const; + + ssize_t GetUnsentQueueSize() const; + + void SetSendBufferSize(i32 len) const; + ui32 GetSendBufferSize() const; + }; + + class TSecureSocketContext { + class TImpl; + THolder<TImpl> Impl; + + friend class TSecureSocket; + + public: + TSecureSocketContext(const TString& certificate, const TString& privateKey, const TString& caFilePath, + const TString& ciphers); + ~TSecureSocketContext(); + + public: + using TPtr = std::shared_ptr<TSecureSocketContext>; + }; + + class TSecureSocket : public TStreamSocket { + TSecureSocketContext::TPtr Context; + + class TImpl; + THolder<TImpl> Impl; + + public: + enum class EStatus { + SUCCESS, 
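+            // WANT_READ/WANT_WRITE surface OpenSSL's SSL_ERROR_WANT_* codes to the
+            // caller: retry the Establish() step once the socket is readable/writable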
+ ERROR, + WANT_READ, + WANT_WRITE, + }; + + public: + TSecureSocket(TStreamSocket& socket, TSecureSocketContext::TPtr context); + ~TSecureSocket(); + + EStatus Establish(bool server, bool authOnly, TString& err) const; + TIntrusivePtr<TStreamSocket> Detach(); + + ssize_t Send(const void* msg, size_t len, TString *err) const override; + ssize_t Recv(void* msg, size_t len, TString *err) const override; + + ssize_t WriteV(const struct iovec* iov, int iovcnt) const override; + ssize_t ReadV(const struct iovec* iov, int iovcnt) const override; + + TString GetCipherName() const; + int GetCipherBits() const; + TString GetProtocolName() const; + TString GetPeerCommonName() const; + + bool WantRead() const; + bool WantWrite() const; + }; + + class TDatagramSocket: public TSocket { + public: + typedef std::shared_ptr<TDatagramSocket> TPtr; + + TDatagramSocket(SOCKET fd); + + static TPtr Make(int domain); + + ssize_t SendTo(const void* msg, size_t len, const TAddress& toAddr) const; + ssize_t RecvFrom(void* buf, size_t len, TAddress& fromAddr) const; + }; + +} diff --git a/library/cpp/actors/interconnect/interconnect_tcp_input_session.cpp b/library/cpp/actors/interconnect/interconnect_tcp_input_session.cpp new file mode 100644 index 0000000000..0abe9fe659 --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_tcp_input_session.cpp @@ -0,0 +1,476 @@ +#include "interconnect_tcp_session.h" +#include "interconnect_tcp_proxy.h" +#include <library/cpp/actors/core/probes.h> +#include <library/cpp/actors/util/datetime.h> + +namespace NActors { + LWTRACE_USING(ACTORLIB_PROVIDER); + + TInputSessionTCP::TInputSessionTCP(const TActorId& sessionId, TIntrusivePtr<NInterconnect::TStreamSocket> socket, + TIntrusivePtr<TReceiveContext> context, TInterconnectProxyCommon::TPtr common, + std::shared_ptr<IInterconnectMetrics> metrics, ui32 nodeId, ui64 lastConfirmed, + TDuration deadPeerTimeout, TSessionParams params) + : SessionId(sessionId) + , Socket(std::move(socket)) + , Context(std::move(context)) + , Common(std::move(common)) + , NodeId(nodeId) + , Params(std::move(params)) + , ConfirmedByInput(lastConfirmed) + , Metrics(std::move(metrics)) + , DeadPeerTimeout(deadPeerTimeout) + { + Y_VERIFY(Context); + Y_VERIFY(Socket); + Y_VERIFY(SessionId); + + AtomicSet(Context->PacketsReadFromSocket, 0); + + Metrics->SetClockSkewMicrosec(0); + + Context->UpdateState = EUpdateState::NONE; + + // ensure that we do not spawn new session while the previous one is still alive + TAtomicBase sessions = AtomicIncrement(Context->NumInputSessions); + Y_VERIFY(sessions == 1, "sessions# %" PRIu64, ui64(sessions)); + } + + void TInputSessionTCP::Bootstrap() { + SetPrefix(Sprintf("InputSession %s [node %" PRIu32 "]", SelfId().ToString().data(), NodeId)); + Become(&TThis::WorkingState, DeadPeerTimeout, new TEvCheckDeadPeer); + LOG_DEBUG_IC_SESSION("ICIS01", "InputSession created"); + LastReceiveTimestamp = TActivationContext::Now(); + ReceiveData(); + } + + void TInputSessionTCP::CloseInputSession() { + CloseInputSessionRequested = true; + ReceiveData(); + } + + void TInputSessionTCP::Handle(TEvPollerReady::TPtr ev) { + if (Context->ReadPending) { + Metrics->IncUsefulReadWakeups(); + } else if (!ev->Cookie) { + Metrics->IncSpuriousReadWakeups(); + } + Context->ReadPending = false; + ReceiveData(); + if (Params.Encryption && Context->WriteBlockedByFullSendBuffer && !ev->Cookie) { + Send(SessionId, ev->Release().Release(), 0, 1); + } + } + + void TInputSessionTCP::Handle(TEvPollerRegisterResult::TPtr ev) { + PollerToken = 
std::move(ev->Get()->PollerToken); + ReceiveData(); + } + + void TInputSessionTCP::HandleResumeReceiveData() { + ReceiveData(); + } + + void TInputSessionTCP::ReceiveData() { + TTimeLimit limit(GetMaxCyclesPerEvent()); + ui64 numDataBytes = 0; + const size_t headerLen = Params.UseModernFrame ? sizeof(TTcpPacketHeader_v2) : sizeof(TTcpPacketHeader_v1); + + LOG_DEBUG_IC_SESSION("ICIS02", "ReceiveData called"); + + for (int iteration = 0; Socket; ++iteration) { + if (iteration && limit.CheckExceeded()) { + // we have hit processing time limit for this message, send notification to resume processing a bit later + Send(SelfId(), new TEvResumeReceiveData); + break; + } + + switch (State) { + case EState::HEADER: + if (IncomingData.GetSize() < headerLen) { + break; + } else { + ProcessHeader(headerLen); + } + continue; + + case EState::PAYLOAD: + if (!IncomingData) { + break; + } else { + ProcessPayload(numDataBytes); + } + continue; + } + + // if we have reached this point, it means that we do not have enough data in read buffer; try to obtain some + if (!ReadMore()) { + // we have no data from socket, so we have some free time to spend -- preallocate buffers using this time + PreallocateBuffers(); + break; + } + } + + // calculate ping time + auto it = std::min_element(PingQ.begin(), PingQ.end()); + const TDuration ping = it != PingQ.end() ? *it : TDuration::Zero(); + + // send update to main session actor if something valuable has changed + if (!UpdateFromInputSession) { + UpdateFromInputSession = MakeHolder<TEvUpdateFromInputSession>(ConfirmedByInput, numDataBytes, ping); + } else { + Y_VERIFY(ConfirmedByInput >= UpdateFromInputSession->ConfirmedByInput); + UpdateFromInputSession->ConfirmedByInput = ConfirmedByInput; + UpdateFromInputSession->NumDataBytes += numDataBytes; + UpdateFromInputSession->Ping = Min(UpdateFromInputSession->Ping, ping); + } + + for (;;) { + EUpdateState state = Context->UpdateState; + EUpdateState next; + + // calculate next state + switch (state) { + case EUpdateState::NONE: + case EUpdateState::CONFIRMING: + // we have no inflight messages to session actor, we will issue one a bit later + next = EUpdateState::INFLIGHT; + break; + + case EUpdateState::INFLIGHT: + case EUpdateState::INFLIGHT_AND_PENDING: + // we already have inflight message, so we will keep pending message and session actor will issue + // TEvConfirmUpdate to kick processing + next = EUpdateState::INFLIGHT_AND_PENDING; + break; + } + + if (Context->UpdateState.compare_exchange_weak(state, next)) { + switch (next) { + case EUpdateState::INFLIGHT: + Send(SessionId, UpdateFromInputSession.Release()); + break; + + case EUpdateState::INFLIGHT_AND_PENDING: + Y_VERIFY(UpdateFromInputSession); + break; + + default: + Y_FAIL("unexpected state"); + } + break; + } + } + } + + void TInputSessionTCP::ProcessHeader(size_t headerLen) { + const bool success = IncomingData.ExtractFrontPlain(Header.Data, headerLen); + Y_VERIFY(success); + if (Params.UseModernFrame) { + PayloadSize = Header.v2.PayloadLength; + HeaderSerial = Header.v2.Serial; + HeaderConfirm = Header.v2.Confirm; + if (!Params.Encryption) { + ChecksumExpected = std::exchange(Header.v2.Checksum, 0); + Checksum = Crc32cExtendMSanCompatible(0, &Header.v2, sizeof(Header.v2)); // start calculating checksum now + if (!PayloadSize && Checksum != ChecksumExpected) { + LOG_ERROR_IC_SESSION("ICIS10", "payload checksum error"); + return ReestablishConnection(TDisconnectReason::ChecksumError()); + } + } + } else if (!Header.v1.Check()) { + 
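+                // legacy v1 frame: the header is self-checksummed, so a failed Check()
+                // means the header itself is corrupt and we reconnect instead of
+                // destroying the session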
LOG_ERROR_IC_SESSION("ICIS03", "header checksum error"); + return ReestablishConnection(TDisconnectReason::ChecksumError()); + } else { + PayloadSize = Header.v1.DataSize; + HeaderSerial = Header.v1.Serial; + HeaderConfirm = Header.v1.Confirm; + ChecksumExpected = Header.v1.PayloadCRC32; + Checksum = 0; + } + if (PayloadSize >= 65536) { + LOG_CRIT_IC_SESSION("ICIS07", "payload is way too big"); + return DestroySession(TDisconnectReason::FormatError()); + } + if (ConfirmedByInput < HeaderConfirm) { + ConfirmedByInput = HeaderConfirm; + if (AtomicGet(Context->ControlPacketId) <= HeaderConfirm && !NewPingProtocol) { + ui64 sendTime = AtomicGet(Context->ControlPacketSendTimer); + TDuration duration = CyclesToDuration(GetCycleCountFast() - sendTime); + const auto durationUs = duration.MicroSeconds(); + Metrics->UpdateLegacyPingTimeHist(durationUs); + PingQ.push_back(duration); + if (PingQ.size() > 16) { + PingQ.pop_front(); + } + AtomicSet(Context->ControlPacketId, 0ULL); + } + } + if (PayloadSize) { + const ui64 expected = Context->GetLastProcessedPacketSerial() + 1; + if (HeaderSerial == 0 || HeaderSerial > expected) { + LOG_CRIT_IC_SESSION("ICIS06", "packet serial %" PRIu64 ", but %" PRIu64 " expected", HeaderSerial, expected); + return DestroySession(TDisconnectReason::FormatError()); + } + IgnorePayload = HeaderSerial != expected; + State = EState::PAYLOAD; + } else if (HeaderSerial & TTcpPacketBuf::PingRequestMask) { + Send(SessionId, new TEvProcessPingRequest(HeaderSerial & ~TTcpPacketBuf::PingRequestMask)); + } else if (HeaderSerial & TTcpPacketBuf::PingResponseMask) { + const ui64 sent = HeaderSerial & ~TTcpPacketBuf::PingResponseMask; + const ui64 received = GetCycleCountFast(); + HandlePingResponse(CyclesToDuration(received - sent)); + } else if (HeaderSerial & TTcpPacketBuf::ClockMask) { + HandleClock(TInstant::MicroSeconds(HeaderSerial & ~TTcpPacketBuf::ClockMask)); + } + } + + void TInputSessionTCP::ProcessPayload(ui64& numDataBytes) { + const size_t numBytes = Min(PayloadSize, IncomingData.GetSize()); + IncomingData.ExtractFront(numBytes, &Payload); + numDataBytes += numBytes; + PayloadSize -= numBytes; + if (PayloadSize) { + return; // there is still some data to receive in the Payload rope + } + State = EState::HEADER; // we'll continue with header next time + if (!Params.UseModernFrame || !Params.Encryption) { // see if we are checksumming packet body + for (const auto&& [data, size] : Payload) { + Checksum = Crc32cExtendMSanCompatible(Checksum, data, size); + } + if (Checksum != ChecksumExpected) { // validate payload checksum + LOG_ERROR_IC_SESSION("ICIS04", "payload checksum error"); + return ReestablishConnection(TDisconnectReason::ChecksumError()); + } + } + if (Y_UNLIKELY(IgnorePayload)) { + return; + } + if (!Context->AdvanceLastProcessedPacketSerial()) { + return DestroySession(TDisconnectReason::NewSession()); + } + + while (Payload && Socket) { + // extract channel part header from the payload stream + TChannelPart part; + if (!Payload.ExtractFrontPlain(&part, sizeof(part))) { + LOG_CRIT_IC_SESSION("ICIS14", "missing TChannelPart header in payload"); + return DestroySession(TDisconnectReason::FormatError()); + } + if (!part.Size) { // bogus frame + continue; + } else if (Payload.GetSize() < part.Size) { + LOG_CRIT_IC_SESSION("ICIS08", "payload format error ChannelPart# %s", part.ToString().data()); + return DestroySession(TDisconnectReason::FormatError()); + } + + const ui16 channel = part.Channel & ~TChannelPart::LastPartFlag; + TRope *eventData = channel < 
Context->ChannelArray.size() + ? &Context->ChannelArray[channel] + : &Context->ChannelMap[channel]; + + Metrics->AddInputChannelsIncomingTraffic(channel, sizeof(part) + part.Size); + + TEventDescr descr; + if (~part.Channel & TChannelPart::LastPartFlag) { + Payload.ExtractFront(part.Size, eventData); + } else if (part.Size != sizeof(descr)) { + LOG_CRIT_IC_SESSION("ICIS11", "incorrect last part of an event"); + return DestroySession(TDisconnectReason::FormatError()); + } else if (Payload.ExtractFrontPlain(&descr, sizeof(descr))) { + Metrics->IncInputChannelsIncomingEvents(channel); + ProcessEvent(*eventData, descr); + *eventData = TRope(); + } else { + Y_FAIL(); + } + } + } + + void TInputSessionTCP::ProcessEvent(TRope& data, TEventDescr& descr) { + if (!Params.UseModernFrame || descr.Checksum) { + ui32 checksum = 0; + for (const auto&& [data, size] : data) { + checksum = Crc32cExtendMSanCompatible(checksum, data, size); + } + if (checksum != descr.Checksum) { + LOG_CRIT_IC_SESSION("ICIS05", "event checksum error"); + return ReestablishConnection(TDisconnectReason::ChecksumError()); + } + } + auto ev = std::make_unique<IEventHandle>(SessionId, + descr.Type, + descr.Flags & ~IEventHandle::FlagExtendedFormat, + descr.Recipient, + descr.Sender, + MakeIntrusive<TEventSerializedData>(std::move(data), bool(descr.Flags & IEventHandle::FlagExtendedFormat)), + descr.Cookie, + Params.PeerScopeId, + NWilson::TTraceId(descr.TraceId)); + if (Common->EventFilter && !Common->EventFilter->CheckIncomingEvent(*ev, Common->LocalScopeId)) { + LOG_CRIT_IC_SESSION("ICIC03", "Event dropped due to scope error LocalScopeId# %s PeerScopeId# %s Type# 0x%08" PRIx32, + ScopeIdToString(Common->LocalScopeId).data(), ScopeIdToString(Params.PeerScopeId).data(), descr.Type); + ev.reset(); + } + if (ev) { + TActivationContext::Send(ev.release()); + } + } + + void TInputSessionTCP::HandleConfirmUpdate() { + for (;;) { + switch (EUpdateState state = Context->UpdateState) { + case EUpdateState::NONE: + case EUpdateState::INFLIGHT: + case EUpdateState::INFLIGHT_AND_PENDING: + // here we may have a race + return; + + case EUpdateState::CONFIRMING: + Y_VERIFY(UpdateFromInputSession); + if (Context->UpdateState.compare_exchange_weak(state, EUpdateState::INFLIGHT)) { + Send(SessionId, UpdateFromInputSession.Release()); + return; + } + } + } + } + + bool TInputSessionTCP::ReadMore() { + PreallocateBuffers(); + + TStackVec<TIoVec, NumPreallocatedBuffers> buffs; + for (const auto& item : Buffers) { + TIoVec iov{item->GetBuffer(), item->GetCapacity()}; + buffs.push_back(iov); + if (Params.Encryption) { + break; // do not put more than one buffer in queue to prevent using ReadV + } + } + + const struct iovec* iovec = reinterpret_cast<const struct iovec*>(buffs.data()); + int iovcnt = buffs.size(); + + ssize_t recvres = 0; + TString err; + LWPROBE_IF_TOO_LONG(SlowICReadFromSocket, ms) { + do { +#ifndef _win_ + recvres = iovcnt == 1 ? Socket->Recv(iovec->iov_base, iovec->iov_len, &err) : Socket->ReadV(iovec, iovcnt); +#else + recvres = Socket->Recv(iovec[0].iov_base, iovec[0].iov_len, &err); +#endif + Metrics->IncRecvSyscalls(); + } while (recvres == -EINTR); + } + + LOG_DEBUG_IC_SESSION("ICIS12", "ReadMore recvres# %zd iovcnt# %d err# %s", recvres, iovcnt, err.data()); + + if (recvres <= 0 || CloseInputSessionRequested) { + if ((-recvres != EAGAIN && -recvres != EWOULDBLOCK) || CloseInputSessionRequested) { + TString message = CloseInputSessionRequested ? "connection closed by debug command" + : recvres == 0 ? 
"connection closed by peer" + : err ? err + : Sprintf("socket: %s", strerror(-recvres)); + LOG_NOTICE_NET(NodeId, "%s", message.data()); + ReestablishConnection(CloseInputSessionRequested ? TDisconnectReason::Debug() : + recvres == 0 ? TDisconnectReason::EndOfStream() : TDisconnectReason::FromErrno(-recvres)); + } else if (PollerToken && !std::exchange(Context->ReadPending, true)) { + if (Params.Encryption) { + auto *secure = static_cast<NInterconnect::TSecureSocket*>(Socket.Get()); + const bool wantRead = secure->WantRead(), wantWrite = secure->WantWrite(); + Y_VERIFY_DEBUG(wantRead || wantWrite); + PollerToken->Request(wantRead, wantWrite); + } else { + PollerToken->Request(true, false); + } + } + return false; + } + + Y_VERIFY(recvres > 0); + Metrics->AddTotalBytesRead(recvres); + TDeque<TIntrusivePtr<TRopeAlignedBuffer>>::iterator it; + for (it = Buffers.begin(); recvres; ++it) { + Y_VERIFY(it != Buffers.end()); + const size_t bytesFromFrontBuffer = Min<size_t>(recvres, (*it)->GetCapacity()); + (*it)->AdjustSize(bytesFromFrontBuffer); + IncomingData.Insert(IncomingData.End(), TRope(std::move(*it))); + recvres -= bytesFromFrontBuffer; + } + Buffers.erase(Buffers.begin(), it); + + LastReceiveTimestamp = TActivationContext::Now(); + + return true; + } + + void TInputSessionTCP::PreallocateBuffers() { + // ensure that we have exactly "numBuffers" in queue + LWPROBE_IF_TOO_LONG(SlowICReadLoopAdjustSize, ms) { + const ui32 target = Params.Encryption ? 1 : NumPreallocatedBuffers; + while (Buffers.size() < target) { + Buffers.emplace_back(TRopeAlignedBuffer::Allocate(sizeof(TTcpPacketBuf))); + } + } + } + + void TInputSessionTCP::ReestablishConnection(TDisconnectReason reason) { + LOG_DEBUG_IC_SESSION("ICIS09", "ReestablishConnection, reason# %s", reason.ToString().data()); + AtomicDecrement(Context->NumInputSessions); + Send(SessionId, new TEvSocketDisconnect(std::move(reason))); + PassAway(); + Socket.Reset(); + } + + void TInputSessionTCP::DestroySession(TDisconnectReason reason) { + LOG_DEBUG_IC_SESSION("ICIS13", "DestroySession, reason# %s", reason.ToString().data()); + AtomicDecrement(Context->NumInputSessions); + Send(SessionId, TInterconnectSessionTCP::NewEvTerminate(std::move(reason))); + PassAway(); + Socket.Reset(); + } + + void TInputSessionTCP::HandleCheckDeadPeer() { + const TInstant now = TActivationContext::Now(); + if (now >= LastReceiveTimestamp + DeadPeerTimeout) { + ReceiveData(); + if (Socket && now >= LastReceiveTimestamp + DeadPeerTimeout) { + // nothing has changed, terminate session + DestroySession(TDisconnectReason::DeadPeer()); + } + } + Schedule(LastReceiveTimestamp + DeadPeerTimeout - now, new TEvCheckDeadPeer); + } + + void TInputSessionTCP::HandlePingResponse(TDuration passed) { + PingQ.push_back(passed); + if (PingQ.size() > 16) { + PingQ.pop_front(); + } + const TDuration ping = *std::min_element(PingQ.begin(), PingQ.end()); + const auto pingUs = ping.MicroSeconds(); + Context->PingRTT_us = pingUs; + NewPingProtocol = true; + Metrics->UpdateLegacyPingTimeHist(pingUs); + } + + void TInputSessionTCP::HandleClock(TInstant clock) { + const TInstant here = TInstant::Now(); // wall clock + const TInstant remote = clock + TDuration::MicroSeconds(Context->PingRTT_us / 2); + i64 skew = remote.MicroSeconds() - here.MicroSeconds(); + SkewQ.push_back(skew); + if (SkewQ.size() > 16) { + SkewQ.pop_front(); + } + i64 clockSkew = SkewQ.front(); + for (i64 skew : SkewQ) { + if (abs(skew) < abs(clockSkew)) { + clockSkew = skew; + } + } + Context->ClockSkew_us = clockSkew; + 
Metrics->SetClockSkewMicrosec(clockSkew); + } + + +} diff --git a/library/cpp/actors/interconnect/interconnect_tcp_proxy.cpp b/library/cpp/actors/interconnect/interconnect_tcp_proxy.cpp new file mode 100644 index 0000000000..7e2d8ccb94 --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_tcp_proxy.cpp @@ -0,0 +1,936 @@ +#include "interconnect_tcp_proxy.h" +#include "interconnect_handshake.h" +#include "interconnect_tcp_session.h" +#include <library/cpp/actors/core/log.h> +#include <library/cpp/actors/protos/services_common.pb.h> +#include <library/cpp/monlib/service/pages/templates.h> +#include <util/system/getpid.h> + +namespace NActors { + static constexpr TDuration GetNodeRequestTimeout = TDuration::Seconds(5); + + static constexpr TDuration FirstErrorSleep = TDuration::MilliSeconds(10); + static constexpr TDuration MaxErrorSleep = TDuration::Seconds(10); + static constexpr ui32 SleepRetryMultiplier = 4; + + static TString PeerNameForHuman(ui32 nodeNum, const TString& longName, ui16 port) { + TStringBuf token; + TStringBuf(longName).NextTok('.', token); + return ToString<ui32>(nodeNum) + ":" + (token.size() > 0 ? TString(token) : longName) + ":" + ToString<ui16>(port); + } + + TInterconnectProxyTCP::TInterconnectProxyTCP(const ui32 node, TInterconnectProxyCommon::TPtr common, + IActor **dynamicPtr) + : TActor(&TThis::StateInit) + , PeerNodeId(node) + , DynamicPtr(dynamicPtr) + , Common(std::move(common)) + , SecureContext(new NInterconnect::TSecureSocketContext(Common->Settings.Certificate, Common->Settings.PrivateKey, + Common->Settings.CaFilePath, Common->Settings.CipherList)) + { + Y_VERIFY(Common); + Y_VERIFY(Common->NameserviceId); + if (DynamicPtr) { + Y_VERIFY(!*DynamicPtr); + *DynamicPtr = this; + } + } + + void TInterconnectProxyTCP::Bootstrap() { + SetPrefix(Sprintf("Proxy %s [node %" PRIu32 "]", SelfId().ToString().data(), PeerNodeId)); + + SwitchToInitialState(); + PassAwayTimestamp = TActivationContext::Now() + TDuration::Seconds(15); + + LOG_INFO_IC("ICP01", "ready to work"); + } + + void TInterconnectProxyTCP::Registered(TActorSystem* sys, const TActorId& owner) { + if (!DynamicPtr) { + // perform usual bootstrap for static nodes + sys->Send(new IEventHandle(TEvents::TSystem::Bootstrap, 0, SelfId(), owner, nullptr, 0)); + } + if (const auto& mon = Common->RegisterMonPage) { + TString path = Sprintf("peer%04" PRIu32, PeerNodeId); + TString title = Sprintf("Peer #%04" PRIu32, PeerNodeId); + mon(path, title, sys, SelfId()); + } + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // PendingActivation + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + void TInterconnectProxyTCP::RequestNodeInfo(STATEFN_SIG) { + ICPROXY_PROFILED; + + Y_VERIFY(!IncomingHandshakeActor && !OutgoingHandshakeActor && !PendingIncomingHandshakeEvents && !PendingSessionEvents); + EnqueueSessionEvent(ev); + StartConfiguring(); + } + + void TInterconnectProxyTCP::RequestNodeInfoForIncomingHandshake(STATEFN_SIG) { + ICPROXY_PROFILED; + + if (!Terminated) { + Y_VERIFY(!IncomingHandshakeActor && !OutgoingHandshakeActor && !PendingIncomingHandshakeEvents && !PendingSessionEvents); + EnqueueIncomingHandshakeEvent(ev); + StartConfiguring(); + } + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // PendingNodeInfo + 
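+    // In this state the proxy has asked the nameservice for the peer's address
+    // (TEvInterconnect::TEvGetNode) and waits for TEvNodeInfo or the configure
+    // timeout before any handshake may start.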
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + void TInterconnectProxyTCP::StartConfiguring() { + ICPROXY_PROFILED; + + Y_VERIFY(!IncomingHandshakeActor && !OutgoingHandshakeActor); + + // issue node info request + Send(Common->NameserviceId, new TEvInterconnect::TEvGetNode(PeerNodeId)); + + // arm the configure timer; store a pointer to the wakeup event so that we can tell it apart from any other + // wakeup events in flight + SwitchToState(__LINE__, "PendingNodeInfo", &TThis::PendingNodeInfo, GetNodeRequestTimeout, + ConfigureTimeoutCookie = new TEvents::TEvWakeup); + } + + void TInterconnectProxyTCP::Configure(TEvInterconnect::TEvNodeInfo::TPtr& ev) { + ICPROXY_PROFILED; + + Y_VERIFY(!IncomingHandshakeActor && !OutgoingHandshakeActor && !Session); + + if (!ev->Get()->Node) { + TransitToErrorState("cannot get node info"); + } else { + auto& info = *ev->Get()->Node; + TString name = PeerNameForHuman(PeerNodeId, info.Host, info.Port); + TechnicalPeerHostName = info.Host; + if (!Metrics) { + Metrics = Common->Metrics ? CreateInterconnectMetrics(Common) : CreateInterconnectCounters(Common); + } + Metrics->SetPeerInfo(name, info.Location.GetDataCenterId()); + + LOG_DEBUG_IC("ICP02", "configured for host %s", name.data()); + + ProcessConfigured(); + } + } + + void TInterconnectProxyTCP::ConfigureTimeout(TEvents::TEvWakeup::TPtr& ev) { + ICPROXY_PROFILED; + + if (ev->Get() == ConfigureTimeoutCookie) { + TransitToErrorState("timed out while waiting for node info"); + } + } + + void TInterconnectProxyTCP::ProcessConfigured() { + ICPROXY_PROFILED; + + // if the request was initiated by some activity involving Interconnect, then we are expected to start handshake + if (PendingSessionEvents) { + StartInitialHandshake(); + } + + // process incoming handshake requests; all failures were ejected from the queue along with the matching initiation requests + for (THolder<IEventHandle>& ev : PendingIncomingHandshakeEvents) { + TAutoPtr<IEventHandle> x(ev.Release()); + IncomingHandshake(x); + } + PendingIncomingHandshakeEvents.clear(); + + // possible situation -- an incoming handshake arrives, but gets rejected rather than satisfied; in this case + // we return to the initial state as we have nothing more to do + if (!IncomingHandshakeActor && !OutgoingHandshakeActor) { + SwitchToInitialState(); + } + } + + void TInterconnectProxyTCP::StartInitialHandshake() { + ICPROXY_PROFILED; + + // since we are starting the initial handshake, drop any handshakes that are already in flight + DropHandshakes(); + + // create and register handshake actor + OutgoingHandshakeActor = Register(CreateOutgoingHandshakeActor(Common, GenerateSessionVirtualId(), + TActorId(), PeerNodeId, 0, TechnicalPeerHostName, TSessionParams()), TMailboxType::ReadAsFilled); + OutgoingHandshakeActorCreated = TActivationContext::Now(); + + // prepare for new handshake + PrepareNewSessionHandshake(); + } + + void TInterconnectProxyTCP::StartResumeHandshake(ui64 inputCounter) { + ICPROXY_PROFILED; + + // drop outgoing handshake if we have one; keep incoming handshakes as they may be useful + DropOutgoingHandshake(); + + // ensure that we have session + Y_VERIFY(Session); + + // ensure that we have both virtual ids + Y_VERIFY(SessionVirtualId); + Y_VERIFY(RemoteSessionVirtualId); + + // create and register handshake actor + OutgoingHandshakeActor = Register(CreateOutgoingHandshakeActor(Common, SessionVirtualId, + RemoteSessionVirtualId, PeerNodeId,
inputCounter, TechnicalPeerHostName, Session->Params), + TMailboxType::ReadAsFilled); + OutgoingHandshakeActorCreated = TActivationContext::Now(); + } + + void TInterconnectProxyTCP::IssueIncomingHandshakeReply(const TActorId& handshakeId, ui64 peerLocalId, + THolder<IEventBase> event) { + ICPROXY_PROFILED; + + Y_VERIFY(!IncomingHandshakeActor); + IncomingHandshakeActor = handshakeId; + IncomingHandshakeActorFilledIn = TActivationContext::Now(); + Y_VERIFY(!LastSerialFromIncomingHandshake || *LastSerialFromIncomingHandshake <= peerLocalId); + LastSerialFromIncomingHandshake = peerLocalId; + + if (OutgoingHandshakeActor && SelfId().NodeId() < PeerNodeId) { + // Both outgoing and incoming handshakes are in progress. To prevent a race condition during simultaneous handshakes, + // the incoming handshake reply must be held until the outgoing handshake completes or fails + LOG_DEBUG_IC("ICP06", "reply for incoming handshake (actor %s) is held", IncomingHandshakeActor.ToString().data()); + HeldHandshakeReply = std::move(event); + + // Check that we are in one of acceptable states that would properly handle handshake statuses. + const auto state = CurrentStateFunc(); + Y_VERIFY(state == &TThis::PendingConnection || state == &TThis::StateWork, "invalid handshake request in state# %s", State); + } else { + LOG_DEBUG_IC("ICP07", "issued incoming handshake reply"); + + // No race, so we can send reply immediately. + Y_VERIFY(!HeldHandshakeReply); + Send(IncomingHandshakeActor, event.Release()); + + // Start waiting for handshake reply, if not yet started; also, if session is already created, then we don't + // switch from working state. + if (!Session) { + LOG_INFO_IC("ICP08", "No active sessions, becoming PendingConnection"); + SwitchToState(__LINE__, "PendingConnection", &TThis::PendingConnection); + } else { + Y_VERIFY(CurrentStateFunc() == &TThis::StateWork); + } + } + } + + void TInterconnectProxyTCP::IncomingHandshake(TEvHandshakeAsk::TPtr& ev) { + ICPROXY_PROFILED; + + TEvHandshakeAsk *msg = ev->Get(); + + // TEvHandshakeAsk is only applicable for continuation requests + LOG_DEBUG_IC("ICP09", "(actor %s) from: %s for: %s", ev->Sender.ToString().data(), + ev->Get()->Self.ToString().data(), ev->Get()->Peer.ToString().data()); + + if (!Session) { + // if there is no open session, report error -- continuation request works only with open sessions + LOG_NOTICE_IC("ICP12", "(actor %s) peer tries to resume nonexistent session Self# %s Peer# %s", + ev->Sender.ToString().data(), msg->Self.ToString().data(), msg->Peer.ToString().data()); + } else if (SessionVirtualId != ev->Get()->Peer || RemoteSessionVirtualId != ev->Get()->Self) { + // check session virtual ids for continuation + LOG_NOTICE_IC("ICP13", "(actor %s) virtual id mismatch with existing session (Peer: %s Self: %s" + " SessionVirtualId: %s RemoteSessionVirtualId: %s)", ev->Sender.ToString().data(), + ev->Get()->Peer.ToString().data(), ev->Get()->Self.ToString().data(), SessionVirtualId.ToString().data(), + RemoteSessionVirtualId.ToString().data()); + } else { + // if we already have incoming handshake, then terminate existing one + DropIncomingHandshake(); + + // issue reply to the sender, possibly holding it while the outgoing handshake race is being resolved + THolder<IEventBase> reply = IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::ProcessHandshakeRequest, ev); + return IssueIncomingHandshakeReply(ev->Sender, RemoteSessionVirtualId.LocalId(), std::move(reply)); + } + + // error case -- report error to the handshake actor + Send(ev->Sender, new TEvHandshakeNak); + }
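+
+ // Unlike TEvHandshakeAsk above, which only resumes an existing session, TEvHandshakeRequest carries an
+ // initial handshake: it may end up creating a brand new session, so it is first validated against the
+ // remote program instance (PID, start time, serial) before a reply is issued.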
+ + void TInterconnectProxyTCP::IncomingHandshake(TEvHandshakeRequest::TPtr& ev) { + ICPROXY_PROFILED; + + LOG_DEBUG_IC("ICP17", "incoming handshake (actor %s)", ev->Sender.ToString().data()); + + const auto& record = ev->Get()->Record; + ui64 remotePID = record.GetProgramPID(); + ui64 remoteStartTime = record.GetProgramStartTime(); + ui64 remoteSerial = record.GetSerial(); + + if (RemoteProgramInfo && remotePID == RemoteProgramInfo->PID && remoteStartTime == RemoteProgramInfo->StartTime) { + if (remoteSerial < RemoteProgramInfo->Serial) { + LOG_INFO_IC("ICP18", "handshake (actor %s) is too old", ev->Sender.ToString().data()); + Send(ev->Sender, new TEvents::TEvPoisonPill); + return; + } else { + RemoteProgramInfo->Serial = remoteSerial; + } + } else { + const auto ptr = new TProgramInfo; + ptr->PID = remotePID; + ptr->StartTime = remoteStartTime; + ptr->Serial = remoteSerial; + RemoteProgramInfo.Reset(ptr); + } + + /* Let's check peer technical hostname */ + if (record.HasSenderHostName() && TechnicalPeerHostName != record.GetSenderHostName()) { + Send(ev->Sender, new TEvHandshakeReplyError("host name mismatch")); + return; + } + + // check the serial of the incoming handshake and reject stale or duplicate ones + if (LastSerialFromIncomingHandshake) { + const ui64 serial = record.GetSerial(); + if (serial < *LastSerialFromIncomingHandshake) { + LOG_NOTICE_IC("ICP15", "Handshake# %s has duplicate serial# %" PRIu64 + " LastSerialFromIncomingHandshake# %" PRIu64, ev->Sender.ToString().data(), + serial, *LastSerialFromIncomingHandshake); + Send(ev->Sender, new TEvHandshakeReplyError("duplicate serial")); + return; + } else if (serial == *LastSerialFromIncomingHandshake) { + LOG_NOTICE_IC("ICP15", "Handshake# %s is obsolete, serial# %" PRIu64 + " LastSerialFromIncomingHandshake# %" PRIu64, ev->Sender.ToString().data(), + serial, *LastSerialFromIncomingHandshake); + Send(ev->Sender, new TEvents::TEvPoisonPill); + return; + } + } + + // drop incoming handshake as this is definitely more recent + DropIncomingHandshake(); + + // prepare for new session + PrepareNewSessionHandshake(); + + auto event = MakeHolder<TEvHandshakeReplyOK>(); + auto* pb = event->Record.MutableSuccess(); + const TActorId virtualId = GenerateSessionVirtualId(); + pb->SetProtocol(INTERCONNECT_PROTOCOL_VERSION); + pb->SetSenderActorId(virtualId.ToString()); + pb->SetProgramPID(GetPID()); + pb->SetProgramStartTime(Common->StartTime); + pb->SetSerial(virtualId.LocalId()); + + IssueIncomingHandshakeReply(ev->Sender, 0, std::move(event)); + } + + void TInterconnectProxyTCP::HandleHandshakeStatus(TEvHandshakeDone::TPtr& ev) { + ICPROXY_PROFILED; + + TEvHandshakeDone *msg = ev->Get(); + + // Terminate handshake actor working in opposite direction, if set up. + if (ev->Sender == IncomingHandshakeActor) { + LOG_INFO_IC("ICP19", "incoming handshake succeeded"); + DropIncomingHandshake(false); + DropOutgoingHandshake(); + } else if (ev->Sender == OutgoingHandshakeActor) { + LOG_INFO_IC("ICP20", "outgoing handshake succeeded"); + DropIncomingHandshake(); + DropOutgoingHandshake(false); + } else { + /* It seems to be an old handshake.
*/ + return; + } + + Y_VERIFY(!IncomingHandshakeActor && !OutgoingHandshakeActor); + SwitchToState(__LINE__, "StateWork", &TThis::StateWork); + + if (Session) { + // this is continuation request, check that virtual ids match + Y_VERIFY(SessionVirtualId == msg->Self && RemoteSessionVirtualId == msg->Peer); + } else { + // this is initial request, check that we have virtual ids not filled in + Y_VERIFY(!SessionVirtualId && !RemoteSessionVirtualId); + } + + auto error = [&](const char* description) { + TransitToErrorState(description); + }; + + // If session is not created, then create new one. + if (!Session) { + RemoteProgramInfo = std::move(msg->ProgramInfo); + if (!RemoteProgramInfo) { + // we have received resume handshake, but session was closed concurrently while handshaking + return error("Session continuation race"); + } + + // Create new session actor. + SessionID = RegisterWithSameMailbox(Session = new TInterconnectSessionTCP(this, msg->Params)); + IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::Init); + SessionVirtualId = msg->Self; + RemoteSessionVirtualId = msg->Peer; + LOG_INFO_IC("ICP22", "created new session: %s", SessionID.ToString().data()); + } + + // ensure that we have session local/peer virtual ids + Y_VERIFY(Session && SessionVirtualId && RemoteSessionVirtualId); + + // Set up new connection for the session. + IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::SetNewConnection, ev); + + // Reset retry timer + HoldByErrorWakeupDuration = TDuration::Zero(); + + /* Forward all held events */ + ProcessPendingSessionEvents(); + } + + void TInterconnectProxyTCP::HandleHandshakeStatus(TEvHandshakeFail::TPtr& ev) { + ICPROXY_PROFILED; + + // update error state log; this fail is inconclusive unless this is the last pending handshake + const bool inconclusive = (ev->Sender != IncomingHandshakeActor && ev->Sender != OutgoingHandshakeActor) || + (IncomingHandshakeActor && OutgoingHandshakeActor); + LogHandshakeFail(ev, inconclusive); + + if (ev->Sender == IncomingHandshakeActor) { + LOG_NOTICE_IC("ICP24", "incoming handshake failed, temporary: %" PRIu32 " explanation: %s outgoing: %s", + ui32(ev->Get()->Temporary), ev->Get()->Explanation.data(), OutgoingHandshakeActor.ToString().data()); + DropIncomingHandshake(false); + } else if (ev->Sender == OutgoingHandshakeActor) { + LOG_NOTICE_IC("ICP25", "outgoing handshake failed, temporary: %" PRIu32 " explanation: %s incoming: %s held: %s", + ui32(ev->Get()->Temporary), ev->Get()->Explanation.data(), IncomingHandshakeActor.ToString().data(), + HeldHandshakeReply ? 
"yes" : "no"); + DropOutgoingHandshake(false); + + if (IEventBase* reply = HeldHandshakeReply.Release()) { + Y_VERIFY(IncomingHandshakeActor); + LOG_DEBUG_IC("ICP26", "sent held handshake reply to %s", IncomingHandshakeActor.ToString().data()); + Send(IncomingHandshakeActor, reply); + } + + // if we have no current session, then we have to drop all pending events as the outgoing handshake has failed + ProcessPendingSessionEvents(); + } else { + /* It seems to be an old fail, just ignore it */ + LOG_NOTICE_IC("ICP27", "obsolete handshake fail ignored"); + return; + } + + if (Metrics) { + Metrics->IncHandshakeFails(); + } + + if (IncomingHandshakeActor || OutgoingHandshakeActor) { + // one of handshakes is still going on + LOG_DEBUG_IC("ICP28", "other handshake is still going on"); + return; + } + + switch (ev->Get()->Temporary) { + case TEvHandshakeFail::HANDSHAKE_FAIL_TRANSIENT: + if (!Session) { + if (PendingSessionEvents) { + // try to start outgoing handshake as we have some events enqueued + StartInitialHandshake(); + } else { + // return back to initial state as we have no session and no pending handshakes + SwitchToInitialState(); + } + } else if (Session->Socket) { + // try to reestablish connection -- meaning restart handshake from the last known position + IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::ReestablishConnectionWithHandshake, + TDisconnectReason::HandshakeFailTransient()); + } else { + // we have no active connection in that session, so just restart handshake from last known position + IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::StartHandshake); + } + break; + + case TEvHandshakeFail::HANDSHAKE_FAIL_SESSION_MISMATCH: + StartInitialHandshake(); + break; + + case TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT: + TString timeExplanation = " LastSessionDieTime# " + LastSessionDieTime.ToString(); + if (Session) { + InvokeOtherActor(*Session, &TInterconnectSessionTCP::Terminate, + TDisconnectReason::HandshakeFailPermanent()); + } + TransitToErrorState(ev->Get()->Explanation + timeExplanation, false); + break; + } + } + + void TInterconnectProxyTCP::LogHandshakeFail(TEvHandshakeFail::TPtr& ev, bool inconclusive) { + ICPROXY_PROFILED; + + TString kind = "unknown"; + switch (ev->Get()->Temporary) { + case TEvHandshakeFail::HANDSHAKE_FAIL_TRANSIENT: + kind = Session ? 
"transient w/session" : "transient w/o session"; + break; + + case TEvHandshakeFail::HANDSHAKE_FAIL_SESSION_MISMATCH: + kind = "session_mismatch"; + break; + + case TEvHandshakeFail::HANDSHAKE_FAIL_PERMANENT: + kind = "permanent"; + break; + } + if (inconclusive) { + kind += " inconclusive"; + } + UpdateErrorStateLog(TActivationContext::Now(), kind, ev->Get()->Explanation); + } + + void TInterconnectProxyTCP::ProcessPendingSessionEvents() { + ICPROXY_PROFILED; + + while (PendingSessionEvents) { + TPendingSessionEvent ev = std::move(PendingSessionEvents.front()); + PendingSessionEventsSize -= ev.Size; + TAutoPtr<IEventHandle> event(ev.Event.Release()); + PendingSessionEvents.pop_front(); + + if (Session) { + ForwardSessionEventToSession(event); + } else { + DropSessionEvent(event); + } + } + } + + void TInterconnectProxyTCP::DropSessionEvent(STATEFN_SIG) { + ICPROXY_PROFILED; + + ValidateEvent(ev, "DropSessionEvent"); + switch (ev->GetTypeRewrite()) { + case TEvInterconnect::EvForward: + if (ev->Flags & IEventHandle::FlagSubscribeOnSession) { + Send(ev->Sender, new TEvInterconnect::TEvNodeDisconnected(PeerNodeId), 0, ev->Cookie); + } + TActivationContext::Send(ev->ForwardOnNondelivery(TEvents::TEvUndelivered::Disconnected)); + break; + + case TEvInterconnect::TEvConnectNode::EventType: + case TEvents::TEvSubscribe::EventType: + Send(ev->Sender, new TEvInterconnect::TEvNodeDisconnected(PeerNodeId), 0, ev->Cookie); + break; + + case TEvents::TEvUnsubscribe::EventType: + /* Do nothing */ + break; + + default: + Y_FAIL("Unexpected type of event in held event queue"); + } + } + + void TInterconnectProxyTCP::UnregisterSession(TInterconnectSessionTCP* session) { + ICPROXY_PROFILED; + + Y_VERIFY(Session && Session == session && SessionID); + + LOG_INFO_IC("ICP30", "unregister session Session# %s VirtualId# %s", SessionID.ToString().data(), + SessionVirtualId.ToString().data()); + + Session = nullptr; + SessionID = TActorId(); + + // drop all pending events as we are closed + ProcessPendingSessionEvents(); + + // reset virtual ids as this session is terminated + SessionVirtualId = TActorId(); + RemoteSessionVirtualId = TActorId(); + + if (Metrics) { + Metrics->IncSessionDeaths(); + } + LastSessionDieTime = TActivationContext::Now(); + + if (IncomingHandshakeActor || OutgoingHandshakeActor) { + PrepareNewSessionHandshake(); + } else { + SwitchToInitialState(); + } + } + + void TInterconnectProxyTCP::EnqueueSessionEvent(STATEFN_SIG) { + ICPROXY_PROFILED; + + ValidateEvent(ev, "EnqueueSessionEvent"); + const ui32 size = ev->GetSize(); + PendingSessionEventsSize += size; + PendingSessionEvents.emplace_back(TActivationContext::Now() + Common->Settings.MessagePendingTimeout, size, ev); + ScheduleCleanupEventQueue(); + CleanupEventQueue(); + } + + void TInterconnectProxyTCP::EnqueueIncomingHandshakeEvent(STATEFN_SIG) { + ICPROXY_PROFILED; + + // enqueue handshake request + Y_UNUSED(); + PendingIncomingHandshakeEvents.emplace_back(ev); + } + + void TInterconnectProxyTCP::EnqueueIncomingHandshakeEvent(TEvHandshakeDone::TPtr& /*ev*/) { + ICPROXY_PROFILED; + + // TEvHandshakeDone can't get into the queue, because we have to process handshake request first; this may be the + // race with the previous handshakes, so simply ignore it + } + + void TInterconnectProxyTCP::EnqueueIncomingHandshakeEvent(TEvHandshakeFail::TPtr& ev) { + ICPROXY_PROFILED; + + for (auto it = PendingIncomingHandshakeEvents.begin(); it != PendingIncomingHandshakeEvents.end(); ++it) { + THolder<IEventHandle>& pendingEvent = *it; + if 
(pendingEvent->Sender == ev->Sender) { + // we have found cancellation request for the pending handshake request; so simply remove it from the + // deque, as we are not interested in failure reason; must likely it happens because of handshake timeout + if (pendingEvent->GetTypeRewrite() == TEvHandshakeFail::EventType) { + TEvHandshakeFail::TPtr tmp(static_cast<TEventHandle<TEvHandshakeFail>*>(pendingEvent.Release())); + LogHandshakeFail(tmp, true); + } + PendingIncomingHandshakeEvents.erase(it); + break; + } + } + } + + void TInterconnectProxyTCP::ForwardSessionEventToSession(STATEFN_SIG) { + ICPROXY_PROFILED; + + Y_VERIFY(Session && SessionID); + ValidateEvent(ev, "ForwardSessionEventToSession"); + InvokeOtherActor(*Session, &TInterconnectSessionTCP::Receive, ev, TActivationContext::ActorContextFor(SessionID)); + } + + void TInterconnectProxyTCP::GenerateHttpInfo(NMon::TEvHttpInfo::TPtr& ev) { + ICPROXY_PROFILED; + + LOG_INFO_IC("ICP31", "proxy http called"); + + TStringStream str; + + HTML(str) { + DIV_CLASS("panel panel-info") { + DIV_CLASS("panel-heading") { + str << "Proxy"; + } + DIV_CLASS("panel-body") { + TABLE_CLASS("table") { + TABLEHEAD() { + TABLER() { + TABLEH() { + str << "Sensor"; + } + TABLEH() { + str << "Value"; + } + } + } +#define MON_VAR(NAME) \ + TABLER() { \ + TABLED() { \ + str << #NAME; \ + } \ + TABLED() { \ + str << NAME; \ + } \ + } + + TABLEBODY() { + MON_VAR(TActivationContext::Now()) + MON_VAR(SessionID) + MON_VAR(LastSessionDieTime) + MON_VAR(IncomingHandshakeActor) + MON_VAR(IncomingHandshakeActorFilledIn) + MON_VAR(IncomingHandshakeActorReset) + MON_VAR(OutgoingHandshakeActor) + MON_VAR(OutgoingHandshakeActorCreated) + MON_VAR(OutgoingHandshakeActorReset) + MON_VAR(State) + MON_VAR(StateSwitchTime) + } + } + } + } + + DIV_CLASS("panel panel-info") { + DIV_CLASS("panel-heading") { + str << "Error Log"; + } + DIV_CLASS("panel-body") { + TABLE_CLASS("table") { + TABLEHEAD() { + TABLER() { + TABLEH() { + str << "Timestamp"; + } + TABLEH() { + str << "Elapsed"; + } + TABLEH() { + str << "Kind"; + } + TABLEH() { + str << "Explanation"; + } + } + } + TABLEBODY() { + const TInstant now = TActivationContext::Now(); + const TInstant barrier = now - TDuration::Minutes(1); + for (auto it = ErrorStateLog.rbegin(); it != ErrorStateLog.rend(); ++it) { + auto wrapper = [&](const auto& lambda) { + if (std::get<0>(*it) > barrier) { + str << "<strong>"; + lambda(); + str << "</strong>"; + } else { + lambda(); + } + }; + TABLER() { + TABLED() { + wrapper([&] { + str << std::get<0>(*it); + }); + } + TABLED() { + wrapper([&] { + str << now - std::get<0>(*it); + }); + } + TABLED() { + wrapper([&] { + str << std::get<1>(*it); + }); + } + TABLED() { + wrapper([&] { + str << std::get<2>(*it); + }); + + ui32 rep = std::get<3>(*it); + if (rep != 1) { + str << " <strong>x" << rep << "</strong>"; + } + } + } + } + } + } + } + } + } + + if (Session != nullptr) { + Session->GenerateHttpInfo(str); + } + + Send(ev->Sender, new NMon::TEvHttpInfoRes(str.Str())); + } + + void TInterconnectProxyTCP::TransitToErrorState(TString explanation, bool updateErrorLog) { + ICPROXY_PROFILED; + + LOG_NOTICE_IC("ICP32", "transit to hold-by-error state Explanation# %s", explanation.data()); + LOG_INFO(*TlsActivationContext, NActorsServices::INTERCONNECT_STATUS, "[%u] error state: %s", PeerNodeId, explanation.data()); + + if (updateErrorLog) { + UpdateErrorStateLog(TActivationContext::Now(), "permanent conclusive", explanation); + } + + Y_VERIFY(Session == nullptr); + Y_VERIFY(!SessionID); + + // 
recalculate wakeup timeout -- if this is the first failure, then we sleep for default timeout; otherwise we + // sleep N times longer than the previous try, but not longer than desired number of seconds + HoldByErrorWakeupDuration = HoldByErrorWakeupDuration != TDuration::Zero() + ? Min(HoldByErrorWakeupDuration * SleepRetryMultiplier, MaxErrorSleep) + : FirstErrorSleep; + + // transit to required state and arm wakeup timer + if (Terminated) { + // switch to this state permanently + SwitchToState(__LINE__, "HoldByError", &TThis::HoldByError); + HoldByErrorWakeupCookie = nullptr; + } else { + SwitchToState(__LINE__, "HoldByError", &TThis::HoldByError, HoldByErrorWakeupDuration, + HoldByErrorWakeupCookie = new TEvents::TEvWakeup); + } + + /* Process all pending events. */ + ProcessPendingSessionEvents(); + + /* Terminate handshakes */ + DropHandshakes(); + + /* Terminate pending incoming handshake requests. */ + for (auto& ev : PendingIncomingHandshakeEvents) { + Send(ev->Sender, new TEvents::TEvPoisonPill); + if (ev->GetTypeRewrite() == TEvHandshakeFail::EventType) { + TEvHandshakeFail::TPtr tmp(static_cast<TEventHandle<TEvHandshakeFail>*>(ev.Release())); + LogHandshakeFail(tmp, true); + } + } + PendingIncomingHandshakeEvents.clear(); + } + + void TInterconnectProxyTCP::WakeupFromErrorState(TEvents::TEvWakeup::TPtr& ev) { + ICPROXY_PROFILED; + + LOG_INFO_IC("ICP33", "wake up from error state"); + + if (ev->Get() == HoldByErrorWakeupCookie) { + SwitchToInitialState(); + } + } + + void TInterconnectProxyTCP::Disconnect() { + ICPROXY_PROFILED; + + // terminate handshakes (if any) + DropHandshakes(); + + if (Session) { + IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::Terminate, TDisconnectReason::UserRequest()); + } else { + TransitToErrorState("forced disconnect"); + } + } + + void TInterconnectProxyTCP::ScheduleCleanupEventQueue() { + ICPROXY_PROFILED; + + if (!CleanupEventQueueScheduled && PendingSessionEvents) { + // apply batching at 50 ms granularity + Schedule(Max(TDuration::MilliSeconds(50), PendingSessionEvents.front().Deadline - TActivationContext::Now()), new TEvCleanupEventQueue); + CleanupEventQueueScheduled = true; + } + } + + void TInterconnectProxyTCP::HandleCleanupEventQueue() { + ICPROXY_PROFILED; + + Y_VERIFY(CleanupEventQueueScheduled); + CleanupEventQueueScheduled = false; + CleanupEventQueue(); + ScheduleCleanupEventQueue(); + } + + void TInterconnectProxyTCP::CleanupEventQueue() { + ICPROXY_PROFILED; + + const TInstant now = TActivationContext::Now(); + while (PendingSessionEvents) { + TPendingSessionEvent& ev = PendingSessionEvents.front(); + if (now >= ev.Deadline || PendingSessionEventsSize > Common->Settings.MessagePendingSize) { + TAutoPtr<IEventHandle> event(ev.Event.Release()); + PendingSessionEventsSize -= ev.Size; + DropSessionEvent(event); + PendingSessionEvents.pop_front(); + } else { + break; + } + } + } + + void TInterconnectProxyTCP::HandleClosePeerSocket() { + ICPROXY_PROFILED; + + if (Session && Session->Socket) { + LOG_INFO_IC("ICP34", "closed connection by debug command"); + Session->Socket->Shutdown(SHUT_RDWR); + } + } + + void TInterconnectProxyTCP::HandleCloseInputSession() { + ICPROXY_PROFILED; + + if (Session) { + IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::CloseInputSession); + } + } + + void TInterconnectProxyTCP::HandlePoisonSession() { + ICPROXY_PROFILED; + + if (Session) { + IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::Terminate, TDisconnectReason::Debug()); + } + } + + void 
TInterconnectProxyTCP::HandleSessionBufferSizeRequest(TEvSessionBufferSizeRequest::TPtr& ev) { + ICPROXY_PROFILED; + + ui64 bufSize = 0; + if (Session) { + bufSize = Session->TotalOutputQueueSize; + } + + Send(ev->Sender, new TEvSessionBufferSizeResponse(SessionID, bufSize)); + } + + void TInterconnectProxyTCP::Handle(TEvQueryStats::TPtr& ev) { + ICPROXY_PROFILED; + + TProxyStats stats; + stats.Path = Sprintf("peer%04" PRIu32, PeerNodeId); + stats.State = State; + stats.PeerScopeId = Session ? Session->Params.PeerScopeId : TScopeId(); + stats.LastSessionDieTime = LastSessionDieTime; + stats.TotalOutputQueueSize = Session ? Session->TotalOutputQueueSize : 0; + stats.Connected = Session ? (bool)Session->Socket : false; + stats.Host = TechnicalPeerHostName; + stats.Port = 0; + ui32 rep = 0; + std::tie(stats.LastErrorTimestamp, stats.LastErrorKind, stats.LastErrorExplanation, rep) = ErrorStateLog + ? ErrorStateLog.back() + : std::make_tuple(TInstant(), TString(), TString(), 1U); + if (rep != 1) { + stats.LastErrorExplanation += Sprintf(" x%" PRIu32, rep); + } + stats.Ping = Session ? Session->GetPingRTT() : TDuration::Zero(); + stats.ClockSkew = Session ? Session->GetClockSkew() : 0; + if (Session) { + if (auto *x = dynamic_cast<NInterconnect::TSecureSocket*>(Session->Socket.Get())) { + stats.Encryption = Sprintf("%s/%u", x->GetCipherName().data(), x->GetCipherBits()); + } else { + stats.Encryption = "none"; + } + } + + auto response = MakeHolder<TEvStats>(); + response->PeerNodeId = PeerNodeId; + response->ProxyStats = std::move(stats); + Send(ev->Sender, response.Release()); + } + + void TInterconnectProxyTCP::HandleTerminate() { + ICPROXY_PROFILED; + + if (Session) { + IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::Terminate, TDisconnectReason()); + } + Terminated = true; + TransitToErrorState("terminated"); + } + + void TInterconnectProxyTCP::PassAway() { + if (Session) { + IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::Terminate, TDisconnectReason()); + } + if (DynamicPtr) { + Y_VERIFY(*DynamicPtr == this); + *DynamicPtr = nullptr; + } + // TODO: unregister actor mon page + TActor::PassAway(); + } +} diff --git a/library/cpp/actors/interconnect/interconnect_tcp_proxy.h b/library/cpp/actors/interconnect/interconnect_tcp_proxy.h new file mode 100644 index 0000000000..023e5bd1ee --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_tcp_proxy.h @@ -0,0 +1,537 @@ +#pragma once + +#include <library/cpp/actors/core/actor_bootstrapped.h> +#include <library/cpp/actors/core/hfunc.h> +#include <library/cpp/actors/core/event_pb.h> +#include <library/cpp/actors/core/events.h> +#include <library/cpp/monlib/dynamic_counters/counters.h> + +#include "interconnect_common.h" +#include "interconnect_counters.h" +#include "interconnect_tcp_session.h" +#include "profiler.h" + +#define ICPROXY_PROFILED TFunction func(*this, __func__, __LINE__) + +namespace NActors { + + + /* WARNING: all proxy actors should be alive during actorsystem activity */ + class TInterconnectProxyTCP + : public TActor<TInterconnectProxyTCP> + , public TInterconnectLoggingBase + , public TProfiled + { + enum { + EvCleanupEventQueue = EventSpaceBegin(TEvents::ES_PRIVATE), + EvQueryStats, + EvStats, + EvPassAwayIfNeeded, + }; + + struct TEvCleanupEventQueue : TEventLocal<TEvCleanupEventQueue, EvCleanupEventQueue> {}; + + public: + struct TEvQueryStats : TEventLocal<TEvQueryStats, EvQueryStats> {}; + + struct TProxyStats { + TString Path; + TString State; + TScopeId PeerScopeId; + TInstant 
LastSessionDieTime; + ui64 TotalOutputQueueSize; + bool Connected; + TString Host; + ui16 Port; + TInstant LastErrorTimestamp; + TString LastErrorKind; + TString LastErrorExplanation; + TDuration Ping; + i64 ClockSkew; + TString Encryption; + }; + + struct TEvStats : TEventLocal<TEvStats, EvStats> { + ui32 PeerNodeId; + TProxyStats ProxyStats; + }; + + static constexpr EActivityType ActorActivityType() { + return INTERCONNECT_PROXY_TCP; + } + + TInterconnectProxyTCP(const ui32 node, TInterconnectProxyCommon::TPtr common, IActor **dynamicPtr = nullptr); + + STFUNC(StateInit) { + Bootstrap(); + if (ev->Type != TEvents::TSystem::Bootstrap) { // for dynamic nodes we do not receive Bootstrap event + Receive(ev, ctx); + } + } + + void Bootstrap(); + void Registered(TActorSystem* sys, const TActorId& owner) override; + + private: + friend class TInterconnectSessionTCP; + friend class TInterconnectSessionTCPv0; + friend class THandshake; + friend class TInputSessionTCP; + + void UnregisterSession(TInterconnectSessionTCP* session); + +#define SESSION_EVENTS(HANDLER) \ + fFunc(TEvInterconnect::EvForward, HANDLER) \ + fFunc(TEvInterconnect::TEvConnectNode::EventType, HANDLER) \ + fFunc(TEvents::TEvSubscribe::EventType, HANDLER) \ + fFunc(TEvents::TEvUnsubscribe::EventType, HANDLER) + +#define INCOMING_HANDSHAKE_EVENTS(HANDLER) \ + fFunc(TEvHandshakeAsk::EventType, HANDLER) \ + fFunc(TEvHandshakeRequest::EventType, HANDLER) + +#define HANDSHAKE_STATUS_EVENTS(HANDLER) \ + hFunc(TEvHandshakeDone, HANDLER) \ + hFunc(TEvHandshakeFail, HANDLER) + +#define PROXY_STFUNC(STATE, SESSION_HANDLER, INCOMING_HANDSHAKE_HANDLER, \ + HANDSHAKE_STATUS_HANDLER, DISCONNECT_HANDLER, \ + WAKEUP_HANDLER, NODE_INFO_HANDLER) \ + STATEFN(STATE) { \ + const ui32 type = ev->GetTypeRewrite(); \ + const bool profiled = type != TEvInterconnect::EvForward \ + && type != TEvInterconnect::EvConnectNode \ + && type != TEvents::TSystem::Subscribe \ + && type != TEvents::TSystem::Unsubscribe; \ + if (profiled) { \ + TProfiled::Start(); \ + } \ + { \ + TProfiled::TFunction func(*this, __func__, __LINE__); \ + switch (type) { \ + SESSION_EVENTS(SESSION_HANDLER) \ + INCOMING_HANDSHAKE_EVENTS(INCOMING_HANDSHAKE_HANDLER) \ + HANDSHAKE_STATUS_EVENTS(HANDSHAKE_STATUS_HANDLER) \ + cFunc(TEvInterconnect::EvDisconnect, DISCONNECT_HANDLER) \ + hFunc(TEvents::TEvWakeup, WAKEUP_HANDLER) \ + hFunc(TEvGetSecureSocket, Handle) \ + hFunc(NMon::TEvHttpInfo, GenerateHttpInfo) \ + cFunc(EvCleanupEventQueue, HandleCleanupEventQueue) \ + hFunc(TEvInterconnect::TEvNodeInfo, NODE_INFO_HANDLER) \ + cFunc(TEvInterconnect::EvClosePeerSocket, HandleClosePeerSocket) \ + cFunc(TEvInterconnect::EvCloseInputSession, HandleCloseInputSession) \ + cFunc(TEvInterconnect::EvPoisonSession, HandlePoisonSession) \ + hFunc(TEvSessionBufferSizeRequest, HandleSessionBufferSizeRequest) \ + hFunc(TEvQueryStats, Handle) \ + cFunc(TEvInterconnect::EvTerminate, HandleTerminate) \ + cFunc(EvPassAwayIfNeeded, HandlePassAwayIfNeeded) \ + default: \ + Y_FAIL("unexpected event Type# 0x%08" PRIx32, type); \ + } \ + } \ + if (profiled) { \ + if (TProfiled::Duration() >= TDuration::MilliSeconds(16)) { \ + const TString report = TProfiled::Format(); \ + LOG_ERROR_IC("ICP35", "event processing took too much time %s", report.data()); \ + } \ + TProfiled::Finish(); \ + } \ + } + + template <typename T> + void Ignore(T& /*ev*/) { + ICPROXY_PROFILED; + } + + void Ignore() { + ICPROXY_PROFILED; + } + + void Ignore(TEvHandshakeDone::TPtr& ev) { + ICPROXY_PROFILED; + + Y_VERIFY(ev->Sender != 
IncomingHandshakeActor); + Y_VERIFY(ev->Sender != OutgoingHandshakeActor); + } + + void Ignore(TEvHandshakeFail::TPtr& ev) { + ICPROXY_PROFILED; + + Y_VERIFY(ev->Sender != IncomingHandshakeActor); + Y_VERIFY(ev->Sender != OutgoingHandshakeActor); + LogHandshakeFail(ev, true); + } + + const char* State = nullptr; + TInstant StateSwitchTime; + + template <typename... TArgs> + void SwitchToState(int line, const char* name, TArgs&&... args) { + ICPROXY_PROFILED; + + LOG_DEBUG_IC("ICP77", "@%d %s -> %s", line, State, name); + State = name; + StateSwitchTime = TActivationContext::Now(); + Become(std::forward<TArgs>(args)...); + Y_VERIFY(!Terminated || CurrentStateFunc() == &TThis::HoldByError); // ensure we never escape this state + if (CurrentStateFunc() != &TThis::PendingActivation) { + PassAwayTimestamp = TInstant::Max(); + } + } + + TInstant PassAwayTimestamp; + bool PassAwayScheduled = false; + + void SwitchToInitialState() { + ICPROXY_PROFILED; + + Y_VERIFY(!PendingSessionEvents && !PendingIncomingHandshakeEvents, "%s PendingSessionEvents# %zu" + " PendingIncomingHandshakeEvents# %zu State# %s", LogPrefix.data(), PendingSessionEvents.size(), + PendingIncomingHandshakeEvents.size(), State); + SwitchToState(__LINE__, "PendingActivation", &TThis::PendingActivation); + if (DynamicPtr && !PassAwayScheduled && PassAwayTimestamp != TInstant::Max()) { + TActivationContext::Schedule(PassAwayTimestamp, new IEventHandle(EvPassAwayIfNeeded, 0, SelfId(), + {}, nullptr, 0)); + PassAwayScheduled = true; + } + } + + void HandlePassAwayIfNeeded() { + Y_VERIFY(PassAwayScheduled); + if (PassAwayTimestamp != TInstant::Max()) { + PassAway(); + } + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // PendingActivation + // + // In this state we are just waiting for some activities, which may include: + // * an external Session event + // * incoming handshake request + // + // Upon receiving such an event, we put it into the corresponding queue and initiate start up by calling StartConfiguring, + // which issues TEvGetNode to the nameservice and arms a timer to handle the timeout (which should not + // occur, but we want to be sure we don't hang on this), and then switches to PendingNodeInfo state. + + PROXY_STFUNC(PendingActivation, + RequestNodeInfo, // Session events + RequestNodeInfoForIncomingHandshake, // Incoming handshake requests + Ignore, // Handshake status + Ignore, // Disconnect request + Ignore, // Wakeup + Ignore // Node info + ) + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // PendingNodeInfo + // + // This state is entered when we have asked the nameserver to provide a description for the peer node we are working with. All + // external Session events and incoming handshake requests are enqueued into their respective queues; TEvNodeInfo + // is the main event that triggers processing. On success, we try to initiate outgoing handshake if needed, or process + // incoming handshakes. On error, we enter HoldByError state.
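+ //
+ // For example, the very first TEvForward sent towards a not-yet-connected peer takes this path: it is
+ // enqueued by EnqueueSessionEvent, the nameservice answers with TEvNodeInfo, Configure() records the
+ // peer host name, and ProcessConfigured() then starts the initial handshake on behalf of the queued event.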
+ // + // NOTE: handshake status events are also enqueued as the handshake actor may have generated a failure event due to + // a timeout or some other reason without waiting for an acknowledgement, and it must be processed correctly to prevent + // session hang + + PROXY_STFUNC(PendingNodeInfo, + EnqueueSessionEvent, // Session events + EnqueueIncomingHandshakeEvent, // Incoming handshake requests + EnqueueIncomingHandshakeEvent, // Handshake status + Disconnect, // Disconnect request + ConfigureTimeout, // Wakeup + Configure // Node info + ) + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // PendingConnection + // + // Here we have issued an outgoing handshake or accepted an incoming one (or maybe both) and we are waiting for + // the status of the handshake. When one of the handshakes finishes, we use its status to establish the connection (or to + // go to the error state). When one handshake terminates with an error while the other is running, we will still wait for the + // second one to finish. + + PROXY_STFUNC(PendingConnection, + EnqueueSessionEvent, // Session events + IncomingHandshake, // Incoming handshake requests + HandleHandshakeStatus, // Handshake status + Disconnect, // Disconnect request + Ignore, // Wakeup + Ignore // Node info + ) + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // StateWork + // + // We have an accepted session and process any incoming messages within it. Incoming handshakes are accepted + // concurrently and applied when finished. + + PROXY_STFUNC(StateWork, + ForwardSessionEventToSession, // Session events + IncomingHandshake, // Incoming handshake requests + HandleHandshakeStatus, // Handshake status + Disconnect, // Disconnect request + Ignore, // Wakeup + Ignore // Node info + ) + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // HoldByError + // + // When something bad happens with the connection, we sleep in this state. After waking up we go back to + // PendingActivation.
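+ //
+ // The sleep duration grows exponentially (see TransitToErrorState): with FirstErrorSleep = 10 ms,
+ // SleepRetryMultiplier = 4 and MaxErrorSleep = 10 s the successive sleeps are
+ // 10 ms, 40 ms, 160 ms, 640 ms, 2.56 s, 10 s, 10 s, ...; the duration resets once a handshake succeeds.
+ //
+ // Overall proxy state machine, as described in the sections above:
+ //
+ //   PendingActivation ---(session event / incoming handshake)---> PendingNodeInfo
+ //   PendingNodeInfo -----(TEvNodeInfo, handshake started)--------> PendingConnection
+ //   PendingNodeInfo -----(error or timeout)----------------------> HoldByError
+ //   PendingConnection ---(handshake succeeded)-------------------> StateWork
+ //   StateWork -----------(session terminated)--------------------> PendingActivation / PendingConnection
+ //   any state -----------(conclusive failure)--------------------> HoldByError
+ //   HoldByError ---------(wakeup timer)--------------------------> PendingActivation
+ //   HoldByError ---------(incoming handshake)--------------------> PendingNodeInfo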
+ + PROXY_STFUNC(HoldByError, + DropSessionEvent, // Session events + RequestNodeInfoForIncomingHandshake, // Incoming handshake requests + Ignore, // Handshake status + Ignore, // Disconnect request + WakeupFromErrorState, // Wakeup + Ignore // Node info + ) + +#undef SESSION_EVENTS +#undef INCOMING_HANDSHAKE_EVENTS +#undef HANDSHAKE_STATUS_EVENTS +#undef PROXY_STFUNC + + void ForwardSessionEventToSession(STATEFN_SIG); + void EnqueueSessionEvent(STATEFN_SIG); + + // Incoming handshake handlers, including special wrapper when the IncomingHandshake is used as fFunc + void IncomingHandshake(STATEFN_SIG) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvHandshakeAsk, IncomingHandshake); + hFunc(TEvHandshakeRequest, IncomingHandshake); + default: + Y_FAIL(); + } + } + void IncomingHandshake(TEvHandshakeAsk::TPtr& ev); + void IncomingHandshake(TEvHandshakeRequest::TPtr& ev); + + void RequestNodeInfo(STATEFN_SIG); + void RequestNodeInfoForIncomingHandshake(STATEFN_SIG); + + void StartInitialHandshake(); + void StartResumeHandshake(ui64 inputCounter); + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Incoming handshake event queue processing + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + void EnqueueIncomingHandshakeEvent(STATEFN_SIG); + void EnqueueIncomingHandshakeEvent(TEvHandshakeDone::TPtr& ev); + void EnqueueIncomingHandshakeEvent(TEvHandshakeFail::TPtr& ev); + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // PendingNodeInfo + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + IEventBase* ConfigureTimeoutCookie; // pointer to the scheduled event used to match sent and received events + + void StartConfiguring(); + void Configure(TEvInterconnect::TEvNodeInfo::TPtr& ev); + void ConfigureTimeout(TEvents::TEvWakeup::TPtr& ev); + void ProcessConfigured(); + + void HandleHandshakeStatus(TEvHandshakeDone::TPtr& ev); + void HandleHandshakeStatus(TEvHandshakeFail::TPtr& ev); + + void TransitToErrorState(TString Explanation, bool updateErrorLog = true); + void WakeupFromErrorState(TEvents::TEvWakeup::TPtr& ev); + void Disconnect(); + + const ui32 PeerNodeId; + IActor **DynamicPtr; + + void ValidateEvent(TAutoPtr<IEventHandle>& ev, const char* func) { + if (SelfId().NodeId() == PeerNodeId) { + TString msg = Sprintf("Event Type# 0x%08" PRIx32 " TypeRewrite# 0x%08" PRIx32 + " from Sender# %s sent to the proxy for the node itself via Interconnect;" + " THIS IS NOT A BUG IN INTERCONNECT, check the event sender instead", + ev->Type, ev->GetTypeRewrite(), ev->Sender.ToString().data()); + LOG_ERROR_IC("ICP03", "%s", msg.data()); + Y_VERIFY_DEBUG(false, "%s", msg.data()); + } + + Y_VERIFY(ev->GetTypeRewrite() != TEvInterconnect::EvForward || ev->Recipient.NodeId() == PeerNodeId, + "Recipient/Proxy NodeId mismatch Recipient# %s Type# 0x%08" PRIx32 " PeerNodeId# %" PRIu32 " Func# %s", + ev->Recipient.ToString().data(), ev->Type, PeerNodeId, func); + } + + // Common with helpers + // All proxy actors share the same information in the object + // read only + TInterconnectProxyCommon::TPtr const Common; + + const TActorId& GetNameserviceId() const { + return Common->NameserviceId; + } + + TString TechnicalPeerHostName; + + std::shared_ptr<IInterconnectMetrics> Metrics; + + void HandleClosePeerSocket(); + void 
HandleCloseInputSession(); + void HandlePoisonSession(); + + void HandleSessionBufferSizeRequest(TEvSessionBufferSizeRequest::TPtr& ev); + + bool CleanupEventQueueScheduled = false; + void ScheduleCleanupEventQueue(); + void HandleCleanupEventQueue(); + void CleanupEventQueue(); + + // hold all events before connection is established + struct TPendingSessionEvent { + TInstant Deadline; + ui32 Size; + THolder<IEventHandle> Event; + + TPendingSessionEvent(TInstant deadline, ui32 size, TAutoPtr<IEventHandle> event) + : Deadline(deadline) + , Size(size) + , Event(event) + {} + }; + TDeque<TPendingSessionEvent> PendingSessionEvents; + ui64 PendingSessionEventsSize = 0; + void ProcessPendingSessionEvents(); + void DropSessionEvent(STATEFN_SIG); + + TInterconnectSessionTCP* Session = nullptr; + TActorId SessionID; + + // virtual ids used during handshake to check whether the connection belongs + // to the same session or to find out the latest handshake; + // they are virtual because the session actor appears only after a successful handshake + TActorId SessionVirtualId; + TActorId RemoteSessionVirtualId; + + TActorId GenerateSessionVirtualId() { + ICPROXY_PROFILED; + + const ui64 localId = TlsActivationContext->ExecutorThread.ActorSystem->AllocateIDSpace(1); + return NActors::TActorId(SelfId().NodeId(), 0, localId, 0); + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + TActorId IncomingHandshakeActor; + TInstant IncomingHandshakeActorFilledIn; + TInstant IncomingHandshakeActorReset; + TMaybe<ui64> LastSerialFromIncomingHandshake; + THolder<IEventBase> HeldHandshakeReply; + + void DropIncomingHandshake(bool poison = true) { + ICPROXY_PROFILED; + + if (const TActorId& actorId = std::exchange(IncomingHandshakeActor, TActorId())) { + LOG_DEBUG_IC("ICP111", "dropped incoming handshake: %s poison: %s", actorId.ToString().data(), + poison ? "true" : "false"); + if (poison) { + Send(actorId, new TEvents::TEvPoisonPill); + } + LastSerialFromIncomingHandshake.Clear(); + HeldHandshakeReply.Reset(); + IncomingHandshakeActorReset = TActivationContext::Now(); + } + } + + void DropOutgoingHandshake(bool poison = true) { + ICPROXY_PROFILED; + + if (const TActorId& actorId = std::exchange(OutgoingHandshakeActor, TActorId())) { + LOG_DEBUG_IC("ICP112", "dropped outgoing handshake: %s poison: %s", actorId.ToString().data(), + poison ? "true" : "false"); + if (poison) { + Send(actorId, new TEvents::TEvPoisonPill); + } + OutgoingHandshakeActorReset = TActivationContext::Now(); + } + } + + void DropHandshakes() { + ICPROXY_PROFILED; + + DropIncomingHandshake(); + DropOutgoingHandshake(); + } + + void PrepareNewSessionHandshake() { + ICPROXY_PROFILED; + + // drop existing session if we have one + if (Session) { + LOG_INFO_IC("ICP04", "terminating current session as we are negotiating a new one"); + IActor::InvokeOtherActor(*Session, &TInterconnectSessionTCP::Terminate, TDisconnectReason::NewSession()); + } + + // ensure we have no current session + Y_VERIFY(!Session); + + // switch to pending connection state -- we wait for handshakes, we want more handshakes!
+ SwitchToState(__LINE__, "PendingConnection", &TThis::PendingConnection); + } + + void IssueIncomingHandshakeReply(const TActorId& handshakeId, ui64 peerLocalId, + THolder<IEventBase> event); + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + TActorId OutgoingHandshakeActor; + TInstant OutgoingHandshakeActorCreated; + TInstant OutgoingHandshakeActorReset; + + TInstant LastSessionDieTime; + + void GenerateHttpInfo(NMon::TEvHttpInfo::TPtr& ev); + + void Handle(TEvQueryStats::TPtr& ev); + + TDuration HoldByErrorWakeupDuration = TDuration::Zero(); + TEvents::TEvWakeup* HoldByErrorWakeupCookie; + + THolder<TProgramInfo> RemoteProgramInfo; + NInterconnect::TSecureSocketContext::TPtr SecureContext; + + void Handle(TEvGetSecureSocket::TPtr ev) { + auto socket = MakeIntrusive<NInterconnect::TSecureSocket>(*ev->Get()->Socket, SecureContext); + Send(ev->Sender, new TEvSecureSocket(std::move(socket))); + } + + TDeque<THolder<IEventHandle>> PendingIncomingHandshakeEvents; + + TDeque<std::tuple<TInstant, TString, TString, ui32>> ErrorStateLog; + + void UpdateErrorStateLog(TInstant now, TString kind, TString explanation) { + ICPROXY_PROFILED; + + if (ErrorStateLog) { + auto& back = ErrorStateLog.back(); + TString lastKind, lastExpl; + if (kind == std::get<1>(back) && explanation == std::get<2>(back)) { + std::get<0>(back) = now; + ++std::get<3>(back); + return; + } + } + + ErrorStateLog.emplace_back(now, std::move(kind), std::move(explanation), 1); + if (ErrorStateLog.size() > 20) { + ErrorStateLog.pop_front(); + } + } + + void LogHandshakeFail(TEvHandshakeFail::TPtr& ev, bool inconclusive); + + bool Terminated = false; + void HandleTerminate(); + + void PassAway() override; + }; + +} diff --git a/library/cpp/actors/interconnect/interconnect_tcp_server.cpp b/library/cpp/actors/interconnect/interconnect_tcp_server.cpp new file mode 100644 index 0000000000..b95c994598 --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_tcp_server.cpp @@ -0,0 +1,117 @@ +#include "interconnect_tcp_server.h" +#include "interconnect_handshake.h" + +#include <library/cpp/actors/core/log.h> +#include <library/cpp/actors/protos/services_common.pb.h> + +#include "interconnect_common.h" + +namespace NActors { + TInterconnectListenerTCP::TInterconnectListenerTCP(const TString& address, ui16 port, TInterconnectProxyCommon::TPtr common, const TMaybe<SOCKET>& socket) + : TActor(&TThis::Initial) + , TInterconnectLoggingBase(Sprintf("ICListener: %s", SelfId().ToString().data())) + , Address(address.c_str(), port) + , Listener( + socket + ? 
new NInterconnect::TStreamSocket(*socket) + : nullptr) + , ExternalSocket(!!Listener) + , ProxyCommonCtx(std::move(common)) + { + if (ExternalSocket) { + SetNonBlock(*Listener); + } + } + + TAutoPtr<IEventHandle> TInterconnectListenerTCP::AfterRegister(const TActorId& self, const TActorId& parentId) { + return new IEventHandle(self, parentId, new TEvents::TEvBootstrap, 0); + } + + void TInterconnectListenerTCP::Die(const TActorContext& ctx) { + LOG_DEBUG_IC("ICL08", "Dying"); + TActor::Die(ctx); + } + + int TInterconnectListenerTCP::Bind() { + NInterconnect::TAddress addr = Address; + + if (ProxyCommonCtx->Settings.BindOnAllAddresses) { + switch (addr.GetFamily()) { + case AF_INET: { + auto *sa = reinterpret_cast<sockaddr_in*>(addr.SockAddr()); + sa->sin_addr = {INADDR_ANY}; + break; + } + + case AF_INET6: { + auto *sa = reinterpret_cast<sockaddr_in6*>(addr.SockAddr()); + sa->sin6_addr = in6addr_any; + break; + } + + default: + Y_FAIL("Unsupported address family"); + } + } + + Listener = NInterconnect::TStreamSocket::Make(addr.GetFamily()); + if (*Listener == -1) { + return errno; + } + SetNonBlock(*Listener); + Listener->SetSendBufferSize(ProxyCommonCtx->Settings.GetSendBufferSize()); // TODO(alexvru): WTF? + SetSockOpt(*Listener, SOL_SOCKET, SO_REUSEADDR, 1); + if (const auto e = -Listener->Bind(addr)) { + return e; + } else if (const auto e = -Listener->Listen(SOMAXCONN)) { + return e; + } else { + return 0; + } + } + + void TInterconnectListenerTCP::Bootstrap(const TActorContext& ctx) { + if (!Listener) { + if (const int err = Bind()) { + LOG_ERROR_IC("ICL01", "Bind failed: %s (%s)", strerror(err), Address.ToString().data()); + Listener.Reset(); + Become(&TThis::Initial, TDuration::Seconds(1), new TEvents::TEvBootstrap); + return; + } + } + if (const auto& callback = ProxyCommonCtx->InitWhiteboard) { + callback(Address.GetPort(), TlsActivationContext->ExecutorThread.ActorSystem); + } + const bool success = ctx.Send(MakePollerActorId(), new TEvPollerRegister(Listener, SelfId(), {})); + Y_VERIFY(success); + Become(&TThis::Listen); + } + + void TInterconnectListenerTCP::Handle(TEvPollerRegisterResult::TPtr ev, const TActorContext& ctx) { + PollerToken = std::move(ev->Get()->PollerToken); + Process(ctx); + } + + void TInterconnectListenerTCP::Process(const TActorContext& ctx) { + for (;;) { + NInterconnect::TAddress address; + const int r = Listener->Accept(address); + if (r >= 0) { + LOG_DEBUG_IC("ICL04", "Accepted from: %s", address.ToString().data()); + auto socket = MakeIntrusive<NInterconnect::TStreamSocket>(static_cast<SOCKET>(r)); + ctx.Register(CreateIncomingHandshakeActor(ProxyCommonCtx, std::move(socket))); + continue; + } else if (-r != EAGAIN && -r != EWOULDBLOCK) { + Y_VERIFY(-r != ENFILE && -r != EMFILE && !ExternalSocket); + LOG_ERROR_IC("ICL06", "Listen failed: %s (%s)", strerror(-r), Address.ToString().data()); + Listener.Reset(); + PollerToken.Reset(); + Become(&TThis::Initial, TDuration::Seconds(1), new TEvents::TEvBootstrap); + } else if (PollerToken) { + PollerToken->Request(true, false); + } + break; + } + } + +} diff --git a/library/cpp/actors/interconnect/interconnect_tcp_server.h b/library/cpp/actors/interconnect/interconnect_tcp_server.h new file mode 100644 index 0000000000..fc71073c2d --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_tcp_server.h @@ -0,0 +1,57 @@ +#pragma once + +#include <library/cpp/actors/core/hfunc.h> +#include <library/cpp/actors/core/event_pb.h> +#include <library/cpp/actors/core/events.h> + +#include "interconnect_common.h" 
+#include "poller_actor.h" +#include "events_local.h" + +namespace NActors { + class TInterconnectListenerTCP: public TActor<TInterconnectListenerTCP>, public TInterconnectLoggingBase { + public: + static constexpr EActivityType ActorActivityType() { + return INTERCONNECT_COMMON; + } + + TInterconnectListenerTCP(const TString& address, ui16 port, TInterconnectProxyCommon::TPtr common, const TMaybe<SOCKET>& socket = Nothing()); + int Bind(); + + private: + STFUNC(Initial) { + switch (ev->GetTypeRewrite()) { + CFunc(TEvents::TEvBootstrap::EventType, Bootstrap); + CFunc(TEvents::TEvPoisonPill::EventType, Die); + } + } + + STFUNC(Listen) { + switch (ev->GetTypeRewrite()) { + CFunc(TEvents::TEvPoisonPill::EventType, Die); + HFunc(TEvPollerRegisterResult, Handle); + CFunc(TEvPollerReady::EventType, Process); + } + } + + TAutoPtr<IEventHandle> AfterRegister(const TActorId& self, const TActorId& parentId) override; + + void Die(const TActorContext& ctx) override; + + void Bootstrap(const TActorContext& ctx); + void Handle(TEvPollerRegisterResult::TPtr ev, const TActorContext& ctx); + + void Process(const TActorContext& ctx); + + const NInterconnect::TAddress Address; + TIntrusivePtr<NInterconnect::TStreamSocket> Listener; + const bool ExternalSocket; + TPollerToken::TPtr PollerToken; + TInterconnectProxyCommon::TPtr const ProxyCommonCtx; + }; + + static inline TActorId MakeInterconnectListenerActorId(bool dynamic) { + char x[12] = {'I', 'C', 'L', 'i', 's', 't', 'e', 'n', 'e', 'r', '/', dynamic ? 'D' : 'S'}; + return TActorId(0, TStringBuf(x, 12)); + } +} diff --git a/library/cpp/actors/interconnect/interconnect_tcp_session.cpp b/library/cpp/actors/interconnect/interconnect_tcp_session.cpp new file mode 100644 index 0000000000..2ded7f9f53 --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_tcp_session.cpp @@ -0,0 +1,1228 @@ +#include "interconnect_tcp_proxy.h" +#include "interconnect_tcp_session.h" +#include "interconnect_handshake.h" + +#include <library/cpp/actors/core/probes.h> +#include <library/cpp/actors/core/log.h> +#include <library/cpp/actors/core/interconnect.h> +#include <library/cpp/actors/util/datetime.h> +#include <library/cpp/actors/protos/services_common.pb.h> +#include <library/cpp/monlib/service/pages/templates.h> + +namespace NActors { + LWTRACE_USING(ACTORLIB_PROVIDER); + + DECLARE_WILSON_EVENT(OutputQueuePush, (ui32, QueueSizeInEvents), (ui64, QueueSizeInBytes)); + + template<typename T> + T Coalesce(T&& x) { + return x; + } + + template<typename T, typename T2, typename... TRest> + typename std::common_type<T, T2, TRest...>::type Coalesce(T&& first, T2&& mid, TRest&&... 
rest) { + if (first != typename std::remove_reference<T>::type()) { + return first; + } else { + return Coalesce(std::forward<T2>(mid), std::forward<TRest>(rest)...); + } + } + + TInterconnectSessionTCP::TInterconnectSessionTCP(TInterconnectProxyTCP* const proxy, TSessionParams params) + : TActor(&TInterconnectSessionTCP::StateFunc) + , Created(TInstant::Now()) + , Proxy(proxy) + , CloseOnIdleWatchdog(GetCloseOnIdleTimeout(), std::bind(&TThis::OnCloseOnIdleTimerHit, this)) + , LostConnectionWatchdog(GetLostConnectionTimeout(), std::bind(&TThis::OnLostConnectionTimerHit, this)) + , Params(std::move(params)) + , TotalOutputQueueSize(0) + , OutputStuckFlag(false) + , OutputQueueUtilization(16) + , OutputCounter(0ULL) + { + Proxy->Metrics->SetConnected(0); + ReceiveContext.Reset(new TReceiveContext); + } + + TInterconnectSessionTCP::~TInterconnectSessionTCP() { + // close socket ASAP when actor system is being shut down + if (Socket) { + Socket->Shutdown(SHUT_RDWR); + } + } + + void TInterconnectSessionTCP::Init() { + auto destroyCallback = [as = TlsActivationContext->ExecutorThread.ActorSystem, id = Proxy->Common->DestructorId](THolder<IEventBase> event) { + as->Send(id, event.Release()); + }; + Pool.ConstructInPlace(Proxy->Common, std::move(destroyCallback)); + ChannelScheduler.ConstructInPlace(Proxy->PeerNodeId, Proxy->Common->ChannelsConfig, Proxy->Metrics, *Pool, + Proxy->Common->Settings.MaxSerializedEventSize, Params); + + LOG_INFO(*TlsActivationContext, NActorsServices::INTERCONNECT_STATUS, "[%u] session created", Proxy->PeerNodeId); + SetPrefix(Sprintf("Session %s [node %" PRIu32 "]", SelfId().ToString().data(), Proxy->PeerNodeId)); + SendUpdateToWhiteboard(); + } + + void TInterconnectSessionTCP::CloseInputSession() { + Send(ReceiverId, new TEvInterconnect::TEvCloseInputSession); + } + + void TInterconnectSessionTCP::Handle(TEvTerminate::TPtr& ev) { + Terminate(ev->Get()->Reason); + } + + void TInterconnectSessionTCP::HandlePoison() { + Terminate(TDisconnectReason()); + } + + void TInterconnectSessionTCP::Terminate(TDisconnectReason reason) { + LOG_INFO_IC_SESSION("ICS01", "socket: %" PRIi64, (Socket ? 
i64(*Socket) : -1)); + + IActor::InvokeOtherActor(*Proxy, &TInterconnectProxyTCP::UnregisterSession, this); + ShutdownSocket(std::move(reason)); + + for (const auto& kv : Subscribers) { + Send(kv.first, new TEvInterconnect::TEvNodeDisconnected(Proxy->PeerNodeId), 0, kv.second); + } + Proxy->Metrics->SubSubscribersCount(Subscribers.size()); + Subscribers.clear(); + + ChannelScheduler->ForEach([&](TEventOutputChannel& channel) { + channel.NotifyUndelivered(); + }); + + if (ReceiverId) { + Send(ReceiverId, new TEvents::TEvPoisonPill); + } + + SendUpdateToWhiteboard(false); + + Proxy->Metrics->SubOutputBuffersTotalSize(TotalOutputQueueSize); + Proxy->Metrics->SubInflightDataAmount(InflightDataAmount); + + LOG_INFO(*TlsActivationContext, NActorsServices::INTERCONNECT_STATUS, "[%u] session destroyed", Proxy->PeerNodeId); + + TActor::PassAway(); + } + + void TInterconnectSessionTCP::PassAway() { + Y_FAIL("TInterconnectSessionTCP::PassAway() can't be called directly"); + } + + void TInterconnectSessionTCP::Forward(STATEFN_SIG) { + Proxy->ValidateEvent(ev, "Forward"); + + LOG_DEBUG_IC_SESSION("ICS02", "send event from: %s to: %s", ev->Sender.ToString().data(), ev->Recipient.ToString().data()); + ++MessagesGot; + + if (ev->Flags & IEventHandle::FlagSubscribeOnSession) { + Subscribe(ev); + } + + ui16 evChannel = ev->GetChannel(); + auto& oChannel = ChannelScheduler->GetOutputChannel(evChannel); + const bool wasWorking = oChannel.IsWorking(); + + const auto [dataSize, event] = oChannel.Push(*ev); + LWTRACK(ForwardEvent, event->Orbit, Proxy->PeerNodeId, event->Descr.Type, event->Descr.Flags, LWACTORID(event->Descr.Recipient), LWACTORID(event->Descr.Sender), event->Descr.Cookie, event->EventSerializedSize); + + TotalOutputQueueSize += dataSize; + Proxy->Metrics->AddOutputBuffersTotalSize(dataSize); + if (!wasWorking) { + // this channel has returned to work -- it was empty and thus we have just put the first event in the queue + ChannelScheduler->AddToHeap(oChannel, EqualizeCounter); + } + + SetOutputStuckFlag(true); + ++NumEventsInReadyChannels; + + LWTRACK(EnqueueEvent, event->Orbit, Proxy->PeerNodeId, NumEventsInReadyChannels, GetWriteBlockedTotal(), evChannel, oChannel.GetQueueSize(), oChannel.GetBufferedAmountOfData()); + WILSON_TRACE(*TlsActivationContext, &ev->TraceId, OutputQueuePush, + QueueSizeInEvents = oChannel.GetQueueSize(), + QueueSizeInBytes = oChannel.GetBufferedAmountOfData()); + + // check for overloaded queues + ui64 sendBufferDieLimit = Proxy->Common->Settings.SendBufferDieLimitInMB * ui64(1 << 20); + if (sendBufferDieLimit != 0 && TotalOutputQueueSize > sendBufferDieLimit) { + LOG_ERROR_IC_SESSION("ICS03", "socket: %" PRIi64 " output queue is overloaded, actual %" PRIu64 " bytes, limit is %" PRIu64, + Socket ?
+            return Terminate(TDisconnectReason::QueueOverload());
+        }
+
+        ui64 outputBuffersTotalSizeLimit = Proxy->Common->Settings.OutputBuffersTotalSizeLimitInMB * ui64(1 << 20);
+        if (outputBuffersTotalSizeLimit != 0 && static_cast<ui64>(Proxy->Metrics->GetOutputBuffersTotalSize()) > outputBuffersTotalSizeLimit) {
+            LOG_ERROR_IC_SESSION("ICS77", "Exceeded total limit on output buffers size");
+            if (AtomicTryLock(&Proxy->Common->StartedSessionKiller)) {
+                CreateSessionKillingActor(Proxy->Common);
+            }
+        }
+
+        if (RamInQueue && !RamInQueue->Batching) {
+            // we have pending TEvRam, so GenerateTraffic will be called no matter what
+        } else if (InflightDataAmount >= GetTotalInflightAmountOfData() || !Socket || ReceiveContext->WriteBlockedByFullSendBuffer) {
+            // we can't issue more traffic now; GenerateTraffic will be called upon unblocking
+        } else if (TotalOutputQueueSize >= 64 * 1024) {
+            // the output queue is large enough to make issuing traffic right away worthwhile
+            GenerateTraffic();
+        } else if (!RamInQueue) {
+            Y_VERIFY_DEBUG(NumEventsInReadyChannels == 1);
+            RamInQueue = new TEvRam(true);
+            auto *ev = new IEventHandle(SelfId(), {}, RamInQueue);
+            const TDuration batchPeriod = Proxy->Common->Settings.BatchPeriod;
+            if (batchPeriod != TDuration()) {
+                TActivationContext::Schedule(batchPeriod, ev);
+            } else {
+                TActivationContext::Send(ev);
+            }
+            LWPROBE(StartBatching, Proxy->PeerNodeId, batchPeriod.MillisecondsFloat());
+            LOG_DEBUG_IC_SESSION("ICS17", "batching started");
+        }
+    }
+
+    void TInterconnectSessionTCP::Subscribe(STATEFN_SIG) {
+        LOG_DEBUG_IC_SESSION("ICS04", "subscribe for session state for %s", ev->Sender.ToString().data());
+        const auto [it, inserted] = Subscribers.emplace(ev->Sender, ev->Cookie);
+        if (inserted) {
+            Proxy->Metrics->IncSubscribersCount();
+        } else {
+            it->second = ev->Cookie;
+        }
+        Send(ev->Sender, new TEvInterconnect::TEvNodeConnected(Proxy->PeerNodeId), 0, ev->Cookie);
+    }
+
+    void TInterconnectSessionTCP::Unsubscribe(STATEFN_SIG) {
+        LOG_DEBUG_IC_SESSION("ICS05", "unsubscribe for session state for %s", ev->Sender.ToString().data());
+        Proxy->Metrics->SubSubscribersCount(Subscribers.erase(ev->Sender));
+    }
+
+    THolder<TEvHandshakeAck> TInterconnectSessionTCP::ProcessHandshakeRequest(TEvHandshakeAsk::TPtr& ev) {
+        TEvHandshakeAsk *msg = ev->Get();
+
+        // close existing input session, if any, and do nothing upon its destruction
+        ReestablishConnection({}, false, TDisconnectReason::NewSession());
+        const ui64 lastInputSerial = ReceiveContext->LockLastProcessedPacketSerial();
+
+        LOG_INFO_IC_SESSION("ICS08", "incoming handshake Self# %s Peer# %s Counter# %" PRIu64 " LastInputSerial# %" PRIu64,
+            msg->Self.ToString().data(), msg->Peer.ToString().data(), msg->Counter, lastInputSerial);
+
+        return MakeHolder<TEvHandshakeAck>(msg->Peer, lastInputSerial, Params);
+    }
+
+    void TInterconnectSessionTCP::SetNewConnection(TEvHandshakeDone::TPtr& ev) {
+        if (ReceiverId) {
+            // upon destruction of input session actor invoke this callback again
+            ReestablishConnection(std::move(ev), false, TDisconnectReason::NewSession());
+            return;
+        }
+
+        LOG_INFO_IC_SESSION("ICS09", "handshake done sender: %s self: %s peer: %s socket: %" PRIi64,
+            ev->Sender.ToString().data(), ev->Get()->Self.ToString().data(), ev->Get()->Peer.ToString().data(),
+            i64(*ev->Get()->Socket));
+
+        NewConnectionSet = TActivationContext::Now();
+        PacketsWrittenToSocket = 0;
+
+        SendBufferSize = ev->Get()->Socket->GetSendBufferSize();
+        Socket = std::move(ev->Get()->Socket);
+
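+        // the handshake carries the serial of the next packet the peer expects, while LastConfirmed reflects what the
+        // peer had already confirmed over the previous connection; taking the maximum below presumably keeps a stale
+        // handshake value from rewinding the resend position behind already-confirmed data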
+ // there may be a race + const ui64 nextPacket = Max(LastConfirmed, ev->Get()->NextPacket); + + // arm watchdogs + CloseOnIdleWatchdog.Arm(SelfId()); + + // reset activity timestamps + LastInputActivityTimestamp = LastPayloadActivityTimestamp = TActivationContext::Now(); + + LOG_INFO_IC_SESSION("ICS10", "traffic start"); + + // create input session actor + auto actor = MakeHolder<TInputSessionTCP>(SelfId(), Socket, ReceiveContext, Proxy->Common, + Proxy->Metrics, Proxy->PeerNodeId, nextPacket, GetDeadPeerTimeout(), Params); + ReceiveContext->UnlockLastProcessedPacketSerial(); + ReceiverId = Params.Encryption ? RegisterWithSameMailbox(actor.Release()) : Register(actor.Release(), TMailboxType::ReadAsFilled); + + // register our socket in poller actor + LOG_DEBUG_IC_SESSION("ICS11", "registering socket in PollerActor"); + const bool success = Send(MakePollerActorId(), new TEvPollerRegister(Socket, ReceiverId, SelfId())); + Y_VERIFY(success); + ReceiveContext->WriteBlockedByFullSendBuffer = false; + + LostConnectionWatchdog.Disarm(); + Proxy->Metrics->SetConnected(1); + LOG_INFO(*TlsActivationContext, NActorsServices::INTERCONNECT_STATUS, "[%u] connected", Proxy->PeerNodeId); + + // arm pinger timer + ResetFlushLogic(); + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // REINITIALIZE SEND QUEUE + // + // scan through send queue and leave only those packets who have data -- we will simply resend them; drop all other + // auxiliary packets; also reset packet metrics to zero to start sending from the beginning + // also reset SendQueuePos + + // drop confirmed packets first as we do not need unwanted retransmissions + SendQueuePos = SendQueue.end(); + DropConfirmed(nextPacket); + + for (TSendQueue::iterator it = SendQueue.begin(); it != SendQueue.end(); ) { + const TSendQueue::iterator next = std::next(it); + if (it->IsEmpty()) { + SendQueueCache.splice(SendQueueCache.begin(), SendQueue, it); + } else { + it->ResetBufs(); + } + it = next; + } + TrimSendQueueCache(); + SendQueuePos = SendQueue.begin(); + + TMaybe<ui64> s; + for (auto it = SendQueuePos; it != SendQueue.end(); ++it) { + if (!it->IsEmpty()) { + s = it->GetSerial(); + } + } + const ui64 serial = s.GetOrElse(Max<ui64>()); + + Y_VERIFY(serial > LastConfirmed, "%s serial# %" PRIu64 " LastConfirmed# %" PRIu64, LogPrefix.data(), serial, LastConfirmed); + LOG_DEBUG_IC_SESSION("ICS06", "rewind SendQueue size# %zu LastConfirmed# %" PRIu64 " SendQueuePos.Serial# %" PRIu64 "\n", + SendQueue.size(), LastConfirmed, serial); + + BytesUnwritten = 0; + for (const auto& packet : SendQueue) { + BytesUnwritten += (Params.UseModernFrame ? 
sizeof(TTcpPacketHeader_v2) : sizeof(TTcpPacketHeader_v1)) +
+                packet.GetDataSize();
+        }
+
+        SwitchStuckPeriod();
+
+        LastHandshakeDone = TActivationContext::Now();
+
+        RamInQueue = nullptr;
+        GenerateTraffic();
+    }
+
+    void TInterconnectSessionTCP::Handle(TEvUpdateFromInputSession::TPtr& ev) {
+        if (ev->Sender == ReceiverId) {
+            TEvUpdateFromInputSession& msg = *ev->Get();
+
+            // update ping time
+            Ping = msg.Ping;
+            LWPROBE(UpdateFromInputSession, Proxy->PeerNodeId, Ping.MillisecondsFloat());
+
+            bool needConfirm = false;
+
+            // update activity timer for dead peer checker
+            LastInputActivityTimestamp = TActivationContext::Now();
+
+            if (msg.NumDataBytes) {
+                UnconfirmedBytes += msg.NumDataBytes;
+                if (UnconfirmedBytes >= GetTotalInflightAmountOfData() / 4) {
+                    needConfirm = true;
+                } else {
+                    SetForcePacketTimestamp(Proxy->Common->Settings.ForceConfirmPeriod);
+                }
+
+                // reset payload watchdog that controls close-on-idle behaviour
+                LastPayloadActivityTimestamp = TActivationContext::Now();
+                CloseOnIdleWatchdog.Reset();
+            }
+
+            bool unblockedSomething = false;
+            LWPROBE_IF_TOO_LONG(SlowICDropConfirmed, Proxy->PeerNodeId, ms) {
+                unblockedSomething = DropConfirmed(msg.ConfirmedByInput);
+            }
+
+            // generate more traffic if we have unblocked state now
+            if (unblockedSomething) {
+                LWPROBE(UnblockByDropConfirmed, Proxy->PeerNodeId, NHPTimer::GetSeconds(GetCycleCountFast() - ev->SendTime) * 1000.0);
+                GenerateTraffic();
+            }
+
+            // if we haven't generated any packets, then make a lone Flush packet without any data
+            if (needConfirm && Socket) {
+                ++ConfirmPacketsForcedBySize;
+                MakePacket(false);
+            }
+
+            for (;;) {
+                switch (EUpdateState state = ReceiveContext->UpdateState) {
+                    case EUpdateState::NONE:
+                    case EUpdateState::CONFIRMING:
+                        Y_FAIL("unexpected state");
+
+                    case EUpdateState::INFLIGHT:
+                        // this message we are processing was the only one in flight, so we can reset state to NONE here
+                        if (ReceiveContext->UpdateState.compare_exchange_weak(state, EUpdateState::NONE)) {
+                            return;
+                        }
+                        break;
+
+                    case EUpdateState::INFLIGHT_AND_PENDING:
+                        // there are more messages pending from the input session actor, so we have to inform it to
+                        // release the pending one
+                        if (ReceiveContext->UpdateState.compare_exchange_weak(state, EUpdateState::CONFIRMING)) {
+                            Send(ev->Sender, new TEvConfirmUpdate);
+                            return;
+                        }
+                        break;
+                }
+            }
+        }
+    }
+
+    void TInterconnectSessionTCP::HandleRam(TEvRam::TPtr& ev) {
+        if (ev->Get() == RamInQueue) {
+            LWPROBE(FinishRam, Proxy->PeerNodeId, NHPTimer::GetSeconds(GetCycleCountFast() - ev->SendTime) * 1000.0);
+            RamInQueue = nullptr;
+            GenerateTraffic();
+        }
+    }
+
+    void TInterconnectSessionTCP::GenerateTraffic() {
+        // generate ping request, if needed
+        IssuePingRequest();
+
+        if (RamInQueue && !RamInQueue->Batching) {
+            LWPROBE(SkipGenerateTraffic, Proxy->PeerNodeId, NHPTimer::GetSeconds(GetCycleCountFast() - RamStartedCycles) * 1000.0);
+            return; // we'll do it a bit later
+        } else {
+            RamInQueue = nullptr;
+        }
+
+        LOG_DEBUG_IC_SESSION("ICS19", "GenerateTraffic");
+
+        // There is a tradeoff between fairness and efficiency. The less traffic we generate here, the less buffering
+        // happens after the fair scheduler, so the system is fairer and latency is lower. The more traffic we
+        // generate here, the fewer syscalls and the less actor-system overhead we incur, so less CPU is consumed.
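+        // generateLimit below caps the bytes serialized per GenerateTraffic pass: when the cap is hit, the loop
+        // self-sends a TEvRam and resumes on a later mailbox run, letting other queued events interleave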
+ static const ui64 generateLimit = 64 * 1024; + + const ui64 sizeBefore = TotalOutputQueueSize; + ui32 generatedPackets = 0; + ui64 generatedBytes = 0; + ui64 generateStarted = GetCycleCountFast(); + + // apply traffic changes + auto accountTraffic = [&] { ChannelScheduler->ForEach([](TEventOutputChannel& channel) { channel.AccountTraffic(); }); }; + + // first, we create as many data packets as we can generate under certain conditions; they include presence + // of events in channels queues and in flight fitting into requested limit; after we hit one of these conditions + // we exit cycle + while (Socket && NumEventsInReadyChannels && InflightDataAmount < GetTotalInflightAmountOfData() && !ReceiveContext->WriteBlockedByFullSendBuffer) { + if (generatedBytes >= generateLimit) { + // resume later but ensure that we have issued at least one packet + RamInQueue = new TEvRam(false); + Send(SelfId(), RamInQueue); + RamStartedCycles = GetCycleCountFast(); + LWPROBE(StartRam, Proxy->PeerNodeId); + break; + } + + try { + generatedBytes += MakePacket(true); + ++generatedPackets; + } catch (const TExSerializedEventTooLarge& ex) { + // terminate session if the event can't be serialized properly + accountTraffic(); + LOG_CRIT_IC("ICS31", "serialized event Type# 0x%08" PRIx32 " is too large", ex.Type); + return Terminate(TDisconnectReason::EventTooLarge()); + } + } + + if (Socket) { + WriteData(); + } + + LWPROBE(GenerateTraffic, Proxy->PeerNodeId, NHPTimer::GetSeconds(GetCycleCountFast() - generateStarted) * 1000.0, sizeBefore - TotalOutputQueueSize, generatedPackets, generatedBytes); + + accountTraffic(); + EqualizeCounter += ChannelScheduler->Equalize(); + } + + void TInterconnectSessionTCP::StartHandshake() { + LOG_INFO_IC_SESSION("ICS15", "start handshake"); + IActor::InvokeOtherActor(*Proxy, &TInterconnectProxyTCP::StartResumeHandshake, ReceiveContext->LockLastProcessedPacketSerial()); + } + + void TInterconnectSessionTCP::ReestablishConnectionWithHandshake(TDisconnectReason reason) { + ReestablishConnection({}, true, std::move(reason)); + } + + void TInterconnectSessionTCP::ReestablishConnection(TEvHandshakeDone::TPtr&& ev, bool startHandshakeOnSessionClose, + TDisconnectReason reason) { + if (Socket) { + LOG_INFO_IC_SESSION("ICS13", "reestablish connection"); + ShutdownSocket(std::move(reason)); // stop sending/receiving on socket + PendingHandshakeDoneEvent = std::move(ev); + StartHandshakeOnSessionClose = startHandshakeOnSessionClose; + if (!ReceiverId) { + ReestablishConnectionExecute(); + } + } + } + + void TInterconnectSessionTCP::OnDisconnect(TEvSocketDisconnect::TPtr& ev) { + if (ev->Sender == ReceiverId) { + const bool wasConnected(Socket); + LOG_INFO_IC_SESSION("ICS07", "socket disconnect %" PRIi64 " reason# %s", Socket ? 
i64(*Socket) : -1, ev->Get()->Reason.ToString().data());
+            ReceiverId = TActorId(); // reset receiver actor id as we no longer have a receiver
+            if (wasConnected) {
+                // we were successfully connected and did not expect failure, so it arrived from the input side; we should
+                // restart the handshake process, closing our side of the socket first
+                ShutdownSocket(ev->Get()->Reason);
+                StartHandshake();
+            } else {
+                ReestablishConnectionExecute();
+            }
+        }
+    }
+
+    void TInterconnectSessionTCP::ShutdownSocket(TDisconnectReason reason) {
+        if (Socket) {
+            if (const TString& s = reason.ToString()) {
+                Proxy->Metrics->IncDisconnectByReason(s);
+            }
+
+            LOG_INFO_IC_SESSION("ICS25", "shutdown socket, reason# %s", reason.ToString().data());
+            Proxy->UpdateErrorStateLog(TActivationContext::Now(), "close_socket", reason.ToString().data());
+            Socket->Shutdown(SHUT_RDWR);
+            Socket.Reset();
+            Proxy->Metrics->IncDisconnections();
+            CloseOnIdleWatchdog.Disarm();
+            LostConnectionWatchdog.Arm(SelfId());
+            Proxy->Metrics->SetConnected(0);
+            LOG_INFO(*TlsActivationContext, NActorsServices::INTERCONNECT_STATUS, "[%u] disconnected", Proxy->PeerNodeId);
+        }
+    }
+
+    void TInterconnectSessionTCP::ReestablishConnectionExecute() {
+        bool startHandshakeOnSessionClose = std::exchange(StartHandshakeOnSessionClose, false);
+        TEvHandshakeDone::TPtr ev = std::move(PendingHandshakeDoneEvent);
+
+        if (startHandshakeOnSessionClose) {
+            StartHandshake();
+        } else if (ev) {
+            SetNewConnection(ev);
+        }
+    }
+
+    void TInterconnectSessionTCP::Handle(TEvPollerReady::TPtr& ev) {
+        LOG_DEBUG_IC_SESSION("ICS29", "HandleReadyWrite WriteBlockedByFullSendBuffer# %s",
+            ReceiveContext->WriteBlockedByFullSendBuffer ? "true" : "false");
+        if (std::exchange(ReceiveContext->WriteBlockedByFullSendBuffer, false)) {
+            Proxy->Metrics->IncUsefulWriteWakeups();
+            ui64 nowCycles = GetCycleCountFast();
+            double blockedUs = NHPTimer::GetSeconds(nowCycles - WriteBlockedCycles) * 1000000.0;
+            LWPROBE(ReadyWrite, Proxy->PeerNodeId, NHPTimer::GetSeconds(nowCycles - ev->SendTime) * 1000.0, blockedUs / 1000.0);
+            WriteBlockedTotal += TDuration::MicroSeconds(blockedUs);
+            GenerateTraffic();
+        } else if (!ev->Cookie) {
+            Proxy->Metrics->IncSpuriousWriteWakeups();
+        }
+        if (Params.Encryption && ReceiveContext->ReadPending && !ev->Cookie) {
+            Send(ReceiverId, ev->Release().Release(), 0, 1);
+        }
+    }
+
+    void TInterconnectSessionTCP::Handle(TEvPollerRegisterResult::TPtr ev) {
+        PollerToken = std::move(ev->Get()->PollerToken);
+        if (ReceiveContext->WriteBlockedByFullSendBuffer) {
+            if (Params.Encryption) {
+                auto *secure = static_cast<NInterconnect::TSecureSocket*>(Socket.Get());
+                PollerToken->Request(secure->WantRead(), secure->WantWrite());
+            } else {
+                PollerToken->Request(false, true);
+            }
+        }
+    }
+
+    void TInterconnectSessionTCP::WriteData() {
+        ui64 written = 0;
+
+        Y_VERIFY(Socket); // ensure that socket wasn't closed
+
+        LWPROBE_IF_TOO_LONG(SlowICWriteData, Proxy->PeerNodeId, ms) {
+            constexpr ui32 iovLimit = 256;
+#ifdef _linux_
+            ui32 maxElementsInIOV = Min<ui32>(iovLimit, sysconf(_SC_IOV_MAX));
+#else
+            ui32 maxElementsInIOV = 64;
+#endif
+            if (Params.Encryption) {
+                maxElementsInIOV = 1;
+            }
+
+            // vector of write buffers with preallocated stack space
+            TStackVec<TConstIoVec, iovLimit> wbuffers;
+
+            LOG_DEBUG_IC_SESSION("ICS30", "WriteData WriteBlockedByFullSendBuffer# %s SendQueue.size# %zu",
+                ReceiveContext->WriteBlockedByFullSendBuffer ?
"true" : "false", SendQueue.size()); + + // update last confirmed packet number if it has changed + if (SendQueuePos != SendQueue.end()) { + SendQueuePos->UpdateConfirmIfPossible(ReceiveContext->GetLastProcessedPacketSerial()); + } + + while (SendQueuePos != SendQueue.end() && !ReceiveContext->WriteBlockedByFullSendBuffer) { + for (auto it = SendQueuePos; it != SendQueue.end() && wbuffers.size() < maxElementsInIOV; ++it) { + it->AppendToIoVector(wbuffers, maxElementsInIOV); + } + + const struct iovec* iovec = reinterpret_cast<const struct iovec*>(wbuffers.data()); + int iovcnt = wbuffers.size(); + + Y_VERIFY(iovcnt > 0); + Y_VERIFY(iovec->iov_len > 0); + + TString err; + ssize_t r = 0; + do { +#ifndef _win_ + r = iovcnt == 1 ? Socket->Send(iovec[0].iov_base, iovec[0].iov_len, &err) : Socket->WriteV(iovec, iovcnt); +#else + r = Socket->Send(iovec[0].iov_base, iovec[0].iov_len, &err); +#endif + Proxy->Metrics->IncSendSyscalls(); + } while (r == -EINTR); + + LOG_DEBUG_IC_SESSION("ICS16", "written# %zd iovcnt# %d err# %s", r, iovcnt, err.data()); + + wbuffers.clear(); + + if (r > 0) { + Y_VERIFY(static_cast<size_t>(r) <= BytesUnwritten); + BytesUnwritten -= r; + written += r; + ui64 packets = 0; + + // advance SendQueuePos to eat all processed items + for (size_t amount = r; amount && SendQueuePos->DropBufs(amount); ++SendQueuePos) { + if (!SendQueuePos->IsEmpty()) { + LastSentSerial = Max(LastSentSerial, SendQueuePos->GetSerial()); + } + ++PacketsWrittenToSocket; + ++packets; + LWTRACK(PacketWrittenToSocket, SendQueuePos->Orbit, Proxy->PeerNodeId, PacketsWrittenToSocket, SendQueuePos->TriedWriting, SendQueuePos->GetDataSize(), BytesUnwritten, GetWriteBlockedTotal(), (SOCKET)*Socket); + } + + LWPROBE(WriteToSocket, Proxy->PeerNodeId, r, packets, PacketsWrittenToSocket, BytesUnwritten, GetWriteBlockedTotal(), (SOCKET)*Socket); + } else if (-r != EAGAIN && -r != EWOULDBLOCK) { + const TString message = r == 0 ? "connection closed by peer" + : err ? err + : Sprintf("socket: %s", strerror(-r)); + LOG_NOTICE_NET(Proxy->PeerNodeId, "%s", message.data()); + if (written) { + Proxy->Metrics->AddTotalBytesWritten(written); + } + return ReestablishConnectionWithHandshake(r == 0 ? 
TDisconnectReason::EndOfStream() : TDisconnectReason::FromErrno(-r)); + } else { + // we have to do some hack for secure socket -- mark the packet as 'tried writing' + if (Params.Encryption) { + Y_VERIFY(SendQueuePos != SendQueue.end()); + SendQueuePos->MarkTriedWriting(); // do not try to replace buffer under SSL + } + + // we have received EAGAIN error code, this means that we can't issue more data until we have received + // TEvPollerReadyWrite event from poller; set up flag meaning this and wait for that event + Y_VERIFY(!ReceiveContext->WriteBlockedByFullSendBuffer); + ReceiveContext->WriteBlockedByFullSendBuffer = true; + WriteBlockedCycles = GetCycleCountFast(); + LWPROBE(BlockedWrite, Proxy->PeerNodeId, SendQueue.size(), written); + LOG_DEBUG_IC_SESSION("ICS18", "hit send buffer limit"); + + if (PollerToken) { + if (Params.Encryption) { + auto *secure = static_cast<NInterconnect::TSecureSocket*>(Socket.Get()); + PollerToken->Request(secure->WantRead(), secure->WantWrite()); + } else { + PollerToken->Request(false, true); + } + } + } + } + } + if (written) { + Proxy->Metrics->AddTotalBytesWritten(written); + } + } + + void TInterconnectSessionTCP::SetForcePacketTimestamp(TDuration period) { + if (period != TDuration::Max()) { + const TInstant when = TActivationContext::Now() + period; + if (when < ForcePacketTimestamp) { + ForcePacketTimestamp = when; + ScheduleFlush(); + } + } + } + + void TInterconnectSessionTCP::ScheduleFlush() { + if (FlushSchedule.empty() || ForcePacketTimestamp < FlushSchedule.top()) { + Schedule(ForcePacketTimestamp - TActivationContext::Now(), new TEvFlush); + FlushSchedule.push(ForcePacketTimestamp); + MaxFlushSchedule = Max(MaxFlushSchedule, FlushSchedule.size()); + ++FlushEventsScheduled; + } + } + + void TInterconnectSessionTCP::HandleFlush() { + const TInstant now = TActivationContext::Now(); + while (FlushSchedule && now >= FlushSchedule.top()) { + FlushSchedule.pop(); + } + IssuePingRequest(); + if (Socket) { + if (now >= ForcePacketTimestamp) { + ++ConfirmPacketsForcedByTimeout; + ++FlushEventsProcessed; + MakePacket(false); // just generate confirmation packet if we have preconditions for this + } else if (ForcePacketTimestamp != TInstant::Max()) { + ScheduleFlush(); + } + } + } + + void TInterconnectSessionTCP::ResetFlushLogic() { + ForcePacketTimestamp = TInstant::Max(); + UnconfirmedBytes = 0; + const TDuration ping = Proxy->Common->Settings.PingPeriod; + if (ping != TDuration::Zero() && !NumEventsInReadyChannels) { + SetForcePacketTimestamp(ping); + } + } + + void TInterconnectSessionTCP::TrimSendQueueCache() { + static constexpr size_t maxItems = 32; + static constexpr size_t trimThreshold = maxItems * 2; + if (SendQueueCache.size() >= trimThreshold) { + auto it = SendQueueCache.end(); + for (size_t n = SendQueueCache.size() - maxItems; n; --n) { + --it; + } + + auto ev = std::make_unique<TEvFreeItems>(); + ev->Items.splice(ev->Items.end(), SendQueueCache, it, SendQueueCache.end()); + ev->NumBytes = ev->Items.size() * sizeof(TTcpPacketOutTask); + if (ev->GetInLineForDestruction(Proxy->Common)) { + Send(Proxy->Common->DestructorId, ev.release()); + } + } + } + + ui64 TInterconnectSessionTCP::MakePacket(bool data, TMaybe<ui64> pingMask) { + Y_VERIFY(Socket); + + TSendQueue::iterator packet; + if (SendQueueCache) { + // we have entries in cache, take one and move it to the end of SendQueue + packet = SendQueueCache.begin(); + SendQueue.splice(SendQueue.end(), SendQueueCache, packet); + packet->Reuse(); // reset packet to initial state + } else { + 
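+            // cache miss: a fresh TTcpPacketOutTask is constructed here; the SlowICAllocPacketBuffer probe below
+            // tracks slow allocations, which is presumably why recycling packets via SendQueueCache is preferred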
// we have to allocate new packet, so just do it + LWPROBE_IF_TOO_LONG(SlowICAllocPacketBuffer, Proxy->PeerNodeId, ms) { + packet = SendQueue.emplace(SendQueue.end(), Params); + } + } + + // update send queue position + if (SendQueuePos == SendQueue.end()) { + SendQueuePos = packet; // start sending this packet if we are not sending anything for now + } + + ui64 serial = 0; + + if (data) { + // generate serial for this data packet + serial = ++OutputCounter; + + // fill the data packet + Y_VERIFY(NumEventsInReadyChannels); + LWPROBE_IF_TOO_LONG(SlowICFillSendingBuffer, Proxy->PeerNodeId, ms) { + FillSendingBuffer(*packet, serial); + } + Y_VERIFY(!packet->IsEmpty()); + + InflightDataAmount += packet->GetDataSize(); + Proxy->Metrics->AddInflightDataAmount(packet->GetDataSize()); + if (InflightDataAmount > GetTotalInflightAmountOfData()) { + Proxy->Metrics->IncInflyLimitReach(); + } + + if (AtomicGet(ReceiveContext->ControlPacketId) == 0) { + AtomicSet(ReceiveContext->ControlPacketSendTimer, GetCycleCountFast()); + AtomicSet(ReceiveContext->ControlPacketId, OutputCounter); + } + + // update payload activity timer + LastPayloadActivityTimestamp = TActivationContext::Now(); + } else if (pingMask) { + serial = *pingMask; + + // make this packet a priority one + if (SendQueuePos != packet) { + Y_VERIFY(SendQueuePos != SendQueue.end()); + if (SendQueuePos->IsAtBegin()) { + // insert this packet just before the next being sent and step back + SendQueue.splice(SendQueuePos, SendQueue, packet); + --SendQueuePos; + Y_VERIFY(SendQueuePos == packet); + } else { + // current packet is already being sent, so move new packet just after it + SendQueue.splice(std::next(SendQueuePos), SendQueue, packet); + } + } + } + + const ui64 lastInputSerial = ReceiveContext->GetLastProcessedPacketSerial(); + packet->SetMetadata(serial, lastInputSerial); + packet->Sign(); + + // count number of bytes pending for write + ui64 packetSize = (Params.UseModernFrame ? 
sizeof(TTcpPacketHeader_v2) : sizeof(TTcpPacketHeader_v1)) + packet->GetDataSize(); + BytesUnwritten += packetSize; + + LOG_DEBUG_IC_SESSION("ICS22", "outgoing packet Serial# %" PRIu64 " Confirm# %" PRIu64 " DataSize# %zu" + " InflightDataAmount# %" PRIu64 " BytesUnwritten# %" PRIu64, serial, lastInputSerial, packet->GetDataSize(), + InflightDataAmount, BytesUnwritten); + + // reset forced packet sending timestamp as we have confirmed all received data + ResetFlushLogic(); + + ++PacketsGenerated; + LWTRACK(PacketGenerated, packet->Orbit, Proxy->PeerNodeId, BytesUnwritten, InflightDataAmount, PacketsGenerated, packetSize); + + if (!data) { + WriteData(); + } + + return packetSize; + } + + bool TInterconnectSessionTCP::DropConfirmed(ui64 confirm) { + LOG_DEBUG_IC_SESSION("ICS23", "confirm count: %" PRIu64, confirm); + + Y_VERIFY(LastConfirmed <= confirm && confirm <= LastSentSerial && LastSentSerial <= OutputCounter, + "%s confirm# %" PRIu64 " LastConfirmed# %" PRIu64 " OutputCounter# %" PRIu64 " LastSentSerial# %" PRIu64, + LogPrefix.data(), confirm, LastConfirmed, OutputCounter, LastSentSerial); + LastConfirmed = confirm; + + ui64 droppedDataAmount = 0; + ui32 numDropped = 0; + + // drop confirmed packets; this also includes any auxiliary packets as their serial is set to zero, effectively + // making Serial <= confirm true + TSendQueue::iterator it; + ui64 lastDroppedSerial = 0; + for (it = SendQueue.begin(); it != SendQueuePos && it->Confirmed(confirm); ++it) { + if (!it->IsEmpty()) { + lastDroppedSerial = it->GetSerial(); + } + droppedDataAmount += it->GetDataSize(); + ++numDropped; + } + SendQueueCache.splice(SendQueueCache.begin(), SendQueue, SendQueue.begin(), it); + TrimSendQueueCache(); + ChannelScheduler->ForEach([&](TEventOutputChannel& channel) { + channel.DropConfirmed(lastDroppedSerial); + }); + + const ui64 current = InflightDataAmount; + const ui64 limit = GetTotalInflightAmountOfData(); + const bool unblockedSomething = current >= limit && current < limit + droppedDataAmount; + + PacketsConfirmed += numDropped; + InflightDataAmount -= droppedDataAmount; + Proxy->Metrics->SubInflightDataAmount(droppedDataAmount); + LWPROBE(DropConfirmed, Proxy->PeerNodeId, droppedDataAmount, InflightDataAmount); + + LOG_DEBUG_IC_SESSION("ICS24", "exit InflightDataAmount: %" PRIu64 " bytes droppedDataAmount: %" PRIu64 " bytes" + " dropped %" PRIu32 " packets", InflightDataAmount, droppedDataAmount, numDropped); + + Pool->Trim(); // send any unsent free requests + + return unblockedSomething; + } + + void TInterconnectSessionTCP::FillSendingBuffer(TTcpPacketOutTask& task, ui64 serial) { + ui32 bytesGenerated = 0; + + Y_VERIFY(NumEventsInReadyChannels); + while (NumEventsInReadyChannels) { + TEventOutputChannel *channel = ChannelScheduler->PickChannelWithLeastConsumedWeight(); + Y_VERIFY_DEBUG(!channel->IsEmpty()); + + // generate some data within this channel + const ui64 netBefore = channel->GetBufferedAmountOfData(); + ui64 gross = 0; + const bool eventDone = channel->FeedBuf(task, serial, &gross); + channel->UnaccountedTraffic += gross; + const ui64 netAfter = channel->GetBufferedAmountOfData(); + Y_VERIFY_DEBUG(netAfter <= netBefore); // net amount should shrink + const ui64 net = netBefore - netAfter; // number of net bytes serialized + + // adjust metrics for local and global queue size + TotalOutputQueueSize -= net; + Proxy->Metrics->SubOutputBuffersTotalSize(net); + bytesGenerated += gross; + Y_VERIFY_DEBUG(!!net == !!gross && gross >= net, "net# %" PRIu64 " gross# %" PRIu64, net, 
gross); + + // return it back to queue or delete, depending on whether this channel is still working or not + ChannelScheduler->FinishPick(gross, EqualizeCounter); + + // update some stats if the packet was fully serialized + if (eventDone) { + ++MessagesWrittenToBuffer; + + Y_VERIFY(NumEventsInReadyChannels); + --NumEventsInReadyChannels; + + if (!NumEventsInReadyChannels) { + SetOutputStuckFlag(false); + } + } + + if (!gross) { // no progress -- almost full packet buffer + break; + } + } + + LWTRACK(FillSendingBuffer, task.Orbit, Proxy->PeerNodeId, bytesGenerated, NumEventsInReadyChannels, WriteBlockedTotal); + Y_VERIFY(bytesGenerated); // ensure we are not stalled in serialization + } + + ui32 TInterconnectSessionTCP::CalculateQueueUtilization() { + SwitchStuckPeriod(); + ui64 sumBusy = 0, sumPeriod = 0; + for (auto iter = OutputQueueUtilization.begin(); iter != OutputQueueUtilization.end() - 1; ++iter) { + sumBusy += iter->first; + sumPeriod += iter->second; + } + return sumBusy * 1000000 / sumPeriod; + } + + void TInterconnectSessionTCP::SendUpdateToWhiteboard(bool connected) { + const ui32 utilization = Socket ? CalculateQueueUtilization() : 0; + + if (const auto& callback = Proxy->Common->UpdateWhiteboard) { + enum class EFlag { + GREEN, + YELLOW, + ORANGE, + RED, + }; + EFlag flagState = EFlag::RED; + + if (Socket) { + flagState = EFlag::GREEN; + + do { + auto lastInputDelay = TActivationContext::Now() - LastInputActivityTimestamp; + if (lastInputDelay * 4 >= GetDeadPeerTimeout() * 3) { + flagState = EFlag::ORANGE; + break; + } else if (lastInputDelay * 2 >= GetDeadPeerTimeout()) { + flagState = EFlag::YELLOW; + } + + // check utilization + if (utilization > 875000) { // 7/8 + flagState = EFlag::ORANGE; + break; + } else if (utilization > 500000) { // 1/2 + flagState = EFlag::YELLOW; + } + } while (false); + } + + callback(Proxy->Metrics->GetHumanFriendlyPeerHostName(), + connected, + flagState == EFlag::GREEN, + flagState == EFlag::YELLOW, + flagState == EFlag::ORANGE, + flagState == EFlag::RED, + TlsActivationContext->ExecutorThread.ActorSystem); + } + + if (connected) { + Schedule(TDuration::Seconds(1), new TEvents::TEvWakeup); + } + } + + void TInterconnectSessionTCP::SetOutputStuckFlag(bool state) { + if (OutputStuckFlag == state) + return; + + if (OutputQueueUtilization.Size() == 0) + return; + + auto& lastpair = OutputQueueUtilization.Last(); + if (state) + lastpair.first -= GetCycleCountFast(); + else + lastpair.first += GetCycleCountFast(); + + OutputStuckFlag = state; + } + + void TInterconnectSessionTCP::SwitchStuckPeriod() { + auto now = GetCycleCountFast(); + if (OutputQueueUtilization.Size() != 0) { + auto& lastpair = OutputQueueUtilization.Last(); + lastpair.second = now - lastpair.second; + if (OutputStuckFlag) + lastpair.first += now; + } + + OutputQueueUtilization.Push(std::pair<ui64, ui64>(0, now)); + if (OutputStuckFlag) + OutputQueueUtilization.Last().first -= now; + } + + TDuration TInterconnectSessionTCP::GetDeadPeerTimeout() const { + return Coalesce(Proxy->Common->Settings.DeadPeer, DEFAULT_DEADPEER_TIMEOUT); + } + + TDuration TInterconnectSessionTCP::GetCloseOnIdleTimeout() const { + return Proxy->Common->Settings.CloseOnIdle; + } + + TDuration TInterconnectSessionTCP::GetLostConnectionTimeout() const { + return Coalesce(Proxy->Common->Settings.LostConnection, DEFAULT_LOST_CONNECTION_TIMEOUT); + } + + ui32 TInterconnectSessionTCP::GetTotalInflightAmountOfData() const { + return Coalesce(Proxy->Common->Settings.TotalInflightAmountOfData, 
DEFAULT_TOTAL_INFLIGHT_DATA); + } + + ui64 TInterconnectSessionTCP::GetMaxCyclesPerEvent() const { + return DurationToCycles(TDuration::MicroSeconds(50)); + } + + void TInterconnectSessionTCP::IssuePingRequest() { + const TInstant now = TActivationContext::Now(); + if (now >= LastPingTimestamp + PingPeriodicity) { + LOG_DEBUG_IC_SESSION("ICS22", "Issuing ping request"); + if (Socket) { + MakePacket(false, GetCycleCountFast() | TTcpPacketBuf::PingRequestMask); + } + if (Socket) { + MakePacket(false, TInstant::Now().MicroSeconds() | TTcpPacketBuf::ClockMask); + } + LastPingTimestamp = now; + } + } + + void TInterconnectSessionTCP::Handle(TEvProcessPingRequest::TPtr ev) { + if (Socket) { + MakePacket(false, ev->Get()->Payload | TTcpPacketBuf::PingResponseMask); + } + } + + void TInterconnectSessionTCP::GenerateHttpInfo(TStringStream& str) { + HTML(str) { + DIV_CLASS("panel panel-info") { + DIV_CLASS("panel-heading") { + str << "Session"; + } + DIV_CLASS("panel-body") { + TABLE_CLASS("table") { + TABLEHEAD() { + TABLER() { + TABLEH() { + str << "Sensor"; + } + TABLEH() { + str << "Value"; + } + } + } + TABLEBODY() { + TABLER() { + TABLED() { + str << "Encryption"; + } + TABLED() { + str << (Params.Encryption ? "<font color=green>Enabled</font>" : "<font color=red>Disabled</font>"); + } + } + if (auto *x = dynamic_cast<NInterconnect::TSecureSocket*>(Socket.Get())) { + TABLER() { + TABLED() { + str << "Cipher name"; + } + TABLED() { + str << x->GetCipherName(); + } + } + TABLER() { + TABLED() { + str << "Cipher bits"; + } + TABLED() { + str << x->GetCipherBits(); + } + } + TABLER() { + TABLED() { + str << "Protocol"; + } + TABLED() { + str << x->GetProtocolName(); + } + } + TABLER() { + TABLED() { + str << "Peer CN"; + } + TABLED() { + str << x->GetPeerCommonName(); + } + } + } + TABLER() { + TABLED() { str << "AuthOnly CN"; } + TABLED() { str << Params.AuthCN; } + } + TABLER() { + TABLED() { + str << "Local scope id"; + } + TABLED() { + str << ScopeIdToString(Proxy->Common->LocalScopeId); + } + } + TABLER() { + TABLED() { + str << "Peer scope id"; + } + TABLED() { + str << ScopeIdToString(Params.PeerScopeId); + } + } + TABLER() { + TABLED() { + str << "This page generated at"; + } + TABLED() { + str << TActivationContext::Now() << " / " << Now(); + } + } + TABLER() { + TABLED() { + str << "SelfID"; + } + TABLED() { + str << SelfId().ToString(); + } + } + TABLER() { + TABLED() { str << "Frame version/Checksum"; } + TABLED() { str << (!Params.UseModernFrame ? "v1/crc32c" : Params.Encryption ? "v2/none" : "v2/crc32c"); } + } +#define MON_VAR(NAME) \ + TABLER() { \ + TABLED() { \ + str << #NAME; \ + } \ + TABLED() { \ + str << NAME; \ + } \ + } + + MON_VAR(Created) + MON_VAR(NewConnectionSet) + MON_VAR(ReceiverId) + MON_VAR(MessagesGot) + MON_VAR(MessagesWrittenToBuffer) + MON_VAR(PacketsGenerated) + MON_VAR(PacketsWrittenToSocket) + MON_VAR(PacketsConfirmed) + MON_VAR(AtomicGet(ReceiveContext->PacketsReadFromSocket)) + MON_VAR(ConfirmPacketsForcedBySize) + MON_VAR(ConfirmPacketsForcedByTimeout) + + TABLER() { + TABLED() { + str << "Virtual self ID"; + } + TABLED() { + str << Proxy->SessionVirtualId.ToString(); + } + } + TABLER() { + TABLED() { + str << "Virtual peer ID"; + } + TABLED() { + str << Proxy->RemoteSessionVirtualId.ToString(); + } + } + TABLER() { + TABLED() { + str << "Socket"; + } + TABLED() { + str << (Socket ? i64(*Socket) : -1); + } + } + + ui32 unsentQueueSize = Socket ? 
Socket->GetUnsentQueueSize() : 0; + + MON_VAR(OutputStuckFlag) + MON_VAR(SendQueue.size()) + MON_VAR(SendQueueCache.size()) + MON_VAR(NumEventsInReadyChannels) + MON_VAR(TotalOutputQueueSize) + MON_VAR(BytesUnwritten) + MON_VAR(InflightDataAmount) + MON_VAR(unsentQueueSize) + MON_VAR(SendBufferSize) + MON_VAR(LastInputActivityTimestamp) + MON_VAR(LastPayloadActivityTimestamp) + MON_VAR(LastHandshakeDone) + MON_VAR(OutputCounter) + MON_VAR(LastSentSerial) + MON_VAR(ReceiveContext->GetLastProcessedPacketSerial()) + MON_VAR(LastConfirmed) + MON_VAR(FlushSchedule.size()) + MON_VAR(MaxFlushSchedule) + MON_VAR(FlushEventsScheduled) + MON_VAR(FlushEventsProcessed) + + TString clockSkew; + i64 x = GetClockSkew(); + if (x < 0) { + clockSkew = Sprintf("-%s", TDuration::MicroSeconds(-x).ToString().data()); + } else { + clockSkew = Sprintf("+%s", TDuration::MicroSeconds(x).ToString().data()); + } + + MON_VAR(LastPingTimestamp) + MON_VAR(GetPingRTT()) + MON_VAR(clockSkew) + + MON_VAR(GetDeadPeerTimeout()) + MON_VAR(GetTotalInflightAmountOfData()) + MON_VAR(GetCloseOnIdleTimeout()) + MON_VAR(Subscribers.size()) + } + } + } + } + } + } + + void CreateSessionKillingActor(TInterconnectProxyCommon::TPtr common) { + TlsActivationContext->ExecutorThread.ActorSystem->Register(new TInterconnectSessionKiller(common)); + } +} diff --git a/library/cpp/actors/interconnect/interconnect_tcp_session.h b/library/cpp/actors/interconnect/interconnect_tcp_session.h new file mode 100644 index 0000000000..7fc00dbcc5 --- /dev/null +++ b/library/cpp/actors/interconnect/interconnect_tcp_session.h @@ -0,0 +1,565 @@ +#pragma once + +#include <library/cpp/actors/core/hfunc.h> +#include <library/cpp/actors/core/event_pb.h> +#include <library/cpp/actors/core/events.h> +#include <library/cpp/actors/core/log.h> +#include <library/cpp/actors/helpers/mon_histogram_helper.h> +#include <library/cpp/actors/protos/services_common.pb.h> +#include <library/cpp/actors/util/datetime.h> +#include <library/cpp/actors/util/rope.h> +#include <library/cpp/actors/util/funnel_queue.h> +#include <library/cpp/actors/util/recentwnd.h> +#include <library/cpp/monlib/dynamic_counters/counters.h> +#include <library/cpp/actors/core/actor_bootstrapped.h> + +#include <util/generic/queue.h> +#include <util/generic/deque.h> +#include <util/datetime/cputimer.h> + +#include "interconnect_impl.h" +#include "poller_tcp.h" +#include "poller_actor.h" +#include "interconnect_channel.h" +#include "logging.h" +#include "watchdog_timer.h" +#include "event_holder_pool.h" +#include "channel_scheduler.h" + +#include <unordered_set> +#include <unordered_map> + +namespace NActors { + class TSlowPathChecker { + using TTraceCallback = std::function<void(double)>; + TTraceCallback Callback; + const NHPTimer::STime Start; + + public: + TSlowPathChecker(TTraceCallback&& callback) + : Callback(std::move(callback)) + , Start(GetCycleCountFast()) + { + } + + ~TSlowPathChecker() { + const NHPTimer::STime end = GetCycleCountFast(); + const NHPTimer::STime elapsed = end - Start; + if (elapsed > 1000000) { + Callback(NHPTimer::GetSeconds(elapsed) * 1000); + } + } + + operator bool() const { + return false; + } + }; + +#define LWPROBE_IF_TOO_LONG(...) \ + if (auto __x = TSlowPathChecker{[&](double ms) { LWPROBE(__VA_ARGS__); }}) \ + ; \ + else + + class TTimeLimit { + public: + TTimeLimit(ui64 limitInCycles) + : UpperLimit(limitInCycles == 0 ? 0 : GetCycleCountFast() + limitInCycles) + { + } + + TTimeLimit(ui64 startTS, ui64 limitInCycles) + : UpperLimit(limitInCycles == 0 ? 
0 : startTS + limitInCycles)
+        {
+        }
+
+        bool CheckExceeded() {
+            return UpperLimit != 0 && GetCycleCountFast() > UpperLimit;
+        }
+
+        const ui64 UpperLimit;
+    };
+
+    static constexpr TDuration DEFAULT_DEADPEER_TIMEOUT = TDuration::Seconds(10);
+    static constexpr TDuration DEFAULT_LOST_CONNECTION_TIMEOUT = TDuration::Seconds(10);
+    static constexpr ui32 DEFAULT_MAX_INFLIGHT_DATA = 10240 * 1024;
+    static constexpr ui32 DEFAULT_TOTAL_INFLIGHT_DATA = 4 * 10240 * 1024;
+
+    class TInterconnectProxyTCP;
+
+    enum class EUpdateState : ui8 {
+        NONE,                 // no updates generated by input session yet
+        INFLIGHT,             // one update is inflight, and no more pending
+        INFLIGHT_AND_PENDING, // one update is inflight, and one is pending
+        CONFIRMING,           // confirmation inflight
+    };
+
+    struct TReceiveContext: public TAtomicRefCount<TReceiveContext> {
+        /* All accesses to these fields must be thread-safe */
+
+        ui64 ControlPacketSendTimer = 0;
+        ui64 ControlPacketId = 0;
+
+        // number of packets received by input session
+        TAtomic PacketsReadFromSocket = 0;
+        TAtomic DataPacketsReadFromSocket = 0;
+
+        // last processed packet by input session
+        std::atomic_uint64_t LastProcessedPacketSerial = 0;
+        static constexpr uint64_t LastProcessedPacketSerialLockBit = uint64_t(1) << 63;
+
+        // for hardened checks
+        TAtomic NumInputSessions = 0;
+
+        NHPTimer::STime StartTime;
+
+        std::atomic<ui64> PingRTT_us = 0;
+        std::atomic<i64> ClockSkew_us = 0;
+
+        std::atomic<EUpdateState> UpdateState;
+        static_assert(std::atomic<EUpdateState>::is_always_lock_free);
+
+        bool WriteBlockedByFullSendBuffer = false;
+        bool ReadPending = false;
+
+        std::array<TRope, 16> ChannelArray;
+        std::unordered_map<ui16, TRope> ChannelMap;
+
+        TReceiveContext() {
+            GetTimeFast(&StartTime);
+        }
+
+        // returns false if the session needs to be terminated and the packet must not be processed
+        bool AdvanceLastProcessedPacketSerial() {
+            for (;;) {
+                uint64_t value = LastProcessedPacketSerial.load();
+                if (value & LastProcessedPacketSerialLockBit) {
+                    return false;
+                }
+                if (LastProcessedPacketSerial.compare_exchange_weak(value, value + 1)) {
+                    return true;
+                }
+            }
+        }
+
+        ui64 LockLastProcessedPacketSerial() {
+            for (;;) {
+                uint64_t value = LastProcessedPacketSerial.load();
+                if (value & LastProcessedPacketSerialLockBit) {
+                    return value & ~LastProcessedPacketSerialLockBit;
+                }
+                if (LastProcessedPacketSerial.compare_exchange_strong(value, value | LastProcessedPacketSerialLockBit)) {
+                    return value;
+                }
+            }
+        }
+
+        void UnlockLastProcessedPacketSerial() {
+            LastProcessedPacketSerial = LastProcessedPacketSerial.load() & ~LastProcessedPacketSerialLockBit;
+        }
+
+        ui64 GetLastProcessedPacketSerial() {
+            return LastProcessedPacketSerial.load() & ~LastProcessedPacketSerialLockBit;
+        }
+    };
+
+    class TInputSessionTCP
+        : public TActorBootstrapped<TInputSessionTCP>
+        , public TInterconnectLoggingBase
+    {
+        enum {
+            EvCheckDeadPeer = EventSpaceBegin(TEvents::ES_PRIVATE),
+            EvResumeReceiveData,
+        };
+
+        struct TEvCheckDeadPeer : TEventLocal<TEvCheckDeadPeer, EvCheckDeadPeer> {};
+        struct TEvResumeReceiveData : TEventLocal<TEvResumeReceiveData, EvResumeReceiveData> {};
+
+    public:
+        static constexpr EActivityType ActorActivityType() {
+            return INTERCONNECT_SESSION_TCP;
+        }
+
+        TInputSessionTCP(const TActorId& sessionId,
+                         TIntrusivePtr<NInterconnect::TStreamSocket> socket,
+                         TIntrusivePtr<TReceiveContext> context,
+                         TInterconnectProxyCommon::TPtr common,
+                         std::shared_ptr<IInterconnectMetrics> metrics,
+                         ui32 nodeId,
+                         ui64 lastConfirmed,
+                         TDuration deadPeerTimeout,
TSessionParams params); + + private: + friend class TActorBootstrapped<TInputSessionTCP>; + + void Bootstrap(); + + STRICT_STFUNC(WorkingState, + cFunc(TEvents::TSystem::PoisonPill, PassAway) + hFunc(TEvPollerReady, Handle) + hFunc(TEvPollerRegisterResult, Handle) + cFunc(EvResumeReceiveData, HandleResumeReceiveData) + cFunc(TEvInterconnect::TEvCloseInputSession::EventType, CloseInputSession) + cFunc(EvCheckDeadPeer, HandleCheckDeadPeer) + cFunc(TEvConfirmUpdate::EventType, HandleConfirmUpdate) + ) + + private: + TRope IncomingData; + + const TActorId SessionId; + TIntrusivePtr<NInterconnect::TStreamSocket> Socket; + TPollerToken::TPtr PollerToken; + TIntrusivePtr<TReceiveContext> Context; + TInterconnectProxyCommon::TPtr Common; + const ui32 NodeId; + const TSessionParams Params; + + // header we are currently processing (parsed from the stream) + union { + TTcpPacketHeader_v1 v1; + TTcpPacketHeader_v2 v2; + char Data[1]; + } Header; + ui64 HeaderConfirm, HeaderSerial; + + size_t PayloadSize; + ui32 ChecksumExpected, Checksum; + bool IgnorePayload; + TRope Payload; + enum class EState { + HEADER, + PAYLOAD, + }; + EState State = EState::HEADER; + + THolder<TEvUpdateFromInputSession> UpdateFromInputSession; + + ui64 ConfirmedByInput; + + std::shared_ptr<IInterconnectMetrics> Metrics; + + bool CloseInputSessionRequested = false; + + void CloseInputSession(); + + void Handle(TEvPollerReady::TPtr ev); + void Handle(TEvPollerRegisterResult::TPtr ev); + void HandleResumeReceiveData(); + void HandleConfirmUpdate(); + void ReceiveData(); + void ProcessHeader(size_t headerLen); + void ProcessPayload(ui64& numDataBytes); + void ProcessEvent(TRope& data, TEventDescr& descr); + bool ReadMore(); + + void ReestablishConnection(TDisconnectReason reason); + void DestroySession(TDisconnectReason reason); + + TDeque<TIntrusivePtr<TRopeAlignedBuffer>> Buffers; + + static constexpr size_t NumPreallocatedBuffers = 16; + void PreallocateBuffers(); + + inline ui64 GetMaxCyclesPerEvent() const { + return DurationToCycles(TDuration::MicroSeconds(500)); + } + + const TDuration DeadPeerTimeout; + TInstant LastReceiveTimestamp; + void HandleCheckDeadPeer(); + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // pinger logic + + bool NewPingProtocol = false; + TDeque<TDuration> PingQ; // last N ping samples + TDeque<i64> SkewQ; // last N calculated clock skew samples + + void HandlePingResponse(TDuration passed); + void HandleClock(TInstant clock); + }; + + class TInterconnectSessionTCP + : public TActor<TInterconnectSessionTCP> + , public TInterconnectLoggingBase + { + enum { + EvCheckCloseOnIdle = EventSpaceBegin(TEvents::ES_PRIVATE), + EvCheckLostConnection, + EvRam, + EvTerminate, + EvFreeItems, + }; + + struct TEvCheckCloseOnIdle : TEventLocal<TEvCheckCloseOnIdle, EvCheckCloseOnIdle> {}; + struct TEvCheckLostConnection : TEventLocal<TEvCheckLostConnection, EvCheckLostConnection> {}; + + struct TEvRam : TEventLocal<TEvRam, EvRam> { + const bool Batching; + TEvRam(bool batching) : Batching(batching) {} + }; + + struct TEvTerminate : TEventLocal<TEvTerminate, EvTerminate> { + TDisconnectReason Reason; + + TEvTerminate(TDisconnectReason reason) + : Reason(std::move(reason)) + {} + }; + + const TInstant Created; + TInstant NewConnectionSet; + ui64 MessagesGot = 0; + ui64 MessagesWrittenToBuffer = 0; + ui64 PacketsGenerated = 0; + ui64 PacketsWrittenToSocket = 0; + ui64 PacketsConfirmed = 0; + + public: + static constexpr EActivityType 
ActorActivityType() { + return INTERCONNECT_SESSION_TCP; + } + + TInterconnectSessionTCP(TInterconnectProxyTCP* const proxy, TSessionParams params); + ~TInterconnectSessionTCP(); + + void Init(); + void CloseInputSession(); + + static TEvTerminate* NewEvTerminate(TDisconnectReason reason) { + return new TEvTerminate(std::move(reason)); + } + + TDuration GetPingRTT() const { + return TDuration::MicroSeconds(ReceiveContext->PingRTT_us); + } + + i64 GetClockSkew() const { + return ReceiveContext->ClockSkew_us; + } + + private: + friend class TInterconnectProxyTCP; + + void Handle(TEvTerminate::TPtr& ev); + void HandlePoison(); + void Terminate(TDisconnectReason reason); + void PassAway() override; + + void Forward(STATEFN_SIG); + void Subscribe(STATEFN_SIG); + void Unsubscribe(STATEFN_SIG); + + STRICT_STFUNC(StateFunc, + fFunc(TEvInterconnect::EvForward, Forward) + cFunc(TEvents::TEvPoisonPill::EventType, HandlePoison) + fFunc(TEvInterconnect::TEvConnectNode::EventType, Subscribe) + fFunc(TEvents::TEvSubscribe::EventType, Subscribe) + fFunc(TEvents::TEvUnsubscribe::EventType, Unsubscribe) + cFunc(TEvFlush::EventType, HandleFlush) + hFunc(TEvPollerReady, Handle) + hFunc(TEvPollerRegisterResult, Handle) + hFunc(TEvUpdateFromInputSession, Handle) + hFunc(TEvRam, HandleRam) + hFunc(TEvCheckCloseOnIdle, CloseOnIdleWatchdog) + hFunc(TEvCheckLostConnection, LostConnectionWatchdog) + cFunc(TEvents::TSystem::Wakeup, SendUpdateToWhiteboard) + hFunc(TEvSocketDisconnect, OnDisconnect) + hFunc(TEvTerminate, Handle) + hFunc(TEvProcessPingRequest, Handle) + ) + + void Handle(TEvUpdateFromInputSession::TPtr& ev); + + void OnDisconnect(TEvSocketDisconnect::TPtr& ev); + + THolder<TEvHandshakeAck> ProcessHandshakeRequest(TEvHandshakeAsk::TPtr& ev); + void SetNewConnection(TEvHandshakeDone::TPtr& ev); + + TEvRam* RamInQueue = nullptr; + ui64 RamStartedCycles = 0; + void HandleRam(TEvRam::TPtr& ev); + void GenerateTraffic(); + + void SendUpdateToWhiteboard(bool connected = true); + ui32 CalculateQueueUtilization(); + + void Handle(TEvPollerReady::TPtr& ev); + void Handle(TEvPollerRegisterResult::TPtr ev); + void WriteData(); + + ui64 MakePacket(bool data, TMaybe<ui64> pingMask = {}); + void FillSendingBuffer(TTcpPacketOutTask& packet, ui64 serial); + bool DropConfirmed(ui64 confirm); + void ShutdownSocket(TDisconnectReason reason); + + void StartHandshake(); + void ReestablishConnection(TEvHandshakeDone::TPtr&& ev, bool startHandshakeOnSessionClose, + TDisconnectReason reason); + void ReestablishConnectionWithHandshake(TDisconnectReason reason); + void ReestablishConnectionExecute(); + + TInterconnectProxyTCP* const Proxy; + + // various connection settings access + TDuration GetDeadPeerTimeout() const; + TDuration GetCloseOnIdleTimeout() const; + TDuration GetLostConnectionTimeout() const; + ui32 GetTotalInflightAmountOfData() const; + ui64 GetMaxCyclesPerEvent() const; + + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // pinger + + TInstant LastPingTimestamp; + static constexpr TDuration PingPeriodicity = TDuration::Seconds(1); + void IssuePingRequest(); + void Handle(TEvProcessPingRequest::TPtr ev); + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + TInstant LastInputActivityTimestamp; + TInstant LastPayloadActivityTimestamp; + TWatchdogTimer<TEvCheckCloseOnIdle> CloseOnIdleWatchdog; + TWatchdogTimer<TEvCheckLostConnection> LostConnectionWatchdog; + + void 
OnCloseOnIdleTimerHit() { + LOG_INFO_IC("ICS27", "CloseOnIdle timer hit, session terminated"); + Terminate(TDisconnectReason::CloseOnIdle()); + } + + void OnLostConnectionTimerHit() { + LOG_ERROR_IC("ICS28", "LostConnection timer hit, session terminated"); + Terminate(TDisconnectReason::LostConnection()); + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + const TSessionParams Params; + TMaybe<TEventHolderPool> Pool; + TMaybe<TChannelScheduler> ChannelScheduler; + ui64 TotalOutputQueueSize; + bool OutputStuckFlag; + TRecentWnd<std::pair<ui64, ui64>> OutputQueueUtilization; + size_t NumEventsInReadyChannels = 0; + + void SetOutputStuckFlag(bool state); + void SwitchStuckPeriod(); + + using TSendQueue = TList<TTcpPacketOutTask>; + TSendQueue SendQueue; + TSendQueue SendQueueCache; + TSendQueue::iterator SendQueuePos; + ui64 WriteBlockedCycles = 0; // start of current block period + TDuration WriteBlockedTotal; // total incremental duration that session has been blocked + ui64 BytesUnwritten = 0; + + void TrimSendQueueCache(); + + TDuration GetWriteBlockedTotal() const { + if (ReceiveContext->WriteBlockedByFullSendBuffer) { + double blockedUs = NHPTimer::GetSeconds(GetCycleCountFast() - WriteBlockedCycles) * 1000000.0; + return WriteBlockedTotal + TDuration::MicroSeconds(blockedUs); // append current blocking period if any + } else { + return WriteBlockedTotal; + } + } + + ui64 OutputCounter; + ui64 LastSentSerial = 0; + + TInstant LastHandshakeDone; + + TIntrusivePtr<NInterconnect::TStreamSocket> Socket; + TPollerToken::TPtr PollerToken; + ui32 SendBufferSize; + ui64 InflightDataAmount = 0; + + std::unordered_map<TActorId, ui64, TActorId::THash> Subscribers; + + // time at which we want to send confirmation packet even if there was no outgoing data + ui64 UnconfirmedBytes = 0; + TInstant ForcePacketTimestamp = TInstant::Max(); + TPriorityQueue<TInstant, TVector<TInstant>, std::greater<TInstant>> FlushSchedule; + size_t MaxFlushSchedule = 0; + ui64 FlushEventsScheduled = 0; + ui64 FlushEventsProcessed = 0; + + void SetForcePacketTimestamp(TDuration period); + void ScheduleFlush(); + void HandleFlush(); + void ResetFlushLogic(); + + void GenerateHttpInfo(TStringStream& str); + + TIntrusivePtr<TReceiveContext> ReceiveContext; + TActorId ReceiverId; + TDuration Ping; + + ui64 ConfirmPacketsForcedBySize = 0; + ui64 ConfirmPacketsForcedByTimeout = 0; + + ui64 LastConfirmed = 0; + + TEvHandshakeDone::TPtr PendingHandshakeDoneEvent; + bool StartHandshakeOnSessionClose = false; + + ui64 EqualizeCounter = 0; + }; + + class TInterconnectSessionKiller + : public TActorBootstrapped<TInterconnectSessionKiller> { + ui32 RepliesReceived = 0; + ui32 RepliesNumber = 0; + TActorId LargestSession = TActorId(); + ui64 MaxBufferSize = 0; + TInterconnectProxyCommon::TPtr Common; + + public: + static constexpr EActivityType ActorActivityType() { + return INTERCONNECT_SESSION_KILLER; + } + + TInterconnectSessionKiller(TInterconnectProxyCommon::TPtr common) + : Common(common) + { + } + + void Bootstrap() { + auto sender = SelfId(); + const auto eventFabric = [&sender](const TActorId& recp) -> IEventHandle* { + auto ev = new TEvSessionBufferSizeRequest(); + return new IEventHandle(recp, sender, ev, IEventHandle::FlagTrackDelivery); + }; + RepliesNumber = TlsActivationContext->ExecutorThread.ActorSystem->BroadcastToProxies(eventFabric); + Become(&TInterconnectSessionKiller::StateFunc); + } + + STRICT_STFUNC(StateFunc, + 
hFunc(TEvSessionBufferSizeResponse, ProcessResponse) + cFunc(TEvents::TEvUndelivered::EventType, ProcessUndelivered) + ) + + void ProcessResponse(TEvSessionBufferSizeResponse::TPtr& ev) { + RepliesReceived++; + if (MaxBufferSize < ev->Get()->BufferSize) { + MaxBufferSize = ev->Get()->BufferSize; + LargestSession = ev->Get()->SessionID; + } + if (RepliesReceived == RepliesNumber) { + Send(LargestSession, new TEvents::TEvPoisonPill); + AtomicUnlock(&Common->StartedSessionKiller); + PassAway(); + } + } + + void ProcessUndelivered() { + RepliesReceived++; + } + }; + + void CreateSessionKillingActor(TInterconnectProxyCommon::TPtr common); + +} diff --git a/library/cpp/actors/interconnect/load.cpp b/library/cpp/actors/interconnect/load.cpp new file mode 100644 index 0000000000..2a8443da71 --- /dev/null +++ b/library/cpp/actors/interconnect/load.cpp @@ -0,0 +1,405 @@ +#include "load.h" +#include "interconnect_common.h" +#include "events_local.h" +#include <library/cpp/actors/protos/services_common.pb.h> +#include <library/cpp/actors/core/log.h> +#include <library/cpp/actors/core/actor_bootstrapped.h> +#include <library/cpp/actors/core/events.h> +#include <library/cpp/actors/core/hfunc.h> +#include <util/generic/queue.h> + +namespace NInterconnect { + using namespace NActors; + + enum { + EvGenerateMessages = EventSpaceBegin(TEvents::ES_PRIVATE), + EvPublishResults, + EvQueryTrafficCounter, + EvTrafficCounter, + }; + + struct TEvQueryTrafficCounter : TEventLocal<TEvQueryTrafficCounter, EvQueryTrafficCounter> {}; + + struct TEvTrafficCounter : TEventLocal<TEvTrafficCounter, EvTrafficCounter> { + std::shared_ptr<std::atomic_uint64_t> Traffic; + + TEvTrafficCounter(std::shared_ptr<std::atomic_uint64_t> traffic) + : Traffic(std::move(traffic)) + {} + }; + + class TLoadResponderActor : public TActor<TLoadResponderActor> { + STRICT_STFUNC(StateFunc, + HFunc(TEvLoadMessage, Handle); + CFunc(TEvents::TSystem::PoisonPill, Die); + ) + + void Handle(TEvLoadMessage::TPtr& ev, const TActorContext& ctx) { + ui64 bytes = ev->Get()->CalculateSerializedSizeCached(); + auto& record = ev->Get()->Record; + auto *hops = record.MutableHops(); + while (!hops->empty() && !hops->begin()->HasNextHop()) { + record.ClearPayload(); + ev->Get()->StripPayload(); + hops->erase(hops->begin()); + } + if (!hops->empty()) { + // extract actor id of the next hop + const TActorId nextHopActorId = ActorIdFromProto(hops->begin()->GetNextHop()); + hops->erase(hops->begin()); + + // forward message to next hop; preserve flags and cookie + auto msg = MakeHolder<TEvLoadMessage>(); + record.Swap(&msg->Record); + bytes += msg->CalculateSerializedSizeCached(); + ctx.Send(nextHopActorId, msg.Release(), ev->Flags, ev->Cookie); + } + *Traffic += bytes; + } + + public: + TLoadResponderActor(std::shared_ptr<std::atomic_uint64_t> traffic) + : TActor(&TLoadResponderActor::StateFunc) + , Traffic(std::move(traffic)) + {} + + static constexpr IActor::EActivityType ActorActivityType() { + return IActor::INTERCONNECT_LOAD_RESPONDER; + } + + private: + std::shared_ptr<std::atomic_uint64_t> Traffic; + }; + + class TLoadResponderMasterActor : public TActorBootstrapped<TLoadResponderMasterActor> { + TVector<TActorId> Slaves; + ui32 SlaveIndex = 0; + + STRICT_STFUNC(StateFunc, + HFunc(TEvLoadMessage, Handle); + HFunc(TEvQueryTrafficCounter, Handle); + CFunc(TEvents::TSystem::PoisonPill, Die); + ) + + void Handle(TEvLoadMessage::TPtr& ev, const TActorContext& ctx) { + ctx.ExecutorThread.ActorSystem->Send(ev->Forward(Slaves[SlaveIndex])); + if (++SlaveIndex == 
Slaves.size()) { + SlaveIndex = 0; + } + } + + void Handle(TEvQueryTrafficCounter::TPtr ev, const TActorContext& ctx) { + ctx.Send(ev->Sender, new TEvTrafficCounter(Traffic)); + } + + void Die(const TActorContext& ctx) override { + for (const TActorId& actorId : Slaves) { + ctx.Send(actorId, new TEvents::TEvPoisonPill); + } + TActorBootstrapped::Die(ctx); + } + + public: + static constexpr IActor::EActivityType ActorActivityType() { + return IActor::INTERCONNECT_LOAD_RESPONDER; + } + + TLoadResponderMasterActor() + {} + + void Bootstrap(const TActorContext& ctx) { + Become(&TLoadResponderMasterActor::StateFunc); + while (Slaves.size() < 10) { + Slaves.push_back(ctx.Register(new TLoadResponderActor(Traffic))); + } + } + + private: + std::shared_ptr<std::atomic_uint64_t> Traffic = std::make_shared<std::atomic_uint64_t>(); + }; + + IActor* CreateLoadResponderActor() { + return new TLoadResponderMasterActor(); + } + + TActorId MakeLoadResponderActorId(ui32 nodeId) { + char x[12] = {'I', 'C', 'L', 'o', 'a', 'd', 'R', 'e', 's', 'p', 'A', 'c'}; + return TActorId(nodeId, TStringBuf(x, 12)); + } + + class TLoadActor: public TActorBootstrapped<TLoadActor> { + struct TEvGenerateMessages : TEventLocal<TEvGenerateMessages, EvGenerateMessages> {}; + struct TEvPublishResults : TEventLocal<TEvPublishResults, EvPublishResults> {}; + + struct TMessageInfo { + TInstant SendTimestamp; + + TMessageInfo(const TInstant& sendTimestamp) + : SendTimestamp(sendTimestamp) + { + } + }; + + const TLoadParams Params; + TInstant NextMessageTimestamp; + THashMap<TString, TMessageInfo> InFly; + ui64 NextId = 1; + TVector<TActorId> Hops; + TActorId FirstHop; + ui64 NumDropped = 0; + std::shared_ptr<std::atomic_uint64_t> Traffic; + + public: + static constexpr IActor::EActivityType ActorActivityType() { + return IActor::INTERCONNECT_LOAD_ACTOR; + } + + TLoadActor(const TLoadParams& params) + : Params(params) + {} + + void Bootstrap(const TActorContext& ctx) { + Become(&TLoadActor::QueryTrafficCounter); + ctx.Send(MakeLoadResponderActorId(SelfId().NodeId()), new TEvQueryTrafficCounter); + } + + void Handle(TEvTrafficCounter::TPtr ev, const TActorContext& ctx) { + Traffic = std::move(ev->Get()->Traffic); + + for (const ui32 nodeId : Params.NodeHops) { + const TActorId& actorId = nodeId ? MakeLoadResponderActorId(nodeId) : TActorId(); + if (!FirstHop) { + FirstHop = actorId; + } else { + Hops.push_back(actorId); + } + } + + Hops.push_back(ctx.SelfID); + + Become(&TLoadActor::StateFunc); + NextMessageTimestamp = ctx.Now(); + ResetThroughput(NextMessageTimestamp, *Traffic); + GenerateMessages(ctx); + ctx.Schedule(Params.Duration, new TEvents::TEvPoisonPill); + SchedulePublishResults(ctx); + } + + void GenerateMessages(const TActorContext& ctx) { + while (InFly.size() < Params.InFlyMax && ctx.Now() >= NextMessageTimestamp) { + // generate payload + const ui32 size = Params.SizeMin + RandomNumber(Params.SizeMax - Params.SizeMin + 1); + + // generate message id + const ui64 cookie = NextId++; + TString id = Sprintf("%" PRIu64, cookie); + + // create message and send it to the first hop + THolder<TEvLoadMessage> ev; + if (Params.UseProtobufWithPayload && size) { + auto buffer = TRopeAlignedBuffer::Allocate(size); + memset(buffer->GetBuffer(), '*', size); + ev.Reset(new TEvLoadMessage(Hops, id, TRope(buffer))); + } else { + TString payload; + if (size) { + payload = TString::Uninitialized(size); + memset(payload.Detach(), '*', size); + } + ev.Reset(new TEvLoadMessage(Hops, id, payload ? 
&payload : nullptr)); + } + UpdateThroughput(ev->CalculateSerializedSizeCached()); + ctx.Send(FirstHop, ev.Release(), IEventHandle::MakeFlags(Params.Channel, 0), cookie); + + // register in the map + InFly.emplace(id, TMessageInfo(ctx.Now())); + + // put item into timeout queue + PutTimeoutQueueItem(ctx, id); + + const TDuration duration = TDuration::MicroSeconds(Params.IntervalMin.GetValue() + + RandomNumber(Params.IntervalMax.GetValue() - Params.IntervalMin.GetValue() + 1)); + if (Params.SoftLoad) { + NextMessageTimestamp += duration; + } else { + NextMessageTimestamp = ctx.Now() + duration; + } + } + + // schedule next generate messages call + if (NextMessageTimestamp > ctx.Now() && InFly.size() < Params.InFlyMax) { + ctx.Schedule(NextMessageTimestamp - ctx.Now(), new TEvGenerateMessages); + } + } + + void Handle(TEvLoadMessage::TPtr& ev, const TActorContext& ctx) { + const auto& record = ev->Get()->Record; + auto it = InFly.find(record.GetId()); + if (it != InFly.end()) { + // record message rtt + const TDuration rtt = ctx.Now() - it->second.SendTimestamp; + UpdateHistogram(ctx.Now(), rtt); + + // update throughput + UpdateThroughput(ev->Get()->CalculateSerializedSizeCached()); + + // remove message from the in fly map + InFly.erase(it); + } else { + ++NumDropped; + } + GenerateMessages(ctx); + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // RTT HISTOGRAM + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + const TDuration AggregationPeriod = TDuration::Seconds(20); + TDeque<std::pair<TInstant, TDuration>> Histogram; + + void UpdateHistogram(TInstant when, TDuration rtt) { + Histogram.emplace_back(when, rtt); + + const TInstant barrier = when - AggregationPeriod; + while (Histogram && Histogram.front().first < barrier) { + Histogram.pop_front(); + } + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // THROUGHPUT + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + TInstant ThroughputFirstSample = TInstant::Zero(); + ui64 ThroughputSamples = 0; + ui64 ThroughputBytes = 0; + ui64 TrafficAtBegin = 0; + + void UpdateThroughput(ui64 bytes) { + ThroughputBytes += bytes; + ++ThroughputSamples; + } + + void ResetThroughput(TInstant when, ui64 traffic) { + ThroughputFirstSample = when; + ThroughputSamples = 0; + ThroughputBytes = 0; + TrafficAtBegin = traffic; + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // TIMEOUT QUEUE OPERATIONS + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + TQueue<std::pair<TInstant, TString>> TimeoutQueue; + + void PutTimeoutQueueItem(const TActorContext& ctx, TString id) { + TimeoutQueue.emplace(ctx.Now() + TDuration::Minutes(1), std::move(id)); + if (TimeoutQueue.size() == 1) { + ScheduleWakeup(ctx); + } + } + + void ScheduleWakeup(const TActorContext& ctx) { + ctx.Schedule(TimeoutQueue.front().first - ctx.Now(), new TEvents::TEvWakeup); + } + + void HandleWakeup(const TActorContext& ctx) { + ui32 numDropped = 0; + + while (TimeoutQueue && TimeoutQueue.front().first <= ctx.Now()) { + numDropped += InFly.erase(TimeoutQueue.front().second); + TimeoutQueue.pop(); + } + if (TimeoutQueue) { + // we still have some elements in timeout queue, so schedule 
next wake up to tidy up + ScheduleWakeup(ctx); + } + + NumDropped += numDropped; // account for messages dropped by timeout, not only for unknown replies + + GenerateMessages(ctx); + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // RESULT PUBLISHING + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + const TDuration ResultPublishPeriod = TDuration::Seconds(15); + + void SchedulePublishResults(const TActorContext& ctx) { + ctx.Schedule(ResultPublishPeriod, new TEvPublishResults); + } + + void PublishResults(const TActorContext& ctx, bool schedule = true) { + const TInstant now = ctx.Now(); + + TStringStream msg; + + msg << "Load# '" << Params.Name << "'"; + + msg << " Throughput# "; + const TDuration duration = now - ThroughputFirstSample; + const ui64 traffic = *Traffic; + msg << "{window# " << duration + << " bytes# " << ThroughputBytes + << " samples# " << ThroughputSamples + << " b/s# " << ui64(ThroughputBytes * 1000000 / duration.MicroSeconds()) + << " common# " << ui64((traffic - TrafficAtBegin) * 1000000 / duration.MicroSeconds()) + << "}"; + ResetThroughput(now, traffic); + + msg << " RTT# "; + if (Histogram) { + const TDuration duration = Histogram.back().first - Histogram.front().first; + msg << "{window# " << duration << " samples# " << Histogram.size(); + TVector<TDuration> v; + v.reserve(Histogram.size()); + for (const auto& item : Histogram) { + v.push_back(item.second); + } + std::sort(v.begin(), v.end()); + for (double q : {0.5, 0.9, 0.99, 0.999, 0.9999, 1.0}) { + const size_t pos = q * (v.size() - 1); + msg << Sprintf(" %.4f# %s", q, v[pos].ToString().data()); + } + msg << "}"; + } else { + msg << "<empty>"; + } + + msg << " NumDropped# " << NumDropped; + + if (!schedule) { + msg << " final"; + } + + LOG_NOTICE(ctx, NActorsServices::INTERCONNECT_SPEED_TEST, "%s", msg.Str().data()); + + if (schedule) { + SchedulePublishResults(ctx); + } + } + + STRICT_STFUNC(QueryTrafficCounter, + HFunc(TEvTrafficCounter, Handle); + ) + + STRICT_STFUNC(StateFunc, + CFunc(TEvents::TSystem::PoisonPill, Die); + CFunc(TEvents::TSystem::Wakeup, HandleWakeup); + CFunc(EvPublishResults, PublishResults); + CFunc(EvGenerateMessages, GenerateMessages); + HFunc(TEvLoadMessage, Handle); + ) + + void Die(const TActorContext& ctx) override { + PublishResults(ctx, false); + TActorBootstrapped::Die(ctx); + } + }; + + IActor* CreateLoadActor(const TLoadParams& params) { + return new TLoadActor(params); + } + +} diff --git a/library/cpp/actors/interconnect/load.h b/library/cpp/actors/interconnect/load.h new file mode 100644 index 0000000000..0a01a0dc04 --- /dev/null +++ b/library/cpp/actors/interconnect/load.h @@ -0,0 +1,24 @@ +#pragma once + +#include <library/cpp/actors/core/actor.h> + +namespace NInterconnect { + // load responder -- lives on every node as a service actor + NActors::IActor* CreateLoadResponderActor(); + NActors::TActorId MakeLoadResponderActorId(ui32 node); + + // load actor -- generates load with specific parameters + struct TLoadParams { + TString Name; + ui32 Channel; + TVector<ui32> NodeHops; // node ids for the message route + ui32 SizeMin, SizeMax; // min and max size for payloads + ui32 InFlyMax; // maximum number of in fly messages + TDuration IntervalMin, IntervalMax; // min and max intervals between sending messages + bool SoftLoad; // soft load: keep a cumulative send schedule (see note below) 
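+ // NB (per GenerateMessages() above): with soft load the next send time advances by the generated + // interval (NextMessageTimestamp += duration), so the actor catches up after stalls and preserves + // the average rate; otherwise each send is scheduled relative to the current time + // (ctx.Now() + duration), so stalls simply shift the schedule forward.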
+ TDuration Duration; // test duration + bool UseProtobufWithPayload; // store payload separately + }; + NActors::IActor* CreateLoadActor(const TLoadParams& params); + +} diff --git a/library/cpp/actors/interconnect/logging.h b/library/cpp/actors/interconnect/logging.h new file mode 100644 index 0000000000..c429d1cade --- /dev/null +++ b/library/cpp/actors/interconnect/logging.h @@ -0,0 +1,68 @@ +#pragma once + +#include <library/cpp/actors/core/log.h> +#include <library/cpp/actors/protos/services_common.pb.h> + +#define LOG_LOG_IC_X(component, marker, priority, ...) \ + do { \ + LOG_LOG(this->GetActorContext(), (priority), (component), "%s " marker " %s", LogPrefix.data(), Sprintf(__VA_ARGS__).data()); \ + } while (false) + +#define LOG_LOG_NET_X(priority, NODE_ID, FMT, ...) \ + do { \ + const TActorContext& ctx = this->GetActorContext(); \ + LOG_LOG(ctx, (priority), ::NActorsServices::INTERCONNECT_NETWORK, "[%" PRIu32 " <-> %" PRIu32 "] %s", \ + ctx.SelfID.NodeId(), (NODE_ID), Sprintf(FMT, __VA_ARGS__).data()); \ + } while (false) + +#define LOG_LOG_IC(component, marker, priority, ...) \ + do { \ + LOG_LOG(::NActors::TActivationContext::AsActorContext(), (priority), (component), "%s " marker " %s", LogPrefix.data(), Sprintf(__VA_ARGS__).data()); \ + } while (false) + +#define LOG_LOG_NET(priority, NODE_ID, FMT, ...) \ + do { \ + const TActorContext& ctx = ::NActors::TActivationContext::AsActorContext(); \ + LOG_LOG(ctx, (priority), ::NActorsServices::INTERCONNECT_NETWORK, "[%" PRIu32 " <-> %" PRIu32 "] %s", \ + ctx.SelfID.NodeId(), (NODE_ID), Sprintf(FMT, __VA_ARGS__).data()); \ + } while (false) + +#define LOG_EMER_IC(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT, marker, ::NActors::NLog::PRI_EMER, __VA_ARGS__) +#define LOG_ALERT_IC(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT, marker, ::NActors::NLog::PRI_ALERT, __VA_ARGS__) +#define LOG_CRIT_IC(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT, marker, ::NActors::NLog::PRI_CRIT, __VA_ARGS__) +#define LOG_ERROR_IC(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT, marker, ::NActors::NLog::PRI_ERROR, __VA_ARGS__) +#define LOG_WARN_IC(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT, marker, ::NActors::NLog::PRI_WARN, __VA_ARGS__) +#define LOG_NOTICE_IC(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT, marker, ::NActors::NLog::PRI_NOTICE, __VA_ARGS__) +#define LOG_INFO_IC(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT, marker, ::NActors::NLog::PRI_INFO, __VA_ARGS__) +#define LOG_DEBUG_IC(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT, marker, ::NActors::NLog::PRI_DEBUG, __VA_ARGS__) + +#define LOG_EMER_IC_SESSION(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT_SESSION, marker, ::NActors::NLog::PRI_EMER, __VA_ARGS__) +#define LOG_ALERT_IC_SESSION(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT_SESSION, marker, ::NActors::NLog::PRI_ALERT, __VA_ARGS__) +#define LOG_CRIT_IC_SESSION(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT_SESSION, marker, ::NActors::NLog::PRI_CRIT, __VA_ARGS__) +#define LOG_ERROR_IC_SESSION(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT_SESSION, marker, ::NActors::NLog::PRI_ERROR, __VA_ARGS__) +#define LOG_WARN_IC_SESSION(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT_SESSION, marker, ::NActors::NLog::PRI_WARN, __VA_ARGS__) +#define LOG_NOTICE_IC_SESSION(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT_SESSION, marker, ::NActors::NLog::PRI_NOTICE, __VA_ARGS__) +#define LOG_INFO_IC_SESSION(marker, ...) 
LOG_LOG_IC(::NActorsServices::INTERCONNECT_SESSION, marker, ::NActors::NLog::PRI_INFO, __VA_ARGS__) +#define LOG_DEBUG_IC_SESSION(marker, ...) LOG_LOG_IC(::NActorsServices::INTERCONNECT_SESSION, marker, ::NActors::NLog::PRI_DEBUG, __VA_ARGS__) + +#define LOG_NOTICE_NET(NODE_ID, FMT, ...) LOG_LOG_NET(::NActors::NLog::PRI_NOTICE, NODE_ID, FMT, __VA_ARGS__) +#define LOG_DEBUG_NET(NODE_ID, FMT, ...) LOG_LOG_NET(::NActors::NLog::PRI_DEBUG, NODE_ID, FMT, __VA_ARGS__) + +namespace NActors { + class TInterconnectLoggingBase { + protected: + const TString LogPrefix; + + public: + TInterconnectLoggingBase() = default; + + TInterconnectLoggingBase(const TString& prefix) + : LogPrefix(prefix) + { + } + + void SetPrefix(TString logPrefix) const { + logPrefix.swap(const_cast<TString&>(LogPrefix)); + } + }; +} diff --git a/library/cpp/actors/interconnect/mock/ic_mock.cpp b/library/cpp/actors/interconnect/mock/ic_mock.cpp new file mode 100644 index 0000000000..884503e602 --- /dev/null +++ b/library/cpp/actors/interconnect/mock/ic_mock.cpp @@ -0,0 +1,298 @@ +#include "ic_mock.h" +#include <library/cpp/actors/core/interconnect.h> +#include <util/system/yield.h> +#include <thread> + +namespace NActors { + + class TInterconnectMock::TImpl { + enum { + EvInject = EventSpaceBegin(TEvents::ES_PRIVATE), + EvCheckSession, + EvRam, + }; + + struct TEvInject : TEventLocal<TEvInject, EvInject> { + std::deque<std::unique_ptr<IEventHandle>> Messages; + const TScopeId OriginScopeId; + const ui64 SenderSessionId; + + TEvInject(std::deque<std::unique_ptr<IEventHandle>>&& messages, const TScopeId& originScopeId, ui64 senderSessionId) + : Messages(std::move(messages)) + , OriginScopeId(originScopeId) + , SenderSessionId(senderSessionId) + {} + }; + + class TProxyMockActor; + + class TConnectionState { + struct TPeerInfo { + TRWMutex Mutex; + TActorSystem *ActorSystem = nullptr; + TActorId ProxyId; + }; + + const ui64 Key; + TPeerInfo PeerInfo[2]; + std::atomic_uint64_t SessionId = 0; + + public: + TConnectionState(ui64 key) + : Key(key) + {} + + void Attach(ui32 nodeId, TActorSystem *as, const TActorId& actorId) { + TPeerInfo *peer = GetPeer(nodeId); + auto guard = TWriteGuard(peer->Mutex); + Y_VERIFY(!peer->ActorSystem); + peer->ActorSystem = as; + peer->ProxyId = actorId; + as->DeferPreStop([peer] { + auto guard = TWriteGuard(peer->Mutex); + peer->ActorSystem = nullptr; + }); + } + + void Inject(ui32 peerNodeId, std::deque<std::unique_ptr<IEventHandle>>&& messages, + const TScopeId& originScopeId, ui64 senderSessionId) { + TPeerInfo *peer = GetPeer(peerNodeId); + auto guard = TReadGuard(peer->Mutex); + if (peer->ActorSystem) { + peer->ActorSystem->Send(new IEventHandle(peer->ProxyId, TActorId(), new TEvInject(std::move(messages), + originScopeId, senderSessionId))); + } else { + for (auto&& ev : messages) { + TActivationContext::Send(ev->ForwardOnNondelivery(TEvents::TEvUndelivered::Disconnected)); + } + } + } + + ui64 GetValidSessionId() const { + return SessionId; + } + + void InvalidateSessionId(ui32 peerNodeId) { + ++SessionId; + TPeerInfo *peer = GetPeer(peerNodeId); + auto guard = TReadGuard(peer->Mutex); + if (peer->ActorSystem) { + peer->ActorSystem->Send(new IEventHandle(EvCheckSession, 0, peer->ProxyId, {}, nullptr, 0)); + } + } + + private: + TPeerInfo *GetPeer(ui32 nodeId) { + if (nodeId == ui32(Key)) { + return PeerInfo; + } else if (nodeId == ui32(Key >> 32)) { + return PeerInfo + 1; + } else { + Y_FAIL(); + } + } + }; + + class TProxyMockActor : public TActor<TProxyMockActor> { + class TSessionMockActor : 
public TActor<TSessionMockActor> { + std::map<TActorId, ui64> Subscribers; + TProxyMockActor* const Proxy; + std::deque<std::unique_ptr<IEventHandle>> Queue; + + public: + const ui64 SessionId; + + public: + TSessionMockActor(TProxyMockActor *proxy, ui64 sessionId) + : TActor(&TThis::StateFunc) + , Proxy(proxy) + , SessionId(sessionId) + {} + + void Terminate() { + for (auto&& ev : std::exchange(Queue, {})) { + TActivationContext::Send(ev->ForwardOnNondelivery(TEvents::TEvUndelivered::Disconnected)); + } + for (const auto& kv : Subscribers) { + Send(kv.first, new TEvInterconnect::TEvNodeDisconnected(Proxy->PeerNodeId), 0, kv.second); + } + Y_VERIFY(Proxy->Session == this); + Proxy->Session = nullptr; + PassAway(); + } + + void HandleForward(TAutoPtr<IEventHandle> ev) { + if (ev->Flags & IEventHandle::FlagSubscribeOnSession) { + Subscribe(ev->Sender, ev->Cookie); + } + if (Queue.empty()) { + TActivationContext::Send(new IEventHandle(EvRam, 0, SelfId(), {}, {}, 0)); + } + Queue.emplace_back(ev.Release()); + } + + void HandleRam() { + if (SessionId != Proxy->State.GetValidSessionId()) { + Terminate(); + } else { + Proxy->PeerInject(std::exchange(Queue, {})); + } + } + + void Handle(TEvInterconnect::TEvConnectNode::TPtr ev) { + Subscribe(ev->Sender, ev->Cookie); + } + + void Handle(TEvents::TEvSubscribe::TPtr ev) { + Subscribe(ev->Sender, ev->Cookie); + } + + void Handle(TEvents::TEvUnsubscribe::TPtr ev) { + Subscribers.erase(ev->Sender); + } + + void HandlePoison() { + Proxy->Disconnect(); + } + + STRICT_STFUNC(StateFunc, + fFunc(TEvInterconnect::EvForward, HandleForward) + hFunc(TEvInterconnect::TEvConnectNode, Handle) + hFunc(TEvents::TEvSubscribe, Handle) + hFunc(TEvents::TEvUnsubscribe, Handle) + cFunc(TEvents::TSystem::Poison, HandlePoison) + cFunc(EvRam, HandleRam) + ) + + private: + void Subscribe(const TActorId& actorId, ui64 cookie) { + Subscribers[actorId] = cookie; + Send(actorId, new TEvInterconnect::TEvNodeConnected(Proxy->PeerNodeId), 0, cookie); + } + }; + + friend class TSessionMockActor; + + const ui32 NodeId; + const ui32 PeerNodeId; + TConnectionState& State; + const TInterconnectProxyCommon::TPtr Common; + TSessionMockActor *Session = nullptr; + + public: + TProxyMockActor(ui32 nodeId, ui32 peerNodeId, TConnectionState& state, TInterconnectProxyCommon::TPtr common) + : TActor(&TThis::StateFunc) + , NodeId(nodeId) + , PeerNodeId(peerNodeId) + , State(state) + , Common(std::move(common)) + {} + + void Registered(TActorSystem *as, const TActorId& parent) override { + TActor::Registered(as, parent); + State.Attach(NodeId, as, SelfId()); + } + + void Handle(TEvInject::TPtr ev) { + auto *msg = ev->Get(); + if (Session && Session->SessionId != msg->SenderSessionId) { + return; // drop messages from other sessions + } + if (auto *session = GetSession()) { + for (auto&& ev : ev->Get()->Messages) { + auto fw = std::make_unique<IEventHandle>( + session->SelfId(), + ev->Type, + ev->Flags & ~IEventHandle::FlagForwardOnNondelivery, + ev->Recipient, + ev->Sender, + ev->ReleaseChainBuffer(), + ev->Cookie, + msg->OriginScopeId, + std::move(ev->TraceId) + ); + if (!Common->EventFilter || Common->EventFilter->CheckIncomingEvent(*fw, Common->LocalScopeId)) { + TActivationContext::Send(fw.release()); + } + } + } + } + + void PassAway() override { + Disconnect(); + TActor::PassAway(); + } + + TSessionMockActor *GetSession() { + CheckSession(); + if (!Session) { + Session = new TSessionMockActor(this, State.GetValidSessionId()); + RegisterWithSameMailbox(Session); + } + return Session; + } + + 
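// NB: forwarded and subscription events (see StateFunc below) are served by a lazily created mock + // session: HandleSessionEvent invokes the session actor's handler inline via InvokeOtherActor, so + // per-mailbox ordering is preserved without any real network I/O. +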
void HandleSessionEvent(TAutoPtr<IEventHandle> ev) { + auto *session = GetSession(); + InvokeOtherActor(*session, &TSessionMockActor::Receive, ev, + TActivationContext::ActorContextFor(session->SelfId())); + } + + void Disconnect() { + State.InvalidateSessionId(PeerNodeId); + if (Session) { + Session->Terminate(); + } + } + + void CheckSession() { + if (Session && Session->SessionId != State.GetValidSessionId()) { + Session->Terminate(); + } + } + + void PeerInject(std::deque<std::unique_ptr<IEventHandle>>&& messages) { + Y_VERIFY(Session); + return State.Inject(PeerNodeId, std::move(messages), Common->LocalScopeId, Session->SessionId); + } + + STRICT_STFUNC(StateFunc, + cFunc(TEvents::TSystem::Poison, PassAway) + fFunc(TEvInterconnect::EvForward, HandleSessionEvent) + fFunc(TEvInterconnect::EvConnectNode, HandleSessionEvent) + fFunc(TEvents::TSystem::Subscribe, HandleSessionEvent) + fFunc(TEvents::TSystem::Unsubscribe, HandleSessionEvent) + cFunc(TEvInterconnect::EvDisconnect, Disconnect) + IgnoreFunc(TEvInterconnect::TEvClosePeerSocket) + IgnoreFunc(TEvInterconnect::TEvCloseInputSession) + cFunc(TEvInterconnect::EvPoisonSession, Disconnect) + hFunc(TEvInject, Handle) + cFunc(EvCheckSession, CheckSession) + ) + }; + + std::unordered_map<ui64, TConnectionState> States; + + public: + IActor *CreateProxyMock(ui32 nodeId, ui32 peerNodeId, TInterconnectProxyCommon::TPtr common) { + Y_VERIFY(nodeId != peerNodeId); + Y_VERIFY(nodeId); + Y_VERIFY(peerNodeId); + const ui64 key = std::min(nodeId, peerNodeId) | ui64(std::max(nodeId, peerNodeId)) << 32; + auto it = States.try_emplace(key, key).first; + return new TProxyMockActor(nodeId, peerNodeId, it->second, std::move(common)); + } + }; + + TInterconnectMock::TInterconnectMock() + : Impl(std::make_unique<TImpl>()) + {} + + TInterconnectMock::~TInterconnectMock() + {} + + IActor *TInterconnectMock::CreateProxyMock(ui32 nodeId, ui32 peerNodeId, TInterconnectProxyCommon::TPtr common) { + return Impl->CreateProxyMock(nodeId, peerNodeId, std::move(common)); + } + +} // NActors diff --git a/library/cpp/actors/interconnect/mock/ic_mock.h b/library/cpp/actors/interconnect/mock/ic_mock.h new file mode 100644 index 0000000000..636bdc2b7f --- /dev/null +++ b/library/cpp/actors/interconnect/mock/ic_mock.h @@ -0,0 +1,19 @@ +#pragma once + +#include <library/cpp/actors/core/actor.h> + +#include <library/cpp/actors/interconnect/interconnect_common.h> + +namespace NActors { + + class TInterconnectMock { + class TImpl; + std::unique_ptr<TImpl> Impl; + + public: + TInterconnectMock(); + ~TInterconnectMock(); + IActor *CreateProxyMock(ui32 nodeId, ui32 peerNodeId, TInterconnectProxyCommon::TPtr common); + }; + +} // NActors diff --git a/library/cpp/actors/interconnect/mock/tsan.supp b/library/cpp/actors/interconnect/mock/tsan.supp new file mode 100644 index 0000000000..19fd059419 --- /dev/null +++ b/library/cpp/actors/interconnect/mock/tsan.supp @@ -0,0 +1 @@ +deadlock:Attach diff --git a/library/cpp/actors/interconnect/mock/ya.make b/library/cpp/actors/interconnect/mock/ya.make new file mode 100644 index 0000000000..19a2834162 --- /dev/null +++ b/library/cpp/actors/interconnect/mock/ya.make @@ -0,0 +1,16 @@ +LIBRARY() + +OWNER(alexvru) + +SRCS( + ic_mock.cpp + ic_mock.h +) + +SUPPRESSIONS(tsan.supp) + +PEERDIR( + library/cpp/actors/interconnect +) + +END() diff --git a/library/cpp/actors/interconnect/packet.cpp b/library/cpp/actors/interconnect/packet.cpp new file mode 100644 index 0000000000..e2c289ed59 --- /dev/null +++ b/library/cpp/actors/interconnect/packet.cpp 
@@ -0,0 +1,32 @@ +#include "packet.h" + +#include <library/cpp/actors/core/probes.h> + +#include <util/system/datetime.h> + +LWTRACE_USING(ACTORLIB_PROVIDER); + +ui32 TEventHolder::Fill(IEventHandle& ev) { + Serial = 0; + Descr.Type = ev.Type; + Descr.Flags = ev.Flags; + Descr.Recipient = ev.Recipient; + Descr.Sender = ev.Sender; + Descr.Cookie = ev.Cookie; + ev.TraceId.Serialize(&Descr.TraceId); + ForwardRecipient = ev.GetForwardOnNondeliveryRecipient(); + EventActuallySerialized = 0; + Descr.Checksum = 0; + + if (ev.HasBuffer()) { + Buffer = ev.ReleaseChainBuffer(); + EventSerializedSize = Buffer->GetSize(); + } else if (ev.HasEvent()) { + Event.Reset(ev.ReleaseBase()); + EventSerializedSize = Event->CalculateSerializedSize(); + } else { + EventSerializedSize = 0; + } + + return EventSerializedSize; +} diff --git a/library/cpp/actors/interconnect/packet.h b/library/cpp/actors/interconnect/packet.h new file mode 100644 index 0000000000..4ba50a2b5f --- /dev/null +++ b/library/cpp/actors/interconnect/packet.h @@ -0,0 +1,324 @@ +#pragma once + +#include <library/cpp/actors/core/event_pb.h> +#include <library/cpp/actors/core/event_load.h> +#include <library/cpp/actors/core/events.h> +#include <library/cpp/actors/core/actor.h> +#include <library/cpp/containers/stack_vector/stack_vec.h> +#include <library/cpp/actors/util/rope.h> +#include <library/cpp/actors/prof/tag.h> +#include <library/cpp/digest/crc32c/crc32c.h> +#include <library/cpp/lwtrace/shuttle.h> +#include <util/generic/string.h> +#include <util/generic/list.h> + +#ifndef FORCE_EVENT_CHECKSUM +#define FORCE_EVENT_CHECKSUM 0 +#endif + +using NActors::IEventBase; +using NActors::IEventHandle; +using NActors::TActorId; +using NActors::TConstIoVec; +using NActors::TEventSerializedData; + +Y_FORCE_INLINE ui32 Crc32cExtendMSanCompatible(ui32 checksum, const void *data, size_t len) { + if constexpr (NSan::MSanIsOn()) { + const char *begin = static_cast<const char*>(data); + const char *end = begin + len; + begin -= reinterpret_cast<uintptr_t>(begin) & 15; + end += -reinterpret_cast<uintptr_t>(end) & 15; + NSan::Unpoison(begin, end - begin); + } + return Crc32cExtend(checksum, data, len); +} + +struct TSessionParams { + bool Encryption = {}; + bool UseModernFrame = {}; + bool AuthOnly = {}; + TString AuthCN; + NActors::TScopeId PeerScopeId; +}; + +struct TTcpPacketHeader_v1 { + ui32 HeaderCRC32; + ui32 PayloadCRC32; + ui64 Confirm; + ui64 Serial; + ui64 DataSize; + + inline bool Check() const { + ui32 actual = Crc32cExtendMSanCompatible(0, &PayloadCRC32, sizeof(TTcpPacketHeader_v1) - sizeof(HeaderCRC32)); + return actual == HeaderCRC32; + } + + inline void Sign() { + HeaderCRC32 = Crc32cExtendMSanCompatible(0, &PayloadCRC32, sizeof(TTcpPacketHeader_v1) - sizeof(HeaderCRC32)); + } + + TString ToString() const { + return Sprintf("{Confirm# %" PRIu64 " Serial# %" PRIu64 " DataSize# %" PRIu64 "}", Confirm, Serial, DataSize); + } +}; + +#pragma pack(push, 1) +struct TTcpPacketHeader_v2 { + ui64 Confirm; + ui64 Serial; + ui32 Checksum; // for the whole frame + ui16 PayloadLength; +}; +#pragma pack(pop) + +union TTcpPacketBuf { + static constexpr ui64 PingRequestMask = 0x8000000000000000ULL; + static constexpr ui64 PingResponseMask = 0x4000000000000000ULL; + static constexpr ui64 ClockMask = 0x2000000000000000ULL; + + static constexpr size_t PacketDataLen = 4096 * 2 - 96 - Max(sizeof(TTcpPacketHeader_v1), sizeof(TTcpPacketHeader_v2)); + struct { + TTcpPacketHeader_v1 Header; + char Data[PacketDataLen]; + } v1; + struct { + TTcpPacketHeader_v2 
Header; + char Data[PacketDataLen]; + } v2; +}; + +#pragma pack(push, 1) +struct TEventDescr { + ui32 Type; + ui32 Flags; + TActorId Recipient; + TActorId Sender; + ui64 Cookie; + // wilson trace id is stored as a serialized entity to avoid using complex object with prohibited copy ctor + NWilson::TTraceId::TSerializedTraceId TraceId; + ui32 Checksum; +}; +#pragma pack(pop) + +struct TEventHolder : TNonCopyable { + TEventDescr Descr; + TActorId ForwardRecipient; + THolder<IEventBase> Event; + TIntrusivePtr<TEventSerializedData> Buffer; + ui64 Serial; + ui32 EventSerializedSize; + ui32 EventActuallySerialized; + mutable NLWTrace::TOrbit Orbit; + + ui32 Fill(IEventHandle& ev); + + void InitChecksum() { + Descr.Checksum = 0; + } + + void UpdateChecksum(const TSessionParams& params, const void *buffer, size_t len) { + if (FORCE_EVENT_CHECKSUM || !params.UseModernFrame) { + Descr.Checksum = Crc32cExtendMSanCompatible(Descr.Checksum, buffer, len); + } + } + + void ForwardOnNondelivery(bool unsure) { + TEventDescr& d = Descr; + const TActorId& r = d.Recipient; + const TActorId& s = d.Sender; + const TActorId *f = ForwardRecipient ? &ForwardRecipient : nullptr; + auto ev = Event + ? std::make_unique<IEventHandle>(r, s, Event.Release(), d.Flags, d.Cookie, f, NWilson::TTraceId(d.TraceId)) + : std::make_unique<IEventHandle>(d.Type, d.Flags, r, s, std::move(Buffer), d.Cookie, f, NWilson::TTraceId(d.TraceId)); + NActors::TActivationContext::Send(ev->ForwardOnNondelivery(NActors::TEvents::TEvUndelivered::Disconnected, unsure)); + } + + void Clear() { + Event.Reset(); + Buffer.Reset(); + Orbit.Reset(); + } +}; + +namespace NActors { + class TEventOutputChannel; +} + +struct TTcpPacketOutTask : TNonCopyable { + const TSessionParams& Params; + TTcpPacketBuf Packet; + size_t DataSize; + TStackVec<TConstIoVec, 32> Bufs; + size_t BufferIndex; + size_t FirstBufferOffset; + bool TriedWriting; + char *FreeArea; + char *End; + mutable NLWTrace::TOrbit Orbit; + +public: + TTcpPacketOutTask(const TSessionParams& params) + : Params(params) + { + Reuse(); + } + + template<typename T> + auto ApplyToHeader(T&& callback) { + return Params.UseModernFrame ? callback(Packet.v2.Header) : callback(Packet.v1.Header); + } + + template<typename T> + auto ApplyToHeader(T&& callback) const { + return Params.UseModernFrame ? callback(Packet.v2.Header) : callback(Packet.v1.Header); + } + + bool IsAtBegin() const { + return !BufferIndex && !FirstBufferOffset && !TriedWriting; + } + + void MarkTriedWriting() { + TriedWriting = true; + } + + void Reuse() { + DataSize = 0; + ApplyToHeader([this](auto& header) { Bufs.assign(1, {&header, sizeof(header)}); }); + BufferIndex = 0; + FirstBufferOffset = 0; + TriedWriting = false; + FreeArea = Params.UseModernFrame ? 
Packet.v2.Data : Packet.v1.Data; + End = FreeArea + TTcpPacketBuf::PacketDataLen; + Orbit.Reset(); + } + + bool IsEmpty() const { + return !DataSize; + } + + void SetMetadata(ui64 serial, ui64 confirm) { + ApplyToHeader([&](auto& header) { + header.Serial = serial; + header.Confirm = confirm; + }); + } + + void UpdateConfirmIfPossible(ui64 confirm) { + // we don't want to recalculate whole packet checksum for single confirmation update on v2 + if (!Params.UseModernFrame && IsAtBegin() && confirm != Packet.v1.Header.Confirm) { + Packet.v1.Header.Confirm = confirm; + Packet.v1.Header.Sign(); + } + } + + size_t GetDataSize() const { return DataSize; } + + ui64 GetSerial() const { + return ApplyToHeader([](auto& header) { return header.Serial; }); + } + + bool Confirmed(ui64 confirm) const { + return ApplyToHeader([&](auto& header) { return IsEmpty() || header.Serial <= confirm; }); + } + + void *GetFreeArea() { + return FreeArea; + } + + size_t GetVirtualFreeAmount() const { + return TTcpPacketBuf::PacketDataLen - DataSize; + } + + void AppendBuf(const void *buf, size_t size) { + DataSize += size; + Y_VERIFY_DEBUG(DataSize <= TTcpPacketBuf::PacketDataLen, "DataSize# %zu AppendBuf buf# %p size# %zu" + " FreeArea# %p End# %p", DataSize, buf, size, FreeArea, End); + + if (Bufs && static_cast<const char*>(Bufs.back().Data) + Bufs.back().Size == buf) { + Bufs.back().Size += size; + } else { + Bufs.push_back({buf, size}); + } + + if (buf >= FreeArea && buf < End) { + Y_VERIFY_DEBUG(buf == FreeArea); + FreeArea = const_cast<char*>(static_cast<const char*>(buf)) + size; + Y_VERIFY_DEBUG(FreeArea <= End); + } + } + + void Undo(size_t size) { + Y_VERIFY(Bufs); + auto& buf = Bufs.back(); + Y_VERIFY(buf.Data == FreeArea - buf.Size); + buf.Size -= size; + if (!buf.Size) { + Bufs.pop_back(); + } + FreeArea -= size; + DataSize -= size; + } + + bool DropBufs(size_t& amount) { + while (BufferIndex != Bufs.size()) { + TConstIoVec& item = Bufs[BufferIndex]; + // calculate number of bytes to the end in current buffer + const size_t remain = item.Size - FirstBufferOffset; + if (amount >= remain) { + // vector item completely fits into the received amount, drop it out and switch to next buffer + amount -= remain; + ++BufferIndex; + FirstBufferOffset = 0; + } else { + // adjust first buffer by "amount" bytes forward and reset amount to zero + FirstBufferOffset += amount; + amount = 0; + // return false meaning that we have some more data to send + return false; + } + } + return true; + } + + void ResetBufs() { + BufferIndex = FirstBufferOffset = 0; + TriedWriting = false; + } + + template <typename TVectorType> + void AppendToIoVector(TVectorType& vector, size_t max) { + for (size_t k = BufferIndex, offset = FirstBufferOffset; k != Bufs.size() && vector.size() < max; ++k, offset = 0) { + TConstIoVec v = Bufs[k]; + v.Data = static_cast<const char*>(v.Data) + offset; + v.Size -= offset; + vector.push_back(v); + } + } + + void Sign() { + if (Params.UseModernFrame) { + Packet.v2.Header.Checksum = 0; + Packet.v2.Header.PayloadLength = DataSize; + if (!Params.Encryption) { + ui32 sum = 0; + for (const auto& item : Bufs) { + sum = Crc32cExtendMSanCompatible(sum, item.Data, item.Size); + } + Packet.v2.Header.Checksum = sum; + } + } else { + Y_VERIFY(!Bufs.empty()); + auto it = Bufs.begin(); + static constexpr size_t headerLen = sizeof(TTcpPacketHeader_v1); + Y_VERIFY(it->Data == &Packet.v1.Header && it->Size >= headerLen); + ui32 sum = Crc32cExtendMSanCompatible(0, Packet.v1.Data, it->Size - headerLen); + while (++it != 
Bufs.end()) { + sum = Crc32cExtendMSanCompatible(sum, it->Data, it->Size); + } + + Packet.v1.Header.PayloadCRC32 = sum; + Packet.v1.Header.DataSize = DataSize; + Packet.v1.Header.Sign(); + } + } +}; diff --git a/library/cpp/actors/interconnect/poller.h b/library/cpp/actors/interconnect/poller.h new file mode 100644 index 0000000000..ff7979369f --- /dev/null +++ b/library/cpp/actors/interconnect/poller.h @@ -0,0 +1,23 @@ +#pragma once + +#include <functional> +#include <library/cpp/actors/core/events.h> + +namespace NActors { + class TSharedDescriptor: public TThrRefBase { + public: + virtual int GetDescriptor() = 0; + }; + + using TDelegate = std::function<void()>; + using TFDDelegate = std::function<TDelegate(const TIntrusivePtr<TSharedDescriptor>&)>; + + class IPoller: public TThrRefBase { + public: + virtual ~IPoller() = default; + + virtual void StartRead(const TIntrusivePtr<TSharedDescriptor>& s, TFDDelegate&& operation) = 0; + virtual void StartWrite(const TIntrusivePtr<TSharedDescriptor>& s, TFDDelegate&& operation) = 0; + }; + +} diff --git a/library/cpp/actors/interconnect/poller_actor.cpp b/library/cpp/actors/interconnect/poller_actor.cpp new file mode 100644 index 0000000000..e75cbcaef4 --- /dev/null +++ b/library/cpp/actors/interconnect/poller_actor.cpp @@ -0,0 +1,294 @@ +#include "poller_actor.h" +#include "interconnect_common.h" + +#include <library/cpp/actors/core/actor_bootstrapped.h> +#include <library/cpp/actors/core/actorsystem.h> +#include <library/cpp/actors/core/hfunc.h> +#include <library/cpp/actors/core/log.h> +#include <library/cpp/actors/core/probes.h> +#include <library/cpp/actors/protos/services_common.pb.h> +#include <library/cpp/actors/util/funnel_queue.h> + +#include <util/generic/intrlist.h> +#include <util/system/thread.h> +#include <util/system/event.h> +#include <util/system/pipe.h> + +#include <variant> + +namespace NActors { + + LWTRACE_USING(ACTORLIB_PROVIDER); + + namespace { + int LastSocketError() { +#if defined(_win_) + return WSAGetLastError(); +#else + return errno; +#endif + } + } + + struct TSocketRecord : TThrRefBase { + const TIntrusivePtr<TSharedDescriptor> Socket; + const TActorId ReadActorId; + const TActorId WriteActorId; + std::atomic_uint32_t Flags = 0; + + TSocketRecord(TEvPollerRegister& ev) + : Socket(std::move(ev.Socket)) + , ReadActorId(ev.ReadActorId) + , WriteActorId(ev.WriteActorId) + {} + }; + + template<typename TDerived> + class TPollerThreadBase : public ISimpleThread { + protected: + struct TPollerExitThread {}; // issued when we need to terminate the poller thread + + struct TPollerWakeup {}; + + struct TPollerUnregisterSocket { + TIntrusivePtr<TSharedDescriptor> Socket; + + TPollerUnregisterSocket(TIntrusivePtr<TSharedDescriptor> socket) + : Socket(std::move(socket)) + {} + }; + + using TPollerSyncOperation = std::variant<TPollerExitThread, TPollerWakeup, TPollerUnregisterSocket>; + + struct TPollerSyncOperationWrapper { + TPollerSyncOperation Operation; + TManualEvent Event; + + TPollerSyncOperationWrapper(TPollerSyncOperation&& operation) + : Operation(std::move(operation)) + {} + + void Wait() { + Event.WaitI(); + } + + void SignalDone() { + Event.Signal(); + } + }; + + TActorSystem *ActorSystem; + TPipeHandle ReadEnd, WriteEnd; // pipe for sync event processor + TFunnelQueue<TPollerSyncOperationWrapper*> SyncOperationsQ; // operation queue + + public: + TPollerThreadBase(TActorSystem *actorSystem) + : ActorSystem(actorSystem) + { + // create a pipe for notifications + try { + TPipeHandle::Pipe(ReadEnd, WriteEnd, CloseOnExec); + } catch (const TFileError& err) { + Y_FAIL("failed to create pipe"); + } + + // switch the read/write ends to nonblocking mode + SetNonBlock(ReadEnd); + SetNonBlock(WriteEnd); + } + + void UnregisterSocket(const TIntrusivePtr<TSocketRecord>& record) { + ExecuteSyncOperation(TPollerUnregisterSocket(record->Socket)); + } + + protected: + void Notify(TSocketRecord *record, bool read, bool write) { + auto issue = [&](const TActorId& recipient) { + ActorSystem->Send(new IEventHandle(recipient, {}, new TEvPollerReady(record->Socket, read, write))); + }; + if (read && record->ReadActorId) { + issue(record->ReadActorId); + if (write && record->WriteActorId && record->WriteActorId != record->ReadActorId) { + issue(record->WriteActorId); + } + } else if (write && record->WriteActorId) { + issue(record->WriteActorId); + } + } + + void Stop() { + // signal poller thread to stop and wait for the thread + ExecuteSyncOperation(TPollerExitThread()); + ISimpleThread::Join(); + } + + void ExecuteSyncOperation(TPollerSyncOperation&& op) { + TPollerSyncOperationWrapper wrapper(std::move(op)); + if (SyncOperationsQ.Push(&wrapper)) { + // this was the first entry, so we push notification through the pipe + for (;;) { + char buffer = '\x00'; + ssize_t nwritten = WriteEnd.Write(&buffer, sizeof(buffer)); + if (nwritten < 0) { + const int err = LastSocketError(); + if (err == EINTR) { + continue; + } else { + Y_FAIL("WriteEnd.Write() failed with %s", strerror(err)); + } + } else { + Y_VERIFY(nwritten); + break; + } + } + } + // wait for operation to complete + wrapper.Wait(); + } + + // drain the notification pipe; returns true when at least one notification byte was read + bool DrainReadEnd() { + size_t totalRead = 0; + char buffer[4096]; + for (;;) { + ssize_t n = ReadEnd.Read(buffer, sizeof(buffer)); + if (n < 0) { + const int error = LastSocketError(); + if (error == EINTR) { + continue; + } else if (error == EAGAIN || error == EWOULDBLOCK) { + break; + } else { + Y_FAIL("read() failed with %s", strerror(error)); + } + } else { + Y_VERIFY(n); + totalRead += n; + } + } + return totalRead != 0; + } + + bool ProcessSyncOpQueue() { + if (DrainReadEnd()) { + Y_VERIFY(!SyncOperationsQ.IsEmpty()); + do { + TPollerSyncOperationWrapper *op = SyncOperationsQ.Top(); + if (auto *unregister = std::get_if<TPollerUnregisterSocket>(&op->Operation)) { + static_cast<TDerived&>(*this).UnregisterSocketInLoop(unregister->Socket); + op->SignalDone(); + } else if (std::get_if<TPollerExitThread>(&op->Operation)) { + op->SignalDone(); + return false; // terminate the thread + } else if (std::get_if<TPollerWakeup>(&op->Operation)) { + op->SignalDone(); + } else { + Y_FAIL(); + } + } while (SyncOperationsQ.Pop()); + } + return true; + } + + void *ThreadProc() override { + SetCurrentThreadName("network poller"); + while (ProcessSyncOpQueue()) { + static_cast<TDerived&>(*this).ProcessEventsInLoop(); + } + return nullptr; + } + }; + +} // namespace NActors + +#if defined(_linux_) +# include "poller_actor_linux.h" +#elif defined(_darwin_) +# include "poller_actor_darwin.h" +#elif defined(_win_) +# include "poller_actor_win.h" +#else +# error "Unsupported platform" +#endif + +namespace NActors { + + class TPollerToken::TImpl { + std::weak_ptr<TPollerThread> Thread; + TIntrusivePtr<TSocketRecord> Record; // valid only when Thread is held locked + + public: + TImpl(std::shared_ptr<TPollerThread> thread, TIntrusivePtr<TSocketRecord> record) + : Thread(thread) + , Record(std::move(record)) + { + thread->RegisterSocket(Record); + } + + ~TImpl() { + if (auto thread = Thread.lock()) { + thread->UnregisterSocket(Record); + } + } 
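+ + // Usage sketch (hypothetical caller, not part of this file): an actor that received + // TEvPollerRegisterResult keeps the token and, when a nonblocking syscall returns + // EAGAIN/EWOULDBLOCK, calls Request(read, write); it retries the syscall only after the + // matching TEvPollerReady arrives, e.g.: + // ssize_t n = recv(fd, buf, len, 0); // hypothetical socket read + // if (n < 0 && errno == EAGAIN) { + // PollerToken->Request(true, false); // rearm for read + // return; // resume on TEvPollerReady + // }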
+ + void Request(bool read, bool write) { + if (auto thread = Thread.lock()) { + thread->Request(Record, read, write); + } + } + + const TIntrusivePtr<TSharedDescriptor>& Socket() const { + return Record->Socket; + } + }; + + class TPollerActor: public TActorBootstrapped<TPollerActor> { + // poller thread + std::shared_ptr<TPollerThread> PollerThread; + + public: + static constexpr IActor::EActivityType ActorActivityType() { + return IActor::INTERCONNECT_POLLER; + } + + void Bootstrap() { + PollerThread = std::make_shared<TPollerThread>(TlsActivationContext->ExecutorThread.ActorSystem); + Become(&TPollerActor::StateFunc); + } + + STRICT_STFUNC(StateFunc, + hFunc(TEvPollerRegister, Handle); + cFunc(TEvents::TSystem::Poison, PassAway); + ) + + void Handle(TEvPollerRegister::TPtr& ev) { + auto *msg = ev->Get(); + auto impl = std::make_unique<TPollerToken::TImpl>(PollerThread, MakeIntrusive<TSocketRecord>(*msg)); + auto socket = impl->Socket(); + TPollerToken::TPtr token(new TPollerToken(std::move(impl))); + if (msg->ReadActorId && msg->WriteActorId && msg->WriteActorId != msg->ReadActorId) { + Send(msg->ReadActorId, new TEvPollerRegisterResult(socket, token)); + Send(msg->WriteActorId, new TEvPollerRegisterResult(socket, std::move(token))); + } else if (msg->ReadActorId) { + Send(msg->ReadActorId, new TEvPollerRegisterResult(socket, std::move(token))); + } else if (msg->WriteActorId) { + Send(msg->WriteActorId, new TEvPollerRegisterResult(socket, std::move(token))); + } + } + }; + + TPollerToken::TPollerToken(std::unique_ptr<TImpl> impl) + : Impl(std::move(impl)) + {} + + TPollerToken::~TPollerToken() + {} + + void TPollerToken::Request(bool read, bool write) { + Impl->Request(read, write); + } + + IActor* CreatePollerActor() { + return new TPollerActor; + } + +} diff --git a/library/cpp/actors/interconnect/poller_actor.h b/library/cpp/actors/interconnect/poller_actor.h new file mode 100644 index 0000000000..f927b82089 --- /dev/null +++ b/library/cpp/actors/interconnect/poller_actor.h @@ -0,0 +1,63 @@ +#pragma once + +#include "events_local.h" +#include "poller.h" +#include <library/cpp/actors/core/actor.h> + +namespace NActors { + struct TEvPollerRegister : TEventLocal<TEvPollerRegister, ui32(ENetwork::EvPollerRegister)> { + const TIntrusivePtr<TSharedDescriptor> Socket; // socket to watch for + const TActorId ReadActorId; // actor id to notify about read availability + const TActorId WriteActorId; // actor id to notify about write availability; may be the same as the ReadActorId + + TEvPollerRegister(TIntrusivePtr<TSharedDescriptor> socket, const TActorId& readActorId, const TActorId& writeActorId) + : Socket(std::move(socket)) + , ReadActorId(readActorId) + , WriteActorId(writeActorId) + {} + }; + + // poller token is sent in response to TEvPollerRegister; it allows requesting poll when read/write returns EAGAIN + class TPollerToken : public TThrRefBase { + class TImpl; + std::unique_ptr<TImpl> Impl; + + friend class TPollerActor; + TPollerToken(std::unique_ptr<TImpl> impl); + + public: + ~TPollerToken(); + void Request(bool read, bool write); + + using TPtr = TIntrusivePtr<TPollerToken>; + }; + + struct TEvPollerRegisterResult : TEventLocal<TEvPollerRegisterResult, ui32(ENetwork::EvPollerRegisterResult)> { + TIntrusivePtr<TSharedDescriptor> Socket; + TPollerToken::TPtr PollerToken; + + TEvPollerRegisterResult(TIntrusivePtr<TSharedDescriptor> socket, TPollerToken::TPtr pollerToken) + : Socket(std::move(socket)) + , PollerToken(std::move(pollerToken)) + {} + }; + + struct TEvPollerReady : 
TEventLocal<TEvPollerReady, ui32(ENetwork::EvPollerReady)> { + TIntrusivePtr<TSharedDescriptor> Socket; + const bool Read, Write; + + TEvPollerReady(TIntrusivePtr<TSharedDescriptor> socket, bool read, bool write) + : Socket(std::move(socket)) + , Read(read) + , Write(write) + {} + }; + + IActor* CreatePollerActor(); + + inline TActorId MakePollerActorId() { + char x[12] = {'I', 'C', 'P', 'o', 'l', 'l', 'e', 'r', '\xDE', '\xAD', '\xBE', '\xEF'}; + return TActorId(0, TStringBuf(std::begin(x), std::end(x))); + } + +} diff --git a/library/cpp/actors/interconnect/poller_actor_darwin.h b/library/cpp/actors/interconnect/poller_actor_darwin.h new file mode 100644 index 0000000000..4cb0a58f8d --- /dev/null +++ b/library/cpp/actors/interconnect/poller_actor_darwin.h @@ -0,0 +1,95 @@ +#pragma once + +#include <sys/event.h> + +namespace NActors { + + class TKqueueThread : public TPollerThreadBase<TKqueueThread> { + // KQueue file descriptor + int KqDescriptor; + + void SafeKevent(const struct kevent* ev, int size) { + int rc; + do { + rc = kevent(KqDescriptor, ev, size, nullptr, 0, nullptr); + } while (rc == -1 && errno == EINTR); + Y_VERIFY(rc != -1, "kevent() failed with %s", strerror(errno)); + } + + public: + TKqueueThread(TActorSystem *actorSystem) + : TPollerThreadBase(actorSystem) + { + // create kqueue + KqDescriptor = kqueue(); + Y_VERIFY(KqDescriptor != -1, "kqueue() failed with %s", strerror(errno)); + + // set close-on-exit flag + { + int flags = fcntl(KqDescriptor, F_GETFD); + Y_VERIFY(flags >= 0, "fcntl(F_GETFD) failed with %s", strerror(errno)); + int rc = fcntl(KqDescriptor, F_SETFD, flags | FD_CLOEXEC); + Y_VERIFY(rc != -1, "fcntl(F_SETFD, +FD_CLOEXEC) failed with %s", strerror(errno)); + } + + // register pipe's read end in poller + struct kevent ev; + EV_SET(&ev, (int)ReadEnd, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, nullptr); + SafeKevent(&ev, 1); + + ISimpleThread::Start(); // start poller thread + } + + ~TKqueueThread() { + Stop(); + close(KqDescriptor); + } + + void ProcessEventsInLoop() { + std::array<struct kevent, 256> events; + + int numReady = kevent(KqDescriptor, nullptr, 0, events.data(), events.size(), nullptr); + if (numReady == -1) { + if (errno == EINTR) { + return; + } else { + Y_FAIL("kevent() failed with %s", strerror(errno)); + } + } + + for (int i = 0; i < numReady; ++i) { + const struct kevent& ev = events[i]; + if (ev.udata) { + TSocketRecord *it = static_cast<TSocketRecord*>(ev.udata); + const bool error = ev.flags & (EV_EOF | EV_ERROR); + const bool read = error || ev.filter == EVFILT_READ; + const bool write = error || ev.filter == EVFILT_WRITE; + Notify(it, read, write); + } + } + } + + void UnregisterSocketInLoop(const TIntrusivePtr<TSharedDescriptor>& socket) { + struct kevent ev[2]; + const int fd = socket->GetDescriptor(); + EV_SET(&ev[0], fd, EVFILT_READ, EV_DELETE, 0, 0, nullptr); + EV_SET(&ev[1], fd, EVFILT_WRITE, EV_DELETE, 0, 0, nullptr); + SafeKevent(ev, 2); + } + + void RegisterSocket(const TIntrusivePtr<TSocketRecord>& record) { + int flags = EV_ADD | EV_CLEAR | EV_ENABLE; + struct kevent ev[2]; + const int fd = record->Socket->GetDescriptor(); + EV_SET(&ev[0], fd, EVFILT_READ, flags, 0, 0, record.Get()); + EV_SET(&ev[1], fd, EVFILT_WRITE, flags, 0, 0, record.Get()); + SafeKevent(ev, 2); + } + + void Request(const TIntrusivePtr<TSocketRecord>& /*socket*/, bool /*read*/, bool /*write*/) + {} // no special processing here as we use kqueue in edge-triggered mode + }; + + using TPollerThread = TKqueueThread; + +} diff --git 
a/library/cpp/actors/interconnect/poller_actor_linux.h b/library/cpp/actors/interconnect/poller_actor_linux.h new file mode 100644 index 0000000000..dd4f7c0124 --- /dev/null +++ b/library/cpp/actors/interconnect/poller_actor_linux.h @@ -0,0 +1,114 @@ +#pragma once + +#include <sys/epoll.h> + +namespace NActors { + + class TEpollThread : public TPollerThreadBase<TEpollThread> { + // epoll file descriptor + int EpollDescriptor; + + public: + TEpollThread(TActorSystem *actorSystem) + : TPollerThreadBase(actorSystem) + { + EpollDescriptor = epoll_create1(EPOLL_CLOEXEC); + Y_VERIFY(EpollDescriptor != -1, "epoll_create1() failed with %s", strerror(errno)); + + epoll_event event; + event.data.ptr = nullptr; + event.events = EPOLLIN; + if (epoll_ctl(EpollDescriptor, EPOLL_CTL_ADD, ReadEnd, &event) == -1) { + Y_FAIL("epoll_ctl(EPOLL_CTL_ADD) failed with %s", strerror(errno)); + } + + ISimpleThread::Start(); // start poller thread + } + + ~TEpollThread() { + Stop(); + close(EpollDescriptor); + } + + void ProcessEventsInLoop() { + // preallocated array for events + std::array<epoll_event, 256> events; + + // wait indefinitely for event to arrive + LWPROBE(EpollStartWaitIn); + int numReady = epoll_wait(EpollDescriptor, events.data(), events.size(), -1); + LWPROBE(EpollFinishWaitIn, numReady); + + // check return status for any errors + if (numReady == -1) { + if (errno == EINTR) { + return; // restart the call a bit later + } else { + Y_FAIL("epoll_wait() failed with %s", strerror(errno)); + } + } + + for (int i = 0; i < numReady; ++i) { + const epoll_event& ev = events[i]; + if (auto *record = static_cast<TSocketRecord*>(ev.data.ptr)) { + const bool read = ev.events & (EPOLLIN | EPOLLHUP | EPOLLRDHUP | EPOLLERR); + const bool write = ev.events & (EPOLLOUT | EPOLLERR); + + // remove hit flags from the bit set + ui32 flags = record->Flags; + const ui32 remove = (read ? EPOLLIN : 0) | (write ? EPOLLOUT : 0); + while (!record->Flags.compare_exchange_weak(flags, flags & ~remove)) + {} + flags &= ~remove; + + // rearm poller if some flags remain + if (flags) { + epoll_event event; + event.events = EPOLLONESHOT | EPOLLRDHUP | flags; + event.data.ptr = record; + if (epoll_ctl(EpollDescriptor, EPOLL_CTL_MOD, record->Socket->GetDescriptor(), &event) == -1) { + Y_FAIL("epoll_ctl(EPOLL_CTL_MOD) failed with %s", strerror(errno)); + } + } + + // issue notifications + Notify(record, read, write); + } + } + } + + void UnregisterSocketInLoop(const TIntrusivePtr<TSharedDescriptor>& socket) { + if (epoll_ctl(EpollDescriptor, EPOLL_CTL_DEL, socket->GetDescriptor(), nullptr) == -1) { + Y_FAIL("epoll_ctl(EPOLL_CTL_DEL) failed with %s", strerror(errno)); + } + } + + void RegisterSocket(const TIntrusivePtr<TSocketRecord>& record) { + epoll_event event; + event.events = EPOLLONESHOT | EPOLLRDHUP; + event.data.ptr = record.Get(); + if (epoll_ctl(EpollDescriptor, EPOLL_CTL_ADD, record->Socket->GetDescriptor(), &event) == -1) { + Y_FAIL("epoll_ctl(EPOLL_CTL_ADD) failed with %s", strerror(errno)); + } + } + + void Request(const TIntrusivePtr<TSocketRecord>& record, bool read, bool write) { + const ui32 add = (read ? EPOLLIN : 0) | (write ? 
EPOLLOUT : 0); + ui32 flags = record->Flags; + while (!record->Flags.compare_exchange_weak(flags, flags | add)) + {} + flags |= add; + if (flags) { + epoll_event event; + event.events = EPOLLONESHOT | EPOLLRDHUP | flags; + event.data.ptr = record.Get(); + if (epoll_ctl(EpollDescriptor, EPOLL_CTL_MOD, record->Socket->GetDescriptor(), &event) == -1) { + Y_FAIL("epoll_ctl(EPOLL_CTL_MOD) failed with %s", strerror(errno)); + } + } + } + }; + + using TPollerThread = TEpollThread; + +} // namespace NActors diff --git a/library/cpp/actors/interconnect/poller_actor_win.h b/library/cpp/actors/interconnect/poller_actor_win.h new file mode 100644 index 0000000000..4b4caa0ebd --- /dev/null +++ b/library/cpp/actors/interconnect/poller_actor_win.h @@ -0,0 +1,103 @@ +#pragma once + +namespace NActors { + + class TSelectThread : public TPollerThreadBase<TSelectThread> { + TMutex Mutex; + std::unordered_map<SOCKET, TIntrusivePtr<TSocketRecord>> Descriptors; + + enum { + READ = 1, + WRITE = 2, + }; + + public: + TSelectThread(TActorSystem *actorSystem) + : TPollerThreadBase(actorSystem) + { + Descriptors.emplace(ReadEnd, nullptr); + ISimpleThread::Start(); + } + + ~TSelectThread() { + Stop(); + } + + void ProcessEventsInLoop() { + fd_set readfds, writefds, exceptfds; + + FD_ZERO(&readfds); + FD_ZERO(&writefds); + FD_ZERO(&exceptfds); + int nfds = 0; + with_lock (Mutex) { + for (const auto& [key, record] : Descriptors) { + const int fd = key; + auto add = [&](auto& set) { + FD_SET(fd, &set); + nfds = Max<int>(nfds, fd + 1); + }; + if (!record || (record->Flags & READ)) { + add(readfds); + } + if (!record || (record->Flags & WRITE)) { + add(writefds); + } + add(exceptfds); + } + } + + int res = select(nfds, &readfds, &writefds, &exceptfds, nullptr); + if (res == -1) { + const int err = LastSocketError(); + if (err == EINTR) { + return; // try a bit later + } else { + Y_FAIL("select() failed with %s", strerror(err)); + } + } + + with_lock (Mutex) { + for (const auto& [fd, record] : Descriptors) { + if (record) { + const bool error = FD_ISSET(fd, &exceptfds); + const bool read = error || FD_ISSET(fd, &readfds); + const bool write = error || FD_ISSET(fd, &writefds); + if (read) { + record->Flags &= ~READ; + } + if (write) { + record->Flags &= ~WRITE; + } + Notify(record.Get(), read, write); + } + } + } + } + + void UnregisterSocketInLoop(const TIntrusivePtr<TSharedDescriptor>& socket) { + with_lock (Mutex) { + Descriptors.erase(socket->GetDescriptor()); + } + } + + void RegisterSocket(const TIntrusivePtr<TSocketRecord>& record) { + with_lock (Mutex) { + Descriptors.emplace(record->Socket->GetDescriptor(), record); + } + ExecuteSyncOperation(TPollerWakeup()); + } + + void Request(const TIntrusivePtr<TSocketRecord>& record, bool read, bool write) { + with_lock (Mutex) { + const auto it = Descriptors.find(record->Socket->GetDescriptor()); + Y_VERIFY(it != Descriptors.end()); + it->second->Flags |= (read ? READ : 0) | (write ? 
WRITE : 0); + } + ExecuteSyncOperation(TPollerWakeup()); + } + }; + + using TPollerThread = TSelectThread; + +} // NActors diff --git a/library/cpp/actors/interconnect/poller_tcp.cpp b/library/cpp/actors/interconnect/poller_tcp.cpp new file mode 100644 index 0000000000..8267df31ea --- /dev/null +++ b/library/cpp/actors/interconnect/poller_tcp.cpp @@ -0,0 +1,35 @@ +#include "poller_tcp.h" + +namespace NInterconnect { + TPollerThreads::TPollerThreads(size_t units, bool useSelect) + : Units(units) + { + Y_VERIFY_DEBUG(!Units.empty()); + for (auto& unit : Units) + unit = TPollerUnit::Make(useSelect); + } + + TPollerThreads::~TPollerThreads() { + } + + void TPollerThreads::Start() { + for (const auto& unit : Units) + unit->Start(); + } + + void TPollerThreads::Stop() { + for (const auto& unit : Units) + unit->Stop(); + } + + void TPollerThreads::StartRead(const TIntrusivePtr<TSharedDescriptor>& s, TFDDelegate&& operation) { + auto& unit = Units[THash<SOCKET>()(s->GetDescriptor()) % Units.size()]; + unit->StartReadOperation(s, std::move(operation)); + } + + void TPollerThreads::StartWrite(const TIntrusivePtr<TSharedDescriptor>& s, TFDDelegate&& operation) { + auto& unit = Units[THash<SOCKET>()(s->GetDescriptor()) % Units.size()]; + unit->StartWriteOperation(s, std::move(operation)); + } + +} diff --git a/library/cpp/actors/interconnect/poller_tcp.h b/library/cpp/actors/interconnect/poller_tcp.h new file mode 100644 index 0000000000..310265eccd --- /dev/null +++ b/library/cpp/actors/interconnect/poller_tcp.h @@ -0,0 +1,25 @@ +#pragma once + +#include "poller_tcp_unit.h" +#include "poller.h" + +#include <util/generic/vector.h> +#include <util/generic/hash.h> + +namespace NInterconnect { + class TPollerThreads: public NActors::IPoller { + public: + TPollerThreads(size_t units = 1U, bool useSelect = false); + ~TPollerThreads(); + + void Start(); + void Stop(); + + void StartRead(const TIntrusivePtr<TSharedDescriptor>& s, TFDDelegate&& operation) override; + void StartWrite(const TIntrusivePtr<TSharedDescriptor>& s, TFDDelegate&& operation) override; + + private: + TVector<TPollerUnit::TPtr> Units; + }; + +} diff --git a/library/cpp/actors/interconnect/poller_tcp_unit.cpp b/library/cpp/actors/interconnect/poller_tcp_unit.cpp new file mode 100644 index 0000000000..59e7dda810 --- /dev/null +++ b/library/cpp/actors/interconnect/poller_tcp_unit.cpp @@ -0,0 +1,126 @@ +#include "poller_tcp_unit.h" + +#if !defined(_win_) && !defined(_darwin_) +#include "poller_tcp_unit_epoll.h" +#endif + +#include "poller_tcp_unit_select.h" +#include "poller.h" + +#include <library/cpp/actors/prof/tag.h> +#include <library/cpp/actors/util/intrinsics.h> + +#if defined _linux_ +#include <pthread.h> +#endif + +namespace NInterconnect { + TPollerUnit::TPtr + TPollerUnit::Make(bool useSelect) { +#if defined(_win_) || defined(_darwin_) + Y_UNUSED(useSelect); + return TPtr(new TPollerUnitSelect); +#else + return useSelect ? 
TPtr(new TPollerUnitSelect) : TPtr(new TPollerUnitEpoll); +#endif + } + + TPollerUnit::TPollerUnit() + : StopFlag(true) + , ReadLoop(TThread::TParams(IdleThread<false>, this).SetName("network read")) + , WriteLoop(TThread::TParams(IdleThread<true>, this).SetName("network write")) + { + } + + TPollerUnit::~TPollerUnit() { + if (!AtomicLoad(&StopFlag)) + Stop(); + } + + void + TPollerUnit::Start() { + AtomicStore(&StopFlag, false); + ReadLoop.Start(); + WriteLoop.Start(); + } + + void + TPollerUnit::Stop() { + AtomicStore(&StopFlag, true); + ReadLoop.Join(); + WriteLoop.Join(); + } + + template <> + TPollerUnit::TSide& + TPollerUnit::GetSide<false>() { + return Read; + } + + template <> + TPollerUnit::TSide& + TPollerUnit::GetSide<true>() { + return Write; + } + + void + TPollerUnit::StartReadOperation( + const TIntrusivePtr<TSharedDescriptor>& stream, + TFDDelegate&& operation) { + Y_VERIFY_DEBUG(stream); + if (AtomicLoad(&StopFlag)) + return; + GetSide<false>().InputQueue.Push(TSide::TItem(stream, std::move(operation))); + } + + void + TPollerUnit::StartWriteOperation( + const TIntrusivePtr<TSharedDescriptor>& stream, + TFDDelegate&& operation) { + Y_VERIFY_DEBUG(stream); + if (AtomicLoad(&StopFlag)) + return; + GetSide<true>().InputQueue.Push(TSide::TItem(stream, std::move(operation))); + } + + template <bool IsWrite> + void* + TPollerUnit::IdleThread(void* param) { + // TODO: musl-libc version of `sched_param` struct is for some reason different from pthread + // version in Ubuntu 12.04 +#if defined(_linux_) && !defined(_musl_) + pthread_t threadSelf = pthread_self(); + sched_param sparam = {20}; + pthread_setschedparam(threadSelf, SCHED_FIFO, &sparam); +#endif + + static_cast<TPollerUnit*>(param)->RunLoop<IsWrite>(); + return nullptr; + } + + template <> + void + TPollerUnit::RunLoop<false>() { + NProfiling::TMemoryTagScope tag("INTERCONNECT_RECEIVED_DATA"); + while (!AtomicLoad(&StopFlag)) + ProcessRead(); + } + + template <> + void + TPollerUnit::RunLoop<true>() { + NProfiling::TMemoryTagScope tag("INTERCONNECT_SEND_DATA"); + while (!AtomicLoad(&StopFlag)) + ProcessWrite(); + } + + void + TPollerUnit::TSide::ProcessInput() { + if (!InputQueue.IsEmpty()) + do { + auto sock = InputQueue.Top().first->GetDescriptor(); + if (!Operations.emplace(sock, std::move(InputQueue.Top())).second) + Y_FAIL("Descriptor is already in poller."); + } while (InputQueue.Pop()); + } +} diff --git a/library/cpp/actors/interconnect/poller_tcp_unit.h b/library/cpp/actors/interconnect/poller_tcp_unit.h new file mode 100644 index 0000000000..692168b968 --- /dev/null +++ b/library/cpp/actors/interconnect/poller_tcp_unit.h @@ -0,0 +1,67 @@ +#pragma once + +#include <util/system/thread.h> +#include <library/cpp/actors/util/funnel_queue.h> + +#include "interconnect_stream.h" + +#include <memory> +#include <functional> +#include <unordered_map> + +namespace NInterconnect { + using NActors::TFDDelegate; + using NActors::TSharedDescriptor; + + class TPollerUnit { + public: + typedef std::unique_ptr<TPollerUnit> TPtr; + + static TPtr Make(bool useSelect); + + void Start(); + void Stop(); + + virtual void StartReadOperation( + const TIntrusivePtr<TSharedDescriptor>& stream, + TFDDelegate&& operation); + + virtual void StartWriteOperation( + const TIntrusivePtr<TSharedDescriptor>& stream, + TFDDelegate&& operation); + + virtual ~TPollerUnit(); + + private: + virtual void ProcessRead() = 0; + virtual void ProcessWrite() = 0; + + template <bool IsWrite> + static void* IdleThread(void* param); + + template <bool IsWrite> + 
void RunLoop(); + + volatile bool StopFlag; + TThread ReadLoop, WriteLoop; + + protected: + TPollerUnit(); + + struct TSide { + using TOperations = + std::unordered_map<SOCKET, + std::pair<TIntrusivePtr<TSharedDescriptor>, TFDDelegate>>; + + TOperations Operations; + using TItem = TOperations::mapped_type; + TFunnelQueue<TItem> InputQueue; + + void ProcessInput(); + } Read, Write; + + template <bool IsWrite> + TSide& GetSide(); + }; + +} diff --git a/library/cpp/actors/interconnect/poller_tcp_unit_epoll.cpp b/library/cpp/actors/interconnect/poller_tcp_unit_epoll.cpp new file mode 100644 index 0000000000..c78538b95b --- /dev/null +++ b/library/cpp/actors/interconnect/poller_tcp_unit_epoll.cpp @@ -0,0 +1,125 @@ +#include "poller_tcp_unit_epoll.h" +#if !defined(_win_) && !defined(_darwin_) +#include <unistd.h> +#include <sys/epoll.h> + +#include <csignal> +#include <cerrno> +#include <cstring> + +namespace NInterconnect { + namespace { + void + DeleteEpoll(int epoll, SOCKET stream) { + ::epoll_event event = {0, {.fd = stream}}; + if (::epoll_ctl(epoll, EPOLL_CTL_DEL, stream, &event)) { + Cerr << "epoll_ctl errno: " << errno << Endl; + Y_FAIL("epoll delete error!"); + } + } + + template <ui32 Events> + void + AddEpoll(int epoll, SOCKET stream) { + ::epoll_event event = {.events = Events}; + event.data.fd = stream; + if (::epoll_ctl(epoll, EPOLL_CTL_ADD, stream, &event)) { + Cerr << "epoll_ctl errno: " << errno << Endl; + Y_FAIL("epoll add error!"); + } + } + + int + Initialize() { + const auto epoll = ::epoll_create(10000); + Y_VERIFY_DEBUG(epoll > 0); + return epoll; + } + + } + + TPollerUnitEpoll::TPollerUnitEpoll() + : ReadDescriptor(Initialize()) + , WriteDescriptor(Initialize()) + { + // Block on the epoll descriptor. + ::sigemptyset(&sigmask); + ::sigaddset(&sigmask, SIGPIPE); + ::sigaddset(&sigmask, SIGTERM); + } + + TPollerUnitEpoll::~TPollerUnitEpoll() { + ::close(ReadDescriptor); + ::close(WriteDescriptor); + } + + template <> + int TPollerUnitEpoll::GetDescriptor<false>() const { + return ReadDescriptor; + } + + template <> + int TPollerUnitEpoll::GetDescriptor<true>() const { + return WriteDescriptor; + } + + void + TPollerUnitEpoll::StartReadOperation( + const TIntrusivePtr<TSharedDescriptor>& s, + TFDDelegate&& operation) { + TPollerUnit::StartReadOperation(s, std::move(operation)); + AddEpoll<EPOLLRDHUP | EPOLLIN>(ReadDescriptor, s->GetDescriptor()); + } + + void + TPollerUnitEpoll::StartWriteOperation( + const TIntrusivePtr<TSharedDescriptor>& s, + TFDDelegate&& operation) { + TPollerUnit::StartWriteOperation(s, std::move(operation)); + AddEpoll<EPOLLRDHUP | EPOLLOUT>(WriteDescriptor, s->GetDescriptor()); + } + + constexpr int EVENTS_BUF_SIZE = 128; + + template <bool WriteOp> + void + TPollerUnitEpoll::Process() { + ::epoll_event events[EVENTS_BUF_SIZE]; + + const int epoll = GetDescriptor<WriteOp>(); + + /* Timeout just to check StopFlag sometimes */ + const int result = + ::epoll_pwait(epoll, events, EVENTS_BUF_SIZE, 200, &sigmask); + + if (result == -1 && errno != EINTR) + Y_FAIL("epoll wait error!"); + + auto& side = GetSide<WriteOp>(); + side.ProcessInput(); + + for (int i = 0; i < result; ++i) { + const auto it = side.Operations.find(events[i].data.fd); + if (side.Operations.end() == it) + continue; + if (const auto& finalizer = it->second.second(it->second.first)) { + DeleteEpoll(epoll, it->first); + side.Operations.erase(it); + finalizer(); + } + } + } + + void + TPollerUnitEpoll::ProcessRead() { + Process<false>(); + } + + void + TPollerUnitEpoll::ProcessWrite() { + 
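+        // Write-side twin of ProcessRead() above; Process<IsWrite>() differs only in
+        // which epoll descriptor and which TSide (Read vs Write) it services.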
Process<true>(); + } + +} + +#endif diff --git a/library/cpp/actors/interconnect/poller_tcp_unit_epoll.h b/library/cpp/actors/interconnect/poller_tcp_unit_epoll.h new file mode 100644 index 0000000000..ff7893eba2 --- /dev/null +++ b/library/cpp/actors/interconnect/poller_tcp_unit_epoll.h @@ -0,0 +1,33 @@ +#pragma once + +#include "poller_tcp_unit.h" + +namespace NInterconnect { + class TPollerUnitEpoll: public TPollerUnit { + public: + TPollerUnitEpoll(); + virtual ~TPollerUnitEpoll(); + + private: + virtual void StartReadOperation( + const TIntrusivePtr<TSharedDescriptor>& s, + TFDDelegate&& operation) override; + + virtual void StartWriteOperation( + const TIntrusivePtr<TSharedDescriptor>& s, + TFDDelegate&& operation) override; + + virtual void ProcessRead() override; + virtual void ProcessWrite() override; + + template <bool Write> + void Process(); + + template <bool Write> + int GetDescriptor() const; + + const int ReadDescriptor, WriteDescriptor; + ::sigset_t sigmask; + }; + +} diff --git a/library/cpp/actors/interconnect/poller_tcp_unit_select.cpp b/library/cpp/actors/interconnect/poller_tcp_unit_select.cpp new file mode 100644 index 0000000000..ae7aaad566 --- /dev/null +++ b/library/cpp/actors/interconnect/poller_tcp_unit_select.cpp @@ -0,0 +1,86 @@ +#include "poller_tcp_unit_select.h" + +#include <csignal> + +#if defined(_win_) +#include <winsock2.h> +#define SOCKET_ERROR_SOURCE ::WSAGetLastError() +#elif defined(_darwin_) +#include <cerrno> +#define SOCKET_ERROR_SOURCE errno +typedef timeval TIMEVAL; +#else +#include <cerrno> +#define SOCKET_ERROR_SOURCE errno +#endif + +namespace NInterconnect { + TPollerUnitSelect::TPollerUnitSelect() { + } + + TPollerUnitSelect::~TPollerUnitSelect() { + } + + template <bool IsWrite> + void + TPollerUnitSelect::Process() { + auto& side = GetSide<IsWrite>(); + side.ProcessInput(); + + enum : size_t { R, + W, + E }; + static const auto O = IsWrite ? W : R; + + ::fd_set sets[3]; + + FD_ZERO(&sets[R]); + FD_ZERO(&sets[W]); + FD_ZERO(&sets[E]); + + for (const auto& operation : side.Operations) { + FD_SET(operation.first, &sets[O]); + FD_SET(operation.first, &sets[E]); + } + +#if defined(_win_) + ::TIMEVAL timeout = {0L, 99991L}; + const auto numberEvents = !side.Operations.empty() ? 
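+                                   // WinSock select() requires at least one socket in
+                                   // the fd sets, hence the Sleep() fallback when none
+                                   // are registered: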
::select(FD_SETSIZE, &sets[R], &sets[W], &sets[E], &timeout) + : (::Sleep(100), 0); +#elif defined(_darwin_) + ::TIMEVAL timeout = {0L, 99991L}; + const auto numberEvents = ::select(FD_SETSIZE, &sets[R], &sets[W], &sets[E], &timeout); +#else + ::sigset_t sigmask; + ::sigemptyset(&sigmask); + ::sigaddset(&sigmask, SIGPIPE); + ::sigaddset(&sigmask, SIGTERM); + + struct ::timespec timeout = {0L, 99999989L}; + const auto numberEvents = ::pselect(FD_SETSIZE, &sets[R], &sets[W], &sets[E], &timeout, &sigmask); +#endif + + Y_VERIFY_DEBUG(numberEvents >= 0); + + for (auto it = side.Operations.cbegin(); side.Operations.cend() != it;) { + if (FD_ISSET(it->first, &sets[O]) || FD_ISSET(it->first, &sets[E])) + if (const auto& finalizer = it->second.second(it->second.first)) { + side.Operations.erase(it++); + finalizer(); + continue; + } + ++it; + } + } + + void + TPollerUnitSelect::ProcessRead() { + Process<false>(); + } + + void + TPollerUnitSelect::ProcessWrite() { + Process<true>(); + } + +} diff --git a/library/cpp/actors/interconnect/poller_tcp_unit_select.h b/library/cpp/actors/interconnect/poller_tcp_unit_select.h new file mode 100644 index 0000000000..0c15217796 --- /dev/null +++ b/library/cpp/actors/interconnect/poller_tcp_unit_select.h @@ -0,0 +1,19 @@ +#pragma once + +#include "poller_tcp_unit.h" + +namespace NInterconnect { + class TPollerUnitSelect: public TPollerUnit { + public: + TPollerUnitSelect(); + virtual ~TPollerUnitSelect(); + + private: + virtual void ProcessRead() override; + virtual void ProcessWrite() override; + + template <bool IsWrite> + void Process(); + }; + +} diff --git a/library/cpp/actors/interconnect/profiler.h b/library/cpp/actors/interconnect/profiler.h new file mode 100644 index 0000000000..77a59e3179 --- /dev/null +++ b/library/cpp/actors/interconnect/profiler.h @@ -0,0 +1,142 @@ +#pragma once + +#include <library/cpp/actors/util/datetime.h> + +namespace NActors { + + class TProfiled { + enum class EType : ui32 { + ENTRY, + EXIT, + }; + + struct TItem { + EType Type; // entry kind + int Line; + const char *Marker; // name of the profiled function/part + ui64 Timestamp; // cycles + }; + + bool Enable = false; + mutable TDeque<TItem> Items; + + friend class TFunction; + + public: + class TFunction { + const TProfiled& Profiled; + + public: + TFunction(const TProfiled& profiled, const char *name, int line) + : Profiled(profiled) + { + Log(EType::ENTRY, name, line); + } + + ~TFunction() { + Log(EType::EXIT, nullptr, 0); + } + + private: + void Log(EType type, const char *marker, int line) { + if (Profiled.Enable) { + Profiled.Items.push_back(TItem{ + type, + line, + marker, + GetCycleCountFast() + }); + } + } + }; + + public: + void Start() { + Enable = true; + } + + void Finish() { + Items.clear(); + Enable = false; + } + + TDuration Duration() const { + return CyclesToDuration(Items ? 
Items.back().Timestamp - Items.front().Timestamp : 0); + } + + TString Format() const { + TDeque<TItem>::iterator it = Items.begin(); + TString res = FormatLevel(it); + Y_VERIFY(it == Items.end()); + return res; + } + + private: + TString FormatLevel(TDeque<TItem>::iterator& it) const { + struct TRecord { + TString Marker; + ui64 Duration; + TString Interior; + + bool operator <(const TRecord& other) const { + return Duration < other.Duration; + } + }; + TVector<TRecord> records; + + while (it != Items.end() && it->Type != EType::EXIT) { + Y_VERIFY(it->Type == EType::ENTRY); + const TString marker = Sprintf("%s:%d", it->Marker, it->Line); + const ui64 begin = it->Timestamp; + ++it; + const TString interior = FormatLevel(it); + Y_VERIFY(it != Items.end()); + Y_VERIFY(it->Type == EType::EXIT); + const ui64 end = it->Timestamp; + records.push_back(TRecord{marker, end - begin, interior}); + ++it; + } + + TStringStream s; + const ui64 cyclesPerMs = GetCyclesPerMillisecond(); + + if (records.size() <= 10) { + bool first = true; + for (const TRecord& record : records) { + if (first) { + first = false; + } else { + s << " "; + } + s << record.Marker << "(" << (record.Duration * 1000000 / cyclesPerMs) << "ns)"; + if (record.Interior) { + s << " {" << record.Interior << "}"; + } + } + } else { + TMap<TString, TVector<TRecord>> m; + for (TRecord& r : records) { + const TString key = r.Marker; + m[key].push_back(std::move(r)); + } + + s << "unordered "; + for (auto& [key, value] : m) { + auto i = std::max_element(value.begin(), value.end()); + ui64 sum = 0; + for (const auto& item : value) { + sum += item.Duration; + } + sum = sum * 1000000 / cyclesPerMs; + s << key << " num# " << value.size() << " sum# " << sum << "ns max# " << (i->Duration * 1000000 / cyclesPerMs) << "ns"; + if (i->Interior) { + s << " {" << i->Interior << "}"; + } + } + } + + return s.Str(); + } + }; + +} // NActors diff --git a/library/cpp/actors/interconnect/slowpoke_actor.h b/library/cpp/actors/interconnect/slowpoke_actor.h new file mode 100644 index 0000000000..4b02e5da48 --- /dev/null +++ b/library/cpp/actors/interconnect/slowpoke_actor.h @@ -0,0 +1,47 @@ +#pragma once + +#include <library/cpp/actors/core/actor_bootstrapped.h> + +namespace NActors { + + class TSlowpokeActor : public TActorBootstrapped<TSlowpokeActor> { + const TDuration Duration; + const TDuration SleepMin; + const TDuration SleepMax; + const TDuration RescheduleMin; + const TDuration RescheduleMax; + + public: + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { + return NKikimrServices::TActivity::INTERCONNECT_COMMON; + } + + TSlowpokeActor(TDuration duration, TDuration sleepMin, TDuration sleepMax, TDuration rescheduleMin, TDuration rescheduleMax) + : Duration(duration) + , SleepMin(sleepMin) + , SleepMax(sleepMax) + , RescheduleMin(rescheduleMin) + , RescheduleMax(rescheduleMax) + {} + + void Bootstrap(const TActorContext& ctx) { + Become(&TThis::StateFunc, ctx, Duration, new TEvents::TEvPoisonPill); + HandleWakeup(ctx); + } + + void HandleWakeup(const TActorContext& ctx) { + Sleep(RandomDuration(SleepMin, SleepMax)); + ctx.Schedule(RandomDuration(RescheduleMin, RescheduleMax), new TEvents::TEvWakeup); + } + + static TDuration RandomDuration(TDuration min, TDuration max) { + return min + TDuration::FromValue(RandomNumber<ui64>(max.GetValue() - min.GetValue() + 1)); + } + + STRICT_STFUNC(StateFunc, + CFunc(TEvents::TSystem::PoisonPill, Die) + CFunc(TEvents::TSystem::Wakeup, HandleWakeup) + ) + }; + +} // NActors diff --git 
a/library/cpp/actors/interconnect/types.cpp b/library/cpp/actors/interconnect/types.cpp new file mode 100644 index 0000000000..979c55f277 --- /dev/null +++ b/library/cpp/actors/interconnect/types.cpp @@ -0,0 +1,564 @@ +#include "types.h" +#include <util/string/printf.h> +#include <util/generic/vector.h> +#include <errno.h> + +namespace NActors { + + TVector<const char*> TDisconnectReason::Reasons = { + "EndOfStream", + "CloseOnIdle", + "LostConnection", + "DeadPeer", + "NewSession", + "HandshakeFailTransient", + "HandshakeFailPermanent", + "UserRequest", + "Debug", + "ChecksumError", + "FormatError", + "EventTooLarge", + "QueueOverload", + "E2BIG", + "EACCES", + "EADDRINUSE", + "EADDRNOTAVAIL", + "EADV", + "EAFNOSUPPORT", + "EAGAIN", + "EALREADY", + "EBADE", + "EBADF", + "EBADFD", + "EBADMSG", + "EBADR", + "EBADRQC", + "EBADSLT", + "EBFONT", + "EBUSY", + "ECANCELED", + "ECHILD", + "ECHRNG", + "ECOMM", + "ECONNABORTED", + "ECONNREFUSED", + "ECONNRESET", + "EDEADLK", + "EDEADLOCK", + "EDESTADDRREQ", + "EDOM", + "EDOTDOT", + "EDQUOT", + "EEXIST", + "EFAULT", + "EFBIG", + "EHOSTDOWN", + "EHOSTUNREACH", + "EHWPOISON", + "EIDRM", + "EILSEQ", + "EINPROGRESS", + "EINTR", + "EINVAL", + "EIO", + "EISCONN", + "EISDIR", + "EISNAM", + "EKEYEXPIRED", + "EKEYREJECTED", + "EKEYREVOKED", + "EL2HLT", + "EL2NSYNC", + "EL3HLT", + "EL3RST", + "ELIBACC", + "ELIBBAD", + "ELIBEXEC", + "ELIBMAX", + "ELIBSCN", + "ELNRNG", + "ELOOP", + "EMEDIUMTYPE", + "EMFILE", + "EMLINK", + "EMSGSIZE", + "EMULTIHOP", + "ENAMETOOLONG", + "ENAVAIL", + "ENETDOWN", + "ENETRESET", + "ENETUNREACH", + "ENFILE", + "ENOANO", + "ENOBUFS", + "ENOCSI", + "ENODATA", + "ENODEV", + "ENOENT", + "ENOEXEC", + "ENOKEY", + "ENOLCK", + "ENOLINK", + "ENOMEDIUM", + "ENOMEM", + "ENOMSG", + "ENONET", + "ENOPKG", + "ENOPROTOOPT", + "ENOSPC", + "ENOSR", + "ENOSTR", + "ENOSYS", + "ENOTBLK", + "ENOTCONN", + "ENOTDIR", + "ENOTEMPTY", + "ENOTNAM", + "ENOTRECOVERABLE", + "ENOTSOCK", + "ENOTTY", + "ENOTUNIQ", + "ENXIO", + "EOPNOTSUPP", + "EOVERFLOW", + "EOWNERDEAD", + "EPERM", + "EPFNOSUPPORT", + "EPIPE", + "EPROTO", + "EPROTONOSUPPORT", + "EPROTOTYPE", + "ERANGE", + "EREMCHG", + "EREMOTE", + "EREMOTEIO", + "ERESTART", + "ERFKILL", + "EROFS", + "ESHUTDOWN", + "ESOCKTNOSUPPORT", + "ESPIPE", + "ESRCH", + "ESRMNT", + "ESTALE", + "ESTRPIPE", + "ETIME", + "ETIMEDOUT", + "ETOOMANYREFS", + "ETXTBSY", + "EUCLEAN", + "EUNATCH", + "EUSERS", + "EWOULDBLOCK", + "EXDEV", + "EXFULL", + }; + + TDisconnectReason TDisconnectReason::FromErrno(int err) { + switch (err) { +#define REASON(ERRNO) case ERRNO: return TDisconnectReason(TString(#ERRNO)) +#if defined(E2BIG) + REASON(E2BIG); +#endif +#if defined(EACCES) + REASON(EACCES); +#endif +#if defined(EADDRINUSE) + REASON(EADDRINUSE); +#endif +#if defined(EADDRNOTAVAIL) + REASON(EADDRNOTAVAIL); +#endif +#if defined(EADV) + REASON(EADV); +#endif +#if defined(EAFNOSUPPORT) + REASON(EAFNOSUPPORT); +#endif +#if defined(EAGAIN) + REASON(EAGAIN); +#endif +#if defined(EALREADY) + REASON(EALREADY); +#endif +#if defined(EBADE) + REASON(EBADE); +#endif +#if defined(EBADF) + REASON(EBADF); +#endif +#if defined(EBADFD) + REASON(EBADFD); +#endif +#if defined(EBADMSG) + REASON(EBADMSG); +#endif +#if defined(EBADR) + REASON(EBADR); +#endif +#if defined(EBADRQC) + REASON(EBADRQC); +#endif +#if defined(EBADSLT) + REASON(EBADSLT); +#endif +#if defined(EBFONT) + REASON(EBFONT); +#endif +#if defined(EBUSY) + REASON(EBUSY); +#endif +#if defined(ECANCELED) + REASON(ECANCELED); +#endif +#if defined(ECHILD) + REASON(ECHILD); +#endif +#if defined(ECHRNG) + 
REASON(ECHRNG); +#endif +#if defined(ECOMM) + REASON(ECOMM); +#endif +#if defined(ECONNABORTED) + REASON(ECONNABORTED); +#endif +#if defined(ECONNREFUSED) + REASON(ECONNREFUSED); +#endif +#if defined(ECONNRESET) + REASON(ECONNRESET); +#endif +#if defined(EDEADLK) + REASON(EDEADLK); +#endif +#if defined(EDEADLOCK) && (!defined(EDEADLK) || EDEADLOCK != EDEADLK) + REASON(EDEADLOCK); +#endif +#if defined(EDESTADDRREQ) + REASON(EDESTADDRREQ); +#endif +#if defined(EDOM) + REASON(EDOM); +#endif +#if defined(EDOTDOT) + REASON(EDOTDOT); +#endif +#if defined(EDQUOT) + REASON(EDQUOT); +#endif +#if defined(EEXIST) + REASON(EEXIST); +#endif +#if defined(EFAULT) + REASON(EFAULT); +#endif +#if defined(EFBIG) + REASON(EFBIG); +#endif +#if defined(EHOSTDOWN) + REASON(EHOSTDOWN); +#endif +#if defined(EHOSTUNREACH) + REASON(EHOSTUNREACH); +#endif +#if defined(EHWPOISON) + REASON(EHWPOISON); +#endif +#if defined(EIDRM) + REASON(EIDRM); +#endif +#if defined(EILSEQ) + REASON(EILSEQ); +#endif +#if defined(EINPROGRESS) + REASON(EINPROGRESS); +#endif +#if defined(EINTR) + REASON(EINTR); +#endif +#if defined(EINVAL) + REASON(EINVAL); +#endif +#if defined(EIO) + REASON(EIO); +#endif +#if defined(EISCONN) + REASON(EISCONN); +#endif +#if defined(EISDIR) + REASON(EISDIR); +#endif +#if defined(EISNAM) + REASON(EISNAM); +#endif +#if defined(EKEYEXPIRED) + REASON(EKEYEXPIRED); +#endif +#if defined(EKEYREJECTED) + REASON(EKEYREJECTED); +#endif +#if defined(EKEYREVOKED) + REASON(EKEYREVOKED); +#endif +#if defined(EL2HLT) + REASON(EL2HLT); +#endif +#if defined(EL2NSYNC) + REASON(EL2NSYNC); +#endif +#if defined(EL3HLT) + REASON(EL3HLT); +#endif +#if defined(EL3RST) + REASON(EL3RST); +#endif +#if defined(ELIBACC) + REASON(ELIBACC); +#endif +#if defined(ELIBBAD) + REASON(ELIBBAD); +#endif +#if defined(ELIBEXEC) + REASON(ELIBEXEC); +#endif +#if defined(ELIBMAX) + REASON(ELIBMAX); +#endif +#if defined(ELIBSCN) + REASON(ELIBSCN); +#endif +#if defined(ELNRNG) + REASON(ELNRNG); +#endif +#if defined(ELOOP) + REASON(ELOOP); +#endif +#if defined(EMEDIUMTYPE) + REASON(EMEDIUMTYPE); +#endif +#if defined(EMFILE) + REASON(EMFILE); +#endif +#if defined(EMLINK) + REASON(EMLINK); +#endif +#if defined(EMSGSIZE) + REASON(EMSGSIZE); +#endif +#if defined(EMULTIHOP) + REASON(EMULTIHOP); +#endif +#if defined(ENAMETOOLONG) + REASON(ENAMETOOLONG); +#endif +#if defined(ENAVAIL) + REASON(ENAVAIL); +#endif +#if defined(ENETDOWN) + REASON(ENETDOWN); +#endif +#if defined(ENETRESET) + REASON(ENETRESET); +#endif +#if defined(ENETUNREACH) + REASON(ENETUNREACH); +#endif +#if defined(ENFILE) + REASON(ENFILE); +#endif +#if defined(ENOANO) + REASON(ENOANO); +#endif +#if defined(ENOBUFS) + REASON(ENOBUFS); +#endif +#if defined(ENOCSI) + REASON(ENOCSI); +#endif +#if defined(ENODATA) + REASON(ENODATA); +#endif +#if defined(ENODEV) + REASON(ENODEV); +#endif +#if defined(ENOENT) + REASON(ENOENT); +#endif +#if defined(ENOEXEC) + REASON(ENOEXEC); +#endif +#if defined(ENOKEY) + REASON(ENOKEY); +#endif +#if defined(ENOLCK) + REASON(ENOLCK); +#endif +#if defined(ENOLINK) + REASON(ENOLINK); +#endif +#if defined(ENOMEDIUM) + REASON(ENOMEDIUM); +#endif +#if defined(ENOMEM) + REASON(ENOMEM); +#endif +#if defined(ENOMSG) + REASON(ENOMSG); +#endif +#if defined(ENONET) + REASON(ENONET); +#endif +#if defined(ENOPKG) + REASON(ENOPKG); +#endif +#if defined(ENOPROTOOPT) + REASON(ENOPROTOOPT); +#endif +#if defined(ENOSPC) + REASON(ENOSPC); +#endif +#if defined(ENOSR) + REASON(ENOSR); +#endif +#if defined(ENOSTR) + REASON(ENOSTR); +#endif +#if defined(ENOSYS) + REASON(ENOSYS); +#endif 
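+            // Each REASON(X) case expands (per the #define at the top of FromErrno) to
+            //     case X: return TDisconnectReason(TString("X"));
+            // mapping a saved errno value to its symbolic name; codes not defined on
+            // the current platform are compiled out by the #if defined(...) guards.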
+#if defined(ENOTBLK)
+            REASON(ENOTBLK);
+#endif
+#if defined(ENOTCONN)
+            REASON(ENOTCONN);
+#endif
+#if defined(ENOTDIR)
+            REASON(ENOTDIR);
+#endif
+#if defined(ENOTEMPTY)
+            REASON(ENOTEMPTY);
+#endif
+#if defined(ENOTNAM)
+            REASON(ENOTNAM);
+#endif
+#if defined(ENOTRECOVERABLE)
+            REASON(ENOTRECOVERABLE);
+#endif
+#if defined(ENOTSOCK)
+            REASON(ENOTSOCK);
+#endif
+#if defined(ENOTTY)
+            REASON(ENOTTY);
+#endif
+#if defined(ENOTUNIQ)
+            REASON(ENOTUNIQ);
+#endif
+#if defined(ENXIO)
+            REASON(ENXIO);
+#endif
+#if defined(EOPNOTSUPP)
+            REASON(EOPNOTSUPP);
+#endif
+#if defined(EOVERFLOW)
+            REASON(EOVERFLOW);
+#endif
+#if defined(EOWNERDEAD)
+            REASON(EOWNERDEAD);
+#endif
+#if defined(EPERM)
+            REASON(EPERM);
+#endif
+#if defined(EPFNOSUPPORT)
+            REASON(EPFNOSUPPORT);
+#endif
+#if defined(EPIPE)
+            REASON(EPIPE);
+#endif
+#if defined(EPROTO)
+            REASON(EPROTO);
+#endif
+#if defined(EPROTONOSUPPORT)
+            REASON(EPROTONOSUPPORT);
+#endif
+#if defined(EPROTOTYPE)
+            REASON(EPROTOTYPE);
+#endif
+#if defined(ERANGE)
+            REASON(ERANGE);
+#endif
+#if defined(EREMCHG)
+            REASON(EREMCHG);
+#endif
+#if defined(EREMOTE)
+            REASON(EREMOTE);
+#endif
+#if defined(EREMOTEIO)
+            REASON(EREMOTEIO);
+#endif
+#if defined(ERESTART)
+            REASON(ERESTART);
+#endif
+#if defined(ERFKILL)
+            REASON(ERFKILL);
+#endif
+#if defined(EROFS)
+            REASON(EROFS);
+#endif
+#if defined(ESHUTDOWN)
+            REASON(ESHUTDOWN);
+#endif
+#if defined(ESOCKTNOSUPPORT)
+            REASON(ESOCKTNOSUPPORT);
+#endif
+#if defined(ESPIPE)
+            REASON(ESPIPE);
+#endif
+#if defined(ESRCH)
+            REASON(ESRCH);
+#endif
+#if defined(ESRMNT)
+            REASON(ESRMNT);
+#endif
+#if defined(ESTALE)
+            REASON(ESTALE);
+#endif
+#if defined(ESTRPIPE)
+            REASON(ESTRPIPE);
+#endif
+#if defined(ETIME)
+            REASON(ETIME);
+#endif
+#if defined(ETIMEDOUT)
+            REASON(ETIMEDOUT);
+#endif
+#if defined(ETOOMANYREFS)
+            REASON(ETOOMANYREFS);
+#endif
+#if defined(ETXTBSY)
+            REASON(ETXTBSY);
+#endif
+#if defined(EUCLEAN)
+            REASON(EUCLEAN);
+#endif
+#if defined(EUNATCH)
+            REASON(EUNATCH);
+#endif
+#if defined(EUSERS)
+            REASON(EUSERS);
+#endif
+#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || EWOULDBLOCK != EAGAIN)
+            REASON(EWOULDBLOCK);
+#endif
+#if defined(EXDEV)
+            REASON(EXDEV);
+#endif
+#if defined(EXFULL)
+            REASON(EXFULL);
+#endif
+            default:
+                return TDisconnectReason(Sprintf("errno=%d", err)); // report the saved value passed in, not the global errno, which may have changed by now
+        }
+    }
+
+} // NActors
diff --git a/library/cpp/actors/interconnect/types.h b/library/cpp/actors/interconnect/types.h
new file mode 100644
index 0000000000..2662c50c22
--- /dev/null
+++ b/library/cpp/actors/interconnect/types.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include <util/generic/string.h>
+
+namespace NActors {
+
+    class TDisconnectReason {
+        TString Text;
+
+    private:
+        explicit TDisconnectReason(TString text)
+            : Text(std::move(text))
+        {}
+
+    public:
+        TDisconnectReason() = default;
+        TDisconnectReason(const TDisconnectReason&) = default;
+        TDisconnectReason(TDisconnectReason&&) = default;
+
+        static TDisconnectReason FromErrno(int err);
+
+        static TDisconnectReason EndOfStream() { return TDisconnectReason("EndOfStream"); }
+        static TDisconnectReason CloseOnIdle() { return TDisconnectReason("CloseOnIdle"); }
+        static TDisconnectReason LostConnection() { return TDisconnectReason("LostConnection"); }
+        static TDisconnectReason DeadPeer() { return TDisconnectReason("DeadPeer"); }
+        static TDisconnectReason NewSession() { return TDisconnectReason("NewSession"); }
+        static TDisconnectReason HandshakeFailTransient() { return TDisconnectReason("HandshakeFailTransient"); }
+        static TDisconnectReason HandshakeFailPermanent() { return
TDisconnectReason("HandshakeFailPermanent"); } + static TDisconnectReason UserRequest() { return TDisconnectReason("UserRequest"); } + static TDisconnectReason Debug() { return TDisconnectReason("Debug"); } + static TDisconnectReason ChecksumError() { return TDisconnectReason("ChecksumError"); } + static TDisconnectReason FormatError() { return TDisconnectReason("FormatError"); } + static TDisconnectReason EventTooLarge() { return TDisconnectReason("EventTooLarge"); } + static TDisconnectReason QueueOverload() { return TDisconnectReason("QueueOverload"); } + + TString ToString() const { + return Text; + } + + static TVector<const char*> Reasons; + }; + +} // NActors diff --git a/library/cpp/actors/interconnect/ut/channel_scheduler_ut.cpp b/library/cpp/actors/interconnect/ut/channel_scheduler_ut.cpp new file mode 100644 index 0000000000..565a511859 --- /dev/null +++ b/library/cpp/actors/interconnect/ut/channel_scheduler_ut.cpp @@ -0,0 +1,115 @@ +#include <library/cpp/actors/interconnect/channel_scheduler.h> +#include <library/cpp/actors/interconnect/events_local.h> +#include <library/cpp/testing/unittest/registar.h> + +using namespace NActors; + +Y_UNIT_TEST_SUITE(ChannelScheduler) { + + Y_UNIT_TEST(PriorityTraffic) { + auto common = MakeIntrusive<TInterconnectProxyCommon>(); + common->MonCounters = MakeIntrusive<NMonitoring::TDynamicCounters>(); + std::shared_ptr<IInterconnectMetrics> ctr = CreateInterconnectCounters(common); + ctr->SetPeerInfo("peer", "1"); + auto callback = [](THolder<IEventBase>) {}; + TEventHolderPool pool(common, callback); + TSessionParams p; + TChannelScheduler scheduler(1, {}, ctr, pool, 64 << 20, p); + + ui32 numEvents = 0; + + auto pushEvent = [&](size_t size, int channel) { + TString payload(size, 'X'); + auto ev = MakeHolder<IEventHandle>(1, 0, TActorId(), TActorId(), MakeIntrusive<TEventSerializedData>(payload, false), 0); + auto& ch = scheduler.GetOutputChannel(channel); + const bool wasWorking = ch.IsWorking(); + ch.Push(*ev); + if (!wasWorking) { + scheduler.AddToHeap(ch, 0); + } + ++numEvents; + }; + + for (ui32 i = 0; i < 100; ++i) { + pushEvent(10000, 1); + } + + for (ui32 i = 0; i < 1000; ++i) { + pushEvent(1000, 2); + } + + std::map<ui16, ui32> run; + ui32 step = 0; + + std::deque<std::map<ui16, ui32>> window; + + for (; numEvents; ++step) { + TTcpPacketOutTask task(p); + + if (step == 100) { + for (ui32 i = 0; i < 200; ++i) { + pushEvent(1000, 3); + } + } + + std::map<ui16, ui32> ch; + + while (numEvents) { + TEventOutputChannel *channel = scheduler.PickChannelWithLeastConsumedWeight(); + ui32 before = task.GetDataSize(); + ui64 weightConsumed = 0; + numEvents -= channel->FeedBuf(task, 0, &weightConsumed); + ui32 after = task.GetDataSize(); + Y_VERIFY(after >= before); + scheduler.FinishPick(weightConsumed, 0); + const ui32 bytesAdded = after - before; + if (!bytesAdded) { + break; + } + ch[channel->ChannelId] += bytesAdded; + } + + scheduler.Equalize(); + + for (const auto& [key, value] : ch) { + run[key] += value; + } + window.push_back(ch); + + if (window.size() == 32) { + for (const auto& [key, value] : window.front()) { + run[key] -= value; + if (!run[key]) { + run.erase(key); + } + } + window.pop_front(); + } + + double mean = 0.0; + for (const auto& [key, value] : run) { + mean += value; + } + mean /= run.size(); + + double dev = 0.0; + for (const auto& [key, value] : run) { + dev += (value - mean) * (value - mean); + } + dev = sqrt(dev / run.size()); + + double devToMean = dev / mean; + + Cerr << step << ": "; + for (const auto& [key, value] : 
run) { + Cerr << "ch" << key << "=" << value << " "; + } + Cerr << "mean# " << mean << " dev# " << dev << " part# " << devToMean; + + Cerr << Endl; + + UNIT_ASSERT(devToMean < 1); + } + } + +} diff --git a/library/cpp/actors/interconnect/ut/dynamic_proxy_ut.cpp b/library/cpp/actors/interconnect/ut/dynamic_proxy_ut.cpp new file mode 100644 index 0000000000..3c474979dc --- /dev/null +++ b/library/cpp/actors/interconnect/ut/dynamic_proxy_ut.cpp @@ -0,0 +1,179 @@ +#include <library/cpp/actors/interconnect/ut/lib/node.h> +#include <library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h> +#include <library/cpp/testing/unittest/registar.h> + +TActorId MakeResponderServiceId(ui32 nodeId) { + return TActorId(nodeId, TStringBuf("ResponderAct", 12)); +} + +class TArriveQueue { + struct TArrivedItem { + ui32 QueueId; + ui32 Index; + bool Success; + }; + + TMutex Lock; + std::size_t Counter = 0; + std::vector<TArrivedItem> Items; + +public: + TArriveQueue(size_t capacity) + : Items(capacity) + {} + + bool Done() const { + with_lock (Lock) { + return Counter == Items.size(); + } + } + + void Push(ui64 cookie, bool success) { + with_lock (Lock) { + const size_t pos = Counter++; + TArrivedItem item{.QueueId = static_cast<ui32>(cookie >> 32), .Index = static_cast<ui32>(cookie & 0xffff'ffff), + .Success = success}; + memcpy(&Items[pos], &item, sizeof(TArrivedItem)); + } + } + + void Check() { + struct TPerQueueState { + std::vector<ui32> Ok, Error; + }; + std::unordered_map<ui32, TPerQueueState> state; + for (const TArrivedItem& item : Items) { + auto& st = state[item.QueueId]; + auto& v = item.Success ? st.Ok : st.Error; + v.push_back(item.Index); + } + for (const auto& [queueId, st] : state) { + ui32 expected = 0; + for (const ui32 index : st.Ok) { + Y_VERIFY(index == expected); + ++expected; + } + for (const ui32 index : st.Error) { + Y_VERIFY(index == expected); + ++expected; + } + if (st.Error.size()) { + Cerr << "Error.size# " << st.Error.size() << Endl; + } + } + } +}; + +class TResponder : public TActor<TResponder> { + TArriveQueue& ArriveQueue; + +public: + TResponder(TArriveQueue& arriveQueue) + : TActor(&TResponder::StateFunc) + , ArriveQueue(arriveQueue) + {} + + STRICT_STFUNC(StateFunc, + hFunc(TEvents::TEvPing, Handle); + ) + + void Handle(TEvents::TEvPing::TPtr ev) { + ArriveQueue.Push(ev->Cookie, true); + } +}; + +class TSender : public TActor<TSender> { + TArriveQueue& ArriveQueue; + +public: + TSender(TArriveQueue& arriveQueue) + : TActor(&TThis::StateFunc) + , ArriveQueue(arriveQueue) + {} + + STRICT_STFUNC(StateFunc, + hFunc(TEvents::TEvUndelivered, Handle); + ) + + void Handle(TEvents::TEvUndelivered::TPtr ev) { + ArriveQueue.Push(ev->Cookie, false); + } +}; + +void SenderThread(TMutex& lock, TActorSystem *as, ui32 nodeId, ui32 queueId, ui32 count, TArriveQueue& arriveQueue) { + const TActorId sender = as->Register(new TSender(arriveQueue)); + with_lock(lock) {} + const TActorId target = MakeResponderServiceId(nodeId); + for (ui32 i = 0; i < count; ++i) { + const ui32 flags = IEventHandle::FlagTrackDelivery; + as->Send(new IEventHandle(TEvents::THelloWorld::Ping, flags, target, sender, nullptr, ((ui64)queueId << 32) | i)); + } +} + +void RaceTestIter(ui32 numThreads, ui32 count) { + TPortManager portman; + THashMap<ui32, ui16> nodeToPort; + const ui32 numNodes = 6; // total + const ui32 numDynamicNodes = 3; + for (ui32 i = 1; i <= numNodes; ++i) { + nodeToPort.emplace(i, portman.GetPort()); + } + + NMonitoring::TDynamicCounterPtr counters = new NMonitoring::TDynamicCounters; + 
std::list<TNode> nodes;
+    for (ui32 i = 1; i <= numNodes; ++i) {
+        nodes.emplace_back(i, numNodes, nodeToPort, "127.1.0.0", counters->GetSubgroup("nodeId", TStringBuilder() << i),
+            TDuration::Seconds(10), TChannelsConfig(), numDynamicNodes, numThreads);
+    }
+
+    const ui32 numSenders = 10;
+    TArriveQueue arriveQueue(numSenders * numNodes * (numNodes - 1) * count);
+    for (TNode& node : nodes) {
+        node.RegisterServiceActor(MakeResponderServiceId(node.GetActorSystem()->NodeId), new TResponder(arriveQueue));
+    }
+
+    TMutex lock;
+    std::list<TThread> threads;
+    ui32 queueId = 0;
+    with_lock(lock) {
+        for (TNode& from : nodes) {
+            for (ui32 toId = 1; toId <= numNodes; ++toId) {
+                if (toId == from.GetActorSystem()->NodeId) {
+                    continue;
+                }
+                for (ui32 i = 0; i < numSenders; ++i) {
+                    threads.emplace_back([=, &lock, &from, &arriveQueue] {
+                        SenderThread(lock, from.GetActorSystem(), toId, queueId, count, arriveQueue);
+                    });
+                    ++queueId;
+                }
+            }
+        }
+        for (auto& thread : threads) {
+            thread.Start();
+        }
+    }
+    for (auto& thread : threads) {
+        thread.Join();
+    }
+
+    // poll every 10 ms; the whole run must complete within 10 seconds
+    for (THPTimer timer; !arriveQueue.Done(); Sleep(TDuration::MilliSeconds(10))) {
+        Y_VERIFY(timer.Passed() < 10);
+    }
+
+    nodes.clear();
+    arriveQueue.Check();
+}
+
+Y_UNIT_TEST_SUITE(DynamicProxy) {
+    Y_UNIT_TEST(RaceCheck1) {
+        for (ui32 iteration = 0; iteration < 100; ++iteration) {
+            RaceTestIter(1 + iteration % 5, 1);
+        }
+    }
+    Y_UNIT_TEST(RaceCheck10) {
+        for (ui32 iteration = 0; iteration < 100; ++iteration) {
+            RaceTestIter(1 + iteration % 5, 10);
+        }
+    }
+}
diff --git a/library/cpp/actors/interconnect/ut/event_holder_pool_ut.cpp b/library/cpp/actors/interconnect/ut/event_holder_pool_ut.cpp
new file mode 100644
index 0000000000..e6b2bd4e4c
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut/event_holder_pool_ut.cpp
@@ -0,0 +1,59 @@
+#include <library/cpp/testing/unittest/registar.h>
+#include <library/cpp/actors/core/events.h>
+#include <library/cpp/actors/core/event_local.h>
+#include <library/cpp/actors/interconnect/interconnect_common.h>
+#include <library/cpp/monlib/dynamic_counters/counters.h>
+#include <library/cpp/actors/interconnect/event_holder_pool.h>
+
+#include <atomic>
+
+using namespace NActors;
+
+template<typename T>
+TEventHolderPool Setup(T&& callback) {
+    auto common = MakeIntrusive<TInterconnectProxyCommon>();
+    common->DestructorQueueSize = std::make_shared<std::atomic<TAtomicBase>>();
+    common->MaxDestructorQueueSize = 1024 * 1024;
+    return TEventHolderPool(common, callback);
+}
+
+Y_UNIT_TEST_SUITE(EventHolderPool) {
+
+    Y_UNIT_TEST(Overflow) {
+        TDeque<THolder<IEventBase>> freeQ;
+        auto callback = [&](THolder<IEventBase> event) {
+            freeQ.push_back(std::move(event));
+        };
+        auto pool = Setup(std::move(callback));
+
+        std::list<TEventHolder> q;
+
+        auto& ev1 = pool.Allocate(q);
+        ev1.Buffer = MakeIntrusive<TEventSerializedData>(TString::Uninitialized(512 * 1024), true);
+
+        auto& ev2 = pool.Allocate(q);
+        ev2.Buffer = MakeIntrusive<TEventSerializedData>(TString::Uninitialized(512 * 1024), true);
+
+        auto& ev3 = pool.Allocate(q);
+        ev3.Buffer = MakeIntrusive<TEventSerializedData>(TString::Uninitialized(512 * 1024), true);
+
+        auto& ev4 = pool.Allocate(q);
+        ev4.Buffer = MakeIntrusive<TEventSerializedData>(TString::Uninitialized(512 * 1024), true);
+
+        pool.Release(q, q.begin());
+        pool.Release(q, q.begin());
+        pool.Trim();
+        UNIT_ASSERT_VALUES_EQUAL(freeQ.size(), 1);
+
+        pool.Release(q, q.begin());
+        UNIT_ASSERT_VALUES_EQUAL(freeQ.size(), 1);
+
+        freeQ.clear();
+        pool.Release(q, q.begin());
+        pool.Trim();
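+        // With freeQ emptied, releasing the last event and trimming should again hand
+        // exactly one event to the destructor callback (MaxDestructorQueueSize is 1 MiB).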
+        UNIT_ASSERT_VALUES_EQUAL(freeQ.size(), 1);
+
+        freeQ.clear(); // if we don't do this, we may crash due to the order of object destruction
+    }
+
+}
diff --git a/library/cpp/actors/interconnect/ut/interconnect_ut.cpp b/library/cpp/actors/interconnect/ut/interconnect_ut.cpp
new file mode 100644
index 0000000000..8ef0b1507c
--- /dev/null
+++ b/library/cpp/actors/interconnect/ut/interconnect_ut.cpp
@@ -0,0 +1,177 @@
+#include <library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h>
+#include <library/cpp/testing/unittest/registar.h>
+#include <library/cpp/digest/md5/md5.h>
+#include <util/random/fast.h>
+
+using namespace NActors;
+
+class TSenderActor : public TActorBootstrapped<TSenderActor> {
+    const TActorId Recipient;
+    using TSessionToCookie = std::unordered_multimap<TActorId, ui64, THash<TActorId>>;
+    TSessionToCookie SessionToCookie;
+    std::unordered_map<ui64, std::pair<TSessionToCookie::iterator, TString>> InFlight;
+    std::unordered_map<ui64, TString> Tentative;
+    ui64 NextCookie = 0;
+    TActorId SessionId;
+    bool SubscribeInFlight = false;
+
+public:
+    TSenderActor(TActorId recipient)
+        : Recipient(recipient)
+    {}
+
+    void Bootstrap() {
+        Become(&TThis::StateFunc);
+        Subscribe();
+    }
+
+    void Subscribe() {
+        Cerr << (TStringBuilder() << "Subscribe" << Endl);
+        Y_VERIFY(!SubscribeInFlight);
+        SubscribeInFlight = true;
+        Send(TActivationContext::InterconnectProxy(Recipient.NodeId()), new TEvents::TEvSubscribe);
+    }
+
+    void IssueQueries() {
+        if (!SessionId) {
+            return;
+        }
+        while (InFlight.size() < 10) {
+            size_t len = RandomNumber<size_t>(65536) + 1;
+            TString data = TString::Uninitialized(len);
+            TReallyFastRng32 rng(RandomNumber<ui32>());
+            char *p = data.Detach();
+            for (size_t i = 0; i < len; ++i) {
+                p[i] = rng();
+            }
+            const TSessionToCookie::iterator s2cIt = SessionToCookie.emplace(SessionId, NextCookie);
+            InFlight.emplace(NextCookie, std::make_tuple(s2cIt, MD5::CalcRaw(data)));
+            TActivationContext::Send(new IEventHandle(TEvents::THelloWorld::Ping, IEventHandle::FlagTrackDelivery, Recipient,
+                SelfId(), MakeIntrusive<TEventSerializedData>(std::move(data), false), NextCookie));
+//            Cerr << (TStringBuilder() << "Send# " << NextCookie << Endl);
+            ++NextCookie;
+        }
+    }
+
+    void HandlePong(TAutoPtr<IEventHandle> ev) {
+//        Cerr << (TStringBuilder() << "Receive# " << ev->Cookie << Endl);
+        if (const auto it = InFlight.find(ev->Cookie); it != InFlight.end()) {
+            auto& [s2cIt, hash] = it->second;
+            Y_VERIFY(hash == ev->GetChainBuffer()->GetString());
+            SessionToCookie.erase(s2cIt);
+            InFlight.erase(it);
+        } else if (const auto it = Tentative.find(ev->Cookie); it != Tentative.end()) {
+            Y_VERIFY(it->second == ev->GetChainBuffer()->GetString());
+            Tentative.erase(it);
+        } else {
+            Y_FAIL("Cookie# %" PRIu64, ev->Cookie);
+        }
+        IssueQueries();
+    }
+
+    void Handle(TEvInterconnect::TEvNodeConnected::TPtr ev) {
+        Cerr << (TStringBuilder() << "TEvNodeConnected" << Endl);
+        Y_VERIFY(SubscribeInFlight);
+        SubscribeInFlight = false;
+        Y_VERIFY(!SessionId);
+        SessionId = ev->Sender;
+        IssueQueries();
+    }
+
+    void Handle(TEvInterconnect::TEvNodeDisconnected::TPtr ev) {
+        Cerr << (TStringBuilder() << "TEvNodeDisconnected" << Endl);
+        SubscribeInFlight = false;
+        if (SessionId) {
+            Y_VERIFY(SessionId == ev->Sender);
+            auto r = SessionToCookie.equal_range(SessionId);
+            for (auto it = r.first; it != r.second; ++it) {
+                const auto inFlightIt = InFlight.find(it->second);
+                Y_VERIFY(inFlightIt != InFlight.end());
+                Tentative.emplace(inFlightIt->first, inFlightIt->second.second);
InFlight.erase(it->second); + } + SessionToCookie.erase(r.first, r.second); + SessionId = TActorId(); + } + Schedule(TDuration::MilliSeconds(100), new TEvents::TEvWakeup); + } + + void Handle(TEvents::TEvUndelivered::TPtr ev) { + Cerr << (TStringBuilder() << "TEvUndelivered Cookie# " << ev->Cookie << Endl); + if (const auto it = InFlight.find(ev->Cookie); it != InFlight.end()) { + auto& [s2cIt, hash] = it->second; + Tentative.emplace(it->first, hash); + SessionToCookie.erase(s2cIt); + InFlight.erase(it); + IssueQueries(); + } + } + + STRICT_STFUNC(StateFunc, + fFunc(TEvents::THelloWorld::Pong, HandlePong); + hFunc(TEvInterconnect::TEvNodeConnected, Handle); + hFunc(TEvInterconnect::TEvNodeDisconnected, Handle); + hFunc(TEvents::TEvUndelivered, Handle); + cFunc(TEvents::TSystem::Wakeup, Subscribe); + ) +}; + +class TRecipientActor : public TActor<TRecipientActor> { +public: + TRecipientActor() + : TActor(&TThis::StateFunc) + {} + + void HandlePing(TAutoPtr<IEventHandle>& ev) { + const TString& data = ev->GetChainBuffer()->GetString(); + const TString& response = MD5::CalcRaw(data); + TActivationContext::Send(new IEventHandle(TEvents::THelloWorld::Pong, 0, ev->Sender, SelfId(), + MakeIntrusive<TEventSerializedData>(response, false), ev->Cookie)); + } + + STRICT_STFUNC(StateFunc, + fFunc(TEvents::THelloWorld::Ping, HandlePing); + ) +}; + +Y_UNIT_TEST_SUITE(Interconnect) { + + Y_UNIT_TEST(SessionContinuation) { + TTestICCluster cluster(2); + const TActorId recipient = cluster.RegisterActor(new TRecipientActor, 1); + cluster.RegisterActor(new TSenderActor(recipient), 2); + for (ui32 i = 0; i < 100; ++i) { + const ui32 nodeId = 1 + RandomNumber(2u); + const ui32 peerNodeId = 3 - nodeId; + const ui32 action = RandomNumber(3u); + auto *node = cluster.GetNode(nodeId); + TActorId proxyId = node->InterconnectProxy(peerNodeId); + + switch (action) { + case 0: + node->Send(proxyId, new TEvInterconnect::TEvClosePeerSocket); + Cerr << (TStringBuilder() << "nodeId# " << nodeId << " peerNodeId# " << peerNodeId + << " TEvClosePeerSocket" << Endl); + break; + + case 1: + node->Send(proxyId, new TEvInterconnect::TEvCloseInputSession); + Cerr << (TStringBuilder() << "nodeId# " << nodeId << " peerNodeId# " << peerNodeId + << " TEvCloseInputSession" << Endl); + break; + + case 2: + node->Send(proxyId, new TEvInterconnect::TEvPoisonSession); + Cerr << (TStringBuilder() << "nodeId# " << nodeId << " peerNodeId# " << peerNodeId + << " TEvPoisonSession" << Endl); + break; + + default: + Y_FAIL(); + } + + Sleep(TDuration::MilliSeconds(RandomNumber<ui32>(500) + 100)); + } + } + +} diff --git a/library/cpp/actors/interconnect/ut/large.cpp b/library/cpp/actors/interconnect/ut/large.cpp new file mode 100644 index 0000000000..ba2a50c6f6 --- /dev/null +++ b/library/cpp/actors/interconnect/ut/large.cpp @@ -0,0 +1,85 @@ +#include "lib/ic_test_cluster.h" +#include "lib/test_events.h" +#include "lib/test_actors.h" + +#include <library/cpp/actors/interconnect/interconnect_tcp_proxy.h> + +#include <library/cpp/testing/unittest/tests_data.h> +#include <library/cpp/testing/unittest/registar.h> + +#include <util/system/event.h> +#include <util/system/sanitizers.h> + +Y_UNIT_TEST_SUITE(LargeMessage) { + using namespace NActors; + + class TProducer: public TActorBootstrapped<TProducer> { + const TActorId RecipientActorId; + + public: + TProducer(const TActorId& recipientActorId) + : RecipientActorId(recipientActorId) + {} + + void Bootstrap(const TActorContext& ctx) { + Become(&TThis::StateFunc); + ctx.Send(RecipientActorId, new 
TEvTest(1, "hello"), IEventHandle::FlagTrackDelivery, 1); + ctx.Send(RecipientActorId, new TEvTest(2, TString(128 * 1024 * 1024, 'X')), IEventHandle::FlagTrackDelivery, 2); + } + + void Handle(TEvents::TEvUndelivered::TPtr ev, const TActorContext& ctx) { + if (ev->Cookie == 2) { + Cerr << "TEvUndelivered\n"; + ctx.Send(RecipientActorId, new TEvTest(3, "hello"), IEventHandle::FlagTrackDelivery, 3); + } + } + + STRICT_STFUNC(StateFunc, + HFunc(TEvents::TEvUndelivered, Handle) + ) + }; + + class TConsumer : public TActorBootstrapped<TConsumer> { + TManualEvent& Done; + TActorId SessionId; + + public: + TConsumer(TManualEvent& done) + : Done(done) + { + } + + void Bootstrap(const TActorContext& /*ctx*/) { + Become(&TThis::StateFunc); + } + + void Handle(TEvTest::TPtr ev, const TActorContext& /*ctx*/) { + const auto& record = ev->Get()->Record; + Cerr << "RECEIVED TEvTest\n"; + if (record.GetSequenceNumber() == 1) { + Y_VERIFY(!SessionId); + SessionId = ev->InterconnectSession; + } else if (record.GetSequenceNumber() == 3) { + Y_VERIFY(SessionId != ev->InterconnectSession); + Done.Signal(); + } else { + Y_FAIL("incorrect sequence number"); + } + } + + STRICT_STFUNC(StateFunc, + HFunc(TEvTest, Handle) + ) + }; + + Y_UNIT_TEST(Test) { + TTestICCluster testCluster(2); + + TManualEvent done; + TConsumer* consumer = new TConsumer(done); + const TActorId recp = testCluster.RegisterActor(consumer, 1); + testCluster.RegisterActor(new TProducer(recp), 2); + done.WaitI(); + } + +} diff --git a/library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h b/library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h new file mode 100644 index 0000000000..2b6d27cd3f --- /dev/null +++ b/library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h @@ -0,0 +1,84 @@ +#pragma once + +#include "node.h" +#include "interrupter.h" + +#include <library/cpp/actors/interconnect/interconnect_tcp_proxy.h> +#include <library/cpp/actors/core/events.h> +#include <library/cpp/testing/unittest/tests_data.h> + +#include <util/generic/noncopyable.h> + +class TTestICCluster: public TNonCopyable { +public: + struct TTrafficInterrupterSettings { + TDuration RejectingTrafficTimeout; + double BandWidth; + bool Disconnect; + }; + +private: + const ui32 NumNodes; + const TString Address = "::1"; + TDuration DeadPeerTimeout = TDuration::Seconds(2); + NMonitoring::TDynamicCounterPtr Counters; + THashMap<ui32, THolder<TNode>> Nodes; + TList<TTrafficInterrupter> interrupters; + NActors::TChannelsConfig ChannelsConfig; + TPortManager PortManager; + +public: + TTestICCluster(ui32 numNodes = 1, NActors::TChannelsConfig channelsConfig = NActors::TChannelsConfig(), + TTrafficInterrupterSettings* tiSettings = nullptr) + : NumNodes(numNodes) + , Counters(new NMonitoring::TDynamicCounters) + , ChannelsConfig(channelsConfig) + { + THashMap<ui32, ui16> nodeToPortMap; + THashMap<ui32, THashMap<ui32, ui16>> specificNodePortMap; + + for (ui32 i = 1; i <= NumNodes; ++i) { + nodeToPortMap.emplace(i, PortManager.GetPort()); + } + + if (tiSettings) { + ui32 nodeId; + ui16 listenPort; + ui16 forwardPort; + for (auto& item : nodeToPortMap) { + nodeId = item.first; + listenPort = item.second; + forwardPort = PortManager.GetPort(); + + specificNodePortMap[nodeId] = nodeToPortMap; + specificNodePortMap[nodeId].at(nodeId) = forwardPort; + interrupters.emplace_back(Address, listenPort, forwardPort, tiSettings->RejectingTrafficTimeout, tiSettings->BandWidth, tiSettings->Disconnect); + interrupters.back().Start(); + } + } + + for (ui32 i = 1; i <= NumNodes; ++i) { + auto& 
portMap = tiSettings ? specificNodePortMap[i] : nodeToPortMap; + Nodes.emplace(i, MakeHolder<TNode>(i, NumNodes, portMap, Address, Counters, DeadPeerTimeout, ChannelsConfig)); + } + } + + TNode* GetNode(ui32 id) { + return Nodes[id].Get(); + } + + ~TTestICCluster() { + } + + TActorId RegisterActor(NActors::IActor* actor, ui32 nodeId) { + return Nodes[nodeId]->RegisterActor(actor); + } + + TActorId InterconnectProxy(ui32 peerNodeId, ui32 nodeId) { + return Nodes[nodeId]->InterconnectProxy(peerNodeId); + } + + void KillActor(ui32 nodeId, const TActorId& id) { + Nodes[nodeId]->Send(id, new NActors::TEvents::TEvPoisonPill); + } +}; diff --git a/library/cpp/actors/interconnect/ut/lib/interrupter.h b/library/cpp/actors/interconnect/ut/lib/interrupter.h new file mode 100644 index 0000000000..48851de2c5 --- /dev/null +++ b/library/cpp/actors/interconnect/ut/lib/interrupter.h @@ -0,0 +1,249 @@ +#pragma once + +#include <library/cpp/testing/unittest/tests_data.h> + +#include <util/network/sock.h> +#include <util/network/poller.h> +#include <util/system/thread.h> +#include <util/system/hp_timer.h> +#include <util/generic/list.h> +#include <util/generic/set.h> +#include <util/generic/vector.h> +#include <util/generic/deque.h> +#include <util/random/random.h> + +#include <iterator> + +class TTrafficInterrupter + : public ISimpleThread { + const TString Address; + const ui16 ForwardPort; + TInet6StreamSocket ListenSocket; + + struct TConnectionDescriptor; + struct TDelayedPacket { + TInet6StreamSocket* ForwardSocket = nullptr; + TVector<char> Data; + }; + struct TCompare { + bool operator()(const std::pair<TInstant, TDelayedPacket>& x, const std::pair<TInstant, TDelayedPacket>& y) const { + return x.first > y.first; + }; + }; + + struct TDirectedConnection { + TInet6StreamSocket* Source = nullptr; + TInet6StreamSocket* Destination = nullptr; + TList<TConnectionDescriptor>::iterator ListIterator; + TInstant Timestamp; + TPriorityQueue<std::pair<TInstant, TDelayedPacket>, TVector<std::pair<TInstant, TDelayedPacket>>, TCompare> DelayedQueue; + + TDirectedConnection(TInet6StreamSocket* source, TInet6StreamSocket* destination) + : Source(source) + , Destination(destination) + { + } + }; + + struct TConnectionDescriptor { + std::unique_ptr<TInet6StreamSocket> FirstSocket; + std::unique_ptr<TInet6StreamSocket> SecondSocket; + TDirectedConnection ForwardConnection; + TDirectedConnection BackwardConnection; + + TConnectionDescriptor(std::unique_ptr<TInet6StreamSocket> firstSock, + std::unique_ptr<TInet6StreamSocket> secondSock) + : FirstSocket(std::move(firstSock)) + , SecondSocket(std::move(secondSock)) + , ForwardConnection(FirstSocket.get(), SecondSocket.get()) + , BackwardConnection(SecondSocket.get(), FirstSocket.get()) + { + } + }; + + template <class It = TList<TConnectionDescriptor>::iterator> + class TCustomListIteratorCompare { + public: + bool operator()(const It& it1, const It& it2) const { + return (&(*it1) < &(*it2)); + } + }; + + TList<TConnectionDescriptor> Connections; + TSet<TList<TConnectionDescriptor>::iterator, TCustomListIteratorCompare<>> DroppedConnections; + +public: + TTrafficInterrupter(TString address, ui16 listenPort, ui16 forwardPort, TDuration rejectingTrafficTimeout, double bandwidth, bool disconnect = true) + : Address(std::move(address)) + , ForwardPort(forwardPort) + , ListenSocket() + , RejectingTrafficTimeout(rejectingTrafficTimeout) + , CurrentRejectingTimeout(rejectingTrafficTimeout) + , RejectingStateTimer() + , Bandwidth(bandwidth) + , Disconnect(disconnect) + , 
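+        // The interrupter proxies TCP traffic from listenPort to forwardPort and can
+        // periodically reject it (RejectingTrafficTimeout), throttle it (Bandwidth,
+        // bytes per second) or randomly drop connections (Disconnect) to exercise
+        // interconnect recovery in tests.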
RejectingTraffic(false)
+    {
+        SetReuseAddressAndPort(ListenSocket);
+        TSockAddrInet6 addr(Address.data(), listenPort);
+        Y_VERIFY(ListenSocket.Bind(&addr) == 0);
+        Y_VERIFY(ListenSocket.Listen(5) == 0);
+
+        DelayTraffic = Bandwidth != 0.0;
+
+        ForwardAddrress.Reset(new TSockAddrInet6(Address.data(), ForwardPort));
+        const ui32 BufSize = DelayTraffic ? 4096 : 65536 + 4096;
+        Buf.resize(BufSize);
+    }
+
+    ~TTrafficInterrupter() {
+        AtomicSet(Running, 0);
+        this->Join();
+    }
+
+private:
+    TAtomic Running = 1;
+    TVector<char> Buf;
+    TSocketPoller SocketPoller;
+    THolder<TSockAddrInet6> ForwardAddrress;
+    TVector<void*> Events;
+    TDuration RejectingTrafficTimeout;
+    TDuration CurrentRejectingTimeout;
+    TDuration DefaultPollTimeout = TDuration::MilliSeconds(100);
+    TDuration DisconnectTimeout = TDuration::MilliSeconds(100);
+    THPTimer RejectingStateTimer;
+    THPTimer DisconnectTimer;
+    double Bandwidth;
+    const bool Disconnect;
+    bool RejectingTraffic;
+    bool DelayTraffic;
+
+    void UpdateRejectingState() {
+        if (TDuration::Seconds(std::abs(RejectingStateTimer.Passed())) > CurrentRejectingTimeout) {
+            RejectingStateTimer.Reset();
+            // RandomNumber<ui32>(2) yields 0 or 1, randomly lengthening or shortening the timeout
+            CurrentRejectingTimeout = (RandomNumber<ui32>(2) ? RejectingTrafficTimeout + TDuration::Seconds(1.0) : RejectingTrafficTimeout - TDuration::Seconds(0.2));
+            RejectingTraffic = !RejectingTraffic;
+        }
+    }
+
+    void RandomlyDisconnect() {
+        if (TDuration::Seconds(std::abs(DisconnectTimer.Passed())) > DisconnectTimeout) {
+            DisconnectTimer.Reset();
+            if (RandomNumber<ui32>(100) > 90) {
+                if (!Connections.empty()) {
+                    auto it = Connections.begin();
+                    std::advance(it, RandomNumber<ui32>(Connections.size()));
+                    SocketPoller.Unwait(static_cast<SOCKET>(*it->FirstSocket.get()));
+                    SocketPoller.Unwait(static_cast<SOCKET>(*it->SecondSocket.get()));
+                    Connections.erase(it);
+                }
+            }
+        }
+    }
+
+    void* ThreadProc() override {
+        int pollReadyCount = 0;
+        SocketPoller.WaitRead(static_cast<SOCKET>(ListenSocket), &ListenSocket);
+        Events.resize(10);
+
+        while (AtomicGet(Running)) {
+            if (RejectingTrafficTimeout != TDuration::Zero()) {
+                UpdateRejectingState();
+            }
+            if (Disconnect) {
+                RandomlyDisconnect();
+            }
+            if (!RejectingTraffic) {
+                TDuration timeout = DefaultPollTimeout;
+                auto updateTimeout = [&timeout](TDirectedConnection& conn) {
+                    if (conn.DelayedQueue) {
+                        timeout = Min(timeout, conn.DelayedQueue.top().first - TInstant::Now());
+                    }
+                };
+                for (auto& it : Connections) {
+                    updateTimeout(it.ForwardConnection);
+                    updateTimeout(it.BackwardConnection);
+                }
+                pollReadyCount = SocketPoller.WaitT(Events.data(), Events.size(), timeout);
+                if (pollReadyCount > 0) {
+                    for (int i = 0; i < pollReadyCount; i++) {
+                        HandleSocketPollEvent(Events[i]);
+                    }
+                    for (auto it : DroppedConnections) {
+                        Connections.erase(it);
+                    }
+                    DroppedConnections.clear();
+                }
+            }
+            if (DelayTraffic) { // process packets from the DelayedQueues
+                auto processDelayedPackages = [](TDirectedConnection& conn) {
+                    while (!conn.DelayedQueue.empty()) {
+                        auto& frontPackage = conn.DelayedQueue.top();
+                        if (TInstant::Now() >= frontPackage.first) {
+                            TInet6StreamSocket* sock = frontPackage.second.ForwardSocket;
+                            if (sock) {
+                                sock->Send(frontPackage.second.Data.data(), frontPackage.second.Data.size());
+                            }
+                            conn.DelayedQueue.pop();
+                        } else {
+                            break;
+                        }
+                    }
+                };
+                for (auto& it : Connections) {
+                    processDelayedPackages(it.ForwardConnection);
+                    processDelayedPackages(it.BackwardConnection);
+                }
+            }
+        }
+        ListenSocket.Close();
+        return nullptr;
+    }
+
+    void HandleSocketPollEvent(void* ev) {
+        if (ev ==
static_cast<void*>(&ListenSocket)) { + TSockAddrInet6 origin; + Connections.emplace_back(TConnectionDescriptor(std::unique_ptr<TInet6StreamSocket>(new TInet6StreamSocket), std::unique_ptr<TInet6StreamSocket>(new TInet6StreamSocket))); + int err = ListenSocket.Accept(Connections.back().FirstSocket.get(), &origin); + if (!err) { + err = Connections.back().SecondSocket->Connect(ForwardAddrress.Get()); + if (!err) { + Connections.back().ForwardConnection.ListIterator = --Connections.end(); + Connections.back().BackwardConnection.ListIterator = --Connections.end(); + SocketPoller.WaitRead(static_cast<SOCKET>(*Connections.back().FirstSocket), &Connections.back().ForwardConnection); + SocketPoller.WaitRead(static_cast<SOCKET>(*Connections.back().SecondSocket), &Connections.back().BackwardConnection); + } else { + Connections.back().FirstSocket->Close(); + } + } else { + Connections.pop_back(); + } + } else { + TDirectedConnection* directedConnection = static_cast<TDirectedConnection*>(ev); + int recvSize = 0; + do { + recvSize = directedConnection->Source->Recv(Buf.data(), Buf.size()); + } while (recvSize == -EINTR); + + if (recvSize > 0) { + if (DelayTraffic) { + // put packet into DelayQueue + const TDuration baseDelay = TDuration::MicroSeconds(recvSize * 1e6 / Bandwidth); + const TInstant now = TInstant::Now(); + directedConnection->Timestamp = Max(now, directedConnection->Timestamp) + baseDelay; + TDelayedPacket pkt; + pkt.ForwardSocket = directedConnection->Destination; + pkt.Data.resize(recvSize); + memcpy(pkt.Data.data(), Buf.data(), recvSize); + directedConnection->DelayedQueue.emplace(directedConnection->Timestamp, std::move(pkt)); + } else { + directedConnection->Destination->Send(Buf.data(), recvSize); + } + } else { + SocketPoller.Unwait(static_cast<SOCKET>(*directedConnection->Source)); + SocketPoller.Unwait(static_cast<SOCKET>(*directedConnection->Destination)); + DroppedConnections.emplace(directedConnection->ListIterator); + } + } + } +}; diff --git a/library/cpp/actors/interconnect/ut/lib/node.h b/library/cpp/actors/interconnect/ut/lib/node.h new file mode 100644 index 0000000000..ff30b1445e --- /dev/null +++ b/library/cpp/actors/interconnect/ut/lib/node.h @@ -0,0 +1,137 @@ +#pragma once + +#include <library/cpp/actors/core/actorsystem.h> +#include <library/cpp/actors/core/executor_pool_basic.h> +#include <library/cpp/actors/core/scheduler_basic.h> +#include <library/cpp/actors/core/mailbox.h> +#include <library/cpp/actors/dnsresolver/dnsresolver.h> + +#include <library/cpp/actors/interconnect/interconnect_tcp_server.h> +#include <library/cpp/actors/interconnect/interconnect_tcp_proxy.h> +#include <library/cpp/actors/interconnect/interconnect_proxy_wrapper.h> + +using namespace NActors; + +class TNode { + THolder<TActorSystem> ActorSystem; + +public: + TNode(ui32 nodeId, ui32 numNodes, const THashMap<ui32, ui16>& nodeToPort, const TString& address, + NMonitoring::TDynamicCounterPtr counters, TDuration deadPeerTimeout, + TChannelsConfig channelsSettings = TChannelsConfig(), + ui32 numDynamicNodes = 0, ui32 numThreads = 1) { + TActorSystemSetup setup; + setup.NodeId = nodeId; + setup.ExecutorsCount = 1; + setup.Executors.Reset(new TAutoPtr<IExecutorPool>[setup.ExecutorsCount]); + for (ui32 i = 0; i < setup.ExecutorsCount; ++i) { + setup.Executors[i].Reset(new TBasicExecutorPool(i, numThreads, 20 /* magic number */)); + } + setup.Scheduler.Reset(new TBasicSchedulerThread()); + const ui32 interconnectPoolId = 0; + + auto common = MakeIntrusive<TInterconnectProxyCommon>(); + 
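+            // A single TInterconnectProxyCommon instance is shared by this node's
+            // listener and all of its proxies, so the settings below apply node-wide.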
common->NameserviceId = GetNameserviceActorId(); + common->MonCounters = counters->GetSubgroup("nodeId", ToString(nodeId)); + common->ChannelsConfig = channelsSettings; + common->ClusterUUID = "cluster"; + common->AcceptUUID = {common->ClusterUUID}; + common->TechnicalSelfHostName = address; + common->Settings.Handshake = TDuration::Seconds(1); + common->Settings.DeadPeer = deadPeerTimeout; + common->Settings.CloseOnIdle = TDuration::Minutes(1); + common->Settings.SendBufferDieLimitInMB = 512; + common->Settings.TotalInflightAmountOfData = 512 * 1024; + common->Settings.TCPSocketBufferSize = 2048 * 1024; + + setup.Interconnect.ProxyActors.resize(numNodes + 1 - numDynamicNodes); + setup.Interconnect.ProxyWrapperFactory = CreateProxyWrapperFactory(common, interconnectPoolId); + + for (ui32 i = 1; i <= numNodes; ++i) { + if (i == nodeId) { + // create listener actor for local node "nodeId" + setup.LocalServices.emplace_back(TActorId(), TActorSetupCmd(new TInterconnectListenerTCP(address, + nodeToPort.at(nodeId), common), TMailboxType::ReadAsFilled, interconnectPoolId)); + } else if (i <= numNodes - numDynamicNodes) { + // create proxy actor to reach node "i" + setup.Interconnect.ProxyActors[i] = {new TInterconnectProxyTCP(i, common), + TMailboxType::ReadAsFilled, interconnectPoolId}; + } + } + + setup.LocalServices.emplace_back(MakePollerActorId(), TActorSetupCmd(CreatePollerActor(), + TMailboxType::ReadAsFilled, 0)); + + const TActorId loggerActorId(0, "logger"); + constexpr ui32 LoggerComponentId = 410; // NKikimrServices::LOGGER + + auto loggerSettings = MakeIntrusive<NLog::TSettings>( + loggerActorId, + (NLog::EComponent)LoggerComponentId, + NLog::PRI_INFO, + NLog::PRI_DEBUG, + 0U); + + loggerSettings->Append( + NActorsServices::EServiceCommon_MIN, + NActorsServices::EServiceCommon_MAX, + NActorsServices::EServiceCommon_Name + ); + + constexpr ui32 WilsonComponentId = 430; // NKikimrServices::WILSON + static const TString WilsonComponentName = "WILSON"; + + loggerSettings->Append( + (NLog::EComponent)WilsonComponentId, + (NLog::EComponent)WilsonComponentId + 1, + [](NLog::EComponent) -> const TString & { return WilsonComponentName; }); + + // register nameserver table + auto names = MakeIntrusive<TTableNameserverSetup>(); + for (ui32 i = 1; i <= numNodes; ++i) { + names->StaticNodeTable[i] = TTableNameserverSetup::TNodeInfo(address, address, nodeToPort.at(i)); + } + setup.LocalServices.emplace_back( + NDnsResolver::MakeDnsResolverActorId(), + TActorSetupCmd( + NDnsResolver::CreateOnDemandDnsResolver(), + TMailboxType::ReadAsFilled, interconnectPoolId)); + setup.LocalServices.emplace_back(GetNameserviceActorId(), TActorSetupCmd( + CreateNameserverTable(names, interconnectPoolId), TMailboxType::ReadAsFilled, + interconnectPoolId)); + + // register logger + setup.LocalServices.emplace_back(loggerActorId, TActorSetupCmd(new TLoggerActor(loggerSettings, + CreateStderrBackend(), counters->GetSubgroup("subsystem", "logger")), + TMailboxType::ReadAsFilled, interconnectPoolId)); + + auto sp = MakeHolder<TActorSystemSetup>(std::move(setup)); + ActorSystem.Reset(new TActorSystem(sp, nullptr, loggerSettings)); + ActorSystem->Start(); + } + + ~TNode() { + ActorSystem->Stop(); + } + + bool Send(const TActorId& recipient, IEventBase* ev) { + return ActorSystem->Send(recipient, ev); + } + + TActorId RegisterActor(IActor* actor) { + return ActorSystem->Register(actor); + } + + TActorId InterconnectProxy(ui32 peerNodeId) { + return ActorSystem->InterconnectProxy(peerNodeId); + } + + void 
RegisterServiceActor(const TActorId& serviceId, IActor* actor) { + const TActorId actorId = ActorSystem->Register(actor); + ActorSystem->RegisterLocalService(serviceId, actorId); + } + + TActorSystem *GetActorSystem() const { + return ActorSystem.Get(); + } +}; diff --git a/library/cpp/actors/interconnect/ut/lib/test_actors.h b/library/cpp/actors/interconnect/ut/lib/test_actors.h new file mode 100644 index 0000000000..7591200471 --- /dev/null +++ b/library/cpp/actors/interconnect/ut/lib/test_actors.h @@ -0,0 +1,83 @@ +#pragma once + +namespace NActors { + class TSenderBaseActor: public TActorBootstrapped<TSenderBaseActor> { + protected: + const TActorId RecipientActorId; + const ui32 Preload; + ui64 SequenceNumber = 0; + ui32 InFlySize = 0; + + public: + TSenderBaseActor(const TActorId& recipientActorId, ui32 preload = 1) + : RecipientActorId(recipientActorId) + , Preload(preload) + { + } + + virtual ~TSenderBaseActor() { + } + + virtual void Bootstrap(const TActorContext& ctx) { + Become(&TSenderBaseActor::StateFunc); + ctx.Send(ctx.ExecutorThread.ActorSystem->InterconnectProxy(RecipientActorId.NodeId()), new TEvInterconnect::TEvConnectNode); + } + + virtual void SendMessagesIfPossible(const TActorContext& ctx) { + while (InFlySize < Preload) { + SendMessage(ctx); + } + } + + virtual void SendMessage(const TActorContext& /*ctx*/) { + ++SequenceNumber; + } + + virtual void Handle(TEvents::TEvUndelivered::TPtr& /*ev*/, const TActorContext& ctx) { + SendMessage(ctx); + } + + virtual void Handle(TEvTestResponse::TPtr& /*ev*/, const TActorContext& ctx) { + SendMessagesIfPossible(ctx); + } + + void Handle(TEvInterconnect::TEvNodeConnected::TPtr& /*ev*/, const TActorContext& ctx) { + SendMessagesIfPossible(ctx); + } + + void Handle(TEvInterconnect::TEvNodeDisconnected::TPtr& /*ev*/, const TActorContext& /*ctx*/) { + } + + virtual void Handle(TEvents::TEvPoisonPill::TPtr& /*ev*/, const TActorContext& ctx) { + Die(ctx); + } + + virtual STRICT_STFUNC(StateFunc, + HFunc(TEvTestResponse, Handle) + HFunc(TEvents::TEvUndelivered, Handle) + HFunc(TEvents::TEvPoisonPill, Handle) + HFunc(TEvInterconnect::TEvNodeConnected, Handle) + HFunc(TEvInterconnect::TEvNodeDisconnected, Handle) + ) + }; + + class TReceiverBaseActor: public TActor<TReceiverBaseActor> { + protected: + ui64 ReceivedCount = 0; + + public: + TReceiverBaseActor() + : TActor(&TReceiverBaseActor::StateFunc) + { + } + + virtual ~TReceiverBaseActor() { + } + + virtual STRICT_STFUNC(StateFunc, + HFunc(TEvTest, Handle) + ) + + virtual void Handle(TEvTest::TPtr& /*ev*/, const TActorContext& /*ctx*/) {} + }; +} diff --git a/library/cpp/actors/interconnect/ut/lib/test_events.h b/library/cpp/actors/interconnect/ut/lib/test_events.h new file mode 100644 index 0000000000..cd0d9e0152 --- /dev/null +++ b/library/cpp/actors/interconnect/ut/lib/test_events.h @@ -0,0 +1,49 @@ +#pragma once + +#include <library/cpp/actors/interconnect/ut/protos/interconnect_test.pb.h> + +namespace NActors { + enum { + EvTest = EventSpaceBegin(TEvents::ES_PRIVATE), + EvTestChan, + EvTestSmall, + EvTestLarge, + EvTestResponse, + }; + + struct TEvTest : TEventPB<TEvTest, NInterconnectTest::TEvTest, EvTest> { + TEvTest() = default; + + TEvTest(ui64 sequenceNumber, const TString& payload) { + Record.SetSequenceNumber(sequenceNumber); + Record.SetPayload(payload); + } + }; + + struct TEvTestLarge : TEventPB<TEvTestLarge, NInterconnectTest::TEvTestLarge, EvTestLarge> { + TEvTestLarge() = default; + + TEvTestLarge(ui64 sequenceNumber, const TString& payload) { + 
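+            // Record is the generated NInterconnectTest::TEvTestLarge message
+            // (see interconnect_test.proto); TEventPB serializes it on send.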
Record.SetSequenceNumber(sequenceNumber); + Record.SetPayload(payload); + } + }; + + struct TEvTestSmall : TEventPB<TEvTestSmall, NInterconnectTest::TEvTestSmall, EvTestSmall> { + TEvTestSmall() = default; + + TEvTestSmall(ui64 sequenceNumber, const TString& payload) { + Record.SetSequenceNumber(sequenceNumber); + Record.SetPayload(payload); + } + }; + + struct TEvTestResponse : TEventPB<TEvTestResponse, NInterconnectTest::TEvTestResponse, EvTestResponse> { + TEvTestResponse() = default; + + TEvTestResponse(ui64 confirmedSequenceNumber) { + Record.SetConfirmedSequenceNumber(confirmedSequenceNumber); + } + }; + +} diff --git a/library/cpp/actors/interconnect/ut/lib/ya.make b/library/cpp/actors/interconnect/ut/lib/ya.make new file mode 100644 index 0000000000..80f45f364f --- /dev/null +++ b/library/cpp/actors/interconnect/ut/lib/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +OWNER(vkanaev) + +SRCS( + node.h + test_events.h + test_actors.h + ic_test_cluster.h +) + +END() diff --git a/library/cpp/actors/interconnect/ut/poller_actor_ut.cpp b/library/cpp/actors/interconnect/ut/poller_actor_ut.cpp new file mode 100644 index 0000000000..23d846a2fd --- /dev/null +++ b/library/cpp/actors/interconnect/ut/poller_actor_ut.cpp @@ -0,0 +1,264 @@ +#include <library/cpp/actors/interconnect/poller_actor.h> +#include <library/cpp/actors/testlib/test_runtime.h> + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/network/pair.h> +#include <util/network/socket.h> + +using namespace NActors; + +class TTestSocket: public TSharedDescriptor { +public: + explicit TTestSocket(SOCKET fd) + : Fd_(fd) + { + } + + int GetDescriptor() override { + return Fd_; + } + +private: + SOCKET Fd_; +}; +using TTestSocketPtr = TIntrusivePtr<TTestSocket>; + +// create pair of connected, non-blocking sockets +std::pair<TTestSocketPtr, TTestSocketPtr> NonBlockSockets() { + SOCKET fds[2]; + SocketPair(fds); + SetNonBlock(fds[0]); + SetNonBlock(fds[1]); + return {MakeIntrusive<TTestSocket>(fds[0]), MakeIntrusive<TTestSocket>(fds[1])}; +} + +std::pair<TTestSocketPtr, TTestSocketPtr> TcpSockets() { + // create server (listening) socket + SOCKET server = socket(AF_INET, SOCK_STREAM, 0); + Y_VERIFY(server != -1, "socket() failed with %s", strerror(errno)); + + // bind it to local address with automatically picked port + sockaddr_in addr; + addr.sin_family = AF_INET; + addr.sin_port = 0; + addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + if (bind(server, (sockaddr*)&addr, sizeof(addr)) == -1) { + Y_FAIL("bind() failed with %s", strerror(errno)); + } else if (listen(server, 1) == -1) { + Y_FAIL("listen() failed with %s", strerror(errno)); + } + + // obtain local address for client + socklen_t len = sizeof(addr); + if (getsockname(server, (sockaddr*)&addr, &len) == -1) { + Y_FAIL("getsockname() failed with %s", strerror(errno)); + } + + // create client socket + SOCKET client = socket(AF_INET, SOCK_STREAM, 0); + Y_VERIFY(client != -1, "socket() failed with %s", strerror(errno)); + + // connect to server + if (connect(client, (sockaddr*)&addr, len) == -1) { + Y_FAIL("connect() failed with %s", strerror(errno)); + } + + // accept connection from the other side + SOCKET accepted = accept(server, nullptr, nullptr); + Y_VERIFY(accepted != -1, "accept() failed with %s", strerror(errno)); + + // close server socket + closesocket(server); + + return std::make_pair(MakeIntrusive<TTestSocket>(client), MakeIntrusive<TTestSocket>(accepted)); +} + +class TPollerActorTest: public TTestBase { + UNIT_TEST_SUITE(TPollerActorTest); + 
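+    // The cases below cover socket registration, read/write readiness and
+    // peer-hangup notifications delivered by the poller actor.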
UNIT_TEST(Registration) + UNIT_TEST(ReadNotification) + UNIT_TEST(WriteNotification) + UNIT_TEST(HangupNotification) + UNIT_TEST_SUITE_END(); + +public: + void SetUp() override { + ActorSystem_ = MakeHolder<TTestActorRuntimeBase>(); + ActorSystem_->Initialize(); + + PollerId_ = ActorSystem_->Register(CreatePollerActor()); + + TDispatchOptions opts; + opts.FinalEvents.emplace_back(TEvents::TSystem::Bootstrap, 1); + ActorSystem_->DispatchEvents(opts); + } + + void Registration() { + auto [s1, s2] = NonBlockSockets(); + auto readerId = ActorSystem_->AllocateEdgeActor(); + auto writerId = ActorSystem_->AllocateEdgeActor(); + + RegisterSocket(s1, readerId, writerId); + + // reader should receive event after socket registration + TPollerToken::TPtr token; + { + auto ev = ActorSystem_->GrabEdgeEvent<TEvPollerRegisterResult>(readerId); + token = ev->Get()->PollerToken; + } + + // writer should receive event after socket registration + { + auto ev = ActorSystem_->GrabEdgeEvent<TEvPollerRegisterResult>(writerId); + UNIT_ASSERT_EQUAL(token, ev->Get()->PollerToken); + } + } + + void ReadNotification() { + auto [r, w] = NonBlockSockets(); + auto clientId = ActorSystem_->AllocateEdgeActor(); + RegisterSocket(r, clientId, {}); + + // notification after registration + TPollerToken::TPtr token; + { + auto ev = ActorSystem_->GrabEdgeEvent<TEvPollerRegisterResult>(clientId); + token = ev->Get()->PollerToken; + } + + char buf; + + // data not ready yet for read + UNIT_ASSERT(read(r->GetDescriptor(), &buf, sizeof(buf)) == -1); + UNIT_ASSERT(errno == EWOULDBLOCK); + + // request read poll + token->Request(true, false); + + // write data + UNIT_ASSERT(write(w->GetDescriptor(), "x", 1) == 1); + + // notification after socket become readable + { + auto ev = ActorSystem_->GrabEdgeEvent<TEvPollerReady>(clientId); + UNIT_ASSERT_EQUAL(ev->Get()->Socket, r); + UNIT_ASSERT(ev->Get()->Read); + UNIT_ASSERT(!ev->Get()->Write); + } + + // read data + UNIT_ASSERT(read(r->GetDescriptor(), &buf, sizeof(buf)) == 1); + UNIT_ASSERT_EQUAL('x', buf); + + // no more data to read + UNIT_ASSERT(read(r->GetDescriptor(), &buf, sizeof(buf)) == -1); + UNIT_ASSERT(errno == EWOULDBLOCK); + } + + void WriteNotification() { + auto [r, w] = TcpSockets(); + auto clientId = ActorSystem_->AllocateEdgeActor(); + SetNonBlock(w->GetDescriptor()); + RegisterSocket(w, TActorId{}, clientId); + + // notification after registration + TPollerToken::TPtr token; + { + auto ev = ActorSystem_->GrabEdgeEvent<TEvPollerRegisterResult>(clientId); + token = ev->Get()->PollerToken; + } + + char buffer[4096]; + memset(buffer, 'x', sizeof(buffer)); + + for (int i = 0; i < 1000; ++i) { + // write as much as possible to send buffer + ssize_t written = 0; + for (;;) { + ssize_t res = send(w->GetDescriptor(), buffer, sizeof(buffer), 0); + if (res > 0) { + written += res; + } else if (res == 0) { + UNIT_FAIL("unexpected zero return from send()"); + } else { + UNIT_ASSERT(res == -1); + if (errno == EINTR) { + continue; + } else if (errno == EWOULDBLOCK || errno == EAGAIN) { + token->Request(false, true); + break; + } else { + UNIT_FAIL("unexpected error from send()"); + } + } + } + Cerr << "written " << written << " bytes" << Endl; + + // read all written data from the read end + for (;;) { + char buffer[4096]; + ssize_t res = recv(r->GetDescriptor(), buffer, sizeof(buffer), 0); + if (res > 0) { + UNIT_ASSERT(written >= res); + written -= res; + if (!written) { + break; + } + } else if (res == 0) { + UNIT_FAIL("unexpected zero return from recv()"); + } else { + 
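+                    // recv() failed: retry on EINTR, any other error fails the test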
UNIT_ASSERT(res == -1); + if (errno == EINTR) { + continue; + } else { + UNIT_FAIL("unexpected error from recv()"); + } + } + } + + // wait for notification after socket becomes writable again + { + auto ev = ActorSystem_->GrabEdgeEvent<TEvPollerReady>(clientId); + UNIT_ASSERT_EQUAL(ev->Get()->Socket, w); + UNIT_ASSERT(!ev->Get()->Read); + UNIT_ASSERT(ev->Get()->Write); + } + } + } + + void HangupNotification() { + auto [r, w] = NonBlockSockets(); + auto clientId = ActorSystem_->AllocateEdgeActor(); + RegisterSocket(r, clientId, TActorId{}); + + // notification after registration + TPollerToken::TPtr token; + { + auto ev = ActorSystem_->GrabEdgeEvent<TEvPollerRegisterResult>(clientId); + token = ev->Get()->PollerToken; + } + + token->Request(true, false); + ShutDown(w->GetDescriptor(), SHUT_RDWR); + + // notification after peer shuts down its socket + { + auto ev = ActorSystem_->GrabEdgeEvent<TEvPollerReady>(clientId); + UNIT_ASSERT_EQUAL(ev->Get()->Socket, r); + UNIT_ASSERT(ev->Get()->Read); + } + } + +private: + void RegisterSocket(TTestSocketPtr socket, TActorId readActorId, TActorId writeActorId) { + auto ev = new TEvPollerRegister{socket, readActorId, writeActorId}; + ActorSystem_->Send(new IEventHandle(PollerId_, TActorId{}, ev)); + } + +private: + THolder<TTestActorRuntimeBase> ActorSystem_; + TActorId PollerId_; +}; + +UNIT_TEST_SUITE_REGISTRATION(TPollerActorTest); diff --git a/library/cpp/actors/interconnect/ut/protos/interconnect_test.proto b/library/cpp/actors/interconnect/ut/protos/interconnect_test.proto new file mode 100644 index 0000000000..b9b2bd6a4e --- /dev/null +++ b/library/cpp/actors/interconnect/ut/protos/interconnect_test.proto @@ -0,0 +1,25 @@ +package NInterconnectTest; + +message TEvTest { + optional uint64 SequenceNumber = 1; + optional bytes Payload = 2; +} + +message TEvTestChan { + optional uint64 SequenceNumber = 1; + optional uint64 Payload = 2; +} + +message TEvTestLarge { + optional uint64 SequenceNumber = 1; + optional bytes Payload = 2; +} + +message TEvTestSmall { + optional uint64 SequenceNumber = 1; + optional bytes Payload = 2; +} + +message TEvTestResponse { + optional uint64 ConfirmedSequenceNumber = 1; +} diff --git a/library/cpp/actors/interconnect/ut/protos/ya.make b/library/cpp/actors/interconnect/ut/protos/ya.make new file mode 100644 index 0000000000..48a8cc129f --- /dev/null +++ b/library/cpp/actors/interconnect/ut/protos/ya.make @@ -0,0 +1,11 @@ +PROTO_LIBRARY() + +OWNER(vkanaev) + +SRCS( + interconnect_test.proto +) + +EXCLUDE_TAGS(GO_PROTO) + +END() diff --git a/library/cpp/actors/interconnect/ut/ya.make b/library/cpp/actors/interconnect/ut/ya.make new file mode 100644 index 0000000000..2f5b13352e --- /dev/null +++ b/library/cpp/actors/interconnect/ut/ya.make @@ -0,0 +1,36 @@ +UNITTEST() + +OWNER( + alexvru + g:kikimr +) + +IF (SANITIZER_TYPE == "thread") + TIMEOUT(1200) + SIZE(LARGE) + TAG(ya:fat) +ELSE() + TIMEOUT(600) + SIZE(MEDIUM) +ENDIF() + +SRCS( + channel_scheduler_ut.cpp + event_holder_pool_ut.cpp + interconnect_ut.cpp + large.cpp + poller_actor_ut.cpp + dynamic_proxy_ut.cpp +) + +PEERDIR( + library/cpp/actors/core + library/cpp/actors/interconnect + library/cpp/actors/interconnect/ut/lib + library/cpp/actors/interconnect/ut/protos + library/cpp/actors/testlib + library/cpp/digest/md5 + library/cpp/testing/unittest +) + +END() diff --git a/library/cpp/actors/interconnect/ut_fat/main.cpp b/library/cpp/actors/interconnect/ut_fat/main.cpp new file mode 100644 index 0000000000..5d19bc3003 --- /dev/null +++ 
b/library/cpp/actors/interconnect/ut_fat/main.cpp @@ -0,0 +1,133 @@ + +#include <library/cpp/actors/interconnect/interconnect_tcp_proxy.h> +#include <library/cpp/actors/interconnect/ut/protos/interconnect_test.pb.h> +#include <library/cpp/actors/interconnect/ut/lib/ic_test_cluster.h> +#include <library/cpp/actors/interconnect/ut/lib/interrupter.h> +#include <library/cpp/actors/interconnect/ut/lib/test_events.h> +#include <library/cpp/actors/interconnect/ut/lib/test_actors.h> +#include <library/cpp/actors/interconnect/ut/lib/node.h> + +#include <library/cpp/testing/unittest/tests_data.h> +#include <library/cpp/testing/unittest/registar.h> + +#include <util/network/sock.h> +#include <util/network/poller.h> +#include <util/system/atomic.h> +#include <util/generic/set.h> + +Y_UNIT_TEST_SUITE(InterconnectUnstableConnection) { + using namespace NActors; + + class TSenderActor: public TSenderBaseActor { + TDeque<ui64> InFly; + ui16 SendFlags; + + public: + TSenderActor(const TActorId& recipientActorId, ui16 sendFlags) + : TSenderBaseActor(recipientActorId, 32) + , SendFlags(sendFlags) + { + } + + ~TSenderActor() override { + Cerr << "Sent " << SequenceNumber << " messages\n"; + } + + void SendMessage(const TActorContext& ctx) override { + const ui32 flags = IEventHandle::MakeFlags(0, SendFlags); + const ui64 cookie = SequenceNumber; + const TString payload('@', RandomNumber<size_t>(65536) + 4096); + ctx.Send(RecipientActorId, new TEvTest(SequenceNumber, payload), flags, cookie); + InFly.push_back(SequenceNumber); + ++InFlySize; + ++SequenceNumber; + } + + void Handle(TEvents::TEvUndelivered::TPtr& ev, const TActorContext& ctx) override { + auto record = std::find(InFly.begin(), InFly.end(), ev->Cookie); + if (SendFlags & IEventHandle::FlagGenerateUnsureUndelivered) { + if (record != InFly.end()) { + InFly.erase(record); + --InFlySize; + SendMessage(ctx); + } + } else { + Y_VERIFY(record != InFly.end()); + } + } + + void Handle(TEvTestResponse::TPtr& ev, const TActorContext& ctx) override { + Y_VERIFY(InFly); + const NInterconnectTest::TEvTestResponse& record = ev->Get()->Record; + Y_VERIFY(record.HasConfirmedSequenceNumber()); + if (!(SendFlags & IEventHandle::FlagGenerateUnsureUndelivered)) { + while (record.GetConfirmedSequenceNumber() != InFly.front()) { + InFly.pop_front(); + --InFlySize; + } + } + Y_VERIFY(record.GetConfirmedSequenceNumber() == InFly.front(), "got# %" PRIu64 " expected# %" PRIu64, + record.GetConfirmedSequenceNumber(), InFly.front()); + InFly.pop_front(); + --InFlySize; + SendMessagesIfPossible(ctx); + } + }; + + class TReceiverActor: public TReceiverBaseActor { + ui64 ReceivedCount = 0; + TNode* SenderNode = nullptr; + + public: + TReceiverActor(TNode* senderNode) + : TReceiverBaseActor() + , SenderNode(senderNode) + { + } + + void Handle(TEvTest::TPtr& ev, const TActorContext& /*ctx*/) override { + const NInterconnectTest::TEvTest& m = ev->Get()->Record; + Y_VERIFY(m.HasSequenceNumber()); + Y_VERIFY(m.GetSequenceNumber() >= ReceivedCount, "got #%" PRIu64 " expected at least #%" PRIu64, + m.GetSequenceNumber(), ReceivedCount); + ++ReceivedCount; + SenderNode->Send(ev->Sender, new TEvTestResponse(m.GetSequenceNumber())); + } + + ~TReceiverActor() override { + Cerr << "Received " << ReceivedCount << " messages\n"; + } + }; + + Y_UNIT_TEST(InterconnectTestWithProxyUnsureUndelivered) { + ui32 numNodes = 2; + double bandWidth = 1000000; + ui16 flags = IEventHandle::FlagTrackDelivery | IEventHandle::FlagGenerateUnsureUndelivered; + TTestICCluster::TTrafficInterrupterSettings 
interrupterSettings{TDuration::Seconds(2), bandWidth, true}; + + TTestICCluster testCluster(numNodes, TChannelsConfig(), &interrupterSettings); + + TReceiverActor* receiverActor = new TReceiverActor(testCluster.GetNode(1)); + const TActorId recipient = testCluster.RegisterActor(receiverActor, 2); + TSenderActor* senderActor = new TSenderActor(recipient, flags); + testCluster.RegisterActor(senderActor, 1); + + NanoSleep(30ULL * 1000 * 1000 * 1000); + } + + Y_UNIT_TEST(InterconnectTestWithProxy) { + ui32 numNodes = 2; + double bandWidth = 1000000; + ui16 flags = IEventHandle::FlagTrackDelivery; + TTestICCluster::TTrafficInterrupterSettings interrupterSettings{TDuration::Seconds(2), bandWidth, true}; + + TTestICCluster testCluster(numNodes, TChannelsConfig(), &interrupterSettings); + + TReceiverActor* receiverActor = new TReceiverActor(testCluster.GetNode(1)); + const TActorId recipient = testCluster.RegisterActor(receiverActor, 2); + TSenderActor* senderActor = new TSenderActor(recipient, flags); + testCluster.RegisterActor(senderActor, 1); + + NanoSleep(30ULL * 1000 * 1000 * 1000); + } +} diff --git a/library/cpp/actors/interconnect/ut_fat/ya.make b/library/cpp/actors/interconnect/ut_fat/ya.make new file mode 100644 index 0000000000..6e58d08154 --- /dev/null +++ b/library/cpp/actors/interconnect/ut_fat/ya.make @@ -0,0 +1,25 @@ +UNITTEST() + +OWNER( + vkanaev + alexvru +) + +SIZE(LARGE) + +TAG(ya:fat) + +SRCS( + main.cpp +) + +PEERDIR( + library/cpp/actors/core + library/cpp/actors/interconnect + library/cpp/actors/interconnect/mock + library/cpp/actors/interconnect/ut/lib + library/cpp/actors/interconnect/ut/protos + library/cpp/testing/unittest +) + +END() diff --git a/library/cpp/actors/interconnect/watchdog_timer.h b/library/cpp/actors/interconnect/watchdog_timer.h new file mode 100644 index 0000000000..c190105a59 --- /dev/null +++ b/library/cpp/actors/interconnect/watchdog_timer.h @@ -0,0 +1,68 @@ +#pragma once + +namespace NActors { + template <typename TEvent> + class TWatchdogTimer { + using TCallback = std::function<void()>; + + const TDuration Timeout; + const TCallback Callback; + + TInstant LastResetTimestamp; + TEvent* ExpectedEvent = nullptr; + ui32 Iteration = 0; + + static constexpr ui32 NumIterationsBeforeFiring = 2; + + public: + TWatchdogTimer(TDuration timeout, TCallback callback) + : Timeout(timeout) + , Callback(std::move(callback)) + { + } + + void Arm(const TActorIdentity& actor) { + if (Timeout != TDuration::Zero() && Timeout != TDuration::Max()) { + Schedule(Timeout, actor); + Reset(); + } + } + + void Reset() { + LastResetTimestamp = TActivationContext::Now(); + } + + void Disarm() { + ExpectedEvent = nullptr; + } + + void operator()(typename TEvent::TPtr& ev) { + if (ev->Get() == ExpectedEvent) { + const TInstant now = TActivationContext::Now(); + const TInstant barrier = LastResetTimestamp + Timeout; + if (now < barrier) { + // the time hasn't come yet + Schedule(barrier - now, TActorIdentity(ev->Recipient)); + } else if (Iteration < NumIterationsBeforeFiring) { + // time has come, but we will still give actor a chance to process some messages and rearm timer + ++Iteration; + TActivationContext::Send(ev.Release()); // send this event into queue once more + } else { + // no chance to disarm, fire callback + Callback(); + ExpectedEvent = nullptr; + Iteration = 0; + } + } + } + + private: + void Schedule(TDuration timeout, const TActorIdentity& actor) { + auto ev = MakeHolder<TEvent>(); + ExpectedEvent = ev.Get(); + Iteration = 0; + actor.Schedule(timeout, 
ev.Release()); + } + }; + +} diff --git a/library/cpp/actors/interconnect/ya.make b/library/cpp/actors/interconnect/ya.make new file mode 100644 index 0000000000..60d29b0fc0 --- /dev/null +++ b/library/cpp/actors/interconnect/ya.make @@ -0,0 +1,94 @@ +LIBRARY() + +OWNER( + ddoarn + alexvru + g:kikimr +) + +NO_WSHADOW() + +IF (PROFILE_MEMORY_ALLOCATIONS) + CFLAGS(-DPROFILE_MEMORY_ALLOCATIONS) +ENDIF() + +SRCS( + channel_scheduler.h + event_filter.h + event_holder_pool.h + events_local.h + interconnect_address.cpp + interconnect_address.h + interconnect_channel.cpp + interconnect_channel.h + interconnect_common.h + interconnect_counters.cpp + interconnect.h + interconnect_handshake.cpp + interconnect_handshake.h + interconnect_impl.h + interconnect_mon.cpp + interconnect_mon.h + interconnect_nameserver_dynamic.cpp + interconnect_nameserver_table.cpp + interconnect_proxy_wrapper.cpp + interconnect_proxy_wrapper.h + interconnect_resolve.cpp + interconnect_stream.cpp + interconnect_stream.h + interconnect_tcp_input_session.cpp + interconnect_tcp_proxy.cpp + interconnect_tcp_proxy.h + interconnect_tcp_server.cpp + interconnect_tcp_server.h + interconnect_tcp_session.cpp + interconnect_tcp_session.h + load.cpp + load.h + logging.h + packet.cpp + packet.h + poller_actor.cpp + poller_actor.h + poller.h + poller_tcp.cpp + poller_tcp.h + poller_tcp_unit.cpp + poller_tcp_unit.h + poller_tcp_unit_select.cpp + poller_tcp_unit_select.h + profiler.h + slowpoke_actor.h + types.cpp + types.h + watchdog_timer.h +) + +IF (OS_LINUX) + SRCS( + poller_tcp_unit_epoll.cpp + poller_tcp_unit_epoll.h + ) +ENDIF() + +PEERDIR( + contrib/libs/libc_compat + contrib/libs/openssl + library/cpp/actors/core + library/cpp/actors/dnscachelib + library/cpp/actors/dnsresolver + library/cpp/actors/helpers + library/cpp/actors/prof + library/cpp/actors/protos + library/cpp/actors/util + library/cpp/digest/crc32c + library/cpp/json + library/cpp/lwtrace + library/cpp/monlib/dynamic_counters + library/cpp/monlib/metrics + library/cpp/monlib/service/pages/tablesorter + library/cpp/openssl/init + library/cpp/packedtypes +) + +END() diff --git a/library/cpp/actors/memory_log/memlog.cpp b/library/cpp/actors/memory_log/memlog.cpp new file mode 100644 index 0000000000..8e6b46727d --- /dev/null +++ b/library/cpp/actors/memory_log/memlog.cpp @@ -0,0 +1,367 @@ +#include "memlog.h" + +#include <library/cpp/actors/util/datetime.h> + +#include <util/system/info.h> +#include <util/system/atomic.h> +#include <util/system/align.h> + +#include <contrib/libs/linuxvdso/interface.h> + +#if (defined(_i386_) || defined(_x86_64_)) && defined(_linux_) +#define HAVE_VDSO_GETCPU 1 +#include <contrib/libs/linuxvdso/interface.h> +static int (*FastGetCpu)(unsigned* cpu, unsigned* node, void* unused); +#endif + +#if defined(_unix_) +#include <sched.h> +#elif defined(_win_) +#include <WinBase.h> +#else +#error NO IMPLEMENTATION FOR THE PLATFORM +#endif + +const char TMemoryLog::DEFAULT_LAST_MARK[16] = { + 'c', + 'b', + '7', + 'B', + '6', + '8', + 'a', + '8', + 'A', + '5', + '6', + '1', + '6', + '4', + '5', + '\n', +}; + +const char TMemoryLog::CLEAR_MARK[16] = { + ' ', + ' ', + ' ', + ' ', + ' ', + ' ', + ' ', + ' ', + ' ', + ' ', + ' ', + ' ', + ' ', + ' ', + ' ', + '\n', +}; + +unsigned TMemoryLog::GetSelfCpu() noexcept { +#if defined(_unix_) +#if HAVE_VDSO_GETCPU + unsigned cpu; + if (Y_LIKELY(FastGetCpu != nullptr)) { + auto result = FastGetCpu(&cpu, nullptr, nullptr); + Y_VERIFY(result == 0); + return cpu; + } else { + return 0; + } + +#elif 
defined(_x86_64_) || defined(_i386_)
+
+#define CPUID(func, eax, ebx, ecx, edx) \
+    __asm__ __volatile__(               \
+        "cpuid"                         \
+        : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) \
+        : "a"(func));
+
+    int a = 0, b = 0, c = 0, d = 0;
+    CPUID(0x1, a, b, c, d);
+    int acpiID = (b >> 24);
+    return acpiID;
+
+#elif defined(__GNUC__)
+    return sched_getcpu();
+#else
+    return 0;
+#endif
+
+#elif defined(_win_)
+    return GetCurrentProcessorNumber();
+#else
+    return 0;
+#endif
+}
+
+TMemoryLog* TMemoryLog::MemLogBuffer = nullptr;
+Y_POD_THREAD(TThread::TId)
+TMemoryLog::LogThreadId;
+char* TMemoryLog::LastMarkIsHere = nullptr;
+
+std::atomic<bool> TMemoryLog::PrintLastMark(true);
+
+TMemoryLog::TMemoryLog(size_t totalSize, size_t grainSize)
+    : GrainSize(grainSize)
+    , FreeGrains(DEFAULT_TOTAL_SIZE / DEFAULT_GRAIN_SIZE * 2)
+    , Buf(totalSize)
+{
+    Y_VERIFY(DEFAULT_TOTAL_SIZE % DEFAULT_GRAIN_SIZE == 0);
+    NumberOfGrains = DEFAULT_TOTAL_SIZE / DEFAULT_GRAIN_SIZE;
+
+    for (size_t i = 0; i < NumberOfGrains; ++i) {
+        new (GetGrain(i)) TGrain;
+    }
+
+    NumberOfCpus = NSystemInfo::NumberOfCpus();
+    Y_VERIFY(NumberOfGrains > NumberOfCpus);
+    ActiveGrains.Reset(new TGrain*[NumberOfCpus]);
+    for (size_t i = 0; i < NumberOfCpus; ++i) {
+        ActiveGrains[i] = GetGrain(i);
+    }
+
+    for (size_t i = NumberOfCpus; i < NumberOfGrains; ++i) {
+        FreeGrains.StubbornPush(GetGrain(i));
+    }
+
+#if HAVE_VDSO_GETCPU
+    auto vdsoFunc = (decltype(FastGetCpu))
+        NVdso::Function("__vdso_getcpu", "LINUX_2.6");
+    AtomicSet(FastGetCpu, vdsoFunc);
+#endif
+}
+
+void* TMemoryLog::GetWriteBuffer(size_t amount) noexcept {
+    // alignment required by NoCacheMemcpy
+    amount = AlignUp<size_t>(amount, MemcpyAlignment);
+
+    for (ui16 tries = MAX_GET_BUFFER_TRIES; tries-- > 0;) {
+        auto myCpu = GetSelfCpu();
+
+        TGrain* grain = AtomicGet(ActiveGrains[myCpu]);
+
+        if (grain != nullptr) {
+            auto mine = AtomicGetAndAdd(grain->WritePointer, amount);
+            if (mine + amount <= GrainSize - sizeof(TGrain)) {
+                return &grain->Data[mine];
+            }
+
+            if (!AtomicCas(&ActiveGrains[myCpu], 0, grain)) {
+                continue;
+            }
+
+            FreeGrains.StubbornPush(grain);
+        }
+
+        grain = (TGrain*)FreeGrains.Pop();
+
+        if (grain == nullptr) {
+            return nullptr;
+        }
+
+        grain->WritePointer = 0;
+
+        if (!AtomicCas(&ActiveGrains[myCpu], grain, 0)) {
+            FreeGrains.StubbornPush(grain);
+            continue;
+        }
+    }
+
+    return nullptr;
+}
+
+void ClearAlignedTail(char* tail) noexcept {
+    auto aligned = AlignUp(tail, TMemoryLog::MemcpyAlignment);
+    if (aligned > tail) {
+        memset(tail, 0, aligned - tail);
+    }
+}
+
+#if defined(_x86_64_) || defined(_i386_)
+#include <xmmintrin.h>
+// the main motivation is to avoid polluting the CPU cache
+NO_SANITIZE_THREAD
+void NoCacheMemcpy(char* dst, const char* src, size_t size) noexcept {
+    while (size >= sizeof(__m128) * 2) {
+        __m128 a = _mm_load_ps((float*)(src + 0 * sizeof(__m128)));
+        __m128 b = _mm_load_ps((float*)(src + 1 * sizeof(__m128)));
+        _mm_stream_ps((float*)(dst + 0 * sizeof(__m128)), a);
+        _mm_stream_ps((float*)(dst + 1 * sizeof(__m128)), b);
+
+        size -= sizeof(__m128) * 2;
+        src += sizeof(__m128) * 2;
+        dst += sizeof(__m128) * 2;
+    }
+    memcpy(dst, src, size);
+}
+
+NO_SANITIZE_THREAD
+void NoWCacheMemcpy(char* dst, const char* src, size_t size) noexcept {
+    constexpr ui16 ITEMS_COUNT = 1024;
+    alignas(TMemoryLog::MemcpyAlignment) __m128 buf[ITEMS_COUNT];
+    while (size >= sizeof(buf)) {
+        memcpy(&buf, src, sizeof(buf));
+
+        for (ui16 i = 0; i < ITEMS_COUNT; ++i) {
+            _mm_stream_ps((float*)dst, buf[i]);
+            dst += sizeof(__m128);
+        }
+
+        size -= sizeof(buf);
+        src += sizeof(buf);
+    }
+
+    memcpy(&buf, src, size);
+    // no problem copying a few extra bytes here
+    size = AlignUp(size, sizeof(__m128));
+    for (ui16 i = 0; i < size / sizeof(__m128); ++i) {
+        _mm_stream_ps((float*)dst, buf[i]);
+        dst += sizeof(__m128);
+    }
+}
+
+#endif
+
+NO_SANITIZE_THREAD
+char* BareMemLogWrite(const char* begin, size_t msgSize, bool isLast) noexcept {
+    bool lastMark =
+        isLast && TMemoryLog::PrintLastMark.load(std::memory_order_acquire);
+    size_t amount = lastMark ? msgSize + TMemoryLog::LAST_MARK_SIZE : msgSize;
+
+    char* buffer = (char*)TMemoryLog::GetWriteBufferStatic(amount);
+    if (buffer == nullptr) {
+        return nullptr;
+    }
+
+#if defined(_x86_64_) || defined(_i386_)
+    if (AlignDown(begin, TMemoryLog::MemcpyAlignment) == begin) {
+        NoCacheMemcpy(buffer, begin, msgSize);
+    } else {
+        NoWCacheMemcpy(buffer, begin, msgSize);
+    }
+#else
+    memcpy(buffer, begin, msgSize);
+#endif
+
+    if (lastMark) {
+        TMemoryLog::ChangeLastMark(buffer + msgSize);
+    }
+
+    ClearAlignedTail(buffer + amount);
+    return buffer;
+}
+
+NO_SANITIZE_THREAD
+bool MemLogWrite(const char* begin, size_t msgSize, bool addLF) noexcept {
+    bool lastMark = TMemoryLog::PrintLastMark.load(std::memory_order_acquire);
+    size_t amount = lastMark ? msgSize + TMemoryLog::LAST_MARK_SIZE : msgSize;
+
+    // Construct the prolog with the timestamp and thread id
+    auto threadId = TMemoryLog::GetTheadId();
+
+    // alignment required by NoCacheMemcpy
+    // must match the length produced by the snprintf format below
+    constexpr size_t prologSize = 48;
+    alignas(TMemoryLog::MemcpyAlignment) char prolog[prologSize + 1];
+    Y_VERIFY(AlignDown(&prolog, TMemoryLog::MemcpyAlignment) == &prolog);
+
+    int snprintfResult = snprintf(prolog, prologSize + 1,
+        "TS %020" PRIu64 " TI %020" PRIu64 " ", GetCycleCountFast(), threadId);
+
+    if (snprintfResult < 0) {
+        return false;
+    }
+    Y_VERIFY(snprintfResult == prologSize);
+
+    amount += prologSize;
+    if (addLF) {
+        ++amount; // add 1 byte for \n at the end of the message
+    }
+
+    char* buffer = (char*)TMemoryLog::GetWriteBufferStatic(amount);
+    if (buffer == nullptr) {
+        return false;
+    }
+
+#if defined(_x86_64_) || defined(_i386_)
+    // warning: copy the prolog first so that its tail does not
+    // corrupt the message
+    NoCacheMemcpy(buffer, prolog, prologSize);
+    if (AlignDown(begin + prologSize, TMemoryLog::MemcpyAlignment) == begin + prologSize) {
+        NoCacheMemcpy(buffer + prologSize, begin, msgSize);
+    } else {
+        NoWCacheMemcpy(buffer + prologSize, begin, msgSize);
+    }
+#else
+    memcpy(buffer, prolog, prologSize);
+    memcpy(buffer + prologSize, begin, msgSize);
+#endif
+
+    if (addLF) {
+        buffer[prologSize + msgSize] = '\n';
+    }
+
+    if (lastMark) {
+        TMemoryLog::ChangeLastMark(buffer + prologSize + msgSize + (int)addLF);
+    }
+
+    ClearAlignedTail(buffer + amount);
+    return true;
+}
+
+NO_SANITIZE_THREAD
+void TMemoryLog::ChangeLastMark(char* buffer) noexcept {
+    memcpy(buffer, DEFAULT_LAST_MARK, LAST_MARK_SIZE);
+    auto oldMark = AtomicSwap(&LastMarkIsHere, buffer);
+    if (Y_LIKELY(oldMark != nullptr)) {
+        memcpy(oldMark, CLEAR_MARK, LAST_MARK_SIZE);
+    }
+    if (AtomicGet(LastMarkIsHere) != buffer) {
+        memcpy(buffer, CLEAR_MARK, LAST_MARK_SIZE);
+        AtomicBarrier();
+    }
+}
+
+bool MemLogVPrintF(const char* format, va_list params) noexcept {
+    auto logger = TMemoryLog::GetMemoryLogger();
+    if (logger == nullptr) {
+        return false;
+    }
+
+    auto threadId = TMemoryLog::GetTheadId();
+
+    // alignment required by NoCacheMemcpy
+    alignas(TMemoryLog::MemcpyAlignment) char buf[TMemoryLog::MAX_MESSAGE_SIZE];
+    Y_VERIFY(AlignDown(&buf, 
TMemoryLog::MemcpyAlignment) == &buf); + + int prologSize = snprintf(buf, + TMemoryLog::MAX_MESSAGE_SIZE - 2, + "TS %020" PRIu64 " TI %020" PRIu64 " ", + GetCycleCountFast(), + threadId); + + if (Y_UNLIKELY(prologSize < 0)) { + return false; + } + Y_VERIFY((ui32)prologSize <= TMemoryLog::MAX_MESSAGE_SIZE); + + int add = vsnprintf( + &buf[prologSize], + TMemoryLog::MAX_MESSAGE_SIZE - prologSize - 2, + format, params); + + if (Y_UNLIKELY(add < 0)) { + return false; + } + Y_VERIFY(add >= 0); + auto totalSize = prologSize + add; + + buf[totalSize++] = '\n'; + Y_VERIFY((ui32)totalSize <= TMemoryLog::MAX_MESSAGE_SIZE); + + return BareMemLogWrite(buf, totalSize) != nullptr; +} diff --git a/library/cpp/actors/memory_log/memlog.h b/library/cpp/actors/memory_log/memlog.h new file mode 100644 index 0000000000..2aa27272a6 --- /dev/null +++ b/library/cpp/actors/memory_log/memlog.h @@ -0,0 +1,211 @@ +#pragma once + +#include <library/cpp/threading/queue/mpmc_unordered_ring.h> +#include <util/generic/string.h> +#include <util/string/printf.h> +#include <util/system/datetime.h> +#include <util/system/thread.h> +#include <util/system/types.h> +#include <util/system/atomic.h> +#include <util/system/align.h> +#include <util/system/tls.h> + +#include <atomic> +#include <cstdio> + +#ifdef _win_ +#include <util/system/winint.h> +#endif + +#ifndef NO_SANITIZE_THREAD +#define NO_SANITIZE_THREAD +#if defined(__has_feature) +#if __has_feature(thread_sanitizer) +#undef NO_SANITIZE_THREAD +#define NO_SANITIZE_THREAD __attribute__((no_sanitize_thread)) +#endif +#endif +#endif + +class TMemoryLog { +public: + static constexpr size_t DEFAULT_TOTAL_SIZE = 10 * 1024 * 1024; + static constexpr size_t DEFAULT_GRAIN_SIZE = 1024 * 64; + static constexpr size_t MAX_MESSAGE_SIZE = 1024; + static constexpr ui16 MAX_GET_BUFFER_TRIES = 4; + static constexpr ui16 MemcpyAlignment = 16; + + // search for cb7B68a8A561645 + static const char DEFAULT_LAST_MARK[16]; + static const char CLEAR_MARK[16]; + + static constexpr size_t LAST_MARK_SIZE = sizeof(DEFAULT_LAST_MARK); + + inline static TMemoryLog* GetMemoryLogger() noexcept { + return AtomicGet(MemLogBuffer); + } + + void* GetWriteBuffer(size_t amount) noexcept; + + inline static void* GetWriteBufferStatic(size_t amount) noexcept { + auto logger = GetMemoryLogger(); + if (logger == nullptr) { + return nullptr; + } + return logger->GetWriteBuffer(amount); + } + + size_t GetGlobalBufferSize() const noexcept { + return Buf.GetSize(); + } + + inline static void CreateMemoryLogBuffer( + size_t totalSize = DEFAULT_TOTAL_SIZE, + size_t grainSize = DEFAULT_GRAIN_SIZE) + Y_COLD { + if (AtomicGet(MemLogBuffer) != nullptr) { + return; + } + + AtomicSet(MemLogBuffer, new TMemoryLog(totalSize, grainSize)); + } + + static std::atomic<bool> PrintLastMark; + + // buffer must be at least 16 bytes + static void ChangeLastMark(char* buffer) noexcept; + + inline static TThread::TId GetTheadId() noexcept { + if (LogThreadId == 0) { + LogThreadId = TThread::CurrentThreadId(); + } + return LogThreadId; + } + +private: + TMemoryLog(size_t totalSize, size_t grainSize) Y_COLD; + + struct TGrain { + TAtomic WritePointer = 0; + char Padding[MemcpyAlignment - sizeof(TAtomic)]; + char Data[]; + }; + + size_t NumberOfCpus; + size_t GrainSize; + size_t NumberOfGrains; + TArrayPtr<TGrain*> ActiveGrains; + NThreading::TMPMCUnorderedRing FreeGrains; + + TGrain* GetGrain(size_t grainIndex) const noexcept { + return (TGrain*)((char*)GetGlobalBuffer() + GrainSize * grainIndex); + } + + class TMMapArea { + public: + 
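+        // RAII owner of an anonymous memory mapping; the platform-specific
+        // MMap/MUnmap implementations live in mmap.cpp.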
TMMapArea(size_t amount) Y_COLD { + MMap(amount); + } + + TMMapArea(const TMMapArea&) = delete; + TMMapArea& operator=(const TMMapArea& copy) = delete; + + TMMapArea(TMMapArea&& move) Y_COLD { + BufPtr = move.BufPtr; + Size = move.Size; + + move.BufPtr = nullptr; + move.Size = 0; + } + + TMMapArea& operator=(TMMapArea&& move) Y_COLD { + BufPtr = move.BufPtr; + Size = move.Size; + + move.BufPtr = nullptr; + move.Size = 0; + return *this; + } + + void Reset(size_t amount) Y_COLD { + MUnmap(); + MMap(amount); + } + + ~TMMapArea() noexcept Y_COLD { + MUnmap(); + } + + size_t GetSize() const noexcept { + return Size; + } + + void* GetPtr() const noexcept { + return BufPtr; + } + + private: + void* BufPtr; + size_t Size; +#ifdef _win_ + HANDLE Mapping; +#endif + + void MMap(size_t amount); + void MUnmap(); + }; + + TMMapArea Buf; + + void* GetGlobalBuffer() const noexcept { + return Buf.GetPtr(); + } + + static unsigned GetSelfCpu() noexcept; + + static TMemoryLog* MemLogBuffer; + static Y_POD_THREAD(TThread::TId) LogThreadId; + static char* LastMarkIsHere; +}; + +// it's no use of sanitizing this function +NO_SANITIZE_THREAD +char* BareMemLogWrite( + const char* begin, size_t msgSize, bool isLast = true) noexcept; + +// it's no use of sanitizing this function +NO_SANITIZE_THREAD +bool MemLogWrite( + const char* begin, size_t msgSize, bool addLF = false) noexcept; + +Y_WRAPPER inline bool MemLogWrite(const char* begin, const char* end) noexcept { + if (end <= begin) { + return false; + } + + size_t msgSize = end - begin; + return MemLogWrite(begin, msgSize); +} + +template <typename TObj> +bool MemLogWriteStruct(const TObj* obj) noexcept { + auto begin = (const char*)(const void*)obj; + return MemLogWrite(begin, begin + sizeof(TObj)); +} + +Y_PRINTF_FORMAT(1, 0) +bool MemLogVPrintF(const char* format, va_list params) noexcept; + +Y_PRINTF_FORMAT(1, 2) +Y_WRAPPER +inline bool MemLogPrintF(const char* format, ...) 
noexcept { + va_list params; + va_start(params, format); + auto result = MemLogVPrintF(format, params); + va_end(params); + return result; +} + +Y_WRAPPER inline bool MemLogWriteNullTerm(const char* str) noexcept { + return MemLogWrite(str, strlen(str)); +} diff --git a/library/cpp/actors/memory_log/mmap.cpp b/library/cpp/actors/memory_log/mmap.cpp new file mode 100644 index 0000000000..201998d343 --- /dev/null +++ b/library/cpp/actors/memory_log/mmap.cpp @@ -0,0 +1,63 @@ +#include "memlog.h" + +#if defined(_unix_) +#include <sys/mman.h> +#elif defined(_win_) +#include <util/system/winint.h> +#else +#error NO IMPLEMENTATION FOR THE PLATFORM +#endif + +void TMemoryLog::TMMapArea::MMap(size_t amount) { + Y_VERIFY(amount > 0); + +#if defined(_unix_) + constexpr int mmapProt = PROT_READ | PROT_WRITE; +#if defined(_linux_) + constexpr int mmapFlags = MAP_PRIVATE | MAP_ANON | MAP_POPULATE; +#else + constexpr int mmapFlags = MAP_PRIVATE | MAP_ANON; +#endif + + BufPtr = ::mmap(nullptr, amount, mmapProt, mmapFlags, -1, 0); + if (BufPtr == MAP_FAILED) { + throw std::bad_alloc(); + } + +#elif defined(_win_) + Mapping = ::CreateFileMapping( + (HANDLE)-1, nullptr, PAGE_READWRITE, 0, amount, nullptr); + if (Mapping == NULL) { + throw std::bad_alloc(); + } + BufPtr = ::MapViewOfFile(Mapping, FILE_MAP_WRITE, 0, 0, amount); + if (BufPtr == NULL) { + throw std::bad_alloc(); + } +#endif + + Size = amount; +} + +void TMemoryLog::TMMapArea::MUnmap() { + if (BufPtr == nullptr) { + return; + } + +#if defined(_unix_) + int result = ::munmap(BufPtr, Size); + Y_VERIFY(result == 0); + +#elif defined(_win_) + BOOL result = ::UnmapViewOfFile(BufPtr); + Y_VERIFY(result != 0); + + result = ::CloseHandle(Mapping); + Y_VERIFY(result != 0); + + Mapping = 0; +#endif + + BufPtr = nullptr; + Size = 0; +} diff --git a/library/cpp/actors/memory_log/ya.make b/library/cpp/actors/memory_log/ya.make new file mode 100644 index 0000000000..d89d5db4d7 --- /dev/null +++ b/library/cpp/actors/memory_log/ya.make @@ -0,0 +1,19 @@ +LIBRARY() + +OWNER( + agri + g:kikimr +) + +SRCS( + memlog.cpp + memlog.h + mmap.cpp +) + +PEERDIR( + library/cpp/threading/queue + contrib/libs/linuxvdso +) + +END() diff --git a/library/cpp/actors/prof/tag.cpp b/library/cpp/actors/prof/tag.cpp new file mode 100644 index 0000000000..9ccf03e1a9 --- /dev/null +++ b/library/cpp/actors/prof/tag.cpp @@ -0,0 +1,119 @@ +#include "tag.h" +#include "tcmalloc.h" + +#include <library/cpp/charset/ci_string.h> +#include <library/cpp/containers/atomizer/atomizer.h> +#include <library/cpp/malloc/api/malloc.h> + +#if defined(PROFILE_MEMORY_ALLOCATIONS) +#include <library/cpp/lfalloc/dbg_info/dbg_info.h> +#include <library/cpp/ytalloc/api/ytalloc.h> +#endif + +#include <util/generic/singleton.h> +#include <util/generic/string.h> +#include <util/generic/vector.h> +#include <util/system/mutex.h> + +namespace NProfiling { + class TStringAtoms { + private: + TMutex Mutex; + atomizer<ci_hash, ci_equal_to> Tags; + + public: + static TStringAtoms& Instance() { + return *Singleton<TStringAtoms>(); + } + + ui32 MakeTag(const char* s) { + Y_VERIFY(s); + with_lock (Mutex) { + return Tags.string_to_atom(s); + } + } + + ui32 MakeTags(const TVector<const char*>& ss) { + Y_VERIFY(ss); + with_lock (Mutex) { + ui32 baseTag = Tags.string_to_atom(ss[0]); + ui32 nextTag = baseTag + 1; + for (auto i = ss.begin() + 1; i != ss.end(); ++i, ++nextTag) { + Y_VERIFY(*i); + ui32 ctag = Tags.string_to_atom(*i); + Y_VERIFY(ctag == nextTag); + } + return baseTag; + } + } + + const char* GetTag(ui32 tag) const 
{ + with_lock (Mutex) { + return Tags.get_atom_name(tag); + } + } + + size_t GetTagsCount() const { + with_lock (Mutex) { + return Tags.size(); + } + } + }; + + ui32 MakeTag(const char* s) { + return TStringAtoms::Instance().MakeTag(s); + } + + ui32 MakeTags(const TVector<const char*>& ss) { + return TStringAtoms::Instance().MakeTags(ss); + } + + const char* GetTag(ui32 tag) { + return TStringAtoms::Instance().GetTag(tag); + } + + size_t GetTagsCount() { + return TStringAtoms::Instance().GetTagsCount(); + } + + static ui32 SetThreadAllocTag_Default(ui32 tag) { + Y_UNUSED(tag); + return 0; + } + +#if defined(PROFILE_MEMORY_ALLOCATIONS) + static ui32 SetThreadAllocTag_YT(ui32 tag) { + auto prev = NYT::NYTAlloc::GetCurrentMemoryTag(); + NYT::NYTAlloc::SetCurrentMemoryTag(tag); + return prev; + } + + static TSetThreadAllocTag* SetThreadAllocTagFn() { + const auto& info = NMalloc::MallocInfo(); + + TStringBuf name(info.Name); + if (name.StartsWith("lf")) { + return (TSetThreadAllocTag*)NAllocDbg::SetThreadAllocTag; + } else if (name.StartsWith("yt")) { + return SetThreadAllocTag_YT; + } else if (name.StartsWith("tc")) { + return SetTCMallocThreadAllocTag; + } else { + return SetThreadAllocTag_Default; + } + } +#else + static TSetThreadAllocTag* SetThreadAllocTagFn() { + const auto& info = NMalloc::MallocInfo(); + + TStringBuf name(info.Name); + if (name.StartsWith("tc")) { + return SetTCMallocThreadAllocTag; + } else { + return SetThreadAllocTag_Default; + } + } +#endif + + TSetThreadAllocTag* SetThreadAllocTag = SetThreadAllocTagFn(); +} diff --git a/library/cpp/actors/prof/tag.h b/library/cpp/actors/prof/tag.h new file mode 100644 index 0000000000..357e264a22 --- /dev/null +++ b/library/cpp/actors/prof/tag.h @@ -0,0 +1,73 @@ +#pragma once + +#include <util/generic/fwd.h> + +/* + Common registry for tagging memory profiler. + Register a new tag with MakeTag using a unique string. + Use registered tags with SetThreadAllocTag function in allocator API. +*/ + +namespace NProfiling { + ui32 MakeTag(const char* s); + + // Make only unique tags. Y_VERIFY inside. 
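+    // Tags are atomized consecutively, so the returned id is the tag of ss[0]
+    // and ss[i] maps to the returned id + i (see tag.cpp).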
+ ui32 MakeTags(const TVector<const char*>& ss); + + const char* GetTag(ui32 tag); + size_t GetTagsCount(); + + using TSetThreadAllocTag = ui32(ui32 tag); + extern TSetThreadAllocTag* SetThreadAllocTag; + + class TMemoryTagScope { + public: + explicit TMemoryTagScope(ui32 tag) + : RestoreTag(SetThreadAllocTag(tag)) + { + } + + explicit TMemoryTagScope(const char* tagName) { + ui32 newTag = MakeTag(tagName); + RestoreTag = SetThreadAllocTag(newTag); + } + + TMemoryTagScope(TMemoryTagScope&& move) + : RestoreTag(move.RestoreTag) + , Released(move.Released) + { + move.Released = true; + } + + TMemoryTagScope& operator=(TMemoryTagScope&& move) { + RestoreTag = move.RestoreTag; + Released = move.Released; + move.Released = true; + return *this; + } + + static void Reset(ui32 tag) { + SetThreadAllocTag(tag); + } + + void Release() { + if (!Released) { + SetThreadAllocTag(RestoreTag); + Released = true; + } + } + + ~TMemoryTagScope() { + if (!Released) { + SetThreadAllocTag(RestoreTag); + } + } + + protected: + TMemoryTagScope(const TMemoryTagScope&) = delete; + void operator=(const TMemoryTagScope&) = delete; + + ui32 RestoreTag = 0; + bool Released = false; + }; +} diff --git a/library/cpp/actors/prof/tcmalloc.cpp b/library/cpp/actors/prof/tcmalloc.cpp new file mode 100644 index 0000000000..3d4f203dbb --- /dev/null +++ b/library/cpp/actors/prof/tcmalloc.cpp @@ -0,0 +1,32 @@ +#include "tcmalloc.h" + +#include <contrib/libs/tcmalloc/tcmalloc/malloc_extension.h> + +namespace NProfiling { + +static thread_local ui32 AllocationTag = 0; + +static struct TInitTCMallocCallbacks { + static void* CreateTag() { + return reinterpret_cast<void*>(AllocationTag); + } + static void* CopyTag(void* tag) { + return tag; + } + static void DestroyTag(void* tag) { + Y_UNUSED(tag); + } + + TInitTCMallocCallbacks() { + tcmalloc::MallocExtension::SetSampleUserDataCallbacks( + CreateTag, CopyTag, DestroyTag); + } +} InitTCMallocCallbacks; + +ui32 SetTCMallocThreadAllocTag(ui32 tag) { + ui32 prev = AllocationTag; + AllocationTag = tag; + return prev; +} + +} diff --git a/library/cpp/actors/prof/tcmalloc.h b/library/cpp/actors/prof/tcmalloc.h new file mode 100644 index 0000000000..659fb4eaf3 --- /dev/null +++ b/library/cpp/actors/prof/tcmalloc.h @@ -0,0 +1,9 @@ +#pragma once + +#include <util/generic/fwd.h> + +namespace NProfiling { + +ui32 SetTCMallocThreadAllocTag(ui32 tag); + +} diff --git a/library/cpp/actors/prof/tcmalloc_null.cpp b/library/cpp/actors/prof/tcmalloc_null.cpp new file mode 100644 index 0000000000..75c0013154 --- /dev/null +++ b/library/cpp/actors/prof/tcmalloc_null.cpp @@ -0,0 +1,10 @@ +#include "tcmalloc.h" + +namespace NProfiling { + +ui32 SetTCMallocThreadAllocTag(ui32 tag) { + Y_UNUSED(tag); + return 0; +} + +} diff --git a/library/cpp/actors/prof/ut/tag_ut.cpp b/library/cpp/actors/prof/ut/tag_ut.cpp new file mode 100644 index 0000000000..accf3921ab --- /dev/null +++ b/library/cpp/actors/prof/ut/tag_ut.cpp @@ -0,0 +1,68 @@ +#include "tag.h" + +#include <library/cpp/testing/unittest/registar.h> + +using namespace NProfiling; + +class TAtomTagsTest: public TTestBase { +private: + UNIT_TEST_SUITE(TAtomTagsTest); + UNIT_TEST(Test_MakeTag); + UNIT_TEST(Test_Make2Tags); + UNIT_TEST(Test_MakeTagTwice); + + UNIT_TEST(Test_MakeAndGetTag); + + UNIT_TEST(Test_MakeVector); + UNIT_TEST_SUITE_END(); + +public: + void Test_MakeTag(); + void Test_Make2Tags(); + void Test_MakeTagTwice(); + void Test_MakeAndGetTag(); + void Test_MakeVector(); +}; + +UNIT_TEST_SUITE_REGISTRATION(TAtomTagsTest); + +void 
TAtomTagsTest::Test_MakeTag() { + ui32 tag = MakeTag("a tag"); + UNIT_ASSERT(tag != 0); +} + +void TAtomTagsTest::Test_Make2Tags() { + ui32 tag1 = MakeTag("a tag 1"); + ui32 tag2 = MakeTag("a tag 2"); + UNIT_ASSERT(tag1 != 0); + UNIT_ASSERT(tag2 != 0); + UNIT_ASSERT(tag1 != tag2); +} + +void TAtomTagsTest::Test_MakeTagTwice() { + ui32 tag1 = MakeTag("a tag twice"); + ui32 tag2 = MakeTag("a tag twice"); + UNIT_ASSERT(tag1 != 0); + UNIT_ASSERT(tag1 == tag2); +} + +void TAtomTagsTest::Test_MakeAndGetTag() { + const char* makeStr = "tag to get"; + ui32 tag = MakeTag(makeStr); + const char* tagStr = GetTag(tag); + UNIT_ASSERT_STRINGS_EQUAL(makeStr, tagStr); +} + +void TAtomTagsTest::Test_MakeVector() { + TVector<const char*> strs = { + "vector tag 0", + "vector tag 1", + "vector tag 3", + "vector tag 4"}; + ui32 baseTag = MakeTags(strs); + UNIT_ASSERT(baseTag != 0); + for (ui32 i = 0; i < strs.size(); ++i) { + const char* str = GetTag(baseTag + i); + UNIT_ASSERT_STRINGS_EQUAL(str, strs[i]); + } +} diff --git a/library/cpp/actors/prof/ut/ya.make b/library/cpp/actors/prof/ut/ya.make new file mode 100644 index 0000000000..47c58a8fb7 --- /dev/null +++ b/library/cpp/actors/prof/ut/ya.make @@ -0,0 +1,12 @@ +UNITTEST_FOR(library/cpp/actors/prof) + +OWNER( + agri + g:kikimr +) + +SRCS( + tag_ut.cpp +) + +END() diff --git a/library/cpp/actors/prof/ya.make b/library/cpp/actors/prof/ya.make new file mode 100644 index 0000000000..b5e2497563 --- /dev/null +++ b/library/cpp/actors/prof/ya.make @@ -0,0 +1,33 @@ +LIBRARY() + +OWNER( + agri + g:kikimr +) + +SRCS( + tag.cpp +) + +PEERDIR( + library/cpp/charset + library/cpp/containers/atomizer +) + +IF (PROFILE_MEMORY_ALLOCATIONS) + CFLAGS(-DPROFILE_MEMORY_ALLOCATIONS) + PEERDIR( + library/cpp/malloc/api + library/cpp/lfalloc/dbg_info + library/cpp/ytalloc/api + ) +ENDIF() + +IF(ALLOCATOR == "TCMALLOC_256K") + SRCS(tcmalloc.cpp) + PEERDIR(contrib/libs/tcmalloc) +ELSE() + SRCS(tcmalloc_null.cpp) +ENDIF() + +END() diff --git a/library/cpp/actors/protos/actors.proto b/library/cpp/actors/protos/actors.proto new file mode 100644 index 0000000000..5fbd6d44ee --- /dev/null +++ b/library/cpp/actors/protos/actors.proto @@ -0,0 +1,13 @@ +package NActorsProto; +option java_package = "ru.yandex.kikimr.proto"; +option java_outer_classname = "NActorsBaseProto"; + +message TActorId { + required fixed64 RawX1 = 1; + required fixed64 RawX2 = 2; +} + +message TCallbackException { + required TActorId ActorId = 1; + required string ExceptionMessage = 2; +} diff --git a/library/cpp/actors/protos/interconnect.proto b/library/cpp/actors/protos/interconnect.proto new file mode 100644 index 0000000000..2e3b0d0d15 --- /dev/null +++ b/library/cpp/actors/protos/interconnect.proto @@ -0,0 +1,113 @@ +import "library/cpp/actors/protos/actors.proto"; +import "google/protobuf/descriptor.proto"; + +package NActorsInterconnect; +option java_package = "ru.yandex.kikimr.proto"; + +message TEvResolveNode { + optional uint32 NodeId = 1; + optional uint64 Deadline = 2; +} + +message TEvNodeInfo { + optional uint32 NodeId = 1; + optional string Address = 2; + optional uint32 Port = 3; +} + +extend google.protobuf.FieldOptions { + optional string PrintName = 50376; +} + +message TNodeLocation { + // compatibility section -- will be removed in future versions + optional uint32 DataCenterNum = 1 [deprecated=true]; + optional uint32 RoomNum = 2 [deprecated=true]; + optional uint32 RackNum = 3 [deprecated=true]; + optional uint32 BodyNum = 4 [deprecated=true]; + optional uint32 Body = 100500 
[deprecated=true]; // for compatibility with WalleLocation + + optional string DataCenter = 10 [(PrintName) = "DC"]; + optional string Module = 20 [(PrintName) = "M"]; + optional string Rack = 30 [(PrintName) = "R"]; + optional string Unit = 40 [(PrintName) = "U"]; +} + +message TClusterUUIDs { + optional string ClusterUUID = 1; + repeated string AcceptUUID = 2; +} + +message TScopeId { + optional fixed64 X1 = 1; + optional fixed64 X2 = 2; +} + +message THandshakeRequest { + required uint64 Protocol = 1; + + required uint64 ProgramPID = 2; + required uint64 ProgramStartTime = 3; + required uint64 Serial = 4; + + required uint32 ReceiverNodeId = 5; + required string SenderActorId = 6; + + optional string SenderHostName = 7; + optional string ReceiverHostName = 8; + optional string UUID = 9; + optional TClusterUUIDs ClusterUUIDs = 13; + + optional bytes Ballast = 10; + + optional string VersionTag = 11; + repeated string AcceptedVersionTags = 12; + + optional bool RequireEncryption = 14; + optional TScopeId ClientScopeId = 15; + + optional string Cookie = 16; + optional bool DoCheckCookie = 17; + + optional bool RequestModernFrame = 18; + + optional bool RequestAuthOnly = 19; +} + +message THandshakeSuccess { + required uint64 Protocol = 1; + + required uint64 ProgramPID = 2; + required uint64 ProgramStartTime = 3; + required uint64 Serial = 4; + + required string SenderActorId = 5; + + optional string VersionTag = 6; + repeated string AcceptedVersionTags = 7; + + optional TClusterUUIDs ClusterUUIDs = 8; + + optional bool StartEncryption = 9; + optional TScopeId ServerScopeId = 10; + + optional bool UseModernFrame = 11; + + optional bool AuthOnly = 12; +} + +message THandshakeReply { + optional THandshakeSuccess Success = 1; + optional string ErrorExplaination = 2; + optional bool CookieCheckResult = 3; +} + +message TEvLoadMessage { + message THop { + optional NActorsProto.TActorId NextHop = 1; // if zero, then the payload is trimmed out of the message + } + + repeated THop Hops = 1; // the route for the message + optional string Id = 3; // message identifier + optional bytes Payload = 4; // data payload +} diff --git a/library/cpp/actors/protos/services_common.proto b/library/cpp/actors/protos/services_common.proto new file mode 100644 index 0000000000..afa0ec0073 --- /dev/null +++ b/library/cpp/actors/protos/services_common.proto @@ -0,0 +1,21 @@ +package NActorsServices; +option java_package = "ru.yandex.kikimr.proto"; + +// 0-255 range +enum EServiceCommon { + // WARN: This must be the smallest value in the enumeration + + GLOBAL = 0; + INTERCONNECT = 1; + TEST = 2; + PROTOCOLS = 3; + INTERCONNECT_SPEED_TEST = 4; + INTERCONNECT_STATUS = 5; + INTERCONNECT_NETWORK = 6; + INTERCONNECT_SESSION = 7; + HTTP = 8; + + // This value is reserved boundary. 
It must not be aliased with any value
+    // TODO: use reserved values upon protobuf update
+    // COMMON_END = 256;
+};
diff --git a/library/cpp/actors/protos/unittests.proto b/library/cpp/actors/protos/unittests.proto
new file mode 100644
index 0000000000..a856b0942a
--- /dev/null
+++ b/library/cpp/actors/protos/unittests.proto
@@ -0,0 +1,20 @@
+option cc_enable_arenas = true;
+
+message TSimple {
+    required string Str1 = 1;
+    optional string Str2 = 2;
+    optional uint64 Number1 = 3;
+}
+
+message TBigMessage {
+    repeated TSimple Simples = 1;
+    repeated string ManyStr = 2;
+    optional string OneMoreStr = 3;
+    optional uint64 YANumber = 4;
+}
+
+message TMessageWithPayload {
+    optional string Meta = 1;
+    repeated uint32 PayloadId = 2;
+    repeated string SomeData = 3;
+}
diff --git a/library/cpp/actors/protos/ya.make b/library/cpp/actors/protos/ya.make
new file mode 100644
index 0000000000..3a1488d78e
--- /dev/null
+++ b/library/cpp/actors/protos/ya.make
@@ -0,0 +1,14 @@
+PROTO_LIBRARY()
+
+OWNER(g:kikimr)
+
+SRCS(
+    actors.proto
+    interconnect.proto
+    services_common.proto
+    unittests.proto
+)
+
+EXCLUDE_TAGS(GO_PROTO)
+
+END()
diff --git a/library/cpp/actors/testlib/decorator_ut.cpp b/library/cpp/actors/testlib/decorator_ut.cpp
new file mode 100644
index 0000000000..e9a2fa3560
--- /dev/null
+++ b/library/cpp/actors/testlib/decorator_ut.cpp
@@ -0,0 +1,327 @@
+#include "test_runtime.h"
+
+#include <library/cpp/actors/core/actor_bootstrapped.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+
+using namespace NActors;
+
+
+Y_UNIT_TEST_SUITE(TesTTestDecorator) {
+
+    bool IsVerbose = false;
+    void Write(TString msg) {
+        if (IsVerbose) {
+            Cerr << (TStringBuilder() << msg << Endl);
+        }
+    }
+
+    struct TDyingChecker : TTestDecorator {
+        TActorId MasterId;
+
+        TDyingChecker(THolder<IActor> &&actor, TActorId masterId)
+            : TTestDecorator(std::move(actor))
+            , MasterId(masterId)
+        {
+            Write("TDyingChecker::Construct\n");
+        }
+
+        virtual ~TDyingChecker() {
+            Write("TDyingChecker::~TDyingChecker");
+            TActivationContext::Send(new IEventHandle(MasterId, SelfId(), new TEvents::TEvPing()));
+        }
+
+        bool DoBeforeReceiving(TAutoPtr<IEventHandle> &/*ev*/, const TActorContext &/*ctx*/) override {
+            Write("TDyingChecker::DoBeforeReceiving");
+            return true;
+        }
+
+        void DoAfterReceiving(const TActorContext &/*ctx*/) override {
+            Write("TDyingChecker::DoAfterReceiving");
+        }
+    };
+
+    struct TTestMasterActor : TActorBootstrapped<TTestMasterActor> {
+        friend TActorBootstrapped<TTestMasterActor>;
+
+        TSet<TActorId> ActorIds;
+        TVector<THolder<IActor>> Actors;
+        TActorId EdgeActor;
+
+        TTestMasterActor(TVector<THolder<IActor>> &&actors, TActorId edgeActor)
+            : TActorBootstrapped()
+            , Actors(std::move(actors))
+            , EdgeActor(edgeActor)
+        {
+        }
+
+        void Bootstrap()
+        {
+            Write("Start master actor");
+            for (auto &actor : Actors) {
+                THolder<IActor> decoratedActor = MakeHolder<TDyingChecker>(std::move(actor), SelfId());
+                TActorId id = Register(decoratedActor.Release());
+                Write("Register test actor");
+                UNIT_ASSERT(ActorIds.insert(id).second);
+            }
+            Become(&TTestMasterActor::State);
+        }
+
+        STATEFN(State) {
+            auto it = ActorIds.find(ev->Sender);
+            UNIT_ASSERT(it != ActorIds.end());
+            Write("End test actor");
+            ActorIds.erase(it);
+            if (!ActorIds) {
+                Send(EdgeActor, new TEvents::TEvPing());
+                PassAway();
+            }
+        }
+    };
+
+    enum {
+        Begin = EventSpaceBegin(TEvents::ES_USERSPACE),
+        EvWords
+    };
+
+    struct TEvWords : TEventLocal<TEvWords, EvWords> {
+        TVector<TString> Words;
+
+        TEvWords()
+            : TEventLocal()
+        {
+        }
+    };
+
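+    // Each decorator below wraps the next actor in the chain and may observe,
+    // rewrite or drop events in DoBeforeSending / DoBeforeReceiving before the
+    // wrapped actor sees them.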
+    struct TFizzBuzzToFooBar : TTestDecorator {
+        TFizzBuzzToFooBar(THolder<IActor> &&actor)
+            : TTestDecorator(std::move(actor))
+        {
+        }
+
+        bool DoBeforeSending(TAutoPtr<IEventHandle> &ev) override {
+            if (ev->Type == TEvents::TSystem::Bootstrap) {
+                return true;
+            }
+            Write("TFizzBuzzToFooBar::DoBeforeSending");
+            TEventHandle<TEvWords> *handle = reinterpret_cast<TEventHandle<TEvWords>*>(ev.Get());
+            UNIT_ASSERT(handle);
+            TEvWords *event = handle->Get();
+            TVector<TString> &words = event->Words;
+            TStringBuilder wordsMsg;
+            for (auto &word : words) {
+                wordsMsg << word << ';';
+            }
+            Write(TStringBuilder() << "Send# " << wordsMsg);
+            if (words.size() == 2 && words[0] == "Fizz" && words[1] == "Buzz") {
+                words[0] = "Foo";
+                words[1] = "Bar";
+            }
+            return true;
+        }
+
+        bool DoBeforeReceiving(TAutoPtr<IEventHandle> &/*ev*/, const TActorContext &/*ctx*/) override {
+            Write("TFizzBuzzToFooBar::DoBeforeReceiving");
+            return true;
+        }
+
+        void DoAfterReceiving(const TActorContext &/*ctx*/) override {
+            Write("TFizzBuzzToFooBar::DoAfterReceiving");
+        }
+    };
+
+    struct TWordEraser : TTestDecorator {
+        TString ErasingWord;
+
+        TWordEraser(THolder<IActor> &&actor, TString word)
+            : TTestDecorator(std::move(actor))
+            , ErasingWord(word)
+        {
+        }
+
+        bool DoBeforeSending(TAutoPtr<IEventHandle> &ev) override {
+            if (ev->Type == TEvents::TSystem::Bootstrap) {
+                return true;
+            }
+            Write("TWordEraser::DoBeforeSending");
+            TEventHandle<TEvWords> *handle = reinterpret_cast<TEventHandle<TEvWords>*>(ev.Get());
+            UNIT_ASSERT(handle);
+            TEvWords *event = handle->Get();
+            TVector<TString> &words = event->Words;
+            auto it = Find(words.begin(), words.end(), ErasingWord);
+            if (it != words.end()) {
+                words.erase(it);
+            }
+            return true;
+        }
+
+        bool DoBeforeReceiving(TAutoPtr<IEventHandle> &/*ev*/, const TActorContext &/*ctx*/) override {
+            Write("TWordEraser::DoBeforeReceiving");
+            return true;
+        }
+
+        void DoAfterReceiving(const TActorContext &/*ctx*/) override {
+            Write("TWordEraser::DoAfterReceiving");
+        }
+    };
+
+    struct TWithoutWordsDropper : TTestDecorator {
+        TWithoutWordsDropper(THolder<IActor> &&actor)
+            : TTestDecorator(std::move(actor))
+        {
+        }
+
+        bool DoBeforeSending(TAutoPtr<IEventHandle> &ev) override {
+            if (ev->Type == TEvents::TSystem::Bootstrap) {
+                return true;
+            }
+            Write("TWithoutWordsDropper::DoBeforeSending");
+            TEventHandle<TEvWords> *handle = reinterpret_cast<TEventHandle<TEvWords>*>(ev.Get());
+            UNIT_ASSERT(handle);
+            TEvWords *event = handle->Get();
+            return bool(event->Words);
+        }
+
+        bool DoBeforeReceiving(TAutoPtr<IEventHandle> &/*ev*/, const TActorContext &/*ctx*/) override {
+            Write("TWithoutWordsDropper::DoBeforeReceiving");
+            return true;
+        }
+
+        void DoAfterReceiving(const TActorContext &/*ctx*/) override {
+            Write("TWithoutWordsDropper::DoAfterReceiving");
+        }
+    };
+
+    struct TFooBarReceiver : TActorBootstrapped<TFooBarReceiver> {
+        TActorId MasterId;
+        ui64 Counter = 0;
+
+        TFooBarReceiver(TActorId masterId)
+            : TActorBootstrapped()
+            , MasterId(masterId)
+        {
+        }
+
+        void Bootstrap()
+        {
+            Become(&TFooBarReceiver::State);
+        }
+
+        STATEFN(State) {
+            TEventHandle<TEvWords> *handle = reinterpret_cast<TEventHandle<TEvWords>*>(ev.Get());
+            UNIT_ASSERT(handle);
+            UNIT_ASSERT(handle->Sender == MasterId);
+            TEvWords *event = handle->Get();
+            TVector<TString> &words = event->Words;
+            UNIT_ASSERT(words.size() == 2 && words[0] == "Foo" && words[1] == "Bar");
+            Write(TStringBuilder() << "Receive# " << Counter + 1 << '/' << 2);
+            if (++Counter == 2) {
+                PassAway();
+            }
+        }
+    };
+
+    struct TFizzBuzzSender : TActorBootstrapped<TFizzBuzzSender> {
+        TActorId SlaveId;
+
+        TFizzBuzzSender()
+            : TActorBootstrapped()
+        {
+            Write("TFizzBuzzSender::Construct");
+        }
+
+        void Bootstrap() {
+            Write("TFizzBuzzSender::Bootstrap");
+            THolder<IActor> actor = MakeHolder<TFooBarReceiver>(SelfId());
+            THolder<IActor> decoratedActor = MakeHolder<TDyingChecker>(std::move(actor), SelfId());
+            SlaveId = Register(decoratedActor.Release());
+            for (ui64 idx = 1; idx <= 30; ++idx) {
+                THolder<TEvWords> ev = MakeHolder<TEvWords>();
+                if (idx % 3 == 0) {
+                    ev->Words.push_back("Fizz");
+                }
+                if (idx % 5 == 0) {
+                    ev->Words.push_back("Buzz");
+                }
+                Send(SlaveId, ev.Release());
+                Write("TFizzBuzzSender::Send words");
+            }
+            Become(&TFizzBuzzSender::State);
+        }
+
+        STATEFN(State) {
+            UNIT_ASSERT(ev->Sender == SlaveId);
+            PassAway();
+        }
+    };
+
+    struct TCounters {
+        ui64 SentCount = 0;
+        ui64 ReceivedCount = 0;
+    };
+
+    struct TCountingDecorator : TTestDecorator {
+        TCounters *Counters;
+
+        TCountingDecorator(THolder<IActor> &&actor, TCounters *counters)
+            : TTestDecorator(std::move(actor))
+            , Counters(counters)
+        {
+        }
+
+        bool DoBeforeSending(TAutoPtr<IEventHandle> &ev) override {
+            if (ev->Type == TEvents::TSystem::Bootstrap) {
+                return true;
+            }
+            Write("TCountingDecorator::DoBeforeSending");
+            Counters->SentCount++;
+            return true;
+        }
+
+        bool DoBeforeReceiving(TAutoPtr<IEventHandle> &/*ev*/, const TActorContext &/*ctx*/) override {
+            Write("TCountingDecorator::DoBeforeReceiving");
+            Counters->ReceivedCount++;
+            return true;
+        }
+    };
+
+    bool ScheduledFilterFunc(NActors::TTestActorRuntimeBase& runtime, TAutoPtr<NActors::IEventHandle>& event,
+            TDuration delay, TInstant& deadline) {
+        if (runtime.IsScheduleForActorEnabled(event->GetRecipientRewrite())) {
+            deadline = runtime.GetTimeProvider()->Now() + delay;
+            return false;
+        }
+        return true;
+    }
+
+    THolder<IActor> CreateFizzBuzzSender() {
+        THolder<IActor> actor = MakeHolder<TFizzBuzzSender>();
+        THolder<IActor> foobar = MakeHolder<TFizzBuzzToFooBar>(std::move(actor));
+        THolder<IActor> fizzEraser = MakeHolder<TWordEraser>(std::move(foobar), "Fizz");
+        THolder<IActor> buzzEraser = MakeHolder<TWordEraser>(std::move(fizzEraser), "Buzz");
+        return MakeHolder<TWithoutWordsDropper>(std::move(buzzEraser));
+    }
+
+    Y_UNIT_TEST(Basic) {
+        TTestActorRuntimeBase runtime(1, false);
+
+        runtime.SetScheduledEventFilter(&ScheduledFilterFunc);
+        runtime.SetEventFilter([](NActors::TTestActorRuntimeBase&, TAutoPtr<NActors::IEventHandle>&) {
+            return false;
+        });
+        runtime.Initialize();
+
+        TActorId edgeActor = runtime.AllocateEdgeActor();
+        TVector<THolder<IActor>> actors(1);
+        actors[0] = CreateFizzBuzzSender();
+        //actors[1] = CreateFizzBuzzSender();
+        THolder<IActor> testActor = MakeHolder<TTestMasterActor>(std::move(actors), edgeActor);
+        Write("Start test");
+        runtime.Register(testActor.Release());
+
+        TAutoPtr<IEventHandle> handle;
+        auto ev = runtime.GrabEdgeEventRethrow<TEvents::TEvPing>(handle);
+        UNIT_ASSERT(ev);
+        Write("Stop test");
+    }
+}
diff --git a/library/cpp/actors/testlib/test_runtime.cpp b/library/cpp/actors/testlib/test_runtime.cpp
new file mode 100644
index 0000000000..6fa25b9965
--- /dev/null
+++ b/library/cpp/actors/testlib/test_runtime.cpp
@@ -0,0 +1,1902 @@
+#include "test_runtime.h"
+
+#include <library/cpp/actors/core/actor_bootstrapped.h>
+#include <library/cpp/actors/core/callstack.h>
+#include <library/cpp/actors/core/executor_pool_basic.h>
+#include <library/cpp/actors/core/executor_pool_io.h>
+#include 
<library/cpp/actors/core/log.h> +#include <library/cpp/actors/core/scheduler_basic.h> +#include <library/cpp/actors/util/datetime.h> +#include <library/cpp/actors/protos/services_common.pb.h> +#include <library/cpp/random_provider/random_provider.h> +#include <library/cpp/actors/interconnect/interconnect.h> +#include <library/cpp/actors/interconnect/interconnect_tcp_proxy.h> +#include <library/cpp/actors/interconnect/interconnect_proxy_wrapper.h> + +#include <util/generic/maybe.h> +#include <util/generic/bt_exception.h> +#include <util/random/mersenne.h> +#include <util/string/printf.h> +#include <typeinfo> + +bool VERBOSE = false; +const bool PRINT_EVENT_BODY = false; + +namespace { + + TString MakeClusterId() { + pid_t pid = getpid(); + TStringBuilder uuid; + uuid << "Cluster for process with id: " << pid; + return uuid; + } +} + +namespace NActors { + ui64 TScheduledEventQueueItem::NextUniqueId = 0; + + void PrintEvent(TAutoPtr<IEventHandle>& ev, const TTestActorRuntimeBase* runtime) { + Cerr << "mailbox: " << ev->GetRecipientRewrite().Hint() << ", type: " << Sprintf("%08x", ev->GetTypeRewrite()) + << ", from " << ev->Sender.LocalId(); + TString name = runtime->GetActorName(ev->Sender); + if (!name.empty()) + Cerr << " \"" << name << "\""; + Cerr << ", to " << ev->GetRecipientRewrite().LocalId(); + name = runtime->GetActorName(ev->GetRecipientRewrite()); + if (!name.empty()) + Cerr << " \"" << name << "\""; + Cerr << ", "; + if (ev->HasEvent()) + Cerr << " : " << (PRINT_EVENT_BODY ? ev->GetBase()->ToString() : ev->GetBase()->ToStringHeader()); + else if (ev->HasBuffer()) + Cerr << " : BUFFER"; + else + Cerr << " : EMPTY"; + + Cerr << "\n"; + } + + TTestActorRuntimeBase::TNodeDataBase::TNodeDataBase() { + ActorSystemTimestamp = nullptr; + ActorSystemMonotonic = nullptr; + } + + void TTestActorRuntimeBase::TNodeDataBase::Stop() { + if (Poller) + Poller->Stop(); + + if (MailboxTable) { + for (ui32 round = 0; !MailboxTable->Cleanup(); ++round) + Y_VERIFY(round < 10, "cyclic event/actor spawn while trying to shutdown actorsystem stub"); + } + + if (ActorSystem) + ActorSystem->Stop(); + + ActorSystem.Destroy(); + Poller.Reset(); + } + + TTestActorRuntimeBase::TNodeDataBase::~TNodeDataBase() { + Stop(); + } + + + class TTestActorRuntimeBase::TEdgeActor : public TActor<TEdgeActor> { + public: + static constexpr EActivityType ActorActivityType() { + return TEST_ACTOR_RUNTIME; + } + + TEdgeActor(TTestActorRuntimeBase* runtime) + : TActor(&TEdgeActor::StateFunc) + , Runtime(runtime) + { + } + + STFUNC(StateFunc) { + Y_UNUSED(ctx); + TGuard<TMutex> guard(Runtime->Mutex); + bool verbose = (Runtime->CurrentDispatchContext ? 
!Runtime->CurrentDispatchContext->Options->Quiet : true) && VERBOSE; + if (Runtime->BlockedOutput.find(ev->Sender) != Runtime->BlockedOutput.end()) { + verbose = false; + } + + if (verbose) { + Cerr << "Got event at " << TInstant::MicroSeconds(Runtime->CurrentTimestamp) << ", "; + PrintEvent(ev, Runtime); + } + + if (!Runtime->EventFilterFunc(*Runtime, ev)) { + ui32 nodeId = ev->GetRecipientRewrite().NodeId(); + Y_VERIFY(nodeId != 0); + ui32 mailboxHint = ev->GetRecipientRewrite().Hint(); + Runtime->GetMailbox(nodeId, mailboxHint).Send(ev); + Runtime->MailboxesHasEvents.Signal(); + if (verbose) + Cerr << "Event was added to sent queue\n"; + } + else { + if (verbose) + Cerr << "Event was dropped\n"; + } + } + + private: + TTestActorRuntimeBase* Runtime; + }; + + void TEventMailBox::Send(TAutoPtr<IEventHandle> ev) { + IEventHandle* ptr = ev.Get(); + Y_VERIFY(ptr); +#ifdef DEBUG_ORDER_EVENTS + ui64 counter = NextToSend++; + TrackSent[ptr] = counter; +#endif + Sent.push_back(ev); + } + + TAutoPtr<IEventHandle> TEventMailBox::Pop() { + TAutoPtr<IEventHandle> result = Sent.front(); + Sent.pop_front(); +#ifdef DEBUG_ORDER_EVENTS + auto it = TrackSent.find(result.Get()); + if (it != TrackSent.end()) { + Y_VERIFY(ExpectedReceive == it->second); + TrackSent.erase(result.Get()); + ++ExpectedReceive; + } +#endif + return result; + } + + bool TEventMailBox::IsEmpty() const { + return Sent.empty(); + } + + void TEventMailBox::Capture(TEventsList& evList) { + evList.insert(evList.end(), Sent.begin(), Sent.end()); + Sent.clear(); + } + + void TEventMailBox::PushFront(TAutoPtr<IEventHandle>& ev) { + Sent.push_front(ev); + } + + void TEventMailBox::PushFront(TEventsList& evList) { + for (auto rit = evList.rbegin(); rit != evList.rend(); ++rit) { + if (*rit) { + Sent.push_front(*rit); + } + } + } + + void TEventMailBox::CaptureScheduled(TScheduledEventsList& evList) { + for (auto it = Scheduled.begin(); it != Scheduled.end(); ++it) { + evList.insert(*it); + } + + Scheduled.clear(); + } + + void TEventMailBox::PushScheduled(TScheduledEventsList& evList) { + for (auto it = evList.begin(); it != evList.end(); ++it) { + if (it->Event) { + Scheduled.insert(*it); + } + } + + evList.clear(); + } + + bool TEventMailBox::IsActive(const TInstant& currentTime) const { + return currentTime >= InactiveUntil; + } + + void TEventMailBox::Freeze(const TInstant& deadline) { + if (deadline > InactiveUntil) + InactiveUntil = deadline; + } + + TInstant TEventMailBox::GetInactiveUntil() const { + return InactiveUntil; + } + + void TEventMailBox::Schedule(const TScheduledEventQueueItem& item) { + Scheduled.insert(item); + } + + bool TEventMailBox::IsScheduledEmpty() const { + return Scheduled.empty(); + } + + TInstant TEventMailBox::GetFirstScheduleDeadline() const { + return Scheduled.begin()->Deadline; + } + + ui64 TEventMailBox::GetSentEventCount() const { + return Sent.size(); + } + + class TTestActorRuntimeBase::TTimeProvider : public ITimeProvider { + public: + TTimeProvider(TTestActorRuntimeBase& runtime) + : Runtime(runtime) + { + } + + TInstant Now() override { + return Runtime.GetCurrentTime(); + } + + private: + TTestActorRuntimeBase& Runtime; + }; + + class TTestActorRuntimeBase::TSchedulerThreadStub : public ISchedulerThread { + public: + TSchedulerThreadStub(TTestActorRuntimeBase* runtime, TTestActorRuntimeBase::TNodeDataBase* node) + : Runtime(runtime) + , Node(node) + { + Y_UNUSED(Runtime); + } + + void Prepare(TActorSystem *actorSystem, volatile ui64 *currentTimestamp, volatile ui64 *currentMonotonic) override { 
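+            // This stub never starts a real scheduler thread (Start/Stop below are
+            // empty); it only captures the pointers through which
+            // TTestActorRuntimeBase::UpdateCurrentTime() later publishes the
+            // virtual clock to this node's actor system.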
+ Y_UNUSED(actorSystem); + Node->ActorSystemTimestamp = currentTimestamp; + Node->ActorSystemMonotonic = currentMonotonic; + } + + void PrepareSchedules(NSchedulerQueue::TReader **readers, ui32 scheduleReadersCount) override { + Y_UNUSED(readers); + Y_UNUSED(scheduleReadersCount); + } + + void Start() override { + } + + void PrepareStop() override { + } + + void Stop() override { + } + + private: + TTestActorRuntimeBase* Runtime; + TTestActorRuntimeBase::TNodeDataBase* Node; + }; + + class TTestActorRuntimeBase::TExecutorPoolStub : public IExecutorPool { + public: + TExecutorPoolStub(TTestActorRuntimeBase* runtime, ui32 nodeIndex, TTestActorRuntimeBase::TNodeDataBase* node, ui32 poolId) + : IExecutorPool(poolId) + , Runtime(runtime) + , NodeIndex(nodeIndex) + , Node(node) + { + } + + TTestActorRuntimeBase* GetRuntime() { + return Runtime; + } + + // for threads + ui32 GetReadyActivation(TWorkerContext& wctx, ui64 revolvingCounter) override { + Y_UNUSED(wctx); + Y_UNUSED(revolvingCounter); + Y_FAIL(); + } + + void ReclaimMailbox(TMailboxType::EType mailboxType, ui32 hint, TWorkerId workerId, ui64 revolvingCounter) override { + Y_UNUSED(workerId); + Node->MailboxTable->ReclaimMailbox(mailboxType, hint, revolvingCounter); + } + + void Schedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie *cookie, TWorkerId workerId) override { + DoSchedule(deadline, ev, cookie, workerId); + } + + void Schedule(TMonotonic deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie *cookie, TWorkerId workerId) override { + DoSchedule(TInstant::FromValue(deadline.GetValue()), ev, cookie, workerId); + } + + void Schedule(TDuration delay, TAutoPtr<IEventHandle> ev, ISchedulerCookie *cookie, TWorkerId workerId) override { + TInstant deadline = Runtime->GetTimeProvider()->Now() + delay; + DoSchedule(deadline, ev, cookie, workerId); + } + + void DoSchedule(TInstant deadline, TAutoPtr<IEventHandle> ev, ISchedulerCookie *cookie, TWorkerId workerId) { + Y_UNUSED(workerId); + + TGuard<TMutex> guard(Runtime->Mutex); + bool verbose = (Runtime->CurrentDispatchContext ? !Runtime->CurrentDispatchContext->Options->Quiet : true) && VERBOSE; + if (Runtime->BlockedOutput.find(ev->Sender) != Runtime->BlockedOutput.end()) { + verbose = false; + } + + if (verbose) { + Cerr << "Got scheduled event at " << TInstant::MicroSeconds(Runtime->CurrentTimestamp) << ", "; + PrintEvent(ev, Runtime); + } + + auto now = Runtime->GetTimeProvider()->Now(); + if (deadline < now) { + deadline = now; // avoid going backwards in time + } + TDuration delay = (deadline - now); + + if (Runtime->SingleSysEnv || !Runtime->ScheduledEventFilterFunc(*Runtime, ev, delay, deadline)) { + ui32 mailboxHint = ev->GetRecipientRewrite().Hint(); + Runtime->GetMailbox(Runtime->FirstNodeId + NodeIndex, mailboxHint).Schedule(TScheduledEventQueueItem(deadline, ev, cookie)); + Runtime->MailboxesHasEvents.Signal(); + if (verbose) + Cerr << "Event was added to scheduled queue\n"; + } else { + if (cookie) { + cookie->Detach(); + } + if (verbose) { + Cerr << "Scheduled event for " << ev->GetRecipientRewrite().ToString() << " was dropped\n"; + } + } + } + + // for actorsystem + bool Send(TAutoPtr<IEventHandle>& ev) override { + TGuard<TMutex> guard(Runtime->Mutex); + bool verbose = (Runtime->CurrentDispatchContext ? 
!Runtime->CurrentDispatchContext->Options->Quiet : true) && VERBOSE; + if (Runtime->BlockedOutput.find(ev->Sender) != Runtime->BlockedOutput.end()) { + verbose = false; + } + + if (verbose) { + Cerr << "Got event at " << TInstant::MicroSeconds(Runtime->CurrentTimestamp) << ", "; + PrintEvent(ev, Runtime); + } + + if (!Runtime->EventFilterFunc(*Runtime, ev)) { + ui32 nodeId = ev->GetRecipientRewrite().NodeId(); + Y_VERIFY(nodeId != 0); + TNodeDataBase* node = Runtime->Nodes[nodeId].Get(); + + if (!AllowSendFrom(node, ev)) { + return true; + } + + ui32 mailboxHint = ev->GetRecipientRewrite().Hint(); + if (ev->GetTypeRewrite() == ui32(NActors::NLog::EEv::Log)) { + const NActors::TActorId loggerActorId = NActors::TActorId(nodeId, "logger"); + TActorId logger = node->ActorSystem->LookupLocalService(loggerActorId); + if (ev->GetRecipientRewrite() == logger) { + TMailboxHeader* mailbox = node->MailboxTable->Get(mailboxHint); + IActor* recipientActor = mailbox->FindActor(ev->GetRecipientRewrite().LocalId()); + if (recipientActor) { + TActorContext ctx(*mailbox, *node->ExecutorThread, GetCycleCountFast(), ev->GetRecipientRewrite()); + TActivationContext *prevTlsActivationContext = TlsActivationContext; + TlsActivationContext = &ctx; + recipientActor->Receive(ev, ctx); + TlsActivationContext = prevTlsActivationContext; + // we expect the logger to never die in tests + } + } + } else { + Runtime->GetMailbox(nodeId, mailboxHint).Send(ev); + Runtime->MailboxesHasEvents.Signal(); + } + if (verbose) + Cerr << "Event was added to sent queue\n"; + } else { + if (verbose) + Cerr << "Event was dropped\n"; + } + return true; + } + + void ScheduleActivation(ui32 activation) override { + Y_UNUSED(activation); + } + + void ScheduleActivationEx(ui32 activation, ui64 revolvingCounter) override { + Y_UNUSED(activation); + Y_UNUSED(revolvingCounter); + } + + TActorId Register(IActor *actor, TMailboxType::EType mailboxType, ui64 revolvingCounter, + const TActorId& parentId) override { + return Runtime->Register(actor, NodeIndex, PoolId, mailboxType, revolvingCounter, parentId); + } + + TActorId Register(IActor *actor, TMailboxHeader *mailbox, ui32 hint, const TActorId& parentId) override { + return Runtime->Register(actor, NodeIndex, PoolId, mailbox, hint, parentId); + } + + // lifecycle stuff + void Prepare(TActorSystem *actorSystem, NSchedulerQueue::TReader **scheduleReaders, ui32 *scheduleSz) override { + Y_UNUSED(actorSystem); + Y_UNUSED(scheduleReaders); + Y_UNUSED(scheduleSz); + } + + void Start() override { + } + + void PrepareStop() override { + } + + void Shutdown() override { + } + + bool Cleanup() override { + return true; + } + + // generic + TAffinity* Affinity() const override { + Y_FAIL(); + } + + private: + TTestActorRuntimeBase* const Runtime; + const ui32 NodeIndex; + TTestActorRuntimeBase::TNodeDataBase* const Node; + }; + + IExecutorPool* TTestActorRuntimeBase::CreateExecutorPoolStub(TTestActorRuntimeBase* runtime, ui32 nodeIndex, TTestActorRuntimeBase::TNodeDataBase* node, ui32 poolId) { + return new TExecutorPoolStub{runtime, nodeIndex, node, poolId}; + } + + + ui32 TTestActorRuntimeBase::NextNodeId = 1; + + TTestActorRuntimeBase::TTestActorRuntimeBase(THeSingleSystemEnv) + : TTestActorRuntimeBase(1, 1, false) + { + SingleSysEnv = true; + } + + TTestActorRuntimeBase::TTestActorRuntimeBase(ui32 nodeCount, ui32 dataCenterCount, bool useRealThreads) + : ScheduledCount(0) + , ScheduledLimit(100000) + , MainThreadId(TThread::CurrentThreadId()) + , ClusterUUID(MakeClusterId()) + , 
FirstNodeId(NextNodeId) + , NodeCount(nodeCount) + , DataCenterCount(dataCenterCount) + , UseRealThreads(useRealThreads) + , LocalId(0) + , DispatchCyclesCount(0) + , DispatchedEventsCount(0) + , NeedMonitoring(false) + , RandomProvider(CreateDeterministicRandomProvider(DefaultRandomSeed)) + , TimeProvider(new TTimeProvider(*this)) + , ShouldContinue() + , CurrentTimestamp(0) + , DispatchTimeout(DEFAULT_DISPATCH_TIMEOUT) + , ReschedulingDelay(TDuration::MicroSeconds(0)) + , ObserverFunc(&TTestActorRuntimeBase::DefaultObserverFunc) + , ScheduledEventsSelectorFunc(&CollapsedTimeScheduledEventsSelector) + , EventFilterFunc(&TTestActorRuntimeBase::DefaultFilterFunc) + , ScheduledEventFilterFunc(&TTestActorRuntimeBase::NopFilterFunc) + , RegistrationObserver(&TTestActorRuntimeBase::DefaultRegistrationObserver) + , CurrentDispatchContext(nullptr) + { + SetDispatcherRandomSeed(TInstant::Now(), 0); + EnableActorCallstack(); + } + + void TTestActorRuntimeBase::InitNode(TNodeDataBase* node, size_t nodeIndex) { + const NActors::TActorId loggerActorId = NActors::TActorId(FirstNodeId + nodeIndex, "logger"); + node->LogSettings = new NActors::NLog::TSettings(loggerActorId, 410 /* NKikimrServices::LOGGER */, + NActors::NLog::PRI_WARN, NActors::NLog::PRI_WARN, 0); + node->LogSettings->SetAllowDrop(false); + node->LogSettings->SetThrottleDelay(TDuration::Zero()); + node->DynamicCounters = new NMonitoring::TDynamicCounters; + + InitNodeImpl(node, nodeIndex); + } + + void TTestActorRuntimeBase::InitNodeImpl(TNodeDataBase* node, size_t nodeIndex) { + node->LogSettings->Append( + NActorsServices::EServiceCommon_MIN, + NActorsServices::EServiceCommon_MAX, + NActorsServices::EServiceCommon_Name + ); + + if (!UseRealThreads) { + node->SchedulerPool.Reset(CreateExecutorPoolStub(this, nodeIndex, node, 0)); + node->MailboxTable.Reset(new TMailboxTable()); + node->ActorSystem = MakeActorSystem(nodeIndex, node); + node->ExecutorThread.Reset(new TExecutorThread(0, 0, node->ActorSystem.Get(), node->SchedulerPool.Get(), node->MailboxTable.Get(), "TestExecutor")); + } else { + node->ActorSystem = MakeActorSystem(nodeIndex, node); + } + + node->ActorSystem->Start(); + } + + bool TTestActorRuntimeBase::AllowSendFrom(TNodeDataBase* node, TAutoPtr<IEventHandle>& ev) { + ui64 senderLocalId = ev->Sender.LocalId(); + ui64 senderMailboxHint = ev->Sender.Hint(); + TMailboxHeader* senderMailbox = node->MailboxTable->Get(senderMailboxHint); + if (senderMailbox) { + IActor* senderActor = senderMailbox->FindActor(senderLocalId); + TTestDecorator *decorator = dynamic_cast<TTestDecorator*>(senderActor); + return !decorator || decorator->BeforeSending(ev); + } + return true; + } + + TTestActorRuntimeBase::TTestActorRuntimeBase(ui32 nodeCount, ui32 dataCenterCount) + : TTestActorRuntimeBase(nodeCount, dataCenterCount, false) { + } + + TTestActorRuntimeBase::TTestActorRuntimeBase(ui32 nodeCount, bool useRealThreads) + : TTestActorRuntimeBase(nodeCount, nodeCount, useRealThreads) { + } + + TTestActorRuntimeBase::~TTestActorRuntimeBase() { + CleanupNodes(); + Cerr.Flush(); + Cerr.Flush(); + Clog.Flush(); + + DisableActorCallstack(); + } + + void TTestActorRuntimeBase::CleanupNodes() { + Nodes.clear(); + } + + bool TTestActorRuntimeBase::IsRealThreads() const { + return UseRealThreads; + } + + TTestActorRuntimeBase::EEventAction TTestActorRuntimeBase::DefaultObserverFunc(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event) { + Y_UNUSED(runtime); + Y_UNUSED(event); + return EEventAction::PROCESS; + } + + void 
TTestActorRuntimeBase::DroppingScheduledEventsSelector(TTestActorRuntimeBase& runtime, TScheduledEventsList& scheduledEvents, TEventsList& queue) { + Y_UNUSED(runtime); + Y_UNUSED(queue); + scheduledEvents.clear(); + } + + bool TTestActorRuntimeBase::DefaultFilterFunc(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event) { + Y_UNUSED(runtime); + Y_UNUSED(event); + return false; + } + + bool TTestActorRuntimeBase::NopFilterFunc(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event, TDuration delay, TInstant& deadline) { + Y_UNUSED(runtime); + Y_UNUSED(delay); + Y_UNUSED(event); + Y_UNUSED(deadline); + return true; + } + + + void TTestActorRuntimeBase::DefaultRegistrationObserver(TTestActorRuntimeBase& runtime, const TActorId& parentId, const TActorId& actorId) { + if (runtime.ScheduleWhiteList.find(parentId) != runtime.ScheduleWhiteList.end()) { + runtime.ScheduleWhiteList.insert(actorId); + runtime.ScheduleWhiteListParent[actorId] = parentId; + } + } + + class TScheduledTreeItem { + public: + TString Name; + ui64 Count; + TVector<TScheduledTreeItem> Children; + + TScheduledTreeItem(const TString& name) + : Name(name) + , Count(0) + {} + + TScheduledTreeItem* GetItem(const TString& name) { + TScheduledTreeItem* item = nullptr; + for (TScheduledTreeItem& i : Children) { + if (i.Name == name) { + item = &i; + break; + } + } + if (item != nullptr) + return item; + Children.emplace_back(name); + return &Children.back(); + } + + void RecursiveSort() { + Sort(Children, [](const TScheduledTreeItem& a, const TScheduledTreeItem& b) -> bool { return a.Count > b.Count; }); + for (TScheduledTreeItem& item : Children) { + item.RecursiveSort(); + } + } + + void Print(IOutputStream& stream, const TString& prefix) { + for (auto it = Children.begin(); it != Children.end(); ++it) { + bool lastChild = (std::next(it) == Children.end()); + TString connectionPrefix = lastChild ? "└─ " : "├─ "; + TString subChildPrefix = lastChild ? " " : "│ "; + stream << prefix << connectionPrefix << it->Name << " (" << it->Count << ")\n"; + it->Print(stream, prefix + subChildPrefix); + } + } + + void Print(IOutputStream& stream) { + stream << Name << " (" << Count << ")\n"; + Print(stream, TString()); + } + }; + + void TTestActorRuntimeBase::CollapsedTimeScheduledEventsSelector(TTestActorRuntimeBase& runtime, TScheduledEventsList& scheduledEvents, TEventsList& queue) { + if (scheduledEvents.empty()) + return; + + TInstant time = scheduledEvents.begin()->Deadline; + while (!scheduledEvents.empty() && scheduledEvents.begin()->Deadline == time) { + static THashMap<std::pair<TActorId, TString>, ui64> eventTypes; + auto& item = *scheduledEvents.begin(); + TString name = item.Event->GetBase() ? 
TypeName(*item.Event->GetBase()) : Sprintf("%08" PRIx32, item.Event->Type); + eventTypes[std::make_pair(item.Event->Recipient, name)]++; + runtime.ScheduledCount++; + if (runtime.ScheduledCount > runtime.ScheduledLimit) { +// TScheduledTreeItem root("Root"); +// TVector<TString> path; +// for (const auto& pr : eventTypes) { +// path.clear(); +// path.push_back(runtime.GetActorName(pr.first.first)); +// for (auto it = runtime.ScheduleWhiteListParent.find(pr.first.first); it != runtime.ScheduleWhiteListParent.end(); it = runtime.ScheduleWhiteListParent.find(it->second)) { +// path.insert(path.begin(), runtime.GetActorName(it->second)); +// } +// path.push_back("<" + pr.first.second + ">"); // event name; +// ui64 count = pr.second; +// TScheduledTreeItem* item = &root; +// item->Count += count; +// for (TString name : path) { +// item = item->GetItem(name); +// item->Count += count; +// } +// } +// root.RecursiveSort(); +// root.Print(Cerr); + + ythrow TSchedulingLimitReachedException(runtime.ScheduledLimit); + } + if (item.Cookie->Get()) { + if (item.Cookie->Detach()) { + queue.push_back(item.Event); + } + } else { + queue.push_back(item.Event); + } + + scheduledEvents.erase(scheduledEvents.begin()); + } + + runtime.UpdateCurrentTime(time); + } + + TTestActorRuntimeBase::TEventObserver TTestActorRuntimeBase::SetObserverFunc(TEventObserver observerFunc) { + TGuard<TMutex> guard(Mutex); + auto result = ObserverFunc; + ObserverFunc = observerFunc; + return result; + } + + TTestActorRuntimeBase::TScheduledEventsSelector TTestActorRuntimeBase::SetScheduledEventsSelectorFunc(TScheduledEventsSelector scheduledEventsSelectorFunc) { + TGuard<TMutex> guard(Mutex); + auto result = ScheduledEventsSelectorFunc; + ScheduledEventsSelectorFunc = scheduledEventsSelectorFunc; + return result; + } + + TTestActorRuntimeBase::TEventFilter TTestActorRuntimeBase::SetEventFilter(TEventFilter filterFunc) { + TGuard<TMutex> guard(Mutex); + auto result = EventFilterFunc; + EventFilterFunc = filterFunc; + return result; + } + + TTestActorRuntimeBase::TScheduledEventFilter TTestActorRuntimeBase::SetScheduledEventFilter(TScheduledEventFilter filterFunc) { + TGuard<TMutex> guard(Mutex); + auto result = ScheduledEventFilterFunc; + ScheduledEventFilterFunc = filterFunc; + return result; + } + + TTestActorRuntimeBase::TRegistrationObserver TTestActorRuntimeBase::SetRegistrationObserverFunc(TRegistrationObserver observerFunc) { + TGuard<TMutex> guard(Mutex); + auto result = RegistrationObserver; + RegistrationObserver = observerFunc; + return result; + } + + bool TTestActorRuntimeBase::IsVerbose() { + return VERBOSE; + } + + void TTestActorRuntimeBase::SetVerbose(bool verbose) { + VERBOSE = verbose; + } + + void TTestActorRuntimeBase::AddLocalService(const TActorId& actorId, const TActorSetupCmd& cmd, ui32 nodeIndex) { + Y_VERIFY(!IsInitialized); + Y_VERIFY(nodeIndex < NodeCount); + auto node = Nodes[nodeIndex + FirstNodeId]; + if (!node) { + node = GetNodeFactory().CreateNode(); + Nodes[nodeIndex + FirstNodeId] = node; + } + + node->LocalServicesActors[actorId] = cmd.Actor; + node->LocalServices.push_back(std::make_pair(actorId, cmd)); + } + + void TTestActorRuntimeBase::InitNodes() { + NextNodeId += NodeCount; + Y_VERIFY(NodeCount > 0); + + for (ui32 nodeIndex = 0; nodeIndex < NodeCount; ++nodeIndex) { + auto nodeIt = Nodes.emplace(FirstNodeId + nodeIndex, GetNodeFactory().CreateNode()).first; + TNodeDataBase* node = nodeIt->second.Get(); + InitNode(node, nodeIndex); + } + + } + + void TTestActorRuntimeBase::Initialize() { 
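+        // Node setup is deferred to Initialize() so that calls which require
+        // !IsInitialized (e.g. AddLocalService, SetLogBackend) can be made first.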
+ InitNodes(); + IsInitialized = true; + } + + void SetupCrossDC() { + + } + + TDuration TTestActorRuntimeBase::SetDispatchTimeout(TDuration timeout) { + TGuard<TMutex> guard(Mutex); + TDuration oldTimeout = DispatchTimeout; + DispatchTimeout = timeout; + return oldTimeout; + } + + TDuration TTestActorRuntimeBase::SetReschedulingDelay(TDuration delay) { + TGuard<TMutex> guard(Mutex); + TDuration oldDelay = ReschedulingDelay; + ReschedulingDelay = delay; + return oldDelay; + } + + void TTestActorRuntimeBase::SetLogBackend(const TAutoPtr<TLogBackend> logBackend) { + Y_VERIFY(!IsInitialized); + TGuard<TMutex> guard(Mutex); + LogBackend = logBackend; + } + + void TTestActorRuntimeBase::SetLogPriority(NActors::NLog::EComponent component, NActors::NLog::EPriority priority) { + TGuard<TMutex> guard(Mutex); + for (ui32 nodeIndex = 0; nodeIndex < NodeCount; ++nodeIndex) { + TNodeDataBase* node = Nodes[FirstNodeId + nodeIndex].Get(); + TString explanation; + auto status = node->LogSettings->SetLevel(priority, component, explanation); + if (status) { + Y_FAIL("SetLogPriority failed: %s", explanation.c_str()); + } + } + } + + TInstant TTestActorRuntimeBase::GetCurrentTime() const { + TGuard<TMutex> guard(Mutex); + Y_VERIFY(!UseRealThreads); + return TInstant::MicroSeconds(CurrentTimestamp); + } + + void TTestActorRuntimeBase::UpdateCurrentTime(TInstant newTime) { + static int counter = 0; + ++counter; + if (VERBOSE) { + Cerr << "UpdateCurrentTime(" << counter << "," << newTime << ")\n"; + } + TGuard<TMutex> guard(Mutex); + Y_VERIFY(!UseRealThreads); + if (newTime.MicroSeconds() > CurrentTimestamp) { + CurrentTimestamp = newTime.MicroSeconds(); + for (auto& kv : Nodes) { + AtomicStore(kv.second->ActorSystemTimestamp, CurrentTimestamp); + AtomicStore(kv.second->ActorSystemMonotonic, CurrentTimestamp); + } + } + } + + void TTestActorRuntimeBase::AdvanceCurrentTime(TDuration duration) { + UpdateCurrentTime(GetCurrentTime() + duration); + } + + TIntrusivePtr<ITimeProvider> TTestActorRuntimeBase::GetTimeProvider() { + Y_VERIFY(!UseRealThreads); + return TimeProvider; + } + + ui32 TTestActorRuntimeBase::GetNodeId(ui32 index) const { + Y_VERIFY(index < NodeCount); + return FirstNodeId + index; + } + + ui32 TTestActorRuntimeBase::GetNodeCount() const { + return NodeCount; + } + + ui64 TTestActorRuntimeBase::AllocateLocalId() { + TGuard<TMutex> guard(Mutex); + ui64 nextId = ++LocalId; + if (VERBOSE) { + Cerr << "Allocated id: " << nextId << "\n"; + } + + return nextId; + } + + ui32 TTestActorRuntimeBase::InterconnectPoolId() const { + if (UseRealThreads && NSan::TSanIsOn()) { + // Interconnect coroutines may move across threads + // Use a special single-threaded pool to avoid that + return 4; + } + return 0; + } + + TString TTestActorRuntimeBase::GetTempDir() { + if (!TmpDir) + TmpDir.Reset(new TTempDir()); + return (*TmpDir)(); + } + + TActorId TTestActorRuntimeBase::Register(IActor* actor, ui32 nodeIndex, ui32 poolId, TMailboxType::EType mailboxType, + ui64 revolvingCounter, const TActorId& parentId) { + Y_VERIFY(nodeIndex < NodeCount); + TGuard<TMutex> guard(Mutex); + TNodeDataBase* node = Nodes[FirstNodeId + nodeIndex].Get(); + if (UseRealThreads) { + Y_VERIFY(poolId < node->ExecutorPools.size()); + return node->ExecutorPools[poolId]->Register(actor, mailboxType, revolvingCounter, parentId); + } + + // first step - find good enough mailbox + ui32 hint = 0; + TMailboxHeader *mailbox = nullptr; + + { + ui32 hintBackoff = 0; + + while (hint == 0) { + hint = node->MailboxTable->AllocateMailbox(mailboxType, 
++revolvingCounter); + mailbox = node->MailboxTable->Get(hint); + + if (!mailbox->LockFromFree()) { + node->MailboxTable->ReclaimMailbox(mailboxType, hintBackoff, ++revolvingCounter); + hintBackoff = hint; + hint = 0; + } + } + + node->MailboxTable->ReclaimMailbox(mailboxType, hintBackoff, ++revolvingCounter); + } + + const ui64 localActorId = AllocateLocalId(); + if (VERBOSE) { + Cerr << "Register actor " << TypeName(*actor) << " as " << localActorId << ", mailbox: " << hint << "\n"; + } + + // ok, got mailbox + mailbox->AttachActor(localActorId, actor); + + // do init + const TActorId actorId(FirstNodeId + nodeIndex, poolId, localActorId, hint); + ActorNames[actorId] = TypeName(*actor); + RegistrationObserver(*this, parentId ? parentId : CurrentRecipient, actorId); + DoActorInit(node->ActorSystem.Get(), actor, actorId, parentId ? parentId : CurrentRecipient); + + switch (mailboxType) { + case TMailboxType::Simple: + UnlockFromExecution((TMailboxTable::TSimpleMailbox *)mailbox, node->ExecutorPools[0], false, hint, MaxWorkers, ++revolvingCounter); + break; + case TMailboxType::Revolving: + UnlockFromExecution((TMailboxTable::TRevolvingMailbox *)mailbox, node->ExecutorPools[0], false, hint, MaxWorkers, ++revolvingCounter); + break; + case TMailboxType::HTSwap: + UnlockFromExecution((TMailboxTable::THTSwapMailbox *)mailbox, node->ExecutorPools[0], false, hint, MaxWorkers, ++revolvingCounter); + break; + case TMailboxType::ReadAsFilled: + UnlockFromExecution((TMailboxTable::TReadAsFilledMailbox *)mailbox, node->ExecutorPools[0], false, hint, MaxWorkers, ++revolvingCounter); + break; + case TMailboxType::TinyReadAsFilled: + UnlockFromExecution((TMailboxTable::TTinyReadAsFilledMailbox *)mailbox, node->ExecutorPools[0], false, hint, MaxWorkers, ++revolvingCounter); + break; + default: + Y_FAIL("Unsupported mailbox type"); + } + + return actorId; + } + + TActorId TTestActorRuntimeBase::Register(IActor *actor, ui32 nodeIndex, ui32 poolId, TMailboxHeader *mailbox, ui32 hint, + const TActorId& parentId) { + Y_VERIFY(nodeIndex < NodeCount); + TGuard<TMutex> guard(Mutex); + TNodeDataBase* node = Nodes[FirstNodeId + nodeIndex].Get(); + if (UseRealThreads) { + Y_VERIFY(poolId < node->ExecutorPools.size()); + return node->ExecutorPools[poolId]->Register(actor, mailbox, hint, parentId); + } + + const ui64 localActorId = AllocateLocalId(); + if (VERBOSE) { + Cerr << "Register actor " << TypeName(*actor) << " as " << localActorId << "\n"; + } + + mailbox->AttachActor(localActorId, actor); + const TActorId actorId(FirstNodeId + nodeIndex, poolId, localActorId, hint); + ActorNames[actorId] = TypeName(*actor); + RegistrationObserver(*this, parentId ? parentId : CurrentRecipient, actorId); + DoActorInit(node->ActorSystem.Get(), actor, actorId, parentId ? 
parentId : CurrentRecipient); + + return actorId; + } + + TActorId TTestActorRuntimeBase::RegisterService(const TActorId& serviceId, const TActorId& actorId, ui32 nodeIndex) { + TGuard<TMutex> guard(Mutex); + Y_VERIFY(nodeIndex < NodeCount); + TNodeDataBase* node = Nodes[FirstNodeId + nodeIndex].Get(); + if (!UseRealThreads) { + IActor* actor = FindActor(actorId, node); + node->LocalServicesActors[serviceId] = actor; + node->ActorToActorId[actor] = actorId; + } + + return node->ActorSystem->RegisterLocalService(serviceId, actorId); + } + + TActorId TTestActorRuntimeBase::AllocateEdgeActor(ui32 nodeIndex) { + TGuard<TMutex> guard(Mutex); + Y_VERIFY(nodeIndex < NodeCount); + TActorId edgeActor = Register(new TEdgeActor(this), nodeIndex); + EdgeActors.insert(edgeActor); + EdgeActorByMailbox[TEventMailboxId(edgeActor.NodeId(), edgeActor.Hint())] = edgeActor; + return edgeActor; + } + + TEventsList TTestActorRuntimeBase::CaptureEvents() { + TGuard<TMutex> guard(Mutex); + TEventsList result; + for (auto& mbox : Mailboxes) { + mbox.second->Capture(result); + } + + return result; + } + + TEventsList TTestActorRuntimeBase::CaptureMailboxEvents(ui32 hint, ui32 nodeId) { + TGuard<TMutex> guard(Mutex); + Y_VERIFY(nodeId >= FirstNodeId && nodeId < FirstNodeId + NodeCount); + TEventsList result; + GetMailbox(nodeId, hint).Capture(result); + return result; + } + + void TTestActorRuntimeBase::PushFront(TAutoPtr<IEventHandle>& ev) { + TGuard<TMutex> guard(Mutex); + ui32 nodeId = ev->GetRecipientRewrite().NodeId(); + Y_VERIFY(nodeId != 0); + GetMailbox(nodeId, ev->GetRecipientRewrite().Hint()).PushFront(ev); + } + + void TTestActorRuntimeBase::PushEventsFront(TEventsList& events) { + TGuard<TMutex> guard(Mutex); + for (auto rit = events.rbegin(); rit != events.rend(); ++rit) { + if (*rit) { + auto& ev = *rit; + ui32 nodeId = ev->GetRecipientRewrite().NodeId(); + Y_VERIFY(nodeId != 0); + GetMailbox(nodeId, ev->GetRecipientRewrite().Hint()).PushFront(ev); + } + } + + events.clear(); + } + + void TTestActorRuntimeBase::PushMailboxEventsFront(ui32 hint, ui32 nodeId, TEventsList& events) { + TGuard<TMutex> guard(Mutex); + Y_VERIFY(nodeId >= FirstNodeId && nodeId < FirstNodeId + NodeCount); + TEventsList result; + GetMailbox(nodeId, hint).PushFront(events); + events.clear(); + } + + TScheduledEventsList TTestActorRuntimeBase::CaptureScheduledEvents() { + TGuard<TMutex> guard(Mutex); + TScheduledEventsList result; + for (auto& mbox : Mailboxes) { + mbox.second->CaptureScheduled(result); + } + + return result; + } + + bool TTestActorRuntimeBase::DispatchEvents(const TDispatchOptions& options) { + return DispatchEvents(options, TInstant::Max()); + } + + bool TTestActorRuntimeBase::DispatchEvents(const TDispatchOptions& options, TDuration simTimeout) { + return DispatchEvents(options, TInstant::MicroSeconds(CurrentTimestamp) + simTimeout); + } + + bool TTestActorRuntimeBase::DispatchEvents(const TDispatchOptions& options, TInstant simDeadline) { + TGuard<TMutex> guard(Mutex); + return DispatchEventsInternal(options, simDeadline); + } + + // Mutex must be locked by caller! 
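+    // Rough flow of the loop below: sweep the (possibly restricted) set of
+    // mailboxes starting from a random one, pass every event through ObserverFunc
+    // and deliver it via SendInternal; when no further progress is possible,
+    // promote scheduled events through ScheduledEventsSelectorFunc or advance
+    // virtual time, until a final condition is met, simDeadline passes, or
+    // DispatchTimeout expires (TEmptyEventQueueException).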
+ bool TTestActorRuntimeBase::DispatchEventsInternal(const TDispatchOptions& options, TInstant simDeadline) { + TDispatchContext localContext; + localContext.Options = &options; + localContext.PrevContext = nullptr; + bool verbose = !options.Quiet && VERBOSE; + + struct TDispatchContextSetter { + TDispatchContextSetter(TTestActorRuntimeBase& runtime, TDispatchContext& lastContext) + : Runtime(runtime) + { + lastContext.PrevContext = Runtime.CurrentDispatchContext; + Runtime.CurrentDispatchContext = &lastContext; + } + + ~TDispatchContextSetter() { + Runtime.CurrentDispatchContext = Runtime.CurrentDispatchContext->PrevContext; + } + + TTestActorRuntimeBase& Runtime; + } DispatchContextSetter(*this, localContext); + + TInstant dispatchTime = TInstant::MicroSeconds(0); + TInstant deadline = dispatchTime + DispatchTimeout; + const TDuration scheduledEventsInspectInterval = TDuration::MilliSeconds(10); + TInstant inspectScheduledEventsAt = dispatchTime + scheduledEventsInspectInterval; + if (verbose) { + Cerr << "Start dispatch at " << TInstant::MicroSeconds(CurrentTimestamp) << ", deadline is " << deadline << "\n"; + } + + struct TTempEdgeEventsCaptor { + TTempEdgeEventsCaptor(TTestActorRuntimeBase& runtime) + : Runtime(runtime) + , HasEvents(false) + { + for (auto edgeActor : Runtime.EdgeActors) { + TEventsList events; + Runtime.GetMailbox(edgeActor.NodeId(), edgeActor.Hint()).Capture(events); + auto mboxId = TEventMailboxId(edgeActor.NodeId(), edgeActor.Hint()); + auto storeIt = Store.find(mboxId); + Y_VERIFY(storeIt == Store.end()); + storeIt = Store.insert(std::make_pair(mboxId, new TEventMailBox)).first; + storeIt->second->PushFront(events); + if (!events.empty()) + HasEvents = true; + } + } + + ~TTempEdgeEventsCaptor() { + for (auto edgeActor : Runtime.EdgeActors) { + auto mboxId = TEventMailboxId(edgeActor.NodeId(), edgeActor.Hint()); + auto storeIt = Store.find(mboxId); + if (storeIt == Store.end()) { + continue; + } + + TEventsList events; + storeIt->second->Capture(events); + Runtime.GetMailbox(edgeActor.NodeId(), edgeActor.Hint()).PushFront(events); + } + } + + TTestActorRuntimeBase& Runtime; + TEventMailBoxList Store; + bool HasEvents; + }; + + TEventMailBoxList restrictedMailboxes; + const bool useRestrictedMailboxes = !options.OnlyMailboxes.empty(); + for (auto mailboxId : options.OnlyMailboxes) { + auto it = Mailboxes.find(mailboxId); + if (it == Mailboxes.end()) { + it = Mailboxes.insert(std::make_pair(mailboxId, new TEventMailBox())).first; + } + + restrictedMailboxes.insert(std::make_pair(mailboxId, it->second)); + } + + TAutoPtr<TTempEdgeEventsCaptor> tempEdgeEventsCaptor; + if (!restrictedMailboxes) { + tempEdgeEventsCaptor.Reset(new TTempEdgeEventsCaptor(*this)); + } + + TEventMailBoxList& currentMailboxes = useRestrictedMailboxes ? restrictedMailboxes : Mailboxes; + while (!currentMailboxes.empty()) { + bool hasProgress = true; + while (hasProgress) { + ++DispatchCyclesCount; + hasProgress = false; + + ui64 eventsToDispatch = 0; + for (auto mboxIt = currentMailboxes.begin(); mboxIt != currentMailboxes.end(); ++mboxIt) { + if (mboxIt->second->IsActive(TInstant::MicroSeconds(CurrentTimestamp))) { + eventsToDispatch += mboxIt->second->GetSentEventCount(); + } + } + ui32 eventsDispatched = 0; + + //TODO: count events before each cycle, break after dispatching that much events + bool isEmpty = false; + while (!isEmpty && eventsDispatched < eventsToDispatch) { + ui64 mailboxCount = currentMailboxes.size(); + ui64 startWith = mailboxCount ? 
DispatcherRandomProvider->GenRand64() % mailboxCount : 0ull; + auto startWithMboxIt = currentMailboxes.begin(); + for (ui64 i = 0; i < startWith; ++i) { + ++startWithMboxIt; + } + auto endWithMboxIt = startWithMboxIt; + + isEmpty = true; + auto mboxIt = startWithMboxIt; + TDeque<TEventMailboxId> suspectedBoxes; + while (true) { + auto& mbox = *mboxIt; + bool isIgnored = true; + if (!mbox.second->IsEmpty()) { + HandleNonEmptyMailboxesForEachContext(mbox.first); + if (mbox.second->IsActive(TInstant::MicroSeconds(CurrentTimestamp))) { + + bool isEdgeMailbox = false; + if (EdgeActorByMailbox.FindPtr(TEventMailboxId(mbox.first.NodeId, mbox.first.Hint))) { + isEdgeMailbox = true; + TEventsList events; + mbox.second->Capture(events); + for (auto& ev : events) { + TInverseGuard<TMutex> inverseGuard(Mutex); + ObserverFunc(*this, ev); + } + mbox.second->PushFront(events); + } + + if (!isEdgeMailbox) { + isEmpty = false; + isIgnored = false; + ++eventsDispatched; + ++DispatchedEventsCount; + if (DispatchedEventsCount > DispatchedEventsLimit) { + ythrow TWithBackTrace<yexception>() << "Dispatched " + << DispatchedEventsLimit << " events, limit reached."; + } + + auto ev = mbox.second->Pop(); + if (BlockedOutput.find(ev->Sender) == BlockedOutput.end()) { + //UpdateCurrentTime(TInstant::MicroSeconds(CurrentTimestamp + 10)); + if (verbose) { + Cerr << "Process event at " << TInstant::MicroSeconds(CurrentTimestamp) << ", "; + PrintEvent(ev, this); + } + } + + hasProgress = true; + EEventAction action; + { + TInverseGuard<TMutex> inverseGuard(Mutex); + action = ObserverFunc(*this, ev); + } + + switch (action) { + case EEventAction::PROCESS: + UpdateFinalEventsStatsForEachContext(*ev); + SendInternal(ev.Release(), mbox.first.NodeId - FirstNodeId, false); + break; + case EEventAction::DROP: + // do nothing + break; + case EEventAction::RESCHEDULE: { + TInstant deadline = TInstant::MicroSeconds(CurrentTimestamp) + ReschedulingDelay; + mbox.second->Freeze(deadline); + mbox.second->PushFront(ev); + break; + } + default: + Y_FAIL("Unknown action"); + } + } + } + + } + Y_VERIFY(mboxIt != currentMailboxes.end()); + if (!isIgnored && !CurrentDispatchContext->PrevContext && !restrictedMailboxes && + mboxIt->second->IsEmpty() && + mboxIt->second->IsScheduledEmpty() && + mboxIt->second->IsActive(TInstant::MicroSeconds(CurrentTimestamp))) { + suspectedBoxes.push_back(mboxIt->first); + } + ++mboxIt; + if (mboxIt == currentMailboxes.end()) { + mboxIt = currentMailboxes.begin(); + } + Y_VERIFY(endWithMboxIt != currentMailboxes.end()); + if (mboxIt == endWithMboxIt) { + break; + } + } + + for (auto id : suspectedBoxes) { + auto it = currentMailboxes.find(id); + if (it != currentMailboxes.end() && it->second->IsEmpty() && it->second->IsScheduledEmpty() && + it->second->IsActive(TInstant::MicroSeconds(CurrentTimestamp))) { + currentMailboxes.erase(it); + } + } + } + } + + if (localContext.FinalEventFound) { + return true; + } + + if (!localContext.FoundNonEmptyMailboxes.empty()) + return true; + + if (options.CustomFinalCondition && options.CustomFinalCondition()) + return true; + + if (options.FinalEvents.empty()) { + for (auto& mbox : currentMailboxes) { + if (!mbox.second->IsActive(TInstant::MicroSeconds(CurrentTimestamp))) + continue; + + if (!mbox.second->IsEmpty()) { + if (verbose) { + Cerr << "Dispatch complete with non-empty queue at " << TInstant::MicroSeconds(CurrentTimestamp) << "\n"; + } + + return true; + } + } + } + + if (TInstant::MicroSeconds(CurrentTimestamp) > simDeadline) { + return false; + } + + if 
(dispatchTime >= deadline) { + if (verbose) { + Cerr << "Reach deadline at " << TInstant::MicroSeconds(CurrentTimestamp) << "\n"; + } + + ythrow TWithBackTrace<TEmptyEventQueueException>(); + } + + if (!options.Quiet && dispatchTime >= inspectScheduledEventsAt) { + inspectScheduledEventsAt = dispatchTime + scheduledEventsInspectInterval; + bool isEmpty = true; + TMaybe<TInstant> nearestMailboxDeadline; + TVector<TIntrusivePtr<TEventMailBox>> nextScheduleMboxes; + TMaybe<TInstant> nextScheduleDeadline; + for (auto& mbox : currentMailboxes) { + if (!mbox.second->IsActive(TInstant::MicroSeconds(CurrentTimestamp))) { + if (!nearestMailboxDeadline.Defined() || *nearestMailboxDeadline.Get() > mbox.second->GetInactiveUntil()) { + nearestMailboxDeadline = mbox.second->GetInactiveUntil(); + } + + continue; + } + + if (mbox.second->IsScheduledEmpty()) + continue; + + auto firstScheduleDeadline = mbox.second->GetFirstScheduleDeadline(); + if (!nextScheduleDeadline || firstScheduleDeadline < *nextScheduleDeadline) { + nextScheduleMboxes.clear(); + nextScheduleMboxes.emplace_back(mbox.second); + nextScheduleDeadline = firstScheduleDeadline; + } else if (firstScheduleDeadline == *nextScheduleDeadline) { + nextScheduleMboxes.emplace_back(mbox.second); + } + } + + for (const auto& nextScheduleMbox : nextScheduleMboxes) { + TEventsList selectedEvents; + TScheduledEventsList capturedScheduledEvents; + nextScheduleMbox->CaptureScheduled(capturedScheduledEvents); + ScheduledEventsSelectorFunc(*this, capturedScheduledEvents, selectedEvents); + nextScheduleMbox->PushScheduled(capturedScheduledEvents); + for (auto& event : selectedEvents) { + if (verbose && (BlockedOutput.find(event->Sender) == BlockedOutput.end())) { + Cerr << "Selected scheduled event at " << TInstant::MicroSeconds(CurrentTimestamp) << ", "; + PrintEvent(event, this); + } + + nextScheduleMbox->Send(event); + isEmpty = false; + } + } + + if (!isEmpty) { + if (verbose) { + Cerr << "Process selected events at " << TInstant::MicroSeconds(CurrentTimestamp) << "\n"; + } + + deadline = dispatchTime + DispatchTimeout; + continue; + } + + if (nearestMailboxDeadline.Defined()) { + if (verbose) { + Cerr << "Forward time to " << *nearestMailboxDeadline.Get() << "\n"; + } + + UpdateCurrentTime(*nearestMailboxDeadline.Get()); + continue; + } + } + + TDuration waitDelay = TDuration::MilliSeconds(10); + dispatchTime += waitDelay; + MailboxesHasEvents.WaitT(Mutex, waitDelay); + } + return false; + } + + void TTestActorRuntimeBase::HandleNonEmptyMailboxesForEachContext(TEventMailboxId mboxId) { + TDispatchContext* context = CurrentDispatchContext; + while (context) { + const auto& nonEmptyMailboxes = context->Options->NonEmptyMailboxes; + if (Find(nonEmptyMailboxes.begin(), nonEmptyMailboxes.end(), mboxId) != nonEmptyMailboxes.end()) { + context->FoundNonEmptyMailboxes.insert(mboxId); + } + + context = context->PrevContext; + } + } + + void TTestActorRuntimeBase::UpdateFinalEventsStatsForEachContext(IEventHandle& ev) { + TDispatchContext* context = CurrentDispatchContext; + while (context) { + for (const auto& finalEvent : context->Options->FinalEvents) { + if (finalEvent.EventCheck(ev)) { + auto& freq = context->FinalEventFrequency[&finalEvent]; + if (++freq >= finalEvent.RequiredCount) { + context->FinalEventFound = true; + } + } + } + + context = context->PrevContext; + } + } + + void TTestActorRuntimeBase::Send(IEventHandle* ev, ui32 senderNodeIndex, bool viaActorSystem) { + TGuard<TMutex> guard(Mutex); + Y_VERIFY(senderNodeIndex < NodeCount, 
"senderNodeIndex# %" PRIu32 " < NodeCount# %" PRIu32, + senderNodeIndex, NodeCount); + SendInternal(ev, senderNodeIndex, viaActorSystem); + } + + void TTestActorRuntimeBase::Schedule(IEventHandle* ev, const TDuration& duration, ui32 nodeIndex) { + TGuard<TMutex> guard(Mutex); + Y_VERIFY(nodeIndex < NodeCount); + ui32 nodeId = FirstNodeId + nodeIndex; + ui32 mailboxHint = ev->GetRecipientRewrite().Hint(); + TInstant deadline = TInstant::MicroSeconds(CurrentTimestamp) + duration; + GetMailbox(nodeId, mailboxHint).Schedule(TScheduledEventQueueItem(deadline, ev, nullptr)); + if (VERBOSE) + Cerr << "Event was added to scheduled queue\n"; + } + + void TTestActorRuntimeBase::ClearCounters() { + TGuard<TMutex> guard(Mutex); + EvCounters.clear(); + } + + ui64 TTestActorRuntimeBase::GetCounter(ui32 evType) const { + TGuard<TMutex> guard(Mutex); + auto it = EvCounters.find(evType); + if (it == EvCounters.end()) + return 0; + + return it->second; + } + + TActorId TTestActorRuntimeBase::GetLocalServiceId(const TActorId& serviceId, ui32 nodeIndex) { + TGuard<TMutex> guard(Mutex); + Y_VERIFY(nodeIndex < NodeCount); + TNodeDataBase* node = Nodes[FirstNodeId + nodeIndex].Get(); + return node->ActorSystem->LookupLocalService(serviceId); + } + + void TTestActorRuntimeBase::WaitForEdgeEvents(TEventFilter filter, const TSet<TActorId>& edgeFilter, TDuration simTimeout) { + TGuard<TMutex> guard(Mutex); + ui32 dispatchCount = 0; + if (!edgeFilter.empty()) { + for (auto edgeActor : edgeFilter) { + Y_VERIFY(EdgeActors.contains(edgeActor), "%s is not an edge actor", ToString(edgeActor).data()); + } + } + const TSet<TActorId>& edgeActors = edgeFilter.empty() ? EdgeActors : edgeFilter; + TInstant deadline = TInstant::MicroSeconds(CurrentTimestamp) + simTimeout; + for (;;) { + for (auto edgeActor : edgeActors) { + TEventsList events; + auto& mbox = GetMailbox(edgeActor.NodeId(), edgeActor.Hint()); + bool foundEvent = false; + mbox.Capture(events); + for (auto& ev : events) { + if (filter(*this, ev)) { + foundEvent = true; + break; + } + } + + mbox.PushFront(events); + if (foundEvent) + return; + } + + ++dispatchCount; + { + if (!DispatchEventsInternal(TDispatchOptions(), deadline)) { + return; // Timed out; event was not found + } + } + + Y_VERIFY(dispatchCount < 1000, "Hard limit to prevent endless loop"); + } + } + + TActorId TTestActorRuntimeBase::GetInterconnectProxy(ui32 nodeIndexFrom, ui32 nodeIndexTo) { + TGuard<TMutex> guard(Mutex); + Y_VERIFY(nodeIndexFrom < NodeCount); + Y_VERIFY(nodeIndexTo < NodeCount); + Y_VERIFY(nodeIndexFrom != nodeIndexTo); + TNodeDataBase* node = Nodes[FirstNodeId + nodeIndexFrom].Get(); + return node->ActorSystem->InterconnectProxy(FirstNodeId + nodeIndexTo); + } + + void TTestActorRuntimeBase::BlockOutputForActor(const TActorId& actorId) { + TGuard<TMutex> guard(Mutex); + BlockedOutput.insert(actorId); + } + + void TTestActorRuntimeBase::SetDispatcherRandomSeed(TInstant time, ui64 iteration) { + ui64 days = (time.Hours() / 24); + DispatcherRandomSeed = (days << 32) ^ iteration; + DispatcherRandomProvider = CreateDeterministicRandomProvider(DispatcherRandomSeed); + } + + IActor* TTestActorRuntimeBase::FindActor(const TActorId& actorId, ui32 nodeIndex) const { + TGuard<TMutex> guard(Mutex); + if (nodeIndex == Max<ui32>()) { + Y_VERIFY(actorId.NodeId()); + nodeIndex = actorId.NodeId() - FirstNodeId; + } + + Y_VERIFY(nodeIndex < NodeCount); + auto nodeIt = Nodes.find(FirstNodeId + nodeIndex); + Y_VERIFY(nodeIt != Nodes.end()); + TNodeDataBase* node = nodeIt->second.Get(); + return 
FindActor(actorId, node);
+    }
+
+    void TTestActorRuntimeBase::EnableScheduleForActor(const TActorId& actorId, bool allow) {
+        TGuard<TMutex> guard(Mutex);
+        if (allow) {
+            if (VERBOSE) {
+                Cerr << "Actor " << actorId << " added to schedule whitelist\n";
+            }
+            ScheduleWhiteList.insert(actorId);
+        } else {
+            if (VERBOSE) {
+                Cerr << "Actor " << actorId << " removed from schedule whitelist\n";
+            }
+            ScheduleWhiteList.erase(actorId);
+        }
+    }
+
+    bool TTestActorRuntimeBase::IsScheduleForActorEnabled(const TActorId& actorId) const {
+        TGuard<TMutex> guard(Mutex);
+        return ScheduleWhiteList.find(actorId) != ScheduleWhiteList.end();
+    }
+
+    TIntrusivePtr<NMonitoring::TDynamicCounters> TTestActorRuntimeBase::GetDynamicCounters(ui32 nodeIndex) {
+        TGuard<TMutex> guard(Mutex);
+        Y_VERIFY(nodeIndex < NodeCount);
+        ui32 nodeId = FirstNodeId + nodeIndex;
+        TNodeDataBase* node = Nodes[nodeId].Get();
+        return node->DynamicCounters;
+    }
+
+    void TTestActorRuntimeBase::SetupMonitoring() {
+        NeedMonitoring = true;
+    }
+
+    void TTestActorRuntimeBase::SendInternal(IEventHandle* ev, ui32 nodeIndex, bool viaActorSystem) {
+        Y_VERIFY(nodeIndex < NodeCount);
+        ui32 nodeId = FirstNodeId + nodeIndex;
+        TNodeDataBase* node = Nodes[nodeId].Get();
+        ui32 targetNode = ev->GetRecipientRewrite().NodeId();
+        ui32 targetNodeIndex;
+        if (targetNode == 0) {
+            targetNodeIndex = nodeIndex;
+        } else {
+            targetNodeIndex = targetNode - FirstNodeId;
+            Y_VERIFY(targetNodeIndex < NodeCount);
+        }
+
+        if (viaActorSystem || UseRealThreads || ev->GetRecipientRewrite().IsService() || (targetNodeIndex != nodeIndex)) {
+            node->ActorSystem->Send(ev);
+            return;
+        }
+
+        Y_VERIFY(!ev->GetRecipientRewrite().IsService() && (targetNodeIndex == nodeIndex));
+        TAutoPtr<IEventHandle> evHolder(ev);
+
+        if (!AllowSendFrom(node, evHolder)) {
+            return;
+        }
+
+        ui32 mailboxHint = ev->GetRecipientRewrite().Hint();
+        TEventMailBox& mbox = GetMailbox(nodeId, mailboxHint);
+        if (!mbox.IsActive(TInstant::MicroSeconds(CurrentTimestamp))) {
+            mbox.PushFront(evHolder);
+            return;
+        }
+
+        ui64 recipientLocalId = ev->GetRecipientRewrite().LocalId();
+        if ((BlockedOutput.find(ev->Sender) == BlockedOutput.end()) && VERBOSE) {
+            Cerr << "Send event, ";
+            PrintEvent(evHolder, this);
+        }
+
+        EvCounters[ev->GetTypeRewrite()]++;
+
+        TMailboxHeader* mailbox = node->MailboxTable->Get(mailboxHint);
+        IActor* recipientActor = mailbox->FindActor(recipientLocalId);
+        if (recipientActor) {
+            // Save actorId by value in order to prevent ctx from being invalidated during another Send call.
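+            // The runtime mutex is released around Receive() (TInverseGuard below),
+            // so the handler itself may call back into the runtime, e.g. to
+            // Register actors or Send further events, without deadlocking.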
+ TActorId actorId = ev->GetRecipientRewrite(); + node->ActorToActorId[recipientActor] = ev->GetRecipientRewrite(); + TActorContext ctx(*mailbox, *node->ExecutorThread, GetCycleCountFast(), actorId); + TActivationContext *prevTlsActivationContext = TlsActivationContext; + TlsActivationContext = &ctx; + CurrentRecipient = actorId; + { + TInverseGuard<TMutex> inverseGuard(Mutex); +#ifdef USE_ACTOR_CALLSTACK + TCallstack::GetTlsCallstack() = ev->Callstack; + TCallstack::GetTlsCallstack().SetLinesToSkip(); +#endif + recipientActor->Receive(evHolder, ctx); + node->ExecutorThread->DropUnregistered(); + } + CurrentRecipient = TActorId(); + TlsActivationContext = prevTlsActivationContext; + } else { + if (VERBOSE) { + Cerr << "Failed to find actor with local id: " << recipientLocalId << "\n"; + } + + auto forwardedEv = ev->ForwardOnNondelivery(TEvents::TEvUndelivered::ReasonActorUnknown); + if (!!forwardedEv) { + node->ActorSystem->Send(forwardedEv); + } + } + } + + IActor* TTestActorRuntimeBase::FindActor(const TActorId& actorId, TNodeDataBase* node) const { + ui32 mailboxHint = actorId.Hint(); + ui64 localId = actorId.LocalId(); + TMailboxHeader* mailbox = node->MailboxTable->Get(mailboxHint); + IActor* actor = mailbox->FindActor(localId); + return actor; + } + + THolder<TActorSystemSetup> TTestActorRuntimeBase::MakeActorSystemSetup(ui32 nodeIndex, TNodeDataBase* node) { + THolder<TActorSystemSetup> setup(new TActorSystemSetup); + setup->NodeId = FirstNodeId + nodeIndex; + + if (UseRealThreads) { + setup->ExecutorsCount = 5; + setup->Executors.Reset(new TAutoPtr<IExecutorPool>[5]); + setup->Executors[0].Reset(new TBasicExecutorPool(0, 2, 20)); + setup->Executors[1].Reset(new TBasicExecutorPool(1, 2, 20)); + setup->Executors[2].Reset(new TIOExecutorPool(2, 1)); + setup->Executors[3].Reset(new TBasicExecutorPool(3, 2, 20)); + setup->Executors[4].Reset(new TBasicExecutorPool(4, 1, 20)); + setup->Scheduler.Reset(new TBasicSchedulerThread(TSchedulerConfig(512, 100))); + } else { + setup->ExecutorsCount = 1; + setup->Scheduler.Reset(new TSchedulerThreadStub(this, node)); + setup->Executors.Reset(new TAutoPtr<IExecutorPool>[1]); + setup->Executors[0].Reset(new TExecutorPoolStub(this, nodeIndex, node, 0)); + } + + InitActorSystemSetup(*setup); + + return setup; + } + + THolder<TActorSystem> TTestActorRuntimeBase::MakeActorSystem(ui32 nodeIndex, TNodeDataBase* node) { + auto setup = MakeActorSystemSetup(nodeIndex, node); + + node->ExecutorPools.resize(setup->ExecutorsCount); + for (ui32 i = 0; i < setup->ExecutorsCount; ++i) { + node->ExecutorPools[i] = setup->Executors[i].Get(); + } + + const auto& interconnectCounters = GetCountersForComponent(node->DynamicCounters, "interconnect"); + + setup->LocalServices = node->LocalServices; + setup->Interconnect.ProxyActors.resize(FirstNodeId + NodeCount); + const TActorId nameserviceId = GetNameserviceActorId(); + + TIntrusivePtr<TInterconnectProxyCommon> common; + common.Reset(new TInterconnectProxyCommon); + common->NameserviceId = nameserviceId; + common->MonCounters = interconnectCounters; + common->TechnicalSelfHostName = "::1"; + + if (!UseRealThreads) { + common->Settings.DeadPeer = TDuration::Max(); + common->Settings.CloseOnIdle = TDuration::Max(); + common->Settings.PingPeriod = TDuration::Max(); + common->Settings.ForceConfirmPeriod = TDuration::Max(); + common->Settings.Handshake = TDuration::Max(); + } + + common->ClusterUUID = ClusterUUID; + common->AcceptUUID = {ClusterUUID}; + + for (ui32 proxyNodeIndex = 0; proxyNodeIndex < NodeCount; 
++proxyNodeIndex) { + if (proxyNodeIndex == nodeIndex) + continue; + + const ui32 peerNodeId = FirstNodeId + proxyNodeIndex; + + IActor *proxyActor = UseRealInterconnect + ? new TInterconnectProxyTCP(peerNodeId, common) + : InterconnectMock.CreateProxyMock(setup->NodeId, peerNodeId, common); + + setup->Interconnect.ProxyActors[peerNodeId] = {proxyActor, TMailboxType::ReadAsFilled, InterconnectPoolId()}; + } + + setup->Interconnect.ProxyWrapperFactory = CreateProxyWrapperFactory(common, InterconnectPoolId(), &InterconnectMock); + + if (UseRealInterconnect) { + setup->LocalServices.emplace_back(MakePollerActorId(), NActors::TActorSetupCmd(CreatePollerActor(), + NActors::TMailboxType::Simple, InterconnectPoolId())); + } + + if (!SingleSysEnv) { // Single system env should do this self + TAutoPtr<TLogBackend> logBackend = LogBackend ? LogBackend : NActors::CreateStderrBackend(); + NActors::TLoggerActor *loggerActor = new NActors::TLoggerActor(node->LogSettings, + logBackend, GetCountersForComponent(node->DynamicCounters, "utils")); + NActors::TActorSetupCmd loggerActorCmd(loggerActor, NActors::TMailboxType::Simple, node->GetLoggerPoolId()); + std::pair<NActors::TActorId, NActors::TActorSetupCmd> loggerActorPair(node->LogSettings->LoggerActorId, loggerActorCmd); + setup->LocalServices.push_back(loggerActorPair); + } + + return THolder<TActorSystem>(new TActorSystem(setup, node->GetAppData(), node->LogSettings)); + } + + TActorSystem* TTestActorRuntimeBase::SingleSys() const { + Y_VERIFY(Nodes.size() == 1, "Works only for single system env"); + + return Nodes.begin()->second->ActorSystem.Get(); + } + + TActorSystem* TTestActorRuntimeBase::GetAnyNodeActorSystem() { + for (auto& x : Nodes) { + return x.second->ActorSystem.Get(); + } + Y_FAIL("Don't use this method."); + } + + TActorSystem* TTestActorRuntimeBase::GetActorSystem(ui32 nodeId) { + auto it = Nodes.find(GetNodeId(nodeId)); + Y_VERIFY(it != Nodes.end()); + return it->second->ActorSystem.Get(); + } + + + TEventMailBox& TTestActorRuntimeBase::GetMailbox(ui32 nodeId, ui32 hint) { + TGuard<TMutex> guard(Mutex); + auto mboxId = TEventMailboxId(nodeId, hint); + auto it = Mailboxes.find(mboxId); + if (it == Mailboxes.end()) { + it = Mailboxes.insert(std::make_pair(mboxId, new TEventMailBox())).first; + } + + return *it->second; + } + + void TTestActorRuntimeBase::ClearMailbox(ui32 nodeId, ui32 hint) { + TGuard<TMutex> guard(Mutex); + auto mboxId = TEventMailboxId(nodeId, hint); + Mailboxes.erase(mboxId); + } + + TString TTestActorRuntimeBase::GetActorName(const TActorId& actorId) const { + auto it = ActorNames.find(actorId); + if (it != ActorNames.end()) + return it->second; + return actorId.ToString(); + } + + struct TStrandingActorDecoratorContext : public TThrRefBase { + TStrandingActorDecoratorContext() + : Queue(new TQueueType) + { + } + + typedef TOneOneQueueInplace<IEventHandle*, 32> TQueueType; + TAutoPtr<TQueueType, TQueueType::TPtrCleanDestructor> Queue; + }; + + class TStrandingActorDecorator : public TActorBootstrapped<TStrandingActorDecorator> { + public: + class TReplyActor : public TActor<TReplyActor> { + public: + static constexpr EActivityType ActorActivityType() { + return TEST_ACTOR_RUNTIME; + } + + TReplyActor(TStrandingActorDecorator* owner) + : TActor(&TReplyActor::StateFunc) + , Owner(owner) + { + } + + STFUNC(StateFunc); + + private: + TStrandingActorDecorator* const Owner; + }; + + static constexpr EActivityType ActorActivityType() { + return TEST_ACTOR_RUNTIME; + } + + TStrandingActorDecorator(const TActorId& 
delegatee, bool isSync, const TVector<TActorId>& additionalActors, + TSimpleSharedPtr<TStrandingActorDecoratorContext> context, TTestActorRuntimeBase* runtime, + TReplyCheckerCreator createReplyChecker) + : Delegatee(delegatee) + , IsSync(isSync) + , AdditionalActors(additionalActors) + , Context(context) + , HasReply(false) + , Runtime(runtime) + , ReplyChecker(createReplyChecker()) + { + if (IsSync) { + Y_VERIFY(!runtime->IsRealThreads()); + } + } + + void Bootstrap(const TActorContext& ctx) { + Become(&TStrandingActorDecorator::StateFunc); + ReplyId = ctx.RegisterWithSameMailbox(new TReplyActor(this)); + DelegateeOptions.OnlyMailboxes.push_back(TEventMailboxId(Delegatee.NodeId(), Delegatee.Hint())); + for (const auto& actor : AdditionalActors) { + DelegateeOptions.OnlyMailboxes.push_back(TEventMailboxId(actor.NodeId(), actor.Hint())); + } + + DelegateeOptions.OnlyMailboxes.push_back(TEventMailboxId(ReplyId.NodeId(), ReplyId.Hint())); + DelegateeOptions.NonEmptyMailboxes.push_back(TEventMailboxId(ReplyId.NodeId(), ReplyId.Hint())); + DelegateeOptions.Quiet = true; + } + + STFUNC(StateFunc) { + bool wasEmpty = !Context->Queue->Head(); + Context->Queue->Push(ev.Release()); + if (wasEmpty) { + SendHead(ctx); + } + } + + STFUNC(Reply) { + Y_VERIFY(!HasReply); + IEventHandle *requestEv = Context->Queue->Head(); + TActorId originalSender = requestEv->Sender; + HasReply = !ReplyChecker->IsWaitingForMoreResponses(ev.Get()); + if (HasReply) { + delete Context->Queue->Pop(); + } + ctx.ExecutorThread.Send(ev->Forward(originalSender)); + if (!IsSync && Context->Queue->Head()) { + SendHead(ctx); + } + } + + private: + void SendHead(const TActorContext& ctx) { + if (!IsSync) { + ctx.ExecutorThread.Send(GetForwardedEvent().Release()); + } else { + while (Context->Queue->Head()) { + HasReply = false; + ctx.ExecutorThread.Send(GetForwardedEvent().Release()); + int count = 100; + while (!HasReply && count > 0) { + try { + Runtime->DispatchEvents(DelegateeOptions); + } catch (TEmptyEventQueueException&) { + count--; + Cerr << "No reply" << Endl; + } + } + + Runtime->UpdateCurrentTime(Runtime->GetCurrentTime() + TDuration::MicroSeconds(1000)); + } + } + } + + TAutoPtr<IEventHandle> GetForwardedEvent() { + IEventHandle* ev = Context->Queue->Head(); + ReplyChecker->OnRequest(ev); + TAutoPtr<IEventHandle> forwardedEv = ev->HasEvent() + ? 
new IEventHandle(Delegatee, ReplyId, ev->ReleaseBase().Release(), ev->Flags, ev->Cookie) + : new IEventHandle(ev->GetTypeRewrite(), ev->Flags, Delegatee, ReplyId, ev->ReleaseChainBuffer(), ev->Cookie); + + return forwardedEv; + } + private: + const TActorId Delegatee; + const bool IsSync; + const TVector<TActorId> AdditionalActors; + TSimpleSharedPtr<TStrandingActorDecoratorContext> Context; + TActorId ReplyId; + bool HasReply; + TDispatchOptions DelegateeOptions; + TTestActorRuntimeBase* Runtime; + THolder<IReplyChecker> ReplyChecker; + }; + + void TStrandingActorDecorator::TReplyActor::StateFunc(STFUNC_SIG) { + Owner->Reply(ev, ctx); + } + + class TStrandingDecoratorFactory : public IStrandingDecoratorFactory { + public: + TStrandingDecoratorFactory(TTestActorRuntimeBase* runtime, + TReplyCheckerCreator createReplyChecker) + : Context(new TStrandingActorDecoratorContext()) + , Runtime(runtime) + , CreateReplyChecker(createReplyChecker) + { + } + + IActor* Wrap(const TActorId& delegatee, bool isSync, const TVector<TActorId>& additionalActors) override { + return new TStrandingActorDecorator(delegatee, isSync, additionalActors, Context, Runtime, + CreateReplyChecker); + } + + private: + TSimpleSharedPtr<TStrandingActorDecoratorContext> Context; + TTestActorRuntimeBase* Runtime; + TReplyCheckerCreator CreateReplyChecker; + }; + + TAutoPtr<IStrandingDecoratorFactory> CreateStrandingDecoratorFactory(TTestActorRuntimeBase* runtime, + TReplyCheckerCreator createReplyChecker) { + return TAutoPtr<IStrandingDecoratorFactory>(new TStrandingDecoratorFactory(runtime, createReplyChecker)); + } + + ui64 DefaultRandomSeed = 9999; +} diff --git a/library/cpp/actors/testlib/test_runtime.h b/library/cpp/actors/testlib/test_runtime.h new file mode 100644 index 0000000000..26e3b45c98 --- /dev/null +++ b/library/cpp/actors/testlib/test_runtime.h @@ -0,0 +1,716 @@ +#pragma once + +#include <library/cpp/actors/core/actor.h> +#include <library/cpp/actors/core/actorsystem.h> +#include <library/cpp/actors/core/log.h> +#include <library/cpp/actors/core/events.h> +#include <library/cpp/actors/core/executor_thread.h> +#include <library/cpp/actors/core/mailbox.h> +#include <library/cpp/actors/util/should_continue.h> +#include <library/cpp/actors/interconnect/poller_tcp.h> +#include <library/cpp/actors/interconnect/mock/ic_mock.h> +#include <library/cpp/random_provider/random_provider.h> +#include <library/cpp/time_provider/time_provider.h> +#include <library/cpp/testing/unittest/tests_data.h> + +#include <util/datetime/base.h> +#include <util/folder/tempdir.h> +#include <util/generic/deque.h> +#include <util/generic/hash.h> +#include <util/generic/noncopyable.h> +#include <util/generic/ptr.h> +#include <util/generic/queue.h> +#include <util/generic/set.h> +#include <util/generic/vector.h> +#include <util/system/defaults.h> +#include <util/system/mutex.h> +#include <util/system/condvar.h> +#include <util/system/thread.h> +#include <util/system/sanitizers.h> +#include <util/system/valgrind.h> +#include <utility> + +#include <functional> + +const TDuration DEFAULT_DISPATCH_TIMEOUT = NSan::PlainOrUnderSanitizer( + NValgrind::PlainOrUnderValgrind(TDuration::Seconds(60), TDuration::Seconds(120)), + TDuration::Seconds(120) +); + + +namespace NActors { + struct THeSingleSystemEnv { }; + + struct TEventMailboxId { + TEventMailboxId() + : NodeId(0) + , Hint(0) + { + } + + TEventMailboxId(ui32 nodeId, ui32 hint) + : NodeId(nodeId) + , Hint(hint) + { + } + + bool operator<(const TEventMailboxId& other) const { + return (NodeId < 
other.NodeId) || (NodeId == other.NodeId) && (Hint < other.Hint); + } + + bool operator==(const TEventMailboxId& other) const { + return (NodeId == other.NodeId) && (Hint == other.Hint); + } + + struct THash { + ui64 operator()(const TEventMailboxId& mboxId) const noexcept { + return mboxId.NodeId * 31ULL + mboxId.Hint; + } + }; + + ui32 NodeId; + ui32 Hint; + }; + + struct TDispatchOptions { + struct TFinalEventCondition { + std::function<bool(IEventHandle& ev)> EventCheck; + ui32 RequiredCount; + + TFinalEventCondition(ui32 eventType, ui32 requiredCount = 1) + : EventCheck([eventType](IEventHandle& ev) -> bool { return ev.GetTypeRewrite() == eventType; }) + , RequiredCount(requiredCount) + { + } + + TFinalEventCondition(std::function<bool(IEventHandle& ev)> eventCheck, ui32 requiredCount = 1) + : EventCheck(eventCheck) + , RequiredCount(requiredCount) + { + } + }; + + TVector<TFinalEventCondition> FinalEvents; + TVector<TEventMailboxId> NonEmptyMailboxes; + TVector<TEventMailboxId> OnlyMailboxes; + std::function<bool()> CustomFinalCondition; + bool Quiet = false; + }; + + struct TScheduledEventQueueItem { + TInstant Deadline; + TAutoPtr<IEventHandle> Event; + TAutoPtr<TSchedulerCookieHolder> Cookie; + ui64 UniqueId; + + TScheduledEventQueueItem(TInstant deadline, TAutoPtr<IEventHandle> event, ISchedulerCookie* cookie) + : Deadline(deadline) + , Event(event) + , Cookie(new TSchedulerCookieHolder(cookie)) + , UniqueId(++NextUniqueId) + {} + + bool operator<(const TScheduledEventQueueItem& other) const { + if (Deadline < other.Deadline) + return true; + + if (Deadline > other.Deadline) + return false; + + return UniqueId < other.UniqueId; + } + + static ui64 NextUniqueId; + }; + + typedef TDeque<TAutoPtr<IEventHandle>> TEventsList; + typedef TSet<TScheduledEventQueueItem> TScheduledEventsList; + + class TEventMailBox : public TThrRefBase { + public: + TEventMailBox() + : InactiveUntil(TInstant::MicroSeconds(0)) +#ifdef DEBUG_ORDER_EVENTS + , ExpectedReceive(0) + , NextToSend(0) +#endif + { + } + + void Send(TAutoPtr<IEventHandle> ev); + bool IsEmpty() const; + TAutoPtr<IEventHandle> Pop(); + void Capture(TEventsList& evList); + void PushFront(TAutoPtr<IEventHandle>& ev); + void PushFront(TEventsList& evList); + void CaptureScheduled(TScheduledEventsList& evList); + void PushScheduled(TScheduledEventsList& evList); + bool IsActive(const TInstant& currentTime) const; + void Freeze(const TInstant& deadline); + TInstant GetInactiveUntil() const; + void Schedule(const TScheduledEventQueueItem& item); + bool IsScheduledEmpty() const; + TInstant GetFirstScheduleDeadline() const; + ui64 GetSentEventCount() const; + + private: + TScheduledEventsList Scheduled; + TInstant InactiveUntil; + TEventsList Sent; +#ifdef DEBUG_ORDER_EVENTS + TMap<IEventHandle*, ui64> TrackSent; + ui64 ExpectedReceive; + ui64 NextToSend; +#endif + }; + + typedef THashMap<TEventMailboxId, TIntrusivePtr<TEventMailBox>, TEventMailboxId::THash> TEventMailBoxList; + + class TEmptyEventQueueException : public yexception { + public: + TEmptyEventQueueException() { + Append("Event queue is still empty."); + } + }; + + class TSchedulingLimitReachedException : public yexception { + public: + TSchedulingLimitReachedException(ui64 limit) { + TStringStream str; + str << "TestActorRuntime Processed over " << limit << " events."; + Append(str.Str()); + } + }; + + class TTestActorRuntimeBase: public TNonCopyable { + public: + class TEdgeActor; + class TSchedulerThreadStub; + class TExecutorPoolStub; + class TTimeProvider; + + enum class 
EEventAction { + PROCESS, + DROP, + RESCHEDULE + }; + + typedef std::function<EEventAction(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event)> TEventObserver; + typedef std::function<void(TTestActorRuntimeBase& runtime, TScheduledEventsList& scheduledEvents, TEventsList& queue)> TScheduledEventsSelector; + typedef std::function<bool(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event)> TEventFilter; + typedef std::function<bool(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event, TDuration delay, TInstant& deadline)> TScheduledEventFilter; + typedef std::function<void(TTestActorRuntimeBase& runtime, const TActorId& parentId, const TActorId& actorId)> TRegistrationObserver; + + + TTestActorRuntimeBase(THeSingleSystemEnv); + TTestActorRuntimeBase(ui32 nodeCount, ui32 dataCenterCount, bool UseRealThreads); + TTestActorRuntimeBase(ui32 nodeCount, ui32 dataCenterCount); + TTestActorRuntimeBase(ui32 nodeCount = 1, bool useRealThreads = false); + virtual ~TTestActorRuntimeBase(); + bool IsRealThreads() const; + static EEventAction DefaultObserverFunc(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event); + static void DroppingScheduledEventsSelector(TTestActorRuntimeBase& runtime, TScheduledEventsList& scheduledEvents, TEventsList& queue); + static void CollapsedTimeScheduledEventsSelector(TTestActorRuntimeBase& runtime, TScheduledEventsList& scheduledEvents, TEventsList& queue); + static bool DefaultFilterFunc(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event); + static bool NopFilterFunc(TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event, TDuration delay, TInstant& deadline); + static void DefaultRegistrationObserver(TTestActorRuntimeBase& runtime, const TActorId& parentId, const TActorId& actorId); + TEventObserver SetObserverFunc(TEventObserver observerFunc); + TScheduledEventsSelector SetScheduledEventsSelectorFunc(TScheduledEventsSelector scheduledEventsSelectorFunc); + TEventFilter SetEventFilter(TEventFilter filterFunc); + TScheduledEventFilter SetScheduledEventFilter(TScheduledEventFilter filterFunc); + TRegistrationObserver SetRegistrationObserverFunc(TRegistrationObserver observerFunc); + static bool IsVerbose(); + static void SetVerbose(bool verbose); + TDuration SetDispatchTimeout(TDuration timeout); + void SetDispatchedEventsLimit(ui64 limit) { + DispatchedEventsLimit = limit; + } + TDuration SetReschedulingDelay(TDuration delay); + void SetLogBackend(const TAutoPtr<TLogBackend> logBackend); + void SetLogPriority(NActors::NLog::EComponent component, NActors::NLog::EPriority priority); + TIntrusivePtr<ITimeProvider> GetTimeProvider(); + TInstant GetCurrentTime() const; + void UpdateCurrentTime(TInstant newTime); + void AdvanceCurrentTime(TDuration duration); + void AddLocalService(const TActorId& actorId, const TActorSetupCmd& cmd, ui32 nodeIndex = 0); + virtual void Initialize(); + ui32 GetNodeId(ui32 index = 0) const; + ui32 GetNodeCount() const; + ui64 AllocateLocalId(); + ui32 InterconnectPoolId() const; + TString GetTempDir(); + TActorId Register(IActor* actor, ui32 nodeIndex = 0, ui32 poolId = 0, + TMailboxType::EType mailboxType = TMailboxType::Simple, ui64 revolvingCounter = 0, + const TActorId& parentid = TActorId()); + TActorId Register(IActor *actor, ui32 nodeIndex, ui32 poolId, TMailboxHeader *mailbox, ui32 hint, + const TActorId& parentid = TActorId()); + TActorId RegisterService(const TActorId& serviceId, const TActorId& actorId, ui32 nodeIndex = 0); + TActorId AllocateEdgeActor(ui32 nodeIndex = 0); + 
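+        // Usage sketch (illustrative only; TEvPing/TEvPong and serviceId are hypothetical
+        // placeholders, not part of this header): a test typically allocates an edge actor,
+        // sends a request to the service under test with the edge as sender, and then grabs
+        // the reply on that edge:
+        //
+        //     TTestActorRuntimeBase runtime;
+        //     runtime.Initialize();
+        //     const TActorId edge = runtime.AllocateEdgeActor();
+        //     runtime.Send(new IEventHandle(serviceId, edge, new TEvPing()), 0, true);
+        //     auto pong = runtime.GrabEdgeEvent<TEvPong>(edge, TDuration::Seconds(5));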
TEventsList CaptureEvents(); + TEventsList CaptureMailboxEvents(ui32 hint, ui32 nodeId); + TScheduledEventsList CaptureScheduledEvents(); + void PushFront(TAutoPtr<IEventHandle>& ev); + void PushEventsFront(TEventsList& events); + void PushMailboxEventsFront(ui32 hint, ui32 nodeId, TEventsList& events); + // doesn't dispatch events for edge actors + bool DispatchEvents(const TDispatchOptions& options = TDispatchOptions()); + bool DispatchEvents(const TDispatchOptions& options, TDuration simTimeout); + bool DispatchEvents(const TDispatchOptions& options, TInstant simDeadline); + void Send(IEventHandle* ev, ui32 senderNodeIndex = 0, bool viaActorSystem = false); + void Schedule(IEventHandle* ev, const TDuration& duration, ui32 nodeIndex = 0); + void ClearCounters(); + ui64 GetCounter(ui32 evType) const; + TActorId GetLocalServiceId(const TActorId& serviceId, ui32 nodeIndex = 0); + void WaitForEdgeEvents(TEventFilter filter, const TSet<TActorId>& edgeFilter = {}, TDuration simTimeout = TDuration::Max()); + TActorId GetInterconnectProxy(ui32 nodeIndexFrom, ui32 nodeIndexTo); + void BlockOutputForActor(const TActorId& actorId); + IActor* FindActor(const TActorId& actorId, ui32 nodeIndex = Max<ui32>()) const; + void EnableScheduleForActor(const TActorId& actorId, bool allow = true); + bool IsScheduleForActorEnabled(const TActorId& actorId) const; + TIntrusivePtr<NMonitoring::TDynamicCounters> GetDynamicCounters(ui32 nodeIndex = 0); + void SetupMonitoring(); + + template<typename T> + void AppendToLogSettings(NLog::EComponent minVal, NLog::EComponent maxVal, T func) { + Y_VERIFY(!IsInitialized); + + for (const auto& pair : Nodes) { + pair.second->LogSettings->Append(minVal, maxVal, func); + } + } + + TIntrusivePtr<NLog::TSettings> GetLogSettings(ui32 nodeIdx) + { + return Nodes[FirstNodeId + nodeIdx]->LogSettings; + } + + TActorSystem* SingleSys() const; + TActorSystem* GetAnyNodeActorSystem(); + TActorSystem* GetActorSystem(ui32 nodeId); + template <typename TEvent> + TEvent* GrabEdgeEventIf(TAutoPtr<IEventHandle>& handle, std::function<bool(const TEvent&)> predicate, TDuration simTimeout = TDuration::Max()) { + handle.Destroy(); + const ui32 eventType = TEvent::EventType; + WaitForEdgeEvents([&](TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event) { + Y_UNUSED(runtime); + if (event->GetTypeRewrite() != eventType) + return false; + + TEvent* typedEvent = reinterpret_cast<TAutoPtr<TEventHandle<TEvent>>&>(event)->Get(); + if (predicate(*typedEvent)) { + handle = event; + return true; + } + + return false; + }, {}, simTimeout); + + if (simTimeout == TDuration::Max()) + Y_VERIFY(handle); + + if (handle) { + return reinterpret_cast<TAutoPtr<TEventHandle<TEvent>>&>(handle)->Get(); + } else { + return nullptr; + } + } + + template<class TEvent> + typename TEvent::TPtr GrabEdgeEventIf( + const TSet<TActorId>& edgeFilter, + const std::function<bool(const typename TEvent::TPtr&)>& predicate, + TDuration simTimeout = TDuration::Max()) + { + typename TEvent::TPtr handle; + const ui32 eventType = TEvent::EventType; + WaitForEdgeEvents([&](TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event) { + Y_UNUSED(runtime); + if (event->GetTypeRewrite() != eventType) + return false; + + typename TEvent::TPtr* typedEvent = reinterpret_cast<typename TEvent::TPtr*>(&event); + if (predicate(*typedEvent)) { + handle = *typedEvent; + return true; + } + + return false; + }, edgeFilter, simTimeout); + + if (simTimeout == TDuration::Max()) + Y_VERIFY(handle); + + return handle; + } + + template<class TEvent> 
+        typename TEvent::TPtr GrabEdgeEventIf(
+            const TActorId& edgeActor,
+            const std::function<bool(const typename TEvent::TPtr&)>& predicate,
+            TDuration simTimeout = TDuration::Max())
+        {
+            TSet<TActorId> edgeFilter{edgeActor};
+            return GrabEdgeEventIf<TEvent>(edgeFilter, predicate, simTimeout);
+        }
+
+        template <typename TEvent>
+        TEvent* GrabEdgeEvent(TAutoPtr<IEventHandle>& handle, TDuration simTimeout = TDuration::Max()) {
+            std::function<bool(const TEvent&)> truth = [](const TEvent&) { return true; };
+            return GrabEdgeEventIf(handle, truth, simTimeout);
+        }
+
+        template <typename TEvent>
+        THolder<TEvent> GrabEdgeEvent(TDuration simTimeout = TDuration::Max()) {
+            TAutoPtr<IEventHandle> handle;
+            std::function<bool(const TEvent&)> truth = [](const TEvent&) { return true; };
+            GrabEdgeEventIf(handle, truth, simTimeout);
+            return THolder(handle ? handle->Release<TEvent>().Release() : nullptr);
+        }
+
+        template<class TEvent>
+        typename TEvent::TPtr GrabEdgeEvent(const TSet<TActorId>& edgeFilter, TDuration simTimeout = TDuration::Max()) {
+            return GrabEdgeEventIf<TEvent>(edgeFilter, [](const typename TEvent::TPtr&) { return true; }, simTimeout);
+        }
+
+        template<class TEvent>
+        typename TEvent::TPtr GrabEdgeEvent(const TActorId& edgeActor, TDuration simTimeout = TDuration::Max()) {
+            TSet<TActorId> edgeFilter{edgeActor};
+            return GrabEdgeEvent<TEvent>(edgeFilter, simTimeout);
+        }
+
+        // replace with std::variant<>
+        template <typename... TEvents>
+        std::tuple<TEvents*...> GrabEdgeEvents(TAutoPtr<IEventHandle>& handle, TDuration simTimeout = TDuration::Max()) {
+            handle.Destroy();
+            auto eventTypes = { TEvents::EventType... };
+            WaitForEdgeEvents([&](TTestActorRuntimeBase&, TAutoPtr<IEventHandle>& event) {
+                if (std::find(std::begin(eventTypes), std::end(eventTypes), event->GetTypeRewrite()) == std::end(eventTypes))
+                    return false;
+                handle = event;
+                return true;
+            }, {}, simTimeout);
+            if (simTimeout == TDuration::Max())
+                Y_VERIFY(handle);
+            if (handle) {
+                return std::make_tuple(handle->Type == TEvents::EventType
+                    ? reinterpret_cast<TAutoPtr<TEventHandle<TEvents>>&>(handle)->Get()
+                    : static_cast<TEvents*>(nullptr)...);
+            }
+            return {};
+        }
+
+        template <typename TEvent>
+        TEvent* GrabEdgeEventRethrow(TAutoPtr<IEventHandle>& handle, TDuration simTimeout = TDuration::Max()) {
+            try {
+                return GrabEdgeEvent<TEvent>(handle, simTimeout);
+            } catch (...) {
+                ythrow TWithBackTrace<yexception>() << "Exception occurred while waiting for " << TypeName<TEvent>() << ": " << CurrentExceptionMessage();
+            }
+        }
+
+        template<class TEvent>
+        typename TEvent::TPtr GrabEdgeEventRethrow(const TSet<TActorId>& edgeFilter, TDuration simTimeout = TDuration::Max()) {
+            try {
+                return GrabEdgeEvent<TEvent>(edgeFilter, simTimeout);
+            } catch (...) {
+                ythrow TWithBackTrace<yexception>() << "Exception occurred while waiting for " << TypeName<TEvent>() << ": " << CurrentExceptionMessage();
+            }
+        }
+
+        template<class TEvent>
+        typename TEvent::TPtr GrabEdgeEventRethrow(const TActorId& edgeActor, TDuration simTimeout = TDuration::Max()) {
+            try {
+                return GrabEdgeEvent<TEvent>(edgeActor, simTimeout);
+            } catch (...) {
+                ythrow TWithBackTrace<yexception>() << "Exception occurred while waiting for " << TypeName<TEvent>() << ": " << CurrentExceptionMessage();
+            }
+        }
+
+        template <typename... TEvents>
+        static TString TypeNames() {
+            static TString names[] = { TypeName<TEvents>()... };
+            TString result;
+            for (const TString& s : names) {
+                if (result.empty()) {
+                    result += '<';
+                } else {
+                    result += ',';
+                }
+                result += s;
+            }
+            if (!result.empty()) {
+                result += '>';
+            }
+            return result;
+        }
+
+        template <typename... TEvents>
+        std::tuple<TEvents*...> GrabEdgeEventsRethrow(TAutoPtr<IEventHandle>& handle, TDuration simTimeout = TDuration::Max()) {
+            try {
+                return GrabEdgeEvents<TEvents...>(handle, simTimeout);
+            } catch (...) {
+                ythrow TWithBackTrace<yexception>() << "Exception occurred while waiting for " << TypeNames<TEvents...>() << ": " << CurrentExceptionMessage();
+            }
+        }
+
+        void ResetScheduledCount() {
+            ScheduledCount = 0;
+        }
+
+        void SetScheduledLimit(ui64 limit) {
+            ScheduledLimit = limit;
+        }
+
+        void SetDispatcherRandomSeed(TInstant time, ui64 iteration);
+        TString GetActorName(const TActorId& actorId) const;
+
+        const TVector<ui64>& GetTxAllocatorTabletIds() const { return TxAllocatorTabletIds; }
+        void SetTxAllocatorTabletIds(const TVector<ui64>& ids) { TxAllocatorTabletIds = ids; }
+
+        void SetUseRealInterconnect() {
+            UseRealInterconnect = true;
+        }
+
+    protected:
+        struct TNodeDataBase;
+        TNodeDataBase* GetRawNode(ui32 node) const {
+            return Nodes.at(FirstNodeId + node).Get();
+        }
+
+        static IExecutorPool* CreateExecutorPoolStub(TTestActorRuntimeBase* runtime, ui32 nodeIndex, TNodeDataBase* node, ui32 poolId);
+        virtual TIntrusivePtr<NMonitoring::TDynamicCounters> GetCountersForComponent(TIntrusivePtr<NMonitoring::TDynamicCounters> counters, const char* component) {
+            Y_UNUSED(counters);
+            Y_UNUSED(component);
+
+            // do nothing, just return the existing counters
+            return counters;
+        }
+
+        THolder<TActorSystemSetup> MakeActorSystemSetup(ui32 nodeIndex, TNodeDataBase* node);
+        THolder<TActorSystem> MakeActorSystem(ui32 nodeIndex, TNodeDataBase* node);
+        virtual void InitActorSystemSetup(TActorSystemSetup& setup) {
+            Y_UNUSED(setup);
+        }
+
+    private:
+        IActor* FindActor(const TActorId& actorId, TNodeDataBase* node) const;
+        void SendInternal(IEventHandle* ev, ui32 nodeIndex, bool viaActorSystem);
+        TEventMailBox& GetMailbox(ui32 nodeId, ui32 hint);
+        void ClearMailbox(ui32 nodeId, ui32 hint);
+        void HandleNonEmptyMailboxesForEachContext(TEventMailboxId mboxId);
+        void UpdateFinalEventsStatsForEachContext(IEventHandle& ev);
+        bool DispatchEventsInternal(const TDispatchOptions& options, TInstant simDeadline);
+
+    private:
+        ui64 ScheduledCount;
+        ui64 ScheduledLimit;
+        THolder<TTempDir> TmpDir;
+        const TThread::TId MainThreadId;
+
+    protected:
+        bool UseRealInterconnect = false;
+        TInterconnectMock InterconnectMock;
+        bool IsInitialized = false;
+        bool SingleSysEnv = false;
+        const TString ClusterUUID;
+        const ui32 FirstNodeId;
+        const ui32 NodeCount;
+        const ui32 DataCenterCount;
+        const bool UseRealThreads;
+
+        ui64 LocalId;
+        TMutex Mutex;
+        TCondVar MailboxesHasEvents;
+        TEventMailBoxList Mailboxes;
+        TMap<ui32, ui64> EvCounters;
+        ui64 DispatchCyclesCount;
+        ui64 DispatchedEventsCount;
+        ui64 DispatchedEventsLimit = 2'500'000;
+        TActorId CurrentRecipient;
+        ui64 DispatcherRandomSeed;
+        TIntrusivePtr<IRandomProvider> DispatcherRandomProvider;
+        TAutoPtr<TLogBackend> LogBackend;
+        bool NeedMonitoring;
+
+        TIntrusivePtr<IRandomProvider> RandomProvider;
+        TIntrusivePtr<ITimeProvider> TimeProvider;
+
+    protected:
+        struct TNodeDataBase: public TThrRefBase {
+            TNodeDataBase();
+            void Stop();
+            virtual ~TNodeDataBase();
+            virtual ui64 GetLoggerPoolId() const {
+                return 0;
+            }
+
+            template <typename T = void>
+            T* GetAppData() {
+                return
static_cast<T*>(AppData0.get()); + } + + template <typename T = void> + const T* GetAppData() const { + return static_cast<T*>(AppData0.get()); + } + + TIntrusivePtr<NMonitoring::TDynamicCounters> DynamicCounters; + TIntrusivePtr<NActors::NLog::TSettings> LogSettings; + TIntrusivePtr<NInterconnect::TPollerThreads> Poller; + volatile ui64* ActorSystemTimestamp; + volatile ui64* ActorSystemMonotonic; + TVector<std::pair<TActorId, TActorSetupCmd> > LocalServices; + TMap<TActorId, IActor*> LocalServicesActors; + TMap<IActor*, TActorId> ActorToActorId; + THolder<TMailboxTable> MailboxTable; + std::shared_ptr<void> AppData0; + THolder<TActorSystem> ActorSystem; + THolder<IExecutorPool> SchedulerPool; + TVector<IExecutorPool*> ExecutorPools; + THolder<TExecutorThread> ExecutorThread; + }; + + struct INodeFactory { + virtual ~INodeFactory() = default; + virtual TIntrusivePtr<TNodeDataBase> CreateNode() = 0; + }; + + struct TDefaultNodeFactory final: INodeFactory { + virtual TIntrusivePtr<TNodeDataBase> CreateNode() override { + return new TNodeDataBase(); + } + }; + + INodeFactory& GetNodeFactory() { + return *NodeFactory; + } + + virtual TNodeDataBase* GetNodeById(size_t idx) { + return Nodes[idx].Get(); + } + + void InitNodes(); + void CleanupNodes(); + virtual void InitNodeImpl(TNodeDataBase*, size_t); + + static bool AllowSendFrom(TNodeDataBase* node, TAutoPtr<IEventHandle>& ev); + + protected: + THolder<INodeFactory> NodeFactory{new TDefaultNodeFactory}; + + private: + void InitNode(TNodeDataBase* node, size_t idx); + + struct TDispatchContext { + const TDispatchOptions* Options; + TDispatchContext* PrevContext; + + TMap<const TDispatchOptions::TFinalEventCondition*, ui32> FinalEventFrequency; + TSet<TEventMailboxId> FoundNonEmptyMailboxes; + bool FinalEventFound = false; + }; + + TProgramShouldContinue ShouldContinue; + TMap<ui32, TIntrusivePtr<TNodeDataBase>> Nodes; + ui64 CurrentTimestamp; + TSet<TActorId> EdgeActors; + THashMap<TEventMailboxId, TActorId, TEventMailboxId::THash> EdgeActorByMailbox; + TDuration DispatchTimeout; + TDuration ReschedulingDelay; + TEventObserver ObserverFunc; + TScheduledEventsSelector ScheduledEventsSelectorFunc; + TEventFilter EventFilterFunc; + TScheduledEventFilter ScheduledEventFilterFunc; + TRegistrationObserver RegistrationObserver; + TSet<TActorId> BlockedOutput; + TSet<TActorId> ScheduleWhiteList; + THashMap<TActorId, TActorId> ScheduleWhiteListParent; + THashMap<TActorId, TString> ActorNames; + TDispatchContext* CurrentDispatchContext; + TVector<ui64> TxAllocatorTabletIds; + + static ui32 NextNodeId; + }; + + template <typename TEvent> + TEvent* FindEvent(TEventsList& events) { + for (auto& event : events) { + if (event && event->GetTypeRewrite() == TEvent::EventType) { + return static_cast<TEvent*>(event->GetBase()); + } + } + + return nullptr; + } + + template <typename TEvent> + TEvent* FindEvent(TEventsList& events, const std::function<bool(const TEvent&)>& predicate) { + for (auto& event : events) { + if (event && event->GetTypeRewrite() == TEvent::EventType && predicate(*static_cast<TEvent*>(event->GetBase()))) { + return static_cast<TEvent*>(event->GetBase()); + } + } + + return nullptr; + } + + template <typename TEvent> + TEvent* GrabEvent(TEventsList& events, TAutoPtr<IEventHandle>& ev) { + ev.Destroy(); + for (auto& event : events) { + if (event && event->GetTypeRewrite() == TEvent::EventType) { + ev = event; + return static_cast<TEvent*>(ev->GetBase()); + } + } + + return nullptr; + } + + template <typename TEvent> + TEvent* 
GrabEvent(TEventsList& events, TAutoPtr<IEventHandle>& ev, + const std::function<bool(const typename TEvent::TPtr&)>& predicate) { + ev.Destroy(); + for (auto& event : events) { + if (event && event->GetTypeRewrite() == TEvent::EventType) { + if (predicate(reinterpret_cast<const typename TEvent::TPtr&>(event))) { + ev = event; + return static_cast<TEvent*>(ev->GetBase()); + } + } + } + + return nullptr; + } + + class IStrandingDecoratorFactory { + public: + virtual ~IStrandingDecoratorFactory() {} + virtual IActor* Wrap(const TActorId& delegatee, bool isSync, const TVector<TActorId>& additionalActors) = 0; + }; + + struct IReplyChecker { + virtual ~IReplyChecker() {} + virtual void OnRequest(IEventHandle *request) = 0; + virtual bool IsWaitingForMoreResponses(IEventHandle *response) = 0; + }; + + struct TNoneReplyChecker : IReplyChecker { + void OnRequest(IEventHandle*) override { + } + + bool IsWaitingForMoreResponses(IEventHandle*) override { + return false; + } + }; + + using TReplyCheckerCreator = std::function<THolder<IReplyChecker>(void)>; + + inline THolder<IReplyChecker> CreateNoneReplyChecker() { + return MakeHolder<TNoneReplyChecker>(); + } + + TAutoPtr<IStrandingDecoratorFactory> CreateStrandingDecoratorFactory(TTestActorRuntimeBase* runtime, + TReplyCheckerCreator createReplyChecker = CreateNoneReplyChecker); + extern ui64 DefaultRandomSeed; +} diff --git a/library/cpp/actors/testlib/ut/ya.make b/library/cpp/actors/testlib/ut/ya.make new file mode 100644 index 0000000000..1d4aec06ff --- /dev/null +++ b/library/cpp/actors/testlib/ut/ya.make @@ -0,0 +1,20 @@ +UNITTEST_FOR(library/cpp/actors/testlib) + +OWNER( + kruall + g:kikimr +) + +FORK_SUBTESTS() +SIZE(SMALL) + + +PEERDIR( + library/cpp/actors/core +) + +SRCS( + decorator_ut.cpp +) + +END() diff --git a/library/cpp/actors/testlib/ya.make b/library/cpp/actors/testlib/ya.make new file mode 100644 index 0000000000..1afb3f6059 --- /dev/null +++ b/library/cpp/actors/testlib/ya.make @@ -0,0 +1,27 @@ +LIBRARY() + +OWNER( + g:kikimr +) + +SRCS( + test_runtime.cpp +) + +PEERDIR( + library/cpp/actors/core + library/cpp/actors/interconnect/mock + library/cpp/actors/protos + library/cpp/random_provider + library/cpp/time_provider +) + +IF (GCC) + CFLAGS(-fno-devirtualize-speculatively) +ENDIF() + +END() + +RECURSE_FOR_TESTS( + ut +) diff --git a/library/cpp/actors/util/affinity.cpp b/library/cpp/actors/util/affinity.cpp new file mode 100644 index 0000000000..cc1b6e70ec --- /dev/null +++ b/library/cpp/actors/util/affinity.cpp @@ -0,0 +1,93 @@ +#include "affinity.h" + +#ifdef _linux_ +#include <sched.h> +#endif + +class TAffinity::TImpl { +#ifdef _linux_ + cpu_set_t Mask; +#endif +public: + TImpl() { +#ifdef _linux_ + int ar = sched_getaffinity(0, sizeof(cpu_set_t), &Mask); + Y_VERIFY_DEBUG(ar == 0); +#endif + } + + explicit TImpl(const ui8* cpus, ui32 size) { +#ifdef _linux_ + CPU_ZERO(&Mask); + for (ui32 i = 0; i != size; ++i) { + if (cpus[i]) { + CPU_SET(i, &Mask); + } + } +#else + Y_UNUSED(cpus); + Y_UNUSED(size); +#endif + } + + void Set() const { +#ifdef _linux_ + int ar = sched_setaffinity(0, sizeof(cpu_set_t), &Mask); + Y_VERIFY_DEBUG(ar == 0); +#endif + } + + operator TCpuMask() const { + TCpuMask result; +#ifdef _linux_ + for (ui32 i = 0; i != CPU_SETSIZE; ++i) { + result.Cpus.emplace_back(CPU_ISSET(i, &Mask)); + } + result.RemoveTrailingZeros(); +#endif + return result; + } + +}; + +TAffinity::TAffinity() { +} + +TAffinity::~TAffinity() { +} + +TAffinity::TAffinity(const ui8* x, ui32 sz) { + if (x && sz) { + Impl.Reset(new 
TImpl(x, sz)); + } +} + +TAffinity::TAffinity(const TCpuMask& mask) { + if (!mask.IsEmpty()) { + static_assert(sizeof(ui8) == sizeof(mask.Cpus[0])); + const ui8* x = reinterpret_cast<const ui8*>(&mask.Cpus[0]); + const ui32 sz = mask.Size(); + Impl.Reset(new TImpl(x, sz)); + } +} + +void TAffinity::Current() { + Impl.Reset(new TImpl()); +} + +void TAffinity::Set() const { + if (!!Impl) { + Impl->Set(); + } +} + +bool TAffinity::Empty() const { + return !Impl; +} + +TAffinity::operator TCpuMask() const { + if (!!Impl) { + return *Impl; + } + return TCpuMask(); +} diff --git a/library/cpp/actors/util/affinity.h b/library/cpp/actors/util/affinity.h new file mode 100644 index 0000000000..ae106ed180 --- /dev/null +++ b/library/cpp/actors/util/affinity.h @@ -0,0 +1,49 @@ +#pragma once + +#include "defs.h" +#include "cpumask.h" + +// Platform-specific class to set or get thread affinity +class TAffinity: public TThrRefBase, TNonCopyable { + class TImpl; + THolder<TImpl> Impl; + +public: + TAffinity(); + TAffinity(const ui8* cpus, ui32 size); + explicit TAffinity(const TCpuMask& mask); + ~TAffinity(); + + void Current(); + void Set() const; + bool Empty() const; + + operator TCpuMask() const; +}; + +// Scoped affinity setter +class TAffinityGuard : TNonCopyable { + bool Stacked; + TAffinity OldAffinity; + +public: + TAffinityGuard(const TAffinity* affinity) { + Stacked = false; + if (affinity && !affinity->Empty()) { + OldAffinity.Current(); + affinity->Set(); + Stacked = true; + } + } + + ~TAffinityGuard() { + Release(); + } + + void Release() { + if (Stacked) { + OldAffinity.Set(); + Stacked = false; + } + } +}; diff --git a/library/cpp/actors/util/cpumask.h b/library/cpp/actors/util/cpumask.h new file mode 100644 index 0000000000..29741aa1d6 --- /dev/null +++ b/library/cpp/actors/util/cpumask.h @@ -0,0 +1,133 @@ +#pragma once + +#include "defs.h" + +#include <library/cpp/containers/stack_vector/stack_vec.h> + +#include <util/string/split.h> +#include <util/generic/yexception.h> + +using TCpuId = ui32; + +// Simple data structure to operate with set of cpus +struct TCpuMask { + TStackVec<bool, 1024> Cpus; + + // Creates empty mask + TCpuMask() {} + + // Creates mask with single cpu set + explicit TCpuMask(TCpuId cpuId) { + Set(cpuId); + } + + // Initialize mask from raw boolean array + template <class T> + TCpuMask(const T* cpus, TCpuId size) { + Cpus.reserve(size); + for (TCpuId i = 0; i != size; ++i) { + Cpus.emplace_back(bool(cpus[i])); + } + } + + // Parse a numerical list of processors. The numbers are separated by commas and may include ranges. For example: 0,5,7,9-11 + explicit TCpuMask(const TString& cpuList) { + try { + for (TStringBuf s : StringSplitter(cpuList).Split(',')) { + TCpuId l, r; + if (s.find('-') != TString::npos) { + StringSplitter(s).Split('-').CollectInto(&l, &r); + } else { + l = r = FromString<TCpuId>(s); + } + if (r >= Cpus.size()) { + Cpus.resize(r + 1, false); + } + for (TCpuId cpu = l; cpu <= r; cpu++) { + Cpus[cpu] = true; + } + } + } catch (...) 
{
+            ythrow TWithBackTrace<yexception>() << "Exception occurred while parsing cpu list '" << cpuList << "': " << CurrentExceptionMessage();
+        }
+    }
+
+    // Returns size of underlying vector
+    TCpuId Size() const {
+        return Cpus.size();
+    }
+
+    // Returns number of set bits in mask
+    TCpuId CpuCount() const {
+        TCpuId result = 0;
+        for (bool value : Cpus) {
+            result += value;
+        }
+        return result;
+    }
+
+    bool IsEmpty() const {
+        for (bool value : Cpus) {
+            if (value) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    bool IsSet(TCpuId cpu) const {
+        return cpu < Cpus.size() && Cpus[cpu];
+    }
+
+    void Set(TCpuId cpu) {
+        if (cpu >= Cpus.size()) {
+            Cpus.resize(cpu + 1, false);
+        }
+        Cpus[cpu] = true;
+    }
+
+    void Reset(TCpuId cpu) {
+        if (cpu < Cpus.size()) {
+            Cpus[cpu] = false;
+        }
+    }
+
+    void RemoveTrailingZeros() {
+        while (!Cpus.empty() && !Cpus.back()) {
+            Cpus.pop_back();
+        }
+    }
+
+    explicit operator bool() const {
+        return !IsEmpty();
+    }
+
+    TCpuMask operator &(const TCpuMask& rhs) const {
+        TCpuMask result;
+        TCpuId size = Max(Size(), rhs.Size());
+        result.Cpus.reserve(size);
+        for (TCpuId cpu = 0; cpu < size; cpu++) {
+            result.Cpus.emplace_back(IsSet(cpu) && rhs.IsSet(cpu));
+        }
+        return result;
+    }
+
+    TCpuMask operator |(const TCpuMask& rhs) const {
+        TCpuMask result;
+        TCpuId size = Max(Size(), rhs.Size());
+        result.Cpus.reserve(size);
+        for (TCpuId cpu = 0; cpu < size; cpu++) {
+            result.Cpus.emplace_back(IsSet(cpu) || rhs.IsSet(cpu));
+        }
+        return result;
+    }
+
+    TCpuMask operator -(const TCpuMask& rhs) const {
+        TCpuMask result;
+        result.Cpus.reserve(Size());
+        for (TCpuId cpu = 0; cpu < Size(); cpu++) {
+            result.Cpus.emplace_back(IsSet(cpu) && !rhs.IsSet(cpu));
+        }
+        return result;
+    }
+};
diff --git a/library/cpp/actors/util/datetime.h b/library/cpp/actors/util/datetime.h
new file mode 100644
index 0000000000..cbec5965d6
--- /dev/null
+++ b/library/cpp/actors/util/datetime.h
@@ -0,0 +1,82 @@
+#pragma once
+
+#include <util/system/defaults.h>
+#include <util/system/hp_timer.h>
+#include <util/system/platform.h>
+
+#if defined(_win_)
+#include <intrin.h>
+#pragma intrinsic(__rdtsc)
+#endif // _win_
+
+#if defined(_darwin_) && !defined(_x86_)
+#include <mach/mach_time.h>
+#endif
+
+// GetCycleCount() from util/system/datetime.h uses rdtscp, which is more accurate than rdtsc,
+// but rdtscp waits for all preceding instructions to retire (a partial serialization of the
+// pipeline), so it can be slower
+Y_FORCE_INLINE ui64 GetCycleCountFast() {
+#if defined(_MSC_VER)
+    // Generates the rdtsc instruction, which returns the processor time stamp.
+    // The processor time stamp records the number of clock cycles since the last reset.
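+    // Note: plain rdtsc (unlike rdtscp) is not a serializing read, so the counter sample may be
+    // reordered with neighboring instructions; that is the intended speed/accuracy trade-off here.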
+    return __rdtsc();
+#elif defined(__clang__) && !defined(_arm64_)
+    return __builtin_readcyclecounter();
+#elif defined(_x86_64_)
+    unsigned hi, lo;
+    __asm__ __volatile__("rdtsc"
+                         : "=a"(lo), "=d"(hi));
+    return ((unsigned long long)lo) | (((unsigned long long)hi) << 32);
+#elif defined(_i386_)
+    ui64 x;
+    __asm__ volatile("rdtsc\n\t"
+                     : "=A"(x));
+    return x;
+#elif defined(_darwin_)
+    return mach_absolute_time();
+#elif defined(_arm32_)
+    return MicroSeconds();
+#elif defined(_arm64_)
+    ui64 x;
+
+    __asm__ __volatile__("isb; mrs %0, cntvct_el0"
+                         : "=r"(x));
+
+    return x;
+#else
+#error "unsupported arch"
+#endif
+}
+
+// NHPTimer::GetTime fast analog
+Y_FORCE_INLINE void GetTimeFast(NHPTimer::STime* pTime) noexcept {
+    *pTime = GetCycleCountFast();
+}
+
+namespace NActors {
+    inline double Ts2Ns(ui64 ts) {
+        return NHPTimer::GetSeconds(ts) * 1e9;
+    }
+
+    inline double Ts2Us(ui64 ts) {
+        return NHPTimer::GetSeconds(ts) * 1e6;
+    }
+
+    inline double Ts2Ms(ui64 ts) {
+        return NHPTimer::GetSeconds(ts) * 1e3;
+    }
+
+    inline ui64 Us2Ts(double us) {
+        return ui64(NHPTimer::GetClockRate() * us / 1e6);
+    }
+
+    struct TTimeTracker {
+        ui64 Ts;
+        TTimeTracker(): Ts(GetCycleCountFast()) {}
+        ui64 Elapsed() {
+            ui64 ts = GetCycleCountFast();
+            std::swap(Ts, ts);
+            return Ts - ts;
+        }
+    };
+}
diff --git a/library/cpp/actors/util/defs.h b/library/cpp/actors/util/defs.h
new file mode 100644
index 0000000000..5c3b57665b
--- /dev/null
+++ b/library/cpp/actors/util/defs.h
@@ -0,0 +1,16 @@
+#pragma once
+
+// unique tag to fix pragma once gcc glueing: ./library/actors/util/defs.h
+
+#include <util/system/defaults.h>
+#include <util/generic/bt_exception.h>
+#include <util/generic/noncopyable.h>
+#include <util/generic/ptr.h>
+#include <util/generic/string.h>
+#include <util/generic/yexception.h>
+#include <util/system/atomic.h>
+#include <util/system/align.h>
+#include <util/generic/vector.h>
+#include <util/datetime/base.h>
+#include <util/generic/ylimits.h>
+#include "intrinsics.h"
diff --git a/library/cpp/actors/util/funnel_queue.h b/library/cpp/actors/util/funnel_queue.h
new file mode 100644
index 0000000000..0e21e2617c
--- /dev/null
+++ b/library/cpp/actors/util/funnel_queue.h
@@ -0,0 +1,240 @@
+#pragma once
+
+#include <util/system/atomic.h>
+#include <util/generic/noncopyable.h>
+
+template <typename ElementType>
+class TFunnelQueue: private TNonCopyable {
+public:
+    TFunnelQueue() noexcept
+        : Front(nullptr)
+        , Back(nullptr)
+    {
+    }
+
+    virtual ~TFunnelQueue() noexcept {
+        for (auto entry = Front; entry; entry = DeleteEntry(entry))
+            continue;
+    }
+
+    /// Push an element. Can be used from many threads. Returns true if this is the first element.
+    bool
+    Push(ElementType&& element) noexcept {
+        TEntry* const next = NewEntry(static_cast<ElementType&&>(element));
+        TEntry* const prev = AtomicSwap(&Back, next);
+        AtomicSet(prev ? prev->Next : Front, next);
+        return !prev;
+    }
+
+    /// Extract the top element. Must be used only from one thread. Returns true if more elements remain.
+    bool
+    Pop() noexcept {
+        if (TEntry* const top = AtomicGet(Front)) {
+            const auto last = AtomicCas(&Back, nullptr, top);
+            if (last) // This was the last element in the queue. The queue is empty now.
+                AtomicCas(&Front, nullptr, top);
+            else // This element is not the last one.
+                for (;;) {
+                    if (const auto next = AtomicGet(top->Next)) {
+                        AtomicSet(Front, next);
+                        break;
+                    }
+                    // But Next is still null. Spin until the producer finishes linking it.
+                }
+
+            DeleteEntry(top);
+            return !last;
+        }
+
+        return false;
+    }
+
+    /// Peek the top element. Must be used only from one thread.
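+    /// Calling it on an empty queue dereferences a null Front pointer; check IsEmpty() first.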
+ ElementType& + Top() const noexcept { + return AtomicGet(Front)->Data; + } + + bool + IsEmpty() const noexcept { + return !AtomicGet(Front); + } + +protected: + class TEntry: private TNonCopyable { + friend class TFunnelQueue; + + private: + explicit TEntry(ElementType&& element) noexcept + : Data(static_cast<ElementType&&>(element)) + , Next(nullptr) + { + } + + ~TEntry() noexcept { + } + + public: + ElementType Data; + TEntry* volatile Next; + }; + + TEntry* volatile Front; + TEntry* volatile Back; + + virtual TEntry* NewEntry(ElementType&& element) noexcept { + return new TEntry(static_cast<ElementType&&>(element)); + } + + virtual TEntry* DeleteEntry(TEntry* entry) noexcept { + const auto next = entry->Next; + delete entry; + return next; + } + +protected: + struct TEntryIter { + TEntry* ptr; + + ElementType& operator*() { + return ptr->Data; + } + + ElementType* operator->() { + return &ptr->Data; + } + + TEntryIter& operator++() { + ptr = AtomicGet(ptr->Next); + return *this; + } + + bool operator!=(const TEntryIter& other) const { + return ptr != other.ptr; + } + + bool operator==(const TEntryIter& other) const { + return ptr == other.ptr; + } + }; + + struct TConstEntryIter { + const TEntry* ptr; + + const ElementType& operator*() { + return ptr->Data; + } + + const ElementType* operator->() { + return &ptr->Data; + } + + TEntryIter& operator++() { + ptr = AtomicGet(ptr->Next); + return *this; + } + + bool operator!=(const TConstEntryIter& other) const { + return ptr != other.ptr; + } + + bool operator==(const TConstEntryIter& other) const { + return ptr == other.ptr; + } + }; + +public: + using const_iterator = TConstEntryIter; + using iterator = TEntryIter; + + iterator begin() { + return {AtomicGet(Front)}; + } + const_iterator cbegin() { + return {AtomicGet(Front)}; + } + const_iterator begin() const { + return {AtomicGet(Front)}; + } + + iterator end() { + return {nullptr}; + } + const_iterator cend() { + return {nullptr}; + } + const_iterator end() const { + return {nullptr}; + } +}; + +template <typename ElementType> +class TPooledFunnelQueue: public TFunnelQueue<ElementType> { +public: + TPooledFunnelQueue() noexcept + : Stack(nullptr) + { + } + + virtual ~TPooledFunnelQueue() noexcept override { + for (auto entry = TBase::Front; entry; entry = TBase::DeleteEntry(entry)) + continue; + for (auto entry = Stack; entry; entry = TBase::DeleteEntry(entry)) + continue; + TBase::Back = TBase::Front = Stack = nullptr; + } + +private: + typedef TFunnelQueue<ElementType> TBase; + + typename TBase::TEntry* volatile Stack; + +protected: + virtual typename TBase::TEntry* NewEntry(ElementType&& element) noexcept override { + while (const auto top = AtomicGet(Stack)) + if (AtomicCas(&Stack, top->Next, top)) { + top->Data = static_cast<ElementType&&>(element); + AtomicSet(top->Next, nullptr); + return top; + } + + return TBase::NewEntry(static_cast<ElementType&&>(element)); + } + + virtual typename TBase::TEntry* DeleteEntry(typename TBase::TEntry* entry) noexcept override { + entry->Data = ElementType(); + const auto next = entry->Next; + do + AtomicSet(entry->Next, AtomicGet(Stack)); + while (!AtomicCas(&Stack, entry, entry->Next)); + return next; + } +}; + +template <typename ElementType, template <typename T> class TQueueType = TFunnelQueue> +class TCountedFunnelQueue: public TQueueType<ElementType> { +public: + TCountedFunnelQueue() noexcept + : Count(0) + { + } + + TAtomicBase GetSize() const noexcept { + return AtomicGet(Count); + } + +private: + typedef TQueueType<ElementType> 
TBase; + + virtual typename TBase::TEntry* NewEntry(ElementType&& element) noexcept override { + AtomicAdd(Count, 1); + return TBase::NewEntry(static_cast<ElementType&&>(element)); + } + + virtual typename TBase::TEntry* DeleteEntry(typename TBase::TEntry* entry) noexcept override { + AtomicSub(Count, 1); + return TBase::DeleteEntry(entry); + } + + TAtomic Count; +}; diff --git a/library/cpp/actors/util/futex.h b/library/cpp/actors/util/futex.h new file mode 100644 index 0000000000..c193f8d128 --- /dev/null +++ b/library/cpp/actors/util/futex.h @@ -0,0 +1,13 @@ +#pragma once + +#ifdef _linux_ + +#include <linux/futex.h> +#include <unistd.h> +#include <sys/syscall.h> + +static long SysFutex(void* addr1, int op, int val1, struct timespec* timeout, void* addr2, int val3) { + return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3); +} + +#endif diff --git a/library/cpp/actors/util/intrinsics.h b/library/cpp/actors/util/intrinsics.h new file mode 100644 index 0000000000..df07e36896 --- /dev/null +++ b/library/cpp/actors/util/intrinsics.h @@ -0,0 +1,97 @@ +#pragma once + +#include <util/system/defaults.h> +#include <util/system/atomic.h> +#include <util/system/spinlock.h> + +#include <library/cpp/sse/sse.h> // The header chooses appropriate SSE support + +static_assert(sizeof(TAtomic) == 8, "expect sizeof(TAtomic) == 8"); + +// we need explicit 32 bit operations to keep cache-line friendly packs +// so have to define some atomics additionaly to arcadia one +#ifdef _win_ +#pragma intrinsic(_InterlockedCompareExchange) +#pragma intrinsic(_InterlockedExchangeAdd) +#pragma intrinsic(_InterlockedIncrement) +#pragma intrinsic(_InterlockedDecrement) +#endif + +inline bool AtomicUi32Cas(volatile ui32* a, ui32 exchange, ui32 compare) { +#ifdef _win_ + return _InterlockedCompareExchange((volatile long*)a, exchange, compare) == (long)compare; +#else + ui32 expected = compare; + return __atomic_compare_exchange_n(a, &expected, exchange, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); +#endif +} + +inline ui32 AtomicUi32Add(volatile ui32* a, ui32 add) { +#ifdef _win_ + return _InterlockedExchangeAdd((volatile long*)a, add) + add; +#else + return __atomic_add_fetch(a, add, __ATOMIC_SEQ_CST); +#endif +} + +inline ui32 AtomicUi32Sub(volatile ui32* a, ui32 sub) { +#ifdef _win_ + return _InterlockedExchangeAdd((volatile long*)a, -(long)sub) - sub; +#else + return __atomic_sub_fetch(a, sub, __ATOMIC_SEQ_CST); +#endif +} + +inline ui32 AtomicUi32Increment(volatile ui32* a) { +#ifdef _win_ + return _InterlockedIncrement((volatile long*)a); +#else + return __atomic_add_fetch(a, 1, __ATOMIC_SEQ_CST); +#endif +} + +inline ui32 AtomicUi32Decrement(volatile ui32* a) { +#ifdef _win_ + return _InterlockedDecrement((volatile long*)a); +#else + return __atomic_sub_fetch(a, 1, __ATOMIC_SEQ_CST); +#endif +} + +template <typename T> +inline void AtomicStore(volatile T* a, T x) { + static_assert(std::is_integral<T>::value || std::is_pointer<T>::value, "expect std::is_integral<T>::value || std::is_pointer<T>::value"); +#ifdef _win_ + *a = x; +#else + __atomic_store_n(a, x, __ATOMIC_RELEASE); +#endif +} + +template <typename T> +inline void RelaxedStore(volatile T* a, T x) { + static_assert(std::is_integral<T>::value || std::is_pointer<T>::value, "expect std::is_integral<T>::value || std::is_pointer<T>::value"); +#ifdef _win_ + *a = x; +#else + __atomic_store_n(a, x, __ATOMIC_RELAXED); +#endif +} + +template <typename T> +inline T AtomicLoad(volatile T* a) { +#ifdef _win_ + return *a; +#else + return __atomic_load_n(a, 
__ATOMIC_ACQUIRE); +#endif +} + +template <typename T> +inline T RelaxedLoad(volatile T* a) { +#ifdef _win_ + return *a; +#else + return __atomic_load_n(a, __ATOMIC_RELAXED); +#endif +} diff --git a/library/cpp/actors/util/local_process_key.h b/library/cpp/actors/util/local_process_key.h new file mode 100644 index 0000000000..172f08fc73 --- /dev/null +++ b/library/cpp/actors/util/local_process_key.h @@ -0,0 +1,132 @@ +#pragma once + +#include <util/string/builder.h> +#include <util/generic/strbuf.h> +#include <util/generic/vector.h> +#include <util/generic/hash.h> +#include <util/generic/singleton.h> +#include <util/generic/serialized_enum.h> + +template <typename T> +class TLocalProcessKeyState { + +template <typename U, const char* Name> +friend class TLocalProcessKey; +template <typename U, typename EnumT> +friend class TEnumProcessKey; + +public: + static TLocalProcessKeyState& GetInstance() { + return *Singleton<TLocalProcessKeyState<T>>(); + } + + size_t GetCount() const { + return StartIndex + Names.size(); + } + + TStringBuf GetNameByIndex(size_t index) const { + if (index < StartIndex) { + return StaticNames[index]; + } else { + index -= StartIndex; + Y_ENSURE(index < Names.size()); + return Names[index]; + } + } + + size_t GetIndexByName(TStringBuf name) const { + auto it = Map.find(name); + Y_ENSURE(it != Map.end()); + return it->second; + } + +private: + size_t Register(TStringBuf name) { + auto x = Map.emplace(name, Names.size()+StartIndex); + if (x.second) { + Names.emplace_back(name); + } + + return x.first->second; + } + + size_t Register(TStringBuf name, ui32 index) { + Y_VERIFY(index < StartIndex); + auto x = Map.emplace(name, index); + Y_VERIFY(x.second || x.first->second == index); + StaticNames[index] = name; + return x.first->second; + } + +private: + static constexpr ui32 StartIndex = 2000; + + TVector<TString> FillStaticNames() { + TVector<TString> staticNames; + staticNames.reserve(StartIndex); + for (ui32 i = 0; i < StartIndex; i++) { + staticNames.push_back(TStringBuilder() << "Activity_" << i); + } + return staticNames; + } + + TVector<TString> StaticNames = FillStaticNames(); + TVector<TString> Names; + THashMap<TString, size_t> Map; +}; + +template <typename T, const char* Name> +class TLocalProcessKey { +public: + static TStringBuf GetName() { + return Name; + } + + static size_t GetIndex() { + return Index; + } + +private: + inline static size_t Index = TLocalProcessKeyState<T>::GetInstance().Register(Name); +}; + +template <typename T, typename EnumT> +class TEnumProcessKey { +public: + static TStringBuf GetName(EnumT key) { + return TLocalProcessKeyState<T>::GetInstance().GetNameByIndex(GetIndex(key)); + } + + static size_t GetIndex(EnumT key) { + ui32 index = static_cast<ui32>(key); + if (index < TLocalProcessKeyState<T>::StartIndex) { + return index; + } + Y_VERIFY(index < Enum2Index.size()); + return Enum2Index[index]; + } + +private: + inline static TVector<size_t> RegisterAll() { + static_assert(std::is_enum<EnumT>::value, "Enum is required"); + + TVector<size_t> enum2Index; + auto names = GetEnumNames<EnumT>(); + ui32 maxId = 0; + for (const auto& [k, v] : names) { + maxId = Max(maxId, static_cast<ui32>(k)); + } + enum2Index.resize(maxId+1); + for (ui32 i = 0; i <= maxId && i < TLocalProcessKeyState<T>::StartIndex; i++) { + enum2Index[i] = i; + } + + for (const auto& [k, v] : names) { + ui32 enumId = static_cast<ui32>(k); + enum2Index[enumId] = TLocalProcessKeyState<T>::GetInstance().Register(v, enumId); + } + return enum2Index; + } + + inline static 
TVector<size_t> Enum2Index = RegisterAll(); +}; diff --git a/library/cpp/actors/util/named_tuple.h b/library/cpp/actors/util/named_tuple.h new file mode 100644 index 0000000000..67f185bba8 --- /dev/null +++ b/library/cpp/actors/util/named_tuple.h @@ -0,0 +1,30 @@ +#pragma once + +#include "defs.h" + +template <typename TDerived> +struct TNamedTupleBase { + friend bool operator==(const TDerived& x, const TDerived& y) { + return x.ConvertToTuple() == y.ConvertToTuple(); + } + + friend bool operator!=(const TDerived& x, const TDerived& y) { + return x.ConvertToTuple() != y.ConvertToTuple(); + } + + friend bool operator<(const TDerived& x, const TDerived& y) { + return x.ConvertToTuple() < y.ConvertToTuple(); + } + + friend bool operator<=(const TDerived& x, const TDerived& y) { + return x.ConvertToTuple() <= y.ConvertToTuple(); + } + + friend bool operator>(const TDerived& x, const TDerived& y) { + return x.ConvertToTuple() > y.ConvertToTuple(); + } + + friend bool operator>=(const TDerived& x, const TDerived& y) { + return x.ConvertToTuple() >= y.ConvertToTuple(); + } +}; diff --git a/library/cpp/actors/util/queue_chunk.h b/library/cpp/actors/util/queue_chunk.h new file mode 100644 index 0000000000..8a4e02d8cb --- /dev/null +++ b/library/cpp/actors/util/queue_chunk.h @@ -0,0 +1,29 @@ +#pragma once + +#include "defs.h" + +template <typename T, ui32 TSize, typename TDerived> +struct TQueueChunkDerived { + static const ui32 EntriesCount = (TSize - sizeof(TQueueChunkDerived*)) / sizeof(T); + static_assert(EntriesCount > 0, "expect EntriesCount > 0"); + + volatile T Entries[EntriesCount]; + TDerived* volatile Next; + + TQueueChunkDerived() { + memset(this, 0, sizeof(TQueueChunkDerived)); + } +}; + +template <typename T, ui32 TSize> +struct TQueueChunk { + static const ui32 EntriesCount = (TSize - sizeof(TQueueChunk*)) / sizeof(T); + static_assert(EntriesCount > 0, "expect EntriesCount > 0"); + + volatile T Entries[EntriesCount]; + TQueueChunk* volatile Next; + + TQueueChunk() { + memset(this, 0, sizeof(TQueueChunk)); + } +}; diff --git a/library/cpp/actors/util/queue_oneone_inplace.h b/library/cpp/actors/util/queue_oneone_inplace.h new file mode 100644 index 0000000000..d7ec8bb21c --- /dev/null +++ b/library/cpp/actors/util/queue_oneone_inplace.h @@ -0,0 +1,118 @@ +#pragma once + +#include "defs.h" +#include "queue_chunk.h" + +template <typename T, ui32 TSize, typename TChunk = TQueueChunk<T, TSize>> +class TOneOneQueueInplace : TNonCopyable { + static_assert(std::is_integral<T>::value || std::is_pointer<T>::value, "expect std::is_integral<T>::value || std::is_pointer<T>::valuer"); + + TChunk* ReadFrom; + ui32 ReadPosition; + ui32 WritePosition; + TChunk* WriteTo; + + friend class TReadIterator; + +public: + class TReadIterator { + TChunk* ReadFrom; + ui32 ReadPosition; + + public: + TReadIterator(TChunk* readFrom, ui32 readPosition) + : ReadFrom(readFrom) + , ReadPosition(readPosition) + { + } + + inline T Next() { + TChunk* head = ReadFrom; + if (ReadPosition != TChunk::EntriesCount) { + return AtomicLoad(&head->Entries[ReadPosition++]); + } else if (TChunk* next = AtomicLoad(&head->Next)) { + ReadFrom = next; + ReadPosition = 0; + return Next(); + } + return T{}; + } + }; + + TOneOneQueueInplace() + : ReadFrom(new TChunk()) + , ReadPosition(0) + , WritePosition(0) + , WriteTo(ReadFrom) + { + } + + ~TOneOneQueueInplace() { + Y_VERIFY_DEBUG(Head() == 0); + delete ReadFrom; + } + + struct TPtrCleanDestructor { + static inline void Destroy(TOneOneQueueInplace<T, TSize>* x) noexcept { + while (T 
head = x->Pop()) + delete head; + delete x; + } + }; + + struct TCleanDestructor { + static inline void Destroy(TOneOneQueueInplace<T, TSize>* x) noexcept { + while (x->Pop() != nullptr) + continue; + delete x; + } + }; + + struct TPtrCleanInplaceMallocDestructor { + template <typename TPtrVal> + static inline void Destroy(TOneOneQueueInplace<TPtrVal*, TSize>* x) noexcept { + while (TPtrVal* head = x->Pop()) { + head->~TPtrVal(); + free(head); + } + delete x; + } + }; + + void Push(T x) noexcept { + if (WritePosition != TChunk::EntriesCount) { + AtomicStore(&WriteTo->Entries[WritePosition], x); + ++WritePosition; + } else { + TChunk* next = new TChunk(); + next->Entries[0] = x; + AtomicStore(&WriteTo->Next, next); + WriteTo = next; + WritePosition = 1; + } + } + + T Head() { + TChunk* head = ReadFrom; + if (ReadPosition != TChunk::EntriesCount) { + return AtomicLoad(&head->Entries[ReadPosition]); + } else if (TChunk* next = AtomicLoad(&head->Next)) { + ReadFrom = next; + delete head; + ReadPosition = 0; + return Head(); + } + return T{}; + } + + T Pop() { + T ret = Head(); + if (ret) + ++ReadPosition; + return ret; + } + + TReadIterator Iterator() { + return TReadIterator(ReadFrom, ReadPosition); + } +}; diff --git a/library/cpp/actors/util/recentwnd.h b/library/cpp/actors/util/recentwnd.h new file mode 100644 index 0000000000..ba1ede6f29 --- /dev/null +++ b/library/cpp/actors/util/recentwnd.h @@ -0,0 +1,67 @@ +#pragma once + +#include <util/generic/deque.h> + +template <typename TElem, + template <typename, typename...> class TContainer = TDeque> +class TRecentWnd { +public: + TRecentWnd(ui32 wndSize) + : MaxWndSize_(wndSize) + { + } + + void Push(const TElem& elem) { + if (Window_.size() == MaxWndSize_) + Window_.erase(Window_.begin()); + Window_.emplace_back(elem); + } + + void Push(TElem&& elem) { + if (Window_.size() == MaxWndSize_) + Window_.erase(Window_.begin()); + Window_.emplace_back(std::move(elem)); + } + + TElem& Last() { + return Window_.back(); + } + const TElem& Last() const { + return Window_.back(); + } + bool Full() const { + return Window_.size() == MaxWndSize_; + } + ui64 Size() const { + return Window_.size(); + } + + using const_iterator = typename TContainer<TElem>::const_iterator; + + const_iterator begin() { + return Window_.begin(); + } + const_iterator end() { + return Window_.end(); + } + + void Reset(ui32 wndSize = 0) { + Window_.clear(); + if (wndSize != 0) { + MaxWndSize_ = wndSize; + } + } + + void ResetWnd(ui32 wndSize) { + Y_VERIFY(wndSize != 0); + MaxWndSize_ = wndSize; + if (Window_.size() > MaxWndSize_) { + Window_.erase(Window_.begin(), + Window_.begin() + Window_.size() - MaxWndSize_); + } + } + +private: + TContainer<TElem> Window_; + ui32 MaxWndSize_; +}; diff --git a/library/cpp/actors/util/rope.h b/library/cpp/actors/util/rope.h new file mode 100644 index 0000000000..f5595efbaa --- /dev/null +++ b/library/cpp/actors/util/rope.h @@ -0,0 +1,1161 @@ +#pragma once + +#include <util/generic/ptr.h> +#include <util/generic/string.h> +#include <util/generic/hash_set.h> +#include <util/stream/str.h> +#include <util/system/sanitizers.h> +#include <util/system/valgrind.h> + +// exactly one of them must be included +#include "rope_cont_list.h" +//#include "rope_cont_deque.h" + +struct IRopeChunkBackend : TThrRefBase { + using TData = std::tuple<const char*, size_t>; + virtual ~IRopeChunkBackend() = default; + virtual TData GetData() const = 0; + virtual size_t GetCapacity() const = 0; + using TPtr = TIntrusivePtr<IRopeChunkBackend>; +}; + +class 
TRopeAlignedBuffer : public IRopeChunkBackend { + static constexpr size_t Alignment = 16; + static constexpr size_t MallocAlignment = sizeof(size_t); + + ui32 Size; + const ui32 Capacity; + const ui32 Offset; + alignas(Alignment) char Data[]; + + TRopeAlignedBuffer(size_t size) + : Size(size) + , Capacity(size) + , Offset((Alignment - reinterpret_cast<uintptr_t>(Data)) & (Alignment - 1)) + { + Y_VERIFY(Offset <= Alignment - MallocAlignment); + } + +public: + static TIntrusivePtr<TRopeAlignedBuffer> Allocate(size_t size) { + return new(malloc(sizeof(TRopeAlignedBuffer) + size + Alignment - MallocAlignment)) TRopeAlignedBuffer(size); + } + + void *operator new(size_t) { + Y_FAIL(); + } + + void *operator new(size_t, void *ptr) { + return ptr; + } + + void operator delete(void *ptr) { + free(ptr); + } + + void operator delete(void* p, void* ptr) { + Y_UNUSED(p); + Y_UNUSED(ptr); + } + + TData GetData() const override { + return {Data + Offset, Size}; + } + + size_t GetCapacity() const override { + return Capacity; + } + + char *GetBuffer() { + return Data + Offset; + } + + void AdjustSize(size_t size) { + Y_VERIFY(size <= Capacity); + Size = size; + } +}; + +namespace NRopeDetails { + + template<bool IsConst, typename TRope, typename TList> + struct TIteratorTraits; + + template<typename TRope, typename TList> + struct TIteratorTraits<true, TRope, TList> { + using TRopePtr = const TRope*; + using TListIterator = typename TList::const_iterator; + }; + + template<typename TRope, typename TList> + struct TIteratorTraits<false, TRope, TList> { + using TRopePtr = TRope*; + using TListIterator = typename TList::iterator; + }; + +} // NRopeDetails + +class TRopeArena; + +template<typename T> +struct always_false : std::false_type {}; + +class TRope { + friend class TRopeArena; + + struct TChunk + { + class TBackend { + enum class EType : uintptr_t { + STRING, + ROPE_CHUNK_BACKEND, + }; + + uintptr_t Owner = 0; // lower bits contain type of the owner + + public: + TBackend() = delete; + + TBackend(const TBackend& other) + : Owner(Clone(other.Owner)) + {} + + TBackend(TBackend&& other) + : Owner(std::exchange(other.Owner, 0)) + {} + + TBackend(TString s) + : Owner(Construct<TString>(EType::STRING, std::move(s))) + {} + + TBackend(IRopeChunkBackend::TPtr backend) + : Owner(Construct<IRopeChunkBackend::TPtr>(EType::ROPE_CHUNK_BACKEND, std::move(backend))) + {} + + ~TBackend() { + if (Owner) { + Destroy(Owner); + } + } + + TBackend& operator =(const TBackend& other) { + if (Owner) { + Destroy(Owner); + } + Owner = Clone(other.Owner); + return *this; + } + + TBackend& operator =(TBackend&& other) { + if (Owner) { + Destroy(Owner); + } + Owner = std::exchange(other.Owner, 0); + return *this; + } + + bool operator ==(const TBackend& other) const { + return Owner == other.Owner; + } + + const void *UniqueId() const { + return reinterpret_cast<const void*>(Owner); + } + + const IRopeChunkBackend::TData GetData() const { + return Visit(Owner, [](EType, auto& value) -> IRopeChunkBackend::TData { + using T = std::decay_t<decltype(value)>; + if constexpr (std::is_same_v<T, TString>) { + return {value.data(), value.size()}; + } else if constexpr (std::is_same_v<T, IRopeChunkBackend::TPtr>) { + return value->GetData(); + } else { + return {}; + } + }); + } + + size_t GetCapacity() const { + return Visit(Owner, [](EType, auto& value) { + using T = std::decay_t<decltype(value)>; + if constexpr (std::is_same_v<T, TString>) { + return value.capacity(); + } else if constexpr (std::is_same_v<T, IRopeChunkBackend::TPtr>) 
{ + return value->GetCapacity(); + } else { + Y_FAIL(); + } + }); + } + + private: + static constexpr uintptr_t TypeMask = (1 << 3) - 1; + static constexpr uintptr_t ValueMask = ~TypeMask; + + template<typename T> + struct TObjectHolder { + struct TWrappedObject : TThrRefBase { + T Value; + TWrappedObject(T&& value) + : Value(std::move(value)) + {} + }; + TIntrusivePtr<TWrappedObject> Object; + + TObjectHolder(T&& object) + : Object(MakeIntrusive<TWrappedObject>(std::move(object))) + {} + }; + + template<typename TObject> + static uintptr_t Construct(EType type, TObject object) { + if constexpr (sizeof(TObject) <= sizeof(uintptr_t)) { + uintptr_t res = 0; + new(&res) TObject(std::move(object)); + Y_VERIFY_DEBUG((res & ValueMask) == res); + return res | static_cast<uintptr_t>(type); + } else { + return Construct<TObjectHolder<TObject>>(type, TObjectHolder<TObject>(std::move(object))); + } + } + + template<typename TCallback> + static std::invoke_result_t<TCallback, EType, TString&> VisitRaw(uintptr_t value, TCallback&& callback) { + Y_VERIFY_DEBUG(value); + const EType type = static_cast<EType>(value & TypeMask); + value &= ValueMask; + auto caller = [&](auto& value) { return std::invoke(std::forward<TCallback>(callback), type, value); }; + auto wrapper = [&](auto& value) { + using T = std::decay_t<decltype(value)>; + if constexpr (sizeof(T) <= sizeof(uintptr_t)) { + return caller(value); + } else { + return caller(reinterpret_cast<TObjectHolder<T>&>(value)); + } + }; + switch (type) { + case EType::STRING: return wrapper(reinterpret_cast<TString&>(value)); + case EType::ROPE_CHUNK_BACKEND: return wrapper(reinterpret_cast<IRopeChunkBackend::TPtr&>(value)); + } + Y_FAIL("Unexpected type# %" PRIu64, static_cast<ui64>(type)); + } + + template<typename TCallback> + static std::invoke_result_t<TCallback, EType, TString&> Visit(uintptr_t value, TCallback&& callback) { + return VisitRaw(value, [&](EType type, auto& value) { + return std::invoke(std::forward<TCallback>(callback), type, Unwrap(value)); + }); + } + + template<typename T> static T& Unwrap(T& object) { return object; } + template<typename T> static T& Unwrap(TObjectHolder<T>& holder) { return holder.Object->Value; } + + static uintptr_t Clone(uintptr_t value) { + return VisitRaw(value, [](EType type, auto& value) { return Construct(type, value); }); + } + + static void Destroy(uintptr_t value) { + VisitRaw(value, [](EType, auto& value) { CallDtor(value); }); + } + + template<typename T> + static void CallDtor(T& value) { + value.~T(); + } + }; + + TBackend Backend; // who actually holds the data + const char *Begin; // data start + const char *End; // data end + + static constexpr struct TSlice {} Slice{}; + + template<typename T> + TChunk(T&& backend, const IRopeChunkBackend::TData& data) + : Backend(std::move(backend)) + , Begin(std::get<0>(data)) + , End(Begin + std::get<1>(data)) + { + Y_VERIFY_DEBUG(Begin != End); + } + + TChunk(TString s) + : Backend(std::move(s)) + { + size_t size; + std::tie(Begin, size) = Backend.GetData(); + End = Begin + size; + } + + TChunk(IRopeChunkBackend::TPtr backend) + : TChunk(backend, backend->GetData()) + {} + + TChunk(TSlice, const char *data, size_t size, const TChunk& from) + : TChunk(from.Backend, {data, size}) + {} + + TChunk(TSlice, const char *begin, const char *end, const TChunk& from) + : TChunk(Slice, begin, end - begin, from) + {} + + explicit TChunk(const TChunk& other) + : Backend(other.Backend) + , Begin(other.Begin) + , End(other.End) + {} + + TChunk(TChunk&& other) + : 
Backend(std::move(other.Backend)) + , Begin(other.Begin) + , End(other.End) + {} + + TChunk& operator =(const TChunk&) = default; + TChunk& operator =(TChunk&&) = default; + + size_t GetSize() const { + return End - Begin; + } + + static void Clear(TChunk& chunk) { + chunk.Begin = nullptr; + } + + static bool IsInUse(const TChunk& chunk) { + return chunk.Begin != nullptr; + } + + size_t GetCapacity() const { + return Backend.GetCapacity(); + } + }; + + using TChunkList = NRopeDetails::TChunkList<TChunk>; + +private: + // we use a list here to store chain items, as we have to keep iterators valid when erase/insert operations are invoked; + // the rope iterator wraps the underlying container's iterator, so we have to use a container that keeps iterators valid on delete, + // thus, the list + TChunkList Chain; + size_t Size = 0; + +private: + template<bool IsConst> + class TIteratorImpl { + using TTraits = NRopeDetails::TIteratorTraits<IsConst, TRope, TChunkList>; + + typename TTraits::TRopePtr Rope; + typename TTraits::TListIterator Iter; + const char *Ptr; // ptr is always nullptr when iterator is positioned at the rope end + +#ifndef NDEBUG + ui32 ValidityToken; +#endif + + private: + TIteratorImpl(typename TTraits::TRopePtr rope, typename TTraits::TListIterator iter, const char *ptr = nullptr) + : Rope(rope) + , Iter(iter) + , Ptr(ptr) +#ifndef NDEBUG + , ValidityToken(Rope->GetValidityToken()) +#endif + {} + + public: + TIteratorImpl() + : Rope(nullptr) + , Ptr(nullptr) + {} + + template<bool IsOtherConst> + TIteratorImpl(const TIteratorImpl<IsOtherConst>& other) + : Rope(other.Rope) + , Iter(other.Iter) + , Ptr(other.Ptr) +#ifndef NDEBUG + , ValidityToken(other.ValidityToken) +#endif + {} + + void CheckValid() const { +#ifndef NDEBUG + Y_VERIFY(ValidityToken == Rope->GetValidityToken()); +#endif + } + + TIteratorImpl& operator +=(size_t amount) { + CheckValid(); + + while (amount) { + Y_VERIFY_DEBUG(Valid()); + const size_t max = ContiguousSize(); + const size_t num = std::min(amount, max); + amount -= num; + Ptr += num; + if (Ptr == Iter->End) { + AdvanceToNextContiguousBlock(); + } + } + + return *this; + } + + TIteratorImpl operator +(size_t amount) const { + CheckValid(); + + return TIteratorImpl(*this) += amount; + } + + TIteratorImpl& operator -=(size_t amount) { + CheckValid(); + + while (amount) { + const size_t num = Ptr ? std::min<size_t>(amount, Ptr - Iter->Begin) : 0; + amount -= num; + Ptr -= num; + if (amount) { + Y_VERIFY_DEBUG(Iter != GetChainBegin()); + --Iter; + Ptr = Iter->End; + } + } + + return *this; + } + + TIteratorImpl operator -(size_t amount) const { + CheckValid(); + return TIteratorImpl(*this) -= amount; + } + + std::pair<const char*, size_t> operator *() const { + return {ContiguousData(), ContiguousSize()}; + } + + TIteratorImpl& operator ++() { + AdvanceToNextContiguousBlock(); + return *this; + } + + TIteratorImpl operator ++(int) const { + auto it(*this); + it.AdvanceToNextContiguousBlock(); + return it; + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Operations with contiguous data + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + // Get the pointer to the contiguous block of data; valid locations are [Data; Data + Size). + const char *ContiguousData() const { + CheckValid(); + return Ptr; + } + + // Get the size of the contiguous block. + size_t ContiguousSize() const { + CheckValid(); + return Ptr ?
Iter->End - Ptr : 0; + } + + size_t ChunkOffset() const { + return Ptr ? Ptr - Iter->Begin : 0; + } + + // Advance to next contiguous block of data. + void AdvanceToNextContiguousBlock() { + CheckValid(); + Y_VERIFY_DEBUG(Valid()); + ++Iter; + Ptr = Iter != GetChainEnd() ? Iter->Begin : nullptr; + } + + // Extract some data and advance. Size is not checked here, so it must be valid. + void ExtractPlainDataAndAdvance(void *buffer, size_t len) { + CheckValid(); + + while (len) { + Y_VERIFY_DEBUG(Ptr); + + // calculate the number of bytes we need to move + const size_t max = ContiguousSize(); + const size_t num = std::min(len, max); + + // copy data to the buffer and advance buffer pointers + memcpy(buffer, Ptr, num); + buffer = static_cast<char*>(buffer) + num; + len -= num; + + // advance iterator itself + Ptr += num; + if (Ptr == Iter->End) { + AdvanceToNextContiguousBlock(); + } + } + } + + // Returns true if the iterator points at valid data, i.e. it is not at the end of the rope. + bool Valid() const { + CheckValid(); + return Ptr; + } + + template<bool IsOtherConst> + bool operator ==(const TIteratorImpl<IsOtherConst>& other) const { + Y_VERIFY_DEBUG(Rope == other.Rope); + CheckValid(); + other.CheckValid(); + return Iter == other.Iter && Ptr == other.Ptr; + } + + template<bool IsOtherConst> + bool operator !=(const TIteratorImpl<IsOtherConst>& other) const { + CheckValid(); + other.CheckValid(); + return !(*this == other); + } + + private: + friend class TRope; + + typename TTraits::TListIterator operator ->() const { + CheckValid(); + return Iter; + } + + const TChunk& GetChunk() const { + CheckValid(); + return *Iter; + } + + typename TTraits::TListIterator GetChainBegin() const { + CheckValid(); + return Rope->Chain.begin(); + } + + typename TTraits::TListIterator GetChainEnd() const { + CheckValid(); + return Rope->Chain.end(); + } + + bool PointsToChunkMiddle() const { + CheckValid(); + return Ptr && Ptr != Iter->Begin; + } + }; + +public: +#ifndef NDEBUG + ui32 ValidityToken = 0; + ui32 GetValidityToken() const { return ValidityToken; } + void InvalidateIterators() { ++ValidityToken; } +#else + void InvalidateIterators() {} +#endif + +public: + using TConstIterator = TIteratorImpl<true>; + using TIterator = TIteratorImpl<false>; + +public: + TRope() = default; + TRope(const TRope& rope) = default; + + TRope(TRope&& rope) + : Chain(std::move(rope.Chain)) + , Size(std::exchange(rope.Size, 0)) + { + rope.InvalidateIterators(); + } + + TRope(TString s) { + if (s) { + Size = s.size(); + s.reserve(32); + Chain.PutToEnd(std::move(s)); + } + } + + TRope(IRopeChunkBackend::TPtr item) { + std::tie(std::ignore, Size) = item->GetData(); + Chain.PutToEnd(std::move(item)); + } + + TRope(TConstIterator begin, TConstIterator end) { + Y_VERIFY_DEBUG(begin.Rope == end.Rope); + if (begin.Rope == this) { + TRope temp(begin, end); + *this = std::move(temp); + return; + } + + while (begin.Iter != end.Iter) { + const size_t size = begin.ContiguousSize(); + Chain.PutToEnd(TChunk::Slice, begin.ContiguousData(), size, begin.GetChunk()); + begin.AdvanceToNextContiguousBlock(); + Size += size; + } + + if (begin != end && end.PointsToChunkMiddle()) { + Chain.PutToEnd(TChunk::Slice, begin.Ptr, end.Ptr, begin.GetChunk()); + Size += end.Ptr - begin.Ptr; + } + } + + ~TRope() { + } + + // creates a copy of the rope; chunks with an inefficient storage ratio are re-copied through the arena allocator + static TRope CopySpaceOptimized(TRope&& origin, size_t worstRatioPer1k, TRopeArena& arena); + + TRope& operator=(const TRope& other) { + Chain =
other.Chain; + Size = other.Size; + return *this; + } + + TRope& operator=(TRope&& other) { + Chain = std::move(other.Chain); + Size = std::exchange(other.Size, 0); + InvalidateIterators(); + other.InvalidateIterators(); + return *this; + } + + size_t GetSize() const { + return Size; + } + + bool IsEmpty() const { + return !Size; + } + + operator bool() const { + return Chain; + } + + TIterator Begin() { + return *this ? TIterator(this, Chain.begin(), Chain.GetFirstChunk().Begin) : End(); + } + + TIterator End() { + return TIterator(this, Chain.end()); + } + + TIterator Iterator(TChunkList::iterator it) { + return TIterator(this, it, it != Chain.end() ? it->Begin : nullptr); + } + + TIterator Position(size_t index) { + return Begin() + index; + } + + TConstIterator Begin() const { + return *this ? TConstIterator(this, Chain.begin(), Chain.GetFirstChunk().Begin) : End(); + } + + TConstIterator End() const { + return TConstIterator(this, Chain.end()); + } + + TConstIterator Position(size_t index) const { + return Begin() + index; + } + + TConstIterator begin() const { return Begin(); } + TConstIterator end() const { return End(); } + + void Erase(TIterator begin, TIterator end) { + Cut(begin, end, nullptr); + } + + TRope Extract(TIterator begin, TIterator end) { + TRope res; + Cut(begin, end, &res); + return res; + } + + void ExtractFront(size_t num, TRope *dest) { + Y_VERIFY(Size >= num); + if (num == Size && !*dest) { + *dest = std::move(*this); + return; + } + Size -= num; + dest->Size += num; + TChunkList::iterator it, first = Chain.begin(); + for (it = first; num && num >= it->GetSize(); ++it) { + num -= it->GetSize(); + } + if (it != first) { + if (dest->Chain) { + auto& last = dest->Chain.GetLastChunk(); + if (last.Backend == first->Backend && last.End == first->Begin) { + last.End = first->End; + first = Chain.Erase(first); // TODO(alexvru): "it" gets invalidated here on some containers + } + } + dest->Chain.Splice(dest->Chain.end(), Chain, first, it); + } + if (num) { + auto it = Chain.begin(); + if (dest->Chain) { + auto& last = dest->Chain.GetLastChunk(); + if (last.Backend == first->Backend && last.End == first->Begin) { + first->Begin += num; + last.End = first->Begin; + return; + } + } + dest->Chain.PutToEnd(TChunk::Slice, it->Begin, it->Begin + num, *it); + it->Begin += num; + } + } + + void Insert(TIterator pos, TRope&& rope) { + Y_VERIFY_DEBUG(this == pos.Rope); + Y_VERIFY_DEBUG(this != &rope); + + if (!rope) { + return; // do nothing for empty rope + } + + // adjust size + Size += std::exchange(rope.Size, 0); + + // check if we have to split the block + if (pos.PointsToChunkMiddle()) { + pos.Iter = Chain.InsertBefore(pos.Iter, TChunk::Slice, pos->Begin, pos.Ptr, pos.GetChunk()); + ++pos.Iter; + pos->Begin = pos.Ptr; + } + + // perform glueing if possible + TChunk *ropeLeft = &rope.Chain.GetFirstChunk(); + TChunk *ropeRight = &rope.Chain.GetLastChunk(); + bool gluedLeft = false, gluedRight = false; + if (pos.Iter != Chain.begin()) { // glue left part whenever possible + // obtain iterator to previous chunk + auto prev(pos.Iter); + --prev; + if (prev->End == ropeLeft->Begin && prev->Backend == ropeLeft->Backend) { // it is glueable + prev->End = ropeLeft->End; + gluedLeft = true; + } + } + if (pos.Iter != Chain.end() && ropeRight->End == pos->Begin && ropeRight->Backend == pos->Backend) { + pos->Begin = ropeRight->Begin; + gluedRight = true; + } + if (gluedLeft) { + rope.Chain.EraseFront(); + } + if (gluedRight) { + if (rope) { + rope.Chain.EraseBack(); + } else { // it looks 
like double-glueing for the same chunk, we have to drop previous one + auto prev(pos.Iter); + --prev; + pos->Begin = prev->Begin; + pos.Iter = Chain.Erase(prev); + } + } + if (rope) { // insert remains + Chain.Splice(pos.Iter, rope.Chain, rope.Chain.begin(), rope.Chain.end()); + } + Y_VERIFY_DEBUG(!rope); + InvalidateIterators(); + } + + void EraseFront(size_t len) { + Y_VERIFY_DEBUG(Size >= len); + Size -= len; + + while (len) { + Y_VERIFY_DEBUG(Chain); + TChunk& item = Chain.GetFirstChunk(); + const size_t itemSize = item.GetSize(); + if (len >= itemSize) { + Chain.EraseFront(); + len -= itemSize; + } else { + item.Begin += len; + break; + } + } + + InvalidateIterators(); + } + + void EraseBack(size_t len) { + Y_VERIFY_DEBUG(Size >= len); + Size -= len; + + while (len) { + Y_VERIFY_DEBUG(Chain); + TChunk& item = Chain.GetLastChunk(); + const size_t itemSize = item.GetSize(); + if (len >= itemSize) { + Chain.EraseBack(); + len -= itemSize; + } else { + item.End -= len; + break; + } + } + + InvalidateIterators(); + } + + bool ExtractFrontPlain(void *buffer, size_t len) { + // check if we have enough data in the rope + if (Size < len) { + return false; + } + Size -= len; + while (len) { + auto& chunk = Chain.GetFirstChunk(); + const size_t num = Min(len, chunk.GetSize()); + memcpy(buffer, chunk.Begin, num); + buffer = static_cast<char*>(buffer) + num; + len -= num; + chunk.Begin += num; + if (chunk.Begin == chunk.End) { + Chain.Erase(Chain.begin()); + } + } + InvalidateIterators(); + return true; + } + + bool FetchFrontPlain(char **ptr, size_t *remain) { + const size_t num = Min(*remain, Size); + ExtractFrontPlain(*ptr, num); + *ptr += num; + *remain -= num; + return !*remain; + } + + static int Compare(const TRope& x, const TRope& y) { + TConstIterator xIter = x.Begin(), yIter = y.Begin(); + while (xIter.Valid() && yIter.Valid()) { + const size_t step = std::min(xIter.ContiguousSize(), yIter.ContiguousSize()); + if (int res = memcmp(xIter.ContiguousData(), yIter.ContiguousData(), step)) { + return res; + } + xIter += step; + yIter += step; + } + return xIter.Valid() - yIter.Valid(); + } + + // Use this method carefully -- it may significantly reduce performance when misused. 
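+ // (note: it materializes the whole rope into a single contiguous TString, copying every chunk)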
+ TString ConvertToString() const { + TString res = TString::Uninitialized(GetSize()); + Begin().ExtractPlainDataAndAdvance(res.Detach(), res.size()); + return res; + } + + TString DebugString() const { + TStringStream s; + s << "{Size# " << Size; + for (const auto& chunk : Chain) { + const char *data; + std::tie(data, std::ignore) = chunk.Backend.GetData(); + s << " [" << chunk.Begin - data << ", " << chunk.End - data << ")@" << chunk.Backend.UniqueId(); + } + s << "}"; + return s.Str(); + } + + friend bool operator==(const TRope& x, const TRope& y) { return Compare(x, y) == 0; } + friend bool operator!=(const TRope& x, const TRope& y) { return Compare(x, y) != 0; } + friend bool operator< (const TRope& x, const TRope& y) { return Compare(x, y) < 0; } + friend bool operator<=(const TRope& x, const TRope& y) { return Compare(x, y) <= 0; } + friend bool operator> (const TRope& x, const TRope& y) { return Compare(x, y) > 0; } + friend bool operator>=(const TRope& x, const TRope& y) { return Compare(x, y) >= 0; } + +private: + void Cut(TIterator begin, TIterator end, TRope *target) { + // ensure all iterators belong to us + Y_VERIFY_DEBUG(this == begin.Rope && this == end.Rope); + + // if begin and end are equal, we do nothing -- checking this case allows us to find out that begin does not + // point to End(), for example + if (begin == end) { + return; + } + + auto addBlock = [&](const TChunk& from, const char *begin, const char *end) { + if (target) { + target->Chain.PutToEnd(TChunk::Slice, begin, end, from); + target->Size += end - begin; + } + Size -= end - begin; + }; + + // consider special case -- when begin and end point to the same block; in this case we have to split up this + // block into two parts + if (begin.Iter == end.Iter) { + addBlock(begin.GetChunk(), begin.Ptr, end.Ptr); + const char *firstChunkBegin = begin.PointsToChunkMiddle() ?
begin->Begin : nullptr; + begin->Begin = end.Ptr; // this affects both begin and end iterator pointed values + if (firstChunkBegin) { + Chain.InsertBefore(begin.Iter, TChunk::Slice, firstChunkBegin, begin.Ptr, begin.GetChunk()); + } + } else { + // check the first iterator -- if it does not start at the beginning of the block, we have to adjust the end of the + // first block to match the begin iterator and switch to the next block + if (begin.PointsToChunkMiddle()) { + addBlock(begin.GetChunk(), begin.Ptr, begin->End); + begin->End = begin.Ptr; + begin.AdvanceToNextContiguousBlock(); + } + + // now drop full blocks + size_t rangeSize = 0; + for (auto it = begin.Iter; it != end.Iter; ++it) { + Y_VERIFY_DEBUG(it->GetSize()); + rangeSize += it->GetSize(); + } + if (rangeSize) { + if (target) { + end.Iter = target->Chain.Splice(target->Chain.end(), Chain, begin.Iter, end.Iter); + target->Size += rangeSize; + } else { + end.Iter = Chain.Erase(begin.Iter, end.Iter); + } + Size -= rangeSize; + } + + // and cut the last block if necessary + if (end.PointsToChunkMiddle()) { + addBlock(end.GetChunk(), end->Begin, end.Ptr); + end->Begin = end.Ptr; + } + } + + InvalidateIterators(); + } +}; + +class TRopeArena { + using TAllocateCallback = std::function<TIntrusivePtr<IRopeChunkBackend>()>; + + TAllocateCallback Allocator; + TRope Arena; + size_t Size = 0; + THashSet<const void*> AccountedBuffers; + +public: + TRopeArena(TAllocateCallback&& allocator) + : Allocator(std::move(allocator)) + {} + + TRope CreateRope(const void *buffer, size_t len) { + TRope res; + + while (len) { + if (Arena) { + auto iter = Arena.Begin(); + Y_VERIFY_DEBUG(iter.Valid()); + char *dest = const_cast<char*>(iter.ContiguousData()); + const size_t bytesToCopy = std::min(len, iter.ContiguousSize()); + memcpy(dest, buffer, bytesToCopy); + buffer = static_cast<const char*>(buffer) + bytesToCopy; + len -= bytesToCopy; + res.Insert(res.End(), Arena.Extract(Arena.Begin(), Arena.Position(bytesToCopy))); + } else { + Arena.Insert(Arena.End(), TRope(Allocator())); + } + } + + // align arena on 8-byte boundary + const size_t align = 8; + if (const size_t padding = Arena.GetSize() % align) { + Arena.EraseFront(padding); + } + + return res; + } + + size_t GetSize() const { + return Size; + } + + void AccountChunk(const TRope::TChunk& chunk) { + if (AccountedBuffers.insert(chunk.Backend.UniqueId()).second) { + Size += chunk.GetCapacity(); + } + } +}; + +struct TRopeUtils { + static void Memset(TRope::TConstIterator dst, char c, size_t size) { + while (size) { + Y_VERIFY_DEBUG(dst.Valid()); + size_t len = std::min(size, dst.ContiguousSize()); + memset(const_cast<char*>(dst.ContiguousData()), c, len); + dst += len; + size -= len; + } + } + + static void Memcpy(TRope::TConstIterator dst, TRope::TConstIterator src, size_t size) { + while (size) { + Y_VERIFY_DEBUG(dst.Valid() && src.Valid(), + "Invalid iterator in memcpy: dst.Valid() - %" PRIu32 ", src.Valid() - %" PRIu32, + (ui32)dst.Valid(), (ui32)src.Valid()); + size_t len = std::min(size, std::min(dst.ContiguousSize(), src.ContiguousSize())); + memcpy(const_cast<char*>(dst.ContiguousData()), src.ContiguousData(), len); + dst += len; + src += len; + size -= len; + } + } + + static void Memcpy(TRope::TConstIterator dst, const char* src, size_t size) { + while (size) { + Y_VERIFY_DEBUG(dst.Valid()); + size_t len = std::min(size, dst.ContiguousSize()); + memcpy(const_cast<char*>(dst.ContiguousData()), src, len); + size -= len; + dst += len; + src += len; + } + } + + static void Memcpy(char* dst,
TRope::TConstIterator src, size_t size) { + while (size) { + Y_VERIFY_DEBUG(src.Valid()); + size_t len = std::min(size, src.ContiguousSize()); + memcpy(dst, src.ContiguousData(), len); + size -= len; + dst += len; + src += len; + } + } + + // copy at most sizeBound bytes while src remains valid + static size_t SafeMemcpy(char* dst, TRope::TIterator src, size_t sizeBound) { + size_t origSize = sizeBound; + while (sizeBound && src.Valid()) { + size_t len = Min(sizeBound, src.ContiguousSize()); + memcpy(dst, src.ContiguousData(), len); + sizeBound -= len; + dst += len; + src += len; + } + return origSize - sizeBound; + } +}; + +template<size_t BLOCK, size_t ALIGN = 16> +class TRopeSlideView { + alignas(ALIGN) char Slide[BLOCK]; // used when the distance from the current point to the next chunk boundary is less than BLOCK + TRope::TIterator Position; // current position in the rope + size_t Size; + char* Head; // points to the data; it is either the current rope chunk or Slide + +private: + void FillBlock() { + size_t chunkSize = Position.ContiguousSize(); + if (chunkSize >= BLOCK) { + Size = chunkSize; + Head = const_cast<char*>(Position.ContiguousData()); + } else { + Size = TRopeUtils::SafeMemcpy(Slide, Position, BLOCK); + Head = Slide; + } + } + +public: + TRopeSlideView(TRope::TIterator position) + : Position(position) + { + FillBlock(); + } + + TRopeSlideView(TRope &rope) + : TRopeSlideView(rope.Begin()) + {} + + // if the view is backed by the slide buffer, copy its contents back to the rope + void FlushBlock() { + if (Head == Slide) { + TRopeUtils::Memcpy(Position, Head, Size); + } + } + + TRope::TIterator operator+=(size_t amount) { + Position += amount; + FillBlock(); + return Position; + } + + TRope::TIterator GetPosition() const { + return Position; + } + + char* GetHead() const { + return Head; + } + + ui8* GetUi8Head() const { + return reinterpret_cast<ui8*>(Head); + } + + size_t ContiguousSize() const { + return Size; + } + + bool IsOnChunk() const { + return Head != Slide; + } +}; + +inline TRope TRope::CopySpaceOptimized(TRope&& origin, size_t worstRatioPer1k, TRopeArena& arena) { + TRope res; + for (TChunk& chunk : origin.Chain) { + size_t ratio = chunk.GetSize() * 1024 / chunk.GetCapacity(); + if (ratio < 1024 - worstRatioPer1k) { + res.Insert(res.End(), arena.CreateRope(chunk.Begin, chunk.GetSize())); + } else { + res.Chain.PutToEnd(std::move(chunk)); + } + } + res.Size = origin.Size; + origin = TRope(); + for (const TChunk& chunk : res.Chain) { + arena.AccountChunk(chunk); + } + return res; +} + + +#if defined(WITH_VALGRIND) || defined(_msan_enabled_) + +inline void CheckRopeIsDefined(TRope::TConstIterator begin, ui64 size) { + while (size) { + ui64 contiguousSize = Min(size, begin.ContiguousSize()); +# if defined(WITH_VALGRIND) + VALGRIND_CHECK_MEM_IS_DEFINED(begin.ContiguousData(), contiguousSize); +# endif +# if defined(_msan_enabled_) + NSan::CheckMemIsInitialized(begin.ContiguousData(), contiguousSize); +# endif + size -= contiguousSize; + begin += contiguousSize; + } +} + +# define CHECK_ROPE_IS_DEFINED(begin, size) CheckRopeIsDefined(begin, size) + +#else + +# define CHECK_ROPE_IS_DEFINED(begin, size) do {} while (false) + +#endif diff --git a/library/cpp/actors/util/rope_cont_deque.h b/library/cpp/actors/util/rope_cont_deque.h new file mode 100644 index 0000000000..d1d122c49c --- /dev/null +++ b/library/cpp/actors/util/rope_cont_deque.h @@ -0,0 +1,181 @@ +#pragma once + +#include <library/cpp/containers/stack_vector/stack_vec.h> +#include <deque> + +namespace NRopeDetails { + +template<typename TChunk> +class TChunkList { +
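// hybrid storage: chunks live in a small stack-based vector (Inplace) until it overflows, after which they spill into the deque +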
std::deque<TChunk> Chunks; + + static constexpr size_t MaxInplaceItems = 4; + using TInplace = TStackVec<TChunk, MaxInplaceItems>; + TInplace Inplace; + +private: + template<typename TChunksIt, typename TInplaceIt, typename TValue> + struct TIterator { + TChunksIt ChunksIt; + TInplaceIt InplaceIt; + + TIterator() = default; + + TIterator(TChunksIt chunksIt, TInplaceIt inplaceIt) + : ChunksIt(std::move(chunksIt)) + , InplaceIt(inplaceIt) + {} + + template<typename A, typename B, typename C> + TIterator(const TIterator<A, B, C>& other) + : ChunksIt(other.ChunksIt) + , InplaceIt(other.InplaceIt) + {} + + TIterator(const TIterator&) = default; + TIterator(TIterator&&) = default; + TIterator& operator =(const TIterator&) = default; + TIterator& operator =(TIterator&&) = default; + + TValue& operator *() const { return InplaceIt != TInplaceIt() ? *InplaceIt : *ChunksIt; } + TValue* operator ->() const { return InplaceIt != TInplaceIt() ? &*InplaceIt : &*ChunksIt; } + + TIterator& operator ++() { + if (InplaceIt != TInplaceIt()) { + ++InplaceIt; + } else { + ++ChunksIt; + } + return *this; + } + + TIterator& operator --() { + if (InplaceIt != TInplaceIt()) { + --InplaceIt; + } else { + --ChunksIt; + } + return *this; + } + + template<typename A, typename B, typename C> + bool operator ==(const TIterator<A, B, C>& other) const { + return ChunksIt == other.ChunksIt && InplaceIt == other.InplaceIt; + } + + template<typename A, typename B, typename C> + bool operator !=(const TIterator<A, B, C>& other) const { + return ChunksIt != other.ChunksIt || InplaceIt != other.InplaceIt; + } + }; + +public: + using iterator = TIterator<typename std::deque<TChunk>::iterator, typename TInplace::iterator, TChunk>; + using const_iterator = TIterator<typename std::deque<TChunk>::const_iterator, typename TInplace::const_iterator, const TChunk>; + +public: + TChunkList() = default; + TChunkList(const TChunkList& other) = default; + TChunkList(TChunkList&& other) = default; + TChunkList& operator=(const TChunkList& other) = default; + TChunkList& operator=(TChunkList&& other) = default; + + template<typename... TArgs> + void PutToEnd(TArgs&&... args) { + InsertBefore(end(), std::forward<TArgs>(args)...); + } + + template<typename... TArgs> + iterator InsertBefore(iterator pos, TArgs&&... 
args) { + if (!Inplace) { + pos.InplaceIt = Inplace.end(); + } + if (Chunks.empty() && Inplace.size() < MaxInplaceItems) { + return {{}, Inplace.emplace(pos.InplaceIt, std::forward<TArgs>(args)...)}; + } else { + if (Inplace) { + Y_VERIFY_DEBUG(Chunks.empty()); + for (auto& item : Inplace) { + Chunks.push_back(std::move(item)); + } + pos.ChunksIt = pos.InplaceIt - Inplace.begin() + Chunks.begin(); + Inplace.clear(); + } + return {Chunks.emplace(pos.ChunksIt, std::forward<TArgs>(args)...), {}}; + } + } + + iterator Erase(iterator pos) { + if (Inplace) { + return {{}, Inplace.erase(pos.InplaceIt)}; + } else { + return {Chunks.erase(pos.ChunksIt), {}}; + } + } + + iterator Erase(iterator first, iterator last) { + if (Inplace) { + return {{}, Inplace.erase(first.InplaceIt, last.InplaceIt)}; + } else { + return {Chunks.erase(first.ChunksIt, last.ChunksIt), {}}; + } + } + + void EraseFront() { + if (Inplace) { + Inplace.erase(Inplace.begin()); + } else { + Chunks.pop_front(); + } + } + + void EraseBack() { + if (Inplace) { + Inplace.pop_back(); + } else { + Chunks.pop_back(); + } + } + + iterator Splice(iterator pos, TChunkList& from, iterator first, iterator last) { + if (!Inplace) { + pos.InplaceIt = Inplace.end(); + } + size_t n = 0; + for (auto it = first; it != last; ++it, ++n) + {} + if (Chunks.empty() && Inplace.size() + n <= MaxInplaceItems) { + if (first.InplaceIt != typename TInplace::iterator()) { + Inplace.insert(pos.InplaceIt, first.InplaceIt, last.InplaceIt); + } else { + Inplace.insert(pos.InplaceIt, first.ChunksIt, last.ChunksIt); + } + } else { + if (Inplace) { + Y_VERIFY_DEBUG(Chunks.empty()); + for (auto& item : Inplace) { + Chunks.push_back(std::move(item)); + } + pos.ChunksIt = pos.InplaceIt - Inplace.begin() + Chunks.begin(); + Inplace.clear(); + } + if (first.InplaceIt != typename TInplace::iterator()) { + Chunks.insert(pos.ChunksIt, first.InplaceIt, last.InplaceIt); + } else { + Chunks.insert(pos.ChunksIt, first.ChunksIt, last.ChunksIt); + } + } + return from.Erase(first, last); + } + + operator bool() const { return !Inplace.empty() || !Chunks.empty(); } + TChunk& GetFirstChunk() { return Inplace ? Inplace.front() : Chunks.front(); } + const TChunk& GetFirstChunk() const { return Inplace ? Inplace.front() : Chunks.front(); } + TChunk& GetLastChunk() { return Inplace ? Inplace.back() : Chunks.back(); } + iterator begin() { return {Chunks.begin(), Inplace ? Inplace.begin() : typename TInplace::iterator()}; } + const_iterator begin() const { return {Chunks.begin(), Inplace ? Inplace.begin() : typename TInplace::const_iterator()}; } + iterator end() { return {Chunks.end(), Inplace ? Inplace.end() : typename TInplace::iterator()}; } + const_iterator end() const { return {Chunks.end(), Inplace ? Inplace.end() : typename TInplace::const_iterator()}; } +}; + +} // NRopeDetails diff --git a/library/cpp/actors/util/rope_cont_list.h b/library/cpp/actors/util/rope_cont_list.h new file mode 100644 index 0000000000..18c136284e --- /dev/null +++ b/library/cpp/actors/util/rope_cont_list.h @@ -0,0 +1,159 @@ +#pragma once + +#include <util/generic/intrlist.h> + +namespace NRopeDetails { + +template<typename TChunk> +class TChunkList { + struct TItem : TIntrusiveListItem<TItem>, TChunk { + // delegating constructor + template<typename... TArgs> TItem(TArgs&&... args) : TChunk(std::forward<TArgs>(args)...) 
{} + }; + + using TList = TIntrusiveList<TItem>; + TList List; + + static constexpr size_t NumInplaceItems = 2; + char InplaceItems[sizeof(TItem) * NumInplaceItems]; + + template<typename... TArgs> + TItem *AllocateItem(TArgs&&... args) { + for (size_t index = 0; index < NumInplaceItems; ++index) { + TItem *chunk = GetInplaceItemPtr(index); + if (!TItem::IsInUse(*chunk)) { + return new(chunk) TItem(std::forward<TArgs>(args)...); + } + } + return new TItem(std::forward<TArgs>(args)...); + } + + void ReleaseItem(TItem *chunk) { + if (IsInplaceItem(chunk)) { + chunk->~TItem(); + TItem::Clear(*chunk); + } else { + delete chunk; + } + } + + void ReleaseItems(TList& list) { + while (list) { + ReleaseItem(list.Front()); + } + } + + void Prepare() { + for (size_t index = 0; index < NumInplaceItems; ++index) { + TItem::Clear(*GetInplaceItemPtr(index)); + } + } + + TItem *GetInplaceItemPtr(size_t index) { return reinterpret_cast<TItem*>(InplaceItems + index * sizeof(TItem)); } + bool IsInplaceItem(TItem *chunk) { return chunk >= GetInplaceItemPtr(0) && chunk < GetInplaceItemPtr(NumInplaceItems); } + +public: + using iterator = typename TList::iterator; + using const_iterator = typename TList::const_iterator; + +public: + TChunkList() { + Prepare(); + } + + ~TChunkList() { + ReleaseItems(List); +#ifndef NDEBUG + for (size_t index = 0; index < NumInplaceItems; ++index) { + Y_VERIFY(!TItem::IsInUse(*GetInplaceItemPtr(index))); + } +#endif + } + + TChunkList(const TChunkList& other) { + Prepare(); + for (const TItem& chunk : other.List) { + PutToEnd(TChunk(chunk)); + } + } + + TChunkList(TChunkList&& other) { + Prepare(); + Splice(end(), other, other.begin(), other.end()); + } + + TChunkList& operator=(const TChunkList& other) { + if (this != &other) { + ReleaseItems(List); + for (const TItem& chunk : other.List) { + PutToEnd(TChunk(chunk)); + } + } + return *this; + } + + TChunkList& operator=(TChunkList&& other) { + if (this != &other) { + ReleaseItems(List); + Splice(end(), other, other.begin(), other.end()); + } + return *this; + } + + template<typename... TArgs> + void PutToEnd(TArgs&&... args) { + InsertBefore(end(), std::forward<TArgs>(args)...); + } + + template<typename... TArgs> + iterator InsertBefore(iterator pos, TArgs&&... 
args) { + TItem *item = AllocateItem<TArgs...>(std::forward<TArgs>(args)...); + item->LinkBefore(pos.Item()); + return item; + } + + iterator Erase(iterator pos) { + ReleaseItem(&*pos++); + return pos; + } + + iterator Erase(iterator first, iterator last) { + TList temp; + TList::Cut(first, last, temp.end()); + ReleaseItems(temp); + return last; + } + + void EraseFront() { + ReleaseItem(List.PopFront()); + } + + void EraseBack() { + ReleaseItem(List.PopBack()); + } + + iterator Splice(iterator pos, TChunkList& from, iterator first, iterator last) { + for (auto it = first; it != last; ) { + if (from.IsInplaceItem(&*it)) { + TList::Cut(first, it, pos); + InsertBefore(pos, std::move(*it)); + it = first = from.Erase(it); + } else { + ++it; + } + } + TList::Cut(first, last, pos); + return last; + } + + operator bool() const { return static_cast<bool>(List); } + TChunk& GetFirstChunk() { return *List.Front(); } + const TChunk& GetFirstChunk() const { return *List.Front(); } + TChunk& GetLastChunk() { return *List.Back(); } + iterator begin() { return List.begin(); } + const_iterator begin() const { return List.begin(); } + iterator end() { return List.end(); } + const_iterator end() const { return List.end(); } +}; + +} // NRopeDetails diff --git a/library/cpp/actors/util/rope_ut.cpp b/library/cpp/actors/util/rope_ut.cpp new file mode 100644 index 0000000000..cabeed9230 --- /dev/null +++ b/library/cpp/actors/util/rope_ut.cpp @@ -0,0 +1,231 @@ +#include "rope.h" +#include <library/cpp/testing/unittest/registar.h> +#include <util/random/random.h> + +class TRopeStringBackend : public IRopeChunkBackend { + TString Buffer; + +public: + TRopeStringBackend(TString buffer) + : Buffer(std::move(buffer)) + {} + + TData GetData() const override { + return {Buffer.data(), Buffer.size()}; + } + + size_t GetCapacity() const override { + return Buffer.capacity(); + } +}; + +TRope CreateRope(TString s, size_t sliceSize) { + TRope res; + for (size_t i = 0; i < s.size(); ) { + size_t len = std::min(sliceSize, s.size() - i); + if (i % 2) { + res.Insert(res.End(), TRope(MakeIntrusive<TRopeStringBackend>(s.substr(i, len)))); + } else { + res.Insert(res.End(), TRope(s.substr(i, len))); + } + i += len; + } + return res; +} + +TString RopeToString(const TRope& rope) { + TString res; + auto iter = rope.Begin(); + while (iter != rope.End()) { + res.append(iter.ContiguousData(), iter.ContiguousSize()); + iter.AdvanceToNextContiguousBlock(); + } + + UNIT_ASSERT_VALUES_EQUAL(rope.GetSize(), res.size()); + + TString temp = TString::Uninitialized(rope.GetSize()); + rope.Begin().ExtractPlainDataAndAdvance(temp.Detach(), temp.size()); + UNIT_ASSERT_VALUES_EQUAL(temp, res); + + return res; +} + +TString Text = "No elements are copied or moved, only the internal pointers of the list nodes are re-pointed."; + +Y_UNIT_TEST_SUITE(TRope) { + + Y_UNIT_TEST(Leak) { + const size_t begin = 10, end = 20; + TRope rope = CreateRope(Text, 10); + rope.Erase(rope.Begin() + begin, rope.Begin() + end); + } + + Y_UNIT_TEST(BasicRange) { + TRope rope = CreateRope(Text, 10); + for (size_t begin = 0; begin < Text.size(); ++begin) { + for (size_t end = begin; end <= Text.size(); ++end) { + TRope::TIterator rBegin = rope.Begin() + begin; + TRope::TIterator rEnd = rope.Begin() + end; + UNIT_ASSERT_VALUES_EQUAL(RopeToString(TRope(rBegin, rEnd)), Text.substr(begin, end - begin)); + } + } + } + + Y_UNIT_TEST(Erase) { + for (size_t begin = 0; begin < Text.size(); ++begin) { + for (size_t end = begin; end <= Text.size(); ++end) { + TRope rope = 
CreateRope(Text, 10); + rope.Erase(rope.Begin() + begin, rope.Begin() + end); + TString text = Text; + text.erase(text.begin() + begin, text.begin() + end); + UNIT_ASSERT_VALUES_EQUAL(RopeToString(rope), text); + } + } + } + + Y_UNIT_TEST(Insert) { + TRope rope = CreateRope(Text, 10); + for (size_t begin = 0; begin < Text.size(); ++begin) { + for (size_t end = begin; end <= Text.size(); ++end) { + TRope part = TRope(rope.Begin() + begin, rope.Begin() + end); + for (size_t where = 0; where <= Text.size(); ++where) { + TRope x(rope); + x.Insert(x.Begin() + where, TRope(part)); + UNIT_ASSERT_VALUES_EQUAL(x.GetSize(), rope.GetSize() + part.GetSize()); + TString text = Text; + text.insert(text.begin() + where, Text.begin() + begin, Text.begin() + end); + UNIT_ASSERT_VALUES_EQUAL(RopeToString(x), text); + } + } + } + } + + Y_UNIT_TEST(Extract) { + for (size_t begin = 0; begin < Text.size(); ++begin) { + for (size_t end = begin; end <= Text.size(); ++end) { + TRope rope = CreateRope(Text, 10); + TRope part = rope.Extract(rope.Begin() + begin, rope.Begin() + end); + TString text = Text; + text.erase(text.begin() + begin, text.begin() + end); + UNIT_ASSERT_VALUES_EQUAL(RopeToString(rope), text); + UNIT_ASSERT_VALUES_EQUAL(RopeToString(part), Text.substr(begin, end - begin)); + } + } + } + + Y_UNIT_TEST(EraseFront) { + for (size_t pos = 0; pos <= Text.size(); ++pos) { + TRope rope = CreateRope(Text, 10); + rope.EraseFront(pos); + UNIT_ASSERT_VALUES_EQUAL(RopeToString(rope), Text.substr(pos)); + } + } + + Y_UNIT_TEST(EraseBack) { + for (size_t pos = 0; pos <= Text.size(); ++pos) { + TRope rope = CreateRope(Text, 10); + rope.EraseBack(pos); + UNIT_ASSERT_VALUES_EQUAL(RopeToString(rope), Text.substr(0, Text.size() - pos)); + } + } + + Y_UNIT_TEST(ExtractFront) { + for (size_t step = 1; step <= Text.size(); ++step) { + TRope rope = CreateRope(Text, 10); + TRope out; + while (const size_t len = Min(step, rope.GetSize())) { + rope.ExtractFront(len, &out); + UNIT_ASSERT(rope.GetSize() + out.GetSize() == Text.size()); + UNIT_ASSERT_VALUES_EQUAL(RopeToString(out), Text.substr(0, out.GetSize())); + } + } + } + + Y_UNIT_TEST(ExtractFrontPlain) { + for (size_t step = 1; step <= Text.size(); ++step) { + TRope rope = CreateRope(Text, 10); + TString buffer = Text; + auto it = rope.Begin(); + size_t remain = rope.GetSize(); + while (const size_t len = Min(step, remain)) { + TString data = TString::Uninitialized(len); + it.ExtractPlainDataAndAdvance(data.Detach(), data.size()); + UNIT_ASSERT_VALUES_EQUAL(data, buffer.substr(0, len)); + UNIT_ASSERT_VALUES_EQUAL(RopeToString(TRope(it, rope.End())), buffer.substr(len)); + buffer = buffer.substr(len); + remain -= len; + } + } + } + + Y_UNIT_TEST(FetchFrontPlain) { + char s[10]; + char *data = s; + size_t remain = sizeof(s); + TRope rope = TRope(TString("HELLO")); + UNIT_ASSERT(!rope.FetchFrontPlain(&data, &remain)); + UNIT_ASSERT(!rope); + rope.Insert(rope.End(), TRope(TString("WORLD!!!"))); + UNIT_ASSERT(rope.FetchFrontPlain(&data, &remain)); + UNIT_ASSERT(!remain); + UNIT_ASSERT(rope.GetSize() == 3); + UNIT_ASSERT_VALUES_EQUAL(rope.ConvertToString(), "!!!"); + UNIT_ASSERT(!strncmp(s, "HELLOWORLD", 10)); + } + + Y_UNIT_TEST(Glueing) { + TRope rope = CreateRope(Text, 10); + for (size_t begin = 0; begin <= Text.size(); ++begin) { + for (size_t end = begin; end <= Text.size(); ++end) { + TString repr = rope.DebugString(); + TRope temp = rope.Extract(rope.Position(begin), rope.Position(end)); + rope.Insert(rope.Position(begin), std::move(temp)); + 
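// extracting a range and re-inserting it at the same position should glue the chunks back, restoring the original layout +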
UNIT_ASSERT_VALUES_EQUAL(repr, rope.DebugString()); + UNIT_ASSERT_VALUES_EQUAL(RopeToString(rope), Text); + } + } + } + + Y_UNIT_TEST(IterWalk) { + TRope rope = CreateRope(Text, 10); + for (size_t step1 = 0; step1 <= rope.GetSize(); ++step1) { + for (size_t step2 = 0; step2 <= step1; ++step2) { + TRope::TConstIterator iter = rope.Begin(); + iter += step1; + iter -= step2; + UNIT_ASSERT(iter == rope.Position(step1 - step2)); + } + } + } + + Y_UNIT_TEST(Compare) { + auto check = [](const TString& x, const TString& y) { + const TRope xRope = CreateRope(x, 7); + const TRope yRope = CreateRope(y, 11); + UNIT_ASSERT_VALUES_EQUAL(xRope == yRope, x == y); + UNIT_ASSERT_VALUES_EQUAL(xRope != yRope, x != y); + UNIT_ASSERT_VALUES_EQUAL(xRope < yRope, x < y); + UNIT_ASSERT_VALUES_EQUAL(xRope <= yRope, x <= y); + UNIT_ASSERT_VALUES_EQUAL(xRope > yRope, x > y); + UNIT_ASSERT_VALUES_EQUAL(xRope >= yRope, x >= y); + }; + + TVector<TString> pool; + for (size_t k = 0; k < 10; ++k) { + size_t len = RandomNumber<size_t>(100) + 100; + TString s = TString::Uninitialized(len); + char *p = s.Detach(); + for (size_t j = 0; j < len; ++j) { + *p++ = RandomNumber<unsigned char>(); + } + pool.push_back(std::move(s)); + } + + for (const TString& x : pool) { + for (const TString& y : pool) { + check(x, y); + } + } + } + +} diff --git a/library/cpp/actors/util/should_continue.cpp b/library/cpp/actors/util/should_continue.cpp new file mode 100644 index 0000000000..258e6a0aff --- /dev/null +++ b/library/cpp/actors/util/should_continue.cpp @@ -0,0 +1,23 @@ +#include "should_continue.h" + +void TProgramShouldContinue::ShouldRestart() { + AtomicSet(State, Restart); +} + +void TProgramShouldContinue::ShouldStop(int returnCode) { + AtomicSet(ReturnCode, returnCode); + AtomicSet(State, Stop); +} + +TProgramShouldContinue::EState TProgramShouldContinue::PollState() { + return static_cast<EState>(AtomicGet(State)); +} + +int TProgramShouldContinue::GetReturnCode() { + return static_cast<int>(AtomicGet(ReturnCode)); +} + +void TProgramShouldContinue::Reset() { + AtomicSet(ReturnCode, 0); + AtomicSet(State, Continue); +} diff --git a/library/cpp/actors/util/should_continue.h b/library/cpp/actors/util/should_continue.h new file mode 100644 index 0000000000..76acc40dc4 --- /dev/null +++ b/library/cpp/actors/util/should_continue.h @@ -0,0 +1,22 @@ +#pragma once +#include "defs.h" + +class TProgramShouldContinue { +public: + enum EState { + Continue, + Stop, + Restart, + }; + + void ShouldRestart(); + void ShouldStop(int returnCode = 0); + + EState PollState(); + int GetReturnCode(); + + void Reset(); +private: + TAtomic ReturnCode = 0; + TAtomic State = Continue; +}; diff --git a/library/cpp/actors/util/thread.h b/library/cpp/actors/util/thread.h new file mode 100644 index 0000000000..d742c8c585 --- /dev/null +++ b/library/cpp/actors/util/thread.h @@ -0,0 +1,25 @@ +#pragma once + +#include <util/generic/strbuf.h> +#include <util/stream/str.h> +#include <util/system/execpath.h> +#include <util/system/thread.h> +#include <time.h> + +inline void SetCurrentThreadName(const TString& name, + const ui32 maxCharsFromProcessName = 8) { +#if defined(_linux_) + // Linux limits thread names to 15 characters + '\0' + + TStringBuf procName(GetExecPath()); + procName = procName.RNextTok('/'); + procName = procName.SubStr(0, maxCharsFromProcessName); + + TStringStream linuxName; + linuxName << procName << "."
<< name; + TThread::SetCurrentThreadName(linuxName.Str().data()); +#else + Y_UNUSED(maxCharsFromProcessName); + TThread::SetCurrentThreadName(name.data()); +#endif +} diff --git a/library/cpp/actors/util/threadparkpad.cpp b/library/cpp/actors/util/threadparkpad.cpp new file mode 100644 index 0000000000..74069ff15b --- /dev/null +++ b/library/cpp/actors/util/threadparkpad.cpp @@ -0,0 +1,148 @@ +#include "threadparkpad.h" +#include <util/system/winint.h> + +#ifdef _linux_ + +#include "futex.h" + +namespace NActors { + class TThreadParkPad::TImpl { + volatile bool Interrupted; + int Futex; + + public: + TImpl() + : Interrupted(false) + , Futex(0) + { + } + ~TImpl() { + } + + bool Park() noexcept { + __atomic_fetch_sub(&Futex, 1, __ATOMIC_SEQ_CST); + while (__atomic_load_n(&Futex, __ATOMIC_ACQUIRE) == -1) + SysFutex(&Futex, FUTEX_WAIT_PRIVATE, -1, nullptr, nullptr, 0); + return IsInterrupted(); + } + + void Unpark() noexcept { + const int old = __atomic_fetch_add(&Futex, 1, __ATOMIC_SEQ_CST); + if (old == -1) + SysFutex(&Futex, FUTEX_WAKE_PRIVATE, -1, nullptr, nullptr, 0); + } + + void Interrupt() noexcept { + __atomic_store_n(&Interrupted, true, __ATOMIC_SEQ_CST); + Unpark(); + } + + bool IsInterrupted() const noexcept { + return __atomic_load_n(&Interrupted, __ATOMIC_ACQUIRE); + } + }; + +#elif defined _win32_ +#include <util/generic/bt_exception.h> +#include <util/generic/yexception.h> + +namespace NActors { + class TThreadParkPad::TImpl { + TAtomic Interrupted; + HANDLE EvHandle; + + public: + TImpl() + : Interrupted(false) + { + EvHandle = ::CreateEvent(0, false, false, 0); + if (!EvHandle) + ythrow TWithBackTrace<yexception>() << "::CreateEvent failed"; + } + ~TImpl() { + if (EvHandle) + ::CloseHandle(EvHandle); + } + + bool Park() noexcept { + ::WaitForSingleObject(EvHandle, INFINITE); + return AtomicGet(Interrupted); + } + + void Unpark() noexcept { + ::SetEvent(EvHandle); + } + + void Interrupt() noexcept { + AtomicSet(Interrupted, true); + Unpark(); + } + + bool IsInterrupted() const noexcept { + return AtomicGet(Interrupted); + } + }; + +#else + +#include <util/system/event.h> + +namespace NActors { + class TThreadParkPad::TImpl { + TAtomic Interrupted; + TSystemEvent Ev; + + public: + TImpl() + : Interrupted(false) + , Ev(TSystemEvent::rAuto) + { + } + ~TImpl() { + } + + bool Park() noexcept { + Ev.Wait(); + return AtomicGet(Interrupted); + } + + void Unpark() noexcept { + Ev.Signal(); + } + + void Interrupt() noexcept { + AtomicSet(Interrupted, true); + Unpark(); + } + + bool IsInterrupted() const noexcept { + return AtomicGet(Interrupted); + } + }; +#endif + + TThreadParkPad::TThreadParkPad() + : Impl(new TThreadParkPad::TImpl()) + { + } + + TThreadParkPad::~TThreadParkPad() { + } + + bool TThreadParkPad::Park() noexcept { + return Impl->Park(); + } + + void TThreadParkPad::Unpark() noexcept { + Impl->Unpark(); + } + + void TThreadParkPad::Interrupt() noexcept { + Impl->Interrupt(); + } + + bool TThreadParkPad::Interrupted() const noexcept { + return Impl->IsInterrupted(); + } + +} diff --git a/library/cpp/actors/util/threadparkpad.h b/library/cpp/actors/util/threadparkpad.h new file mode 100644 index 0000000000..5b574ccf34 --- /dev/null +++ b/library/cpp/actors/util/threadparkpad.h @@ -0,0 +1,21 @@ +#pragma once + +#include <util/generic/ptr.h> + +namespace NActors { + class TThreadParkPad { + private: + class TImpl; + THolder<TImpl> Impl; + + public: + TThreadParkPad(); + ~TThreadParkPad(); + + bool Park() noexcept; + void Unpark() noexcept; + void Interrupt() noexcept; + bool 
Interrupted() const noexcept; + }; + +} diff --git a/library/cpp/actors/util/ticket_lock.h b/library/cpp/actors/util/ticket_lock.h new file mode 100644 index 0000000000..3b1fa80393 --- /dev/null +++ b/library/cpp/actors/util/ticket_lock.h @@ -0,0 +1,48 @@ +#pragma once + +#include "intrinsics.h" +#include <util/system/guard.h> +#include <util/system/yassert.h> + +class TTicketLock : TNonCopyable { + ui32 TicketIn; + ui32 TicketOut; + +public: + TTicketLock() + : TicketIn(0) + , TicketOut(0) + { + } + + void Release() noexcept { + AtomicUi32Increment(&TicketOut); + } + + ui32 Acquire() noexcept { + ui32 revolves = 0; + const ui32 ticket = AtomicUi32Increment(&TicketIn) - 1; + while (ticket != AtomicLoad(&TicketOut)) { + Y_VERIFY_DEBUG(ticket >= AtomicLoad(&TicketOut)); + SpinLockPause(); + ++revolves; + } + return revolves; + } + + bool TryAcquire() noexcept { + const ui32 x = AtomicLoad(&TicketOut); + if (x == AtomicLoad(&TicketIn) && AtomicUi32Cas(&TicketIn, x + 1, x)) + return true; + else + return false; + } + + bool IsLocked() noexcept { + const ui32 ticketIn = AtomicLoad(&TicketIn); + const ui32 ticketOut = AtomicLoad(&TicketOut); + return (ticketIn != ticketOut); + } + + typedef ::TGuard<TTicketLock> TGuard; +}; diff --git a/library/cpp/actors/util/timerfd.h b/library/cpp/actors/util/timerfd.h new file mode 100644 index 0000000000..3189e2a672 --- /dev/null +++ b/library/cpp/actors/util/timerfd.h @@ -0,0 +1,65 @@ +#pragma once + +#include "datetime.h" + +#include <util/generic/noncopyable.h> + +#ifdef _linux_ + +#include <util/system/yassert.h> +#include <errno.h> +#include <sys/timerfd.h> + +struct TTimerFd: public TNonCopyable { + int Fd; + + TTimerFd() { + Fd = timerfd_create(CLOCK_MONOTONIC, 0); + Y_VERIFY(Fd != -1, "timerfd_create(CLOCK_MONOTONIC, 0) -> -1; errno:%d: %s", int(errno), strerror(errno)); + } + + ~TTimerFd() { + close(Fd); + } + + void Set(ui64 ts) { + ui64 now = GetCycleCountFast(); + Arm(now >= ts? 
1: NHPTimer::GetSeconds(ts - now) * 1e9); + } + + void Reset() { + Arm(0); // disarm timer + } + + void Wait() { + ui64 expirations; + ssize_t s = read(Fd, &expirations, sizeof(ui64)); + Y_UNUSED(s); // Y_VERIFY(s == sizeof(ui64)); + } + + void Wake() { + Arm(1); + } +private: + void Arm(ui64 ns) { + struct itimerspec spec; + spec.it_value.tv_sec = ns / 1'000'000'000; + spec.it_value.tv_nsec = ns % 1'000'000'000; + spec.it_interval.tv_sec = 0; + spec.it_interval.tv_nsec = 0; + int ret = timerfd_settime(Fd, 0, &spec, nullptr); + Y_VERIFY(ret != -1, "timerfd_settime(%d, 0, %" PRIu64 "ns, 0) -> %d; errno:%d: %s", Fd, ns, ret, int(errno), strerror(errno)); + } +}; + +#else + +struct TTimerFd: public TNonCopyable { + int Fd = 0; + void Set(ui64) {} + void Reset() {} + void Wait() {} + void Wake() {} +}; + +#endif diff --git a/library/cpp/actors/util/unordered_cache.h b/library/cpp/actors/util/unordered_cache.h new file mode 100644 index 0000000000..76f036c0cf --- /dev/null +++ b/library/cpp/actors/util/unordered_cache.h @@ -0,0 +1,201 @@ +#pragma once + +#include "defs.h" +#include "queue_chunk.h" + +template <typename T, ui32 Size = 512, ui32 ConcurrencyFactor = 1, typename TChunk = TQueueChunk<T, Size>> +class TUnorderedCache : TNonCopyable { + static_assert(std::is_integral<T>::value || std::is_pointer<T>::value, "expect std::is_integral<T>::value || std::is_pointer<T>::value"); + +public: + static constexpr ui32 Concurrency = ConcurrencyFactor * 4; + +private: + struct TReadSlot { + TChunk* volatile ReadFrom; + volatile ui32 ReadPosition; + char Padding[64 - sizeof(TChunk*) - sizeof(ui32)]; // 1 slot per cache line + }; + + struct TWriteSlot { + TChunk* volatile WriteTo; + volatile ui32 WritePosition; + char Padding[64 - sizeof(TChunk*) - sizeof(ui32)]; // 1 slot per cache line + }; + + static_assert(sizeof(TReadSlot) == 64, "expect sizeof(TReadSlot) == 64"); + static_assert(sizeof(TWriteSlot) == 64, "expect sizeof(TWriteSlot) == 64"); + +private: + TReadSlot ReadSlots[Concurrency]; + TWriteSlot WriteSlots[Concurrency]; + + static_assert(sizeof(TChunk*) == sizeof(TAtomic), "expect sizeof(TChunk*) == sizeof(TAtomic)"); + +private: + struct TLockedWriter { + TWriteSlot* Slot; + TChunk* WriteTo; + + TLockedWriter() + : Slot(nullptr) + , WriteTo(nullptr) + { } + + TLockedWriter(TWriteSlot* slot, TChunk* writeTo) + : Slot(slot) + , WriteTo(writeTo) + { } + + ~TLockedWriter() noexcept { + Drop(); + } + + void Drop() { + if (Slot) { + AtomicStore(&Slot->WriteTo, WriteTo); + Slot = nullptr; + } + } + + TLockedWriter(const TLockedWriter&) = delete; + TLockedWriter& operator=(const TLockedWriter&) = delete; + + TLockedWriter(TLockedWriter&& rhs) + : Slot(rhs.Slot) + , WriteTo(rhs.WriteTo) + { + rhs.Slot = nullptr; + } + + TLockedWriter& operator=(TLockedWriter&& rhs) { + if (Y_LIKELY(this != &rhs)) { + Drop(); + Slot = rhs.Slot; + WriteTo = rhs.WriteTo; + rhs.Slot = nullptr; + } + return *this; + } + }; + +private: + TLockedWriter LockWriter(ui64 writerRotation) { + ui32 cycle = 0; + for (;;) { + TWriteSlot* slot = &WriteSlots[writerRotation % Concurrency]; + if (AtomicLoad(&slot->WriteTo) != nullptr) { + if (TChunk* writeTo = AtomicSwap(&slot->WriteTo, nullptr)) { + return TLockedWriter(slot, writeTo); + } + } + ++writerRotation; + + // Do a spinlock pause after a full cycle + if (++cycle == Concurrency) { + SpinLockPause(); + cycle = 0; + } + } + } + + void WriteOne(TLockedWriter& lock, T x) { + Y_VERIFY_DEBUG(x != 0); + + const ui32 pos = AtomicLoad(&lock.Slot->WritePosition); + if (pos != 
TChunk::EntriesCount) { + AtomicStore(&lock.Slot->WritePosition, pos + 1); + AtomicStore(&lock.WriteTo->Entries[pos], x); + } else { + TChunk* next = new TChunk(); + AtomicStore(&next->Entries[0], x); + AtomicStore(&lock.Slot->WritePosition, 1u); + AtomicStore(&lock.WriteTo->Next, next); + lock.WriteTo = next; + } + } + +public: + TUnorderedCache() { + for (ui32 i = 0; i < Concurrency; ++i) { + ReadSlots[i].ReadFrom = new TChunk(); + ReadSlots[i].ReadPosition = 0; + + WriteSlots[i].WriteTo = ReadSlots[i].ReadFrom; + WriteSlots[i].WritePosition = 0; + } + } + + ~TUnorderedCache() { + Y_VERIFY(!Pop(0)); + + for (ui64 i = 0; i < Concurrency; ++i) { + if (ReadSlots[i].ReadFrom) { + delete ReadSlots[i].ReadFrom; + ReadSlots[i].ReadFrom = nullptr; + } + WriteSlots[i].WriteTo = nullptr; + } + } + + T Pop(ui64 readerRotation) noexcept { + ui64 readerIndex = readerRotation; + const ui64 endIndex = readerIndex + Concurrency; + for (; readerIndex != endIndex; ++readerIndex) { + TReadSlot* slot = &ReadSlots[readerIndex % Concurrency]; + if (AtomicLoad(&slot->ReadFrom) != nullptr) { + if (TChunk* readFrom = AtomicSwap(&slot->ReadFrom, nullptr)) { + const ui32 pos = AtomicLoad(&slot->ReadPosition); + if (pos != TChunk::EntriesCount) { + if (T ret = AtomicLoad(&readFrom->Entries[pos])) { + AtomicStore(&slot->ReadPosition, pos + 1); + AtomicStore(&slot->ReadFrom, readFrom); // release lock with same chunk + return ret; // found, return + } else { + AtomicStore(&slot->ReadFrom, readFrom); // release lock with same chunk + } + } else if (TChunk* next = AtomicLoad(&readFrom->Next)) { + if (T ret = AtomicLoad(&next->Entries[0])) { + AtomicStore(&slot->ReadPosition, 1u); + AtomicStore(&slot->ReadFrom, next); // release lock with next chunk + delete readFrom; + return ret; + } else { + AtomicStore(&slot->ReadPosition, 0u); + AtomicStore(&slot->ReadFrom, next); // release lock with new chunk + delete readFrom; + } + } else { + // nothing in old chunk and no next chunk, just release lock with old chunk + AtomicStore(&slot->ReadFrom, readFrom); + } + } + } + } + + return 0; // got nothing after full cycle, return + } + + void Push(T x, ui64 writerRotation) { + TLockedWriter lock = LockWriter(writerRotation); + WriteOne(lock, x); + } + + void PushBulk(T* x, ui32 xcount, ui64 writerRotation) { + for (;;) { + // Fill no more than one queue chunk per round + const ui32 xround = Min(xcount, (ui32)TChunk::EntriesCount); + + { + TLockedWriter lock = LockWriter(writerRotation++); + for (T* end = x + xround; x != end; ++x) + WriteOne(lock, *x); + } + + if (xcount <= TChunk::EntriesCount) + break; + + xcount -= TChunk::EntriesCount; + } + } +}; diff --git a/library/cpp/actors/util/unordered_cache_ut.cpp b/library/cpp/actors/util/unordered_cache_ut.cpp new file mode 100644 index 0000000000..37865f2f91 --- /dev/null +++ b/library/cpp/actors/util/unordered_cache_ut.cpp @@ -0,0 +1,138 @@ +#include "unordered_cache.h" + +#include <library/cpp/testing/unittest/registar.h> +#include <util/random/random.h> +#include <util/system/hp_timer.h> +#include <util/system/sanitizers.h> +#include <util/system/thread.h> + +Y_UNIT_TEST_SUITE(UnorderedCache) { + + void DoOnePushOnePop(ui64 count) { + TUnorderedCache<ui64> queue; + + ui64 readRotation = 0; + ui64 writeRotation = 0; + + auto popped = queue.Pop(readRotation++); + UNIT_ASSERT_VALUES_EQUAL(popped, 0u); + + for (ui64 i = 0; i < count; ++i) { + queue.Push(i + 1, writeRotation++); + popped = queue.Pop(readRotation++); + UNIT_ASSERT_VALUES_EQUAL(popped, i + 1); + + popped =
+            popped = queue.Pop(readRotation++);
+            UNIT_ASSERT_VALUES_EQUAL(popped, 0u);
+        }
+    }
+
+    Y_UNIT_TEST(OnePushOnePop) {
+        DoOnePushOnePop(1);
+    }
+
+    Y_UNIT_TEST(OnePushOnePop_Repeat1M) {
+        DoOnePushOnePop(1000000);
+    }
+
+    /**
+     * Simplified thread spawning for testing
+     */
+    class TWorkerThread : public ISimpleThread {
+    private:
+        std::function<void()> Func;
+        double Time = 0.0;
+
+    public:
+        TWorkerThread(std::function<void()> func)
+            : Func(std::move(func))
+        { }
+
+        double GetTime() const {
+            return Time;
+        }
+
+        static THolder<TWorkerThread> Spawn(std::function<void()> func) {
+            THolder<TWorkerThread> thread = MakeHolder<TWorkerThread>(std::move(func));
+            thread->Start();
+            return thread;
+        }
+
+    private:
+        void* ThreadProc() noexcept override {
+            THPTimer timer;
+            Func();
+            Time = timer.Passed();
+            return nullptr;
+        }
+    };
+
+    void DoConcurrentPushPop(size_t threads, ui64 perThreadCount) {
+        // Concurrency factor 4 is up to 16 threads
+        TUnorderedCache<ui64, 512, 4> queue;
+
+        auto workerFunc = [&](size_t threadIndex) {
+            ui64 readRotation = 0;
+            ui64 writeRotation = 0;
+            ui64 readsDone = 0;
+            ui64 writesDone = 0;
+            for (;;) {
+                bool canRead = readsDone < writesDone;
+                bool canWrite = writesDone < perThreadCount;
+                if (!canRead && !canWrite) {
+                    break;
+                }
+                if (canRead && canWrite) {
+                    // Randomly choose between read and write
+                    if (RandomNumber<ui64>(2)) {
+                        canRead = false;
+                    } else {
+                        canWrite = false;
+                    }
+                }
+                if (canRead) {
+                    ui64 popped = queue.Pop(readRotation++);
+                    if (popped) {
+                        ++readsDone;
+                    }
+                }
+                if (canWrite) {
+                    queue.Push(1 + writesDone * threads + threadIndex, writeRotation++);
+                    ++writesDone;
+                }
+            }
+        };
+
+        TVector<THolder<TWorkerThread>> workers(threads);
+        for (size_t i = 0; i < threads; ++i) {
+            workers[i] = TWorkerThread::Spawn([workerFunc, i]() {
+                workerFunc(i);
+            });
+        }
+
+        double maxTime = 0;
+        for (size_t i = 0; i < threads; ++i) {
+            workers[i]->Join();
+            maxTime = Max(maxTime, workers[i]->GetTime());
+        }
+
+        auto popped = queue.Pop(0);
+        UNIT_ASSERT_VALUES_EQUAL(popped, 0u);
+
+        Cerr << "Concurrent with " << threads << " threads: " << maxTime << " seconds" << Endl;
+    }
+
+    void DoConcurrentPushPop_3times(size_t threads, ui64 perThreadCount) {
+        for (size_t i = 0; i < 3; ++i) {
+            DoConcurrentPushPop(threads, perThreadCount);
+        }
+    }
+
+    static constexpr ui64 PER_THREAD_COUNT = NSan::PlainOrUnderSanitizer(1000000, 100000);
+
+    Y_UNIT_TEST(ConcurrentPushPop_1thread) { DoConcurrentPushPop_3times(1, PER_THREAD_COUNT); }
+    Y_UNIT_TEST(ConcurrentPushPop_2threads) { DoConcurrentPushPop_3times(2, PER_THREAD_COUNT); }
+    Y_UNIT_TEST(ConcurrentPushPop_4threads) { DoConcurrentPushPop_3times(4, PER_THREAD_COUNT); }
+    Y_UNIT_TEST(ConcurrentPushPop_8threads) { DoConcurrentPushPop_3times(8, PER_THREAD_COUNT); }
+    Y_UNIT_TEST(ConcurrentPushPop_16threads) { DoConcurrentPushPop_3times(16, PER_THREAD_COUNT); }
+}
diff --git a/library/cpp/actors/util/ut/ya.make b/library/cpp/actors/util/ut/ya.make
new file mode 100644
index 0000000000..3b08b77984
--- /dev/null
+++ b/library/cpp/actors/util/ut/ya.make
@@ -0,0 +1,18 @@
+UNITTEST_FOR(library/cpp/actors/util)
+
+IF (WITH_VALGRIND)
+    TIMEOUT(600)
+    SIZE(MEDIUM)
+ENDIF()
+
+OWNER(
+    alexvru
+    g:kikimr
+)
+
+SRCS(
+    rope_ut.cpp
+    unordered_cache_ut.cpp
+)
+
+END()
diff --git a/library/cpp/actors/util/ya.make b/library/cpp/actors/util/ya.make
new file mode 100644
index 0000000000..37488c3962
--- /dev/null
+++ b/library/cpp/actors/util/ya.make
@@ -0,0 +1,37 @@
+LIBRARY()
+
+OWNER(
+    ddoarn
+    g:kikimr
+)
+
+SRCS(
+    affinity.cpp
+    affinity.h
+    cpumask.h
+    datetime.h
+    defs.h
+    funnel_queue.h
+    futex.h
+    intrinsics.h
+    local_process_key.h
+    named_tuple.h
+    queue_chunk.h
+    queue_oneone_inplace.h
+    recentwnd.h
+    rope.h
+    should_continue.cpp
+    should_continue.h
+    thread.h
+    threadparkpad.cpp
+    threadparkpad.h
+    ticket_lock.h
+    timerfd.h
+    unordered_cache.h
+)
+
+PEERDIR(
+    util
+)
+
+END()
diff --git a/library/cpp/actors/wilson/wilson_event.h b/library/cpp/actors/wilson/wilson_event.h
new file mode 100644
index 0000000000..7d89c33b51
--- /dev/null
+++ b/library/cpp/actors/wilson/wilson_event.h
@@ -0,0 +1,181 @@
+#pragma once
+
+#include "wilson_trace.h"
+
+#include <library/cpp/string_utils/base64/base64.h>
+
+#include <library/cpp/actors/core/log.h>
+
+namespace NWilson {
+#if !defined(_win_)
+// works only for those compilers that treat C++ as ISO/IEC 14882, not as their own standard
+
+#define __UNROLL_PARAMS_8(N, F, X, ...) \
+    F(X, N - 8)                         \
+    __UNROLL_PARAMS_7(N, F, ##__VA_ARGS__)
+#define __UNROLL_PARAMS_7(N, F, X, ...) \
+    F(X, N - 7)                         \
+    __UNROLL_PARAMS_6(N, F, ##__VA_ARGS__)
+#define __UNROLL_PARAMS_6(N, F, X, ...) \
+    F(X, N - 6)                         \
+    __UNROLL_PARAMS_5(N, F, ##__VA_ARGS__)
+#define __UNROLL_PARAMS_5(N, F, X, ...) \
+    F(X, N - 5)                         \
+    __UNROLL_PARAMS_4(N, F, ##__VA_ARGS__)
+#define __UNROLL_PARAMS_4(N, F, X, ...) \
+    F(X, N - 4)                         \
+    __UNROLL_PARAMS_3(N, F, ##__VA_ARGS__)
+#define __UNROLL_PARAMS_3(N, F, X, ...) \
+    F(X, N - 3)                         \
+    __UNROLL_PARAMS_2(N, F, ##__VA_ARGS__)
+#define __UNROLL_PARAMS_2(N, F, X, ...) \
+    F(X, N - 2)                         \
+    __UNROLL_PARAMS_1(N, F, ##__VA_ARGS__)
+#define __UNROLL_PARAMS_1(N, F, X) F(X, N - 1)
+#define __UNROLL_PARAMS_0(N, F)
+#define __EX(...) __VA_ARGS__
+#define __NUM_PARAMS(...) __NUM_PARAMS_SELECT_N(__VA_ARGS__, __NUM_PARAMS_SEQ)
+#define __NUM_PARAMS_SELECT_N(...) __EX(__NUM_PARAMS_SELECT(__VA_ARGS__))
+#define __NUM_PARAMS_SELECT(X, _1, _2, _3, _4, _5, _6, _7, _8, N, ...) N
+#define __NUM_PARAMS_SEQ 8, 7, 6, 5, 4, 3, 2, 1, 0, ERROR
+#define __CAT(X, Y) X##Y
+#define __UNROLL_PARAMS_N(N, F, ...) __EX(__CAT(__UNROLL_PARAMS_, N)(N, F, ##__VA_ARGS__))
+#define __UNROLL_PARAMS(F, ...) __UNROLL_PARAMS_N(__NUM_PARAMS(X, ##__VA_ARGS__), F, ##__VA_ARGS__)
+#define __EX2(F, X, INDEX) __INVOKE(F, __EX X, INDEX)
+#define __INVOKE(F, ...) F(__VA_ARGS__)
+
+#define __DECLARE_PARAM(X, INDEX) __EX2(__DECLARE_PARAM_X, X, INDEX)
+#define __DECLARE_PARAM_X(TYPE, NAME, INDEX)                    \
+    static const struct T##NAME##Param                          \
+        : ::NWilson::TParamBinder<INDEX, TYPE> {                \
+        T##NAME##Param() {                                      \
+        }                                                       \
+        using ::NWilson::TParamBinder<INDEX, TYPE>::operator=;  \
+    } NAME;
+
+#define __TUPLE_PARAM(X, INDEX) __EX2(__TUPLE_PARAM_X, X, INDEX)
+#define __TUPLE_PARAM_X(TYPE, NAME, INDEX) TYPE,
+
+#define __OUTPUT_PARAM(X, INDEX) __EX2(__OUTPUT_PARAM_X, X, INDEX)
+#define __OUTPUT_PARAM_X(TYPE, NAME, INDEX) str << (INDEX ? ", " : "") << #NAME << "# " << std::get<INDEX>(ParamPack);
+
+#define __FILL_PARAM(P, INDEX)                     \
+    do {                                           \
+        const auto& boundParam = (NParams::P);     \
+        boundParam.Apply(event.ParamPack);         \
+    } while (false);
+
+#define DECLARE_WILSON_EVENT(EVENT_NAME, ...)                          \
+    namespace N##EVENT_NAME##Params {                                  \
+        __UNROLL_PARAMS(__DECLARE_PARAM, ##__VA_ARGS__)                \
+                                                                       \
+        using TParamPack = std::tuple<                                 \
+            __UNROLL_PARAMS(__TUPLE_PARAM, ##__VA_ARGS__) char>;       \
+    }                                                                  \
+    struct T##EVENT_NAME {                                             \
+        using TParamPack = N##EVENT_NAME##Params::TParamPack;          \
+        TParamPack ParamPack;                                          \
+                                                                       \
+        void Output(IOutputStream& str) {                              \
+            str << #EVENT_NAME << "{";                                 \
+            __UNROLL_PARAMS(__OUTPUT_PARAM, ##__VA_ARGS__)             \
+            str << "}";                                                \
+        }                                                              \
+    };
+
+    template <size_t INDEX, typename T>
+    class TBoundParam {
+        mutable T Value;
+
+    public:
+        TBoundParam(T&& value)
+            : Value(std::move(value))
+        {
+        }
+
+        template <typename TParamPack>
+        void Apply(TParamPack& pack) const {
+            std::get<INDEX>(pack) = std::move(Value);
+        }
+    };
+
+    template <size_t INDEX, typename T>
+    struct TParamBinder {
+        template <typename TValue>
+        TBoundParam<INDEX, T> operator=(const TValue& value) const {
+            return TBoundParam<INDEX, T>(TValue(value));
+        }
+
+        template <typename TValue>
+        TBoundParam<INDEX, T> operator=(TValue&& value) const {
+            return TBoundParam<INDEX, T>(std::move(value));
+        }
+    };
+
+// generate a wilson event carrying the parent TRACE_ID; the span TRACE_ID is replaced with a child span, which becomes the parent of subsequently logged events
+#define WILSON_TRACE(CTX, TRACE_ID, EVENT_NAME, ...)                   \
+    if (::NWilson::TraceEnabled(CTX)) {                                \
+        ::NWilson::TTraceId* __traceId = (TRACE_ID);                   \
+        if (__traceId && *__traceId) {                                 \
+            TInstant now = Now();                                      \
+            T##EVENT_NAME event;                                       \
+            namespace NParams = N##EVENT_NAME##Params;                 \
+            __UNROLL_PARAMS(__FILL_PARAM, ##__VA_ARGS__)               \
+            ::NWilson::TraceEvent((CTX), __traceId, event, now);       \
+        }                                                              \
+    }
+
+    inline ui32 GetNodeId(const NActors::TActorSystem& actorSystem) {
+        return actorSystem.NodeId;
+    }
+    inline ui32 GetNodeId(const NActors::TActivationContext& ac) {
+        return GetNodeId(*ac.ExecutorThread.ActorSystem);
+    }
+
+    constexpr ui32 WilsonComponentId = 430; // kikimrservices: wilson
+
+    template <typename TActorSystem>
+    bool TraceEnabled(const TActorSystem& ctx) {
+        const auto* loggerSettings = ctx.LoggerSettings();
+        return loggerSettings && loggerSettings->Satisfies(NActors::NLog::PRI_DEBUG, WilsonComponentId);
+    }
+
+    template <typename TActorSystem, typename TEvent>
+    void TraceEvent(const TActorSystem& actorSystem, TTraceId* traceId, TEvent&& event, TInstant timestamp) {
+        // ensure that we are not using an obsolete TraceId
+        traceId->CheckConsistency();
+
+        // store parent id (for logging) and generate child trace id
+        TTraceId parentTraceId(std::move(*traceId));
+        *traceId = parentTraceId.Span();
+
+        // create encoded string buffer containing the timestamp
+        const ui64 timestampValue = timestamp.GetValue();
+        const size_t base64size = Base64EncodeBufSize(sizeof(timestampValue));
+        char base64[base64size];
+        char* end = Base64Encode(base64, reinterpret_cast<const ui8*>(&timestampValue), sizeof(timestampValue));
+
+        // cut trailing padding character to save some space
+        Y_VERIFY(end > base64 && end[-1] == '=');
+        --end;
+
+        // generate log record
+        TString finalMessage;
+        TStringOutput s(finalMessage);
+        s << GetNodeId(actorSystem) << " " << TStringBuf(base64, end) << " ";
+        traceId->Output(s, parentTraceId);
+        s << " ";
+        event.Output(s);
+
+        // output wilson event FIXME: special facility for wilson events w/binary serialization
+        NActors::MemLogAdapter(actorSystem, NActors::NLog::PRI_DEBUG, WilsonComponentId, std::move(finalMessage));
+    }
+
+#else
+
+#define DECLARE_WILSON_EVENT(...)
+#define WILSON_TRACE(...)
+
+#endif
+
+} // NWilson
diff --git a/library/cpp/actors/wilson/wilson_trace.h b/library/cpp/actors/wilson/wilson_trace.h
new file mode 100644
index 0000000000..3d1ca50562
--- /dev/null
+++ b/library/cpp/actors/wilson/wilson_trace.h
@@ -0,0 +1,161 @@
+#pragma once
+
+#include <library/cpp/string_utils/base64/base64.h>
+
+#include <util/stream/output.h>
+#include <util/random/random.h>
+
+#include <util/string/printf.h>
+
+namespace NWilson {
+    class TTraceId {
+        ui64 TraceId; // Random id of topmost client request
+        ui64 SpanId;  // Span id of part of request currently being executed
+
+    private:
+        TTraceId(ui64 traceId, ui64 spanId)
+            : TraceId(traceId)
+            , SpanId(spanId)
+        {
+        }
+
+        static ui64 GenerateTraceId() {
+            ui64 traceId = 0;
+            while (!traceId) {
+                traceId = RandomNumber<ui64>();
+            }
+            return traceId;
+        }
+
+        static ui64 GenerateSpanId() {
+            return RandomNumber<ui64>();
+        }
+
+    public:
+        using TSerializedTraceId = char[2 * sizeof(ui64)];
+
+    public:
+        TTraceId()
+            : TraceId(0)
+            , SpanId(0)
+        {
+        }
+
+        explicit TTraceId(ui64 traceId)
+            : TraceId(traceId)
+            , SpanId(0)
+        {
+        }
+
+        TTraceId(const TSerializedTraceId& in)
+            : TraceId(reinterpret_cast<const ui64*>(in)[0])
+            , SpanId(reinterpret_cast<const ui64*>(in)[1])
+        {
+        }
+
+        // allow move semantics
+        TTraceId(TTraceId&& other)
+            : TraceId(other.TraceId)
+            , SpanId(other.SpanId)
+        {
+            other.TraceId = 0;
+            other.SpanId = 1; // explicitly mark invalid
+        }
+
+        TTraceId& operator=(TTraceId&& other) {
+            TraceId = other.TraceId;
+            SpanId = other.SpanId;
+            other.TraceId = 0;
+            other.SpanId = 1; // explicitly mark invalid
+            return *this;
+        }
+
+        // do not allow implicit copy of trace id
+        TTraceId(const TTraceId& other) = delete;
+        TTraceId& operator=(const TTraceId& other) = delete;
+
+        static TTraceId NewTraceId() {
+            return TTraceId(GenerateTraceId(), 0);
+        }
+
+        // create separate branch from this point
+        TTraceId SeparateBranch() const {
+            return Clone();
+        }
+
+        TTraceId Clone() const {
+            return TTraceId(TraceId, SpanId);
+        }
+
+        TTraceId Span() const {
+            return *this ? TTraceId(TraceId, GenerateSpanId()) : TTraceId();
+        }
+
+        ui64 GetTraceId() const {
+            return TraceId;
+        }
+
+        // Check if request tracing is enabled
+        operator bool() const {
+            return TraceId != 0;
+        }
+
+        // Output trace id into a string stream
+        void Output(IOutputStream& s, const TTraceId& parentTraceId) const {
+            union {
+                ui8 buffer[3 * sizeof(ui64)];
+                struct {
+                    ui64 traceId;
+                    ui64 spanId;
+                    ui64 parentSpanId;
+                } x;
+            };
+
+            x.traceId = TraceId;
+            x.spanId = SpanId;
+            x.parentSpanId = parentTraceId.SpanId;
+
+            const size_t base64size = Base64EncodeBufSize(sizeof(x));
+            char base64[base64size];
+            char* end = Base64Encode(base64, buffer, sizeof(x));
+            s << TStringBuf(base64, end);
+        }
+
+        // output just span id into stream
+        void OutputSpanId(IOutputStream& s) const {
+            const size_t base64size = Base64EncodeBufSize(sizeof(SpanId));
+            char base64[base64size];
+            char* end = Base64Encode(base64, reinterpret_cast<const ui8*>(&SpanId), sizeof(SpanId));
+
+            // cut trailing padding character
+            Y_VERIFY(end > base64 && end[-1] == '=');
+            --end;
+
+            s << TStringBuf(base64, end);
+        }
+
+        void CheckConsistency() {
+            // if TraceId is zero, then SpanId must be zero too
+            Y_VERIFY_DEBUG(*this || !SpanId);
+        }
+
+        friend bool operator==(const TTraceId& x, const TTraceId& y) {
+            return x.TraceId == y.TraceId && x.SpanId == y.SpanId;
+        }
+
+        TString ToString() const {
+            return Sprintf("%" PRIu64 ":%" PRIu64, TraceId, SpanId);
+        }
+
+        bool IsFromSameTree(const TTraceId& other) const {
+            return TraceId == other.TraceId;
+        }
+
+        void Serialize(TSerializedTraceId* out) {
+            ui64* p = reinterpret_cast<ui64*>(*out);
+            p[0] = TraceId;
+            p[1] = SpanId;
+        }
+    };
+
+}
diff --git a/library/cpp/actors/wilson/ya.make b/library/cpp/actors/wilson/ya.make
new file mode 100644
index 0000000000..e371f5061d
--- /dev/null
+++ b/library/cpp/actors/wilson/ya.make
@@ -0,0 +1,14 @@
+LIBRARY()
+
+PEERDIR(
+    library/cpp/string_utils/base64
+)
+
+OWNER(alexvru)
+
+SRCS(
+    wilson_event.h
+    wilson_trace.h
+)
+
+END()
diff --git a/library/cpp/actors/ya.make b/library/cpp/actors/ya.make
new file mode 100644
index 0000000000..737c7fbc18
--- /dev/null
+++ b/library/cpp/actors/ya.make
@@ -0,0 +1,16 @@
+RECURSE_FOR_TESTS(ut)
+
+RECURSE(
+    log_backend
+    core
+    dnsresolver
+    examples
+    memory_log
+    helpers
+    prof
+    protos
+    util
+    wilson
+    testlib
+    http
+)
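
A minimal usage sketch of `TUnorderedCache` from `library/cpp/actors/util/unordered_cache.h` above. It is a relaxed multi-producer/multi-consumer cache that trades FIFO ordering for low contention: each caller carries its own rotation counter, which picks the starting slot and spreads work across the cache-line-sized slots. The driver below is illustrative only (the `main` function and values are made up); the rotation-counter convention follows the unit tests.

    #include <library/cpp/actors/util/unordered_cache.h>

    #include <util/stream/output.h>
    #include <util/system/types.h>

    int main() {
        TUnorderedCache<ui64> cache; // ConcurrencyFactor = 1 -> 4 read and 4 write slots

        // Private per-caller rotation counters select the starting slot.
        ui64 writeRotation = 0;
        ui64 readRotation = 0;

        cache.Push(42, writeRotation++);
        cache.Push(43, writeRotation++);

        // Pop scans up to Concurrency slots from its starting point and
        // returns 0 when it finds nothing, which is why the value 0 (or a
        // null pointer for pointer types) must never be pushed.
        while (ui64 value = cache.Pop(readRotation++)) {
            Cout << value << Endl; // values may come back in any order
        }
        return 0;
    }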
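For `wilson_event.h` above, the two macros are meant to be used roughly as follows. The event name, its fields, and the surrounding function are hypothetical; only `DECLARE_WILSON_EVENT`, `WILSON_TRACE`, and the `ParamName = value` binding syntax come from the header. Tracing is a no-op unless the passed trace id is non-empty and the wilson log component satisfies `PRI_DEBUG`.

    #include <library/cpp/actors/wilson/wilson_event.h>

    // Hypothetical event: the macro generates NQueuePushedParams (one
    // TParamBinder-derived object per field) plus struct TQueuePushed
    // holding a std::tuple<ui32, TString, char> of the bound values.
    DECLARE_WILSON_EVENT(QueuePushed, (ui32, QueueSize), (TString, QueueName));

    void ReportPush(const NActors::TActorContext& ctx,
                    NWilson::TTraceId& traceId, ui32 size) {
        // Parameters are bound by name; WILSON_TRACE also replaces the span
        // id inside traceId with a child span, so later events chain to this one.
        WILSON_TRACE(ctx, &traceId, QueuePushed,
                     QueueSize = size, QueueName = TString("input"));
    }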
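And for `wilson_trace.h`, a sketch of the intended `TTraceId` lifecycle. Method names come from the header; the scenario itself is invented. Copying is deliberately deleted, so every fork of a trace is explicit, and the 16-byte `TSerializedTraceId` is the representation meant for crossing process boundaries.

    #include <library/cpp/actors/wilson/wilson_trace.h>

    #include <util/system/yassert.h>

    void TraceIdSketch() {
        // Root of a new request: non-zero trace id, span id 0.
        NWilson::TTraceId root = NWilson::TTraceId::NewTraceId();

        // Explicit fork for a parallel sub-request (copy ctor is deleted).
        NWilson::TTraceId branch = root.SeparateBranch();
        Y_VERIFY(branch.IsFromSameTree(root));

        // Child span under the same trace, e.g. for a nested operation.
        NWilson::TTraceId child = root.Span();
        Y_VERIFY(child.IsFromSameTree(root));

        // Fixed 16-byte representation for sending across the wire.
        NWilson::TTraceId::TSerializedTraceId wire;
        child.Serialize(&wire);
        NWilson::TTraceId restored(wire);
        Y_VERIFY(restored == child);
    }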